feat: rag初始版
This commit is contained in:
88
dao/dataset.go
Normal file
88
dao/dataset.go
Normal file
@@ -0,0 +1,88 @@
|
||||
package dao
|
||||
|
||||
import (
|
||||
"context"
|
||||
"rag/consts/public"
|
||||
"rag/model/dto"
|
||||
"rag/model/entity"
|
||||
|
||||
"gitea.com/red-future/common/db/gfdb"
|
||||
"github.com/gogf/gf/v2/database/gdb"
|
||||
"github.com/gogf/gf/v2/frame/g"
|
||||
"github.com/gogf/gf/v2/util/gconv"
|
||||
)
|
||||
|
||||
// datasetDao groups the database operations for the dataset table.
type datasetDao struct{}

// Dataset is the shared, package-level datasetDao instance.
var Dataset = &datasetDao{}
|
||||
|
||||
// Insert 插入数据集
|
||||
func (d *datasetDao) Insert(ctx context.Context, req *dto.CreateDatasetReq) (id int64, err error) {
|
||||
var res *entity.Dataset
|
||||
if err = gconv.Struct(req, &res); err != nil {
|
||||
return
|
||||
}
|
||||
r, err := gfdb.DB(ctx).Model(ctx, public.TableNameDataset).Data(&res).Insert()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
return r.LastInsertId()
|
||||
}
|
||||
|
||||
// Update 更新数据集
|
||||
func (d *datasetDao) Update(ctx context.Context, req *dto.UpdateDatasetReq) (rows int64, err error) {
|
||||
model := gfdb.DB(ctx).Model(ctx, public.TableNameDataset).OmitEmpty()
|
||||
if !g.IsEmpty(req.DocumentCount) {
|
||||
model.Data(entity.DatasetCol.DocumentCount, &gdb.Counter{
|
||||
Field: entity.DatasetCol.DocumentCount,
|
||||
Value: gconv.Float64(req.DocumentCount),
|
||||
})
|
||||
}
|
||||
if !g.IsEmpty(req.DocumentSize) {
|
||||
model.Data(entity.DatasetCol.DocumentSize, &gdb.Counter{
|
||||
Field: entity.DatasetCol.DocumentSize,
|
||||
Value: gconv.Float64(req.DocumentSize),
|
||||
})
|
||||
}
|
||||
r, err := model.Data(&req).Where(entity.DatasetCol.Id, req.Id).Update()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
return r.RowsAffected()
|
||||
}
|
||||
|
||||
// Delete 删除数据集
|
||||
func (d *datasetDao) Delete(ctx context.Context, req *dto.DeleteDatasetReq) (rows int64, err error) {
|
||||
r, err := gfdb.DB(ctx).Model(ctx, public.TableNameDataset).Where(entity.DatasetCol.Id, req.Id).Delete()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
return r.RowsAffected()
|
||||
}
|
||||
|
||||
func (d *datasetDao) GetByID(ctx context.Context, req *dto.GetDatasetReq, fields ...string) (res *entity.Dataset, err error) {
|
||||
r, err := gfdb.DB(ctx).Model(ctx, public.TableNameDataset).Where(entity.DatasetCol.Id, req.Id).Fields(fields).One()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
err = r.Struct(&res)
|
||||
return
|
||||
}
|
||||
|
||||
// List 获取数据集列表
|
||||
func (d *datasetDao) List(ctx context.Context, req *dto.ListDatasetReq, fields ...string) (res []*entity.Dataset, total int, err error) {
|
||||
model := gfdb.DB(ctx).Model(ctx, public.TableNameDataset).Fields(fields).OmitEmpty()
|
||||
if !g.IsEmpty(req.Keyword) {
|
||||
model.WhereLike(entity.DatasetCol.Name, "%"+req.Keyword+"%")
|
||||
}
|
||||
model.OrderDesc(entity.DatasetCol.CreatedAt)
|
||||
if req.Page != nil {
|
||||
model.Page(int(req.Page.PageNum), int(req.Page.PageSize))
|
||||
}
|
||||
r, total, err := model.AllAndCount(false)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
err = r.Structs(&res)
|
||||
return
|
||||
}
|
||||
59
dao/dataset_index.go
Normal file
59
dao/dataset_index.go
Normal file
@@ -0,0 +1,59 @@
|
||||
package dao
|
||||
|
||||
import (
	"context"
	"database/sql"
	"errors"
	"fmt"
	"rag/consts/public"
	"rag/model/entity"

	"gitea.com/red-future/common/db/gfdb"
)
|
||||
|
||||
// datasetIndexDao groups the database operations for the dataset index table.
type datasetIndexDao struct{}

// DatasetIndex is the shared, package-level datasetIndexDao instance.
var DatasetIndex = &datasetIndexDao{}
|
||||
|
||||
// Insert 插入数据集索引
|
||||
func (d *datasetIndexDao) Insert(ctx context.Context, index *entity.DatasetIndex) (id int64, err error) {
|
||||
_, err = gfdb.DB(ctx).Model(ctx, public.TableNameDatasetIndex).Data(index).Insert()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
// GetByDatasetId 根据数据集ID获取索引
|
||||
func (d *datasetIndexDao) GetByDatasetId(ctx context.Context, datasetId int64) (result *entity.DatasetIndex, err error) {
|
||||
err = gfdb.DB(ctx).Model(ctx, public.TableNameDatasetIndex).Where(entity.DatasetIndexCol.DatasetId, datasetId).Scan(&result)
|
||||
if err != nil {
|
||||
if err == sql.ErrNoRows {
|
||||
return nil, nil
|
||||
}
|
||||
return
|
||||
}
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// IncVectorCount 增加或减少向量数量
|
||||
func (d *datasetIndexDao) IncVectorCount(ctx context.Context, id int64, delta int64) (err error) {
|
||||
_, err = gfdb.DB(ctx).Model(ctx, public.TableNameDatasetIndex).
|
||||
Where(entity.DatasetIndexCol.Id, id).
|
||||
Increment(entity.DatasetIndexCol.VectorCount, delta)
|
||||
return
|
||||
}
|
||||
|
||||
func (d *datasetIndexDao) InsertIndex(ctx context.Context, indexName string) (err error) {
|
||||
prefix, err := gfdb.GetTablePrefix(ctx)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
sqlStr := fmt.Sprintf(`
|
||||
CREATE INDEX IF NOT EXISTS %s
|
||||
ON %s
|
||||
USING ivfflat (vector vector_cosine_ops)
|
||||
WHERE vector IS NOT NULL;
|
||||
`, indexName, prefix+public.TableNameDocumentChunk)
|
||||
_, err = gfdb.DB(ctx).Exec(ctx, sqlStr)
|
||||
return
|
||||
}
|
||||
87
dao/document.go
Normal file
87
dao/document.go
Normal file
@@ -0,0 +1,87 @@
|
||||
package dao
|
||||
|
||||
import (
|
||||
"context"
|
||||
"rag/consts/public"
|
||||
"rag/model/dto"
|
||||
"rag/model/entity"
|
||||
|
||||
"gitea.com/red-future/common/db/gfdb"
|
||||
"github.com/gogf/gf/v2/database/gdb"
|
||||
"github.com/gogf/gf/v2/frame/g"
|
||||
"github.com/gogf/gf/v2/util/gconv"
|
||||
)
|
||||
|
||||
// documentDao groups the database operations for the document table.
type documentDao struct{}

// Document is the shared, package-level documentDao instance.
var Document = &documentDao{}
|
||||
|
||||
// Insert 插入文件
|
||||
func (d *documentDao) Insert(ctx context.Context, req *dto.CreateDocumentReq) (id int64, err error) {
|
||||
var res *entity.Document
|
||||
if err = gconv.Struct(req, &res); err != nil {
|
||||
return
|
||||
}
|
||||
r, err := gfdb.DB(ctx).Model(ctx, public.TableNameDocument).Data(&res).Insert()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
return r.LastInsertId()
|
||||
}
|
||||
|
||||
// Update 更新文件
|
||||
func (d *documentDao) Update(ctx context.Context, req *dto.UpdateDocumentReq) (rows int64, err error) {
|
||||
model := gfdb.DB(ctx).Model(ctx, public.TableNameDocument).OmitEmpty()
|
||||
if !g.IsEmpty(req.ChunkCount) {
|
||||
model.Data(entity.DocumentCol.ChunkCount, &gdb.Counter{
|
||||
Field: entity.DocumentCol.ChunkCount,
|
||||
Value: gconv.Float64(req.ChunkCount),
|
||||
})
|
||||
}
|
||||
|
||||
r, err := model.Data(&req).Where(entity.DocumentCol.Id, req.Id).Update()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
return r.RowsAffected()
|
||||
}
|
||||
|
||||
// Delete 删除文件
|
||||
func (d *documentDao) Delete(ctx context.Context, req *dto.DeleteDocumentReq) (rows int64, err error) {
|
||||
r, err := gfdb.DB(ctx).Model(ctx, public.TableNameDocument).Where(entity.DocumentCol.Id, req.Id).Delete()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
return r.RowsAffected()
|
||||
}
|
||||
|
||||
// GetByID 根据ID获取文件
|
||||
func (d *documentDao) GetByID(ctx context.Context, req *dto.GetDocumentReq, fields ...string) (res *entity.Document, err error) {
|
||||
r, err := gfdb.DB(ctx).Model(ctx, public.TableNameDocument).Where(entity.DocumentCol.Id, req.Id).Fields(fields).One()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
err = r.Struct(&res)
|
||||
return
|
||||
}
|
||||
|
||||
// List 获取文件列表
|
||||
func (d *documentDao) List(ctx context.Context, req *dto.ListDocumentReq, fields ...string) (res []*entity.Document, total int, err error) {
|
||||
model := gfdb.DB(ctx).Model(ctx, public.TableNameDocument).OmitEmpty()
|
||||
if !g.IsEmpty(req.Keyword) {
|
||||
model.WhereLike(entity.DocumentCol.Title, "%"+req.Keyword+"%")
|
||||
}
|
||||
model.Where(entity.DocumentCol.DatasetId, req.DatasetId)
|
||||
model.Where(entity.DocumentCol.Status, req.Status)
|
||||
model.Fields(fields)
|
||||
model.OrderDesc(entity.DocumentCol.CreatedAt)
|
||||
if req.Page != nil {
|
||||
model.Page(int(req.Page.PageNum), int(req.Page.PageSize))
|
||||
}
|
||||
r, total, err := model.AllAndCount(false)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
err = r.Structs(&res)
|
||||
return
|
||||
}
|
||||
104
dao/document_chunk.go
Normal file
104
dao/document_chunk.go
Normal file
@@ -0,0 +1,104 @@
|
||||
package dao
|
||||
|
||||
import (
|
||||
"context"
|
||||
"rag/consts/public"
|
||||
"rag/model/dto"
|
||||
"rag/model/entity"
|
||||
|
||||
"gitea.com/red-future/common/db/gfdb"
|
||||
"github.com/gogf/gf/v2/util/gconv"
|
||||
)
|
||||
|
||||
// documentChunkDao groups the database operations for the document chunk table.
type documentChunkDao struct{}

// DocumentChunk is the shared, package-level documentChunkDao instance.
var DocumentChunk = &documentChunkDao{}
|
||||
|
||||
// BatchInsert 批量插入文件块
|
||||
func (d *documentChunkDao) BatchInsert(ctx context.Context, req []*dto.VectorDocumentChunkMsg) (rows int64, err error) {
|
||||
var res []*entity.DocumentChunk
|
||||
if err = gconv.Structs(req, &res); err != nil {
|
||||
return
|
||||
}
|
||||
r, err := gfdb.DB(ctx).Model(ctx, public.TableNameDocumentChunk).Data(&res).Insert()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
return r.RowsAffected()
|
||||
}
|
||||
|
||||
// Update 更新文件块
|
||||
func (d *documentChunkDao) Update(ctx context.Context, req *dto.UpdateDocumentChunkReq) (rows int64, err error) {
|
||||
model := gfdb.DB(ctx).Model(ctx, public.TableNameDocumentChunk)
|
||||
r, err := model.Data(&req).Where(entity.DocumentChunkCol.Id, req.Id).Update()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
return r.RowsAffected()
|
||||
}
|
||||
|
||||
// List 文件块列表
|
||||
func (d *documentChunkDao) List(ctx context.Context, req *dto.ListDocumentChunkReq, fields ...string) (res []*entity.DocumentChunk, total int, err error) {
|
||||
model := gfdb.DB(ctx).Model(ctx, public.TableNameDocumentChunk).Fields(fields).OmitEmpty().
|
||||
Where(entity.DocumentChunkCol.DatasetId, req.DatasetId).
|
||||
Where(entity.DocumentChunkCol.DocumentId, req.DocumentId).
|
||||
Where(entity.DocumentChunkCol.Status, req.Status).
|
||||
Where(entity.DocumentChunkCol.VectorStatus, req.VectorStatus).
|
||||
OrderDesc(entity.DocumentChunkCol.CreatedAt)
|
||||
if req.Page != nil {
|
||||
model.Page(int(req.Page.PageNum), int(req.Page.PageSize))
|
||||
}
|
||||
r, total, err := model.AllAndCount(false)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
err = r.Structs(&res)
|
||||
return
|
||||
}
|
||||
|
||||
//// Insert 插入向量文档
|
||||
//func (d *vectorDocumentDao) Insert(ctx context.Context, docs []*entity.DocumentChunk) (ids []interface{}, err error) {
|
||||
// if len(docs) == 0 {
|
||||
// return
|
||||
// }
|
||||
// interfaces := make([]interface{}, len(docs))
|
||||
// for i := range docs {
|
||||
// interfaces[i] = docs[i]
|
||||
// }
|
||||
// return mongoDB.Insert(ctx, interfaces, CollectionVectorDoc)
|
||||
//}
|
||||
//
|
||||
//// DeleteByIDs 根据ID删除向量文档
|
||||
//func (d *vectorDocumentDao) DeleteByIDs(ctx context.Context, ids []string) (err error) {
|
||||
// if len(ids) == 0 {
|
||||
// return
|
||||
// }
|
||||
// objectIDs := make([]bson.ObjectID, len(ids))
|
||||
// for i, id := range ids {
|
||||
// objectIDs[i], err = bson.ObjectIDFromHex(id)
|
||||
// if err != nil {
|
||||
// return err
|
||||
// }
|
||||
// }
|
||||
// filter := bson.M{"_id": bson.M{"$in": objectIDs}}
|
||||
// _, err = mongoDB.Delete(ctx, filter, CollectionVectorDoc)
|
||||
// return
|
||||
//}
|
||||
//
|
||||
//// GetByIndexID 根据索引ID获取向量文档
|
||||
//func (d *vectorDocumentDao) GetByIndexID(ctx context.Context, indexID string, limit int) (result []*entity.DocumentChunk, err error) {
|
||||
// filter := bson.M{"indexId": indexID}
|
||||
// page := &beans.Page{PageNum: 1, PageSize: int64(limit)}
|
||||
// _, err = mongoDB.Find(ctx, filter, &result, CollectionVectorDoc, page, nil)
|
||||
// return
|
||||
//}
|
||||
//
|
||||
//// GetByVectorIDs 根据向量ID获取向量文档
|
||||
//func (d *vectorDocumentDao) GetByVectorIDs(ctx context.Context, vectorIDs []string) (result []*entity.DocumentChunk, err error) {
|
||||
// if len(vectorIDs) == 0 {
|
||||
// return
|
||||
// }
|
||||
// filter := bson.M{"vectorId": bson.M{"$in": vectorIDs}}
|
||||
// _, err = mongoDB.Find(ctx, filter, &result, CollectionVectorDoc, &beans.Page{PageSize: -1}, nil)
|
||||
// return
|
||||
//}
|
||||
96
dao/keyword.go
Normal file
96
dao/keyword.go
Normal file
@@ -0,0 +1,96 @@
|
||||
package dao
|
||||
|
||||
import (
|
||||
"context"
|
||||
"rag/consts/public"
|
||||
"rag/model/dto"
|
||||
"rag/model/entity"
|
||||
|
||||
"gitea.com/red-future/common/db/gfdb"
|
||||
"github.com/gogf/gf/v2/frame/g"
|
||||
"github.com/gogf/gf/v2/util/gconv"
|
||||
)
|
||||
|
||||
// keywordDao groups the database operations for the keyword table.
type keywordDao struct{}

// Keyword is the shared, package-level keywordDao instance.
var Keyword = &keywordDao{}
|
||||
|
||||
func (d *keywordDao) Insert(ctx context.Context, req *dto.CreateKeywordReq) (id int64, err error) {
|
||||
var res *entity.Keyword
|
||||
if err = gconv.Struct(req, &res); err != nil {
|
||||
return
|
||||
}
|
||||
r, err := gfdb.DB(ctx).Model(ctx, public.TableNameKeyword).Data(&res).Insert()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
return r.LastInsertId()
|
||||
}
|
||||
|
||||
func (d *keywordDao) BatchSaveOrUpdate(ctx context.Context, req []*dto.CreateKeywordReq) (rows int64, err error) {
|
||||
var res []*entity.Keyword
|
||||
if err = gconv.Structs(req, &res); err != nil {
|
||||
return
|
||||
}
|
||||
r, err := gfdb.DB(ctx).Model(ctx, public.TableNameKeyword).Data(&res).OnConflict(
|
||||
entity.KeywordCol.TenantId,
|
||||
entity.KeywordCol.DatasetId,
|
||||
entity.KeywordCol.DocumentId,
|
||||
entity.KeywordCol.Word).Save()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
return r.RowsAffected()
|
||||
}
|
||||
|
||||
func (d *keywordDao) Update(ctx context.Context, req *dto.UpdateKeywordReq) (rows int64, err error) {
|
||||
model := gfdb.DB(ctx).Model(ctx, public.TableNameKeyword)
|
||||
r, err := model.Data(&req).Where(entity.KeywordCol.Id, req.Id).Update()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
return r.RowsAffected()
|
||||
}
|
||||
|
||||
func (d *keywordDao) Delete(ctx context.Context, req *dto.DeleteKeywordReq) (rows int64, err error) {
|
||||
r, err := gfdb.DB(ctx).Model(ctx, public.TableNameKeyword).Where(entity.KeywordCol.Id, req.Id).Delete()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
return r.RowsAffected()
|
||||
}
|
||||
|
||||
func (d *keywordDao) Count(ctx context.Context, req *dto.ListKeywordReq) (count int, err error) {
|
||||
count, err = gfdb.DB(ctx).Model(ctx, public.TableNameKeyword).OmitEmpty().
|
||||
Where(entity.KeywordCol.DatasetId, req.DatasetId).
|
||||
Where(entity.KeywordCol.DocumentId, req.DocumentId).
|
||||
Where(entity.KeywordCol.Word, req.Word).Count()
|
||||
return
|
||||
}
|
||||
|
||||
func (d *keywordDao) GetByID(ctx context.Context, req *dto.GetKeywordReq, fields ...string) (res *entity.Document, err error) {
|
||||
r, err := gfdb.DB(ctx).Model(ctx, public.TableNameKeyword).Where(entity.KeywordCol.Id, req.Id).Fields(fields).One()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
err = r.Struct(&res)
|
||||
return
|
||||
}
|
||||
|
||||
func (d *keywordDao) List(ctx context.Context, req *dto.ListKeywordReq, fields ...string) (res []*entity.Keyword, total int, err error) {
|
||||
model := gfdb.DB(ctx).Model(ctx, public.TableNameKeyword).Fields(fields).OmitEmpty()
|
||||
if !g.IsEmpty(req.Keyword) {
|
||||
model.WhereLike(entity.KeywordCol.Word, "%"+req.Keyword+"%")
|
||||
}
|
||||
model.OrderDesc(entity.KeywordCol.Weight)
|
||||
model.OrderDesc(entity.KeywordCol.CreatedAt)
|
||||
if req.Page != nil {
|
||||
model.Page(int(req.Page.PageNum), int(req.Page.PageSize))
|
||||
}
|
||||
r, total, err := model.AllAndCount(false)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
err = r.Structs(&res)
|
||||
return
|
||||
}
|
||||
Reference in New Issue
Block a user