feat: rag初始版

This commit is contained in:
2026-04-03 09:16:53 +08:00
commit 6f5c80da16
38 changed files with 3840 additions and 0 deletions

88
dao/dataset.go Normal file
View File

@@ -0,0 +1,88 @@
package dao
import (
"context"
"rag/consts/public"
"rag/model/dto"
"rag/model/entity"
"gitea.com/red-future/common/db/gfdb"
"github.com/gogf/gf/v2/database/gdb"
"github.com/gogf/gf/v2/frame/g"
"github.com/gogf/gf/v2/util/gconv"
)
// datasetDao groups the persistence operations for the dataset table.
type datasetDao struct{}

// Dataset is the package-level datasetDao instance used by callers.
var Dataset = &datasetDao{}
// Insert converts req into an entity.Dataset, inserts it into the dataset
// table and returns the last insert id reported by the driver.
func (d *datasetDao) Insert(ctx context.Context, req *dto.CreateDatasetReq) (id int64, err error) {
	var row *entity.Dataset
	if convErr := gconv.Struct(req, &row); convErr != nil {
		return 0, convErr
	}

	result, execErr := gfdb.DB(ctx).Model(ctx, public.TableNameDataset).Data(&row).Insert()
	if execErr != nil {
		return 0, execErr
	}
	return result.LastInsertId()
}
// Update writes req to the dataset row whose id equals req.Id and returns the
// number of affected rows. OmitEmpty is enabled on the model.
//
// NOTE(review): the gdb.Counter payloads registered for DocumentCount and
// DocumentSize are followed by a further Data(&req) call on the same model. In
// GoFrame's gdb.Model every Data call presumably replaces the previous
// payload, which would make the counters ineffective and store the raw values
// instead of incrementing — confirm against gfdb and merge into a single Data
// call if so.
func (d *datasetDao) Update(ctx context.Context, req *dto.UpdateDatasetReq) (rows int64, err error) {
	model := gfdb.DB(ctx).Model(ctx, public.TableNameDataset).OmitEmpty()

	// Counter columns paired with the delta carried by the request.
	counters := []struct {
		column string
		delta  interface{}
	}{
		{entity.DatasetCol.DocumentCount, req.DocumentCount},
		{entity.DatasetCol.DocumentSize, req.DocumentSize},
	}
	for _, c := range counters {
		if g.IsEmpty(c.delta) {
			continue
		}
		model.Data(c.column, &gdb.Counter{
			Field: c.column,
			Value: gconv.Float64(c.delta),
		})
	}

	result, execErr := model.Data(&req).Where(entity.DatasetCol.Id, req.Id).Update()
	if execErr != nil {
		return 0, execErr
	}
	return result.RowsAffected()
}
// Delete removes the dataset row whose id equals req.Id and returns the
// number of affected rows.
func (d *datasetDao) Delete(ctx context.Context, req *dto.DeleteDatasetReq) (rows int64, err error) {
	target := gfdb.DB(ctx).Model(ctx, public.TableNameDataset).Where(entity.DatasetCol.Id, req.Id)

	result, execErr := target.Delete()
	if execErr != nil {
		return 0, execErr
	}
	return result.RowsAffected()
}
// GetByID loads the dataset row whose id equals req.Id, restricted to the
// given fields, and maps it onto an entity.Dataset.
func (d *datasetDao) GetByID(ctx context.Context, req *dto.GetDatasetReq, fields ...string) (res *entity.Dataset, err error) {
	query := gfdb.DB(ctx).Model(ctx, public.TableNameDataset).
		Where(entity.DatasetCol.Id, req.Id).
		Fields(fields)

	record, err := query.One()
	if err != nil {
		return nil, err
	}
	err = record.Struct(&res)
	return res, err
}
// List returns datasets ordered by creation time (newest first) together with
// the total count. A non-empty req.Keyword adds a LIKE filter on the name
// column and a non-nil req.Page adds pagination.
//
// The builder calls below discard their return value, so they presumably rely
// on the model being modified in place — confirm gfdb's model is not in Safe mode.
func (d *datasetDao) List(ctx context.Context, req *dto.ListDatasetReq, fields ...string) (res []*entity.Dataset, total int, err error) {
	model := gfdb.DB(ctx).Model(ctx, public.TableNameDataset).Fields(fields).OmitEmpty()

	if keyword := req.Keyword; !g.IsEmpty(keyword) {
		model.WhereLike(entity.DatasetCol.Name, "%"+keyword+"%")
	}
	model.OrderDesc(entity.DatasetCol.CreatedAt)
	if page := req.Page; page != nil {
		model.Page(int(page.PageNum), int(page.PageSize))
	}

	records, total, err := model.AllAndCount(false)
	if err != nil {
		return nil, total, err
	}
	err = records.Structs(&res)
	return res, total, err
}

59
dao/dataset_index.go Normal file
View File

@@ -0,0 +1,59 @@
package dao
import (
	"context"
	"database/sql"
	"errors"
	"fmt"

	"rag/consts/public"
	"rag/model/entity"

	"gitea.com/red-future/common/db/gfdb"
)
// datasetIndexDao groups the persistence operations for the dataset index table.
type datasetIndexDao struct{}

// DatasetIndex is the package-level datasetIndexDao instance used by callers.
var DatasetIndex = &datasetIndexDao{}
// Insert stores index in the dataset index table.
//
// The returned id is the driver's last insert id when it is available. The
// lookup is best-effort: a successful insert is never reported as a failure
// just because the id cannot be determined; id is 0 in that case, which is
// what this method returned unconditionally before.
func (d *datasetIndexDao) Insert(ctx context.Context, index *entity.DatasetIndex) (id int64, err error) {
	result, err := gfdb.DB(ctx).Model(ctx, public.TableNameDatasetIndex).Data(index).Insert()
	if err != nil {
		return 0, err
	}
	// Ignoring the LastInsertId error is deliberate, see the doc comment above.
	if lastID, idErr := result.LastInsertId(); idErr == nil {
		id = lastID
	}
	return id, nil
}
// GetByDatasetId returns the index row whose dataset id equals datasetId.
//
// A sql.ErrNoRows error, including one wrapped by another error, is reported
// as (nil, nil) so callers can distinguish "no index yet" from a real failure.
// Any other error is returned together with a nil result.
func (d *datasetIndexDao) GetByDatasetId(ctx context.Context, datasetId int64) (result *entity.DatasetIndex, err error) {
	err = gfdb.DB(ctx).Model(ctx, public.TableNameDatasetIndex).
		Where(entity.DatasetIndexCol.DatasetId, datasetId).
		Scan(&result)
	switch {
	case err == nil:
		return result, nil
	case errors.Is(err, sql.ErrNoRows):
		// errors.Is also matches when the ORM wraps the sentinel error.
		return nil, nil
	default:
		return nil, err
	}
}
// IncVectorCount adjusts the vector count column of the index row identified
// by id. delta is passed unchanged to Increment; a negative delta presumably
// decreases the count — confirm with the ORM.
func (d *datasetIndexDao) IncVectorCount(ctx context.Context, id int64, delta int64) (err error) {
	target := gfdb.DB(ctx).Model(ctx, public.TableNameDatasetIndex).Where(entity.DatasetIndexCol.Id, id)
	if _, incErr := target.Increment(entity.DatasetIndexCol.VectorCount, delta); incErr != nil {
		return incErr
	}
	return nil
}
// InsertIndex creates, if it does not exist yet, a partial ivfflat index named
// indexName on the vector column of the document chunk table (table prefix
// taken from gfdb.GetTablePrefix).
//
// indexName is interpolated into the DDL statement because identifiers cannot
// be bound as parameters. It is therefore validated first: it must be a
// non-empty plain identifier made of ASCII letters, digits, '_' or '$' (or
// non-ASCII characters), and must not start with a digit or '$'. Anything else
// is rejected with an error before any SQL is executed.
func (d *datasetIndexDao) InsertIndex(ctx context.Context, indexName string) (err error) {
	if indexName == "" {
		return fmt.Errorf("invalid index name %q", indexName)
	}
	for i, r := range indexName {
		switch {
		case r == '_', r >= 'a' && r <= 'z', r >= 'A' && r <= 'Z', r >= 0x80:
			// Allowed at any position.
		case i > 0 && (r == '$' || (r >= '0' && r <= '9')):
			// Allowed after the first character only.
		default:
			return fmt.Errorf("invalid index name %q", indexName)
		}
	}

	prefix, err := gfdb.GetTablePrefix(ctx)
	if err != nil {
		return err
	}
	sqlStr := fmt.Sprintf(`
CREATE INDEX IF NOT EXISTS %s
ON %s
USING ivfflat (vector vector_cosine_ops)
WHERE vector IS NOT NULL;
`, indexName, prefix+public.TableNameDocumentChunk)
	_, err = gfdb.DB(ctx).Exec(ctx, sqlStr)
	return err
}

87
dao/document.go Normal file
View File

@@ -0,0 +1,87 @@
package dao
import (
"context"
"rag/consts/public"
"rag/model/dto"
"rag/model/entity"
"gitea.com/red-future/common/db/gfdb"
"github.com/gogf/gf/v2/database/gdb"
"github.com/gogf/gf/v2/frame/g"
"github.com/gogf/gf/v2/util/gconv"
)
// documentDao groups the persistence operations for the document table.
type documentDao struct{}

// Document is the package-level documentDao instance used by callers.
var Document = &documentDao{}
// Insert converts req into an entity.Document, inserts it into the document
// table and returns the last insert id reported by the driver.
func (d *documentDao) Insert(ctx context.Context, req *dto.CreateDocumentReq) (id int64, err error) {
	var row *entity.Document
	if convErr := gconv.Struct(req, &row); convErr != nil {
		return 0, convErr
	}

	result, execErr := gfdb.DB(ctx).Model(ctx, public.TableNameDocument).Data(&row).Insert()
	if execErr != nil {
		return 0, execErr
	}
	return result.LastInsertId()
}
// Update writes req to the document row whose id equals req.Id and returns
// the number of affected rows. OmitEmpty is enabled on the model.
//
// NOTE(review): the gdb.Counter payload registered for ChunkCount is followed
// by a further Data(&req) call on the same model. In GoFrame's gdb.Model every
// Data call presumably replaces the previous payload, which would make the
// counter ineffective — confirm against gfdb and merge into a single Data call
// if so.
func (d *documentDao) Update(ctx context.Context, req *dto.UpdateDocumentReq) (rows int64, err error) {
	model := gfdb.DB(ctx).Model(ctx, public.TableNameDocument).OmitEmpty()

	if hasDelta := !g.IsEmpty(req.ChunkCount); hasDelta {
		chunkCounter := &gdb.Counter{
			Field: entity.DocumentCol.ChunkCount,
			Value: gconv.Float64(req.ChunkCount),
		}
		model.Data(entity.DocumentCol.ChunkCount, chunkCounter)
	}

	result, execErr := model.Data(&req).Where(entity.DocumentCol.Id, req.Id).Update()
	if execErr != nil {
		return 0, execErr
	}
	return result.RowsAffected()
}
// Delete removes the document row whose id equals req.Id and returns the
// number of affected rows.
func (d *documentDao) Delete(ctx context.Context, req *dto.DeleteDocumentReq) (rows int64, err error) {
	target := gfdb.DB(ctx).Model(ctx, public.TableNameDocument).Where(entity.DocumentCol.Id, req.Id)

	result, execErr := target.Delete()
	if execErr != nil {
		return 0, execErr
	}
	return result.RowsAffected()
}
// GetByID loads the document row whose id equals req.Id, restricted to the
// given fields, and maps it onto an entity.Document.
func (d *documentDao) GetByID(ctx context.Context, req *dto.GetDocumentReq, fields ...string) (res *entity.Document, err error) {
	query := gfdb.DB(ctx).Model(ctx, public.TableNameDocument).
		Where(entity.DocumentCol.Id, req.Id).
		Fields(fields)

	record, err := query.One()
	if err != nil {
		return nil, err
	}
	err = record.Struct(&res)
	return res, err
}
// List returns documents ordered by creation time (newest first) together
// with the total count. Filters: an optional LIKE match of req.Keyword on the
// title column, plus req.DatasetId and req.Status (OmitEmpty is enabled on the
// model). A non-nil req.Page adds pagination.
//
// The builder calls below discard their return value, so they presumably rely
// on the model being modified in place — confirm gfdb's model is not in Safe mode.
func (d *documentDao) List(ctx context.Context, req *dto.ListDocumentReq, fields ...string) (res []*entity.Document, total int, err error) {
	model := gfdb.DB(ctx).Model(ctx, public.TableNameDocument).OmitEmpty()

	if keyword := req.Keyword; !g.IsEmpty(keyword) {
		model.WhereLike(entity.DocumentCol.Title, "%"+keyword+"%")
	}
	model.Where(entity.DocumentCol.DatasetId, req.DatasetId)
	model.Where(entity.DocumentCol.Status, req.Status)
	model.Fields(fields)
	model.OrderDesc(entity.DocumentCol.CreatedAt)
	if page := req.Page; page != nil {
		model.Page(int(page.PageNum), int(page.PageSize))
	}

	records, total, err := model.AllAndCount(false)
	if err != nil {
		return nil, total, err
	}
	err = records.Structs(&res)
	return res, total, err
}

104
dao/document_chunk.go Normal file
View File

@@ -0,0 +1,104 @@
package dao
import (
"context"
"rag/consts/public"
"rag/model/dto"
"rag/model/entity"
"gitea.com/red-future/common/db/gfdb"
"github.com/gogf/gf/v2/util/gconv"
)
// documentChunkDao groups the persistence operations for the document chunk table.
type documentChunkDao struct{}

// DocumentChunk is the package-level documentChunkDao instance used by callers.
var DocumentChunk = &documentChunkDao{}
// BatchInsert converts req into entity.DocumentChunk rows, inserts them into
// the document chunk table in one call and returns the number of affected rows.
//
// An empty req is a no-op that returns (0, nil) instead of handing an empty
// data set to the ORM.
func (d *documentChunkDao) BatchInsert(ctx context.Context, req []*dto.VectorDocumentChunkMsg) (rows int64, err error) {
	if len(req) == 0 {
		return 0, nil
	}

	var chunks []*entity.DocumentChunk
	if err = gconv.Structs(req, &chunks); err != nil {
		return 0, err
	}

	result, err := gfdb.DB(ctx).Model(ctx, public.TableNameDocumentChunk).Data(&chunks).Insert()
	if err != nil {
		return 0, err
	}
	return result.RowsAffected()
}
// Update writes req to the document chunk row whose id equals req.Id and
// returns the number of affected rows.
//
// NOTE(review): unlike the dataset/document Update methods, OmitEmpty is not
// enabled here, so empty fields of req are presumably written as-is — confirm
// this is intended.
func (d *documentChunkDao) Update(ctx context.Context, req *dto.UpdateDocumentChunkReq) (rows int64, err error) {
	result, execErr := gfdb.DB(ctx).Model(ctx, public.TableNameDocumentChunk).
		Data(&req).
		Where(entity.DocumentChunkCol.Id, req.Id).
		Update()
	if execErr != nil {
		return 0, execErr
	}
	return result.RowsAffected()
}
// List returns document chunks ordered by creation time (newest first)
// together with the total count. Filters: req.DatasetId, req.DocumentId,
// req.Status and req.VectorStatus (OmitEmpty is enabled on the model). A
// non-nil req.Page adds pagination.
//
// The Page call discards its return value, so it presumably relies on the
// model being modified in place — confirm gfdb's model is not in Safe mode.
func (d *documentChunkDao) List(ctx context.Context, req *dto.ListDocumentChunkReq, fields ...string) (res []*entity.DocumentChunk, total int, err error) {
	model := gfdb.DB(ctx).Model(ctx, public.TableNameDocumentChunk).Fields(fields).OmitEmpty()
	model = model.Where(entity.DocumentChunkCol.DatasetId, req.DatasetId)
	model = model.Where(entity.DocumentChunkCol.DocumentId, req.DocumentId)
	model = model.Where(entity.DocumentChunkCol.Status, req.Status)
	model = model.Where(entity.DocumentChunkCol.VectorStatus, req.VectorStatus)
	model = model.OrderDesc(entity.DocumentChunkCol.CreatedAt)

	if page := req.Page; page != nil {
		model.Page(int(page.PageNum), int(page.PageSize))
	}

	records, total, err := model.AllAndCount(false)
	if err != nil {
		return nil, total, err
	}
	err = records.Structs(&res)
	return res, total, err
}
//// Insert 插入向量文档
//func (d *vectorDocumentDao) Insert(ctx context.Context, docs []*entity.DocumentChunk) (ids []interface{}, err error) {
// if len(docs) == 0 {
// return
// }
// interfaces := make([]interface{}, len(docs))
// for i := range docs {
// interfaces[i] = docs[i]
// }
// return mongoDB.Insert(ctx, interfaces, CollectionVectorDoc)
//}
//
//// DeleteByIDs 根据ID删除向量文档
//func (d *vectorDocumentDao) DeleteByIDs(ctx context.Context, ids []string) (err error) {
// if len(ids) == 0 {
// return
// }
// objectIDs := make([]bson.ObjectID, len(ids))
// for i, id := range ids {
// objectIDs[i], err = bson.ObjectIDFromHex(id)
// if err != nil {
// return err
// }
// }
// filter := bson.M{"_id": bson.M{"$in": objectIDs}}
// _, err = mongoDB.Delete(ctx, filter, CollectionVectorDoc)
// return
//}
//
//// GetByIndexID 根据索引ID获取向量文档
//func (d *vectorDocumentDao) GetByIndexID(ctx context.Context, indexID string, limit int) (result []*entity.DocumentChunk, err error) {
// filter := bson.M{"indexId": indexID}
// page := &beans.Page{PageNum: 1, PageSize: int64(limit)}
// _, err = mongoDB.Find(ctx, filter, &result, CollectionVectorDoc, page, nil)
// return
//}
//
//// GetByVectorIDs 根据向量ID获取向量文档
//func (d *vectorDocumentDao) GetByVectorIDs(ctx context.Context, vectorIDs []string) (result []*entity.DocumentChunk, err error) {
// if len(vectorIDs) == 0 {
// return
// }
// filter := bson.M{"vectorId": bson.M{"$in": vectorIDs}}
// _, err = mongoDB.Find(ctx, filter, &result, CollectionVectorDoc, &beans.Page{PageSize: -1}, nil)
// return
//}

96
dao/keyword.go Normal file
View File

@@ -0,0 +1,96 @@
package dao
import (
"context"
"rag/consts/public"
"rag/model/dto"
"rag/model/entity"
"gitea.com/red-future/common/db/gfdb"
"github.com/gogf/gf/v2/frame/g"
"github.com/gogf/gf/v2/util/gconv"
)
// keywordDao groups the persistence operations for the keyword table.
type keywordDao struct{}

// Keyword is the package-level keywordDao instance used by callers.
var Keyword = &keywordDao{}
// Insert converts req into an entity.Keyword, inserts it into the keyword
// table and returns the last insert id reported by the driver.
func (d *keywordDao) Insert(ctx context.Context, req *dto.CreateKeywordReq) (id int64, err error) {
	var row *entity.Keyword
	if convErr := gconv.Struct(req, &row); convErr != nil {
		return 0, convErr
	}

	result, execErr := gfdb.DB(ctx).Model(ctx, public.TableNameKeyword).Data(&row).Insert()
	if execErr != nil {
		return 0, execErr
	}
	return result.LastInsertId()
}
// BatchSaveOrUpdate converts req into entity.Keyword rows and saves them with
// a conflict target of (tenant id, dataset id, document id, word), returning
// the number of affected rows.
//
// An empty req is a no-op that returns (0, nil) instead of handing an empty
// data set to the ORM.
func (d *keywordDao) BatchSaveOrUpdate(ctx context.Context, req []*dto.CreateKeywordReq) (rows int64, err error) {
	if len(req) == 0 {
		return 0, nil
	}

	var keywords []*entity.Keyword
	if err = gconv.Structs(req, &keywords); err != nil {
		return 0, err
	}

	result, err := gfdb.DB(ctx).Model(ctx, public.TableNameKeyword).Data(&keywords).OnConflict(
		entity.KeywordCol.TenantId,
		entity.KeywordCol.DatasetId,
		entity.KeywordCol.DocumentId,
		entity.KeywordCol.Word).Save()
	if err != nil {
		return 0, err
	}
	return result.RowsAffected()
}
// Update writes req to the keyword row whose id equals req.Id and returns the
// number of affected rows.
//
// NOTE(review): OmitEmpty is not enabled here, so empty fields of req are
// presumably written as-is — confirm this is intended.
func (d *keywordDao) Update(ctx context.Context, req *dto.UpdateKeywordReq) (rows int64, err error) {
	result, execErr := gfdb.DB(ctx).Model(ctx, public.TableNameKeyword).
		Data(&req).
		Where(entity.KeywordCol.Id, req.Id).
		Update()
	if execErr != nil {
		return 0, execErr
	}
	return result.RowsAffected()
}
// Delete removes the keyword row whose id equals req.Id and returns the
// number of affected rows.
func (d *keywordDao) Delete(ctx context.Context, req *dto.DeleteKeywordReq) (rows int64, err error) {
	target := gfdb.DB(ctx).Model(ctx, public.TableNameKeyword).Where(entity.KeywordCol.Id, req.Id)

	result, execErr := target.Delete()
	if execErr != nil {
		return 0, execErr
	}
	return result.RowsAffected()
}
// Count returns the number of keyword rows matching req.DatasetId,
// req.DocumentId and req.Word. OmitEmpty is enabled on the model.
func (d *keywordDao) Count(ctx context.Context, req *dto.ListKeywordReq) (count int, err error) {
	query := gfdb.DB(ctx).Model(ctx, public.TableNameKeyword).OmitEmpty()
	query = query.Where(entity.KeywordCol.DatasetId, req.DatasetId)
	query = query.Where(entity.KeywordCol.DocumentId, req.DocumentId)
	query = query.Where(entity.KeywordCol.Word, req.Word)
	return query.Count()
}
// GetByID loads the keyword row whose id equals req.Id, restricted to the
// given fields, and maps it onto the result struct.
//
// NOTE(review): the result type is *entity.Document although the row is read
// from the keyword table — this looks like a copy-paste slip and the intended
// type is presumably *entity.Keyword. Changing it alters the signature, so
// confirm with callers before fixing.
func (d *keywordDao) GetByID(ctx context.Context, req *dto.GetKeywordReq, fields ...string) (res *entity.Document, err error) {
	query := gfdb.DB(ctx).Model(ctx, public.TableNameKeyword).
		Where(entity.KeywordCol.Id, req.Id).
		Fields(fields)

	record, err := query.One()
	if err != nil {
		return nil, err
	}
	err = record.Struct(&res)
	return res, err
}
// List returns keywords ordered by weight and then creation time (both
// descending) together with the total count.
//
// Filters: req.DatasetId, req.DocumentId and req.Word are applied as equality
// conditions, the same ones Count uses for this request type, so a listing is
// scoped to the requested dataset/document. OmitEmpty is enabled on the model.
// A non-empty req.Keyword additionally adds a LIKE match on the word column,
// and a non-nil req.Page adds pagination.
func (d *keywordDao) List(ctx context.Context, req *dto.ListKeywordReq, fields ...string) (res []*entity.Keyword, total int, err error) {
	// Every builder result is reassigned so the conditions are kept whether or
	// not the underlying model modifies itself in place.
	model := gfdb.DB(ctx).Model(ctx, public.TableNameKeyword).Fields(fields).OmitEmpty()
	model = model.Where(entity.KeywordCol.DatasetId, req.DatasetId)
	model = model.Where(entity.KeywordCol.DocumentId, req.DocumentId)
	model = model.Where(entity.KeywordCol.Word, req.Word)
	if !g.IsEmpty(req.Keyword) {
		model = model.WhereLike(entity.KeywordCol.Word, "%"+req.Keyword+"%")
	}
	model = model.OrderDesc(entity.KeywordCol.Weight)
	model = model.OrderDesc(entity.KeywordCol.CreatedAt)
	if req.Page != nil {
		model = model.Page(int(req.Page.PageNum), int(req.Page.PageSize))
	}

	records, total, err := model.AllAndCount(false)
	if err != nil {
		return nil, total, err
	}
	err = records.Structs(&res)
	return res, total, err
}