feat: 新增关键词类型及优化查询逻辑

支持关键词类型区分,优化文件向量查询SQL及DAO更新逻辑,移除冗余配置和注释代码。
This commit is contained in:
2026-04-11 18:24:37 +08:00
parent 94df015aa9
commit a05cac7591
14 changed files with 128 additions and 95 deletions

View File

@@ -45,43 +45,3 @@ func (s *datasetService) List(ctx context.Context, req *dto.ListDatasetReq) (res
err = gconv.Struct(list, &res.List)
return
}
//// Search 搜索(示例,实际需要调用向量库)
//func (s *datasetService) Search(ctx context.Context, req *dto.SearchReq) (res *dto.SearchRes, err error) {
// // 1. 获取数据集信息
// kb, err := dao.Dataset.GetByID(ctx, req)
// if err != nil {
// return nil, err
// }
//
// // 2. 获取文件块
// chunks, err := dao.Chunk.FindChunksByKBIDWithLimit(ctx, req.KBID, 0, req.TopK)
// if err != nil {
// return nil, err
// }
//
// // 3. TODO: 使用向量检索(需要集成向量库)
// // 暂时使用简单的关键词匹配
// results := make([]dto.SearchResult, 0)
// for _, chunk := range chunks {
// results = append(results, dto.SearchResult{
// Content: chunk.Content,
// Score: 0.8, // TODO: 计算实际向量相似度
// DocumentID: chunk.DocumentID,
// ChunkIndex: chunk.Index,
// })
// }
//
// g.Log().Infof(ctx, "数据集[%s]搜索完成,查询:%s,结果数:%d", kb.Name, req.Query, len(results))
//
// return &dto.SearchRes{Results: results}, nil
//}
//
//// formatChunks 格式化文件块为上下文
//func (s *datasetService) formatChunks(chunks []*entity.DocumentChunk) string {
// var sb strings.Builder
// for i, chunk := range chunks {
// sb.WriteString(fmt.Sprintf("[%d] %s\n\n", i+1, chunk.Content))
// }
// return sb.String()
//}

View File

@@ -6,6 +6,7 @@ import (
"fmt"
"rag/common/eino"
"rag/consts/document"
"rag/consts/keyword"
"rag/consts/public"
"rag/consts/task"
"rag/dao"
@@ -104,9 +105,17 @@ func (s *documentService) Delete(ctx context.Context, req *dto.DeleteDocumentReq
}
// Get returns the details of a single file (document).
//
// NOTE(review): this span appears to come from a diff view whose +/- markers
// were stripped, so lines from before and after the change are interleaved.
// The two consecutive signatures and the two gconv.Struct calls look like a
// removed line followed by its replacement — confirm against the repository
// before relying on the text below as compilable code.
//
// Read as the newer variant, the method looks the record up through
// dao.Document.GetByID, copies it into res.DocumentVO with gconv.Struct, and
// then fills res.ImgAddressPrefix from utils.GetFileAddressPrefix. Results are
// handed back through the named return values res and err.
// NOTE(review): presumably the pre-change signature (returns *dto.DocumentVO) — verify.
func (s *documentService) Get(ctx context.Context, req *dto.GetDocumentReq) (res *dto.DocumentVO, err error) {
// NOTE(review): presumably the post-change signature (returns *dto.GetDocumentRes) — verify.
func (s *documentService) Get(ctx context.Context, req *dto.GetDocumentReq) (res *dto.GetDocumentRes, err error) {
r, err := dao.Document.GetByID(ctx, req)
// NOTE(review): presumably the pre-change conversion straight into res — verify.
err = gconv.Struct(r, &res)
if err != nil {
return
}
// res is a nil named result up to this point; allocate it so that taking the
// address of res.DocumentVO on the next line does not dereference nil.
res = &dto.GetDocumentRes{}
err = gconv.Struct(r, &res.DocumentVO)
if err != nil {
return
}
// The bare return below hands back res together with whatever err this call
// produced, so a non-nil err can accompany an already populated res.
res.ImgAddressPrefix, err = utils.GetFileAddressPrefix(ctx)
return
}
@@ -280,10 +289,11 @@ func (s *documentService) extractDocument(ctx context.Context, doc *entity.Docum
var keywordReqs = make([]*dto.CreateKeywordReq, 0)
for _, word := range words {
keywordReqs = append(keywordReqs, &dto.CreateKeywordReq{
DatasetId: doc.DatasetId,
DocumentId: doc.Id,
Word: word.Word,
Weight: gconv.Int16(word.Score),
DatasetId: doc.DatasetId,
DocumentId: doc.Id,
Word: word.Word,
Weight: gconv.Int16(word.Score),
KeywordType: keyword.KeywordTypeInitial.Code(),
})
}
if len(keywordReqs) > 0 {