refactor: 重构文档向量相关代码结构

This commit is contained in:
2026-04-10 13:12:19 +08:00
parent a7b8713e26
commit 94df015aa9
30 changed files with 335 additions and 506 deletions

View File

@@ -5,9 +5,9 @@ import (
"errors"
"fmt"
"rag/common/eino"
"rag/common/task"
"rag/consts/document"
"rag/consts/public"
"rag/consts/task"
"rag/dao"
"rag/model/dto"
"rag/model/entity"
@@ -123,8 +123,8 @@ func (s *documentService) List(ctx context.Context, req *dto.ListDocumentReq) (r
return
}
// Process 处理文件(使用eino框架切分和向量化)
func (s *documentService) Process(ctx context.Context, req *dto.ProcessDocumentReq) (err error) {
// Vector 处理文件(使用eino框架切分和向量化)
func (s *documentService) Vector(ctx context.Context, req *dto.DocumentVectorReq) (err error) {
// 1. 查询文件信息
documentReq := dto.GetDocumentReq{Id: req.Id}
doc, err := dao.Document.GetByID(ctx, &documentReq)
@@ -403,9 +403,9 @@ func (s *documentService) sqlSplitDocument(ctx context.Context, doc *entity.Docu
metaData[entity.DocumentCol.TenantId] = doc.TenantId
metaData[entity.DocumentCol.Creator] = doc.Creator
metaData[entity.DocumentCol.DatasetId] = doc.DatasetId
metaData[entity.DocumentChunkCol.DocumentId] = doc.Id
metaData[entity.DocumentChunkCol.ContentHash] = contentHash
metaData[entity.DocumentChunkCol.ChunkIndex] = gconv.Int64(i)
metaData[entity.DocumentVectorCol.DocumentId] = doc.Id
metaData[entity.DocumentVectorCol.ContentHash] = contentHash
metaData[entity.DocumentVectorCol.ChunkIndex] = gconv.Int64(i)
t.MetaData = metaData
docsChunk = append(docsChunk, t)
}
@@ -423,9 +423,9 @@ func (s *documentService) sqlSplitDocument(ctx context.Context, doc *entity.Docu
// 4. 发送消息到队列
if len(docsChunk) > 0 {
err = gmq.GetGmq("primary").GmqPublish(ctx, &mq.RedisPubMessage{
err = gmq.GetGmq(public.GmqMsgPluginsName).GmqPublish(ctx, &mq.RedisPubMessage{
PubMessage: types.PubMessage{
Topic: public.KnowledgeDocumentChunkTopic,
Topic: public.KnowledgeDocumentVectorTopic,
Data: docsChunk,
},
})
@@ -541,12 +541,12 @@ func (s *documentService) esSplitDocument(ctx context.Context, doc *entity.Docum
continue
}
meiliDocs = append(meiliDocs, map[string]interface{}{
entity.DocumentChunkCol.Id: contentHash,
entity.DocumentChunkCol.DatasetId: doc.DatasetId,
entity.DocumentChunkCol.DocumentId: doc.Id,
entity.DocumentChunkCol.Content: t.Content,
entity.DocumentChunkCol.ContentHash: contentHash,
entity.DocumentChunkCol.ChunkIndex: i,
entity.DocumentVectorCol.Id: contentHash,
entity.DocumentVectorCol.DatasetId: doc.DatasetId,
entity.DocumentVectorCol.DocumentId: doc.Id,
entity.DocumentVectorCol.Content: t.Content,
entity.DocumentVectorCol.ContentHash: contentHash,
entity.DocumentVectorCol.ChunkIndex: i,
})
}
@@ -621,7 +621,7 @@ func (s *documentService) getHistoryData(ctx context.Context, doc *entity.Docume
}
// 3. Redis 无数据:根据 contentKey 类型选择查询方式
var dictData = make([]*dto.DocumentChunkRPC, 0)
var dictData = make([]*dto.DocumentVectorRPC, 0)
if public.KnowledgeContentHashSqlKey == contentKey {
// SQL 方式:调用 HTTP 接口查询
dictData, err = s.getHistoryDataFromHttp(ctx, doc)
@@ -658,9 +658,9 @@ func (s *documentService) getHistoryData(ctx context.Context, doc *entity.Docume
}
// getHistoryDataFromHttp 通过 HTTP 接口查询历史数据
func (s *documentService) getHistoryDataFromHttp(ctx context.Context, doc *entity.Document) (dictData []*dto.DocumentChunkRPC, err error) {
func (s *documentService) getHistoryDataFromHttp(ctx context.Context, doc *entity.Document) (dictData []*dto.DocumentVectorRPC, err error) {
// 调用接口获取数据
res, _, err := dao.DocumentChunk.List(ctx, &dto.ListDocumentChunkReq{
res, _, err := dao.DocumentVector.List(ctx, &dto.ListDocumentVectorReq{
DatasetId: doc.DatasetId,
Status: gconv.PtrInt8(1),
})
@@ -669,7 +669,7 @@ func (s *documentService) getHistoryDataFromHttp(ctx context.Context, doc *entit
}
// getHistoryDataFromMeilisearch 通过 meilisearch 查询历史数据
func (s *documentService) getHistoryDataFromMeilisearch(ctx context.Context, doc *entity.Document) (dictData []*dto.DocumentChunkRPC, err error) {
func (s *documentService) getHistoryDataFromMeilisearch(ctx context.Context, doc *entity.Document) (dictData []*dto.DocumentVectorRPC, err error) {
// 构建 meilisearch 查询参数
searchParams := &meilisearch.SearchParams{
Filter: fmt.Sprintf("datasetId = %d", doc.DatasetId),
@@ -684,9 +684,9 @@ func (s *documentService) getHistoryDataFromMeilisearch(ctx context.Context, doc
}
// 转换查询结果
dictData = make([]*dto.DocumentChunkRPC, 0)
dictData = make([]*dto.DocumentVectorRPC, 0)
for _, hit := range hits {
item := &dto.DocumentChunkRPC{}
item := &dto.DocumentVectorRPC{}
if err = gconv.Struct(hit, item); err != nil {
return
}