feat: rag初始版

This commit is contained in:
2026-04-03 11:14:44 +08:00
parent 6f5c80da16
commit b00d544fb7
21 changed files with 1228 additions and 141 deletions

View File

@@ -3,6 +3,8 @@ package service
import (
"context"
"fmt"
"rag/common/eino"
"rag/common/gse"
"rag/consts/document"
"rag/consts/public"
"rag/dao"
@@ -16,8 +18,6 @@ import (
"gitea.com/red-future/common/db/gfdb"
"gitea.com/red-future/common/full-text-search/meilisearch"
"gitea.com/red-future/common/http"
"gitea.com/red-future/common/rag/eino"
"gitea.com/red-future/common/rag/gse"
"gitea.com/red-future/common/utils"
gmq "github.com/bjang03/gmq/core/gmq"
"github.com/bjang03/gmq/mq"
@@ -251,7 +251,7 @@ func (s *documentService) sqlSplitDocument(ctx context.Context, doc *entity.Docu
return
}
// 3. 组装向量文档
var vectorDocs = make([]dto.VectorDocumentChunkMsg, 0)
var docsChunk = make([]*schema.Document, 0)
for i, t := range docsSplit {
contentHash := gmd5.MustEncryptString(t.Content)
// 检查是否重复
@@ -263,27 +263,26 @@ func (s *documentService) sqlSplitDocument(ctx context.Context, doc *entity.Docu
if !success {
continue
}
vectorDocs = append(vectorDocs, dto.VectorDocumentChunkMsg{
TenantId: doc.TenantId,
Creator: doc.Creator,
DatasetId: doc.DatasetId,
DocumentId: doc.Id,
Content: t.Content,
ContentHash: contentHash,
ChunkIndex: gconv.Int64(i),
})
var metaData = make(map[string]any)
metaData[entity.DocumentCol.TenantId] = doc.TenantId
metaData[entity.DocumentCol.Creator] = doc.Creator
metaData[entity.DocumentCol.DatasetId] = doc.DatasetId
metaData[entity.DocumentChunkCol.DocumentId] = doc.Id
metaData[entity.DocumentChunkCol.ContentHash] = contentHash
metaData[entity.DocumentChunkCol.ChunkIndex] = gconv.Int64(i)
t.MetaData = metaData
docsChunk = append(docsChunk, t)
}
// 4. 发送消息到队列
if len(vectorDocs) > 0 {
if len(docsChunk) > 0 {
err = gmq.GetGmq("primary").GmqPublish(ctx, &mq.RedisPubMessage{
PubMessage: types.PubMessage{
Topic: public.KnowledgeDocumentChunkTopic,
Data: vectorDocs,
Data: docsChunk,
},
})
}
vectorDocsCount = gconv.Int64(len(vectorDocs))
vectorDocsCount = gconv.Int64(len(docsChunk))
return
}
@@ -318,12 +317,12 @@ func (s *documentService) esSplitDocument(ctx context.Context, doc *entity.Docum
}
// 构建Meilisearch文档
meiliDocs = append(meiliDocs, map[string]interface{}{
"id": contentHash,
"datasetId": doc.DatasetId,
"documentId": doc.Id,
"content": t.Content,
"contentHash": contentHash,
"chunkIndex": i,
entity.DocumentChunkCol.Id: contentHash,
entity.DocumentChunkCol.DatasetId: doc.DatasetId,
entity.DocumentChunkCol.DocumentId: doc.Id,
entity.DocumentChunkCol.Content: t.Content,
entity.DocumentChunkCol.ContentHash: contentHash,
entity.DocumentChunkCol.ChunkIndex: i,
})
}
// 4. 写入到meilisearch数据库中