feat: rag初始版
This commit is contained in:
@@ -3,6 +3,8 @@ package service
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"rag/common/eino"
|
||||
"rag/common/gse"
|
||||
"rag/consts/document"
|
||||
"rag/consts/public"
|
||||
"rag/dao"
|
||||
@@ -16,8 +18,6 @@ import (
|
||||
"gitea.com/red-future/common/db/gfdb"
|
||||
"gitea.com/red-future/common/full-text-search/meilisearch"
|
||||
"gitea.com/red-future/common/http"
|
||||
"gitea.com/red-future/common/rag/eino"
|
||||
"gitea.com/red-future/common/rag/gse"
|
||||
"gitea.com/red-future/common/utils"
|
||||
gmq "github.com/bjang03/gmq/core/gmq"
|
||||
"github.com/bjang03/gmq/mq"
|
||||
@@ -251,7 +251,7 @@ func (s *documentService) sqlSplitDocument(ctx context.Context, doc *entity.Docu
|
||||
return
|
||||
}
|
||||
// 3. 组装向量文档
|
||||
var vectorDocs = make([]dto.VectorDocumentChunkMsg, 0)
|
||||
var docsChunk = make([]*schema.Document, 0)
|
||||
for i, t := range docsSplit {
|
||||
contentHash := gmd5.MustEncryptString(t.Content)
|
||||
// 检查是否重复
|
||||
@@ -263,27 +263,26 @@ func (s *documentService) sqlSplitDocument(ctx context.Context, doc *entity.Docu
|
||||
if !success {
|
||||
continue
|
||||
}
|
||||
vectorDocs = append(vectorDocs, dto.VectorDocumentChunkMsg{
|
||||
TenantId: doc.TenantId,
|
||||
Creator: doc.Creator,
|
||||
DatasetId: doc.DatasetId,
|
||||
DocumentId: doc.Id,
|
||||
Content: t.Content,
|
||||
ContentHash: contentHash,
|
||||
ChunkIndex: gconv.Int64(i),
|
||||
})
|
||||
|
||||
var metaData = make(map[string]any)
|
||||
metaData[entity.DocumentCol.TenantId] = doc.TenantId
|
||||
metaData[entity.DocumentCol.Creator] = doc.Creator
|
||||
metaData[entity.DocumentCol.DatasetId] = doc.DatasetId
|
||||
metaData[entity.DocumentChunkCol.DocumentId] = doc.Id
|
||||
metaData[entity.DocumentChunkCol.ContentHash] = contentHash
|
||||
metaData[entity.DocumentChunkCol.ChunkIndex] = gconv.Int64(i)
|
||||
t.MetaData = metaData
|
||||
docsChunk = append(docsChunk, t)
|
||||
}
|
||||
// 4. 发送消息到队列
|
||||
if len(vectorDocs) > 0 {
|
||||
if len(docsChunk) > 0 {
|
||||
err = gmq.GetGmq("primary").GmqPublish(ctx, &mq.RedisPubMessage{
|
||||
PubMessage: types.PubMessage{
|
||||
Topic: public.KnowledgeDocumentChunkTopic,
|
||||
Data: vectorDocs,
|
||||
Data: docsChunk,
|
||||
},
|
||||
})
|
||||
}
|
||||
vectorDocsCount = gconv.Int64(len(vectorDocs))
|
||||
vectorDocsCount = gconv.Int64(len(docsChunk))
|
||||
return
|
||||
}
|
||||
|
||||
@@ -318,12 +317,12 @@ func (s *documentService) esSplitDocument(ctx context.Context, doc *entity.Docum
|
||||
}
|
||||
// 构建Meilisearch文档
|
||||
meiliDocs = append(meiliDocs, map[string]interface{}{
|
||||
"id": contentHash,
|
||||
"datasetId": doc.DatasetId,
|
||||
"documentId": doc.Id,
|
||||
"content": t.Content,
|
||||
"contentHash": contentHash,
|
||||
"chunkIndex": i,
|
||||
entity.DocumentChunkCol.Id: contentHash,
|
||||
entity.DocumentChunkCol.DatasetId: doc.DatasetId,
|
||||
entity.DocumentChunkCol.DocumentId: doc.Id,
|
||||
entity.DocumentChunkCol.Content: t.Content,
|
||||
entity.DocumentChunkCol.ContentHash: contentHash,
|
||||
entity.DocumentChunkCol.ChunkIndex: i,
|
||||
})
|
||||
}
|
||||
// 4. 写入到meilisearch数据库中
|
||||
|
||||
Reference in New Issue
Block a user