feat: 添加文档处理API和配置更新
This commit is contained in:
@@ -80,13 +80,16 @@ func (s *documentService) Create(ctx context.Context, req *dto.CreateDocumentReq
|
||||
return
|
||||
}
|
||||
res = &dto.CreateDocumentRes{Id: id}
|
||||
// 写入任务进度待处理 任务类型为文档解析
|
||||
// 写入任务进度进行中 任务类型为文档解析
|
||||
err = Task.WriteTaskProgress(ctx, &dto.WriteTaskProgressReq{
|
||||
TaskId: id,
|
||||
TaskType: task.TaskTypeDocParse,
|
||||
Status: task.TaskStatusPending,
|
||||
Remark: "文档上传成功待解析: " + req.Title,
|
||||
Status: task.TaskStatusCompleted,
|
||||
Remark: "文档上传完成",
|
||||
})
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
return
|
||||
})
|
||||
|
||||
@@ -171,8 +174,7 @@ func (s *documentService) List(ctx context.Context, req *dto.ListDocumentReq) (r
|
||||
return
|
||||
}
|
||||
|
||||
// Vector 处理文件(使用eino框架切分和向量化)
|
||||
func (s *documentService) Vector(ctx context.Context, req *dto.DocumentVectorReq) (err error) {
|
||||
func (s *documentService) VectorSemanticSplit(ctx context.Context, req *dto.VectorSemanticSplitReq) (err error) {
|
||||
// 1. 查询文件信息
|
||||
documentReq := dto.GetDocumentReq{Id: req.Id}
|
||||
doc, err := dao.Document.Get(ctx, &documentReq)
|
||||
@@ -182,8 +184,56 @@ func (s *documentService) Vector(ctx context.Context, req *dto.DocumentVectorReq
|
||||
if g.IsEmpty(doc) {
|
||||
return errors.New("document not found")
|
||||
}
|
||||
err = Task.WriteTaskProgress(ctx, &dto.WriteTaskProgressReq{
|
||||
TaskId: req.Id,
|
||||
TaskType: task.TaskTypeGenerateVector,
|
||||
Status: task.TaskStatusRunning,
|
||||
Remark: "向量化执行中",
|
||||
})
|
||||
return s.semanticSplitDocument(ctx, doc)
|
||||
}
|
||||
|
||||
// 2. 更新文档状态为处理中
|
||||
func (s *documentService) SearchRecursiveSplit(ctx context.Context, req *dto.SearchRecursiveSplitReq) (err error) {
|
||||
// 1. 查询文件信息
|
||||
documentReq := dto.GetDocumentReq{Id: req.Id}
|
||||
doc, err := dao.Document.Get(ctx, &documentReq)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if g.IsEmpty(doc) {
|
||||
return errors.New("document not found")
|
||||
}
|
||||
err = Task.WriteTaskProgress(ctx, &dto.WriteTaskProgressReq{
|
||||
TaskId: req.Id,
|
||||
TaskType: task.TaskTypeFullTextSearch,
|
||||
Status: task.TaskStatusRunning,
|
||||
Remark: "全文检索执行中",
|
||||
})
|
||||
return s.recursiveSplitDocument(ctx, doc)
|
||||
}
|
||||
|
||||
func (s *documentService) KeywordExtract(ctx context.Context, req *dto.KeywordExtractReq) (err error) {
|
||||
// 1. 查询文件信息
|
||||
documentReq := dto.GetDocumentReq{Id: req.Id}
|
||||
doc, err := dao.Document.Get(ctx, &documentReq)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if g.IsEmpty(doc) {
|
||||
return errors.New("document not found")
|
||||
}
|
||||
err = Task.WriteTaskProgress(ctx, &dto.WriteTaskProgressReq{
|
||||
TaskId: req.Id,
|
||||
TaskType: task.TaskTypeExtractKeywords,
|
||||
Status: task.TaskStatusRunning,
|
||||
Remark: "提取关键词执行中",
|
||||
})
|
||||
return s.extractDocument(ctx, doc)
|
||||
}
|
||||
|
||||
// Vector 处理文件(使用eino框架切分和向量化)
|
||||
func (s *documentService) Vector(ctx context.Context, req *dto.DocumentVectorReq) (err error) {
|
||||
// 更新文档状态为处理中
|
||||
updateDocumentReq := new(dto.UpdateDocumentReq)
|
||||
updateDocumentReq.Id = req.Id
|
||||
updateDocumentReq.VectorStatus = document.VectorStatusProcessing.Code()
|
||||
@@ -197,16 +247,7 @@ func (s *documentService) Vector(ctx context.Context, req *dto.DocumentVectorReq
|
||||
})
|
||||
return
|
||||
}
|
||||
// 写入任务进度进行中 任务类型为文档解析
|
||||
err = Task.WriteTaskProgress(ctx, &dto.WriteTaskProgressReq{
|
||||
TaskId: req.Id,
|
||||
TaskType: task.TaskTypeDocParse,
|
||||
Status: task.TaskStatusRunning,
|
||||
Remark: "文档解析开始",
|
||||
})
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
user, err := utils.GetUserInfo(ctx)
|
||||
if err != nil {
|
||||
return err
|
||||
@@ -217,7 +258,7 @@ func (s *documentService) Vector(ctx context.Context, req *dto.DocumentVectorReq
|
||||
// 任务1: 语义 切分文档
|
||||
grpool.Add(taskCtx, func(ctx context.Context) {
|
||||
g.TryCatch(ctx, func(ctx context.Context) {
|
||||
if innerErr := s.semanticSplitDocument(ctx, doc); innerErr != nil {
|
||||
if innerErr := s.VectorSemanticSplit(ctx, &dto.VectorSemanticSplitReq{Id: req.Id}); innerErr != nil {
|
||||
cancel()
|
||||
}
|
||||
}, func(ctx context.Context, err error) {
|
||||
@@ -228,7 +269,7 @@ func (s *documentService) Vector(ctx context.Context, req *dto.DocumentVectorReq
|
||||
// 任务2: 递归 切分文档
|
||||
grpool.Add(taskCtx, func(ctx context.Context) {
|
||||
g.TryCatch(ctx, func(ctx context.Context) {
|
||||
if innerErr := s.recursiveSplitDocument(ctx, doc); innerErr != nil {
|
||||
if innerErr := s.SearchRecursiveSplit(ctx, &dto.SearchRecursiveSplitReq{Id: req.Id}); innerErr != nil {
|
||||
cancel()
|
||||
}
|
||||
}, func(ctx context.Context, err error) {
|
||||
@@ -239,7 +280,7 @@ func (s *documentService) Vector(ctx context.Context, req *dto.DocumentVectorReq
|
||||
// 任务3: 提取文档
|
||||
grpool.Add(taskCtx, func(ctx context.Context) {
|
||||
g.TryCatch(ctx, func(ctx context.Context) {
|
||||
if innerErr := s.extractDocument(ctx, doc); innerErr != nil {
|
||||
if innerErr := s.KeywordExtract(ctx, &dto.KeywordExtractReq{Id: req.Id}); innerErr != nil {
|
||||
cancel()
|
||||
}
|
||||
}, func(ctx context.Context, err error) {
|
||||
|
||||
Reference in New Issue
Block a user