refactor: 重构文档向量相关代码结构

This commit is contained in:
2026-04-10 13:12:19 +08:00
parent a7b8713e26
commit 94df015aa9
30 changed files with 335 additions and 506 deletions

View File

@@ -100,9 +100,9 @@ func (i *PGVectorIndexer) doStore(ctx context.Context, docs []*schema.Document,
}
// 转成业务实体
var chunks []*dto.VectorDocumentChunkMsg
var chunks []*dto.VectorDocumentVectorMsg
for idx, doc := range docs {
ck := new(dto.VectorDocumentChunkMsg)
ck := new(dto.VectorDocumentVectorMsg)
err = gconv.Struct(doc.MetaData, ck)
if err != nil {
glog.Errorf(ctx, "doStore err: %v", err)
@@ -126,7 +126,7 @@ func (i *PGVectorIndexer) doStore(ctx context.Context, docs []*schema.Document,
return
}
// 入库
rows, err = dao.DocumentChunk.BatchInsert(ctx, chunks)
rows, err = dao.DocumentVector.BatchInsert(ctx, chunks)
return
}

View File

@@ -210,7 +210,7 @@ func (r *PGVectorRetriever) doRetrieveVector(ctx context.Context, query string,
}
datasetIds := gconv.Int64s(opts.DSLInfo["dataset_ids"])
rows, err := dao.DocumentChunk.GetAllByVector(ctx, datasetIds, queryVec, topK)
rows, err := dao.DocumentVector.GetAllByVector(ctx, datasetIds, queryVec, topK)
if err != nil {
return nil, err
}
@@ -239,7 +239,7 @@ func (r *PGVectorRetriever) doRetrieveMeilisearch(ctx context.Context, query str
datasetIds := gconv.Int64s(opts.DSLInfo["dataset_ids"])
// 调用你已有的 Meilisearch DAO
rows, err := dao.DocumentChunk.SearchByKeywords(ctx, query, datasetIds, topK)
rows, err := dao.DocumentVector.SearchByKeywords(ctx, query, datasetIds, topK)
if err != nil {
return nil, err
}

View File

@@ -1,69 +0,0 @@
package task
import (
"time"
"gitea.com/red-future/common/beans"
)
// baseTaskCol is a holder of string-valued entries, one per task field, plus
// the embedded beans.SQLBaseCol. The package-level BaseTaskCol value fills
// each entry with a snake_case column name.
type baseTaskCol struct {
	beans.SQLBaseCol

	// Core task information.
	TaskType, Status, Priority, ParentTaskID string

	// Progress tracking.
	TotalItems, ProcessedItems, Progress string

	// Timing.
	StartTime, EndTime, Duration string

	// Result counters.
	SuccessCount, FailCount string

	// Miscellaneous.
	Executor, DocumentID, Remark string
}
// BaseTaskCol is the shared baseTaskCol instance. Each entry holds the
// snake_case name used for the corresponding task field; the embedded part is
// taken from beans.DefSQLBaseCol.
var BaseTaskCol = baseTaskCol{
	SQLBaseCol: beans.DefSQLBaseCol,

	// Core task information. Priority is stored under "task_priority",
	// not "priority".
	TaskType:     "task_type",
	Status:       "status",
	Priority:     "task_priority",
	ParentTaskID: "parent_task_id",

	// Progress tracking.
	TotalItems:     "total_items",
	ProcessedItems: "processed_items",
	Progress:       "progress",

	// Timing.
	StartTime: "start_time",
	EndTime:   "end_time",
	Duration:  "duration",

	// Result counters.
	SuccessCount: "success_count",
	FailCount:    "fail_count",

	// Miscellaneous.
	Executor:   "executor",
	DocumentID: "document_id",
	Remark:     "remark",
}
// SQLBaseTask is the SQL flavour of the task base record. It inlines
// beans.SQLBaseDO and adds the task's classification, progress, timing,
// result counters and bookkeeping fields.
type SQLBaseTask struct {
	beans.SQLBaseDO `orm:",inline"`

	// Core task information: type, status, priority and parent task ID.
	TaskType     TaskType     `orm:"task_type" json:"taskType" dc:"任务类型"`
	Status       TaskStatus   `orm:"status" json:"status" dc:"任务状态"`
	Priority     TaskPriority `orm:"task_priority" json:"priority,omitempty" dc:"任务优先级"`
	ParentTaskID int64        `orm:"parent_task_id" json:"parentTaskId,omitempty" dc:"父任务ID"`

	// Progress: total item count, processed item count, and Progress as a
	// percentage in the range 0~100.
	TotalItems     int64   `orm:"total_items" json:"totalItems" dc:"总数"`
	ProcessedItems int64   `orm:"processed_items" json:"processedItems" dc:"已处理数"`
	Progress       float64 `orm:"progress" json:"progress" dc:"进度"`

	// Result: start and end time, Duration in milliseconds, and the
	// success/failure counts.
	StartTime    *time.Time `orm:"start_time" json:"startTime" dc:"开始时间"`
	EndTime      *time.Time `orm:"end_time" json:"endTime,omitempty" dc:"结束时间"`
	Duration     int64      `orm:"duration" json:"duration,omitempty" dc:"耗时(毫秒)"`
	SuccessCount int64      `orm:"success_count" json:"successCount" dc:"成功数"`
	FailCount    int64      `orm:"fail_count" json:"failCount" dc:"失败数"`

	// Other: executor identifier, document ID, and a remark that doubles
	// as the error message.
	Executor   string `orm:"executor" json:"executor,omitempty" dc:"执行器标识"`
	DocumentID int64  `orm:"document_id" json:"documentId,omitempty" dc:"文档ID"`
	Remark     string `orm:"remark" json:"remark,omitempty" dc:"备注/错误信息"`
}

View File

@@ -1,30 +0,0 @@
package task
// TaskType enumerates the kinds of task: the three document-parsing subtasks
// plus the top-level document-parse task that groups them.
type TaskType string

const (
	// TaskTypeExtractKeywords is the keyword-extraction subtask.
	TaskTypeExtractKeywords = TaskType("EXTRACT_KEYWORDS")
	// TaskTypeGenerateVector is the vector-generation subtask.
	TaskTypeGenerateVector = TaskType("GENERATE_VECTOR")
	// TaskTypeFullTextSearch is the full-text-search subtask.
	TaskTypeFullTextSearch = TaskType("FULL_TEXT_SEARCH")
	// TaskTypeDocParse is the top-level, overall document-parse task.
	TaskTypeDocParse = TaskType("DOC_PARSE")
)
// TaskStatus enumerates the lifecycle states of a task.
type TaskStatus string

const (
	// TaskStatusPending marks a task that is waiting to be executed.
	TaskStatusPending = TaskStatus("PENDING")
	// TaskStatusRunning marks a task that is currently executing.
	TaskStatusRunning = TaskStatus("RUNNING")
	// TaskStatusCompleted marks a task that has completed.
	TaskStatusCompleted = TaskStatus("COMPLETED")
	// TaskStatusFailed marks a task whose execution failed.
	TaskStatusFailed = TaskStatus("FAILED")
)
// TaskPriority is the priority level of a task; a larger value means a
// higher priority.
type TaskPriority int

const (
	// TaskPriorityLow is the low priority level.
	TaskPriorityLow = TaskPriority(1)
	// TaskPriorityMedium is the medium priority level.
	TaskPriorityMedium = TaskPriority(2)
	// TaskPriorityHigh is the high priority level.
	TaskPriorityHigh = TaskPriority(3)
)