refactor: 重构文档向量相关代码结构
This commit is contained in:
@@ -100,9 +100,9 @@ func (i *PGVectorIndexer) doStore(ctx context.Context, docs []*schema.Document,
|
||||
}
|
||||
|
||||
// 转成业务实体
|
||||
var chunks []*dto.VectorDocumentChunkMsg
|
||||
var chunks []*dto.VectorDocumentVectorMsg
|
||||
for idx, doc := range docs {
|
||||
ck := new(dto.VectorDocumentChunkMsg)
|
||||
ck := new(dto.VectorDocumentVectorMsg)
|
||||
err = gconv.Struct(doc.MetaData, ck)
|
||||
if err != nil {
|
||||
glog.Errorf(ctx, "doStore err: %v", err)
|
||||
@@ -126,7 +126,7 @@ func (i *PGVectorIndexer) doStore(ctx context.Context, docs []*schema.Document,
|
||||
return
|
||||
}
|
||||
// 入库
|
||||
rows, err = dao.DocumentChunk.BatchInsert(ctx, chunks)
|
||||
rows, err = dao.DocumentVector.BatchInsert(ctx, chunks)
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
@@ -210,7 +210,7 @@ func (r *PGVectorRetriever) doRetrieveVector(ctx context.Context, query string,
|
||||
}
|
||||
datasetIds := gconv.Int64s(opts.DSLInfo["dataset_ids"])
|
||||
|
||||
rows, err := dao.DocumentChunk.GetAllByVector(ctx, datasetIds, queryVec, topK)
|
||||
rows, err := dao.DocumentVector.GetAllByVector(ctx, datasetIds, queryVec, topK)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -239,7 +239,7 @@ func (r *PGVectorRetriever) doRetrieveMeilisearch(ctx context.Context, query str
|
||||
datasetIds := gconv.Int64s(opts.DSLInfo["dataset_ids"])
|
||||
|
||||
// 调用你已有的 Meilisearch DAO
|
||||
rows, err := dao.DocumentChunk.SearchByKeywords(ctx, query, datasetIds, topK)
|
||||
rows, err := dao.DocumentVector.SearchByKeywords(ctx, query, datasetIds, topK)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@@ -1,69 +0,0 @@
|
||||
package task
|
||||
|
||||
import (
|
||||
"time"
|
||||
|
||||
"gitea.com/red-future/common/beans"
|
||||
)
|
||||
|
||||
type baseTaskCol struct {
|
||||
beans.SQLBaseCol
|
||||
TaskType string
|
||||
Status string
|
||||
Priority string
|
||||
ParentTaskID string
|
||||
TotalItems string
|
||||
ProcessedItems string
|
||||
Progress string
|
||||
StartTime string
|
||||
EndTime string
|
||||
Duration string
|
||||
SuccessCount string
|
||||
FailCount string
|
||||
Executor string
|
||||
DocumentID string
|
||||
Remark string
|
||||
}
|
||||
|
||||
var BaseTaskCol = baseTaskCol{
|
||||
SQLBaseCol: beans.DefSQLBaseCol,
|
||||
TaskType: "task_type",
|
||||
Status: "status",
|
||||
Priority: "task_priority",
|
||||
ParentTaskID: "parent_task_id",
|
||||
TotalItems: "total_items",
|
||||
ProcessedItems: "processed_items",
|
||||
Progress: "progress",
|
||||
StartTime: "start_time",
|
||||
EndTime: "end_time",
|
||||
Duration: "duration",
|
||||
SuccessCount: "success_count",
|
||||
FailCount: "fail_count",
|
||||
Executor: "executor",
|
||||
DocumentID: "document_id",
|
||||
Remark: "remark",
|
||||
}
|
||||
|
||||
// SQLBaseTask 任务基类 - SQL版本
|
||||
type SQLBaseTask struct {
|
||||
beans.SQLBaseDO `orm:",inline"`
|
||||
// 任务核心信息
|
||||
TaskType TaskType `orm:"task_type" json:"taskType" dc:"任务类型"`
|
||||
Status TaskStatus `orm:"status" json:"status" dc:"任务状态"`
|
||||
Priority TaskPriority `orm:"task_priority" json:"priority,omitempty" dc:"任务优先级"`
|
||||
ParentTaskID int64 `orm:"parent_task_id" json:"parentTaskId,omitempty" dc:"父任务ID"`
|
||||
// 任务进度
|
||||
TotalItems int64 `orm:"total_items" json:"totalItems" dc:"总数"`
|
||||
ProcessedItems int64 `orm:"processed_items" json:"processedItems" dc:"已处理数"`
|
||||
Progress float64 `orm:"progress" json:"progress" dc:"进度"` // 0~100 百分比
|
||||
// 任务结果
|
||||
StartTime *time.Time `orm:"start_time" json:"startTime" dc:"开始时间"`
|
||||
EndTime *time.Time `orm:"end_time" json:"endTime,omitempty" dc:"结束时间"`
|
||||
Duration int64 `orm:"duration" json:"duration,omitempty" dc:"耗时(毫秒)"`
|
||||
SuccessCount int64 `orm:"success_count" json:"successCount" dc:"成功数"`
|
||||
FailCount int64 `orm:"fail_count" json:"failCount" dc:"失败数"`
|
||||
// 其他
|
||||
Executor string `orm:"executor" json:"executor,omitempty" dc:"执行器标识"`
|
||||
DocumentID int64 `orm:"document_id" json:"documentId,omitempty" dc:"文档ID"`
|
||||
Remark string `orm:"remark" json:"remark,omitempty" dc:"备注/错误信息"`
|
||||
}
|
||||
@@ -1,30 +0,0 @@
|
||||
package task
|
||||
|
||||
// TaskType enumerates the kinds of task: the three document-parsing
// sub-tasks plus the top-level parsing task that groups them.
type TaskType string

// Task type values. The string form is the stored/serialized representation,
// so it must not be changed.
const (
	TaskTypeExtractKeywords TaskType = "EXTRACT_KEYWORDS" // keyword extraction
	TaskTypeGenerateVector  TaskType = "GENERATE_VECTOR"  // vector generation
	TaskTypeFullTextSearch  TaskType = "FULL_TEXT_SEARCH" // full-text search
	TaskTypeDocParse        TaskType = "DOC_PARSE"        // top-level document-parsing task
)
|
||||
|
||||
// TaskStatus enumerates the lifecycle states of a task.
type TaskStatus string

// Task status values. The string form is the stored/serialized
// representation, so it must not be changed.
const (
	TaskStatusPending   TaskStatus = "PENDING"   // waiting to be executed
	TaskStatusRunning   TaskStatus = "RUNNING"   // currently executing
	TaskStatusCompleted TaskStatus = "COMPLETED" // finished
	TaskStatusFailed    TaskStatus = "FAILED"    // execution failed
)
|
||||
|
||||
// TaskPriority is the priority level of a task; a larger value means a
// higher priority among the constants defined here.
type TaskPriority int

// Task priority values. They are spelled out explicitly (rather than derived
// with iota) so the numeric values stay fixed if the list is ever reordered.
const (
	TaskPriorityLow    TaskPriority = 1 // low
	TaskPriorityMedium TaskPriority = 2 // medium
	TaskPriorityHigh   TaskPriority = 3 // high
)
|
||||
Reference in New Issue
Block a user