diff --git a/Dockerfile b/Dockerfile index 67a7da3..07020e2 100644 --- a/Dockerfile +++ b/Dockerfile @@ -18,7 +18,7 @@ RUN mkdir -p /logs /app/resource/log/run /app/resource/log/server # 添加执行权限 RUN chmod +x /app/main -EXPOSE 3008 +EXPOSE 3006 # 使用root用户运行 CMD ["./main"] diff --git a/config-dev.yml b/config-dev.yml new file mode 100644 index 0000000..f15927b --- /dev/null +++ b/config-dev.yml @@ -0,0 +1,149 @@ +server: + address: :3006 + name: rag + workerId: 1 + +# Database. +database: + default: + - type: "pgsql" + host: "116.204.74.41" + port: "15432" + user: "postgres" + pass: "Bjang09@686^*^" + name: "rag" + prefix: "rag_knowledge_" # (可选)表名前缀 + role: "master" # (可选)数据库主从角色(master/slave),默认为master。如果不使用应用主从机制请不配置或留空即可。 + debug: true # (可选)开启调试模式 + dryRun: false # (可选)ORM空跑(只读不写) + charset: "utf8" # (可选)数据库编码(如: utf8mb4/utf8/gbk/gb2312),一般设置为utf8mb4。默认为utf8。 + timezone: "Asia/Shanghai" # (可选)时区配置,例如:Local + maxIdle: 5 # (可选)连接池最大闲置的连接数(默认10) + maxOpen: 20 # (可选)连接池最大打开的连接数(默认无限制) + maxLifetime: "30s" # (可选)连接对象可重复使用的时间长度(默认30秒) + maxIdleConnTime: "30s" # (可选,v2.10新增)连接池中空闲连接的最大生存时间(默认30秒)。可以通过配置文件或SetConnMaxIdleTime方法设置,避免长时间空闲连接占用资源。 + createdAt: "created_at" # (可选)自动创建时间字段名称 + updatedAt: "updated_at" # (可选)自动更新时间字段名称 + deletedAt: "deleted_at" # (可选)软删除时间字段名称 + timeMaintainDisabled: false # (可选)是否完全关闭时间更新特性,为true时CreatedAt/UpdatedAt/DeletedAt都将失效 + - type: "pgsql" + host: "116.204.74.41" + port: "15432" + user: "postgres" + pass: "Bjang09@686^*^" + name: "tenant-1" + prefix: "rag_knowledge_" # (可选)表名前缀 + role: "slave" # (可选)数据库主从角色(master/slave),默认为master。如果不使用应用主从机制请不配置或留空即可。 + debug: false # (可选)开启调试模式 + dryRun: false # (可选)ORM空跑(只读不写) + charset: "utf8" # (可选)数据库编码(如: utf8mb4/utf8/gbk/gb2312),一般设置为utf8mb4。默认为utf8。 + timezone: "Asia/Shanghai" # (可选)时区配置,例如:Local + maxIdle: 5 # (可选)连接池最大闲置的连接数(默认10) + maxOpen: 20 # (可选)连接池最大打开的连接数(默认无限制) + maxLifetime: "30s" # (可选)连接对象可重复使用的时间长度(默认30秒) + maxIdleConnTime: "30s" # 
(可选,v2.10新增)连接池中空闲连接的最大生存时间(默认30秒)。可以通过配置文件或SetConnMaxIdleTime方法设置,避免长时间空闲连接占用资源。 + createdAt: "created_at" # (可选)自动创建时间字段名称 + updatedAt: "updated_at" # (可选)自动更新时间字段名称 + deletedAt: "deleted_at" # (可选)软删除时间字段名称 + timeMaintainDisabled: false # (可选)是否完全关闭时间更新特性,为true时CreatedAt/UpdatedAt/DeletedAt都将失效 + rag_knowledge: + - type: "pgsql" + host: "116.204.74.41" + port: "15432" + user: "postgres" + pass: "Bjang09@686^*^" + name: "tenant-1" + prefix: "rag_knowledge_" # (可选)表名前缀 + role: "master" + debug: true # (可选)开启调试模式 + dryRun: false # (可选)ORM空跑(只读不写) + charset: "utf8" # (可选)数据库编码(如: utf8mb4/utf8/gbk/gb2312),一般设置为utf8mb4。默认为utf8。 + timezone: "Asia/Shanghai" # (可选)时区配置,例如:Local + maxIdle: 5 # (可选)连接池最大闲置的连接数(默认10) + maxOpen: 20 # (可选)连接池最大打开的连接数(默认无限制) + maxLifetime: "30s" # (可选)连接对象可重复使用的时间长度(默认30秒) + maxIdleConnTime: "30s" # (可选,v2.10新增)连接池中空闲连接的最大生存时间(默认30秒)。可以通过配置文件或SetConnMaxIdleTime方法设置,避免长时间空闲连接占用资源。 + createdAt: "created_at" # (可选)自动创建时间字段名称 + updatedAt: "updated_at" # (可选)自动更新时间字段名称 + deletedAt: "deleted_at" # (可选)软删除时间字段名称 + timeMaintainDisabled: false # (可选)是否完全关闭时间更新特性,为true时CreatedAt/UpdatedAt/DeletedAt都将失效 + rag_vector: + - type: "pgsql" + host: "116.204.74.41" + port: "15432" + user: "postgres" + pass: "Bjang09@686^*^" + name: "tenant-1" + prefix: "rag_vector_" # (可选)表名前缀 + role: "master" + debug: true # (可选)开启调试模式 + dryRun: false # (可选)ORM空跑(只读不写) + charset: "utf8" # (可选)数据库编码(如: utf8mb4/utf8/gbk/gb2312),一般设置为utf8mb4。默认为utf8。 + timezone: "Asia/Shanghai" # (可选)时区配置,例如:Local + maxIdle: 5 # (可选)连接池最大闲置的连接数(默认10) + maxOpen: 20 # (可选)连接池最大打开的连接数(默认无限制) + maxLifetime: "30s" # (可选)连接对象可重复使用的时间长度(默认30秒) + maxIdleConnTime: "30s" # (可选,v2.10新增)连接池中空闲连接的最大生存时间(默认30秒)。可以通过配置文件或SetConnMaxIdleTime方法设置,避免长时间空闲连接占用资源。 + createdAt: "created_at" # (可选)自动创建时间字段名称 + updatedAt: "updated_at" # (可选)自动更新时间字段名称 + deletedAt: "deleted_at" # (可选)软删除时间字段名称 + timeMaintainDisabled: false # (可选)是否完全关闭时间更新特性,为true时CreatedAt/UpdatedAt/DeletedAt都将失效 + +redis: + default: + address: 
"116.204.74.41:6379" + db: 0 + +consul: + address: 116.204.74.41:8500 + +jaeger: + addr: 116.204.74.41:4318 + +# eino框架配置 +eino: + # 文件切分配置 + splitter: + bufferSize: 1 + minChunkSize: 64 + percentile: 0.75 + # 向量化配置 + embedding: + provider: "dashscope" +# apiKey: "d158d896-8c54-40ee-9d61-4c5d37cd545c" +# model: "ep-20260326123502-khmdq" +# apiType: "multi_modal_api" + apiKey: "sk-4a8b82770bf74bc490eb3e4c5a8e2be9" + model: "text-embedding-v3" + chatmodel: + provider: "dashscope" + apiKey: "sk-4a8b82770bf74bc490eb3e4c5a8e2be9" + model: "qwen-turbo" + +# 文件上传服务地址,与oss模块minio中的endpoint一致 +filePrefix: "http://116.204.74.41:9000" + +gmq: + redis: + primary: + addr: "116.204.74.41" + port: "6379" + db: 0 + username: "" + password: "" + poolSize: 10 + minIdleConn: 5 + maxActiveConn: 10 + maxRetries: 30 + +# Meilisearch 全文检索配置 +meilisearch: + default: + host: "http://localhost" + port: 7700 + apiKey: "admin" +# apiKey: "6b8b6062bcb5e31f150427961d9da1a9e81758aa" + +cache: + localTTL: 60 + redisTTL: 300 diff --git a/config-master.yml b/config-master.yml new file mode 100644 index 0000000..8a83b48 --- /dev/null +++ b/config-master.yml @@ -0,0 +1,149 @@ +server: + address: :3006 + name: rag + workerId: 1 + +# Database. 
+database: + default: + - type: "pgsql" + host: "192.168.0.169" + port: "15432" + user: "postgres" + pass: "Bjang09@686^*^" + name: "rag" + prefix: "rag_knowledge_" # (可选)表名前缀 + role: "master" # (可选)数据库主从角色(master/slave),默认为master。如果不使用应用主从机制请不配置或留空即可。 + debug: true # (可选)开启调试模式 + dryRun: false # (可选)ORM空跑(只读不写) + charset: "utf8" # (可选)数据库编码(如: utf8mb4/utf8/gbk/gb2312),一般设置为utf8mb4。默认为utf8。 + timezone: "Asia/Shanghai" # (可选)时区配置,例如:Local + maxIdle: 5 # (可选)连接池最大闲置的连接数(默认10) + maxOpen: 20 # (可选)连接池最大打开的连接数(默认无限制) + maxLifetime: "30s" # (可选)连接对象可重复使用的时间长度(默认30秒) + maxIdleConnTime: "30s" # (可选,v2.10新增)连接池中空闲连接的最大生存时间(默认30秒)。可以通过配置文件或SetConnMaxIdleTime方法设置,避免长时间空闲连接占用资源。 + createdAt: "created_at" # (可选)自动创建时间字段名称 + updatedAt: "updated_at" # (可选)自动更新时间字段名称 + deletedAt: "deleted_at" # (可选)软删除时间字段名称 + timeMaintainDisabled: false # (可选)是否完全关闭时间更新特性,为true时CreatedAt/UpdatedAt/DeletedAt都将失效 + - type: "pgsql" + host: "192.168.0.169" + port: "15432" + user: "postgres" + pass: "Bjang09@686^*^" + name: "tenant-1" + prefix: "rag_knowledge_" # (可选)表名前缀 + role: "slave" # (可选)数据库主从角色(master/slave),默认为master。如果不使用应用主从机制请不配置或留空即可。 + debug: false # (可选)开启调试模式 + dryRun: false # (可选)ORM空跑(只读不写) + charset: "utf8" # (可选)数据库编码(如: utf8mb4/utf8/gbk/gb2312),一般设置为utf8mb4。默认为utf8。 + timezone: "Asia/Shanghai" # (可选)时区配置,例如:Local + maxIdle: 5 # (可选)连接池最大闲置的连接数(默认10) + maxOpen: 20 # (可选)连接池最大打开的连接数(默认无限制) + maxLifetime: "30s" # (可选)连接对象可重复使用的时间长度(默认30秒) + maxIdleConnTime: "30s" # (可选,v2.10新增)连接池中空闲连接的最大生存时间(默认30秒)。可以通过配置文件或SetConnMaxIdleTime方法设置,避免长时间空闲连接占用资源。 + createdAt: "created_at" # (可选)自动创建时间字段名称 + updatedAt: "updated_at" # (可选)自动更新时间字段名称 + deletedAt: "deleted_at" # (可选)软删除时间字段名称 + timeMaintainDisabled: false # (可选)是否完全关闭时间更新特性,为true时CreatedAt/UpdatedAt/DeletedAt都将失效 + rag_knowledge: + - type: "pgsql" + host: "192.168.0.169" + port: "15432" + user: "postgres" + pass: "Bjang09@686^*^" + name: "tenant-1" + prefix: "rag_knowledge_" # (可选)表名前缀 + role: "master" + debug: true # (可选)开启调试模式 + dryRun: 
false # (可选)ORM空跑(只读不写) + charset: "utf8" # (可选)数据库编码(如: utf8mb4/utf8/gbk/gb2312),一般设置为utf8mb4。默认为utf8。 + timezone: "Asia/Shanghai" # (可选)时区配置,例如:Local + maxIdle: 5 # (可选)连接池最大闲置的连接数(默认10) + maxOpen: 20 # (可选)连接池最大打开的连接数(默认无限制) + maxLifetime: "30s" # (可选)连接对象可重复使用的时间长度(默认30秒) + maxIdleConnTime: "30s" # (可选,v2.10新增)连接池中空闲连接的最大生存时间(默认30秒)。可以通过配置文件或SetConnMaxIdleTime方法设置,避免长时间空闲连接占用资源。 + createdAt: "created_at" # (可选)自动创建时间字段名称 + updatedAt: "updated_at" # (可选)自动更新时间字段名称 + deletedAt: "deleted_at" # (可选)软删除时间字段名称 + timeMaintainDisabled: false # (可选)是否完全关闭时间更新特性,为true时CreatedAt/UpdatedAt/DeletedAt都将失效 + rag_vector: + - type: "pgsql" + host: "192.168.0.169" + port: "15432" + user: "postgres" + pass: "Bjang09@686^*^" + name: "tenant-1" + prefix: "rag_vector_" # (可选)表名前缀 + role: "master" + debug: true # (可选)开启调试模式 + dryRun: false # (可选)ORM空跑(只读不写) + charset: "utf8" # (可选)数据库编码(如: utf8mb4/utf8/gbk/gb2312),一般设置为utf8mb4。默认为utf8。 + timezone: "Asia/Shanghai" # (可选)时区配置,例如:Local + maxIdle: 5 # (可选)连接池最大闲置的连接数(默认10) + maxOpen: 20 # (可选)连接池最大打开的连接数(默认无限制) + maxLifetime: "30s" # (可选)连接对象可重复使用的时间长度(默认30秒) + maxIdleConnTime: "30s" # (可选,v2.10新增)连接池中空闲连接的最大生存时间(默认30秒)。可以通过配置文件或SetConnMaxIdleTime方法设置,避免长时间空闲连接占用资源。 + createdAt: "created_at" # (可选)自动创建时间字段名称 + updatedAt: "updated_at" # (可选)自动更新时间字段名称 + deletedAt: "deleted_at" # (可选)软删除时间字段名称 + timeMaintainDisabled: false # (可选)是否完全关闭时间更新特性,为true时CreatedAt/UpdatedAt/DeletedAt都将失效 + +redis: + default: + address: "192.168.0.169:6379" + db: 0 + +consul: + address: 192.168.0.169:8500 + +jaeger: + addr: 192.168.0.169:4318 + +# eino框架配置 +eino: + # 文件切分配置 + splitter: + bufferSize: 1 + minChunkSize: 64 + percentile: 0.75 + # 向量化配置 + embedding: + provider: "dashscope" +# apiKey: "d158d896-8c54-40ee-9d61-4c5d37cd545c" +# model: "ep-20260326123502-khmdq" +# apiType: "multi_modal_api" + apiKey: "sk-4a8b82770bf74bc490eb3e4c5a8e2be9" + model: "text-embedding-v3" + chatmodel: + provider: "dashscope" + apiKey: "sk-4a8b82770bf74bc490eb3e4c5a8e2be9" + 
model: "qwen-turbo" + +# 文件上传服务地址,与oss模块minio中的endpoint一致 +filePrefix: "http://192.168.0.169:9000" + +gmq: + redis: + primary: + addr: "192.168.0.169" + port: "6379" + db: 0 + username: "" + password: "" + poolSize: 10 + minIdleConn: 5 + maxActiveConn: 10 + maxRetries: 30 + +# Meilisearch 全文检索配置 +meilisearch: + default: + host: "http://localhost" + port: 7700 + apiKey: "admin" +# apiKey: "6b8b6062bcb5e31f150427961d9da1a9e81758aa" + +cache: + localTTL: 60 + redisTTL: 300 diff --git a/consts/public/redis_key.go b/consts/public/redis_key.go index b59bb8b..696f283 100644 --- a/consts/public/redis_key.go +++ b/consts/public/redis_key.go @@ -1,9 +1,9 @@ package public -const KnowledgeLockEsKey = "rag:knowledge:lock:knowledgeIdEs-%v" -const KnowledgeLockSqlKey = "rag:knowledge:lock:knowledgeIdSql-%v" -const KnowledgeContentHashEsKey = "rag:knowledge:knowledgeId:contentHashEs-%v" -const KnowledgeContentHashSqlKey = "rag:knowledge:knowledgeId:contentHashSql-%v" +const KnowledgeLockEsKey = "rag_binary:knowledge:lock:knowledgeIdEs-%v" +const KnowledgeLockSqlKey = "rag_binary:knowledge:lock:knowledgeIdSql-%v" +const KnowledgeContentHashEsKey = "rag_binary:knowledge:knowledgeId:contentHashEs-%v" +const KnowledgeContentHashSqlKey = "rag_binary:knowledge:knowledgeId:contentHashSql-%v" const ( KnowledgeDocumentVectorStatusTopic = "knowledge:document:vector:status:stream" diff --git a/rag_binary b/rag_binary new file mode 100755 index 0000000..460003d Binary files /dev/null and b/rag_binary differ diff --git a/service/document.go b/service/document.go index a78448f..59afb6e 100644 --- a/service/document.go +++ b/service/document.go @@ -597,7 +597,7 @@ func (s *documentService) loadDocument(ctx context.Context, doc *entity.Document func (s *documentService) getHistoryData(ctx context.Context, doc *entity.Document, lockKey, contentKey string) (err error) { docsLockKey := fmt.Sprintf(lockKey, doc.DatasetId) success, err := utils.Lock(ctx, docsLockKey, int64(60), func(ctx 
context.Context) error { - // 1. 扫描 Redis 中所有 前缀为 rag:knowledge:xxx:contentHash 的 key + // 1. Scan Redis for every key matching the glob built from contentKey with "*" in place of the ID (i.e. keys shaped like rag_binary:knowledge:xxx:contentHash...) pattern := fmt.Sprintf(contentKey, "*") keys, err := g.Redis().Keys(ctx, pattern) if err != nil { @@ -666,7 +666,7 @@ func (s *documentService) getHistoryDataFromHttp(ctx context.Context, doc *entit // 调用接口获取数据 d := &dto.ListDocumentChunkRPC{} - if err = http.Get(ctx, "rag-vector/document/chunk/listDocumentChunk", headers, &d, + if err = http.Get(ctx, "rag_binary-vector/document/chunk/listDocumentChunk", headers, &d, "datasetId", gconv.String(doc.DatasetId), "status", 1); err != nil { return diff --git a/timezone/Shanghai b/timezone/Shanghai new file mode 100644 index 0000000..91f6f8b Binary files /dev/null and b/timezone/Shanghai differ diff --git a/timezone/localtime b/timezone/localtime new file mode 100644 index 0000000..91f6f8b Binary files /dev/null and b/timezone/localtime differ diff --git a/timezone/timezone b/timezone/timezone new file mode 100644 index 0000000..421b7a4 --- /dev/null +++ b/timezone/timezone @@ -0,0 +1 @@ +Asia/Shanghai