feat: 支持多数据库配置与PGVector检索
This commit is contained in:
123
update.sql
123
update.sql
@@ -134,9 +134,9 @@ CREATE TABLE IF NOT EXISTS rag_knowledge_keyword (
|
||||
);
|
||||
|
||||
-- Unique index: the combination (tenant, dataset, document, keyword) must be
-- unique among rows that are not soft-deleted (deleted_at IS NULL).
-- IF NOT EXISTS keeps the script re-runnable, in line with the
-- CREATE TABLE IF NOT EXISTS used for rag_knowledge_keyword.
CREATE UNIQUE INDEX IF NOT EXISTS uk_rag_knowledge_keyword_tenant_dataset_doc_word
    ON rag_knowledge_keyword (tenant_id, dataset_id, document_id, word)
    WHERE deleted_at IS NULL;
|
||||
-- CREATE UNIQUE INDEX uk_rag_knowledge_keyword_tenant_dataset_doc_word
|
||||
-- ON rag_knowledge_keyword(tenant_id, dataset_id, document_id, word)
|
||||
-- WHERE deleted_at IS NULL;
|
||||
|
||||
-- Indexes for frequent business queries.
-- IF NOT EXISTS keeps the script re-runnable against an existing schema.
CREATE INDEX IF NOT EXISTS idx_keyword_tenant_id ON rag_knowledge_keyword (tenant_id);
|
||||
@@ -159,4 +159,119 @@ COMMENT ON COLUMN rag_knowledge_keyword.document_id IS '文档ID';
|
||||
-- Catalog descriptions for the keyword and weight columns (literals are stored as-is).
COMMENT ON COLUMN rag_knowledge_keyword.word   IS '关键词';
COMMENT ON COLUMN rag_knowledge_keyword.weight IS '权重';
|
||||
|
||||
--------------------pgsql创建rag_knowledge_keyword表语句---------------------------
|
||||
--------------------pgsql创建rag_knowledge_keyword表语句---------------------------
|
||||
|
||||
|
||||
|
||||
--------------------pgsql创建rag_vector_dataset_index表语句---------------------------
|
||||
-- Vector dataset index table: one row per vector index defined on a dataset.
CREATE TABLE IF NOT EXISTS rag_vector_dataset_index (
    -- Base (audit) fields
    id           BIGINT       NOT NULL,                            -- primary key ID (not auto-increment)
    tenant_id    BIGINT       NOT NULL DEFAULT 0,                  -- tenant ID
    creator      VARCHAR(64)  NOT NULL,
    created_at   TIMESTAMP    NOT NULL DEFAULT CURRENT_TIMESTAMP,
    updater      VARCHAR(64)  NOT NULL,
    updated_at   TIMESTAMP    NOT NULL DEFAULT CURRENT_TIMESTAMP,
    deleted_at   TIMESTAMP(6),                                     -- NULL while the row is not soft-deleted

    -- Core fields
    dataset_id   BIGINT       NOT NULL,
    name         VARCHAR(255) NOT NULL,
    collection   VARCHAR(255) NOT NULL,
    dimension    INTEGER      NOT NULL,
    field_type   VARCHAR(50)  NOT NULL,
    metric_type  VARCHAR(50)  NOT NULL,
    status       SMALLINT     NOT NULL DEFAULT 1,                  -- status: 1 = enabled, 0 = disabled
    vector_count BIGINT       NOT NULL DEFAULT 0,
    description  TEXT,

    PRIMARY KEY (id)
);
|
||||
|
||||
-- Unique constraint: an index name may appear only once per dataset.
-- ALTER TABLE ... ADD CONSTRAINT has no IF NOT EXISTS form in PostgreSQL, so the
-- catalog is checked first; this keeps the script re-runnable like the
-- CREATE TABLE IF NOT EXISTS above.
DO $$
BEGIN
    IF NOT EXISTS (
        SELECT 1
        FROM pg_constraint
        WHERE conname = 'uk_dataset_id_name'
          AND conrelid = 'rag_vector_dataset_index'::regclass
    ) THEN
        ALTER TABLE rag_vector_dataset_index
            ADD CONSTRAINT uk_dataset_id_name UNIQUE (dataset_id, name);
    END IF;
END
$$;
|
||||
|
||||
-- Secondary indexes on rag_vector_dataset_index.
-- IF NOT EXISTS keeps the script re-runnable against an existing schema.
CREATE INDEX IF NOT EXISTS idx_dataset_index_tenant_id  ON rag_vector_dataset_index (tenant_id);
CREATE INDEX IF NOT EXISTS idx_dataset_index_dataset_id ON rag_vector_dataset_index (dataset_id);
CREATE INDEX IF NOT EXISTS idx_dataset_index_status     ON rag_vector_dataset_index (status);
|
||||
|
||||
-- Catalog descriptions for rag_vector_dataset_index (literals are stored as-is).

-- Table
COMMENT ON TABLE  rag_vector_dataset_index              IS '向量数据集索引表';

-- Base (audit) columns
COMMENT ON COLUMN rag_vector_dataset_index.id           IS '主键ID(非自增)';
COMMENT ON COLUMN rag_vector_dataset_index.tenant_id    IS '租户ID';
COMMENT ON COLUMN rag_vector_dataset_index.creator      IS '创建人';
COMMENT ON COLUMN rag_vector_dataset_index.created_at   IS '创建时间';
COMMENT ON COLUMN rag_vector_dataset_index.updater      IS '更新人';
COMMENT ON COLUMN rag_vector_dataset_index.updated_at   IS '更新时间';
COMMENT ON COLUMN rag_vector_dataset_index.deleted_at   IS '删除时间(软删)';

-- Core columns
COMMENT ON COLUMN rag_vector_dataset_index.dataset_id   IS '数据集ID';
COMMENT ON COLUMN rag_vector_dataset_index.name         IS '索引名称';
COMMENT ON COLUMN rag_vector_dataset_index.collection   IS '向量集合名称';
COMMENT ON COLUMN rag_vector_dataset_index.dimension    IS '向量维度';
COMMENT ON COLUMN rag_vector_dataset_index.field_type   IS '字段类型';
COMMENT ON COLUMN rag_vector_dataset_index.metric_type  IS '度量类型';
COMMENT ON COLUMN rag_vector_dataset_index.status       IS '状态';
COMMENT ON COLUMN rag_vector_dataset_index.vector_count IS '向量数量';
COMMENT ON COLUMN rag_vector_dataset_index.description  IS '描述';
|
||||
|
||||
--------------------pgsql创建rag_vector_dataset_index表语句---------------------------
|
||||
|
||||
--------------------pgsql创建rag_vector_document_chunk表语句---------------------------
|
||||
|
||||
-- Enable the pgvector extension; it supplies the vector(n) column type used by
-- rag_vector_document_chunk. IF NOT EXISTS makes this a no-op when already installed.
CREATE EXTENSION IF NOT EXISTS vector;
|
||||
|
||||
-- Document chunk vector table: one row per text chunk of a document, with its embedding.
CREATE TABLE IF NOT EXISTS rag_vector_document_chunk (
    -- Base (audit) fields
    id            BIGINT       NOT NULL,                            -- primary key ID (not auto-increment)
    tenant_id     BIGINT       NOT NULL DEFAULT 0,                  -- tenant ID
    creator       VARCHAR(64)  NOT NULL,
    created_at    TIMESTAMP    NOT NULL DEFAULT CURRENT_TIMESTAMP,
    updater       VARCHAR(64)  NOT NULL,
    updated_at    TIMESTAMP    NOT NULL DEFAULT CURRENT_TIMESTAMP,
    deleted_at    TIMESTAMP(6),                                     -- NULL while the row is not soft-deleted

    -- Core fields
    status        SMALLINT     NOT NULL DEFAULT 1,                  -- status: 1 = enabled, 0 = disabled
    vector_status SMALLINT     NOT NULL DEFAULT 1,                  -- vectorization status: 1 pending, 2 processing, 3 completed, 4 failed, 5 partCompleted
    dataset_id    BIGINT       NOT NULL,
    document_id   BIGINT       NOT NULL,
    content       TEXT         NOT NULL,
    content_hash  VARCHAR(128) NOT NULL,
    chunk_index   BIGINT       NOT NULL,

    -- Vector field (pgvector); the dimension is fixed at 1024 for every row
    vector        vector(1024) NOT NULL,

    -- Extension info
    metadata      JSONB,

    PRIMARY KEY (id)
);
|
||||
|
||||
-- Secondary indexes on rag_vector_document_chunk.
-- IF NOT EXISTS keeps the script re-runnable against an existing schema.
-- NOTE(review): no approximate-nearest-neighbour index (HNSW / IVFFlat) on the
-- vector column is created in the visible part of this script; if similarity
-- search is expected to scale, consider adding one whose operator class matches
-- the distance operator used by the queries — TODO confirm.
CREATE INDEX IF NOT EXISTS idx_chunk_tenant_id     ON rag_vector_document_chunk (tenant_id);
CREATE INDEX IF NOT EXISTS idx_chunk_dataset_id    ON rag_vector_document_chunk (dataset_id);
CREATE INDEX IF NOT EXISTS idx_chunk_document_id   ON rag_vector_document_chunk (document_id);
CREATE INDEX IF NOT EXISTS idx_chunk_content_hash  ON rag_vector_document_chunk (content_hash);
CREATE INDEX IF NOT EXISTS idx_chunk_status        ON rag_vector_document_chunk (status);
CREATE INDEX IF NOT EXISTS idx_chunk_vector_status ON rag_vector_document_chunk (vector_status);
|
||||
|
||||
-- Catalog descriptions for rag_vector_document_chunk (literals are stored as-is).

-- Table
COMMENT ON TABLE  rag_vector_document_chunk               IS '文档分块向量表';

-- Base (audit) columns
COMMENT ON COLUMN rag_vector_document_chunk.id            IS '主键ID(非自增)';
COMMENT ON COLUMN rag_vector_document_chunk.tenant_id     IS '租户ID';
COMMENT ON COLUMN rag_vector_document_chunk.creator       IS '创建人';
COMMENT ON COLUMN rag_vector_document_chunk.created_at    IS '创建时间';
COMMENT ON COLUMN rag_vector_document_chunk.updater       IS '更新人';
COMMENT ON COLUMN rag_vector_document_chunk.updated_at    IS '更新时间';
COMMENT ON COLUMN rag_vector_document_chunk.deleted_at    IS '删除时间(软删)';

-- Core columns
COMMENT ON COLUMN rag_vector_document_chunk.status        IS '状态';
COMMENT ON COLUMN rag_vector_document_chunk.vector_status IS '向量生成状态';
COMMENT ON COLUMN rag_vector_document_chunk.dataset_id    IS '数据集ID';
COMMENT ON COLUMN rag_vector_document_chunk.document_id   IS '文档ID';
COMMENT ON COLUMN rag_vector_document_chunk.content       IS '分块内容';
COMMENT ON COLUMN rag_vector_document_chunk.content_hash  IS '内容哈希';
COMMENT ON COLUMN rag_vector_document_chunk.chunk_index   IS '分块序号';

-- Vector and extension columns
COMMENT ON COLUMN rag_vector_document_chunk.vector        IS '向量数据';
COMMENT ON COLUMN rag_vector_document_chunk.metadata      IS '扩展元数据';
|
||||
|
||||
--------------------pgsql创建rag_vector_document_chunk表语句---------------------------
|
||||
Reference in New Issue
Block a user