Files
common/ragflow/chunk.go
2026-03-12 08:50:26 +08:00

179 lines
5.9 KiB
Go

package ragflow
import (
"context"
"fmt"
)
// Management of knowledge chunks within a dataset.
// Reference: https://ragflow.com.cn/docs/dev/http_api_reference#数据集内知识块管理
// Chunk is the JSON representation of a single knowledge chunk.
//
// Every field maps to the JSON key named in its struct tag. The precise
// meaning and format of the values (for example the unit of CreateTimestamp
// or the layout of Positions) is defined by the remote API and is not
// visible in this file — confirm against the API reference before relying
// on it.
type Chunk struct {
	Id                string   `json:"id"`
	Content           string   `json:"content"`
	DocumentId        string   `json:"document_id"`
	DatasetId         string   `json:"dataset_id"`
	CreateTime        string   `json:"create_time"`
	CreateTimestamp   float64  `json:"create_timestamp"`
	ImportantKeywords []string `json:"important_keywords"`
	Questions         []string `json:"questions"`
	Available         bool     `json:"available"`
	ImageId           string   `json:"image_id"`
	Positions         []string `json:"positions"`
}
// AddChunkReq is the request payload used when adding a chunk.
//
// Content is always encoded. ImportantKeywords and Questions carry the
// omitempty option, so they are left out of the JSON when empty.
type AddChunkReq struct {
	Content           string   `json:"content"`
	ImportantKeywords []string `json:"important_keywords,omitempty"`
	Questions         []string `json:"questions,omitempty"`
}
// ListChunksReq holds the optional filters for listing chunks.
//
// ListChunks copies these fields into URL query parameters, skipping empty
// strings and non-positive Page/PageSize values.
type ListChunksReq struct {
	Keywords string `json:"keywords,omitempty"`
	Page     int    `json:"page,omitempty"`
	PageSize int    `json:"page_size,omitempty"`
	Id       string `json:"id,omitempty"`
}
// ListChunksRes is the decoded response of the list-chunks call.
//
// Code is the API status code; ListChunks treats any non-zero value as a
// failure. Data carries the returned chunks, the owning document and a total
// count.
type ListChunksRes struct {
	Code int `json:"code"`
	Data struct {
		Chunks []*Chunk    `json:"chunks"`
		Doc    interface{} `json:"doc"` // document info; left untyped (interface{}) for now
		Total  int         `json:"total"`
	} `json:"data"`
}
// DeleteChunksReq is the request payload used when deleting chunks.
//
// ChunkIds carries the omitempty option, so an empty list is not encoded at
// all.
type DeleteChunksReq struct {
	ChunkIds []string `json:"chunk_ids,omitempty"` // if empty, all chunks are deleted (original author's note)
}
// UpdateChunkReq is the request payload used when updating a chunk.
//
// All fields carry the omitempty option, so only the ones that are set are
// encoded. Available is a pointer so that an explicit false can still be
// sent: a nil pointer is omitted, a pointer to false is encoded.
type UpdateChunkReq struct {
	Content           string   `json:"content,omitempty"`
	ImportantKeywords []string `json:"important_keywords,omitempty"`
	Available         *bool    `json:"available,omitempty"`
}
// RetrieveChunksReq is the request payload used when retrieving chunks.
//
// Question is always encoded. Every other field carries the omitempty
// option, which means zero values (0, false, "", empty slices and maps) are
// not sent at all.
type RetrieveChunksReq struct {
	Question               string                 `json:"question"`
	DatasetIds             []string               `json:"dataset_ids,omitempty"`
	DocumentIds            []string               `json:"document_ids,omitempty"`
	Page                   int                    `json:"page,omitempty"`
	PageSize               int                    `json:"page_size,omitempty"`
	SimilarityThreshold    float64                `json:"similarity_threshold,omitempty"`
	VectorSimilarityWeight float64                `json:"vector_similarity_weight,omitempty"`
	TopK                   int                    `json:"top_k,omitempty"`
	RerankId               string                 `json:"rerank_id,omitempty"`
	Keyword                bool                   `json:"keyword,omitempty"`
	Highlight              bool                   `json:"highlight,omitempty"`
	CrossLanguages         []string               `json:"cross_languages,omitempty"`
	MetadataCondition      map[string]interface{} `json:"metadata_condition,omitempty"`
}
// RetrieveChunksRes is the decoded response of the retrieval call.
//
// The structure is deliberately simplified and should be adjusted to match
// what the server really returns: the official documentation gives no
// detailed response schema, so a plain list of chunks is assumed.
type RetrieveChunksRes struct {
	Code int `json:"code"`
	Data struct {
		Chunks []interface{} `json:"chunks"` // retrieval results may carry extra information
		Total  int           `json:"total"`
	} `json:"data"`
}
// AddChunk adds a chunk to a document of a dataset.
//
// It sends req via POST to
// /api/v1/datasets/{datasetId}/documents/{documentId}/chunks and returns the
// chunk found under data.chunk in the response.
//
// An error from the underlying request is returned unchanged. A response
// whose code is non-zero is turned into an error carrying the response
// message.
func (c *Client) AddChunk(ctx context.Context, datasetId, documentId string, req *AddChunkReq) (*Chunk, error) {
	// Response envelope; only the fields needed by this call are decoded.
	var reply struct {
		Code int `json:"code"`
		Data struct {
			Chunk *Chunk `json:"chunk"`
		} `json:"data"`
		Msg string `json:"message"`
	}

	endpoint := fmt.Sprintf("/api/v1/datasets/%s/documents/%s/chunks", datasetId, documentId)
	err := c.request(ctx, "POST", endpoint, req, &reply)
	if err != nil {
		return nil, err
	}

	if reply.Code == 0 {
		return reply.Data.Chunk, nil
	}
	return nil, fmt.Errorf("add chunk failed: %s", reply.Msg)
}
// ListChunks lists the chunks of one document in a dataset.
//
// It issues a GET to
// /api/v1/datasets/{datasetId}/documents/{documentId}/chunks. The filters in
// req are sent as URL query parameters; empty strings and non-positive
// Page/PageSize values are left out. A nil req means "no filters".
//
// An error from the underlying request is returned unchanged. A response
// whose code is non-zero is turned into an error carrying that code.
func (c *Client) ListChunks(ctx context.Context, datasetId, documentId string, req *ListChunksReq) (*ListChunksRes, error) {
	// The base path must not end in "?": the separator is appended below, and
	// only when there is a query to send. Otherwise the URL would contain
	// "??" (corrupting the first parameter's key) or a dangling "?".
	path := fmt.Sprintf("/api/v1/datasets/%s/documents/%s/chunks", datasetId, documentId)

	params := map[string]interface{}{}
	if req != nil {
		if req.Keywords != "" {
			params["keywords"] = req.Keywords
		}
		if req.Page > 0 {
			params["page"] = req.Page
		}
		if req.PageSize > 0 {
			params["page_size"] = req.PageSize
		}
		if req.Id != "" {
			params["id"] = req.Id
		}
	}

	// NOTE(review): buildQueryString is assumed to return the encoded pairs
	// without a leading "?" — confirm against its definition.
	if query := buildQueryString(params); query != "" {
		path += "?" + query
	}

	var res ListChunksRes
	if err := c.request(ctx, "GET", path, nil, &res); err != nil {
		return nil, err
	}
	if res.Code != 0 {
		return nil, fmt.Errorf("list chunks failed: code=%d", res.Code)
	}
	return &res, nil
}
// DeleteChunks deletes chunks from a document of a dataset.
//
// It sends chunkIds, wrapped in a DeleteChunksReq, via DELETE to
// /api/v1/datasets/{datasetId}/documents/{documentId}/chunks.
//
// Caution: when chunkIds is empty the chunk_ids key is omitted from the
// payload (omitempty); presumably the server then deletes every chunk of the
// document — confirm against the API reference before passing an empty list.
//
// An error from the underlying request is returned unchanged. A response
// that does not report success is turned into an error carrying the response
// message.
func (c *Client) DeleteChunks(ctx context.Context, datasetId, documentId string, chunkIds []string) error {
	endpoint := fmt.Sprintf("/api/v1/datasets/%s/documents/%s/chunks", datasetId, documentId)
	body := DeleteChunksReq{ChunkIds: chunkIds}

	var reply CommonResponse
	err := c.request(ctx, "DELETE", endpoint, body, &reply)
	if err != nil {
		return err
	}

	if reply.IsSuccess() {
		return nil
	}
	return fmt.Errorf("delete chunks failed: %s", reply.Message)
}
// UpdateChunk updates one chunk of a document.
//
// It sends req via PUT to
// /api/v1/datasets/{datasetId}/documents/{documentId}/chunks/{chunkId}.
//
// An error from the underlying request is returned unchanged. A response
// that does not report success is turned into an error carrying the response
// message.
func (c *Client) UpdateChunk(ctx context.Context, datasetId, documentId, chunkId string, req *UpdateChunkReq) error {
	endpoint := fmt.Sprintf("/api/v1/datasets/%s/documents/%s/chunks/%s", datasetId, documentId, chunkId)

	var reply CommonResponse
	err := c.request(ctx, "PUT", endpoint, req, &reply)
	if err != nil {
		return err
	}

	if reply.IsSuccess() {
		return nil
	}
	return fmt.Errorf("update chunk failed: %s", reply.Message)
}
// RetrieveChunks runs a retrieval query for chunks.
//
// It sends req via POST to /api/v1/retrieval and returns the decoded
// response.
//
// An error from the underlying request is returned unchanged. A response
// whose code is non-zero is turned into an error carrying that code.
func (c *Client) RetrieveChunks(ctx context.Context, req *RetrieveChunksReq) (*RetrieveChunksRes, error) {
	const endpoint = "/api/v1/retrieval"

	var reply RetrieveChunksRes
	err := c.request(ctx, "POST", endpoint, req, &reply)
	if err != nil {
		return nil, err
	}

	if reply.Code == 0 {
		return &reply, nil
	}
	return nil, fmt.Errorf("retrieve chunks failed: code=%d", reply.Code)
}