180 lines
6.1 KiB
Go
180 lines
6.1 KiB
Go
package ragflow
|
||
|
||
import (
|
||
"context"
|
||
"fmt"
|
||
)
|
||
|
||
// Chunk management within a dataset.
// Reference: https://ragflow.com.cn/docs/dev/http_api_reference#数据集内知识块管理
|
||
|
||
// Chunk is the JSON representation of a single knowledge chunk: its
// identifier, text content, the document and dataset it belongs to, its
// creation time (as a string and as a numeric timestamp), its keywords and
// questions, an availability flag, an image identifier and its positions.
//
// NOTE(review): Positions is decoded as a list of strings; confirm this
// matches the payload the server actually returns.
type Chunk struct {
	Id                string   `json:"id"`
	Content           string   `json:"content"`
	DocumentId        string   `json:"document_id"`
	DatasetId         string   `json:"dataset_id"`
	CreateTime        string   `json:"create_time"`
	CreateTimestamp   float64  `json:"create_timestamp"`
	ImportantKeywords []string `json:"important_keywords"`
	Questions         []string `json:"questions"`
	Available         bool     `json:"available"`
	ImageId           string   `json:"image_id"`
	Positions         []string `json:"positions"`
}
|
||
|
||
// AddChunkReq is the request body for adding a chunk to a document.
//
// Content is always serialized. ImportantKeywords and Questions are optional
// and are left out of the JSON body when empty.
type AddChunkReq struct {
	Content           string   `json:"content"`
	ImportantKeywords []string `json:"important_keywords,omitempty"`
	Questions         []string `json:"questions,omitempty"`
}
|
||
|
||
// ListChunksReq holds the optional filters for listing the chunks of a
// document.
//
// ListChunks sends these as URL query parameters rather than as a body:
// Keywords and Id are sent when non-empty, Page and PageSize when greater
// than zero.
type ListChunksReq struct {
	Keywords string `json:"keywords,omitempty"`
	Page     int    `json:"page,omitempty"`
	PageSize int    `json:"page_size,omitempty"`
	Id       string `json:"id,omitempty"`
}
|
||
|
||
// ListChunksRes 列出知识块响应
|
||
// 注意:响应结构包含 chunks(知识块列表)、doc(关联文档信息)和 total(总数)
|
||
type ListChunksRes struct {
|
||
Code int `json:"code"` // 状态码,0 表示成功
|
||
Data struct {
|
||
Chunks []*Chunk `json:"chunks"` // 知识块列表
|
||
Doc interface{} `json:"doc"` // 关联文档信息(完整的 Document 对象)
|
||
Total int `json:"total"` // 知识块总数
|
||
} `json:"data"`
|
||
}
|
||
|
||
// DeleteChunksReq is the request body for deleting chunks from a document.
//
// WARNING: ChunkIds is omitted from the JSON body when it is nil or empty,
// and an absent list means "delete every chunk of the document".
type DeleteChunksReq struct {
	ChunkIds []string `json:"chunk_ids,omitempty"` // If empty, all chunks are deleted.
}
|
||
|
||
// UpdateChunkReq is the request body for updating a chunk.
//
// Every field is optional and is left out of the JSON body when unset.
// Available is a pointer so that an explicit false can be sent: a nil pointer
// is omitted, while a pointer to false is serialized as "available": false.
type UpdateChunkReq struct {
	Content           string   `json:"content,omitempty"`
	ImportantKeywords []string `json:"important_keywords,omitempty"`
	Available         *bool    `json:"available,omitempty"`
}
|
||
|
||
// RetrieveChunksReq is the request body for retrieving chunks that match a
// question.
//
// Question is always serialized. Every other field uses omitempty, so a zero
// value (0, false, "", or an empty slice or map) is left out of the body.
// As a consequence an explicit zero, such as a SimilarityThreshold of 0 or
// Keyword set to false, cannot be sent with this type.
type RetrieveChunksReq struct {
	Question               string                 `json:"question"`
	DatasetIds             []string               `json:"dataset_ids,omitempty"`
	DocumentIds            []string               `json:"document_ids,omitempty"`
	Page                   int                    `json:"page,omitempty"`
	PageSize               int                    `json:"page_size,omitempty"`
	SimilarityThreshold    float64                `json:"similarity_threshold,omitempty"`
	VectorSimilarityWeight float64                `json:"vector_similarity_weight,omitempty"`
	TopK                   int                    `json:"top_k,omitempty"`
	RerankId               string                 `json:"rerank_id,omitempty"`
	Keyword                bool                   `json:"keyword,omitempty"`
	Highlight              bool                   `json:"highlight,omitempty"`
	CrossLanguages         []string               `json:"cross_languages,omitempty"`
	MetadataCondition      map[string]interface{} `json:"metadata_condition,omitempty"`
}
|
||
|
||
// RetrieveChunksRes is the response of the chunk retrieval endpoint.
//
// The real structure is fairly complex, so it is simplified for now and
// should be adjusted to match what the server actually returns. The official
// documentation does not spell out the response in detail; this type assumes
// a list of chunks is returned.
type RetrieveChunksRes struct {
	Code int `json:"code"`
	Data struct {
		Chunks []interface{} `json:"chunks"` // Retrieval results may carry extra information.
		Total  int           `json:"total"`
	} `json:"data"`
}
|
||
|
||
// AddChunk 添加知识块
|
||
func (c *Client) AddChunk(ctx context.Context, datasetId, documentId string, req *AddChunkReq) (*Chunk, error) {
|
||
path := fmt.Sprintf("/api/v1/datasets/%s/documents/%s/chunks", datasetId, documentId)
|
||
var res struct {
|
||
Code int `json:"code"`
|
||
Data struct {
|
||
Chunk *Chunk `json:"chunk"`
|
||
} `json:"data"`
|
||
Msg string `json:"message"`
|
||
}
|
||
if err := c.request(ctx, "POST", path, req, &res); err != nil {
|
||
return nil, err
|
||
}
|
||
if res.Code != 0 {
|
||
return nil, fmt.Errorf("add chunk failed: %s", res.Msg)
|
||
}
|
||
return res.Data.Chunk, nil
|
||
}
|
||
|
||
// ListChunks 列出知识块
|
||
func (c *Client) ListChunks(ctx context.Context, datasetId, documentId string, req *ListChunksReq) (*ListChunksRes, error) {
|
||
path := fmt.Sprintf("/api/v1/datasets/%s/documents/%s/chunks", datasetId, documentId)
|
||
params := map[string]interface{}{}
|
||
if req.Keywords != "" {
|
||
params["keywords"] = req.Keywords
|
||
}
|
||
if req.Page > 0 {
|
||
params["page"] = req.Page
|
||
}
|
||
if req.PageSize > 0 {
|
||
params["page_size"] = req.PageSize
|
||
}
|
||
if req.Id != "" {
|
||
params["id"] = req.Id
|
||
}
|
||
|
||
query := buildQueryString(params)
|
||
if query != "" {
|
||
path += "?" + query
|
||
}
|
||
|
||
var res ListChunksRes
|
||
if err := c.request(ctx, "GET", path, nil, &res); err != nil {
|
||
return nil, err
|
||
}
|
||
if res.Code != 0 {
|
||
return nil, fmt.Errorf("list chunks failed: code=%d", res.Code)
|
||
}
|
||
return &res, nil
|
||
}
|
||
|
||
// DeleteChunks 删除知识块
|
||
func (c *Client) DeleteChunks(ctx context.Context, datasetId, documentId string, chunkIds []string) error {
|
||
req := DeleteChunksReq{ChunkIds: chunkIds}
|
||
var res CommonResponse
|
||
path := fmt.Sprintf("/api/v1/datasets/%s/documents/%s/chunks", datasetId, documentId)
|
||
if err := c.request(ctx, "DELETE", path, req, &res); err != nil {
|
||
return err
|
||
}
|
||
if !res.IsSuccess() {
|
||
return fmt.Errorf("delete chunks failed: %s", res.Message)
|
||
}
|
||
return nil
|
||
}
|
||
|
||
// UpdateChunk 更新知识块
|
||
func (c *Client) UpdateChunk(ctx context.Context, datasetId, documentId, chunkId string, req *UpdateChunkReq) error {
|
||
var res CommonResponse
|
||
path := fmt.Sprintf("/api/v1/datasets/%s/documents/%s/chunks/%s", datasetId, documentId, chunkId)
|
||
if err := c.request(ctx, "PUT", path, req, &res); err != nil {
|
||
return err
|
||
}
|
||
if !res.IsSuccess() {
|
||
return fmt.Errorf("update chunk failed: %s", res.Message)
|
||
}
|
||
return nil
|
||
}
|
||
|
||
// RetrieveChunks 检索知识块
|
||
func (c *Client) RetrieveChunks(ctx context.Context, req *RetrieveChunksReq) (*RetrieveChunksRes, error) {
|
||
var res RetrieveChunksRes
|
||
if err := c.request(ctx, "POST", "/api/v1/retrieval", req, &res); err != nil {
|
||
return nil, err
|
||
}
|
||
if res.Code != 0 {
|
||
return nil, fmt.Errorf("retrieve chunks failed: code=%d", res.Code)
|
||
}
|
||
return &res, nil
|
||
}
|