Files
common/ragflow/chunk.go

181 lines
6.1 KiB
Go
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
package ragflow
import (
"context"
"github.com/gogf/gf/v2/errors/gerror"
)
// 数据集内知识块管理
// 参考: https://ragflow.com.cn/docs/dev/http_api_reference#数据集内知识块管理
// Chunk is a knowledge chunk as exchanged with the RAGFlow chunk endpoints.
//
// Every field is mapped to the JSON key given in its struct tag. None of the
// tags use omitempty, so zero values are always emitted when a Chunk is
// marshalled (nil slices are encoded as null).
//
// NOTE(review): Positions is declared as a list of strings; confirm against
// real server responses that the "positions" payload has this shape.
type Chunk struct {
	Id                string   `json:"id"`
	Content           string   `json:"content"`
	DocumentId        string   `json:"document_id"`
	DatasetId         string   `json:"dataset_id"`
	CreateTime        string   `json:"create_time"`
	CreateTimestamp   float64  `json:"create_timestamp"`
	ImportantKeywords []string `json:"important_keywords"`
	Questions         []string `json:"questions"`
	Available         bool     `json:"available"`
	ImageId           string   `json:"image_id"`
	Positions         []string `json:"positions"`
}
// AddChunkReq is the request body for adding a chunk to a document.
//
// Content is always serialized. ImportantKeywords and Questions are left out
// of the JSON body when they are empty (omitempty).
type AddChunkReq struct {
	Content           string   `json:"content"`
	ImportantKeywords []string `json:"important_keywords,omitempty"`
	Questions         []string `json:"questions,omitempty"`
}
// ListChunksReq holds the optional filters for listing the chunks of a
// document.
//
// Every field carries omitempty, so zero values are dropped when the struct
// is marshalled. ListChunks likewise only forwards non-empty strings and
// positive integers as query parameters.
type ListChunksReq struct {
	Keywords string `json:"keywords,omitempty"`
	Page     int    `json:"page,omitempty"`
	PageSize int    `json:"page_size,omitempty"`
	Id       string `json:"id,omitempty"`
}
// ListChunksRes is the response envelope returned when listing chunks.
//
// Note: the payload under "data" carries the chunk list ("chunks"), the
// associated document information ("doc") and the total count ("total").
type ListChunksRes struct {
	Code int `json:"code"` // Status code; 0 means success.
	Data struct {
		Chunks []*Chunk    `json:"chunks"` // List of chunks.
		Doc    interface{} `json:"doc"`    // Associated document information (the full Document object).
		Total  int         `json:"total"`  // Total number of chunks.
	} `json:"data"`
}
// DeleteChunksReq is the request body for deleting chunks from a document.
//
// ChunkIds is omitted from the JSON body when it is empty (omitempty). Per
// the original author's note, an empty list means "delete all chunks".
type DeleteChunksReq struct {
	ChunkIds []string `json:"chunk_ids,omitempty"`
}
// UpdateChunkReq is the request body for updating an existing chunk.
//
// All fields are optional and dropped from the JSON body when unset.
// Available is a pointer so that an explicit false is still serialized; only
// a nil pointer is omitted.
type UpdateChunkReq struct {
	Content           string   `json:"content,omitempty"`
	ImportantKeywords []string `json:"important_keywords,omitempty"`
	Available         *bool    `json:"available,omitempty"`
}
// RetrieveChunksReq is the request body for retrieving chunks that match a
// question.
//
// Question is always serialized. Every other field carries omitempty, so a
// zero value (0, false, "", empty slice or map) is left out of the JSON body
// and cannot be sent explicitly through this struct.
type RetrieveChunksReq struct {
	Question               string                 `json:"question"`
	DatasetIds             []string               `json:"dataset_ids,omitempty"`
	DocumentIds            []string               `json:"document_ids,omitempty"`
	Page                   int                    `json:"page,omitempty"`
	PageSize               int                    `json:"page_size,omitempty"`
	SimilarityThreshold    float64                `json:"similarity_threshold,omitempty"`
	VectorSimilarityWeight float64                `json:"vector_similarity_weight,omitempty"`
	TopK                   int                    `json:"top_k,omitempty"`
	RerankId               string                 `json:"rerank_id,omitempty"`
	Keyword                bool                   `json:"keyword,omitempty"`
	Highlight              bool                   `json:"highlight,omitempty"`
	CrossLanguages         []string               `json:"cross_languages,omitempty"`
	MetadataCondition      map[string]interface{} `json:"metadata_condition,omitempty"`
}
// RetrieveChunksRes is the response envelope of the chunk retrieval endpoint.
//
// The real structure is fairly complex and is simplified here for now; adjust
// it to the actual payload as needed. The official documentation does not
// describe the response in detail, so a list of chunks is assumed.
type RetrieveChunksRes struct {
	Code int `json:"code"`
	Data struct {
		Chunks []interface{} `json:"chunks"` // Retrieval results may carry extra information.
		Total  int           `json:"total"`
	} `json:"data"`
}
// AddChunk adds a chunk to a document of a dataset.
//
// It POSTs req to /api/v1/datasets/{datasetId}/documents/{documentId}/chunks
// and decodes the reply. It returns the chunk found under "data.chunk", the
// error reported by the underlying request, or an error carrying the reply's
// "message" field when the reply code is non-zero.
func (c *Client) AddChunk(ctx context.Context, datasetId, documentId string, req *AddChunkReq) (*Chunk, error) {
	// Envelope of this endpoint's reply; only needed for decoding.
	var reply struct {
		Code int `json:"code"`
		Data struct {
			Chunk *Chunk `json:"chunk"`
		} `json:"data"`
		Msg string `json:"message"`
	}

	endpoint := "/api/v1/datasets/" + datasetId
	endpoint += "/documents/" + documentId + "/chunks"

	err := c.request(ctx, "POST", endpoint, req, &reply)
	if err != nil {
		return nil, err
	}
	if reply.Code == 0 {
		return reply.Data.Chunk, nil
	}
	return nil, gerror.Newf("add chunk failed: %s", reply.Msg)
}
// ListChunks lists the chunks of a document, optionally filtered and
// paginated.
//
// It issues GET /api/v1/datasets/{datasetId}/documents/{documentId}/chunks.
// Only filters that are actually set on req (non-empty strings, positive
// integers) are passed to buildQueryString; a non-empty result is appended to
// the path after "?". A nil req is treated as "no filters".
//
// It returns the decoded response, the error reported by the underlying
// request, or an error carrying the response code when that code is non-zero.
func (c *Client) ListChunks(ctx context.Context, datasetId, documentId string, req *ListChunksReq) (*ListChunksRes, error) {
	path := "/api/v1/datasets/" + datasetId + "/documents/" + documentId + "/chunks"

	params := map[string]interface{}{}
	// Every filter is optional, so a nil request must not panic; it simply
	// contributes no query parameters.
	if req != nil {
		if req.Keywords != "" {
			params["keywords"] = req.Keywords
		}
		if req.Page > 0 {
			params["page"] = req.Page
		}
		if req.PageSize > 0 {
			params["page_size"] = req.PageSize
		}
		if req.Id != "" {
			params["id"] = req.Id
		}
	}
	if query := buildQueryString(params); query != "" {
		path += "?" + query
	}

	var res ListChunksRes
	if err := c.request(ctx, "GET", path, nil, &res); err != nil {
		return nil, err
	}
	if res.Code != 0 {
		return nil, gerror.Newf("list chunks failed: code=%d", res.Code)
	}
	return &res, nil
}
// DeleteChunks deletes chunks from a document of a dataset.
//
// It sends DELETE /api/v1/datasets/{datasetId}/documents/{documentId}/chunks
// with a DeleteChunksReq built from chunkIds. Because that field is tagged
// omitempty, an empty chunkIds is left out of the body; per the original
// author's note on DeleteChunksReq, that means "delete all chunks".
//
// It returns the error reported by the underlying request, or an error
// carrying the response message when the response does not report success.
func (c *Client) DeleteChunks(ctx context.Context, datasetId, documentId string, chunkIds []string) (err error) {
	endpoint := "/api/v1/datasets/" + datasetId + "/documents/" + documentId + "/chunks"

	var reply CommonResponse
	err = c.request(ctx, "DELETE", endpoint, DeleteChunksReq{ChunkIds: chunkIds}, &reply)
	switch {
	case err != nil:
		return err
	case reply.IsSuccess():
		return nil
	default:
		return gerror.Newf("delete chunks failed: %s", reply.Message)
	}
}
// UpdateChunk updates a single chunk of a document.
//
// It PUTs req to
// /api/v1/datasets/{datasetId}/documents/{documentId}/chunks/{chunkId}.
//
// It returns the error reported by the underlying request, or an error
// carrying the response message when the response does not report success.
func (c *Client) UpdateChunk(ctx context.Context, datasetId, documentId, chunkId string, req *UpdateChunkReq) (err error) {
	endpoint := "/api/v1/datasets/" + datasetId + "/documents/" + documentId + "/chunks/" + chunkId

	var reply CommonResponse
	err = c.request(ctx, "PUT", endpoint, req, &reply)
	if err != nil {
		return err
	}
	if reply.IsSuccess() {
		return nil
	}
	return gerror.Newf("update chunk failed: %s", reply.Message)
}
// RetrieveChunks retrieves chunks matching the given request.
//
// It POSTs req to /api/v1/retrieval and decodes the reply into a
// RetrieveChunksRes. It returns the decoded reply, the error reported by the
// underlying request, or an error carrying the reply code when that code is
// non-zero.
func (c *Client) RetrieveChunks(ctx context.Context, req *RetrieveChunksReq) (*RetrieveChunksRes, error) {
	reply := new(RetrieveChunksRes)
	if err := c.request(ctx, "POST", "/api/v1/retrieval", req, reply); err != nil {
		return nil, err
	}
	if code := reply.Code; code != 0 {
		return nil, gerror.Newf("retrieve chunks failed: code=%d", code)
	}
	return reply, nil
}