package eino

import (
	"context"
	"fmt"

	"gitea.com/red-future/common/utils"
	"github.com/cloudwego/eino-ext/components/document/loader/url"
	"github.com/cloudwego/eino-ext/components/document/parser/docx"
	"github.com/cloudwego/eino-ext/components/document/parser/pdf"
	"github.com/cloudwego/eino-ext/components/document/parser/xlsx"
	"github.com/cloudwego/eino/components/document"
	"github.com/cloudwego/eino/components/document/parser"
	"github.com/cloudwego/eino/schema"
)

// LoadDocument is a business-level helper that loads a file and returns the
// documents produced by the loader.
//
// It selects a parser for fileFormat via docsParser, builds a URL loader
// around that parser, and loads the URI formed by appending filePath to the
// prefix returned by utils.GetFileAddressPrefix.
//
// Parameters:
//   - ctx: passed to parser creation, loader creation, the prefix lookup and
//     the load call itself.
//   - filePath: appended verbatim to the file address prefix to form the URI.
//   - fileFormat: format key handed to docsParser ("docx", "pdf", "xlsx").
//
// Errors from any of the steps are returned unchanged. docs is nil when a step
// before the load fails; otherwise it is whatever loader.Load returned.
func LoadDocument(ctx context.Context, filePath, fileFormat string) (docs []*schema.Document, err error) {
	p, err := docsParser(ctx, fileFormat)
	if err != nil {
		return nil, err
	}

	loader, err := url.NewLoader(ctx, &url.LoaderConfig{
		Parser: p,
	})
	if err != nil {
		// This error used to be overwritten by the prefix lookup below, so a
		// failed construction went unnoticed until loader was used.
		return nil, err
	}

	prefix, err := utils.GetFileAddressPrefix(ctx)
	if err != nil {
		return nil, err
	}

	// Use the caller's ctx (not context.Background()) so that cancellation and
	// deadlines set by the caller also apply to the load.
	return loader.Load(ctx, document.Source{
		URI: fmt.Sprintf("%s%s", prefix, filePath),
	})
}

// docsParser returns the document parser matching fileFormat.
//
// Recognised values are "docx", "pdf" and "xlsx". For any other value both
// results are nil: no parser and no error.
// NOTE(review): how url.NewLoader treats a nil Parser is decided by that
// package — confirm that falling through here is intended for other formats.
func docsParser(ctx context.Context, fileFormat string) (p parser.Parser, err error) {
	switch fileFormat {
	case "docx":
		p, err = docx.NewDocxParser(ctx, &docx.Config{
			ToSections:     true,
			IncludeHeaders: true,
			IncludeFooters: true,
			IncludeTables:  true,
		})
	case "pdf":
		p, err = pdf.NewPDFParser(ctx, &pdf.Config{})
	case "xlsx":
		p, err = xlsx.NewXlsxParser(ctx, &xlsx.Config{})
	}
	return p, err
}