refactor(prompt): 重构提示词构建服务与数据模型
This commit is contained in:
@@ -10,10 +10,15 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/gogf/gf/v2/frame/g"
|
||||
|
||||
"prompts-core/common/util"
|
||||
"prompts-core/service/gateway"
|
||||
)
|
||||
|
||||
"github.com/gogf/gf/v2/frame/g"
|
||||
// Byte-size multipliers used to turn limits configured in KB / MB into bytes.
const (
	bytesPerKB = 1 << 10          // 1024 bytes
	bytesPerMB = bytesPerKB << 10 // 1024 * 1024 bytes
)
|
||||
|
||||
// FetchFileTexts 从 URL 列表获取文件内容,支持 zip 内文件
|
||||
@@ -24,51 +29,49 @@ func FetchFileTexts(ctx context.Context, urls []string) map[string]string {
|
||||
return result
|
||||
}
|
||||
|
||||
client := &http.Client{
|
||||
Timeout: time.Duration(g.Cfg().MustGet(ctx, "userFiles.httpTimeoutSec", 8).Int()) * time.Second,
|
||||
}
|
||||
client := createHTTPClient(ctx, "userFiles.httpTimeoutSec", 8)
|
||||
|
||||
for _, rawURL := range urls {
|
||||
url := util.SanitizeURL(rawURL)
|
||||
if url == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
if util.IsBannedExtension(url) {
|
||||
if url == "" || util.IsBannedExtension(url) {
|
||||
continue
|
||||
}
|
||||
|
||||
if util.IsZipExtension(url) {
|
||||
zipTexts := fetchZipFileTexts(ctx, client, url)
|
||||
for k, v := range zipTexts {
|
||||
result[k] = v
|
||||
}
|
||||
mergeMap(result, fetchZipFileTexts(ctx, client, url))
|
||||
continue
|
||||
}
|
||||
|
||||
text, err := fetchFileContent(ctx, client, url)
|
||||
if err != nil {
|
||||
continue
|
||||
if text := fetchAndCleanFileContent(ctx, client, url); text != "" {
|
||||
result[url] = text
|
||||
}
|
||||
|
||||
if text == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
text = util.CleanSymbols(text)
|
||||
result[url] = text
|
||||
}
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
// mergeMap copies every entry of src into dst. Keys already present in dst
// are overwritten with the value from src. src may be nil or empty, in which
// case dst is left untouched; dst must be non-nil whenever src has entries.
func mergeMap(dst, src map[string]string) {
	for key := range src {
		dst[key] = src[key]
	}
}
|
||||
|
||||
// fetchAndCleanFileContent 获取并清理文件内容
|
||||
func fetchAndCleanFileContent(ctx context.Context, client *http.Client, url string) string {
|
||||
text, err := fetchFileContent(ctx, client, url)
|
||||
if err != nil || text == "" {
|
||||
return ""
|
||||
}
|
||||
return util.CleanSymbols(text)
|
||||
}
|
||||
|
||||
// fetchZipFileTexts 下载并解压 zip 文件,提取可读文本内容
|
||||
func fetchZipFileTexts(ctx context.Context, client *http.Client, url string) map[string]string {
|
||||
result := make(map[string]string)
|
||||
|
||||
zipBytes, err := downloadFile(client, url,
|
||||
int64(g.Cfg().MustGet(ctx, "userFiles.zipMaxSizeMB", 10).Int())*1024*1024,
|
||||
)
|
||||
maxSize := int64(g.Cfg().MustGet(ctx, "userFiles.zipMaxSizeMB", 10).Int()) * bytesPerMB
|
||||
zipBytes, err := downloadFile(client, url, maxSize)
|
||||
if err != nil {
|
||||
return result
|
||||
}
|
||||
@@ -78,61 +81,61 @@ func fetchZipFileTexts(ctx context.Context, client *http.Client, url string) map
|
||||
return result
|
||||
}
|
||||
|
||||
entryMaxSize := int64(g.Cfg().MustGet(ctx, "userFiles.zipEntryMaxSizeKB", 500).Int()) * 1024
|
||||
entryMaxSize := int64(g.Cfg().MustGet(ctx, "userFiles.zipEntryMaxSizeKB", 500).Int()) * bytesPerKB
|
||||
|
||||
for _, file := range reader.File {
|
||||
if file.FileInfo().IsDir() {
|
||||
if shouldSkipZipEntry(file.Name) {
|
||||
continue
|
||||
}
|
||||
|
||||
fileName := file.Name
|
||||
|
||||
if util.IsBannedExtension(fileName) {
|
||||
continue
|
||||
if text := extractZipEntryContent(file, entryMaxSize); text != "" {
|
||||
result[url+"::"+file.Name] = text
|
||||
}
|
||||
|
||||
if util.IsZipExtension(fileName) {
|
||||
continue
|
||||
}
|
||||
|
||||
rc, err := file.Open()
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
|
||||
content, err := io.ReadAll(io.LimitReader(rc, entryMaxSize))
|
||||
rc.Close()
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
|
||||
contentType := http.DetectContentType(content)
|
||||
if !util.IsReadableContentType(contentType) {
|
||||
continue
|
||||
}
|
||||
|
||||
text := util.CleanSymbols(string(content))
|
||||
if text == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
key := url + "::" + fileName
|
||||
result[key] = text
|
||||
}
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
// shouldSkipZipEntry 判断是否应该跳过 zip 条目
|
||||
func shouldSkipZipEntry(fileName string) bool {
|
||||
return util.IsBannedExtension(fileName) || util.IsZipExtension(fileName)
|
||||
}
|
||||
|
||||
// extractZipEntryContent 提取 zip 条目内容
|
||||
func extractZipEntryContent(file *zip.File, maxSize int64) string {
|
||||
rc, err := file.Open()
|
||||
if err != nil {
|
||||
return ""
|
||||
}
|
||||
defer rc.Close()
|
||||
|
||||
content, err := io.ReadAll(io.LimitReader(rc, maxSize))
|
||||
if err != nil {
|
||||
return ""
|
||||
}
|
||||
|
||||
if !util.IsReadableContentType(http.DetectContentType(content)) {
|
||||
return ""
|
||||
}
|
||||
|
||||
text := util.CleanSymbols(string(content))
|
||||
if text == "" {
|
||||
return ""
|
||||
}
|
||||
|
||||
return text
|
||||
}
|
||||
|
||||
// downloadFile 下载文件,限制最大大小
|
||||
func downloadFile(client *http.Client, url string, maxSize int64) ([]byte, error) {
|
||||
req, err := http.NewRequest(http.MethodGet, url, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
return nil, fmt.Errorf("创建请求失败: %w", err)
|
||||
}
|
||||
|
||||
resp, err := client.Do(req)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
return nil, fmt.Errorf("执行请求失败: %w", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
@@ -140,19 +143,24 @@ func downloadFile(client *http.Client, url string, maxSize int64) ([]byte, error
|
||||
return nil, fmt.Errorf("HTTP %d", resp.StatusCode)
|
||||
}
|
||||
|
||||
return io.ReadAll(io.LimitReader(resp.Body, maxSize))
|
||||
body, err := io.ReadAll(io.LimitReader(resp.Body, maxSize))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("读取响应失败: %w", err)
|
||||
}
|
||||
|
||||
return body, nil
|
||||
}
|
||||
|
||||
// fetchFileContent 获取单个文本文件内容
|
||||
func fetchFileContent(ctx context.Context, client *http.Client, url string) (string, error) {
|
||||
req, err := http.NewRequest(http.MethodGet, url, nil)
|
||||
if err != nil {
|
||||
return "", err
|
||||
return "", fmt.Errorf("创建请求失败: %w", err)
|
||||
}
|
||||
|
||||
resp, err := client.Do(req)
|
||||
if err != nil {
|
||||
return "", err
|
||||
return "", fmt.Errorf("执行请求失败: %w", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
@@ -162,16 +170,13 @@ func fetchFileContent(ctx context.Context, client *http.Client, url string) (str
|
||||
|
||||
contentType := resp.Header.Get("Content-Type")
|
||||
if !util.IsReadableContentType(contentType) {
|
||||
return "", fmt.Errorf("unreadable content-type: %s", contentType)
|
||||
return "", fmt.Errorf("不可读的内容类型: %s", contentType)
|
||||
}
|
||||
|
||||
body, err := io.ReadAll(
|
||||
io.LimitReader(resp.Body,
|
||||
int64(g.Cfg().MustGet(ctx, "userFiles.textFileMaxSizeKB", 500).Int())*1024,
|
||||
),
|
||||
)
|
||||
maxSize := int64(g.Cfg().MustGet(ctx, "userFiles.textFileMaxSizeKB", 500).Int()) * bytesPerKB
|
||||
body, err := io.ReadAll(io.LimitReader(resp.Body, maxSize))
|
||||
if err != nil {
|
||||
return "", err
|
||||
return "", fmt.Errorf("读取响应失败: %w", err)
|
||||
}
|
||||
|
||||
return strings.TrimSpace(string(body)), nil
|
||||
@@ -186,27 +191,26 @@ func SkillMdContent(ctx context.Context, skillName string) string {
|
||||
|
||||
fullUrl := skillResp.ImgAddressPrefix + skillResp.FileUrl
|
||||
|
||||
client := &http.Client{
|
||||
Timeout: time.Duration(g.Cfg().MustGet(ctx, "skillFiles.httpTimeoutSec", 30).Int()) * time.Second,
|
||||
}
|
||||
client := createHTTPClient(ctx, "skillFiles.httpTimeoutSec", 30)
|
||||
maxSize := int64(g.Cfg().MustGet(ctx, "skillFiles.zipMaxSizeMB", 10).Int()) * bytesPerMB
|
||||
|
||||
zipBytes, err := downloadFile(client, fullUrl,
|
||||
int64(g.Cfg().MustGet(ctx, "skillFiles.zipMaxSizeMB", 10).Int())*1024*1024,
|
||||
)
|
||||
zipBytes, err := downloadFile(client, fullUrl, maxSize)
|
||||
if err != nil {
|
||||
return ""
|
||||
}
|
||||
|
||||
mdContents, err := extractMdFiles(ctx, zipBytes)
|
||||
if err != nil {
|
||||
if err != nil || len(mdContents) == 0 {
|
||||
return ""
|
||||
}
|
||||
|
||||
if len(mdContents) == 0 {
|
||||
return ""
|
||||
}
|
||||
return buildSkillMarkdown(skillResp, mdContents)
|
||||
}
|
||||
|
||||
// buildSkillMarkdown 构建技能 Markdown 内容
|
||||
func buildSkillMarkdown(skillResp *gateway.SkillUserVO, mdContents map[string]string) string {
|
||||
var builder strings.Builder
|
||||
|
||||
builder.WriteString(fmt.Sprintf("# Skill: %s\n\n", skillResp.Name))
|
||||
if skillResp.Description != "" {
|
||||
builder.WriteString(fmt.Sprintf("> %s\n\n", skillResp.Description))
|
||||
@@ -227,35 +231,53 @@ func extractMdFiles(ctx context.Context, zipBytes []byte) (map[string]string, er
|
||||
|
||||
reader, err := zip.NewReader(bytes.NewReader(zipBytes), int64(len(zipBytes)))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
return nil, fmt.Errorf("创建 zip 阅读器失败: %w", err)
|
||||
}
|
||||
|
||||
entryMaxSize := int64(g.Cfg().MustGet(ctx, "skillFiles.mdMaxSizeKB", 500).Int()) * 1024
|
||||
entryMaxSize := int64(g.Cfg().MustGet(ctx, "skillFiles.mdMaxSizeKB", 500).Int()) * bytesPerKB
|
||||
|
||||
for _, file := range reader.File {
|
||||
if file.FileInfo().IsDir() {
|
||||
if file.FileInfo().IsDir() || !isMarkdownFile(file.Name) {
|
||||
continue
|
||||
}
|
||||
|
||||
if !strings.HasSuffix(strings.ToLower(file.Name), ".md") {
|
||||
continue
|
||||
}
|
||||
|
||||
rc, err := file.Open()
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
|
||||
content, err := io.ReadAll(io.LimitReader(rc, entryMaxSize))
|
||||
rc.Close()
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
|
||||
if len(content) > 0 {
|
||||
result[file.Name] = strings.TrimSpace(string(content))
|
||||
if content := readMarkdownFileContent(file, entryMaxSize); content != "" {
|
||||
result[file.Name] = content
|
||||
}
|
||||
}
|
||||
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// isMarkdownFile reports whether fileName ends with the ".md" extension,
// compared case-insensitively.
func isMarkdownFile(fileName string) bool {
	const markdownSuffix = ".md"

	lowered := strings.ToLower(fileName)
	return strings.HasSuffix(lowered, markdownSuffix)
}
|
||||
|
||||
// readMarkdownFileContent reads at most maxSize bytes from a zip entry and
// returns them as a string with surrounding whitespace trimmed.
//
// It returns "" when the entry cannot be opened or read, or when no bytes
// were read. Content beyond maxSize is silently dropped.
func readMarkdownFileContent(file *zip.File, maxSize int64) string {
	entry, openErr := file.Open()
	if openErr != nil {
		return ""
	}
	defer entry.Close()

	limited := io.LimitReader(entry, maxSize)
	raw, readErr := io.ReadAll(limited)
	if readErr != nil || len(raw) == 0 {
		return ""
	}

	return strings.TrimSpace(string(raw))
}
|
||||
|
||||
// createHTTPClient 创建 HTTP 客户端
|
||||
func createHTTPClient(ctx context.Context, configKey string, defaultSeconds int) *http.Client {
|
||||
timeout := time.Duration(g.Cfg().MustGet(ctx, configKey, defaultSeconds).Int()) * time.Second
|
||||
return &http.Client{
|
||||
Timeout: timeout,
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user