package service

import (
	"context"
	"fmt"
	"io"
	"net/http"
	"strings"
	"time"
	"unicode"
	"unicode/utf8"

	"github.com/gogf/gf/v2/os/glog"
)

// ============================================
// File handling
// ============================================

// fetchFileTexts downloads each URL in urls and returns a map from the
// sanitized URL to the trimmed text that was fetched.
//
// URLs that sanitize to an empty string, fail to download, or yield empty
// text are skipped; download failures are logged as warnings rather than
// returned. The returned map is never nil. A URL that already produced text
// is not fetched a second time, and the loop stops early once ctx is done.
//
// NOTE(review): URLs are requested exactly as given. If they can come from
// untrusted users, consider restricting target hosts (SSRF) — confirm
// against callers.
func fetchFileTexts(ctx context.Context, urls []string) map[string]string {
	result := make(map[string]string, len(urls))
	if len(urls) == 0 {
		return result
	}

	// One client is shared by every request made during this call.
	client := &http.Client{
		Timeout: 8 * time.Second,
	}

	for _, rawURL := range urls {
		// After cancellation every further request would fail immediately,
		// so stop instead of logging one warning per remaining URL.
		if ctx != nil && ctx.Err() != nil {
			glog.Warningf(ctx, "[FetchFile] aborted err=%v", ctx.Err())
			break
		}

		fileURL := sanitizeURL(rawURL)
		if fileURL == "" {
			continue
		}
		// The result is keyed by URL, so re-fetching a URL that already
		// succeeded would only repeat the network round trip.
		if _, done := result[fileURL]; done {
			continue
		}

		text, err := fetchFileContent(ctx, client, fileURL)
		if err != nil {
			glog.Warningf(ctx,
				"[FetchFile] failed url=%s err=%v",
				fileURL,
				err,
			)
			continue
		}
		if text == "" {
			continue
		}

		result[fileURL] = text
	}

	return result
}

// fetchFileContent issues a GET for url using client and returns the
// response body as whitespace-trimmed text.
//
// It returns an error when the request cannot be built or sent, when the
// status code is outside 2xx, when the Content-Type is not accepted by
// isTextContentType, or when reading the body fails. At most 20 KiB of the
// body is read; if that limit cuts a multi-byte UTF-8 sequence in half, the
// partial sequence is dropped so the returned string does not end in
// invalid UTF-8.
func fetchFileContent(
	ctx context.Context,
	client *http.Client,
	url string,
) (string, error) {
	// Maximum number of body bytes read from a response.
	const maxBodyBytes = 20 * 1024

	req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
	if err != nil {
		return "", fmt.Errorf("create request: %w", err)
	}

	resp, err := client.Do(req)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()

	// Only 2xx responses are treated as success.
	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
		return "", fmt.Errorf("HTTP %d", resp.StatusCode)
	}

	// Reject anything that is not a recognised text content type.
	contentType := strings.ToLower(resp.Header.Get("Content-Type"))
	if !isTextContentType(contentType) {
		return "", fmt.Errorf("unsupported content-type: %s", contentType)
	}

	body, err := io.ReadAll(io.LimitReader(resp.Body, maxBodyBytes))
	if err != nil {
		return "", fmt.Errorf("read body: %w", err)
	}

	// Hitting the limit means the body may have been cut mid-character.
	if len(body) >= maxBodyBytes {
		body = trimIncompleteRune(body)
	}

	return strings.TrimSpace(string(body)), nil
}

// trimIncompleteRune returns b without a trailing UTF-8 sequence whose
// leading byte is present but whose continuation bytes are missing.
// Complete runes and bytes that are simply invalid are left untouched.
func trimIncompleteRune(b []byte) []byte {
	// A cut-off sequence is at most utf8.UTFMax-1 bytes long, so only the
	// last few bytes need to be inspected.
	for i := len(b) - 1; i >= 0 && i > len(b)-utf8.UTFMax; i-- {
		if !utf8.RuneStart(b[i]) {
			continue
		}
		if !utf8.FullRune(b[i:]) {
			return b[:i]
		}
		break
	}
	return b
}

// isTextContentType reports whether contentType denotes textual content.
//
// The check is case-insensitive and accepts any text/* type, any value
// containing one of the listed application types, and any media type that
// ends in a structured-syntax suffix (+json, +xml, +yaml).
func isTextContentType(contentType string) bool {
	ct := strings.ToLower(strings.TrimSpace(contentType))

	// text/*
	if strings.HasPrefix(ct, "text/") {
		return true
	}

	// Common textual application types. Substring matching means values
	// with parameters (e.g. "; charset=utf-8") are accepted as well.
	allowTypes := []string{
		"application/json",
		"application/xml",
		"application/javascript",
		"application/x-yaml",
		"application/yaml",
		"application/toml",
	}
	for _, t := range allowTypes {
		if strings.Contains(ct, t) {
			return true
		}
	}

	// Structured-syntax suffixes such as application/problem+json or
	// image/svg+xml; parameters after ';' are ignored for this check.
	mediaType := ct
	if i := strings.IndexByte(mediaType, ';'); i >= 0 {
		mediaType = mediaType[:i]
	}
	mediaType = strings.TrimSpace(mediaType)
	for _, suffix := range []string{"+json", "+xml", "+yaml"} {
		if strings.HasSuffix(mediaType, suffix) {
			return true
		}
	}

	return false
}

// sanitizeURL strips surrounding whitespace, backticks and double quotes
// from raw, in any interleaving, so inputs such as "` https://x `" come out
// as a clean URL. The interior of the string is not modified.
func sanitizeURL(raw string) string {
	return strings.TrimFunc(raw, func(r rune) bool {
		return r == '`' || r == '"' || unicode.IsSpace(r)
	})
}