132 lines
2.1 KiB
Go
132 lines
2.1 KiB
Go
package service
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"io"
|
|
"net/http"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/gogf/gf/v2/os/glog"
|
|
)
|
|
|
|
// ============================================
|
|
// 文件处理
|
|
// ============================================
|
|
|
|
func fetchFileTexts(ctx context.Context, urls []string) map[string]string {
|
|
result := make(map[string]string)
|
|
|
|
if len(urls) == 0 {
|
|
return result
|
|
}
|
|
|
|
client := &http.Client{
|
|
Timeout: 8 * time.Second,
|
|
}
|
|
|
|
for _, rawURL := range urls {
|
|
url := sanitizeURL(rawURL)
|
|
if url == "" {
|
|
continue
|
|
}
|
|
|
|
text, err := fetchFileContent(ctx, client, url)
|
|
if err != nil {
|
|
glog.Warningf(ctx,
|
|
"[FetchFile] failed url=%s err=%v",
|
|
url,
|
|
err,
|
|
)
|
|
continue
|
|
}
|
|
|
|
if text == "" {
|
|
continue
|
|
}
|
|
|
|
result[url] = text
|
|
}
|
|
|
|
return result
|
|
}
|
|
|
|
func fetchFileContent(
|
|
ctx context.Context,
|
|
client *http.Client,
|
|
url string,
|
|
) (string, error) {
|
|
|
|
req, err := http.NewRequestWithContext(
|
|
ctx,
|
|
http.MethodGet,
|
|
url,
|
|
nil,
|
|
)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
|
|
resp, err := client.Do(req)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
// HTTP状态检查
|
|
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
|
|
return "", fmt.Errorf("HTTP %d", resp.StatusCode)
|
|
}
|
|
|
|
// Content-Type检查
|
|
contentType := strings.ToLower(resp.Header.Get("Content-Type"))
|
|
|
|
if !isTextContentType(contentType) {
|
|
return "", fmt.Errorf("unsupported content-type: %s", contentType)
|
|
}
|
|
|
|
// 最大读取20KB
|
|
body, err := io.ReadAll(
|
|
io.LimitReader(resp.Body, 20*1024),
|
|
)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
|
|
return strings.TrimSpace(string(body)), nil
|
|
}
|
|
|
|
// isTextContentType reports whether contentType denotes a text format.
//
// It is true when contentType starts with "text/" or contains one of the
// listed application/* media types anywhere in the string (so trailing
// parameters such as "; charset=utf-8" are tolerated). The comparison is
// case-sensitive.
func isTextContentType(contentType string) bool {
	switch {
	case strings.HasPrefix(contentType, "text/"),
		// Common textual application/* types.
		strings.Contains(contentType, "application/json"),
		strings.Contains(contentType, "application/xml"),
		strings.Contains(contentType, "application/javascript"),
		strings.Contains(contentType, "application/x-yaml"),
		strings.Contains(contentType, "application/yaml"),
		strings.Contains(contentType, "application/toml"):
		return true
	default:
		return false
	}
}
|
|
|
|
// sanitizeURL strips surrounding whitespace and wrapping backticks or double
// quotes from raw and returns what remains.
//
// Whitespace and quote characters are removed from both ends repeatedly until
// neither is left, so padding inside the quotes (for example "` https://a `")
// is removed as well and an input consisting only of quotes and whitespace
// yields "". Characters in the interior of the string are never touched.
func sanitizeURL(raw string) string {
	s := raw
	for {
		// Each pass shortens s or leaves it unchanged, so the loop terminates.
		trimmed := strings.Trim(strings.TrimSpace(s), "`\"")
		if trimmed == s {
			return s
		}
		s = trimmed
	}
}
|