代码初始化
This commit is contained in:
232
service/asr/transcribe_service.go
Normal file
232
service/asr/transcribe_service.go
Normal file
@@ -0,0 +1,232 @@
|
||||
package asr
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
dto "media/model/dto/audio"
|
||||
serviceAudio "media/service/audio"
|
||||
serviceScene "media/service/scene"
|
||||
|
||||
"github.com/gogf/gf/v2/frame/g"
|
||||
)
|
||||
|
||||
// VideoTranscribeReq is the input of TranscribeVideo.
type VideoTranscribeReq struct {
	// VideoPath is the local video file the audio track is extracted from.
	VideoPath string
	// Model and Language are handed to the Whisper service unchanged.
	Model, Language string
	// KeepAudio, when false, lets TranscribeVideo delete the extracted audio
	// and the transcript text files once recognition has succeeded.
	KeepAudio bool
}
|
||||
|
||||
// VideoTranscribeRes is the result of TranscribeVideo.
type VideoTranscribeRes struct {
	// Recognition output, copied from the Whisper service result.
	Text     string `json:"text"`
	Model    string `json:"model"`
	Language string `json:"language"`

	// Details of the extracted audio, copied from the audio extraction result.
	AudioPath     string `json:"audioPath"`
	AudioSize     int64  `json:"audioSize"`
	AudioDuration string `json:"audioDuration"`
}
|
||||
|
||||
// transcribeService groups the video transcription operations. It has no fields.
type transcribeService struct{}

// VideoTranscribe is the package-level instance through which the
// transcription methods are called.
var VideoTranscribe = &transcribeService{}
|
||||
|
||||
// TranscribeWithURLs 从 URL 下载视频并转录
|
||||
func (s *transcribeService) TranscribeWithURLs(ctx context.Context, req *dto.TranscribeReq) (res *dto.TranscribeRes, err error) {
|
||||
if len(req.VideoURLs) == 0 {
|
||||
return nil, errors.New("video_urls 不能为空")
|
||||
}
|
||||
|
||||
tempDir := getTempDir(ctx)
|
||||
os.MkdirAll(tempDir, 0755)
|
||||
|
||||
var savePaths []string
|
||||
for _, videoURL := range req.VideoURLs {
|
||||
savePath, dlErr := downloadFromURL(ctx, videoURL, tempDir)
|
||||
if dlErr != nil {
|
||||
continue
|
||||
}
|
||||
savePaths = append(savePaths, savePath)
|
||||
}
|
||||
if len(savePaths) == 0 {
|
||||
return nil, errors.New("所有视频下载均失败")
|
||||
}
|
||||
|
||||
results := s.processVideos(ctx, savePaths, req.Model, req.Language, req.Threshold)
|
||||
res = &dto.TranscribeRes{Results: results}
|
||||
return
|
||||
}
|
||||
|
||||
// TranscribeUpload 从已保存的文件转录
|
||||
func (s *transcribeService) TranscribeUpload(ctx context.Context, savePaths []string, model, language string, threshold float64) []dto.TranscribeItem {
|
||||
return s.processVideos(ctx, savePaths, model, language, threshold)
|
||||
}
|
||||
|
||||
// processVideos 逐个处理视频
|
||||
func (s *transcribeService) processVideos(ctx context.Context, savePaths []string, model, language string, threshold float64) []dto.TranscribeItem {
|
||||
var results []dto.TranscribeItem
|
||||
|
||||
for _, savePath := range savePaths {
|
||||
fileName := filepath.Base(savePath)
|
||||
if idx := strings.Index(fileName, "_"); idx > 0 {
|
||||
fileName = fileName[idx+1:]
|
||||
}
|
||||
|
||||
// 场景分析
|
||||
var scenes *dto.SceneSummaryDTO
|
||||
sceneRes, sceneErr := serviceScene.SceneAnalyzer.Analyze(ctx, &serviceScene.SceneAnalyzeReq{
|
||||
VideoPaths: []string{savePath},
|
||||
Threshold: threshold,
|
||||
ExtractKeyframes: false,
|
||||
})
|
||||
if sceneErr == nil && len(sceneRes.Analyses) > 0 {
|
||||
scenes = toSceneDTO(&sceneRes.Analyses[0])
|
||||
}
|
||||
|
||||
// 语音转文字(内部删除视频文件)
|
||||
transRes, transErr := s.TranscribeVideo(ctx, &VideoTranscribeReq{
|
||||
VideoPath: savePath,
|
||||
Model: model,
|
||||
Language: language,
|
||||
})
|
||||
if transErr != nil {
|
||||
os.Remove(savePath)
|
||||
results = append(results, dto.TranscribeItem{FileName: fileName, Error: transErr.Error()})
|
||||
continue
|
||||
}
|
||||
|
||||
results = append(results, dto.TranscribeItem{
|
||||
FileName: fileName,
|
||||
Result: &dto.TranscribeResult{
|
||||
Text: transRes.Text,
|
||||
Model: transRes.Model,
|
||||
Language: transRes.Language,
|
||||
AudioPath: transRes.AudioPath,
|
||||
AudioSize: transRes.AudioSize,
|
||||
AudioDuration: transRes.AudioDuration,
|
||||
Scenes: scenes,
|
||||
},
|
||||
})
|
||||
}
|
||||
return results
|
||||
}
|
||||
|
||||
// TranscribeVideo 从视频提取音频并转为文字
|
||||
func (s *transcribeService) TranscribeVideo(ctx context.Context, req *VideoTranscribeReq) (res *VideoTranscribeRes, err error) {
|
||||
audioReq := &serviceAudio.ExtractAudioReq{VideoPath: req.VideoPath, Format: "mp3"}
|
||||
audioRes, err := serviceAudio.AudioExtract.Extract(ctx, audioReq)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("音频提取失败: %v", err)
|
||||
}
|
||||
|
||||
whisperRes, err := Whisper.Transcribe(ctx, &TranscribeReq{AudioPath: audioRes.AudioPath, Model: req.Model, Language: req.Language})
|
||||
if err != nil {
|
||||
os.Remove(audioRes.AudioPath)
|
||||
return nil, fmt.Errorf("语音识别失败: %v", err)
|
||||
}
|
||||
|
||||
os.Remove(req.VideoPath)
|
||||
if !req.KeepAudio {
|
||||
os.Remove(audioRes.AudioPath)
|
||||
baseName := strings.TrimSuffix(audioRes.AudioPath, filepath.Ext(audioRes.AudioPath))
|
||||
os.Remove(baseName + ".txt")
|
||||
os.Remove(baseName + "." + whisperRes.Model + ".txt")
|
||||
}
|
||||
|
||||
res = &VideoTranscribeRes{
|
||||
Text: whisperRes.Text,
|
||||
Model: whisperRes.Model,
|
||||
Language: whisperRes.Language,
|
||||
AudioPath: audioRes.AudioPath,
|
||||
AudioSize: audioRes.Size,
|
||||
AudioDuration: audioRes.Duration,
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// downloadFromURL fetches rawURL with an HTTP GET and stores the body in
// tempDir as "<unix-millis>_<last path segment of the URL>", returning the
// path of the saved file. When the URL path has no last segment, a generated
// "video_<unix-millis>.mp4" name is used instead.
//
// The request is bound to ctx and to a 10-minute overall timeout. Any status
// other than 200 is an error. On failure the returned path is empty and no
// partial file is left behind.
func downloadFromURL(ctx context.Context, rawURL, tempDir string) (string, error) {
	parsedURL, err := url.Parse(rawURL)
	if err != nil {
		return "", err
	}
	segments := strings.Split(parsedURL.Path, "/")
	fileName := segments[len(segments)-1]
	if fileName == "" {
		fileName = fmt.Sprintf("video_%d.mp4", time.Now().UnixMilli())
	}
	savePath := filepath.Join(tempDir, fmt.Sprintf("%d_%s", time.Now().UnixMilli(), fileName))

	req, err := http.NewRequestWithContext(ctx, http.MethodGet, rawURL, nil)
	if err != nil {
		return "", err
	}
	client := &http.Client{Timeout: 10 * time.Minute}
	resp, err := client.Do(req)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return "", fmt.Errorf("HTTP %d", resp.StatusCode)
	}

	out, err := os.Create(savePath)
	if err != nil {
		return "", err
	}
	// Close before checking the result: a failed Close means the data may not
	// be on disk, and the file must be closed before it can be removed on
	// every platform.
	_, copyErr := io.Copy(out, resp.Body)
	closeErr := out.Close()
	if copyErr == nil {
		copyErr = closeErr
	}
	if copyErr != nil {
		os.Remove(savePath)
		return "", copyErr
	}
	return savePath, nil
}
|
||||
|
||||
func getTempDir(ctx context.Context) string {
|
||||
tempDir := g.Cfg().MustGet(ctx, "ffmpeg.temp_dir", "resource/temp").String()
|
||||
if tempDir == "" {
|
||||
tempDir = "resource/temp"
|
||||
}
|
||||
if !filepath.IsAbs(tempDir) {
|
||||
absDir, _ := filepath.Abs(tempDir)
|
||||
tempDir = absDir
|
||||
}
|
||||
return tempDir
|
||||
}
|
||||
|
||||
// toSceneDTO 将场景服务的原始结果转为 DTO 格式
|
||||
func toSceneDTO(analysis *serviceScene.VideoSceneAnalysis) *dto.SceneSummaryDTO {
|
||||
if analysis == nil {
|
||||
return nil
|
||||
}
|
||||
shots := make([]dto.SceneShotDTO, 0, len(analysis.Scenes))
|
||||
for _, s := range analysis.Scenes {
|
||||
shots = append(shots, dto.SceneShotDTO{
|
||||
SceneIndex: s.SceneIndex,
|
||||
StartTimeStr: s.StartTimeStr,
|
||||
EndTimeStr: s.EndTimeStr,
|
||||
DurationStr: s.DurationStr,
|
||||
ShotType: s.ShotType,
|
||||
Composition: s.Composition,
|
||||
NarrativePos: s.NarrativePos,
|
||||
Description: s.Description,
|
||||
})
|
||||
}
|
||||
return &dto.SceneSummaryDTO{
|
||||
TotalScenes: analysis.TotalScenes,
|
||||
DurationStr: analysis.DurationStr,
|
||||
AspectRatio: analysis.AspectRatio,
|
||||
Orientation: analysis.Orientation,
|
||||
Pacing: analysis.Summary.Pacing,
|
||||
ShotTypes: analysis.Summary.ShotTypeDist,
|
||||
Scenes: shots,
|
||||
}
|
||||
}
|
||||
391
service/asr/whisper_service.go
Normal file
391
service/asr/whisper_service.go
Normal file
@@ -0,0 +1,391 @@
|
||||
package asr
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"io"
|
||||
"media/service/setup"
|
||||
"net/http"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/gogf/gf/v2/frame/g"
|
||||
)
|
||||
|
||||
// WhisperBackend identifies which Whisper implementation runs a transcription.
type WhisperBackend int

// Backend identifiers, numbered consecutively from zero.
const (
	// backendPython runs the Python module: python -m whisper.
	backendPython WhisperBackend = 0
	// backendCLI runs the openai-whisper command-line tool (the whisper command).
	backendCLI WhisperBackend = 1
	// backendCpp runs whisper.cpp (whisper-cpp).
	backendCpp WhisperBackend = 2
)
|
||||
|
||||
// WhisperService is the speech-recognition service. It has no fields.
type WhisperService struct{}

// Whisper is the package-level instance of the speech-recognition service.
var Whisper = &WhisperService{}
|
||||
|
||||
// TranscribeReq describes one speech-recognition job.
type TranscribeReq struct {
	// AudioPath is the path of the audio file to recognise.
	AudioPath string
	// Model is the whisper model: tiny/base/small/medium/large.
	Model string
	// Language is the language code; the default is zh (Chinese).
	Language string
}
|
||||
|
||||
// TranscribeRes 语音识别响应
|
||||
type TranscribeRes struct {
|
||||
Text string // 完整识别文本
|
||||
Segments []Segment
|
||||
Model string // 使用的模型
|
||||
Language string // 识别的语言
|
||||
OutputPath string // 输出的 txt 文件路径
|
||||
}
|
||||
|
||||
// Segment is one recognised piece of speech together with its time span.
type Segment struct {
	// Start is the start time in seconds.
	Start float64 `json:"start"`
	// End is the end time in seconds.
	End float64 `json:"end"`
	// Text is the recognised text of this piece.
	Text string `json:"text"`
}
|
||||
|
||||
// Transcribe 对音频文件进行语音识别(自动检测后端,自动降级)
|
||||
func (s *WhisperService) Transcribe(ctx context.Context, req *TranscribeReq) (res *TranscribeRes, err error) {
|
||||
// 1. 校验音频文件
|
||||
if _, err = os.Stat(req.AudioPath); os.IsNotExist(err) {
|
||||
return nil, fmt.Errorf("音频文件不存在: %s", req.AudioPath)
|
||||
}
|
||||
|
||||
// 2. 设置默认值
|
||||
model := req.Model
|
||||
if model == "" {
|
||||
model = g.Cfg().MustGet(ctx, "whisper.model", "small").String()
|
||||
}
|
||||
language := req.Language
|
||||
if language == "" {
|
||||
language = g.Cfg().MustGet(ctx, "whisper.language", "zh").String()
|
||||
}
|
||||
|
||||
// 3. 检测后端,C++ 版找不到模型文件时自动降级
|
||||
backend, whisperPath := s.detectBackend()
|
||||
if backend == backendCpp {
|
||||
modelPath := s.resolveCppModelPath(model)
|
||||
if modelPath == "" {
|
||||
g.Log().Warningf(ctx, "whisper.cpp 模型文件(%s)未找到,降级到 Python whisper", model)
|
||||
backend = backendPython
|
||||
} else {
|
||||
g.Log().Infof(ctx, "语音识别(whisper.cpp): audio=%s, model=%s", req.AudioPath, modelPath)
|
||||
return s.transcribeWithCpp(ctx, req, whisperPath, modelPath, language)
|
||||
}
|
||||
}
|
||||
|
||||
switch backend {
|
||||
case backendCLI:
|
||||
g.Log().Infof(ctx, "语音识别(CLI): audio=%s, model=%s, language=%s", req.AudioPath, model, language)
|
||||
return s.transcribeWithCLI(ctx, req, whisperPath, model, language)
|
||||
default:
|
||||
g.Log().Infof(ctx, "语音识别(python): audio=%s, model=%s, language=%s", req.AudioPath, model, language)
|
||||
return s.transcribeWithPython(ctx, req, model, language)
|
||||
}
|
||||
}
|
||||
|
||||
// transcribeWithCLI 使用 whisper CLI 命令
|
||||
func (s *WhisperService) transcribeWithCLI(ctx context.Context, req *TranscribeReq, whisperPath, model, language string) (res *TranscribeRes, err error) {
|
||||
outputDir := filepath.Dir(req.AudioPath)
|
||||
modelDir := g.Cfg().MustGet(ctx, "whisper.model_dir", "").String()
|
||||
threads := g.Cfg().MustGet(ctx, "whisper.threads", 2).Int()
|
||||
|
||||
args := []string{
|
||||
req.AudioPath,
|
||||
"--model", model,
|
||||
"--language", language,
|
||||
"--output_dir", outputDir,
|
||||
"--output_format", "txt",
|
||||
"--threads", fmt.Sprintf("%d", threads),
|
||||
}
|
||||
if modelDir != "" {
|
||||
args = append(args, "--model_dir", modelDir)
|
||||
}
|
||||
|
||||
cmd := exec.CommandContext(ctx, whisperPath, args...)
|
||||
output, execErr := cmd.CombinedOutput()
|
||||
if execErr != nil {
|
||||
g.Log().Errorf(ctx, "whisper CLI 执行失败: %v\n%s", execErr, string(output))
|
||||
return nil, fmt.Errorf("语音识别失败: %v", execErr)
|
||||
}
|
||||
|
||||
return s.readTxtResult(outputDir, req.AudioPath, model)
|
||||
}
|
||||
|
||||
// transcribeWithPython 使用 python -m whisper
|
||||
func (s *WhisperService) transcribeWithPython(ctx context.Context, req *TranscribeReq, model, language string) (res *TranscribeRes, err error) {
|
||||
// 查找 python
|
||||
pythonPath, err := exec.LookPath("python3")
|
||||
if err != nil {
|
||||
pythonPath, err = exec.LookPath("python")
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("未找到 python,请安装: pip3 install openai-whisper")
|
||||
}
|
||||
}
|
||||
|
||||
outputDir := filepath.Dir(req.AudioPath)
|
||||
modelDir := g.Cfg().MustGet(ctx, "whisper.model_dir", "").String()
|
||||
threads := g.Cfg().MustGet(ctx, "whisper.threads", 2).Int()
|
||||
|
||||
args := []string{
|
||||
"-m", "whisper",
|
||||
req.AudioPath,
|
||||
"--model", model,
|
||||
"--language", language,
|
||||
"--output_dir", outputDir,
|
||||
"--output_format", "txt",
|
||||
"--threads", fmt.Sprintf("%d", threads),
|
||||
}
|
||||
if modelDir != "" {
|
||||
args = append(args, "--model_dir", modelDir)
|
||||
}
|
||||
|
||||
cmd := exec.CommandContext(ctx, pythonPath, args...)
|
||||
output, execErr := cmd.CombinedOutput()
|
||||
if execErr != nil {
|
||||
g.Log().Errorf(ctx, "whisper(python) 执行失败: %v\n%s", execErr, string(output))
|
||||
return nil, fmt.Errorf("语音识别失败: %v", execErr)
|
||||
}
|
||||
|
||||
return s.readTxtResult(outputDir, req.AudioPath, model)
|
||||
}
|
||||
|
||||
// readTxtResult 读取 whisper 输出的 txt 文件
|
||||
func (s *WhisperService) readTxtResult(outputDir, audioPath, model string) (res *TranscribeRes, err error) {
|
||||
baseName := strings.TrimSuffix(filepath.Base(audioPath), filepath.Ext(audioPath))
|
||||
txtPaths := []string{
|
||||
filepath.Join(outputDir, baseName+".txt"),
|
||||
filepath.Join(outputDir, baseName+"."+model+".txt"),
|
||||
}
|
||||
|
||||
var textBytes []byte
|
||||
var txtPath string
|
||||
for _, p := range txtPaths {
|
||||
if b, e := os.ReadFile(p); e == nil {
|
||||
textBytes = b
|
||||
txtPath = p
|
||||
break
|
||||
}
|
||||
}
|
||||
if textBytes == nil {
|
||||
return nil, fmt.Errorf("读取识别结果文件失败")
|
||||
}
|
||||
|
||||
res = &TranscribeRes{
|
||||
Text: cleanTranscript(string(textBytes)),
|
||||
Model: model,
|
||||
OutputPath: txtPath,
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// cleanTranscript flattens a transcript onto one line: every line break
// ("\r\n", "\n" or "\r") becomes a space, runs of spaces collapse into a
// single space, and leading/trailing whitespace is trimmed.
//
// The text is processed in a single pass, so the function always terminates
// and runs in time linear in len(text). All other bytes, including tabs and
// multi-byte UTF-8 sequences, are copied through unchanged.
func cleanTranscript(text string) string {
	var b strings.Builder
	b.Grow(len(text))

	prevSpace := false
	// Byte-wise iteration is safe: '\n', '\r' and ' ' are ASCII and never
	// occur inside a multi-byte UTF-8 sequence.
	for i := 0; i < len(text); i++ {
		c := text[i]
		if c == '\n' || c == '\r' || c == ' ' {
			if !prevSpace {
				b.WriteByte(' ')
			}
			prevSpace = true
			continue
		}
		b.WriteByte(c)
		prevSpace = false
	}
	return strings.TrimSpace(b.String())
}
|
||||
|
||||
// detectBackend 检测可用的 whisper 后端,返回后端类型和可执行路径
|
||||
func (s *WhisperService) detectBackend() (WhisperBackend, string) {
|
||||
// 1. 优先检测 C++ 版 whisper.cpp(最快,但参数格式不同)
|
||||
for _, name := range []string{"whisper-cpp", "whisper-cli"} {
|
||||
if path, err := exec.LookPath(name); err == nil {
|
||||
return backendCpp, path
|
||||
}
|
||||
}
|
||||
|
||||
// 2. 检查 setup 检测到的 C++ 路径
|
||||
if setup.DetectedWhisperPath != "" {
|
||||
base := filepath.Base(setup.DetectedWhisperPath)
|
||||
if base == "whisper-cpp" || base == "whisper-cli" {
|
||||
if _, err := os.Stat(setup.DetectedWhisperPath); err == nil {
|
||||
return backendCpp, setup.DetectedWhisperPath
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 3. 检测 Python CLI(whisper 命令)
|
||||
if path, err := exec.LookPath("whisper"); err == nil {
|
||||
return backendCLI, path
|
||||
}
|
||||
|
||||
// 4. 检查 setup 检测到的 Python CLI 路径
|
||||
if setup.DetectedWhisperPath != "" {
|
||||
if _, err := os.Stat(setup.DetectedWhisperPath); err == nil {
|
||||
return backendCLI, setup.DetectedWhisperPath
|
||||
}
|
||||
}
|
||||
|
||||
// 5. 检查配置中的路径
|
||||
if p := g.Cfg().MustGet(context.Background(), "whisper.path", "").String(); p != "" {
|
||||
if _, err := os.Stat(p); err == nil {
|
||||
return backendCLI, p
|
||||
}
|
||||
}
|
||||
|
||||
return backendPython, ""
|
||||
}
|
||||
|
||||
// resolveCppModelPath 查找或下载 whisper.cpp 模型文件
|
||||
func (s *WhisperService) resolveCppModelPath(model string) string {
|
||||
modelName := strings.TrimPrefix(model, "ggml-")
|
||||
modelName = strings.TrimSuffix(modelName, ".bin")
|
||||
|
||||
cppModelName := "ggml-" + modelName + ".bin"
|
||||
home, _ := os.UserHomeDir()
|
||||
|
||||
// 目标路径:~/.cache/whisper/ggml-{model}.bin
|
||||
targetDir := filepath.Join(home, ".cache", "whisper")
|
||||
targetPath := filepath.Join(targetDir, cppModelName)
|
||||
|
||||
// 1. 如果已存在,直接返回
|
||||
if _, err := os.Stat(targetPath); err == nil {
|
||||
return targetPath
|
||||
}
|
||||
|
||||
// 2. 检查其他常见位置
|
||||
altPaths := []string{
|
||||
cppModelName,
|
||||
filepath.Join(home, ".cache", "whisper", "ggml-"+modelName+"-q5_0.bin"),
|
||||
"/opt/homebrew/share/whisper-cpp/models/" + cppModelName,
|
||||
"/usr/local/share/whisper-cpp/models/" + cppModelName,
|
||||
}
|
||||
for _, p := range altPaths {
|
||||
if _, err := os.Stat(p); err == nil {
|
||||
return p
|
||||
}
|
||||
}
|
||||
|
||||
// 3. 自动下载
|
||||
modelSize := map[string]string{
|
||||
"tiny": "75MB",
|
||||
"base": "150MB",
|
||||
"small": "500MB",
|
||||
"medium": "1.5GB",
|
||||
}
|
||||
size, _ := modelSize[modelName]
|
||||
|
||||
// 下载源:先试 hf-mirror(国内可访问),失败再试官方
|
||||
modelPath := fmt.Sprintf("ggerganov/whisper.cpp/resolve/main/%s", cppModelName)
|
||||
urls := []string{
|
||||
fmt.Sprintf("https://hf-mirror.com/%s", modelPath),
|
||||
fmt.Sprintf("https://huggingface.co/%s", modelPath),
|
||||
}
|
||||
|
||||
g.Log().Infof(context.TODO(), "[whisper.cpp] 正在下载模型 %s (%s)...", cppModelName, size)
|
||||
|
||||
// 创建目录
|
||||
os.MkdirAll(targetDir, 0755)
|
||||
|
||||
// 下载文件(多个源,依次尝试)
|
||||
var lastErr error
|
||||
for _, url := range urls {
|
||||
g.Log().Infof(context.TODO(), "[whisper.cpp] 下载地址: %s", url)
|
||||
if err := s.downloadFile(url, targetPath, 5*time.Minute); err == nil {
|
||||
g.Log().Infof(context.TODO(), "[whisper.cpp] 模型下载完成: %s", targetPath)
|
||||
return targetPath
|
||||
} else {
|
||||
lastErr = err
|
||||
g.Log().Warningf(context.TODO(), "[whisper.cpp] 从 %s 下载失败: %v,尝试下一个源...", url, err)
|
||||
}
|
||||
}
|
||||
|
||||
g.Log().Errorf(context.TODO(), "[whisper.cpp] 所有下载源均失败: %v", lastErr)
|
||||
return ""
|
||||
}
|
||||
|
||||
// downloadFile 下载文件到指定路径(支持超时)
|
||||
func (s *WhisperService) downloadFile(url, destPath string, timeout time.Duration) error {
|
||||
tmpPath := destPath + ".tmp"
|
||||
out, err := os.Create(tmpPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("创建临时文件失败: %v", err)
|
||||
}
|
||||
defer out.Close()
|
||||
|
||||
client := &http.Client{Timeout: timeout}
|
||||
resp, err := client.Get(url)
|
||||
if err != nil {
|
||||
os.Remove(tmpPath)
|
||||
return err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
os.Remove(tmpPath)
|
||||
return fmt.Errorf("HTTP %d", resp.StatusCode)
|
||||
}
|
||||
|
||||
written, err := io.Copy(out, resp.Body)
|
||||
if err != nil {
|
||||
os.Remove(tmpPath)
|
||||
return err
|
||||
}
|
||||
|
||||
if err := os.Rename(tmpPath, destPath); err != nil {
|
||||
return fmt.Errorf("文件重命名失败: %v", err)
|
||||
}
|
||||
|
||||
g.Log().Infof(context.TODO(), "[whisper.cpp] 下载完成: %d bytes", written)
|
||||
return nil
|
||||
}
|
||||
|
||||
// transcribeWithCpp 使用 whisper.cpp(C++ 版,参数格式不同)
|
||||
func (s *WhisperService) transcribeWithCpp(ctx context.Context, req *TranscribeReq, binaryPath, model, language string) (res *TranscribeRes, err error) {
|
||||
outputDir := filepath.Dir(req.AudioPath)
|
||||
baseName := strings.TrimSuffix(filepath.Base(req.AudioPath), filepath.Ext(req.AudioPath))
|
||||
outputPrefix := filepath.Join(outputDir, baseName)
|
||||
threads := g.Cfg().MustGet(ctx, "whisper.threads", 2).Int()
|
||||
|
||||
// whisper.cpp 参数:
|
||||
// -f input.mp3 输入文件
|
||||
// -l zh 语言
|
||||
// -t 2 线程数
|
||||
// -otxt 输出 txt
|
||||
// -of /path/prefix 输出文件前缀(自动加 .txt)
|
||||
args := []string{
|
||||
"-f", req.AudioPath,
|
||||
"-l", language,
|
||||
"-t", fmt.Sprintf("%d", threads),
|
||||
"-otxt",
|
||||
"-of", outputPrefix,
|
||||
"-m", model,
|
||||
}
|
||||
|
||||
cmd := exec.CommandContext(ctx, binaryPath, args...)
|
||||
output, execErr := cmd.CombinedOutput()
|
||||
if execErr != nil {
|
||||
g.Log().Errorf(ctx, "whisper.cpp 执行失败: %v\n%s", execErr, string(output))
|
||||
return nil, fmt.Errorf("语音识别失败: %v", execErr)
|
||||
}
|
||||
|
||||
// whisper.cpp 输出: {prefix}.txt
|
||||
txtPath := outputPrefix + ".txt"
|
||||
textBytes, readErr := os.ReadFile(txtPath)
|
||||
if readErr != nil {
|
||||
return nil, fmt.Errorf("读取识别结果文件失败: %v", readErr)
|
||||
}
|
||||
|
||||
res = &TranscribeRes{
|
||||
Text: cleanTranscript(string(textBytes)),
|
||||
Model: model,
|
||||
Language: language,
|
||||
OutputPath: txtPath,
|
||||
}
|
||||
return
|
||||
}
|
||||
Reference in New Issue
Block a user