Files
media/model/dto/audio/audio_dto.go
2026-05-20 11:32:39 +08:00

60 lines
2.7 KiB
Go
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
package audio
import "github.com/gogf/gf/v2/frame/g"
// TranscribeReq is the request body for the speech-to-text endpoint
// (JSON body that references the input videos by URL).
//
// The embedded g.Meta tag declares the route: POST /transcribe. Its
// description states that the endpoint creates an asynchronous speech-to-text
// task and returns a taskId.
type TranscribeReq struct {
	g.Meta `path:"/transcribe" method:"post" tags:"音频转写" summary:"语音转文字(异步)" dc:"创建异步语音转文字任务,返回taskId"`

	// VideoURLs lists the video URLs to process. The v tag marks it as
	// required, with a custom "list must not be empty" message.
	VideoURLs []string `json:"video_urls" v:"required#视频URL列表不能为空" dc:"视频URL列表"`

	// Model names the whisper model (tiny/base/small/medium). The d tag
	// declares "medium" as the default.
	Model string `json:"model" dc:"whisper模型(tiny/base/small/medium)" d:"medium"`

	// Language is the spoken language (zh/en/ja). The d tag declares "zh" as
	// the default.
	Language string `json:"language" dc:"语言(zh/en/ja)" d:"zh"`

	// Threshold is the scene-detection threshold, documented as 0.1-0.5. The
	// d tag declares "0.3" as the default.
	// NOTE(review): the range is documentation only; no v rule on this field
	// enforces it.
	Threshold float64 `json:"threshold" dc:"场景检测阈值(0.1-0.5)" d:"0.3"`

	// CallbackURL is an optional address; per its description, the result is
	// POSTed to it once the task succeeds.
	CallbackURL string `json:"callback_url" dc:"任务完成后的回调地址(可选)成功后POST结果到此URL"`
}
// TranscribeRes is the response payload for the speech-to-text endpoint.
//
// NOTE(review): the route description on TranscribeReq says the endpoint
// returns a taskId, but this type only carries a result list. Confirm which
// contract is intended.
type TranscribeRes struct {
	// Results holds the processing results, one TranscribeItem each.
	Results []TranscribeItem `json:"results" dc:"处理结果列表"`
}
// TranscribeItem is the processing outcome for a single video.
type TranscribeItem struct {
	// FileName is the name of the processed file.
	FileName string `json:"fileName" dc:"文件名"`

	// Result is the recognition result; it is left out of the JSON output
	// when nil.
	// NOTE(review): presumably a TranscribeResult, but the field is untyped;
	// verify against the code that fills it in.
	Result any `json:"result,omitempty" dc:"识别结果"`

	// Error is the error message; it is left out of the JSON output when
	// empty.
	Error string `json:"error,omitempty" dc:"错误信息"`
}
// TranscribeResult carries the details of one speech-recognition result.
type TranscribeResult struct {
	// Text is the recognized text.
	Text string `json:"text" dc:"识别文本"`

	// Model is the model that was used.
	Model string `json:"model" dc:"使用的模型"`

	// Language is the language of the recognition.
	Language string `json:"language" dc:"语言"`

	// AudioPath is the path of the audio file.
	AudioPath string `json:"audioPath" dc:"音频文件路径"`

	// AudioSize is the audio file size in bytes.
	AudioSize int64 `json:"audioSize" dc:"音频文件大小(字节)"`

	// AudioDuration is the audio duration, carried as a string.
	AudioDuration string `json:"audioDuration" dc:"音频时长"`

	// Scenes is the shot/scene analysis; it is left out of the JSON output
	// when nil.
	Scenes *SceneSummaryDTO `json:"scenes,omitempty" dc:"分镜分析"`
}
// SceneSummaryDTO summarizes the shot/scene analysis of a video.
type SceneSummaryDTO struct {
	// TotalScenes is the total number of scenes.
	TotalScenes int `json:"totalScenes" dc:"场景总数"`

	// DurationStr is the total duration, carried as a string.
	DurationStr string `json:"durationStr" dc:"总时长"`

	// AspectRatio is the picture aspect ratio.
	AspectRatio string `json:"aspectRatio" dc:"画面比例"`

	// Orientation tells landscape from portrait.
	Orientation string `json:"orientation" dc:"横屏/竖屏"`

	// Pacing describes the editing pace.
	Pacing string `json:"pacing" dc:"剪辑节奏"`

	// ShotTypes is the distribution of shot types, as an integer per type
	// name.
	ShotTypes map[string]int `json:"shotTypes" dc:"镜头类型分布"`

	// Scenes is the list of individual shots.
	Scenes []SceneShotDTO `json:"scenes" dc:"分镜列表"`
}
// SceneShotDTO describes a single shot within the scene analysis.
type SceneShotDTO struct {
	// SceneIndex is the sequence number of the scene.
	SceneIndex int `json:"sceneIndex" dc:"场景序号"`

	// StartTimeStr is the start time, carried as a string.
	StartTimeStr string `json:"startTimeStr" dc:"开始时间"`

	// EndTimeStr is the end time, carried as a string.
	EndTimeStr string `json:"endTimeStr" dc:"结束时间"`

	// DurationStr is the duration of the shot, carried as a string.
	DurationStr string `json:"durationStr" dc:"时长"`

	// ShotType is the type of the shot.
	ShotType string `json:"shotType" dc:"镜头类型"`

	// Composition is the framing/composition of the shot.
	Composition string `json:"composition" dc:"构图"`

	// NarrativePos is the position of the shot in the narrative.
	NarrativePos string `json:"narrativePos" dc:"叙事位置"`

	// Description is a free-text description of the scene.
	Description string `json:"description" dc:"场景描述"`
}