数字人项目迁移
This commit is contained in:
117
digitalhuman/service/tts_service.go
Normal file
117
digitalhuman/service/tts_service.go
Normal file
@@ -0,0 +1,117 @@
|
||||
package service
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/base64"
|
||||
|
||||
"digital-human/consts/public"
|
||||
|
||||
"github.com/gogf/gf/v2/errors/gerror"
|
||||
"github.com/gogf/gf/v2/frame/g"
|
||||
)
|
||||
|
||||
// tts is the stateless receiver type for the text-to-speech service methods.
type tts struct{}

// TTS is the unified wrapper for asynchronous model calls (made through the
// model-asynch middleware). It is the package-level instance callers use.
var TTS = &tts{}
|
||||
|
||||
// CreateVoiceDesignTask 设计音频任务(VoiceDesign)
|
||||
func (s *tts) CreateVoiceDesignTask(
|
||||
ctx context.Context,
|
||||
text string,
|
||||
instruct string,
|
||||
language string, // 空则 Auto
|
||||
speed float64, // <=0 则 1.0
|
||||
) (taskID string, err error) {
|
||||
if language == "" {
|
||||
language = "Auto"
|
||||
}
|
||||
if speed <= 0 {
|
||||
speed = 1.0
|
||||
}
|
||||
payload := map[string]any{
|
||||
"text": text,
|
||||
"language": language,
|
||||
"instruct": instruct,
|
||||
"speed": speed,
|
||||
"response_format": "wav",
|
||||
}
|
||||
g.Log().Info(ctx, "[CreateVoiceDesignTask] %v", payload)
|
||||
return createModelAsynchTask(ctx, public.ModelNameVoiceDesign, payload, "")
|
||||
}
|
||||
|
||||
// CreateCustomVoiceTask 预设音色(CustomVoice)任务
|
||||
// - speaker: 预设说话人(如 Vivian/Serena/Ryan/...)
|
||||
// - instruct: 可选,情绪/风格控制
|
||||
func (s *tts) CreateCustomVoiceTask(
|
||||
ctx context.Context,
|
||||
text string,
|
||||
speaker string,
|
||||
language string, // 例如 "Chinese"/"English"/"Auto",空则默认 "Auto"
|
||||
instruct string, // 可空
|
||||
speed float64, // 0.5~2.0,<=0 则默认 1.0
|
||||
) (taskID string, err error) {
|
||||
if language == "" {
|
||||
language = "Auto"
|
||||
}
|
||||
if speed <= 0 {
|
||||
speed = 1.0
|
||||
}
|
||||
payload := map[string]any{
|
||||
"text": text,
|
||||
"language": language,
|
||||
"speaker": speaker,
|
||||
"instruct": instruct,
|
||||
"speed": speed,
|
||||
"response_format": "wav", // 建议统一用 wav
|
||||
}
|
||||
g.Log().Info(ctx, "[CreateCustomVoiceTask] %v", payload)
|
||||
return createModelAsynchTask(ctx, public.ModelNameCustomVoice, payload, "")
|
||||
}
|
||||
|
||||
// CreateBaseTask 声音克隆(Base / clone)任务
|
||||
// 说明:ref_audio_url 与 ref_audio_base64 二选一
|
||||
func (s *tts) CreateBaseTask(
|
||||
ctx context.Context,
|
||||
text string,
|
||||
language string, // 例如 "Chinese"/"English"/"Auto",空则默认 "Auto"
|
||||
refText string, // 当 xVectorOnlyMode=false 时必填
|
||||
refAudioURL string, // 可空
|
||||
refAudioBase64 string, // 可空(不带 data: 前缀也可以)
|
||||
xVectorOnlyMode bool, // true=不需要 refText,但质量可能下降
|
||||
speed float64, // 0.5~2.0,<=0 则默认 1.0
|
||||
) (taskID string, err error) {
|
||||
if language == "" {
|
||||
language = "Auto"
|
||||
}
|
||||
if speed <= 0 {
|
||||
speed = 1.0
|
||||
}
|
||||
|
||||
payload := map[string]any{
|
||||
"text": text,
|
||||
"language": language,
|
||||
"ref_text": refText,
|
||||
"ref_audio_url": refAudioURL,
|
||||
"ref_audio_base64": refAudioBase64,
|
||||
"x_vector_only_mode": xVectorOnlyMode,
|
||||
"speed": speed,
|
||||
"response_format": "wav",
|
||||
}
|
||||
g.Log().Info(ctx, "[CreateBaseTask] %v", payload)
|
||||
return createModelAsynchTask(ctx, public.ModelNameBase, payload, "")
|
||||
}
|
||||
|
||||
// SpeechToText 语音转文本(预留)
|
||||
// audioBase64:base64 编码的音频数据(WAV/MP3等)
|
||||
func (s *tts) SpeechToText(ctx context.Context, audioBase64 string) (text string, err error) {
|
||||
_ = ctx
|
||||
if audioBase64 == "" {
|
||||
return "", gerror.New("audioBase64 不能为空")
|
||||
}
|
||||
// 简单校验 base64 合法性
|
||||
if _, err := base64.StdEncoding.DecodeString(audioBase64); err != nil {
|
||||
return "", gerror.Wrap(err, "audioBase64 非法")
|
||||
}
|
||||
return "", gerror.New("SpeechToText 暂未实现:后续接入语音识别模型后补齐")
|
||||
}
|
||||
Reference in New Issue
Block a user