Files
media/service/scene/scene_service.go
2026-05-19 14:33:06 +08:00

658 lines
17 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
package scene
import (
"bufio"
"context"
"fmt"
"math"
"os"
"os/exec"
"path/filepath"
"regexp"
"strconv"
"strings"
"sync"
"github.com/gogf/gf/v2/frame/g"
)
// SceneAnalyzerService is the scene analysis service. It carries no state;
// its methods shell out to ffmpeg/ffprobe to inspect video files.
type SceneAnalyzerService struct{}
// SceneAnalyzer is the package-level shared instance of SceneAnalyzerService.
var SceneAnalyzer = new(SceneAnalyzerService)
// KeyframeInfo describes the keyframe image extracted for a scene.
type KeyframeInfo struct {
Path string `json:"path"` // Path of the keyframe image file.
TimeStr string `json:"timeStr"` // Timestamp the frame was taken at (formatted by formatTime).
Width int `json:"width"` // Image width; filled from the source video's width.
Height int `json:"height"` // Image height; filled from the source video's height.
}
// SceneInfo describes a single scene (the span between two detected cuts).
type SceneInfo struct {
SceneIndex int `json:"sceneIndex"` // Scene number.
StartTime float64 `json:"startTime"` // Start time in seconds, rounded to 3 decimal places in the final result.
EndTime float64 `json:"endTime"` // End time in seconds.
Duration float64 `json:"duration"` // Length in seconds.
StartTimeStr string `json:"startTimeStr"` // Start time as HH:MM:SS.mmm.
EndTimeStr string `json:"endTimeStr"` // End time as HH:MM:SS.mmm.
DurationStr string `json:"durationStr"` // Length as HH:MM:SS.mmm.
ShotType string `json:"shotType"` // Shot type label.
MotionLevel string `json:"motionLevel"` // Motion level label.
Composition string `json:"composition"` // Composition label.
NarrativePos string `json:"narrativePos"` // Narrative position label.
Keyframe *KeyframeInfo `json:"keyframe,omitempty"` // Keyframe, present only when one was extracted.
Description string `json:"description"` // Textual scene description (intended for AI consumption).
}
// VideoSceneAnalysis is the scene analysis result for a single video.
type VideoSceneAnalysis struct {
FileName string `json:"fileName"` // Display file name derived from the video path.
FilePath string `json:"filePath"` // Path of the analyzed video as given by the caller.
Duration float64 `json:"duration"` // Total duration in seconds.
DurationStr string `json:"durationStr"` // Total duration as HH:MM:SS.mmm.
FrameRate float64 `json:"frameRate"` // Frame rate parsed from the ffprobe output.
Width int `json:"width"` // Frame width parsed from the ffprobe output.
Height int `json:"height"` // Frame height parsed from the ffprobe output.
AspectRatio string `json:"aspectRatio"` // Aspect ratio, reduced to lowest terms (e.g. "16:9").
Orientation string `json:"orientation"` // Landscape or portrait label.
TotalScenes int `json:"totalScenes"` // Number of entries in Scenes.
Scenes []SceneInfo `json:"scenes"` // Per-scene details.
DetectParams DetectParams `json:"detectParams"` // Parameters the detection was run with.
Summary SceneSummary `json:"summary"` // Aggregate overview of all scenes.
}
// SceneSummary is the aggregate overview of all scenes in one video.
type SceneSummary struct {
AvgShotDuration float64 `json:"avgShotDuration"` // Average scene duration in seconds.
MinShotDuration float64 `json:"minShotDuration"` // Shortest scene duration in seconds.
MaxShotDuration float64 `json:"maxShotDuration"` // Longest scene duration in seconds.
ShotTypeDist map[string]int `json:"shotTypeDist"` // Scene count per shot type label.
MotionDist map[string]int `json:"motionDist"` // Scene count per motion level label.
CompositionDist map[string]int `json:"compositionDist"` // Scene count per composition label.
Pacing string `json:"pacing"` // Editing pace label derived from the average scene duration.
KeyframesDir string `json:"keyframesDir,omitempty"` // Directory holding extracted keyframes, if any.
}
// DetectParams records the parameters a scene detection run was performed with.
type DetectParams struct {
Threshold float64 `json:"threshold"` // Scene-change threshold that was applied.
Method string `json:"method"` // Name of the detection method.
ExtractKeyframes bool `json:"extractKeyframes"` // Whether keyframe extraction was requested.
}
// SceneAnalyzeReq is the input of SceneAnalyzerService.Analyze.
type SceneAnalyzeReq struct {
VideoPaths []string // Paths of the video files to analyze.
Threshold float64 // Scene-detection threshold, suggested range 0.1-0.5; Analyze uses 0.3 when the value is <= 0 or > 1.
ExtractKeyframes bool // Whether to extract a keyframe image per scene.
}
// SceneAnalyzeRes is the output of SceneAnalyzerService.Analyze.
type SceneAnalyzeRes struct {
Analyses []VideoSceneAnalysis `json:"analyses"` // One entry per successfully analyzed video.
}
var (
// ptsTimeRegex captures the run of digits and dots that follows "pts_time:";
// detectScenes applies it to each line of ffmpeg output.
ptsTimeRegex = regexp.MustCompile(`pts_time:([\d.]+)`)
)
// Analyze runs scene analysis on every video in req.VideoPaths concurrently,
// starting one goroutine per path.
//
// A threshold outside (0, 1] falls back to 0.3. Videos that fail are logged
// and left out of the response; an error is returned only when no video could
// be analyzed (which includes an empty path list). Successful analyses are
// returned in the same order as req.VideoPaths.
func (s *SceneAnalyzerService) Analyze(ctx context.Context, req *SceneAnalyzeReq) (res *SceneAnalyzeRes, err error) {
	threshold := req.Threshold
	if threshold <= 0 || threshold > 1 {
		threshold = 0.3
	}

	// Every goroutine writes only to its own slot, so no lock is required and
	// the output order no longer depends on which analysis finishes first.
	results := make([]*VideoSceneAnalysis, len(req.VideoPaths))
	failures := make([]error, len(req.VideoPaths))

	var wg sync.WaitGroup
	for i, videoPath := range req.VideoPaths {
		wg.Add(1)
		go func(idx int, vp string) {
			defer wg.Done()
			analysis, aErr := s.analyzeSingle(ctx, vp, threshold, req.ExtractKeyframes)
			if aErr != nil {
				failures[idx] = fmt.Errorf("分析失败 [%s]: %w", filepath.Base(vp), aErr)
				return
			}
			results[idx] = analysis
		}(i, videoPath)
	}
	wg.Wait()

	var errs []string
	analyses := make([]VideoSceneAnalysis, 0, len(results))
	for i := range results {
		if failures[i] != nil {
			errs = append(errs, failures[i].Error())
			continue
		}
		if results[i] != nil {
			analyses = append(analyses, *results[i])
		}
	}

	if len(errs) > 0 {
		g.Log().Errorf(ctx, "部分视频分析失败: %s", strings.Join(errs, "; "))
	}
	if len(analyses) == 0 {
		return nil, fmt.Errorf("所有视频分析均失败: %s", strings.Join(errs, "; "))
	}
	return &SceneAnalyzeRes{Analyses: analyses}, nil
}
// analyzeSingle analyzes one video file and returns its scene breakdown.
//
// It resolves the ffmpeg binary, reads duration/frame rate/dimensions through
// getVideoMeta, collects scene-change timestamps through detectScenes, splits
// the timeline with buildScenes and then labels every scene. Shot type, motion
// level and composition are heuristics derived from scene duration (and frame
// orientation), not from image content.
//
// When extractKeyframes is true, one frame taken at each scene's midpoint is
// written to a "keyframes_<video file name>" directory next to the video.
// Keyframe problems are logged and never abort the analysis.
//
// An error is returned when ffmpeg cannot be located or when metadata probing
// or scene detection fails.
func (s *SceneAnalyzerService) analyzeSingle(ctx context.Context, videoPath string, threshold float64, extractKeyframes bool) (*VideoSceneAnalysis, error) {
	ffmpegPath, err := s.getFFmpegPath()
	if err != nil {
		return nil, err
	}

	// 1. Video metadata.
	duration, frameRate, width, height, err := s.getVideoMeta(ctx, ffmpegPath, videoPath)
	if err != nil {
		return nil, fmt.Errorf("获取视频元数据失败: %w", err)
	}

	// 2. Scene-change timestamps.
	sceneChanges, err := s.detectScenes(ctx, ffmpegPath, videoPath, threshold)
	if err != nil {
		return nil, fmt.Errorf("场景检测失败: %w", err)
	}

	// 3. Raw scene list.
	rawScenes := s.buildScenes(sceneChanges, duration)
	totalDuration := duration

	// 4. Keyframe output directory (only when requested). If it cannot be
	// created, keyframe extraction is skipped instead of failing every frame.
	keyframesDir := ""
	if extractKeyframes {
		keyframesDir = filepath.Join(filepath.Dir(videoPath), "keyframes_"+filepath.Base(videoPath))
		if mkErr := os.MkdirAll(keyframesDir, 0755); mkErr != nil {
			g.Log().Warningf(ctx, "创建关键帧目录失败 [%s]: %v", keyframesDir, mkErr)
			keyframesDir = ""
		}
	}

	// gcd(0, 0) is 0, which happens when no dimensions could be parsed; guard
	// the division so such input does not panic.
	aspectRatio := ""
	if d := gcd(width, height); d > 0 {
		aspectRatio = fmt.Sprintf("%d:%d", width/d, height/d)
	}
	orientation := "横屏"
	if height > width {
		orientation = "竖屏"
	}

	// Drop everything up to and including the first underscore of the file
	// name (presumably a storage prefix — confirm against the uploader).
	fileName := filepath.Base(videoPath)
	if idx := strings.Index(fileName, "_"); idx > 0 {
		fileName = fileName[idx+1:]
	}

	totalScenes := len(rawScenes)
	scenes := make([]SceneInfo, totalScenes)
	shotDist := make(map[string]int)
	motionDist := make(map[string]int)
	compDist := make(map[string]int)

	for i, rs := range rawScenes {
		scene := SceneInfo{
			SceneIndex:   rs.SceneIndex,
			StartTime:    round3(rs.StartTime),
			EndTime:      round3(rs.EndTime),
			Duration:     round3(rs.Duration),
			StartTimeStr: rs.StartTimeStr,
			EndTimeStr:   rs.EndTimeStr,
			DurationStr:  rs.DurationStr,
		}

		// Shot type.
		scene.ShotType = classifyShotType(rs.Duration)
		shotDist[scene.ShotType]++

		// Motion level.
		scene.MotionLevel = classifyMotionLevel(rs.Duration, totalDuration)
		motionDist[scene.MotionLevel]++

		// Composition.
		scene.Composition = classifyComposition(rs.Duration, width, height)
		compDist[scene.Composition]++

		// Narrative position: where the scene starts relative to the whole
		// video. A zero duration would make the ratio NaN, so treat it as 0.
		ratio := 0.0
		if totalDuration > 0 {
			ratio = rs.StartTime / totalDuration
		}
		switch {
		case ratio < 0.15:
			scene.NarrativePos = "开头引入"
		case ratio < 0.35:
			scene.NarrativePos = "前段发展"
		case ratio < 0.65:
			scene.NarrativePos = "中段高潮"
		case ratio < 0.85:
			scene.NarrativePos = "后段收束"
		default:
			scene.NarrativePos = "结尾总结"
		}

		// Keyframe at the scene midpoint; best effort.
		if extractKeyframes && keyframesDir != "" {
			midTime := (rs.StartTime + rs.EndTime) / 2
			kfPath := filepath.Join(keyframesDir, fmt.Sprintf("scene_%03d.jpg", rs.SceneIndex))
			if kfErr := s.extractKeyframe(ctx, ffmpegPath, videoPath, midTime, kfPath); kfErr != nil {
				g.Log().Warningf(ctx, "提取关键帧失败 [%s]: %v", kfPath, kfErr)
			} else {
				scene.Keyframe = &KeyframeInfo{
					Path:    kfPath,
					TimeStr: formatTime(midTime),
					Width:   width,
					Height:  height,
				}
			}
		}

		// Description for AI consumption; built last so it sees every label.
		scene.Description = buildSceneDescription(scene)
		scenes[i] = scene
	}

	analysis := &VideoSceneAnalysis{
		FileName:    fileName,
		FilePath:    videoPath,
		Duration:    round3(totalDuration),
		DurationStr: formatTime(totalDuration),
		FrameRate:   round3(frameRate),
		Width:       width,
		Height:      height,
		AspectRatio: aspectRatio,
		Orientation: orientation,
		TotalScenes: totalScenes,
		Scenes:      scenes,
		DetectParams: DetectParams{
			Threshold:        threshold,
			Method:           "ffmpeg scene filter",
			ExtractKeyframes: extractKeyframes,
		},
		Summary: s.buildSummary(scenes, shotDist, motionDist, compDist, keyframesDir),
	}
	return analysis, nil
}
// buildSummary aggregates per-scene data into a SceneSummary.
//
// It computes the average, shortest and longest scene duration (rounded to
// three decimals), derives a pacing label from the average, and passes the
// supplied distribution maps and keyframe directory through unchanged. An
// empty scene list yields the zero SceneSummary.
func (s *SceneAnalyzerService) buildSummary(scenes []SceneInfo, shotDist, motionDist, compDist map[string]int, kfDir string) SceneSummary {
	count := len(scenes)
	if count == 0 {
		return SceneSummary{}
	}

	shortest, longest, total := math.MaxFloat64, 0.0, 0.0
	for i := range scenes {
		d := scenes[i].Duration
		total += d
		if d < shortest {
			shortest = d
		}
		if d > longest {
			longest = d
		}
	}
	mean := total / float64(count)

	var pacing string
	switch {
	case mean < 2:
		pacing = "快节奏(快速剪辑)"
	case mean < 4:
		pacing = "适中节奏"
	case mean < 8:
		pacing = "舒缓节奏"
	default:
		pacing = "慢节奏(长镜头为主)"
	}

	return SceneSummary{
		AvgShotDuration: round3(mean),
		MinShotDuration: round3(shortest),
		MaxShotDuration: round3(longest),
		ShotTypeDist:    shotDist,
		MotionDist:      motionDist,
		CompositionDist: compDist,
		Pacing:          pacing,
		KeyframesDir:    kfDir,
	}
}
// getVideoMeta runs ffprobe on videoPath and extracts duration, frame rate,
// width and height from its JSON report.
//
// ffprobe is looked up next to the ffmpeg binary first; if no such file
// exists, the bare name "ffprobe" is used so the PATH is searched. The values
// are taken by plain text search, i.e. from the first occurrence of each key
// in the report; a key that is missing yields 0 rather than an error. An
// error is returned only when the ffprobe process itself fails.
func (s *SceneAnalyzerService) getVideoMeta(ctx context.Context, ffmpegPath, videoPath string) (duration, frameRate float64, width, height int, err error) {
	probeBin := filepath.Join(filepath.Dir(ffmpegPath), "ffprobe")
	if _, statErr := os.Stat(probeBin); os.IsNotExist(statErr) {
		probeBin = "ffprobe"
	}

	probeArgs := []string{
		"-v", "quiet",
		"-print_format", "json",
		"-show_format",
		"-show_streams",
		videoPath,
	}
	raw, execErr := exec.CommandContext(ctx, probeBin, probeArgs...).Output()
	if execErr != nil {
		return 0, 0, 0, 0, fmt.Errorf("ffprobe 执行失败: %v", execErr)
	}

	report := string(raw)
	return parseJSONFloat(report, `"duration":`),
		parseFrameRate(report),
		parseJSONInt(report, `"width":`),
		parseJSONInt(report, `"height":`),
		nil
}
// detectScenes runs ffmpeg with the select/showinfo filters and returns the
// pts_time of every frame whose scene score exceeds threshold.
//
// Timestamps are returned in the order ffmpeg printed them; values that do not
// parse or are not greater than zero are dropped. An error is returned when
// ffmpeg exits unsuccessfully or its output cannot be scanned.
func (s *SceneAnalyzerService) detectScenes(ctx context.Context, ffmpegPath, videoPath string, threshold float64) ([]float64, error) {
	// Shortest exact representation: a fixed single decimal would turn 0.04
	// into "0.0" and 0.35 into "0.3"/"0.4", silently changing the threshold.
	thresholdStr := strconv.FormatFloat(threshold, 'f', -1, 64)
	args := []string{
		"-i", videoPath,
		"-filter:v", fmt.Sprintf("select='gt(scene,%s)',showinfo", thresholdStr),
		"-f", "null",
		"-",
	}
	cmd := exec.CommandContext(ctx, ffmpegPath, args...)
	output, runErr := cmd.CombinedOutput()
	if runErr != nil {
		return nil, fmt.Errorf("ffmpeg 执行失败: %w", runErr)
	}

	var timestamps []float64
	scanner := bufio.NewScanner(strings.NewReader(string(output)))
	// ffmpeg progress updates are separated by '\r', so a single "line" can
	// grow past the default 64 KiB token limit; allow tokens up to the whole
	// output so scanning never stops early.
	scanner.Buffer(make([]byte, 0, bufio.MaxScanTokenSize), len(output)+1)
	for scanner.Scan() {
		matches := ptsTimeRegex.FindStringSubmatch(scanner.Text())
		if len(matches) < 2 {
			continue
		}
		ts, parseErr := strconv.ParseFloat(matches[1], 64)
		if parseErr == nil && ts > 0 {
			timestamps = append(timestamps, ts)
		}
	}
	if scanErr := scanner.Err(); scanErr != nil {
		return nil, fmt.Errorf("解析 ffmpeg 输出失败: %w", scanErr)
	}
	return timestamps, nil
}
// extractKeyframe asks ffmpeg to seek to timeSec (formatted with three
// decimals) in videoPath and write a single frame to outputPath, overwriting
// any existing file. It returns the error from running the ffmpeg process.
func (s *SceneAnalyzerService) extractKeyframe(ctx context.Context, ffmpegPath, videoPath string, timeSec float64, outputPath string) error {
	seekPos := strconv.FormatFloat(timeSec, 'f', 3, 64)
	return exec.CommandContext(ctx, ffmpegPath,
		"-ss", seekPos,
		"-i", videoPath,
		"-vframes", "1",
		"-q:v", "3",
		"-y",
		outputPath,
	).Run()
}
// buildScenes splits the range [0, totalDuration] into consecutive scenes at
// the given scene-change timestamps.
//
// Timestamps that do not advance past the current scene start, or that lie
// beyond totalDuration, are skipped. Scenes are numbered 1, 2, 3, ... in the
// order they are emitted, so skipped timestamps leave no gaps and no duplicate
// indices. At least one scene is always returned: when nothing usable remains,
// the whole video is reported as a single scene.
func (s *SceneAnalyzerService) buildScenes(sceneChanges []float64, totalDuration float64) []SceneInfo {
	scenes := make([]SceneInfo, 0, len(sceneChanges)+1)

	// appendScene adds [start, end] as the next scene, numbering it by its
	// position in the result rather than by the timestamp's input position.
	appendScene := func(start, end float64) {
		scenes = append(scenes, SceneInfo{
			SceneIndex:   len(scenes) + 1,
			StartTime:    start,
			EndTime:      end,
			Duration:     end - start,
			StartTimeStr: formatTime(start),
			EndTimeStr:   formatTime(end),
			DurationStr:  formatTime(end - start),
		})
	}

	startTime := 0.0
	for _, ts := range sceneChanges {
		if ts <= startTime || ts > totalDuration {
			continue
		}
		appendScene(startTime, ts)
		startTime = ts
	}

	// Trailing scene up to the end of the video; also the fallback single
	// scene when no timestamp was usable.
	if startTime < totalDuration || len(scenes) == 0 {
		appendScene(startTime, totalDuration)
	}
	return scenes
}
// ---------- Shot classification logic ----------
// classifyShotType maps a scene duration in seconds to a shot type label.
// Each tier's limit is exclusive; durations of 15 seconds or more (and any
// value that compares below no limit) fall into the last label.
func classifyShotType(duration float64) string {
	tiers := [...]struct {
		limit float64
		label string
	}{
		{0.8, "极速闪切"},
		{1.5, "快速切换"},
		{2.5, "短镜头"},
		{4, "标准镜头"},
		{8, "中长镜头"},
		{15, "长镜头"},
	}
	for _, tier := range tiers {
		if duration < tier.limit {
			return tier.label
		}
	}
	return "超长镜头"
}
// classifyMotionLevel infers a motion level label from the scene duration in
// seconds: the shorter the scene, the more dynamic the label. totalDuration
// is accepted but currently not used by the classification.
func classifyMotionLevel(duration, totalDuration float64) string {
	if duration < 1.0 {
		return "高动态(快速切换)"
	}
	if duration < 2.0 {
		return "中高动态"
	}
	if duration < 4.0 {
		return "中等动态"
	}
	if duration < 8.0 {
		return "低动态(平稳)"
	}
	return "静态/固定机位"
}
// classifyComposition infers a composition label from the scene duration in
// seconds and the frame orientation. The duration selects one of five tiers
// (limits 1.2, 2.5, 5 and 10 seconds, each exclusive); a frame taller than it
// is wide gets the portrait variant of the tier's label.
func classifyComposition(duration float64, width, height int) string {
	bounds := [...]float64{1.2, 2.5, 5, 10}
	landscape := [...]string{"特写/细节", "近景/中近景", "中景/半身", "全景/环境", "远景/广角"}
	portrait := [...]string{"竖屏特写/细节", "竖屏近景", "竖屏中景", "竖屏全景", "竖屏远景/固定机位"}

	// Default to the last tier; pick the first limit the duration is below.
	tier := len(bounds)
	for i, limit := range bounds {
		if duration < limit {
			tier = i
			break
		}
	}

	if height > width {
		return portrait[tier]
	}
	return landscape[tier]
}
// buildSceneDescription renders a one-line, human-readable description of a
// scene (intended for AI consumption) from its index, time range, duration
// and classification labels.
//
// The fields are separated by explicit ASCII delimiters; without them the
// scene index runs straight into the start time (scene 1 at 00:00:00.000
// would read as "场景100:00:00.000").
func buildSceneDescription(scene SceneInfo) string {
	return fmt.Sprintf(
		"场景%d [%s - %s] 时长%s | %s | %s | %s | %s",
		scene.SceneIndex,
		scene.StartTimeStr, scene.EndTimeStr,
		scene.DurationStr,
		scene.ShotType,
		scene.Composition,
		scene.MotionLevel,
		scene.NarrativePos,
	)
}
// ---------- Utility helpers ----------
// round3 rounds v to three decimal places.
func round3(v float64) float64 {
	const scale = 1000
	scaled := math.Round(v * scale)
	return scaled / scale
}
// gcd returns the greatest common divisor of a and b using Euclid's
// algorithm. gcd(a, 0) is a, so gcd(0, 0) is 0.
func gcd(a, b int) int {
	if b == 0 {
		return a
	}
	return gcd(b, a%b)
}
// getFFmpegPath locates the ffmpeg executable. The "ffmpeg.path"
// configuration value wins when it is set and the file can be stat'ed;
// otherwise ffmpeg is searched on the PATH. An error is returned when neither
// lookup succeeds.
func getFFmpegPath() (string, error) {
	if configured := g.Cfg().MustGet(context.Background(), "ffmpeg.path", "").String(); configured != "" {
		if _, statErr := os.Stat(configured); statErr == nil {
			return configured, nil
		}
	}

	located, lookErr := exec.LookPath("ffmpeg")
	if lookErr == nil {
		return located, nil
	}
	return "", fmt.Errorf("未找到 ffmpeg")
}
// formatTime renders a duration given in seconds as HH:MM:SS.mmm.
//
// The value is rounded to whole milliseconds before it is split into fields,
// so a fraction that rounds up carries into the seconds (1.9996 becomes
// "00:00:02.000" rather than an invalid ".1000" millisecond field). Negative
// or NaN input is rendered as zero.
func formatTime(seconds float64) string {
	if math.IsNaN(seconds) || seconds < 0 {
		seconds = 0
	}
	totalMs := int64(math.Round(seconds * 1000))
	totalSec := totalMs / 1000
	return fmt.Sprintf("%02d:%02d:%02d.%03d",
		totalSec/3600,
		(totalSec%3600)/60,
		totalSec%60,
		totalMs%1000,
	)
}
// parseJSONFloat finds the first occurrence of key in text, skips any spaces
// and double quotes that follow it, and parses the subsequent run of digits
// and dots as a float64. It returns 0 when the key is absent, no digits
// follow, or the run is not a valid number.
func parseJSONFloat(text, key string) float64 {
	pos := strings.Index(text, key)
	if pos < 0 {
		return 0
	}
	rest := strings.TrimLeft(text[pos+len(key):], ` "`)

	n := 0
	for n < len(rest) && (rest[n] == '.' || isDigit(rest[n])) {
		n++
	}
	if n == 0 {
		return 0
	}
	// A malformed run such as "1.2.3" intentionally falls back to the value
	// ParseFloat reports alongside its error.
	parsed, _ := strconv.ParseFloat(rest[:n], 64)
	return parsed
}
// parseJSONInt finds the first occurrence of key in text, skips any spaces
// and double quotes that follow it, and parses the subsequent run of digits
// as an int. It returns 0 when the key is absent or no digit follows.
func parseJSONInt(text, key string) int {
	pos := strings.Index(text, key)
	if pos < 0 {
		return 0
	}
	rest := strings.TrimLeft(text[pos+len(key):], ` "`)

	n := 0
	for n < len(rest) && isDigit(rest[n]) {
		n++
	}
	if n == 0 {
		return 0
	}
	// The run holds digits only; on overflow the value Atoi reports alongside
	// its error is returned as is.
	parsed, _ := strconv.Atoi(rest[:n])
	return parsed
}
// parseFrameRate extracts a frame rate from ffprobe JSON text.
//
// It looks at "r_frame_rate" first and "avg_frame_rate" second. Every
// occurrence of a key is examined in order and the first value greater than
// zero wins, so a leading stream that reports "0/0" (as audio streams do) no
// longer hides the rate of a later video stream. Values may be fractions
// ("30000/1001") or plain numbers ("25"). It returns 0 when no positive rate
// is found.
func parseFrameRate(text string) float64 {
	for _, key := range []string{`"r_frame_rate":`, `"avg_frame_rate":`} {
		rest := text
		for {
			idx := strings.Index(rest, key)
			if idx < 0 {
				break
			}
			rest = rest[idx+len(key):]
			if rate := parseFrameRateToken(rest); rate > 0 {
				return rate
			}
		}
	}
	return 0
}

// parseFrameRateToken parses the rate value at the start of s: leading spaces
// and quotes are skipped and the token ends at the next quote, comma, closing
// brace or space. It returns 0 for anything that is not a usable rate.
func parseFrameRateToken(s string) float64 {
	s = strings.TrimLeft(s, ` "`)
	if end := strings.IndexAny(s, "\",} "); end >= 0 {
		s = s[:end]
	}

	if parts := strings.Split(s, "/"); len(parts) == 2 {
		// Parse errors leave the operand at 0, which the den > 0 check and
		// the caller's rate > 0 check both reject.
		num, _ := strconv.ParseFloat(parts[0], 64)
		den, _ := strconv.ParseFloat(parts[1], 64)
		if den > 0 {
			return num / den
		}
	}

	// Plain number; a value that does not parse yields 0.
	rate, _ := strconv.ParseFloat(s, 64)
	return rate
}
// isDigit reports whether b is an ASCII decimal digit ('0' through '9').
func isDigit(b byte) bool {
	// Byte subtraction wraps around, so anything below '0' becomes a large
	// value and fails the comparison as well.
	return b-'0' < 10
}
// Cleanup removes every given path together with anything beneath it
// (intended for video files and keyframe directories). Removal is best
// effort: errors are ignored.
func Cleanup(paths []string) {
	for i := range paths {
		_ = os.RemoveAll(paths[i])
	}
}
// getFFmpegPath is the method form of the package-level getFFmpegPath; it
// forwards the call and returns its result unchanged.
func (s *SceneAnalyzerService) getFFmpegPath() (string, error) {
	binPath, lookupErr := getFFmpegPath()
	return binPath, lookupErr
}