refactor(service): 重构服务模块结构并优化模型配置
This commit is contained in:
201
service/queue/auto_tune.go
Normal file
201
service/queue/auto_tune.go
Normal file
@@ -0,0 +1,201 @@
|
||||
package queue
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"math"
|
||||
"model-gateway/model/dto"
|
||||
|
||||
"model-gateway/consts/public"
|
||||
"model-gateway/model/entity"
|
||||
|
||||
"gitea.com/red-future/common/db/gfdb"
|
||||
"github.com/gogf/gf/v2/frame/g"
|
||||
)
|
||||
|
||||
// AutoTuneResult is the outcome of a single tuning pass for one model
// (keyed by model_name).
type AutoTuneResult struct {
	ModelName string  `json:"modelName"`      // Model name (asynch_models.model_name).
	Samples   int     `json:"samples"`        // Sample count: tasks in the window with state=2/3 and non-null started_at/finished_at.
	P90Exec   float64 `json:"p90ExecSeconds"` // P90 of the execution time in seconds, measured as finished_at - started_at.

	CapMaxConcurrency int `json:"capMaxConcurrency"` // Configured upper bound: asynch_models.max_concurrency (the cap; never overwritten by dynamic tuning).
	OldMaxConcurrency int `json:"oldMaxConcurrency"` // Runtime value before tuning (Redis); equals the cap when no runtime value exists.
	NewMaxConcurrency int `json:"newMaxConcurrency"` // Runtime value computed by this pass (to be written to Redis); limited to ±50% and never above the cap.

	CapQueueLimit int `json:"capQueueLimit"` // Configured upper bound: asynch_models.queue_limit (the cap; never overwritten by dynamic tuning).
	OldQueueLimit int `json:"oldQueueLimit"` // Runtime value before tuning (Redis); equals the cap when no runtime value exists.
	NewQueueLimit int `json:"newQueueLimit"` // Runtime value computed by this pass (to be written to Redis); limited to ±50% and never above the cap.

	ExpectedSeconds int `json:"expectedSeconds"` // Expected execution time of the model in seconds: asynch_models.expected_seconds (input of the queue_limit calculation).
}
|
||||
|
||||
// AutoTune 由上层定时任务通过接口触发:
|
||||
// - 统计指定时间窗口内该模型任务的执行耗时(finished_at - started_at,取 P90)
|
||||
// - 基于吞吐与 P90 执行耗时估算 max_concurrency 的运行时值(不超过 cap)
|
||||
// - queue_limit 与 expected_seconds 绑定(允许排队时间 = expected_seconds * 2),生成运行时值(不超过 cap)
|
||||
// - 单次调整幅度限制 ±50%,写入 Redis(带 TTL)
|
||||
func AutoTune(ctx context.Context, req *dto.AutoTuneReq) (res *dto.AutoTuneRes, err error) {
|
||||
if req == nil {
|
||||
return nil, errors.New("request cannot be nil")
|
||||
}
|
||||
if req.WindowSeconds <= 0 {
|
||||
req.WindowSeconds = 3600 // 默认1小时
|
||||
}
|
||||
// 1) 读取模型配置(cap),按 model_name 聚合去重(如果表里有多租户重复数据,取较大上限)
|
||||
var modelRows []*entity.AsynchModel
|
||||
if err := gfdb.DB(ctx).Model(ctx, public.TableNameModel).
|
||||
Where("deleted_at IS NULL").
|
||||
Where(entity.AsynchModelCol.Enabled, 1).
|
||||
Scan(&modelRows); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
modelMap := make(map[string]*entity.AsynchModel)
|
||||
for _, m := range modelRows {
|
||||
if m == nil || m.ModelName == "" {
|
||||
continue
|
||||
}
|
||||
cur := modelMap[m.ModelName]
|
||||
if cur == nil {
|
||||
modelMap[m.ModelName] = m
|
||||
continue
|
||||
}
|
||||
// 取更大的 cap
|
||||
if m.MaxConcurrency > cur.MaxConcurrency {
|
||||
cur.MaxConcurrency = m.MaxConcurrency
|
||||
}
|
||||
if m.QueueLimit > cur.QueueLimit {
|
||||
cur.QueueLimit = m.QueueLimit
|
||||
}
|
||||
if m.ExpectedSeconds > cur.ExpectedSeconds {
|
||||
cur.ExpectedSeconds = m.ExpectedSeconds
|
||||
}
|
||||
}
|
||||
if len(modelMap) == 0 {
|
||||
return nil, errors.New("no models found")
|
||||
}
|
||||
|
||||
// 2) 统计指定窗口:按 model_name 计算 cnt 和 P90 执行耗时
|
||||
type statRow struct {
|
||||
ModelName string
|
||||
Cnt int
|
||||
P90Exec float64
|
||||
}
|
||||
var stats []statRow
|
||||
sql := fmt.Sprintf(`
|
||||
SELECT model_name,
|
||||
COUNT(1) AS cnt,
|
||||
COALESCE(percentile_cont(0.9) WITHIN GROUP (ORDER BY EXTRACT(EPOCH FROM (finished_at - started_at))), 0) AS p90_exec
|
||||
FROM %s
|
||||
WHERE deleted_at IS NULL
|
||||
AND state IN (2,3)
|
||||
AND started_at IS NOT NULL
|
||||
AND finished_at IS NOT NULL
|
||||
AND finished_at >= (NOW() - (? || ' seconds')::interval)
|
||||
GROUP BY model_name`, public.TableNameTask)
|
||||
r, err := gfdb.DB(ctx).GetAll(ctx, sql, req.WindowSeconds)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
_ = r.Structs(&stats)
|
||||
statMap := make(map[string]statRow, len(stats))
|
||||
for _, s := range stats {
|
||||
statMap[s.ModelName] = s
|
||||
}
|
||||
|
||||
// 3) 调参计算
|
||||
const utilization = 0.8
|
||||
const maxChangeRatio = 0.5 // ±50%
|
||||
const queueFactor = 2.0 // 与 expected_seconds 绑定:W_target = expected_seconds * 2
|
||||
|
||||
out := make([]AutoTuneResult, 0, len(modelMap))
|
||||
for modelName, m := range modelMap {
|
||||
s := statMap[modelName]
|
||||
capMax := m.MaxConcurrency
|
||||
capQueue := m.QueueLimit
|
||||
oldMax := GetRuntimeMaxConcurrency(ctx, modelName, capMax)
|
||||
oldQueue := GetRuntimeQueueLimit(ctx, modelName, capQueue)
|
||||
|
||||
// 默认:无样本则不调整
|
||||
if s.Cnt <= 0 || s.P90Exec <= 0 {
|
||||
out = append(out, AutoTuneResult{
|
||||
ModelName: modelName,
|
||||
Samples: s.Cnt,
|
||||
P90Exec: s.P90Exec,
|
||||
CapMaxConcurrency: capMax,
|
||||
OldMaxConcurrency: oldMax,
|
||||
NewMaxConcurrency: oldMax,
|
||||
CapQueueLimit: capQueue,
|
||||
OldQueueLimit: oldQueue,
|
||||
NewQueueLimit: oldQueue,
|
||||
ExpectedSeconds: m.ExpectedSeconds,
|
||||
})
|
||||
continue
|
||||
}
|
||||
|
||||
// arrival_rate ≈ 完成数/3600
|
||||
arrivalRate := float64(s.Cnt) / 3600.0
|
||||
|
||||
// desiredMax = ceil(arrivalRate * p90 / utilization)
|
||||
desiredMax := int(math.Ceil(arrivalRate * s.P90Exec / utilization))
|
||||
if desiredMax < 1 {
|
||||
desiredMax = 1
|
||||
}
|
||||
// 单次变化幅度限制
|
||||
minMax := int(math.Floor(float64(oldMax) * (1 - maxChangeRatio)))
|
||||
maxMax := int(math.Ceil(float64(oldMax) * (1 + maxChangeRatio)))
|
||||
if minMax < 1 {
|
||||
minMax = 1
|
||||
}
|
||||
newMax := clampInt(desiredMax, minMax, maxMax)
|
||||
if capMax > 0 {
|
||||
newMax = clampInt(newMax, 1, capMax)
|
||||
}
|
||||
setRuntimeInt(ctx, runtimeMaxConcurrencyKey(modelName), newMax)
|
||||
|
||||
// queue_limit:W_target = expected_seconds * queueFactor
|
||||
exp := m.ExpectedSeconds
|
||||
if exp <= 0 {
|
||||
exp = 60
|
||||
}
|
||||
wTarget := float64(exp) * queueFactor
|
||||
desiredQueue := int(math.Ceil(arrivalRate*wTarget)) + newMax
|
||||
if desiredQueue < newMax {
|
||||
desiredQueue = newMax
|
||||
}
|
||||
|
||||
newQueue := oldQueue
|
||||
if capQueue > 0 {
|
||||
minQ := int(math.Floor(float64(oldQueue) * (1 - maxChangeRatio)))
|
||||
maxQ := int(math.Ceil(float64(oldQueue) * (1 + maxChangeRatio)))
|
||||
if minQ < newMax {
|
||||
minQ = newMax
|
||||
}
|
||||
if maxQ < minQ {
|
||||
maxQ = minQ
|
||||
}
|
||||
newQueue = clampInt(desiredQueue, minQ, maxQ)
|
||||
newQueue = clampInt(newQueue, newMax, capQueue)
|
||||
setRuntimeInt(ctx, runtimeQueueLimitKey(modelName), newQueue)
|
||||
}
|
||||
|
||||
out = append(out, AutoTuneResult{
|
||||
ModelName: modelName,
|
||||
Samples: s.Cnt,
|
||||
P90Exec: s.P90Exec,
|
||||
CapMaxConcurrency: capMax,
|
||||
OldMaxConcurrency: oldMax,
|
||||
NewMaxConcurrency: newMax,
|
||||
CapQueueLimit: capQueue,
|
||||
OldQueueLimit: oldQueue,
|
||||
NewQueueLimit: newQueue,
|
||||
ExpectedSeconds: m.ExpectedSeconds,
|
||||
})
|
||||
}
|
||||
|
||||
g.Log().Infof(ctx, "[auto_tune] done models=%d windowSeconds=%d", len(out), req.WindowSeconds)
|
||||
return &dto.AutoTuneRes{
|
||||
List: out,
|
||||
}, nil
|
||||
}
|
||||
107
service/queue/queue_gate.go
Normal file
107
service/queue/queue_gate.go
Normal file
@@ -0,0 +1,107 @@
|
||||
package queue
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"math"
|
||||
"time"
|
||||
|
||||
"github.com/gogf/gf/v2/frame/g"
|
||||
"github.com/gogf/gf/v2/util/gconv"
|
||||
)
|
||||
|
||||
// ===== 严格 queue_limit:Redis 原子闸门 =====
|
||||
//
|
||||
// 背景:原来的 queue_limit 通过“Count + Insert”做近似控制,分布式并发创建时会短暂超限。
|
||||
// 目标:以 Redis Lua 脚本实现原子校验 + 入队占位,做到严格不超限。
|
||||
//
|
||||
// 计数口径与原逻辑保持一致:只统计 state=0/1(排队中/执行中)。
|
||||
// - CreateTask 成功入库后占用 1 个 slot
|
||||
// - 任务成功/失败(state->2/3)释放 slot
|
||||
// - 失败任务重试(state 3->0)需要再次占用 slot,若占位失败则暂不重试(留在 state=3,下次 cleaner 再尝试)
|
||||
//
|
||||
// 说明:为避免极端情况下“占位泄漏”导致永久占满,采用 ZSET + 过期时间的方式自动回收。
|
||||
// 只要任务实际生命周期远小于 gateTTLSeconds,就可保持严格。
|
||||
|
||||
const (
	// queueGateKeyPrefix is the prefix of the Redis key used by the queue
	// gate; the model name is appended to it.
	queueGateKeyPrefix = "asynch:qgate:" // asynch:qgate:{modelName}
)
|
||||
|
||||
// queueGateAcquireLua is a Lua script that first removes expired slots and
// then checks the limit and reserves a slot in one script execution.
//
// KEYS[1] is the gate ZSET key. ARGV is, in order: now, limit, expireAt (the
// score stored for the member), member, keyTTL (passed to EXPIRE on the key).
// Members whose score is <= now are removed before counting. The script
// returns 0 when the ZSET already holds limit or more members; otherwise it
// adds the member and returns 1.
var queueGateAcquireLua = `
local key = KEYS[1]
local now = tonumber(ARGV[1])
local limit = tonumber(ARGV[2])
local expireAt = tonumber(ARGV[3])
local member = ARGV[4]
local keyTTL = tonumber(ARGV[5])

-- 先清理过期的占位
redis.call("ZREMRANGEBYSCORE", key, "-inf", now)

local current = tonumber(redis.call("ZCARD", key) or "0")
if current >= limit then
return 0
end
redis.call("ZADD", key, expireAt, member)
redis.call("EXPIRE", key, keyTTL)
return 1
`
|
||||
|
||||
// queueGateReleaseLua is a Lua script that releases a slot by removing the
// member ARGV[1] from the ZSET at KEYS[1]. It always returns 1, whether or
// not the member was present, so repeated releases are harmless.
var queueGateReleaseLua = `
local key = KEYS[1]
local member = ARGV[1]
redis.call("ZREM", key, member)
return 1
`
|
||||
|
||||
func queueGateKey(modelName string) string {
|
||||
return fmt.Sprintf("%s%s", queueGateKeyPrefix, modelName)
|
||||
}
|
||||
|
||||
// calcGateTTLSeconds computes the auto-reclaim TTL, in seconds, of a queue
// gate slot, so that a task that never releases its slot cannot hold it
// forever.
//
// The TTL is ten times expectedSeconds, bounded to the range [3600, 86400]
// (one hour to 24 hours). A non-positive expectedSeconds yields 3600.
func calcGateTTLSeconds(expectedSeconds int) int {
	const (
		minTTL     = 3600      // at least one hour
		maxTTL     = 24 * 3600 // at most 24 hours
		multiplier = 10        // safety margin over the expected duration
	)
	if expectedSeconds <= 0 {
		return minTTL
	}
	// Clamp while still in the float domain: converting an out-of-range
	// float64 to int is implementation-defined and could turn a huge
	// expectedSeconds into the minimum TTL instead of the maximum.
	ttl := math.Ceil(float64(expectedSeconds) * multiplier)
	return int(math.Max(minTTL, math.Min(ttl, maxTTL)))
}
|
||||
|
||||
// AcquireQueueSlot 严格入队:原子占位(成功返回 true)
|
||||
func AcquireQueueSlot(ctx context.Context, modelName, taskId string, limit int, expectedSeconds int) (bool, error) {
|
||||
if limit <= 0 {
|
||||
return true, nil
|
||||
}
|
||||
key := queueGateKey(modelName)
|
||||
now := time.Now().Unix()
|
||||
ttl := calcGateTTLSeconds(expectedSeconds)
|
||||
expireAt := now + int64(ttl)
|
||||
// keyTTL 要略大于 member TTL,避免 key 先过期导致计数丢失
|
||||
keyTTL := ttl + 60
|
||||
r, err := g.Redis().Do(ctx, "EVAL", queueGateAcquireLua, 1, key, now, limit, expireAt, taskId, keyTTL)
|
||||
if err != nil {
|
||||
return false, fmt.Errorf("queue gate acquire failed: %w", err)
|
||||
}
|
||||
return gconv.Int(r) == 1, nil
|
||||
}
|
||||
|
||||
// ReleaseQueueSlot 释放占位(幂等)
|
||||
func ReleaseQueueSlot(ctx context.Context, modelName, taskId string) {
|
||||
if taskId == "" || modelName == "" {
|
||||
return
|
||||
}
|
||||
key := queueGateKey(modelName)
|
||||
_, _ = g.Redis().Do(ctx, "EVAL", queueGateReleaseLua, 1, key, taskId)
|
||||
}
|
||||
82
service/queue/runtime_tune.go
Normal file
82
service/queue/runtime_tune.go
Normal file
@@ -0,0 +1,82 @@
|
||||
package queue
|
||||
|
||||
import (
|
||||
"context"
|
||||
|
||||
"github.com/gogf/gf/v2/frame/g"
|
||||
"github.com/gogf/gf/v2/util/gconv"
|
||||
)
|
||||
|
||||
// 运行时调参存储在 Redis,不修改 asynch_models 中的 cap(最大上限)。
|
||||
// 上层每小时调用 /model/autoTune 写入运行时值;Worker/CreateTask 读取运行时值生效。
|
||||
|
||||
const (
	runtimeMaxCKeyPrefix  = "asynch:runtime:max_concurrency:" // + model_name
	runtimeQueueKeyPrefix = "asynch:runtime:queue_limit:"     // + model_name
	runtimeTTLSeconds     = 2 * 3600                          // 2 hours, so that one failed tuning run does not cause an immediate fallback
)
|
||||
|
||||
func runtimeMaxConcurrencyKey(modelName string) string {
|
||||
return runtimeMaxCKeyPrefix + modelName
|
||||
}
|
||||
func runtimeQueueLimitKey(modelName string) string {
|
||||
return runtimeQueueKeyPrefix + modelName
|
||||
}
|
||||
|
||||
func getRuntimeInt(ctx context.Context, key string) (int, bool) {
|
||||
v, err := g.Redis().Do(ctx, "GET", key)
|
||||
if err != nil || v == nil {
|
||||
return 0, false
|
||||
}
|
||||
iv := gconv.Int(v)
|
||||
if iv <= 0 {
|
||||
return 0, false
|
||||
}
|
||||
return iv, true
|
||||
}
|
||||
|
||||
func setRuntimeInt(ctx context.Context, key string, val int) {
|
||||
if val <= 0 {
|
||||
return
|
||||
}
|
||||
// SETEX key ttl val
|
||||
_, _ = g.Redis().Do(ctx, "SETEX", key, runtimeTTLSeconds, val)
|
||||
}
|
||||
|
||||
// GetRuntimeMaxConcurrency 返回运行时并发上限(<= cap)。若不存在运行时值,则返回 cap。
|
||||
func GetRuntimeMaxConcurrency(ctx context.Context, modelName string, cap int) int {
|
||||
if cap <= 0 {
|
||||
return cap
|
||||
}
|
||||
if v, ok := getRuntimeInt(ctx, runtimeMaxConcurrencyKey(modelName)); ok {
|
||||
if v > cap {
|
||||
return cap
|
||||
}
|
||||
return v
|
||||
}
|
||||
return cap
|
||||
}
|
||||
|
||||
// GetRuntimeQueueLimit 返回运行时队列上限(<= cap)。若不存在运行时值,则返回 cap。
|
||||
func GetRuntimeQueueLimit(ctx context.Context, modelName string, cap int) int {
|
||||
if cap <= 0 {
|
||||
return cap
|
||||
}
|
||||
if v, ok := getRuntimeInt(ctx, runtimeQueueLimitKey(modelName)); ok {
|
||||
if v > cap {
|
||||
return cap
|
||||
}
|
||||
return v
|
||||
}
|
||||
return cap
|
||||
}
|
||||
|
||||
// clampInt limits v to the range [minV, maxV]. The lower bound is checked
// first, so when minV > maxV a value below minV yields minV and any other
// value yields maxV.
func clampInt(v, minV, maxV int) int {
	switch {
	case v < minV:
		return minV
	case v > maxV:
		return maxV
	default:
		return v
	}
}
|
||||
57
service/queue/semaphore.go
Normal file
57
service/queue/semaphore.go
Normal file
@@ -0,0 +1,57 @@
|
||||
package queue
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
|
||||
"github.com/gogf/gf/v2/frame/g"
|
||||
"github.com/gogf/gf/v2/util/gconv"
|
||||
)
|
||||
|
||||
// acquireLua is a Lua script that tries to take one token from the counter
// stored at KEYS[1]. ARGV[1] is the maximum token count and ARGV[2] the TTL
// passed to EXPIRE.
//
// It returns 0 when the counter is already at or above the maximum.
// Otherwise it increments the counter, sets the expiry only when the counter
// becomes 1, and returns 1; if the incremented value exceeds the maximum the
// increment is rolled back and 0 is returned.
var acquireLua = `
local current = tonumber(redis.call("GET", KEYS[1]) or "0")
local max = tonumber(ARGV[1])
local ttl = tonumber(ARGV[2])
if current >= max then
return 0
end
current = redis.call("INCR", KEYS[1])
if current == 1 then
redis.call("EXPIRE", KEYS[1], ttl)
end
if current > max then
redis.call("DECR", KEYS[1])
return 0
end
return 1
`
|
||||
|
||||
// releaseLua is a Lua script that gives one token back by decrementing the
// counter stored at KEYS[1]. When the counter drops to zero or below, the key
// is deleted. The script always returns 1.
var releaseLua = `
local current = tonumber(redis.call("DECR", KEYS[1]) or "0")
if current <= 0 then
redis.call("DEL", KEYS[1])
end
return 1
`
|
||||
|
||||
// AcquireSemaphore 获取并发令牌
|
||||
func AcquireSemaphore(ctx context.Context, key string, max int, ttlSeconds int64) (bool, error) {
|
||||
if max <= 0 {
|
||||
// 不限制
|
||||
return true, nil
|
||||
}
|
||||
if ttlSeconds <= 0 {
|
||||
ttlSeconds = 3600
|
||||
}
|
||||
r, err := g.Redis().Do(ctx, "EVAL", acquireLua, 1, key, max, ttlSeconds)
|
||||
if err != nil {
|
||||
return false, fmt.Errorf("获取并发令牌失败: %w", err)
|
||||
}
|
||||
return gconv.Int(r) == 1, nil
|
||||
}
|
||||
|
||||
// ReleaseSemaphore 释放并发令牌
|
||||
func ReleaseSemaphore(ctx context.Context, key string) error {
|
||||
_, err := g.Redis().Do(ctx, "EVAL", releaseLua, 1, key)
|
||||
return err
|
||||
}
|
||||
Reference in New Issue
Block a user