Files
model-gateway/service/runtime_tune.go
2026-04-29 15:54:14 +08:00

84 lines
2.0 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
package service
import (
"context"
"github.com/gogf/gf/v2/frame/g"
"github.com/gogf/gf/v2/util/gconv"
)
// Runtime tuning values are stored in Redis; the cap (maximum upper bound)
// kept in asynch_models is not modified.
// The upper layer calls /model/autoTune every hour to write the runtime
// values, and Worker/CreateTask read those runtime values to apply them.
const (
	// runtimeMaxCKeyPrefix is the Redis key prefix for a model's runtime
	// max concurrency; the model name is appended to it.
	runtimeMaxCKeyPrefix = "asynch:runtime:max_concurrency:"

	// runtimeQueueKeyPrefix is the Redis key prefix for a model's runtime
	// queue limit; the model name is appended to it.
	runtimeQueueKeyPrefix = "asynch:runtime:queue_limit:"

	// runtimeTTLSeconds is the expiry of a runtime value, in seconds:
	// 2 hours, so that a single failed tuning run does not cause an
	// immediate fallback.
	runtimeTTLSeconds = 2 * 60 * 60
)
// runtimeMaxConcurrencyKey builds the Redis key under which the runtime
// max-concurrency value of modelName is stored, by appending modelName to
// runtimeMaxCKeyPrefix.
func runtimeMaxConcurrencyKey(modelName string) string {
	key := runtimeMaxCKeyPrefix + modelName
	return key
}
// runtimeQueueLimitKey builds the Redis key under which the runtime
// queue-limit value of modelName is stored, by appending modelName to
// runtimeQueueKeyPrefix.
func runtimeQueueLimitKey(modelName string) string {
	key := runtimeQueueKeyPrefix + modelName
	return key
}
// getRuntimeInt issues a Redis GET for key and converts the reply to an int.
//
// It returns (value, true) only when the command succeeds, the reply is
// non-nil, and the converted value is strictly positive. In every other case
// (command error, nil reply, zero or negative value) it returns (0, false).
func getRuntimeInt(ctx context.Context, key string) (int, bool) {
	reply, err := g.Redis().Do(ctx, "GET", key)
	if err != nil {
		return 0, false
	}
	if reply == nil {
		return 0, false
	}
	// Non-positive values are treated the same as "no runtime value".
	if n := gconv.Int(reply); n > 0 {
		return n, true
	}
	return 0, false
}
// setRuntimeInt stores val under key in Redis with a TTL of
// runtimeTTLSeconds, using SETEX.
//
// Non-positive values are ignored and nothing is written. The write is
// best-effort: the function returns nothing, so a Redis failure is logged as
// a warning rather than propagated, which keeps failed writes visible.
func setRuntimeInt(ctx context.Context, key string, val int) {
	if val <= 0 {
		return
	}
	// SETEX key ttl val
	if _, err := g.Redis().Do(ctx, "SETEX", key, runtimeTTLSeconds, val); err != nil {
		g.Log().Warningf(ctx, "set runtime tuning value: key=%s val=%d: %v", key, val, err)
	}
}
// GetRuntimeMaxConcurrency returns the runtime concurrency limit for
// modelName, which never exceeds cap.
//
// When cap is not positive it is returned unchanged without looking up a
// runtime value. When no usable runtime value exists, or the runtime value is
// greater than cap, cap is returned.
func GetRuntimeMaxConcurrency(ctx context.Context, modelName string, cap int) int {
	if cap <= 0 {
		return cap
	}
	runtimeVal, found := getRuntimeInt(ctx, runtimeMaxConcurrencyKey(modelName))
	if !found || runtimeVal > cap {
		return cap
	}
	return runtimeVal
}
// GetRuntimeQueueLimit returns the runtime queue limit for modelName, which
// never exceeds cap.
//
// When cap is not positive it is returned unchanged without looking up a
// runtime value. When no usable runtime value exists, or the runtime value is
// greater than cap, cap is returned.
func GetRuntimeQueueLimit(ctx context.Context, modelName string, cap int) int {
	if cap <= 0 {
		return cap
	}
	runtimeVal, found := getRuntimeInt(ctx, runtimeQueueLimitKey(modelName))
	if !found || runtimeVal > cap {
		return cap
	}
	return runtimeVal
}
// clampInt restricts v to the range [minV, maxV].
//
// The lower bound is checked first, so if v is below minV the result is minV
// even when minV is greater than maxV.
func clampInt(v, minV, maxV int) int {
	switch {
	case v < minV:
		return minV
	case v > maxV:
		return maxV
	default:
		return v
	}
}