@@ -5,7 +5,6 @@ import (
taskDto "cid/model/dto/copydata"
"cid/sync"
"context"
"encoding/json"
"fmt"
"strings"
"time"
@@ -36,149 +35,92 @@ func (s *CompensationScheduler) RunCompensationOnce() {
}
func ( s * CompensationScheduler ) processCompensation ( ctx context . Context ) {
logrus . Info ( ">>> 开始检查需要同步补偿的 任务..." )
logrus . Info ( ">>> 开始扫描需要补偿的失败分页 任务..." )
queryReq := & taskDto . QueryFailedTasksReq {
Status : [ ] string { "failed" , "retrying" , "partial_failed" },
MaxRetries : nil ,
Status : [ ] string { "failed" } ,
TaskType : "account_report_page" ,
Limit : 50 ,
}
failedTasks , err := dao . SyncTaskLog . QueryFailedTasks ( ctx , queryReq )
failedPage Tasks , err := dao . SyncTaskLog . QueryFailedTasks ( ctx , queryReq )
if err != nil {
logrus . Errorf ( "查询失败任务异常:%v" , err )
logrus . Errorf ( "查询失败的分页 任务异常:%v" , err )
return
}
if len ( failedTasks ) == 0 {
logrus . Info ( "✓ 没有需要补偿的任务" )
if len ( failedPage Tasks ) == 0 {
logrus . Info ( "✓ 当前 没有需要补偿的失败分页 任务" )
return
}
logrus . Infof ( "发现 %d 个需要补偿的任务 " , len ( failedTasks ) )
logrus . Infof ( "发现 %d 个需要补偿的失败分页任务,开始逐个处理... " , len ( failedPage Tasks ) )
successCount := 0
failCount := 0
partialCount := 0
for _ , t ask := range failedTasks {
if t ask. RetryCount >= t ask. MaxRetry {
logrus . Warnf ( "任务 %s 已达到最大重试次数 %d, 标记为需人工处理" , t ask. TaskID , t ask. MaxRetry )
for _ , pageT ask := range failedPage Tasks {
if pageT ask. RetryCount >= pageT ask. MaxRetry {
logrus . Warnf ( "⚠ 分页 任务 %s 已达到最大重试次数 %d, 标记为需人工处理" , pageT ask. TaskID , pageT ask. MaxRetry )
updateReq := & taskDto . UpdateSyncTaskLogReq {
ID : t ask. ID ,
ID : pageT ask. Id ,
Status : "manual_review" ,
ErrorMessage : fmt . Sprintf ( "已达到最大重试次数 %d 次" , t ask. MaxRetry ) ,
ErrorMessage : fmt . Sprintf ( "已达到最大重试次数 %d 次" , pageT ask. MaxRetry ) ,
ErrorCode : "MAX_RETRY_EXCEEDED" ,
}
dao . SyncTaskLog . Update ( ctx , updateReq )
s . sendAlert ( t ask)
s . sendAlert ( pageT ask)
failCount ++
continue
}
logrus . Infof ( ">>> 开始补偿任务:%s (类型 =%s , 第 %d/%d 次重试)" ,
t ask. TaskID , t ask. TaskType , t ask. RetryCount + 1 , t ask. MaxRetry )
logrus . Infof ( "▶ 开始补偿分页 任务:%s (广告主 =%d , 第 %d/%d 次重试)" ,
pageT ask. TaskID , pageT ask. AdvertiserID , pageT ask. RetryCount + 1 , pageT ask. MaxRetry )
if s . compensateTask ( ctx , t ask) {
if s . compensatePage Task ( ctx , pageT ask) {
successCount ++
logrus . Infof ( "✓ 分页任务 %s 补偿成功" , pageTask . TaskID )
parentTaskID := s . extractParentTaskID ( pageTask . TaskID )
if parentTaskID != "" {
s . checkAndUpdateParentTaskStatus ( ctx , parentTaskID )
}
} else {
failCount ++
logrus . Warnf ( "✗ 分页任务 %s 补偿失败" , pageTask . TaskID )
}
time . Sleep ( 1 * time . Second )
}
logrus . Infof ( "✓ 补偿任务完成:成功 =%d, 部分 成功=%d, 失败=%d" ,
successCount , partial Count, failCount )
logrus . Infof ( "=== 补偿任务执行 完成:总计 =%d, 成功=%d, 失败=%d === " ,
len ( failedPageTasks ) , success Count, failCount )
}
func ( s * CompensationScheduler ) compensateTask ( ctx context . Context , t ask * taskDto . SyncTaskLogItem ) bool {
retryCount := t ask. RetryCount + 1
func ( s * CompensationScheduler ) compensatePage Task ( ctx context . Context , pageT ask * taskDto . SyncTaskLogItem ) bool {
retryCount := pageT ask. RetryCount + 1
updateReq := & taskDto . UpdateSyncTaskLogReq {
ID : t ask. ID ,
ID : pageT ask. Id ,
Status : "retrying" ,
RetryCount : & retryCount ,
}
dao . SyncTaskLog . Update ( ctx , updateReq )
startTime := s . parseTime ( t ask. StartTime )
endTime := s . parseTime ( t ask. EndTime )
startTime := s . parseTime ( pageT ask. StartTime )
endTime := s . parseTime ( pageT ask. EndTime )
logrus . Infof ( ">>> 开始补偿任务: %s (advertiser=%d, time=[%s, %s], 第 %d/%d 次重试)" ,
task . TaskID , task . AdvertiserID ,
startTime . Format ( "2006-01-02" ) , endTime . Format ( "2006-01-02" ) ,
retryCount , t ask. MaxRetry )
if task . TaskType == "account_report_page" {
return s . compensatePageTask ( ctx , task , retryCount )
}
if task . TaskType == "account_report" && task . Status == "partial_failed" {
return s . compensatePartialFailedTask ( ctx , task , startTime , endTime , retryCount )
}
return s . compensateMainTask ( ctx , task , startTime , endTime , retryCount )
}
func ( s * CompensationScheduler ) compensatePartialFailedTask ( ctx context . Context , task * taskDto . SyncTaskLogItem , startTime , endTime time . Time , retryCount int ) bool {
logrus . Infof ( ">>> 检测到部分失败任务 %s, 开始智能补偿( 只重试失败的页) " , task . TaskID )
failedPages := s . extractFailedPages ( task )
if len ( failedPages ) == 0 {
logrus . Warnf ( "任务 %s 标记为部分失败,但未找到失败的页信息,将重新同步所有页" , task . TaskID )
return s . compensateMainTask ( ctx , task , startTime , endTime , retryCount )
}
logrus . Infof ( "任务 %s 共有 %d 个失败的页需要补偿: %v" , task . TaskID , len ( failedPages ) , failedPages )
allSuccess := true
compensatedPages := 0
for _ , pageNumber := range failedPages {
logrus . Infof ( ">>> 开始补偿第 %d 页..." , pageNumber )
pageSuccess := s . compensateSinglePage ( ctx , task , startTime , endTime , pageNumber , retryCount )
if pageSuccess {
compensatedPages ++
} else {
allSuccess = false
}
time . Sleep ( 500 * time . Millisecond )
}
if allSuccess {
logrus . Infof ( "✓ 部分失败任务 %s 补偿成功 - 共补偿 %d 个页" , task . TaskID , compensatedPages )
updateReq := & taskDto . UpdateSyncTaskLogReq {
ID : task . ID ,
Status : "success" ,
}
dao . SyncTaskLog . Update ( ctx , updateReq )
return true
} else {
logrus . Warnf ( "⚠ 部分失败任务 %s 补偿后仍有失败的页 - 成功补偿 %d/%d 个页" ,
task . TaskID , compensatedPages , len ( failedPages ) )
backoffMinutes := s . calculateBackoff ( retryCount )
nextRetry := time . Now ( ) . Add ( time . Duration ( backoffMinutes ) * time . Minute )
updateReq := & taskDto . UpdateSyncTaskLogReq {
ID : task . ID ,
Status : "partial_failed" ,
NextRetryTime : nextRetry ,
}
dao . SyncTaskLog . Update ( ctx , updateReq )
pageNumber := s . extractPageNumber ( pageTask . TaskID )
if pageNumber == 0 {
logrus . Errorf ( "无法从任务ID %s 解析页码" , pageTask . TaskID )
s . markPageTaskFailed ( ctx , pageT ask. Id , retryCount , "无法解析页码" , "PARSE_PAGE_NUMBER_FAILED" )
return false
}
}
func ( s * CompensationScheduler ) compensateSinglePage ( ctx context . Context , task * taskDto . SyncTaskLogItem , startTime , endTime time . Time , pageNumber int , retryCount int ) bool {
req := & sync . AccountReportRequest {
AdvertiserID : t ask. AdvertiserID ,
AdvertiserID : pageT ask. AdvertiserID ,
StartTime : startTime . UnixMilli ( ) ,
EndTime : endTime . UnixMilli ( ) ,
SelectColumns : [ ] string { "impression" , "click" , "cost" , "t0GMV" } ,
@@ -191,146 +133,97 @@ func (s *CompensationScheduler) compensateSinglePage(ctx context.Context, task *
}
maxRetries := 3
page TaskID := fmt . Sprintf ( "%s_page_%d" , task . TaskID , pageNumber )
parent TaskID := s . extractParentTaskID ( pageTask . TaskID )
pageTaskID := fmt . Sprintf ( "%s_page_%d" , parentTaskID , pageNumber )
result , err := s . syncService . SyncSinglePageWithTask ( ctx , req , true , maxRetries , pageTaskID , pageNumber )
if err != nil {
logrus . Errorf ( "补偿第 %d 页失败:%v" , pageNumber , err )
logrus . Errorf ( "补偿分页任务 %s 失败(第 %d 次):%v" , pageTask . TaskID , retryCount , err )
s . markPageTaskFailed ( ctx , pageTask . Id , retryCount , err . Error ( ) , "PAGE_COMPENSATION_FAILED" )
return false
}
logrus . Infof ( "✓ 补偿第 %d 页 成功 - 记录数=%d" , pageNumber , result . DetailCount )
logrus . Infof ( "✓ 补偿分页任务 %s 成功 - 记录数=%d" , pageTask . TaskID , result . DetailCount )
return true
}
func ( s * CompensationScheduler ) extractFailedPages ( task * taskDto . SyncTaskLogItem ) [ ] int {
if task . ResultSummary == nil {
return nil
func ( s * CompensationScheduler ) markPageTaskFailed ( ctx context . Context , taskID int64 , retryCount int , errMsg , errCode string ) {
backoffMinutes := s . calculateBackoff ( retryCount )
nextRetry := time . Now ( ) . Add ( time . Duration ( backoffMinutes ) * time . Minute )
updateReq := & taskDto . UpdateSyncTaskLogReq {
ID : taskID ,
Status : "failed" ,
ErrorMessage : errMsg ,
ErrorCode : errCode ,
NextRetryTime : nextRetry ,
}
dao . SyncTaskLog . Update ( ctx , updateReq )
}
summaryMap , ok := task . ResultSummary . ( map [ string ] interface { } )
if ! ok {
return nil
func ( s * CompensationScheduler ) checkAndUpdateParentTaskStatus ( ctx context . Context , parentTaskID string ) {
logrus . Infof ( ">>> 检查主任务 %s 的所有分页任务状态..." , parentTaskID )
parentTask , err := dao . SyncTaskLog . GetByTaskID ( ctx , parentTaskID , "account_report" )
if err != nil || parentTask == nil {
logrus . Warnf ( "未找到主任务 %s, 跳过状态更新" , parentTaskID )
return
}
pageResultsRaw , exist s : = summaryMap [ "page_results" ]
if ! exists {
return nil
if parentTask . Statu s = = "success" {
logrus . Infof ( "主任务 %s 已经是成功状态,无需更新" , parentTaskID )
return
}
pageResultsJSON , err := json . Marshal ( pageResultsRaw )
allPageTasks , err := dao . SyncTaskLog . QueryAllPageTasksByParentID ( ctx , parentTaskID , 1000 )
if err != nil {
logrus . Errorf ( "序列化 page_results 失败:%v" , err )
return nil
logrus . Errorf ( "查询主任务 %s 的分页任务失败:%v" , parentTaskID , err )
return
}
var pageResults [ ] map [ string ] interface { }
if err := json . Unmarshal ( pageResultsJSON , & pageResults ) ; err != nil {
logrus . Errorf ( "反序列化 page_results 失败:%v" , err )
return nil
if len ( allPageTasks ) == 0 {
logrus . Warnf ( "主任务 %s 没有找到任何分页任务" , parentTaskID )
return
}
failedPages := make ( [ ] int , 0 )
for _ , pageResult := range pageResults {
success , _ := pageResult [ "success" ] . ( bool )
pageNumberFloat , _ := pageResult [ "page_number" ] . ( float64 )
pageNumber := int ( pageNumberFloat )
successPages := make ( [ ] int , 0 )
i f ! success && pageNumber > 0 {
for _ , pageTask := range allPageTasks {
pageNumber := s . extractPageNumber ( pageTask . TaskID )
if pageTask . Status == "success" {
successPages = append ( successPages , pageNumber )
} else if pageTask . Status == "failed" || pageTask . Status == "manual_review" {
failedPages = append ( failedPages , pageNumber )
}
}
return failedPages
logrus . Infof ( "主任务 %s 分页状态:总数=%d, 成功=%d, 失败=%d" ,
parentTaskID , len ( allPageTasks ) , len ( successPages ) , len ( failedPages ) )
if len ( failedPages ) == 0 {
logrus . Infof ( "✓ 主任务 %s 的所有分页任务都已成功,更新主任务状态为 success" , parentTaskID )
summary := map [ string ] interface { } {
"total_pages" : len ( allPageTasks ) ,
"success_pages" : len ( successPages ) ,
"failed_pages" : 0 ,
"compensated" : true ,
}
func ( s * CompensationScheduler ) compensateMainTask ( ctx context . Context , task * taskDto . SyncTaskLogItem , startTime , endTime time . Time , retryCount int ) bool {
req := & sync . AccountReportRequest {
AdvertiserID : task . AdvertiserID ,
StartTime : startTime . UnixMilli ( ) ,
EndTime : endTime . UnixMilli ( ) ,
SelectColumns : [ ] string { "impression" , "click" , "cost" , "t0GMV" } ,
GroupType : 1 ,
QueryVersion : 1 ,
}
maxRetries := 3
result , err := s . syncService . SyncAccountReportWithPagination ( ctx , req , true , maxRetries )
if err != nil {
logrus . Errorf ( "补偿主任务 %s 失败(第 %d 次):%v" , task . TaskID , retryCount , err )
backoffMinutes := s . calculateBackoff ( retryCount )
nextRetry := time . Now ( ) . Add ( time . Duration ( backoffMinutes ) * time . Minute )
updateReq := & taskDto . UpdateSyncTaskLogReq {
ID : t ask. ID ,
Status : "failed " ,
ErrorMessage : err . Error ( ) ,
ErrorCode : "COMPENSATION_FAILED" ,
NextRetryTime : nextRetry ,
ID : parentT ask. Id ,
Status : "success " ,
ResultSummary : summary ,
}
dao . SyncTaskLog . Update ( ctx , updateReq )
return false
if err := dao . SyncTaskLog . Update ( ctx , updateReq ) ; err != nil {
logrus . Errorf ( "更新主任务 %s 状态失败:%v" , parentTaskID , err )
}
logrus . Infof ( "✓ 补偿 主任务 %s 成功 - 汇总ID=%d, 明细成功=%d, 失败=%d, 页数=%d " ,
task . TaskID, result . SumID , result . DetailSuccessCount , result . DetailFailCount , len ( result . PageResults ) )
return true
} else {
logrus . Infof ( "⚠ 主任务 %s 仍有 %d 个失败的分页任务:%v, 保持部分失败状态 " ,
parent TaskID, len ( failedPages ) , failedPages )
}
func ( s * CompensationScheduler ) compensatePageTask ( ctx context . Context , task * taskDto . SyncTaskLogItem , retryCount int ) bool {
logrus . Infof ( ">>> 补偿分页任务: %s (重试第 %d 次)" , task . TaskID , retryCount )
parentTaskID := s . extractParentTaskID ( task . TaskID )
pageNumber := s . extractPageNumber ( task . TaskID )
if parentTaskID == "" || pageNumber == 0 {
logrus . Errorf ( "无法解析分页任务ID: %s" , task . TaskID )
return false
}
startTime := s . parseTime ( task . StartTime )
endTime := s . parseTime ( task . EndTime )
req := & sync . AccountReportRequest {
AdvertiserID : task . AdvertiserID ,
StartTime : startTime . UnixMilli ( ) ,
EndTime : endTime . UnixMilli ( ) ,
SelectColumns : [ ] string { "impression" , "click" , "cost" , "t0GMV" } ,
GroupType : 1 ,
QueryVersion : 1 ,
PageInfo : & sync . PageInfo {
CurrentPage : pageNumber ,
PageSize : 100 ,
} ,
}
maxRetries := 3
pageTaskID := fmt . Sprintf ( "%s_page_%d" , parentTaskID , pageNumber )
result , err := s . syncService . SyncSinglePageWithTask ( ctx , req , true , maxRetries , pageTaskID , pageNumber )
if err != nil {
logrus . Errorf ( "补偿分页任务 %s 失败(第 %d 次):%v" , task . TaskID , retryCount , err )
backoffMinutes := s . calculateBackoff ( retryCount )
nextRetry := time . Now ( ) . Add ( time . Duration ( backoffMinutes ) * time . Minute )
updateReq := & taskDto . UpdateSyncTaskLogReq {
ID : task . ID ,
Status : "failed" ,
ErrorMessage : err . Error ( ) ,
ErrorCode : "PAGE_COMPENSATION_FAILED" ,
NextRetryTime : nextRetry ,
}
dao . SyncTaskLog . Update ( ctx , updateReq )
return false
}
logrus . Infof ( "✓ 补偿分页任务 %s 成功 - 记录数=%d" , task . TaskID , result . DetailCount )
return true
}
func ( s * CompensationScheduler ) extractParentTaskID ( taskID string ) string {
@@ -370,12 +263,8 @@ func (s *CompensationScheduler) parseTime(t interface{}) time.Time {
}
func ( s * CompensationScheduler ) sendAlert ( task * taskDto . SyncTaskLogItem ) {
logrus . Errorf ( "【告警】任务 %s 需要人工介入:广告主=%d, 类型=%s, 错误=%s" ,
task . TaskID , task . AdvertiserID , task . TaskType , task . ErrorMessage)
// TODO: 集成钉钉/企业微信/邮件告警
// s.sendDingTalkAlert(task)
// s.sendEmailAlert(task)
logrus . Errorf ( "【告警】分页 任务 %s 需要人工介入:广告主=%d, 错误=%s" ,
task . TaskID , task . AdvertiserID , task . ErrorMessage )
}
func main ( ) {