Browse Source

fix: don't over-fetch in stateful detection to avoid orphaned pending tasks

Detection registers planned moves in ActiveTopology via AddPendingTask,
so requesting maxResults+1 would create an extra pending task that gets
discarded when the results are trimmed back to maxResults, leaving it
orphaned. Use len(results) >= maxResults as the hasMore signal instead;
this is safe because Detection already caps its results internally, so
reaching the cap indicates that more work may exist.
pull/8559/head
Chris Lu 1 day ago
parent
commit
a2eafcc9cc
  1. 15
      weed/plugin/worker/volume_balance_handler.go

15
weed/plugin/worker/volume_balance_handler.go

@@ -225,20 +225,15 @@ func (h *VolumeBalanceHandler) Detect(
 clusterInfo := &workertypes.ClusterInfo{ActiveTopology: activeTopology}
 maxResults := int(request.MaxResults)
-detectionLimit := maxResults
-if maxResults > 0 {
-detectionLimit = maxResults + 1 // over-fetch by 1 to detect truncation
-}
-results, err := balancetask.Detection(metrics, clusterInfo, workerConfig.TaskConfig, detectionLimit)
+results, err := balancetask.Detection(metrics, clusterInfo, workerConfig.TaskConfig, maxResults)
 if err != nil {
 return err
 }
-hasMore := false
-if maxResults > 0 && len(results) > maxResults {
-hasMore = true
-results = results[:maxResults]
-}
+// Detection is stateful (registers planned moves in ActiveTopology), so we
+// cannot over-fetch to probe for truncation. Instead, hitting the exact
+// limit signals that more work may exist.
+hasMore := maxResults > 0 && len(results) >= maxResults
 if traceErr := emitVolumeBalanceDetectionDecisionTrace(sender, metrics, workerConfig.TaskConfig, results); traceErr != nil {
 glog.Warningf("Plugin worker failed to emit volume_balance detection trace: %v", traceErr)

Loading…
Cancel
Save