fix: don't over-fetch in stateful detection to avoid orphaned pending tasks

Detection registers planned moves in ActiveTopology via AddPendingTask, so requesting maxResults+1 would create an extra pending task that gets discarded during trim. Use len(results) >= maxResults as the hasMore signal instead, which is correct since Detection already caps internally.
3 weeks ago · a2eafcc9cc
1 changed files with 5 additions and 10 deletions
--- a/weed/plugin/worker/volume_balance_handler.go
+++ b/weed/plugin/worker/volume_balance_handler.go
@ -225,20 +225,15 @@ func (h *VolumeBalanceHandler) Detect(

 	clusterInfo := &workertypes.ClusterInfo{ActiveTopology: activeTopology}
 	maxResults := int(request.MaxResults)
-	detectionLimit := maxResults
-	if maxResults > 0 {
-		detectionLimit = maxResults + 1 // over-fetch by 1 to detect truncation
-	}
-	results, err := balancetask.Detection(metrics, clusterInfo, workerConfig.TaskConfig, detectionLimit)
+	results, err := balancetask.Detection(metrics, clusterInfo, workerConfig.TaskConfig, maxResults)
 	if err != nil {
 		return err
 	}

-	hasMore := false
-	if maxResults > 0 && len(results) > maxResults {
-		hasMore = true
-		results = results[:maxResults]
-	}
+	// Detection is stateful (registers planned moves in ActiveTopology), so we
+	// cannot over-fetch to probe for truncation. Instead, hitting the exact
+	// limit signals that more work may exist.
+	hasMore := maxResults > 0 && len(results) >= maxResults

 	if traceErr := emitVolumeBalanceDetectionDecisionTrace(sender, metrics, workerConfig.TaskConfig, results); traceErr != nil {
 		glog.Warningf("Plugin worker failed to emit volume_balance detection trace: %v", traceErr)