From a2eafcc9ccb1b5ed2d89a0cc6447c8ebaddc2c65 Mon Sep 17 00:00:00 2001
From: Chris Lu <chris.lu@gmail.com>
Date: Sun, 8 Mar 2026 19:34:34 -0700
Subject: [PATCH] fix: don't over-fetch in stateful detection to avoid orphaned
 pending tasks

Detection registers planned moves in ActiveTopology via AddPendingTask,
so requesting maxResults+1 would create an extra pending task that gets
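discarded during trim. Use len(results) >= maxResults as the hasMore
signal instead. This is safe because Detection already caps its results
at maxResults; at worst hasMore is a false positive when exactly
maxResults moves exist, which matches the "more work may exist" contract.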
---
 weed/plugin/worker/volume_balance_handler.go | 15 +++++----------
 1 file changed, 5 insertions(+), 10 deletions(-)

diff --git a/weed/plugin/worker/volume_balance_handler.go b/weed/plugin/worker/volume_balance_handler.go
index 55e9972f2..92aaa5366 100644
--- a/weed/plugin/worker/volume_balance_handler.go
+++ b/weed/plugin/worker/volume_balance_handler.go
@@ -225,20 +225,15 @@ func (h *VolumeBalanceHandler) Detect(
 
 	clusterInfo := &workertypes.ClusterInfo{ActiveTopology: activeTopology}
 	maxResults := int(request.MaxResults)
-	detectionLimit := maxResults
-	if maxResults > 0 {
-		detectionLimit = maxResults + 1 // over-fetch by 1 to detect truncation
-	}
-	results, err := balancetask.Detection(metrics, clusterInfo, workerConfig.TaskConfig, detectionLimit)
+	results, err := balancetask.Detection(metrics, clusterInfo, workerConfig.TaskConfig, maxResults)
 	if err != nil {
 		return err
 	}
 
-	hasMore := false
-	if maxResults > 0 && len(results) > maxResults {
-		hasMore = true
-		results = results[:maxResults]
-	}
+	// Detection is stateful (registers planned moves in ActiveTopology), so we
+	// cannot over-fetch to probe for truncation. Instead, hitting the exact
+	// limit signals that more work may exist.
+	hasMore := maxResults > 0 && len(results) >= maxResults
 
 	if traceErr := emitVolumeBalanceDetectionDecisionTrace(sender, metrics, workerConfig.TaskConfig, results); traceErr != nil {
 		glog.Warningf("Plugin worker failed to emit volume_balance detection trace: %v", traceErr)