From d6fa3c933138f655101f04fcf495256f20d3bf5b Mon Sep 17 00:00:00 2001 From: Chris Lu Date: Sun, 8 Mar 2026 20:58:49 -0700 Subject: [PATCH] fix: deterministic disk tie-breaking and stronger pre-existing task test - Sort available disks by NodeID then DiskID before scoring so destination selection is deterministic when two disks score equally - Add task count bounds assertion to SkipsPreExistingPendingTasks test: with 15 of 20 volumes already having pending tasks, at most 5 new tasks should be created and at least 1 (imbalance still exists) --- weed/worker/tasks/balance/detection.go | 8 ++++++++ weed/worker/tasks/balance/detection_test.go | 9 +++++++++ 2 files changed, 17 insertions(+) diff --git a/weed/worker/tasks/balance/detection.go b/weed/worker/tasks/balance/detection.go index 707d91bc8..71643d5e6 100644 --- a/weed/worker/tasks/balance/detection.go +++ b/weed/worker/tasks/balance/detection.go @@ -377,6 +377,14 @@ func planBalanceDestination(activeTopology *topology.ActiveTopology, selectedVol return nil, fmt.Errorf("no available disks for balance operation") } + // Sort available disks by NodeID then DiskID for deterministic tie-breaking + sort.Slice(availableDisks, func(i, j int) bool { + if availableDisks[i].NodeID != availableDisks[j].NodeID { + return availableDisks[i].NodeID < availableDisks[j].NodeID + } + return availableDisks[i].DiskID < availableDisks[j].DiskID + }) + // Find the best destination disk based on balance criteria var bestDisk *topology.DiskInfo bestScore := math.Inf(-1) diff --git a/weed/worker/tasks/balance/detection_test.go b/weed/worker/tasks/balance/detection_test.go index 8d6e5f19c..57b3e9340 100644 --- a/weed/worker/tasks/balance/detection_test.go +++ b/weed/worker/tasks/balance/detection_test.go @@ -576,6 +576,15 @@ func TestDetection_SkipsPreExistingPendingTasks(t *testing.T) { } } + // With 15 of 20 volumes on node-a already having pending tasks, only + // volumes 16-20 are eligible. Detection should produce at most 5 new tasks. + if len(tasks) > 5 { + t.Errorf("Expected at most 5 new tasks (only 5 eligible volumes remain), got %d", len(tasks)) + } + if len(tasks) == 0 { + t.Errorf("Expected at least 1 new task since imbalance still exists with actual volume counts") + } + assertNoDuplicateVolumes(t, tasks) }