From 4b3eeab25940a8b88571164717eaef2ea5b1b6a4 Mon Sep 17 00:00:00 2001 From: chrislu Date: Thu, 4 Dec 2025 15:01:44 -0800 Subject: [PATCH] fix: collect topology once and track capacity changes locally Remove the topology refresh within the loop, as it gives a false sense of correctness: the refreshed topology could still be stale (minutes old). Instead, we (1) collect topology once at the start, and (2) track capacity changes ourselves via the freeEcSlot decrement after each move. This is more accurate because we know exactly what moves we've made, rather than relying on potentially stale topology refreshes. --- weed/shell/command_volume_server_evacuate.go | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/weed/shell/command_volume_server_evacuate.go b/weed/shell/command_volume_server_evacuate.go index 868a00b01..d962c047d 100644 --- a/weed/shell/command_volume_server_evacuate.go +++ b/weed/shell/command_volume_server_evacuate.go @@ -157,6 +157,9 @@ func (c *commandVolumeServerEvacuate) evacuateNormalVolumes(commandEnv *CommandE func (c *commandVolumeServerEvacuate) evacuateEcVolumes(commandEnv *CommandEnv, volumeServer string, skipNonMoveable, applyChange bool, writer io.Writer) error { // find this ec volume server + // We collect topology once at the start and track capacity changes ourselves + // (via freeEcSlot decrement after each move) rather than repeatedly refreshing, + // which would give a false sense of correctness since topology could be stale. 
ecNodes, _ := collectEcVolumeServersByDc(c.topologyInfo, "") thisNodes, otherNodes := c.ecNodesOtherThan(ecNodes, volumeServer) if len(thisNodes) == 0 { @@ -167,20 +170,6 @@ func (c *commandVolumeServerEvacuate) evacuateEcVolumes(commandEnv *CommandEnv, for _, thisNode := range thisNodes { for _, diskInfo := range thisNode.info.DiskInfos { for _, ecShardInfo := range diskInfo.EcShardInfos { - // Refresh topology to get updated free slot counts before moving each EC volume - if applyChange { - if topologyInfo, _, err := collectTopologyInfo(commandEnv, 0); err != nil { - fmt.Fprintf(writer, "update topologyInfo for EC: %v\n", err) - } else { - ecNodesNew, _ := collectEcVolumeServersByDc(topologyInfo, "") - _, otherNodesNew := c.ecNodesOtherThan(ecNodesNew, volumeServer) - if len(otherNodesNew) > 0 { - otherNodes = otherNodesNew - c.topologyInfo = topologyInfo - } - } - } - hasMoved, err := c.moveAwayOneEcVolume(commandEnv, ecShardInfo, thisNode, otherNodes, applyChange, writer) if err != nil { fmt.Fprintf(writer, "move away volume %d from %s: %v\n", ecShardInfo.Id, volumeServer, err)