From ff8bd8626251892c5436c9493737aa777be442b7 Mon Sep 17 00:00:00 2001 From: Lisandro Pin Date: Thu, 30 Jan 2025 17:35:22 +0100 Subject: [PATCH] Improve EC shards balancing logic regarding replica placement settings. The replica placement type specifies the number of _replicas_ on the same/different rack; that means we can have one EC shard copy on each, even if the replica setting is zero. This PR reworks replica placement parsing for EC rebalancing, so we allow up to (replica placement + 1) shards when selecting racks and nodes to balance EC shards into. --- weed/shell/command_ec_common.go | 8 ++++---- weed/shell/command_ec_common_test.go | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/weed/shell/command_ec_common.go b/weed/shell/command_ec_common.go index f1d7012d8..0affc7365 100644 --- a/weed/shell/command_ec_common.go +++ b/weed/shell/command_ec_common.go @@ -783,8 +783,8 @@ func (ecb *ecBalancer) pickRackToBalanceShardsInto(rackToEcNodes map[RackId]*EcR details += fmt.Sprintf(" Skipped %s because it has no free slots\n", rackId) continue } - if ecb.replicaPlacement != nil && shards >= ecb.replicaPlacement.DiffRackCount { - details += fmt.Sprintf(" Skipped %s because shards %d >= replica placement limit for other racks (%d)\n", rackId, shards, ecb.replicaPlacement.DiffRackCount) + if ecb.replicaPlacement != nil && shards > ecb.replicaPlacement.DiffRackCount { + details += fmt.Sprintf(" Skipped %s because shards %d > replica placement limit for other racks (%d)\n", rackId, shards, ecb.replicaPlacement.DiffRackCount) continue } @@ -977,8 +977,8 @@ func (ecb *ecBalancer) pickEcNodeToBalanceShardsInto(vid needle.VolumeId, existi } shards := nodeShards[node] - if ecb.replicaPlacement != nil && shards >= ecb.replicaPlacement.SameRackCount { - details += fmt.Sprintf(" Skipped %s because shards %d >= replica placement limit for the rack (%d)\n", node.info.Id, shards, ecb.replicaPlacement.SameRackCount) + if ecb.replicaPlacement != nil && shards > 
ecb.replicaPlacement.SameRackCount { + details += fmt.Sprintf(" Skipped %s because shards %d > replica placement limit for the rack (%d)\n", node.info.Id, shards, ecb.replicaPlacement.SameRackCount) continue } diff --git a/weed/shell/command_ec_common_test.go b/weed/shell/command_ec_common_test.go index f76840f3c..87cf371d7 100644 --- a/weed/shell/command_ec_common_test.go +++ b/weed/shell/command_ec_common_test.go @@ -138,8 +138,8 @@ func TestPickRackToBalanceShardsInto(t *testing.T) { {topologyEc, "6241", "123", []string{"rack1", "rack2", "rack3", "rack4", "rack5", "rack6"}, ""}, {topologyEc, "6242", "123", []string{"rack1", "rack2", "rack3", "rack4", "rack5", "rack6"}, ""}, // EC volumes. - {topologyEc, "9577", "", nil, "shards 1 >= replica placement limit for other racks (0)"}, - {topologyEc, "9577", "111", nil, "shards 1 >= replica placement limit for other racks (1)"}, + {topologyEc, "9577", "", nil, "shards 1 > replica placement limit for other racks (0)"}, + {topologyEc, "9577", "111", []string{"rack1", "rack2", "rack3"}, ""}, {topologyEc, "9577", "222", []string{"rack1", "rack2", "rack3"}, ""}, {topologyEc, "10457", "222", []string{"rack1"}, ""}, {topologyEc, "12737", "222", []string{"rack2"}, ""},