From 331c1f0f3f1a227b76a6752aa051c031a3655903 Mon Sep 17 00:00:00 2001 From: Lisandro Pin Date: Thu, 30 Jan 2025 18:26:45 +0100 Subject: [PATCH] Improve EC shards balancing logic regarding replica placement settings. (#6491) The replica placement type specifies the number of _replicas_ on the same/different rack; that means we can have one EC shard copy on each, even if the replica setting is zero. This PR reworks replica placement parsing for EC rebalancing, so we allow (replica placement + 1) shards when selecting racks and nodes to balance EC shards into. --- weed/shell/command_ec_common.go | 8 ++++---- weed/shell/command_ec_common_test.go | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/weed/shell/command_ec_common.go b/weed/shell/command_ec_common.go index f1d7012d8..0affc7365 100644 --- a/weed/shell/command_ec_common.go +++ b/weed/shell/command_ec_common.go @@ -783,8 +783,8 @@ func (ecb *ecBalancer) pickRackToBalanceShardsInto(rackToEcNodes map[RackId]*EcR details += fmt.Sprintf(" Skipped %s because it has no free slots\n", rackId) continue } - if ecb.replicaPlacement != nil && shards >= ecb.replicaPlacement.DiffRackCount { - details += fmt.Sprintf(" Skipped %s because shards %d >= replica placement limit for other racks (%d)\n", rackId, shards, ecb.replicaPlacement.DiffRackCount) + if ecb.replicaPlacement != nil && shards > ecb.replicaPlacement.DiffRackCount { + details += fmt.Sprintf(" Skipped %s because shards %d > replica placement limit for other racks (%d)\n", rackId, shards, ecb.replicaPlacement.DiffRackCount) continue } @@ -977,8 +977,8 @@ func (ecb *ecBalancer) pickEcNodeToBalanceShardsInto(vid needle.VolumeId, existi } shards := nodeShards[node] - if ecb.replicaPlacement != nil && shards >= ecb.replicaPlacement.SameRackCount { - details += fmt.Sprintf(" Skipped %s because shards %d >= replica placement limit for the rack (%d)\n", node.info.Id, shards, ecb.replicaPlacement.SameRackCount) + if ecb.replicaPlacement != nil && shards > 
ecb.replicaPlacement.SameRackCount { + details += fmt.Sprintf(" Skipped %s because shards %d > replica placement limit for the rack (%d)\n", node.info.Id, shards, ecb.replicaPlacement.SameRackCount) continue } diff --git a/weed/shell/command_ec_common_test.go b/weed/shell/command_ec_common_test.go index f76840f3c..87cf371d7 100644 --- a/weed/shell/command_ec_common_test.go +++ b/weed/shell/command_ec_common_test.go @@ -138,8 +138,8 @@ func TestPickRackToBalanceShardsInto(t *testing.T) { {topologyEc, "6241", "123", []string{"rack1", "rack2", "rack3", "rack4", "rack5", "rack6"}, ""}, {topologyEc, "6242", "123", []string{"rack1", "rack2", "rack3", "rack4", "rack5", "rack6"}, ""}, // EC volumes. - {topologyEc, "9577", "", nil, "shards 1 >= replica placement limit for other racks (0)"}, - {topologyEc, "9577", "111", nil, "shards 1 >= replica placement limit for other racks (1)"}, + {topologyEc, "9577", "", nil, "shards 1 > replica placement limit for other racks (0)"}, + {topologyEc, "9577", "111", []string{"rack1", "rack2", "rack3"}, ""}, {topologyEc, "9577", "222", []string{"rack1", "rack2", "rack3"}, ""}, {topologyEc, "10457", "222", []string{"rack1"}, ""}, {topologyEc, "12737", "222", []string{"rack2"}, ""},