Browse Source
feat(balance): replica placement validation for volume moves (#8622)
feat(balance): replica placement validation for volume moves (#8622)
* feat(balance): add replica placement validation for volume moves When the volume balance detection proposes moving a volume, validate that the move does not violate the volume's replication policy (e.g., ReplicaPlacement=010 requires replicas on different racks). If the preferred destination violates the policy, fall back to score-based planning; if that also violates, skip the volume entirely. - Add ReplicaLocation type and VolumeReplicaMap to ClusterInfo - Build replica map from all volumes before collection filtering - Port placement validation logic from command_volume_fix_replication.go - Thread replica map through collectVolumeMetrics call chain - Add IsGoodMove check in createBalanceTask before destination use * address PR review: extract validation closure, add defensive checks - Extract validateMove closure to eliminate duplicated ReplicaLocation construction and IsGoodMove calls - Add defensive check for empty replica map entries (len(replicas) == 0) - Add bounds check for int-to-byte cast on ExpectedReplicas (0-255) * address nitpick: rp test helper accepts *testing.T and fails on error Prevents silent failures from typos in replica placement codes. * address review: add composite replica placement tests (011, 110) Test multi-constraint placement policies where both rack and DC rules must be satisfied simultaneously. * address review: use struct keys instead of string concatenation Replace string-concatenated map keys with typed rackKey/nodeKey structs to eliminate allocations and avoid ambiguity if IDs contain spaces. 
* address review: simplify bounds check, log fallback error, guard source - Remove unreachable ExpectedReplicas < 0 branch (outer condition already guarantees > 0), fold bounds check into single condition - Log error from planBalanceDestination in replica validation fallback - Return false from IsGoodMove when sourceNodeID not found in existing replicas (inconsistent cluster state) * address review: use slices.Contains instead of hand-rolled helpers Replace isAmongDC and isAmongRack with slices.Contains from the standard library, reducing boilerplate. (branch: pull/8626/head)
committed by
GitHub
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 364 additions and 30 deletions
- 3 weed/plugin/worker/erasure_coding_handler.go
- 3 weed/plugin/worker/vacuum_handler.go
- 13 weed/plugin/worker/volume_balance_handler.go
- 33 weed/plugin/worker/volume_metrics.go
- 10 weed/plugin/worker/volume_metrics_test.go
- 41 weed/worker/tasks/balance/detection.go
- 146 weed/worker/tasks/balance/replica_placement.go
- 127 weed/worker/tasks/balance/replica_placement_test.go
- 18 weed/worker/types/data_types.go
@ -0,0 +1,146 @@ |
|||
package balance |
|||
|
|||
import ( |
|||
"slices" |
|||
|
|||
"github.com/seaweedfs/seaweedfs/weed/storage/super_block" |
|||
"github.com/seaweedfs/seaweedfs/weed/worker/types" |
|||
) |
|||
|
|||
// rackKey uniquely identifies a rack within a data center.
// Using a struct key (rather than string concatenation) avoids
// ambiguity when IDs contain separator characters and avoids
// per-lookup string allocations.
type rackKey struct {
	DataCenter string
	Rack       string
}
|
|||
// nodeKey uniquely identifies a node within a rack.
// The full (DC, rack, node) triple is kept so that nodes with the
// same ID in different racks or data centers are never conflated.
type nodeKey struct {
	DataCenter string
	Rack       string
	NodeID     string
}
|
|||
// IsGoodMove checks whether moving a volume from sourceNodeID to target
|
|||
// would satisfy the volume's replica placement policy, given the current
|
|||
// set of replica locations.
|
|||
func IsGoodMove(rp *super_block.ReplicaPlacement, existingReplicas []types.ReplicaLocation, sourceNodeID string, target types.ReplicaLocation) bool { |
|||
if rp == nil || !rp.HasReplication() { |
|||
return true // no replication constraint
|
|||
} |
|||
|
|||
// Build the replica set after the move: remove source, add target
|
|||
afterMove := make([]types.ReplicaLocation, 0, len(existingReplicas)) |
|||
sourceFound := false |
|||
for _, r := range existingReplicas { |
|||
if r.NodeID == sourceNodeID { |
|||
sourceFound = true |
|||
} else { |
|||
afterMove = append(afterMove, r) |
|||
} |
|||
} |
|||
if !sourceFound { |
|||
// Source not in replica list — cluster state may be inconsistent.
|
|||
// Treat as unsafe to avoid incorrect placement decisions.
|
|||
return false |
|||
} |
|||
|
|||
return satisfyReplicaPlacement(rp, afterMove, target) |
|||
} |
|||
|
|||
// satisfyReplicaPlacement checks whether placing a replica at target
// is consistent with the replication policy, given the existing replicas.
// Ported from weed/shell/command_volume_fix_replication.go
//
// The check proceeds hierarchically — data center, then rack within the
// target's DC, then node. A placement is accepted either because it adds
// a new DC/rack while the policy still allows more distinct DCs/racks,
// or because it lands on a "primary" (most-populated) DC/rack that still
// has room under the same-rack count.
func satisfyReplicaPlacement(rp *super_block.ReplicaPlacement, replicas []types.ReplicaLocation, target types.ReplicaLocation) bool {
	existingDCs, _, existingNodes := countReplicas(replicas)

	targetNK := nodeKey{DataCenter: target.DataCenter, Rack: target.Rack, NodeID: target.NodeID}
	if _, found := existingNodes[targetNK]; found {
		// avoid duplicated volume on the same data node
		return false
	}

	// primaryDCs are the data centers currently holding the most replicas.
	primaryDCs, _ := findTopDCKeys(existingDCs)

	// ensure data center count is within limit
	if _, found := existingDCs[target.DataCenter]; !found {
		// different from existing dcs: only allowed while the policy
		// still permits another distinct data center
		if len(existingDCs) < rp.DiffDataCenterCount+1 {
			return true
		}
		return false
	}
	// now same as one of existing data centers; only a primary
	// (most-populated) DC may accept more replicas
	if !slices.Contains(primaryDCs, target.DataCenter) {
		return false
	}

	// now on a primary dc - check racks within this DC only
	primaryDcRacks := make(map[rackKey]int)
	for _, r := range replicas {
		if r.DataCenter != target.DataCenter {
			continue
		}
		primaryDcRacks[rackKey{DataCenter: r.DataCenter, Rack: r.Rack}]++
	}

	targetRK := rackKey{DataCenter: target.DataCenter, Rack: target.Rack}
	primaryRacks, _ := findTopRackKeys(primaryDcRacks)
	// sameRackCount is how many replicas already share the target's rack
	// (zero when the rack is new to this DC).
	sameRackCount := primaryDcRacks[targetRK]

	if _, found := primaryDcRacks[targetRK]; !found {
		// different from existing racks: only allowed while the policy
		// still permits another distinct rack
		if len(primaryDcRacks) < rp.DiffRackCount+1 {
			return true
		}
		return false
	}
	// same as one of existing racks; only a primary rack may grow
	if !slices.Contains(primaryRacks, targetRK) {
		return false
	}

	// on primary rack - check same-rack count against the policy limit
	if sameRackCount < rp.SameRackCount+1 {
		return true
	}
	return false
}
|
|||
func countReplicas(replicas []types.ReplicaLocation) (dcCounts map[string]int, rackCounts map[rackKey]int, nodeCounts map[nodeKey]int) { |
|||
dcCounts = make(map[string]int) |
|||
rackCounts = make(map[rackKey]int) |
|||
nodeCounts = make(map[nodeKey]int) |
|||
for _, r := range replicas { |
|||
dcCounts[r.DataCenter]++ |
|||
rackCounts[rackKey{DataCenter: r.DataCenter, Rack: r.Rack}]++ |
|||
nodeCounts[nodeKey{DataCenter: r.DataCenter, Rack: r.Rack, NodeID: r.NodeID}]++ |
|||
} |
|||
return |
|||
} |
|||
|
|||
// findTopDCKeys returns every data center whose count in m equals the
// maximum count, together with that maximum. An empty map yields (nil, 0).
func findTopDCKeys(m map[string]int) (topKeys []string, max int) {
	for key, count := range m {
		switch {
		case count > max:
			max = count
			topKeys = append(topKeys[:0], key)
		case count == max:
			topKeys = append(topKeys, key)
		}
	}
	return
}
|
|||
func findTopRackKeys(m map[rackKey]int) (topKeys []rackKey, max int) { |
|||
for k, c := range m { |
|||
if max < c { |
|||
topKeys = topKeys[:0] |
|||
topKeys = append(topKeys, k) |
|||
max = c |
|||
} else if max == c { |
|||
topKeys = append(topKeys, k) |
|||
} |
|||
} |
|||
return |
|||
} |
|||
|
|||
@ -0,0 +1,127 @@ |
|||
package balance |
|||
|
|||
import ( |
|||
"testing" |
|||
|
|||
"github.com/seaweedfs/seaweedfs/weed/storage/super_block" |
|||
"github.com/seaweedfs/seaweedfs/weed/worker/types" |
|||
) |
|||
|
|||
func rp(t *testing.T, code string) *super_block.ReplicaPlacement { |
|||
t.Helper() |
|||
r, err := super_block.NewReplicaPlacementFromString(code) |
|||
if err != nil { |
|||
t.Fatalf("invalid replica placement code %q: %v", code, err) |
|||
} |
|||
return r |
|||
} |
|||
|
|||
func loc(dc, rack, node string) types.ReplicaLocation { |
|||
return types.ReplicaLocation{DataCenter: dc, Rack: rack, NodeID: node} |
|||
} |
|||
|
|||
func TestIsGoodMove_NoReplication(t *testing.T) { |
|||
// 000 = no replication. Any move is fine.
|
|||
if !IsGoodMove(rp(t, "000"), []types.ReplicaLocation{loc("dc1", "r1", "n1")}, "n1", loc("dc1", "r1", "n2")) { |
|||
t.Error("000: any move should be allowed") |
|||
} |
|||
} |
|||
|
|||
func TestIsGoodMove_001_SameRack(t *testing.T) { |
|||
// 001 = 1 replica on same rack (2 total on same rack)
|
|||
existing := []types.ReplicaLocation{ |
|||
loc("dc1", "r1", "n1"), |
|||
loc("dc1", "r1", "n2"), |
|||
} |
|||
// Move n1 -> n3 on same rack: good
|
|||
if !IsGoodMove(rp(t, "001"), existing, "n1", loc("dc1", "r1", "n3")) { |
|||
t.Error("001: move to same rack should be allowed") |
|||
} |
|||
// Move n1 -> n3 on different rack: bad (would leave only 1 on r1, need 2)
|
|||
if IsGoodMove(rp(t, "001"), existing, "n1", loc("dc1", "r2", "n3")) { |
|||
t.Error("001: move to different rack should not be allowed when it breaks same-rack count") |
|||
} |
|||
} |
|||
|
|||
func TestIsGoodMove_010_DiffRack(t *testing.T) { |
|||
// 010 = 1 replica on different rack (2 racks total)
|
|||
existing := []types.ReplicaLocation{ |
|||
loc("dc1", "r1", "n1"), |
|||
loc("dc1", "r2", "n2"), |
|||
} |
|||
// Move n1 -> n3 on r2: bad (both replicas on same rack)
|
|||
if IsGoodMove(rp(t, "010"), existing, "n1", loc("dc1", "r2", "n3")) { |
|||
t.Error("010: move to same rack as other replica should not be allowed") |
|||
} |
|||
// Move n1 -> n3 on r3: good (still 2 different racks)
|
|||
if !IsGoodMove(rp(t, "010"), existing, "n1", loc("dc1", "r3", "n3")) { |
|||
t.Error("010: move to different rack should be allowed") |
|||
} |
|||
} |
|||
|
|||
func TestIsGoodMove_100_DiffDC(t *testing.T) { |
|||
// 100 = 1 replica in different DC
|
|||
existing := []types.ReplicaLocation{ |
|||
loc("dc1", "r1", "n1"), |
|||
loc("dc2", "r1", "n2"), |
|||
} |
|||
// Move n1 -> n3 in dc2: bad (both in same DC)
|
|||
if IsGoodMove(rp(t, "100"), existing, "n1", loc("dc2", "r1", "n3")) { |
|||
t.Error("100: move to same DC as other replica should not be allowed") |
|||
} |
|||
// Move n1 -> n3 in dc3: good (different DCs)
|
|||
if !IsGoodMove(rp(t, "100"), existing, "n1", loc("dc3", "r1", "n3")) { |
|||
t.Error("100: move to different DC should be allowed") |
|||
} |
|||
} |
|||
|
|||
func TestIsGoodMove_SameNode(t *testing.T) { |
|||
// Moving to the same node as an existing replica should always be rejected
|
|||
existing := []types.ReplicaLocation{ |
|||
loc("dc1", "r1", "n1"), |
|||
loc("dc1", "r2", "n2"), |
|||
} |
|||
if IsGoodMove(rp(t, "010"), existing, "n1", loc("dc1", "r2", "n2")) { |
|||
t.Error("should reject move to same node as existing replica") |
|||
} |
|||
} |
|||
|
|||
func TestIsGoodMove_011_Composite(t *testing.T) { |
|||
// 011 = 1 same-rack + 1 different-rack (3 replicas: 2 on same rack, 1 on different)
|
|||
existing := []types.ReplicaLocation{ |
|||
loc("dc1", "r1", "n1"), |
|||
loc("dc1", "r1", "n2"), |
|||
loc("dc1", "r2", "n3"), |
|||
} |
|||
// Move n1 -> n4 on r1: good (maintains 2 on r1, 1 on r2)
|
|||
if !IsGoodMove(rp(t, "011"), existing, "n1", loc("dc1", "r1", "n4")) { |
|||
t.Error("011: move within same rack should be allowed") |
|||
} |
|||
// Move n3 -> n4 on r1: bad (would have 3 on r1, 0 on different rack)
|
|||
if IsGoodMove(rp(t, "011"), existing, "n3", loc("dc1", "r1", "n4")) { |
|||
t.Error("011: move that eliminates different-rack replica should not be allowed") |
|||
} |
|||
} |
|||
|
|||
func TestIsGoodMove_110_Composite(t *testing.T) { |
|||
// 110 = 1 different-rack + 1 different-DC (3 replicas across 2 DCs and 2 racks)
|
|||
existing := []types.ReplicaLocation{ |
|||
loc("dc1", "r1", "n1"), |
|||
loc("dc1", "r2", "n2"), |
|||
loc("dc2", "r1", "n3"), |
|||
} |
|||
// Move n1 -> n4 in dc1/r3: good (dc1 still has r2+r3, dc2 has r1)
|
|||
if !IsGoodMove(rp(t, "110"), existing, "n1", loc("dc1", "r3", "n4")) { |
|||
t.Error("110: move to new rack in same DC should be allowed") |
|||
} |
|||
// Move n3 -> n4 in dc1/r1: bad (would lose the different-DC replica)
|
|||
if IsGoodMove(rp(t, "110"), existing, "n3", loc("dc1", "r1", "n4")) { |
|||
t.Error("110: move that eliminates different-DC replica should not be allowed") |
|||
} |
|||
} |
|||
|
|||
func TestIsGoodMove_NilReplicaPlacement(t *testing.T) { |
|||
if !IsGoodMove(nil, []types.ReplicaLocation{loc("dc1", "r1", "n1")}, "n1", loc("dc1", "r1", "n2")) { |
|||
t.Error("nil replica placement should allow any move") |
|||
} |
|||
} |
|||
Write
Preview
Loading…
Cancel
Save
Reference in new issue