|
|
@ -363,6 +363,23 @@ func createBalanceTask(diskType string, selectedVolume *types.VolumeHealthMetric |
|
|
return nil, "" |
|
|
return nil, "" |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
// Parse replica placement once for use in both scoring and validation.
|
|
|
|
|
|
var volumeRP *super_block.ReplicaPlacement |
|
|
|
|
|
if selectedVolume.ExpectedReplicas > 0 && selectedVolume.ExpectedReplicas <= 255 { |
|
|
|
|
|
if parsed, rpErr := super_block.NewReplicaPlacementFromByte(byte(selectedVolume.ExpectedReplicas)); rpErr == nil && parsed.HasReplication() { |
|
|
|
|
|
volumeRP = parsed |
|
|
|
|
|
} |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
var replicas []types.ReplicaLocation |
|
|
|
|
|
if clusterInfo.VolumeReplicaMap != nil { |
|
|
|
|
|
replicas = clusterInfo.VolumeReplicaMap[selectedVolume.VolumeID] |
|
|
|
|
|
if volumeRP != nil && len(replicas) == 0 { |
|
|
|
|
|
glog.V(1).Infof("BALANCE [%s]: No replica locations found for volume %d, skipping placement validation", |
|
|
|
|
|
diskType, selectedVolume.VolumeID) |
|
|
|
|
|
} |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
// Resolve the target server chosen by the detection loop's effective counts.
|
|
|
// Resolve the target server chosen by the detection loop's effective counts.
|
|
|
// This keeps destination selection in sync with the greedy algorithm rather
|
|
|
// This keeps destination selection in sync with the greedy algorithm rather
|
|
|
// than relying on topology LoadCount which can diverge across iterations.
|
|
|
// than relying on topology LoadCount which can diverge across iterations.
|
|
|
@ -371,62 +388,43 @@ func createBalanceTask(diskType string, selectedVolume *types.VolumeHealthMetric |
|
|
// Fall back to score-based planning if the preferred target can't be resolved
|
|
|
// Fall back to score-based planning if the preferred target can't be resolved
|
|
|
glog.V(1).Infof("BALANCE [%s]: Cannot resolve target %s for volume %d, falling back to score-based planning: %v", |
|
|
glog.V(1).Infof("BALANCE [%s]: Cannot resolve target %s for volume %d, falling back to score-based planning: %v", |
|
|
diskType, targetServer, selectedVolume.VolumeID, err) |
|
|
diskType, targetServer, selectedVolume.VolumeID, err) |
|
|
destinationPlan, err = planBalanceDestination(clusterInfo.ActiveTopology, selectedVolume) |
|
|
|
|
|
|
|
|
destinationPlan, err = planBalanceDestination(clusterInfo.ActiveTopology, selectedVolume, volumeRP, replicas, allowedServers) |
|
|
if err != nil { |
|
|
if err != nil { |
|
|
glog.Warningf("Failed to plan balance destination for volume %d: %v", selectedVolume.VolumeID, err) |
|
|
glog.Warningf("Failed to plan balance destination for volume %d: %v", selectedVolume.VolumeID, err) |
|
|
return nil, "" |
|
|
return nil, "" |
|
|
} |
|
|
} |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
// Verify the destination is within the filtered scope. When DC/rack/node
|
|
|
|
|
|
// filters are active, allowedServers contains only the servers that passed
|
|
|
|
|
|
// filtering. The fallback planner queries the full topology, so this check
|
|
|
|
|
|
// prevents out-of-scope targets from leaking through.
|
|
|
|
|
|
if len(allowedServers) > 0 { |
|
|
|
|
|
if _, ok := allowedServers[destinationPlan.TargetNode]; !ok { |
|
|
|
|
|
glog.V(1).Infof("BALANCE [%s]: Planned destination %s for volume %d is outside filtered scope, skipping", |
|
|
|
|
|
|
|
|
// Verify the resolved destination. If it falls outside the filtered scope
|
|
|
|
|
|
// or violates replica placement, fall back to the score-based planner and
|
|
|
|
|
|
// pick the best candidate that is actually valid.
|
|
|
|
|
|
if !isValidBalanceDestination(destinationPlan, allowedServers, volumeRP, replicas, selectedVolume.Server) { |
|
|
|
|
|
switch { |
|
|
|
|
|
case destinationPlan == nil: |
|
|
|
|
|
glog.V(1).Infof("BALANCE [%s]: Planned destination for volume %d is nil, falling back", |
|
|
|
|
|
diskType, selectedVolume.VolumeID) |
|
|
|
|
|
case !isAllowedBalanceTarget(destinationPlan.TargetNode, allowedServers): |
|
|
|
|
|
glog.V(1).Infof("BALANCE [%s]: Planned destination %s for volume %d is outside filtered scope, falling back", |
|
|
diskType, destinationPlan.TargetNode, selectedVolume.VolumeID) |
|
|
diskType, destinationPlan.TargetNode, selectedVolume.VolumeID) |
|
|
return nil, "" |
|
|
|
|
|
|
|
|
case volumeRP != nil && len(replicas) > 0: |
|
|
|
|
|
glog.V(1).Infof("BALANCE [%s]: Destination %s violates replica placement for volume %d (rp=%03d), falling back", |
|
|
|
|
|
diskType, destinationPlan.TargetNode, selectedVolume.VolumeID, selectedVolume.ExpectedReplicas) |
|
|
} |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
// Validate move against replica placement policy
|
|
|
|
|
|
if selectedVolume.ExpectedReplicas > 0 && selectedVolume.ExpectedReplicas <= 255 && clusterInfo.VolumeReplicaMap != nil { |
|
|
|
|
|
rpBytes, rpErr := super_block.NewReplicaPlacementFromByte(byte(selectedVolume.ExpectedReplicas)) |
|
|
|
|
|
if rpErr == nil && rpBytes.HasReplication() { |
|
|
|
|
|
replicas := clusterInfo.VolumeReplicaMap[selectedVolume.VolumeID] |
|
|
|
|
|
if len(replicas) == 0 { |
|
|
|
|
|
glog.V(1).Infof("BALANCE [%s]: No replica locations found for volume %d, skipping placement validation", |
|
|
|
|
|
|
|
|
destinationPlan, err = planBalanceDestination(clusterInfo.ActiveTopology, selectedVolume, volumeRP, replicas, allowedServers) |
|
|
|
|
|
if err != nil { |
|
|
|
|
|
glog.Warningf("BALANCE [%s]: Failed to plan fallback destination for volume %d: %v", diskType, selectedVolume.VolumeID, err) |
|
|
|
|
|
return nil, "" |
|
|
|
|
|
} |
|
|
|
|
|
if !isValidBalanceDestination(destinationPlan, allowedServers, volumeRP, replicas, selectedVolume.Server) { |
|
|
|
|
|
if destinationPlan == nil { |
|
|
|
|
|
glog.V(1).Infof("BALANCE [%s]: Fallback destination for volume %d is nil", |
|
|
diskType, selectedVolume.VolumeID) |
|
|
diskType, selectedVolume.VolumeID) |
|
|
} else { |
|
|
} else { |
|
|
validateMove := func(plan *topology.DestinationPlan) bool { |
|
|
|
|
|
if plan == nil { |
|
|
|
|
|
return false |
|
|
|
|
|
} |
|
|
|
|
|
target := types.ReplicaLocation{ |
|
|
|
|
|
DataCenter: plan.TargetDC, |
|
|
|
|
|
Rack: plan.TargetRack, |
|
|
|
|
|
NodeID: plan.TargetNode, |
|
|
|
|
|
} |
|
|
|
|
|
return IsGoodMove(rpBytes, replicas, selectedVolume.Server, target) |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
if !validateMove(destinationPlan) { |
|
|
|
|
|
glog.V(1).Infof("BALANCE [%s]: Destination %s violates replica placement for volume %d (rp=%03d), falling back", |
|
|
|
|
|
diskType, destinationPlan.TargetNode, selectedVolume.VolumeID, selectedVolume.ExpectedReplicas) |
|
|
|
|
|
// Fall back to score-based planning
|
|
|
|
|
|
destinationPlan, err = planBalanceDestination(clusterInfo.ActiveTopology, selectedVolume) |
|
|
|
|
|
if err != nil { |
|
|
|
|
|
glog.Warningf("BALANCE [%s]: Failed to plan fallback destination for volume %d: %v", diskType, selectedVolume.VolumeID, err) |
|
|
|
|
|
return nil, "" |
|
|
|
|
|
} |
|
|
|
|
|
if !validateMove(destinationPlan) { |
|
|
|
|
|
glog.V(1).Infof("BALANCE [%s]: Fallback destination %s also violates replica placement for volume %d", |
|
|
|
|
|
diskType, destinationPlan.TargetNode, selectedVolume.VolumeID) |
|
|
|
|
|
return nil, "" |
|
|
|
|
|
} |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
glog.V(1).Infof("BALANCE [%s]: Fallback destination %s is not valid for volume %d", |
|
|
|
|
|
diskType, destinationPlan.TargetNode, selectedVolume.VolumeID) |
|
|
} |
|
|
} |
|
|
|
|
|
return nil, "" |
|
|
} |
|
|
} |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
@ -555,7 +553,10 @@ func resolveBalanceDestination(activeTopology *topology.ActiveTopology, selected |
|
|
// planBalanceDestination plans the destination for a balance operation using
|
|
|
// planBalanceDestination plans the destination for a balance operation using
|
|
|
// score-based selection. Used as a fallback when the preferred target cannot
|
|
|
// score-based selection. Used as a fallback when the preferred target cannot
|
|
|
// be resolved, and for single-move scenarios outside the detection loop.
|
|
|
// be resolved, and for single-move scenarios outside the detection loop.
|
|
|
func planBalanceDestination(activeTopology *topology.ActiveTopology, selectedVolume *types.VolumeHealthMetrics) (*topology.DestinationPlan, error) { |
|
|
|
|
|
|
|
|
// rp may be nil when the volume has no replication constraint. When replica
|
|
|
|
|
|
// locations are known, candidates that would violate placement are filtered
|
|
|
|
|
|
// out before scoring.
|
|
|
|
|
|
func planBalanceDestination(activeTopology *topology.ActiveTopology, selectedVolume *types.VolumeHealthMetrics, rp *super_block.ReplicaPlacement, replicas []types.ReplicaLocation, allowedServers map[string]int) (*topology.DestinationPlan, error) { |
|
|
// Get source node information from topology
|
|
|
// Get source node information from topology
|
|
|
var sourceRack, sourceDC string |
|
|
var sourceRack, sourceDC string |
|
|
|
|
|
|
|
|
@ -604,8 +605,21 @@ func planBalanceDestination(activeTopology *topology.ActiveTopology, selectedVol |
|
|
if disk.DiskType != selectedVolume.DiskType { |
|
|
if disk.DiskType != selectedVolume.DiskType { |
|
|
continue |
|
|
continue |
|
|
} |
|
|
} |
|
|
|
|
|
if !isAllowedBalanceTarget(disk.NodeID, allowedServers) { |
|
|
|
|
|
continue |
|
|
|
|
|
} |
|
|
|
|
|
if rp != nil && len(replicas) > 0 { |
|
|
|
|
|
target := types.ReplicaLocation{ |
|
|
|
|
|
DataCenter: disk.DataCenter, |
|
|
|
|
|
Rack: disk.Rack, |
|
|
|
|
|
NodeID: disk.NodeID, |
|
|
|
|
|
} |
|
|
|
|
|
if !IsGoodMove(rp, replicas, selectedVolume.Server, target) { |
|
|
|
|
|
continue |
|
|
|
|
|
} |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
score := calculateBalanceScore(disk, sourceRack, sourceDC, selectedVolume.Size) |
|
|
|
|
|
|
|
|
score := calculateBalanceScore(disk, sourceRack, sourceDC, selectedVolume.Size, rp) |
|
|
if score > bestScore { |
|
|
if score > bestScore { |
|
|
bestScore = score |
|
|
bestScore = score |
|
|
bestDisk = disk |
|
|
bestDisk = disk |
|
|
@ -636,7 +650,9 @@ func planBalanceDestination(activeTopology *topology.ActiveTopology, selectedVol |
|
|
// calculateBalanceScore calculates placement score for balance operations.
|
|
|
// calculateBalanceScore calculates placement score for balance operations.
|
|
|
// LoadCount reflects pending+assigned tasks on the disk, so we factor it into
|
|
|
// LoadCount reflects pending+assigned tasks on the disk, so we factor it into
|
|
|
// the utilization estimate to avoid stacking multiple moves onto the same target.
|
|
|
// the utilization estimate to avoid stacking multiple moves onto the same target.
|
|
|
func calculateBalanceScore(disk *topology.DiskInfo, sourceRack, sourceDC string, volumeSize uint64) float64 { |
|
|
|
|
|
|
|
|
// rp may be nil when the volume has no replication constraint; in that case the
|
|
|
|
|
|
// scorer defaults to preferring cross-rack/DC distribution.
|
|
|
|
|
|
func calculateBalanceScore(disk *topology.DiskInfo, sourceRack, sourceDC string, volumeSize uint64, rp *super_block.ReplicaPlacement) float64 { |
|
|
if disk.DiskInfo == nil { |
|
|
if disk.DiskInfo == nil { |
|
|
return 0.0 |
|
|
return 0.0 |
|
|
} |
|
|
} |
|
|
@ -652,17 +668,62 @@ func calculateBalanceScore(disk *topology.DiskInfo, sourceRack, sourceDC string, |
|
|
score += (1.0 - utilization) * 50.0 // Up to 50 points for low utilization
|
|
|
score += (1.0 - utilization) * 50.0 // Up to 50 points for low utilization
|
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
// Prefer different racks for better distribution
|
|
|
|
|
|
if disk.Rack != sourceRack { |
|
|
|
|
|
score += 30.0 |
|
|
|
|
|
|
|
|
// Rack scoring: respect the replication policy.
|
|
|
|
|
|
// If replicas must stay on the same rack (SameRackCount > 0 with no
|
|
|
|
|
|
// cross-rack requirement), prefer same-rack destinations. Otherwise
|
|
|
|
|
|
// prefer different racks for better distribution.
|
|
|
|
|
|
sameRack := disk.Rack == sourceRack |
|
|
|
|
|
if rp != nil && rp.DiffRackCount == 0 && rp.SameRackCount > 0 { |
|
|
|
|
|
if sameRack { |
|
|
|
|
|
score += 30.0 |
|
|
|
|
|
} |
|
|
|
|
|
} else { |
|
|
|
|
|
if !sameRack { |
|
|
|
|
|
score += 30.0 |
|
|
|
|
|
} |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
// Prefer different data centers for better distribution
|
|
|
|
|
|
if disk.DataCenter != sourceDC { |
|
|
|
|
|
score += 20.0 |
|
|
|
|
|
|
|
|
// DC scoring: same idea. If the policy requires all copies in one DC,
|
|
|
|
|
|
// prefer same-DC destinations. Otherwise prefer different DCs.
|
|
|
|
|
|
sameDC := disk.DataCenter == sourceDC |
|
|
|
|
|
if rp != nil && rp.DiffDataCenterCount == 0 && (rp.SameRackCount > 0 || rp.DiffRackCount > 0) { |
|
|
|
|
|
if sameDC { |
|
|
|
|
|
score += 20.0 |
|
|
|
|
|
} |
|
|
|
|
|
} else { |
|
|
|
|
|
if !sameDC { |
|
|
|
|
|
score += 20.0 |
|
|
|
|
|
} |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
return score |
|
|
return score |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
func isAllowedBalanceTarget(nodeID string, allowedServers map[string]int) bool { |
|
|
|
|
|
if len(allowedServers) == 0 { |
|
|
|
|
|
return true |
|
|
|
|
|
} |
|
|
|
|
|
_, ok := allowedServers[nodeID] |
|
|
|
|
|
return ok |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
func isValidBalanceDestination(plan *topology.DestinationPlan, allowedServers map[string]int, rp *super_block.ReplicaPlacement, replicas []types.ReplicaLocation, sourceNodeID string) bool { |
|
|
|
|
|
if plan == nil { |
|
|
|
|
|
return false |
|
|
|
|
|
} |
|
|
|
|
|
if !isAllowedBalanceTarget(plan.TargetNode, allowedServers) { |
|
|
|
|
|
return false |
|
|
|
|
|
} |
|
|
|
|
|
if rp == nil || len(replicas) == 0 { |
|
|
|
|
|
return true |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
target := types.ReplicaLocation{ |
|
|
|
|
|
DataCenter: plan.TargetDC, |
|
|
|
|
|
Rack: plan.TargetRack, |
|
|
|
|
|
NodeID: plan.TargetNode, |
|
|
|
|
|
} |
|
|
|
|
|
return IsGoodMove(rp, replicas, sourceNodeID, target) |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
// parseCSVSet splits a comma-separated string into a set of trimmed, non-empty values.
|
|
|
// parseCSVSet splits a comma-separated string into a set of trimmed, non-empty values.
|