package distribution

import (
	"fmt"
)

// ECDistribution represents the target distribution of EC shards
// based on EC configuration and replication policy.
type ECDistribution struct {
	// EC configuration
	ECConfig ECConfig

	// Replication configuration
	ReplicationConfig ReplicationConfig

	// Target shard counts per topology level (balanced distribution)
	TargetShardsPerDC   int
	TargetShardsPerRack int
	TargetShardsPerNode int

	// Maximum shard counts per topology level (fault tolerance limits)
	// These prevent any single failure domain from having too many shards
	MaxShardsPerDC   int
	MaxShardsPerRack int
	MaxShardsPerNode int
}

// CalculateDistribution computes the target EC shard distribution based on
// EC configuration and replication policy.
//
// The algorithm:
// 1. Uses replication policy to determine minimum topology spread
// 2. Calculates target shards per level (evenly distributed)
// 3. Calculates max shards per level (for fault tolerance)
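//
// For illustration (assuming a hypothetical 10+4 layout, i.e. 14 total
// shards, and a replication policy of 2 data centers, 2 racks per DC and
// 2 nodes per rack), the balanced targets work out to:
//
//	targetShardsPerDC   = ceilDivide(14, 2) = 7
//	targetShardsPerRack = ceilDivide(7, 2)  = 4
//	targetShardsPerNode = ceilDivide(4, 2)  = 2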
func CalculateDistribution(ec ECConfig, rep ReplicationConfig) *ECDistribution {
	totalShards := ec.TotalShards()

	// Target distribution (balanced, rounded up to ensure all shards placed)
	targetShardsPerDC := ceilDivide(totalShards, rep.MinDataCenters)
	targetShardsPerRack := ceilDivide(targetShardsPerDC, rep.MinRacksPerDC)
	targetShardsPerNode := ceilDivide(targetShardsPerRack, rep.MinNodesPerRack)

	// Maximum limits for fault tolerance.
	// These cap how many shards a single failure domain may hold, so that
	// losing one domain costs as few shards as possible. The hard cap used
	// here is totalShards - parityShards + 1; on top of that, the per-level
	// maximums below allow a small tolerance (+2) over the balanced target
	// to accommodate imbalanced topologies.
	faultToleranceLimit := totalShards - ec.ParityShards + 1

	maxShardsPerDC := min(faultToleranceLimit, targetShardsPerDC+2)
	maxShardsPerRack := min(faultToleranceLimit, targetShardsPerRack+2)
	maxShardsPerNode := min(faultToleranceLimit, targetShardsPerNode+2)
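	// Continuing the hypothetical 10+4, 2x2x2 example from the doc comment:
	// faultToleranceLimit = 14-4+1 = 11, so
	//   maxShardsPerDC   = min(11, 7+2) = 9
	//   maxShardsPerRack = min(11, 4+2) = 6
	//   maxShardsPerNode = min(11, 2+2) = 4
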
	return &ECDistribution{
		ECConfig:            ec,
		ReplicationConfig:   rep,
		TargetShardsPerDC:   targetShardsPerDC,
		TargetShardsPerRack: targetShardsPerRack,
		TargetShardsPerNode: targetShardsPerNode,
		MaxShardsPerDC:      maxShardsPerDC,
		MaxShardsPerRack:    maxShardsPerRack,
		MaxShardsPerNode:    maxShardsPerNode,
	}
}

// String returns a human-readable description of the distribution
func (d *ECDistribution) String() string {
	return fmt.Sprintf(
		"ECDistribution{EC:%s, DCs:%d (target:%d/max:%d), Racks/DC:%d (target:%d/max:%d), Nodes/Rack:%d (target:%d/max:%d)}",
		d.ECConfig.String(),
		d.ReplicationConfig.MinDataCenters, d.TargetShardsPerDC, d.MaxShardsPerDC,
		d.ReplicationConfig.MinRacksPerDC, d.TargetShardsPerRack, d.MaxShardsPerRack,
		d.ReplicationConfig.MinNodesPerRack, d.TargetShardsPerNode, d.MaxShardsPerNode,
	)
}

// Summary returns a multi-line summary of the distribution plan
func (d *ECDistribution) Summary() string {
	summary := fmt.Sprintf("EC Configuration: %s\n", d.ECConfig.String())
	summary += fmt.Sprintf("Replication: %s\n", d.ReplicationConfig.String())
	summary += "Distribution Plan:\n"
	summary += fmt.Sprintf(" Data Centers: %d (target %d shards each, max %d)\n",
		d.ReplicationConfig.MinDataCenters, d.TargetShardsPerDC, d.MaxShardsPerDC)
	summary += fmt.Sprintf(" Racks per DC: %d (target %d shards each, max %d)\n",
		d.ReplicationConfig.MinRacksPerDC, d.TargetShardsPerRack, d.MaxShardsPerRack)
	summary += fmt.Sprintf(" Nodes per Rack: %d (target %d shards each, max %d)\n",
		d.ReplicationConfig.MinNodesPerRack, d.TargetShardsPerNode, d.MaxShardsPerNode)
	return summary
}

// CanSurviveDCFailure returns true if the distribution can survive
// complete loss of one data center.
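//
// For example (hypothetical 10+4 layout spread over 2 DCs, i.e. a target of
// 7 shards per DC): losing one DC leaves 14-7 = 7 shards; assuming
// MinShardsForReconstruction() equals the data-shard count (10), this
// returns false.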
func (d *ECDistribution) CanSurviveDCFailure() bool {
	// After losing one DC holding its target share of shards, check whether
	// the remaining shards are still enough to reconstruct the data
	remainingAfterDCLoss := d.ECConfig.TotalShards() - d.TargetShardsPerDC
	return remainingAfterDCLoss >= d.ECConfig.MinShardsForReconstruction()
}

// CanSurviveRackFailure returns true if the distribution can survive
// complete loss of one rack
func (d *ECDistribution) CanSurviveRackFailure() bool {
	remainingAfterRackLoss := d.ECConfig.TotalShards() - d.TargetShardsPerRack
	return remainingAfterRackLoss >= d.ECConfig.MinShardsForReconstruction()
}

// MinDCsForDCFaultTolerance calculates the minimum number of DCs needed
// to survive complete DC failure with this EC configuration.
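//
// For example, with the hypothetical 10+4 layout and assuming
// MaxTolerableLoss() returns the parity-shard count (4), this yields
// ceilDivide(14, 4) = 4 data centers.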
func (d *ECDistribution) MinDCsForDCFaultTolerance() int {
	// To survive DC failure, max shards per DC = parityShards
	maxShardsPerDC := d.ECConfig.MaxTolerableLoss()
	if maxShardsPerDC == 0 {
		return d.ECConfig.TotalShards() // Would need one DC per shard
	}
	return ceilDivide(d.ECConfig.TotalShards(), maxShardsPerDC)
}

// FaultToleranceAnalysis returns a detailed analysis of fault tolerance
func (d *ECDistribution) FaultToleranceAnalysis() string {
	analysis := fmt.Sprintf("Fault Tolerance Analysis for %s:\n", d.ECConfig.String())

	// DC failure
	dcSurvive := d.CanSurviveDCFailure()
	shardsAfterDC := d.ECConfig.TotalShards() - d.TargetShardsPerDC
	analysis += fmt.Sprintf(" DC Failure: %s\n", boolToResult(dcSurvive))
	analysis += fmt.Sprintf(" - Losing one DC loses ~%d shards\n", d.TargetShardsPerDC)
	analysis += fmt.Sprintf(" - Remaining: %d shards (need %d)\n", shardsAfterDC, d.ECConfig.DataShards)
	if !dcSurvive {
		analysis += fmt.Sprintf(" - Need at least %d DCs for DC fault tolerance\n", d.MinDCsForDCFaultTolerance())
	}

	// Rack failure
	rackSurvive := d.CanSurviveRackFailure()
	shardsAfterRack := d.ECConfig.TotalShards() - d.TargetShardsPerRack
	analysis += fmt.Sprintf(" Rack Failure: %s\n", boolToResult(rackSurvive))
	analysis += fmt.Sprintf(" - Losing one rack loses ~%d shards\n", d.TargetShardsPerRack)
	analysis += fmt.Sprintf(" - Remaining: %d shards (need %d)\n", shardsAfterRack, d.ECConfig.DataShards)

	// Node failure (usually survivable)
	shardsAfterNode := d.ECConfig.TotalShards() - d.TargetShardsPerNode
	nodeSurvive := shardsAfterNode >= d.ECConfig.DataShards
	analysis += fmt.Sprintf(" Node Failure: %s\n", boolToResult(nodeSurvive))
	analysis += fmt.Sprintf(" - Losing one node loses ~%d shards\n", d.TargetShardsPerNode)
	analysis += fmt.Sprintf(" - Remaining: %d shards (need %d)\n", shardsAfterNode, d.ECConfig.DataShards)

	return analysis
}

func boolToResult(b bool) string {
	if b {
		return "SURVIVABLE ✓"
	}
	return "NOT SURVIVABLE ✗"
}

// ceilDivide performs ceiling division; if b is not positive, it returns a
// unchanged.
func ceilDivide(a, b int) int {
	if b <= 0 {
		return a
	}
	return (a + b - 1) / b
}
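
// Illustrative usage (a sketch only; it assumes ECConfig and
// ReplicationConfig values can be built directly with the field names
// referenced in this file):
//
//	dist := CalculateDistribution(
//		ECConfig{DataShards: 10, ParityShards: 4},
//		ReplicationConfig{MinDataCenters: 2, MinRacksPerDC: 2, MinNodesPerRack: 2},
//	)
//	fmt.Println(dist.Summary())
//	fmt.Println(dist.FaultToleranceAnalysis())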