Browse Source

fix: add defensive bounds checks and compute actual shard counts

Critical fixes from code review:

1. topology_ec.go: Add defensive bounds checks to AddShard/DeleteShard
   - Prevent panic when shardId >= MaxShardCount (32)
   - Return false instead of crashing on out-of-range shard IDs

2. command_ec_common.go: Fix doBalanceEcShardsAcrossRacks
   - Was using hardcoded TotalShardsCount (14) for all volumes
   - Now computes actual totalShardsForVolume from rackToShardCount
   - Fixes incorrect rebalancing for volumes with custom EC ratios
   - Example: 5+2=7 shards would incorrectly use 14 as average

These fixes improve robustness and prepare for future custom EC ratios
without changing current behavior for default 10+4 volumes.

Note: MinusParityShards and ec_task.go intentionally NOT changed for
seaweedfs repo - these will be enhanced in seaweed-enterprise repo
where custom EC ratio configuration is added.
pull/7396/head
chrislu 1 month ago
parent
commit
33cfa8c194
  1. 11
      weed/shell/command_ec_common.go
  2. 8
      weed/topology/topology_ec.go

11
weed/shell/command_ec_common.go

@ -678,11 +678,16 @@ func countShardsByRack(vid needle.VolumeId, locations []*EcNode) map[string]int
func (ecb *ecBalancer) doBalanceEcShardsAcrossRacks(collection string, vid needle.VolumeId, locations []*EcNode) error {
racks := ecb.racks()
// calculate average number of shards an ec rack should have for one volume
averageShardsPerEcRack := ceilDivide(erasure_coding.TotalShardsCount, len(racks))
// see the volume's shards are in how many racks, and how many in each rack
rackToShardCount := countShardsByRack(vid, locations)
// Calculate actual total shards for this volume (not hardcoded default)
var totalShardsForVolume int
for _, count := range rackToShardCount {
totalShardsForVolume += count
}
// calculate average number of shards an ec rack should have for one volume
averageShardsPerEcRack := ceilDivide(totalShardsForVolume, len(racks))
rackEcNodesWithVid := groupBy(locations, func(ecNode *EcNode) string {
return string(ecNode.rack)
})

8
weed/topology/topology_ec.go

@ -91,6 +91,10 @@ func NewEcShardLocations(collection string) *EcShardLocations {
}
func (loc *EcShardLocations) AddShard(shardId erasure_coding.ShardId, dn *DataNode) (added bool) {
// Defensive bounds check to prevent panic with out-of-range shard IDs
if int(shardId) >= erasure_coding.MaxShardCount {
return false
}
dataNodes := loc.Locations[shardId]
for _, n := range dataNodes {
if n.Id() == dn.Id() {
@ -102,6 +106,10 @@ func (loc *EcShardLocations) AddShard(shardId erasure_coding.ShardId, dn *DataNo
}
func (loc *EcShardLocations) DeleteShard(shardId erasure_coding.ShardId, dn *DataNode) (deleted bool) {
// Defensive bounds check to prevent panic with out-of-range shard IDs
if int(shardId) >= erasure_coding.MaxShardCount {
return false
}
dataNodes := loc.Locations[shardId]
foundIndex := -1
for index, n := range dataNodes {

Loading…
Cancel
Save