From 8f4cdc5a872e4508b695fb2ebfff350bf9a03ea2 Mon Sep 17 00:00:00 2001 From: chrislu Date: Mon, 27 Oct 2025 21:00:25 -0700 Subject: [PATCH] improve: add defensive programming and better error messages for EC Code review improvements from CodeRabbit: 1. ShardBits Guardrails (ec_volume_info.go): - AddShardId, RemoveShardId: Reject shard IDs >= MaxShardCount - HasShardId: Return false for out-of-range shard IDs - Prevents silent no-ops from bit shifts with invalid IDs 2. Future-Proof Regex (disk_location_ec.go): - Updated regex from \.ec[0-9][0-9] to \.ec\d{2,3} - Now matches .ec00 through .ec999 (currently .ec00-.ec31 used) - Supports future increases to MaxShardCount beyond 99 3. Better Error Messages (volume_grpc_erasure_coding.go): - Include valid range (1..32) in dataShards validation error - Helps operators quickly identify the problem 4. Validation Before Save (volume_grpc_erasure_coding.go): - Validate ECContext (DataShards > 0, ParityShards > 0, Total <= MaxShardCount) - Log EC config being saved to .vif for debugging - Prevents writing invalid configs to disk These changes improve robustness and debuggability without changing core functionality. --- weed/server/volume_grpc_erasure_coding.go | 10 +++++++++- weed/storage/disk_location_ec.go | 4 +++- weed/storage/erasure_coding/ec_volume_info.go | 9 +++++++++ 3 files changed, 21 insertions(+), 2 deletions(-) diff --git a/weed/server/volume_grpc_erasure_coding.go b/weed/server/volume_grpc_erasure_coding.go index 56c08e56d..bb871796b 100644 --- a/weed/server/volume_grpc_erasure_coding.go +++ b/weed/server/volume_grpc_erasure_coding.go @@ -103,11 +103,19 @@ func (vs *VolumeServer) VolumeEcShardsGenerate(ctx context.Context, req *volume_ datSize, _, _ := v.FileStat() volumeInfo.DatFileSize = int64(datSize) + // Validate EC configuration before saving to .vif + if ecCtx.DataShards <= 0 || ecCtx.ParityShards <= 0 || ecCtx.Total() > erasure_coding.MaxShardCount { + return nil, fmt.Errorf("invalid EC config before saving: data=%d, parity=%d, total=%d (max=%d)", + ecCtx.DataShards, ecCtx.ParityShards, ecCtx.Total(), erasure_coding.MaxShardCount) + } + // Save EC configuration to VolumeInfo volumeInfo.EcShardConfig = &volume_server_pb.EcShardConfig{ DataShards: uint32(ecCtx.DataShards), ParityShards: uint32(ecCtx.ParityShards), } + glog.V(1).Infof("Saving EC config to .vif for volume %d: %d+%d (total: %d)", + req.VolumeId, ecCtx.DataShards, ecCtx.ParityShards, ecCtx.Total()) if err := volume_info.SaveVolumeInfo(baseFileName+".vif", volumeInfo); err != nil { return nil, fmt.Errorf("SaveVolumeInfo %s: %v", baseFileName, err) @@ -484,7 +492,7 @@ func (vs *VolumeServer) VolumeEcShardsToVolume(ctx context.Context, req *volume_ // Defensive validation to prevent panics from corrupted ECContext if dataShards <= 0 || dataShards > erasure_coding.MaxShardCount { - return nil, fmt.Errorf("invalid data shard count %d for volume %d", dataShards, req.VolumeId) + return nil, fmt.Errorf("invalid data shard count %d for volume %d (must be 1..%d)", dataShards, req.VolumeId, erasure_coding.MaxShardCount) } shardFileNames := tempShards[:dataShards] diff --git a/weed/storage/disk_location_ec.go b/weed/storage/disk_location_ec.go index de8eca9f3..b370555da 100644 --- a/weed/storage/disk_location_ec.go +++ b/weed/storage/disk_location_ec.go @@ -16,7 +16,9 @@ import ( ) var ( - re = regexp.MustCompile(`\.ec[0-9][0-9]`) + // Match .ec00 through .ec999 (currently only .ec00-.ec31 are used) + // Using \d{2,3} for future-proofing if MaxShardCount is ever increased beyond 99 + re = regexp.MustCompile(`\.ec\d{2,3}`) ) func (l *DiskLocation) FindEcVolume(vid needle.VolumeId) (*erasure_coding.EcVolume, bool) { diff --git a/weed/storage/erasure_coding/ec_volume_info.go b/weed/storage/erasure_coding/ec_volume_info.go index 81891fd6c..793ed9353 100644 --- a/weed/storage/erasure_coding/ec_volume_info.go +++ b/weed/storage/erasure_coding/ec_volume_info.go @@ -119,14 +119,23 @@ func (ecInfo *EcVolumeInfo) ToVolumeEcShardInformationMessage() (ret *master_pb. type ShardBits uint32 // use bits to indicate the shard id, use 32 bits just for possible future extension func (b ShardBits) AddShardId(id ShardId) ShardBits { + if id >= MaxShardCount { + return b // Reject out-of-range shard IDs + } return b | (1 << id) } func (b ShardBits) RemoveShardId(id ShardId) ShardBits { + if id >= MaxShardCount { + return b // Reject out-of-range shard IDs + } return b &^ (1 << id) } func (b ShardBits) HasShardId(id ShardId) bool { + if id >= MaxShardCount { + return false // Out-of-range shard IDs are never present + } return b&(1< 0 }