From 7b6f364e7f52424b782f41d19291355c17e4d924 Mon Sep 17 00:00:00 2001
From: chrislu
Date: Sun, 26 Oct 2025 13:48:11 -0700
Subject: [PATCH] calculate expected shard size

---
 weed/storage/disk_location_ec.go              |  55 +++--
 .../disk_location_ec_realworld_test.go        | 197 ++++++++++++++++++
 .../disk_location_ec_shard_size_test.go       | 118 +++++++++++
 weed/storage/disk_location_ec_test.go         |  31 +--
 4 files changed, 373 insertions(+), 28 deletions(-)
 create mode 100644 weed/storage/disk_location_ec_realworld_test.go
 create mode 100644 weed/storage/disk_location_ec_shard_size_test.go

diff --git a/weed/storage/disk_location_ec.go b/weed/storage/disk_location_ec.go
index 0d39a4223..2859f7683 100644
--- a/weed/storage/disk_location_ec.go
+++ b/weed/storage/disk_location_ec.go
@@ -307,6 +307,30 @@ func (l *DiskLocation) EcShardCount() int {
 	return shardCount
 }
 
+// calculateExpectedShardSize computes the exact expected shard size based on .dat file size
+// The EC encoding process is deterministic:
+// 1. Data is processed in batches of (LargeBlockSize * DataShardsCount) for large blocks
+// 2. Remaining data is processed in batches of (SmallBlockSize * DataShardsCount) for small blocks
+// 3. Each shard gets exactly its portion, with zero-padding applied to incomplete blocks
+func calculateExpectedShardSize(datFileSize int64) int64 {
+	var shardSize int64
+
+	// Process large blocks (1GB * 10 = 10GB batches)
+	largeBatchSize := int64(erasure_coding.ErasureCodingLargeBlockSize) * int64(erasure_coding.DataShardsCount)
+	numLargeBatches := datFileSize / largeBatchSize
+	shardSize = numLargeBatches * int64(erasure_coding.ErasureCodingLargeBlockSize)
+	remainingSize := datFileSize - (numLargeBatches * largeBatchSize)
+
+	// Process remaining data in small blocks (1MB * 10 = 10MB batches)
+	if remainingSize > 0 {
+		smallBatchSize := int64(erasure_coding.ErasureCodingSmallBlockSize) * int64(erasure_coding.DataShardsCount)
+		numSmallBatches := (remainingSize + smallBatchSize - 1) / smallBatchSize // Ceiling division
+		shardSize += numSmallBatches * int64(erasure_coding.ErasureCodingSmallBlockSize)
+	}
+
+	return shardSize
+}
+
 // validateEcVolume checks if EC volume has enough shards to be functional
 // For distributed EC volumes (where .dat is deleted), any number of shards is valid
 // For incomplete EC encoding (where .dat still exists), we need at least DataShardsCount shards
@@ -319,21 +343,29 @@ func (l *DiskLocation) validateEcVolume(collection string, vid needle.VolumeId)
 	var expectedShardSize int64 = -1
 	datExists := false
 
-	// If .dat file exists, compute expected shard size from it
+	// If .dat file exists, compute exact expected shard size from it
 	if datFileInfo, err := os.Stat(datFileName); err == nil {
 		datExists = true
-		// Each shard should be approximately datFileSize / DataShardsCount (10 data shards)
-		// Note: Due to block alignment and padding, actual shard size may be slightly larger
-		expectedShardSize = datFileInfo.Size() / erasure_coding.DataShardsCount
+		expectedShardSize = calculateExpectedShardSize(datFileInfo.Size())
 	}
 
 	shardCount := 0
 	var actualShardSize int64 = -1
 
 	// Count shards and validate they all have the same size (required for Reed-Solomon EC)
+	// Check both l.Directory (where shards normally are) and l.IdxDirectory (in case of manual moves)
 	for i := 0; i < erasure_coding.TotalShardsCount; i++ {
+		// Check in primary directory (l.Directory)
 		shardFileName := baseFileName + erasure_coding.ToExt(i)
-		if fi, err := os.Stat(shardFileName); err == nil {
+		fi, err := os.Stat(shardFileName)
+
+		// If not found in primary directory and IdxDirectory is different, check there too
+		if err != nil && l.Directory != l.IdxDirectory {
+			indexShardFileName := erasure_coding.EcShardFileName(collection, l.IdxDirectory, int(vid)) + erasure_coding.ToExt(i)
+			fi, err = os.Stat(indexShardFileName)
+		}
+
+		if err == nil {
 			// Check if file has non-zero size
 			if fi.Size() > 0 {
 				// Validate all shards are the same size (required for Reed-Solomon EC)
@@ -351,14 +383,11 @@ func (l *DiskLocation) validateEcVolume(collection string, vid needle.VolumeId)
 		}
 	}
 
-	// If .dat file exists, validate shard size is reasonable compared to expected size
-	// Due to block alignment and padding in EC encoding, actual shard size can be slightly larger
-	// We allow up to SmallBlockSize (1MB) of padding per shard for block alignment
-	if datExists && actualShardSize > 0 {
-		maxExpectedSize := expectedShardSize + erasure_coding.ErasureCodingSmallBlockSize
-		if actualShardSize < expectedShardSize || actualShardSize > maxExpectedSize {
-			glog.V(0).Infof("EC volume %d: shard size %d outside expected range [%d, %d] (based on .dat file size with padding)",
-				vid, actualShardSize, expectedShardSize, maxExpectedSize)
+	// If .dat file exists, validate shard size matches expected size
+	if datExists && actualShardSize > 0 && expectedShardSize > 0 {
+		if actualShardSize != expectedShardSize {
+			glog.V(0).Infof("EC volume %d: shard size %d doesn't match expected size %d (based on .dat file size)",
+				vid, actualShardSize, expectedShardSize)
 			return false
 		}
 	}
diff --git a/weed/storage/disk_location_ec_realworld_test.go b/weed/storage/disk_location_ec_realworld_test.go
new file mode 100644
index 000000000..7ebe16829
--- /dev/null
+++ b/weed/storage/disk_location_ec_realworld_test.go
@@ -0,0 +1,197 @@
+package storage
+
+import (
+	"os"
+	"testing"
+
+	"github.com/seaweedfs/seaweedfs/weed/storage/erasure_coding"
+)
+
+// TestCalculateExpectedShardSizeWithRealEncoding validates our shard size calculation
+// by actually running EC encoding on real files and comparing the results
+func TestCalculateExpectedShardSizeWithRealEncoding(t *testing.T) {
+	tempDir := t.TempDir()
+
+	tests := []struct {
+		name        string
+		datFileSize int64
+		description string
+	}{
+		{
+			name:        "5MB file",
+			datFileSize: 5 * 1024 * 1024,
+			description: "Small file that needs 1 small block per shard",
+		},
+		{
+			name:        "10MB file (exactly 10 small blocks)",
+			datFileSize: 10 * 1024 * 1024,
+			description: "Exactly fits in 1MB small blocks",
+		},
+		{
+			name:        "15MB file",
+			datFileSize: 15 * 1024 * 1024,
+			description: "Requires 2 small blocks per shard",
+		},
+		{
+			name:        "50MB file",
+			datFileSize: 50 * 1024 * 1024,
+			description: "Requires 5 small blocks per shard",
+		},
+		{
+			name:        "100MB file",
+			datFileSize: 100 * 1024 * 1024,
+			description: "Requires 10 small blocks per shard",
+		},
+		{
+			name:        "512MB file",
+			datFileSize: 512 * 1024 * 1024,
+			description: "Requires 52 small blocks per shard (rounded up)",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			// Create a test .dat file with the specified size
+			baseFileName := tempDir + "/test_volume"
+			datFileName := baseFileName + ".dat"
+
+			// Create .dat file with a repeating byte pattern (more realistic than all zeros)
+			datFile, err := os.Create(datFileName)
+			if err != nil {
+				t.Fatalf("Failed to create .dat file: %v", err)
+			}
+
+			// Write some pattern data (not all zeros, to be more realistic)
+			pattern := make([]byte, 4096)
+			for i := range pattern {
+				pattern[i] = byte(i % 256)
256) + } + + written := int64(0) + for written < tt.datFileSize { + toWrite := tt.datFileSize - written + if toWrite > int64(len(pattern)) { + toWrite = int64(len(pattern)) + } + n, err := datFile.Write(pattern[:toWrite]) + if err != nil { + t.Fatalf("Failed to write to .dat file: %v", err) + } + written += int64(n) + } + datFile.Close() + + // Calculate expected shard size using our function + expectedShardSize := calculateExpectedShardSize(tt.datFileSize) + + // Run actual EC encoding + err = erasure_coding.WriteEcFiles(baseFileName) + if err != nil { + t.Fatalf("Failed to encode EC files: %v", err) + } + + // Measure actual shard sizes + for i := 0; i < erasure_coding.TotalShardsCount; i++ { + shardFileName := baseFileName + erasure_coding.ToExt(i) + shardInfo, err := os.Stat(shardFileName) + if err != nil { + t.Fatalf("Failed to stat shard file %s: %v", shardFileName, err) + } + + actualShardSize := shardInfo.Size() + + // Verify actual size matches expected size + if actualShardSize != expectedShardSize { + t.Errorf("Shard %d size mismatch:\n"+ + " .dat file size: %d bytes\n"+ + " Expected shard size: %d bytes\n"+ + " Actual shard size: %d bytes\n"+ + " Difference: %d bytes\n"+ + " %s", + i, tt.datFileSize, expectedShardSize, actualShardSize, + actualShardSize-expectedShardSize, tt.description) + } + } + + // If we got here, all shards match! + t.Logf("✓ SUCCESS: .dat size %d → actual shard size %d matches calculated size (%s)", + tt.datFileSize, expectedShardSize, tt.description) + + // Cleanup + os.Remove(datFileName) + for i := 0; i < erasure_coding.TotalShardsCount; i++ { + os.Remove(baseFileName + erasure_coding.ToExt(i)) + } + }) + } +} + +// TestCalculateExpectedShardSizeEdgeCases tests edge cases with real encoding +func TestCalculateExpectedShardSizeEdgeCases(t *testing.T) { + tempDir := t.TempDir() + + tests := []struct { + name string + datFileSize int64 + }{ + {"1 byte file", 1}, + {"1KB file", 1024}, + {"10KB file", 10 * 1024}, + {"1MB file (1 small block)", 1024 * 1024}, + {"1MB + 1 byte", 1024*1024 + 1}, + {"9.9MB (almost 1 small block per shard)", 9*1024*1024 + 900*1024}, + {"10.1MB (just over 1 small block per shard)", 10*1024*1024 + 100*1024}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + baseFileName := tempDir + "/" + tt.name + datFileName := baseFileName + ".dat" + + // Create .dat file + datFile, err := os.Create(datFileName) + if err != nil { + t.Fatalf("Failed to create .dat file: %v", err) + } + + // Write exactly the specified number of bytes + data := make([]byte, tt.datFileSize) + for i := range data { + data[i] = byte(i % 256) + } + datFile.Write(data) + datFile.Close() + + // Calculate expected + expectedShardSize := calculateExpectedShardSize(tt.datFileSize) + + // Run actual EC encoding + err = erasure_coding.WriteEcFiles(baseFileName) + if err != nil { + t.Fatalf("Failed to encode EC files: %v", err) + } + + // Check first shard (all should be same size) + shardFileName := baseFileName + erasure_coding.ToExt(0) + shardInfo, err := os.Stat(shardFileName) + if err != nil { + t.Fatalf("Failed to stat shard file: %v", err) + } + + actualShardSize := shardInfo.Size() + + if actualShardSize != expectedShardSize { + t.Errorf("File size %d: expected shard %d, got %d (diff: %d)", + tt.datFileSize, expectedShardSize, actualShardSize, actualShardSize-expectedShardSize) + } else { + t.Logf("✓ File size %d → shard size %d (correct)", tt.datFileSize, actualShardSize) + } + + // Cleanup + os.Remove(datFileName) + for i := 0; i < 
+				os.Remove(baseFileName + erasure_coding.ToExt(i))
+			}
+		})
+	}
+}
diff --git a/weed/storage/disk_location_ec_shard_size_test.go b/weed/storage/disk_location_ec_shard_size_test.go
new file mode 100644
index 000000000..061783736
--- /dev/null
+++ b/weed/storage/disk_location_ec_shard_size_test.go
@@ -0,0 +1,118 @@
+package storage
+
+import (
+	"testing"
+)
+
+func TestCalculateExpectedShardSize(t *testing.T) {
+	tests := []struct {
+		name              string
+		datFileSize       int64
+		expectedShardSize int64
+		description       string
+	}{
+		{
+			name:              "Exact 10GB (1 large batch)",
+			datFileSize:       10 * 1024 * 1024 * 1024, // 10GB = 1 large batch
+			expectedShardSize: 1 * 1024 * 1024 * 1024,  // 1GB per shard
+			description:       "Exactly fits in large blocks",
+		},
+		{
+			name:              "11GB (1 large batch + 103 small blocks)",
+			datFileSize:       11 * 1024 * 1024 * 1024,          // 11GB
+			expectedShardSize: 1*1024*1024*1024 + 103*1024*1024, // 1GB + 103MB (103 small blocks for 1GB remaining)
+			description:       "1GB large + 1GB remaining needs 103 small blocks",
+		},
+		{
+			name:              "5MB (requires 1 small block per shard)",
+			datFileSize:       5 * 1024 * 1024, // 5MB
+			expectedShardSize: 1 * 1024 * 1024, // 1MB per shard (rounded up)
+			description:       "Small file rounds up to 1MB per shard",
+		},
+		{
+			name:              "15MB (requires 2 small blocks per shard)",
+			datFileSize:       15 * 1024 * 1024, // 15MB
+			expectedShardSize: 2 * 1024 * 1024,  // 2MB per shard
+			description:       "15MB needs 2 small blocks per shard",
+		},
+		{
+			name:              "1KB (minimum size)",
+			datFileSize:       1024,
+			expectedShardSize: 1 * 1024 * 1024, // 1MB per shard (1 small block)
+			description:       "Tiny file needs 1 small block",
+		},
+		{
+			name:              "10.5GB (mixed)",
+			datFileSize:       10*1024*1024*1024 + 512*1024*1024, // 10.5GB
+			expectedShardSize: 1*1024*1024*1024 + 52*1024*1024,   // 1GB + 52MB (52 small blocks for 512MB remaining)
+			description:       "1GB large + 512MB remaining needs 52 small blocks",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			actualShardSize := calculateExpectedShardSize(tt.datFileSize)
+
+			if actualShardSize != tt.expectedShardSize {
+				t.Errorf("Expected shard size %d, got %d. %s",
%s", + tt.expectedShardSize, actualShardSize, tt.description) + } + + t.Logf("✓ File size: %d → Shard size: %d (%s)", + tt.datFileSize, actualShardSize, tt.description) + }) + } +} + +// TestShardSizeValidationScenarios tests realistic scenarios +func TestShardSizeValidationScenarios(t *testing.T) { + scenarios := []struct { + name string + datFileSize int64 + actualShardSize int64 + shouldBeValid bool + }{ + { + name: "Valid: exact match for 10GB", + datFileSize: 10 * 1024 * 1024 * 1024, // 10GB + actualShardSize: 1 * 1024 * 1024 * 1024, // 1GB (exact) + shouldBeValid: true, + }, + { + name: "Invalid: 1 byte too small", + datFileSize: 10 * 1024 * 1024 * 1024, // 10GB + actualShardSize: 1*1024*1024*1024 - 1, // 1GB - 1 byte + shouldBeValid: false, + }, + { + name: "Invalid: 1 byte too large", + datFileSize: 10 * 1024 * 1024 * 1024, // 10GB + actualShardSize: 1*1024*1024*1024 + 1, // 1GB + 1 byte + shouldBeValid: false, + }, + { + name: "Valid: small file exact match", + datFileSize: 5 * 1024 * 1024, // 5MB + actualShardSize: 1 * 1024 * 1024, // 1MB (exact) + shouldBeValid: true, + }, + { + name: "Invalid: wrong size for small file", + datFileSize: 5 * 1024 * 1024, // 5MB + actualShardSize: 500 * 1024, // 500KB (too small) + shouldBeValid: false, + }, + } + + for _, scenario := range scenarios { + t.Run(scenario.name, func(t *testing.T) { + expectedSize := calculateExpectedShardSize(scenario.datFileSize) + isValid := scenario.actualShardSize == expectedSize + + if isValid != scenario.shouldBeValid { + t.Errorf("Expected validation result %v, got %v. Actual shard: %d, Expected: %d", + scenario.shouldBeValid, isValid, scenario.actualShardSize, expectedSize) + } + }) + } +} diff --git a/weed/storage/disk_location_ec_test.go b/weed/storage/disk_location_ec_test.go index f5ff8b281..874e73da6 100644 --- a/weed/storage/disk_location_ec_test.go +++ b/weed/storage/disk_location_ec_test.go @@ -113,18 +113,17 @@ func TestIncompleteEcEncodingCleanup(t *testing.T) { // Setup test files baseFileName := erasure_coding.EcShardFileName(tt.collection, tempDir, int(tt.volumeId)) - // Standard shard size for test cases - shardSize := 1024 // 1KB per shard + // Use deterministic sizes that match EC encoding + datFileSize := int64(10 * 1024 * 1024 * 1024) // 10GB + expectedShardSize := calculateExpectedShardSize(datFileSize) // Create .dat file if needed - // .dat file size should be DataShardsCount * shard size (10 shards) if tt.createDatFile { datFile, err := os.Create(baseFileName + ".dat") if err != nil { t.Fatalf("Failed to create .dat file: %v", err) } - datData := make([]byte, erasure_coding.DataShardsCount*shardSize) - datFile.Write(datData) + datFile.Truncate(datFileSize) datFile.Close() } @@ -134,8 +133,7 @@ func TestIncompleteEcEncodingCleanup(t *testing.T) { if err != nil { t.Fatalf("Failed to create shard file: %v", err) } - shardData := make([]byte, shardSize) - shardFile.Write(shardData) + shardFile.Truncate(expectedShardSize) shardFile.Close() } @@ -276,29 +274,32 @@ func TestValidateEcVolume(t *testing.T) { t.Run(tt.name, func(t *testing.T) { baseFileName := erasure_coding.EcShardFileName(tt.collection, tempDir, int(tt.volumeId)) - // Standard shard size for normal test cases - shardSize := 1024 // 1KB per shard + // For proper testing, we need to use realistic sizes that match EC encoding + // EC uses large blocks (1GB) and small blocks (1MB) + // For test purposes, use a .dat file size that results in expected shard sizes + // 10GB .dat file = 1GB per shard (1 large block) + datFileSize := 
+			expectedShardSize := calculateExpectedShardSize(datFileSize)
 
 			// Create .dat file if needed
-			// .dat file size should be DataShardsCount * shard size (10 shards)
 			if tt.createDatFile {
 				datFile, err := os.Create(baseFileName + ".dat")
 				if err != nil {
 					t.Fatalf("Failed to create .dat file: %v", err)
 				}
-				datData := make([]byte, erasure_coding.DataShardsCount*shardSize)
-				datFile.Write(datData)
+				// Truncate to the target size so the test doesn't actually write 10GB of data
+				datFile.Truncate(datFileSize)
 				datFile.Close()
 			}
 
-			// Create EC shard files
+			// Create EC shard files with correct size
 			for i := 0; i < tt.numShards; i++ {
 				shardFile, err := os.Create(baseFileName + erasure_coding.ToExt(i))
 				if err != nil {
 					t.Fatalf("Failed to create shard file: %v", err)
 				}
-				shardData := make([]byte, shardSize)
-				shardFile.Write(shardData)
+				// Use truncate to create file of correct size without allocating all the space
+				shardFile.Truncate(expectedShardSize)
 				shardFile.Close()
 			}
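
Aside (not part of the patch): a quick way to sanity-check the size math introduced above is to mirror calculateExpectedShardSize in a standalone sketch. The snippet below hardcodes the values the patch comments describe (1GB large blocks, 1MB small blocks, 10 data shards) as local constants; the real function uses the erasure_coding package constants, so the names here are illustrative only.

package main

import "fmt"

const (
	largeBlockSize = int64(1 << 30) // 1GB, stands in for erasure_coding.ErasureCodingLargeBlockSize
	smallBlockSize = int64(1 << 20) // 1MB, stands in for erasure_coding.ErasureCodingSmallBlockSize
	dataShards     = int64(10)      // stands in for erasure_coding.DataShardsCount
)

// expectedShardSize mirrors the patch's calculateExpectedShardSize logic.
func expectedShardSize(datFileSize int64) int64 {
	largeBatch := largeBlockSize * dataShards // 10GB of input consumed per large round
	shard := (datFileSize / largeBatch) * largeBlockSize
	remaining := datFileSize % largeBatch

	if remaining > 0 {
		smallBatch := smallBlockSize * dataShards                             // 10MB of input consumed per small round
		shard += ((remaining + smallBatch - 1) / smallBatch) * smallBlockSize // ceiling division
	}
	return shard
}

func main() {
	// 11GB .dat: one 10GB large round (1GB per shard) plus a 1GB remainder,
	// which needs ceil(1GB / 10MB) = 103 small blocks of 1MB each per shard.
	fmt.Println(expectedShardSize(11 << 30)) // 1181745152 = 1GB + 103MB
	fmt.Println(expectedShardSize(5 << 20))  // 1048576 = 1MB (5MB rounds up to one small block per shard)
}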