calculate expected shard size

1 month ago · 7b6f364e7f
4 changed files with 373 additions and 28 deletions
--- a/weed/storage/disk_location_ec.go
+++ b/weed/storage/disk_location_ec.go
@ -307,6 +307,30 @@ func (l *DiskLocation) EcShardCount() int {
 	return shardCount
 }

+// calculateExpectedShardSize computes the exact expected shard size based on .dat file size
+// The EC encoding process is deterministic:
+// 1. Data is processed in batches of (LargeBlockSize * DataShardsCount) for large blocks
+// 2. Remaining data is processed in batches of (SmallBlockSize * DataShardsCount) for small blocks
+// 3. Each shard gets exactly its portion, with zero-padding applied to incomplete blocks
+func calculateExpectedShardSize(datFileSize int64) int64 {
+	var shardSize int64
+
+	// Process large blocks (1GB * 10 = 10GB batches)
+	largeBatchSize := int64(erasure_coding.ErasureCodingLargeBlockSize) * int64(erasure_coding.DataShardsCount)
+	numLargeBatches := datFileSize / largeBatchSize
+	shardSize = numLargeBatches * int64(erasure_coding.ErasureCodingLargeBlockSize)
+	remainingSize := datFileSize - (numLargeBatches * largeBatchSize)
+
+	// Process remaining data in small blocks (1MB * 10 = 10MB batches)
+	if remainingSize > 0 {
+		smallBatchSize := int64(erasure_coding.ErasureCodingSmallBlockSize) * int64(erasure_coding.DataShardsCount)
+		numSmallBatches := (remainingSize + smallBatchSize - 1) / smallBatchSize // Ceiling division
+		shardSize += numSmallBatches * int64(erasure_coding.ErasureCodingSmallBlockSize)
+	}
+
+	return shardSize
+}
+
 // validateEcVolume checks if EC volume has enough shards to be functional
 // For distributed EC volumes (where .dat is deleted), any number of shards is valid
 // For incomplete EC encoding (where .dat still exists), we need at least DataShardsCount shards
@ -319,21 +343,29 @@ func (l *DiskLocation) validateEcVolume(collection string, vid needle.VolumeId)
 	var expectedShardSize int64 = -1
 	datExists := false

-	// If .dat file exists, compute expected shard size from it
+	// If .dat file exists, compute exact expected shard size from it
 	if datFileInfo, err := os.Stat(datFileName); err == nil {
 		datExists = true
-		// Each shard should be approximately datFileSize / DataShardsCount (10 data shards)
-		// Note: Due to block alignment and padding, actual shard size may be slightly larger
-		expectedShardSize = datFileInfo.Size() / erasure_coding.DataShardsCount
+		expectedShardSize = calculateExpectedShardSize(datFileInfo.Size())
 	}

 	shardCount := 0
 	var actualShardSize int64 = -1

 	// Count shards and validate they all have the same size (required for Reed-Solomon EC)
+	// Check both l.Directory (where shards normally are) and l.IdxDirectory (in case of manual moves)
 	for i := 0; i < erasure_coding.TotalShardsCount; i++ {
+		// Check in primary directory (l.Directory)
 		shardFileName := baseFileName + erasure_coding.ToExt(i)
-		if fi, err := os.Stat(shardFileName); err == nil {
+		fi, err := os.Stat(shardFileName)
+
+		// If not found in primary directory and IdxDirectory is different, check there too
+		if err != nil && l.Directory != l.IdxDirectory {
+			indexShardFileName := erasure_coding.EcShardFileName(collection, l.IdxDirectory, int(vid)) + erasure_coding.ToExt(i)
+			fi, err = os.Stat(indexShardFileName)
+		}
+
+		if err == nil {
 			// Check if file has non-zero size
 			if fi.Size() > 0 {
 				// Validate all shards are the same size (required for Reed-Solomon EC)
@ -351,14 +383,11 @@ func (l *DiskLocation) validateEcVolume(collection string, vid needle.VolumeId)
 		}
 	}

-	// If .dat file exists, validate shard size is reasonable compared to expected size
-	// Due to block alignment and padding in EC encoding, actual shard size can be slightly larger
-	// We allow up to SmallBlockSize (1MB) of padding per shard for block alignment
-	if datExists && actualShardSize > 0 {
-		maxExpectedSize := expectedShardSize + erasure_coding.ErasureCodingSmallBlockSize
-		if actualShardSize < expectedShardSize || actualShardSize > maxExpectedSize {
-			glog.V(0).Infof("EC volume %d: shard size %d outside expected range [%d, %d] (based on .dat file size with padding)",
-				vid, actualShardSize, expectedShardSize, maxExpectedSize)
+	// If .dat file exists, validate shard size matches expected size
+	if datExists && actualShardSize > 0 && expectedShardSize > 0 {
+		if actualShardSize != expectedShardSize {
+			glog.V(0).Infof("EC volume %d: shard size %d doesn't match expected size %d (based on .dat file size)",
+				vid, actualShardSize, expectedShardSize)
 			return false
 		}
 	}
--- a/weed/storage/disk_location_ec_realworld_test.go
+++ b/weed/storage/disk_location_ec_realworld_test.go
@ -0,0 +1,197 @@
+package storage
+
+import (
+	"os"
+	"testing"
+
+	"github.com/seaweedfs/seaweedfs/weed/storage/erasure_coding"
+)
+
+// TestCalculateExpectedShardSizeWithRealEncoding validates our shard size calculation
+// by actually running EC encoding on real files and comparing the results
+func TestCalculateExpectedShardSizeWithRealEncoding(t *testing.T) {
+	tempDir := t.TempDir()
+
+	tests := []struct {
+		name        string
+		datFileSize int64
+		description string
+	}{
+		{
+			name:        "5MB file",
+			datFileSize: 5 * 1024 * 1024,
+			description: "Small file that needs 1 small block per shard",
+		},
+		{
+			name:        "10MB file (exactly 10 small blocks)",
+			datFileSize: 10 * 1024 * 1024,
+			description: "Exactly fits in 1MB small blocks",
+		},
+		{
+			name:        "15MB file",
+			datFileSize: 15 * 1024 * 1024,
+			description: "Requires 2 small blocks per shard",
+		},
+		{
+			name:        "50MB file",
+			datFileSize: 50 * 1024 * 1024,
+			description: "Requires 5 small blocks per shard",
+		},
+		{
+			name:        "100MB file",
+			datFileSize: 100 * 1024 * 1024,
+			description: "Requires 10 small blocks per shard",
+		},
+		{
+			name:        "512MB file",
+			datFileSize: 512 * 1024 * 1024,
+			description: "Requires 52 small blocks per shard (rounded up)",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			// Create a test .dat file with the specified size
+			baseFileName := tempDir + "/test_volume"
+			datFileName := baseFileName + ".dat"
+
+			// Create .dat file with random data pattern (so it's compressible but realistic)
+			datFile, err := os.Create(datFileName)
+			if err != nil {
+				t.Fatalf("Failed to create .dat file: %v", err)
+			}
+
+			// Write some pattern data (not all zeros, to be more realistic)
+			pattern := make([]byte, 4096)
+			for i := range pattern {
+				pattern[i] = byte(i % 256)
+			}
+
+			written := int64(0)
+			for written < tt.datFileSize {
+				toWrite := tt.datFileSize - written
+				if toWrite > int64(len(pattern)) {
+					toWrite = int64(len(pattern))
+				}
+				n, err := datFile.Write(pattern[:toWrite])
+				if err != nil {
+					t.Fatalf("Failed to write to .dat file: %v", err)
+				}
+				written += int64(n)
+			}
+			datFile.Close()
+
+			// Calculate expected shard size using our function
+			expectedShardSize := calculateExpectedShardSize(tt.datFileSize)
+
+			// Run actual EC encoding
+			err = erasure_coding.WriteEcFiles(baseFileName)
+			if err != nil {
+				t.Fatalf("Failed to encode EC files: %v", err)
+			}
+
+			// Measure actual shard sizes
+			for i := 0; i < erasure_coding.TotalShardsCount; i++ {
+				shardFileName := baseFileName + erasure_coding.ToExt(i)
+				shardInfo, err := os.Stat(shardFileName)
+				if err != nil {
+					t.Fatalf("Failed to stat shard file %s: %v", shardFileName, err)
+				}
+
+				actualShardSize := shardInfo.Size()
+
+				// Verify actual size matches expected size
+				if actualShardSize != expectedShardSize {
+					t.Errorf("Shard %d size mismatch:\n"+
+						"  .dat file size: %d bytes\n"+
+						"  Expected shard size: %d bytes\n"+
+						"  Actual shard size: %d bytes\n"+
+						"  Difference: %d bytes\n"+
+						"  %s",
+						i, tt.datFileSize, expectedShardSize, actualShardSize,
+						actualShardSize-expectedShardSize, tt.description)
+				}
+			}
+
+			// If we got here, all shards match!
+			t.Logf("✓ SUCCESS: .dat size %d → actual shard size %d matches calculated size (%s)",
+				tt.datFileSize, expectedShardSize, tt.description)
+
+			// Cleanup
+			os.Remove(datFileName)
+			for i := 0; i < erasure_coding.TotalShardsCount; i++ {
+				os.Remove(baseFileName + erasure_coding.ToExt(i))
+			}
+		})
+	}
+}
+
+// TestCalculateExpectedShardSizeEdgeCases tests edge cases with real encoding
+func TestCalculateExpectedShardSizeEdgeCases(t *testing.T) {
+	tempDir := t.TempDir()
+
+	tests := []struct {
+		name        string
+		datFileSize int64
+	}{
+		{"1 byte file", 1},
+		{"1KB file", 1024},
+		{"10KB file", 10 * 1024},
+		{"1MB file (1 small block)", 1024 * 1024},
+		{"1MB + 1 byte", 1024*1024 + 1},
+		{"9.9MB (almost 1 small block per shard)", 9*1024*1024 + 900*1024},
+		{"10.1MB (just over 1 small block per shard)", 10*1024*1024 + 100*1024},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			baseFileName := tempDir + "/" + tt.name
+			datFileName := baseFileName + ".dat"
+
+			// Create .dat file
+			datFile, err := os.Create(datFileName)
+			if err != nil {
+				t.Fatalf("Failed to create .dat file: %v", err)
+			}
+
+			// Write exactly the specified number of bytes
+			data := make([]byte, tt.datFileSize)
+			for i := range data {
+				data[i] = byte(i % 256)
+			}
+			datFile.Write(data)
+			datFile.Close()
+
+			// Calculate expected
+			expectedShardSize := calculateExpectedShardSize(tt.datFileSize)
+
+			// Run actual EC encoding
+			err = erasure_coding.WriteEcFiles(baseFileName)
+			if err != nil {
+				t.Fatalf("Failed to encode EC files: %v", err)
+			}
+
+			// Check first shard (all should be same size)
+			shardFileName := baseFileName + erasure_coding.ToExt(0)
+			shardInfo, err := os.Stat(shardFileName)
+			if err != nil {
+				t.Fatalf("Failed to stat shard file: %v", err)
+			}
+
+			actualShardSize := shardInfo.Size()
+
+			if actualShardSize != expectedShardSize {
+				t.Errorf("File size %d: expected shard %d, got %d (diff: %d)",
+					tt.datFileSize, expectedShardSize, actualShardSize, actualShardSize-expectedShardSize)
+			} else {
+				t.Logf("✓ File size %d → shard size %d (correct)", tt.datFileSize, actualShardSize)
+			}
+
+			// Cleanup
+			os.Remove(datFileName)
+			for i := 0; i < erasure_coding.TotalShardsCount; i++ {
+				os.Remove(baseFileName + erasure_coding.ToExt(i))
+			}
+		})
+	}
+}
--- a/weed/storage/disk_location_ec_shard_size_test.go
+++ b/weed/storage/disk_location_ec_shard_size_test.go
@ -0,0 +1,118 @@
+package storage
+
+import (
+	"testing"
+)
+
+func TestCalculateExpectedShardSize(t *testing.T) {
+	tests := []struct {
+		name              string
+		datFileSize       int64
+		expectedShardSize int64
+		description       string
+	}{
+		{
+			name:              "Exact 10GB (1 large batch)",
+			datFileSize:       10 * 1024 * 1024 * 1024, // 10GB = 1 large batch
+			expectedShardSize: 1 * 1024 * 1024 * 1024,  // 1GB per shard
+			description:       "Exactly fits in large blocks",
+		},
+		{
+			name:              "11GB (1 large batch + 103 small blocks)",
+			datFileSize:       11 * 1024 * 1024 * 1024,          // 11GB
+			expectedShardSize: 1*1024*1024*1024 + 103*1024*1024, // 1GB + 103MB (103 small blocks for 1GB remaining)
+			description:       "1GB large + 1GB remaining needs 103 small blocks",
+		},
+		{
+			name:              "5MB (requires 1 small block per shard)",
+			datFileSize:       5 * 1024 * 1024, // 5MB
+			expectedShardSize: 1 * 1024 * 1024, // 1MB per shard (rounded up)
+			description:       "Small file rounds up to 1MB per shard",
+		},
+		{
+			name:              "15MB (requires 2 small blocks per shard)",
+			datFileSize:       15 * 1024 * 1024, // 15MB
+			expectedShardSize: 2 * 1024 * 1024,  // 2MB per shard
+			description:       "15MB needs 2 small blocks per shard",
+		},
+		{
+			name:              "1KB (minimum size)",
+			datFileSize:       1024,
+			expectedShardSize: 1 * 1024 * 1024, // 1MB per shard (1 small block)
+			description:       "Tiny file needs 1 small block",
+		},
+		{
+			name:              "10.5GB (mixed)",
+			datFileSize:       10*1024*1024*1024 + 512*1024*1024, // 10.5GB
+			expectedShardSize: 1*1024*1024*1024 + 52*1024*1024,   // 1GB + 52MB (52 small blocks for 512MB remaining)
+			description:       "1GB large + 512MB remaining needs 52 small blocks",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			actualShardSize := calculateExpectedShardSize(tt.datFileSize)
+
+			if actualShardSize != tt.expectedShardSize {
+				t.Errorf("Expected shard size %d, got %d. %s",
+					tt.expectedShardSize, actualShardSize, tt.description)
+			}
+
+			t.Logf("✓ File size: %d → Shard size: %d (%s)",
+				tt.datFileSize, actualShardSize, tt.description)
+		})
+	}
+}
+
+// TestShardSizeValidationScenarios tests realistic scenarios
+func TestShardSizeValidationScenarios(t *testing.T) {
+	scenarios := []struct {
+		name            string
+		datFileSize     int64
+		actualShardSize int64
+		shouldBeValid   bool
+	}{
+		{
+			name:            "Valid: exact match for 10GB",
+			datFileSize:     10 * 1024 * 1024 * 1024, // 10GB
+			actualShardSize: 1 * 1024 * 1024 * 1024,  // 1GB (exact)
+			shouldBeValid:   true,
+		},
+		{
+			name:            "Invalid: 1 byte too small",
+			datFileSize:     10 * 1024 * 1024 * 1024, // 10GB
+			actualShardSize: 1*1024*1024*1024 - 1,    // 1GB - 1 byte
+			shouldBeValid:   false,
+		},
+		{
+			name:            "Invalid: 1 byte too large",
+			datFileSize:     10 * 1024 * 1024 * 1024, // 10GB
+			actualShardSize: 1*1024*1024*1024 + 1,    // 1GB + 1 byte
+			shouldBeValid:   false,
+		},
+		{
+			name:            "Valid: small file exact match",
+			datFileSize:     5 * 1024 * 1024, // 5MB
+			actualShardSize: 1 * 1024 * 1024, // 1MB (exact)
+			shouldBeValid:   true,
+		},
+		{
+			name:            "Invalid: wrong size for small file",
+			datFileSize:     5 * 1024 * 1024, // 5MB
+			actualShardSize: 500 * 1024,      // 500KB (too small)
+			shouldBeValid:   false,
+		},
+	}
+
+	for _, scenario := range scenarios {
+		t.Run(scenario.name, func(t *testing.T) {
+			expectedSize := calculateExpectedShardSize(scenario.datFileSize)
+			isValid := scenario.actualShardSize == expectedSize
+
+			if isValid != scenario.shouldBeValid {
+				t.Errorf("Expected validation result %v, got %v. Actual shard: %d, Expected: %d",
+					scenario.shouldBeValid, isValid, scenario.actualShardSize, expectedSize)
+			}
+		})
+	}
+}
--- a/weed/storage/disk_location_ec_test.go
+++ b/weed/storage/disk_location_ec_test.go
@ -113,18 +113,17 @@ func TestIncompleteEcEncodingCleanup(t *testing.T) {
 			// Setup test files
 			baseFileName := erasure_coding.EcShardFileName(tt.collection, tempDir, int(tt.volumeId))

-			// Standard shard size for test cases
-			shardSize := 1024 // 1KB per shard
+			// Use deterministic sizes that match EC encoding
+			datFileSize := int64(10 * 1024 * 1024 * 1024) // 10GB
+			expectedShardSize := calculateExpectedShardSize(datFileSize)

 			// Create .dat file if needed
-			// .dat file size should be DataShardsCount * shard size (10 shards)
 			if tt.createDatFile {
 				datFile, err := os.Create(baseFileName + ".dat")
 				if err != nil {
 					t.Fatalf("Failed to create .dat file: %v", err)
 				}
-				datData := make([]byte, erasure_coding.DataShardsCount*shardSize)
-				datFile.Write(datData)
+				datFile.Truncate(datFileSize)
 				datFile.Close()
 			}

@ -134,8 +133,7 @@ func TestIncompleteEcEncodingCleanup(t *testing.T) {
 				if err != nil {
 					t.Fatalf("Failed to create shard file: %v", err)
 				}
-				shardData := make([]byte, shardSize)
-				shardFile.Write(shardData)
+				shardFile.Truncate(expectedShardSize)
 				shardFile.Close()
 			}

@ -276,29 +274,32 @@ func TestValidateEcVolume(t *testing.T) {
 		t.Run(tt.name, func(t *testing.T) {
 			baseFileName := erasure_coding.EcShardFileName(tt.collection, tempDir, int(tt.volumeId))

-			// Standard shard size for normal test cases
-			shardSize := 1024 // 1KB per shard
+			// For proper testing, we need to use realistic sizes that match EC encoding
+			// EC uses large blocks (1GB) and small blocks (1MB)
+			// For test purposes, use a .dat file size that results in expected shard sizes
+			// 10GB .dat file = 1GB per shard (1 large block)
+			datFileSize := int64(10 * 1024 * 1024 * 1024) // 10GB
+			expectedShardSize := calculateExpectedShardSize(datFileSize)

 			// Create .dat file if needed
-			// .dat file size should be DataShardsCount * shard size (10 shards)
 			if tt.createDatFile {
 				datFile, err := os.Create(baseFileName + ".dat")
 				if err != nil {
 					t.Fatalf("Failed to create .dat file: %v", err)
 				}
-				datData := make([]byte, erasure_coding.DataShardsCount*shardSize)
-				datFile.Write(datData)
+				// Write minimal data (don't need to fill entire 10GB for tests)
+				datFile.Truncate(datFileSize)
 				datFile.Close()
 			}

-			// Create EC shard files
+			// Create EC shard files with correct size
 			for i := 0; i < tt.numShards; i++ {
 				shardFile, err := os.Create(baseFileName + erasure_coding.ToExt(i))
 				if err != nil {
 					t.Fatalf("Failed to create shard file: %v", err)
 				}
-				shardData := make([]byte, shardSize)
-				shardFile.Write(shardData)
+				// Use truncate to create file of correct size without allocating all the space
+				shardFile.Truncate(expectedShardSize)
 				shardFile.Close()
 			}