4 changed files with 373 additions and 28 deletions
-
55weed/storage/disk_location_ec.go
-
197weed/storage/disk_location_ec_realworld_test.go
-
118weed/storage/disk_location_ec_shard_size_test.go
-
31weed/storage/disk_location_ec_test.go
@ -0,0 +1,197 @@ |
|||||
|
package storage |
||||
|
|
||||
|
import (
	"os"
	"path/filepath"
	"testing"

	"github.com/seaweedfs/seaweedfs/weed/storage/erasure_coding"
)
||||
|
|
||||
|
// TestCalculateExpectedShardSizeWithRealEncoding validates our shard size calculation
|
||||
|
// by actually running EC encoding on real files and comparing the results
|
||||
|
func TestCalculateExpectedShardSizeWithRealEncoding(t *testing.T) { |
||||
|
tempDir := t.TempDir() |
||||
|
|
||||
|
tests := []struct { |
||||
|
name string |
||||
|
datFileSize int64 |
||||
|
description string |
||||
|
}{ |
||||
|
{ |
||||
|
name: "5MB file", |
||||
|
datFileSize: 5 * 1024 * 1024, |
||||
|
description: "Small file that needs 1 small block per shard", |
||||
|
}, |
||||
|
{ |
||||
|
name: "10MB file (exactly 10 small blocks)", |
||||
|
datFileSize: 10 * 1024 * 1024, |
||||
|
description: "Exactly fits in 1MB small blocks", |
||||
|
}, |
||||
|
{ |
||||
|
name: "15MB file", |
||||
|
datFileSize: 15 * 1024 * 1024, |
||||
|
description: "Requires 2 small blocks per shard", |
||||
|
}, |
||||
|
{ |
||||
|
name: "50MB file", |
||||
|
datFileSize: 50 * 1024 * 1024, |
||||
|
description: "Requires 5 small blocks per shard", |
||||
|
}, |
||||
|
{ |
||||
|
name: "100MB file", |
||||
|
datFileSize: 100 * 1024 * 1024, |
||||
|
description: "Requires 10 small blocks per shard", |
||||
|
}, |
||||
|
{ |
||||
|
name: "512MB file", |
||||
|
datFileSize: 512 * 1024 * 1024, |
||||
|
description: "Requires 52 small blocks per shard (rounded up)", |
||||
|
}, |
||||
|
} |
||||
|
|
||||
|
for _, tt := range tests { |
||||
|
t.Run(tt.name, func(t *testing.T) { |
||||
|
// Create a test .dat file with the specified size
|
||||
|
baseFileName := tempDir + "/test_volume" |
||||
|
datFileName := baseFileName + ".dat" |
||||
|
|
||||
|
// Create .dat file with random data pattern (so it's compressible but realistic)
|
||||
|
datFile, err := os.Create(datFileName) |
||||
|
if err != nil { |
||||
|
t.Fatalf("Failed to create .dat file: %v", err) |
||||
|
} |
||||
|
|
||||
|
// Write some pattern data (not all zeros, to be more realistic)
|
||||
|
pattern := make([]byte, 4096) |
||||
|
for i := range pattern { |
||||
|
pattern[i] = byte(i % 256) |
||||
|
} |
||||
|
|
||||
|
written := int64(0) |
||||
|
for written < tt.datFileSize { |
||||
|
toWrite := tt.datFileSize - written |
||||
|
if toWrite > int64(len(pattern)) { |
||||
|
toWrite = int64(len(pattern)) |
||||
|
} |
||||
|
n, err := datFile.Write(pattern[:toWrite]) |
||||
|
if err != nil { |
||||
|
t.Fatalf("Failed to write to .dat file: %v", err) |
||||
|
} |
||||
|
written += int64(n) |
||||
|
} |
||||
|
datFile.Close() |
||||
|
|
||||
|
// Calculate expected shard size using our function
|
||||
|
expectedShardSize := calculateExpectedShardSize(tt.datFileSize) |
||||
|
|
||||
|
// Run actual EC encoding
|
||||
|
err = erasure_coding.WriteEcFiles(baseFileName) |
||||
|
if err != nil { |
||||
|
t.Fatalf("Failed to encode EC files: %v", err) |
||||
|
} |
||||
|
|
||||
|
// Measure actual shard sizes
|
||||
|
for i := 0; i < erasure_coding.TotalShardsCount; i++ { |
||||
|
shardFileName := baseFileName + erasure_coding.ToExt(i) |
||||
|
shardInfo, err := os.Stat(shardFileName) |
||||
|
if err != nil { |
||||
|
t.Fatalf("Failed to stat shard file %s: %v", shardFileName, err) |
||||
|
} |
||||
|
|
||||
|
actualShardSize := shardInfo.Size() |
||||
|
|
||||
|
// Verify actual size matches expected size
|
||||
|
if actualShardSize != expectedShardSize { |
||||
|
t.Errorf("Shard %d size mismatch:\n"+ |
||||
|
" .dat file size: %d bytes\n"+ |
||||
|
" Expected shard size: %d bytes\n"+ |
||||
|
" Actual shard size: %d bytes\n"+ |
||||
|
" Difference: %d bytes\n"+ |
||||
|
" %s", |
||||
|
i, tt.datFileSize, expectedShardSize, actualShardSize, |
||||
|
actualShardSize-expectedShardSize, tt.description) |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
// If we got here, all shards match!
|
||||
|
t.Logf("✓ SUCCESS: .dat size %d → actual shard size %d matches calculated size (%s)", |
||||
|
tt.datFileSize, expectedShardSize, tt.description) |
||||
|
|
||||
|
// Cleanup
|
||||
|
os.Remove(datFileName) |
||||
|
for i := 0; i < erasure_coding.TotalShardsCount; i++ { |
||||
|
os.Remove(baseFileName + erasure_coding.ToExt(i)) |
||||
|
} |
||||
|
}) |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
// TestCalculateExpectedShardSizeEdgeCases tests edge cases with real encoding
|
||||
|
func TestCalculateExpectedShardSizeEdgeCases(t *testing.T) { |
||||
|
tempDir := t.TempDir() |
||||
|
|
||||
|
tests := []struct { |
||||
|
name string |
||||
|
datFileSize int64 |
||||
|
}{ |
||||
|
{"1 byte file", 1}, |
||||
|
{"1KB file", 1024}, |
||||
|
{"10KB file", 10 * 1024}, |
||||
|
{"1MB file (1 small block)", 1024 * 1024}, |
||||
|
{"1MB + 1 byte", 1024*1024 + 1}, |
||||
|
{"9.9MB (almost 1 small block per shard)", 9*1024*1024 + 900*1024}, |
||||
|
{"10.1MB (just over 1 small block per shard)", 10*1024*1024 + 100*1024}, |
||||
|
} |
||||
|
|
||||
|
for _, tt := range tests { |
||||
|
t.Run(tt.name, func(t *testing.T) { |
||||
|
baseFileName := tempDir + "/" + tt.name |
||||
|
datFileName := baseFileName + ".dat" |
||||
|
|
||||
|
// Create .dat file
|
||||
|
datFile, err := os.Create(datFileName) |
||||
|
if err != nil { |
||||
|
t.Fatalf("Failed to create .dat file: %v", err) |
||||
|
} |
||||
|
|
||||
|
// Write exactly the specified number of bytes
|
||||
|
data := make([]byte, tt.datFileSize) |
||||
|
for i := range data { |
||||
|
data[i] = byte(i % 256) |
||||
|
} |
||||
|
datFile.Write(data) |
||||
|
datFile.Close() |
||||
|
|
||||
|
// Calculate expected
|
||||
|
expectedShardSize := calculateExpectedShardSize(tt.datFileSize) |
||||
|
|
||||
|
// Run actual EC encoding
|
||||
|
err = erasure_coding.WriteEcFiles(baseFileName) |
||||
|
if err != nil { |
||||
|
t.Fatalf("Failed to encode EC files: %v", err) |
||||
|
} |
||||
|
|
||||
|
// Check first shard (all should be same size)
|
||||
|
shardFileName := baseFileName + erasure_coding.ToExt(0) |
||||
|
shardInfo, err := os.Stat(shardFileName) |
||||
|
if err != nil { |
||||
|
t.Fatalf("Failed to stat shard file: %v", err) |
||||
|
} |
||||
|
|
||||
|
actualShardSize := shardInfo.Size() |
||||
|
|
||||
|
if actualShardSize != expectedShardSize { |
||||
|
t.Errorf("File size %d: expected shard %d, got %d (diff: %d)", |
||||
|
tt.datFileSize, expectedShardSize, actualShardSize, actualShardSize-expectedShardSize) |
||||
|
} else { |
||||
|
t.Logf("✓ File size %d → shard size %d (correct)", tt.datFileSize, actualShardSize) |
||||
|
} |
||||
|
|
||||
|
// Cleanup
|
||||
|
os.Remove(datFileName) |
||||
|
for i := 0; i < erasure_coding.TotalShardsCount; i++ { |
||||
|
os.Remove(baseFileName + erasure_coding.ToExt(i)) |
||||
|
} |
||||
|
}) |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,118 @@ |
|||||
|
package storage |
||||
|
|
||||
|
import ( |
||||
|
"testing" |
||||
|
) |
||||
|
|
||||
|
func TestCalculateExpectedShardSize(t *testing.T) { |
||||
|
tests := []struct { |
||||
|
name string |
||||
|
datFileSize int64 |
||||
|
expectedShardSize int64 |
||||
|
description string |
||||
|
}{ |
||||
|
{ |
||||
|
name: "Exact 10GB (1 large batch)", |
||||
|
datFileSize: 10 * 1024 * 1024 * 1024, // 10GB = 1 large batch
|
||||
|
expectedShardSize: 1 * 1024 * 1024 * 1024, // 1GB per shard
|
||||
|
description: "Exactly fits in large blocks", |
||||
|
}, |
||||
|
{ |
||||
|
name: "11GB (1 large batch + 103 small blocks)", |
||||
|
datFileSize: 11 * 1024 * 1024 * 1024, // 11GB
|
||||
|
expectedShardSize: 1*1024*1024*1024 + 103*1024*1024, // 1GB + 103MB (103 small blocks for 1GB remaining)
|
||||
|
description: "1GB large + 1GB remaining needs 103 small blocks", |
||||
|
}, |
||||
|
{ |
||||
|
name: "5MB (requires 1 small block per shard)", |
||||
|
datFileSize: 5 * 1024 * 1024, // 5MB
|
||||
|
expectedShardSize: 1 * 1024 * 1024, // 1MB per shard (rounded up)
|
||||
|
description: "Small file rounds up to 1MB per shard", |
||||
|
}, |
||||
|
{ |
||||
|
name: "15MB (requires 2 small blocks per shard)", |
||||
|
datFileSize: 15 * 1024 * 1024, // 15MB
|
||||
|
expectedShardSize: 2 * 1024 * 1024, // 2MB per shard
|
||||
|
description: "15MB needs 2 small blocks per shard", |
||||
|
}, |
||||
|
{ |
||||
|
name: "1KB (minimum size)", |
||||
|
datFileSize: 1024, |
||||
|
expectedShardSize: 1 * 1024 * 1024, // 1MB per shard (1 small block)
|
||||
|
description: "Tiny file needs 1 small block", |
||||
|
}, |
||||
|
{ |
||||
|
name: "10.5GB (mixed)", |
||||
|
datFileSize: 10*1024*1024*1024 + 512*1024*1024, // 10.5GB
|
||||
|
expectedShardSize: 1*1024*1024*1024 + 52*1024*1024, // 1GB + 52MB (52 small blocks for 512MB remaining)
|
||||
|
description: "1GB large + 512MB remaining needs 52 small blocks", |
||||
|
}, |
||||
|
} |
||||
|
|
||||
|
for _, tt := range tests { |
||||
|
t.Run(tt.name, func(t *testing.T) { |
||||
|
actualShardSize := calculateExpectedShardSize(tt.datFileSize) |
||||
|
|
||||
|
if actualShardSize != tt.expectedShardSize { |
||||
|
t.Errorf("Expected shard size %d, got %d. %s", |
||||
|
tt.expectedShardSize, actualShardSize, tt.description) |
||||
|
} |
||||
|
|
||||
|
t.Logf("✓ File size: %d → Shard size: %d (%s)", |
||||
|
tt.datFileSize, actualShardSize, tt.description) |
||||
|
}) |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
// TestShardSizeValidationScenarios tests realistic scenarios
|
||||
|
func TestShardSizeValidationScenarios(t *testing.T) { |
||||
|
scenarios := []struct { |
||||
|
name string |
||||
|
datFileSize int64 |
||||
|
actualShardSize int64 |
||||
|
shouldBeValid bool |
||||
|
}{ |
||||
|
{ |
||||
|
name: "Valid: exact match for 10GB", |
||||
|
datFileSize: 10 * 1024 * 1024 * 1024, // 10GB
|
||||
|
actualShardSize: 1 * 1024 * 1024 * 1024, // 1GB (exact)
|
||||
|
shouldBeValid: true, |
||||
|
}, |
||||
|
{ |
||||
|
name: "Invalid: 1 byte too small", |
||||
|
datFileSize: 10 * 1024 * 1024 * 1024, // 10GB
|
||||
|
actualShardSize: 1*1024*1024*1024 - 1, // 1GB - 1 byte
|
||||
|
shouldBeValid: false, |
||||
|
}, |
||||
|
{ |
||||
|
name: "Invalid: 1 byte too large", |
||||
|
datFileSize: 10 * 1024 * 1024 * 1024, // 10GB
|
||||
|
actualShardSize: 1*1024*1024*1024 + 1, // 1GB + 1 byte
|
||||
|
shouldBeValid: false, |
||||
|
}, |
||||
|
{ |
||||
|
name: "Valid: small file exact match", |
||||
|
datFileSize: 5 * 1024 * 1024, // 5MB
|
||||
|
actualShardSize: 1 * 1024 * 1024, // 1MB (exact)
|
||||
|
shouldBeValid: true, |
||||
|
}, |
||||
|
{ |
||||
|
name: "Invalid: wrong size for small file", |
||||
|
datFileSize: 5 * 1024 * 1024, // 5MB
|
||||
|
actualShardSize: 500 * 1024, // 500KB (too small)
|
||||
|
shouldBeValid: false, |
||||
|
}, |
||||
|
} |
||||
|
|
||||
|
for _, scenario := range scenarios { |
||||
|
t.Run(scenario.name, func(t *testing.T) { |
||||
|
expectedSize := calculateExpectedShardSize(scenario.datFileSize) |
||||
|
isValid := scenario.actualShardSize == expectedSize |
||||
|
|
||||
|
if isValid != scenario.shouldBeValid { |
||||
|
t.Errorf("Expected validation result %v, got %v. Actual shard: %d, Expected: %d", |
||||
|
scenario.shouldBeValid, isValid, scenario.actualShardSize, expectedSize) |
||||
|
} |
||||
|
}) |
||||
|
} |
||||
|
} |
||||
Write
Preview
Loading…
Cancel
Save
Reference in new issue