4 changed files with 373 additions and 28 deletions
-
55weed/storage/disk_location_ec.go
-
197weed/storage/disk_location_ec_realworld_test.go
-
118weed/storage/disk_location_ec_shard_size_test.go
-
31weed/storage/disk_location_ec_test.go
@ -0,0 +1,197 @@ |
|||
package storage |
|||
|
|||
import ( |
|||
"os" |
|||
"testing" |
|||
|
|||
"github.com/seaweedfs/seaweedfs/weed/storage/erasure_coding" |
|||
) |
|||
|
|||
// TestCalculateExpectedShardSizeWithRealEncoding validates our shard size calculation
|
|||
// by actually running EC encoding on real files and comparing the results
|
|||
func TestCalculateExpectedShardSizeWithRealEncoding(t *testing.T) { |
|||
tempDir := t.TempDir() |
|||
|
|||
tests := []struct { |
|||
name string |
|||
datFileSize int64 |
|||
description string |
|||
}{ |
|||
{ |
|||
name: "5MB file", |
|||
datFileSize: 5 * 1024 * 1024, |
|||
description: "Small file that needs 1 small block per shard", |
|||
}, |
|||
{ |
|||
name: "10MB file (exactly 10 small blocks)", |
|||
datFileSize: 10 * 1024 * 1024, |
|||
description: "Exactly fits in 1MB small blocks", |
|||
}, |
|||
{ |
|||
name: "15MB file", |
|||
datFileSize: 15 * 1024 * 1024, |
|||
description: "Requires 2 small blocks per shard", |
|||
}, |
|||
{ |
|||
name: "50MB file", |
|||
datFileSize: 50 * 1024 * 1024, |
|||
description: "Requires 5 small blocks per shard", |
|||
}, |
|||
{ |
|||
name: "100MB file", |
|||
datFileSize: 100 * 1024 * 1024, |
|||
description: "Requires 10 small blocks per shard", |
|||
}, |
|||
{ |
|||
name: "512MB file", |
|||
datFileSize: 512 * 1024 * 1024, |
|||
description: "Requires 52 small blocks per shard (rounded up)", |
|||
}, |
|||
} |
|||
|
|||
for _, tt := range tests { |
|||
t.Run(tt.name, func(t *testing.T) { |
|||
// Create a test .dat file with the specified size
|
|||
baseFileName := tempDir + "/test_volume" |
|||
datFileName := baseFileName + ".dat" |
|||
|
|||
// Create .dat file with random data pattern (so it's compressible but realistic)
|
|||
datFile, err := os.Create(datFileName) |
|||
if err != nil { |
|||
t.Fatalf("Failed to create .dat file: %v", err) |
|||
} |
|||
|
|||
// Write some pattern data (not all zeros, to be more realistic)
|
|||
pattern := make([]byte, 4096) |
|||
for i := range pattern { |
|||
pattern[i] = byte(i % 256) |
|||
} |
|||
|
|||
written := int64(0) |
|||
for written < tt.datFileSize { |
|||
toWrite := tt.datFileSize - written |
|||
if toWrite > int64(len(pattern)) { |
|||
toWrite = int64(len(pattern)) |
|||
} |
|||
n, err := datFile.Write(pattern[:toWrite]) |
|||
if err != nil { |
|||
t.Fatalf("Failed to write to .dat file: %v", err) |
|||
} |
|||
written += int64(n) |
|||
} |
|||
datFile.Close() |
|||
|
|||
// Calculate expected shard size using our function
|
|||
expectedShardSize := calculateExpectedShardSize(tt.datFileSize) |
|||
|
|||
// Run actual EC encoding
|
|||
err = erasure_coding.WriteEcFiles(baseFileName) |
|||
if err != nil { |
|||
t.Fatalf("Failed to encode EC files: %v", err) |
|||
} |
|||
|
|||
// Measure actual shard sizes
|
|||
for i := 0; i < erasure_coding.TotalShardsCount; i++ { |
|||
shardFileName := baseFileName + erasure_coding.ToExt(i) |
|||
shardInfo, err := os.Stat(shardFileName) |
|||
if err != nil { |
|||
t.Fatalf("Failed to stat shard file %s: %v", shardFileName, err) |
|||
} |
|||
|
|||
actualShardSize := shardInfo.Size() |
|||
|
|||
// Verify actual size matches expected size
|
|||
if actualShardSize != expectedShardSize { |
|||
t.Errorf("Shard %d size mismatch:\n"+ |
|||
" .dat file size: %d bytes\n"+ |
|||
" Expected shard size: %d bytes\n"+ |
|||
" Actual shard size: %d bytes\n"+ |
|||
" Difference: %d bytes\n"+ |
|||
" %s", |
|||
i, tt.datFileSize, expectedShardSize, actualShardSize, |
|||
actualShardSize-expectedShardSize, tt.description) |
|||
} |
|||
} |
|||
|
|||
// If we got here, all shards match!
|
|||
t.Logf("✓ SUCCESS: .dat size %d → actual shard size %d matches calculated size (%s)", |
|||
tt.datFileSize, expectedShardSize, tt.description) |
|||
|
|||
// Cleanup
|
|||
os.Remove(datFileName) |
|||
for i := 0; i < erasure_coding.TotalShardsCount; i++ { |
|||
os.Remove(baseFileName + erasure_coding.ToExt(i)) |
|||
} |
|||
}) |
|||
} |
|||
} |
|||
|
|||
// TestCalculateExpectedShardSizeEdgeCases tests edge cases with real encoding
|
|||
func TestCalculateExpectedShardSizeEdgeCases(t *testing.T) { |
|||
tempDir := t.TempDir() |
|||
|
|||
tests := []struct { |
|||
name string |
|||
datFileSize int64 |
|||
}{ |
|||
{"1 byte file", 1}, |
|||
{"1KB file", 1024}, |
|||
{"10KB file", 10 * 1024}, |
|||
{"1MB file (1 small block)", 1024 * 1024}, |
|||
{"1MB + 1 byte", 1024*1024 + 1}, |
|||
{"9.9MB (almost 1 small block per shard)", 9*1024*1024 + 900*1024}, |
|||
{"10.1MB (just over 1 small block per shard)", 10*1024*1024 + 100*1024}, |
|||
} |
|||
|
|||
for _, tt := range tests { |
|||
t.Run(tt.name, func(t *testing.T) { |
|||
baseFileName := tempDir + "/" + tt.name |
|||
datFileName := baseFileName + ".dat" |
|||
|
|||
// Create .dat file
|
|||
datFile, err := os.Create(datFileName) |
|||
if err != nil { |
|||
t.Fatalf("Failed to create .dat file: %v", err) |
|||
} |
|||
|
|||
// Write exactly the specified number of bytes
|
|||
data := make([]byte, tt.datFileSize) |
|||
for i := range data { |
|||
data[i] = byte(i % 256) |
|||
} |
|||
datFile.Write(data) |
|||
datFile.Close() |
|||
|
|||
// Calculate expected
|
|||
expectedShardSize := calculateExpectedShardSize(tt.datFileSize) |
|||
|
|||
// Run actual EC encoding
|
|||
err = erasure_coding.WriteEcFiles(baseFileName) |
|||
if err != nil { |
|||
t.Fatalf("Failed to encode EC files: %v", err) |
|||
} |
|||
|
|||
// Check first shard (all should be same size)
|
|||
shardFileName := baseFileName + erasure_coding.ToExt(0) |
|||
shardInfo, err := os.Stat(shardFileName) |
|||
if err != nil { |
|||
t.Fatalf("Failed to stat shard file: %v", err) |
|||
} |
|||
|
|||
actualShardSize := shardInfo.Size() |
|||
|
|||
if actualShardSize != expectedShardSize { |
|||
t.Errorf("File size %d: expected shard %d, got %d (diff: %d)", |
|||
tt.datFileSize, expectedShardSize, actualShardSize, actualShardSize-expectedShardSize) |
|||
} else { |
|||
t.Logf("✓ File size %d → shard size %d (correct)", tt.datFileSize, actualShardSize) |
|||
} |
|||
|
|||
// Cleanup
|
|||
os.Remove(datFileName) |
|||
for i := 0; i < erasure_coding.TotalShardsCount; i++ { |
|||
os.Remove(baseFileName + erasure_coding.ToExt(i)) |
|||
} |
|||
}) |
|||
} |
|||
} |
|||
@ -0,0 +1,118 @@ |
|||
package storage |
|||
|
|||
import ( |
|||
"testing" |
|||
) |
|||
|
|||
func TestCalculateExpectedShardSize(t *testing.T) { |
|||
tests := []struct { |
|||
name string |
|||
datFileSize int64 |
|||
expectedShardSize int64 |
|||
description string |
|||
}{ |
|||
{ |
|||
name: "Exact 10GB (1 large batch)", |
|||
datFileSize: 10 * 1024 * 1024 * 1024, // 10GB = 1 large batch
|
|||
expectedShardSize: 1 * 1024 * 1024 * 1024, // 1GB per shard
|
|||
description: "Exactly fits in large blocks", |
|||
}, |
|||
{ |
|||
name: "11GB (1 large batch + 103 small blocks)", |
|||
datFileSize: 11 * 1024 * 1024 * 1024, // 11GB
|
|||
expectedShardSize: 1*1024*1024*1024 + 103*1024*1024, // 1GB + 103MB (103 small blocks for 1GB remaining)
|
|||
description: "1GB large + 1GB remaining needs 103 small blocks", |
|||
}, |
|||
{ |
|||
name: "5MB (requires 1 small block per shard)", |
|||
datFileSize: 5 * 1024 * 1024, // 5MB
|
|||
expectedShardSize: 1 * 1024 * 1024, // 1MB per shard (rounded up)
|
|||
description: "Small file rounds up to 1MB per shard", |
|||
}, |
|||
{ |
|||
name: "15MB (requires 2 small blocks per shard)", |
|||
datFileSize: 15 * 1024 * 1024, // 15MB
|
|||
expectedShardSize: 2 * 1024 * 1024, // 2MB per shard
|
|||
description: "15MB needs 2 small blocks per shard", |
|||
}, |
|||
{ |
|||
name: "1KB (minimum size)", |
|||
datFileSize: 1024, |
|||
expectedShardSize: 1 * 1024 * 1024, // 1MB per shard (1 small block)
|
|||
description: "Tiny file needs 1 small block", |
|||
}, |
|||
{ |
|||
name: "10.5GB (mixed)", |
|||
datFileSize: 10*1024*1024*1024 + 512*1024*1024, // 10.5GB
|
|||
expectedShardSize: 1*1024*1024*1024 + 52*1024*1024, // 1GB + 52MB (52 small blocks for 512MB remaining)
|
|||
description: "1GB large + 512MB remaining needs 52 small blocks", |
|||
}, |
|||
} |
|||
|
|||
for _, tt := range tests { |
|||
t.Run(tt.name, func(t *testing.T) { |
|||
actualShardSize := calculateExpectedShardSize(tt.datFileSize) |
|||
|
|||
if actualShardSize != tt.expectedShardSize { |
|||
t.Errorf("Expected shard size %d, got %d. %s", |
|||
tt.expectedShardSize, actualShardSize, tt.description) |
|||
} |
|||
|
|||
t.Logf("✓ File size: %d → Shard size: %d (%s)", |
|||
tt.datFileSize, actualShardSize, tt.description) |
|||
}) |
|||
} |
|||
} |
|||
|
|||
// TestShardSizeValidationScenarios tests realistic scenarios
|
|||
func TestShardSizeValidationScenarios(t *testing.T) { |
|||
scenarios := []struct { |
|||
name string |
|||
datFileSize int64 |
|||
actualShardSize int64 |
|||
shouldBeValid bool |
|||
}{ |
|||
{ |
|||
name: "Valid: exact match for 10GB", |
|||
datFileSize: 10 * 1024 * 1024 * 1024, // 10GB
|
|||
actualShardSize: 1 * 1024 * 1024 * 1024, // 1GB (exact)
|
|||
shouldBeValid: true, |
|||
}, |
|||
{ |
|||
name: "Invalid: 1 byte too small", |
|||
datFileSize: 10 * 1024 * 1024 * 1024, // 10GB
|
|||
actualShardSize: 1*1024*1024*1024 - 1, // 1GB - 1 byte
|
|||
shouldBeValid: false, |
|||
}, |
|||
{ |
|||
name: "Invalid: 1 byte too large", |
|||
datFileSize: 10 * 1024 * 1024 * 1024, // 10GB
|
|||
actualShardSize: 1*1024*1024*1024 + 1, // 1GB + 1 byte
|
|||
shouldBeValid: false, |
|||
}, |
|||
{ |
|||
name: "Valid: small file exact match", |
|||
datFileSize: 5 * 1024 * 1024, // 5MB
|
|||
actualShardSize: 1 * 1024 * 1024, // 1MB (exact)
|
|||
shouldBeValid: true, |
|||
}, |
|||
{ |
|||
name: "Invalid: wrong size for small file", |
|||
datFileSize: 5 * 1024 * 1024, // 5MB
|
|||
actualShardSize: 500 * 1024, // 500KB (too small)
|
|||
shouldBeValid: false, |
|||
}, |
|||
} |
|||
|
|||
for _, scenario := range scenarios { |
|||
t.Run(scenario.name, func(t *testing.T) { |
|||
expectedSize := calculateExpectedShardSize(scenario.datFileSize) |
|||
isValid := scenario.actualShardSize == expectedSize |
|||
|
|||
if isValid != scenario.shouldBeValid { |
|||
t.Errorf("Expected validation result %v, got %v. Actual shard: %d, Expected: %d", |
|||
scenario.shouldBeValid, isValid, scenario.actualShardSize, expectedSize) |
|||
} |
|||
}) |
|||
} |
|||
} |
|||
Write
Preview
Loading…
Cancel
Save
Reference in new issue