
calculate expected shard size

pull/7384/head
chrislu 1 month ago
commit 7b6f364e7f
  1. weed/storage/disk_location_ec.go (55 lines changed)
  2. weed/storage/disk_location_ec_realworld_test.go (197 lines changed)
  3. weed/storage/disk_location_ec_shard_size_test.go (118 lines changed)
  4. weed/storage/disk_location_ec_test.go (31 lines changed)

weed/storage/disk_location_ec.go (55 lines changed)

@@ -307,6 +307,30 @@ func (l *DiskLocation) EcShardCount() int {
 	return shardCount
 }
 
+// calculateExpectedShardSize computes the exact expected shard size based on .dat file size
+// The EC encoding process is deterministic:
+// 1. Data is processed in batches of (LargeBlockSize * DataShardsCount) for large blocks
+// 2. Remaining data is processed in batches of (SmallBlockSize * DataShardsCount) for small blocks
+// 3. Each shard gets exactly its portion, with zero-padding applied to incomplete blocks
+func calculateExpectedShardSize(datFileSize int64) int64 {
+	var shardSize int64
+
+	// Process large blocks (1GB * 10 = 10GB batches)
+	largeBatchSize := int64(erasure_coding.ErasureCodingLargeBlockSize) * int64(erasure_coding.DataShardsCount)
+	numLargeBatches := datFileSize / largeBatchSize
+	shardSize = numLargeBatches * int64(erasure_coding.ErasureCodingLargeBlockSize)
+	remainingSize := datFileSize - (numLargeBatches * largeBatchSize)
+
+	// Process remaining data in small blocks (1MB * 10 = 10MB batches)
+	if remainingSize > 0 {
+		smallBatchSize := int64(erasure_coding.ErasureCodingSmallBlockSize) * int64(erasure_coding.DataShardsCount)
+		numSmallBatches := (remainingSize + smallBatchSize - 1) / smallBatchSize // Ceiling division
+		shardSize += numSmallBatches * int64(erasure_coding.ErasureCodingSmallBlockSize)
+	}
+
+	return shardSize
+}
+
 // validateEcVolume checks if EC volume has enough shards to be functional
 // For distributed EC volumes (where .dat is deleted), any number of shards is valid
 // For incomplete EC encoding (where .dat still exists), we need at least DataShardsCount shards
@@ -319,21 +343,29 @@ func (l *DiskLocation) validateEcVolume(collection string, vid needle.VolumeId)
 	var expectedShardSize int64 = -1
 	datExists := false
 
-	// If .dat file exists, compute expected shard size from it
+	// If .dat file exists, compute exact expected shard size from it
 	if datFileInfo, err := os.Stat(datFileName); err == nil {
 		datExists = true
-		// Each shard should be approximately datFileSize / DataShardsCount (10 data shards)
-		// Note: Due to block alignment and padding, actual shard size may be slightly larger
-		expectedShardSize = datFileInfo.Size() / erasure_coding.DataShardsCount
+		expectedShardSize = calculateExpectedShardSize(datFileInfo.Size())
 	}
 
 	shardCount := 0
 	var actualShardSize int64 = -1
 
 	// Count shards and validate they all have the same size (required for Reed-Solomon EC)
+	// Check both l.Directory (where shards normally are) and l.IdxDirectory (in case of manual moves)
 	for i := 0; i < erasure_coding.TotalShardsCount; i++ {
+		// Check in primary directory (l.Directory)
 		shardFileName := baseFileName + erasure_coding.ToExt(i)
-		if fi, err := os.Stat(shardFileName); err == nil {
+		fi, err := os.Stat(shardFileName)
+
+		// If not found in primary directory and IdxDirectory is different, check there too
+		if err != nil && l.Directory != l.IdxDirectory {
+			indexShardFileName := erasure_coding.EcShardFileName(collection, l.IdxDirectory, int(vid)) + erasure_coding.ToExt(i)
+			fi, err = os.Stat(indexShardFileName)
+		}
+
+		if err == nil {
 			// Check if file has non-zero size
 			if fi.Size() > 0 {
 				// Validate all shards are the same size (required for Reed-Solomon EC)
@@ -351,14 +383,11 @@ func (l *DiskLocation) validateEcVolume(collection string, vid needle.VolumeId)
 		}
 	}
 
-	// If .dat file exists, validate shard size is reasonable compared to expected size
-	// Due to block alignment and padding in EC encoding, actual shard size can be slightly larger
-	// We allow up to SmallBlockSize (1MB) of padding per shard for block alignment
-	if datExists && actualShardSize > 0 {
-		maxExpectedSize := expectedShardSize + erasure_coding.ErasureCodingSmallBlockSize
-		if actualShardSize < expectedShardSize || actualShardSize > maxExpectedSize {
-			glog.V(0).Infof("EC volume %d: shard size %d outside expected range [%d, %d] (based on .dat file size with padding)",
-				vid, actualShardSize, expectedShardSize, maxExpectedSize)
+	// If .dat file exists, validate shard size matches expected size
+	if datExists && actualShardSize > 0 && expectedShardSize > 0 {
+		if actualShardSize != expectedShardSize {
+			glog.V(0).Infof("EC volume %d: shard size %d doesn't match expected size %d (based on .dat file size)",
+				vid, actualShardSize, expectedShardSize)
 			return false
 		}
 	}
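
To sanity-check the batch arithmetic above, here is a minimal standalone sketch of the same calculation, with the 1GB/1MB block sizes and 10 data shards from the comments hard-coded. The names expectedShardSize and the main harness are illustrative, not part of this commit:

package main

import "fmt"

const (
	largeBlock = int64(1) << 30 // 1GB, the large block size the comments assume
	smallBlock = int64(1) << 20 // 1MB, the small block size the comments assume
	dataShards = int64(10)      // DataShardsCount per the comments
)

// expectedShardSize mirrors calculateExpectedShardSize: whole 10GB batches
// contribute 1GB per shard, and the remainder is rounded up to whole 10MB
// batches, each contributing 1MB per shard.
func expectedShardSize(datFileSize int64) int64 {
	largeBatch := largeBlock * dataShards // 10GB of data consumed per large batch
	shard := (datFileSize / largeBatch) * largeBlock
	remaining := datFileSize % largeBatch
	if remaining > 0 {
		smallBatch := smallBlock * dataShards // 10MB of data consumed per small batch
		shard += ((remaining + smallBatch - 1) / smallBatch) * smallBlock // ceiling division
	}
	return shard
}

func main() {
	// 11GB file: one full large batch gives each shard 1GB; the 1GB remainder
	// needs ceil(1GB / 10MB) = 103 small batches, i.e. 103MB more per shard.
	fmt.Println(expectedShardSize(11 << 30)) // 1181745152 (1GB + 103MB)
	// 5MB file: no large batches; one small batch gives each shard 1MB.
	fmt.Println(expectedShardSize(5 << 20)) // 1048576 (1MB)
}

Rounding the remainder up to whole small batches is what makes the expected size exact rather than approximate: the encoder zero-pads the final block instead of writing a short shard, which is why the validation can now demand strict equality instead of the old tolerance range.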

weed/storage/disk_location_ec_realworld_test.go (197 lines changed)

@@ -0,0 +1,197 @@
package storage

import (
	"os"
	"testing"

	"github.com/seaweedfs/seaweedfs/weed/storage/erasure_coding"
)

// TestCalculateExpectedShardSizeWithRealEncoding validates our shard size calculation
// by actually running EC encoding on real files and comparing the results
func TestCalculateExpectedShardSizeWithRealEncoding(t *testing.T) {
	tempDir := t.TempDir()

	tests := []struct {
		name        string
		datFileSize int64
		description string
	}{
		{
			name:        "5MB file",
			datFileSize: 5 * 1024 * 1024,
			description: "Small file that needs 1 small block per shard",
		},
		{
			name:        "10MB file (exactly 10 small blocks)",
			datFileSize: 10 * 1024 * 1024,
			description: "Exactly fits in 1MB small blocks",
		},
		{
			name:        "15MB file",
			datFileSize: 15 * 1024 * 1024,
			description: "Requires 2 small blocks per shard",
		},
		{
			name:        "50MB file",
			datFileSize: 50 * 1024 * 1024,
			description: "Requires 5 small blocks per shard",
		},
		{
			name:        "100MB file",
			datFileSize: 100 * 1024 * 1024,
			description: "Requires 10 small blocks per shard",
		},
		{
			name:        "512MB file",
			datFileSize: 512 * 1024 * 1024,
			description: "Requires 52 small blocks per shard (rounded up)",
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			// Create a test .dat file with the specified size
			baseFileName := tempDir + "/test_volume"
			datFileName := baseFileName + ".dat"

			// Create the .dat file filled with a repeating byte pattern (more realistic than all zeros)
			datFile, err := os.Create(datFileName)
			if err != nil {
				t.Fatalf("Failed to create .dat file: %v", err)
			}
			// Write a 4KB pattern repeatedly until the target size is reached
			pattern := make([]byte, 4096)
			for i := range pattern {
				pattern[i] = byte(i % 256)
			}
			written := int64(0)
			for written < tt.datFileSize {
				toWrite := tt.datFileSize - written
				if toWrite > int64(len(pattern)) {
					toWrite = int64(len(pattern))
				}
				n, err := datFile.Write(pattern[:toWrite])
				if err != nil {
					t.Fatalf("Failed to write to .dat file: %v", err)
				}
				written += int64(n)
			}
			datFile.Close()

			// Calculate expected shard size using our function
			expectedShardSize := calculateExpectedShardSize(tt.datFileSize)

			// Run actual EC encoding
			err = erasure_coding.WriteEcFiles(baseFileName)
			if err != nil {
				t.Fatalf("Failed to encode EC files: %v", err)
			}

			// Measure actual shard sizes
			for i := 0; i < erasure_coding.TotalShardsCount; i++ {
				shardFileName := baseFileName + erasure_coding.ToExt(i)
				shardInfo, err := os.Stat(shardFileName)
				if err != nil {
					t.Fatalf("Failed to stat shard file %s: %v", shardFileName, err)
				}
				actualShardSize := shardInfo.Size()

				// Verify actual size matches expected size
				if actualShardSize != expectedShardSize {
					t.Errorf("Shard %d size mismatch:\n"+
						"  .dat file size: %d bytes\n"+
						"  Expected shard size: %d bytes\n"+
						"  Actual shard size: %d bytes\n"+
						"  Difference: %d bytes\n"+
						"  %s",
						i, tt.datFileSize, expectedShardSize, actualShardSize,
						actualShardSize-expectedShardSize, tt.description)
				}
			}

			// If we got here, all shards match!
			t.Logf("✓ SUCCESS: .dat size %d → actual shard size %d matches calculated size (%s)",
				tt.datFileSize, expectedShardSize, tt.description)

			// Cleanup
			os.Remove(datFileName)
			for i := 0; i < erasure_coding.TotalShardsCount; i++ {
				os.Remove(baseFileName + erasure_coding.ToExt(i))
			}
		})
	}
}

// TestCalculateExpectedShardSizeEdgeCases tests edge cases with real encoding
func TestCalculateExpectedShardSizeEdgeCases(t *testing.T) {
	tempDir := t.TempDir()

	tests := []struct {
		name        string
		datFileSize int64
	}{
		{"1 byte file", 1},
		{"1KB file", 1024},
		{"10KB file", 10 * 1024},
		{"1MB file (1 small block)", 1024 * 1024},
		{"1MB + 1 byte", 1024*1024 + 1},
		{"9.9MB (almost 1 small block per shard)", 9*1024*1024 + 900*1024},
		{"10.1MB (just over 1 small block per shard)", 10*1024*1024 + 100*1024},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			baseFileName := tempDir + "/" + tt.name
			datFileName := baseFileName + ".dat"

			// Create .dat file
			datFile, err := os.Create(datFileName)
			if err != nil {
				t.Fatalf("Failed to create .dat file: %v", err)
			}
			// Write exactly the specified number of bytes
			data := make([]byte, tt.datFileSize)
			for i := range data {
				data[i] = byte(i % 256)
			}
			datFile.Write(data)
			datFile.Close()

			// Calculate expected
			expectedShardSize := calculateExpectedShardSize(tt.datFileSize)

			// Run actual EC encoding
			err = erasure_coding.WriteEcFiles(baseFileName)
			if err != nil {
				t.Fatalf("Failed to encode EC files: %v", err)
			}

			// Check first shard (all should be same size)
			shardFileName := baseFileName + erasure_coding.ToExt(0)
			shardInfo, err := os.Stat(shardFileName)
			if err != nil {
				t.Fatalf("Failed to stat shard file: %v", err)
			}
			actualShardSize := shardInfo.Size()

			if actualShardSize != expectedShardSize {
				t.Errorf("File size %d: expected shard %d, got %d (diff: %d)",
					tt.datFileSize, expectedShardSize, actualShardSize, actualShardSize-expectedShardSize)
			} else {
				t.Logf("✓ File size %d → shard size %d (correct)", tt.datFileSize, actualShardSize)
			}

			// Cleanup
			os.Remove(datFileName)
			for i := 0; i < erasure_coding.TotalShardsCount; i++ {
				os.Remove(baseFileName + erasure_coding.ToExt(i))
			}
		})
	}
}
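
These round-trip tests invoke the real encoder via erasure_coding.WriteEcFiles, so they do noticeably more I/O than the pure calculation tests below. Assuming the standard Go tooling, something like `go test -run TestCalculateExpectedShardSize ./weed/storage/` should exercise both variants, since the -run pattern matches all three of these test names as a prefix.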

weed/storage/disk_location_ec_shard_size_test.go (118 lines changed)

@@ -0,0 +1,118 @@
package storage

import (
	"testing"
)

func TestCalculateExpectedShardSize(t *testing.T) {
	tests := []struct {
		name              string
		datFileSize       int64
		expectedShardSize int64
		description       string
	}{
		{
			name:              "Exact 10GB (1 large batch)",
			datFileSize:       10 * 1024 * 1024 * 1024, // 10GB = 1 large batch
			expectedShardSize: 1 * 1024 * 1024 * 1024,  // 1GB per shard
			description:       "Exactly fits in large blocks",
		},
		{
			name:              "11GB (1 large batch + 103 small blocks)",
			datFileSize:       11 * 1024 * 1024 * 1024,          // 11GB
			expectedShardSize: 1*1024*1024*1024 + 103*1024*1024, // 1GB + 103MB (103 small blocks for 1GB remaining)
			description:       "1GB large + 1GB remaining needs 103 small blocks",
		},
		{
			name:              "5MB (requires 1 small block per shard)",
			datFileSize:       5 * 1024 * 1024, // 5MB
			expectedShardSize: 1 * 1024 * 1024, // 1MB per shard (rounded up)
			description:       "Small file rounds up to 1MB per shard",
		},
		{
			name:              "15MB (requires 2 small blocks per shard)",
			datFileSize:       15 * 1024 * 1024, // 15MB
			expectedShardSize: 2 * 1024 * 1024,  // 2MB per shard
			description:       "15MB needs 2 small blocks per shard",
		},
		{
			name:              "1KB (minimum size)",
			datFileSize:       1024,
			expectedShardSize: 1 * 1024 * 1024, // 1MB per shard (1 small block)
			description:       "Tiny file needs 1 small block",
		},
		{
			name:              "10.5GB (mixed)",
			datFileSize:       10*1024*1024*1024 + 512*1024*1024, // 10.5GB
			expectedShardSize: 1*1024*1024*1024 + 52*1024*1024,   // 1GB + 52MB (52 small blocks for 512MB remaining)
			description:       "1GB large + 512MB remaining needs 52 small blocks",
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			actualShardSize := calculateExpectedShardSize(tt.datFileSize)
			if actualShardSize != tt.expectedShardSize {
				t.Errorf("Expected shard size %d, got %d. %s",
					tt.expectedShardSize, actualShardSize, tt.description)
			}
			t.Logf("✓ File size: %d → Shard size: %d (%s)",
				tt.datFileSize, actualShardSize, tt.description)
		})
	}
}

// TestShardSizeValidationScenarios tests realistic scenarios
func TestShardSizeValidationScenarios(t *testing.T) {
	scenarios := []struct {
		name            string
		datFileSize     int64
		actualShardSize int64
		shouldBeValid   bool
	}{
		{
			name:            "Valid: exact match for 10GB",
			datFileSize:     10 * 1024 * 1024 * 1024, // 10GB
			actualShardSize: 1 * 1024 * 1024 * 1024,  // 1GB (exact)
			shouldBeValid:   true,
		},
		{
			name:            "Invalid: 1 byte too small",
			datFileSize:     10 * 1024 * 1024 * 1024, // 10GB
			actualShardSize: 1*1024*1024*1024 - 1,    // 1GB - 1 byte
			shouldBeValid:   false,
		},
		{
			name:            "Invalid: 1 byte too large",
			datFileSize:     10 * 1024 * 1024 * 1024, // 10GB
			actualShardSize: 1*1024*1024*1024 + 1,    // 1GB + 1 byte
			shouldBeValid:   false,
		},
		{
			name:            "Valid: small file exact match",
			datFileSize:     5 * 1024 * 1024, // 5MB
			actualShardSize: 1 * 1024 * 1024, // 1MB (exact)
			shouldBeValid:   true,
		},
		{
			name:            "Invalid: wrong size for small file",
			datFileSize:     5 * 1024 * 1024, // 5MB
			actualShardSize: 500 * 1024,      // 500KB (too small)
			shouldBeValid:   false,
		},
	}

	for _, scenario := range scenarios {
		t.Run(scenario.name, func(t *testing.T) {
			expectedSize := calculateExpectedShardSize(scenario.datFileSize)
			isValid := scenario.actualShardSize == expectedSize
			if isValid != scenario.shouldBeValid {
				t.Errorf("Expected validation result %v, got %v. Actual shard: %d, Expected: %d",
					scenario.shouldBeValid, isValid, scenario.actualShardSize, expectedSize)
			}
		})
	}
}

weed/storage/disk_location_ec_test.go (31 lines changed)

@@ -113,18 +113,17 @@ func TestIncompleteEcEncodingCleanup(t *testing.T) {
 			// Setup test files
 			baseFileName := erasure_coding.EcShardFileName(tt.collection, tempDir, int(tt.volumeId))
 
-			// Standard shard size for test cases
-			shardSize := 1024 // 1KB per shard
+			// Use deterministic sizes that match EC encoding
+			datFileSize := int64(10 * 1024 * 1024 * 1024) // 10GB
+			expectedShardSize := calculateExpectedShardSize(datFileSize)
 
 			// Create .dat file if needed
-			// .dat file size should be DataShardsCount * shard size (10 shards)
 			if tt.createDatFile {
 				datFile, err := os.Create(baseFileName + ".dat")
 				if err != nil {
 					t.Fatalf("Failed to create .dat file: %v", err)
 				}
-				datData := make([]byte, erasure_coding.DataShardsCount*shardSize)
-				datFile.Write(datData)
+				datFile.Truncate(datFileSize)
 				datFile.Close()
 			}
@@ -134,8 +133,7 @@ func TestIncompleteEcEncodingCleanup(t *testing.T) {
 				if err != nil {
 					t.Fatalf("Failed to create shard file: %v", err)
 				}
-				shardData := make([]byte, shardSize)
-				shardFile.Write(shardData)
+				shardFile.Truncate(expectedShardSize)
 				shardFile.Close()
 			}
@@ -276,29 +274,32 @@ func TestValidateEcVolume(t *testing.T) {
 		t.Run(tt.name, func(t *testing.T) {
 			baseFileName := erasure_coding.EcShardFileName(tt.collection, tempDir, int(tt.volumeId))
 
-			// Standard shard size for normal test cases
-			shardSize := 1024 // 1KB per shard
+			// For proper testing, we need to use realistic sizes that match EC encoding
+			// EC uses large blocks (1GB) and small blocks (1MB)
+			// For test purposes, use a .dat file size that results in expected shard sizes
+			// 10GB .dat file = 1GB per shard (1 large block)
+			datFileSize := int64(10 * 1024 * 1024 * 1024) // 10GB
+			expectedShardSize := calculateExpectedShardSize(datFileSize)
 
 			// Create .dat file if needed
-			// .dat file size should be DataShardsCount * shard size (10 shards)
 			if tt.createDatFile {
 				datFile, err := os.Create(baseFileName + ".dat")
 				if err != nil {
 					t.Fatalf("Failed to create .dat file: %v", err)
 				}
-				datData := make([]byte, erasure_coding.DataShardsCount*shardSize)
-				datFile.Write(datData)
+				// Write minimal data (don't need to fill entire 10GB for tests)
+				datFile.Truncate(datFileSize)
 				datFile.Close()
 			}
 
-			// Create EC shard files
+			// Create EC shard files with correct size
 			for i := 0; i < tt.numShards; i++ {
 				shardFile, err := os.Create(baseFileName + erasure_coding.ToExt(i))
 				if err != nil {
 					t.Fatalf("Failed to create shard file: %v", err)
 				}
-				shardData := make([]byte, shardSize)
-				shardFile.Write(shardData)
+				// Use truncate to create file of correct size without allocating all the space
+				shardFile.Truncate(expectedShardSize)
 				shardFile.Close()
 			}
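
The switch from writing 1KB buffers to Truncate is what keeps these tests fast despite the 10GB fixture: on filesystems with sparse-file support (ext4, xfs, apfs, and similar), extending a file with Truncate creates a hole rather than allocating blocks. A minimal sketch of that behavior, with an illustrative file name:

package main

import (
	"fmt"
	"os"
)

func main() {
	// Create an empty file and extend it to 10GB with a hole.
	// stat reports the full logical size, while actual disk usage
	// stays near zero on sparse-capable filesystems.
	f, err := os.Create("sparse.dat")
	if err != nil {
		panic(err)
	}
	defer f.Close()
	if err := f.Truncate(10 << 30); err != nil {
		panic(err)
	}
	fi, _ := f.Stat()
	fmt.Println(fi.Size()) // 10737418240
}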
