package storage
import (
"os"
"path/filepath"
"testing"
"github.com/seaweedfs/seaweedfs/weed/storage/erasure_coding"
"github.com/seaweedfs/seaweedfs/weed/storage/needle"
"github.com/seaweedfs/seaweedfs/weed/storage/types"
"github.com/seaweedfs/seaweedfs/weed/util"
)
// TestIncompleteEcEncodingCleanup tests the cleanup logic for incomplete EC encoding scenarios
func TestIncompleteEcEncodingCleanup(t *testing.T) {
tests := []struct {
name string
volumeId needle.VolumeId
collection string
createDatFile bool
createEcxFile bool
createEcjFile bool
numShards int
expectCleanup bool
expectLoadSuccess bool
}{
{
name: "Incomplete EC: shards without .ecx, .dat exists - should cleanup",
volumeId: 100,
collection: "",
createDatFile: true,
createEcxFile: false,
createEcjFile: false,
numShards: 14, // All shards but no .ecx
expectCleanup: true,
expectLoadSuccess: false,
},
{
name: "Distributed EC: shards without .ecx, .dat deleted - should NOT cleanup",
volumeId: 101,
collection: "",
createDatFile: false,
createEcxFile: false,
createEcjFile: false,
numShards: 5, // Partial shards, distributed
expectCleanup: false,
expectLoadSuccess: false,
},
{
name: "Incomplete EC: shards with .ecx but < 10 shards, .dat exists - should cleanup",
volumeId: 102,
collection: "",
createDatFile: true,
createEcxFile: true,
createEcjFile: false,
numShards: 7, // Less than DataShardsCount (10)
expectCleanup: true,
expectLoadSuccess: false,
},
{
name: "Valid local EC: shards with .ecx, >= 10 shards, .dat exists - should load",
volumeId: 103,
collection: "",
createDatFile: true,
createEcxFile: true,
createEcjFile: false,
numShards: 14, // All shards
expectCleanup: false,
expectLoadSuccess: true, // Would succeed if .ecx was valid
},
{
name: "Distributed EC: shards with .ecx, .dat deleted - should load",
volumeId: 104,
collection: "",
createDatFile: false,
createEcxFile: true,
createEcjFile: false,
numShards: 10, // Enough shards
expectCleanup: false,
expectLoadSuccess: true, // Would succeed if .ecx was valid
},
{
name: "Incomplete EC with collection: shards without .ecx, .dat exists - should cleanup",
volumeId: 105,
collection: "test_collection",
createDatFile: true,
createEcxFile: false,
createEcjFile: false,
numShards: 14,
expectCleanup: true,
expectLoadSuccess: false,
},
}
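// Taken together, these cases encode the cleanup rule the test expects: EC files are
// removed only when the local .dat file still exists and the EC set is incomplete
// (missing .ecx or fewer than DataShardsCount shards); once .dat is gone, the shards
// are treated as part of a distributed EC volume and must be preserved.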
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
// Use per-subtest temp directory for stronger isolation
tempDir := t.TempDir()
// Create DiskLocation
minFreeSpace := util.MinFreeSpace{Type: util.AsPercent, Percent: 1, Raw: "1"}
diskLocation := &DiskLocation{
Directory: tempDir,
DirectoryUuid: "test-uuid",
IdxDirectory: tempDir,
DiskType: types.HddType,
MaxVolumeCount: 100,
OriginalMaxVolumeCount: 100,
MinFreeSpace: minFreeSpace,
}
diskLocation.volumes = make(map[needle.VolumeId]*Volume)
diskLocation.ecVolumes = make(map[needle.VolumeId]*erasure_coding.EcVolume)
// Setup test files
baseFileName := erasure_coding.EcShardFileName(tt.collection, tempDir, int(tt.volumeId))
// Use deterministic but small size: 10MB .dat => 1MB per shard
datFileSize := int64(10 * 1024 * 1024) // 10MB
expectedShardSize := calculateExpectedShardSize(datFileSize)
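// Note: calculateExpectedShardSize is assumed to be a helper defined elsewhere in this
// package; for a 10MB .dat spread across erasure_coding.DataShardsCount (10) data
// shards it should come out to 1MB per shard, matching the comment above.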
// Create .dat file if needed
if tt.createDatFile {
datFile, err := os.Create(baseFileName + ".dat")
if err != nil {
t.Fatalf("Failed to create .dat file: %v", err)
}
if err := datFile.Truncate(datFileSize); err != nil {
t.Fatalf("Failed to truncate .dat file: %v", err)
}
if err := datFile.Close(); err != nil {
t.Fatalf("Failed to close .dat file: %v", err)
}
}
// Create EC shard files
for i := 0; i < tt.numShards; i++ {
shardFile, err := os.Create(baseFileName + erasure_coding.ToExt(i))
if err != nil {
t.Fatalf("Failed to create shard file: %v", err)
}
if err := shardFile.Truncate(expectedShardSize); err != nil {
t.Fatalf("Failed to truncate shard file: %v", err)
}
if err := shardFile.Close(); err != nil {
t.Fatalf("Failed to close shard file: %v", err)
}
}
// Create .ecx file if needed
if tt.createEcxFile {
ecxFile, err := os.Create(baseFileName + ".ecx")
if err != nil {
t.Fatalf("Failed to create .ecx file: %v", err)
}
if _, err := ecxFile.WriteString("dummy ecx data"); err != nil {
ecxFile.Close()
t.Fatalf("Failed to write .ecx file: %v", err)
}
if err := ecxFile.Close(); err != nil {
t.Fatalf("Failed to close .ecx file: %v", err)
}
}
// Create .ecj file if needed
if tt.createEcjFile {
ecjFile, err := os.Create(baseFileName + ".ecj")
if err != nil {
t.Fatalf("Failed to create .ecj file: %v", err)
}
if _, err := ecjFile.WriteString("dummy ecj data"); err != nil {
ecjFile.Close()
t.Fatalf("Failed to write .ecj file: %v", err)
}
if err := ecjFile.Close(); err != nil {
t.Fatalf("Failed to close .ecj file: %v", err)
}
}
// Run loadAllEcShards
loadErr := diskLocation.loadAllEcShards()
if loadErr != nil {
t.Logf("loadAllEcShards returned error (expected in some cases): %v", loadErr)
}
// Test idempotency - running again should not cause issues
loadErr2 := diskLocation.loadAllEcShards()
if loadErr2 != nil {
t.Logf("Second loadAllEcShards returned error: %v", loadErr2)
}
// Verify cleanup expectations
if tt.expectCleanup {
// Check that files were cleaned up
if util.FileExists(baseFileName + ".ecx") {
t.Errorf("Expected .ecx to be cleaned up but it still exists")
}
if util.FileExists(baseFileName + ".ecj") {
t.Errorf("Expected .ecj to be cleaned up but it still exists")
}
for i := 0; i < erasure_coding.TotalShardsCount; i++ {
shardFile := baseFileName + erasure_coding.ToExt(i)
if util.FileExists(shardFile) {
t.Errorf("Expected shard %d to be cleaned up but it still exists", i)
}
}
// .dat file should still exist (not cleaned up)
if tt.createDatFile && !util.FileExists(baseFileName+".dat") {
t.Errorf("Expected .dat file to remain but it was deleted")
}
} else {
// Check that files were NOT cleaned up
for i := 0; i < tt.numShards; i++ {
shardFile := baseFileName + erasure_coding.ToExt(i)
if !util.FileExists(shardFile) {
t.Errorf("Expected shard %d to remain but it was cleaned up", i)
}
}
if tt.createEcxFile && !util.FileExists(baseFileName+".ecx") {
t.Errorf("Expected .ecx to remain but it was cleaned up")
}
}
// Verify load expectations
if tt.expectLoadSuccess {
if diskLocation.EcShardCount() == 0 {
t.Errorf("Expected EC shards to be loaded for volume %d", tt.volumeId)
}
}
})
}
}
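// mustCreateFileWithSize is a small sketch of a helper that could factor out the
// create/truncate/close boilerplate repeated in the tests above and below; it is not
// referenced by the original tests and is offered only as an illustration.
func mustCreateFileWithSize(t *testing.T, name string, size int64) {
	t.Helper()
	f, err := os.Create(name)
	if err != nil {
		t.Fatalf("Failed to create %s: %v", name, err)
	}
	if err := f.Truncate(size); err != nil {
		f.Close()
		t.Fatalf("Failed to truncate %s: %v", name, err)
	}
	if err := f.Close(); err != nil {
		t.Fatalf("Failed to close %s: %v", name, err)
	}
}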
// TestValidateEcVolume tests the validateEcVolume function
func TestValidateEcVolume(t *testing.T) {
tempDir := t.TempDir()
minFreeSpace := util.MinFreeSpace{Type: util.AsPercent, Percent: 1, Raw: "1"}
diskLocation := &DiskLocation{
Directory: tempDir,
DirectoryUuid: "test-uuid",
IdxDirectory: tempDir,
DiskType: types.HddType,
MinFreeSpace: minFreeSpace,
}
tests := []struct {
name string
volumeId needle.VolumeId
collection string
createDatFile bool
numShards int
expectValid bool
}{
{
name: "Valid: .dat exists with 10+ shards",
volumeId: 200,
collection: "",
createDatFile: true,
numShards: 10,
expectValid: true,
},
{
name: "Invalid: .dat exists with < 10 shards",
volumeId: 201,
collection: "",
createDatFile: true,
numShards: 9,
expectValid: false,
},
{
name: "Valid: .dat deleted (distributed EC) with any shards",
volumeId: 202,
collection: "",
createDatFile: false,
numShards: 5,
expectValid: true,
},
{
name: "Valid: .dat deleted (distributed EC) with no shards",
volumeId: 203,
collection: "",
createDatFile: false,
numShards: 0,
expectValid: true,
},
{
name: "Invalid: zero-byte shard files should not count",
volumeId: 204,
collection: "",
createDatFile: true,
numShards: 0, // Will create 10 zero-byte files below
expectValid: false,
},
{
name: "Invalid: .dat exists with different size shards",
volumeId: 205,
collection: "",
createDatFile: true,
numShards: 10, // Will create shards with varying sizes
expectValid: false,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
baseFileName := erasure_coding.EcShardFileName(tt.collection, tempDir, int(tt.volumeId))
// Shard sizes must match what EC encoding would produce (EC uses 1GB large blocks and 1MB small blocks).
// A small 10MB .dat file still exercises this logic: 10MB => 1MB per shard (one small batch, fast and deterministic).
datFileSize := int64(10 * 1024 * 1024) // 10MB
expectedShardSize := calculateExpectedShardSize(datFileSize)
// Create .dat file if needed
if tt.createDatFile {
datFile, err := os.Create(baseFileName + ".dat")
if err != nil {
t.Fatalf("Failed to create .dat file: %v", err)
}
// Truncate to the target size; no need to fill the entire file for tests
if err := datFile.Truncate(datFileSize); err != nil {
datFile.Close()
t.Fatalf("Failed to truncate .dat file: %v", err)
}
datFile.Close()
}
// Create EC shard files with correct size
for i := 0; i < tt.numShards; i++ {
shardFile, err := os.Create(baseFileName + erasure_coding.ToExt(i))
if err != nil {
t.Fatalf("Failed to create shard file: %v", err)
}
// Use truncate to create a file of the correct size without allocating all the space
if err := shardFile.Truncate(expectedShardSize); err != nil {
shardFile.Close()
t.Fatalf("Failed to truncate shard file: %v", err)
}
if err := shardFile.Close(); err != nil {
t.Fatalf("Failed to close shard file: %v", err)
}
}
// For the zero-byte test case, create empty files for all data shards
if tt.volumeId == 204 {
for i := 0; i < erasure_coding.DataShardsCount; i++ {
shardFile, err := os.Create(baseFileName + erasure_coding.ToExt(i))
if err != nil {
t.Fatalf("Failed to create empty shard file: %v", err)
}
// Don't write anything - leave as zero-byte
shardFile.Close()
}
}
// For the mismatched-shard-size test case, create shards with different sizes
if tt.volumeId == 205 {
for i := 0; i < erasure_coding.DataShardsCount; i++ {
shardFile, err := os.Create(baseFileName + erasure_coding.ToExt(i))
if err != nil {
t.Fatalf("Failed to create shard file: %v", err)
}
// Write a different amount of data to each shard
data := make([]byte, 100+i*10)
if _, err := shardFile.Write(data); err != nil {
shardFile.Close()
t.Fatalf("Failed to write shard file: %v", err)
}
shardFile.Close()
}
}
// Test validation
isValid := diskLocation.validateEcVolume(tt.collection, tt.volumeId)
if isValid != tt.expectValid {
t.Errorf("Expected validation result %v but got %v", tt.expectValid, isValid)
}
})
}
}
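// As exercised above (not a definitive spec), validateEcVolume requires at least
// erasure_coding.DataShardsCount (10) non-empty, equal-sized shards when the local
// .dat file is still present, while a volume whose .dat has been removed
// (distributed EC) is considered valid with any number of shards, including zero.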
// TestRemoveEcVolumeFiles tests the removeEcVolumeFiles function
func TestRemoveEcVolumeFiles(t *testing.T) {
tests := []struct {
name string
separateIdxDir bool
}{
{"Same directory for data and index", false},
{"Separate idx directory", true},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
tempDir := t.TempDir()
var dataDir, idxDir string
if tt.separateIdxDir {
dataDir = filepath.Join(tempDir, "data")
idxDir = filepath.Join(tempDir, "idx")
os.MkdirAll(dataDir, 0755)
os.MkdirAll(idxDir, 0755)
} else {
dataDir = tempDir
idxDir = tempDir
}
minFreeSpace := util.MinFreeSpace{Type: util.AsPercent, Percent: 1, Raw: "1"}
diskLocation := &DiskLocation{
Directory: dataDir,
DirectoryUuid: "test-uuid",
IdxDirectory: idxDir,
DiskType: types.HddType,
MinFreeSpace: minFreeSpace,
}
volumeId := needle.VolumeId(300)
collection := ""
dataBaseFileName := erasure_coding.EcShardFileName(collection, dataDir, int(volumeId))
idxBaseFileName := erasure_coding.EcShardFileName(collection, idxDir, int(volumeId))
// Create all EC shard files in data directory
for i := 0; i < erasure_coding.TotalShardsCount; i++ {
shardFile, err := os.Create(dataBaseFileName + erasure_coding.ToExt(i))
if err != nil {
t.Fatalf("Failed to create shard file: %v", err)
}
if _, err := shardFile.WriteString("dummy shard data"); err != nil {
shardFile.Close()
t.Fatalf("Failed to write shard file: %v", err)
}
if err := shardFile.Close(); err != nil {
t.Fatalf("Failed to close shard file: %v", err)
}
}
// Create .ecx file in idx directory
ecxFile, err := os.Create(idxBaseFileName + ".ecx")
if err != nil {
t.Fatalf("Failed to create .ecx file: %v", err)
}
if _, err := ecxFile.WriteString("dummy ecx data"); err != nil {
ecxFile.Close()
t.Fatalf("Failed to write .ecx file: %v", err)
}
if err := ecxFile.Close(); err != nil {
t.Fatalf("Failed to close .ecx file: %v", err)
}
// Create .ecj file in idx directory
ecjFile, err := os.Create(idxBaseFileName + ".ecj")
if err != nil {
t.Fatalf("Failed to create .ecj file: %v", err)
}
if _, err := ecjFile.WriteString("dummy ecj data"); err != nil {
ecjFile.Close()
t.Fatalf("Failed to write .ecj file: %v", err)
}
if err := ecjFile.Close(); err != nil {
t.Fatalf("Failed to close .ecj file: %v", err)
}
// Create .dat file in data directory (should NOT be removed)
datFile, err := os.Create(dataBaseFileName + ".dat")
if err != nil {
t.Fatalf("Failed to create .dat file: %v", err)
}
if _, err := datFile.WriteString("dummy dat data"); err != nil {
datFile.Close()
t.Fatalf("Failed to write .dat file: %v", err)
}
if err := datFile.Close(); err != nil {
t.Fatalf("Failed to close .dat file: %v", err)
}
// Call removeEcVolumeFiles
diskLocation.removeEcVolumeFiles(collection, volumeId)
// Verify all EC shard files are removed from data directory
for i := 0; i < erasure_coding.TotalShardsCount; i++ {
shardFile := dataBaseFileName + erasure_coding.ToExt(i)
if util.FileExists(shardFile) {
t.Errorf("Shard file %d should be removed but still exists", i)
}
}
// Verify .ecx file is removed from idx directory
if util.FileExists(idxBaseFileName + ".ecx") {
t.Errorf(".ecx file should be removed but still exists")
}
// Verify .ecj file is removed from idx directory
if util.FileExists(idxBaseFileName + ".ecj") {
t.Errorf(".ecj file should be removed but still exists")
}
// Verify .dat file is NOT removed from data directory
if !util.FileExists(dataBaseFileName + ".dat") {
t.Errorf(".dat file should NOT be removed but was deleted")
}
})
}
}
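// The expectations above document removeEcVolumeFiles' behavior as observed here:
// all EC shard files are deleted from the data directory, .ecx and .ecj are deleted
// from the index directory, and the .dat file is left untouched.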
// TestEcCleanupWithSeparateIdxDirectory tests EC cleanup when idx directory is different
func TestEcCleanupWithSeparateIdxDirectory(t *testing.T) {
tempDir := t.TempDir()
idxDir := filepath.Join(tempDir, "idx")
dataDir := filepath.Join(tempDir, "data")
os.MkdirAll(idxDir, 0755)
os.MkdirAll(dataDir, 0755)
minFreeSpace := util.MinFreeSpace{Type: util.AsPercent, Percent: 1, Raw: "1"}
diskLocation := &DiskLocation{
Directory: dataDir,
DirectoryUuid: "test-uuid",
IdxDirectory: idxDir,
DiskType: types.HddType,
MinFreeSpace: minFreeSpace,
}
diskLocation.volumes = make(map[needle.VolumeId]*Volume)
diskLocation.ecVolumes = make(map[needle.VolumeId]*erasure_coding.EcVolume)
volumeId := needle.VolumeId(400)
collection := ""
// Create shards in data directory (shards only go to Directory, not IdxDirectory)
dataBaseFileName := erasure_coding.EcShardFileName(collection, dataDir, int(volumeId))
for i := 0; i < erasure_coding.TotalShardsCount; i++ {
shardFile, err := os.Create(dataBaseFileName + erasure_coding.ToExt(i))
if err != nil {
t.Fatalf("Failed to create shard file: %v", err)
}
if _, err := shardFile.WriteString("dummy shard data"); err != nil {
shardFile.Close()
t.Fatalf("Failed to write shard file: %v", err)
}
if err := shardFile.Close(); err != nil {
t.Fatalf("Failed to close shard file: %v", err)
}
}
// Create .dat in data directory
datFile, err := os.Create(dataBaseFileName + ".dat")
if err != nil {
t.Fatalf("Failed to create .dat file: %v", err)
}
if _, err := datFile.WriteString("dummy data"); err != nil {
datFile.Close()
t.Fatalf("Failed to write .dat file: %v", err)
}
if err := datFile.Close(); err != nil {
t.Fatalf("Failed to close .dat file: %v", err)
}
// Do not create .ecx: trigger orphaned-shards cleanup when .dat exists
// Run loadAllEcShards
loadErr := diskLocation.loadAllEcShards()
if loadErr != nil {
t.Logf("loadAllEcShards error: %v", loadErr)
}
// Verify cleanup occurred in data directory (shards)
for i := 0; i < erasure_coding.TotalShardsCount; i++ {
shardFile := dataBaseFileName + erasure_coding.ToExt(i)
if util.FileExists(shardFile) {
t.Errorf("Shard file %d should be cleaned up but still exists", i)
}
}
// Verify .dat in data directory still exists (only EC files are cleaned up)
if !util.FileExists(dataBaseFileName + ".dat") {
t.Errorf(".dat file should remain but was deleted")
}
}
// TestDistributedEcVolumeNoFileDeletion verifies that distributed EC volumes
// (where .dat is deleted) do NOT have their shard files deleted when load fails
// This tests the critical bug fix where DestroyEcVolume was incorrectly deleting files
func TestDistributedEcVolumeNoFileDeletion(t *testing.T) {
tempDir := t.TempDir()
minFreeSpace := util.MinFreeSpace{Type: util.AsPercent, Percent: 1, Raw: "1"}
diskLocation := &DiskLocation{
Directory: tempDir,
DirectoryUuid: "test-uuid",
IdxDirectory: tempDir,
DiskType: types.HddType,
MinFreeSpace: minFreeSpace,
ecVolumes: make(map[needle.VolumeId]*erasure_coding.EcVolume),
}
collection := ""
volumeId := needle.VolumeId(500)
baseFileName := erasure_coding.EcShardFileName(collection, tempDir, int(volumeId))
// Create EC shards (only 5 shards - less than DataShardsCount, but OK for distributed EC)
numDistributedShards := 5
for i := 0; i < numDistributedShards; i++ {
shardFile, err := os.Create(baseFileName + erasure_coding.ToExt(i))
if err != nil {
t.Fatalf("Failed to create shard file: %v", err)
}
if _, err := shardFile.WriteString("dummy shard data"); err != nil {
shardFile.Close()
t.Fatalf("Failed to write shard file: %v", err)
}
if err := shardFile.Close(); err != nil {
t.Fatalf("Failed to close shard file: %v", err)
}
}
// Create .ecx file to trigger EC loading
ecxFile, err := os.Create(baseFileName + ".ecx")
if err != nil {
t.Fatalf("Failed to create .ecx file: %v", err)
}
if _, err := ecxFile.WriteString("dummy ecx data"); err != nil {
ecxFile.Close()
t.Fatalf("Failed to write .ecx file: %v", err)
}
if err := ecxFile.Close(); err != nil {
t.Fatalf("Failed to close .ecx file: %v", err)
}
// NO .dat file - this is a distributed EC volume
// Run loadAllEcShards - this should fail but NOT delete shard files
loadErr := diskLocation.loadAllEcShards()
if loadErr != nil {
t.Logf("loadAllEcShards returned error (expected): %v", loadErr)
}
// CRITICAL CHECK: Verify shard files still exist (should NOT be deleted)
for i := 0; i < numDistributedShards; i++ {
shardFile := baseFileName + erasure_coding.ToExt(i)
if !util.FileExists(shardFile) {
t.Errorf("CRITICAL BUG: Shard file %s was deleted for distributed EC volume!", shardFile)
}
}
// Verify .ecx file still exists (should NOT be deleted for distributed EC)
if !util.FileExists(baseFileName + ".ecx") {
t.Errorf("CRITICAL BUG: .ecx file was deleted for distributed EC volume!")
}
t.Logf("SUCCESS: Distributed EC volume files preserved (not deleted)")
}
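// These tests can be run on their own with something like the following (assuming the
// usual repository layout where this package lives under weed/storage):
//
//	go test ./weed/storage -run 'EcEncodingCleanup|ValidateEcVolume|RemoveEcVolumeFiles|EcCleanupWithSeparateIdxDirectory|DistributedEcVolumeNoFileDeletion'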