@@ -212,3 +212,124 @@ func min(x, y int64) int64 {
	}
	return x
}

// WriteDatFileAndVacuum reconstructs volume from EC shards and then vacuums deleted needles
// This reuses existing WriteDatFile and volume compaction logic to achieve the same result more cleanly
func WriteDatFileAndVacuum(baseFileName string, shardFileNames []string) error {

	// Step 1: Use existing WriteDatFile to reconstruct the full volume
	datFileSize, err := FindDatFileSize(baseFileName, baseFileName)
	if err != nil {
		return fmt.Errorf("failed to find dat file size: %w", err)
	}

	tempDatFile := baseFileName + ".tmp.dat"
	err = WriteDatFile(tempDatFile, datFileSize, shardFileNames)
	if err != nil {
		return fmt.Errorf("failed to reconstruct volume with WriteDatFile: %w", err)
	}
	defer os.Remove(tempDatFile) // cleanup temp file

	// Step 2: Create index file with deleted entries marked (existing function)
	tempIdxFile := baseFileName + ".tmp.idx"
	err = WriteIdxFileFromEcIndex(baseFileName + ".tmp")
	if err != nil {
		return fmt.Errorf("failed to create index file: %w", err)
	}
	defer os.Remove(tempIdxFile) // cleanup temp file

	// Step 3: Use existing volume compaction logic to filter out deleted needles
	version, err := readEcVolumeVersion(baseFileName)
	if err != nil {
		return fmt.Errorf("failed to read volume version: %w", err)
	}

	return copyDataBasedOnIndexFileForEcVacuum(
		tempDatFile, tempIdxFile, // source files (with deleted entries)
		baseFileName+".dat", baseFileName+".idx", // destination files (cleaned)
		version,
	)
}
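
// Illustrative sketch, not part of this change: one way a caller might drive
// WriteDatFileAndVacuum, assuming the shard files sit next to the volume's base
// name and follow the existing ToExt naming (".ec00", ".ec01", ...). The helper
// name and the fixed TotalShardsCount loop are assumptions, not patch code.
func vacuumEcVolumeSketch(baseFileName string) error {
	shardFileNames := make([]string, TotalShardsCount)
	for i := 0; i < TotalShardsCount; i++ {
		shardFileNames[i] = baseFileName + ToExt(i)
	}
	return WriteDatFileAndVacuum(baseFileName, shardFileNames)
}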

// copyDataBasedOnIndexFileForEcVacuum copies only non-deleted needles from source to destination
// This is a simplified version of volume_vacuum.go's copyDataBasedOnIndexFile for EC vacuum use
func copyDataBasedOnIndexFileForEcVacuum(srcDatName, srcIdxName, dstDatName, dstIdxName string, version needle.Version) error {

	// Open source data file
	dataFile, err := os.Open(srcDatName)
	if err != nil {
		return fmt.Errorf("failed to open source dat file: %w", err)
	}
	srcDatBackend := backend.NewDiskFile(dataFile)
	defer srcDatBackend.Close()

	// Create destination data file
	dstDatBackend, err := backend.CreateVolumeFile(dstDatName, 0, 0)
	if err != nil {
		return fmt.Errorf("failed to create destination dat file: %w", err)
	}
	defer func() {
		dstDatBackend.Sync()
		dstDatBackend.Close()
	}()

	// Load needle map from source index
	oldNm := needle_map.NewMemDb()
	defer oldNm.Close()
	if err := oldNm.LoadFromIdx(srcIdxName); err != nil {
		return fmt.Errorf("failed to load index file: %w", err)
	}

	// Create new needle map for cleaned volume
	newNm := needle_map.NewMemDb()
	defer newNm.Close()

	// Copy superblock with incremented compaction revision
	sb := super_block.SuperBlock{}
	if existingSb, err := super_block.ReadSuperBlock(srcDatBackend); err == nil {
		sb = existingSb
		sb.CompactionRevision++
	} else {
		// Use default superblock if reading fails
		sb = super_block.SuperBlock{
			Version:            version,
			ReplicaPlacement:   &super_block.ReplicaPlacement{},
			CompactionRevision: 1,
		}
	}
	dstDatBackend.WriteAt(sb.Bytes(), 0)
	newOffset := int64(sb.BlockSize())

	// Copy only non-deleted needles
	err = oldNm.AscendingVisit(func(value needle_map.NeedleValue) error {
		offset, size := value.Offset, value.Size

		// Skip deleted needles (this is the key filtering logic!)
		if offset.IsZero() || size.IsDeleted() {
			return nil
		}

		// Read needle from source
		n := new(needle.Needle)
		if err := n.ReadData(srcDatBackend, offset.ToActualOffset(), size, version); err != nil {
			return fmt.Errorf("cannot read needle from source: %w", err)
		}

		// Write needle to destination
		if err := newNm.Set(n.Id, types.ToOffset(newOffset), n.Size); err != nil {
			return fmt.Errorf("cannot set needle in new map: %w", err)
		}
		if _, _, _, err := n.Append(dstDatBackend, sb.Version); err != nil {
			return fmt.Errorf("cannot append needle to destination: %w", err)
		}

		newOffset += n.DiskSize(version)
		return nil
	})
	if err != nil {
		return fmt.Errorf("failed to copy needles: %w", err)
	}

	// Save the new index file
	return newNm.SaveToIdx(dstIdxName)
}
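
// Illustrative sketch, not part of this change: the skip condition used inside the
// AscendingVisit callback above, written as a standalone predicate. The helper name
// is hypothetical; the condition mirrors the patch exactly.
func shouldSkipNeedle(v needle_map.NeedleValue) bool {
	// Entries with a zero offset or a size flagged as deleted carry no live data,
	// so the vacuum pass does not copy them to the destination volume.
	return v.Offset.IsZero() || v.Size.IsDeleted()
}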