Browse Source

ec: fall back to data dir when ecx file not found in idx dir (#8541)

* ec: fall back to data dir when ecx file not found in idx dir (#8540)

When -dir.idx is configured after EC encoding, the .ecx/.ecj files
remain in the data directory. NewEcVolume now falls back to the data
directory when the index file is not found in dirIdx.

* ec: add fallback logging and improved error message for ecx lookup

* ec: preserve configured dirIdx, track actual ecx location separately

The previous fallback set ev.dirIdx = dir when finding .ecx in the data
directory, which corrupted IndexBaseFileName() for future writes (e.g.,
WriteIdxFileFromEcIndex during EC-to-volume conversion would write the
.idx file to the data directory instead of the configured index directory).

Introduce ecxActualDir to track where .ecx/.ecj were actually found.
It is used only by FileName() for cleanup/destroy, while IndexBaseFileName()
continues to use the configured dirIdx for new file creation.

* ec: check both idx and data dirs for .ecx in all cleanup and lookup paths

When -dir.idx is configured after EC encoding, .ecx/.ecj files may
reside in the data directory. Several code paths only checked
l.IdxDirectory, causing them to miss these files:

- removeEcVolumeFiles: now removes .ecx/.ecj from both directories
- loadExistingVolume: ecx existence check falls back to data dir
- deleteEcShardIdsForEachLocation: ecx existence check and cleanup
  both cover the data directory
- VolumeEcShardsRebuild: ecx lookup falls back to data directory
  so RebuildEcxFile operates on the correct file
pull/8545/head
Chris Lu 2 days ago
committed by GitHub
parent
commit
af4c3fcb31
No known key found for this signature in database. GPG Key ID: B5690EEEBB952194
  1. 24
      weed/server/volume_grpc_erasure_coding.go
  2. 4
      weed/storage/disk_location.go
  3. 5
      weed/storage/disk_location_ec.go
  4. 17
      weed/storage/erasure_coding/ec_volume.go

24
weed/server/volume_grpc_erasure_coding.go

@ -154,7 +154,12 @@ func (vs *VolumeServer) VolumeEcShardsRebuild(ctx context.Context, req *volume_s
continue
}
if util.FileExists(path.Join(location.IdxDirectory, baseFileName+".ecx")) {
indexBaseFileName := path.Join(location.IdxDirectory, baseFileName)
if !util.FileExists(indexBaseFileName+".ecx") && location.IdxDirectory != location.Directory {
// .ecx may be in the data directory if created before -dir.idx was configured
indexBaseFileName = path.Join(location.Directory, baseFileName)
}
if util.FileExists(indexBaseFileName + ".ecx") {
// write .ec00 ~ .ec13 files
dataBaseFileName := path.Join(location.Directory, baseFileName)
if generatedShardIds, err := erasure_coding.RebuildEcFiles(dataBaseFileName); err != nil {
@ -163,9 +168,8 @@ func (vs *VolumeServer) VolumeEcShardsRebuild(ctx context.Context, req *volume_s
rebuiltShardIds = generatedShardIds
}
indexBaseFileName := path.Join(location.IdxDirectory, baseFileName)
if err := erasure_coding.RebuildEcxFile(indexBaseFileName); err != nil {
return nil, fmt.Errorf("RebuildEcxFile %s: %v", dataBaseFileName, err)
return nil, fmt.Errorf("RebuildEcxFile %s: %v", indexBaseFileName, err)
}
break
@ -283,7 +287,11 @@ func deleteEcShardIdsForEachLocation(bName string, location *storage.DiskLocatio
indexBaseFilename := path.Join(location.IdxDirectory, bName)
dataBaseFilename := path.Join(location.Directory, bName)
if util.FileExists(path.Join(location.IdxDirectory, bName+".ecx")) {
ecxExists := util.FileExists(path.Join(location.IdxDirectory, bName+".ecx"))
if !ecxExists && location.IdxDirectory != location.Directory {
ecxExists = util.FileExists(path.Join(location.Directory, bName+".ecx"))
}
if ecxExists {
for _, shardId := range shardIds {
shardFileName := dataBaseFilename + erasure_coding.ToExt(int(shardId))
if util.FileExists(shardFileName) {
@ -303,10 +311,16 @@ func deleteEcShardIdsForEachLocation(bName string, location *storage.DiskLocatio
}
if hasEcxFile && existingShardCount == 0 {
if err := os.Remove(indexBaseFilename + ".ecx"); err != nil {
// Remove .ecx/.ecj from both idx and data directories
// since they may be in either location depending on when -dir.idx was configured
if err := os.Remove(indexBaseFilename + ".ecx"); err != nil && !os.IsNotExist(err) {
return err
}
os.Remove(indexBaseFilename + ".ecj")
if location.IdxDirectory != location.Directory {
os.Remove(dataBaseFilename + ".ecx")
os.Remove(dataBaseFilename + ".ecj")
}
if !hasIdxFile {
// .vif is used for ec volumes and normal volumes

4
weed/storage/disk_location.go

@ -172,6 +172,10 @@ func (l *DiskLocation) loadExistingVolume(dirEntry os.DirEntry, needleMapKind Ne
// skip if ec volumes exists, but validate EC files first
if skipIfEcVolumesExists {
ecxFilePath := filepath.Join(l.IdxDirectory, volumeName+".ecx")
if !util.FileExists(ecxFilePath) && l.IdxDirectory != l.Directory {
// .ecx may have been created before -dir.idx was configured
ecxFilePath = filepath.Join(l.Directory, volumeName+".ecx")
}
if util.FileExists(ecxFilePath) {
// Validate EC volume: shard count, size consistency, and expected size vs .dat file
if !l.validateEcVolume(collection, vid) {

5
weed/storage/disk_location_ec.go

@ -476,6 +476,11 @@ func (l *DiskLocation) removeEcVolumeFiles(collection string, vid needle.VolumeI
// EC loading for incomplete/missing shards on next startup
removeFile(indexBaseFileName+".ecx", "EC index file")
removeFile(indexBaseFileName+".ecj", "EC journal file")
// Also try the data directory in case .ecx/.ecj were created before -dir.idx was configured
if l.IdxDirectory != l.Directory {
removeFile(baseFileName+".ecx", "EC index file (fallback)")
removeFile(baseFileName+".ecj", "EC journal file (fallback)")
}
// Remove all EC shard files (.ec00 ~ .ec31) from data directory
// Use MaxShardCount (32) to support custom EC ratios

17
weed/storage/erasure_coding/ec_volume.go

@ -28,6 +28,7 @@ type EcVolume struct {
Collection string
dir string
dirIdx string
ecxActualDir string // directory where .ecx/.ecj were actually found (may differ from dirIdx after fallback)
ecxFile *os.File
ecxFileSize int64
ecxCreatedAt time.Time
@ -51,8 +52,20 @@ func NewEcVolume(diskType types.DiskType, dir string, dirIdx string, collection
indexBaseFileName := EcShardFileName(collection, dirIdx, int(vid))
// open ecx file
ev.ecxActualDir = dirIdx
if ev.ecxFile, err = os.OpenFile(indexBaseFileName+".ecx", os.O_RDWR, 0644); err != nil {
return nil, fmt.Errorf("cannot open ec volume index %s.ecx: %v", indexBaseFileName, err)
if dirIdx != dir && os.IsNotExist(err) {
// fall back to data directory if idx directory does not have the .ecx file
firstErr := err
glog.V(1).Infof("ecx file not found at %s.ecx, falling back to %s.ecx", indexBaseFileName, dataBaseFileName)
if ev.ecxFile, err = os.OpenFile(dataBaseFileName+".ecx", os.O_RDWR, 0644); err != nil {
return nil, fmt.Errorf("open ecx index %s.ecx: %v; fallback %s.ecx: %v", indexBaseFileName, firstErr, dataBaseFileName, err)
}
indexBaseFileName = dataBaseFileName
ev.ecxActualDir = dir
} else {
return nil, fmt.Errorf("cannot open ec volume index %s.ecx: %v", indexBaseFileName, err)
}
}
ecxFi, statErr := ev.ecxFile.Stat()
if statErr != nil {
@ -197,7 +210,7 @@ func (ev *EcVolume) Destroy() {
func (ev *EcVolume) FileName(ext string) string {
switch ext {
case ".ecx", ".ecj":
return ev.IndexBaseFileName() + ext
return EcShardFileName(ev.Collection, ev.ecxActualDir, int(ev.VolumeId)) + ext
}
// .vif
return ev.DataBaseFileName() + ext

Loading…
Cancel
Save