From d389c5b27e6ceef6fb623c0c2b8405c754d3ac5d Mon Sep 17 00:00:00 2001 From: Konstantin Lebedev <9497591+kmlebedev@users.noreply.github.com> Date: Sun, 12 May 2024 23:31:34 +0500 Subject: [PATCH] fix: recreate index include deleted files (#5579) * fix: recreate index include deleted files https://github.com/seaweedfs/seaweedfs/issues/5508 * fix: counting the number of files * fix: log --- weed/command/fix.go | 58 ++++++++++++++++++++++++------- weed/storage/needle_map_memory.go | 4 +-- weed/storage/volume_checking.go | 7 ++-- weed/storage/volume_vacuum.go | 26 +++++++------- 4 files changed, 64 insertions(+), 31 deletions(-) diff --git a/weed/command/fix.go b/weed/command/fix.go index b226a0b1a..4fb4ed88e 100644 --- a/weed/command/fix.go +++ b/weed/command/fix.go @@ -32,12 +32,15 @@ var cmdFix = &Command{ var ( fixVolumeCollection = cmdFix.Flag.String("collection", "", "an optional volume collection name, if specified only it will be processed") fixVolumeId = cmdFix.Flag.Int64("volumeId", 0, "an optional volume id, if not 0 (default) only it will be processed") + fixIncludeDeleted = cmdFix.Flag.Bool("includeDeleted", true, "include deleted entries in the index file") fixIgnoreError = cmdFix.Flag.Bool("ignoreError", false, "an optional, if true will be processed despite errors") ) type VolumeFileScanner4Fix struct { - version needle.Version - nm *needle_map.MemDb + version needle.Version + nm *needle_map.MemDb + nmDeleted *needle_map.MemDb + includeDeleted bool } func (scanner *VolumeFileScanner4Fix) VisitSuperBlock(superBlock super_block.SuperBlock) error { @@ -50,13 +53,20 @@ func (scanner *VolumeFileScanner4Fix) ReadNeedleBody() bool { } func (scanner *VolumeFileScanner4Fix) VisitNeedle(n *needle.Needle, offset int64, needleHeader, needleBody []byte) error { - glog.V(2).Infof("key %d offset %d size %d disk_size %d compressed %v", n.Id, offset, n.Size, n.DiskSize(scanner.version), n.IsCompressed()) + glog.V(2).Infof("key %v offset %d size %d disk_size %d compressed %v", n.Id, offset, n.Size, n.DiskSize(scanner.version), n.IsCompressed()) if n.Size.IsValid() { - pe := scanner.nm.Set(n.Id, types.ToOffset(offset), n.Size) - glog.V(2).Infof("saved %d with error %v", n.Size, pe) + if pe := scanner.nm.Set(n.Id, types.ToOffset(offset), n.Size); pe != nil { + return fmt.Errorf("saved %d with error %v", n.Size, pe) + } } else { - glog.V(2).Infof("skipping deleted file ...") - return scanner.nm.Delete(n.Id) + if scanner.includeDeleted { + if pe := scanner.nmDeleted.Set(n.Id, types.ToOffset(offset), types.TombstoneFileSize); pe != nil { + return fmt.Errorf("saved deleted %d with error %v", n.Size, pe) + } + } else { + glog.V(2).Infof("skipping deleted file ...") + return scanner.nm.Delete(n.Id) + } } return nil } @@ -109,21 +119,45 @@ func runFix(cmd *Command, args []string) bool { if *fixVolumeId != 0 && *fixVolumeId != volumeId { continue } - doFixOneVolume(basePath, baseFileName, collection, volumeId) + doFixOneVolume(basePath, baseFileName, collection, volumeId, *fixIncludeDeleted) } } return true } -func doFixOneVolume(basepath string, baseFileName string, collection string, volumeId int64) { +func SaveToIdx(scaner *VolumeFileScanner4Fix, idxName string) (ret error) { + idxFile, err := os.OpenFile(idxName, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644) + if err != nil { + return + } + defer func() { + idxFile.Close() + }() + + return scaner.nm.AscendingVisit(func(value needle_map.NeedleValue) error { + _, err := idxFile.Write(value.ToBytes()) + if scaner.includeDeleted && err == nil { + if deleted, ok := scaner.nmDeleted.Get(value.Key); ok { + _, err = idxFile.Write(deleted.ToBytes()) + } + } + return err + }) +} + +func doFixOneVolume(basepath string, baseFileName string, collection string, volumeId int64, fixIncludeDeleted bool) { indexFileName := path.Join(basepath, baseFileName+".idx") nm := needle_map.NewMemDb() + nmDeleted := needle_map.NewMemDb() defer nm.Close() + defer nmDeleted.Close() vid := needle.VolumeId(volumeId) scanner := &VolumeFileScanner4Fix{ - nm: nm, + nm: nm, + nmDeleted: nmDeleted, + includeDeleted: fixIncludeDeleted, } if err := storage.ScanVolumeFile(basepath, collection, vid, storage.NeedleMapInMemory, scanner); err != nil { @@ -135,12 +169,12 @@ func doFixOneVolume(basepath string, baseFileName string, collection string, vol } } - if err := nm.SaveToIdx(indexFileName); err != nil { - os.Remove(indexFileName) + if err := SaveToIdx(scanner, indexFileName); err != nil { err := fmt.Errorf("save to .idx File: %v", err) if *fixIgnoreError { glog.Error(err) } else { + os.Remove(indexFileName) glog.Fatal(err) } } diff --git a/weed/storage/needle_map_memory.go b/weed/storage/needle_map_memory.go index a2beb6c33..c75514a31 100644 --- a/weed/storage/needle_map_memory.go +++ b/weed/storage/needle_map_memory.go @@ -36,8 +36,8 @@ func LoadCompactNeedleMap(file *os.File) (*NeedleMap, error) { func doLoading(file *os.File, nm *NeedleMap) (*NeedleMap, error) { e := idx.WalkIndexFile(file, 0, func(key NeedleId, offset Offset, size Size) error { nm.MaybeSetMaxFileKey(key) - nm.FileCounter++ if !offset.IsZero() && size.IsValid() { + nm.FileCounter++ nm.FileByteCounter = nm.FileByteCounter + uint64(size) oldOffset, oldSize := nm.m.Set(NeedleId(key), offset, size) if !oldOffset.IsZero() && oldSize.IsValid() { @@ -51,7 +51,7 @@ func doLoading(file *os.File, nm *NeedleMap) (*NeedleMap, error) { } return nil }) - glog.V(1).Infof("max file key: %d for file: %s", nm.MaxFileKey(), file.Name()) + glog.V(1).Infof("max file key: %v count: %d deleted: %d for file: %s", nm.MaxFileKey(), nm.FileCount(), nm.DeletedCount(), file.Name()) return nm, e } diff --git a/weed/storage/volume_checking.go b/weed/storage/volume_checking.go index 0c5f154e8..f5ceffcce 100644 --- a/weed/storage/volume_checking.go +++ b/weed/storage/volume_checking.go @@ -109,9 +109,6 @@ func verifyNeedleIntegrity(datFile backend.BackendStorageFile, v needle.Version, return 0, fmt.Errorf("verifyNeedleIntegrity check %s entry offset %d size %d: %v", datFile.Name(), offset, size, err) } n.AppendAtNs = util.BytesToUint64(bytes) - if n.HasTtl() { - return n.AppendAtNs, nil - } fileTailOffset := offset + needle.GetActualSize(size, v) fileSize, _, err := datFile.GetStat() if err != nil { @@ -130,7 +127,7 @@ func verifyNeedleIntegrity(datFile backend.BackendStorageFile, v needle.Version, return n.AppendAtNs, fmt.Errorf("read data [%d,%d) : %v", offset, offset+int64(size), err) } if n.Id != key { - return n.AppendAtNs, fmt.Errorf("index key %#x does not match needle's Id %#x", key, n.Id) + return n.AppendAtNs, fmt.Errorf("index key %v does not match needle's Id %v", key, n.Id) } return n.AppendAtNs, err } @@ -147,7 +144,7 @@ func verifyDeletedNeedleIntegrity(datFile backend.BackendStorageFile, v needle.V return n.AppendAtNs, fmt.Errorf("read data [%d,%d) : %v", fileSize-size, size, err) } if n.Id != key { - return n.AppendAtNs, fmt.Errorf("index key %#x does not match needle's Id %#x", key, n.Id) + return n.AppendAtNs, fmt.Errorf("index key %v does not match needle's Id %v", key, n.Id) } return n.AppendAtNs, err } diff --git a/weed/storage/volume_vacuum.go b/weed/storage/volume_vacuum.go index c8098493d..6bbbde71d 100644 --- a/weed/storage/volume_vacuum.go +++ b/weed/storage/volume_vacuum.go @@ -487,19 +487,21 @@ func (v *Volume) copyDataBasedOnIndexFile(srcDatName, srcIdxName, dstDatName, da if err != nil { return err } - dstDatSize, _, err := dstDatBackend.GetStat() - if err != nil { - return err - } - if v.nm.ContentSize() > v.nm.DeletedSize() { - expectedContentSize := v.nm.ContentSize() - v.nm.DeletedSize() - if expectedContentSize > uint64(dstDatSize) { - return fmt.Errorf("volume %s unexpected new data size: %d does not match size of content minus deleted: %d", - v.Id.String(), dstDatSize, expectedContentSize) + if v.Ttl.String() == "" { + dstDatSize, _, err := dstDatBackend.GetStat() + if err != nil { + return err + } + if v.nm.ContentSize() > v.nm.DeletedSize() { + expectedContentSize := v.nm.ContentSize() - v.nm.DeletedSize() + if expectedContentSize > uint64(dstDatSize) { + return fmt.Errorf("volume %s unexpected new data size: %d does not match size of content minus deleted: %d", + v.Id.String(), dstDatSize, expectedContentSize) + } + } else { + glog.Warningf("volume %s content size: %d less deleted size: %d, new size: %d", + v.Id.String(), v.nm.ContentSize(), v.nm.DeletedSize(), dstDatSize) } - } else { - glog.Warningf("volume %s content size: %d less deleted size: %d, new size: %d", - v.Id.String(), v.nm.ContentSize(), v.nm.DeletedSize(), dstDatSize) } err = newNm.SaveToIdx(datIdxName) if err != nil {