From 6ddfaf33cba953b92a5e3383a15bf26ab1845dbc Mon Sep 17 00:00:00 2001 From: bingoohuang Date: Wed, 16 Jan 2019 17:48:59 +0800 Subject: [PATCH] extract VolumeFileScanner for ScanVolumeFile --- unmaintained/fix_dat/fix_dat.go | 4 +- unmaintained/see_dat/see_dat.go | 32 ++++++--- weed/command/export.go | 105 +++++++++++++++------------ weed/command/fix.go | 48 ++++++++----- weed/storage/volume_read_write.go | 16 +++-- weed/storage/volume_vacuum.go | 115 +++++++++++++++++------------- 6 files changed, 190 insertions(+), 130 deletions(-) diff --git a/unmaintained/fix_dat/fix_dat.go b/unmaintained/fix_dat/fix_dat.go index 90287ecd6..b26c85de6 100644 --- a/unmaintained/fix_dat/fix_dat.go +++ b/unmaintained/fix_dat/fix_dat.go @@ -10,8 +10,8 @@ import ( "github.com/chrislusf/seaweedfs/weed/glog" "github.com/chrislusf/seaweedfs/weed/storage" - "github.com/chrislusf/seaweedfs/weed/util" "github.com/chrislusf/seaweedfs/weed/storage/types" + "github.com/chrislusf/seaweedfs/weed/util" ) var ( @@ -63,7 +63,7 @@ func main() { iterateEntries(datFile, indexFile, func(n *storage.Needle, offset int64) { fmt.Printf("needle id=%v name=%s size=%d dataSize=%d\n", n.Id, string(n.Name), n.Size, n.DataSize) - s, _, e := n.Append(newDatFile, superBlock.Version()) + _, s, _, e := n.Append(newDatFile, superBlock.Version()) fmt.Printf("size %d error %v\n", s, e) }) diff --git a/unmaintained/see_dat/see_dat.go b/unmaintained/see_dat/see_dat.go index ce31c1d20..f79c0a6a9 100644 --- a/unmaintained/see_dat/see_dat.go +++ b/unmaintained/see_dat/see_dat.go @@ -12,21 +12,31 @@ var ( volumeId = flag.Int("volumeId", -1, "a volume id. The volume should already exist in the dir. The volume index file should not exist.") ) +type VolumeFileScanner4SeeDat struct { + version storage.Version +} + +func (scanner *VolumeFileScanner4SeeDat) VisitSuperBlock(superBlock storage.SuperBlock) error { + scanner.version = superBlock.Version() + return nil + +} +func (scanner *VolumeFileScanner4SeeDat) ReadNeedleBody() bool { + return false +} + +func (scanner *VolumeFileScanner4SeeDat) VisitNeedle(n *storage.Needle, offset int64) error { + glog.V(0).Infof("%d,%s%x offset %d size %d cookie %x", *volumeId, n.Id, n.Cookie, offset, n.Size, n.Cookie) + return nil +} + func main() { flag.Parse() - var version storage.Version vid := storage.VolumeId(*volumeId) - err := storage.ScanVolumeFile(*volumePath, *volumeCollection, vid, - storage.NeedleMapInMemory, - func(superBlock storage.SuperBlock) error { - version = superBlock.Version() - return nil - }, false, func(n *storage.Needle, offset int64) error { - glog.V(0).Infof("%d,%s%x offset %d size %d cookie %x", - *volumeId, n.Id, n.Cookie, offset, n.Size, n.Cookie) - return nil - }) + + scanner := &VolumeFileScanner4SeeDat{} + err := storage.ScanVolumeFile(*volumePath, *volumeCollection, vid, storage.NeedleMapInMemory, scanner) if err != nil { glog.Fatalf("Reading Volume File [ERROR] %s\n", err) } diff --git a/weed/command/export.go b/weed/command/export.go index 1202d687c..5c7e064ce 100644 --- a/weed/command/export.go +++ b/weed/command/export.go @@ -84,6 +84,62 @@ func printNeedle(vid storage.VolumeId, n *storage.Needle, version storage.Versio ) } +type VolumeFileScanner4Export struct { + version storage.Version + counter int + needleMap *storage.NeedleMap + vid storage.VolumeId +} + +func (scanner *VolumeFileScanner4Export) VisitSuperBlock(superBlock storage.SuperBlock) error { + scanner.version = superBlock.Version() + return nil + +} +func (scanner *VolumeFileScanner4Export) ReadNeedleBody() bool { + return true +} + +func (scanner *VolumeFileScanner4Export) VisitNeedle(n *storage.Needle, offset int64) error { + needleMap := scanner.needleMap + vid := scanner.vid + + nv, ok := needleMap.Get(n.Id) + glog.V(3).Infof("key %d offset %d size %d disk_size %d gzip %v ok %v nv %+v", + n.Id, offset, n.Size, n.DiskSize(scanner.version), n.IsGzipped(), ok, nv) + if ok && nv.Size > 0 && int64(nv.Offset)*types.NeedlePaddingSize == offset { + if newerThanUnix >= 0 && n.HasLastModifiedDate() && n.LastModified < uint64(newerThanUnix) { + glog.V(3).Infof("Skipping this file, as it's old enough: LastModified %d vs %d", + n.LastModified, newerThanUnix) + return nil + } + scanner.counter++ + if *limit > 0 && scanner.counter > *limit { + return io.EOF + } + if tarOutputFile != nil { + return writeFile(vid, n) + } else { + printNeedle(vid, n, scanner.version, false) + return nil + } + } + if !ok { + if *showDeleted && tarOutputFile == nil { + if n.DataSize > 0 { + printNeedle(vid, n, scanner.version, true) + } else { + n.Name = []byte("*tombstone") + printNeedle(vid, n, scanner.version, true) + } + } + glog.V(2).Infof("This seems deleted %d size %d", n.Id, n.Size) + } else { + glog.V(2).Infof("Skipping later-updated Id %d size %d", n.Id, n.Size) + } + return nil +} + func runExport(cmd *Command, args []string) bool { var err error @@ -145,55 +201,16 @@ func runExport(cmd *Command, args []string) bool { glog.Fatalf("cannot load needle map from %s: %s", indexFile.Name(), err) } - var version storage.Version + volumeFileScanner := &VolumeFileScanner4Export{ + needleMap: needleMap, + vid: vid, + } if tarOutputFile == nil { fmt.Printf("key\tname\tsize\tgzip\tmime\tmodified\tttl\tdeleted\n") } - var counter = 0 - - err = storage.ScanVolumeFile(*export.dir, *export.collection, vid, - storage.NeedleMapInMemory, - func(superBlock storage.SuperBlock) error { - version = superBlock.Version() - return nil - }, true, func(n *storage.Needle, offset int64) error { - nv, ok := needleMap.Get(n.Id) - glog.V(3).Infof("key %d offset %d size %d disk_size %d gzip %v ok %v nv %+v", - n.Id, offset, n.Size, n.DiskSize(version), n.IsGzipped(), ok, nv) - if ok && nv.Size > 0 && int64(nv.Offset)*types.NeedlePaddingSize == offset { - if newerThanUnix >= 0 && n.HasLastModifiedDate() && n.LastModified < uint64(newerThanUnix) { - glog.V(3).Infof("Skipping this file, as it's old enough: LastModified %d vs %d", - n.LastModified, newerThanUnix) - return nil - } - counter++ - if *limit > 0 && counter > *limit { - return io.EOF - } - if tarOutputFile != nil { - return writeFile(vid, n) - } else { - printNeedle(vid, n, version, false) - return nil - } - } - if !ok { - if *showDeleted && tarOutputFile == nil { - if n.DataSize > 0 { - printNeedle(vid, n, version, true) - } else { - n.Name = []byte("*tombstone") - printNeedle(vid, n, version, true) - } - } - glog.V(2).Infof("This seems deleted %d size %d", n.Id, n.Size) - } else { - glog.V(2).Infof("Skipping later-updated Id %d size %d", n.Id, n.Size) - } - return nil - }) + err = storage.ScanVolumeFile(*export.dir, *export.collection, vid, storage.NeedleMapInMemory, volumeFileScanner) if err != nil && err != io.EOF { glog.Fatalf("Export Volume File [ERROR] %s\n", err) } diff --git a/weed/command/fix.go b/weed/command/fix.go index 3643c9d58..a800978c6 100644 --- a/weed/command/fix.go +++ b/weed/command/fix.go @@ -28,6 +28,32 @@ var ( fixVolumeId = cmdFix.Flag.Int("volumeId", -1, "a volume id. The volume should already exist in the dir. The volume index file should not exist.") ) +type VolumeFileScanner4Fix struct { + version storage.Version + nm *storage.NeedleMap +} + +func (scanner *VolumeFileScanner4Fix) VisitSuperBlock(superBlock storage.SuperBlock) error { + scanner.version = superBlock.Version() + return nil + +} +func (scanner *VolumeFileScanner4Fix) ReadNeedleBody() bool { + return false +} + +func (scanner *VolumeFileScanner4Fix) VisitNeedle(n *storage.Needle, offset int64) error { + glog.V(2).Infof("key %d offset %d size %d disk_size %d gzip %v", n.Id, offset, n.Size, n.DiskSize(scanner.version), n.IsGzipped()) + if n.Size > 0 { + pe := scanner.nm.Put(n.Id, types.Offset(offset/types.NeedlePaddingSize), n.Size) + glog.V(2).Infof("saved %d with error %v", n.Size, pe) + } else { + glog.V(2).Infof("skipping deleted file ...") + return scanner.nm.Delete(n.Id, types.Offset(offset/types.NeedlePaddingSize)) + } + return nil +} + func runFix(cmd *Command, args []string) bool { if *fixVolumeId == -1 { @@ -48,24 +74,12 @@ func runFix(cmd *Command, args []string) bool { nm := storage.NewBtreeNeedleMap(indexFile) defer nm.Close() - var version storage.Version vid := storage.VolumeId(*fixVolumeId) - err = storage.ScanVolumeFile(*fixVolumePath, *fixVolumeCollection, vid, - storage.NeedleMapInMemory, - func(superBlock storage.SuperBlock) error { - version = superBlock.Version() - return nil - }, false, func(n *storage.Needle, offset int64) error { - glog.V(2).Infof("key %d offset %d size %d disk_size %d gzip %v", n.Id, offset, n.Size, n.DiskSize(version), n.IsGzipped()) - if n.Size > 0 { - pe := nm.Put(n.Id, types.Offset(offset/types.NeedlePaddingSize), n.Size) - glog.V(2).Infof("saved %d with error %v", n.Size, pe) - } else { - glog.V(2).Infof("skipping deleted file ...") - return nm.Delete(n.Id, types.Offset(offset/types.NeedlePaddingSize)) - } - return nil - }) + scanner := &VolumeFileScanner4Fix{ + nm: nm, + } + + err = storage.ScanVolumeFile(*fixVolumePath, *fixVolumeCollection, vid, storage.NeedleMapInMemory, scanner) if err != nil { glog.Fatalf("Export Volume File [ERROR] %s\n", err) os.Remove(indexFileName) diff --git a/weed/storage/volume_read_write.go b/weed/storage/volume_read_write.go index 66f90634f..ed9729c84 100644 --- a/weed/storage/volume_read_write.go +++ b/weed/storage/volume_read_write.go @@ -169,16 +169,20 @@ func (v *Volume) readNeedle(n *Needle) (int, error) { return -1, ErrorNotFound } +type VolumeFileScanner interface { + VisitSuperBlock(SuperBlock) error + ReadNeedleBody() bool + VisitNeedle(n *Needle, offset int64) error +} + func ScanVolumeFile(dirname string, collection string, id VolumeId, needleMapKind NeedleMapType, - visitSuperBlock func(SuperBlock) error, - readNeedleBody bool, - visitNeedle func(n *Needle, offset int64) error) (err error) { + volumeFileScanner VolumeFileScanner) (err error) { var v *Volume if v, err = loadVolumeWithoutIndex(dirname, collection, id, needleMapKind); err != nil { return fmt.Errorf("Failed to load volume %d: %v", id, err) } - if err = visitSuperBlock(v.SuperBlock); err != nil { + if err = volumeFileScanner.VisitSuperBlock(v.SuperBlock); err != nil { return fmt.Errorf("Failed to process volume %d super block: %v", id, err) } defer v.Close() @@ -192,14 +196,14 @@ func ScanVolumeFile(dirname string, collection string, id VolumeId, return } for n != nil { - if readNeedleBody { + if volumeFileScanner.ReadNeedleBody() { if err = n.ReadNeedleBody(v.dataFile, version, offset+NeedleEntrySize, rest); err != nil { glog.V(0).Infof("cannot read needle body: %v", err) //err = fmt.Errorf("cannot read needle body: %v", err) //return } } - err = visitNeedle(n, offset) + err = volumeFileScanner.VisitNeedle(n, offset) if err == io.EOF { return nil } diff --git a/weed/storage/volume_vacuum.go b/weed/storage/volume_vacuum.go index 642114d01..ce76b11a5 100644 --- a/weed/storage/volume_vacuum.go +++ b/weed/storage/volume_vacuum.go @@ -180,11 +180,11 @@ func (v *Volume) makeupDiff(newDatFileName, newIdxFileName, oldDatFileName, oldI return fmt.Errorf("oldDatFile %s 's compact revision is %d while newDatFile %s 's compact revision is %d", oldDatFileName, oldDatCompactRevision, newDatFileName, newDatCompactRevision) } - idx_entry_bytes := make([]byte, NeedleIdSize+OffsetSize+SizeSize) - for key, incre_idx_entry := range incrementedHasUpdatedIndexEntry { - NeedleIdToBytes(idx_entry_bytes[0:NeedleIdSize], key) - OffsetToBytes(idx_entry_bytes[NeedleIdSize:NeedleIdSize+OffsetSize], incre_idx_entry.offset) - util.Uint32toBytes(idx_entry_bytes[NeedleIdSize+OffsetSize:NeedleIdSize+OffsetSize+SizeSize], incre_idx_entry.size) + idxEntryBytes := make([]byte, NeedleIdSize+OffsetSize+SizeSize) + for key, increIdxEntry := range incrementedHasUpdatedIndexEntry { + NeedleIdToBytes(idxEntryBytes[0:NeedleIdSize], key) + OffsetToBytes(idxEntryBytes[NeedleIdSize:NeedleIdSize+OffsetSize], increIdxEntry.offset) + util.Uint32toBytes(idxEntryBytes[NeedleIdSize+OffsetSize:NeedleIdSize+OffsetSize+SizeSize], increIdxEntry.size) var offset int64 if offset, err = dst.Seek(0, 2); err != nil { @@ -201,16 +201,16 @@ func (v *Volume) makeupDiff(newDatFileName, newIdxFileName, oldDatFileName, oldI } //updated needle - if incre_idx_entry.offset != 0 && incre_idx_entry.size != 0 && incre_idx_entry.size != TombstoneFileSize { + if increIdxEntry.offset != 0 && increIdxEntry.size != 0 && increIdxEntry.size != TombstoneFileSize { //even the needle cache in memory is hit, the need_bytes is correct - glog.V(4).Infof("file %d offset %d size %d", key, int64(incre_idx_entry.offset)*NeedlePaddingSize, incre_idx_entry.size) - var needle_bytes []byte - needle_bytes, err = ReadNeedleBlob(oldDatFile, int64(incre_idx_entry.offset)*NeedlePaddingSize, incre_idx_entry.size, v.Version()) + glog.V(4).Infof("file %d offset %d size %d", key, int64(increIdxEntry.offset)*NeedlePaddingSize, increIdxEntry.size) + var needleBytes []byte + needleBytes, err = ReadNeedleBlob(oldDatFile, int64(increIdxEntry.offset)*NeedlePaddingSize, increIdxEntry.size, v.Version()) if err != nil { - return fmt.Errorf("ReadNeedleBlob %s key %d offset %d size %d failed: %v", oldDatFile.Name(), key, int64(incre_idx_entry.offset)*NeedlePaddingSize, incre_idx_entry.size, err) + return fmt.Errorf("ReadNeedleBlob %s key %d offset %d size %d failed: %v", oldDatFile.Name(), key, int64(increIdxEntry.offset)*NeedlePaddingSize, increIdxEntry.size, err) } - dst.Write(needle_bytes) - util.Uint32toBytes(idx_entry_bytes[8:12], uint32(offset/NeedlePaddingSize)) + dst.Write(needleBytes) + util.Uint32toBytes(idxEntryBytes[8:12], uint32(offset/NeedlePaddingSize)) } else { //deleted needle //fakeDelNeedle 's default Data field is nil fakeDelNeedle := new(Needle) @@ -221,19 +221,59 @@ func (v *Volume) makeupDiff(newDatFileName, newIdxFileName, oldDatFileName, oldI if err != nil { return fmt.Errorf("append deleted %d failed: %v", key, err) } - util.Uint32toBytes(idx_entry_bytes[8:12], uint32(0)) + util.Uint32toBytes(idxEntryBytes[8:12], uint32(0)) } if _, err := idx.Seek(0, 2); err != nil { return fmt.Errorf("cannot seek end of indexfile %s: %v", newIdxFileName, err) } - _, err = idx.Write(idx_entry_bytes) + _, err = idx.Write(idxEntryBytes) } return nil } +type VolumeFileScanner4Vacuum struct { + version Version + v *Volume + dst *os.File + nm *NeedleMap + newOffset int64 + now uint64 +} + +func (scanner *VolumeFileScanner4Vacuum) VisitSuperBlock(superBlock SuperBlock) error { + scanner.version = superBlock.Version() + superBlock.CompactRevision++ + _, err := scanner.dst.Write(superBlock.Bytes()) + scanner.newOffset = int64(superBlock.BlockSize()) + return err + +} +func (scanner *VolumeFileScanner4Vacuum) ReadNeedleBody() bool { + return true +} + +func (scanner *VolumeFileScanner4Vacuum) VisitNeedle(n *Needle, offset int64) error { + if n.HasTtl() && scanner.now >= n.LastModified+uint64(scanner.v.Ttl.Minutes()*60) { + return nil + } + nv, ok := scanner.v.nm.Get(n.Id) + glog.V(4).Infoln("needle expected offset ", offset, "ok", ok, "nv", nv) + if ok && int64(nv.Offset)*NeedlePaddingSize == offset && nv.Size > 0 { + if err := scanner.nm.Put(n.Id, Offset(scanner.newOffset/NeedlePaddingSize), n.Size); err != nil { + return fmt.Errorf("cannot put needle: %s", err) + } + if _, _, _, err := n.Append(scanner.dst, scanner.v.Version()); err != nil { + return fmt.Errorf("cannot append needle: %s", err) + } + scanner.newOffset += n.DiskSize(scanner.version) + glog.V(4).Infoln("saving key", n.Id, "volume offset", offset, "=>", scanner.newOffset, "data_size", n.Size) + } + return nil +} + func (v *Volume) copyDataAndGenerateIndexFile(dstName, idxName string, preallocate int64) (err error) { var ( dst, idx *os.File @@ -248,38 +288,13 @@ func (v *Volume) copyDataAndGenerateIndexFile(dstName, idxName string, prealloca } defer idx.Close() - nm := NewBtreeNeedleMap(idx) - new_offset := int64(0) - - now := uint64(time.Now().Unix()) - - var version Version - err = ScanVolumeFile(v.dir, v.Collection, v.Id, v.needleMapKind, - func(superBlock SuperBlock) error { - version = superBlock.Version() - superBlock.CompactRevision++ - _, err = dst.Write(superBlock.Bytes()) - new_offset = int64(superBlock.BlockSize()) - return err - }, true, func(n *Needle, offset int64) error { - if n.HasTtl() && now >= n.LastModified+uint64(v.Ttl.Minutes()*60) { - return nil - } - nv, ok := v.nm.Get(n.Id) - glog.V(4).Infoln("needle expected offset ", offset, "ok", ok, "nv", nv) - if ok && int64(nv.Offset)*NeedlePaddingSize == offset && nv.Size > 0 { - if err = nm.Put(n.Id, Offset(new_offset/NeedlePaddingSize), n.Size); err != nil { - return fmt.Errorf("cannot put needle: %s", err) - } - if _, _, _, err := n.Append(dst, v.Version()); err != nil { - return fmt.Errorf("cannot append needle: %s", err) - } - new_offset += n.DiskSize(version) - glog.V(4).Infoln("saving key", n.Id, "volume offset", offset, "=>", new_offset, "data_size", n.Size) - } - return nil - }) - + scanner := &VolumeFileScanner4Vacuum{ + v: v, + now: uint64(time.Now().Unix()), + nm: NewBtreeNeedleMap(idx), + dst: dst, + } + err = ScanVolumeFile(v.dir, v.Collection, v.Id, v.needleMapKind, scanner) return } @@ -307,7 +322,7 @@ func (v *Volume) copyDataBasedOnIndexFile(dstName, idxName string) (err error) { v.SuperBlock.CompactRevision++ dst.Write(v.SuperBlock.Bytes()) - new_offset := int64(v.SuperBlock.BlockSize()) + newOffset := int64(v.SuperBlock.BlockSize()) WalkIndexFile(oldIndexFile, func(key NeedleId, offset Offset, size uint32) error { if offset == 0 || size == TombstoneFileSize { @@ -328,14 +343,14 @@ func (v *Volume) copyDataBasedOnIndexFile(dstName, idxName string) (err error) { glog.V(4).Infoln("needle expected offset ", offset, "ok", ok, "nv", nv) if nv.Offset == offset && nv.Size > 0 { - if err = nm.Put(n.Id, Offset(new_offset/NeedlePaddingSize), n.Size); err != nil { + if err = nm.Put(n.Id, Offset(newOffset/NeedlePaddingSize), n.Size); err != nil { return fmt.Errorf("cannot put needle: %s", err) } if _, _, _, err = n.Append(dst, v.Version()); err != nil { return fmt.Errorf("cannot append needle: %s", err) } - new_offset += n.DiskSize(v.Version()) - glog.V(3).Infoln("saving key", n.Id, "volume offset", offset, "=>", new_offset, "data_size", n.Size) + newOffset += n.DiskSize(v.Version()) + glog.V(3).Infoln("saving key", n.Id, "volume offset", offset, "=>", newOffset, "data_size", n.Size) } return nil })