Browse Source

Merge pull request #836 from bingoohuang/master

fix needle Append return offset to avoid uint32 overflow
pull/842/head
Chris Lu 6 years ago
committed by GitHub
parent
commit
123df7453f
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
  1. 4
      unmaintained/fix_dat/fix_dat.go
  2. 32
      unmaintained/see_dat/see_dat.go
  3. 105
      weed/command/export.go
  4. 46
      weed/command/fix.go
  5. 9
      weed/storage/needle_read_write.go
  6. 62
      weed/storage/needle_read_write_test.go
  7. 24
      weed/storage/volume_read_write.go
  8. 113
      weed/storage/volume_vacuum.go

4
unmaintained/fix_dat/fix_dat.go

@ -10,8 +10,8 @@ import (
"github.com/chrislusf/seaweedfs/weed/glog" "github.com/chrislusf/seaweedfs/weed/glog"
"github.com/chrislusf/seaweedfs/weed/storage" "github.com/chrislusf/seaweedfs/weed/storage"
"github.com/chrislusf/seaweedfs/weed/util"
"github.com/chrislusf/seaweedfs/weed/storage/types" "github.com/chrislusf/seaweedfs/weed/storage/types"
"github.com/chrislusf/seaweedfs/weed/util"
) )
var ( var (
@ -63,7 +63,7 @@ func main() {
iterateEntries(datFile, indexFile, func(n *storage.Needle, offset int64) { iterateEntries(datFile, indexFile, func(n *storage.Needle, offset int64) {
fmt.Printf("needle id=%v name=%s size=%d dataSize=%d\n", n.Id, string(n.Name), n.Size, n.DataSize) fmt.Printf("needle id=%v name=%s size=%d dataSize=%d\n", n.Id, string(n.Name), n.Size, n.DataSize)
s, _, e := n.Append(newDatFile, superBlock.Version())
_, s, _, e := n.Append(newDatFile, superBlock.Version())
fmt.Printf("size %d error %v\n", s, e) fmt.Printf("size %d error %v\n", s, e)
}) })

32
unmaintained/see_dat/see_dat.go

@ -12,21 +12,31 @@ var (
volumeId = flag.Int("volumeId", -1, "a volume id. The volume should already exist in the dir. The volume index file should not exist.") volumeId = flag.Int("volumeId", -1, "a volume id. The volume should already exist in the dir. The volume index file should not exist.")
) )
type VolumeFileScanner4SeeDat struct {
version storage.Version
}
func (scanner *VolumeFileScanner4SeeDat) VisitSuperBlock(superBlock storage.SuperBlock) error {
scanner.version = superBlock.Version()
return nil
}
func (scanner *VolumeFileScanner4SeeDat) ReadNeedleBody() bool {
return false
}
func (scanner *VolumeFileScanner4SeeDat) VisitNeedle(n *storage.Needle, offset int64) error {
glog.V(0).Infof("%d,%s%x offset %d size %d cookie %x", *volumeId, n.Id, n.Cookie, offset, n.Size, n.Cookie)
return nil
}
func main() { func main() {
flag.Parse() flag.Parse()
var version storage.Version
vid := storage.VolumeId(*volumeId) vid := storage.VolumeId(*volumeId)
err := storage.ScanVolumeFile(*volumePath, *volumeCollection, vid,
storage.NeedleMapInMemory,
func(superBlock storage.SuperBlock) error {
version = superBlock.Version()
return nil
}, false, func(n *storage.Needle, offset int64) error {
glog.V(0).Infof("%d,%s%x offset %d size %d cookie %x",
*volumeId, n.Id, n.Cookie, offset, n.Size, n.Cookie)
return nil
})
scanner := &VolumeFileScanner4SeeDat{}
err := storage.ScanVolumeFile(*volumePath, *volumeCollection, vid, storage.NeedleMapInMemory, scanner)
if err != nil { if err != nil {
glog.Fatalf("Reading Volume File [ERROR] %s\n", err) glog.Fatalf("Reading Volume File [ERROR] %s\n", err)
} }

105
weed/command/export.go

@ -84,6 +84,62 @@ func printNeedle(vid storage.VolumeId, n *storage.Needle, version storage.Versio
) )
} }
type VolumeFileScanner4Export struct {
version storage.Version
counter int
needleMap *storage.NeedleMap
vid storage.VolumeId
}
func (scanner *VolumeFileScanner4Export) VisitSuperBlock(superBlock storage.SuperBlock) error {
scanner.version = superBlock.Version()
return nil
}
func (scanner *VolumeFileScanner4Export) ReadNeedleBody() bool {
return true
}
func (scanner *VolumeFileScanner4Export) VisitNeedle(n *storage.Needle, offset int64) error {
needleMap := scanner.needleMap
vid := scanner.vid
nv, ok := needleMap.Get(n.Id)
glog.V(3).Infof("key %d offset %d size %d disk_size %d gzip %v ok %v nv %+v",
n.Id, offset, n.Size, n.DiskSize(scanner.version), n.IsGzipped(), ok, nv)
if ok && nv.Size > 0 && int64(nv.Offset)*types.NeedlePaddingSize == offset {
if newerThanUnix >= 0 && n.HasLastModifiedDate() && n.LastModified < uint64(newerThanUnix) {
glog.V(3).Infof("Skipping this file, as it's old enough: LastModified %d vs %d",
n.LastModified, newerThanUnix)
return nil
}
scanner.counter++
if *limit > 0 && scanner.counter > *limit {
return io.EOF
}
if tarOutputFile != nil {
return writeFile(vid, n)
} else {
printNeedle(vid, n, scanner.version, false)
return nil
}
}
if !ok {
if *showDeleted && tarOutputFile == nil {
if n.DataSize > 0 {
printNeedle(vid, n, scanner.version, true)
} else {
n.Name = []byte("*tombstone")
printNeedle(vid, n, scanner.version, true)
}
}
glog.V(2).Infof("This seems deleted %d size %d", n.Id, n.Size)
} else {
glog.V(2).Infof("Skipping later-updated Id %d size %d", n.Id, n.Size)
}
return nil
}
func runExport(cmd *Command, args []string) bool { func runExport(cmd *Command, args []string) bool {
var err error var err error
@ -145,55 +201,16 @@ func runExport(cmd *Command, args []string) bool {
glog.Fatalf("cannot load needle map from %s: %s", indexFile.Name(), err) glog.Fatalf("cannot load needle map from %s: %s", indexFile.Name(), err)
} }
var version storage.Version
volumeFileScanner := &VolumeFileScanner4Export{
needleMap: needleMap,
vid: vid,
}
if tarOutputFile == nil { if tarOutputFile == nil {
fmt.Printf("key\tname\tsize\tgzip\tmime\tmodified\tttl\tdeleted\n") fmt.Printf("key\tname\tsize\tgzip\tmime\tmodified\tttl\tdeleted\n")
} }
var counter = 0
err = storage.ScanVolumeFile(*export.dir, *export.collection, vid,
storage.NeedleMapInMemory,
func(superBlock storage.SuperBlock) error {
version = superBlock.Version()
return nil
}, true, func(n *storage.Needle, offset int64) error {
nv, ok := needleMap.Get(n.Id)
glog.V(3).Infof("key %d offset %d size %d disk_size %d gzip %v ok %v nv %+v",
n.Id, offset, n.Size, n.DiskSize(version), n.IsGzipped(), ok, nv)
if ok && nv.Size > 0 && int64(nv.Offset)*types.NeedlePaddingSize == offset {
if newerThanUnix >= 0 && n.HasLastModifiedDate() && n.LastModified < uint64(newerThanUnix) {
glog.V(3).Infof("Skipping this file, as it's old enough: LastModified %d vs %d",
n.LastModified, newerThanUnix)
return nil
}
counter++
if *limit > 0 && counter > *limit {
return io.EOF
}
if tarOutputFile != nil {
return writeFile(vid, n)
} else {
printNeedle(vid, n, version, false)
return nil
}
}
if !ok {
if *showDeleted && tarOutputFile == nil {
if n.DataSize > 0 {
printNeedle(vid, n, version, true)
} else {
n.Name = []byte("*tombstone")
printNeedle(vid, n, version, true)
}
}
glog.V(2).Infof("This seems deleted %d size %d", n.Id, n.Size)
} else {
glog.V(2).Infof("Skipping later-updated Id %d size %d", n.Id, n.Size)
}
return nil
})
err = storage.ScanVolumeFile(*export.dir, *export.collection, vid, storage.NeedleMapInMemory, volumeFileScanner)
if err != nil && err != io.EOF { if err != nil && err != io.EOF {
glog.Fatalf("Export Volume File [ERROR] %s\n", err) glog.Fatalf("Export Volume File [ERROR] %s\n", err)
} }

46
weed/command/fix.go

@ -28,6 +28,32 @@ var (
fixVolumeId = cmdFix.Flag.Int("volumeId", -1, "a volume id. The volume should already exist in the dir. The volume index file should not exist.") fixVolumeId = cmdFix.Flag.Int("volumeId", -1, "a volume id. The volume should already exist in the dir. The volume index file should not exist.")
) )
type VolumeFileScanner4Fix struct {
version storage.Version
nm *storage.NeedleMap
}
func (scanner *VolumeFileScanner4Fix) VisitSuperBlock(superBlock storage.SuperBlock) error {
scanner.version = superBlock.Version()
return nil
}
func (scanner *VolumeFileScanner4Fix) ReadNeedleBody() bool {
return false
}
func (scanner *VolumeFileScanner4Fix) VisitNeedle(n *storage.Needle, offset int64) error {
glog.V(2).Infof("key %d offset %d size %d disk_size %d gzip %v", n.Id, offset, n.Size, n.DiskSize(scanner.version), n.IsGzipped())
if n.Size > 0 {
pe := scanner.nm.Put(n.Id, types.Offset(offset/types.NeedlePaddingSize), n.Size)
glog.V(2).Infof("saved %d with error %v", n.Size, pe)
} else {
glog.V(2).Infof("skipping deleted file ...")
return scanner.nm.Delete(n.Id, types.Offset(offset/types.NeedlePaddingSize))
}
return nil
}
func runFix(cmd *Command, args []string) bool { func runFix(cmd *Command, args []string) bool {
if *fixVolumeId == -1 { if *fixVolumeId == -1 {
@ -48,24 +74,12 @@ func runFix(cmd *Command, args []string) bool {
nm := storage.NewBtreeNeedleMap(indexFile) nm := storage.NewBtreeNeedleMap(indexFile)
defer nm.Close() defer nm.Close()
var version storage.Version
vid := storage.VolumeId(*fixVolumeId) vid := storage.VolumeId(*fixVolumeId)
err = storage.ScanVolumeFile(*fixVolumePath, *fixVolumeCollection, vid,
storage.NeedleMapInMemory,
func(superBlock storage.SuperBlock) error {
version = superBlock.Version()
return nil
}, false, func(n *storage.Needle, offset int64) error {
glog.V(2).Infof("key %d offset %d size %d disk_size %d gzip %v", n.Id, offset, n.Size, n.DiskSize(version), n.IsGzipped())
if n.Size > 0 {
pe := nm.Put(n.Id, types.Offset(offset/types.NeedlePaddingSize), n.Size)
glog.V(2).Infof("saved %d with error %v", n.Size, pe)
} else {
glog.V(2).Infof("skipping deleted file ...")
return nm.Delete(n.Id, types.Offset(offset/types.NeedlePaddingSize))
scanner := &VolumeFileScanner4Fix{
nm: nm,
} }
return nil
})
err = storage.ScanVolumeFile(*fixVolumePath, *fixVolumeCollection, vid, storage.NeedleMapInMemory, scanner)
if err != nil { if err != nil {
glog.Fatalf("Export Volume File [ERROR] %s\n", err) glog.Fatalf("Export Volume File [ERROR] %s\n", err)
os.Remove(indexFileName) os.Remove(indexFileName)

9
weed/storage/needle_read_write.go

@ -3,6 +3,7 @@ package storage
import ( import (
"errors" "errors"
"fmt" "fmt"
"io"
"os" "os"
"github.com/chrislusf/seaweedfs/weed/glog" "github.com/chrislusf/seaweedfs/weed/glog"
@ -27,8 +28,8 @@ func (n *Needle) DiskSize(version Version) int64 {
return getActualSize(n.Size, version) return getActualSize(n.Size, version)
} }
func (n *Needle) Append(w *os.File, version Version) (offset Offset, size uint32, actualSize int64, err error) {
if end, e := w.Seek(0, 2); e == nil {
func (n *Needle) Append(w *os.File, version Version) (offset uint64, size uint32, actualSize int64, err error) {
if end, e := w.Seek(0, io.SeekEnd); e == nil {
defer func(w *os.File, off int64) { defer func(w *os.File, off int64) {
if err != nil { if err != nil {
if te := w.Truncate(end); te != nil { if te := w.Truncate(end); te != nil {
@ -36,7 +37,7 @@ func (n *Needle) Append(w *os.File, version Version) (offset Offset, size uint32
} }
} }
}(w, end) }(w, end)
offset = Offset(end)
offset = uint64(end)
} else { } else {
err = fmt.Errorf("Cannot Read Current Volume Position: %v", e) err = fmt.Errorf("Cannot Read Current Volume Position: %v", e)
return return
@ -175,7 +176,7 @@ func (n *Needle) ReadData(r *os.File, offset int64, size uint32, version Version
} }
n.ParseNeedleHeader(bytes) n.ParseNeedleHeader(bytes)
if n.Size != size { if n.Size != size {
return fmt.Errorf("File Entry Not Found. Needle id %d expected size %d Memory %d", n.Id, n.Size, size)
return fmt.Errorf("File Entry Not Found. offset %d, Needle id %d expected size %d Memory %d", offset, n.Id, n.Size, size)
} }
switch version { switch version {
case Version1: case Version1:

62
weed/storage/needle_read_write_test.go

@ -0,0 +1,62 @@
package storage
import (
"crypto/rand"
"github.com/chrislusf/seaweedfs/weed/storage/types"
"io"
"io/ioutil"
"os"
"testing"
)
func TestAppend(t *testing.T) {
n := &Needle{
Cookie: types.Cookie(123), // Cookie Cookie `comment:"random number to mitigate brute force lookups"`
Id: types.NeedleId(123), // Id NeedleId `comment:"needle id"`
Size: 8, // Size uint32 `comment:"sum of DataSize,Data,NameSize,Name,MimeSize,Mime"`
DataSize: 4, // DataSize uint32 `comment:"Data size"` //version2
Data: []byte("abcd"), // Data []byte `comment:"The actual file data"`
Flags: 0, // Flags byte `comment:"boolean flags"` //version2
NameSize: 0, // NameSize uint8 //version2
Name: nil, // Name []byte `comment:"maximum 256 characters"` //version2
MimeSize: 0, // MimeSize uint8 //version2
Mime: nil, // Mime []byte `comment:"maximum 256 characters"` //version2
PairsSize: 0, // PairsSize uint16 //version2
Pairs: nil, // Pairs []byte `comment:"additional name value pairs, json format, maximum 6
LastModified: 123, // LastModified uint64 //only store LastModifiedBytesLength bytes, which is 5 bytes
Ttl: nil, // Ttl *TTL
Checksum: 123, // Checksum CRC `comment:"CRC32 to check integrity"`
AppendAtNs: 123, // AppendAtNs uint64 `comment:"append timestamp in nano seconds"` //version3
Padding: nil, // Padding []byte `comment:"Aligned to 8 bytes"`
}
tempFile, err := ioutil.TempFile("", ".dat")
if err != nil {
t.Errorf("Fail TempFile. %v", err)
return
}
/*
uint8 : 0 to 255
uint16 : 0 to 65535
uint32 : 0 to 4294967295
uint64 : 0 to 18446744073709551615
int8 : -128 to 127
int16 : -32768 to 32767
int32 : -2147483648 to 2147483647
int64 : -9223372036854775808 to 9223372036854775807
*/
fileSize := int64(4294967295) + 10000
io.CopyN(tempFile, rand.Reader, fileSize)
defer func() {
tempFile.Close()
os.Remove(tempFile.Name())
}()
offset, _, _, _ := n.Append(tempFile, CurrentVersion)
if offset != uint64(fileSize) {
t.Errorf("Fail to Append Needle.")
}
}

24
weed/storage/volume_read_write.go

@ -76,7 +76,7 @@ func (v *Volume) AppendBlob(b []byte) (offset int64, err error) {
return return
} }
func (v *Volume) writeNeedle(n *Needle) (offset Offset, size uint32, err error) {
func (v *Volume) writeNeedle(n *Needle) (offset uint64, size uint32, err error) {
glog.V(4).Infof("writing needle %s", NewFileIdFromNeedle(v.Id, n).String()) glog.V(4).Infof("writing needle %s", NewFileIdFromNeedle(v.Id, n).String())
if v.readOnly { if v.readOnly {
err = fmt.Errorf("%s is read-only", v.dataFile.Name()) err = fmt.Errorf("%s is read-only", v.dataFile.Name())
@ -96,8 +96,8 @@ func (v *Volume) writeNeedle(n *Needle) (offset Offset, size uint32, err error)
} }
nv, ok := v.nm.Get(n.Id) nv, ok := v.nm.Get(n.Id)
if !ok || Offset(nv.Offset)*NeedlePaddingSize < offset {
if err = v.nm.Put(n.Id, offset/NeedlePaddingSize, n.Size); err != nil {
if !ok || uint64(nv.Offset)*NeedlePaddingSize < offset {
if err = v.nm.Put(n.Id, Offset(offset/NeedlePaddingSize), n.Size); err != nil {
glog.V(4).Infof("failed to save in needle map %d: %v", n.Id, err) glog.V(4).Infof("failed to save in needle map %d: %v", n.Id, err)
} }
} }
@ -124,7 +124,7 @@ func (v *Volume) deleteNeedle(n *Needle) (uint32, error) {
if err != nil { if err != nil {
return size, err return size, err
} }
if err = v.nm.Delete(n.Id, offset/NeedlePaddingSize); err != nil {
if err = v.nm.Delete(n.Id, Offset(offset/NeedlePaddingSize)); err != nil {
return size, err return size, err
} }
return size, err return size, err
@ -169,16 +169,20 @@ func (v *Volume) readNeedle(n *Needle) (int, error) {
return -1, ErrorNotFound return -1, ErrorNotFound
} }
type VolumeFileScanner interface {
VisitSuperBlock(SuperBlock) error
ReadNeedleBody() bool
VisitNeedle(n *Needle, offset int64) error
}
func ScanVolumeFile(dirname string, collection string, id VolumeId, func ScanVolumeFile(dirname string, collection string, id VolumeId,
needleMapKind NeedleMapType, needleMapKind NeedleMapType,
visitSuperBlock func(SuperBlock) error,
readNeedleBody bool,
visitNeedle func(n *Needle, offset int64) error) (err error) {
volumeFileScanner VolumeFileScanner) (err error) {
var v *Volume var v *Volume
if v, err = loadVolumeWithoutIndex(dirname, collection, id, needleMapKind); err != nil { if v, err = loadVolumeWithoutIndex(dirname, collection, id, needleMapKind); err != nil {
return fmt.Errorf("Failed to load volume %d: %v", id, err) return fmt.Errorf("Failed to load volume %d: %v", id, err)
} }
if err = visitSuperBlock(v.SuperBlock); err != nil {
if err = volumeFileScanner.VisitSuperBlock(v.SuperBlock); err != nil {
return fmt.Errorf("Failed to process volume %d super block: %v", id, err) return fmt.Errorf("Failed to process volume %d super block: %v", id, err)
} }
defer v.Close() defer v.Close()
@ -192,14 +196,14 @@ func ScanVolumeFile(dirname string, collection string, id VolumeId,
return return
} }
for n != nil { for n != nil {
if readNeedleBody {
if volumeFileScanner.ReadNeedleBody() {
if err = n.ReadNeedleBody(v.dataFile, version, offset+NeedleEntrySize, rest); err != nil { if err = n.ReadNeedleBody(v.dataFile, version, offset+NeedleEntrySize, rest); err != nil {
glog.V(0).Infof("cannot read needle body: %v", err) glog.V(0).Infof("cannot read needle body: %v", err)
//err = fmt.Errorf("cannot read needle body: %v", err) //err = fmt.Errorf("cannot read needle body: %v", err)
//return //return
} }
} }
err = visitNeedle(n, offset)
err = volumeFileScanner.VisitNeedle(n, offset)
if err == io.EOF { if err == io.EOF {
return nil return nil
} }

113
weed/storage/volume_vacuum.go

@ -180,11 +180,11 @@ func (v *Volume) makeupDiff(newDatFileName, newIdxFileName, oldDatFileName, oldI
return fmt.Errorf("oldDatFile %s 's compact revision is %d while newDatFile %s 's compact revision is %d", oldDatFileName, oldDatCompactRevision, newDatFileName, newDatCompactRevision) return fmt.Errorf("oldDatFile %s 's compact revision is %d while newDatFile %s 's compact revision is %d", oldDatFileName, oldDatCompactRevision, newDatFileName, newDatCompactRevision)
} }
idx_entry_bytes := make([]byte, NeedleIdSize+OffsetSize+SizeSize)
for key, incre_idx_entry := range incrementedHasUpdatedIndexEntry {
NeedleIdToBytes(idx_entry_bytes[0:NeedleIdSize], key)
OffsetToBytes(idx_entry_bytes[NeedleIdSize:NeedleIdSize+OffsetSize], incre_idx_entry.offset)
util.Uint32toBytes(idx_entry_bytes[NeedleIdSize+OffsetSize:NeedleIdSize+OffsetSize+SizeSize], incre_idx_entry.size)
idxEntryBytes := make([]byte, NeedleIdSize+OffsetSize+SizeSize)
for key, increIdxEntry := range incrementedHasUpdatedIndexEntry {
NeedleIdToBytes(idxEntryBytes[0:NeedleIdSize], key)
OffsetToBytes(idxEntryBytes[NeedleIdSize:NeedleIdSize+OffsetSize], increIdxEntry.offset)
util.Uint32toBytes(idxEntryBytes[NeedleIdSize+OffsetSize:NeedleIdSize+OffsetSize+SizeSize], increIdxEntry.size)
var offset int64 var offset int64
if offset, err = dst.Seek(0, 2); err != nil { if offset, err = dst.Seek(0, 2); err != nil {
@ -201,16 +201,16 @@ func (v *Volume) makeupDiff(newDatFileName, newIdxFileName, oldDatFileName, oldI
} }
//updated needle //updated needle
if incre_idx_entry.offset != 0 && incre_idx_entry.size != 0 && incre_idx_entry.size != TombstoneFileSize {
if increIdxEntry.offset != 0 && increIdxEntry.size != 0 && increIdxEntry.size != TombstoneFileSize {
//even the needle cache in memory is hit, the need_bytes is correct //even the needle cache in memory is hit, the need_bytes is correct
glog.V(4).Infof("file %d offset %d size %d", key, int64(incre_idx_entry.offset)*NeedlePaddingSize, incre_idx_entry.size)
var needle_bytes []byte
needle_bytes, err = ReadNeedleBlob(oldDatFile, int64(incre_idx_entry.offset)*NeedlePaddingSize, incre_idx_entry.size, v.Version())
glog.V(4).Infof("file %d offset %d size %d", key, int64(increIdxEntry.offset)*NeedlePaddingSize, increIdxEntry.size)
var needleBytes []byte
needleBytes, err = ReadNeedleBlob(oldDatFile, int64(increIdxEntry.offset)*NeedlePaddingSize, increIdxEntry.size, v.Version())
if err != nil { if err != nil {
return fmt.Errorf("ReadNeedleBlob %s key %d offset %d size %d failed: %v", oldDatFile.Name(), key, int64(incre_idx_entry.offset)*NeedlePaddingSize, incre_idx_entry.size, err)
return fmt.Errorf("ReadNeedleBlob %s key %d offset %d size %d failed: %v", oldDatFile.Name(), key, int64(increIdxEntry.offset)*NeedlePaddingSize, increIdxEntry.size, err)
} }
dst.Write(needle_bytes)
util.Uint32toBytes(idx_entry_bytes[8:12], uint32(offset/NeedlePaddingSize))
dst.Write(needleBytes)
util.Uint32toBytes(idxEntryBytes[8:12], uint32(offset/NeedlePaddingSize))
} else { //deleted needle } else { //deleted needle
//fakeDelNeedle 's default Data field is nil //fakeDelNeedle 's default Data field is nil
fakeDelNeedle := new(Needle) fakeDelNeedle := new(Needle)
@ -221,19 +221,59 @@ func (v *Volume) makeupDiff(newDatFileName, newIdxFileName, oldDatFileName, oldI
if err != nil { if err != nil {
return fmt.Errorf("append deleted %d failed: %v", key, err) return fmt.Errorf("append deleted %d failed: %v", key, err)
} }
util.Uint32toBytes(idx_entry_bytes[8:12], uint32(0))
util.Uint32toBytes(idxEntryBytes[8:12], uint32(0))
} }
if _, err := idx.Seek(0, 2); err != nil { if _, err := idx.Seek(0, 2); err != nil {
return fmt.Errorf("cannot seek end of indexfile %s: %v", return fmt.Errorf("cannot seek end of indexfile %s: %v",
newIdxFileName, err) newIdxFileName, err)
} }
_, err = idx.Write(idx_entry_bytes)
_, err = idx.Write(idxEntryBytes)
} }
return nil return nil
} }
type VolumeFileScanner4Vacuum struct {
version Version
v *Volume
dst *os.File
nm *NeedleMap
newOffset int64
now uint64
}
func (scanner *VolumeFileScanner4Vacuum) VisitSuperBlock(superBlock SuperBlock) error {
scanner.version = superBlock.Version()
superBlock.CompactRevision++
_, err := scanner.dst.Write(superBlock.Bytes())
scanner.newOffset = int64(superBlock.BlockSize())
return err
}
func (scanner *VolumeFileScanner4Vacuum) ReadNeedleBody() bool {
return true
}
func (scanner *VolumeFileScanner4Vacuum) VisitNeedle(n *Needle, offset int64) error {
if n.HasTtl() && scanner.now >= n.LastModified+uint64(scanner.v.Ttl.Minutes()*60) {
return nil
}
nv, ok := scanner.v.nm.Get(n.Id)
glog.V(4).Infoln("needle expected offset ", offset, "ok", ok, "nv", nv)
if ok && int64(nv.Offset)*NeedlePaddingSize == offset && nv.Size > 0 {
if err := scanner.nm.Put(n.Id, Offset(scanner.newOffset/NeedlePaddingSize), n.Size); err != nil {
return fmt.Errorf("cannot put needle: %s", err)
}
if _, _, _, err := n.Append(scanner.dst, scanner.v.Version()); err != nil {
return fmt.Errorf("cannot append needle: %s", err)
}
scanner.newOffset += n.DiskSize(scanner.version)
glog.V(4).Infoln("saving key", n.Id, "volume offset", offset, "=>", scanner.newOffset, "data_size", n.Size)
}
return nil
}
func (v *Volume) copyDataAndGenerateIndexFile(dstName, idxName string, preallocate int64) (err error) { func (v *Volume) copyDataAndGenerateIndexFile(dstName, idxName string, preallocate int64) (err error) {
var ( var (
dst, idx *os.File dst, idx *os.File
@ -248,38 +288,13 @@ func (v *Volume) copyDataAndGenerateIndexFile(dstName, idxName string, prealloca
} }
defer idx.Close() defer idx.Close()
nm := NewBtreeNeedleMap(idx)
new_offset := int64(0)
now := uint64(time.Now().Unix())
var version Version
err = ScanVolumeFile(v.dir, v.Collection, v.Id, v.needleMapKind,
func(superBlock SuperBlock) error {
version = superBlock.Version()
superBlock.CompactRevision++
_, err = dst.Write(superBlock.Bytes())
new_offset = int64(superBlock.BlockSize())
return err
}, true, func(n *Needle, offset int64) error {
if n.HasTtl() && now >= n.LastModified+uint64(v.Ttl.Minutes()*60) {
return nil
scanner := &VolumeFileScanner4Vacuum{
v: v,
now: uint64(time.Now().Unix()),
nm: NewBtreeNeedleMap(idx),
dst: dst,
} }
nv, ok := v.nm.Get(n.Id)
glog.V(4).Infoln("needle expected offset ", offset, "ok", ok, "nv", nv)
if ok && int64(nv.Offset)*NeedlePaddingSize == offset && nv.Size > 0 {
if err = nm.Put(n.Id, Offset(new_offset/NeedlePaddingSize), n.Size); err != nil {
return fmt.Errorf("cannot put needle: %s", err)
}
if _, _, _, err := n.Append(dst, v.Version()); err != nil {
return fmt.Errorf("cannot append needle: %s", err)
}
new_offset += n.DiskSize(version)
glog.V(4).Infoln("saving key", n.Id, "volume offset", offset, "=>", new_offset, "data_size", n.Size)
}
return nil
})
err = ScanVolumeFile(v.dir, v.Collection, v.Id, v.needleMapKind, scanner)
return return
} }
@ -307,7 +322,7 @@ func (v *Volume) copyDataBasedOnIndexFile(dstName, idxName string) (err error) {
v.SuperBlock.CompactRevision++ v.SuperBlock.CompactRevision++
dst.Write(v.SuperBlock.Bytes()) dst.Write(v.SuperBlock.Bytes())
new_offset := int64(v.SuperBlock.BlockSize())
newOffset := int64(v.SuperBlock.BlockSize())
WalkIndexFile(oldIndexFile, func(key NeedleId, offset Offset, size uint32) error { WalkIndexFile(oldIndexFile, func(key NeedleId, offset Offset, size uint32) error {
if offset == 0 || size == TombstoneFileSize { if offset == 0 || size == TombstoneFileSize {
@ -328,14 +343,14 @@ func (v *Volume) copyDataBasedOnIndexFile(dstName, idxName string) (err error) {
glog.V(4).Infoln("needle expected offset ", offset, "ok", ok, "nv", nv) glog.V(4).Infoln("needle expected offset ", offset, "ok", ok, "nv", nv)
if nv.Offset == offset && nv.Size > 0 { if nv.Offset == offset && nv.Size > 0 {
if err = nm.Put(n.Id, Offset(new_offset/NeedlePaddingSize), n.Size); err != nil {
if err = nm.Put(n.Id, Offset(newOffset/NeedlePaddingSize), n.Size); err != nil {
return fmt.Errorf("cannot put needle: %s", err) return fmt.Errorf("cannot put needle: %s", err)
} }
if _, _, _, err = n.Append(dst, v.Version()); err != nil { if _, _, _, err = n.Append(dst, v.Version()); err != nil {
return fmt.Errorf("cannot append needle: %s", err) return fmt.Errorf("cannot append needle: %s", err)
} }
new_offset += n.DiskSize(v.Version())
glog.V(3).Infoln("saving key", n.Id, "volume offset", offset, "=>", new_offset, "data_size", n.Size)
newOffset += n.DiskSize(v.Version())
glog.V(3).Infoln("saving key", n.Id, "volume offset", offset, "=>", newOffset, "data_size", n.Size)
} }
return nil return nil
}) })

Loading…
Cancel
Save