From 5bfb72d058bc9198d90bbab9cd694f9248a64e34 Mon Sep 17 00:00:00 2001
From: Chris Lu
Date: Sat, 7 Jul 2018 00:51:17 -0700
Subject: [PATCH] faster loading boltdb or leveldb needle map metrics by bloomfilter avoid btree

---
Review notes: reverseWalkIndexFile now reports failed index reads instead of
decoding a partial entry, the test no longer calls rand.Int63n(0) and checks
its errors, and the import blocks are gofmt-grouped. The blob ids on the
"index" lines of the two new files still belong to the original revision.

 weed/storage/needle_map.go             |  43 ---------
 weed/storage/needle_map_boltdb.go      |   4 +-
 weed/storage/needle_map_leveldb.go     |   4 +-
 weed/storage/needle_map_metric.go      | 124 +++++++++++++++++++++++++
 weed/storage/needle_map_metric_test.go |  46 +++++++++
 5 files changed, 174 insertions(+), 47 deletions(-)
 create mode 100644 weed/storage/needle_map_metric.go
 create mode 100644 weed/storage/needle_map_metric_test.go

diff --git a/weed/storage/needle_map.go b/weed/storage/needle_map.go
index 14e4ccf3a..16ce458e7 100644
--- a/weed/storage/needle_map.go
+++ b/weed/storage/needle_map.go
@@ -84,46 +84,3 @@ func (nm *baseNeedleMapper) IndexFileContent() ([]byte, error) {
 	defer nm.indexFileAccessLock.Unlock()
 	return ioutil.ReadFile(nm.indexFile.Name())
 }
-
-type mapMetric struct {
-	indexFile *os.File
-
-	DeletionCounter     int    `json:"DeletionCounter"`
-	FileCounter         int    `json:"FileCounter"`
-	DeletionByteCounter uint64 `json:"DeletionByteCounter"`
-	FileByteCounter     uint64 `json:"FileByteCounter"`
-	MaximumFileKey      uint64 `json:"MaxFileKey"`
-}
-
-func (mm *mapMetric) logDelete(deletedByteCount uint32) {
-	mm.DeletionByteCounter = mm.DeletionByteCounter + uint64(deletedByteCount)
-	mm.DeletionCounter++
-}
-
-func (mm *mapMetric) logPut(key uint64, oldSize uint32, newSize uint32) {
-	if key > mm.MaximumFileKey {
-		mm.MaximumFileKey = key
-	}
-	mm.FileCounter++
-	mm.FileByteCounter = mm.FileByteCounter + uint64(newSize)
-	if oldSize > 0 {
-		mm.DeletionCounter++
-		mm.DeletionByteCounter = mm.DeletionByteCounter + uint64(oldSize)
-	}
-}
-
-func (mm mapMetric) ContentSize() uint64 {
-	return mm.FileByteCounter
-}
-func (mm mapMetric) DeletedSize() uint64 {
-	return mm.DeletionByteCounter
-}
-func (mm mapMetric) FileCount() int {
-	return mm.FileCounter
-}
-func (mm mapMetric) DeletedCount() int {
-	return mm.DeletionCounter
-}
-func (mm mapMetric) MaxFileKey() uint64 {
-	return mm.MaximumFileKey
-}
diff --git a/weed/storage/needle_map_boltdb.go b/weed/storage/needle_map_boltdb.go
index 5e64c5199..021dd6aa9 100644
--- a/weed/storage/needle_map_boltdb.go
+++ b/weed/storage/needle_map_boltdb.go
@@ -33,11 +33,11 @@ func NewBoltDbNeedleMap(dbFileName string, indexFile *os.File) (m *BoltDbNeedleM
 		return
 	}
 	glog.V(1).Infof("Loading %s...", indexFile.Name())
-	nm, indexLoadError := LoadBtreeNeedleMap(indexFile)
+	mm, indexLoadError := newNeedleMapMetricFromIndexFile(indexFile)
 	if indexLoadError != nil {
 		return nil, indexLoadError
 	}
-	m.mapMetric = nm.mapMetric
+	m.mapMetric = *mm
 	return
 }
 
diff --git a/weed/storage/needle_map_leveldb.go b/weed/storage/needle_map_leveldb.go
index 2d4ff4d71..c3e474033 100644
--- a/weed/storage/needle_map_leveldb.go
+++ b/weed/storage/needle_map_leveldb.go
@@ -31,11 +31,11 @@ func NewLevelDbNeedleMap(dbFileName string, indexFile *os.File) (m *LevelDbNeedl
 		return
 	}
 	glog.V(1).Infof("Loading %s...", indexFile.Name())
-	nm, indexLoadError := LoadBtreeNeedleMap(indexFile)
+	mm, indexLoadError := newNeedleMapMetricFromIndexFile(indexFile)
 	if indexLoadError != nil {
 		return nil, indexLoadError
 	}
-	m.mapMetric = nm.mapMetric
+	m.mapMetric = *mm
 	return
 }
 
diff --git a/weed/storage/needle_map_metric.go b/weed/storage/needle_map_metric.go
new file mode 100644
index 000000000..f43b29c59
--- /dev/null
+++ b/weed/storage/needle_map_metric.go
@@ -0,0 +1,124 @@
+package storage
+
+import (
+	"encoding/binary"
+	"fmt"
+	"os"
+
+	"github.com/chrislusf/seaweedfs/weed/glog"
+	"github.com/willf/bloom"
+)
+
+// mapMetric holds the aggregate counters kept for a needle map.
+type mapMetric struct {
+	DeletionCounter     int    `json:"DeletionCounter"`
+	FileCounter         int    `json:"FileCounter"`
+	DeletionByteCounter uint64 `json:"DeletionByteCounter"`
+	FileByteCounter     uint64 `json:"FileByteCounter"`
+	MaximumFileKey      uint64 `json:"MaxFileKey"`
+}
+
+// logDelete records one deletion of deletedByteCount bytes.
+func (mm *mapMetric) logDelete(deletedByteCount uint32) {
+	mm.DeletionByteCounter = mm.DeletionByteCounter + uint64(deletedByteCount)
+	mm.DeletionCounter++
+}
+
+// logPut records a write of newSize bytes under key. A non-zero oldSize is
+// additionally counted as one deletion of oldSize bytes.
+func (mm *mapMetric) logPut(key uint64, oldSize uint32, newSize uint32) {
+	if key > mm.MaximumFileKey {
+		mm.MaximumFileKey = key
+	}
+	mm.FileCounter++
+	mm.FileByteCounter = mm.FileByteCounter + uint64(newSize)
+	if oldSize > 0 {
+		mm.DeletionCounter++
+		mm.DeletionByteCounter = mm.DeletionByteCounter + uint64(oldSize)
+	}
+}
+
+func (mm mapMetric) ContentSize() uint64 {
+	return mm.FileByteCounter
+}
+func (mm mapMetric) DeletedSize() uint64 {
+	return mm.DeletionByteCounter
+}
+func (mm mapMetric) FileCount() int {
+	return mm.FileCounter
+}
+func (mm mapMetric) DeletedCount() int {
+	return mm.DeletionCounter
+}
+func (mm mapMetric) MaxFileKey() uint64 {
+	return mm.MaximumFileKey
+}
+
+// newNeedleMapMetricFromIndexFile rebuilds the metrics by scanning the index
+// file r from its last entry to its first. A bloom filter remembers the keys
+// already seen, so no needle map has to be built. Bloom filter false positives
+// can make FileCounter, DeletionCounter and DeletionByteCounter slightly off.
+func newNeedleMapMetricFromIndexFile(r *os.File) (mm *mapMetric, err error) {
+	mm = &mapMetric{}
+	var bf *bloom.BloomFilter
+	buf := make([]byte, 8)
+	err = reverseWalkIndexFile(r, func(entryCount int64) {
+		bf = bloom.NewWithEstimates(uint(entryCount), 0.001)
+	}, func(key uint64, offset, size uint32) error {
+
+		if key > mm.MaximumFileKey {
+			mm.MaximumFileKey = key
+		}
+
+		binary.BigEndian.PutUint64(buf, key)
+		if size != TombstoneFileSize {
+			mm.FileByteCounter += uint64(size)
+		}
+
+		if !bf.Test(buf) {
+			mm.FileCounter++
+			bf.Add(buf)
+		} else {
+			// a later entry for this key was already seen, so this one is superseded
+			mm.DeletionCounter++
+			if size != TombstoneFileSize {
+				// the superseded entry is not a tombstone: count its bytes as deleted
+				mm.DeletionByteCounter += uint64(size)
+			}
+		}
+		return nil
+	})
+	return
+}
+
+// reverseWalkIndexFile calls fn for every entry of the index file r, starting
+// with the last entry and ending with the first. initFn is called once, with
+// the number of entries, before the walk starts. The walk stops at the first
+// failed read or at the first error returned by fn.
+func reverseWalkIndexFile(r *os.File, initFn func(entryCount int64), fn func(key uint64, offset, size uint32) error) error {
+	fi, err := r.Stat()
+	if err != nil {
+		return fmt.Errorf("file %s stat error: %v", r.Name(), err)
+	}
+	fileSize := fi.Size()
+	if fileSize%NeedleIndexSize != 0 {
+		return fmt.Errorf("unexpected file %s size: %d", r.Name(), fileSize)
+	}
+
+	initFn(fileSize / NeedleIndexSize)
+
+	bytes := make([]byte, NeedleIndexSize)
+	for readerOffset := fileSize - NeedleIndexSize; readerOffset >= 0; readerOffset -= NeedleIndexSize {
+		count, e := r.ReadAt(bytes, readerOffset)
+		glog.V(3).Infoln("file", r.Name(), "readerOffset", readerOffset, "count", count, "e", e)
+		if count != len(bytes) {
+			// ReadAt returns a non-nil error on a short read; never decode a partial entry
+			return fmt.Errorf("file %s read error at offset %d: %v", r.Name(), readerOffset, e)
+		}
+		key, offset, size := idxFileEntry(bytes)
+		if e = fn(key, offset, size); e != nil {
+			return e
+		}
+	}
+	return nil
+}
diff --git a/weed/storage/needle_map_metric_test.go b/weed/storage/needle_map_metric_test.go
new file mode 100644
index 000000000..0be2e1d7c
--- /dev/null
+++ b/weed/storage/needle_map_metric_test.go
@@ -0,0 +1,46 @@
+package storage
+
+import (
+	"io/ioutil"
+	"math/rand"
+	"os"
+	"testing"
+)
+
+func TestFastLoadingNeedleMapMetrics(t *testing.T) {
+
+	idxFile, err := ioutil.TempFile("", "tmp.idx")
+	if err != nil {
+		t.Fatalf("create temp index file: %v", err)
+	}
+	defer os.Remove(idxFile.Name())
+	defer idxFile.Close()
+
+	nm := NewBtreeNeedleMap(idxFile)
+
+	for i := 0; i < 10000; i++ {
+		nm.Put(uint64(i+1), uint32(0), uint32(1))
+		if rand.Float32() < 0.2 {
+			// keys 1..i+1 have been put so far; Int63n panics on a zero argument
+			nm.Delete(uint64(rand.Int63n(int64(i+1))+1), uint32(0))
+		}
+	}
+
+	mm, err := newNeedleMapMetricFromIndexFile(idxFile)
+	if err != nil {
+		t.Fatalf("load metrics from %s: %v", idxFile.Name(), err)
+	}
+
+	// these two do not depend on the bloom filter and must match exactly
+	if nm.MaxFileKey() != mm.MaxFileKey() {
+		t.Errorf("MaxFileKey expected %d actual %d", nm.MaxFileKey(), mm.MaxFileKey())
+	}
+	if nm.ContentSize() != mm.ContentSize() {
+		t.Errorf("ContentSize expected %d actual %d", nm.ContentSize(), mm.ContentSize())
+	}
+
+	// these can be slightly off because of bloom filter false positives
+	t.Logf("FileCount expected %d actual %d", nm.FileCount(), mm.FileCount())
+	t.Logf("DeletedSize expected %d actual %d", nm.DeletedSize(), mm.DeletedSize())
+	t.Logf("DeletedCount expected %d actual %d", nm.DeletedCount(), mm.DeletedCount())
+}