Browse Source
			
			
			faster loading boltdb or leveldb needle map metrics by bloomfilter
			
				
		faster loading boltdb or leveldb needle map metrics by bloomfilter
	
		
	
			
				avoid btree
				pull/680/head
				 5 changed files with 140 additions and 47 deletions
			
			
		- 
					43weed/storage/needle_map.go
- 
					4weed/storage/needle_map_boltdb.go
- 
					4weed/storage/needle_map_leveldb.go
- 
					107weed/storage/needle_map_metric.go
- 
					29weed/storage/needle_map_metric_test.go
| @ -0,0 +1,107 @@ | |||
| package storage | |||
| 
 | |||
| import ( | |||
| 	"fmt" | |||
| 	"os" | |||
| 	"github.com/willf/bloom" | |||
| 	"github.com/chrislusf/seaweedfs/weed/glog" | |||
| 	"encoding/binary" | |||
| ) | |||
| 
 | |||
// mapMetric holds running totals for a needle map: the number of entries
// written (FileCounter) and their cumulative size (FileByteCounter), the number
// of entries deleted or overwritten (DeletionCounter) and their cumulative size
// (DeletionByteCounter), and the largest key passed to logPut (MaximumFileKey).
type mapMetric struct {
	DeletionCounter     int    `json:"DeletionCounter"`
	FileCounter         int    `json:"FileCounter"`
	DeletionByteCounter uint64 `json:"DeletionByteCounter"`
	FileByteCounter     uint64 `json:"FileByteCounter"`
	MaximumFileKey      uint64 `json:"MaxFileKey"`
}

// logDelete records one deletion that released deletedByteCount bytes.
func (m *mapMetric) logDelete(deletedByteCount uint32) {
	m.DeletionCounter++
	m.DeletionByteCounter += uint64(deletedByteCount)
}

// logPut records one write of newSize bytes under key, raising the maximum key
// if needed. A non-zero oldSize means an existing entry was replaced, which is
// additionally booked as one deletion of oldSize bytes.
func (m *mapMetric) logPut(key uint64, oldSize uint32, newSize uint32) {
	if m.MaximumFileKey < key {
		m.MaximumFileKey = key
	}

	m.FileCounter++
	m.FileByteCounter += uint64(newSize)

	if oldSize == 0 {
		// Nothing was replaced, so there is no deletion to account for.
		return
	}
	m.DeletionCounter++
	m.DeletionByteCounter += uint64(oldSize)
}

// ContentSize returns the total number of bytes recorded by writes.
func (m mapMetric) ContentSize() uint64 { return m.FileByteCounter }

// DeletedSize returns the total number of bytes recorded as deleted.
func (m mapMetric) DeletedSize() uint64 { return m.DeletionByteCounter }

// FileCount returns the number of writes recorded.
func (m mapMetric) FileCount() int { return m.FileCounter }

// DeletedCount returns the number of deletions recorded.
func (m mapMetric) DeletedCount() int { return m.DeletionCounter }

// MaxFileKey returns the largest key recorded so far.
func (m mapMetric) MaxFileKey() uint64 { return m.MaximumFileKey }
| 
 | |||
| func newNeedleMapMetricFromIndexFile(r *os.File) (mm *mapMetric, err error) { | |||
| 	mm = &mapMetric{} | |||
| 	var bf *bloom.BloomFilter | |||
| 	buf := make([]byte, 8) | |||
| 	err = reverseWalkIndexFile(r, func(entryCount int64) { | |||
| 		bf = bloom.NewWithEstimates(uint(entryCount), 0.001) | |||
| 	}, func(key uint64, offset, size uint32) error { | |||
| 
 | |||
| 		if key > mm.MaximumFileKey { | |||
| 			mm.MaximumFileKey = key | |||
| 		} | |||
| 
 | |||
| 		binary.BigEndian.PutUint64(buf, key) | |||
| 		if size != TombstoneFileSize { | |||
| 			mm.FileByteCounter += uint64(size) | |||
| 		} | |||
| 
 | |||
| 		if !bf.Test(buf) { | |||
| 			mm.FileCounter++ | |||
| 			bf.Add(buf) | |||
| 		} else { | |||
| 			// deleted file
 | |||
| 			mm.DeletionCounter++ | |||
| 			if size != TombstoneFileSize { | |||
| 				// previously already deleted file
 | |||
| 				mm.DeletionByteCounter += uint64(size) | |||
| 			} | |||
| 		} | |||
| 		return nil | |||
| 	}) | |||
| 	return | |||
| } | |||
| 
 | |||
| func reverseWalkIndexFile(r *os.File, initFn func(entryCount int64), fn func(key uint64, offset, size uint32) error) error { | |||
| 	fi, err := r.Stat() | |||
| 	if err != nil { | |||
| 		return fmt.Errorf("file %s stat error: %v", r.Name(), err) | |||
| 	} | |||
| 	fileSize := fi.Size() | |||
| 	if fileSize%NeedleIndexSize != 0 { | |||
| 		return fmt.Errorf("unexpected file %s size: %d", r.Name(), fileSize) | |||
| 	} | |||
| 
 | |||
| 	initFn(fileSize / NeedleIndexSize) | |||
| 
 | |||
| 	bytes := make([]byte, NeedleIndexSize) | |||
| 	for readerOffset := fileSize - NeedleIndexSize; readerOffset >= 0; readerOffset -= NeedleIndexSize { | |||
| 		count, e := r.ReadAt(bytes, readerOffset) | |||
| 		glog.V(3).Infoln("file", r.Name(), "readerOffset", readerOffset, "count", count, "e", e) | |||
| 		key, offset, size := idxFileEntry(bytes) | |||
| 		if e = fn(key, offset, size); e != nil { | |||
| 			return e | |||
| 		} | |||
| 	} | |||
| 	return nil | |||
| } | |||
| @ -0,0 +1,29 @@ | |||
| package storage | |||
| 
 | |||
| import ( | |||
| 	"testing" | |||
| 	"io/ioutil" | |||
| 	"math/rand" | |||
| 	"github.com/chrislusf/seaweedfs/weed/glog" | |||
| ) | |||
| 
 | |||
| func TestFastLoadingNeedleMapMetrics(t *testing.T) { | |||
| 
 | |||
| 	idxFile, _ := ioutil.TempFile("", "tmp.idx") | |||
| 	nm := NewBtreeNeedleMap(idxFile) | |||
| 
 | |||
| 	for i := 0; i < 10000; i++ { | |||
| 		nm.Put(uint64(i+1), uint32(0), uint32(1)) | |||
| 		if rand.Float32() < 0.2 { | |||
| 			nm.Delete(uint64(rand.Int63n(int64(i))+1), uint32(0)) | |||
| 		} | |||
| 	} | |||
| 
 | |||
| 	mm, _ := newNeedleMapMetricFromIndexFile(idxFile) | |||
| 
 | |||
| 	glog.V(0).Infof("FileCount expected %d actual %d", nm.FileCount(), mm.FileCount()) | |||
| 	glog.V(0).Infof("DeletedSize expected %d actual %d", nm.DeletedSize(), mm.DeletedSize()) | |||
| 	glog.V(0).Infof("ContentSize expected %d actual %d", nm.ContentSize(), mm.ContentSize()) | |||
| 	glog.V(0).Infof("DeletedCount expected %d actual %d", nm.DeletedCount(), mm.DeletedCount()) | |||
| 	glog.V(0).Infof("MaxFileKey expected %d actual %d", nm.MaxFileKey(), mm.MaxFileKey()) | |||
| } | |||
						Write
						Preview
					
					
					Loading…
					
					Cancel
						Save
					
		Reference in new issue