diff --git a/weed-fs/src/pkg/storage/compact_map.go b/weed-fs/src/pkg/storage/compact_map.go new file mode 100644 index 000000000..4e83a76c8 --- /dev/null +++ b/weed-fs/src/pkg/storage/compact_map.go @@ -0,0 +1,158 @@ +package storage + +import () + +type NeedleValue struct { + Key Key + Offset uint32 "Volume offset" //since aligned to 8 bytes, range is 4G*8=32G + Size uint32 "Size of the data portion" +} + +const ( + batch = 100000 +) + +type Key uint64 + +type CompactSection struct { + values []NeedleValue + overflow map[Key]*NeedleValue + start Key + end Key + counter int +} + +func NewCompactSection(start Key) CompactSection { + return CompactSection{ + values: make([]NeedleValue, batch), + overflow: make(map[Key]*NeedleValue), + start: start, + } +} +func (cs *CompactSection) Set(key Key, offset uint32, size uint32) { + if key > cs.end { + cs.end = key + } + if i := cs.binarySearchValues(key); i >= 0 { + cs.values[i].Offset, cs.values[i].Size = offset, size + } else { + needOverflow := cs.counter >= batch + needOverflow = needOverflow || cs.counter > 0 && cs.values[cs.counter-1].Key > key + if needOverflow { + //println("start", cs.start, "counter", cs.counter, "key", key) + cs.overflow[key] = &NeedleValue{Key: key, Offset: offset, Size: size} + } else { + p := &cs.values[cs.counter] + p.Key, p.Offset, p.Size = key, offset, size + //println("added index", cs.counter, "key", key, cs.values[cs.counter].Key) + cs.counter++ + } + } +} +func (cs *CompactSection) Delete(key Key) { + if i := cs.binarySearchValues(key); i >= 0 { + cs.values[i].Size = 0 + } + delete(cs.overflow, key) +} +func (cs *CompactSection) Get(key Key) (*NeedleValue, bool) { + if v, ok := cs.overflow[key]; ok { + return v, true + } + if i := cs.binarySearchValues(key); i >= 0 { + return &cs.values[i], true + } + return nil, false +} +func (cs *CompactSection) binarySearchValues(key Key) int { + l, h := 0, cs.counter-1 + if h >= 0 && cs.values[h].Key < key { + return -2 + } + //println("looking for key", key) + for l <= h { + m := (l + h) / 2 + //println("mid", m, "key", cs.values[m].Key, cs.values[m].Offset, cs.values[m].Size) + if cs.values[m].Key < key { + l = m + 1 + } else if key < cs.values[m].Key { + h = m - 1 + } else { + //println("found", m) + return m + } + } + return -1 +} + +//This map assumes mostly inserting increasing keys +type CompactMap struct { + list []CompactSection +} + +func NewCompactMap() CompactMap { + return CompactMap{} +} + +func (cm *CompactMap) Set(key Key, offset uint32, size uint32) { + x := cm.binarySearchCompactSection(key) + if x < 0 { + //println(x, "creating", len(cm.list), "section1, starting", key) + cm.list = append(cm.list, NewCompactSection(key)) + x = len(cm.list) - 1 + } + cm.list[x].Set(key, offset, size) +} +func (cm *CompactMap) Delete(key Key) { + x := cm.binarySearchCompactSection(key) + if x < 0 { + return + } + cm.list[x].Delete(key) +} +func (cm *CompactMap) Get(key Key) (*NeedleValue, bool) { + x := cm.binarySearchCompactSection(key) + if x < 0 { + return nil, false + } + return cm.list[x].Get(key) +} +func (cm *CompactMap) binarySearchCompactSection(key Key) int { + l, h := 0, len(cm.list)-1 + if h < 0 { + return -5 + } + if cm.list[h].start <= key { + if cm.list[h].counter < batch || key <= cm.list[h].end{ + return h + } else { + return -4 + } + } + for l <= h { + m := (l + h) / 2 + if key < cm.list[m].start { + h = m - 1 + } else { // cm.list[m].start <= key + if cm.list[m+1].start <= key { + l = m + 1 + } else { + return m + } + } + } + return -3 +} + +func (cm *CompactMap) Peek() { + for k, v := range cm.list[0].values { + if k < 100 { + println("[", v.Key, v.Offset, v.Size, "]") + } + } + for k, v := range cm.list[0].overflow { + if k < 100 { + println("o[", v.Key, v.Offset, v.Size, "]") + } + } +} diff --git a/weed-fs/src/pkg/storage/compact_map_perf_test.go b/weed-fs/src/pkg/storage/compact_map_perf_test.go new file mode 100644 index 000000000..2e2227279 --- /dev/null +++ b/weed-fs/src/pkg/storage/compact_map_perf_test.go @@ -0,0 +1,43 @@ +package storage + +import ( + "testing" + "log" + "os" + "pkg/util" +) + +func TestMemoryUsage(t *testing.T) { + + indexFile, ie := os.OpenFile("sample.idx", os.O_RDWR|os.O_RDONLY, 0644) + if ie != nil { + log.Fatalln(ie) + } + LoadNewNeedleMap(indexFile) + +} + +func LoadNewNeedleMap(file *os.File) CompactMap { + m := NewCompactMap() + bytes := make([]byte, 16*1024) + count, e := file.Read(bytes) + if count > 0 { + fstat, _ := file.Stat() + log.Println("Loading index file", fstat.Name(), "size", fstat.Size()) + } + for count > 0 && e == nil { + for i := 0; i < count; i += 16 { + key := util.BytesToUint64(bytes[i : i+8]) + offset := util.BytesToUint32(bytes[i+8 : i+12]) + size := util.BytesToUint32(bytes[i+12 : i+16]) + if offset > 0 { + m.Set(Key(key), offset, size) + } else { + //delete(m, key) + } + } + + count, e = file.Read(bytes) + } + return m +} diff --git a/weed-fs/src/pkg/storage/compact_map_test.go b/weed-fs/src/pkg/storage/compact_map_test.go new file mode 100644 index 000000000..6c3bb6e96 --- /dev/null +++ b/weed-fs/src/pkg/storage/compact_map_test.go @@ -0,0 +1,65 @@ +package storage + +import ( + "testing" +) + +func TestXYZ(t *testing.T) { + m := NewCompactMap() + for i := uint32(0); i < 100*batch; i += 2 { + m.Set(Key(i), i, i) + } + + for i := uint32(0); i < 100*batch; i += 37 { + m.Delete(Key(i)) + } + + for i := uint32(0); i < 10*batch; i += 3 { + m.Set(Key(i), i+11, i+5) + } + +// for i := uint32(0); i < 100; i++ { +// if v := m.Get(Key(i)); v != nil { +// println(i, "=", v.Key, v.Offset, v.Size) +// } +// } + + for i := uint32(0); i < 10*batch; i++ { + v, ok := m.Get(Key(i)) + if i%3 == 0 { + if !ok { + t.Fatal("key", i, "missing!") + } + if v.Size != i+5 { + t.Fatal("key", i, "size", v.Size) + } + } else if i%37 == 0 { + if ok && v.Size > 0 { + t.Fatal("key", i, "should have been deleted needle value", v) + } + } else if i%2 == 0 { + if v.Size != i { + t.Fatal("key", i, "size", v.Size) + } + } + } + + //println("cm.list =", len(m.list)) + + for i := uint32(10 * batch); i < 100*batch; i++ { + v, ok := m.Get(Key(i)) + if i%37 == 0 { + if ok && v.Size > 0 { + t.Fatal("key", i, "should have been deleted needle value", v) + } + } else if i%2 == 0 { + if v==nil{ + t.Fatal("key", i, "missing") + } + if v.Size != i { + t.Fatal("key", i, "size", v.Size) + } + } + } + +} diff --git a/weed-fs/src/pkg/storage/needle_map.go b/weed-fs/src/pkg/storage/needle_map.go index 4e6406d6a..53a640052 100644 --- a/weed-fs/src/pkg/storage/needle_map.go +++ b/weed-fs/src/pkg/storage/needle_map.go @@ -6,20 +6,15 @@ import ( "pkg/util" ) -type NeedleValue struct { - Offset uint32 "Volume offset" //since aligned to 8 bytes, range is 4G*8=32G - Size uint32 "Size of the data portion" -} - type NeedleMap struct { indexFile *os.File - m map[uint64]NeedleValue //mapping needle key(uint64) to NeedleValue + m CompactMap bytes []byte } func NewNeedleMap(file *os.File) *NeedleMap { nm := &NeedleMap{ - m: make(map[uint64]NeedleValue), + m: NewCompactMap(), bytes: make([]byte, 16), indexFile: file, } @@ -43,31 +38,31 @@ func LoadNeedleMap(file *os.File) *NeedleMap { key := util.BytesToUint64(bytes[i : i+8]) offset := util.BytesToUint32(bytes[i+8 : i+12]) size := util.BytesToUint32(bytes[i+12 : i+16]) - if offset>0 { - nm.m[key] = NeedleValue{util.Offset: offset, Size: size} - }else{ - delete(nm.m, key) - } - } - + if offset > 0 { + nm.m.Set(Key(key), offset, size) + } else { + nm.m.Delete(Key(key)) + } + } + count, e = nm.indexFile.Read(bytes) } return nm -} +} func (nm *NeedleMap) Put(key uint64, offset uint32, size uint32) (int, error) { - nm.m[key] = NeedleValue{Offset: offset, Size: size} + nm.m.Set(Key(key), offset, size) util.Uint64toBytes(nm.bytes[0:8], key) util.Uint32toBytes(nm.bytes[8:12], offset) util.Uint32toBytes(nm.bytes[12:16], size) return nm.indexFile.Write(nm.bytes) } -func (nm *NeedleMap) Get(key uint64) (element NeedleValue, ok bool) { - element, ok = nm.m[key] +func (nm *NeedleMap) Get(key uint64) (element *NeedleValue, ok bool) { + element, ok = nm.m.Get(Key(key)) return } func (nm *NeedleMap) Delete(key uint64) { - delete(nm.m, key) + nm.m.Delete(Key(key)) util.Uint64toBytes(nm.bytes[0:8], key) util.Uint32toBytes(nm.bytes[8:12], 0) util.Uint32toBytes(nm.bytes[12:16], 0) @@ -76,6 +71,3 @@ func (nm *NeedleMap) Delete(key uint64) { func (nm *NeedleMap) Close() { nm.indexFile.Close() } -func (nm *NeedleMap) Length() int{ - return len(nm.m) -} diff --git a/weed-fs/src/pkg/storage/sample.idx b/weed-fs/src/pkg/storage/sample.idx new file mode 100644 index 000000000..44918b41d Binary files /dev/null and b/weed-fs/src/pkg/storage/sample.idx differ