diff --git a/weed/storage/needle_map/compact_map.go b/weed/storage/needle_map/compact_map.go index 567f9f7de..e5baff1e2 100644 --- a/weed/storage/needle_map/compact_map.go +++ b/weed/storage/needle_map/compact_map.go @@ -2,6 +2,7 @@ package needle_map import ( "fmt" + "math" "sort" "sync" @@ -9,14 +10,23 @@ import ( ) const ( - SegmentChunkSize = 25000 + MaxCompactKey = math.MaxUint16 + SegmentChunkSize = 50000 // should be <= MaxCompactKey ) +type CompactKey uint16 +type CompactOffset [types.OffsetSize]byte +type CompactNeedleValue struct { + key CompactKey + offset CompactOffset + size types.Size +} + type CompactMapSegment struct { - // TODO: maybe a compact-er structure for needle values? - list []NeedleValue - firstKey types.NeedleId - lastKey types.NeedleId + list []CompactNeedleValue + chunk int + firstKey CompactKey + lastKey CompactKey } type CompactMap struct { @@ -25,12 +35,35 @@ type CompactMap struct { segments map[int]*CompactMapSegment } +func (ck CompactKey) Key(chunk int) types.NeedleId { + return (types.NeedleId(SegmentChunkSize) * types.NeedleId(chunk)) + types.NeedleId(ck) +} + +func OffsetToCompact(offset types.Offset) CompactOffset { + var co CompactOffset + types.OffsetToBytes(co[:], offset) + return co +} + +func (co CompactOffset) Offset() types.Offset { + return types.BytesToOffset(co[:]) +} + +func (cnv CompactNeedleValue) NeedleValue(chunk int) NeedleValue { + key := cnv.key.Key(chunk) + return NeedleValue{ + Key: key, + Offset: cnv.offset.Offset(), + Size: cnv.size, + } +} + func newCompactMapSegment(chunk int) *CompactMapSegment { - startKey := types.NeedleId(chunk * SegmentChunkSize) return &CompactMapSegment{ - list: []NeedleValue{}, - firstKey: startKey + SegmentChunkSize - 1, - lastKey: startKey, + list: []CompactNeedleValue{}, + chunk: chunk, + firstKey: MaxCompactKey, + lastKey: 0, } } @@ -42,41 +75,49 @@ func (cs *CompactMapSegment) cap() int { return cap(cs.list) } -// bsearchKey returns the NeedleValue index for a given ID key. +func (cs *CompactMapSegment) compactKey(key types.NeedleId) CompactKey { + return CompactKey(key - (types.NeedleId(SegmentChunkSize) * types.NeedleId(cs.chunk))) +} + +// bsearchKey returns the CompactNeedleValue index for a given ID key. // If the key is not found, it returns the index where it should be inserted instead. func (cs *CompactMapSegment) bsearchKey(key types.NeedleId) (int, bool) { + ck := cs.compactKey(key) + switch { case len(cs.list) == 0: return 0, false - case key == cs.firstKey: + case ck == cs.firstKey: return 0, true - case key <= cs.firstKey: + case ck <= cs.firstKey: return 0, false - case key == cs.lastKey: + case ck == cs.lastKey: return len(cs.list) - 1, true - case key > cs.lastKey: + case ck > cs.lastKey: return len(cs.list), false } i := sort.Search(len(cs.list), func(i int) bool { - return cs.list[i].Key >= key + return cs.list[i].key >= ck }) - return i, cs.list[i].Key == key + return i, cs.list[i].key == ck } -// set inserts/updates a NeedleValue. +// set inserts/updates a CompactNeedleValue. // If the operation is an update, returns the overwritten value's previous offset and size. func (cs *CompactMapSegment) set(key types.NeedleId, offset types.Offset, size types.Size) (oldOffset types.Offset, oldSize types.Size) { i, found := cs.bsearchKey(key) if found { // update - oldOffset.OffsetLower = cs.list[i].Offset.OffsetLower - oldOffset.OffsetHigher = cs.list[i].Offset.OffsetHigher - oldSize = cs.list[i].Size - - cs.list[i].Size = size - cs.list[i].Offset.OffsetLower = offset.OffsetLower - cs.list[i].Offset.OffsetHigher = offset.OffsetHigher + o := cs.list[i].offset.Offset() + oldOffset.OffsetLower = o.OffsetLower + oldOffset.OffsetHigher = o.OffsetHigher + oldSize = cs.list[i].size + + o.OffsetLower = offset.OffsetLower + o.OffsetHigher = offset.OffsetHigher + cs.list[i].offset = OffsetToCompact(o) + cs.list[i].size = size return } @@ -86,32 +127,33 @@ func (cs *CompactMapSegment) set(key types.NeedleId, offset types.Offset, size t } if len(cs.list) == SegmentChunkSize-1 { // if we max out our segment storage, pin its capacity to minimize memory usage - nl := make([]NeedleValue, SegmentChunkSize, SegmentChunkSize) + nl := make([]CompactNeedleValue, SegmentChunkSize, SegmentChunkSize) copy(nl, cs.list[:i]) copy(nl[i+1:], cs.list[i:]) cs.list = nl } else { - cs.list = append(cs.list, NeedleValue{}) + cs.list = append(cs.list, CompactNeedleValue{}) copy(cs.list[i+1:], cs.list[i:]) } - cs.list[i] = NeedleValue{ - Key: key, - Offset: offset, - Size: size, + ck := cs.compactKey(key) + cs.list[i] = CompactNeedleValue{ + key: ck, + offset: OffsetToCompact(offset), + size: size, } - if key < cs.firstKey { - cs.firstKey = key + if ck < cs.firstKey { + cs.firstKey = ck } - if key > cs.lastKey { - cs.lastKey = key + if ck > cs.lastKey { + cs.lastKey = ck } return } // get seeks a map entry by key. Returns an entry pointer, with a boolean specifiying if the entry was found. -func (cs *CompactMapSegment) get(key types.NeedleId) (*NeedleValue, bool) { +func (cs *CompactMapSegment) get(key types.NeedleId) (*CompactNeedleValue, bool) { if i, found := cs.bsearchKey(key); found { return &cs.list[i], true } @@ -122,9 +164,9 @@ func (cs *CompactMapSegment) get(key types.NeedleId) (*NeedleValue, bool) { // delete deletes a map entry by key. Returns the entries' previous Size, if available. func (cs *CompactMapSegment) delete(key types.NeedleId) types.Size { if i, found := cs.bsearchKey(key); found { - if cs.list[i].Size > 0 && cs.list[i].Size.IsValid() { - ret := cs.list[i].Size - cs.list[i].Size = -cs.list[i].Size + if cs.list[i].size > 0 && cs.list[i].size.IsValid() { + ret := cs.list[i].size + cs.list[i].size = -cs.list[i].size return ret } } @@ -188,7 +230,11 @@ func (cm *CompactMap) Get(key types.NeedleId) (*NeedleValue, bool) { defer cm.RUnlock() cs := cm.segmentForKey(key) - return cs.get(key) + if cnv, found := cs.get(key); found { + nv := cnv.NeedleValue(cs.chunk) + return &nv, true + } + return nil, false } // Delete deletes a map entry by key. Returns the entries' previous Size, if available. @@ -212,7 +258,9 @@ func (cm *CompactMap) AscendingVisit(visit func(NeedleValue) error) error { sort.Ints(chunks) for _, c := range chunks { - for _, nv := range cm.segments[c].list { + cs := cm.segments[c] + for _, cnv := range cs.list { + nv := cnv.NeedleValue(cs.chunk) if err := visit(nv); err != nil { return err } diff --git a/weed/storage/needle_map/compact_map_test.go b/weed/storage/needle_map/compact_map_test.go index 82dc2f9f6..969808531 100644 --- a/weed/storage/needle_map/compact_map_test.go +++ b/weed/storage/needle_map/compact_map_test.go @@ -11,12 +11,12 @@ import ( func TestSegmentBsearchKey(t *testing.T) { testSegment := &CompactMapSegment{ - list: []NeedleValue{ - NeedleValue{Key: 10}, - NeedleValue{Key: 20}, - NeedleValue{Key: 21}, - NeedleValue{Key: 26}, - NeedleValue{Key: 30}, + list: []CompactNeedleValue{ + CompactNeedleValue{key: 10}, + CompactNeedleValue{key: 20}, + CompactNeedleValue{key: 21}, + CompactNeedleValue{key: 26}, + CompactNeedleValue{key: 30}, }, firstKey: 10, lastKey: 30, @@ -116,10 +116,10 @@ func TestSegmentBsearchKey(t *testing.T) { func TestSegmentSet(t *testing.T) { testSegment := &CompactMapSegment{ - list: []NeedleValue{ - NeedleValue{Key: 10, Offset: types.Uint32ToOffset(0), Size: 100}, - NeedleValue{Key: 20, Offset: types.Uint32ToOffset(100), Size: 200}, - NeedleValue{Key: 30, Offset: types.Uint32ToOffset(300), Size: 300}, + list: []CompactNeedleValue{ + CompactNeedleValue{key: 10, offset: OffsetToCompact(types.Uint32ToOffset(0)), size: 100}, + CompactNeedleValue{key: 20, offset: OffsetToCompact(types.Uint32ToOffset(100)), size: 200}, + CompactNeedleValue{key: 30, offset: OffsetToCompact(types.Uint32ToOffset(300)), size: 300}, }, firstKey: 10, lastKey: 30, @@ -173,13 +173,13 @@ func TestSegmentSet(t *testing.T) { } wantSegment := &CompactMapSegment{ - list: []NeedleValue{ - NeedleValue{Key: 5, Offset: types.Uint32ToOffset(1000), Size: 123}, - NeedleValue{Key: 10, Offset: types.Uint32ToOffset(0), Size: 100}, - NeedleValue{Key: 20, Offset: types.Uint32ToOffset(100), Size: 200}, - NeedleValue{Key: 25, Offset: types.Uint32ToOffset(8000), Size: 789}, - NeedleValue{Key: 30, Offset: types.Uint32ToOffset(9000), Size: 999}, - NeedleValue{Key: 51, Offset: types.Uint32ToOffset(7000), Size: 456}, + list: []CompactNeedleValue{ + CompactNeedleValue{key: 5, offset: OffsetToCompact(types.Uint32ToOffset(1000)), size: 123}, + CompactNeedleValue{key: 10, offset: OffsetToCompact(types.Uint32ToOffset(0)), size: 100}, + CompactNeedleValue{key: 20, offset: OffsetToCompact(types.Uint32ToOffset(100)), size: 200}, + CompactNeedleValue{key: 25, offset: OffsetToCompact(types.Uint32ToOffset(8000)), size: 789}, + CompactNeedleValue{key: 30, offset: OffsetToCompact(types.Uint32ToOffset(9000)), size: 999}, + CompactNeedleValue{key: 51, offset: OffsetToCompact(types.Uint32ToOffset(7000)), size: 456}, }, firstKey: 5, lastKey: 51, @@ -213,7 +213,7 @@ func TestSegmentSetOrdering(t *testing.T) { t.Errorf("expected size %d, got %d", want, got) } for i := 1; i < cs.len(); i++ { - if ka, kb := cs.list[i-1].Key, cs.list[i].Key; ka >= kb { + if ka, kb := cs.list[i-1].key, cs.list[i].key; ka >= kb { t.Errorf("found out of order entries at (%d, %d) = (%d, %d)", i-1, i, ka, kb) } } @@ -221,10 +221,10 @@ func TestSegmentSetOrdering(t *testing.T) { func TestSegmentGet(t *testing.T) { testSegment := &CompactMapSegment{ - list: []NeedleValue{ - NeedleValue{Key: 10, Offset: types.Uint32ToOffset(0), Size: 100}, - NeedleValue{Key: 20, Offset: types.Uint32ToOffset(100), Size: 200}, - NeedleValue{Key: 30, Offset: types.Uint32ToOffset(300), Size: 300}, + list: []CompactNeedleValue{ + CompactNeedleValue{key: 10, offset: OffsetToCompact(types.Uint32ToOffset(0)), size: 100}, + CompactNeedleValue{key: 20, offset: OffsetToCompact(types.Uint32ToOffset(100)), size: 200}, + CompactNeedleValue{key: 30, offset: OffsetToCompact(types.Uint32ToOffset(300)), size: 300}, }, firstKey: 10, lastKey: 30, @@ -233,7 +233,7 @@ func TestSegmentGet(t *testing.T) { testCases := []struct { name string key types.NeedleId - wantValue *NeedleValue + wantValue *CompactNeedleValue wantFound bool }{ { @@ -277,11 +277,11 @@ func TestSegmentGet(t *testing.T) { func TestSegmentDelete(t *testing.T) { testSegment := &CompactMapSegment{ - list: []NeedleValue{ - NeedleValue{Key: 10, Offset: types.Uint32ToOffset(0), Size: 100}, - NeedleValue{Key: 20, Offset: types.Uint32ToOffset(100), Size: 200}, - NeedleValue{Key: 30, Offset: types.Uint32ToOffset(300), Size: 300}, - NeedleValue{Key: 40, Offset: types.Uint32ToOffset(600), Size: 400}, + list: []CompactNeedleValue{ + CompactNeedleValue{key: 10, offset: OffsetToCompact(types.Uint32ToOffset(0)), size: 100}, + CompactNeedleValue{key: 20, offset: OffsetToCompact(types.Uint32ToOffset(100)), size: 200}, + CompactNeedleValue{key: 30, offset: OffsetToCompact(types.Uint32ToOffset(300)), size: 300}, + CompactNeedleValue{key: 40, offset: OffsetToCompact(types.Uint32ToOffset(600)), size: 400}, }, firstKey: 10, lastKey: 40, @@ -317,11 +317,11 @@ func TestSegmentDelete(t *testing.T) { } wantSegment := &CompactMapSegment{ - list: []NeedleValue{ - NeedleValue{Key: 10, Offset: types.Uint32ToOffset(0), Size: 100}, - NeedleValue{Key: 20, Offset: types.Uint32ToOffset(100), Size: -200}, - NeedleValue{Key: 30, Offset: types.Uint32ToOffset(300), Size: 300}, - NeedleValue{Key: 40, Offset: types.Uint32ToOffset(600), Size: -400}, + list: []CompactNeedleValue{ + CompactNeedleValue{key: 10, offset: OffsetToCompact(types.Uint32ToOffset(0)), size: 100}, + CompactNeedleValue{key: 20, offset: OffsetToCompact(types.Uint32ToOffset(100)), size: -200}, + CompactNeedleValue{key: 30, offset: OffsetToCompact(types.Uint32ToOffset(300)), size: 300}, + CompactNeedleValue{key: 40, offset: OffsetToCompact(types.Uint32ToOffset(600)), size: -400}, }, firstKey: 10, lastKey: 40, @@ -343,8 +343,9 @@ func TestSegmentForKey(t *testing.T) { name: "first segment", key: 12, want: &CompactMapSegment{ - list: []NeedleValue{}, - firstKey: SegmentChunkSize - 1, + list: []CompactNeedleValue{}, + chunk: 0, + firstKey: MaxCompactKey, lastKey: 0, }, }, @@ -352,18 +353,20 @@ func TestSegmentForKey(t *testing.T) { name: "second segment, gapless", key: SegmentChunkSize + 34, want: &CompactMapSegment{ - list: []NeedleValue{}, - firstKey: (2 * SegmentChunkSize) - 1, - lastKey: SegmentChunkSize, + list: []CompactNeedleValue{}, + chunk: 1, + firstKey: MaxCompactKey, + lastKey: 0, }, }, { name: "gapped segment", key: (5 * SegmentChunkSize) + 56, want: &CompactMapSegment{ - list: []NeedleValue{}, - firstKey: (6 * SegmentChunkSize) - 1, - lastKey: 5 * SegmentChunkSize, + list: []CompactNeedleValue{}, + chunk: 5, + firstKey: MaxCompactKey, + lastKey: 0, }, }, } @@ -380,19 +383,22 @@ func TestSegmentForKey(t *testing.T) { wantMap := &CompactMap{ segments: map[int]*CompactMapSegment{ 0: &CompactMapSegment{ - list: []NeedleValue{}, - firstKey: SegmentChunkSize - 1, + list: []CompactNeedleValue{}, + chunk: 0, + firstKey: MaxCompactKey, lastKey: 0, }, 1: &CompactMapSegment{ - list: []NeedleValue{}, - firstKey: (2 * SegmentChunkSize) - 1, - lastKey: SegmentChunkSize, + list: []CompactNeedleValue{}, + chunk: 1, + firstKey: MaxCompactKey, + lastKey: 0, }, 5: &CompactMapSegment{ - list: []NeedleValue{}, - firstKey: (6 * SegmentChunkSize) - 1, - lastKey: 5 * SegmentChunkSize, + list: []CompactNeedleValue{}, + chunk: 5, + firstKey: MaxCompactKey, + lastKey: 0, }, }, }