Browse Source

Use a memory-efficient structure for `CompactMap` needle value entries.

This slightly complicates the code, but makes a **massive** difference
in memory efficiency - preliminary results show a ~30% reduction in
heap usage, with no measurable performance impact otherwise.
pull/6842/head
Lisandro Pin 2 weeks ago
parent
commit
4eaed7f206
Failed to extract signature
  1. 128
      weed/storage/needle_map/compact_map.go
  2. 104
      weed/storage/needle_map/compact_map_test.go

128
weed/storage/needle_map/compact_map.go

@ -2,6 +2,7 @@ package needle_map
import ( import (
"fmt" "fmt"
"math"
"sort" "sort"
"sync" "sync"
@ -9,14 +10,23 @@ import (
) )
const ( const (
SegmentChunkSize = 25000
MaxCompactKey = math.MaxUint16
SegmentChunkSize = 50000 // should be <= MaxCompactKey
) )
type CompactKey uint16
type CompactOffset [types.OffsetSize]byte
type CompactNeedleValue struct {
key CompactKey
offset CompactOffset
size types.Size
}
type CompactMapSegment struct { type CompactMapSegment struct {
// TODO: maybe a compact-er structure for needle values?
list []NeedleValue
firstKey types.NeedleId
lastKey types.NeedleId
list []CompactNeedleValue
chunk int
firstKey CompactKey
lastKey CompactKey
} }
type CompactMap struct { type CompactMap struct {
@ -25,12 +35,35 @@ type CompactMap struct {
segments map[int]*CompactMapSegment segments map[int]*CompactMapSegment
} }
func (ck CompactKey) Key(chunk int) types.NeedleId {
return (types.NeedleId(SegmentChunkSize) * types.NeedleId(chunk)) + types.NeedleId(ck)
}
func OffsetToCompact(offset types.Offset) CompactOffset {
var co CompactOffset
types.OffsetToBytes(co[:], offset)
return co
}
func (co CompactOffset) Offset() types.Offset {
return types.BytesToOffset(co[:])
}
func (cnv CompactNeedleValue) NeedleValue(chunk int) NeedleValue {
key := cnv.key.Key(chunk)
return NeedleValue{
Key: key,
Offset: cnv.offset.Offset(),
Size: cnv.size,
}
}
func newCompactMapSegment(chunk int) *CompactMapSegment { func newCompactMapSegment(chunk int) *CompactMapSegment {
startKey := types.NeedleId(chunk * SegmentChunkSize)
return &CompactMapSegment{ return &CompactMapSegment{
list: []NeedleValue{},
firstKey: startKey + SegmentChunkSize - 1,
lastKey: startKey,
list: []CompactNeedleValue{},
chunk: chunk,
firstKey: MaxCompactKey,
lastKey: 0,
} }
} }
@ -42,41 +75,49 @@ func (cs *CompactMapSegment) cap() int {
return cap(cs.list) return cap(cs.list)
} }
// bsearchKey returns the NeedleValue index for a given ID key.
func (cs *CompactMapSegment) compactKey(key types.NeedleId) CompactKey {
return CompactKey(key - (types.NeedleId(SegmentChunkSize) * types.NeedleId(cs.chunk)))
}
// bsearchKey returns the CompactNeedleValue index for a given ID key.
// If the key is not found, it returns the index where it should be inserted instead. // If the key is not found, it returns the index where it should be inserted instead.
func (cs *CompactMapSegment) bsearchKey(key types.NeedleId) (int, bool) { func (cs *CompactMapSegment) bsearchKey(key types.NeedleId) (int, bool) {
ck := cs.compactKey(key)
switch { switch {
case len(cs.list) == 0: case len(cs.list) == 0:
return 0, false return 0, false
case key == cs.firstKey:
case ck == cs.firstKey:
return 0, true return 0, true
case key <= cs.firstKey:
case ck <= cs.firstKey:
return 0, false return 0, false
case key == cs.lastKey:
case ck == cs.lastKey:
return len(cs.list) - 1, true return len(cs.list) - 1, true
case key > cs.lastKey:
case ck > cs.lastKey:
return len(cs.list), false return len(cs.list), false
} }
i := sort.Search(len(cs.list), func(i int) bool { i := sort.Search(len(cs.list), func(i int) bool {
return cs.list[i].Key >= key
return cs.list[i].key >= ck
}) })
return i, cs.list[i].Key == key
return i, cs.list[i].key == ck
} }
// set inserts/updates a NeedleValue.
// set inserts/updates a CompactNeedleValue.
// If the operation is an update, returns the overwritten value's previous offset and size. // If the operation is an update, returns the overwritten value's previous offset and size.
func (cs *CompactMapSegment) set(key types.NeedleId, offset types.Offset, size types.Size) (oldOffset types.Offset, oldSize types.Size) { func (cs *CompactMapSegment) set(key types.NeedleId, offset types.Offset, size types.Size) (oldOffset types.Offset, oldSize types.Size) {
i, found := cs.bsearchKey(key) i, found := cs.bsearchKey(key)
if found { if found {
// update // update
oldOffset.OffsetLower = cs.list[i].Offset.OffsetLower
oldOffset.OffsetHigher = cs.list[i].Offset.OffsetHigher
oldSize = cs.list[i].Size
cs.list[i].Size = size
cs.list[i].Offset.OffsetLower = offset.OffsetLower
cs.list[i].Offset.OffsetHigher = offset.OffsetHigher
o := cs.list[i].offset.Offset()
oldOffset.OffsetLower = o.OffsetLower
oldOffset.OffsetHigher = o.OffsetHigher
oldSize = cs.list[i].size
o.OffsetLower = offset.OffsetLower
o.OffsetHigher = offset.OffsetHigher
cs.list[i].offset = OffsetToCompact(o)
cs.list[i].size = size
return return
} }
@ -86,32 +127,33 @@ func (cs *CompactMapSegment) set(key types.NeedleId, offset types.Offset, size t
} }
if len(cs.list) == SegmentChunkSize-1 { if len(cs.list) == SegmentChunkSize-1 {
// if we max out our segment storage, pin its capacity to minimize memory usage // if we max out our segment storage, pin its capacity to minimize memory usage
nl := make([]NeedleValue, SegmentChunkSize, SegmentChunkSize)
nl := make([]CompactNeedleValue, SegmentChunkSize, SegmentChunkSize)
copy(nl, cs.list[:i]) copy(nl, cs.list[:i])
copy(nl[i+1:], cs.list[i:]) copy(nl[i+1:], cs.list[i:])
cs.list = nl cs.list = nl
} else { } else {
cs.list = append(cs.list, NeedleValue{})
cs.list = append(cs.list, CompactNeedleValue{})
copy(cs.list[i+1:], cs.list[i:]) copy(cs.list[i+1:], cs.list[i:])
} }
cs.list[i] = NeedleValue{
Key: key,
Offset: offset,
Size: size,
ck := cs.compactKey(key)
cs.list[i] = CompactNeedleValue{
key: ck,
offset: OffsetToCompact(offset),
size: size,
} }
if key < cs.firstKey {
cs.firstKey = key
if ck < cs.firstKey {
cs.firstKey = ck
} }
if key > cs.lastKey {
cs.lastKey = key
if ck > cs.lastKey {
cs.lastKey = ck
} }
return return
} }
// get seeks a map entry by key. Returns an entry pointer, with a boolean specifying if the entry was found. // get seeks a map entry by key. Returns an entry pointer, with a boolean specifying if the entry was found.
func (cs *CompactMapSegment) get(key types.NeedleId) (*NeedleValue, bool) {
func (cs *CompactMapSegment) get(key types.NeedleId) (*CompactNeedleValue, bool) {
if i, found := cs.bsearchKey(key); found { if i, found := cs.bsearchKey(key); found {
return &cs.list[i], true return &cs.list[i], true
} }
@ -122,9 +164,9 @@ func (cs *CompactMapSegment) get(key types.NeedleId) (*NeedleValue, bool) {
// delete deletes a map entry by key. Returns the entry's previous Size, if available. // delete deletes a map entry by key. Returns the entry's previous Size, if available.
func (cs *CompactMapSegment) delete(key types.NeedleId) types.Size { func (cs *CompactMapSegment) delete(key types.NeedleId) types.Size {
if i, found := cs.bsearchKey(key); found { if i, found := cs.bsearchKey(key); found {
if cs.list[i].Size > 0 && cs.list[i].Size.IsValid() {
ret := cs.list[i].Size
cs.list[i].Size = -cs.list[i].Size
if cs.list[i].size > 0 && cs.list[i].size.IsValid() {
ret := cs.list[i].size
cs.list[i].size = -cs.list[i].size
return ret return ret
} }
} }
@ -188,7 +230,11 @@ func (cm *CompactMap) Get(key types.NeedleId) (*NeedleValue, bool) {
defer cm.RUnlock() defer cm.RUnlock()
cs := cm.segmentForKey(key) cs := cm.segmentForKey(key)
return cs.get(key)
if cnv, found := cs.get(key); found {
nv := cnv.NeedleValue(cs.chunk)
return &nv, true
}
return nil, false
} }
// Delete deletes a map entry by key. Returns the entry's previous Size, if available. // Delete deletes a map entry by key. Returns the entry's previous Size, if available.
@ -212,7 +258,9 @@ func (cm *CompactMap) AscendingVisit(visit func(NeedleValue) error) error {
sort.Ints(chunks) sort.Ints(chunks)
for _, c := range chunks { for _, c := range chunks {
for _, nv := range cm.segments[c].list {
cs := cm.segments[c]
for _, cnv := range cs.list {
nv := cnv.NeedleValue(cs.chunk)
if err := visit(nv); err != nil { if err := visit(nv); err != nil {
return err return err
} }

104
weed/storage/needle_map/compact_map_test.go

@ -11,12 +11,12 @@ import (
func TestSegmentBsearchKey(t *testing.T) { func TestSegmentBsearchKey(t *testing.T) {
testSegment := &CompactMapSegment{ testSegment := &CompactMapSegment{
list: []NeedleValue{
NeedleValue{Key: 10},
NeedleValue{Key: 20},
NeedleValue{Key: 21},
NeedleValue{Key: 26},
NeedleValue{Key: 30},
list: []CompactNeedleValue{
CompactNeedleValue{key: 10},
CompactNeedleValue{key: 20},
CompactNeedleValue{key: 21},
CompactNeedleValue{key: 26},
CompactNeedleValue{key: 30},
}, },
firstKey: 10, firstKey: 10,
lastKey: 30, lastKey: 30,
@ -116,10 +116,10 @@ func TestSegmentBsearchKey(t *testing.T) {
func TestSegmentSet(t *testing.T) { func TestSegmentSet(t *testing.T) {
testSegment := &CompactMapSegment{ testSegment := &CompactMapSegment{
list: []NeedleValue{
NeedleValue{Key: 10, Offset: types.Uint32ToOffset(0), Size: 100},
NeedleValue{Key: 20, Offset: types.Uint32ToOffset(100), Size: 200},
NeedleValue{Key: 30, Offset: types.Uint32ToOffset(300), Size: 300},
list: []CompactNeedleValue{
CompactNeedleValue{key: 10, offset: OffsetToCompact(types.Uint32ToOffset(0)), size: 100},
CompactNeedleValue{key: 20, offset: OffsetToCompact(types.Uint32ToOffset(100)), size: 200},
CompactNeedleValue{key: 30, offset: OffsetToCompact(types.Uint32ToOffset(300)), size: 300},
}, },
firstKey: 10, firstKey: 10,
lastKey: 30, lastKey: 30,
@ -173,13 +173,13 @@ func TestSegmentSet(t *testing.T) {
} }
wantSegment := &CompactMapSegment{ wantSegment := &CompactMapSegment{
list: []NeedleValue{
NeedleValue{Key: 5, Offset: types.Uint32ToOffset(1000), Size: 123},
NeedleValue{Key: 10, Offset: types.Uint32ToOffset(0), Size: 100},
NeedleValue{Key: 20, Offset: types.Uint32ToOffset(100), Size: 200},
NeedleValue{Key: 25, Offset: types.Uint32ToOffset(8000), Size: 789},
NeedleValue{Key: 30, Offset: types.Uint32ToOffset(9000), Size: 999},
NeedleValue{Key: 51, Offset: types.Uint32ToOffset(7000), Size: 456},
list: []CompactNeedleValue{
CompactNeedleValue{key: 5, offset: OffsetToCompact(types.Uint32ToOffset(1000)), size: 123},
CompactNeedleValue{key: 10, offset: OffsetToCompact(types.Uint32ToOffset(0)), size: 100},
CompactNeedleValue{key: 20, offset: OffsetToCompact(types.Uint32ToOffset(100)), size: 200},
CompactNeedleValue{key: 25, offset: OffsetToCompact(types.Uint32ToOffset(8000)), size: 789},
CompactNeedleValue{key: 30, offset: OffsetToCompact(types.Uint32ToOffset(9000)), size: 999},
CompactNeedleValue{key: 51, offset: OffsetToCompact(types.Uint32ToOffset(7000)), size: 456},
}, },
firstKey: 5, firstKey: 5,
lastKey: 51, lastKey: 51,
@ -213,7 +213,7 @@ func TestSegmentSetOrdering(t *testing.T) {
t.Errorf("expected size %d, got %d", want, got) t.Errorf("expected size %d, got %d", want, got)
} }
for i := 1; i < cs.len(); i++ { for i := 1; i < cs.len(); i++ {
if ka, kb := cs.list[i-1].Key, cs.list[i].Key; ka >= kb {
if ka, kb := cs.list[i-1].key, cs.list[i].key; ka >= kb {
t.Errorf("found out of order entries at (%d, %d) = (%d, %d)", i-1, i, ka, kb) t.Errorf("found out of order entries at (%d, %d) = (%d, %d)", i-1, i, ka, kb)
} }
} }
@ -221,10 +221,10 @@ func TestSegmentSetOrdering(t *testing.T) {
func TestSegmentGet(t *testing.T) { func TestSegmentGet(t *testing.T) {
testSegment := &CompactMapSegment{ testSegment := &CompactMapSegment{
list: []NeedleValue{
NeedleValue{Key: 10, Offset: types.Uint32ToOffset(0), Size: 100},
NeedleValue{Key: 20, Offset: types.Uint32ToOffset(100), Size: 200},
NeedleValue{Key: 30, Offset: types.Uint32ToOffset(300), Size: 300},
list: []CompactNeedleValue{
CompactNeedleValue{key: 10, offset: OffsetToCompact(types.Uint32ToOffset(0)), size: 100},
CompactNeedleValue{key: 20, offset: OffsetToCompact(types.Uint32ToOffset(100)), size: 200},
CompactNeedleValue{key: 30, offset: OffsetToCompact(types.Uint32ToOffset(300)), size: 300},
}, },
firstKey: 10, firstKey: 10,
lastKey: 30, lastKey: 30,
@ -233,7 +233,7 @@ func TestSegmentGet(t *testing.T) {
testCases := []struct { testCases := []struct {
name string name string
key types.NeedleId key types.NeedleId
wantValue *NeedleValue
wantValue *CompactNeedleValue
wantFound bool wantFound bool
}{ }{
{ {
@ -277,11 +277,11 @@ func TestSegmentGet(t *testing.T) {
func TestSegmentDelete(t *testing.T) { func TestSegmentDelete(t *testing.T) {
testSegment := &CompactMapSegment{ testSegment := &CompactMapSegment{
list: []NeedleValue{
NeedleValue{Key: 10, Offset: types.Uint32ToOffset(0), Size: 100},
NeedleValue{Key: 20, Offset: types.Uint32ToOffset(100), Size: 200},
NeedleValue{Key: 30, Offset: types.Uint32ToOffset(300), Size: 300},
NeedleValue{Key: 40, Offset: types.Uint32ToOffset(600), Size: 400},
list: []CompactNeedleValue{
CompactNeedleValue{key: 10, offset: OffsetToCompact(types.Uint32ToOffset(0)), size: 100},
CompactNeedleValue{key: 20, offset: OffsetToCompact(types.Uint32ToOffset(100)), size: 200},
CompactNeedleValue{key: 30, offset: OffsetToCompact(types.Uint32ToOffset(300)), size: 300},
CompactNeedleValue{key: 40, offset: OffsetToCompact(types.Uint32ToOffset(600)), size: 400},
}, },
firstKey: 10, firstKey: 10,
lastKey: 40, lastKey: 40,
@ -317,11 +317,11 @@ func TestSegmentDelete(t *testing.T) {
} }
wantSegment := &CompactMapSegment{ wantSegment := &CompactMapSegment{
list: []NeedleValue{
NeedleValue{Key: 10, Offset: types.Uint32ToOffset(0), Size: 100},
NeedleValue{Key: 20, Offset: types.Uint32ToOffset(100), Size: -200},
NeedleValue{Key: 30, Offset: types.Uint32ToOffset(300), Size: 300},
NeedleValue{Key: 40, Offset: types.Uint32ToOffset(600), Size: -400},
list: []CompactNeedleValue{
CompactNeedleValue{key: 10, offset: OffsetToCompact(types.Uint32ToOffset(0)), size: 100},
CompactNeedleValue{key: 20, offset: OffsetToCompact(types.Uint32ToOffset(100)), size: -200},
CompactNeedleValue{key: 30, offset: OffsetToCompact(types.Uint32ToOffset(300)), size: 300},
CompactNeedleValue{key: 40, offset: OffsetToCompact(types.Uint32ToOffset(600)), size: -400},
}, },
firstKey: 10, firstKey: 10,
lastKey: 40, lastKey: 40,
@ -343,8 +343,9 @@ func TestSegmentForKey(t *testing.T) {
name: "first segment", name: "first segment",
key: 12, key: 12,
want: &CompactMapSegment{ want: &CompactMapSegment{
list: []NeedleValue{},
firstKey: SegmentChunkSize - 1,
list: []CompactNeedleValue{},
chunk: 0,
firstKey: MaxCompactKey,
lastKey: 0, lastKey: 0,
}, },
}, },
@ -352,18 +353,20 @@ func TestSegmentForKey(t *testing.T) {
name: "second segment, gapless", name: "second segment, gapless",
key: SegmentChunkSize + 34, key: SegmentChunkSize + 34,
want: &CompactMapSegment{ want: &CompactMapSegment{
list: []NeedleValue{},
firstKey: (2 * SegmentChunkSize) - 1,
lastKey: SegmentChunkSize,
list: []CompactNeedleValue{},
chunk: 1,
firstKey: MaxCompactKey,
lastKey: 0,
}, },
}, },
{ {
name: "gapped segment", name: "gapped segment",
key: (5 * SegmentChunkSize) + 56, key: (5 * SegmentChunkSize) + 56,
want: &CompactMapSegment{ want: &CompactMapSegment{
list: []NeedleValue{},
firstKey: (6 * SegmentChunkSize) - 1,
lastKey: 5 * SegmentChunkSize,
list: []CompactNeedleValue{},
chunk: 5,
firstKey: MaxCompactKey,
lastKey: 0,
}, },
}, },
} }
@ -380,19 +383,22 @@ func TestSegmentForKey(t *testing.T) {
wantMap := &CompactMap{ wantMap := &CompactMap{
segments: map[int]*CompactMapSegment{ segments: map[int]*CompactMapSegment{
0: &CompactMapSegment{ 0: &CompactMapSegment{
list: []NeedleValue{},
firstKey: SegmentChunkSize - 1,
list: []CompactNeedleValue{},
chunk: 0,
firstKey: MaxCompactKey,
lastKey: 0, lastKey: 0,
}, },
1: &CompactMapSegment{ 1: &CompactMapSegment{
list: []NeedleValue{},
firstKey: (2 * SegmentChunkSize) - 1,
lastKey: SegmentChunkSize,
list: []CompactNeedleValue{},
chunk: 1,
firstKey: MaxCompactKey,
lastKey: 0,
}, },
5: &CompactMapSegment{ 5: &CompactMapSegment{
list: []NeedleValue{},
firstKey: (6 * SegmentChunkSize) - 1,
lastKey: 5 * SegmentChunkSize,
list: []CompactNeedleValue{},
chunk: 5,
firstKey: MaxCompactKey,
lastKey: 0,
}, },
}, },
} }

Loading…
Cancel
Save