From 2e1506c31e962b560924df44bfe10d89d7eea8f6 Mon Sep 17 00:00:00 2001 From: Lisandro Pin Date: Fri, 23 May 2025 16:05:08 +0200 Subject: [PATCH] Rewrite `needle_map.CompactMap()` for more efficient memory usage (#6813) --- weed/storage/needle_map/compact_map.go | 169 ++++++++++---------- weed/storage/needle_map/compact_map_test.go | 2 +- 2 files changed, 83 insertions(+), 88 deletions(-) diff --git a/weed/storage/needle_map/compact_map.go b/weed/storage/needle_map/compact_map.go index c9f5967e4..10d0802ad 100644 --- a/weed/storage/needle_map/compact_map.go +++ b/weed/storage/needle_map/compact_map.go @@ -16,118 +16,114 @@ type SectionalNeedleId uint32 const SectionalNeedleIdLimit = 1<<32 - 1 type SectionalNeedleValue struct { - Key SectionalNeedleId - OffsetLower OffsetLower `comment:"Volume offset"` //since aligned to 8 bytes, range is 4G*8=32G - Size Size `comment:"Size of the data portion"` -} - -type SectionalNeedleValueExtra struct { + Key SectionalNeedleId + OffsetLower OffsetLower `comment:"Volume offset"` //since aligned to 8 bytes, range is 4G*8=32G + Size Size `comment:"Size of the data portion"` OffsetHigher OffsetHigher } type CompactSection struct { sync.RWMutex - values []SectionalNeedleValue - valuesExtra []SectionalNeedleValueExtra - overflow Overflow - overflowExtra OverflowExtra - start NeedleId - end NeedleId - counter int + values []SectionalNeedleValue + overflow Overflow + start NeedleId + end NeedleId + counter int } type Overflow []SectionalNeedleValue -type OverflowExtra []SectionalNeedleValueExtra func NewCompactSection(start NeedleId) *CompactSection { return &CompactSection{ - values: make([]SectionalNeedleValue, batch), - valuesExtra: make([]SectionalNeedleValueExtra, batch), - overflow: Overflow(make([]SectionalNeedleValue, 0)), - overflowExtra: OverflowExtra(make([]SectionalNeedleValueExtra, 0)), - start: start, + values: []SectionalNeedleValue{}, + overflow: Overflow([]SectionalNeedleValue{}), + start: start, } } // return old entry size func (cs *CompactSection) Set(key NeedleId, offset Offset, size Size) (oldOffset Offset, oldSize Size) { cs.Lock() + defer cs.Unlock() + if key > cs.end { cs.end = key } skey := SectionalNeedleId(key - cs.start) if i := cs.binarySearchValues(skey); i >= 0 { - oldOffset.OffsetHigher, oldOffset.OffsetLower, oldSize = cs.valuesExtra[i].OffsetHigher, cs.values[i].OffsetLower, cs.values[i].Size - //println("key", key, "old size", ret) - cs.valuesExtra[i].OffsetHigher, cs.values[i].OffsetLower, cs.values[i].Size = offset.OffsetHigher, offset.OffsetLower, size - } else { - needOverflow := cs.counter >= batch - needOverflow = needOverflow || cs.counter > 0 && cs.values[cs.counter-1].Key > skey - if needOverflow { - lookBackIndex := cs.counter - 128 - if lookBackIndex < 0 { - lookBackIndex = 0 - } - if cs.counter < batch && cs.values[lookBackIndex].Key < skey { - // still has capacity and only partially out of order - p := &cs.values[cs.counter] - p.Key, cs.valuesExtra[cs.counter].OffsetHigher, p.OffsetLower, p.Size = skey, offset.OffsetHigher, offset.OffsetLower, size - //println("added index", cs.counter, "key", key, cs.values[cs.counter].Key) - for x := cs.counter - 1; x >= lookBackIndex; x-- { - if cs.values[x].Key > cs.values[x+1].Key { - cs.values[x], cs.values[x+1] = cs.values[x+1], cs.values[x] - cs.valuesExtra[x], cs.valuesExtra[x+1] = cs.valuesExtra[x+1], cs.valuesExtra[x] - } else { - break - } - } - cs.counter++ - } else { - //println("start", cs.start, "counter", cs.counter, "key", key) - if oldValueExtra, oldValue, found := cs.findOverflowEntry(skey); found { - oldOffset.OffsetHigher, oldOffset.OffsetLower, oldSize = oldValueExtra.OffsetHigher, oldValue.OffsetLower, oldValue.Size - } - cs.setOverflowEntry(skey, offset, size) + // update + oldOffset.OffsetHigher, oldOffset.OffsetLower, oldSize = cs.values[i].OffsetHigher, cs.values[i].OffsetLower, cs.values[i].Size + cs.values[i].OffsetHigher, cs.values[i].OffsetLower, cs.values[i].Size = offset.OffsetHigher, offset.OffsetLower, size + return + } + + needOverflow := cs.counter >= batch + needOverflow = needOverflow || cs.counter > 0 && cs.values[cs.counter-1].Key > skey + if !needOverflow { + // non-overflow insert + cs.values = append(cs.values, SectionalNeedleValue{ + Key: skey, + OffsetLower: offset.OffsetLower, + Size: size, + OffsetHigher: offset.OffsetHigher, + }) + cs.counter++ + return + } + + lookBackIndex := cs.counter - 128 + if lookBackIndex < 0 { + lookBackIndex = 0 + } + if cs.counter < batch && cs.values[lookBackIndex].Key < skey { + // still has capacity and only partially out of order + for ; lookBackIndex < cs.counter; lookBackIndex++ { + if cs.values[lookBackIndex].Key >= skey { + break } - } else { - p := &cs.values[cs.counter] - p.Key, cs.valuesExtra[cs.counter].OffsetHigher, p.OffsetLower, p.Size = skey, offset.OffsetHigher, offset.OffsetLower, size - //println("added index", cs.counter, "key", key, cs.values[cs.counter].Key) - cs.counter++ } + cs.values = append(cs.values, SectionalNeedleValue{}) + copy(cs.values[lookBackIndex+1:], cs.values[lookBackIndex:]) + cs.values[lookBackIndex].Key, cs.values[lookBackIndex].Size = skey, size + cs.values[lookBackIndex].OffsetLower, cs.values[lookBackIndex].OffsetHigher = offset.OffsetLower, offset.OffsetHigher + cs.counter++ + return + } + + // overflow insert + //println("start", cs.start, "counter", cs.counter, "key", key) + if oldValue, found := cs.findOverflowEntry(skey); found { + oldOffset.OffsetHigher, oldOffset.OffsetLower, oldSize = oldValue.OffsetHigher, oldValue.OffsetLower, oldValue.Size } - cs.Unlock() + cs.setOverflowEntry(skey, offset, size) + return } func (cs *CompactSection) setOverflowEntry(skey SectionalNeedleId, offset Offset, size Size) { - needleValue := SectionalNeedleValue{Key: skey, OffsetLower: offset.OffsetLower, Size: size} - needleValueExtra := SectionalNeedleValueExtra{OffsetHigher: offset.OffsetHigher} + needleValue := SectionalNeedleValue{Key: skey, OffsetLower: offset.OffsetLower, Size: size, OffsetHigher: offset.OffsetHigher} insertCandidate := sort.Search(len(cs.overflow), func(i int) bool { return cs.overflow[i].Key >= needleValue.Key }) + if insertCandidate != len(cs.overflow) && cs.overflow[insertCandidate].Key == needleValue.Key { cs.overflow[insertCandidate] = needleValue - } else { - cs.overflow = append(cs.overflow, needleValue) - cs.overflowExtra = append(cs.overflowExtra, needleValueExtra) - for i := len(cs.overflow) - 1; i > insertCandidate; i-- { - cs.overflow[i] = cs.overflow[i-1] - cs.overflowExtra[i] = cs.overflowExtra[i-1] - } - cs.overflow[insertCandidate] = needleValue - cs.overflowExtra[insertCandidate] = needleValueExtra + return } + + cs.overflow = append(cs.overflow, SectionalNeedleValue{}) + copy(cs.overflow[insertCandidate+1:], cs.overflow[insertCandidate:]) + cs.overflow[insertCandidate] = needleValue } -func (cs *CompactSection) findOverflowEntry(key SectionalNeedleId) (nve SectionalNeedleValueExtra, nv SectionalNeedleValue, found bool) { +func (cs *CompactSection) findOverflowEntry(key SectionalNeedleId) (nv SectionalNeedleValue, found bool) { foundCandidate := sort.Search(len(cs.overflow), func(i int) bool { return cs.overflow[i].Key >= key }) if foundCandidate != len(cs.overflow) && cs.overflow[foundCandidate].Key == key { - return cs.overflowExtra[foundCandidate], cs.overflow[foundCandidate], true + return cs.overflow[foundCandidate], true } - return nve, nv, false + return nv, false } func (cs *CompactSection) deleteOverflowEntry(key SectionalNeedleId) { @@ -157,7 +153,7 @@ func (cs *CompactSection) Delete(key NeedleId) Size { cs.values[i].Size = -cs.values[i].Size } } - if _, v, found := cs.findOverflowEntry(skey); found { + if v, found := cs.findOverflowEntry(skey); found { cs.deleteOverflowEntry(skey) ret = v.Size } @@ -170,12 +166,12 @@ func (cs *CompactSection) Get(key NeedleId) (*NeedleValue, bool) { return nil, false } skey := SectionalNeedleId(key - cs.start) - if ve, v, ok := cs.findOverflowEntry(skey); ok { - nv := toNeedleValue(ve, v, cs) + if v, ok := cs.findOverflowEntry(skey); ok { + nv := toNeedleValue(v, cs) return &nv, true } if i := cs.binarySearchValues(skey); i >= 0 { - nv := toNeedleValue(cs.valuesExtra[i], cs.values[i], cs) + nv := toNeedleValue(cs.values[i], cs) return &nv, true } return nil, false @@ -273,7 +269,7 @@ func (cm *CompactMap) AscendingVisit(visit func(NeedleValue) error) error { var i, j int for i, j = 0, 0; i < len(cs.overflow) && j < len(cs.values) && j < cs.counter; { if cs.overflow[i].Key < cs.values[j].Key { - if err := visit(toNeedleValue(cs.overflowExtra[i], cs.overflow[i], cs)); err != nil { + if err := visit(toNeedleValue(cs.overflow[i], cs)); err != nil { cs.RUnlock() return err } @@ -281,7 +277,7 @@ func (cm *CompactMap) AscendingVisit(visit func(NeedleValue) error) error { } else if cs.overflow[i].Key == cs.values[j].Key { j++ } else { - if err := visit(toNeedleValue(cs.valuesExtra[j], cs.values[j], cs)); err != nil { + if err := visit(toNeedleValue(cs.values[j], cs)); err != nil { cs.RUnlock() return err } @@ -289,13 +285,13 @@ func (cm *CompactMap) AscendingVisit(visit func(NeedleValue) error) error { } } for ; i < len(cs.overflow); i++ { - if err := visit(toNeedleValue(cs.overflowExtra[i], cs.overflow[i], cs)); err != nil { + if err := visit(toNeedleValue(cs.overflow[i], cs)); err != nil { cs.RUnlock() return err } } for ; j < len(cs.values) && j < cs.counter; j++ { - if err := visit(toNeedleValue(cs.valuesExtra[j], cs.values[j], cs)); err != nil { + if err := visit(toNeedleValue(cs.values[j], cs)); err != nil { cs.RUnlock() return err } @@ -305,20 +301,19 @@ func (cm *CompactMap) AscendingVisit(visit func(NeedleValue) error) error { return nil } -func toNeedleValue(snve SectionalNeedleValueExtra, snv SectionalNeedleValue, cs *CompactSection) NeedleValue { +func toNeedleValue(snv SectionalNeedleValue, cs *CompactSection) NeedleValue { offset := Offset{ - OffsetHigher: snve.OffsetHigher, + OffsetHigher: snv.OffsetHigher, OffsetLower: snv.OffsetLower, } return NeedleValue{Key: NeedleId(snv.Key) + cs.start, Offset: offset, Size: snv.Size} } -func (nv NeedleValue) toSectionalNeedleValue(cs *CompactSection) (SectionalNeedleValue, SectionalNeedleValueExtra) { +func (nv NeedleValue) toSectionalNeedleValue(cs *CompactSection) SectionalNeedleValue { return SectionalNeedleValue{ - SectionalNeedleId(nv.Key - cs.start), - nv.Offset.OffsetLower, - nv.Size, - }, SectionalNeedleValueExtra{ - nv.Offset.OffsetHigher, - } + Key: SectionalNeedleId(nv.Key - cs.start), + OffsetLower: nv.Offset.OffsetLower, + Size: nv.Size, + OffsetHigher: nv.Offset.OffsetHigher, + } } diff --git a/weed/storage/needle_map/compact_map_test.go b/weed/storage/needle_map/compact_map_test.go index 58d2a6e3a..4863e1767 100644 --- a/weed/storage/needle_map/compact_map_test.go +++ b/weed/storage/needle_map/compact_map_test.go @@ -150,7 +150,7 @@ func TestOverflow(t *testing.T) { t.Fatalf("expecting 5 entries now: %+v", cs.overflow) } - _, x, _ := cs.findOverflowEntry(5) + x, _ := cs.findOverflowEntry(5) if x.Key != 5 { t.Fatalf("expecting entry 5 now: %+v", x) }