You can not select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
					
					
						
							284 lines
						
					
					
						
							7.0 KiB
						
					
					
				
			
		
		
		
			
			
			
		
		
	
	
							284 lines
						
					
					
						
							7.0 KiB
						
					
					
				| package needle_map | |
| 
 | |
/* CompactMap is an in-memory map of needle indices, optimized for memory usage.
 *
 * It's implemented as a map of sorted index segments, which are in turn accessed through binary
 * search. This guarantees a best-case scenario (ordered inserts/updates) of O(1) and a worst case
 * scenario of O(log n) runtime, with memory usage unaffected by insert ordering.
 *
 * Note that even at O(log n), the clock time for both reads and writes is very low, so CompactMap
 * will seldom bottleneck index operations.
 */
| 
 | |
| import ( | |
| 	"fmt" | |
| 	"math" | |
| 	"slices" | |
| 	"sort" | |
| 	"sync" | |
| 
 | |
| 	"github.com/seaweedfs/seaweedfs/weed/storage/types" | |
| ) | |
| 
 | |
const (
	// MaxCompactKey is the largest value representable by a CompactKey (a uint16).
	MaxCompactKey    = math.MaxUint16
	// SegmentChunkSize is the number of consecutive needle IDs mapped to a single
	// segment, and therefore the maximum number of entries a segment may hold.
	SegmentChunkSize = 50000 // should be <= MaxCompactKey
)
| 
 | |
// CompactKey is a needle ID expressed relative to the start of its chunk,
// i.e. the needle ID minus chunk*SegmentChunkSize.
type CompactKey uint16

// CompactOffset is a types.Offset serialized into a fixed-size byte array.
type CompactOffset [types.OffsetSize]byte

// CompactNeedleValue is the per-segment, memory-compact form of an index entry.
type CompactNeedleValue struct {
	key    CompactKey    // needle ID relative to the owning segment's chunk
	offset CompactOffset // serialized offset of the needle
	size   types.Size    // needle size; negated by delete to mark the entry as removed
}
| 
 | |
// Chunk identifies a segment: it is the needle ID divided by SegmentChunkSize.
type Chunk uint64

// CompactMapSegment holds the entries of one chunk, kept sorted by compact key.
type CompactMapSegment struct {
	list     []CompactNeedleValue // entries, in ascending key order
	chunk    Chunk                // chunk number this segment covers
	firstKey CompactKey           // smallest key inserted so far (MaxCompactKey while empty)
	lastKey  CompactKey           // largest key inserted so far (0 while empty)
}
| 
 | |
// CompactMap is a needle index split into per-chunk segments.
// The embedded RWMutex is taken by the exported Set, Get, Delete and
// AscendingVisit methods.
type CompactMap struct {
	sync.RWMutex

	// segments maps a chunk number to the segment holding that chunk's entries.
	segments map[Chunk]*CompactMapSegment
}
| 
 | |
| func (ck CompactKey) Key(chunk Chunk) types.NeedleId { | |
| 	return (types.NeedleId(SegmentChunkSize) * types.NeedleId(chunk)) + types.NeedleId(ck) | |
| } | |
| 
 | |
| func OffsetToCompact(offset types.Offset) CompactOffset { | |
| 	var co CompactOffset | |
| 	types.OffsetToBytes(co[:], offset) | |
| 	return co | |
| } | |
| 
 | |
| func (co CompactOffset) Offset() types.Offset { | |
| 	return types.BytesToOffset(co[:]) | |
| } | |
| 
 | |
| func (cnv CompactNeedleValue) NeedleValue(chunk Chunk) NeedleValue { | |
| 	return NeedleValue{ | |
| 		Key:    cnv.key.Key(chunk), | |
| 		Offset: cnv.offset.Offset(), | |
| 		Size:   cnv.size, | |
| 	} | |
| } | |
| 
 | |
| func newCompactMapSegment(chunk Chunk) *CompactMapSegment { | |
| 	return &CompactMapSegment{ | |
| 		list:     []CompactNeedleValue{}, | |
| 		chunk:    chunk, | |
| 		firstKey: MaxCompactKey, | |
| 		lastKey:  0, | |
| 	} | |
| } | |
| 
 | |
| func (cs *CompactMapSegment) len() int { | |
| 	return len(cs.list) | |
| } | |
| 
 | |
| func (cs *CompactMapSegment) cap() int { | |
| 	return cap(cs.list) | |
| } | |
| 
 | |
| func (cs *CompactMapSegment) compactKey(key types.NeedleId) CompactKey { | |
| 	return CompactKey(key - (types.NeedleId(SegmentChunkSize) * types.NeedleId(cs.chunk))) | |
| } | |
| 
 | |
| // bsearchKey returns the CompactNeedleValue index for a given ID key. | |
| // If the key is not found, it returns the index where it should be inserted instead. | |
| func (cs *CompactMapSegment) bsearchKey(key types.NeedleId) (int, bool) { | |
| 	ck := cs.compactKey(key) | |
| 
 | |
| 	switch { | |
| 	case len(cs.list) == 0: | |
| 		return 0, false | |
| 	case ck == cs.firstKey: | |
| 		return 0, true | |
| 	case ck <= cs.firstKey: | |
| 		return 0, false | |
| 	case ck == cs.lastKey: | |
| 		return len(cs.list) - 1, true | |
| 	case ck > cs.lastKey: | |
| 		return len(cs.list), false | |
| 	} | |
| 
 | |
| 	i := sort.Search(len(cs.list), func(i int) bool { | |
| 		return cs.list[i].key >= ck | |
| 	}) | |
| 	return i, cs.list[i].key == ck | |
| } | |
| 
 | |
| // set inserts/updates a CompactNeedleValue. | |
| // If the operation is an update, returns the overwritten value's previous offset and size. | |
| func (cs *CompactMapSegment) set(key types.NeedleId, offset types.Offset, size types.Size) (oldOffset types.Offset, oldSize types.Size) { | |
| 	i, found := cs.bsearchKey(key) | |
| 	if found { | |
| 		// update | |
| 		o := cs.list[i].offset.Offset() | |
| 		oldOffset.OffsetLower = o.OffsetLower | |
| 		oldOffset.OffsetHigher = o.OffsetHigher | |
| 		oldSize = cs.list[i].size | |
| 
 | |
| 		o.OffsetLower = offset.OffsetLower | |
| 		o.OffsetHigher = offset.OffsetHigher | |
| 		cs.list[i].offset = OffsetToCompact(o) | |
| 		cs.list[i].size = size | |
| 		return | |
| 	} | |
| 
 | |
| 	// insert | |
| 	if len(cs.list) >= SegmentChunkSize { | |
| 		panic(fmt.Sprintf("attempted to write more than %d entries on CompactMapSegment %p!!!", SegmentChunkSize, cs)) | |
| 	} | |
| 	if len(cs.list) == SegmentChunkSize-1 { | |
| 		// if we max out our segment storage, pin its capacity to minimize memory usage | |
| 		nl := make([]CompactNeedleValue, SegmentChunkSize, SegmentChunkSize) | |
| 		copy(nl, cs.list[:i]) | |
| 		copy(nl[i+1:], cs.list[i:]) | |
| 		cs.list = nl | |
| 	} else { | |
| 		cs.list = append(cs.list, CompactNeedleValue{}) | |
| 		copy(cs.list[i+1:], cs.list[i:]) | |
| 	} | |
| 
 | |
| 	ck := cs.compactKey(key) | |
| 	cs.list[i] = CompactNeedleValue{ | |
| 		key:    ck, | |
| 		offset: OffsetToCompact(offset), | |
| 		size:   size, | |
| 	} | |
| 	if ck < cs.firstKey { | |
| 		cs.firstKey = ck | |
| 	} | |
| 	if ck > cs.lastKey { | |
| 		cs.lastKey = ck | |
| 	} | |
| 
 | |
| 	return | |
| } | |
| 
 | |
| // get seeks a map entry by key. Returns an entry pointer, with a boolean specifiying if the entry was found. | |
| func (cs *CompactMapSegment) get(key types.NeedleId) (*CompactNeedleValue, bool) { | |
| 	if i, found := cs.bsearchKey(key); found { | |
| 		return &cs.list[i], true | |
| 	} | |
| 
 | |
| 	return nil, false | |
| } | |
| 
 | |
| // delete deletes a map entry by key. Returns the entries' previous Size, if available. | |
| func (cs *CompactMapSegment) delete(key types.NeedleId) types.Size { | |
| 	if i, found := cs.bsearchKey(key); found { | |
| 		if cs.list[i].size > 0 && cs.list[i].size.IsValid() { | |
| 			ret := cs.list[i].size | |
| 			cs.list[i].size = -cs.list[i].size | |
| 			return ret | |
| 		} | |
| 	} | |
| 
 | |
| 	return types.Size(0) | |
| } | |
| 
 | |
| func NewCompactMap() *CompactMap { | |
| 	return &CompactMap{ | |
| 		segments: map[Chunk]*CompactMapSegment{}, | |
| 	} | |
| } | |
| 
 | |
| func (cm *CompactMap) Len() int { | |
| 	l := 0 | |
| 	for _, s := range cm.segments { | |
| 		l += s.len() | |
| 	} | |
| 	return l | |
| } | |
| 
 | |
| func (cm *CompactMap) Cap() int { | |
| 	c := 0 | |
| 	for _, s := range cm.segments { | |
| 		c += s.cap() | |
| 	} | |
| 	return c | |
| } | |
| 
 | |
| func (cm *CompactMap) String() string { | |
| 	if cm.Len() == 0 { | |
| 		return "empty" | |
| 	} | |
| 	return fmt.Sprintf( | |
| 		"%d/%d elements on %d segments, %.02f%% efficiency", | |
| 		cm.Len(), cm.Cap(), len(cm.segments), | |
| 		float64(100)*float64(cm.Len())/float64(cm.Cap())) | |
| } | |
| 
 | |
| func (cm *CompactMap) segmentForKey(key types.NeedleId) *CompactMapSegment { | |
| 	chunk := Chunk(key / SegmentChunkSize) | |
| 	if cs, ok := cm.segments[chunk]; ok { | |
| 		return cs | |
| 	} | |
| 
 | |
| 	cs := newCompactMapSegment(chunk) | |
| 	cm.segments[chunk] = cs | |
| 	return cs | |
| } | |
| 
 | |
| // Set inserts/updates a NeedleValue. | |
| // If the operation is an update, returns the overwritten value's previous offset and size. | |
| func (cm *CompactMap) Set(key types.NeedleId, offset types.Offset, size types.Size) (oldOffset types.Offset, oldSize types.Size) { | |
| 	cm.RLock() | |
| 	defer cm.RUnlock() | |
| 
 | |
| 	cs := cm.segmentForKey(key) | |
| 	return cs.set(key, offset, size) | |
| } | |
| 
 | |
| // Get seeks a map entry by key. Returns an entry pointer, with a boolean specifiying if the entry was found. | |
| func (cm *CompactMap) Get(key types.NeedleId) (*NeedleValue, bool) { | |
| 	cm.RLock() | |
| 	defer cm.RUnlock() | |
| 
 | |
| 	cs := cm.segmentForKey(key) | |
| 	if cnv, found := cs.get(key); found { | |
| 		nv := cnv.NeedleValue(cs.chunk) | |
| 		return &nv, true | |
| 	} | |
| 	return nil, false | |
| } | |
| 
 | |
| // Delete deletes a map entry by key. Returns the entries' previous Size, if available. | |
| func (cm *CompactMap) Delete(key types.NeedleId) types.Size { | |
| 	cm.RLock() | |
| 	defer cm.RUnlock() | |
| 
 | |
| 	cs := cm.segmentForKey(key) | |
| 	return cs.delete(key) | |
| } | |
| 
 | |
| // AscendingVisit runs a function on all entries, in ascending key order. Returns any errors hit while visiting. | |
| func (cm *CompactMap) AscendingVisit(visit func(NeedleValue) error) error { | |
| 	cm.RLock() | |
| 	defer cm.RUnlock() | |
| 
 | |
| 	chunks := []Chunk{} | |
| 	for c := range cm.segments { | |
| 		chunks = append(chunks, c) | |
| 	} | |
| 	slices.Sort(chunks) | |
| 
 | |
| 	for _, c := range chunks { | |
| 		cs := cm.segments[c] | |
| 		for _, cnv := range cs.list { | |
| 			nv := cnv.NeedleValue(cs.chunk) | |
| 			if err := visit(nv); err != nil { | |
| 				return err | |
| 			} | |
| 		} | |
| 	} | |
| 	return nil | |
| }
 |