Chris Lu
3 years ago
5 changed files with 551 additions and 6 deletions
-
102weed/util/skiplist/name_batch.go
-
303weed/util/skiplist/name_list.go
-
73weed/util/skiplist/name_list_test.go
-
75weed/util/skiplist/skiplist.pb.go
-
4weed/util/skiplist/skiplist.proto
@ -0,0 +1,102 @@ |
|||||
|
package skiplist |
||||
|
|
||||
|
import ( |
||||
|
"github.com/chrislusf/seaweedfs/weed/glog" |
||||
|
"github.com/golang/protobuf/proto" |
||||
|
"sort" |
||||
|
"strings" |
||||
|
) |
||||
|
|
||||
|
// NameBatch is an in-memory set of names that also tracks its smallest member.
type NameBatch struct {
	// key is the lexicographically smallest name written to the batch;
	// it is reset to "" when the batch becomes empty.
	key string
	// names holds the members of the batch as a set.
	names map[string]struct{}
}
||||
|
|
||||
|
func (nb *NameBatch) ContainsName(name string) (found bool) { |
||||
|
_, found = nb.names[name] |
||||
|
return |
||||
|
} |
||||
|
func (nb *NameBatch) WriteName(name string) { |
||||
|
if nb.key == "" || strings.Compare(nb.key, name) > 0 { |
||||
|
nb.key = name |
||||
|
} |
||||
|
nb.names[name] = struct{}{} |
||||
|
} |
||||
|
func (nb *NameBatch) DeleteName(name string) { |
||||
|
delete(nb.names, name) |
||||
|
if nb.key == name { |
||||
|
nb.key = "" |
||||
|
for n := range nb.names { |
||||
|
if nb.key == "" || strings.Compare(nb.key, n) > 0 { |
||||
|
nb.key = n |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
func (nb *NameBatch) ListNames(startFrom string, visitNamesFn func(name string) bool) bool { |
||||
|
var names []string |
||||
|
needFilter := startFrom == "" |
||||
|
for n := range nb.names { |
||||
|
if !needFilter || strings.Compare(n, startFrom) >= 0 { |
||||
|
names = append(names, n) |
||||
|
} |
||||
|
} |
||||
|
sort.Slice(names, func(i, j int) bool { |
||||
|
return strings.Compare(names[i], names[j]) < 0 |
||||
|
}) |
||||
|
for _, n := range names { |
||||
|
if !visitNamesFn(n) { |
||||
|
return false |
||||
|
} |
||||
|
} |
||||
|
return true |
||||
|
} |
||||
|
|
||||
|
func NewNameBatch() *NameBatch { |
||||
|
return &NameBatch{ |
||||
|
names: make(map[string]struct{}), |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
func LoadNameBatch(data []byte) *NameBatch { |
||||
|
t := &NameBatchData{} |
||||
|
if len(data) > 0 { |
||||
|
err := proto.Unmarshal(data, t) |
||||
|
if err != nil { |
||||
|
glog.Errorf("unmarshal into NameBatchData{} : %v", err) |
||||
|
return nil |
||||
|
} |
||||
|
} |
||||
|
nb := NewNameBatch() |
||||
|
for _, n := range t.Names { |
||||
|
name := string(n) |
||||
|
if nb.key == "" || strings.Compare(nb.key, name) > 0 { |
||||
|
nb.key = name |
||||
|
} |
||||
|
nb.names[name] = struct{}{} |
||||
|
} |
||||
|
return nb |
||||
|
} |
||||
|
|
||||
|
func (nb *NameBatch) ToBytes() []byte { |
||||
|
t := &NameBatchData{} |
||||
|
for n := range nb.names { |
||||
|
t.Names = append(t.Names, []byte(n)) |
||||
|
} |
||||
|
data, _ := proto.Marshal(t) |
||||
|
return data |
||||
|
} |
||||
|
|
||||
|
func (nb *NameBatch) SplitBy(name string) (x, y *NameBatch) { |
||||
|
x, y = NewNameBatch(), NewNameBatch() |
||||
|
|
||||
|
for n := range nb.names { |
||||
|
// there should be no equal case though
|
||||
|
if strings.Compare(n, name) <= 0 { |
||||
|
x.WriteName(n) |
||||
|
} else { |
||||
|
y.WriteName(n) |
||||
|
} |
||||
|
} |
||||
|
return |
||||
|
} |
@ -0,0 +1,303 @@ |
|||||
|
package skiplist |
||||
|
|
||||
|
import ( |
||||
|
"bytes" |
||||
|
) |
||||
|
|
||||
|
type NameList struct { |
||||
|
skipList *SkipList |
||||
|
batchSize int |
||||
|
} |
||||
|
|
||||
|
func NewNameList(store ListStore, batchSize int) *NameList { |
||||
|
return &NameList{ |
||||
|
skipList: New(store), |
||||
|
batchSize: batchSize, |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
/* |
||||
|
Be reluctant to create new nodes. Try to fit into either previous node or next node. |
||||
|
Prefer to add to previous node. |
||||
|
|
||||
|
There are multiple cases after finding the name for greater or equal node |
||||
|
1. found and node.Key == name |
||||
|
The node contains a batch with leading key the same as the name |
||||
|
nothing to do |
||||
|
2. no such node found or node.Key > name |
||||
|
|
||||
|
if no such node found |
||||
|
prevNode = list.LargestNode |
||||
|
|
||||
|
// case 2.1
|
||||
|
if previousNode contains name |
||||
|
nothing to do |
||||
|
|
||||
|
// prefer to add to previous node
|
||||
|
if prevNode != nil { |
||||
|
// case 2.2
|
||||
|
if prevNode has capacity |
||||
|
prevNode.add name, and save |
||||
|
return |
||||
|
// case 2.3
|
||||
|
split prevNode by name |
||||
|
} |
||||
|
|
||||
|
// case 2.4
|
||||
|
// merge into next node. Avoid too many nodes if adding data in reverse order.
|
||||
|
if nextNode is not nil and nextNode has capacity |
||||
|
delete nextNode.Key |
||||
|
nextNode.Key = name |
||||
|
nextNode.batch.add name |
||||
|
insert nextNode.Key |
||||
|
return |
||||
|
|
||||
|
// case 2.5
|
||||
|
if prevNode is nil |
||||
|
insert new node with key = name, value = batch{name} |
||||
|
return |
||||
|
|
||||
|
*/ |
||||
|
func (nl *NameList) WriteName(name string) error { |
||||
|
lookupKey := []byte(name) |
||||
|
prevNode, nextNode, found, err := nl.skipList.FindGreaterOrEqual(lookupKey) |
||||
|
if err != nil { |
||||
|
return err |
||||
|
} |
||||
|
// case 1: the name already exists as one leading key in the batch
|
||||
|
if found && bytes.Compare(nextNode.Key, lookupKey) == 0 { |
||||
|
return nil |
||||
|
} |
||||
|
|
||||
|
if !found { |
||||
|
prevNode, err = nl.skipList.GetLargestNode() |
||||
|
if err != nil { |
||||
|
return err |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
if nextNode != nil && prevNode == nil { |
||||
|
prevNode, err = nl.skipList.loadElement(nextNode.Prev) |
||||
|
if err != nil { |
||||
|
return err |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
if prevNode != nil { |
||||
|
prevNameBatch := LoadNameBatch(prevNode.Value) |
||||
|
// case 2.1
|
||||
|
if prevNameBatch.ContainsName(name) { |
||||
|
return nil |
||||
|
} |
||||
|
|
||||
|
// case 2.2
|
||||
|
if len(prevNameBatch.names) < nl.batchSize { |
||||
|
prevNameBatch.WriteName(name) |
||||
|
return nl.skipList.ChangeValue(prevNode, prevNameBatch.ToBytes()) |
||||
|
} |
||||
|
|
||||
|
// case 2.3
|
||||
|
x, y := prevNameBatch.SplitBy(name) |
||||
|
addToX := len(x.names) <= len(y.names) |
||||
|
if len(x.names) != len(prevNameBatch.names) { |
||||
|
if addToX { |
||||
|
x.WriteName(name) |
||||
|
} |
||||
|
if x.key == prevNameBatch.key { |
||||
|
if err := nl.skipList.ChangeValue(prevNode, x.ToBytes()); err != nil { |
||||
|
return err |
||||
|
} |
||||
|
} else { |
||||
|
if err := nl.skipList.Insert([]byte(x.key), x.ToBytes()); err != nil { |
||||
|
return err |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
if len(y.names) != len(prevNameBatch.names) { |
||||
|
if !addToX { |
||||
|
y.WriteName(name) |
||||
|
} |
||||
|
if y.key == prevNameBatch.key { |
||||
|
if err := nl.skipList.ChangeValue(prevNode, y.ToBytes()); err != nil { |
||||
|
return err |
||||
|
} |
||||
|
} else { |
||||
|
if err := nl.skipList.Insert([]byte(y.key), y.ToBytes()); err != nil { |
||||
|
return err |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
return nil |
||||
|
|
||||
|
} |
||||
|
|
||||
|
// case 2.4
|
||||
|
if nextNode != nil { |
||||
|
nextNameBatch := LoadNameBatch(nextNode.Value) |
||||
|
if len(nextNameBatch.names) < nl.batchSize { |
||||
|
if err := nl.skipList.Delete(nextNode.Key); err != nil { |
||||
|
return err |
||||
|
} |
||||
|
nextNameBatch.WriteName(name) |
||||
|
if err := nl.skipList.Insert([]byte(nextNameBatch.key), nextNameBatch.ToBytes()); err != nil { |
||||
|
return err |
||||
|
} |
||||
|
return nil |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
// case 2.5
|
||||
|
// now prevNode is nil
|
||||
|
newNameBatch := NewNameBatch() |
||||
|
newNameBatch.WriteName(name) |
||||
|
if err := nl.skipList.Insert([]byte(newNameBatch.key), newNameBatch.ToBytes()); err != nil { |
||||
|
return err |
||||
|
} |
||||
|
|
||||
|
return nil |
||||
|
} |
||||
|
|
||||
|
/* |
||||
|
// case 1: exists in nextNode
|
||||
|
if nextNode != nil && nextNode.Key == name { |
||||
|
remove from nextNode, update nextNode |
||||
|
// TODO: merge with prevNode if possible?
|
||||
|
return |
||||
|
} |
||||
|
if nextNode is nil |
||||
|
prevNode = list.LargestNode |
||||
|
if prevNode == nil and nextNode.Prev != nil |
||||
|
prevNode = load(nextNode.Prev) |
||||
|
|
||||
|
// case 2: does not exist
|
||||
|
// case 2.1
|
||||
|
if prevNode == nil { |
||||
|
return |
||||
|
} |
||||
|
// case 2.2
|
||||
|
if prevNameBatch does not contain name { |
||||
|
return |
||||
|
} |
||||
|
|
||||
|
// case 3
|
||||
|
delete from prevNameBatch |
||||
|
if prevNameBatch + nextNode < capacityList |
||||
|
// case 3.1
|
||||
|
merge |
||||
|
else |
||||
|
// case 3.2
|
||||
|
update prevNode |
||||
|
|
||||
|
|
||||
|
*/ |
||||
|
func (nl *NameList) DeleteName(name string) error { |
||||
|
lookupKey := []byte(name) |
||||
|
prevNode, nextNode, found, err := nl.skipList.FindGreaterOrEqual(lookupKey) |
||||
|
if err != nil { |
||||
|
return err |
||||
|
} |
||||
|
|
||||
|
// case 1
|
||||
|
var nextNameBatch *NameBatch |
||||
|
if nextNode != nil { |
||||
|
nextNameBatch = LoadNameBatch(nextNode.Value) |
||||
|
} |
||||
|
if found && bytes.Compare(nextNode.Key, lookupKey) == 0 { |
||||
|
if err := nl.skipList.Delete(nextNode.Key); err != nil { |
||||
|
return err |
||||
|
} |
||||
|
nextNameBatch.DeleteName(name) |
||||
|
if len(nextNameBatch.names) > 0 { |
||||
|
if err := nl.skipList.Insert([]byte(nextNameBatch.key), nextNameBatch.ToBytes()); err != nil { |
||||
|
return err |
||||
|
} |
||||
|
} |
||||
|
return nil |
||||
|
} |
||||
|
|
||||
|
if !found { |
||||
|
prevNode, err = nl.skipList.GetLargestNode() |
||||
|
if err != nil { |
||||
|
return err |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
if nextNode != nil && prevNode == nil { |
||||
|
prevNode, err = nl.skipList.loadElement(nextNode.Prev) |
||||
|
if err != nil { |
||||
|
return err |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
// case 2
|
||||
|
if prevNode == nil { |
||||
|
// case 2.1
|
||||
|
return nil |
||||
|
} |
||||
|
prevNameBatch := LoadNameBatch(prevNode.Value) |
||||
|
if !prevNameBatch.ContainsName(name) { |
||||
|
// case 2.2
|
||||
|
return nil |
||||
|
} |
||||
|
|
||||
|
// case 3
|
||||
|
prevNameBatch.DeleteName(name) |
||||
|
if len(prevNameBatch.names) == 0 { |
||||
|
if err := nl.skipList.Delete(prevNode.Key); err != nil { |
||||
|
return err |
||||
|
} |
||||
|
return nil |
||||
|
} |
||||
|
if nextNameBatch != nil && len(nextNameBatch.names) + len(prevNameBatch.names) < nl.batchSize { |
||||
|
// case 3.1 merge nextNode and prevNode
|
||||
|
if err := nl.skipList.Delete(nextNode.Key); err != nil { |
||||
|
return err |
||||
|
} |
||||
|
for nextName := range nextNameBatch.names { |
||||
|
prevNameBatch.WriteName(nextName) |
||||
|
} |
||||
|
return nl.skipList.ChangeValue(prevNode, prevNameBatch.ToBytes()) |
||||
|
} else { |
||||
|
// case 3.2 update prevNode
|
||||
|
return nl.skipList.ChangeValue(prevNode, prevNameBatch.ToBytes()) |
||||
|
} |
||||
|
|
||||
|
return nil |
||||
|
} |
||||
|
|
||||
|
func (nl *NameList) ListNames(startFrom string, visitNamesFn func(name string) bool) error { |
||||
|
lookupKey := []byte(startFrom) |
||||
|
prevNode, nextNode, found, err := nl.skipList.FindGreaterOrEqual(lookupKey) |
||||
|
if err != nil { |
||||
|
return err |
||||
|
} |
||||
|
if found && bytes.Compare(nextNode.Key, lookupKey) == 0 { |
||||
|
prevNode = nil |
||||
|
} |
||||
|
if !found { |
||||
|
prevNode, err = nl.skipList.GetLargestNode() |
||||
|
if err != nil { |
||||
|
return err |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
if prevNode != nil { |
||||
|
prevNameBatch := LoadNameBatch(prevNode.Value) |
||||
|
if !prevNameBatch.ListNames(startFrom, visitNamesFn) { |
||||
|
return nil |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
for nextNode != nil { |
||||
|
nextNameBatch := LoadNameBatch(nextNode.Value) |
||||
|
if !nextNameBatch.ListNames(startFrom, visitNamesFn) { |
||||
|
return nil |
||||
|
} |
||||
|
nextNode, err = nl.skipList.loadElement(nextNode.Next[0]) |
||||
|
if err != nil { |
||||
|
return err |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
return nil |
||||
|
} |
@ -0,0 +1,73 @@ |
|||||
|
package skiplist |
||||
|
|
||||
|
import ( |
||||
|
"math/rand" |
||||
|
"strconv" |
||||
|
"testing" |
||||
|
) |
||||
|
|
||||
|
// maxNameCount is the number of sequential names written by TestNameList.
const maxNameCount = 100
||||
|
|
||||
|
// String returns the base-10 text form of x.
func String(x int) string {
	return strconv.FormatInt(int64(x), 10)
}
||||
|
|
||||
|
func TestNameList(t *testing.T) { |
||||
|
list := NewNameList(memStore, 7) |
||||
|
|
||||
|
for i := 0; i < maxNameCount; i++ { |
||||
|
list.WriteName(String(i)) |
||||
|
} |
||||
|
|
||||
|
counter := 0 |
||||
|
list.ListNames("", func(name string) bool { |
||||
|
counter++ |
||||
|
print(name, " ") |
||||
|
return true |
||||
|
}) |
||||
|
if counter != maxNameCount { |
||||
|
t.Fail() |
||||
|
} |
||||
|
|
||||
|
// list.skipList.println()
|
||||
|
|
||||
|
deleteBase := 5 |
||||
|
deleteCount := maxNameCount - 3 * deleteBase |
||||
|
|
||||
|
for i := deleteBase; i < deleteBase+deleteCount; i++ { |
||||
|
list.DeleteName(String(i)) |
||||
|
} |
||||
|
|
||||
|
counter = 0 |
||||
|
list.ListNames("", func(name string) bool { |
||||
|
counter++ |
||||
|
return true |
||||
|
}) |
||||
|
// list.skipList.println()
|
||||
|
if counter != maxNameCount-deleteCount { |
||||
|
t.Fail() |
||||
|
} |
||||
|
|
||||
|
// randomized deletion
|
||||
|
list = NewNameList(memStore, 7) |
||||
|
// Delete elements at random positions in the list.
|
||||
|
rList := rand.Perm(maxN) |
||||
|
for _, i := range rList { |
||||
|
list.WriteName(String(i)) |
||||
|
} |
||||
|
for _, i := range rList { |
||||
|
list.DeleteName(String(i)) |
||||
|
} |
||||
|
counter = 0 |
||||
|
list.ListNames("", func(name string) bool { |
||||
|
counter++ |
||||
|
print(name, " ") |
||||
|
return true |
||||
|
}) |
||||
|
if counter != 0 { |
||||
|
t.Fail() |
||||
|
} |
||||
|
|
||||
|
} |
Write
Preview
Loading…
Cancel
Save
Reference in new issue