Chris Lu
3 years ago
5 changed files with 551 additions and 6 deletions
-
102weed/util/skiplist/name_batch.go
-
303weed/util/skiplist/name_list.go
-
73weed/util/skiplist/name_list_test.go
-
75weed/util/skiplist/skiplist.pb.go
-
4weed/util/skiplist/skiplist.proto
@ -0,0 +1,102 @@ |
|||
package skiplist |
|||
|
|||
import ( |
|||
"github.com/chrislusf/seaweedfs/weed/glog" |
|||
"github.com/golang/protobuf/proto" |
|||
"sort" |
|||
"strings" |
|||
) |
|||
|
|||
// NameBatch is a set of names together with the smallest name it holds.
type NameBatch struct {
	// key is the lexicographically smallest name in names, kept up to date by
	// WriteName and DeleteName. It is "" when the batch holds no names.
	key string
	// names is the set of names stored in this batch.
	names map[string]struct{}
}
|||
|
|||
func (nb *NameBatch) ContainsName(name string) (found bool) { |
|||
_, found = nb.names[name] |
|||
return |
|||
} |
|||
func (nb *NameBatch) WriteName(name string) { |
|||
if nb.key == "" || strings.Compare(nb.key, name) > 0 { |
|||
nb.key = name |
|||
} |
|||
nb.names[name] = struct{}{} |
|||
} |
|||
func (nb *NameBatch) DeleteName(name string) { |
|||
delete(nb.names, name) |
|||
if nb.key == name { |
|||
nb.key = "" |
|||
for n := range nb.names { |
|||
if nb.key == "" || strings.Compare(nb.key, n) > 0 { |
|||
nb.key = n |
|||
} |
|||
} |
|||
} |
|||
} |
|||
func (nb *NameBatch) ListNames(startFrom string, visitNamesFn func(name string) bool) bool { |
|||
var names []string |
|||
needFilter := startFrom == "" |
|||
for n := range nb.names { |
|||
if !needFilter || strings.Compare(n, startFrom) >= 0 { |
|||
names = append(names, n) |
|||
} |
|||
} |
|||
sort.Slice(names, func(i, j int) bool { |
|||
return strings.Compare(names[i], names[j]) < 0 |
|||
}) |
|||
for _, n := range names { |
|||
if !visitNamesFn(n) { |
|||
return false |
|||
} |
|||
} |
|||
return true |
|||
} |
|||
|
|||
func NewNameBatch() *NameBatch { |
|||
return &NameBatch{ |
|||
names: make(map[string]struct{}), |
|||
} |
|||
} |
|||
|
|||
func LoadNameBatch(data []byte) *NameBatch { |
|||
t := &NameBatchData{} |
|||
if len(data) > 0 { |
|||
err := proto.Unmarshal(data, t) |
|||
if err != nil { |
|||
glog.Errorf("unmarshal into NameBatchData{} : %v", err) |
|||
return nil |
|||
} |
|||
} |
|||
nb := NewNameBatch() |
|||
for _, n := range t.Names { |
|||
name := string(n) |
|||
if nb.key == "" || strings.Compare(nb.key, name) > 0 { |
|||
nb.key = name |
|||
} |
|||
nb.names[name] = struct{}{} |
|||
} |
|||
return nb |
|||
} |
|||
|
|||
func (nb *NameBatch) ToBytes() []byte { |
|||
t := &NameBatchData{} |
|||
for n := range nb.names { |
|||
t.Names = append(t.Names, []byte(n)) |
|||
} |
|||
data, _ := proto.Marshal(t) |
|||
return data |
|||
} |
|||
|
|||
func (nb *NameBatch) SplitBy(name string) (x, y *NameBatch) { |
|||
x, y = NewNameBatch(), NewNameBatch() |
|||
|
|||
for n := range nb.names { |
|||
// there should be no equal case though
|
|||
if strings.Compare(n, name) <= 0 { |
|||
x.WriteName(n) |
|||
} else { |
|||
y.WriteName(n) |
|||
} |
|||
} |
|||
return |
|||
} |
@ -0,0 +1,303 @@ |
|||
package skiplist |
|||
|
|||
import ( |
|||
"bytes" |
|||
) |
|||
|
|||
type NameList struct { |
|||
skipList *SkipList |
|||
batchSize int |
|||
} |
|||
|
|||
func NewNameList(store ListStore, batchSize int) *NameList { |
|||
return &NameList{ |
|||
skipList: New(store), |
|||
batchSize: batchSize, |
|||
} |
|||
} |
|||
|
|||
/* |
|||
Be reluctant to create new nodes. Try to fit into either previous node or next node. |
|||
Prefer to add to previous node. |
|||
|
|||
There are multiple cases after finding the name for greater or equal node |
|||
1. found and node.Key == name |
|||
The node contains a batch with leading key the same as the name |
|||
nothing to do |
|||
2. no such node found or node.Key > name |
|||
|
|||
if no such node found |
|||
prevNode = list.LargestNode |
|||
|
|||
// case 2.1
|
|||
if previousNode contains name |
|||
nothing to do |
|||
|
|||
// prefer to add to previous node
|
|||
if prevNode != nil { |
|||
// case 2.2
|
|||
if prevNode has capacity |
|||
prevNode.add name, and save |
|||
return |
|||
// case 2.3
|
|||
split prevNode by name |
|||
} |
|||
|
|||
// case 2.4
|
|||
// merge into next node. Avoid too many nodes if adding data in reverse order.
|
|||
if nextNode is not nil and nextNode has capacity |
|||
delete nextNode.Key |
|||
nextNode.Key = name |
|||
nextNode.batch.add name |
|||
insert nodeNode.Key |
|||
return |
|||
|
|||
// case 2.5
|
|||
if prevNode is nil |
|||
insert new node with key = name, value = batch{name} |
|||
return |
|||
|
|||
*/ |
|||
func (nl *NameList) WriteName(name string) error { |
|||
lookupKey := []byte(name) |
|||
prevNode, nextNode, found, err := nl.skipList.FindGreaterOrEqual(lookupKey) |
|||
if err != nil { |
|||
return err |
|||
} |
|||
// case 1: the name already exists as one leading key in the batch
|
|||
if found && bytes.Compare(nextNode.Key, lookupKey) == 0 { |
|||
return nil |
|||
} |
|||
|
|||
if !found { |
|||
prevNode, err = nl.skipList.GetLargestNode() |
|||
if err != nil { |
|||
return err |
|||
} |
|||
} |
|||
|
|||
if nextNode != nil && prevNode == nil { |
|||
prevNode, err = nl.skipList.loadElement(nextNode.Prev) |
|||
if err != nil { |
|||
return err |
|||
} |
|||
} |
|||
|
|||
if prevNode != nil { |
|||
prevNameBatch := LoadNameBatch(prevNode.Value) |
|||
// case 2.1
|
|||
if prevNameBatch.ContainsName(name) { |
|||
return nil |
|||
} |
|||
|
|||
// case 2.2
|
|||
if len(prevNameBatch.names) < nl.batchSize { |
|||
prevNameBatch.WriteName(name) |
|||
return nl.skipList.ChangeValue(prevNode, prevNameBatch.ToBytes()) |
|||
} |
|||
|
|||
// case 2.3
|
|||
x, y := prevNameBatch.SplitBy(name) |
|||
addToX := len(x.names) <= len(y.names) |
|||
if len(x.names) != len(prevNameBatch.names) { |
|||
if addToX { |
|||
x.WriteName(name) |
|||
} |
|||
if x.key == prevNameBatch.key { |
|||
if err := nl.skipList.ChangeValue(prevNode, x.ToBytes()); err != nil { |
|||
return err |
|||
} |
|||
} else { |
|||
if err := nl.skipList.Insert([]byte(x.key), x.ToBytes()); err != nil { |
|||
return err |
|||
} |
|||
} |
|||
} |
|||
if len(y.names) != len(prevNameBatch.names) { |
|||
if !addToX { |
|||
y.WriteName(name) |
|||
} |
|||
if y.key == prevNameBatch.key { |
|||
if err := nl.skipList.ChangeValue(prevNode, y.ToBytes()); err != nil { |
|||
return err |
|||
} |
|||
} else { |
|||
if err := nl.skipList.Insert([]byte(y.key), y.ToBytes()); err != nil { |
|||
return err |
|||
} |
|||
} |
|||
} |
|||
return nil |
|||
|
|||
} |
|||
|
|||
// case 2.4
|
|||
if nextNode != nil { |
|||
nextNameBatch := LoadNameBatch(nextNode.Value) |
|||
if len(nextNameBatch.names) < nl.batchSize { |
|||
if err := nl.skipList.Delete(nextNode.Key); err != nil { |
|||
return err |
|||
} |
|||
nextNameBatch.WriteName(name) |
|||
if err := nl.skipList.Insert([]byte(nextNameBatch.key), nextNameBatch.ToBytes()); err != nil { |
|||
return err |
|||
} |
|||
return nil |
|||
} |
|||
} |
|||
|
|||
// case 2.5
|
|||
// now prevNode is nil
|
|||
newNameBatch := NewNameBatch() |
|||
newNameBatch.WriteName(name) |
|||
if err := nl.skipList.Insert([]byte(newNameBatch.key), newNameBatch.ToBytes()); err != nil { |
|||
return err |
|||
} |
|||
|
|||
return nil |
|||
} |
|||
|
|||
/* |
|||
// case 1: exists in nextNode
|
|||
if nextNode != nil && nextNode.Key == name { |
|||
remove from nextNode, update nextNode |
|||
// TODO: merge with prevNode if possible?
|
|||
return |
|||
} |
|||
if nextNode is nil |
|||
prevNode = list.Largestnode |
|||
if prevNode == nil and nextNode.Prev != nil |
|||
prevNode = load(nextNode.Prev) |
|||
|
|||
// case 2: does not exist
|
|||
// case 2.1
|
|||
if prevNode == nil { |
|||
return |
|||
} |
|||
// case 2.2
|
|||
if prevNameBatch does not contain name { |
|||
return |
|||
} |
|||
|
|||
// case 3
|
|||
delete from prevNameBatch |
|||
if prevNameBatch + nextNode < capacityList |
|||
// case 3.1
|
|||
merge |
|||
else |
|||
// case 3.2
|
|||
update prevNode |
|||
|
|||
|
|||
*/ |
|||
func (nl *NameList) DeleteName(name string) error { |
|||
lookupKey := []byte(name) |
|||
prevNode, nextNode, found, err := nl.skipList.FindGreaterOrEqual(lookupKey) |
|||
if err != nil { |
|||
return err |
|||
} |
|||
|
|||
// case 1
|
|||
var nextNameBatch *NameBatch |
|||
if nextNode != nil { |
|||
nextNameBatch = LoadNameBatch(nextNode.Value) |
|||
} |
|||
if found && bytes.Compare(nextNode.Key, lookupKey) == 0 { |
|||
if err := nl.skipList.Delete(nextNode.Key); err != nil { |
|||
return err |
|||
} |
|||
nextNameBatch.DeleteName(name) |
|||
if len(nextNameBatch.names) > 0 { |
|||
if err := nl.skipList.Insert([]byte(nextNameBatch.key), nextNameBatch.ToBytes()); err != nil { |
|||
return err |
|||
} |
|||
} |
|||
return nil |
|||
} |
|||
|
|||
if !found { |
|||
prevNode, err = nl.skipList.GetLargestNode() |
|||
if err != nil { |
|||
return err |
|||
} |
|||
} |
|||
|
|||
if nextNode != nil && prevNode == nil { |
|||
prevNode, err = nl.skipList.loadElement(nextNode.Prev) |
|||
if err != nil { |
|||
return err |
|||
} |
|||
} |
|||
|
|||
// case 2
|
|||
if prevNode == nil { |
|||
// case 2.1
|
|||
return nil |
|||
} |
|||
prevNameBatch := LoadNameBatch(prevNode.Value) |
|||
if !prevNameBatch.ContainsName(name) { |
|||
// case 2.2
|
|||
return nil |
|||
} |
|||
|
|||
// case 3
|
|||
prevNameBatch.DeleteName(name) |
|||
if len(prevNameBatch.names) == 0 { |
|||
if err := nl.skipList.Delete(prevNode.Key); err != nil { |
|||
return err |
|||
} |
|||
return nil |
|||
} |
|||
if nextNameBatch != nil && len(nextNameBatch.names) + len(prevNameBatch.names) < nl.batchSize { |
|||
// case 3.1 merge nextNode and prevNode
|
|||
if err := nl.skipList.Delete(nextNode.Key); err != nil { |
|||
return err |
|||
} |
|||
for nextName := range nextNameBatch.names { |
|||
prevNameBatch.WriteName(nextName) |
|||
} |
|||
return nl.skipList.ChangeValue(prevNode, prevNameBatch.ToBytes()) |
|||
} else { |
|||
// case 3.2 update prevNode
|
|||
return nl.skipList.ChangeValue(prevNode, prevNameBatch.ToBytes()) |
|||
} |
|||
|
|||
return nil |
|||
} |
|||
|
|||
func (nl *NameList) ListNames(startFrom string, visitNamesFn func(name string) bool) error { |
|||
lookupKey := []byte(startFrom) |
|||
prevNode, nextNode, found, err := nl.skipList.FindGreaterOrEqual(lookupKey) |
|||
if err != nil { |
|||
return err |
|||
} |
|||
if found && bytes.Compare(nextNode.Key, lookupKey) == 0 { |
|||
prevNode = nil |
|||
} |
|||
if !found { |
|||
prevNode, err = nl.skipList.GetLargestNode() |
|||
if err != nil { |
|||
return err |
|||
} |
|||
} |
|||
|
|||
if prevNode != nil { |
|||
prevNameBatch := LoadNameBatch(prevNode.Value) |
|||
if !prevNameBatch.ListNames(startFrom, visitNamesFn) { |
|||
return nil |
|||
} |
|||
} |
|||
|
|||
for nextNode != nil { |
|||
nextNameBatch := LoadNameBatch(nextNode.Value) |
|||
if !nextNameBatch.ListNames(startFrom, visitNamesFn) { |
|||
return nil |
|||
} |
|||
nextNode, err = nl.skipList.loadElement(nextNode.Next[0]) |
|||
if err != nil { |
|||
return err |
|||
} |
|||
} |
|||
|
|||
return nil |
|||
} |
@ -0,0 +1,73 @@ |
|||
package skiplist |
|||
|
|||
import ( |
|||
"math/rand" |
|||
"strconv" |
|||
"testing" |
|||
) |
|||
|
|||
// maxNameCount is the number of sequential names TestNameList writes.
const maxNameCount = 100
|||
|
|||
// String returns the base-10 text form of x; the tests use it to turn
// integers into names.
func String(x int) string {
	decimal := strconv.Itoa(x)
	return decimal
}
|||
|
|||
func TestNameList(t *testing.T) { |
|||
list := NewNameList(memStore, 7) |
|||
|
|||
for i := 0; i < maxNameCount; i++ { |
|||
list.WriteName(String(i)) |
|||
} |
|||
|
|||
counter := 0 |
|||
list.ListNames("", func(name string) bool { |
|||
counter++ |
|||
print(name, " ") |
|||
return true |
|||
}) |
|||
if counter != maxNameCount { |
|||
t.Fail() |
|||
} |
|||
|
|||
// list.skipList.println()
|
|||
|
|||
deleteBase := 5 |
|||
deleteCount := maxNameCount - 3 * deleteBase |
|||
|
|||
for i := deleteBase; i < deleteBase+deleteCount; i++ { |
|||
list.DeleteName(String(i)) |
|||
} |
|||
|
|||
counter = 0 |
|||
list.ListNames("", func(name string) bool { |
|||
counter++ |
|||
return true |
|||
}) |
|||
// list.skipList.println()
|
|||
if counter != maxNameCount-deleteCount { |
|||
t.Fail() |
|||
} |
|||
|
|||
// randomized deletion
|
|||
list = NewNameList(memStore, 7) |
|||
// Delete elements at random positions in the list.
|
|||
rList := rand.Perm(maxN) |
|||
for _, i := range rList { |
|||
list.WriteName(String(i)) |
|||
} |
|||
for _, i := range rList { |
|||
list.DeleteName(String(i)) |
|||
} |
|||
counter = 0 |
|||
list.ListNames("", func(name string) bool { |
|||
counter++ |
|||
print(name, " ") |
|||
return true |
|||
}) |
|||
if counter != 0 { |
|||
t.Fail() |
|||
} |
|||
|
|||
} |
Write
Preview
Loading…
Cancel
Save
Reference in new issue