From 08e4b56a8abaaadb0701979e4bd857fb8c50c776 Mon Sep 17 00:00:00 2001 From: Chris Lu Date: Sun, 26 Jan 2020 13:01:11 -0800 Subject: [PATCH] mount: able to handle large git clone --- weed/filesys/dirty_page.go | 7 +---- weed/filesys/dirty_page_interval.go | 34 +++++++++++--------- weed/filesys/dirty_page_interval_test.go | 40 ++++++++++++++++++++++++ 3 files changed, 60 insertions(+), 21 deletions(-) create mode 100644 weed/filesys/dirty_page_interval_test.go diff --git a/weed/filesys/dirty_page.go b/weed/filesys/dirty_page.go index f1532a6a0..7a41e371e 100644 --- a/weed/filesys/dirty_page.go +++ b/weed/filesys/dirty_page.go @@ -44,12 +44,7 @@ func (pages *ContinuousDirtyPages) AddPage(ctx context.Context, offset int64, da return pages.flushAndSave(ctx, offset, data) } - hasOverlap := pages.intervals.AddInterval(data, offset) - if hasOverlap { - chunks, err = pages.saveExistingPagesToStorage(ctx) - pages.intervals.AddInterval(data, offset) - return - } + pages.intervals.AddInterval(data, offset) var chunk *filer_pb.FileChunk var hasSavedData bool diff --git a/weed/filesys/dirty_page_interval.go b/weed/filesys/dirty_page_interval.go index c64196cdf..77fab75ef 100644 --- a/weed/filesys/dirty_page_interval.go +++ b/weed/filesys/dirty_page_interval.go @@ -65,7 +65,10 @@ func (c *ContinuousIntervals) TotalSize() (total int64) { return } -func (c *ContinuousIntervals) AddInterval(data []byte, offset int64) (hasOverlap bool) { +func (c *ContinuousIntervals) AddInterval(data []byte, offset int64) { + + // TODO AddInterval needs to handle all possible out of order writes + interval := &IntervalNode{Data: data, Offset: offset, Size: int64(len(data))} var prevList, nextList *IntervalLinkedList @@ -75,6 +78,10 @@ func (c *ContinuousIntervals) AddInterval(data []byte, offset int64) (hasOverlap nextList = list break } + if list.Head.Offset < interval.Offset+interval.Size && interval.Offset+interval.Size <= list.Head.Offset+list.Size() { + glog.V(0).Infof("unexpected [%d,%d) overlaps [%d,%d)", interval.Offset, interval.Offset+interval.Size, list.Head.Offset, list.Head.Offset+list.Size()) + break + } } for _, list := range c.lists { @@ -84,20 +91,17 @@ func (c *ContinuousIntervals) AddInterval(data []byte, offset int64) (hasOverlap break } if list.Head.Offset <= offset && offset < list.Head.Offset+list.Size() { - if list.Tail.Offset <= offset { - dataStartIndex := list.Tail.Offset + list.Tail.Size - offset - glog.V(4).Infof("overlap data new [0,%d) same=%v", dataStartIndex, bytes.Compare(interval.Data[0:dataStartIndex], list.Tail.Data[len(list.Tail.Data)-int(dataStartIndex):])) - interval.Data = interval.Data[dataStartIndex:] - interval.Size -= dataStartIndex - interval.Offset = offset + dataStartIndex - glog.V(4).Infof("overlapping append as [%d,%d) dataSize=%d", interval.Offset, interval.Offset+interval.Size, len(interval.Data)) - list.addNodeToTail(interval) - prevList = list - break - } - glog.V(4).Infof("overlapped! interval is [%d,%d) dataSize=%d", interval.Offset, interval.Offset+interval.Size, len(interval.Data)) - hasOverlap = true - return + + // the new interval overwrites the old tail + dataStartIndex := list.Tail.Offset + list.Tail.Size - offset + glog.V(4).Infof("overlap data new [0,%d) same=%v", dataStartIndex, bytes.Compare(interval.Data[0:dataStartIndex], list.Tail.Data[len(list.Tail.Data)-int(dataStartIndex):])) + list.Tail.Data = list.Tail.Data[:len(list.Tail.Data)-int(dataStartIndex)] + list.Tail.Size -= dataStartIndex + glog.V(4).Infof("overlapping append as [%d,%d) dataSize=%d", interval.Offset, interval.Offset+interval.Size, len(interval.Data)) + + list.addNodeToTail(interval) + prevList = list + break } } diff --git a/weed/filesys/dirty_page_interval_test.go b/weed/filesys/dirty_page_interval_test.go new file mode 100644 index 000000000..4f62f90c9 --- /dev/null +++ b/weed/filesys/dirty_page_interval_test.go @@ -0,0 +1,40 @@ +package filesys + +import ( + "bytes" + "testing" +) + +func TestContinuousIntervals_AddInterval(t *testing.T) { + + c := &ContinuousIntervals{} + + // 25, 25, 25 + c.AddInterval(getBytes(25, 3), 0) + // _, _, 23, 23, 23, 23 + c.AddInterval(getBytes(23, 4), 2) + + expectedData(t, c, 0, 25, 25, 23, 23, 23, 23) +} + +func expectedData(t *testing.T, c *ContinuousIntervals, offset int, data ...byte) { + start, stop := int64(offset), int64(offset+len(data)) + for _, list := range c.lists { + nodeStart, nodeStop := max(start, list.Head.Offset), min(stop, list.Head.Offset+list.Size()) + if nodeStart < nodeStop { + buf := make([]byte, nodeStop-nodeStart) + list.ReadData(buf, nodeStart, nodeStop) + if bytes.Compare(buf, data[nodeStart-start:nodeStop-start]) != 0 { + t.Errorf("expected %v actual %v", data[nodeStart-start:nodeStop-start], buf) + } + } + } +} + +func getBytes(content byte, length int) []byte { + data := make([]byte, length) + for i := 0; i < length; i++ { + data[i] = content + } + return data +}