From c2e589f202b84b5beb98f1f5c243cc38b58f232b Mon Sep 17 00:00:00 2001 From: Chris Lu Date: Wed, 22 Jan 2020 23:00:04 -0800 Subject: [PATCH] mount: better combines connected intervals to write to volume servers --- weed/filesys/dirty_page.go | 161 +++++++++-------------- weed/filesys/dirty_page_interval.go | 190 ++++++++++++++++++++++++++++ weed/filesys/file.go | 6 - weed/filesys/filehandle.go | 15 ++- 4 files changed, 259 insertions(+), 113 deletions(-) create mode 100644 weed/filesys/dirty_page_interval.go diff --git a/weed/filesys/dirty_page.go b/weed/filesys/dirty_page.go index 3418dc1c9..f1532a6a0 100644 --- a/weed/filesys/dirty_page.go +++ b/weed/filesys/dirty_page.go @@ -4,8 +4,8 @@ import ( "bytes" "context" "fmt" + "io" "sync" - "sync/atomic" "time" "github.com/chrislusf/seaweedfs/weed/glog" @@ -15,28 +15,19 @@ import ( ) type ContinuousDirtyPages struct { - hasData bool - Offset int64 - Size int64 - Data []byte - f *File - lock sync.Mutex + intervals *ContinuousIntervals + f *File + lock sync.Mutex } func newDirtyPages(file *File) *ContinuousDirtyPages { return &ContinuousDirtyPages{ - Data: nil, - f: file, + intervals: &ContinuousIntervals{}, + f: file, } } func (pages *ContinuousDirtyPages) releaseResource() { - if pages.Data != nil { - pages.f.wfs.bufPool.Put(pages.Data) - pages.Data = nil - atomic.AddInt32(&counter, -1) - glog.V(3).Infof("%s/%s releasing resource %d", pages.f.dir.Path, pages.f.Name, counter) - } } var counter = int32(0) @@ -46,84 +37,49 @@ func (pages *ContinuousDirtyPages) AddPage(ctx context.Context, offset int64, da pages.lock.Lock() defer pages.lock.Unlock() - var chunk *filer_pb.FileChunk + glog.V(3).Infof("%s AddPage [%d,%d)", pages.f.fullpath(), offset, offset+int64(len(data))) if len(data) > int(pages.f.wfs.option.ChunkSizeLimit) { // this is more than what buffer can hold. return pages.flushAndSave(ctx, offset, data) } - if pages.Data == nil { - pages.Data = pages.f.wfs.bufPool.Get().([]byte) - atomic.AddInt32(&counter, 1) - glog.V(3).Infof("%s/%s acquire resource %d", pages.f.dir.Path, pages.f.Name, counter) - } - - if offset < pages.Offset || offset >= pages.Offset+int64(len(pages.Data)) || - pages.Offset+int64(len(pages.Data)) < offset+int64(len(data)) { - // if the data is out of range, - // or buffer is full if adding new data, - // flush current buffer and add new data - - glog.V(4).Infof("offset=%d, size=%d, existing pages offset=%d, pages size=%d, data=%d", offset, len(data), pages.Offset, pages.Size, len(pages.Data)) - - if chunk, err = pages.saveExistingPagesToStorage(ctx); err == nil { - if chunk != nil { - glog.V(4).Infof("%s/%s add save [%d,%d)", pages.f.dir.Path, pages.f.Name, chunk.Offset, chunk.Offset+int64(chunk.Size)) - chunks = append(chunks, chunk) - } - } else { - glog.V(0).Infof("%s/%s add save [%d,%d): %v", pages.f.dir.Path, pages.f.Name, chunk.Offset, chunk.Offset+int64(chunk.Size), err) - return - } - pages.Offset = offset - glog.V(4).Infof("copy data0: offset=%d, size=%d, existing pages offset=%d, pages size=%d, data=%d", offset, len(data), pages.Offset, pages.Size, len(pages.Data)) - copy(pages.Data, data) - pages.Size = int64(len(data)) + hasOverlap := pages.intervals.AddInterval(data, offset) + if hasOverlap { + chunks, err = pages.saveExistingPagesToStorage(ctx) + pages.intervals.AddInterval(data, offset) return } - if offset != pages.Offset+pages.Size { - // when this happens, debug shows the data overlapping with existing data is empty - // the data is not just append - if offset == pages.Offset && int(pages.Size) < len(data) { - glog.V(4).Infof("copy data1: offset=%d, size=%d, existing pages offset=%d, pages size=%d, data=%d", offset, len(data), pages.Offset, pages.Size, len(pages.Data)) - copy(pages.Data[pages.Size:], data[pages.Size:]) - } else { - if pages.Size != 0 { - glog.V(1).Infof("%s/%s add page: pages [%d, %d) write [%d, %d)", pages.f.dir.Path, pages.f.Name, pages.Offset, pages.Offset+pages.Size, offset, offset+int64(len(data))) - } - return pages.flushAndSave(ctx, offset, data) + var chunk *filer_pb.FileChunk + var hasSavedData bool + + if pages.intervals.TotalSize() > pages.f.wfs.option.ChunkSizeLimit { + chunk, hasSavedData, err = pages.saveExistingLargestPageToStorage(ctx) + if hasSavedData { + chunks = append(chunks, chunk) } - } else { - glog.V(4).Infof("copy data2: offset=%d, size=%d, existing pages offset=%d, pages size=%d, data=%d", offset, len(data), pages.Offset, pages.Size, len(pages.Data)) - copy(pages.Data[offset-pages.Offset:], data) } - pages.Size = max(pages.Size, offset+int64(len(data))-pages.Offset) - return } func (pages *ContinuousDirtyPages) flushAndSave(ctx context.Context, offset int64, data []byte) (chunks []*filer_pb.FileChunk, err error) { var chunk *filer_pb.FileChunk + var newChunks []*filer_pb.FileChunk // flush existing - if chunk, err = pages.saveExistingPagesToStorage(ctx); err == nil { - if chunk != nil { - glog.V(4).Infof("%s/%s flush existing [%d,%d) to %s", pages.f.dir.Path, pages.f.Name, chunk.Offset, chunk.Offset+int64(chunk.Size), chunk.FileId) - chunks = append(chunks, chunk) + if newChunks, err = pages.saveExistingPagesToStorage(ctx); err == nil { + if newChunks != nil { + chunks = append(chunks, newChunks...) } } else { - glog.V(0).Infof("%s/%s failed to flush1 [%d,%d): %v", pages.f.dir.Path, pages.f.Name, chunk.Offset, chunk.Offset+int64(chunk.Size), err) return } - pages.Size = 0 - pages.Offset = 0 // flush the new page - if chunk, err = pages.saveToStorage(ctx, data, offset); err == nil { + if chunk, err = pages.saveToStorage(ctx, bytes.NewReader(data), offset, int64(len(data))); err == nil { if chunk != nil { glog.V(4).Infof("%s/%s flush big request [%d,%d) to %s", pages.f.dir.Path, pages.f.Name, chunk.Offset, chunk.Offset+int64(chunk.Size), chunk.FileId) chunks = append(chunks, chunk) @@ -136,37 +92,55 @@ func (pages *ContinuousDirtyPages) flushAndSave(ctx context.Context, offset int6 return } -func (pages *ContinuousDirtyPages) FlushToStorage(ctx context.Context) (chunk *filer_pb.FileChunk, err error) { +func (pages *ContinuousDirtyPages) FlushToStorage(ctx context.Context) (chunks []*filer_pb.FileChunk, err error) { pages.lock.Lock() defer pages.lock.Unlock() - if pages.Size == 0 { - return nil, nil - } + return pages.saveExistingPagesToStorage(ctx) +} - if chunk, err = pages.saveExistingPagesToStorage(ctx); err == nil { - pages.Size = 0 - pages.Offset = 0 - if chunk != nil { - glog.V(4).Infof("%s/%s flush [%d,%d)", pages.f.dir.Path, pages.f.Name, chunk.Offset, chunk.Offset+int64(chunk.Size)) +func (pages *ContinuousDirtyPages) saveExistingPagesToStorage(ctx context.Context) (chunks []*filer_pb.FileChunk, err error) { + + var hasSavedData bool + var chunk *filer_pb.FileChunk + + for { + + chunk, hasSavedData, err = pages.saveExistingLargestPageToStorage(ctx) + if !hasSavedData { + return chunks, err + } + + if err == nil { + chunks = append(chunks, chunk) + } else { + return } } - return + } -func (pages *ContinuousDirtyPages) saveExistingPagesToStorage(ctx context.Context) (*filer_pb.FileChunk, error) { +func (pages *ContinuousDirtyPages) saveExistingLargestPageToStorage(ctx context.Context) (chunk *filer_pb.FileChunk, hasSavedData bool, err error) { - if pages.Size == 0 { - return nil, nil + maxList := pages.intervals.RemoveLargestIntervalLinkedList() + if maxList == nil { + return nil, false, nil } - glog.V(0).Infof("%s/%s saveExistingPagesToStorage [%d,%d): Data len=%d", pages.f.dir.Path, pages.f.Name, pages.Offset, pages.Size, len(pages.Data)) + chunk, err = pages.saveToStorage(ctx, maxList.ToReader(), maxList.Offset(), maxList.Size()) + if err == nil { + hasSavedData = true + glog.V(3).Infof("%s saveToStorage [%d,%d) %s", pages.f.fullpath(), maxList.Offset(), maxList.Offset()+maxList.Size(), chunk.FileId) + } else { + glog.V(0).Infof("%s saveToStorage [%d,%d): %v", pages.f.fullpath(), maxList.Offset(), maxList.Offset()+maxList.Size(), err) + return + } - return pages.saveToStorage(ctx, pages.Data[:pages.Size], pages.Offset) + return } -func (pages *ContinuousDirtyPages) saveToStorage(ctx context.Context, buf []byte, offset int64) (*filer_pb.FileChunk, error) { +func (pages *ContinuousDirtyPages) saveToStorage(ctx context.Context, reader io.Reader, offset int64, size int64) (*filer_pb.FileChunk, error) { var fileId, host string var auth security.EncodedJwt @@ -195,8 +169,7 @@ func (pages *ContinuousDirtyPages) saveToStorage(ctx context.Context, buf []byte } fileUrl := fmt.Sprintf("http://%s/%s", host, fileId) - bufReader := bytes.NewReader(buf) - uploadResult, err := operation.Upload(fileUrl, pages.f.Name, bufReader, false, "", nil, auth) + uploadResult, err := operation.Upload(fileUrl, pages.f.Name, reader, false, "", nil, auth) if err != nil { glog.V(0).Infof("upload data %v to %s: %v", pages.f.Name, fileUrl, err) return nil, fmt.Errorf("upload data: %v", err) @@ -209,7 +182,7 @@ func (pages *ContinuousDirtyPages) saveToStorage(ctx context.Context, buf []byte return &filer_pb.FileChunk{ FileId: fileId, Offset: offset, - Size: uint64(len(buf)), + Size: uint64(size), Mtime: time.Now().UnixNano(), ETag: uploadResult.ETag, }, nil @@ -229,23 +202,11 @@ func min(x, y int64) int64 { return y } -func (pages *ContinuousDirtyPages) ReadDirtyData(ctx context.Context, data []byte, startOffset int64) (offset int64, size int, err error) { - bufSize := int64(len(data)) +func (pages *ContinuousDirtyPages) ReadDirtyData(ctx context.Context, data []byte, startOffset int64) (offset int64, size int) { pages.lock.Lock() defer pages.lock.Unlock() - if startOffset+bufSize < pages.Offset { - return - } - if startOffset >= pages.Offset+pages.Size { - return - } - - offset = max(pages.Offset, startOffset) - stopOffset := min(pages.Offset+pages.Size, startOffset+bufSize) - size = int(stopOffset - offset) - copy(data[offset-startOffset:], pages.Data[offset-pages.Offset:stopOffset-pages.Offset]) + return pages.intervals.ReadData(data, startOffset) - return } diff --git a/weed/filesys/dirty_page_interval.go b/weed/filesys/dirty_page_interval.go new file mode 100644 index 000000000..5c55268c7 --- /dev/null +++ b/weed/filesys/dirty_page_interval.go @@ -0,0 +1,190 @@ +package filesys + +import ( + "bytes" + "io" + "math" + + "github.com/chrislusf/seaweedfs/weed/glog" +) + +type IntervalNode struct { + Data []byte + Offset int64 + Size int64 + Next *IntervalNode +} + +type IntervalLinkedList struct { + Head *IntervalNode + Tail *IntervalNode +} + +type ContinuousIntervals struct { + lists []*IntervalLinkedList +} + +func (list *IntervalLinkedList) Offset() int64 { + return list.Head.Offset +} +func (list *IntervalLinkedList) Size() int64 { + return list.Tail.Offset + list.Tail.Size - list.Head.Offset +} +func (list *IntervalLinkedList) addNodeToTail(node *IntervalNode) { + // glog.V(0).Infof("add to tail [%d,%d) + [%d,%d) => [%d,%d)", list.Head.Offset, list.Tail.Offset+list.Tail.Size, node.Offset, node.Offset+node.Size, list.Head.Offset, node.Offset+node.Size) + list.Tail.Next = node + list.Tail = node +} +func (list *IntervalLinkedList) addNodeToHead(node *IntervalNode) { + // glog.V(0).Infof("add to head [%d,%d) + [%d,%d) => [%d,%d)", node.Offset, node.Offset+node.Size, list.Head.Offset, list.Tail.Offset+list.Tail.Size, node.Offset, list.Tail.Offset+list.Tail.Size) + node.Next = list.Head + list.Head = node +} + +func (list *IntervalLinkedList) ReadData(buf []byte, start, stop int64) { + t := list.Head + for { + + nodeStart, nodeStop := max(start, t.Offset), min(stop, t.Offset+t.Size) + if nodeStart < nodeStop { + glog.V(0).Infof("copying start=%d stop=%d t=[%d,%d) t.data=%d => bufSize=%d nodeStart=%d, nodeStop=%d", + start, stop, t.Offset, t.Offset+t.Size, len(t.Data), + len(buf), nodeStart, nodeStop) + copy(buf[nodeStart-start:], t.Data[nodeStart-t.Offset:nodeStop-t.Offset]) + } + + if t.Next == nil { + break + } + t = t.Next + } +} + +func (c *ContinuousIntervals) TotalSize() (total int64) { + for _, list := range c.lists { + total += list.Size() + } + return +} + +func (c *ContinuousIntervals) AddInterval(data []byte, offset int64) (hasOverlap bool) { + interval := &IntervalNode{Data: data, Offset: offset, Size: int64(len(data))} + + var prevList, nextList *IntervalLinkedList + + for _, list := range c.lists { + if list.Head.Offset == interval.Offset+interval.Size { + nextList = list + break + } + } + + for _, list := range c.lists { + if list.Head.Offset+list.Size() == offset { + list.addNodeToTail(interval) + prevList = list + break + } + if list.Head.Offset <= offset && offset < list.Head.Offset+list.Size() { + if list.Tail.Offset <= offset { + dataStartIndex := list.Tail.Offset + list.Tail.Size - offset + // glog.V(4).Infof("overlap data new [0,%d) same=%v", dataStartIndex, bytes.Compare(interval.Data[0:dataStartIndex], list.Tail.Data[len(list.Tail.Data)-int(dataStartIndex):])) + interval.Data = interval.Data[dataStartIndex:] + interval.Size -= dataStartIndex + interval.Offset = offset + dataStartIndex + // glog.V(4).Infof("overlapping append as [%d,%d) dataSize=%d", interval.Offset, interval.Offset+interval.Size, len(interval.Data)) + list.addNodeToTail(interval) + prevList = list + break + } + glog.V(4).Infof("overlapped! interval is [%d,%d) dataSize=%d", interval.Offset, interval.Offset+interval.Size, len(interval.Data)) + hasOverlap = true + return + } + } + + if prevList != nil && nextList != nil { + // glog.V(4).Infof("connecting [%d,%d) + [%d,%d) => [%d,%d)", prevList.Head.Offset, prevList.Tail.Offset+prevList.Tail.Size, nextList.Head.Offset, nextList.Tail.Offset+nextList.Tail.Size, prevList.Head.Offset, nextList.Tail.Offset+nextList.Tail.Size) + prevList.Tail.Next = nextList.Head + prevList.Tail = nextList.Tail + c.removeList(nextList) + } else if nextList != nil { + // add to head was not done when checking + nextList.addNodeToHead(interval) + } + if prevList == nil && nextList == nil { + c.lists = append(c.lists, &IntervalLinkedList{ + Head: interval, + Tail: interval, + }) + } + + return +} + +func (c *ContinuousIntervals) RemoveLargestIntervalLinkedList() *IntervalLinkedList { + var maxSize int64 + maxIndex := -1 + for k, list := range c.lists { + if maxSize <= list.Size() { + maxSize = list.Size() + maxIndex = k + } + } + if maxSize <= 0 { + return nil + } + + t := c.lists[maxIndex] + c.lists = append(c.lists[0:maxIndex], c.lists[maxIndex+1:]...) + return t + +} + +func (c *ContinuousIntervals) removeList(target *IntervalLinkedList) { + index := -1 + for k, list := range c.lists { + if list.Offset() == target.Offset() { + index = k + } + } + if index < 0 { + return + } + + c.lists = append(c.lists[0:index], c.lists[index+1:]...) + +} + +func (c *ContinuousIntervals) ReadData(data []byte, startOffset int64) (offset int64, size int) { + var minOffset int64 = math.MaxInt64 + var maxStop int64 + for _, list := range c.lists { + start := max(startOffset, list.Offset()) + stop := min(startOffset+int64(len(data)), list.Offset()+list.Size()) + if start <= stop { + list.ReadData(data[start-startOffset:], start, stop) + minOffset = min(minOffset, start) + maxStop = max(maxStop, stop) + } + } + + if minOffset == math.MaxInt64 { + return 0, 0 + } + + offset = minOffset + size = int(maxStop - offset) + return +} + +func (l *IntervalLinkedList) ToReader() io.Reader { + var readers []io.Reader + t := l.Head + readers = append(readers, bytes.NewReader(t.Data)) + for t.Next != nil { + t = t.Next + readers = append(readers, bytes.NewReader(t.Data)) + } + return io.MultiReader(readers...) +} diff --git a/weed/filesys/file.go b/weed/filesys/file.go index b1d53507b..5a823f516 100644 --- a/weed/filesys/file.go +++ b/weed/filesys/file.go @@ -230,12 +230,6 @@ func (file *File) maybeLoadEntry(ctx context.Context) error { return nil } -func (file *File) addChunk(chunk *filer_pb.FileChunk) { - if chunk != nil { - file.addChunks([]*filer_pb.FileChunk{chunk}) - } -} - func (file *File) addChunks(chunks []*filer_pb.FileChunk) { sort.Slice(chunks, func(i, j int) bool { diff --git a/weed/filesys/filehandle.go b/weed/filesys/filehandle.go index 981de7ea2..c3f06ae8a 100644 --- a/weed/filesys/filehandle.go +++ b/weed/filesys/filehandle.go @@ -55,8 +55,8 @@ func (fh *FileHandle) Read(ctx context.Context, req *fuse.ReadRequest, resp *fus totalRead, err := fh.readFromChunks(ctx, buff, req.Offset) if err == nil { - dirtyOffset, dirtySize, dirtyReadErr := fh.readFromDirtyPages(ctx, buff, req.Offset) - if dirtyReadErr == nil && totalRead+req.Offset < dirtyOffset+int64(dirtySize) { + dirtyOffset, dirtySize := fh.readFromDirtyPages(ctx, buff, req.Offset) + if totalRead+req.Offset < dirtyOffset+int64(dirtySize) { totalRead = dirtyOffset + int64(dirtySize) - req.Offset } } @@ -70,7 +70,7 @@ func (fh *FileHandle) Read(ctx context.Context, req *fuse.ReadRequest, resp *fus return err } -func (fh *FileHandle) readFromDirtyPages(ctx context.Context, buff []byte, startOffset int64) (offset int64, size int, err error) { +func (fh *FileHandle) readFromDirtyPages(ctx context.Context, buff []byte, startOffset int64) (offset int64, size int) { return fh.dirtyPages.ReadDirtyData(ctx, buff, startOffset) } @@ -102,8 +102,6 @@ func (fh *FileHandle) Write(ctx context.Context, req *fuse.WriteRequest, resp *f // write the request to volume servers - glog.V(4).Infof("%+v/%v write fh %d: [%d,%d)", fh.f.dir.Path, fh.f.Name, fh.handle, req.Offset, req.Offset+int64(len(req.Data))) - chunks, err := fh.dirtyPages.AddPage(ctx, req.Offset, req.Data) if err != nil { glog.Errorf("%+v/%v write fh %d: [%d,%d): %v", fh.f.dir.Path, fh.f.Name, fh.handle, req.Offset, req.Offset+int64(len(req.Data)), err) @@ -152,13 +150,16 @@ func (fh *FileHandle) Flush(ctx context.Context, req *fuse.FlushRequest) error { // send the data to the OS glog.V(4).Infof("%s fh %d flush %v", fh.f.fullpath(), fh.handle, req) - chunk, err := fh.dirtyPages.FlushToStorage(ctx) + chunks, err := fh.dirtyPages.FlushToStorage(ctx) if err != nil { glog.Errorf("flush %s/%s: %v", fh.f.dir.Path, fh.f.Name, err) return fmt.Errorf("flush %s/%s: %v", fh.f.dir.Path, fh.f.Name, err) } - fh.f.addChunk(chunk) + fh.f.addChunks(chunks) + if len(chunks) > 0 { + fh.dirtyMetadata = true + } if !fh.dirtyMetadata { return nil