From 9a73319b45eefb965e8ab8c19b8f82750a6dfc75 Mon Sep 17 00:00:00 2001
From: chrislu
Date: Tue, 21 Dec 2021 17:28:55 -0800
Subject: [PATCH] mount: different write strategy for streaming write and random write

---
 weed/filesys/filehandle.go                    |  2 +-
 weed/filesys/page_writer.go                   | 95 +++++++++++++++++++
 weed/filesys/page_writer/page_chunk.go        |  1 +
 ...iter_pattern.go => page_writer_pattern.go} | 23 ++++-
 4 files changed, 118 insertions(+), 3 deletions(-)
 create mode 100644 weed/filesys/page_writer.go
 create mode 100644 weed/filesys/page_writer/page_chunk.go
 rename weed/filesys/{page_writer/writer_pattern.go => page_writer_pattern.go} (52%)

diff --git a/weed/filesys/filehandle.go b/weed/filesys/filehandle.go
index 232d28667..d92b17b5b 100644
--- a/weed/filesys/filehandle.go
+++ b/weed/filesys/filehandle.go
@@ -41,7 +41,7 @@ func newFileHandle(file *File, uid, gid uint32) *FileHandle {
 	fh := &FileHandle{
 		f: file,
 		// dirtyPages: newContinuousDirtyPages(file, writeOnly),
-		dirtyPages: newTempFileDirtyPages(file),
+		dirtyPages: newPageWriter(file, 2*1024*1024),
 		Uid:        uid,
 		Gid:        gid,
 	}
diff --git a/weed/filesys/page_writer.go b/weed/filesys/page_writer.go
new file mode 100644
index 000000000..9c9e38968
--- /dev/null
+++ b/weed/filesys/page_writer.go
@@ -0,0 +1,95 @@
+package filesys
+
+import (
+	"github.com/chrislusf/seaweedfs/weed/filesys/page_writer"
+	"github.com/chrislusf/seaweedfs/weed/glog"
+)
+
+// PageWriter collects the dirty pages of one open file. Every write is split
+// at chunk boundaries and each piece is handed to either streamWriter or
+// randomWriter, depending on the write pattern observed so far.
+type PageWriter struct {
+	f             *File
+	collection    string // not set or read anywhere in this file
+	replication   string // not set or read anywhere in this file
+	chunkSize     int64  // chunk size in bytes used to split writes
+	writerPattern *WriterPattern
+
+	randomWriter page_writer.DirtyPages
+	streamWriter page_writer.DirtyPages
+}
+
+// Compile-time check that *PageWriter implements page_writer.DirtyPages.
+var _ page_writer.DirtyPages = (*PageWriter)(nil)
+
+// newPageWriter creates a PageWriter for file that splits writes into chunks
+// of chunkSize bytes. chunkSize must be positive; AddPage divides by it.
+func newPageWriter(file *File, chunkSize int64) *PageWriter {
+	return &PageWriter{
+		f:             file,
+		chunkSize:     chunkSize,
+		randomWriter:  newTempFileDirtyPages(file),
+		streamWriter:  newContinuousDirtyPages(file),
+		writerPattern: NewWriterPattern(file.Name, chunkSize),
+	}
+}
+
+// AddPage buffers data as dirty content starting at offset. The write is
+// reported to the write-pattern monitor and then split so that every piece
+// lies within a single chunk.
+func (pw *PageWriter) AddPage(offset int64, data []byte) {
+	// Update the pattern before logging so the log line reports the same
+	// mode that addToOneChunk uses to route this write.
+	pw.writerPattern.MonitorWriteAt(offset, len(data))
+
+	glog.V(4).Infof("AddPage %v [%d, %d) streaming:%v", pw.f.fullpath(), offset, offset+int64(len(data)), pw.writerPattern.IsStreamingMode())
+
+	chunkIndex := offset / pw.chunkSize
+	for i := chunkIndex; len(data) > 0; i++ {
+		// Stop at the end of chunk i; the rest goes to the next chunk.
+		writeSize := min(int64(len(data)), (i+1)*pw.chunkSize-offset)
+		pw.addToOneChunk(i, offset, data[:writeSize])
+		offset += writeSize
+		data = data[writeSize:]
+	}
+}
+
+// addToOneChunk hands data, which lies within chunk chunkIndex, to one of the
+// two writers. Chunk 0 always goes to randomWriter; later chunks go to
+// streamWriter for as long as the write pattern is still in streaming mode.
+func (pw *PageWriter) addToOneChunk(chunkIndex, offset int64, data []byte) {
+	if chunkIndex > 0 && pw.writerPattern.IsStreamingMode() {
+		pw.streamWriter.AddPage(offset, data)
+		return
+	}
+	pw.randomWriter.AddPage(offset, data)
+}
+
+// FlushData flushes streamWriter and then randomWriter. If streamWriter fails,
+// its error is returned and randomWriter is not flushed.
+func (pw *PageWriter) FlushData() error {
+	if err := pw.streamWriter.FlushData(); err != nil {
+		return err
+	}
+	return pw.randomWriter.FlushData()
+}
+
+// ReadDirtyDataAt passes data and startOffset to streamWriter and then to
+// randomWriter, both using the same buffer, and returns the larger of the two
+// values they return.
+func (pw *PageWriter) ReadDirtyDataAt(data []byte, startOffset int64) (maxStop int64) {
+	glog.V(4).Infof("ReadDirtyDataAt %v [%d, %d)", pw.f.fullpath(), startOffset, startOffset+int64(len(data)))
+	m1 := pw.streamWriter.ReadDirtyDataAt(data, startOffset)
+	m2 := pw.randomWriter.ReadDirtyDataAt(data, startOffset)
+	return max(m1, m2)
+}
+
+// GetStorageOptions returns the collection and replication reported by the
+// writer that matches the current write pattern: streamWriter in streaming
+// mode, randomWriter otherwise.
+func (pw *PageWriter) GetStorageOptions() (collection, replication string) {
+	if pw.writerPattern.IsStreamingMode() {
+		return pw.streamWriter.GetStorageOptions()
+	}
+	return pw.randomWriter.GetStorageOptions()
+}
diff --git a/weed/filesys/page_writer/page_chunk.go b/weed/filesys/page_writer/page_chunk.go
new file mode 100644
index 000000000..b21e6acca
--- /dev/null
+++ b/weed/filesys/page_writer/page_chunk.go
@@ -0,0 +1 @@
+package page_writer
diff --git a/weed/filesys/page_writer/writer_pattern.go b/weed/filesys/page_writer_pattern.go
similarity index 52%
rename from weed/filesys/page_writer/writer_pattern.go
rename to weed/filesys/page_writer_pattern.go
index c7641c37f..fdd796215 100644
--- a/weed/filesys/page_writer/writer_pattern.go
+++ b/weed/filesys/page_writer_pattern.go
@@ -1,22 +1,41 @@
-package page_writer
+package filesys
+
+import "github.com/chrislusf/seaweedfs/weed/glog"
 
+// WriterPattern tracks whether the writes to one file are still arriving in
+// streaming order, i.e. no write has started before the previous write did.
 type WriterPattern struct {
 	isStreaming     bool
 	lastWriteOffset int64
+	chunkSize       int64
+	fileName        string
 }
 
 // For streaming write: only cache the first chunk
 // For random write: fall back to temp file approach
+// writes can only change from streaming mode to non-streaming mode
 
-func NewWriterPattern() *WriterPattern {
+// NewWriterPattern returns a WriterPattern in streaming mode. fileName is used
+// only in log messages; chunkSize is recorded but does not affect the pattern.
+func NewWriterPattern(fileName string, chunkSize int64) *WriterPattern {
 	return &WriterPattern{
 		isStreaming:     true,
 		lastWriteOffset: 0,
+		chunkSize:       chunkSize,
+		fileName:        fileName,
 	}
 }
 
+// MonitorWriteAt records a write of size bytes starting at offset. A write
+// that starts before the start of the previous write switches the pattern to
+// non-streaming mode. Only the start offset is remembered; size is used for
+// logging only.
 func (rp *WriterPattern) MonitorWriteAt(offset int64, size int) {
 	if rp.lastWriteOffset > offset {
+		if rp.isStreaming {
+			glog.V(4).Infof("file %s ==> non streaming at [%d,%d)", rp.fileName, offset, offset+int64(size))
+		}
+		glog.V(4).Infof("write %s [%d,%d)", rp.fileName, offset, offset+int64(size))
 		rp.isStreaming = false
 	}
 	rp.lastWriteOffset = offset