From 3a75d7f7aaaf7d7102f6ead38a221f60cbec5652 Mon Sep 17 00:00:00 2001 From: Patrick Schmidt Date: Mon, 22 Aug 2022 02:11:43 +0200 Subject: [PATCH] Implement copy_file_range syscall in FUSE (#3475) See the man page of copy_file_range: https://man7.org/linux/man-pages/man2/copy_file_range.2.html --- weed/mount/filehandle_read.go | 6 +- weed/mount/weedfs_file_copy_range.go | 120 +++++++++++++++++++++++++++ weed/mount/weedfs_file_read.go | 7 +- weed/mount/weedfs_unsupported.go | 16 ---- 4 files changed, 128 insertions(+), 21 deletions(-) create mode 100644 weed/mount/weedfs_file_copy_range.go diff --git a/weed/mount/filehandle_read.go b/weed/mount/filehandle_read.go index 78f3ff5a9..d0192e73e 100644 --- a/weed/mount/filehandle_read.go +++ b/weed/mount/filehandle_read.go @@ -3,10 +3,11 @@ package mount import ( "context" "fmt" + "io" + "github.com/seaweedfs/seaweedfs/weed/filer" "github.com/seaweedfs/seaweedfs/weed/glog" "github.com/seaweedfs/seaweedfs/weed/pb/filer_pb" - "io" ) func (fh *FileHandle) lockForRead(startOffset int64, size int) { @@ -25,9 +26,6 @@ func (fh *FileHandle) readFromChunks(buff []byte, offset int64) (int64, error) { fileFullPath := fh.FullPath() - fh.entryLock.Lock() - defer fh.entryLock.Unlock() - entry := fh.entry if entry == nil { return 0, io.EOF diff --git a/weed/mount/weedfs_file_copy_range.go b/weed/mount/weedfs_file_copy_range.go new file mode 100644 index 000000000..c85d5f9c4 --- /dev/null +++ b/weed/mount/weedfs_file_copy_range.go @@ -0,0 +1,120 @@ +package mount + +import ( + "io" + "net/http" + + "github.com/hanwen/go-fuse/v2/fuse" + + "github.com/seaweedfs/seaweedfs/weed/glog" +) + +// CopyFileRange copies data from one file to another from and to specified offsets. 
+//
+// It serves copy_file_range(2): the range is copied inside the filesystem
+// instead of travelling through the FUSE kernel module to user space and
+// back. Without this method applications are expected to fall back to a
+// regular file copy; glibc no longer emulates the call.
+//
+// See https://man7.org/linux/man-pages/man2/copy_file_range.2.html
+// See https://github.com/libfuse/libfuse/commit/fe4f9428fc403fa8b99051f52d84ea5bd13f3855
+//
+// It returns the number of bytes copied (less than in.Len when the source
+// ends early). Errors: EINVAL (flags set, or overlapping ranges of one inode),
+// EBADF (unknown handle), ENOENT (no entry), EISDIR (directory), EIO (read).
+func (wfs *WFS) CopyFileRange(cancel <-chan struct{}, in *fuse.CopyFileRangeIn) (written uint32, code fuse.Status) {
+	// flags must equal 0 for this syscall as of now
+	if in.Flags != 0 {
+		return 0, fuse.EINVAL
+	}
+
+	// files must exist
+	fhOut := wfs.GetHandle(FileHandleId(in.FhOut))
+	if fhOut == nil {
+		return 0, fuse.EBADF
+	}
+	fhIn := wfs.GetHandle(FileHandleId(in.FhIn))
+	if fhIn == nil {
+		return 0, fuse.EBADF
+	}
+
+	// lock source and target file handles
+	// NOTE(review): the target is locked before the source; two copies running
+	// in opposite directions over the same handles may deadlock - confirm.
+	fhOut.Lock()
+	defer fhOut.Unlock()
+	fhOut.entryLock.Lock()
+	defer fhOut.entryLock.Unlock()
+
+	// a handle that is both source and target is already locked above
+	if fhIn.fh != fhOut.fh {
+		fhIn.Lock()
+		defer fhIn.Unlock()
+		fhIn.entryLock.Lock()
+		defer fhIn.entryLock.Unlock()
+	}
+
+	// both handles need an entry; each one is dereferenced further down
+	if fhIn.entry == nil || fhOut.entry == nil {
+		return 0, fuse.ENOENT
+	}
+
+	// directories are not supported
+	if fhIn.entry.IsDirectory || fhOut.entry.IsDirectory {
+		return 0, fuse.EISDIR
+	}
+
+	// nothing to copy; also keeps the unsigned range ends below from wrapping
+	if in.Len == 0 {
+		return 0, fuse.OK
+	}
+
+	// cannot copy data to an overlapping range of the same file
+	offInEnd := in.OffIn + in.Len - 1
+	offOutEnd := in.OffOut + in.Len - 1
+	if fhIn.inode == fhOut.inode && in.OffIn <= offOutEnd && offInEnd >= in.OffOut {
+		return 0, fuse.EINVAL
+	}
+
+	glog.V(4).Infof(
+		"CopyFileRange %s fhIn %d -> %s fhOut %d, %d:%d -> %d:%d",
+		fhIn.FullPath(), fhIn.fh,
+		fhOut.FullPath(), fhOut.fh,
+		in.OffIn, offInEnd,
+		in.OffOut, offOutEnd,
+	)
+
+	// read data from source file
+	fhIn.lockForRead(int64(in.OffIn), int(in.Len))
+	defer fhIn.unlockForRead(int64(in.OffIn), int(in.Len))
+
+	data := make([]byte, int(in.Len))
+	totalRead, err := fhIn.readFromChunks(data, int64(in.OffIn))
+	if err != nil && err != io.EOF {
+		glog.Warningf("file handle read %s %d: %v", fhIn.FullPath(), totalRead, err)
+		return 0, fuse.EIO
+	}
+	maxStop := fhIn.readFromDirtyPages(data, int64(in.OffIn))
+	totalRead = max(maxStop-int64(in.OffIn), totalRead)
+	if totalRead <= 0 {
+		return 0, fuse.OK
+	}
+
+	// a short read must not pad the target with the zeroed tail of the buffer
+	data = data[:min(totalRead, int64(len(data)))]
+
+	// put data at the specified offset in target file
+	fhOut.dirtyPages.writerPattern.MonitorWriteAt(int64(in.OffOut), len(data))
+	fhOut.entry.Content = nil
+	fhOut.dirtyPages.AddPage(int64(in.OffOut), data, fhOut.dirtyPages.writerPattern.IsSequentialMode())
+	fhOut.entry.Attributes.FileSize = uint64(max(int64(in.OffOut)+int64(len(data)), int64(fhOut.entry.Attributes.FileSize)))
+	fhOut.dirtyMetadata = true
+	written = uint32(len(data))
+
+	// detect mime type; DetectContentType looks at no more than 512 bytes itself
+	if in.OffOut <= 512 {
+		fhOut.contentType = http.DetectContentType(data)
+	}
+
+	return written, fuse.OK
+}
diff --git a/weed/mount/weedfs_file_read.go b/weed/mount/weedfs_file_read.go
index 2af42b333..22b87fdad 100644
--- a/weed/mount/weedfs_file_read.go
+++ b/weed/mount/weedfs_file_read.go
@@ -1,9 +1,11 @@
 package mount
 
 import (
+	"io"
+
 	"github.com/hanwen/go-fuse/v2/fuse"
+
 	"github.com/seaweedfs/seaweedfs/weed/glog"
-	"io"
 )
 
 /**
@@ -37,6 +39,9 @@ func (wfs *WFS) Read(cancel <-chan struct{}, in *fuse.ReadIn, buff []byte) (fuse
 		return nil, fuse.ENOENT
 	}
 
+	fh.entryLock.Lock()
+	defer fh.entryLock.Unlock()
+
 	offset := int64(in.Offset)
 	fh.lockForRead(offset, len(buff))
 	defer fh.unlockForRead(offset, len(buff))
diff --git a/weed/mount/weedfs_unsupported.go b/weed/mount/weedfs_unsupported.go
index 
2536811b8..08347aec1 100644 --- a/weed/mount/weedfs_unsupported.go +++ b/weed/mount/weedfs_unsupported.go @@ -4,22 +4,6 @@ import "github.com/hanwen/go-fuse/v2/fuse" // https://github.com/libfuse/libfuse/blob/48ae2e72b39b6a31cb2194f6f11786b7ca06aac6/include/fuse.h#L778 -/** - * Copy a range of data from one file to anotherNiels de Vos, 4 years ago: • libfuse: add copy_file_range() support - * - * Performs an optimized copy between two file descriptors without the - * additional cost of transferring data through the FUSE kernel module - * to user space (glibc) and then back into the FUSE filesystem again. - * - * In case this method is not implemented, applications are expected to - * fall back to a regular file copy. (Some glibc versions did this - * emulation automatically, but the emulation has been removed from all - * glibc release branches.) - */ -func (wfs *WFS) CopyFileRange(cancel <-chan struct{}, in *fuse.CopyFileRangeIn) (written uint32, code fuse.Status) { - return 0, fuse.ENOSYS -} - /** * Allocates space for an open file *