You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
154 lines
4.5 KiB
154 lines
4.5 KiB
package mount
|
|
|
|
import (
|
|
"net/http"
|
|
"time"
|
|
|
|
"github.com/hanwen/go-fuse/v2/fuse"
|
|
|
|
"github.com/seaweedfs/seaweedfs/weed/glog"
|
|
"github.com/seaweedfs/seaweedfs/weed/util"
|
|
)
|
|
|
|
// CopyFileRange copies data from one file to another from and to specified offsets.
|
|
//
|
|
// See https://man7.org/linux/man-pages/man2/copy_file_range.2.html
|
|
// See https://github.com/libfuse/libfuse/commit/fe4f9428fc403fa8b99051f52d84ea5bd13f3855
|
|
/**
|
|
* Copy a range of data from one file to another
|
|
*
|
|
* Niels de Vos: • libfuse: add copy_file_range() support
|
|
*
|
|
* Performs an optimized copy between two file descriptors without the
|
|
* additional cost of transferring data through the FUSE kernel module
|
|
* to user space (glibc) and then back into the FUSE filesystem again.
|
|
*
|
|
* In case this method is not implemented, applications are expected to
|
|
* fall back to a regular file copy. (Some glibc versions did this
|
|
* emulation automatically, but the emulation has been removed from all
|
|
* glibc release branches.)
|
|
*/
|
|
func (wfs *WFS) CopyFileRange(cancel <-chan struct{}, in *fuse.CopyFileRangeIn) (written uint32, code fuse.Status) {
|
|
// flags must equal 0 for this syscall as of now
|
|
if in.Flags != 0 {
|
|
return 0, fuse.EINVAL
|
|
}
|
|
|
|
// files must exist
|
|
fhOut := wfs.GetHandle(FileHandleId(in.FhOut))
|
|
if fhOut == nil {
|
|
return 0, fuse.EBADF
|
|
}
|
|
fhIn := wfs.GetHandle(FileHandleId(in.FhIn))
|
|
if fhIn == nil {
|
|
return 0, fuse.EBADF
|
|
}
|
|
|
|
// lock source and target file handles
|
|
fhOutActiveLock := fhOut.wfs.fhLockTable.AcquireLock("CopyFileRange", fhOut.fh, util.ExclusiveLock)
|
|
defer fhOut.wfs.fhLockTable.ReleaseLock(fhOut.fh, fhOutActiveLock)
|
|
|
|
if fhOut.entry == nil {
|
|
return 0, fuse.ENOENT
|
|
}
|
|
|
|
if fhIn.fh != fhOut.fh {
|
|
fhInActiveLock := fhIn.wfs.fhLockTable.AcquireLock("CopyFileRange", fhIn.fh, util.SharedLock)
|
|
defer fhIn.wfs.fhLockTable.ReleaseLock(fhIn.fh, fhInActiveLock)
|
|
}
|
|
|
|
// directories are not supported
|
|
if fhIn.entry.IsDirectory || fhOut.entry.IsDirectory {
|
|
return 0, fuse.EISDIR
|
|
}
|
|
|
|
glog.V(4).Infof(
|
|
"CopyFileRange %s fhIn %d -> %s fhOut %d, [%d,%d) -> [%d,%d)",
|
|
fhIn.FullPath(), fhIn.fh,
|
|
fhOut.FullPath(), fhOut.fh,
|
|
in.OffIn, in.OffIn+in.Len,
|
|
in.OffOut, in.OffOut+in.Len,
|
|
)
|
|
|
|
// Concurrent copy operations could allocate too much memory, so we want to
|
|
// throttle our concurrency, scaling with the number of writers the mount
|
|
// was configured with.
|
|
if wfs.concurrentCopiersSem != nil {
|
|
wfs.concurrentCopiersSem <- struct{}{}
|
|
defer func() { <-wfs.concurrentCopiersSem }()
|
|
}
|
|
|
|
// We want to stream the copy operation to avoid allocating massive buffers.
|
|
nowUnixNano := time.Now().UnixNano()
|
|
totalCopied := int64(0)
|
|
buff := wfs.copyBufferPool.Get().([]byte)
|
|
defer wfs.copyBufferPool.Put(buff)
|
|
for {
|
|
// Comply with cancellation as best as we can, given that the underlying
|
|
// IO functions aren't cancellation-aware.
|
|
select {
|
|
case <-cancel:
|
|
glog.Warningf("canceled CopyFileRange for %s (copied %d)",
|
|
fhIn.FullPath(), totalCopied)
|
|
return uint32(totalCopied), fuse.EINTR
|
|
default: // keep going
|
|
}
|
|
|
|
// We can save one IO by breaking early if we already know the next read
|
|
// will result in zero bytes.
|
|
remaining := int64(in.Len) - totalCopied
|
|
readLen := min(remaining, int64(len(buff)))
|
|
if readLen == 0 {
|
|
break
|
|
}
|
|
|
|
// Perform the read
|
|
offsetIn := totalCopied + int64(in.OffIn)
|
|
numBytesRead, err := readDataByFileHandle(
|
|
buff[:readLen], fhIn, offsetIn)
|
|
if err != nil {
|
|
glog.Warningf("file handle read %s %d (total %d): %v",
|
|
fhIn.FullPath(), numBytesRead, totalCopied, err)
|
|
return 0, fuse.EIO
|
|
}
|
|
|
|
// Break if we're done copying (no more bytes to read)
|
|
if numBytesRead == 0 {
|
|
break
|
|
}
|
|
|
|
offsetOut := int64(in.OffOut) + totalCopied
|
|
|
|
// Detect mime type only during the beginning of our stream, since
|
|
// DetectContentType is expecting some of the first 512 bytes of the
|
|
// file. See [http.DetectContentType] for details.
|
|
if offsetOut <= 512 {
|
|
fhOut.contentType = http.DetectContentType(buff[:numBytesRead])
|
|
}
|
|
|
|
// Perform the write
|
|
fhOut.dirtyPages.writerPattern.MonitorWriteAt(offsetOut, int(numBytesRead))
|
|
fhOut.dirtyPages.AddPage(
|
|
offsetOut,
|
|
buff[:numBytesRead],
|
|
fhOut.dirtyPages.writerPattern.IsSequentialMode(),
|
|
nowUnixNano)
|
|
|
|
// Accumulate for the next loop iteration
|
|
totalCopied += numBytesRead
|
|
}
|
|
|
|
if totalCopied == 0 {
|
|
return 0, fuse.OK
|
|
}
|
|
|
|
fhOut.entry.Attributes.FileSize = uint64(max(
|
|
totalCopied+int64(in.OffOut),
|
|
int64(fhOut.entry.Attributes.FileSize),
|
|
))
|
|
fhOut.entry.Content = nil
|
|
fhOut.dirtyMetadata = true
|
|
|
|
written = uint32(totalCopied)
|
|
return written, fuse.OK
|
|
}
|