From 84592a9f13f6a767074b53dd24336836d1c8230b Mon Sep 17 00:00:00 2001 From: Antonio SJ Musumeci Date: Wed, 22 Feb 2023 17:52:46 -0500 Subject: [PATCH] Remove splicing features After numerous tests it was found the splice features were at best the same performance as standard IO and at worst actually slower. To simplify the code all splice features are removed. --- README.md | 23 +- libfuse/ecfd/tests/HAVE_SPLICE.c | 11 - libfuse/ecfd/tests/HAVE_VMSPLICE.c | 12 - libfuse/include/fuse_common.h | 6 - libfuse/include/fuse_msgbuf.h | 1 - libfuse/lib/fuse_i.h | 6 - libfuse/lib/fuse_ll.hpp | 6 - libfuse/lib/fuse_loop_mt.cpp | 6 +- libfuse/lib/fuse_lowlevel.c | 515 +---------------------------- libfuse/lib/fuse_msgbuf.cpp | 81 ++--- libfuse/lib/fuse_msgbuf.hpp | 10 +- man/mergerfs.1 | 25 +- src/fuse_write_buf.cpp | 111 ------- src/fuse_write_buf.hpp | 35 -- src/option_parser.cpp | 12 +- 15 files changed, 114 insertions(+), 746 deletions(-) delete mode 100644 libfuse/ecfd/tests/HAVE_SPLICE.c delete mode 100644 libfuse/ecfd/tests/HAVE_VMSPLICE.c delete mode 100644 src/fuse_write_buf.cpp delete mode 100644 src/fuse_write_buf.hpp diff --git a/README.md b/README.md index 51564416..36467b89 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ % mergerfs(1) mergerfs user manual % Antonio SJ Musumeci -% 2023-02-19 +% 2023-02-23 # NAME @@ -248,6 +248,11 @@ These options are the same regardless of whether you use them with the `mergerfs `async_read=true` instead. * **sync_read**: deprecated - Perform reads synchronously. Use `async_read=false` instead. +* **use_ino**: deprecated - Always enabled. +* **allow_other**: deprecated - Always enabled. +* **splice_read**: deprecated - Does nothing. +* **splice_write**: deprecated - Does nothing. +* **splice_move**: deprecated - Does nothing. **NOTE:** Options are evaluated in the order listed so if the options are **func.rmdir=rand,category.action=ff** the **action** category setting will override the **rmdir** setting.
@@ -976,7 +981,6 @@ mergerfs is at its core just a proxy and therefore its theoretical max performan NOTE: be sure to read about these features before changing them to understand what behaviors it may impact -* enable (or disable) `splice_move`, `splice_read`, and `splice_write` * disable `security_capability` and/or `xattr` * increase cache timeouts `cache.attr`, `cache.entry`, `cache.negative_entry` * enable (or disable) page caching (`cache.files`) @@ -1011,7 +1015,7 @@ When benchmarking through mergerfs ensure you only use 1 branch to remove any po 3. Mount mergerfs over a local drive. NVMe, SSD, HDD, etc. If you have more than one I'd suggest testing each of them as drives and/or controllers (their drivers) could impact performance. 4. Finally, if you intend to use mergerfs with a network filesystem, either as the source of data or to combine with another through mergerfs, test each of those alone as above. -Once you find the component which has the performance issue you can do further testing with different options to see if they impact performance. For reads and writes the most relevant would be: `cache.files`, `async_read`, `splice_move`, `splice_read`, `splice_write`. Less likely but relevant when using NFS or with certain filesystems would be `security_capability`, `xattr`, and `posix_acl`. If you find a specific system, drive, filesystem, controller, etc. that performs poorly contact trapexit so he may investigate further. +Once you find the component which has the performance issue you can do further testing with different options to see if they impact performance. For reads and writes the most relevant would be: `cache.files`, `async_read`. Less likely but relevant when using NFS or with certain filesystems would be `security_capability`, `xattr`, and `posix_acl`. If you find a specific system, drive, filesystem, controller, etc. that performs poorly contact trapexit so he may investigate further. 
Sometimes the problem is really the application accessing or writing data through mergerfs. Some software use small buffer sizes which can lead to more requests and therefore greater overhead. You can test this out yourself by replace `bs=1M` in the examples below with `ibs` or `obs` and using a size of `512` instead of `1M`. In one example test using `nullrw` the write speed dropped from 4.9GB/s to 69.7MB/s when moving from `1M` to `512`. Similar results were had when testing reads. Small writes overhead may be improved by leveraging a write cache but in casual tests little gain was found. More tests will need to be done before this feature would become available. If you have an app that appears slow with mergerfs it could be due to this. Contact trapexit so he may investigate further. @@ -1326,7 +1330,18 @@ If `mergerfs` doesn't work as a type it could be due to how the `mount.mergerfs` See above first. -If/when mergerfs is rewritten to use the low-level API then it'll be plausible to support system libfuse but till then it's simply too much work to manage the differences across the versions. +If/when mergerfs is rewritten to use the low-level API then it'll be +plausible to support system libfuse but till then it's simply too much +work to manage the differences across the versions. + + +#### Why was splice support removed? + +After a lot of testing over the years splicing always appeared to +at best provide equivalent performance and in some cases worse +performance. Splice is not supported on other platforms forcing a +traditional read/write fallback to be provided. The splice code was +removed to simplify the codebase. #### Why use mergerfs over mhddfs?
diff --git a/libfuse/ecfd/tests/HAVE_SPLICE.c b/libfuse/ecfd/tests/HAVE_SPLICE.c deleted file mode 100644 index 4aec041a..00000000 --- a/libfuse/ecfd/tests/HAVE_SPLICE.c +++ /dev/null @@ -1,11 +0,0 @@ -#define _GNU_SOURCE -#include - -int -main(int argc, - char *argv[]) -{ - (void)splice; - - return 0; -} diff --git a/libfuse/ecfd/tests/HAVE_VMSPLICE.c b/libfuse/ecfd/tests/HAVE_VMSPLICE.c deleted file mode 100644 index 48765728..00000000 --- a/libfuse/ecfd/tests/HAVE_VMSPLICE.c +++ /dev/null @@ -1,12 +0,0 @@ -#define _GNU_SOURCE -#include -#include - -int -main(int argc, - char *argv[]) -{ - (void)vmsplice; - - return 0; -} diff --git a/libfuse/include/fuse_common.h b/libfuse/include/fuse_common.h index 7d9a050e..68b50f06 100644 --- a/libfuse/include/fuse_common.h +++ b/libfuse/include/fuse_common.h @@ -101,9 +101,6 @@ struct fuse_file_info_t * FUSE_CAP_EXPORT_SUPPORT: filesystem handles lookups of "." and ".." * FUSE_CAP_BIG_WRITES: filesystem can handle write size larger than 4kB * FUSE_CAP_DONT_MASK: don't apply umask to file mode on create operations - * FUSE_CAP_SPLICE_WRITE: ability to use splice() to write to the fuse device - * FUSE_CAP_SPLICE_MOVE: ability to move data to the fuse device with splice() - * FUSE_CAP_SPLICE_READ: ability to use splice() to read from the fuse device * FUSE_CAP_IOCTL_DIR: ioctl support on directories * FUSE_CAP_CACHE_SYMLINKS: cache READLINK responses */ @@ -113,9 +110,6 @@ struct fuse_file_info_t #define FUSE_CAP_EXPORT_SUPPORT (1 << 4) #define FUSE_CAP_BIG_WRITES (1 << 5) #define FUSE_CAP_DONT_MASK (1 << 6) -#define FUSE_CAP_SPLICE_WRITE (1 << 7) -#define FUSE_CAP_SPLICE_MOVE (1 << 8) -#define FUSE_CAP_SPLICE_READ (1 << 9) #define FUSE_CAP_FLOCK_LOCKS (1 << 10) #define FUSE_CAP_IOCTL_DIR (1 << 11) #define FUSE_CAP_READDIR_PLUS (1 << 13) diff --git a/libfuse/include/fuse_msgbuf.h b/libfuse/include/fuse_msgbuf.h index bd1f5a9a..2a56b651 100644 --- a/libfuse/include/fuse_msgbuf.h +++ b/libfuse/include/fuse_msgbuf.h @@ -8,5 +8,4 
@@ struct fuse_msgbuf_t char *mem; uint32_t size; uint32_t used; - int pipefd[2]; }; diff --git a/libfuse/lib/fuse_i.h b/libfuse/lib/fuse_i.h index af3d899f..abcf4cc7 100644 --- a/libfuse/lib/fuse_i.h +++ b/libfuse/lib/fuse_i.h @@ -58,12 +58,6 @@ struct fuse_ll int no_remote_posix_lock; int no_remote_flock; int big_writes; - int splice_write; - int splice_move; - int splice_read; - int no_splice_write; - int no_splice_move; - int no_splice_read; struct fuse_lowlevel_ops op; void *userdata; uid_t owner; diff --git a/libfuse/lib/fuse_ll.hpp b/libfuse/lib/fuse_ll.hpp index 965243c0..8f7bf45f 100644 --- a/libfuse/lib/fuse_ll.hpp +++ b/libfuse/lib/fuse_ll.hpp @@ -7,9 +7,3 @@ int fuse_receive_buf(struct fuse_session *se, void fuse_process_buf(void *data, const fuse_msgbuf_t *msgbuf, struct fuse_chan *ch); - -int fuse_receive_buf_splice(struct fuse_chan *ch, - fuse_msgbuf_t *msgbuf); -void fuse_process_buf_splice(struct fuse_chan *ch, - const fuse_msgbuf_t *msgbuf, - void *data); diff --git a/libfuse/lib/fuse_loop_mt.cpp b/libfuse/lib/fuse_loop_mt.cpp index f62a0745..dc726ff4 100644 --- a/libfuse/lib/fuse_loop_mt.cpp +++ b/libfuse/lib/fuse_loop_mt.cpp @@ -33,7 +33,6 @@ struct fuse_worker_data_t struct fuse_session *se; sem_t finished; std::function msgbuf_processor; - std::function msgbuf_allocator; std::shared_ptr tp; }; @@ -99,7 +98,6 @@ fuse_do_work(void *data) fuse_worker_data_t *wd = (fuse_worker_data_t*)data; fuse_session *se = wd->se; auto &process_msgbuf = wd->msgbuf_processor; - auto &msgbuf_allocator = wd->msgbuf_allocator; WorkerCleanup workercleanup(wd); while(!fuse_session_exited(se)) @@ -107,7 +105,7 @@ fuse_do_work(void *data) int rv; fuse_msgbuf_t *msgbuf; - msgbuf = msgbuf_allocator(); + msgbuf = msgbuf_alloc(); do { @@ -455,8 +453,6 @@ fuse_session_loop_mt(struct fuse_session *se_, wd.msgbuf_processor = process_msgbuf_sync; } - wd.msgbuf_allocator = ((se_->f->splice_read) ? 
msgbuf_alloc : msgbuf_alloc_memonly); - wd.se = se_; sem_init(&wd.finished,0,0); diff --git a/libfuse/lib/fuse_lowlevel.c b/libfuse/lib/fuse_lowlevel.c index 41670a72..2d0220ba 100644 --- a/libfuse/lib/fuse_lowlevel.c +++ b/libfuse/lib/fuse_lowlevel.c @@ -17,7 +17,7 @@ #include "fuse_opt.h" #include "fuse_misc.h" #include "fuse_pollhandle.h" -#include "fuse_msgbuf.h" +#include "fuse_msgbuf.hpp" #include #include @@ -52,7 +52,7 @@ __attribute__((constructor)) void fuse_ll_constructor(void) { - pagesize = getpagesize(); + pagesize = sysconf(_SC_PAGESIZE); lfmp_init(&g_FMP_fuse_req,sizeof(struct fuse_req),1); } @@ -374,7 +374,6 @@ fuse_send_data_iov_fallback(struct fuse_ll *f, size_t len) { int res; - void *mbuf; struct fuse_bufvec mem_buf = FUSE_BUFVEC_INIT(len); /* Optimize common case */ @@ -391,24 +390,25 @@ fuse_send_data_iov_fallback(struct fuse_ll *f, return fuse_send_msg(f, ch, iov, iov_count); } - res = posix_memalign(&mbuf, pagesize, len); - if(res != 0) - return res; + fuse_msgbuf_t *msgbuf; + msgbuf = msgbuf_alloc(); + if(msgbuf == NULL) + return -ENOMEM; - mem_buf.buf[0].mem = mbuf; + mem_buf.buf[0].mem = msgbuf->mem; res = fuse_buf_copy(&mem_buf, buf, 0); if(res < 0) { - free(mbuf); + msgbuf_free(msgbuf); return -res; } len = res; - iov[iov_count].iov_base = mbuf; + iov[iov_count].iov_base = msgbuf->mem; iov[iov_count].iov_len = len; iov_count++; res = fuse_send_msg(f, ch, iov, iov_count); - free(mbuf); + msgbuf_free(msgbuf); return res; } @@ -429,298 +429,6 @@ fuse_ll_pipe_free(struct fuse_ll_pipe *llp) free(llp); } -#ifdef HAVE_SPLICE -static -struct fuse_ll_pipe* -fuse_ll_get_pipe(struct fuse_ll *f) -{ - struct fuse_ll_pipe *llp = pthread_getspecific(f->pipe_key); - if(llp == NULL) - { - int res; - - llp = malloc(sizeof(struct fuse_ll_pipe)); - if(llp == NULL) - return NULL; - - res = pipe(llp->pipe); - if(res == -1) - { - free(llp); - return NULL; - } - - if(fcntl(llp->pipe[0], F_SETFL, O_NONBLOCK) == -1 || - fcntl(llp->pipe[1], F_SETFL, O_NONBLOCK) 
== -1) - { - close(llp->pipe[0]); - close(llp->pipe[1]); - free(llp); - return NULL; - } - - /* - *the default size is 16 pages on linux - */ - llp->size = pagesize * 16; - llp->can_grow = 1; - - pthread_setspecific(f->pipe_key, llp); - } - - return llp; -} -#endif - -static -void -fuse_ll_clear_pipe(struct fuse_ll *f) -{ - struct fuse_ll_pipe *llp = pthread_getspecific(f->pipe_key); - - if(llp) - { - pthread_setspecific(f->pipe_key, NULL); - fuse_ll_pipe_free(llp); - } -} - -#if defined(HAVE_SPLICE) && defined(HAVE_VMSPLICE) -static -int -read_back(int fd, - char *buf, - size_t len) -{ - int res; - - res = read(fd, buf, len); - if(res == -1) - { - fprintf(stderr, "fuse: internal error: failed to read back from pipe: %s\n", strerror(errno)); - return -EIO; - } - - if(res != len) - { - fprintf(stderr, "fuse: internal error: short read back from pipe: %i from %zi\n", res, len); - return -EIO; - } - - return 0; -} - -static -int -fuse_send_data_iov(struct fuse_ll *f, - struct fuse_chan *ch, - struct iovec *iov, - int iov_count, - struct fuse_bufvec *buf, - unsigned int flags) -{ - int res; - size_t len = fuse_buf_size(buf); - struct fuse_out_header *out = iov[0].iov_base; - struct fuse_ll_pipe *llp; - int splice_flags; - size_t pipesize; - size_t total_buf_size; - size_t idx; - size_t headerlen; - struct fuse_bufvec pipe_buf = FUSE_BUFVEC_INIT(len); - - if(f->broken_splice_nonblock) - goto fallback; - - if(flags & FUSE_BUF_NO_SPLICE) - goto fallback; - - total_buf_size = 0; - for (idx = buf->idx; idx < buf->count; idx++) - { - if(buf->buf[idx].flags & FUSE_BUF_IS_FD) - { - total_buf_size += buf->buf[idx].size; - if(idx == buf->idx) - total_buf_size -= buf->off; - } - } - - if(total_buf_size < 2 * pagesize) - goto fallback; - - if(f->conn.proto_minor < 14 || !(f->conn.want & FUSE_CAP_SPLICE_WRITE)) - goto fallback; - - llp = fuse_ll_get_pipe(f); - if(llp == NULL) - goto fallback; - - headerlen = iov_length(iov, iov_count); - - out->len = headerlen + len; - - /* - * 
Heuristic for the required pipe size, does not work if the - * source contains less than page size fragments - */ - pipesize = pagesize * (iov_count + buf->count + 1) + out->len; - - if(llp->size < pipesize) - { - if(llp->can_grow) - { - res = fcntl(llp->pipe[0], F_SETPIPE_SZ, pipesize); - if(res == -1) - { - llp->can_grow = 0; - goto fallback; - } - llp->size = res; - } - - if(llp->size < pipesize) - goto fallback; - } - - res = vmsplice(llp->pipe[1], iov, iov_count, SPLICE_F_NONBLOCK); - if(res == -1) - goto fallback; - - if(res != headerlen) - { - res = -EIO; - fprintf(stderr, "fuse: short vmsplice to pipe: %u/%zu\n", res, - headerlen); - goto clear_pipe; - } - - pipe_buf.buf[0].flags = FUSE_BUF_IS_FD; - pipe_buf.buf[0].fd = llp->pipe[1]; - - res = fuse_buf_copy(&pipe_buf, buf, - FUSE_BUF_FORCE_SPLICE | FUSE_BUF_SPLICE_NONBLOCK); - if(res < 0) - { - if(res == -EAGAIN || res == -EINVAL) - { - /* - * Should only get EAGAIN on kernels with - * broken SPLICE_F_NONBLOCK support (<= - * 2.6.35) where this error or a short read is - * returned even if the pipe itself is not - * full - * - * EINVAL might mean that splice can't handle - * this combination of input and output. - */ - if(res == -EAGAIN) - f->broken_splice_nonblock = 1; - - pthread_setspecific(f->pipe_key, NULL); - fuse_ll_pipe_free(llp); - goto fallback; - } - res = -res; - goto clear_pipe; - } - - if(res != 0 && res < len) - { - struct fuse_bufvec mem_buf = FUSE_BUFVEC_INIT(len); - void *mbuf; - size_t now_len = res; - /* - * For regular files a short count is either - * 1) due to EOF, or - * 2) because of broken SPLICE_F_NONBLOCK (see above) - * - * For other inputs it's possible that we overflowed - * the pipe because of small buffer fragments. 
- */ - - res = posix_memalign(&mbuf, pagesize, len); - if(res != 0) - goto clear_pipe; - - mem_buf.buf[0].mem = mbuf; - mem_buf.off = now_len; - res = fuse_buf_copy(&mem_buf, buf, 0); - if(res > 0) - { - char *tmpbuf; - size_t extra_len = res; - /* - * Trickiest case: got more data. Need to get - * back the data from the pipe and then fall - * back to regular write. - */ - tmpbuf = malloc(headerlen); - if(tmpbuf == NULL) - { - free(mbuf); - res = ENOMEM; - goto clear_pipe; - } - res = read_back(llp->pipe[0], tmpbuf, headerlen); - free(tmpbuf); - if(res != 0) - { - free(mbuf); - goto clear_pipe; - } - res = read_back(llp->pipe[0], mbuf, now_len); - if(res != 0) - { - free(mbuf); - goto clear_pipe; - } - len = now_len + extra_len; - iov[iov_count].iov_base = mbuf; - iov[iov_count].iov_len = len; - iov_count++; - res = fuse_send_msg(f, ch, iov, iov_count); - free(mbuf); - return res; - } - free(mbuf); - res = now_len; - } - len = res; - out->len = headerlen + len; - - splice_flags = 0; - if((flags & FUSE_BUF_SPLICE_MOVE) && - (f->conn.want & FUSE_CAP_SPLICE_MOVE)) - splice_flags |= SPLICE_F_MOVE; - - res = splice(llp->pipe[0], NULL, fuse_chan_fd(ch), NULL, out->len, splice_flags); - if(res == -1) - { - res = -errno; - perror("fuse: splice from pipe"); - goto clear_pipe; - } - - if(res != out->len) - { - res = -EIO; - fprintf(stderr, "fuse: short splice from pipe: %u/%u\n", - res, out->len); - goto clear_pipe; - } - - return 0; - - clear_pipe: - fuse_ll_clear_pipe(f); - return res; - - fallback: - return fuse_send_data_iov_fallback(f, ch, iov, iov_count, buf, len); -} -#else static int fuse_send_data_iov(struct fuse_ll *f, @@ -735,7 +443,6 @@ fuse_send_data_iov(struct fuse_ll *f, return fuse_send_data_iov_fallback(f, ch, iov, iov_count, buf, len); } -#endif int fuse_reply_data(fuse_req_t req, @@ -1445,22 +1152,6 @@ do_init(fuse_req_t req, f->conn.max_readahead = 0; } - if(req->f->conn.proto_minor >= 14) - { -#ifdef HAVE_SPLICE -#ifdef HAVE_VMSPLICE - f->conn.capable |= 
FUSE_CAP_SPLICE_WRITE | FUSE_CAP_SPLICE_MOVE; - if(f->splice_write) - f->conn.want |= FUSE_CAP_SPLICE_WRITE; - if(f->splice_move) - f->conn.want |= FUSE_CAP_SPLICE_MOVE; -#endif - f->conn.capable |= FUSE_CAP_SPLICE_READ; - if(f->splice_read) - f->conn.want |= FUSE_CAP_SPLICE_READ; -#endif - } - if(req->f->conn.proto_minor >= 18) f->conn.capable |= FUSE_CAP_IOCTL_DIR; @@ -1476,7 +1167,7 @@ do_init(fuse_req_t req, bufsize = FUSE_MIN_READ_BUFFER; } - bufsize -= 4096; + bufsize -= pagesize; if(bufsize < f->conn.max_write) f->conn.max_write = bufsize; @@ -1484,17 +1175,12 @@ do_init(fuse_req_t req, if(f->op.init) f->op.init(f->userdata, &f->conn); - if(f->no_splice_read) - f->conn.want &= ~FUSE_CAP_SPLICE_READ; - if(f->no_splice_write) - f->conn.want &= ~FUSE_CAP_SPLICE_WRITE; - if(f->no_splice_move) - f->conn.want &= ~FUSE_CAP_SPLICE_MOVE; - if((arg->flags & FUSE_MAX_PAGES) && (f->conn.want & FUSE_CAP_MAX_PAGES)) { outarg.flags |= FUSE_MAX_PAGES; outarg.max_pages = f->conn.max_pages; + + msgbuf_set_bufsize(outarg.max_pages + 1); } if(f->conn.want & FUSE_CAP_ASYNC_READ) @@ -1968,12 +1654,6 @@ static const struct fuse_opt fuse_ll_opts[] = { "no_remote_lock", offsetof(struct fuse_ll, no_remote_flock), 1}, { "no_remote_flock", offsetof(struct fuse_ll, no_remote_flock), 1}, { "no_remote_posix_lock", offsetof(struct fuse_ll, no_remote_posix_lock), 1}, - { "splice_write", offsetof(struct fuse_ll, splice_write), 1}, - { "no_splice_write", offsetof(struct fuse_ll, no_splice_write), 1}, - { "splice_move", offsetof(struct fuse_ll, splice_move), 1}, - { "no_splice_move", offsetof(struct fuse_ll, no_splice_move), 1}, - { "splice_read", offsetof(struct fuse_ll, splice_read), 1}, - { "no_splice_read", offsetof(struct fuse_ll, no_splice_read), 1}, FUSE_OPT_KEY("max_read=", FUSE_OPT_KEY_DISCARD), FUSE_OPT_KEY("-h", KEY_HELP), FUSE_OPT_KEY("--help", KEY_HELP), @@ -2001,9 +1681,6 @@ fuse_ll_help(void) " -o no_remote_lock disable remote file locking\n" " -o no_remote_flock disable remote 
file locking (BSD)\n" " -o no_remote_posix_lock disable remove file locking (POSIX)\n" - " -o [no_]splice_write use splice to write to the fuse device\n" - " -o [no_]splice_move move data while splicing to the fuse device\n" - " -o [no_]splice_read use splice to read from the fuse device\n" ); } @@ -2110,7 +1787,7 @@ fuse_ll_buf_receive_read(struct fuse_session *se_, if(rv < sizeof(struct fuse_in_header)) { - fprintf(stderr, "short splice from fuse device\n"); + fprintf(stderr, "short read from fuse device\n"); return -EIO; } @@ -2191,152 +1868,6 @@ fuse_ll_buf_process_read_init(struct fuse_session *se_, return; } - -#if defined(HAVE_SPLICE) && defined(HAVE_VMSPLICE) -static -int -fuse_ll_buf_receive_splice(struct fuse_session *se_, - fuse_msgbuf_t *msgbuf_) -{ - int rv; - size_t bufsize = msgbuf_->size; - - rv = splice(fuse_chan_fd(se_->ch),NULL,msgbuf_->pipefd[1],NULL,bufsize,SPLICE_F_MOVE); - if(rv == -1) - return -errno; - - if(rv < sizeof(struct fuse_in_header)) - { - fprintf(stderr,"short splice from fuse device\n"); - return -EIO; - } - - return rv; -} - -static -void -fuse_ll_buf_process_splice(struct fuse_session *se_, - const fuse_msgbuf_t *msgbuf_) -{ - int rv; - struct fuse_req *req; - struct fuse_in_header *in; - struct iovec iov = { msgbuf_->mem, msgbuf_->size }; - - retry: - rv = vmsplice(msgbuf_->pipefd[0], &iov, 1, 0); - if(rv == -1) - { - rv = errno; - if(rv == EAGAIN) - goto retry; - // TODO: Need to propagate back errors to caller - return; - } - - in = (struct fuse_in_header*)msgbuf_->mem; - - req = fuse_ll_alloc_req(se_->f); - if(req == NULL) - return fuse_send_enomem(se_->f,se_->ch,in->unique); - - req->unique = in->unique; - req->ctx.uid = in->uid; - req->ctx.gid = in->gid; - req->ctx.pid = in->pid; - req->ch = se_->ch; - - rv = ENOSYS; - if(in->opcode >= FUSE_MAXOP) - goto reply_err; - if(fuse_ll_ops[in->opcode].func == NULL) - goto reply_err; - - fuse_ll_ops[in->opcode].func(req, in); - - return; - - reply_err: - fuse_reply_err(req, rv); - 
return; -} - -static -void -fuse_ll_buf_process_splice_init(struct fuse_session *se_, - const fuse_msgbuf_t *msgbuf_) -{ - int rv; - struct fuse_req *req; - struct fuse_in_header *in; - struct iovec iov = { msgbuf_->mem, msgbuf_->size }; - - retry: - rv = vmsplice(msgbuf_->pipefd[0], &iov, 1, 0); - if(rv == -1) - { - rv = errno; - if(rv == EAGAIN) - goto retry; - // TODO: Need to propagate back errors to caller - return; - } - - in = (struct fuse_in_header*)msgbuf_->mem; - - req = fuse_ll_alloc_req(se_->f); - if(req == NULL) - return fuse_send_enomem(se_->f,se_->ch,in->unique); - - req->unique = in->unique; - req->ctx.uid = in->uid; - req->ctx.gid = in->gid; - req->ctx.pid = in->pid; - req->ch = se_->ch; - - rv = EIO; - if(in->opcode != FUSE_INIT) - goto reply_err; - if(fuse_ll_ops[in->opcode].func == NULL) - goto reply_err; - - se_->process_buf = fuse_ll_buf_process_splice; - - fuse_ll_ops[in->opcode].func(req, in); - - return; - - reply_err: - fuse_reply_err(req, rv); - return; -} -#else -static -int -fuse_ll_buf_receive_splice(struct fuse_session *se_, - fuse_msgbuf_t *msgbuf_) -{ - return fuse_ll_buf_receive_read(se_,msgbuf_); -} - -static -void -fuse_ll_buf_process_splice(struct fuse_session *se_, - const fuse_msgbuf_t *msgbuf_) -{ - return fuse_ll_buf_process_read(se_,msgbuf_); -} - -static -void -fuse_ll_buf_process_splice_init(struct fuse_session *se_, - const fuse_msgbuf_t *msgbuf_) -{ - return fuse_ll_buf_process_read_init(se_,msgbuf_); -} -#endif - /* * always call fuse_lowlevel_new_common() internally, to work around a * misfeature in the FreeBSD runtime linker, which links the old @@ -2386,20 +1917,10 @@ fuse_lowlevel_new_common(struct fuse_args *args, f->owner = getuid(); f->userdata = userdata; - if(f->splice_read) - { - se = fuse_session_new(f, - fuse_ll_buf_receive_splice, - fuse_ll_buf_process_splice_init, - fuse_ll_destroy); - } - else - { - se = fuse_session_new(f, - fuse_ll_buf_receive_read, - fuse_ll_buf_process_read_init, - fuse_ll_destroy); 
- } + se = fuse_session_new(f, + fuse_ll_buf_receive_read, + fuse_ll_buf_process_read_init, + fuse_ll_destroy); if(!se) goto out_key_destroy; diff --git a/libfuse/lib/fuse_msgbuf.cpp b/libfuse/lib/fuse_msgbuf.cpp index 45861906..fd167612 100644 --- a/libfuse/lib/fuse_msgbuf.cpp +++ b/libfuse/lib/fuse_msgbuf.cpp @@ -15,22 +15,32 @@ OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ -#include "fuse_msgbuf.h" +#include "fuse_msgbuf.hpp" +#include "fuse.h" -#include -#include #include +#include #include #include #include -static std::size_t g_BUFSIZE = (1024 * 1024 * 2); +static std::uint32_t g_PAGESIZE = 0; +static std::uint32_t g_BUFSIZE = 0; static std::mutex g_MUTEX; static std::stack g_MSGBUF_STACK; +static +__attribute__((constructor)) +void +msgbuf_constructor() +{ + g_PAGESIZE = sysconf(_SC_PAGESIZE); + g_BUFSIZE = (g_PAGESIZE * (FUSE_MAX_MAX_PAGES + 2)); +} + static __attribute__((destructor)) void @@ -39,53 +49,34 @@ msgbuf_destroy() // TODO: cleanup? } -std::size_t -msgbuf_bufsize() +uint32_t +msgbuf_get_bufsize() { return g_BUFSIZE; } void -msgbuf_bufsize(const std::size_t size_) +msgbuf_set_bufsize(const uint32_t size_in_pages_) { - g_BUFSIZE = size_; + g_BUFSIZE = (size_in_pages_ * g_PAGESIZE); } -fuse_msgbuf_t* -msgbuf_alloc() +static +void* +page_aligned_malloc(const uint64_t size_) { int rv; - fuse_msgbuf_t *msgbuf; + void *buf = NULL; - g_MUTEX.lock(); - if(g_MSGBUF_STACK.empty()) - { - g_MUTEX.unlock(); + rv = posix_memalign(&buf,g_PAGESIZE,size_); + if(rv != 0) + return NULL; - msgbuf = (fuse_msgbuf_t*)malloc(sizeof(fuse_msgbuf_t)); - if(msgbuf == NULL) - return NULL; - - rv = pipe(msgbuf->pipefd); - assert(rv == 0); - rv = fcntl(msgbuf->pipefd[0],F_SETPIPE_SZ,g_BUFSIZE); - assert(rv > 0); - msgbuf->mem = (char*)malloc(rv); - msgbuf->size = rv; - msgbuf->used = 0; - } - else - { - msgbuf = g_MSGBUF_STACK.top(); - g_MSGBUF_STACK.pop(); - g_MUTEX.unlock(); - } - - return msgbuf; + return buf; } fuse_msgbuf_t* -msgbuf_alloc_memonly() 
+msgbuf_alloc() { fuse_msgbuf_t *msgbuf; @@ -98,11 +89,14 @@ msgbuf_alloc_memonly() if(msgbuf == NULL) return NULL; - msgbuf->pipefd[0] = -1; - msgbuf->pipefd[1] = -1; - msgbuf->mem = (char*)malloc(g_BUFSIZE); + msgbuf->mem = (char*)page_aligned_malloc(g_BUFSIZE); + if(msgbuf->mem == NULL) + { + free(msgbuf); + return NULL; + } + msgbuf->size = g_BUFSIZE; - msgbuf->used = 0; } else { @@ -119,5 +113,12 @@ msgbuf_free(fuse_msgbuf_t *msgbuf_) { std::lock_guard lck(g_MUTEX); + if(msgbuf_->size != g_BUFSIZE) + { + free(msgbuf_->mem); + free(msgbuf_); + return; + } + g_MSGBUF_STACK.push(msgbuf_); } diff --git a/libfuse/lib/fuse_msgbuf.hpp b/libfuse/lib/fuse_msgbuf.hpp index 2579131d..8ff0f77f 100644 --- a/libfuse/lib/fuse_msgbuf.hpp +++ b/libfuse/lib/fuse_msgbuf.hpp @@ -19,11 +19,15 @@ #pragma once #include "fuse_msgbuf.h" +#include "extern_c.h" -void msgbuf_bufsize(const uint32_t size); -std::size_t msgbuf_bufsize(); +EXTERN_C_BEGIN + +void msgbuf_set_bufsize(const uint32_t size); +uint32_t msgbuf_get_bufsize(); fuse_msgbuf_t* msgbuf_alloc(); fuse_msgbuf_t* msgbuf_alloc_memonly(); - void msgbuf_free(fuse_msgbuf_t *msgbuf); + +EXTERN_C_END diff --git a/man/mergerfs.1 b/man/mergerfs.1 index 7bec5f79..9755cb65 100644 --- a/man/mergerfs.1 +++ b/man/mergerfs.1 @@ -1,7 +1,7 @@ .\"t .\" Automatically generated by Pandoc 2.9.2.1 .\" -.TH "mergerfs" "1" "2023-02-19" "mergerfs user manual" "" +.TH "mergerfs" "1" "2023-02-23" "mergerfs user manual" "" .hy .SH NAME .PP @@ -354,6 +354,16 @@ Use \f[C]async_read=true\f[R] instead. .IP \[bu] 2 \f[B]sync_read\f[R]: deprecated - Perform reads synchronously. Use \f[C]async_read=false\f[R] instead. +.IP \[bu] 2 +\f[B]use_ino\f[R]: deprecated - Always enabled. +.IP \[bu] 2 +\f[B]allow_other\f[R]: deprecated - Always enabled. +.IP \[bu] 2 +\f[B]splice_read\f[R]: deprecated - Does nothing. +.IP \[bu] 2 +\f[B]splice_write\f[R]: deprecated - Does nothing. +.IP \[bu] 2 +\f[B]splice_move\f[R]: deprecated - Does nothing. 
.PP \f[B]NOTE:\f[R] Options are evaluated in the order listed so if the options are \f[B]func.rmdir=rand,category.action=ff\f[R] the @@ -1806,9 +1816,6 @@ below (including the benchmarking section.) NOTE: be sure to read about these features before changing them to understand what behaviors it may impact .IP \[bu] 2 -enable (or disable) \f[C]splice_move\f[R], \f[C]splice_read\f[R], and -\f[C]splice_write\f[R] -.IP \[bu] 2 disable \f[C]security_capability\f[R] and/or \f[C]xattr\f[R] .IP \[bu] 2 increase cache timeouts \f[C]cache.attr\f[R], \f[C]cache.entry\f[R], @@ -1899,8 +1906,7 @@ Once you find the component which has the performance issue you can do further testing with different options to see if they impact performance. For reads and writes the most relevant would be: \f[C]cache.files\f[R], -\f[C]async_read\f[R], \f[C]splice_move\f[R], \f[C]splice_read\f[R], -\f[C]splice_write\f[R]. +\f[C]async_read\f[R]. Less likely but relevant when using NFS or with certain filesystems would be \f[C]security_capability\f[R], \f[C]xattr\f[R], and \f[C]posix_acl\f[R]. @@ -2538,6 +2544,13 @@ See above first. If/when mergerfs is rewritten to use the low-level API then it\[cq]ll be plausible to support system libfuse but till then it\[cq]s simply too much work to manage the differences across the versions. +.SS Why was splice support removed? +.PP +After a lot of testing over the years splicing always appeared to at +best provide equivalent performance and in some cases worse performance. +Splice is not supported on other platforms forcing a traditional +read/write fallback to be provided. +The splice code was removed to simplify the codebase. .SS Why use mergerfs over mhddfs?
.PP mhddfs is no longer maintained and has some known stability and security diff --git a/src/fuse_write_buf.cpp b/src/fuse_write_buf.cpp deleted file mode 100644 index fb06e4de..00000000 --- a/src/fuse_write_buf.cpp +++ /dev/null @@ -1,111 +0,0 @@ -/* - Copyright (c) 2016, Antonio SJ Musumeci - - Permission to use, copy, modify, and/or distribute this software for any - purpose with or without fee is hereby granted, provided that the above - copyright notice and this permission notice appear in all copies. - - THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
-*/ - -#include "config.hpp" -#include "errno.hpp" -#include "fileinfo.hpp" -#include "fs_movefile.hpp" -#include "fuse_write.hpp" - -#include "fuse.h" - -#include -#include - -#include -#include - -using std::string; -using std::vector; - - -namespace l -{ - static - bool - out_of_space(const int error_) - { - return ((error_ == ENOSPC) || - (error_ == EDQUOT)); - } - - static - int - write_buf(const int fd_, - fuse_bufvec *src_, - const off_t offset_) - { - size_t size = fuse_buf_size(src_); - fuse_bufvec dst = FUSE_BUFVEC_INIT(size); - const fuse_buf_copy_flags cpflags = - (fuse_buf_copy_flags)(FUSE_BUF_SPLICE_MOVE|FUSE_BUF_SPLICE_NONBLOCK); - - dst.buf->flags = (fuse_buf_flags)(FUSE_BUF_IS_FD|FUSE_BUF_FD_SEEK); - dst.buf->fd = fd_; - dst.buf->pos = offset_; - - return fuse_buf_copy(&dst,src_,cpflags); - } - - static - int - move_and_write_buf(FileInfo *fi_, - fuse_bufvec *src_, - off_t offset_, - int err_) - { - int rv; - Config::Read cfg; - - if(cfg->moveonenospc.enabled == false) - return err_; - - rv = fs::movefile_as_root(cfg->moveonenospc.policy, - cfg->branches, - fi_->fusepath, - &fi_->fd); - if(rv == -1) - return err_; - - return l::write_buf(fi_->fd,src_,offset_); - } -} - -namespace FUSE -{ - int - write_buf(const fuse_file_info_t *ffi_, - fuse_bufvec *src_, - off_t offset_) - { - int rv; - FileInfo *fi = reinterpret_cast(ffi_->fh); - - rv = l::write_buf(fi->fd,src_,offset_); - if(l::out_of_space(-rv)) - rv = l::move_and_write_buf(fi,src_,offset_,rv); - - return rv; - } - - int - write_buf_null(const fuse_file_info_t *ffi_, - fuse_bufvec *src_, - off_t offset_) - { - return src_->buf[0].size; - } -} diff --git a/src/fuse_write_buf.hpp b/src/fuse_write_buf.hpp deleted file mode 100644 index be4779df..00000000 --- a/src/fuse_write_buf.hpp +++ /dev/null @@ -1,35 +0,0 @@ -/* - Copyright (c) 2016, Antonio SJ Musumeci - - Permission to use, copy, modify, and/or distribute this software for any - purpose with or without fee is hereby granted, provided that 
the above - copyright notice and this permission notice appear in all copies. - - THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. -*/ - -#pragma once - -#include "fuse.h" - -#include - - -namespace FUSE -{ - int - write_buf(const fuse_file_info_t *ffi, - struct fuse_bufvec *buf, - off_t offset); - - int - write_buf_null(const fuse_file_info_t *ffi, - struct fuse_bufvec *buf, - off_t offset); -} diff --git a/src/option_parser.cpp b/src/option_parser.cpp index 962a573d..e18fe1dd 100644 --- a/src/option_parser.cpp +++ b/src/option_parser.cpp @@ -122,14 +122,20 @@ should_ignore(const std::string &key_) { static const std::set IGNORED_KEYS = { + "allow_other", "atomic_o_trunc", "big_writes", "cache.open", "defaults", "hard_remove", + "no_splice_move", + "no_splice_read", + "no_splice_write", "nonempty", - "use_ino", - "allow_other" + "splice_move", + "splice_read", + "splice_write", + "use_ino" }; return (IGNORED_KEYS.find(key_) != IGNORED_KEYS.end()); @@ -420,7 +426,7 @@ namespace options errs_->push_back({0,"mountpoint not set"}); check_for_mount_loop(cfg,errs_); - + set_default_options(args_); set_fsname(cfg,args_); set_subtype(args_);