diff --git a/README.md b/README.md index bb3810de..c49ac499 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ % mergerfs(1) mergerfs user manual % Antonio SJ Musumeci -% 2019-05-18 +% 2019-05-22 # NAME @@ -91,6 +91,7 @@ mergerfs does **not** support the copy-on-write (CoW) behavior found in **aufs** * **cache.attr=<int>**: file attribute cache timeout in seconds. (default: 1) * **cache.entry=<int>**: file name lookup cache timeout in seconds. (default: 1) * **cache.negative_entry=<int>**: negative file name lookup cache timeout in seconds. (default: 0) +* **cache.symlinks=<bool>**: cache symlinks (if supported by kernel) (default: false) **NOTE:** Options are evaluated in the order listed so if the options are **func.rmdir=rand,category.action=ff** the **action** category setting will override the **rmdir** setting. @@ -524,6 +525,11 @@ Of the syscalls used by mergerfs in policies the `statfs` / `statvfs` call is pe Example: If the create policy is `mfs` and the timeout is 60 then for that 60 seconds the same drive will be returned as the target for creates because the available space won't be updated for that time. +#### symlink caching + +As of version 4.20 Linux supports symlink caching. Significant performance increases can be had in workloads which use a lot of symlinks. Setting `cache.symlinks=true` will result in requesting symlink caching from the kernel only if supported. As a result it's safe to enable it on systems prior to 4.20. That said it is disabled by default for now. You can see if caching is enabled by querying the xattr `user.mergerfs.cache.symlinks`. + + #### writeback caching writeback caching is a technique for improving write speeds by batching writes at a faster device and then bulk writing to the slower device. With FUSE the kernel will wait for a number of writes to be made and then send it to the filesystem as one request. mergerfs currently uses a slightly modified and vendored libfuse 2.9.7 which does not support writeback caching. 
However, a prototype port to libfuse 3.x has been made and the writeback cache appears to work as expected (though performance improvements greatly depend on the way the client app writes data). Once the port is complete and thoroughly tested writeback caching will be available. @@ -889,7 +895,7 @@ MergerFS is not intended to be a replacement for ZFS. MergerFS is intended to pr #### Can drives be written to directly? Outside of mergerfs while pooled? -Yes, however its not recommended to use the same file from within the pool and from without at the same time. Especially if using caching of any kind (cache.entry, cache.attr, ac_attr_timeout, cache.negative_entry, auto_cache, kernel_cache). +Yes, however it's not recommended to use the same file from within the pool and from without at the same time. Especially if using caching of any kind (cache.entry, cache.attr, ac_attr_timeout, cache.negative_entry, cache.symlinks, auto_cache, kernel_cache). #### Why do I get an "out of space" / "no space left on device" / ENOSPC error even though there appears to be lots of space available? 
@@ -974,6 +980,7 @@ NOTE: be sure to read about these features before changing them * add (or remove) `splice_move`, `splice_read`, and `splice_write` * increase cache timeouts `cache.attr`, `cache.entry`, `cache.negative_entry` * enable `cache.open` and/or `cache.statfs` +* enable `cache.symlinks` * change the number opf worker threads * disable `security_capability` and/or `xattr` * disable `posix_acl` diff --git a/libfuse/include/fuse_common.h b/libfuse/include/fuse_common.h index 1eb55c17..a1830f1e 100644 --- a/libfuse/include/fuse_common.h +++ b/libfuse/include/fuse_common.h @@ -100,6 +100,7 @@ struct fuse_file_info { * FUSE_CAP_SPLICE_MOVE: ability to move data to the fuse device with splice() * FUSE_CAP_SPLICE_READ: ability to use splice() to read from the fuse device * FUSE_CAP_IOCTL_DIR: ioctl support on directories + * FUSE_CAP_CACHE_SYMLINKS: cache READLINK responses */ #define FUSE_CAP_ASYNC_READ (1 << 0) #define FUSE_CAP_POSIX_LOCKS (1 << 1) @@ -115,6 +116,7 @@ struct fuse_file_info { #define FUSE_CAP_ASYNC_DIO (1 << 15) #define FUSE_CAP_PARALLEL_DIROPS (1 << 18) #define FUSE_CAP_POSIX_ACL (1 << 19) +#define FUSE_CAP_CACHE_SYMLINKS (1 << 20) /** diff --git a/libfuse/include/fuse_kernel.h b/libfuse/include/fuse_kernel.h index c806a17b..2ac59861 100644 --- a/libfuse/include/fuse_kernel.h +++ b/libfuse/include/fuse_kernel.h @@ -119,6 +119,12 @@ * * 7.28 * - add FUSE_COPY_FILE_RANGE + * - add FOPEN_CACHE_DIR + * - add FUSE_MAX_PAGES, add max_pages to init_out + * - add FUSE_CACHE_SYMLINKS + * + * 7.29 + * - add FUSE_NO_OPENDIR_SUPPORT flag */ #ifndef _LINUX_FUSE_H @@ -154,7 +160,7 @@ #define FUSE_KERNEL_VERSION 7 /** Minor version number of this interface */ -#define FUSE_KERNEL_MINOR_VERSION 27 +#define FUSE_KERNEL_MINOR_VERSION 29 /** The node ID of the root inode */ #define FUSE_ROOT_ID 1 @@ -222,10 +228,12 @@ struct fuse_file_lock { * FOPEN_DIRECT_IO: bypass page cache for this open file * FOPEN_KEEP_CACHE: don't invalidate the data cache on open * 
FOPEN_NONSEEKABLE: the file is not seekable + * FOPEN_CACHE_DIR: allow caching this directory */ #define FOPEN_DIRECT_IO (1 << 0) #define FOPEN_KEEP_CACHE (1 << 1) #define FOPEN_NONSEEKABLE (1 << 2) +#define FOPEN_CACHE_DIR (1 << 3) /** * INIT request/reply flags @@ -252,6 +260,9 @@ struct fuse_file_lock { * FUSE_HANDLE_KILLPRIV: fs handles killing suid/sgid/cap on write/chown/trunc * FUSE_POSIX_ACL: filesystem supports posix acls * FUSE_ABORT_ERROR: reading the device after abort returns ECONNABORTED + * FUSE_MAX_PAGES: init_out.max_pages contains the max number of req pages + * FUSE_CACHE_SYMLINKS: cache READLINK responses + * FUSE_NO_OPENDIR_SUPPORT: kernel supports zero-message opendir */ #define FUSE_ASYNC_READ (1 << 0) #define FUSE_POSIX_LOCKS (1 << 1) @@ -275,6 +286,9 @@ struct fuse_file_lock { #define FUSE_HANDLE_KILLPRIV (1 << 19) #define FUSE_POSIX_ACL (1 << 20) #define FUSE_ABORT_ERROR (1 << 21) +#define FUSE_MAX_PAGES (1 << 22) +#define FUSE_CACHE_SYMLINKS (1 << 23) +#define FUSE_NO_OPENDIR_SUPPORT (1 << 24) /** * CUSE INIT request/reply flags @@ -614,7 +628,9 @@ struct fuse_init_out { uint16_t congestion_threshold; uint32_t max_write; uint32_t time_gran; - uint32_t unused[9]; + uint16_t max_pages; + uint16_t padding; + uint32_t unused[8]; }; #define CUSE_INIT_INFO_MAX 4096 diff --git a/libfuse/lib/fuse_lowlevel.c b/libfuse/lib/fuse_lowlevel.c index 7d097b94..7f93971e 100644 --- a/libfuse/lib/fuse_lowlevel.c +++ b/libfuse/lib/fuse_lowlevel.c @@ -1797,6 +1797,8 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) f->conn.capable |= FUSE_CAP_FLOCK_LOCKS; if (arg->flags & FUSE_POSIX_ACL) f->conn.capable |= FUSE_CAP_POSIX_ACL; + if (arg->flags & FUSE_CACHE_SYMLINKS) + f->conn.capable |= FUSE_CAP_CACHE_SYMLINKS; if (arg->flags & FUSE_ASYNC_DIO) f->conn.capable |= FUSE_CAP_ASYNC_DIO; if (arg->flags & FUSE_PARALLEL_DIROPS) @@ -1869,6 +1871,8 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) outarg.flags |= 
FUSE_FLOCK_LOCKS; if (f->conn.want & FUSE_CAP_POSIX_ACL) outarg.flags |= FUSE_POSIX_ACL; + if (f->conn.want & FUSE_CAP_CACHE_SYMLINKS) + outarg.flags |= FUSE_CACHE_SYMLINKS; if (f->conn.want & FUSE_CAP_ASYNC_DIO) outarg.flags |= FUSE_ASYNC_DIO; if (f->conn.want & FUSE_CAP_PARALLEL_DIROPS) diff --git a/man/mergerfs.1 b/man/mergerfs.1 index 02d7c4b8..fce5222a 100644 --- a/man/mergerfs.1 +++ b/man/mergerfs.1 @@ -1,7 +1,7 @@ .\"t .\" Automatically generated by Pandoc 1.19.2.4 .\" -.TH "mergerfs" "1" "2019\-05\-18" "mergerfs user manual" "" +.TH "mergerfs" "1" "2019\-05\-22" "mergerfs user manual" "" .hy .SH NAME .PP @@ -225,6 +225,9 @@ seconds. \f[B]cache.negative_entry=\f[]: negative file name lookup cache timeout in seconds. (default: 0) +.IP \[bu] 2 +\f[B]cache.symlinks=\f[]: cache symlinks (if supported by kernel) +(default: false) .PP \f[B]NOTE:\f[] Options are evaluated in the order listed so if the options are \f[B]func.rmdir=rand,category.action=ff\f[] the @@ -1113,6 +1116,17 @@ policy will be cached for the number of seconds its set to. Example: If the create policy is \f[C]mfs\f[] and the timeout is 60 then for that 60 seconds the same drive will be returned as the target for creates because the available space won\[aq]t be updated for that time. +.SS symlink caching +.PP +As of version 4.20 Linux supports symlink caching. +Significant performance increases can be had in workloads which use a +lot of symlinks. +Setting \f[C]cache.symlinks=true\f[] will result in requesting symlink +caching from the kernel only if supported. +As a result it\[aq]s safe to enable it on systems prior to 4.20. +That said it is disabled by default for now. +You can see if caching is enabled by querying the xattr +\f[C]user.mergerfs.cache.symlinks\f[]. 
.SS writeback caching .PP writeback caching is a technique for improving write speeds by batching @@ -1778,7 +1792,8 @@ here (http://louwrentius.com/the-hidden-cost-of-using-zfs-for-your-home-nas.html Yes, however its not recommended to use the same file from within the pool and from without at the same time. Especially if using caching of any kind (cache.entry, cache.attr, -ac_attr_timeout, cache.negative_entry, auto_cache, kernel_cache). +ac_attr_timeout, cache.negative_entry, cache.symlinks, auto_cache, +kernel_cache). .SS Why do I get an "out of space" / "no space left on device" / ENOSPC error even though there appears to be lots of space available? .PP @@ -1948,6 +1963,8 @@ increase cache timeouts \f[C]cache.attr\f[], \f[C]cache.entry\f[], .IP \[bu] 2 enable \f[C]cache.open\f[] and/or \f[C]cache.statfs\f[] .IP \[bu] 2 +enable \f[C]cache.symlinks\f[] +.IP \[bu] 2 change the number opf worker threads .IP \[bu] 2 disable \f[C]security_capability\f[] and/or \f[C]xattr\f[] diff --git a/src/config.cpp b/src/config.cpp index 05e4c769..58d0c012 100644 --- a/src/config.cpp +++ b/src/config.cpp @@ -49,6 +49,7 @@ Config::Config() statfs(StatFS::BASE), statfs_ignore(StatFSIgnore::NONE), posix_acl(false), + cache_symlinks(false), POLICYINIT(access), POLICYINIT(chmod), POLICYINIT(chown), diff --git a/src/config.hpp b/src/config.hpp index 26a30499..796d834b 100644 --- a/src/config.hpp +++ b/src/config.hpp @@ -79,6 +79,7 @@ public: StatFS::Enum statfs; StatFSIgnore::Enum statfs_ignore; bool posix_acl; + bool cache_symlinks; public: const Policy *policies[FuseFunc::Enum::END]; diff --git a/src/fuse_getxattr.cpp b/src/fuse_getxattr.cpp index 47cafba3..952aef7d 100644 --- a/src/fuse_getxattr.cpp +++ b/src/fuse_getxattr.cpp @@ -365,6 +365,8 @@ namespace l l::getxattr_controlfile_cache_entry(attrvalue); else if((attr[2] == "cache") && (attr[3] == "negative_entry")) l::getxattr_controlfile_cache_negative_entry(attrvalue); + else if((attr[2] == "cache") && (attr[3] == "symlinks")) 
+ l::getxattr_controlfile_bool(config.cache_symlinks,attrvalue); break; } diff --git a/src/fuse_init.cpp b/src/fuse_init.cpp index 80fbe4de..09925392 100644 --- a/src/fuse_init.cpp +++ b/src/fuse_init.cpp @@ -21,12 +21,29 @@ namespace l { + static + void + want(fuse_conn_info *conn_, + const int flag_) + { + conn_->want |= flag_; + } + + static + bool + capable(fuse_conn_info *conn_, + const int flag_) + { + return (conn_->capable & flag_); + } + + static void want_if_capable(fuse_conn_info *conn_, const int flag_) { - if(conn_->capable & flag_) - conn_->want |= flag_; + if(capable(conn_,flag_)) + want(conn_,flag_); } } @@ -35,6 +52,8 @@ namespace FUSE void * init(fuse_conn_info *conn_) { + Config &c = Config::get_writable(); + ugid::init(); l::want_if_capable(conn_,FUSE_CAP_ASYNC_READ); @@ -44,9 +63,17 @@ namespace FUSE l::want_if_capable(conn_,FUSE_CAP_IOCTL_DIR); l::want_if_capable(conn_,FUSE_CAP_ASYNC_DIO); l::want_if_capable(conn_,FUSE_CAP_PARALLEL_DIROPS); - if(Config::get().posix_acl) - l::want_if_capable(conn_,FUSE_CAP_POSIX_ACL); - return &Config::get_writable(); + if(c.posix_acl && l::capable(conn_,FUSE_CAP_POSIX_ACL)) + l::want(conn_,FUSE_CAP_POSIX_ACL); + else + c.posix_acl = false; + + if(c.cache_symlinks && l::capable(conn_,FUSE_CAP_CACHE_SYMLINKS)) + l::want(conn_,FUSE_CAP_CACHE_SYMLINKS); + else + c.cache_symlinks = false; + + return &c; } } diff --git a/src/fuse_listxattr.cpp b/src/fuse_listxattr.cpp index 5aa3eed2..70ce5e24 100644 --- a/src/fuse_listxattr.cpp +++ b/src/fuse_listxattr.cpp @@ -50,6 +50,7 @@ namespace l ("user.mergerfs.cache.negative_entry") ("user.mergerfs.cache.open") ("user.mergerfs.cache.statfs") + ("user.mergerfs.cache.symlinks") ("user.mergerfs.direct_io") ("user.mergerfs.dropcacheonclose") ("user.mergerfs.ignorepponrename") diff --git a/src/option_parser.cpp b/src/option_parser.cpp index 3a173930..47bf438e 100644 --- a/src/option_parser.cpp +++ b/src/option_parser.cpp @@ -239,6 +239,8 @@ parse_and_process_cache(Config 
&config_, return (set_kv_option(outargs,"negative_timeout",value_),0); else if(func_ == "attr") return (set_kv_option(outargs,"attr_timeout",value_),0); + else if(func_ == "symlinks") + return parse_and_process(value_,config_.cache_symlinks); return 1; } @@ -383,6 +385,9 @@ usage(void) " default = 0 (disabled)\n" " -o cache.statfs= 'statfs' cache timeout in seconds. Used by\n" " policies. default = 0 (disabled)\n" + " -o cache.symlinks=\n" + " enable kernel caching of symlinks (if supported)\n" + " default = false\n" " -o cache.attr= file attribute cache timeout in seconds.\n" " default = 1\n" " -o cache.entry= file name lookup cache timeout in seconds.\n"