diff --git a/README.md b/README.md index c49ac499..2ab09dea 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ % mergerfs(1) mergerfs user manual % Antonio SJ Musumeci -% 2019-05-22 +% 2019-05-23 # NAME @@ -92,6 +92,7 @@ mergerfs does **not** support the copy-on-write (CoW) behavior found in **aufs** * **cache.entry=<int>**: file name lookup cache timeout in seconds. (default: 1) * **cache.negative_entry=<int>**: negative file name lookup cache timeout in seconds. (default: 0) * **cache.symlinks=<bool>**: cache symlinks (if supported by kernel) (default: false) +* **cache.readdir=<bool>**: cache readdir (if supported by kernel) (default: false) **NOTE:** Options are evaluated in the order listed so if the options are **func.rmdir=rand,category.action=ff** the **action** category setting will override the **rmdir** setting. @@ -527,7 +528,12 @@ Example: If the create policy is `mfs` and the timeout is 60 then for that 60 se #### symlink caching -As of version 4.20 Linux supports symlink caching. Significant performance increases can be had in workloads which use a lot of symlinks. Setting `cache.symlinks=true` will result in requesting symlink caching from the kernel only if supported. As a result its safe to enable it on systems prior to 4.20. That said it is disabled by default for now. You can see if caching is enabled by querying the xattr `user.mergerfs.cache.symlinks`. +As of version 4.20 Linux supports symlink caching. Significant performance increases can be had in workloads which use a lot of symlinks. Setting `cache.symlinks=true` will result in requesting symlink caching from the kernel only if supported. As a result its safe to enable it on systems prior to 4.20. That said it is disabled by default for now. You can see if caching is enabled by querying the xattr `user.mergerfs.cache.symlinks` but given it must be requested at startup you can not change it at runtime. + + +#### readdir caching + +As of version 4.20 Linux supports readdir caching. 
This can have a significant impact on directory traversal, especially when combined with entry (`cache.entry`) and attribute (`cache.attr`) caching. Setting `cache.readdir=true` will result in requesting readdir caching from the kernel on each `opendir`. If the kernel doesn't support readdir caching, setting the option to `true` has no effect. This option is configurable at runtime via xattr `user.mergerfs.cache.readdir`. #### writeback caching diff --git a/libfuse/include/fuse_common.h b/libfuse/include/fuse_common.h index a1830f1e..3ec08dcb 100644 --- a/libfuse/include/fuse_common.h +++ b/libfuse/include/fuse_common.h @@ -42,49 +42,51 @@ extern "C" { * * Changed in version 2.5 */ -struct fuse_file_info { - /** Open flags. Available in open() and release() */ - int flags; - - /** Old file handle, don't use */ - unsigned long fh_old; - - /** In case of a write operation indicates if this was caused by a - writepage */ - int writepage; - - /** Can be filled in by open, to use direct I/O on this file. - Introduced in version 2.4 */ - unsigned int direct_io : 1; - - /** Can be filled in by open, to indicate, that cached file data - need not be invalidated. Introduced in version 2.4 */ - unsigned int keep_cache : 1; - - /** Indicates a flush operation. Set in flush operation, also - maybe set in highlevel lock operation and lowlevel release - operation. Introduced in version 2.6 */ - unsigned int flush : 1; - - /** Can be filled in by open, to indicate that the file is not - seekable. Introduced in version 2.8 */ - unsigned int nonseekable : 1; - - /* Indicates that flock locks for this file should be - released. If set, lock_owner shall contain a valid value. - May only be set in ->release(). Introduced in version - 2.9 */ - unsigned int flock_release : 1; - - /** Padding. Do not use*/ - unsigned int padding : 27; - - /** File handle. May be filled in by filesystem in open(). - Available in all other file operations */ - uint64_t fh; - - /** Lock owner id. 
Available in locking operations and flush */ - uint64_t lock_owner; +struct +fuse_file_info +{ + /** Open flags. Available in open() and release() */ + int flags; + + /** In case of a write operation indicates if this was caused by a + writepage */ + uint32_t writepage : 1; + + /** Can be filled in by open, to use direct I/O on this file. + Introduced in version 2.4 */ + uint32_t direct_io : 1; + + /** Can be filled in by open, to indicate, that cached file data + need not be invalidated. Introduced in version 2.4 */ + uint32_t keep_cache : 1; + + /** Indicates a flush operation. Set in flush operation, also + maybe set in highlevel lock operation and lowlevel release + operation. Introduced in version 2.6 */ + uint32_t flush : 1; + + /** Can be filled in by open, to indicate that the file is not + seekable. Introduced in version 2.8 */ + uint32_t nonseekable : 1; + + /* Indicates that flock locks for this file should be + released. If set, lock_owner shall contain a valid value. + May only be set in ->release(). Introduced in version + 2.9 */ + uint32_t flock_release : 1; + + /* Requests the kernel to cache entries returned by readdir */ + uint32_t cache_readdir : 1; + + /** Padding. Do not use*/ + uint32_t padding : 25; + + /** File handle. May be filled in by filesystem in open(). + Available in all other file operations */ + uint64_t fh; + + /** Lock owner id. 
Available in locking operations and flush */ + uint64_t lock_owner; }; /** diff --git a/libfuse/lib/fuse.c b/libfuse/lib/fuse.c index a9037336..4add1dd3 100644 --- a/libfuse/lib/fuse.c +++ b/libfuse/lib/fuse.c @@ -3275,7 +3275,6 @@ static struct fuse_dh *get_dirhandle(const struct fuse_file_info *llfi, struct fuse_dh *dh = (struct fuse_dh *) (uintptr_t) llfi->fh; memset(fi, 0, sizeof(struct fuse_file_info)); fi->fh = dh->fh; - fi->fh_old = dh->fh; return dh; } @@ -3294,6 +3293,7 @@ static void fuse_lib_opendir(fuse_req_t req, fuse_ino_t ino, reply_err(req, -ENOMEM); return; } + memset(dh, 0, sizeof(struct fuse_dh)); dh->fuse = f; dh->contents = NULL; @@ -3313,7 +3313,10 @@ static void fuse_lib_opendir(fuse_req_t req, fuse_ino_t ino, err = fuse_fs_opendir(f->fs, path, &fi); fuse_finish_interrupt(f, req, &d); dh->fh = fi.fh; + llfi->keep_cache = fi.keep_cache; + llfi->cache_readdir = fi.cache_readdir; } + if (!err) { if (fuse_reply_open(req, llfi) == -ENOENT) { /* The opendir syscall was interrupted, so it diff --git a/libfuse/lib/fuse_lowlevel.c b/libfuse/lib/fuse_lowlevel.c index 7f93971e..681fc98e 100644 --- a/libfuse/lib/fuse_lowlevel.c +++ b/libfuse/lib/fuse_lowlevel.c @@ -352,6 +352,8 @@ static void fill_open(struct fuse_open_out *arg, arg->open_flags |= FOPEN_KEEP_CACHE; if (f->nonseekable) arg->open_flags |= FOPEN_NONSEEKABLE; + if (f->cache_readdir) + arg->open_flags |= FOPEN_CACHE_DIR; } int fuse_reply_entry(fuse_req_t req, const struct fuse_entry_param *e) @@ -1041,7 +1043,6 @@ static void do_getattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) if (arg->getattr_flags & FUSE_GETATTR_FH) { memset(&fi, 0, sizeof(fi)); fi.fh = arg->fh; - fi.fh_old = fi.fh; fip = &fi; } } @@ -1067,7 +1068,6 @@ static void do_setattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) memset(&fi_store, 0, sizeof(fi_store)); fi = &fi_store; fi->fh = arg->fh; - fi->fh_old = fi->fh; } arg->valid &= FUSE_SET_ATTR_MODE | @@ -1230,7 +1230,6 @@ static void do_read(fuse_req_t 
req, fuse_ino_t nodeid, const void *inarg) memset(&fi, 0, sizeof(fi)); fi.fh = arg->fh; - fi.fh_old = fi.fh; if (req->f->conn.proto_minor >= 9) { fi.lock_owner = arg->lock_owner; fi.flags = arg->flags; @@ -1248,7 +1247,6 @@ static void do_write(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) memset(&fi, 0, sizeof(fi)); fi.fh = arg->fh; - fi.fh_old = fi.fh; fi.writepage = arg->write_flags & 1; if (req->f->conn.proto_minor < 9) { @@ -1279,7 +1277,6 @@ static void do_write_buf(fuse_req_t req, fuse_ino_t nodeid, const void *inarg, memset(&fi, 0, sizeof(fi)); fi.fh = arg->fh; - fi.fh_old = fi.fh; fi.writepage = arg->write_flags & 1; if (req->f->conn.proto_minor < 9) { @@ -1318,7 +1315,6 @@ static void do_flush(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) memset(&fi, 0, sizeof(fi)); fi.fh = arg->fh; - fi.fh_old = fi.fh; fi.flush = 1; if (req->f->conn.proto_minor >= 7) fi.lock_owner = arg->lock_owner; @@ -1337,7 +1333,6 @@ static void do_release(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) memset(&fi, 0, sizeof(fi)); fi.flags = arg->flags; fi.fh = arg->fh; - fi.fh_old = fi.fh; if (req->f->conn.proto_minor >= 8) { fi.flush = (arg->release_flags & FUSE_RELEASE_FLUSH) ? 
1 : 0; fi.lock_owner = arg->lock_owner; @@ -1360,7 +1355,6 @@ static void do_fsync(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) memset(&fi, 0, sizeof(fi)); fi.fh = arg->fh; - fi.fh_old = fi.fh; if (req->f->op.fsync) req->f->op.fsync(req, nodeid, arg->fsync_flags & 1, &fi); @@ -1389,7 +1383,6 @@ static void do_readdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) memset(&fi, 0, sizeof(fi)); fi.fh = arg->fh; - fi.fh_old = fi.fh; if (req->f->op.readdir) req->f->op.readdir(req, nodeid, arg->size, arg->offset, &fi); @@ -1405,7 +1398,6 @@ static void do_releasedir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) memset(&fi, 0, sizeof(fi)); fi.flags = arg->flags; fi.fh = arg->fh; - fi.fh_old = fi.fh; if (req->f->op.releasedir) req->f->op.releasedir(req, nodeid, &fi); @@ -1420,7 +1412,6 @@ static void do_fsyncdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) memset(&fi, 0, sizeof(fi)); fi.fh = arg->fh; - fi.fh_old = fi.fh; if (req->f->op.fsyncdir) req->f->op.fsyncdir(req, nodeid, arg->fsync_flags & 1, &fi); @@ -1675,7 +1666,6 @@ static void do_ioctl(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) memset(&fi, 0, sizeof(fi)); fi.fh = arg->fh; - fi.fh_old = fi.fh; if (sizeof(void *) == 4 && req->f->conn.proto_minor >= 16 && !(flags & FUSE_IOCTL_32BIT)) { @@ -1702,7 +1692,6 @@ static void do_poll(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) memset(&fi, 0, sizeof(fi)); fi.fh = arg->fh; - fi.fh_old = fi.fh; if (req->f->op.poll) { struct fuse_pollhandle *ph = NULL; diff --git a/man/mergerfs.1 b/man/mergerfs.1 index fce5222a..669cb995 100644 --- a/man/mergerfs.1 +++ b/man/mergerfs.1 @@ -1,7 +1,7 @@ .\"t .\" Automatically generated by Pandoc 1.19.2.4 .\" -.TH "mergerfs" "1" "2019\-05\-22" "mergerfs user manual" "" +.TH "mergerfs" "1" "2019\-05\-23" "mergerfs user manual" "" .hy .SH NAME .PP @@ -228,6 +228,9 @@ timeout in seconds. 
.IP \[bu] 2 \f[B]cache.symlinks=\f[]: cache symlinks (if supported by kernel) (default: false) +.IP \[bu] 2 +\f[B]cache.readdir=\f[]: cache readdir (if supported by kernel) +(default: false) .PP \f[B]NOTE:\f[] Options are evaluated in the order listed so if the options are \f[B]func.rmdir=rand,category.action=ff\f[] the @@ -1126,7 +1129,20 @@ caching from the kernel only if supported. As a result its safe to enable it on systems prior to 4.20. That said it is disabled by default for now. You can see if caching is enabled by querying the xattr -\f[C]user.mergerfs.cache.symlinks\f[]. +\f[C]user.mergerfs.cache.symlinks\f[] but given it must be requested at +startup you can not change it at runtime. +.SS readdir caching +.PP +As of version 4.20 Linux supports readdir caching. +This can have a significant impact on directory traversal, +especially when combined with entry (\f[C]cache.entry\f[]) and attribute +(\f[C]cache.attr\f[]) caching. +Setting \f[C]cache.readdir=true\f[] will result in requesting readdir +caching from the kernel on each \f[C]opendir\f[]. +If the kernel doesn\[aq]t support readdir caching, setting the option to +\f[C]true\f[] has no effect. +This option is configurable at runtime via xattr +\f[C]user.mergerfs.cache.readdir\f[]. 
.SS writeback caching .PP writeback caching is a technique for improving write speeds by batching diff --git a/src/config.hpp b/src/config.hpp index 796d834b..de55b721 100644 --- a/src/config.hpp +++ b/src/config.hpp @@ -80,6 +80,7 @@ public: StatFSIgnore::Enum statfs_ignore; bool posix_acl; bool cache_symlinks; + bool cache_readdir; public: const Policy *policies[FuseFunc::Enum::END]; diff --git a/src/fuse_getxattr.cpp b/src/fuse_getxattr.cpp index 952aef7d..cdbdd77d 100644 --- a/src/fuse_getxattr.cpp +++ b/src/fuse_getxattr.cpp @@ -367,6 +367,8 @@ namespace l l::getxattr_controlfile_cache_negative_entry(attrvalue); else if((attr[2] == "cache") && (attr[3] == "symlinks")) l::getxattr_controlfile_bool(config.cache_symlinks,attrvalue); + else if((attr[2] == "cache") && (attr[3] == "readdir")) + l::getxattr_controlfile_bool(config.cache_readdir,attrvalue); break; } diff --git a/src/fuse_listxattr.cpp b/src/fuse_listxattr.cpp index 70ce5e24..5f3a97c6 100644 --- a/src/fuse_listxattr.cpp +++ b/src/fuse_listxattr.cpp @@ -49,6 +49,7 @@ namespace l ("user.mergerfs.cache.entry") ("user.mergerfs.cache.negative_entry") ("user.mergerfs.cache.open") + ("user.mergerfs.cache.readdir") ("user.mergerfs.cache.statfs") ("user.mergerfs.cache.symlinks") ("user.mergerfs.direct_io") diff --git a/src/fuse_opendir.cpp b/src/fuse_opendir.cpp index 721ce085..30c54b2b 100644 --- a/src/fuse_opendir.cpp +++ b/src/fuse_opendir.cpp @@ -14,6 +14,7 @@ OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
*/ +#include "config.hpp" #include "dirinfo.hpp" #include @@ -24,8 +25,16 @@ namespace FUSE opendir(const char *fusepath_, fuse_file_info *ffi_) { + const Config &config = Config::get(); + ffi_->fh = reinterpret_cast(new DirInfo(fusepath_)); + if(config.cache_readdir) + { + ffi_->keep_cache = 1; + ffi_->cache_readdir = 1; + } + return 0; } } diff --git a/src/fuse_setxattr.cpp b/src/fuse_setxattr.cpp index 4239d02e..637922a5 100644 --- a/src/fuse_setxattr.cpp +++ b/src/fuse_setxattr.cpp @@ -178,7 +178,7 @@ namespace l int setxattr_xattr(const string &attrval_, const int flags_, - int xattr_) + int &xattr_) { if((flags_ & XATTR_CREATE) == XATTR_CREATE) return -EEXIST; @@ -436,6 +436,8 @@ namespace l return l::setxattr_controlfile_cache_entry(attrval,flags); else if((attr[2] == "cache") && (attr[3] == "negative_entry")) return l::setxattr_controlfile_cache_negative_entry(attrval,flags); + else if((attr[2] == "cache") && (attr[3] == "readdir")) + return l::setxattr_bool(attrval,flags,config.cache_readdir); break; default: diff --git a/src/option_parser.cpp b/src/option_parser.cpp index 47bf438e..88739e5b 100644 --- a/src/option_parser.cpp +++ b/src/option_parser.cpp @@ -241,6 +241,8 @@ parse_and_process_cache(Config &config_, return (set_kv_option(outargs,"attr_timeout",value_),0); else if(func_ == "symlinks") return parse_and_process(value_,config_.cache_symlinks); + else if(func_ == "readdir") + return parse_and_process(value_,config_.cache_readdir); return 1; } @@ -395,6 +397,8 @@ usage(void) " -o cache.negative_entry=\n" " negative file name lookup cache timeout in\n" " seconds. default = 0\n" + " -o cache.readdir=\n" + " enable kernel caching readdir (if supported)\n" " -o direct_io Bypass page caching, may increase write\n" " speeds at the cost of reads. Please read docs\n" " for more details as there are tradeoffs.\n"