Browse Source

Merge pull request #630 from trapexit/cache-dir

add readdir caching
pull/632/head
trapexit 5 years ago
committed by GitHub
parent
commit
40a4762cac
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
  1. 10
      README.md
  2. 88
      libfuse/include/fuse_common.h
  3. 5
      libfuse/lib/fuse.c
  4. 15
      libfuse/lib/fuse_lowlevel.c
  5. 20
      man/mergerfs.1
  6. 1
      src/config.hpp
  7. 2
      src/fuse_getxattr.cpp
  8. 1
      src/fuse_listxattr.cpp
  9. 9
      src/fuse_opendir.cpp
  10. 4
      src/fuse_setxattr.cpp
  11. 4
      src/option_parser.cpp

10
README.md

@ -1,6 +1,6 @@
% mergerfs(1) mergerfs user manual % mergerfs(1) mergerfs user manual
% Antonio SJ Musumeci <trapexit@spawn.link> % Antonio SJ Musumeci <trapexit@spawn.link>
% 2019-05-22
% 2019-05-23
# NAME # NAME
@ -92,6 +92,7 @@ mergerfs does **not** support the copy-on-write (CoW) behavior found in **aufs**
* **cache.entry=&lt;int&gt;**: file name lookup cache timeout in seconds. (default: 1) * **cache.entry=&lt;int&gt;**: file name lookup cache timeout in seconds. (default: 1)
* **cache.negative_entry=&lt;int&gt;**: negative file name lookup cache timeout in seconds. (default: 0) * **cache.negative_entry=&lt;int&gt;**: negative file name lookup cache timeout in seconds. (default: 0)
* **cache.symlinks=&lt;bool&gt;**: cache symlinks (if supported by kernel) (default: false) * **cache.symlinks=&lt;bool&gt;**: cache symlinks (if supported by kernel) (default: false)
* **cache.readdir=&lt;bool&gt;**: cache readdir (if supported by kernel) (default: false)
**NOTE:** Options are evaluated in the order listed so if the options are **func.rmdir=rand,category.action=ff** the **action** category setting will override the **rmdir** setting. **NOTE:** Options are evaluated in the order listed so if the options are **func.rmdir=rand,category.action=ff** the **action** category setting will override the **rmdir** setting.
@ -527,7 +528,12 @@ Example: If the create policy is `mfs` and the timeout is 60 then for that 60 se
#### symlink caching #### symlink caching
As of version 4.20 Linux supports symlink caching. Significant performance increases can be had in workloads which use a lot of symlinks. Setting `cache.symlinks=true` will result in requesting symlink caching from the kernel only if supported. As a result its safe to enable it on systems prior to 4.20. That said it is disabled by default for now. You can see if caching is enabled by querying the xattr `user.mergerfs.cache.symlinks`.
As of version 4.20 Linux supports symlink caching. Significant performance increases can be had in workloads which use a lot of symlinks. Setting `cache.symlinks=true` will result in requesting symlink caching from the kernel only if supported. As a result it's safe to enable it on systems prior to 4.20. That said, it is disabled by default for now. You can see if caching is enabled by querying the xattr `user.mergerfs.cache.symlinks`, but given it must be requested at startup you cannot change it at runtime.
#### readdir caching
As of version 4.20 Linux supports readdir caching. This can have a significant impact on directory traversal, especially when combined with entry (`cache.entry`) and attribute (`cache.attr`) caching. Setting `cache.readdir=true` will result in requesting readdir caching from the kernel on each `opendir`. If the kernel doesn't support readdir caching, setting the option to `true` has no effect. This option is configurable at runtime via xattr `user.mergerfs.cache.readdir`.
#### writeback caching #### writeback caching

88
libfuse/include/fuse_common.h

@ -42,49 +42,51 @@ extern "C" {
* *
* Changed in version 2.5 * Changed in version 2.5
*/ */
struct fuse_file_info {
/** Open flags. Available in open() and release() */
int flags;
/** Old file handle, don't use */
unsigned long fh_old;
/** In case of a write operation indicates if this was caused by a
writepage */
int writepage;
/** Can be filled in by open, to use direct I/O on this file.
Introduced in version 2.4 */
unsigned int direct_io : 1;
/** Can be filled in by open, to indicate, that cached file data
need not be invalidated. Introduced in version 2.4 */
unsigned int keep_cache : 1;
/** Indicates a flush operation. Set in flush operation, also
maybe set in highlevel lock operation and lowlevel release
operation. Introduced in version 2.6 */
unsigned int flush : 1;
/** Can be filled in by open, to indicate that the file is not
seekable. Introduced in version 2.8 */
unsigned int nonseekable : 1;
/* Indicates that flock locks for this file should be
released. If set, lock_owner shall contain a valid value.
May only be set in ->release(). Introduced in version
2.9 */
unsigned int flock_release : 1;
/** Padding. Do not use*/
unsigned int padding : 27;
/** File handle. May be filled in by filesystem in open().
Available in all other file operations */
uint64_t fh;
/** Lock owner id. Available in locking operations and flush */
uint64_t lock_owner;
struct
fuse_file_info
{
/** Open flags. Available in open() and release() */
int flags;
/** In case of a write operation indicates if this was caused by a
writepage */
uint32_t writepage : 1;
/** Can be filled in by open, to use direct I/O on this file.
Introduced in version 2.4 */
uint32_t direct_io : 1;
/** Can be filled in by open, to indicate, that cached file data
need not be invalidated. Introduced in version 2.4 */
uint32_t keep_cache : 1;
/** Indicates a flush operation. Set in flush operation, also
maybe set in highlevel lock operation and lowlevel release
operation. Introduced in version 2.6 */
uint32_t flush : 1;
/** Can be filled in by open, to indicate that the file is not
seekable. Introduced in version 2.8 */
uint32_t nonseekable : 1;
/* Indicates that flock locks for this file should be
released. If set, lock_owner shall contain a valid value.
May only be set in ->release(). Introduced in version
2.9 */
uint32_t flock_release : 1;
/* Requests the kernel to cache entries returned by readdir */
uint32_t cache_readdir : 1;
/** Padding. Do not use*/
uint32_t padding : 25;
/** File handle. May be filled in by filesystem in open().
Available in all other file operations */
uint64_t fh;
/** Lock owner id. Available in locking operations and flush */
uint64_t lock_owner;
}; };
/** /**

5
libfuse/lib/fuse.c

@ -3275,7 +3275,6 @@ static struct fuse_dh *get_dirhandle(const struct fuse_file_info *llfi,
struct fuse_dh *dh = (struct fuse_dh *) (uintptr_t) llfi->fh; struct fuse_dh *dh = (struct fuse_dh *) (uintptr_t) llfi->fh;
memset(fi, 0, sizeof(struct fuse_file_info)); memset(fi, 0, sizeof(struct fuse_file_info));
fi->fh = dh->fh; fi->fh = dh->fh;
fi->fh_old = dh->fh;
return dh; return dh;
} }
@ -3294,6 +3293,7 @@ static void fuse_lib_opendir(fuse_req_t req, fuse_ino_t ino,
reply_err(req, -ENOMEM); reply_err(req, -ENOMEM);
return; return;
} }
memset(dh, 0, sizeof(struct fuse_dh)); memset(dh, 0, sizeof(struct fuse_dh));
dh->fuse = f; dh->fuse = f;
dh->contents = NULL; dh->contents = NULL;
@ -3313,7 +3313,10 @@ static void fuse_lib_opendir(fuse_req_t req, fuse_ino_t ino,
err = fuse_fs_opendir(f->fs, path, &fi); err = fuse_fs_opendir(f->fs, path, &fi);
fuse_finish_interrupt(f, req, &d); fuse_finish_interrupt(f, req, &d);
dh->fh = fi.fh; dh->fh = fi.fh;
llfi->keep_cache = fi.keep_cache;
llfi->cache_readdir = fi.cache_readdir;
} }
if (!err) { if (!err) {
if (fuse_reply_open(req, llfi) == -ENOENT) { if (fuse_reply_open(req, llfi) == -ENOENT) {
/* The opendir syscall was interrupted, so it /* The opendir syscall was interrupted, so it

15
libfuse/lib/fuse_lowlevel.c

@ -352,6 +352,8 @@ static void fill_open(struct fuse_open_out *arg,
arg->open_flags |= FOPEN_KEEP_CACHE; arg->open_flags |= FOPEN_KEEP_CACHE;
if (f->nonseekable) if (f->nonseekable)
arg->open_flags |= FOPEN_NONSEEKABLE; arg->open_flags |= FOPEN_NONSEEKABLE;
if (f->cache_readdir)
arg->open_flags |= FOPEN_CACHE_DIR;
} }
int fuse_reply_entry(fuse_req_t req, const struct fuse_entry_param *e) int fuse_reply_entry(fuse_req_t req, const struct fuse_entry_param *e)
@ -1041,7 +1043,6 @@ static void do_getattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg)
if (arg->getattr_flags & FUSE_GETATTR_FH) { if (arg->getattr_flags & FUSE_GETATTR_FH) {
memset(&fi, 0, sizeof(fi)); memset(&fi, 0, sizeof(fi));
fi.fh = arg->fh; fi.fh = arg->fh;
fi.fh_old = fi.fh;
fip = &fi; fip = &fi;
} }
} }
@ -1067,7 +1068,6 @@ static void do_setattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg)
memset(&fi_store, 0, sizeof(fi_store)); memset(&fi_store, 0, sizeof(fi_store));
fi = &fi_store; fi = &fi_store;
fi->fh = arg->fh; fi->fh = arg->fh;
fi->fh_old = fi->fh;
} }
arg->valid &= arg->valid &=
FUSE_SET_ATTR_MODE | FUSE_SET_ATTR_MODE |
@ -1230,7 +1230,6 @@ static void do_read(fuse_req_t req, fuse_ino_t nodeid, const void *inarg)
memset(&fi, 0, sizeof(fi)); memset(&fi, 0, sizeof(fi));
fi.fh = arg->fh; fi.fh = arg->fh;
fi.fh_old = fi.fh;
if (req->f->conn.proto_minor >= 9) { if (req->f->conn.proto_minor >= 9) {
fi.lock_owner = arg->lock_owner; fi.lock_owner = arg->lock_owner;
fi.flags = arg->flags; fi.flags = arg->flags;
@ -1248,7 +1247,6 @@ static void do_write(fuse_req_t req, fuse_ino_t nodeid, const void *inarg)
memset(&fi, 0, sizeof(fi)); memset(&fi, 0, sizeof(fi));
fi.fh = arg->fh; fi.fh = arg->fh;
fi.fh_old = fi.fh;
fi.writepage = arg->write_flags & 1; fi.writepage = arg->write_flags & 1;
if (req->f->conn.proto_minor < 9) { if (req->f->conn.proto_minor < 9) {
@ -1279,7 +1277,6 @@ static void do_write_buf(fuse_req_t req, fuse_ino_t nodeid, const void *inarg,
memset(&fi, 0, sizeof(fi)); memset(&fi, 0, sizeof(fi));
fi.fh = arg->fh; fi.fh = arg->fh;
fi.fh_old = fi.fh;
fi.writepage = arg->write_flags & 1; fi.writepage = arg->write_flags & 1;
if (req->f->conn.proto_minor < 9) { if (req->f->conn.proto_minor < 9) {
@ -1318,7 +1315,6 @@ static void do_flush(fuse_req_t req, fuse_ino_t nodeid, const void *inarg)
memset(&fi, 0, sizeof(fi)); memset(&fi, 0, sizeof(fi));
fi.fh = arg->fh; fi.fh = arg->fh;
fi.fh_old = fi.fh;
fi.flush = 1; fi.flush = 1;
if (req->f->conn.proto_minor >= 7) if (req->f->conn.proto_minor >= 7)
fi.lock_owner = arg->lock_owner; fi.lock_owner = arg->lock_owner;
@ -1337,7 +1333,6 @@ static void do_release(fuse_req_t req, fuse_ino_t nodeid, const void *inarg)
memset(&fi, 0, sizeof(fi)); memset(&fi, 0, sizeof(fi));
fi.flags = arg->flags; fi.flags = arg->flags;
fi.fh = arg->fh; fi.fh = arg->fh;
fi.fh_old = fi.fh;
if (req->f->conn.proto_minor >= 8) { if (req->f->conn.proto_minor >= 8) {
fi.flush = (arg->release_flags & FUSE_RELEASE_FLUSH) ? 1 : 0; fi.flush = (arg->release_flags & FUSE_RELEASE_FLUSH) ? 1 : 0;
fi.lock_owner = arg->lock_owner; fi.lock_owner = arg->lock_owner;
@ -1360,7 +1355,6 @@ static void do_fsync(fuse_req_t req, fuse_ino_t nodeid, const void *inarg)
memset(&fi, 0, sizeof(fi)); memset(&fi, 0, sizeof(fi));
fi.fh = arg->fh; fi.fh = arg->fh;
fi.fh_old = fi.fh;
if (req->f->op.fsync) if (req->f->op.fsync)
req->f->op.fsync(req, nodeid, arg->fsync_flags & 1, &fi); req->f->op.fsync(req, nodeid, arg->fsync_flags & 1, &fi);
@ -1389,7 +1383,6 @@ static void do_readdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg)
memset(&fi, 0, sizeof(fi)); memset(&fi, 0, sizeof(fi));
fi.fh = arg->fh; fi.fh = arg->fh;
fi.fh_old = fi.fh;
if (req->f->op.readdir) if (req->f->op.readdir)
req->f->op.readdir(req, nodeid, arg->size, arg->offset, &fi); req->f->op.readdir(req, nodeid, arg->size, arg->offset, &fi);
@ -1405,7 +1398,6 @@ static void do_releasedir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg)
memset(&fi, 0, sizeof(fi)); memset(&fi, 0, sizeof(fi));
fi.flags = arg->flags; fi.flags = arg->flags;
fi.fh = arg->fh; fi.fh = arg->fh;
fi.fh_old = fi.fh;
if (req->f->op.releasedir) if (req->f->op.releasedir)
req->f->op.releasedir(req, nodeid, &fi); req->f->op.releasedir(req, nodeid, &fi);
@ -1420,7 +1412,6 @@ static void do_fsyncdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg)
memset(&fi, 0, sizeof(fi)); memset(&fi, 0, sizeof(fi));
fi.fh = arg->fh; fi.fh = arg->fh;
fi.fh_old = fi.fh;
if (req->f->op.fsyncdir) if (req->f->op.fsyncdir)
req->f->op.fsyncdir(req, nodeid, arg->fsync_flags & 1, &fi); req->f->op.fsyncdir(req, nodeid, arg->fsync_flags & 1, &fi);
@ -1675,7 +1666,6 @@ static void do_ioctl(fuse_req_t req, fuse_ino_t nodeid, const void *inarg)
memset(&fi, 0, sizeof(fi)); memset(&fi, 0, sizeof(fi));
fi.fh = arg->fh; fi.fh = arg->fh;
fi.fh_old = fi.fh;
if (sizeof(void *) == 4 && req->f->conn.proto_minor >= 16 && if (sizeof(void *) == 4 && req->f->conn.proto_minor >= 16 &&
!(flags & FUSE_IOCTL_32BIT)) { !(flags & FUSE_IOCTL_32BIT)) {
@ -1702,7 +1692,6 @@ static void do_poll(fuse_req_t req, fuse_ino_t nodeid, const void *inarg)
memset(&fi, 0, sizeof(fi)); memset(&fi, 0, sizeof(fi));
fi.fh = arg->fh; fi.fh = arg->fh;
fi.fh_old = fi.fh;
if (req->f->op.poll) { if (req->f->op.poll) {
struct fuse_pollhandle *ph = NULL; struct fuse_pollhandle *ph = NULL;

20
man/mergerfs.1

@ -1,7 +1,7 @@
.\"t .\"t
.\" Automatically generated by Pandoc 1.19.2.4 .\" Automatically generated by Pandoc 1.19.2.4
.\" .\"
.TH "mergerfs" "1" "2019\-05\-22" "mergerfs user manual" ""
.TH "mergerfs" "1" "2019\-05\-23" "mergerfs user manual" ""
.hy .hy
.SH NAME .SH NAME
.PP .PP
@ -228,6 +228,9 @@ timeout in seconds.
.IP \[bu] 2 .IP \[bu] 2
\f[B]cache.symlinks=<bool>\f[]: cache symlinks (if supported by kernel) \f[B]cache.symlinks=<bool>\f[]: cache symlinks (if supported by kernel)
(default: false) (default: false)
.IP \[bu] 2
\f[B]cache.readdir=<bool>\f[]: cache readdir (if supported by kernel)
(default: false)
.PP .PP
\f[B]NOTE:\f[] Options are evaluated in the order listed so if the \f[B]NOTE:\f[] Options are evaluated in the order listed so if the
options are \f[B]func.rmdir=rand,category.action=ff\f[] the options are \f[B]func.rmdir=rand,category.action=ff\f[] the
@ -1126,7 +1129,20 @@ caching from the kernel only if supported.
As a result its safe to enable it on systems prior to 4.20. As a result its safe to enable it on systems prior to 4.20.
That said it is disabled by default for now. That said it is disabled by default for now.
You can see if caching is enabled by querying the xattr You can see if caching is enabled by querying the xattr
\f[C]user.mergerfs.cache.symlinks\f[].
\f[C]user.mergerfs.cache.symlinks\f[], but given it must be requested at
startup you cannot change it at runtime.
.SS readdir caching
.PP
As of version 4.20 Linux supports readdir caching.
This can have a significant impact on directory traversal, especially
when combined with entry (\f[C]cache.entry\f[]) and attribute
(\f[C]cache.attr\f[]) caching.
Setting \f[C]cache.readdir=true\f[] will result in requesting readdir
caching from the kernel on each \f[C]opendir\f[].
If the kernel doesn\[aq]t support readdir caching, setting the option to
\f[C]true\f[] has no effect.
This option is configurable at runtime via xattr
\f[C]user.mergerfs.cache.readdir\f[].
.SS writeback caching .SS writeback caching
.PP .PP
writeback caching is a technique for improving write speeds by batching writeback caching is a technique for improving write speeds by batching

1
src/config.hpp

@ -80,6 +80,7 @@ public:
StatFSIgnore::Enum statfs_ignore; StatFSIgnore::Enum statfs_ignore;
bool posix_acl; bool posix_acl;
bool cache_symlinks; bool cache_symlinks;
bool cache_readdir;
public: public:
const Policy *policies[FuseFunc::Enum::END]; const Policy *policies[FuseFunc::Enum::END];

2
src/fuse_getxattr.cpp

@ -367,6 +367,8 @@ namespace l
l::getxattr_controlfile_cache_negative_entry(attrvalue); l::getxattr_controlfile_cache_negative_entry(attrvalue);
else if((attr[2] == "cache") && (attr[3] == "symlinks")) else if((attr[2] == "cache") && (attr[3] == "symlinks"))
l::getxattr_controlfile_bool(config.cache_symlinks,attrvalue); l::getxattr_controlfile_bool(config.cache_symlinks,attrvalue);
else if((attr[2] == "cache") && (attr[3] == "readdir"))
l::getxattr_controlfile_bool(config.cache_readdir,attrvalue);
break; break;
} }

1
src/fuse_listxattr.cpp

@ -49,6 +49,7 @@ namespace l
("user.mergerfs.cache.entry") ("user.mergerfs.cache.entry")
("user.mergerfs.cache.negative_entry") ("user.mergerfs.cache.negative_entry")
("user.mergerfs.cache.open") ("user.mergerfs.cache.open")
("user.mergerfs.cache.readdir")
("user.mergerfs.cache.statfs") ("user.mergerfs.cache.statfs")
("user.mergerfs.cache.symlinks") ("user.mergerfs.cache.symlinks")
("user.mergerfs.direct_io") ("user.mergerfs.direct_io")

9
src/fuse_opendir.cpp

@ -14,6 +14,7 @@
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/ */
#include "config.hpp"
#include "dirinfo.hpp" #include "dirinfo.hpp"
#include <fuse.h> #include <fuse.h>
@ -24,8 +25,16 @@ namespace FUSE
opendir(const char *fusepath_, opendir(const char *fusepath_,
fuse_file_info *ffi_) fuse_file_info *ffi_)
{ {
const Config &config = Config::get();
ffi_->fh = reinterpret_cast<uint64_t>(new DirInfo(fusepath_)); ffi_->fh = reinterpret_cast<uint64_t>(new DirInfo(fusepath_));
if(config.cache_readdir)
{
ffi_->keep_cache = 1;
ffi_->cache_readdir = 1;
}
return 0; return 0;
} }
} }

4
src/fuse_setxattr.cpp

@ -178,7 +178,7 @@ namespace l
int int
setxattr_xattr(const string &attrval_, setxattr_xattr(const string &attrval_,
const int flags_, const int flags_,
int xattr_)
int &xattr_)
{ {
if((flags_ & XATTR_CREATE) == XATTR_CREATE) if((flags_ & XATTR_CREATE) == XATTR_CREATE)
return -EEXIST; return -EEXIST;
@ -436,6 +436,8 @@ namespace l
return l::setxattr_controlfile_cache_entry(attrval,flags); return l::setxattr_controlfile_cache_entry(attrval,flags);
else if((attr[2] == "cache") && (attr[3] == "negative_entry")) else if((attr[2] == "cache") && (attr[3] == "negative_entry"))
return l::setxattr_controlfile_cache_negative_entry(attrval,flags); return l::setxattr_controlfile_cache_negative_entry(attrval,flags);
else if((attr[2] == "cache") && (attr[3] == "readdir"))
return l::setxattr_bool(attrval,flags,config.cache_readdir);
break; break;
default: default:

4
src/option_parser.cpp

@ -241,6 +241,8 @@ parse_and_process_cache(Config &config_,
return (set_kv_option(outargs,"attr_timeout",value_),0); return (set_kv_option(outargs,"attr_timeout",value_),0);
else if(func_ == "symlinks") else if(func_ == "symlinks")
return parse_and_process(value_,config_.cache_symlinks); return parse_and_process(value_,config_.cache_symlinks);
else if(func_ == "readdir")
return parse_and_process(value_,config_.cache_readdir);
return 1; return 1;
} }
@ -395,6 +397,8 @@ usage(void)
" -o cache.negative_entry=<int>\n" " -o cache.negative_entry=<int>\n"
" negative file name lookup cache timeout in\n" " negative file name lookup cache timeout in\n"
" seconds. default = 0\n" " seconds. default = 0\n"
" -o cache.readdir=<bool>\n"
" enable kernel caching readdir (if supported)\n"
" -o direct_io Bypass page caching, may increase write\n" " -o direct_io Bypass page caching, may increase write\n"
" speeds at the cost of reads. Please read docs\n" " speeds at the cost of reads. Please read docs\n"
" for more details as there are tradeoffs.\n" " for more details as there are tradeoffs.\n"

Loading…
Cancel
Save