Browse Source

add readdir caching

pull/630/head
Antonio SJ Musumeci 5 years ago
parent
commit
2323c16316
  1. 10
      README.md
  2. 24
      libfuse/include/fuse_common.h
  3. 5
      libfuse/lib/fuse.c
  4. 15
      libfuse/lib/fuse_lowlevel.c
  5. 20
      man/mergerfs.1
  6. 1
      src/config.hpp
  7. 2
      src/fuse_getxattr.cpp
  8. 1
      src/fuse_listxattr.cpp
  9. 9
      src/fuse_opendir.cpp
  10. 4
      src/fuse_setxattr.cpp
  11. 4
      src/option_parser.cpp

10
README.md

@ -1,6 +1,6 @@
% mergerfs(1) mergerfs user manual % mergerfs(1) mergerfs user manual
% Antonio SJ Musumeci <trapexit@spawn.link> % Antonio SJ Musumeci <trapexit@spawn.link>
% 2019-05-22
% 2019-05-23
# NAME # NAME
@ -92,6 +92,7 @@ mergerfs does **not** support the copy-on-write (CoW) behavior found in **aufs**
* **cache.entry=&lt;int&gt;**: file name lookup cache timeout in seconds. (default: 1) * **cache.entry=&lt;int&gt;**: file name lookup cache timeout in seconds. (default: 1)
* **cache.negative_entry=&lt;int&gt;**: negative file name lookup cache timeout in seconds. (default: 0) * **cache.negative_entry=&lt;int&gt;**: negative file name lookup cache timeout in seconds. (default: 0)
* **cache.symlinks=&lt;bool&gt;**: cache symlinks (if supported by kernel) (default: false) * **cache.symlinks=&lt;bool&gt;**: cache symlinks (if supported by kernel) (default: false)
* **cache.readdir=&lt;bool&gt;**: cache readdir (if supported by kernel) (default: false)
**NOTE:** Options are evaluated in the order listed so if the options are **func.rmdir=rand,category.action=ff** the **action** category setting will override the **rmdir** setting. **NOTE:** Options are evaluated in the order listed so if the options are **func.rmdir=rand,category.action=ff** the **action** category setting will override the **rmdir** setting.
@ -527,7 +528,12 @@ Example: If the create policy is `mfs` and the timeout is 60 then for that 60 se
#### symlink caching #### symlink caching
As of version 4.20 Linux supports symlink caching. Significant performance increases can be had in workloads which use a lot of symlinks. Setting `cache.symlinks=true` will result in requesting symlink caching from the kernel only if supported. As a result its safe to enable it on systems prior to 4.20. That said it is disabled by default for now. You can see if caching is enabled by querying the xattr `user.mergerfs.cache.symlinks`.
As of version 4.20 Linux supports symlink caching. Significant performance increases can be had in workloads which use a lot of symlinks. Setting `cache.symlinks=true` will result in requesting symlink caching from the kernel only if supported. As a result it's safe to enable it on systems prior to 4.20. That said it is disabled by default for now. You can see if caching is enabled by querying the xattr `user.mergerfs.cache.symlinks`, but given it must be requested at startup you cannot change it at runtime.
#### readdir caching
As of version 4.20 Linux supports readdir caching. This can have a significant impact on directory traversal, especially when combined with entry (`cache.entry`) and attribute (`cache.attr`) caching. Setting `cache.readdir=true` will result in requesting readdir caching from the kernel on each `opendir`. If the kernel doesn't support readdir caching, setting the option to `true` has no effect. This option is configurable at runtime via xattr `user.mergerfs.cache.readdir`.
#### writeback caching #### writeback caching

24
libfuse/include/fuse_common.h

@ -42,42 +42,44 @@ extern "C" {
* *
* Changed in version 2.5 * Changed in version 2.5
*/ */
struct fuse_file_info {
struct
fuse_file_info
{
/** Open flags. Available in open() and release() */ /** Open flags. Available in open() and release() */
int flags; int flags;
/** Old file handle, don't use */
unsigned long fh_old;
/** In case of a write operation indicates if this was caused by a /** In case of a write operation indicates if this was caused by a
writepage */ writepage */
int writepage;
uint32_t writepage : 1;
/** Can be filled in by open, to use direct I/O on this file. /** Can be filled in by open, to use direct I/O on this file.
Introduced in version 2.4 */ Introduced in version 2.4 */
unsigned int direct_io : 1;
uint32_t direct_io : 1;
/** Can be filled in by open, to indicate, that cached file data /** Can be filled in by open, to indicate, that cached file data
need not be invalidated. Introduced in version 2.4 */ need not be invalidated. Introduced in version 2.4 */
unsigned int keep_cache : 1;
uint32_t keep_cache : 1;
/** Indicates a flush operation. Set in flush operation, also /** Indicates a flush operation. Set in flush operation, also
maybe set in highlevel lock operation and lowlevel release maybe set in highlevel lock operation and lowlevel release
operation. Introduced in version 2.6 */ operation. Introduced in version 2.6 */
unsigned int flush : 1;
uint32_t flush : 1;
/** Can be filled in by open, to indicate that the file is not /** Can be filled in by open, to indicate that the file is not
seekable. Introduced in version 2.8 */ seekable. Introduced in version 2.8 */
unsigned int nonseekable : 1;
uint32_t nonseekable : 1;
/* Indicates that flock locks for this file should be /* Indicates that flock locks for this file should be
released. If set, lock_owner shall contain a valid value. released. If set, lock_owner shall contain a valid value.
May only be set in ->release(). Introduced in version May only be set in ->release(). Introduced in version
2.9 */ 2.9 */
unsigned int flock_release : 1;
uint32_t flock_release : 1;
/* Requests the kernel to cache entries returned by readdir */
uint32_t cache_readdir : 1;
/** Padding. Do not use*/ /** Padding. Do not use*/
unsigned int padding : 27;
uint32_t padding : 25;
/** File handle. May be filled in by filesystem in open(). /** File handle. May be filled in by filesystem in open().
Available in all other file operations */ Available in all other file operations */

5
libfuse/lib/fuse.c

@ -3275,7 +3275,6 @@ static struct fuse_dh *get_dirhandle(const struct fuse_file_info *llfi,
struct fuse_dh *dh = (struct fuse_dh *) (uintptr_t) llfi->fh; struct fuse_dh *dh = (struct fuse_dh *) (uintptr_t) llfi->fh;
memset(fi, 0, sizeof(struct fuse_file_info)); memset(fi, 0, sizeof(struct fuse_file_info));
fi->fh = dh->fh; fi->fh = dh->fh;
fi->fh_old = dh->fh;
return dh; return dh;
} }
@ -3294,6 +3293,7 @@ static void fuse_lib_opendir(fuse_req_t req, fuse_ino_t ino,
reply_err(req, -ENOMEM); reply_err(req, -ENOMEM);
return; return;
} }
memset(dh, 0, sizeof(struct fuse_dh)); memset(dh, 0, sizeof(struct fuse_dh));
dh->fuse = f; dh->fuse = f;
dh->contents = NULL; dh->contents = NULL;
@ -3313,7 +3313,10 @@ static void fuse_lib_opendir(fuse_req_t req, fuse_ino_t ino,
err = fuse_fs_opendir(f->fs, path, &fi); err = fuse_fs_opendir(f->fs, path, &fi);
fuse_finish_interrupt(f, req, &d); fuse_finish_interrupt(f, req, &d);
dh->fh = fi.fh; dh->fh = fi.fh;
llfi->keep_cache = fi.keep_cache;
llfi->cache_readdir = fi.cache_readdir;
} }
if (!err) { if (!err) {
if (fuse_reply_open(req, llfi) == -ENOENT) { if (fuse_reply_open(req, llfi) == -ENOENT) {
/* The opendir syscall was interrupted, so it /* The opendir syscall was interrupted, so it

15
libfuse/lib/fuse_lowlevel.c

@ -352,6 +352,8 @@ static void fill_open(struct fuse_open_out *arg,
arg->open_flags |= FOPEN_KEEP_CACHE; arg->open_flags |= FOPEN_KEEP_CACHE;
if (f->nonseekable) if (f->nonseekable)
arg->open_flags |= FOPEN_NONSEEKABLE; arg->open_flags |= FOPEN_NONSEEKABLE;
if (f->cache_readdir)
arg->open_flags |= FOPEN_CACHE_DIR;
} }
int fuse_reply_entry(fuse_req_t req, const struct fuse_entry_param *e) int fuse_reply_entry(fuse_req_t req, const struct fuse_entry_param *e)
@ -1041,7 +1043,6 @@ static void do_getattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg)
if (arg->getattr_flags & FUSE_GETATTR_FH) { if (arg->getattr_flags & FUSE_GETATTR_FH) {
memset(&fi, 0, sizeof(fi)); memset(&fi, 0, sizeof(fi));
fi.fh = arg->fh; fi.fh = arg->fh;
fi.fh_old = fi.fh;
fip = &fi; fip = &fi;
} }
} }
@ -1067,7 +1068,6 @@ static void do_setattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg)
memset(&fi_store, 0, sizeof(fi_store)); memset(&fi_store, 0, sizeof(fi_store));
fi = &fi_store; fi = &fi_store;
fi->fh = arg->fh; fi->fh = arg->fh;
fi->fh_old = fi->fh;
} }
arg->valid &= arg->valid &=
FUSE_SET_ATTR_MODE | FUSE_SET_ATTR_MODE |
@ -1230,7 +1230,6 @@ static void do_read(fuse_req_t req, fuse_ino_t nodeid, const void *inarg)
memset(&fi, 0, sizeof(fi)); memset(&fi, 0, sizeof(fi));
fi.fh = arg->fh; fi.fh = arg->fh;
fi.fh_old = fi.fh;
if (req->f->conn.proto_minor >= 9) { if (req->f->conn.proto_minor >= 9) {
fi.lock_owner = arg->lock_owner; fi.lock_owner = arg->lock_owner;
fi.flags = arg->flags; fi.flags = arg->flags;
@ -1248,7 +1247,6 @@ static void do_write(fuse_req_t req, fuse_ino_t nodeid, const void *inarg)
memset(&fi, 0, sizeof(fi)); memset(&fi, 0, sizeof(fi));
fi.fh = arg->fh; fi.fh = arg->fh;
fi.fh_old = fi.fh;
fi.writepage = arg->write_flags & 1; fi.writepage = arg->write_flags & 1;
if (req->f->conn.proto_minor < 9) { if (req->f->conn.proto_minor < 9) {
@ -1279,7 +1277,6 @@ static void do_write_buf(fuse_req_t req, fuse_ino_t nodeid, const void *inarg,
memset(&fi, 0, sizeof(fi)); memset(&fi, 0, sizeof(fi));
fi.fh = arg->fh; fi.fh = arg->fh;
fi.fh_old = fi.fh;
fi.writepage = arg->write_flags & 1; fi.writepage = arg->write_flags & 1;
if (req->f->conn.proto_minor < 9) { if (req->f->conn.proto_minor < 9) {
@ -1318,7 +1315,6 @@ static void do_flush(fuse_req_t req, fuse_ino_t nodeid, const void *inarg)
memset(&fi, 0, sizeof(fi)); memset(&fi, 0, sizeof(fi));
fi.fh = arg->fh; fi.fh = arg->fh;
fi.fh_old = fi.fh;
fi.flush = 1; fi.flush = 1;
if (req->f->conn.proto_minor >= 7) if (req->f->conn.proto_minor >= 7)
fi.lock_owner = arg->lock_owner; fi.lock_owner = arg->lock_owner;
@ -1337,7 +1333,6 @@ static void do_release(fuse_req_t req, fuse_ino_t nodeid, const void *inarg)
memset(&fi, 0, sizeof(fi)); memset(&fi, 0, sizeof(fi));
fi.flags = arg->flags; fi.flags = arg->flags;
fi.fh = arg->fh; fi.fh = arg->fh;
fi.fh_old = fi.fh;
if (req->f->conn.proto_minor >= 8) { if (req->f->conn.proto_minor >= 8) {
fi.flush = (arg->release_flags & FUSE_RELEASE_FLUSH) ? 1 : 0; fi.flush = (arg->release_flags & FUSE_RELEASE_FLUSH) ? 1 : 0;
fi.lock_owner = arg->lock_owner; fi.lock_owner = arg->lock_owner;
@ -1360,7 +1355,6 @@ static void do_fsync(fuse_req_t req, fuse_ino_t nodeid, const void *inarg)
memset(&fi, 0, sizeof(fi)); memset(&fi, 0, sizeof(fi));
fi.fh = arg->fh; fi.fh = arg->fh;
fi.fh_old = fi.fh;
if (req->f->op.fsync) if (req->f->op.fsync)
req->f->op.fsync(req, nodeid, arg->fsync_flags & 1, &fi); req->f->op.fsync(req, nodeid, arg->fsync_flags & 1, &fi);
@ -1389,7 +1383,6 @@ static void do_readdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg)
memset(&fi, 0, sizeof(fi)); memset(&fi, 0, sizeof(fi));
fi.fh = arg->fh; fi.fh = arg->fh;
fi.fh_old = fi.fh;
if (req->f->op.readdir) if (req->f->op.readdir)
req->f->op.readdir(req, nodeid, arg->size, arg->offset, &fi); req->f->op.readdir(req, nodeid, arg->size, arg->offset, &fi);
@ -1405,7 +1398,6 @@ static void do_releasedir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg)
memset(&fi, 0, sizeof(fi)); memset(&fi, 0, sizeof(fi));
fi.flags = arg->flags; fi.flags = arg->flags;
fi.fh = arg->fh; fi.fh = arg->fh;
fi.fh_old = fi.fh;
if (req->f->op.releasedir) if (req->f->op.releasedir)
req->f->op.releasedir(req, nodeid, &fi); req->f->op.releasedir(req, nodeid, &fi);
@ -1420,7 +1412,6 @@ static void do_fsyncdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg)
memset(&fi, 0, sizeof(fi)); memset(&fi, 0, sizeof(fi));
fi.fh = arg->fh; fi.fh = arg->fh;
fi.fh_old = fi.fh;
if (req->f->op.fsyncdir) if (req->f->op.fsyncdir)
req->f->op.fsyncdir(req, nodeid, arg->fsync_flags & 1, &fi); req->f->op.fsyncdir(req, nodeid, arg->fsync_flags & 1, &fi);
@ -1675,7 +1666,6 @@ static void do_ioctl(fuse_req_t req, fuse_ino_t nodeid, const void *inarg)
memset(&fi, 0, sizeof(fi)); memset(&fi, 0, sizeof(fi));
fi.fh = arg->fh; fi.fh = arg->fh;
fi.fh_old = fi.fh;
if (sizeof(void *) == 4 && req->f->conn.proto_minor >= 16 && if (sizeof(void *) == 4 && req->f->conn.proto_minor >= 16 &&
!(flags & FUSE_IOCTL_32BIT)) { !(flags & FUSE_IOCTL_32BIT)) {
@ -1702,7 +1692,6 @@ static void do_poll(fuse_req_t req, fuse_ino_t nodeid, const void *inarg)
memset(&fi, 0, sizeof(fi)); memset(&fi, 0, sizeof(fi));
fi.fh = arg->fh; fi.fh = arg->fh;
fi.fh_old = fi.fh;
if (req->f->op.poll) { if (req->f->op.poll) {
struct fuse_pollhandle *ph = NULL; struct fuse_pollhandle *ph = NULL;

20
man/mergerfs.1

@ -1,7 +1,7 @@
.\"t .\"t
.\" Automatically generated by Pandoc 1.19.2.4 .\" Automatically generated by Pandoc 1.19.2.4
.\" .\"
.TH "mergerfs" "1" "2019\-05\-22" "mergerfs user manual" ""
.TH "mergerfs" "1" "2019\-05\-23" "mergerfs user manual" ""
.hy .hy
.SH NAME .SH NAME
.PP .PP
@ -228,6 +228,9 @@ timeout in seconds.
.IP \[bu] 2 .IP \[bu] 2
\f[B]cache.symlinks=<bool>\f[]: cache symlinks (if supported by kernel) \f[B]cache.symlinks=<bool>\f[]: cache symlinks (if supported by kernel)
(default: false) (default: false)
.IP \[bu] 2
\f[B]cache.readdir=<bool>\f[]: cache readdir (if supported by kernel)
(default: false)
.PP .PP
\f[B]NOTE:\f[] Options are evaluated in the order listed so if the \f[B]NOTE:\f[] Options are evaluated in the order listed so if the
options are \f[B]func.rmdir=rand,category.action=ff\f[] the options are \f[B]func.rmdir=rand,category.action=ff\f[] the
@ -1126,7 +1129,20 @@ caching from the kernel only if supported.
As a result its safe to enable it on systems prior to 4.20. As a result its safe to enable it on systems prior to 4.20.
That said it is disabled by default for now. That said it is disabled by default for now.
You can see if caching is enabled by querying the xattr You can see if caching is enabled by querying the xattr
\f[C]user.mergerfs.cache.symlinks\f[].
\f[C]user.mergerfs.cache.symlinks\f[] but given it must be requested at
startup you cannot change it at runtime.
.SS readdir caching
.PP
As of version 4.20 Linux supports readdir caching.
This can have a significant impact on directory traversal.
Especially when combined with entry (\f[C]cache.entry\f[]) and attribute
(\f[C]cache.attr\f[]) caching.
Setting \f[C]cache.readdir=true\f[] will result in requesting readdir
caching from the kernel on each \f[C]opendir\f[].
If the kernel doesn\[aq]t support readdir caching setting the option to
\f[C]true\f[] has no effect.
This option is configurable at runtime via xattr
\f[C]user.mergerfs.cache.readdir\f[].
.SS writeback caching .SS writeback caching
.PP .PP
writeback caching is a technique for improving write speeds by batching writeback caching is a technique for improving write speeds by batching

1
src/config.hpp

@ -80,6 +80,7 @@ public:
StatFSIgnore::Enum statfs_ignore; StatFSIgnore::Enum statfs_ignore;
bool posix_acl; bool posix_acl;
bool cache_symlinks; bool cache_symlinks;
bool cache_readdir;
public: public:
const Policy *policies[FuseFunc::Enum::END]; const Policy *policies[FuseFunc::Enum::END];

2
src/fuse_getxattr.cpp

@ -367,6 +367,8 @@ namespace l
l::getxattr_controlfile_cache_negative_entry(attrvalue); l::getxattr_controlfile_cache_negative_entry(attrvalue);
else if((attr[2] == "cache") && (attr[3] == "symlinks")) else if((attr[2] == "cache") && (attr[3] == "symlinks"))
l::getxattr_controlfile_bool(config.cache_symlinks,attrvalue); l::getxattr_controlfile_bool(config.cache_symlinks,attrvalue);
else if((attr[2] == "cache") && (attr[3] == "readdir"))
l::getxattr_controlfile_bool(config.cache_readdir,attrvalue);
break; break;
} }

1
src/fuse_listxattr.cpp

@ -49,6 +49,7 @@ namespace l
("user.mergerfs.cache.entry") ("user.mergerfs.cache.entry")
("user.mergerfs.cache.negative_entry") ("user.mergerfs.cache.negative_entry")
("user.mergerfs.cache.open") ("user.mergerfs.cache.open")
("user.mergerfs.cache.readdir")
("user.mergerfs.cache.statfs") ("user.mergerfs.cache.statfs")
("user.mergerfs.cache.symlinks") ("user.mergerfs.cache.symlinks")
("user.mergerfs.direct_io") ("user.mergerfs.direct_io")

9
src/fuse_opendir.cpp

@ -14,6 +14,7 @@
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/ */
#include "config.hpp"
#include "dirinfo.hpp" #include "dirinfo.hpp"
#include <fuse.h> #include <fuse.h>
@ -24,8 +25,16 @@ namespace FUSE
opendir(const char *fusepath_, opendir(const char *fusepath_,
fuse_file_info *ffi_) fuse_file_info *ffi_)
{ {
const Config &config = Config::get();
ffi_->fh = reinterpret_cast<uint64_t>(new DirInfo(fusepath_)); ffi_->fh = reinterpret_cast<uint64_t>(new DirInfo(fusepath_));
if(config.cache_readdir)
{
ffi_->keep_cache = 1;
ffi_->cache_readdir = 1;
}
return 0; return 0;
} }
} }

4
src/fuse_setxattr.cpp

@ -178,7 +178,7 @@ namespace l
int int
setxattr_xattr(const string &attrval_, setxattr_xattr(const string &attrval_,
const int flags_, const int flags_,
int xattr_)
int &xattr_)
{ {
if((flags_ & XATTR_CREATE) == XATTR_CREATE) if((flags_ & XATTR_CREATE) == XATTR_CREATE)
return -EEXIST; return -EEXIST;
@ -436,6 +436,8 @@ namespace l
return l::setxattr_controlfile_cache_entry(attrval,flags); return l::setxattr_controlfile_cache_entry(attrval,flags);
else if((attr[2] == "cache") && (attr[3] == "negative_entry")) else if((attr[2] == "cache") && (attr[3] == "negative_entry"))
return l::setxattr_controlfile_cache_negative_entry(attrval,flags); return l::setxattr_controlfile_cache_negative_entry(attrval,flags);
else if((attr[2] == "cache") && (attr[3] == "readdir"))
return l::setxattr_bool(attrval,flags,config.cache_readdir);
break; break;
default: default:

4
src/option_parser.cpp

@ -241,6 +241,8 @@ parse_and_process_cache(Config &config_,
return (set_kv_option(outargs,"attr_timeout",value_),0); return (set_kv_option(outargs,"attr_timeout",value_),0);
else if(func_ == "symlinks") else if(func_ == "symlinks")
return parse_and_process(value_,config_.cache_symlinks); return parse_and_process(value_,config_.cache_symlinks);
else if(func_ == "readdir")
return parse_and_process(value_,config_.cache_readdir);
return 1; return 1;
} }
@ -395,6 +397,8 @@ usage(void)
" -o cache.negative_entry=<int>\n" " -o cache.negative_entry=<int>\n"
" negative file name lookup cache timeout in\n" " negative file name lookup cache timeout in\n"
" seconds. default = 0\n" " seconds. default = 0\n"
" -o cache.readdir=<bool>\n"
" enable kernel caching readdir (if supported)\n"
" -o direct_io Bypass page caching, may increase write\n" " -o direct_io Bypass page caching, may increase write\n"
" speeds at the cost of reads. Please read docs\n" " speeds at the cost of reads. Please read docs\n"
" for more details as there are tradeoffs.\n" " for more details as there are tradeoffs.\n"

Loading…
Cancel
Save