From 87c2f2f9dc36491a7fbeab73a7441bb94c590826 Mon Sep 17 00:00:00 2001 From: Antonio SJ Musumeci Date: Fri, 26 May 2017 16:51:30 -0400 Subject: [PATCH] add nullrw feature to facilitate benchmarking --- README.md | 34 +++++++++++++++++++++++++- man/mergerfs.1 | 56 ++++++++++++++++++++++++++++++++++++++++++- src/config.cpp | 1 + src/config.hpp | 1 + src/mergerfs.cpp | 31 ++++++++++++++++-------- src/option_parser.cpp | 3 +++ src/read.cpp | 11 +++++++++ src/read.hpp | 7 ++++++ src/write.cpp | 10 ++++++++ src/write.hpp | 7 ++++++ src/write_buf.cpp | 9 +++++++ src/write_buf.hpp | 6 +++++ 12 files changed, 164 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index faae609b..12e18891 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ % mergerfs(1) mergerfs user manual % Antonio SJ Musumeci -% 2017-02-18 +% 2017-05-26 # NAME @@ -40,6 +40,7 @@ mergerfs -o<options> <srcmounts> <mountpoint> * **dropcacheonclose**: when a file is requested to be closed call `posix_fadvise` on it first to instruct the kernel that we no longer need the data and it can drop its cache. Recommended when **direct_io** is not enabled to limit double caching. (default: false) * **symlinkify**: when enabled (set to **true**) and a file is not writable and its mtime or ctime is older than **symlinkify_timeout** files will be reported as symlinks to the original files. Please read more below before using. (default: false) * **symlinkify_timeout**: time to wait, in seconds, to activate the **symlinkify** behavior. (default: 3600) +* **nullrw**: turns reads and writes into no-ops. The request will succeed but do nothing. Useful for benchmarking mergerfs. (default: false) * **fsname**: sets the name of the filesystem as seen in **mount**, **df**, etc. Defaults to a list of the source paths concatenated together with the longest common prefix removed. * **func.<func>=<policy>**: sets the specific FUSE function's policy. See below for the list of value types. 
Example: **func.getattr=newest** * **category.<category>=<policy>**: Sets policy of all FUSE functions in the provided category. Example: **category.create=mfs** @@ -77,6 +78,37 @@ Due to the levels of indirection introduced by mergerfs and the underlying techn **WARNING:** Some backup solutions, such as CrashPlan, do not backup the target of a symlink. If using this feature it will be necessary to point any backup software to the original drives or configure the software to follow symlinks if such an option is available. Alternatively create two mounts. One for backup and one for general consumption. +### nullrw + +Due to how FUSE works there is an overhead to all requests made to a FUSE filesystem. Meaning that even a simple passthrough will have some slowdown. However, generally the overhead is minimal in comparison to the cost of the underlying I/O. By disabling the underlying I/O we can test the theoretical performance boundaries. + +By enabling `nullrw` mergerfs will work as it always does **except** that all reads and writes will be no-ops. A write will succeed (the size of the write will be returned as if it were successful) but mergerfs does nothing with the data it was given. Similarly a read will return the size requested but won't touch the buffer. 
+ +Example: +``` +$ dd if=/dev/zero of=/path/to/mergerfs/mount/benchmark ibs=1M obs=512 count=1024 +1024+0 records in +2097152+0 records out +1073741824 bytes (1.1 GB, 1.0 GiB) copied, 15.4067 s, 69.7 MB/s + +$ dd if=/dev/zero of=/path/to/mergerfs/mount/benchmark ibs=1M obs=1M count=1024 +1024+0 records in +1024+0 records out +1073741824 bytes (1.1 GB, 1.0 GiB) copied, 0.219585 s, 4.9 GB/s + +$ dd if=/path/to/mergerfs/mount/benchmark of=/dev/null bs=512 count=102400 +102400+0 records in +102400+0 records out +52428800 bytes (52 MB, 50 MiB) copied, 0.757991 s, 69.2 MB/s + +$ dd if=/path/to/mergerfs/mount/benchmark of=/dev/null bs=1M count=1024 +1024+0 records in +1024+0 records out +1073741824 bytes (1.1 GB, 1.0 GiB) copied, 0.18405 s, 5.8 GB/s +``` + +It's important to test with different `obs` (output block size) values since the relative overhead is greater with smaller values. As you can see above the size of a read or write can massively impact theoretical performance. If an application performs much worse through mergerfs it could very well be that it doesn't optimally size its read and write requests. + # FUNCTIONS / POLICIES / CATEGORIES The POSIX filesystem API has a number of functions. **creat**, **stat**, **chown**, etc. In mergerfs these functions are grouped into 3 categories: **action**, **create**, and **search**. Functions and categories can be assigned a policy which dictates how **mergerfs** behaves. Any policy can be assigned to a function or category though some may not be very useful in practice. For instance: **rand** (random) may be useful for file creation (create) but could lead to very odd behavior if used for `chmod` (though only if there were more than one copy of the file). 
diff --git a/man/mergerfs.1 b/man/mergerfs.1 index 6510b763..ee7c7161 100644 --- a/man/mergerfs.1 +++ b/man/mergerfs.1 @@ -1,7 +1,7 @@ .\"t .\" Automatically generated by Pandoc 1.16.0.2 .\" -.TH "mergerfs" "1" "2017\-02\-18" "mergerfs user manual" "" +.TH "mergerfs" "1" "2017\-05\-26" "mergerfs user manual" "" .hy .SH NAME .PP @@ -91,6 +91,11 @@ Please read more below before using. \f[B]symlinkify\f[] behavior. (default: 3600) .IP \[bu] 2 +\f[B]nullrw\f[]: turns reads and writes into no\-ops. +The request will succeed but do nothing. +Useful for benchmarking mergerfs. +(default: false) +.IP \[bu] 2 \f[B]fsname\f[]: sets the name of the filesystem as seen in \f[B]mount\f[], \f[B]df\f[], etc. Defaults to a list of the source paths concatenated together with the @@ -174,6 +179,55 @@ to the original drives or configure the software to follow symlinks if such an option is available. Alternatively create two mounts. One for backup and one for general consumption. +.SS nullrw +.PP +Due to how FUSE works there is an overhead to all requests made to a +FUSE filesystem. +Meaning that even a simple passthrough will have some slowdown. +However, generally the overhead is minimal in comparison to the cost of +the underlying I/O. +By disabling the underlying I/O we can test the theoretical performance +boundaries. +.PP +By enabling \f[C]nullrw\f[] mergerfs will work as it always does +\f[B]except\f[] that all reads and writes will be no\-ops. +A write will succeed (the size of the write will be returned as if it +were successful) but mergerfs does nothing with the data it was given. +Similarly a read will return the size requested but won\[aq]t touch the +buffer. 
+.PP +Example: +.IP +.nf +\f[C] +$\ dd\ if=/dev/zero\ of=/path/to/mergerfs/mount/benchmark\ ibs=1M\ obs=512\ count=1024 +1024+0\ records\ in +2097152+0\ records\ out +1073741824\ bytes\ (1.1\ GB,\ 1.0\ GiB)\ copied,\ 15.4067\ s,\ 69.7\ MB/s + +$\ dd\ if=/dev/zero\ of=/path/to/mergerfs/mount/benchmark\ ibs=1M\ obs=1M\ count=1024 +1024+0\ records\ in +1024+0\ records\ out +1073741824\ bytes\ (1.1\ GB,\ 1.0\ GiB)\ copied,\ 0.219585\ s,\ 4.9\ GB/s + +$\ dd\ if=/path/to/mergerfs/mount/benchmark\ of=/dev/null\ bs=512\ count=102400 +102400+0\ records\ in +102400+0\ records\ out +52428800\ bytes\ (52\ MB,\ 50\ MiB)\ copied,\ 0.757991\ s,\ 69.2\ MB/s + +$\ dd\ if=/path/to/mergerfs/mount/benchmark\ of=/dev/null\ bs=1M\ count=1024 +1024+0\ records\ in +1024+0\ records\ out +1073741824\ bytes\ (1.1\ GB,\ 1.0\ GiB)\ copied,\ 0.18405\ s,\ 5.8\ GB/s +\f[] +.fi +.PP +It\[aq]s important to test with different \f[C]obs\f[] (output block +size) values since the relative overhead is greater with smaller values. +As you can see above the size of a read or write can massively impact +theoretical performance. +If an application performs much worse through mergerfs it could very +well be that it doesn\[aq]t optimally size its read and write requests. .SH FUNCTIONS / POLICIES / CATEGORIES .PP The POSIX filesystem API has a number of functions. 
diff --git a/src/config.cpp b/src/config.cpp index a41366d6..0fc81e39 100644 --- a/src/config.cpp +++ b/src/config.cpp @@ -43,6 +43,7 @@ namespace mergerfs dropcacheonclose(false), symlinkify(false), symlinkify_timeout(3600), + nullrw(false), POLICYINIT(access), POLICYINIT(chmod), POLICYINIT(chown), diff --git a/src/config.hpp b/src/config.hpp index ec7fee94..f829c9e3 100644 --- a/src/config.hpp +++ b/src/config.hpp @@ -51,6 +51,7 @@ namespace mergerfs bool dropcacheonclose; bool symlinkify; time_t symlinkify_timeout; + bool nullrw; public: const Policy *policies[FuseFunc::Enum::END]; diff --git a/src/mergerfs.cpp b/src/mergerfs.cpp index bc552edf..bac24e65 100644 --- a/src/mergerfs.cpp +++ b/src/mergerfs.cpp @@ -70,7 +70,8 @@ namespace local static void get_fuse_operations(struct fuse_operations &ops, - const bool direct_io) + const bool direct_io, + const bool nullrw) { ops.flag_nullpath_ok = true; #if FLAG_NOPATH @@ -110,11 +111,15 @@ namespace local ops.open = mergerfs::fuse::open; ops.opendir = mergerfs::fuse::opendir; ops.poll = NULL; - ops.read = direct_io ? - mergerfs::fuse::read_direct_io : - mergerfs::fuse::read; + ops.read = (nullrw ? + mergerfs::fuse::read_null : + (direct_io ? + mergerfs::fuse::read_direct_io : + mergerfs::fuse::read)); #if READ_BUF - ops.read_buf = mergerfs::fuse::read_buf; + ops.read_buf = (nullrw ? + NULL : + mergerfs::fuse::read_buf); #endif ops.readdir = mergerfs::fuse::readdir; ops.readlink = mergerfs::fuse::readlink; @@ -130,11 +135,15 @@ namespace local ops.unlink = mergerfs::fuse::unlink; ops.utime = NULL; /* deprecated; use utimens() */ ops.utimens = mergerfs::fuse::utimens; - ops.write = direct_io ? - mergerfs::fuse::write_direct_io : - mergerfs::fuse::write; + ops.write = (nullrw ? + mergerfs::fuse::write_null : + (direct_io ? + mergerfs::fuse::write_direct_io : + mergerfs::fuse::write)); #if WRITE_BUF - ops.write_buf = mergerfs::fuse::write_buf; + ops.write_buf = (nullrw ? 
+ mergerfs::fuse::write_buf_null : + mergerfs::fuse::write_buf); #endif return; @@ -171,7 +180,9 @@ namespace mergerfs mergerfs::options::parse(args,config); local::setup_resources(); - local::get_fuse_operations(ops,config.direct_io); + local::get_fuse_operations(ops, + config.direct_io, + config.nullrw); return fuse_main(args.argc, args.argv, diff --git a/src/option_parser.cpp b/src/option_parser.cpp index e1e16f20..9aae0c32 100644 --- a/src/option_parser.cpp +++ b/src/option_parser.cpp @@ -190,6 +190,8 @@ parse_and_process_kv_arg(Config &config, rv = parse_and_process(value,config.symlinkify); else if(key == "symlinkify_timeout") rv = parse_and_process(value,config.symlinkify_timeout); + else if(key == "nullrw") + rv = parse_and_process(value,config.nullrw); } if(rv == -1) @@ -291,6 +293,7 @@ usage(void) " -o symlinkify_timeout=\n" " timeout in seconds before will turn to symlinks.\n" " default=3600\n" + " -o nullrw= Disables reads and writes. For benchmarking.\n" << std::endl; } diff --git a/src/read.cpp b/src/read.cpp index a3e6489a..d59d35bf 100644 --- a/src/read.cpp +++ b/src/read.cpp @@ -87,5 +87,16 @@ namespace mergerfs return ::_read_direct_io(fi->fd,buf,count,offset); } + + int + read_null(const char *fusepath, + char *buf, + size_t count, + off_t offset, + fuse_file_info *ffi) + + { + return count; + } } } diff --git a/src/read.hpp b/src/read.hpp index 0c106757..9c5b3ff7 100644 --- a/src/read.hpp +++ b/src/read.hpp @@ -34,6 +34,13 @@ namespace mergerfs size_t count, off_t offset, fuse_file_info *fi); + + int + read_null(const char *fusepath, + char *buf, + size_t count, + off_t offset, + fuse_file_info *fi); } } diff --git a/src/write.cpp b/src/write.cpp index b7da6d64..d77d3519 100644 --- a/src/write.cpp +++ b/src/write.cpp @@ -129,5 +129,15 @@ namespace mergerfs { return write(_write_direct_io,buf,count,offset,ffi); } + + int + write_null(const char *fusepath, + const char *buf, + size_t count, + off_t offset, + fuse_file_info *ffi) + { + return 
count; + } } } diff --git a/src/write.hpp b/src/write.hpp index 8a144124..1a736c4b 100644 --- a/src/write.hpp +++ b/src/write.hpp @@ -34,6 +34,13 @@ namespace mergerfs size_t count, off_t offset, fuse_file_info *fi); + + int + write_null(const char *fusepath, + const char *buf, + size_t count, + off_t offset, + fuse_file_info *fi); } } diff --git a/src/write_buf.cpp b/src/write_buf.cpp index db262436..26abb900 100644 --- a/src/write_buf.cpp +++ b/src/write_buf.cpp @@ -99,6 +99,15 @@ namespace mergerfs return rv; } + + int + write_buf_null(const char *fusepath, + fuse_bufvec *src, + off_t offset, + fuse_file_info *ffi) + { + return src->buf[0].size; + } } } diff --git a/src/write_buf.hpp b/src/write_buf.hpp index 504141bf..6063e9aa 100644 --- a/src/write_buf.hpp +++ b/src/write_buf.hpp @@ -30,6 +30,12 @@ namespace mergerfs struct fuse_bufvec *buf, off_t offset, fuse_file_info *fi); + + int + write_buf_null(const char *fusepath, + struct fuse_bufvec *buf, + off_t offset, + fuse_file_info *fi); } }