From 6a7675f2bf74d16a8b007811e1c4ee2af7283097 Mon Sep 17 00:00:00 2001 From: Antonio SJ Musumeci Date: Mon, 24 Apr 2017 06:57:29 -0400 Subject: [PATCH] symlinkify: file -> symlink-to-original-file after timeout --- README.md | 23 +++++++++++---- man/mergerfs.1 | 39 +++++++++++++++++++++++-- src/config.cpp | 2 ++ src/config.hpp | 2 ++ src/getattr.cpp | 12 ++++++-- src/getxattr.cpp | 45 +++++++++++++++++++---------- src/listxattr.cpp | 2 ++ src/num.cpp | 18 ++++++++++++ src/num.hpp | 1 + src/option_parser.cpp | 42 +++++++++++++++++++++------ src/readlink.cpp | 67 ++++++++++++++++++++++++++++++++++++------- src/setxattr.cpp | 32 +++++++++++++++++++-- src/symlinkify.hpp | 52 +++++++++++++++++++++++++++++++++ 13 files changed, 290 insertions(+), 47 deletions(-) create mode 100644 src/symlinkify.hpp diff --git a/README.md b/README.md index 8b5cf982..faae609b 100644 --- a/README.md +++ b/README.md @@ -26,6 +26,7 @@ mergerfs -o<options> <srcmounts> <mountpoint> * Works with heterogeneous filesystem types * Handling of writes to full drives (transparently move file to drive with capacity) * Handles pool of readonly and read/write drives +* Turn read-only files into symlinks to increase read performance # OPTIONS @@ -37,6 +38,8 @@ mergerfs -o<options> <srcmounts> <mountpoint> * **moveonenospc**: when enabled (set to **true**) if a **write** fails with **ENOSPC** or **EDQUOT** a scan of all drives will be done looking for the drive with most free space which is at least the size of the file plus the amount which failed to write. An attempt to move the file to that drive will occur (keeping all metadata possible) and if successful the original is unlinked and the write retried. (default: false) * **use_ino**: causes mergerfs to supply file/directory inodes rather than libfuse. While not a default it is generally recommended it be enabled so that hard linked files share the same inode value. 
* **dropcacheonclose**: when a file is requested to be closed call `posix_fadvise` on it first to instruct the kernel that we no longer need the data and it can drop its cache. Recommended when **direct_io** is not enabled to limit double caching. (default: false) +* **symlinkify**: when enabled (set to **true**) and a file is not writable and both its mtime and ctime are older than **symlinkify_timeout** files will be reported as symlinks to the original files. Please read more below before using. (default: false) +* **symlinkify_timeout**: time to wait, in seconds, to activate the **symlinkify** behavior. (default: 3600) * **fsname**: sets the name of the filesystem as seen in **mount**, **df**, etc. Defaults to a list of the source paths concatenated together with the longest common prefix removed. * **func.<func>=<policy>**: sets the specific FUSE function's policy. See below for the list of value types. Example: **func.getattr=newest** * **category.<category>=<policy>**: Sets policy of all FUSE functions in the provided category. Example: **category.create=mfs** @@ -66,6 +69,14 @@ To have the pool mounted at boot or otherwise accessable from related tools use **NOTE:** for mounting via **fstab** to work you must have **mount.fuse** installed. For Ubuntu/Debian it is included in the **fuse** package. +### symlinkify + +Due to the levels of indirection introduced by mergerfs and the underlying technology FUSE there can be varying levels of performance degradation. This feature will turn non-directories which are not writable into symlinks to the original file found by the `readlink` policy after the mtime and ctime are older than the timeout. + +**WARNING:** The current implementation has a known issue in which if the file is open and being used when the file is converted to a symlink then the application which has that file open will receive an error when using it. This is unlikely to occur in practice but is something to keep in mind. 
+ +**WARNING:** Some backup solutions, such as CrashPlan, do not backup the target of a symlink. If using this feature it will be necessary to point any backup software to the original drives or configure the software to follow symlinks if such an option is available. Alternatively create two mounts. One for backup and one for general consumption. + # FUNCTIONS / POLICIES / CATEGORIES The POSIX filesystem API has a number of functions. **creat**, **stat**, **chown**, etc. In mergerfs these functions are grouped into 3 categories: **action**, **create**, and **search**. Functions and categories can be assigned a policy which dictates how **mergerfs** behaves. Any policy can be assigned to a function or category though some may not be very useful in practice. For instance: **rand** (random) may be useful for file creation (create) but could lead to very odd behavior if used for `chmod` (though only if there were more than one copy of the file). @@ -74,11 +85,11 @@ Policies, when called to create, will ignore drives which are readonly. This all #### Function / Category classifications -| Category | FUSE Functions | -|----------|----------------| +| Category | FUSE Functions | +|----------|-------------------------------------------------------------------------------------| | action | chmod, chown, link, removexattr, rename, rmdir, setxattr, truncate, unlink, utimens | -| create | create, mkdir, mknod, symlink | -| search | access, getattr, getxattr, ioctl, listxattr, open, readlink | +| create | create, mkdir, mknod, symlink | +| search | access, getattr, getxattr, ioctl, listxattr, open, readlink | | N/A | fallocate, fgetattr, fsync, ftruncate, ioctl, read, readdir, release, statfs, write | Due to FUSE limitations **ioctl** behaves differently if its acting on a directory. It'll use the **getattr** policy to find and open the directory before issuing the **ioctl**. 
In other cases where something may be searched (to confirm a directory exists across all source mounts) **getattr** will also be used. @@ -95,8 +106,8 @@ When using non-path preserving policies where something is created paths will be #### Policy descriptions -| Policy | Description | -|--------------|-------------| +| Policy | Description | +|------------------|------------------------------------------------------------| | all | Search category: acts like **ff**. Action category: apply to all found. Create category: for **mkdir**, **mknod**, and **symlink** it will apply to all found. **create** works like **ff**. It will exclude readonly drives and those with free space less than **minfreespace**. | | epall (existing path, all) | Search category: acts like **epff**. Action category: apply to all found. Create category: for **mkdir**, **mknod**, and **symlink** it will apply to all existing paths found. **create** works like **epff**. Excludes readonly drives and those with free space less than **minfreespace**. | | epff (existing path, first found) | Given the order of the drives, as defined at mount time or configured at runtime, act on the first one found where the relative path already exists. For **create** category functions it will exclude readonly drives and those with free space less than **minfreespace** (unless there is no other option). Falls back to **ff**. | diff --git a/man/mergerfs.1 b/man/mergerfs.1 index db65e662..6510b763 100644 --- a/man/mergerfs.1 +++ b/man/mergerfs.1 @@ -37,6 +37,8 @@ Handling of writes to full drives (transparently move file to drive with capacity) .IP \[bu] 2 Handles pool of readonly and read/write drives +.IP \[bu] 2 +Turn read\-only files into symlinks to increase read performance .SH OPTIONS .SS mount options .IP \[bu] 2 @@ -78,6 +80,17 @@ Recommended when \f[B]direct_io\f[] is not enabled to limit double caching. 
(default: false) .IP \[bu] 2 +\f[B]symlinkify\f[]: when enabled (set to \f[B]true\f[]) and a file is +not writable and both its mtime and ctime are older than +\f[B]symlinkify_timeout\f[] files will be reported as symlinks to the +original files. +Please read more below before using. +(default: false) +.IP \[bu] 2 +\f[B]symlinkify_timeout\f[]: time to wait, in seconds, to activate the +\f[B]symlinkify\f[] behavior. +(default: 3600) +.IP \[bu] 2 \f[B]fsname\f[]: sets the name of the filesystem as seen in \f[B]mount\f[], \f[B]df\f[], etc. Defaults to a list of the source paths concatenated together with the @@ -139,6 +152,28 @@ be automatically included. \f[B]NOTE:\f[] for mounting via \f[B]fstab\f[] to work you must have \f[B]mount.fuse\f[] installed. For Ubuntu/Debian it is included in the \f[B]fuse\f[] package. +.SS symlinkify +.PP +Due to the levels of indirection introduced by mergerfs and the +underlying technology FUSE there can be varying levels of performance +degradation. +This feature will turn non\-directories which are not writable into +symlinks to the original file found by the \f[C]readlink\f[] policy +after the mtime and ctime are older than the timeout. +.PP +\f[B]WARNING:\f[] The current implementation has a known issue in which +if the file is open and being used when the file is converted to a +symlink then the application which has that file open will receive an +error when using it. +This is unlikely to occur in practice but is something to keep in mind. +.PP +\f[B]WARNING:\f[] Some backup solutions, such as CrashPlan, do not +backup the target of a symlink. +If using this feature it will be necessary to point any backup software +to the original drives or configure the software to follow symlinks if +such an option is available. +Alternatively create two mounts. +One for backup and one for general consumption. .SH FUNCTIONS / POLICIES / CATEGORIES .PP The POSIX filesystem API has a number of functions. 
@@ -161,7 +196,7 @@ mount option for this to work. .PP .TS tab(@); -lw(10.7n) lw(16.5n). +lw(7.9n) lw(62.1n). T{ Category T}@T{ @@ -216,7 +251,7 @@ paths will be copied to target drives as necessary. .PP .TS tab(@); -lw(14.6n) lw(13.6n). +lw(16.6n) lw(53.4n). T{ Policy T}@T{ diff --git a/src/config.cpp b/src/config.cpp index 282eb13e..a41366d6 100644 --- a/src/config.cpp +++ b/src/config.cpp @@ -41,6 +41,8 @@ namespace mergerfs moveonenospc(false), direct_io(false), dropcacheonclose(false), + symlinkify(false), + symlinkify_timeout(3600), POLICYINIT(access), POLICYINIT(chmod), POLICYINIT(chown), diff --git a/src/config.hpp b/src/config.hpp index ac22dc3e..ec7fee94 100644 --- a/src/config.hpp +++ b/src/config.hpp @@ -49,6 +49,8 @@ namespace mergerfs bool moveonenospc; bool direct_io; bool dropcacheonclose; + bool symlinkify; + time_t symlinkify_timeout; public: const Policy *policies[FuseFunc::Enum::END]; diff --git a/src/getattr.cpp b/src/getattr.cpp index f3bf450f..66fb0a8a 100644 --- a/src/getattr.cpp +++ b/src/getattr.cpp @@ -25,6 +25,7 @@ #include "fs_inode.hpp" #include "fs_path.hpp" #include "rwlock.hpp" +#include "symlinkify.hpp" #include "ugid.hpp" using std::string; @@ -62,7 +63,9 @@ _getattr(Policy::Func::Search searchFunc, const vector &srcmounts, const uint64_t minfreespace, const char *fusepath, - struct stat &st) + struct stat &st, + const bool symlinkify, + const time_t symlinkify_timeout) { int rv; string fullpath; @@ -78,6 +81,9 @@ _getattr(Policy::Func::Search searchFunc, if(rv == -1) return -errno; + if(symlinkify && symlinkify::can_be_symlink(st,symlinkify_timeout)) + st.st_mode = symlinkify::convert(st.st_mode); + fs::inode::recompute(st); return 0; @@ -104,7 +110,9 @@ namespace mergerfs config.srcmounts, config.minfreespace, fusepath, - *st); + *st, + config.symlinkify, + config.symlinkify_timeout); } } } diff --git a/src/getxattr.cpp b/src/getxattr.cpp index 747eba68..7f4005d3 100644 --- a/src/getxattr.cpp +++ b/src/getxattr.cpp @@ -16,14 
+16,15 @@ #include -#include -#include -#include -#include - #include #include +#include +#include +#include +#include +#include + #include "config.hpp" #include "errno.hpp" #include "fs_base_getxattr.hpp" @@ -100,22 +101,32 @@ _getxattr_controlfile_srcmounts(const Config &config, static void -_getxattr_controlfile_minfreespace(const Config &config, - string &attrvalue) +_getxattr_controlfile_uint64_t(const uint64_t uint, + string &attrvalue) { - char buf[64]; - unsigned long long minfreespace; + std::ostringstream os; - minfreespace = (unsigned long long)config.minfreespace; - snprintf(buf,sizeof(buf),"%llu",minfreespace); + os << uint; - attrvalue = buf; + attrvalue = os.str(); +} + +static +void +_getxattr_controlfile_time_t(const time_t time, + string &attrvalue) +{ + std::ostringstream os; + + os << time; + + attrvalue = os.str(); } static void -_getxattr_controlfile_bool(bool boolvalue, - string &attrvalue) +_getxattr_controlfile_bool(const bool boolvalue, + string &attrvalue) { attrvalue = (boolvalue ? 
"true" : "false"); } @@ -173,11 +184,15 @@ _getxattr_controlfile(const Config &config, if(attr[2] == "srcmounts") _getxattr_controlfile_srcmounts(config,attrvalue); else if(attr[2] == "minfreespace") - _getxattr_controlfile_minfreespace(config,attrvalue); + _getxattr_controlfile_uint64_t(config.minfreespace,attrvalue); else if(attr[2] == "moveonenospc") _getxattr_controlfile_bool(config.moveonenospc,attrvalue); else if(attr[2] == "dropcacheonclose") _getxattr_controlfile_bool(config.dropcacheonclose,attrvalue); + else if(attr[2] == "symlinkify") + _getxattr_controlfile_bool(config.symlinkify,attrvalue); + else if(attr[2] == "symlinkify_timeout") + _getxattr_controlfile_time_t(config.symlinkify_timeout,attrvalue); else if(attr[2] == "policies") _getxattr_controlfile_policies(config,attrvalue); else if(attr[2] == "version") diff --git a/src/listxattr.cpp b/src/listxattr.cpp index b03a9efe..7dbee28b 100644 --- a/src/listxattr.cpp +++ b/src/listxattr.cpp @@ -47,6 +47,8 @@ _listxattr_controlfile(char *list, ("user.mergerfs.minfreespace") ("user.mergerfs.moveonenospc") ("user.mergerfs.dropcacheonclose") + ("user.mergerfs.symlinkify") + ("user.mergerfs.symlinkify_timeout") ("user.mergerfs.policies") ("user.mergerfs.version") ("user.mergerfs.pid"); diff --git a/src/num.cpp b/src/num.cpp index 7f6a0ec4..874a3828 100644 --- a/src/num.cpp +++ b/src/num.cpp @@ -57,4 +57,22 @@ namespace num return 0; } + + int + to_time_t(const std::string &str, + time_t &value) + { + time_t tmp; + char *endptr; + + tmp = strtoll(str.c_str(),&endptr,10); + if(*endptr != '\0') + return -1; + if(tmp < 0) + return -1; + + value = tmp; + + return 0; + } } diff --git a/src/num.hpp b/src/num.hpp index 2724d5e3..0d832884 100644 --- a/src/num.hpp +++ b/src/num.hpp @@ -24,6 +24,7 @@ namespace num { int to_uint64_t(const std::string &str, uint64_t &value); + int to_time_t(const std::string &str, time_t &value); } #endif diff --git a/src/option_parser.cpp b/src/option_parser.cpp index 7189fda3..e1e16f20 
100644 --- a/src/option_parser.cpp +++ b/src/option_parser.cpp @@ -106,8 +106,8 @@ set_default_options(fuse_args &args) static int -parse_and_process_minfreespace(const std::string &value, - uint64_t &minfreespace) +parse_and_process(const std::string &value, + uint64_t &minfreespace) { int rv; @@ -120,8 +120,22 @@ parse_and_process_minfreespace(const std::string &value, static int -parse_and_process_boolean(const std::string &value, - bool &boolean) +parse_and_process(const std::string &value, + time_t &time) +{ + int rv; + + rv = num::to_time_t(value,time); + if(rv == -1) + return 1; + + return 0; +} + +static +int +parse_and_process(const std::string &value, + bool &boolean) { if(value == "false") boolean = false; @@ -167,11 +181,15 @@ parse_and_process_kv_arg(Config &config, else { if(key == "minfreespace") - rv = parse_and_process_minfreespace(value,config.minfreespace); + rv = parse_and_process(value,config.minfreespace); else if(key == "moveonenospc") - rv = parse_and_process_boolean(value,config.moveonenospc); + rv = parse_and_process(value,config.moveonenospc); else if(key == "dropcacheonclose") - rv = parse_and_process_boolean(value,config.dropcacheonclose); + rv = parse_and_process(value,config.dropcacheonclose); + else if(key == "symlinkify") + rv = parse_and_process(value,config.symlinkify); + else if(key == "symlinkify_timeout") + rv = parse_and_process(value,config.symlinkify_timeout); } if(rv == -1) @@ -261,12 +279,18 @@ usage(void) " autogenerated by libfuse. Suggested.\n" " -o minfreespace= minimum free space needed for certain policies.\n" " default=4G\n" - " -o moveonenospc= try to move file to another drive when ENOSPC\n" + " -o moveonenospc= Try to move file to another drive when ENOSPC\n" " on write. default=false\n" " -o dropcacheonclose=\n" - " when a file is closed suggest to OS it drop\n" + " When a file is closed suggest to OS it drop\n" " the file's cache. This is useful when direct_io\n" " is disabled. 
default=false\n" + " -o symlinkify= Read-only files, after a timeout, will be turned\n" + " into symlinks. Read docs for limitations and\n" + " possible issues. default=false\n" + " -o symlinkify_timeout=\n" + " timeout in seconds before will turn to symlinks.\n" + " default=3600\n" << std::endl; } diff --git a/src/readlink.cpp b/src/readlink.cpp index b484b052..6551d709 100644 --- a/src/readlink.cpp +++ b/src/readlink.cpp @@ -16,11 +16,15 @@ #include +#include + #include "config.hpp" #include "errno.hpp" #include "fs_base_readlink.hpp" +#include "fs_base_stat.hpp" #include "fs_path.hpp" #include "rwlock.hpp" +#include "symlinkify.hpp" #include "ugid.hpp" using std::string; @@ -29,15 +33,12 @@ using mergerfs::Policy; static int -_readlink_core(const string *basepath, - const char *fusepath, - char *buf, - const size_t size) +_readlink_core_standard(const string &fullpath, + char *buf, + const size_t size) + { int rv; - string fullpath; - - fs::path::make(basepath,fusepath,fullpath); rv = fs::readlink(fullpath,buf,size); if(rv == -1) @@ -48,6 +49,47 @@ _readlink_core(const string *basepath, return 0; } +static +int +_readlink_core_symlinkify(const string &fullpath, + char *buf, + const size_t size, + const time_t symlinkify_timeout) +{ + int rv; + struct stat st; + + rv = fs::stat(fullpath,st); + if(rv == -1) + return -errno; + + if(!symlinkify::can_be_symlink(st,symlinkify_timeout)) + return _readlink_core_standard(fullpath,buf,size); + strncpy(buf,fullpath.c_str(),size); + if(size > 0) + buf[size - 1] = '\0'; /* strncpy() leaves buf unterminated when the path fills it; FUSE expects a NUL-terminated (truncated) target */ + return 0; +} + +static +int +_readlink_core(const string *basepath, + const char *fusepath, + char *buf, + const size_t size, + const bool symlinkify, + const time_t symlinkify_timeout) +{ + string fullpath; + + fs::path::make(basepath,fusepath,fullpath); + + if(symlinkify) + return _readlink_core_symlinkify(fullpath,buf,size,symlinkify_timeout); + + return _readlink_core_standard(fullpath,buf,size); +} + static int _readlink(Policy::Func::Search searchFunc, @@ -55,7 +97,9 @@
_readlink(Policy::Func::Search searchFunc, const uint64_t minfreespace, const char *fusepath, char *buf, - const size_t size) + const size_t size, + const bool symlinkify, + const time_t symlinkify_timeout) { int rv; vector basepaths; @@ -64,7 +108,8 @@ _readlink(Policy::Func::Search searchFunc, if(rv == -1) return -errno; - return _readlink_core(basepaths[0],fusepath,buf,size); + return _readlink_core(basepaths[0],fusepath,buf,size, + symlinkify,symlinkify_timeout); } namespace mergerfs @@ -86,7 +131,9 @@ namespace mergerfs config.minfreespace, fusepath, buf, - size); + size, + config.symlinkify, + config.symlinkify_timeout); } } } diff --git a/src/setxattr.cpp b/src/setxattr.cpp index 6391757b..f54668b9 100644 --- a/src/setxattr.cpp +++ b/src/setxattr.cpp @@ -16,11 +16,11 @@ #include +#include + +#include #include #include -#include - -#include #include "config.hpp" #include "errno.hpp" @@ -178,6 +178,24 @@ _setxattr_uint64_t(const string &attrval, return 0; } +static +int +_setxattr_time_t(const string &attrval, + const int flags, + time_t &time) +{ + int rv; + + if((flags & XATTR_CREATE) == XATTR_CREATE) + return -EEXIST; + + rv = num::to_time_t(attrval,time); + if(rv == -1) + return -EINVAL; + + return 0; +} + static int _setxattr_bool(const string &attrval, @@ -267,6 +285,14 @@ _setxattr_controlfile(Config &config, return _setxattr_bool(attrval, flags, config.dropcacheonclose); + else if(attr[2] == "symlinkify") + return _setxattr_bool(attrval, + flags, + config.symlinkify); + else if(attr[2] == "symlinkify_timeout") + return _setxattr_time_t(attrval, + flags, + config.symlinkify_timeout); break; case 4: diff --git a/src/symlinkify.hpp b/src/symlinkify.hpp new file mode 100644 index 00000000..b38aedd5 --- /dev/null +++ b/src/symlinkify.hpp @@ -0,0 +1,52 @@ +/* + ISC License + + Copyright (c) 2017, Antonio SJ Musumeci + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the 
above + copyright notice and this permission notice appear in all copies. + + THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#ifndef __SYMLINKIFY_HPP__ +#define __SYMLINKIFY_HPP__ + +#include +#include + +namespace symlinkify +{ + static + inline + bool + can_be_symlink(const struct stat &st, + const time_t timeout) + { + if(S_ISDIR(st.st_mode) || + (st.st_mode & (S_IWUSR|S_IWGRP|S_IWOTH))) + return false; + + const time_t now = ::time(NULL); + + return (((now - st.st_mtime) > timeout) && + ((now - st.st_ctime) > timeout)); + } + + static + inline + mode_t + convert(const mode_t mode) + { + return ((mode & ~S_IFMT) | S_IFLNK); + } +} + +#endif