From 93548c41176d970494e1bf73eb263dd38226fb75 Mon Sep 17 00:00:00 2001 From: Antonio SJ Musumeci Date: Sun, 21 Jun 2020 14:45:14 -0400 Subject: [PATCH] followsymlinks: new feature that can make mergerfs follow symlinks --- README.md | 10 +++ libfuse/include/fuse.h | 9 ++- libfuse/lib/fuse.c | 132 +++++++++++++++++++--------------- man/mergerfs.1 | 16 +++++ src/config.cpp | 2 + src/config.hpp | 2 + src/config_followsymlinks.cpp | 58 +++++++++++++++ src/config_followsymlinks.hpp | 30 ++++++++ src/fuse_getattr.cpp | 66 ++++++++++++++++- src/fuse_rmdir.cpp | 32 +++++++-- src/fuse_symlink.cpp | 40 ++++++++--- src/fuse_symlink.hpp | 12 +++- 12 files changed, 325 insertions(+), 84 deletions(-) create mode 100644 src/config_followsymlinks.cpp create mode 100644 src/config_followsymlinks.hpp diff --git a/README.md b/README.md index 244c24f7..a990f92a 100644 --- a/README.md +++ b/README.md @@ -110,6 +110,7 @@ See the mergerfs [wiki for real world deployments](https://github.com/trapexit/m * **statfs=base|full**: Controls how statfs works. 'base' means it will always use all branches in statfs calculations. 'full' is in effect path preserving and only includes drives where the path exists. (default: base) * **statfs_ignore=none|ro|nc**: 'ro' will cause statfs calculations to ignore available space for branches mounted or tagged as 'read-only' or 'no create'. 'nc' will ignore available space for branches tagged as 'no create'. (default: none) * **nfsopenhack=off|git|all**: A workaround for exporting mergerfs over NFS where there are issues with creating files for write while setting the mode to read-only. (default: off) +* **followsymlinks=never|directory|regular|all**: Turns symlinks into what they point to. (default: never) * **posix_acl=BOOL**: Enable POSIX ACL support (if supported by kernel and underlying filesystem). (default: false) * **async_read=BOOL**: Perform reads asynchronously. 
If disabled or unavailable the kernel will ensure there is at most one pending read request per file handle and will attempt to order requests by offset. (default: true) * **fuse_msg_size=INT**: Set the max number of pages per FUSE message. Only available on Linux >= 4.20 and ignored otherwise. (min: 1; max: 256; default: 256) @@ -218,6 +219,15 @@ In Linux 4.20 a new feature was added allowing the negotiation of the max messag Since there should be no downsides to increasing `fuse_msg_size` / `max_pages`, outside a minor bump in RAM usage due to larger message buffers, mergerfs defaults the value to 256. On kernels before 4.20 the value has no effect. The reason the value is configurable is to enable experimentation and benchmarking. See the BENCHMARKING section for examples. +### followsymlinks + +This feature, when enabled, will cause symlinks to be interpreted by mergerfs as their target (depending on the mode). + +When there is a getattr/stat request for a file, mergerfs will check if the file is a symlink and, depending on the `followsymlinks` setting, will replace the information about the symlink with that of its target. + +When unlink'ing or rmdir'ing a followed symlink, mergerfs will remove the symlink itself and not its target. + + ### symlinkify Due to the levels of indirection introduced by mergerfs and the underlying technology FUSE there can be varying levels of performance degradation. This feature will turn non-directories which are not writable into symlinks to the original file found by the `readlink` policy after the mtime and ctime are older than the timeout. 
diff --git a/libfuse/include/fuse.h b/libfuse/include/fuse.h index 538216c8..53ffb73b 100644 --- a/libfuse/include/fuse.h +++ b/libfuse/include/fuse.h @@ -121,7 +121,7 @@ struct fuse_operations int (*rmdir) (const char *); /** Create a symbolic link */ - int (*symlink) (const char *, const char *); + int (*symlink) (const char *, const char *, struct stat *, fuse_timeouts_t *); /** Rename a file */ int (*rename) (const char *, const char *); @@ -784,8 +784,11 @@ int fuse_fs_rename(struct fuse_fs *fs, const char *oldpath, const char *newpath); int fuse_fs_unlink(struct fuse_fs *fs, const char *path); int fuse_fs_rmdir(struct fuse_fs *fs, const char *path); -int fuse_fs_symlink(struct fuse_fs *fs, const char *linkname, - const char *path); +int fuse_fs_symlink(struct fuse_fs *fs, + const char *linkname, + const char *path, + struct stat *st, + fuse_timeouts_t *timeouts); int fuse_fs_link(struct fuse_fs *fs, const char *oldpath, const char *newpath); int fuse_fs_release(struct fuse_fs *fs, struct fuse_file_info *fi); diff --git a/libfuse/lib/fuse.c b/libfuse/lib/fuse.c index 8a53495b..0e59233d 100644 --- a/libfuse/lib/fuse.c +++ b/libfuse/lib/fuse.c @@ -1518,17 +1518,22 @@ int fuse_fs_rmdir(struct fuse_fs *fs, const char *path) } } -int fuse_fs_symlink(struct fuse_fs *fs, const char *linkname, const char *path) +int +fuse_fs_symlink(struct fuse_fs *fs_, + const char *linkname_, + const char *path_, + struct stat *st_, + fuse_timeouts_t *timeouts_) { - fuse_get_context()->private_data = fs->user_data; - if (fs->op.symlink) { - if (fs->debug) - fprintf(stderr, "symlink %s %s\n", linkname, path); + fuse_get_context()->private_data = fs_->user_data; - return fs->op.symlink(linkname, path); - } else { + if(fs_->op.symlink == NULL) return -ENOSYS; - } + + if(fs_->debug) + fprintf(stderr,"symlink %s %s\n",linkname_,path_); + + return fs_->op.symlink(linkname_,path_,st_,timeouts_); } int fuse_fs_link(struct fuse_fs *fs, const char *oldpath, const char *newpath) @@ -2279,6 
+2284,37 @@ update_stat(struct node *node_, *stold = *stnew_; } +static +int +set_path_info(struct fuse *f, + fuse_ino_t nodeid, + const char *name, + struct fuse_entry_param *e) +{ + struct node *node; + + node = find_node(f,nodeid,name); + if(node == NULL) + return -ENOMEM; + + e->ino = node->nodeid; + e->generation = node->generation; + + pthread_mutex_lock(&f->lock); + update_stat(node,&e->attr); + pthread_mutex_unlock(&f->lock); + + set_stat(f,e->ino,&e->attr); + if(f->conf.debug) + fprintf(stderr, + " NODEID: %llu\n" + " GEN: %llu\n", + (unsigned long long)e->ino, + (unsigned long long)e->generation); + + return 0; +} + static int lookup_path(struct fuse *f, @@ -2288,44 +2324,18 @@ lookup_path(struct fuse *f, struct fuse_entry_param *e, struct fuse_file_info *fi) { - int res; + int rv; memset(e,0,sizeof(struct fuse_entry_param)); - if(fi) - res = fuse_fs_fgetattr(f->fs,&e->attr,fi,&e->timeout); - else - res = fuse_fs_getattr(f->fs,path,&e->attr,&e->timeout); - - if(res == 0) - { - struct node *node; + rv = ((fi == NULL) ? 
+ fuse_fs_getattr(f->fs,path,&e->attr,&e->timeout) : + fuse_fs_fgetattr(f->fs,&e->attr,fi,&e->timeout)); - node = find_node(f,nodeid,name); - if(node == NULL) - { - res = -ENOMEM; - } - else - { - e->ino = node->nodeid; - e->generation = node->generation; - - pthread_mutex_lock(&f->lock); - update_stat(node,&e->attr); - pthread_mutex_unlock(&f->lock); - - set_stat(f,e->ino,&e->attr); - if(f->conf.debug) - fprintf(stderr, - " NODEID: %llu\n" - " GEN: %llu\n", - (unsigned long long)e->ino, - (unsigned long long)e->generation); - } - } + if(rv) + return rv; - return res; + return set_path_info(f,nodeid,name,e); } static struct fuse_context_i *fuse_get_context_internal(void) @@ -2926,26 +2936,34 @@ static void fuse_lib_rmdir(fuse_req_t req, fuse_ino_t parent, const char *name) reply_err(req, err); } -static void fuse_lib_symlink(fuse_req_t req, const char *linkname, - fuse_ino_t parent, const char *name) +static +void +fuse_lib_symlink(fuse_req_t req_, + const char *linkname_, + fuse_ino_t parent_, + const char *name_) { - struct fuse *f = req_fuse_prepare(req); - struct fuse_entry_param e; + int rv; char *path; - int err; + struct fuse *f; + struct fuse_entry_param e; - err = get_path_name(f, parent, name, &path); - if (!err) { - struct fuse_intr_data d; + f = req_fuse_prepare(req_); - fuse_prepare_interrupt(f, req, &d); - err = fuse_fs_symlink(f->fs, linkname, path); - if (!err) - err = lookup_path(f, parent, name, path, &e, NULL); - fuse_finish_interrupt(f, req, &d); - free_path(f, parent, path); - } - reply_entry(req, &e, err); + rv = get_path_name(f,parent_,name_,&path); + if(!rv) + { + struct fuse_intr_data d; + + fuse_prepare_interrupt(f,req_,&d); + rv = fuse_fs_symlink(f->fs,linkname_,path,&e.attr,&e.timeout); + if(rv == 0) + rv = set_path_info(f,parent_,name_,&e); + fuse_finish_interrupt(f,req_,&d); + free_path(f,parent_,path); + } + + reply_entry(req_,&e,rv); } static diff --git a/man/mergerfs.1 b/man/mergerfs.1 index 6487ff69..30890357 100644 --- 
a/man/mergerfs.1 +++ b/man/mergerfs.1 @@ -216,6 +216,10 @@ over NFS where there are issues with creating files for write while setting the mode to read\-only. (default: off) .IP \[bu] 2 +\f[B]followsymlinks=never|directory|regular|all\f[]: Turns symlinks into +what they point to. +(default: never) +.IP \[bu] 2 \f[B]posix_acl=BOOL\f[]: Enable POSIX ACL support (if supported by kernel and underlying filesystem). (default: false) @@ -513,6 +517,18 @@ On kernels before 4.20 the value has no effect. The reason the value is configurable is to enable experimentation and benchmarking. See the BENCHMARKING section for examples. +.SS followsymlinks +.PP +This feature, when enabled, will cause symlinks to be interpreted by +mergerfs as their target (depending on the mode). +.PP +When there is a getattr/stat request for a file, mergerfs will check if +the file is a symlink and, depending on the \f[C]followsymlinks\f[] +setting, will replace the information about the symlink with that of +its target. +.PP +When unlink\[aq]ing or rmdir\[aq]ing a followed symlink, mergerfs will +remove the symlink itself and not its target. 
.SS symlinkify .PP Due to the levels of indirection introduced by mergerfs and the diff --git a/src/config.cpp b/src/config.cpp index 6c8e1064..24f6b62e 100644 --- a/src/config.cpp +++ b/src/config.cpp @@ -79,6 +79,7 @@ Config::Config() direct_io(false), dropcacheonclose(false), fsname(), + followsymlinks(FollowSymlinks::ENUM::NEVER), /* documented default: never (feature is opt-in) */ func(), fuse_msg_size(FUSE_MAX_MAX_PAGES), ignorepponrename(false), @@ -120,6 +121,7 @@ Config::Config() _map["category.search"] = &category.search; _map["direct_io"] = &direct_io; _map["dropcacheonclose"] = &dropcacheonclose; + _map["followsymlinks"] = &followsymlinks; _map["fsname"] = &fsname; _map["func.access"] = &func.access; _map["func.chmod"] = &func.chmod; diff --git a/src/config.hpp b/src/config.hpp index c66f3815..ce4ce924 100644 --- a/src/config.hpp +++ b/src/config.hpp @@ -18,6 +18,7 @@ #include "branch.hpp" #include "config_cachefiles.hpp" +#include "config_followsymlinks.hpp" #include "config_inodecalc.hpp" #include "config_moveonenospc.hpp" #include "config_nfsopenhack.hpp" @@ -73,6 +74,7 @@ public: ConfigBOOL direct_io; ConfigBOOL dropcacheonclose; ConfigSTR fsname; + FollowSymlinks followsymlinks; Funcs func; ConfigUINT64 fuse_msg_size; ConfigBOOL ignorepponrename; diff --git a/src/config_followsymlinks.cpp b/src/config_followsymlinks.cpp new file mode 100644 index 00000000..28da467b --- /dev/null +++ b/src/config_followsymlinks.cpp @@ -0,0 +1,58 @@ +/* + ISC License + + Copyright (c) 2020, Antonio SJ Musumeci + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#include "config_followsymlinks.hpp" +#include "ef.hpp" +#include "errno.hpp" + +template<> +std::string +FollowSymlinks::to_string(void) const +{ + switch(_data) + { + case FollowSymlinks::ENUM::NEVER: + return "never"; + case FollowSymlinks::ENUM::DIRECTORY: + return "directory"; + case FollowSymlinks::ENUM::REGULAR: + return "regular"; + case FollowSymlinks::ENUM::ALL: + return "all"; + } + + return "invalid"; +} + +template<> +int +FollowSymlinks::from_string(const std::string &s_) +{ + if(s_ == "never") + _data = FollowSymlinks::ENUM::NEVER; + ef(s_ == "directory") + _data = FollowSymlinks::ENUM::DIRECTORY; + ef(s_ == "regular") + _data = FollowSymlinks::ENUM::REGULAR; + ef(s_ == "all") + _data = FollowSymlinks::ENUM::ALL; + else + return -EINVAL; + + return 0; +} diff --git a/src/config_followsymlinks.hpp b/src/config_followsymlinks.hpp new file mode 100644 index 00000000..1ebbdecf --- /dev/null +++ b/src/config_followsymlinks.hpp @@ -0,0 +1,30 @@ +/* + ISC License + + Copyright (c) 2020, Antonio SJ Musumeci + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#pragma once + +#include "enum.hpp" + +enum class FollowSymlinksEnum + { + NEVER, + DIRECTORY, + REGULAR, + ALL + }; +typedef Enum FollowSymlinks; diff --git a/src/fuse_getattr.cpp b/src/fuse_getattr.cpp index 54669a09..eeee67e0 100644 --- a/src/fuse_getattr.cpp +++ b/src/fuse_getattr.cpp @@ -19,6 +19,7 @@ #include "fs_inode.hpp" #include "fs_lstat.hpp" #include "fs_path.hpp" +#include "fs_stat.hpp" #include "symlinkify.hpp" #include "ugid.hpp" @@ -32,6 +33,42 @@ using std::vector; namespace l { + static + void + set_stat_if_leads_to_dir(const std::string &path_, + struct stat *st_) + { + int rv; + struct stat st; + + rv = fs::stat(path_,&st); + if(rv == -1) + return; + + if(S_ISDIR(st.st_mode)) + *st_ = st; + + return; + } + + static + void + set_stat_if_leads_to_reg(const std::string &path_, + struct stat *st_) + { + int rv; + struct stat st; + + rv = fs::stat(path_,&st); + if(rv == -1) + return; + + if(S_ISREG(st.st_mode)) + *st_ = st; + + return; + } + static int getattr_controlfile(struct stat *st_) @@ -64,7 +101,8 @@ namespace l const char *fusepath_, struct stat *st_, const bool symlinkify_, - const time_t symlinkify_timeout_) + const time_t symlinkify_timeout_, + FollowSymlinks followsymlinks_) { int rv; string fullpath; @@ -76,7 +114,28 @@ namespace l fullpath = fs::path::make(basepaths[0],fusepath_); - rv = fs::lstat(fullpath,st_); + switch(followsymlinks_) + { + case FollowSymlinks::ENUM::NEVER: + rv = fs::lstat(fullpath,st_); + break; + case FollowSymlinks::ENUM::DIRECTORY: + rv = fs::lstat(fullpath,st_); + if((rv == 0) && S_ISLNK(st_->st_mode)) /* st_ is only meaningful when lstat succeeded */ + l::set_stat_if_leads_to_dir(fullpath,st_); + break; + case FollowSymlinks::ENUM::REGULAR: + 
rv = fs::lstat(fullpath,st_); + if((rv == 0) && S_ISLNK(st_->st_mode)) /* st_ is only meaningful when lstat succeeded */ + l::set_stat_if_leads_to_reg(fullpath,st_); + break; + case FollowSymlinks::ENUM::ALL: + rv = fs::stat(fullpath,st_); + if(rv != 0) + rv = fs::lstat(fullpath,st_); + break; + } + if(rv == -1) return -errno; @@ -110,7 +169,8 @@ namespace FUSE fusepath_, st_, config.symlinkify, - config.symlinkify_timeout); + config.symlinkify_timeout, + config.followsymlinks); timeout_->entry = ((rv >= 0) ? config.cache_entry : diff --git a/src/fuse_rmdir.cpp b/src/fuse_rmdir.cpp index 9c650caf..003b1c61 100644 --- a/src/fuse_rmdir.cpp +++ b/src/fuse_rmdir.cpp @@ -16,8 +16,9 @@ #include "config.hpp" #include "errno.hpp" -#include "fs_rmdir.hpp" #include "fs_path.hpp" +#include "fs_rmdir.hpp" +#include "fs_unlink.hpp" #include "ugid.hpp" #include @@ -50,9 +51,21 @@ namespace l { static int - rmdir_loop_core(const string &basepath_, - const char *fusepath_, - const int error_) + should_unlink(int rv_, + int errno_, + FollowSymlinks followsymlinks_) + { + return ((rv_ == -1) && + (errno_ == ENOTDIR) && + (followsymlinks_ != FollowSymlinks::ENUM::NEVER)); + } + + static + int + rmdir_core(const string &basepath_, + const char *fusepath_, + const FollowSymlinks followsymlinks_, + const int error_) { int rv; string fullpath; @@ -60,6 +73,8 @@ namespace l fullpath = fs::path::make(basepath_,fusepath_); rv = fs::rmdir(fullpath); + if(l::should_unlink(rv,errno,followsymlinks_)) + rv = fs::unlink(fullpath); return error::calc(rv,error_,errno); } @@ -67,14 +82,15 @@ namespace l static int rmdir_loop(const vector &basepaths_, - const char *fusepath_) + const char *fusepath_, + const FollowSymlinks followsymlinks_) { int error; error = 0; for(size_t i = 0, ei = basepaths_.size(); i != ei; i++) { - error = l::rmdir_loop_core(basepaths_[i],fusepath_,error); + error = l::rmdir_core(basepaths_[i],fusepath_,followsymlinks_,error); } return -error; @@ -84,6 +100,7 @@ namespace l int rmdir(Policy::Func::Action actionFunc_, const Branches 
&branches_, + const FollowSymlinks followsymlinks_, const char *fusepath_) { int rv; @@ -93,7 +110,7 @@ namespace l if(rv == -1) return -errno; - return l::rmdir_loop(basepaths,fusepath_); + return l::rmdir_loop(basepaths,fusepath_,followsymlinks_); } } @@ -108,6 +125,7 @@ namespace FUSE return l::rmdir(config.func.rmdir.policy, config.branches, + config.followsymlinks, fusepath_); } } diff --git a/src/fuse_symlink.cpp b/src/fuse_symlink.cpp index c332d6f4..9e2e5767 100644 --- a/src/fuse_symlink.cpp +++ b/src/fuse_symlink.cpp @@ -16,9 +16,10 @@ #include "config.hpp" #include "errno.hpp" -#include "fs_symlink.hpp" #include "fs_clonepath.hpp" +#include "fs_lstat.hpp" #include "fs_path.hpp" +#include "fs_symlink.hpp" #include "ugid.hpp" #include @@ -59,6 +60,7 @@ namespace l symlink_loop_core(const string &newbasepath_, const char *oldpath_, const char *newpath_, + struct stat *st_, const int error_) { int rv; @@ -67,6 +69,8 @@ namespace l fullnewpath = fs::path::make(newbasepath_,newpath_); rv = fs::symlink(oldpath_,fullnewpath); + if(rv == 0) + rv = fs::lstat(fullnewpath,st_); return error::calc(rv,error_,errno); } @@ -77,7 +81,8 @@ namespace l const vector &newbasepaths_, const char *oldpath_, const char *newpath_, - const string &newdirpath_) + const string &newdirpath_, + struct stat *st_) { int rv; int error; @@ -92,6 +97,7 @@ namespace l error = l::symlink_loop_core(newbasepaths_[i], oldpath_, newpath_, + st_, error); } @@ -104,7 +110,8 @@ namespace l Policy::Func::Create createFunc_, const Branches &branches_, const char *oldpath_, - const char *newpath_) + const char *newpath_, + struct stat *st_) { int rv; string newdirpath; @@ -122,24 +129,35 @@ namespace l return -errno; return l::symlink_loop(existingpaths[0],newbasepaths, - oldpath_,newpath_,newdirpath); + oldpath_,newpath_,newdirpath,st_); } } namespace FUSE { int - symlink(const char *oldpath_, - const char *newpath_) + symlink(const char *oldpath_, + const char *newpath_, + struct stat *st_, + 
fuse_timeouts_t *timeout_) { + int rv; const fuse_context *fc = fuse_get_context(); const Config &config = Config::ro(); const ugid::Set ugid(fc->uid,fc->gid); - return l::symlink(config.func.getattr.policy, - config.func.symlink.policy, - config.branches, - oldpath_, - newpath_); + rv = l::symlink(config.func.getattr.policy, + config.func.symlink.policy, + config.branches, + oldpath_, + newpath_, + st_); + + timeout_->entry = ((rv >= 0) ? + config.cache_entry : + config.cache_negative_entry); + timeout_->attr = config.cache_attr; + + return rv; } } diff --git a/src/fuse_symlink.hpp b/src/fuse_symlink.hpp index 561a8300..ab5f7123 100644 --- a/src/fuse_symlink.hpp +++ b/src/fuse_symlink.hpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2016, Antonio SJ Musumeci + Copyright (c) 2020, Antonio SJ Musumeci Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted, provided that the above @@ -16,9 +16,15 @@ #pragma once +#include "fuse.h" + +#include + namespace FUSE { int - symlink(const char *oldpath, - const char *newpath); + symlink(const char *oldpath_, + const char *newpath_, + struct stat *st_, + fuse_timeouts_t *timeouts_); }