From 93218a343a9b3202aa459ca36613c473441ec486 Mon Sep 17 00:00:00 2001 From: Antonio SJ Musumeci Date: Thu, 16 Jul 2020 15:31:50 -0400 Subject: [PATCH] NFS open/creat hack A hack to work around non-POSIX NFS <-> FUSE behavior where it turns an open(O_EXCL|O_CREAT,0444) into multiple calls that results in EACCES. --- README.md | 14 ++++++- man/mergerfs.1 | 31 +++++++++++++- src/config.cpp | 2 + src/config.hpp | 4 +- src/config_nfsopenhack.cpp | 53 ++++++++++++++++++++++++ src/config_nfsopenhack.hpp | 30 ++++++++++++++ src/fs_cow.cpp | 11 +++-- src/fs_path.hpp | 17 ++++++++ src/fuse_chmod.cpp | 4 +- src/fuse_fchmod.cpp | 2 - src/fuse_open.cpp | 82 ++++++++++++++++++++++++++++++++++---- src/stat_util.hpp | 48 ++++++++++++++++++++++ 12 files changed, 280 insertions(+), 18 deletions(-) create mode 100644 src/config_nfsopenhack.cpp create mode 100644 src/config_nfsopenhack.hpp create mode 100644 src/stat_util.hpp diff --git a/README.md b/README.md index 382c6423..082fc21d 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ % mergerfs(1) mergerfs user manual % Antonio SJ Musumeci -% 2020-07-21 +% 2020-07-22 # NAME @@ -109,6 +109,7 @@ See the mergerfs [wiki for real world deployments](https://github.com/trapexit/m * **link_cow=BOOL**: When enabled if a regular file is opened which has a link count > 1 it will copy the file to a temporary file and rename over the original. Breaking the link and providing a basic copy-on-write function similar to cow-shell. (default: false) * **statfs=base|full**: Controls how statfs works. 'base' means it will always use all branches in statfs calculations. 'full' is in effect path preserving and only includes drives where the path exists. (default: base) * **statfs_ignore=none|ro|nc**: 'ro' will cause statfs calculations to ignore available space for branches mounted or tagged as 'read-only' or 'no create'. 'nc' will ignore available space for branches tagged as 'no create'. 
(default: none) +* **nfsopenhack=off|git|all**: A workaround for exporting mergerfs over NFS where there are issues with creating files for write while setting the mode to read-only. (default: off) * **posix_acl=BOOL**: Enable POSIX ACL support (if supported by kernel and underlying filesystem). (default: false) * **async_read=BOOL**: Perform reads asynchronously. If disabled or unavailable the kernel will ensure there is at most one pending read request per file handle and will attempt to order requests by offset. (default: true) * **fuse_msg_size=INT**: Set the max number of pages per FUSE message. Only available on Linux >= 4.20 and ignored otherwise. (min: 1; max: 256; default: 256) @@ -239,6 +240,15 @@ Runtime extended attribute support can be managed via the `xattr` option. By def `nosys` will cause mergerfs to return ENOSYS for any xattr call. The difference with `noattr` is that the kernel will cache this fact and itself short circuit future calls. This is more efficient than `noattr` but will cause mergerfs' runtime control via the hidden file to stop working. +### nfsopenhack + +NFS is not fully POSIX compliant and historically certain behaviors, such as opening files with O_EXCL, are not or not well supported. When mergerfs (or any FUSE filesystem) is exported over NFS some of these issues come up due to how NFS and FUSE interact. + +This hack addresses the issue where a file is created with a read-only mode but opened with a read/write or write-only flag. Normally this is perfectly valid but NFS chops the one open call into multiple calls. Exactly how it is translated depends on the configuration and versions of the NFS server and clients but it results in a permission error because a normal user is not allowed to open a read-only file as writable. + +Even though it's a more niche situation this hack breaks normal security and behavior and as such is `off` by default. 
If set to `git` it will only perform the hack when the path in question includes `/.git/`. `all` will result in it applying anytime a read-only file which is empty is opened for writing. + + # FUNCTIONS / POLICIES / CATEGORIES The POSIX filesystem API is made up of a number of functions. **creat**, **stat**, **chown**, etc. For ease of configuration in mergerfs most of the core functions are grouped into 3 categories: **action**, **create**, and **search**. These functions and categories can be assigned a policy which dictates which underlying branch/file/directory is chosen when performing that behavior. Any policy can be assigned to a function or category though some may not be very useful in practice. For instance: **rand** (random) may be useful for file creation (create) but could lead to very odd behavior if used for `chmod` if there were more than one copy of the file. @@ -275,7 +285,7 @@ With the `msp` or `most shared path` policies they are defined as `path preservi #### Filters -Policies basically search branches and create a list of files / paths for functions to work on. The policy is responsible for filtering and sorting. Filters include **minfreespace**, whether or not a branch is mounted read only, and the branch tagging (RO,NC,RW). The policy defines the sorting but filtering is mostly uniform as described below. +Policies basically search branches and create a list of files / paths for functions to work on. The policy is responsible for filtering and sorting. Filters include **minfreespace**, whether or not a branch is mounted read-only, and the branch tagging (RO,NC,RW). The policy defines the sorting but filtering is mostly uniform as described below. * No **search** policies filter. * All **action** policies will filter out branches which are mounted **read-only** or tagged as **RO (read-only)**. 
diff --git a/man/mergerfs.1 b/man/mergerfs.1 index 0aa05af1..1c441faf 100644 --- a/man/mergerfs.1 +++ b/man/mergerfs.1 @@ -1,7 +1,7 @@ .\"t .\" Automatically generated by Pandoc 1.19.2.4 .\" -.TH "mergerfs" "1" "2020\-07\-21" "mergerfs user manual" "" +.TH "mergerfs" "1" "2020\-07\-22" "mergerfs user manual" "" .hy .SH NAME .PP @@ -211,6 +211,11 @@ calculations to ignore available space for branches mounted or tagged as create\[aq]. (default: none) .IP \[bu] 2 +\f[B]nfsopenhack=off|git|all\f[]: A workaround for exporting mergerfs +over NFS where there are issues with creating files for write while +setting the mode to read\-only. +(default: off) +.IP \[bu] 2 \f[B]posix_acl=BOOL\f[]: Enable POSIX ACL support (if supported by kernel and underlying filesystem). (default: false) @@ -564,6 +569,28 @@ The difference with \f[C]noattr\f[] is that the kernel will cache this fact and itself short circuit future calls. This is more efficient than \f[C]noattr\f[] but will cause mergerfs\[aq] runtime control via the hidden file to stop working. +.SS nfsopenhack +.PP +NFS is not fully POSIX compliant and historically certain behaviors, +such as opening files with O_EXCL, are not or not well supported. +When mergerfs (or any FUSE filesystem) is exported over NFS some of +these issues come up due to how NFS and FUSE interact. +.PP +This hack addresses the issue where a file is created with a +read\-only mode but opened with a read/write or write\-only flag. +Normally this is perfectly valid but NFS chops the one open call into +multiple calls. +Exactly how it is translated depends on the configuration and versions +of the NFS server and clients but it results in a permission error +because a normal user is not allowed to open a read\-only file as +writable. +.PP +Even though it\[aq]s a more niche situation this hack breaks normal +security and behavior and as such is \f[C]off\f[] by default. 
+If set to \f[C]git\f[] it will only perform the hack when the path in +question includes \f[C]/.git/\f[]. +\f[C]all\f[] will result in it applying anytime a read\-only file which is +empty is opened for writing. .SH FUNCTIONS / POLICIES / CATEGORIES .PP The POSIX filesystem API is made up of a number of functions. @@ -666,7 +693,7 @@ Policies basically search branches and create a list of files / paths for functions to work on. The policy is responsible for filtering and sorting. Filters include \f[B]minfreespace\f[], whether or not a branch is -mounted read only, and the branch tagging (RO,NC,RW). +mounted read\-only, and the branch tagging (RO,NC,RW). The policy defines the sorting but filtering is mostly uniform as described below. .IP \[bu] 2 diff --git a/src/config.cpp b/src/config.cpp index 6053d0b4..d68e9514 100644 --- a/src/config.cpp +++ b/src/config.cpp @@ -88,6 +88,7 @@ Config::Config() minfreespace(MINFREESPACE_DEFAULT), mount(), moveonenospc(false), + nfsopenhack(NFSOpenHack::ENUM::OFF), nullrw(false), pid(::getpid()), posix_acl(false), @@ -149,6 +150,7 @@ Config::Config() _map["minfreespace"] = &minfreespace; _map["mount"] = &mount; _map["moveonenospc"] = &moveonenospc; + _map["nfsopenhack"] = &nfsopenhack; _map["nullrw"] = &nullrw; _map["pid"] = &pid; _map["posix_acl"] = &posix_acl; diff --git a/src/config.hpp b/src/config.hpp index 5c0d1e1d..f50ec293 100644 --- a/src/config.hpp +++ b/src/config.hpp @@ -18,8 +18,9 @@ #include "branch.hpp" #include "config_inodecalc.hpp" -#include "config_readdir.hpp" #include "config_moveonenospc.hpp" +#include "config_nfsopenhack.hpp" +#include "config_readdir.hpp" #include "enum.hpp" #include "errno.hpp" #include "func_category.hpp" @@ -111,6 +112,7 @@ public: ConfigUINT64 minfreespace; ConfigSTR mount; MoveOnENOSPC moveonenospc; + NFSOpenHack nfsopenhack; ConfigBOOL nullrw; ConfigUINT64 pid; ConfigBOOL posix_acl; diff --git a/src/config_nfsopenhack.cpp b/src/config_nfsopenhack.cpp new file mode 100644 index 
00000000..0f529479 --- /dev/null +++ b/src/config_nfsopenhack.cpp @@ -0,0 +1,53 @@ +/* + ISC License + + Copyright (c) 2020, Antonio SJ Musumeci + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#include "config_nfsopenhack.hpp" +#include "ef.hpp" + +template<> +int +NFSOpenHack::from_string(const std::string &s_) +{ + if(s_ == "off") + _data = NFSOpenHack::ENUM::OFF; + ef(s_ == "git") + _data = NFSOpenHack::ENUM::GIT; + ef(s_ == "all") + _data = NFSOpenHack::ENUM::ALL; + else + return -EINVAL; + + return 0; +} + +template<> +std::string +NFSOpenHack::to_string(void) const +{ + switch(_data) + { + case NFSOpenHack::ENUM::OFF: + return "off"; + case NFSOpenHack::ENUM::GIT: + return "git"; + case NFSOpenHack::ENUM::ALL: + return "all"; + } + + return std::string(); +} diff --git a/src/config_nfsopenhack.hpp b/src/config_nfsopenhack.hpp new file mode 100644 index 00000000..e6a3e797 --- /dev/null +++ b/src/config_nfsopenhack.hpp @@ -0,0 +1,30 @@ +/* + ISC License + + Copyright (c) 2020, Antonio SJ Musumeci + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. 
+ + THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#pragma once + +#include "enum.hpp" + +enum class NFSOpenHackEnum + { + OFF, + GIT, + ALL + }; + +typedef Enum NFSOpenHack; diff --git a/src/fs_cow.cpp b/src/fs_cow.cpp index e7ea0284..59bea813 100644 --- a/src/fs_cow.cpp +++ b/src/fs_cow.cpp @@ -25,14 +25,14 @@ #include "fs_base_stat.hpp" #include "fs_base_unlink.hpp" +#include + #include #include #include #include #include -#include - using std::string; static @@ -63,7 +63,12 @@ namespace fs bool is_eligible(const int flags_) { - return ((flags_ & O_RDWR) || (flags_ & O_WRONLY)); + int accmode; + + accmode = (flags_ & O_ACCMODE); + + return ((accmode == O_RDWR) || + (accmode == O_WRONLY)); } bool diff --git a/src/fs_path.hpp b/src/fs_path.hpp index 0bffb21f..6c117469 100644 --- a/src/fs_path.hpp +++ b/src/fs_path.hpp @@ -48,6 +48,23 @@ namespace fs base += suffix; } + static + inline + string + make(const char *base_, + const char *suffix_) + { + char back; + string path(base_); + + back = *path.rbegin(); + if((back != '/') && (suffix_[0] != '/')) + path.push_back('/'); + path += suffix_; + + return path; + } + static inline string diff --git a/src/fuse_chmod.cpp b/src/fuse_chmod.cpp index cf1e8113..c94f7955 100644 --- a/src/fuse_chmod.cpp +++ b/src/fuse_chmod.cpp @@ -21,11 +21,13 @@ #include "rv.hpp" #include "ugid.hpp" -#include +#include "fuse.h" #include #include +#include + using std::string; using std::vector; diff --git a/src/fuse_fchmod.cpp b/src/fuse_fchmod.cpp index a6a9847e..dcd2b863 100644 --- 
a/src/fuse_fchmod.cpp +++ b/src/fuse_fchmod.cpp @@ -20,8 +20,6 @@ #include -#include - namespace l { static diff --git a/src/fuse_open.cpp b/src/fuse_open.cpp index 2078e233..850b745e 100644 --- a/src/fuse_open.cpp +++ b/src/fuse_open.cpp @@ -17,13 +17,17 @@ #include "config.hpp" #include "errno.hpp" #include "fileinfo.hpp" +#include "fs_base_chmod.hpp" +#include "fs_base_fchmod.hpp" #include "fs_base_open.hpp" +#include "fs_base_stat.hpp" #include "fs_cow.hpp" #include "fs_path.hpp" #include "policy_cache.hpp" +#include "stat_util.hpp" #include "ugid.hpp" -#include +#include "fuse.h" #include #include @@ -34,6 +38,65 @@ typedef Config::CacheFiles CacheFiles; namespace l { + static + bool + rdonly(const int flags_) + { + return ((flags_ & O_ACCMODE) == O_RDONLY); + } + + static + int + chmod_and_open_if_not_writable_and_empty(const string &fullpath_, + const int flags_) + { + int rv; + struct stat st; + + rv = fs::lstat(fullpath_,&st); + if(rv == -1) + return (errno=EACCES,-1); + + if(StatUtil::writable_or_not_empty(st)) + return (errno=EACCES,-1); + + rv = fs::chmod(fullpath_,(st.st_mode|S_IWUSR|S_IWGRP)); + if(rv == -1) + return (errno=EACCES,-1); + + rv = fs::open(fullpath_,flags_); + if(rv == -1) + return (errno=EACCES,-1); + + fs::fchmod(rv,st.st_mode); + + return rv; + } + + static + int + nfsopenhack(const std::string &fullpath_, + const int flags_, + const NFSOpenHack nfsopenhack_) + { + switch(nfsopenhack_) + { + default: + case NFSOpenHack::ENUM::OFF: + return (errno=EACCES,-1); + case NFSOpenHack::ENUM::GIT: + if(l::rdonly(flags_)) + return (errno=EACCES,-1); + if(fullpath_.find("/.git/") == string::npos) + return (errno=EACCES,-1); + return l::chmod_and_open_if_not_writable_and_empty(fullpath_,flags_); + case NFSOpenHack::ENUM::ALL: + if(l::rdonly(flags_)) + return (errno=EACCES,-1); + return l::chmod_and_open_if_not_writable_and_empty(fullpath_,flags_); + } + } + /* The kernel expects being able to issue read requests when running with writeback 
caching enabled so we must change O_WRONLY to @@ -90,11 +153,12 @@ namespace l static int - open_core(const string &basepath_, - const char *fusepath_, - const int flags_, - const bool link_cow_, - uint64_t *fh_) + open_core(const string &basepath_, + const char *fusepath_, + const int flags_, + const bool link_cow_, + const NFSOpenHack nfsopenhack_, + uint64_t *fh_) { int fd; string fullpath; @@ -105,6 +169,8 @@ namespace l fs::cow::break_link(fullpath.c_str()); fd = fs::open(fullpath,flags_); + if((fd == -1) && (errno == EACCES)) + fd = l::nfsopenhack(fullpath,flags_,nfsopenhack_); if(fd == -1) return -errno; @@ -122,6 +188,7 @@ namespace l const char *fusepath_, const int flags_, const bool link_cow_, + const NFSOpenHack nfsopenhack_, uint64_t *fh_) { int rv; @@ -131,7 +198,7 @@ namespace l if(rv == -1) return -errno; - return l::open_core(basepath,fusepath_,flags_,link_cow_,fh_); + return l::open_core(basepath,fusepath_,flags_,link_cow_,nfsopenhack_,fh_); } } @@ -157,6 +224,7 @@ namespace FUSE fusepath_, ffi_->flags, config.link_cow, + config.nfsopenhack, &ffi_->fh); } } diff --git a/src/stat_util.hpp b/src/stat_util.hpp new file mode 100644 index 00000000..a0beb57d --- /dev/null +++ b/src/stat_util.hpp @@ -0,0 +1,48 @@ +/* + ISC License + + Copyright (c) 2020, Antonio SJ Musumeci + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#pragma once + +#include + +namespace StatUtil +{ + static + inline + bool + empty(const struct stat &st_) + { + return (st_.st_size == 0); + } + + static + inline + bool + writable(const struct stat &st_) + { + return (st_.st_mode & (S_IWUSR|S_IWGRP|S_IWOTH)); + } + + static + inline + bool + writable_or_not_empty(const struct stat &st_) + { + return (StatUtil::writable(st_) || !StatUtil::empty(st_)); + } +}