From c8fa51c81ae10f5b4494e070a876e47a3f81041b Mon Sep 17 00:00:00 2001 From: Antonio SJ Musumeci Date: Sun, 11 Dec 2016 18:27:52 -0500 Subject: [PATCH] support setting of inodes (using use_ino option) Creates a 64-bit inode value from the underlying device value and the original inode: final_ino = orig_ino | (dev << 32). Not perfect, but since few filesystems use 64-bit inodes and st_dev is usually no more than 16 bits, it should be fine. --- README.md | 15 +++++++++++---- man/mergerfs.1 | 17 ++++++++++++++++- src/fgetattr.cpp | 10 +++++++--- src/fs_inode.hpp | 42 ++++++++++++++++++++++++++++++++++++++++++ src/getattr.cpp | 39 ++++++++++++++++++++++----------------- src/readdir.cpp | 3 +++ 6 files changed, 101 insertions(+), 25 deletions(-) create mode 100644 src/fs_inode.hpp diff --git a/README.md b/README.md index ed78f3ba..33a2fba4 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ % mergerfs(1) mergerfs user manual % Antonio SJ Musumeci -% 2016-10-13 +% 2016-12-14 # NAME @@ -32,12 +32,13 @@ mergerfs -o<options> <srcmounts> <mountpoint> ###options### * **defaults**: a shortcut for FUSE's **atomic_o_trunc**, **auto_cache**, **big_writes**, **default_permissions**, **splice_move**, **splice_read**, and **splice_write**. These options seem to provide the best performance. -* **direct_io**: causes FUSE to bypass an addition caching step which can increase write speeds at the detriment of read speed. +* **direct_io**: causes FUSE to bypass an additional caching step which can increase write speeds at the detriment of read speed. * **minfreespace**: the minimum space value used for creation policies. Understands 'K', 'M', and 'G' to represent kilobyte, megabyte, and gigabyte respectively. (default: 4G) * **moveonenospc**: when enabled (set to **true**) if a **write** fails with **ENOSPC** or **EDQUOT** a scan of all drives will be done looking for the drive with most free space which is at least the size of the file plus the amount which failed to write. 
An attempt to move the file to that drive will occur (keeping all metadata possible) and if successful the original is unlinked and the write retried. (default: false) * **func.<func>=<policy>**: sets the specific FUSE function's policy. See below for the list of value types. Example: **func.getattr=newest** * **category.<category>=<policy>**: Sets policy of all FUSE functions in the provided category. Example: **category.create=mfs** * **fsname**: sets the name of the filesystem as seen in **mount**, **df**, etc. Defaults to a list of the source paths concatenated together with the longest common prefix removed. +* **use_ino**: causes mergerfs to supply file/directory inodes rather than libfuse. While not a default it is generally recommended it be enabled so that hard linked files share the same inode value. **NOTE:** Options are evaluated in the order listed so if the options are **func.rmdir=rand,category.action=ff** the **action** category setting will override the **rmdir** setting. @@ -325,11 +326,11 @@ Be sure to turn off `direct_io`. rtorrent and some other applications use [mmap] #### mmap performance is really bad -There [is a bug](https://lkml.org/lkml/2016/3/16/260) in caching which affects overall performance of mmap through FUSE in Linux 4.x kernels. It is fixed in [4.4.10 and 4.5.4](https://lkml.org/lkml/2016/5/11/59). +There [is a bug](https://lkml.org/lkml/2016/3/16/260) in caching which affects overall performance of mmap through FUSE in Linux 4.x kernels. It is fixed in [4.4.10 and 4.5.4](https://lkml.org/lkml/2016/5/11/59). #### Trashing files occasionally fails -This is the same issue as with Samba. `rename` returns `EXDEV` (in our case that will really only happen with path preserving policies like `epmfs`) and the software doesn't handle the situtation well. This is unfortunately a common failure of software which moves files around. 
The standard indicates that an implementation `MAY` choose to support non-user home directory trashing of files (which is a `MUST`). The implementation `MAY` also support "top directory trashes" which many probably do. +This is the same issue as with Samba. `rename` returns `EXDEV` (in our case that will really only happen with path preserving policies like `epmfs`) and the software doesn't handle the situation well. This is unfortunately a common failure of software which moves files around. The standard indicates that an implementation `MAY` choose to support non-user home directory trashing of files (which is a `MUST`). The implementation `MAY` also support "top directory trashes" which many probably do. To create a `$topdir/.Trash` directory as defined in the standard use the [mergerfs-tools](https://github.com/trapexit/mergerfs-tools) tool `mergerfs.mktrash`. @@ -422,6 +423,12 @@ Yes. It will be represented immediately in the pool as the policies would descri Please reread the sections above about policies, path preserving, and the **moveonenospc** option. If the policy is path preserving and a drive is almost full and the drive the policy would pick then the writing of the file may fill the drive and receive ENOSPC errors. That is expected with those settings. If you don't want that: enable **moveonenospc** and don't use a path preserving policy. +#### How are inodes calculated? + +mergerfs-inode = (original-inode | (device-id << 32)) + +While `ino_t` is 64 bits few filesystems use more than 32. Similarly, while `dev_t` is also 64 bits it was traditionally 16 bits. Bitwise or'ing them together should work most of the time. Should it cause a problem in the future the values could be hashed instead. + #### It's mentioned that there are some security issues with mhddfs. What are they? How does mergerfs address them? 
[mhddfs](https://github.com/trapexit/mhddfs) tries to handle being run as **root** by calling [getuid()](https://github.com/trapexit/mhddfs/blob/cae96e6251dd91e2bdc24800b4a18a74044f6672/src/main.c#L319) and if it returns **0** then it will [chown](http://linux.die.net/man/1/chown) the file. Not only is that a race condition but it doesn't handle many other situations. Rather than attempting to simulate POSIX ACL behaviors the proper behavior is to use [seteuid](http://linux.die.net/man/2/seteuid) and [setegid](http://linux.die.net/man/2/setegid), become the user making the original call and perform the action as them. This is how [mergerfs](https://github.com/trapexit/mergerfs) handles things. diff --git a/man/mergerfs.1 b/man/mergerfs.1 index 3bcb0ec9..cdf8bdd8 100644 --- a/man/mergerfs.1 +++ b/man/mergerfs.1 @@ -1,5 +1,5 @@ .\"t -.TH "mergerfs" "1" "2016\-10\-13" "mergerfs user manual" "" +.TH "mergerfs" "1" "2016\-12\-14" "mergerfs user manual" "" .SH NAME .PP mergerfs \- another (FUSE based) union filesystem @@ -73,6 +73,11 @@ Example: \f[B]category.create=mfs\f[] \f[B]mount\f[], \f[B]df\f[], etc. Defaults to a list of the source paths concatenated together with the longest common prefix removed. +.IP \[bu] 2 +\f[B]use_ino\f[]: causes mergerfs to supply file/directory inodes rather +than libfuse. +While not a default it is generally recommended it be enabled so that +hard linked files share the same inode value. .PP \f[B]NOTE:\f[] Options are evaluated in the order listed so if the options are \f[B]func.rmdir=rand,category.action=ff\f[] the @@ -959,6 +964,16 @@ drive and receive ENOSPC errors. That is expected with those settings. If you don\[aq]t want that: enable \f[B]moveonenospc\f[] and don\[aq]t use a path preserving policy. +.SS How are inodes calculated? +.PP +mergerfs\-inode = (original\-inode | (device\-id << 32)) +.PP +While \f[C]ino_t\f[] is 64 bits few filesystems use more than 32. 
+Similarly, while \f[C]dev_t\f[] is also 64 bits it was traditionally 16 +bits. +Bitwise or\[aq]ing them together should work most of the time. +Should it cause a problem in the future the values could be hashed +instead. .SS It\[aq]s mentioned that there are some security issues with mhddfs. What are they? How does mergerfs address them? .PP diff --git a/src/fgetattr.cpp b/src/fgetattr.cpp index cc4da5c2..82b7a70d 100644 --- a/src/fgetattr.cpp +++ b/src/fgetattr.cpp @@ -19,6 +19,7 @@ #include "errno.hpp" #include "fileinfo.hpp" #include "fs_base_stat.hpp" +#include "fs_inode.hpp" static int @@ -28,8 +29,12 @@ _fgetattr(const int fd, int rv; rv = fs::fstat(fd,st); + if(rv == -1) + return -errno; - return ((rv == -1) ? -errno : 0); + fs::inode::recompute(st); + + return 0; } namespace mergerfs @@ -43,8 +48,7 @@ namespace mergerfs { FileInfo *fi = reinterpret_cast<FileInfo*>(ffi->fh); - return _fgetattr(fi->fd, - *st); + return _fgetattr(fi->fd,*st); } } } diff --git a/src/fs_inode.hpp b/src/fs_inode.hpp new file mode 100644 index 00000000..b217305f --- /dev/null +++ b/src/fs_inode.hpp @@ -0,0 +1,42 @@ +/* + ISC License + + Copyright (c) 2016, Antonio SJ Musumeci + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+*/ + +#ifndef __FS_INODE_HPP__ +#define __FS_INODE_HPP__ + +#include <sys/stat.h> + +namespace fs +{ + namespace inode + { + enum + { + MAGIC = 0x7472617065786974 + }; + + inline + void + recompute(struct stat &st) + { + st.st_ino |= (st.st_dev << 32); + } + } +} + +#endif diff --git a/src/getattr.cpp b/src/getattr.cpp index eaba1efb..f3bf450f 100644 --- a/src/getattr.cpp +++ b/src/getattr.cpp @@ -22,6 +22,7 @@ #include "config.hpp" #include "errno.hpp" #include "fs_base_stat.hpp" +#include "fs_inode.hpp" #include "fs_path.hpp" #include "rwlock.hpp" #include "ugid.hpp" @@ -32,25 +33,25 @@ using mergerfs::Policy; static int -_getattr_controlfile(struct stat &buf) +_getattr_controlfile(struct stat &st) { static const uid_t uid = ::getuid(); static const gid_t gid = ::getgid(); static const time_t now = ::time(NULL); - buf.st_dev = 0; - buf.st_ino = 0; - buf.st_mode = (S_IFREG|S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH); - buf.st_nlink = 1; - buf.st_uid = uid; - buf.st_gid = gid; - buf.st_rdev = 0; - buf.st_size = 0; - buf.st_blksize = 1024; - buf.st_blocks = 0; - buf.st_atime = now; - buf.st_mtime = now; - buf.st_ctime = now; + st.st_dev = 0; + st.st_ino = fs::inode::MAGIC; + st.st_mode = (S_IFREG|S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH); + st.st_nlink = 1; + st.st_uid = uid; + st.st_gid = gid; + st.st_rdev = 0; + st.st_size = 0; + st.st_blksize = 512; + st.st_blocks = 0; + st.st_atime = now; + st.st_mtime = now; + st.st_ctime = now; return 0; } @@ -61,7 +62,7 @@ _getattr(Policy::Func::Search searchFunc, const vector<string> &srcmounts, const uint64_t minfreespace, const char *fusepath, - struct stat &buf) + struct stat &st) { int rv; string fullpath; @@ -73,9 +74,13 @@ _getattr(Policy::Func::Search searchFunc, fs::path::make(basepaths[0],fusepath,fullpath); - rv = fs::lstat(fullpath,buf); + rv = fs::lstat(fullpath,st); + if(rv == -1) + return -errno; - return ((rv == -1) ? 
-errno : 0); + fs::inode::recompute(st); + + return 0; } namespace mergerfs diff --git a/src/readdir.cpp b/src/readdir.cpp index c82f2368..f14b93b1 100644 --- a/src/readdir.cpp +++ b/src/readdir.cpp @@ -27,6 +27,8 @@ #include "fs_base_closedir.hpp" #include "fs_base_opendir.hpp" #include "fs_base_readdir.hpp" +#include "fs_base_stat.hpp" +#include "fs_inode.hpp" #include "fs_path.hpp" #include "readdir.hpp" #include "rwlock.hpp" @@ -50,6 +52,7 @@ _readdir(const vector<string> &srcmounts, struct stat st = {0}; StrSet names; + st.st_ino = fs::inode::MAGIC; for(size_t i = 0, ei = srcmounts.size(); i != ei; i++) { DIR *dh;