From b4397f7f2efca6314269658f2d1feb9c572cec49 Mon Sep 17 00:00:00 2001 From: Antonio SJ Musumeci Date: Mon, 22 Jun 2020 13:03:48 -0400 Subject: [PATCH] add 'inodecalc' option to allow selection of inode calculation algo --- README.md | 79 ++++++++------ man/mergerfs.1 | 177 +++++++++++++++++++++----------- src/config.cpp | 3 +- src/config.hpp | 2 + src/config_inodecalc.cpp | 37 +++++++ src/config_inodecalc.hpp | 31 ++++++ src/fs_inode.cpp | 164 +++++++++++++++++++++++++++++ src/fs_inode.hpp | 41 ++++---- src/fuse_fgetattr.cpp | 9 +- src/fuse_getattr.cpp | 2 +- src/fuse_readdir_linux.cpp | 17 ++- src/fuse_readdir_plus_linux.cpp | 23 ++++- src/fuse_readdir_plus_posix.cpp | 15 ++- src/fuse_readdir_posix.cpp | 8 +- 14 files changed, 474 insertions(+), 134 deletions(-) create mode 100644 src/config_inodecalc.cpp create mode 100644 src/config_inodecalc.hpp create mode 100644 src/fs_inode.cpp diff --git a/README.md b/README.md index 9f176af2..8a2283b5 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ % mergerfs(1) mergerfs user manual % Antonio SJ Musumeci -% 2020-05-25 +% 2020-06-28 # NAME @@ -98,6 +98,7 @@ See the mergerfs [wiki for real world deployments](https://github.com/trapexit/m * **minfreespace=SIZE**: The minimum space value used for creation policies. Understands 'K', 'M', and 'G' to represent kilobyte, megabyte, and gigabyte respectively. (default: 4G) * **moveonenospc=BOOL**: When enabled if a **write** fails with **ENOSPC** (no space left on device) or **EDQUOT** (disk quota exceeded) a scan of all drives will be done looking for the drive with the most free space which is at least the size of the file plus the amount which failed to write. An attempt to move the file to that drive will occur (keeping all metadata possible) and if successful the original is unlinked and the write retried. (default: false) * **use_ino**: Causes mergerfs to supply file/directory inodes rather than libfuse. 
While not a default it is recommended it be enabled so that linked files share the same inode value. +* **inodecalc=passthrough|path-hash|devino-hash|hybrid-hash**: Selects the inode calculation algorithm. (default: hybrid-hash) * **dropcacheonclose=BOOL**: When a file is requested to be closed call `posix_fadvise` on it first to instruct the kernel that we no longer need the data and it can drop its cache. Recommended when **cache.files=partial|full|auto-full** to limit double caching. (default: false) * **symlinkify=BOOL**: When enabled and a file is not writable and its mtime or ctime is older than **symlinkify_timeout** files will be reported as symlinks to the original files. Please read more below before using. (default: false) * **symlinkify_timeout=INT**: Time to wait, in seconds, to activate the **symlinkify** behavior. (default: 3600) @@ -171,6 +172,37 @@ To have the pool mounted at boot or otherwise accessible from related tools use **NOTE:** for mounting via **fstab** to work you must have **mount.fuse** installed. For Ubuntu/Debian it is included in the **fuse** package. +### inodecalc + +Inodes (st_ino) are unique identifiers within a filesystem. Each mounted filesystem has a device ID (st_dev) as well and together they can uniquely identify a file on the whole of the system. Entries on the same device with the same inode are in fact references to the same underlying file. It is a many to one relationship between names and an inode. Directories, however, do not have multiple links on most systems due to the complexity they add. + +FUSE allows the server (mergerfs) to set inode values but not device IDs. Creating an inode value is somewhat complex in mergerfs' case as files aren't really in its control. If a policy changes what directory or file is to be selected or something changes out of band it becomes unclear what value should be used. Most software does not care what the values are but those that do often break if a value changes unexpectedly. 
The tool `find` will abort a directory walk if it sees a directory inode change. NFS will return stale handle errors if the inode changes out of band. File dedup tools will usually leverage device ids and inodes as a shortcut in searching for duplicate files and would resort to full file comparisons should it find different inode values. + +mergerfs offers multiple ways to calculate the inode in hopes of covering different usecases. + +* passthrough: Passes through the underlying inode value. Mostly intended for testing as using this does not address any of the problems mentioned above and could confuse file deduplication software as inodes from different filesystems can be the same. +* path-hash: Hashes the relative path of the entry in question. The underlying file's values are completely ignored. This means the inode value will always be the same for that file path. This is useful when using NFS and you make changes out of band such as copy data between branches. This also means that entries that do point to the same file will not be recognizable via inodes. That **does not** mean hard links don't work. They will. +* devino-hash: Hashes the device id and inode of the underlying entry. This won't prevent issues with NFS should the policy pick a different file or files move out of band but will present the same inode for underlying files that do too. +* hybrid-hash: Performs `path-hash` on directories and `devino-hash` on other file types. Since directories can't have hard links the static value won't make a difference and the files will get values useful for finding duplicates. Probably the best to use if not using NFS. As such it is the default. + +While there is a risk of hash collision in tests of a couple million entries there were zero collisions. Unlike a typical filesystem FUSE filesystems can reuse inodes and not refer to the same entry. The internal identifier used to reference a file in FUSE is different from the inode value presented. 
The former is the `nodeid` and is actually a tuple of 2 64bit values: `nodeid` and `generation`. This tuple is not client facing. The inode that is presented to the client is passed through the kernel uninterpreted. + +From FUSE docs regarding `use_ino`: + +``` +Honor the st_ino field in the functions getattr() and +fill_dir(). This value is used to fill in the st_ino field +in the stat(2), lstat(2), fstat(2) functions and the d_ino +field in the readdir(2) function. The filesystem does not +have to guarantee uniqueness, however some applications +rely on this value being unique for the whole filesystem. +Note that this does *not* affect the inode that libfuse +and the kernel use internally (also called the "nodeid"). +``` + +In the future the `use_ino` option will probably be removed as this feature should replace the original libfuse inode calculation strategy. Currently you still need to use `use_ino` in order to enable `inodecalc`. + + ### fuse_msg_size FUSE applications communicate with the kernel over a special character device: `/dev/fuse`. A large portion of the overhead associated with FUSE is the cost of going back and forth from user space and kernel space over that device. Generally speaking the fewer trips needed the better the performance will be. Reducing the number of trips can be done a number of ways. Kernel level caching and increasing message sizes being two significant ones. When it comes to reads and writes if the message size is doubled the number of trips are approximately halved. @@ -809,14 +841,13 @@ Use `cache.files=off` and/or `dropcacheonclose=true`. See the section on page ca #### NFS clients returning ESTALE / Stale file handle -Be sure to use `noforget` and `use_ino` arguments. - +NFS does not like out of band changes. That is especially true of inode values. -#### NFS clients don't work +Be sure to use the following options: -Some NFS clients appear to fail when a mergerfs mount is exported. Kodi in particular seems to have issues. 
- -Try enabling the `use_ino` option. Some have reported that it fixes the issue. +* noforget +* use_ino +* inodecalc=path-hash #### rtorrent fails with ENODEV (No such device) @@ -1005,7 +1036,7 @@ This catches a lot of new users off guard but changing the default would break t #### Do hard links work? -Yes. You need to use `use_ino` to support proper reporting of inodes. +Yes. You need to use `use_ino` to support proper reporting of inodes but they work regardless. See also the option `inodecalc`. What mergerfs does not do is fake hard links across branches. Read the section "rename & link" for how it works. @@ -1123,9 +1154,13 @@ Are you using ext4? With reserve for root? mergerfs uses available space for sta #### Can mergerfs mounts be exported over NFS? -Yes. Due to current usage of libfuse by mergerfs and how NFS interacts with it it is necessary to add `noforget` to mergerfs options to keep from getting "stale file handle" errors. +Yes, however if you do anything which may change files out of band (including for example using the `newest` policy) it will result in "stale file handle" errors. + +Be sure to use the following options: -Some clients (Kodi) have issues in which the contents of the NFS mount will not be presented but users have found that enabling the `use_ino` option often fixes that problem. +* noforget +* use_ino +* inodecalc=path-hash #### Can mergerfs mounts be exported over Samba / SMB? @@ -1133,30 +1168,6 @@ Some clients (Kodi) have issues in which the contents of the NFS mount will not Yes. While some users have reported problems it appears to always be related to how Samba is setup in relation to permissions. -#### How are inodes calculated? - -https://github.com/trapexit/mergerfs/blob/master/src/fs_inode.hpp - -Originally tried to simply OR st_ino and (st_dev << 32) for 64bit systems. After a number of years someone finally ran into a collision that lead to some problems. 
Traditionally `dev_t` was 16bit and `ino_t` was 32bit so merging into one 64bit value worked but with both types being able to be up to 64bit that is no longer as simple. A proper hash seems like the best compromise. While totally unique inodes are preferred the overhead which would be needed does not seem to be outweighed by the benefits. - -While atypical, yes, inodes can be reused and not refer to the same file. The internal id used to reference a file in FUSE is different from the inode value presented. The former is the `nodeid` and is actually a tuple of (nodeid,generation). That tuple is not user facing. The inode is merely metadata passed through the kernel and found using the `stat` family of calls or `readdir`. - -From FUSE docs regarding `use_ino`: - -``` -Honor the st_ino field in the functions getattr() and -fill_dir(). This value is used to fill in the st_ino field -in the stat(2), lstat(2), fstat(2) functions and the d_ino -field in the readdir(2) function. The filesystem does not -have to guarantee uniqueness, however some applications -rely on this value being unique for the whole filesystem. -Note that this does *not* affect the inode that libfuse -and the kernel use internally (also called the "nodeid"). -``` - -Generally collision, if it occurs, shouldn't be a problem. You can turn off the calculation by not using `use_ino`. In the future it might be worth creating different strategies for users to select from. - - #### I notice massive slowdowns of writes when enabling cache.files. When file caching is enabled in any form (`cache.files!=off` or `direct_io=false`) it will issue `getxattr` requests for `security.capability` prior to *every single write*. This will usually result in a performance degregation, especially when using a network filesystem (such as NFS or CIFS/SMB/Samba.) Unfortunately at this moment the kernel is not caching the response. 
diff --git a/man/mergerfs.1 b/man/mergerfs.1 index 5cff291a..8321cff5 100644 --- a/man/mergerfs.1 +++ b/man/mergerfs.1 @@ -1,7 +1,7 @@ .\"t .\" Automatically generated by Pandoc 1.19.2.4 .\" -.TH "mergerfs" "1" "2020\-05\-25" "mergerfs user manual" "" +.TH "mergerfs" "1" "2020\-06\-28" "mergerfs user manual" "" .hy .SH NAME .PP @@ -146,6 +146,10 @@ than libfuse. While not a default it is recommended it be enabled so that linked files share the same inode value. .IP \[bu] 2 +\f[B]inodecalc=passthrough|path\-hash|devino\-hash|hybrid\-hash\f[]: +Selects the inode calculation algorithm. +(default: hybrid\-hash) +.IP \[bu] 2 \f[B]dropcacheonclose=BOOL\f[]: When a file is requested to be closed call \f[C]posix_fadvise\f[] on it first to instruct the kernel that we no longer need the data and it can drop its cache. @@ -373,6 +377,96 @@ be automatically included. \f[B]NOTE:\f[] for mounting via \f[B]fstab\f[] to work you must have \f[B]mount.fuse\f[] installed. For Ubuntu/Debian it is included in the \f[B]fuse\f[] package. +.SS inodecalc +.PP +Inodes (st_ino) are unique identifiers within a filesystem. +Each mounted filesystem has a device ID (st_dev) as well and together they +can uniquely identify a file on the whole of the system. +Entries on the same device with the same inode are in fact references to +the same underlying file. +It is a many to one relationship between names and an inode. +Directories, however, do not have multiple links on most systems due to +the complexity they add. +.PP +FUSE allows the server (mergerfs) to set inode values but not device +IDs. +Creating an inode value is somewhat complex in mergerfs\[aq] case as +files aren\[aq]t really in its control. +If a policy changes what directory or file is to be selected or +something changes out of band it becomes unclear what value should be +used. +Most software does not care what the values are but those that do +often break if a value changes unexpectedly. 
+The tool \f[C]find\f[] will abort a directory walk if it sees a +directory inode change. +NFS will return stale handle errors if the inode changes out of band. +File dedup tools will usually leverage device ids and inodes as a +shortcut in searching for duplicate files and would resort to full file +comparisons should it find different inode values. +.PP +mergerfs offers multiple ways to calculate the inode in hopes of +covering different usecases. +.IP \[bu] 2 +passthrough: Passes through the underlying inode value. +Mostly intended for testing as using this does not address any of the +problems mentioned above and could confuse file deduplication software +as inodes from different filesystems can be the same. +.IP \[bu] 2 +path\-hash: Hashes the relative path of the entry in question. +The underlying file\[aq]s values are completely ignored. +This means the inode value will always be the same for that file path. +This is useful when using NFS and you make changes out of band such as +copy data between branches. +This also means that entries that do point to the same file will not be +recognizable via inodes. +That \f[B]does not\f[] mean hard links don\[aq]t work. +They will. +.IP \[bu] 2 +devino\-hash: Hashes the device id and inode of the underlying entry. +This won\[aq]t prevent issues with NFS should the policy pick a +different file or files move out of band but will present the same inode +for underlying files that do too. +.IP \[bu] 2 +hybrid\-hash: Performs \f[C]path\-hash\f[] on directories and +\f[C]devino\-hash\f[] on other file types. +Since directories can\[aq]t have hard links the static value won\[aq]t +make a difference and the files will get values useful for finding +duplicates. +Probably the best to use if not using NFS. +As such it is the default. +.PP +While there is a risk of hash collision in tests of a couple million +entries there were zero collisions. 
+Unlike a typical filesystem FUSE filesystems can reuse inodes and not +refer to the same entry. +The internal identifier used to reference a file in FUSE is different +from the inode value presented. +The former is the \f[C]nodeid\f[] and is actually a tuple of 2 64bit +values: \f[C]nodeid\f[] and \f[C]generation\f[]. +This tuple is not client facing. +The inode that is presented to the client is passed through the kernel +uninterpreted. +.PP +From FUSE docs regarding \f[C]use_ino\f[]: +.IP +.nf +\f[C] +Honor\ the\ st_ino\ field\ in\ the\ functions\ getattr()\ and +fill_dir().\ This\ value\ is\ used\ to\ fill\ in\ the\ st_ino\ field +in\ the\ stat(2),\ lstat(2),\ fstat(2)\ functions\ and\ the\ d_ino +field\ in\ the\ readdir(2)\ function.\ The\ filesystem\ does\ not +have\ to\ guarantee\ uniqueness,\ however\ some\ applications +rely\ on\ this\ value\ being\ unique\ for\ the\ whole\ filesystem. +Note\ that\ this\ does\ *not*\ affect\ the\ inode\ that\ libfuse +and\ the\ kernel\ use\ internally\ (also\ called\ the\ "nodeid"). +\f[] +.fi +.PP +In the future the \f[C]use_ino\f[] option will probably be removed as +this feature should replace the original libfuse inode calculation +strategy. +Currently you still need to use \f[C]use_ino\f[] in order to enable +\f[C]inodecalc\f[]. .SS fuse_msg_size .PP FUSE applications communicate with the kernel over a special character @@ -1748,14 +1842,16 @@ Use \f[C]cache.files=off\f[] and/or \f[C]dropcacheonclose=true\f[]. See the section on page caching. .SS NFS clients returning ESTALE / Stale file handle .PP -Be sure to use \f[C]noforget\f[] and \f[C]use_ino\f[] arguments. -.SS NFS clients don\[aq]t work -.PP -Some NFS clients appear to fail when a mergerfs mount is exported. -Kodi in particular seems to have issues. +NFS does not like out of band changes. +That is especially true of inode values. .PP -Try enabling the \f[C]use_ino\f[] option. -Some have reported that it fixes the issue. 
+Be sure to use the following options: +.IP \[bu] 2 +noforget +.IP \[bu] 2 +use_ino +.IP \[bu] 2 +inodecalc=path\-hash .SS rtorrent fails with ENODEV (No such device) .PP Be sure to set \f[C]cache.files=partial|full|auto\-full\f[] or turn off @@ -2108,7 +2204,9 @@ together directory content by drive. .SS Do hard links work? .PP Yes. -You need to use \f[C]use_ino\f[] to support proper reporting of inodes. +You need to use \f[C]use_ino\f[] to support proper reporting of inodes +but they work regardless. +See also the option \f[C]inodecalc\f[]. .PP What mergerfs does not do is fake hard links across branches. Read the section "rename & link" for how it works. @@ -2317,63 +2415,22 @@ mergerfs uses available space for statfs calculations. If you\[aq]ve reserved space for root then it won\[aq]t show up. .SS Can mergerfs mounts be exported over NFS? .PP -Yes. -Due to current usage of libfuse by mergerfs and how NFS interacts with -it it is necessary to add \f[C]noforget\f[] to mergerfs options to keep -from getting "stale file handle" errors. +Yes, however if you do anything which may change files out of band +(including for example using the \f[C]newest\f[] policy) it will result +in "stale file handle" errors. .PP -Some clients (Kodi) have issues in which the contents of the NFS mount -will not be presented but users have found that enabling the -\f[C]use_ino\f[] option often fixes that problem. +Be sure to use the following options: +.IP \[bu] 2 +noforget +.IP \[bu] 2 +use_ino +.IP \[bu] 2 +inodecalc=path\-hash .SS Can mergerfs mounts be exported over Samba / SMB? .PP Yes. While some users have reported problems it appears to always be related to how Samba is setup in relation to permissions. -.SS How are inodes calculated? -.PP -https://github.com/trapexit/mergerfs/blob/master/src/fs_inode.hpp -.PP -Originally tried to simply OR st_ino and (st_dev << 32) for 64bit -systems. -After a number of years someone finally ran into a collision that lead -to some problems. 
-Traditionally \f[C]dev_t\f[] was 16bit and \f[C]ino_t\f[] was 32bit so -merging into one 64bit value worked but with both types being able to be -up to 64bit that is no longer as simple. -A proper hash seems like the best compromise. -While totally unique inodes are preferred the overhead which would be -needed does not seem to be outweighed by the benefits. -.PP -While atypical, yes, inodes can be reused and not refer to the same -file. -The internal id used to reference a file in FUSE is different from the -inode value presented. -The former is the \f[C]nodeid\f[] and is actually a tuple of -(nodeid,generation). -That tuple is not user facing. -The inode is merely metadata passed through the kernel and found using -the \f[C]stat\f[] family of calls or \f[C]readdir\f[]. -.PP -From FUSE docs regarding \f[C]use_ino\f[]: -.IP -.nf -\f[C] -Honor\ the\ st_ino\ field\ in\ the\ functions\ getattr()\ and -fill_dir().\ This\ value\ is\ used\ to\ fill\ in\ the\ st_ino\ field -in\ the\ stat(2),\ lstat(2),\ fstat(2)\ functions\ and\ the\ d_ino -field\ in\ the\ readdir(2)\ function.\ The\ filesystem\ does\ not -have\ to\ guarantee\ uniqueness,\ however\ some\ applications -rely\ on\ this\ value\ being\ unique\ for\ the\ whole\ filesystem. -Note\ that\ this\ does\ *not*\ affect\ the\ inode\ that\ libfuse -and\ the\ kernel\ use\ internally\ (also\ called\ the\ "nodeid"). -\f[] -.fi -.PP -Generally collision, if it occurs, shouldn\[aq]t be a problem. -You can turn off the calculation by not using \f[C]use_ino\f[]. -In the future it might be worth creating different strategies for users -to select from. .SS I notice massive slowdowns of writes when enabling cache.files. 
.PP When file caching is enabled in any form (\f[C]cache.files!=off\f[] or diff --git a/src/config.cpp b/src/config.cpp index 93297e65..6053d0b4 100644 --- a/src/config.cpp +++ b/src/config.cpp @@ -83,6 +83,7 @@ Config::Config() func(), fuse_msg_size(FUSE_MAX_MAX_PAGES), ignorepponrename(false), + inodecalc("hybrid-hash"), link_cow(false), minfreespace(MINFREESPACE_DEFAULT), mount(), @@ -142,6 +143,7 @@ Config::Config() _map["func.utimens"] = &func.utimens; _map["fuse_msg_size"] = &fuse_msg_size; _map["ignorepponrename"] = &ignorepponrename; + _map["inodecalc"] = &inodecalc; _map["kernel_cache"] = &kernel_cache; _map["link_cow"] = &link_cow; _map["minfreespace"] = &minfreespace; @@ -392,7 +394,6 @@ Config::set(const std::string &key_, return set_raw(key_,value_); } - std::ostream& operator<<(std::ostream &os_, const Config &c_) diff --git a/src/config.hpp b/src/config.hpp index 26e32f99..14c64bbb 100644 --- a/src/config.hpp +++ b/src/config.hpp @@ -17,6 +17,7 @@ #pragma once #include "branch.hpp" +#include "config_inodecalc.hpp" #include "config_readdir.hpp" #include "enum.hpp" #include "errno.hpp" @@ -103,6 +104,7 @@ public: Funcs func; ConfigUINT64 fuse_msg_size; ConfigBOOL ignorepponrename; + InodeCalc inodecalc; ConfigBOOL kernel_cache; ConfigBOOL link_cow; ConfigUINT64 minfreespace; diff --git a/src/config_inodecalc.cpp b/src/config_inodecalc.cpp new file mode 100644 index 00000000..b471004d --- /dev/null +++ b/src/config_inodecalc.cpp @@ -0,0 +1,37 @@ +/* + ISC License + + Copyright (c) 2020, Antonio SJ Musumeci + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#include "config_inodecalc.hpp" +#include "fs_inode.hpp" + +InodeCalc::InodeCalc(const std::string &s_) +{ + fs::inode::set_algo(s_); +} + +std::string +InodeCalc::to_string(void) const +{ + return fs::inode::get_algo(); +} + +int +InodeCalc::from_string(const std::string &s_) +{ + return fs::inode::set_algo(s_); +} diff --git a/src/config_inodecalc.hpp b/src/config_inodecalc.hpp new file mode 100644 index 00000000..b87b7fc5 --- /dev/null +++ b/src/config_inodecalc.hpp @@ -0,0 +1,31 @@ +/* + ISC License + + Copyright (c) 2020, Antonio SJ Musumeci + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+*/ + +#pragma once + +#include "tofrom_string.hpp" + +class InodeCalc : public ToFromString +{ +public: + InodeCalc(const std::string &); + +public: + std::string to_string(void) const; + int from_string(const std::string &); +}; diff --git a/src/fs_inode.cpp b/src/fs_inode.cpp new file mode 100644 index 00000000..7da22f62 --- /dev/null +++ b/src/fs_inode.cpp @@ -0,0 +1,164 @@ +/* + ISC License + + Copyright (c) 2020, Antonio SJ Musumeci + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+*/ + +#include "ef.hpp" +#include "fasthash.h" +#include "fs_inode.hpp" + +#include + +#include +#include +#include +#include + +typedef uint64_t (*inodefunc_t)(const char*,const uint64_t,const mode_t,const dev_t,const ino_t); + +static uint64_t hybrid_hash(const char*,const uint64_t,const mode_t,const dev_t,const ino_t); + +static inodefunc_t g_func = hybrid_hash; + +static +uint64_t +passthrough(const char *fusepath_, + const uint64_t fusepath_len_, + const mode_t mode_, + const dev_t dev_, + const ino_t ino_) +{ + return ino_; +} + +static +uint64_t +path_hash(const char *fusepath_, + const uint64_t fusepath_len_, + const mode_t mode_, + const dev_t dev_, + const ino_t ino_) +{ + return fasthash64(fusepath_, + fusepath_len_, + fs::inode::MAGIC); +} + +static +uint64_t +devino_hash(const char *fusepath_, + const uint64_t fusepath_len_, + const mode_t mode_, + const dev_t dev_, + const ino_t ino_) +{ + uint64_t buf[2]; + + buf[0] = dev_; + buf[1] = ino_; + + return fasthash64((void*)&buf[0], + sizeof(buf), + fs::inode::MAGIC); +} + +static +uint64_t +hybrid_hash(const char *fusepath_, + const uint64_t fusepath_len_, + const mode_t mode_, + const dev_t dev_, + const ino_t ino_) +{ + return (S_ISDIR(mode_) ? 
+ path_hash(fusepath_,fusepath_len_,mode_,dev_,ino_) : + devino_hash(fusepath_,fusepath_len_,mode_,dev_,ino_)); +} + + +namespace fs +{ + namespace inode + { + int + set_algo(const std::string &algo_) + { + if(algo_ == "passthrough") + g_func = passthrough; + ef(algo_ == "path-hash") + g_func = path_hash; + ef(algo_ == "devino-hash") + g_func = devino_hash; + ef(algo_ == "hybrid-hash") + g_func = hybrid_hash; + else + return -EINVAL; + + return 0; + } + + std::string + get_algo(void) + { + if(g_func == passthrough) + return "passthrough"; + if(g_func == path_hash) + return "path-hash"; + if(g_func == devino_hash) + return "devino-hash"; + if(g_func == hybrid_hash) + return "hybrid-hash"; + + return std::string(); + } + + uint64_t + calc(const char *fusepath_, + const uint64_t fusepath_len_, + const mode_t mode_, + const dev_t dev_, + const ino_t ino_) + { + return g_func(fusepath_,fusepath_len_,mode_,dev_,ino_); + } + + void + calc(const char *fusepath_, + const uint64_t fusepath_len_, + struct stat *st_) + { + st_->st_ino = calc(fusepath_, + fusepath_len_, + st_->st_mode, + st_->st_dev, + st_->st_ino); + } + + void + calc(const char *fusepath_, + struct stat *st_) + { + calc(fusepath_,strlen(fusepath_),st_); + } + + void + calc(const std::string &fusepath_, + struct stat *st_) + { + calc(fusepath_.c_str(),fusepath_.size(),st_); + } + } +} diff --git a/src/fs_inode.hpp b/src/fs_inode.hpp index d629b27c..a7e0fb76 100644 --- a/src/fs_inode.hpp +++ b/src/fs_inode.hpp @@ -18,38 +18,33 @@ #pragma once -#include "fasthash.h" +#include #include #include + namespace fs { namespace inode { static const uint64_t MAGIC = 0x7472617065786974; - inline - uint64_t - recompute(ino_t ino_, - dev_t dev_) - { - uint64_t buf[5]; - - buf[0] = ino_; - buf[1] = dev_; - buf[2] = buf[0] ^ buf[1]; - buf[3] = buf[0] & buf[1]; - buf[4] = buf[0] | buf[1]; - - return fasthash64(&buf[0],sizeof(buf),MAGIC); - } - - inline - void - recompute(struct stat *st_) - { - st_->st_ino = 
recompute(st_->st_ino,st_->st_dev); - } + int set_algo(const std::string &s); + std::string get_algo(void); + + uint64_t calc(const char *fusepath, + const uint64_t fusepath_len, + const mode_t mode, + const dev_t dev, + const ino_t ion); + void calc(const char *fusepath, + const uint64_t fusepath_len, + struct stat *st); + void calc(const char *fusepath, + struct stat *st); + void calc(const std::string &fusepath, + struct stat *st); + } } diff --git a/src/fuse_fgetattr.cpp b/src/fuse_fgetattr.cpp index 22d4b39a..2c0e8712 100644 --- a/src/fuse_fgetattr.cpp +++ b/src/fuse_fgetattr.cpp @@ -26,8 +26,9 @@ namespace l { static int - fgetattr(const int fd_, - struct stat *st_) + fgetattr(const int fd_, + const std::string &fusepath_, + struct stat *st_) { int rv; @@ -35,7 +36,7 @@ namespace l if(rv == -1) return -errno; - fs::inode::recompute(st_); + fs::inode::calc(fusepath_,st_); return 0; } @@ -53,7 +54,7 @@ namespace FUSE const Config &config = Config::ro(); FileInfo *fi = reinterpret_cast(ffi_->fh); - rv = l::fgetattr(fi->fd,st_); + rv = l::fgetattr(fi->fd,fi->fusepath,st_); timeout_->entry = ((rv >= 0) ? 
config.cache_entry : diff --git a/src/fuse_getattr.cpp b/src/fuse_getattr.cpp index fb08bd71..f78dab72 100644 --- a/src/fuse_getattr.cpp +++ b/src/fuse_getattr.cpp @@ -84,7 +84,7 @@ namespace l if(symlinkify_ && symlinkify::can_be_symlink(*st_,symlinkify_timeout_)) st_->st_mode = symlinkify::convert(st_->st_mode); - fs::inode::recompute(st_); + fs::inode::calc(fusepath_,st_); return 0; } diff --git a/src/fuse_readdir_linux.cpp b/src/fuse_readdir_linux.cpp index 7c5221aa..1768369c 100644 --- a/src/fuse_readdir_linux.cpp +++ b/src/fuse_readdir_linux.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2019, Antonio SJ Musumeci + Copyright (c) 2020, Antonio SJ Musumeci Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted, provided that the above @@ -40,6 +40,13 @@ using std::vector; namespace l { + static + char + denttype(struct linux_dirent *d_) + { + return *((char*)d_ + d_->reclen - 1); + } + static int close_free_ret_enomem(int fd_, @@ -61,6 +68,7 @@ namespace l char *buf; HashSet names; string basepath; + string fullpath; uint64_t namelen; struct linux_dirent *d; @@ -100,7 +108,12 @@ namespace l if(rv == 0) continue; - d->ino = fs::inode::recompute(d->ino,dev); + fullpath = fs::path::make(dirname_,d->name); + d->ino = fs::inode::calc(fullpath.c_str(), + fullpath.size(), + DTTOIF(l::denttype(d)), + dev, + d->ino); rv = fuse_dirents_add_linux(buf_,d,namelen); if(rv) diff --git a/src/fuse_readdir_plus_linux.cpp b/src/fuse_readdir_plus_linux.cpp index e8cf4b27..8c973877 100644 --- a/src/fuse_readdir_plus_linux.cpp +++ b/src/fuse_readdir_plus_linux.cpp @@ -41,6 +41,14 @@ using std::vector; namespace l { + static + char + denttype(struct linux_dirent *d_) + { + return *((char*)d_ + d_->reclen - 1); + } + + static int close_free_ret_enomem(int fd_, void *buf_) @@ -63,6 +71,7 @@ namespace l char *buf; HashSet names; string basepath; + string fullpath; uint64_t namelen; struct stat st; fuse_entry_t entry; @@ -110,10 
+119,16 @@ namespace l rv = fs::fstatat_nofollow(dirfd,d->name,&st); if(rv == -1) - memset(&st,0,sizeof(st)); - - d->ino = fs::inode::recompute(d->ino,dev); - st.st_ino = d->ino; + { + memset(&st,0,sizeof(st)); + st.st_ino = d->ino; + st.st_dev = dev; + st.st_mode = DTTOIF(l::denttype(d)); + } + + fullpath = fs::path::make(dirname_,d->name); + fs::inode::calc(fullpath,&st); + d->ino = st.st_ino; rv = fuse_dirents_add_linux_plus(buf_,d,namelen,&entry,&st); if(rv) diff --git a/src/fuse_readdir_plus_posix.cpp b/src/fuse_readdir_plus_posix.cpp index e167465b..32ad0c5c 100644 --- a/src/fuse_readdir_plus_posix.cpp +++ b/src/fuse_readdir_plus_posix.cpp @@ -66,6 +66,7 @@ namespace l dev_t dev; HashSet names; string basepath; + string fullpath; struct stat st; uint64_t namelen; fuse_entry_t entry; @@ -104,10 +105,16 @@ namespace l rv = fs::fstatat_nofollow(dirfd,de->d_name,&st); if(rv == -1) - memset(&st,0,sizeof(st)); - - de->d_ino = fs::inode::recompute(de->d_ino,dev); - st.st_ino = de->d_ino; + { + memset(&st,0,sizeof(st)); + st.st_ino = de->d_ino; + st.st_dev = dev; + st.st_mode = DTTOIF(de->d_type); + } + + fullpath = fs::path::make(dirname_,de->d_name); + fs::inode::calc(fullpath,&st); + de->d_ino = st.st_ino; rv = fuse_dirents_add_plus(buf_,de,namelen,&entry,&st); if(rv) diff --git a/src/fuse_readdir_posix.cpp b/src/fuse_readdir_posix.cpp index 78646d40..cab4f21b 100644 --- a/src/fuse_readdir_posix.cpp +++ b/src/fuse_readdir_posix.cpp @@ -63,6 +63,7 @@ namespace l dev_t dev; HashSet names; string basepath; + string fullpath; uint64_t namelen; for(size_t i = 0, ei = branches_.size(); i != ei; i++) @@ -91,7 +92,12 @@ namespace l if(rv == 0) continue; - de->d_ino = fs::inode::recompute(de->d_ino,dev); + fullpath = fs::path::make(dirname_,de->d_name); + de->d_ino = fs::inode::calc(fullpath.c_str(), + fullpath.size(), + DTTOIF(de->d_type), + dev, + de->d_ino); rv = fuse_dirents_add(buf_,de,namelen); if(rv)