From 30d13b7f77acb191bd211432613196a075b69ea5 Mon Sep 17 00:00:00 2001 From: Antonio SJ Musumeci Date: Mon, 3 Aug 2020 17:18:22 -0400 Subject: [PATCH] inodecalc: add 32bit versions of hashs --- README.md | 12 ++++++++- man/mergerfs.1 | 17 ++++++++++++- src/fs_inode.cpp | 64 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 91 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 2dfd4d7c..e228bff2 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ % mergerfs(1) mergerfs user manual % Antonio SJ Musumeci -% 2020-08-02 +% 2020-08-03 # NAME @@ -183,8 +183,13 @@ mergerfs offers multiple ways to calculate the inode in hopes of covering differ * passthrough: Passes through the underlying inode value. Mostly intended for testing as using this does not address any of the problems mentioned above and could confuse file deduplication software as inodes from different filesystems can be the same. * path-hash: Hashes the relative path of the entry in question. The underlying file's values are completely ignored. This means the inode value will always be the same for that file path. This is useful when using NFS and you make changes out of band such as copy data between branches. This also means that entries that do point to the same file will not be recognizable via inodes. That **does not** mean hard links don't work. They will. +* path-hash32: 32bit version of path-hash. * devino-hash: Hashes the device id and inode of the underlying entry. This won't prevent issues with NFS should the policy pick a different file or files move out of band but will present the same inode for underlying files that do too. +* devino-hash32: 32bit version of devino-hash. * hybrid-hash: Performs `path-hash` on directories and `devino-hash` on other file types. Since directories can't have hard links the static value won't make a difference and the files will get values useful for finding duplicates. Probably the best to use if not using NFS. 
As such it is the default. +* hybrid-hash32: 32bit version of hybrid-hash. + +32bit versions are provided as there is some software which does not handle 64bit inodes well. While there is a risk of hash collision in tests of a couple million entries there were zero collisions. Unlike a typical filesystem FUSE filesystems can reuse inodes and not refer to the same entry. The internal identifier used to reference a file in FUSE is different from the inode value presented. The former is the `nodeid` and is actually a tuple of 2 64bit values: `nodeid` and `generation`. This tuple is not client facing. The inode that is presented to the client is passed through the kernel uninterpreted. @@ -894,6 +899,11 @@ The problem is that many applications do not properly handle `EXDEV` errors whic Ideally the offending software would be fixed and it is recommended that if you run into this problem you contact the software's author and request proper handling of `EXDEV` errors. +#### my 32bit software has problems + +Some software has problems with 64bit inode values. The symptoms can include EOVERFLOW errors when trying to list files. You can address this by setting `inodecalc` to one of the 32bit based algorithms as described in the relevant section. + + #### Samba: Moving files / directories fails Workaround: Copy the file/directory and then remove the original rather than move. diff --git a/man/mergerfs.1 b/man/mergerfs.1 index 9136ba99..d168034a 100644 --- a/man/mergerfs.1 +++ b/man/mergerfs.1 @@ -1,7 +1,7 @@ .\"t .\" Automatically generated by Pandoc 1.19.2.4 .\" -.TH "mergerfs" "1" "2020\-08\-02" "mergerfs user manual" "" +.TH "mergerfs" "1" "2020\-08\-03" "mergerfs user manual" "" .hy .SH NAME .PP @@ -426,11 +426,15 @@ recognizable via inodes. That \f[B]does not\f[] mean hard links don\[aq]t work. They will. .IP \[bu] 2 +path\-hash32: 32bit version of path\-hash. +.IP \[bu] 2 devino\-hash: Hashes the device id and inode of the underlying entry. 
This won\[aq]t prevent issues with NFS should the policy pick a different file or files move out of band but will present the same inode for underlying files that do too. .IP \[bu] 2 +devino\-hash32: 32bit version of devino\-hash. +.IP \[bu] 2 hybrid\-hash: Performs \f[C]path\-hash\f[] on directories and \f[C]devino\-hash\f[] on other file types. Since directories can\[aq]t have hard links the static value won\[aq]t @@ -438,6 +442,11 @@ make a difference and the files will get values useful for finding duplicates. Probably the best to use if not using NFS. As such it is the default. +.IP \[bu] 2 +hybrid\-hash32: 32bit version of hybrid\-hash. +.PP +32bit versions are provided as there is some software which does not +handle 64bit inodes well. .PP While there is a risk of hash collision in tests of a couple million entries there were zero collisions. @@ -1967,6 +1976,12 @@ For example: \f[C]\-o\ category.create=mfs\f[] Ideally the offending software would be fixed and it is recommended that if you run into this problem you contact the software\[aq]s author and request proper handling of \f[C]EXDEV\f[] errors. +.SS my 32bit software has problems +.PP +Some software has problems with 64bit inode values. +The symptoms can include EOVERFLOW errors when trying to list files. +You can address this by setting \f[C]inodecalc\f[] to one of the 32bit +based algorithms as described in the relevant section. 
.SS Samba: Moving files / directories fails .PP Workaround: Copy the file/directory and then remove the original rather diff --git a/src/fs_inode.cpp b/src/fs_inode.cpp index f5b4b6c8..9b14c111 100644 --- a/src/fs_inode.cpp +++ b/src/fs_inode.cpp @@ -34,6 +34,14 @@ static uint64_t hybrid_hash(const char*,const uint64_t,const mode_t,const dev_t, static inodefunc_t g_func = hybrid_hash; + +static +uint32_t +h64_to_h32(uint64_t h_) +{ + return (h_ - (h_ >> 32)); +} + static uint64_t passthrough(const char *fusepath_, @@ -58,6 +66,25 @@ path_hash(const char *fusepath_, fs::inode::MAGIC); } +static +uint64_t +path_hash32(const char *fusepath_, + const uint64_t fusepath_len_, + const mode_t mode_, + const dev_t dev_, + const ino_t ino_) +{ + uint64_t h; + + h = path_hash(fusepath_, + fusepath_len_, + mode_, + dev_, + ino_); + + return h64_to_h32(h); +} + static uint64_t devino_hash(const char *fusepath_, @@ -76,6 +103,25 @@ devino_hash(const char *fusepath_, fs::inode::MAGIC); } +static +uint64_t +devino_hash32(const char *fusepath_, + const uint64_t fusepath_len_, + const mode_t mode_, + const dev_t dev_, + const ino_t ino_) +{ + uint64_t h; + + h = devino_hash(fusepath_, + fusepath_len_, + mode_, + dev_, + ino_); + + return h64_to_h32(h); +} + static uint64_t hybrid_hash(const char *fusepath_, @@ -89,6 +135,18 @@ hybrid_hash(const char *fusepath_, devino_hash(fusepath_,fusepath_len_,mode_,dev_,ino_)); } +static +uint64_t +hybrid_hash32(const char *fusepath_, + const uint64_t fusepath_len_, + const mode_t mode_, + const dev_t dev_, + const ino_t ino_) +{ + return (S_ISDIR(mode_) ? 
+ path_hash32(fusepath_,fusepath_len_,mode_,dev_,ino_) : + devino_hash32(fusepath_,fusepath_len_,mode_,dev_,ino_)); +} namespace fs { @@ -101,10 +159,16 @@ namespace fs g_func = passthrough; ef(algo_ == "path-hash") g_func = path_hash; + ef(algo_ == "path-hash32") + g_func = path_hash32; ef(algo_ == "devino-hash") g_func = devino_hash; + ef(algo_ == "devino-hash32") + g_func = devino_hash32; ef(algo_ == "hybrid-hash") g_func = hybrid_hash; + ef(algo_ == "hybrid-hash32") + g_func = hybrid_hash32; else return -EINVAL;