From d0d265a26fecc1fcc757d828fd590eee5db1918b Mon Sep 17 00:00:00 2001 From: PhracturedBlue Date: Fri, 5 Apr 2024 14:43:32 +0000 Subject: [PATCH] Add new basepath-hash inode-generation algorithm A filesystem's device id (st_dev) may change on reboot (e.g. with zfs). Instead, we can use the file's base path (+underlying inode) to generate an inode, which will remain constant across reboots. However, this may produce duplicate inodes if multiple unique devices appear under a base path. Like hybrid_hash, basehybrid_hash/32 hashes the relative path for dirs and uses basepath_hash for files. Original patch by thrnz@github --- README.md | 15 +++- src/fileinfo.hpp | 3 + src/fs_inode.cpp | 150 +++++++++++++++++++++++++++++++------- src/fs_inode.hpp | 15 ++-- src/fuse_create.cpp | 2 +- src/fuse_fgetattr.cpp | 5 +- src/fuse_getattr.cpp | 2 +- src/fuse_open.cpp | 2 +- src/fuse_readdir_cor.cpp | 8 +- src/fuse_readdir_cosr.cpp | 5 +- src/fuse_readdir_seq.cpp | 3 +- src/fuse_symlink.cpp | 2 +- 12 files changed, 169 insertions(+), 43 deletions(-) diff --git a/README.md b/README.md index f303acfa..a5b7c7ea 100644 --- a/README.md +++ b/README.md @@ -155,8 +155,8 @@ These options are the same regardless of whether you use them with the the file. An attempt to move the file to that branch will occur (keeping all metadata possible) and if successful the original is unlinked and the write retried. (default: false, true = mfs) -* **inodecalc=passthrough|path-hash|devino-hash|hybrid-hash**: Selects - the inode calculation algorithm. (default: hybrid-hash) +* **inodecalc=passthrough|path-hash|devino-hash|basepath-hash|hybrid-hash|basehybrid-hash**: + Selects the inode calculation algorithm. (default: hybrid-hash) * **dropcacheonclose=BOOL**: When a file is requested to be closed call `posix_fadvise` on it first to instruct the kernel that we no longer need the data and it can drop its cache. Recommended when @@ -444,12 +444,23 @@ covering different usecases. 
different file or files move out of band but will present the same inode for underlying files that do too. * devino-hash32: 32bit version of devino-hash. +* basepath-hash: Hashes the branch base path along with + the inode of the underlying entry. This has a similar purpose to + devino-hash, but by using the path instead of the device-id, the inodes + will be guaranteed to be stable across reboots. Useful for backup or + deduplication systems that rely on a static inode. Note that if more + than one filesystem is mounted beneath a branch's base path, + duplicate inodes are possible. +* basepath-hash32: 32bit version of basepath-hash. * hybrid-hash: Performs `path-hash` on directories and `devino-hash` on other file types. Since directories can't have hard links the static value won't make a difference and the files will get values useful for finding duplicates. Probably the best to use if not using NFS. As such it is the default. * hybrid-hash32: 32bit version of hybrid-hash. +* basehybrid-hash: Serves the same purpose as `hybrid-hash` but using + the `basepath-hash` algorithm for files. +* basehybrid-hash32: 32bit version of basehybrid-hash. 32bit versions are provided as there is some software which does not handle 64bit inodes well. 
diff --git a/src/fileinfo.hpp b/src/fileinfo.hpp index 4c8beaba..d9eb0a49 100644 --- a/src/fileinfo.hpp +++ b/src/fileinfo.hpp @@ -27,16 +27,19 @@ class FileInfo : public FH { public: FileInfo(int const fd_, + const std::string &basepath_, char const *fusepath_, bool const direct_io_) : FH(fusepath_), fd(fd_), + basepath(basepath_), direct_io(direct_io_) { } public: int fd; + const std::string basepath; uint32_t direct_io:1; std::mutex mutex; }; diff --git a/src/fs_inode.cpp b/src/fs_inode.cpp index 19098440..96ad766f 100644 --- a/src/fs_inode.cpp +++ b/src/fs_inode.cpp @@ -18,6 +18,7 @@ #include "ef.hpp" #include "errno.hpp" +#include "fmt/core.h" #include "fs_inode.hpp" #include "wyhash.h" @@ -28,9 +29,9 @@ #include #include -typedef uint64_t (*inodefunc_t)(const char*,const uint64_t,const mode_t,const dev_t,const ino_t); +typedef uint64_t (*inodefunc_t)(const std::string&,const char*,const uint64_t,const mode_t,const dev_t,const ino_t); -static uint64_t hybrid_hash(const char*,const uint64_t,const mode_t,const dev_t,const ino_t); +static uint64_t hybrid_hash(const std::string&,const char*,const uint64_t,const mode_t,const dev_t,const ino_t); static inodefunc_t g_func = hybrid_hash; @@ -44,7 +45,8 @@ h64_to_h32(uint64_t h_) static uint64_t -passthrough(const char *fusepath_, +passthrough(const std::string &basepath_, + const char *fusepath_, const uint64_t fusepath_len_, const mode_t mode_, const dev_t dev_, @@ -55,7 +57,8 @@ passthrough(const char *fusepath_, static uint64_t -path_hash(const char *fusepath_, +path_hash(const std::string &basepath_, + const char *fusepath_, const uint64_t fusepath_len_, const mode_t mode_, const dev_t dev_, @@ -69,7 +72,8 @@ path_hash(const char *fusepath_, static uint64_t -path_hash32(const char *fusepath_, +path_hash32(const std::string &basepath_, + const char *fusepath_, const uint64_t fusepath_len_, const mode_t mode_, const dev_t dev_, @@ -77,7 +81,8 @@ path_hash32(const char *fusepath_, { uint64_t h; - h = 
path_hash(fusepath_, + h = path_hash(basepath_, + fusepath_, fusepath_len_, mode_, dev_, @@ -88,7 +93,8 @@ path_hash32(const char *fusepath_, static uint64_t -devino_hash(const char *fusepath_, +devino_hash(const std::string &basepath_, + const char *fusepath_, const uint64_t fusepath_len_, const mode_t mode_, const dev_t dev_, @@ -107,7 +113,8 @@ devino_hash(const char *fusepath_, static uint64_t -devino_hash32(const char *fusepath_, +devino_hash32(const std::string &basepath_, + const char *fusepath_, const uint64_t fusepath_len_, const mode_t mode_, const dev_t dev_, @@ -115,7 +122,8 @@ devino_hash32(const char *fusepath_, { uint64_t h; - h = devino_hash(fusepath_, + h = devino_hash(basepath_, + fusepath_, fusepath_len_, mode_, dev_, @@ -126,28 +134,97 @@ devino_hash32(const char *fusepath_, static uint64_t -hybrid_hash(const char *fusepath_, +basepath_hash(const std::string &basepath_, + const char *fusepath_, + const uint64_t fusepath_len_, + const mode_t mode_, + const dev_t dev_, + const ino_t ino_) +{ + + std::string buf = fmt::format("{}{}",ino_,basepath_); + + return wyhash(buf.c_str(), + buf.length(), + fs::inode::MAGIC, + _wyp); +} + +static +uint64_t +basepath_hash32(const std::string &basepath_, + const char *fusepath_, + const uint64_t fusepath_len_, + const mode_t mode_, + const dev_t dev_, + const ino_t ino_) +{ + uint64_t h; + + h = basepath_hash(basepath_, + fusepath_, + fusepath_len_, + mode_, + dev_, + ino_); + + return h64_to_h32(h); +} + +static +uint64_t +hybrid_hash(const std::string &basepath_, + const char *fusepath_, + const uint64_t fusepath_len_, + const mode_t mode_, + const dev_t dev_, + const ino_t ino_) +{ + return (S_ISDIR(mode_) ? 
+ path_hash(basepath_,fusepath_,fusepath_len_,mode_,dev_,ino_) : + devino_hash(basepath_,fusepath_,fusepath_len_,mode_,dev_,ino_)); +} + +static +uint64_t +hybrid_hash32(const std::string &basepath_, + const char *fusepath_, + const uint64_t fusepath_len_, + const mode_t mode_, + const dev_t dev_, + const ino_t ino_) +{ + return (S_ISDIR(mode_) ? + path_hash32(basepath_,fusepath_,fusepath_len_,mode_,dev_,ino_) : + devino_hash32(basepath_,fusepath_,fusepath_len_,mode_,dev_,ino_)); +} + +static +uint64_t +basehybrid_hash(const std::string &basepath_, + const char *fusepath_, const uint64_t fusepath_len_, const mode_t mode_, const dev_t dev_, const ino_t ino_) { return (S_ISDIR(mode_) ? - path_hash(fusepath_,fusepath_len_,mode_,dev_,ino_) : - devino_hash(fusepath_,fusepath_len_,mode_,dev_,ino_)); + path_hash(basepath_,fusepath_,fusepath_len_,mode_,dev_,ino_) : + basepath_hash(basepath_,fusepath_,fusepath_len_,mode_,dev_,ino_)); } static uint64_t -hybrid_hash32(const char *fusepath_, +basehybrid_hash32(const std::string &basepath_, + const char *fusepath_, const uint64_t fusepath_len_, const mode_t mode_, const dev_t dev_, const ino_t ino_) { return (S_ISDIR(mode_) ? 
- path_hash32(fusepath_,fusepath_len_,mode_,dev_,ino_) : - devino_hash32(fusepath_,fusepath_len_,mode_,dev_,ino_)); + path_hash32(basepath_,fusepath_,fusepath_len_,mode_,dev_,ino_) : + basepath_hash32(basepath_,fusepath_,fusepath_len_,mode_,dev_,ino_)); } namespace fs @@ -171,6 +248,14 @@ namespace fs g_func = hybrid_hash; ef(algo_ == "hybrid-hash32") g_func = hybrid_hash32; + ef(algo_ == "basepath-hash") + g_func = basepath_hash; + ef(algo_ == "basepath-hash32") + g_func = basepath_hash32; + ef(algo_ == "basehybrid-hash") + g_func = basehybrid_hash; + ef(algo_ == "basehybrid-hash32") + g_func = basehybrid_hash32; else return -EINVAL; @@ -194,27 +279,38 @@ namespace fs return "hybrid-hash"; if(g_func == hybrid_hash32) return "hybrid-hash32"; + if(g_func == basepath_hash) + return "basepath-hash"; + if(g_func == basepath_hash32) + return "basepath-hash32"; + if(g_func == basehybrid_hash) + return "basehybrid-hash"; + if(g_func == basehybrid_hash32) + return "basehybrid-hash32"; return std::string(); } uint64_t - calc(const char *fusepath_, + calc(const std::string &basepath_, + const char *fusepath_, const uint64_t fusepath_len_, const mode_t mode_, const dev_t dev_, const ino_t ino_) { - return g_func(fusepath_,fusepath_len_,mode_,dev_,ino_); + return g_func(basepath_,fusepath_,fusepath_len_,mode_,dev_,ino_); } uint64_t - calc(std::string const &fusepath_, + calc(const std::string &basepath_, + std::string const &fusepath_, const mode_t mode_, const dev_t dev_, const ino_t ino_) { - return calc(fusepath_.c_str(), + return calc(basepath_, + fusepath_.c_str(), fusepath_.size(), mode_, dev_, @@ -222,11 +318,13 @@ namespace fs } void - calc(const char *fusepath_, + calc(const std::string &basepath_, + const char *fusepath_, const uint64_t fusepath_len_, struct stat *st_) { - st_->st_ino = calc(fusepath_, + st_->st_ino = calc(basepath_, + fusepath_, fusepath_len_, st_->st_mode, st_->st_dev, @@ -234,17 +332,19 @@ namespace fs } void - calc(const char *fusepath_, + calc(const 
std::string &basepath_, + const char *fusepath_, struct stat *st_) { - calc(fusepath_,strlen(fusepath_),st_); + calc(basepath_,fusepath_,strlen(fusepath_),st_); } void - calc(const std::string &fusepath_, + calc(const std::string &basepath_, + const std::string &fusepath_, struct stat *st_) { - calc(fusepath_.c_str(),fusepath_.size(),st_); + calc(basepath_,fusepath_.c_str(),fusepath_.size(),st_); } } } diff --git a/src/fs_inode.hpp b/src/fs_inode.hpp index 11e772c8..87abb88e 100644 --- a/src/fs_inode.hpp +++ b/src/fs_inode.hpp @@ -33,21 +33,26 @@ namespace fs int set_algo(const std::string &s); std::string get_algo(void); - uint64_t calc(const char *fusepath, + uint64_t calc(const std::string &basepath, + const char *fusepath, const uint64_t fusepath_len, const mode_t mode, const dev_t dev, const ino_t ino); - uint64_t calc(std::string const &fusepath, + uint64_t calc(const std::string &basepath, + std::string const &fusepath, mode_t const mode, dev_t const dev, ino_t ino); - void calc(const char *fusepath, + void calc(const std::string &basepath, + const char *fusepath, const uint64_t fusepath_len, struct stat *st); - void calc(const char *fusepath, + void calc(const std::string &basepath, + const char *fusepath, struct stat *st); - void calc(const std::string &fusepath, + void calc(const std::string &basepath, + const std::string &fusepath, struct stat *st); } diff --git a/src/fuse_create.cpp b/src/fuse_create.cpp index 45ee5c68..2ce27e28 100644 --- a/src/fuse_create.cpp +++ b/src/fuse_create.cpp @@ -163,7 +163,7 @@ namespace l if(rv == -1) return -errno; - fi = new FileInfo(rv,fusepath_,ffi_->direct_io); + fi = new FileInfo(rv,createpath_,fusepath_,ffi_->direct_io); ffi_->fh = reinterpret_cast(fi); diff --git a/src/fuse_fgetattr.cpp b/src/fuse_fgetattr.cpp index bc8b44d4..485207c8 100644 --- a/src/fuse_fgetattr.cpp +++ b/src/fuse_fgetattr.cpp @@ -28,6 +28,7 @@ namespace l static int fgetattr(const int fd_, + const std::string &basepath_, const std::string 
&fusepath_, struct stat *st_) { @@ -37,7 +38,7 @@ namespace l if(rv == -1) return -errno; - fs::inode::calc(fusepath_,st_); + fs::inode::calc(basepath_,fusepath_,st_); return 0; } @@ -54,7 +55,7 @@ namespace FUSE Config::Read cfg; FileInfo *fi = reinterpret_cast(ffi_->fh); - rv = l::fgetattr(fi->fd,fi->fusepath,st_); + rv = l::fgetattr(fi->fd,fi->basepath,fi->fusepath,st_); timeout_->entry = ((rv >= 0) ? cfg->cache_entry : diff --git a/src/fuse_getattr.cpp b/src/fuse_getattr.cpp index 13b5ccbd..5b9c5112 100644 --- a/src/fuse_getattr.cpp +++ b/src/fuse_getattr.cpp @@ -141,7 +141,7 @@ namespace l if(symlinkify_ && symlinkify::can_be_symlink(*st_,symlinkify_timeout_)) symlinkify::convert(fullpath,st_); - fs::inode::calc(fusepath_,st_); + fs::inode::calc(basepaths[0],fusepath_,st_); return 0; } diff --git a/src/fuse_open.cpp b/src/fuse_open.cpp index d49ed797..db30c2c6 100644 --- a/src/fuse_open.cpp +++ b/src/fuse_open.cpp @@ -211,7 +211,7 @@ namespace l if(fd == -1) return -errno; - fi = new FileInfo(fd,fusepath_,ffi_->direct_io); + fi = new FileInfo(fd,basepath_,fusepath_,ffi_->direct_io); ffi_->fh = reinterpret_cast(fi); diff --git a/src/fuse_readdir_cor.cpp b/src/fuse_readdir_cor.cpp index 2b96c0d2..436d4a42 100644 --- a/src/fuse_readdir_cor.cpp +++ b/src/fuse_readdir_cor.cpp @@ -77,7 +77,8 @@ namespace l static inline int - readdir(std::string basepath_, + readdir(const std::string &branchdir_, + std::string basepath_, HashSet &names_, fuse_dirents_t *buf_, std::mutex &mutex_) @@ -122,7 +123,8 @@ namespace l continue; filepath = fs::path::make(basepath_,d->name); - d->ino = fs::inode::calc(filepath, + d->ino = fs::inode::calc(branchdir_, + filepath, DTTOIF(d->type), dev, d->ino); @@ -161,7 +163,7 @@ namespace l basepath = fs::path::make(branch.path,dirname_); - return l::readdir(basepath,names,buf_,mutex); + return l::readdir(branch.path,basepath,names,buf_,mutex); }; auto rv = tp_.enqueue_task(func); diff --git a/src/fuse_readdir_cosr.cpp 
b/src/fuse_readdir_cosr.cpp index 6be0a97e..83c2394c 100644 --- a/src/fuse_readdir_cosr.cpp +++ b/src/fuse_readdir_cosr.cpp @@ -52,6 +52,7 @@ namespace l { DIR *dir; int err; + std::string basepath; }; struct Error @@ -119,6 +120,7 @@ namespace l errno = 0; rv.dir = fs::opendir(basepath); rv.err = errno; + rv.basepath = branch.path; return rv; }; @@ -169,7 +171,8 @@ namespace l continue; fullpath = fs::path::make(dirname_,de->d_name); - de->d_ino = fs::inode::calc(fullpath, + de->d_ino = fs::inode::calc(dirrv.basepath, + fullpath, DTTOIF(de->d_type), dev, de->d_ino); diff --git a/src/fuse_readdir_seq.cpp b/src/fuse_readdir_seq.cpp index 9aa0c6f0..d6568bfe 100644 --- a/src/fuse_readdir_seq.cpp +++ b/src/fuse_readdir_seq.cpp @@ -125,7 +125,8 @@ namespace l continue; fullpath = fs::path::make(dirname_,de->d_name); - de->d_ino = fs::inode::calc(fullpath, + de->d_ino = fs::inode::calc(branch.path, + fullpath, DTTOIF(de->d_type), dev, de->d_ino); diff --git a/src/fuse_symlink.cpp b/src/fuse_symlink.cpp index fcc9ef36..a8ed41e2 100644 --- a/src/fuse_symlink.cpp +++ b/src/fuse_symlink.cpp @@ -74,7 +74,7 @@ namespace l { fs::lstat(fullnewpath,st_); if(st_->st_ino != 0) - fs::inode::calc(linkpath_,st_); + fs::inode::calc(newbasepath_,linkpath_,st_); } return error::calc(rv,error_,errno);