Browse Source

Add new basepath-hash inode-generation algorithm

A filesystem's device id (st_dev) may change on reboot (e.g. with zfs). Instead, we can use the file's branch base path (plus the underlying inode) to generate an inode, which will remain constant across reboots. However, this may have unexpected effects if multiple unique devices appear under a single base path, since inode numbers are only unique per device.

Like hybrid_hash, basehybrid_hash and basehybrid_hash32 hash the relative path for directories and use basepath_hash for all other file types.

Original patch by thrnz@github
pull/1323/head
PhracturedBlue 9 months ago
parent
commit
d0d265a26f
  1. 15
      README.md
  2. 3
      src/fileinfo.hpp
  3. 150
      src/fs_inode.cpp
  4. 15
      src/fs_inode.hpp
  5. 2
      src/fuse_create.cpp
  6. 5
      src/fuse_fgetattr.cpp
  7. 2
      src/fuse_getattr.cpp
  8. 2
      src/fuse_open.cpp
  9. 8
      src/fuse_readdir_cor.cpp
  10. 5
      src/fuse_readdir_cosr.cpp
  11. 3
      src/fuse_readdir_seq.cpp
  12. 2
      src/fuse_symlink.cpp

15
README.md

@ -155,8 +155,8 @@ These options are the same regardless of whether you use them with the
the file. An attempt to move the file to that branch will occur the file. An attempt to move the file to that branch will occur
(keeping all metadata possible) and if successful the original is (keeping all metadata possible) and if successful the original is
unlinked and the write retried. (default: false, true = mfs) unlinked and the write retried. (default: false, true = mfs)
* **inodecalc=passthrough|path-hash|devino-hash|hybrid-hash**: Selects
the inode calculation algorithm. (default: hybrid-hash)
* **inodecalc=passthrough|path-hash|devino-hash|basepath-hash|hybrid-hash|basehybrid-hash**:
Selects the inode calculation algorithm. (default: hybrid-hash)
* **dropcacheonclose=BOOL**: When a file is requested to be closed * **dropcacheonclose=BOOL**: When a file is requested to be closed
call `posix_fadvise` on it first to instruct the kernel that we no call `posix_fadvise` on it first to instruct the kernel that we no
longer need the data and it can drop its cache. Recommended when longer need the data and it can drop its cache. Recommended when
@ -444,12 +444,23 @@ covering different usecases.
different file or files move out of band but will present the same different file or files move out of band but will present the same
inode for underlying files that do too. inode for underlying files that do too.
* devino-hash32: 32bit version of devino-hash. * devino-hash32: 32bit version of devino-hash.
* basepath-hash: Hashes the branch base path along with
the inode of the underlying entry. This serves a similar purpose to
devino-hash but, because it uses the path instead of the device id, the
inodes will be stable across reboots. Useful for backup or
deduplication systems that rely on a static inode. Note that if more
than one underlying filesystem appears beneath a branch's base path,
duplicate inodes are possible.
* basepath-hash32: 32bit version of basepath-hash.
* hybrid-hash: Performs `path-hash` on directories and `devino-hash` * hybrid-hash: Performs `path-hash` on directories and `devino-hash`
on other file types. Since directories can't have hard links the on other file types. Since directories can't have hard links the
static value won't make a difference and the files will get values static value won't make a difference and the files will get values
useful for finding duplicates. Probably the best to use if not using useful for finding duplicates. Probably the best to use if not using
NFS. As such it is the default. NFS. As such it is the default.
* hybrid-hash32: 32bit version of hybrid-hash. * hybrid-hash32: 32bit version of hybrid-hash.
* basehybrid-hash: Serves the same purpose as `hybrid-hash` but using
the `basepath-hash` algorithm for files.
* basehybrid-hash32: 32bit version of basehybrid-hash.
32bit versions are provided as there is some software which does not 32bit versions are provided as there is some software which does not
handle 64bit inodes well. handle 64bit inodes well.

3
src/fileinfo.hpp

@ -27,16 +27,19 @@ class FileInfo : public FH
{ {
public: public:
FileInfo(int const fd_, FileInfo(int const fd_,
const std::string &basepath_,
char const *fusepath_, char const *fusepath_,
bool const direct_io_) bool const direct_io_)
: FH(fusepath_), : FH(fusepath_),
fd(fd_), fd(fd_),
basepath(basepath_),
direct_io(direct_io_) direct_io(direct_io_)
{ {
} }
public: public:
int fd; int fd;
const std::string basepath;
uint32_t direct_io:1; uint32_t direct_io:1;
std::mutex mutex; std::mutex mutex;
}; };

150
src/fs_inode.cpp

@ -18,6 +18,7 @@
#include "ef.hpp" #include "ef.hpp"
#include "errno.hpp" #include "errno.hpp"
#include "fmt/core.h"
#include "fs_inode.hpp" #include "fs_inode.hpp"
#include "wyhash.h" #include "wyhash.h"
@ -28,9 +29,9 @@
#include <string.h> #include <string.h>
#include <sys/stat.h> #include <sys/stat.h>
typedef uint64_t (*inodefunc_t)(const char*,const uint64_t,const mode_t,const dev_t,const ino_t);
typedef uint64_t (*inodefunc_t)(const std::string&,const char*,const uint64_t,const mode_t,const dev_t,const ino_t);
static uint64_t hybrid_hash(const char*,const uint64_t,const mode_t,const dev_t,const ino_t);
static uint64_t hybrid_hash(const std::string&,const char*,const uint64_t,const mode_t,const dev_t,const ino_t);
static inodefunc_t g_func = hybrid_hash; static inodefunc_t g_func = hybrid_hash;
@ -44,7 +45,8 @@ h64_to_h32(uint64_t h_)
static static
uint64_t uint64_t
passthrough(const char *fusepath_,
passthrough(const std::string &basepath_,
const char *fusepath_,
const uint64_t fusepath_len_, const uint64_t fusepath_len_,
const mode_t mode_, const mode_t mode_,
const dev_t dev_, const dev_t dev_,
@ -55,7 +57,8 @@ passthrough(const char *fusepath_,
static static
uint64_t uint64_t
path_hash(const char *fusepath_,
path_hash(const std::string &basepath_,
const char *fusepath_,
const uint64_t fusepath_len_, const uint64_t fusepath_len_,
const mode_t mode_, const mode_t mode_,
const dev_t dev_, const dev_t dev_,
@ -69,7 +72,8 @@ path_hash(const char *fusepath_,
static static
uint64_t uint64_t
path_hash32(const char *fusepath_,
path_hash32(const std::string &basepath_,
const char *fusepath_,
const uint64_t fusepath_len_, const uint64_t fusepath_len_,
const mode_t mode_, const mode_t mode_,
const dev_t dev_, const dev_t dev_,
@ -77,7 +81,8 @@ path_hash32(const char *fusepath_,
{ {
uint64_t h; uint64_t h;
h = path_hash(fusepath_,
h = path_hash(basepath_,
fusepath_,
fusepath_len_, fusepath_len_,
mode_, mode_,
dev_, dev_,
@ -88,7 +93,8 @@ path_hash32(const char *fusepath_,
static static
uint64_t uint64_t
devino_hash(const char *fusepath_,
devino_hash(const std::string &basepath_,
const char *fusepath_,
const uint64_t fusepath_len_, const uint64_t fusepath_len_,
const mode_t mode_, const mode_t mode_,
const dev_t dev_, const dev_t dev_,
@ -107,7 +113,8 @@ devino_hash(const char *fusepath_,
static static
uint64_t uint64_t
devino_hash32(const char *fusepath_,
devino_hash32(const std::string &basepath_,
const char *fusepath_,
const uint64_t fusepath_len_, const uint64_t fusepath_len_,
const mode_t mode_, const mode_t mode_,
const dev_t dev_, const dev_t dev_,
@ -115,7 +122,8 @@ devino_hash32(const char *fusepath_,
{ {
uint64_t h; uint64_t h;
h = devino_hash(fusepath_,
h = devino_hash(basepath_,
fusepath_,
fusepath_len_, fusepath_len_,
mode_, mode_,
dev_, dev_,
@ -126,28 +134,97 @@ devino_hash32(const char *fusepath_,
static static
uint64_t uint64_t
hybrid_hash(const char *fusepath_,
basepath_hash(const std::string &basepath_,
              const char        *fusepath_,
              const uint64_t     fusepath_len_,
              const mode_t       mode_,
              const dev_t        dev_,
              const ino_t        ino_)
{
  // The hash input is the underlying inode number immediately followed
  // by the branch base path. fusepath_, fusepath_len_, mode_ and dev_
  // are accepted only so the signature matches inodefunc_t; they are
  // not used here.
  const std::string key = fmt::format("{}{}",ino_,basepath_);

  return wyhash(key.data(),
                key.size(),
                fs::inode::MAGIC,
                _wyp);
}
static
uint64_t
basepath_hash32(const std::string &basepath_,
                const char        *fusepath_,
                const uint64_t     fusepath_len_,
                const mode_t       mode_,
                const dev_t        dev_,
                const ino_t        ino_)
{
  // 32bit variant: take the 64bit basepath_hash result and pass it
  // through h64_to_h32.
  const uint64_t hash64 = basepath_hash(basepath_,
                                        fusepath_,
                                        fusepath_len_,
                                        mode_,
                                        dev_,
                                        ino_);

  return h64_to_h32(hash64);
}
static
uint64_t
hybrid_hash(const std::string &basepath_,
            const char        *fusepath_,
            const uint64_t     fusepath_len_,
            const mode_t       mode_,
            const dev_t        dev_,
            const ino_t        ino_)
{
  // Directories get path_hash; every other file type gets devino_hash.
  if(S_ISDIR(mode_))
    return path_hash(basepath_,fusepath_,fusepath_len_,mode_,dev_,ino_);

  return devino_hash(basepath_,fusepath_,fusepath_len_,mode_,dev_,ino_);
}
static
uint64_t
hybrid_hash32(const std::string &basepath_,
              const char        *fusepath_,
              const uint64_t     fusepath_len_,
              const mode_t       mode_,
              const dev_t        dev_,
              const ino_t        ino_)
{
  // Directories get path_hash32; every other file type gets
  // devino_hash32.
  if(S_ISDIR(mode_))
    return path_hash32(basepath_,fusepath_,fusepath_len_,mode_,dev_,ino_);

  return devino_hash32(basepath_,fusepath_,fusepath_len_,mode_,dev_,ino_);
}
static
uint64_t
basehybrid_hash(const std::string &basepath_,
const char *fusepath_,
const uint64_t fusepath_len_, const uint64_t fusepath_len_,
const mode_t mode_, const mode_t mode_,
const dev_t dev_, const dev_t dev_,
const ino_t ino_) const ino_t ino_)
{ {
return (S_ISDIR(mode_) ? return (S_ISDIR(mode_) ?
path_hash(fusepath_,fusepath_len_,mode_,dev_,ino_) :
devino_hash(fusepath_,fusepath_len_,mode_,dev_,ino_));
path_hash(basepath_,fusepath_,fusepath_len_,mode_,dev_,ino_) :
basepath_hash(basepath_,fusepath_,fusepath_len_,mode_,dev_,ino_));
} }
static static
uint64_t uint64_t
hybrid_hash32(const char *fusepath_,
basehybrid_hash32(const std::string &basepath_,
const char *fusepath_,
const uint64_t fusepath_len_, const uint64_t fusepath_len_,
const mode_t mode_, const mode_t mode_,
const dev_t dev_, const dev_t dev_,
const ino_t ino_) const ino_t ino_)
{ {
return (S_ISDIR(mode_) ? return (S_ISDIR(mode_) ?
path_hash32(fusepath_,fusepath_len_,mode_,dev_,ino_) :
devino_hash32(fusepath_,fusepath_len_,mode_,dev_,ino_));
path_hash32(basepath_,fusepath_,fusepath_len_,mode_,dev_,ino_) :
basepath_hash32(basepath_,fusepath_,fusepath_len_,mode_,dev_,ino_));
} }
namespace fs namespace fs
@ -171,6 +248,14 @@ namespace fs
g_func = hybrid_hash; g_func = hybrid_hash;
ef(algo_ == "hybrid-hash32") ef(algo_ == "hybrid-hash32")
g_func = hybrid_hash32; g_func = hybrid_hash32;
ef(algo_ == "basepath-hash")
g_func = basepath_hash;
ef(algo_ == "basepath-hash32")
g_func = basepath_hash32;
ef(algo_ == "basehybrid-hash")
g_func = basehybrid_hash;
ef(algo_ == "basehybrid-hash32")
g_func = basehybrid_hash32;
else else
return -EINVAL; return -EINVAL;
@ -194,27 +279,38 @@ namespace fs
return "hybrid-hash"; return "hybrid-hash";
if(g_func == hybrid_hash32) if(g_func == hybrid_hash32)
return "hybrid-hash32"; return "hybrid-hash32";
// Map the basepath-based algorithms back to their option names. Each
// 32bit variant must be compared against its own function; comparing
// against the 64bit function a second time makes the "...32" name
// unreachable.
if(g_func == basepath_hash)
  return "basepath-hash";
if(g_func == basepath_hash32)
  return "basepath-hash32";
if(g_func == basehybrid_hash)
  return "basehybrid-hash";
if(g_func == basehybrid_hash32)
  return "basehybrid-hash32";
return std::string(); return std::string();
} }
uint64_t uint64_t
calc(const char *fusepath_,
calc(const std::string &basepath_,
const char *fusepath_,
const uint64_t fusepath_len_, const uint64_t fusepath_len_,
const mode_t mode_, const mode_t mode_,
const dev_t dev_, const dev_t dev_,
const ino_t ino_) const ino_t ino_)
{ {
return g_func(fusepath_,fusepath_len_,mode_,dev_,ino_);
return g_func(basepath_,fusepath_,fusepath_len_,mode_,dev_,ino_);
} }
uint64_t uint64_t
calc(std::string const &fusepath_,
calc(const std::string &basepath_,
std::string const &fusepath_,
const mode_t mode_, const mode_t mode_,
const dev_t dev_, const dev_t dev_,
const ino_t ino_) const ino_t ino_)
{ {
return calc(fusepath_.c_str(),
return calc(basepath_,
fusepath_.c_str(),
fusepath_.size(), fusepath_.size(),
mode_, mode_,
dev_, dev_,
@ -222,11 +318,13 @@ namespace fs
} }
void void
calc(const char *fusepath_,
calc(const std::string &basepath_,
const char *fusepath_,
const uint64_t fusepath_len_, const uint64_t fusepath_len_,
struct stat *st_) struct stat *st_)
{ {
st_->st_ino = calc(fusepath_,
st_->st_ino = calc(basepath_,
fusepath_,
fusepath_len_, fusepath_len_,
st_->st_mode, st_->st_mode,
st_->st_dev, st_->st_dev,
@ -234,17 +332,19 @@ namespace fs
} }
void void
calc(const char *fusepath_,
calc(const std::string &basepath_,
const char *fusepath_,
struct stat *st_) struct stat *st_)
{ {
calc(fusepath_,strlen(fusepath_),st_);
calc(basepath_,fusepath_,strlen(fusepath_),st_);
} }
void void
calc(const std::string &fusepath_,
calc(const std::string &basepath_,
const std::string &fusepath_,
struct stat *st_) struct stat *st_)
{ {
calc(fusepath_.c_str(),fusepath_.size(),st_);
calc(basepath_,fusepath_.c_str(),fusepath_.size(),st_);
} }
} }
} }

15
src/fs_inode.hpp

@ -33,21 +33,26 @@ namespace fs
int set_algo(const std::string &s); int set_algo(const std::string &s);
std::string get_algo(void); std::string get_algo(void);
uint64_t calc(const char *fusepath,
uint64_t calc(const std::string &basepath,
const char *fusepath,
const uint64_t fusepath_len, const uint64_t fusepath_len,
const mode_t mode, const mode_t mode,
const dev_t dev, const dev_t dev,
const ino_t ino); const ino_t ino);
uint64_t calc(std::string const &fusepath,
uint64_t calc(const std::string &basepath,
std::string const &fusepath,
mode_t const mode, mode_t const mode,
dev_t const dev, dev_t const dev,
ino_t ino); ino_t ino);
void calc(const char *fusepath,
void calc(const std::string &basepath,
const char *fusepath,
const uint64_t fusepath_len, const uint64_t fusepath_len,
struct stat *st); struct stat *st);
void calc(const char *fusepath,
void calc(const std::string &basepath,
const char *fusepath,
struct stat *st); struct stat *st);
void calc(const std::string &fusepath,
void calc(const std::string &basepath,
const std::string &fusepath,
struct stat *st); struct stat *st);
} }

2
src/fuse_create.cpp

@ -163,7 +163,7 @@ namespace l
if(rv == -1) if(rv == -1)
return -errno; return -errno;
fi = new FileInfo(rv,fusepath_,ffi_->direct_io);
fi = new FileInfo(rv,createpath_,fusepath_,ffi_->direct_io);
ffi_->fh = reinterpret_cast<uint64_t>(fi); ffi_->fh = reinterpret_cast<uint64_t>(fi);

5
src/fuse_fgetattr.cpp

@ -28,6 +28,7 @@ namespace l
static static
int int
fgetattr(const int fd_, fgetattr(const int fd_,
const std::string &basepath_,
const std::string &fusepath_, const std::string &fusepath_,
struct stat *st_) struct stat *st_)
{ {
@ -37,7 +38,7 @@ namespace l
if(rv == -1) if(rv == -1)
return -errno; return -errno;
fs::inode::calc(fusepath_,st_);
fs::inode::calc(basepath_,fusepath_,st_);
return 0; return 0;
} }
@ -54,7 +55,7 @@ namespace FUSE
Config::Read cfg; Config::Read cfg;
FileInfo *fi = reinterpret_cast<FileInfo*>(ffi_->fh); FileInfo *fi = reinterpret_cast<FileInfo*>(ffi_->fh);
rv = l::fgetattr(fi->fd,fi->fusepath,st_);
rv = l::fgetattr(fi->fd,fi->basepath,fi->fusepath,st_);
timeout_->entry = ((rv >= 0) ? timeout_->entry = ((rv >= 0) ?
cfg->cache_entry : cfg->cache_entry :

2
src/fuse_getattr.cpp

@ -141,7 +141,7 @@ namespace l
if(symlinkify_ && symlinkify::can_be_symlink(*st_,symlinkify_timeout_)) if(symlinkify_ && symlinkify::can_be_symlink(*st_,symlinkify_timeout_))
symlinkify::convert(fullpath,st_); symlinkify::convert(fullpath,st_);
fs::inode::calc(fusepath_,st_);
fs::inode::calc(basepaths[0],fusepath_,st_);
return 0; return 0;
} }

2
src/fuse_open.cpp

@ -211,7 +211,7 @@ namespace l
if(fd == -1) if(fd == -1)
return -errno; return -errno;
fi = new FileInfo(fd,fusepath_,ffi_->direct_io);
fi = new FileInfo(fd,basepath_,fusepath_,ffi_->direct_io);
ffi_->fh = reinterpret_cast<uint64_t>(fi); ffi_->fh = reinterpret_cast<uint64_t>(fi);

8
src/fuse_readdir_cor.cpp

@ -77,7 +77,8 @@ namespace l
static static
inline inline
int int
readdir(std::string basepath_,
readdir(const std::string &branchdir_,
std::string basepath_,
HashSet &names_, HashSet &names_,
fuse_dirents_t *buf_, fuse_dirents_t *buf_,
std::mutex &mutex_) std::mutex &mutex_)
@ -122,7 +123,8 @@ namespace l
continue; continue;
filepath = fs::path::make(basepath_,d->name); filepath = fs::path::make(basepath_,d->name);
d->ino = fs::inode::calc(filepath,
d->ino = fs::inode::calc(branchdir_,
filepath,
DTTOIF(d->type), DTTOIF(d->type),
dev, dev,
d->ino); d->ino);
@ -161,7 +163,7 @@ namespace l
basepath = fs::path::make(branch.path,dirname_); basepath = fs::path::make(branch.path,dirname_);
return l::readdir(basepath,names,buf_,mutex);
return l::readdir(branch.path,basepath,names,buf_,mutex);
}; };
auto rv = tp_.enqueue_task(func); auto rv = tp_.enqueue_task(func);

5
src/fuse_readdir_cosr.cpp

@ -52,6 +52,7 @@ namespace l
{ {
DIR *dir; DIR *dir;
int err; int err;
std::string basepath;
}; };
struct Error struct Error
@ -119,6 +120,7 @@ namespace l
errno = 0; errno = 0;
rv.dir = fs::opendir(basepath); rv.dir = fs::opendir(basepath);
rv.err = errno; rv.err = errno;
rv.basepath = branch.path;
return rv; return rv;
}; };
@ -169,7 +171,8 @@ namespace l
continue; continue;
fullpath = fs::path::make(dirname_,de->d_name); fullpath = fs::path::make(dirname_,de->d_name);
de->d_ino = fs::inode::calc(fullpath,
de->d_ino = fs::inode::calc(dirrv.basepath,
fullpath,
DTTOIF(de->d_type), DTTOIF(de->d_type),
dev, dev,
de->d_ino); de->d_ino);

3
src/fuse_readdir_seq.cpp

@ -125,7 +125,8 @@ namespace l
continue; continue;
fullpath = fs::path::make(dirname_,de->d_name); fullpath = fs::path::make(dirname_,de->d_name);
de->d_ino = fs::inode::calc(fullpath,
de->d_ino = fs::inode::calc(branch.path,
fullpath,
DTTOIF(de->d_type), DTTOIF(de->d_type),
dev, dev,
de->d_ino); de->d_ino);

2
src/fuse_symlink.cpp

@ -74,7 +74,7 @@ namespace l
{ {
fs::lstat(fullnewpath,st_); fs::lstat(fullnewpath,st_);
if(st_->st_ino != 0) if(st_->st_ino != 0)
fs::inode::calc(linkpath_,st_);
fs::inode::calc(newbasepath_,linkpath_,st_);
} }
return error::calc(rv,error_,errno); return error::calc(rv,error_,errno);

Loading…
Cancel
Save