From a646fe016dbd42abf9166ea47a473ca1177308dd Mon Sep 17 00:00:00 2001 From: Antonio SJ Musumeci Date: Wed, 12 Feb 2020 20:55:14 -0500 Subject: [PATCH] change inode conversion algo to reduce collision --- README.md | 5 +++-- man/mergerfs.1 | 23 ++++++++++++++++------- src/fs_inode.hpp | 16 +++++++++++----- 3 files changed, 30 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index 7469f639..3807eb41 100644 --- a/README.md +++ b/README.md @@ -1070,9 +1070,9 @@ Yes. While some users have reported problems it appears to always be related to #### How are inodes calculated? -mergerfs-inode = (original-inode | (device-id << 32)) +https://github.com/trapexit/mergerfs/blob/master/src/fs_inode.hpp -While `ino_t` is 64 bits only a few filesystems use more than 32. Similarly, while `dev_t` is also 64 bits it was traditionally 16 bits. Bitwise or'ing them together should work most of the time. While totally unique inodes are preferred the overhead which would be needed does not seem to out weighted by the benefits. +Originally tried to simply OR st_ino and (st_dev << 32) for 64bit systems. After a number of years someone finally ran into a collision that lead to some problems. Traditionally `dev_t` was 16bit and `ino_t` was 32bit so merging into one 64bit value worked but with both types being able to be up to 64bit that is no longer as simple. A proper hash seems like the best compromise. While totally unique inodes are preferred the overhead which would be needed does not seem to be outweighed by the benefits. While atypical, yes, inodes can be reused and not refer to the same file. The internal id used to reference a file in FUSE is different from the inode value presented. The former is the `nodeid` and is actually a tuple of (nodeid,generation). That tuple is not user facing. The inode is merely metadata passed through the kernel and found using the `stat` family of calls or `readdir`. @@ -1089,6 +1089,7 @@ Note that this does *not* affect the inode that libfuse and the kernel use internally (also called the "nodeid"). ``` +Generally collision, if it occurs, shouldn't be a problem. You can turn off the calculation by not using `use_ino`. In the future it might be worth creating different strategies for users to select from. #### I notice massive slowdowns of writes over NFS diff --git a/man/mergerfs.1 b/man/mergerfs.1 index ebbe9d22..ba621434 100644 --- a/man/mergerfs.1 +++ b/man/mergerfs.1 @@ -2227,14 +2227,18 @@ While some users have reported problems it appears to always be related to how Samba is setup in relation to permissions. .SS How are inodes calculated? .PP -mergerfs\-inode = (original\-inode | (device\-id << 32)) -.PP -While \f[C]ino_t\f[] is 64 bits only a few filesystems use more than 32. -Similarly, while \f[C]dev_t\f[] is also 64 bits it was traditionally 16 -bits. -Bitwise or\[aq]ing them together should work most of the time. +https://github.com/trapexit/mergerfs/blob/master/src/fs_inode.hpp +.PP +Originally tried to simply OR st_ino and (st_dev << 32) for 64bit +systems. +After a number of years someone finally ran into a collision that lead +to some problems. +Traditionally \f[C]dev_t\f[] was 16bit and \f[C]ino_t\f[] was 32bit so +merging into one 64bit value worked but with both types being able to be +up to 64bit that is no longer as simple. +A proper hash seems like the best compromise. While totally unique inodes are preferred the overhead which would be -needed does not seem to out weighted by the benefits. +needed does not seem to be outweighed by the benefits. .PP While atypical, yes, inodes can be reused and not refer to the same file. @@ -2260,6 +2264,11 @@ Note\ that\ this\ does\ *not*\ affect\ the\ inode\ that\ libfuse and\ the\ kernel\ use\ internally\ (also\ called\ the\ "nodeid"). \f[] .fi +.PP +Generally collision, if it occurs, shouldn\[aq]t be a problem. +You can turn off the calculation by not using \f[C]use_ino\f[]. +In the future it might be worth creating different strategies for users +to select from. .SS I notice massive slowdowns of writes over NFS .PP Due to how NFS works and interacts with FUSE when not using diff --git a/src/fs_inode.hpp b/src/fs_inode.hpp index 29127fa1..8209172d 100644 --- a/src/fs_inode.hpp +++ b/src/fs_inode.hpp @@ -18,6 +18,8 @@ #pragma once +#include "fasthash.h" + #include #include @@ -31,11 +33,15 @@ namespace fs void recompute(struct stat *st_) { - /* not ideal to do this at runtime but likely gets optimized out */ - if(sizeof(st_->st_ino) == 4) - st_->st_ino |= ((uint32_t)st_->st_dev << 16); - else - st_->st_ino |= ((uint64_t)st_->st_dev << 32); + uint64_t buf[5]; + + buf[0] = st_->st_ino; + buf[1] = st_->st_dev; + buf[2] = buf[0] ^ buf[1]; + buf[3] = buf[0] & buf[1]; + buf[4] = buf[0] | buf[1]; + + st_->st_ino = fasthash64(&buf[0],sizeof(buf),MAGIC); } } }