From f8524740736cb8cbea5af8707dbc80cc7a396536 Mon Sep 17 00:00:00 2001 From: trapexit Date: Sat, 5 Jul 2025 17:05:53 -0500 Subject: [PATCH] Update the secondary group cache (#1492) * Use concurrent_flat_map to build a global cache rather than per thread. * Create global maintenance thread manager for clearing unused cache entries. --- libfuse/Makefile | 3 +- libfuse/include/fuse.h | 3 +- libfuse/include/maintenance_thread.hpp | 13 ++ libfuse/lib/fuse.cpp | 62 +++---- libfuse/lib/fuse_loop.cpp | 18 +- libfuse/lib/maintenance_thread.cpp | 65 +++++++ mkdocs/docs/config/options.md | 8 +- mkdocs/docs/known_issues_bugs.md | 23 +-- src/config.cpp | 4 + src/config.hpp | 3 + src/config_gidcache.cpp | 65 +++++++ src/config_gidcache.hpp | 25 +++ src/fuse_ioctl.cpp | 2 +- src/gidcache.cpp | 233 +++++++++---------------- src/gidcache.hpp | 41 ++--- src/mergerfs.cpp | 8 +- src/ugid.cpp | 4 +- 17 files changed, 338 insertions(+), 242 deletions(-) create mode 100644 libfuse/include/maintenance_thread.hpp create mode 100644 libfuse/lib/maintenance_thread.cpp create mode 100644 src/config_gidcache.cpp create mode 100644 src/config_gidcache.hpp diff --git a/libfuse/Makefile b/libfuse/Makefile index e5a1fdb1..953f488e 100644 --- a/libfuse/Makefile +++ b/libfuse/Makefile @@ -56,7 +56,8 @@ SRC_CPP = \ lib/fuse_msgbuf.cpp \ lib/pin_threads.cpp \ lib/format.cpp \ - lib/node.cpp + lib/node.cpp \ + lib/maintenance_thread.cpp OBJS_C = $(SRC_C:lib/%.c=build/%.o) OBJS_CPP = $(SRC_CPP:lib/%.cpp=build/%.o) diff --git a/libfuse/include/fuse.h b/libfuse/include/fuse.h index ca48ffc8..8259ce35 100644 --- a/libfuse/include/fuse.h +++ b/libfuse/include/fuse.h @@ -688,8 +688,7 @@ int fuse_is_lib_option(const char *opt); */ int fuse_main_real(int argc, char *argv[], const struct fuse_operations *op, size_t op_size); -int fuse_start_maintenance_thread(struct fuse *fuse); -void fuse_stop_maintenance_thread(struct fuse *fuse); +void fuse_populate_maintenance_thread(struct fuse *fuse); int 
fuse_log_metrics_get(void); void fuse_log_metrics_set(int enabled); diff --git a/libfuse/include/maintenance_thread.hpp b/libfuse/include/maintenance_thread.hpp new file mode 100644 index 00000000..3df21a38 --- /dev/null +++ b/libfuse/include/maintenance_thread.hpp @@ -0,0 +1,13 @@ +#pragma once + +#include +#include + + +class MaintenanceThread +{ +public: + static void setup(); + static void stop(); + static void push_job(const std::function &func); +}; diff --git a/libfuse/lib/fuse.cpp b/libfuse/lib/fuse.cpp index fbda410d..b535c1b1 100644 --- a/libfuse/lib/fuse.cpp +++ b/libfuse/lib/fuse.cpp @@ -27,8 +27,10 @@ #include "fuse_pollhandle.h" #include "fuse_msgbuf.hpp" -#include +#include "maintenance_thread.hpp" + #include +#include #include #include @@ -141,7 +143,6 @@ struct fuse struct fuse_fs *fs; struct lock_queue_element *lockq; - pthread_t maintenance_thread; kvec_t(remembered_node_t) remembered_nodes; }; @@ -4032,49 +4033,26 @@ fuse_gc1() fuse_malloc_trim(); } -static -void* -fuse_maintenance_loop(void *fuse_) +void +fuse_populate_maintenance_thread(struct fuse *f_) { - int loops; - int sleep_time; - struct fuse *f = (struct fuse*)fuse_; - - pthread_setname_np(pthread_self(),"fuse.maint"); - - loops = 0; - sleep_time = 60; - while(1) - { - if(remember_nodes(f)) - fuse_prune_remembered_nodes(f); - - if((loops % 15) == 0) - fuse_gc1(); - - if(g_LOG_METRICS) - metrics_log_nodes_info_to_tmp_dir(f); - - loops++; - sleep(sleep_time); - } - - return NULL; -} + MaintenanceThread::push_job([=](int count_) + { + if(remember_nodes(f_)) + fuse_prune_remembered_nodes(f_); + }); -int -fuse_start_maintenance_thread(struct fuse *f_) -{ - return fuse_start_thread(&f_->maintenance_thread,fuse_maintenance_loop,f_); -} + MaintenanceThread::push_job([](int count_) + { + if((count_ % 15) == 0) + fuse_gc1(); + }); -void -fuse_stop_maintenance_thread(struct fuse *f_) -{ - mutex_lock(&f_->lock); - pthread_cancel(f_->maintenance_thread); - mutex_unlock(&f_->lock); - 
pthread_join(f_->maintenance_thread,NULL); + MaintenanceThread::push_job([=](int count_) + { + if(g_LOG_METRICS) + metrics_log_nodes_info_to_tmp_dir(f_); + }); } struct fuse* diff --git a/libfuse/lib/fuse_loop.cpp b/libfuse/lib/fuse_loop.cpp index 7838abd0..f04c6e57 100644 --- a/libfuse/lib/fuse_loop.cpp +++ b/libfuse/lib/fuse_loop.cpp @@ -8,6 +8,7 @@ #include "scope_guard.hpp" #include "thread_pool.hpp" #include "syslog.hpp" +#include "maintenance_thread.hpp" #include "fuse_i.h" #include "fuse_kernel.h" @@ -317,7 +318,7 @@ fuse_session_loop_mt(struct fuse_session *se_, pin_threads_type_); while(!fuse_session_exited(se_)) - sem_wait(&finished); + sem_wait(&finished); sem_destroy(&finished); @@ -325,22 +326,23 @@ fuse_session_loop_mt(struct fuse_session *se_, } int -fuse_loop_mt(struct fuse *f) +fuse_loop_mt(struct fuse *f_) { - if(f == NULL) - return -1; + int res; - int res = fuse_start_maintenance_thread(f); - if(res) + if(f_ == NULL) return -1; - res = fuse_session_loop_mt(fuse_get_session(f), + MaintenanceThread::setup(); + fuse_populate_maintenance_thread(f_); + + res = fuse_session_loop_mt(fuse_get_session(f_), fuse_config_get_read_thread_count(), fuse_config_get_process_thread_count(), fuse_config_get_process_thread_queue_depth(), fuse_config_get_pin_threads()); - fuse_stop_maintenance_thread(f); + MaintenanceThread::stop(); return res; } diff --git a/libfuse/lib/maintenance_thread.cpp b/libfuse/lib/maintenance_thread.cpp new file mode 100644 index 00000000..37ef2205 --- /dev/null +++ b/libfuse/lib/maintenance_thread.cpp @@ -0,0 +1,65 @@ +#include "maintenance_thread.hpp" + +#include "fmt/core.h" + +#include +#include +#include + +#include +#include + + +pthread_t g_thread; +std::vector> g_funcs; +std::mutex g_mutex; + +static +void* +_thread_loop(void *) +{ + int count; + + pthread_setname_np(pthread_self(),"fuse.maint"); + + count = 0; + while(true) + { + pthread_setcancelstate(PTHREAD_CANCEL_DISABLE,NULL); + { + std::lock_guard lg(g_mutex); + for(auto 
&func : g_funcs) + func(count); + } + pthread_setcancelstate(PTHREAD_CANCEL_ENABLE,NULL); + + count++; + ::sleep(60); + } + + return NULL; +} + +void +MaintenanceThread::setup() +{ + int rv; + + rv = pthread_create(&g_thread,NULL,_thread_loop,NULL); + assert((rv == 0) && "pthread_create failed"); +} + +void +MaintenanceThread::push_job(const std::function &func_) +{ + std::lock_guard lg(g_mutex); + + g_funcs.emplace_back(func_); +} + +void +MaintenanceThread::stop() +{ + pthread_cancel(g_thread); + pthread_join(g_thread,NULL); +} diff --git a/mkdocs/docs/config/options.md b/mkdocs/docs/config/options.md index aeb89e22..8a282dad 100644 --- a/mkdocs/docs/config/options.md +++ b/mkdocs/docs/config/options.md @@ -8,7 +8,7 @@ background](../intro_to_filesystems.md) in such things is recommended for more advanced configurations. These option names and values are the same regardless of whether you -use them with the `mergerfs` commandline program, in fstab, or in a +use them with the `mergerfs` command line program, in fstab, or in a config file. @@ -204,6 +204,12 @@ config file. * **[passthrough](passthrough.md)**: Enable [FUSE IO passthrough](https://kernelnewbies.org/Linux_6.9#Faster_FUSE_I.2FO) if available. (default: off) +* **gid-cache-expire-timeout**: Number of seconds till supplemental + group data is refreshed in the [GID + cache](../known_issues_bugs.md#supplemental-user-groups). (default: + 3600) +* **gid-cache-remove-timeout**: Number of seconds to wait till cached + data is removed due to lack of usage. 
(default: 43200) **NOTE:** Options are evaluated in the order listed so if the options are **func.rmdir=rand,category.action=ff** the **action** category diff --git a/mkdocs/docs/known_issues_bugs.md b/mkdocs/docs/known_issues_bugs.md index 7575f00d..2142a646 100644 --- a/mkdocs/docs/known_issues_bugs.md +++ b/mkdocs/docs/known_issues_bugs.md @@ -4,25 +4,26 @@ ### Supplemental user groups +#### Supplemental group caching + Due to the overhead of [getgroups/setgroups](http://linux.die.net/man/2/setgroups) mergerfs -utilizes a cache. This cache is opportunistic and per thread. Each -thread will query the supplemental groups for a user when that -particular thread needs to change credentials and will keep that data -for the lifetime of the thread. This means that if a user is added to -a group it may not be picked up without the restart of -mergerfs. In the future this may be improved to allow a periodic or -manual clearing of the cache. +utilizes a cache. As necessary the supplemental group information will +be queried and cached. That cached list of groups will be used to set +the supplemental groups as necessary. Due to the high cost of querying +the group list the default expiry for said data is 1 hour and after 12 +hours of no usage it will be removed from the cache altogether. + +#### Host vs Container identity While not a bug some users have found when using containers that supplemental groups defined inside the container don't work as expected. Since mergerfs lives outside the container it is querying -the host's group database. Effectively containers have their own user -and group definitions unless setup otherwise just as different systems -would. +the host's group database. Containers have their own user and group +definitions unless set up otherwise just as different systems would. Users should mount in the host group file into the containers or use a -standard shared user & groups technology like NIS or LDAP. 
+standard shared user and groups technology like NIS or LDAP. ### directory mtime is not being updated diff --git a/src/config.cpp b/src/config.cpp index d4b476d6..effd687a 100644 --- a/src/config.cpp +++ b/src/config.cpp @@ -106,6 +106,8 @@ Config::Config() fsname(), func(), fuse_msg_size("1M"), + gid_cache_expire_timeout(60 * 60), + gid_cache_remove_timeout(60 * 60 * 12), ignorepponrename(false), inodecalc("hybrid-hash"), lazy_umount_mountpoint(false), @@ -184,6 +186,8 @@ Config::Config() _map["func.unlink"] = &func.unlink; _map["func.utimens"] = &func.utimens; _map["fuse_msg_size"] = &fuse_msg_size; + _map["gid-cache-expire-timeout"] = &gid_cache_expire_timeout; + _map["gid-cache-remove-timeout"] = &gid_cache_remove_timeout; _map["handle-killpriv"] = &handle_killpriv; _map["handle-killpriv-v2"] = &handle_killpriv_v2; _map["ignorepponrename"] = &ignorepponrename; diff --git a/src/config.hpp b/src/config.hpp index bb0606aa..e1cc7188 100644 --- a/src/config.hpp +++ b/src/config.hpp @@ -21,6 +21,7 @@ #include "config_cachefiles.hpp" #include "config_flushonclose.hpp" #include "config_follow_symlinks.hpp" +#include "config_gidcache.hpp" #include "config_inodecalc.hpp" #include "config_link_exdev.hpp" #include "config_log_metrics.hpp" @@ -130,6 +131,8 @@ public: ConfigSTR fsname; Funcs func; ConfigPageSize fuse_msg_size; + GIDCacheExpireTimeout gid_cache_expire_timeout; + GIDCacheRemoveTimeout gid_cache_remove_timeout; ConfigBOOL handle_killpriv = true; ConfigBOOL handle_killpriv_v2 = true; ConfigBOOL ignorepponrename; diff --git a/src/config_gidcache.cpp b/src/config_gidcache.cpp new file mode 100644 index 00000000..8cdec73e --- /dev/null +++ b/src/config_gidcache.cpp @@ -0,0 +1,65 @@ +#include "config_gidcache.hpp" + +#include "gidcache.hpp" + +#include "to_string.hpp" +#include "from_string.hpp" + + +GIDCacheExpireTimeout::GIDCacheExpireTimeout(const int i_) +{ + GIDCache::expire_timeout = i_; +} + +GIDCacheExpireTimeout::GIDCacheExpireTimeout(const std::string 
&s_) +{ + from_string(s_); +} + +std::string +GIDCacheExpireTimeout::to_string(void) const +{ + return str::to(GIDCache::expire_timeout); +} + +int +GIDCacheExpireTimeout::from_string(const std::string &s_) +{ + int rv; + + rv = str::from(s_,&GIDCache::expire_timeout); + if(rv < 0) + return rv; + + return 0; +} + + +GIDCacheRemoveTimeout::GIDCacheRemoveTimeout(const int i_) +{ + GIDCache::remove_timeout = i_; +} + + +GIDCacheRemoveTimeout::GIDCacheRemoveTimeout(const std::string &s_) +{ + from_string(s_); +} + +std::string +GIDCacheRemoveTimeout::to_string(void) const +{ + return str::to(GIDCache::remove_timeout); +} + +int +GIDCacheRemoveTimeout::from_string(const std::string &s_) +{ + int rv; + + rv = str::from(s_,&GIDCache::remove_timeout); + if(rv < 0) + return rv; + + return 0; +} diff --git a/src/config_gidcache.hpp b/src/config_gidcache.hpp new file mode 100644 index 00000000..f5d68ccf --- /dev/null +++ b/src/config_gidcache.hpp @@ -0,0 +1,25 @@ +#pragma once + +#include "tofrom_string.hpp" + +class GIDCacheExpireTimeout : public ToFromString +{ +public: + GIDCacheExpireTimeout(const int i = 60 * 60); + GIDCacheExpireTimeout(const std::string &); + +public: + std::string to_string(void) const final; + int from_string(const std::string &) final; +}; + +class GIDCacheRemoveTimeout : public ToFromString +{ +public: + GIDCacheRemoveTimeout(const int = 60 * 60 * 12); + GIDCacheRemoveTimeout(const std::string &); + +public: + std::string to_string(void) const final; + int from_string(const std::string &) final; +}; diff --git a/src/fuse_ioctl.cpp b/src/fuse_ioctl.cpp index 60121c5c..92c2cd40 100644 --- a/src/fuse_ioctl.cpp +++ b/src/fuse_ioctl.cpp @@ -336,7 +336,7 @@ namespace l fuse_invalidate_all_nodes(); return 0; case IOCTL_INVALIDATE_GID_CACHE: - GIDCache::invalidate_all_caches(); + GIDCache::invalidate_all(); break; } diff --git a/src/gidcache.cpp b/src/gidcache.cpp index cdf71b4e..df0fdd69 100644 --- a/src/gidcache.cpp +++ b/src/gidcache.cpp @@ -14,7 +14,7 
@@ OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ -#include "rnd.hpp" +#include "gidcache.hpp" #include #include @@ -28,94 +28,9 @@ # include #endif -#include -#include -#include -#include -#include - -#include "gidcache.hpp" - -std::mutex g_REGISTERED_CACHES_MUTEX; -std::unordered_map g_REGISTERED_CACHES; - - -inline -bool -GIDRecord::operator<(const struct GIDRecord &b) const -{ - return uid < b.uid; -} - -GIDCache::GIDCache() - : invalidate(false), - size(0), - recs() -{ - std::lock_guard guard(g_REGISTERED_CACHES_MUTEX); - - bool inserted; - pthread_t pthid; - - pthid = pthread_self(); - inserted = g_REGISTERED_CACHES.emplace(pthid,this).second; - - assert(inserted == true); -} - -inline -GIDRecord * -GIDCache::begin(void) -{ - return &recs[0]; -} - -inline -GIDRecord * -GIDCache::end(void) -{ - return &recs[size]; -} - -inline -GIDRecord * -GIDCache::allocrec(void) -{ - if(size == MAXRECS) - return &recs[RND::rand64(MAXRECS)]; - else - return &recs[size++]; -} - -inline -GIDRecord * -GIDCache::lower_bound(GIDRecord *begin, - GIDRecord *end, - const uid_t uid) -{ - int step; - int count; - GIDRecord *iter; - - count = std::distance(begin,end); - while(count > 0) - { - iter = begin; - step = count / 2; - std::advance(iter,step); - if(iter->uid < uid) - { - begin = ++iter; - count -= step + 1; - } - else - { - count = step; - } - } - - return begin; -} +int GIDCache::expire_timeout = (60 * 60); +int GIDCache::remove_timeout = (60 * 60 * 12); +boost::concurrent_flat_map GIDCache::_records; static int @@ -131,85 +46,111 @@ _getgrouplist(const char *user, #endif } -GIDRecord * -GIDCache::cache(const uid_t uid, - const gid_t gid) -{ - int rv; - char buf[4096]; - struct passwd pwd; - struct passwd *pwdrv; - GIDRecord *rec; - - rec = allocrec(); - - rec->uid = uid; - rv = ::getpwuid_r(uid,&pwd,buf,sizeof(buf),&pwdrv); - if(pwdrv != NULL && rv == 0) - { - rec->size = 0; - ::_getgrouplist(pwd.pw_name,gid,NULL,&rec->size); - rec->size = 
std::min(MAXGIDS,rec->size); - rv = ::_getgrouplist(pwd.pw_name,gid,rec->gids,&rec->size); - if(rv == -1) - { - rec->gids[0] = gid; - rec->size = 1; - } - } - - return rec; -} - static inline int -setgroups(const GIDRecord *rec) +_setgroups(const std::vector gids_) { #if defined __linux__ and UGID_USE_RWLOCK == 0 # if defined SYS_setgroups32 - return ::syscall(SYS_setgroups32,rec->size,rec->gids); + return ::syscall(SYS_setgroups32,gids_.size(),gids_.data()); # else - return ::syscall(SYS_setgroups,rec->size,rec->gids); + return ::syscall(SYS_setgroups,gids_.size(),gids_.data()); # endif #else - return ::setgroups(rec->size,rec->gids); + return ::setgroups(gids_.size(),gids_.data()); #endif } -int -GIDCache::initgroups(const uid_t uid, - const gid_t gid) +static +void +_getgroups(const uid_t uid_, + const gid_t gid_, + std::vector &gids_) { int rv; - GIDRecord *rec; + int ngroups; + char buf[4096]; + struct passwd pwd; + struct passwd *pwdrv; - if(invalidate) - { - size = 0; - invalidate = false; - } + gids_.clear(); + rv = ::getpwuid_r(uid_,&pwd,buf,sizeof(buf),&pwdrv); + if((rv == -1) || (pwdrv == NULL)) + goto error; + + ngroups = 0; + rv = ::_getgrouplist(pwd.pw_name,gid_,NULL,&ngroups); + gids_.resize(ngroups); + + rv = ::_getgrouplist(pwd.pw_name,gid_,gids_.data(),&ngroups); + if((size_t)ngroups < gids_.size()) + gids_.resize(ngroups); - rec = lower_bound(begin(),end(),uid); - if(rec == end() || rec->uid != uid) + return; + + error: + gids_.clear(); + gids_.push_back(gid_); +} + + +int +GIDCache::initgroups(const uid_t uid_, + const gid_t gid_) +{ + auto first_func = + [=](auto &x) { - rec = cache(uid,gid); - rv = ::setgroups(rec); - std::sort(begin(),end()); - } - else + x.second.last_update = ::time(NULL); + ::_getgroups(uid_,gid_,x.second.gids); + ::_setgroups(x.second.gids); + }; + auto exists_func = + [=](auto &x) { - rv = ::setgroups(rec); - } + time_t now; + + now = ::time(NULL); + if((now - x.second.last_update) > GIDCache::expire_timeout) + { + 
::_getgroups(uid_,gid_,x.second.gids); + x.second.last_update = now; + } + ::_setgroups(x.second.gids); + }; + + _records.try_emplace_and_visit(uid_, + first_func, + exists_func); - return rv; + return 0; } void -GIDCache::invalidate_all_caches() +GIDCache::invalidate_all() { - std::lock_guard guard(g_REGISTERED_CACHES_MUTEX); + _records.visit_all([](auto &x) + { + x.second.last_update = 0; + }); +} + +void +GIDCache::clear_all() +{ + _records.clear(); +} + +void +GIDCache::clear_unused() +{ + time_t now = ::time(NULL); + auto erase_func = + [=](auto &x) + { + return ((now - x.second.last_update) > GIDCache::remove_timeout); + }; - for(auto &p : g_REGISTERED_CACHES) - p.second->invalidate = true; + _records.erase_if(erase_func); } diff --git a/src/gidcache.hpp b/src/gidcache.hpp index a611dc6f..f23ac212 100644 --- a/src/gidcache.hpp +++ b/src/gidcache.hpp @@ -16,10 +16,13 @@ #pragma once +#include "boost/unordered/concurrent_flat_map.hpp" + #include #include #include +#include #define MAXGIDS 32 #define MAXRECS 256 @@ -32,42 +35,28 @@ // told to invalidate the cache on demand. A second instance on the // same thread will cause an assert to be triggered. 
- struct GIDRecord { - uid_t uid; - int size; - gid_t gids[MAXGIDS]; - - bool - operator<(const struct GIDRecord &b) const; + std::vector gids; + time_t last_update; }; struct GIDCache { public: - GIDCache(); - -public: - bool invalidate; - size_t size; - std::array recs; - -private: - GIDRecord *begin(void); - GIDRecord *end(void); - GIDRecord *allocrec(void); - GIDRecord *lower_bound(GIDRecord *begin, - GIDRecord *end, - const uid_t uid); - GIDRecord *cache(const uid_t uid, - const gid_t gid); - -public: + static int initgroups(const uid_t uid, const gid_t gid); + static void invalidate_all(); + static void clear_all(); + static void clear_unused(); + public: - static void invalidate_all_caches(); + static int expire_timeout; + static int remove_timeout; + +private: + static boost::concurrent_flat_map _records; }; diff --git a/src/mergerfs.cpp b/src/mergerfs.cpp index ed4ed423..c7ffdcb0 100644 --- a/src/mergerfs.cpp +++ b/src/mergerfs.cpp @@ -29,6 +29,7 @@ #include "strvec.hpp" #include "syslog.hpp" #include "version.hpp" +#include "maintenance_thread.hpp" #include "fuse_access.hpp" #include "fuse_bmap.hpp" @@ -246,7 +247,7 @@ namespace l { SysLog::info("Received SIGUSR2 - triggering thorough gc"); fuse_gc(); - GIDCache::invalidate_all_caches(); + GIDCache::clear_all(); } static @@ -310,6 +311,11 @@ namespace l l::warn_if_not_root(); + MaintenanceThread::push_job([](int count_) + { + if((count_ % 60) == 0) + GIDCache::clear_unused(); + }); l::setup_resources(cfg->scheduling_priority); l::setup_signal_handlers(); l::get_fuse_operations(ops,cfg->nullrw); diff --git a/src/ugid.cpp b/src/ugid.cpp index 80385482..81a8ff58 100644 --- a/src/ugid.cpp +++ b/src/ugid.cpp @@ -28,8 +28,6 @@ namespace ugid initgroups(const uid_t uid_, const gid_t gid_) { - static thread_local GIDCache cache; - - cache.initgroups(uid_,gid_); + GIDCache::initgroups(uid_,gid_); } }