From 25265f4e85c103ba47bc8b1deae68204019dfda0 Mon Sep 17 00:00:00 2001 From: Antonio SJ Musumeci Date: Wed, 20 Jan 2016 16:34:07 -0500 Subject: [PATCH] dedup based on full statvfs struct rather than fsid. closes #183 --- README.md | 2 +- src/statfs.cpp | 53 +++++++++++++++++++++++++++++++++++++------------- 2 files changed, 41 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index 0f865129..a11f54b4 100644 --- a/README.md +++ b/README.md @@ -139,7 +139,7 @@ It could be extended to offer the ability to see all files found. Perhaps concat #### statvfs #### -[statvfs](http://linux.die.net/man/2/statvfs) normalizes the source drives based on the fragment size and sums the number of adjusted blocks and inodes. This means you will see the combined space of all sources. Total, used, and free. The sources however are dedupped based on the drive so multiple mount points on the same drive will not result in double counting it's space. +[statvfs](http://linux.die.net/man/2/statvfs) normalizes the source drives based on the fragment size and sums the number of adjusted blocks and inodes. This means you will see the combined space of all sources. Total, used, and free. The sources however are deduplicated based on the drive so multiple mount points on the same drive will not result in double counting its space. Because the deduplication compares complete statvfs results, a drive whose usage changes between the lookups of two of its mount points could be double counted, but this race condition is rather unlikely. **NOTE:** Since we can not (easily) replicate the atomicity of an **mkdir** or **mknod** without side effects those calls will first do a scan to see if the file exists and then attempts a create. This means there is a slight race condition. Worse case you'd end up with the directory or file on more than one mount. 
diff --git a/src/statfs.cpp b/src/statfs.cpp index 53356884..f6d2f765 100644 --- a/src/statfs.cpp +++ b/src/statfs.cpp @@ -20,19 +20,44 @@ #include #include +#include #include #include -#include -#include "ugid.hpp" #include "config.hpp" #include "rwlock.hpp" +#include "ugid.hpp" using std::string; using std::vector; -using std::map; +using std::set; using std::pair; +#define CMP(FOO) if(lhs.f_##FOO != rhs.f_##FOO) return (lhs.f_##FOO < rhs.f_##FOO) +/* Lexicographic strict weak ordering over all statvfs fields, as std::set requires: the first differing field decides, so two entries are duplicates only when every field is equal. */ +struct +statvfs_compare +{ + bool + operator()(const struct statvfs &lhs, + const struct statvfs &rhs) const + { + CMP(bsize); + CMP(frsize); + CMP(blocks); + CMP(bfree); + CMP(bavail); + CMP(files); + CMP(ffree); + CMP(favail); + CMP(fsid); + CMP(flag); + return (lhs.f_namemax < rhs.f_namemax); + } +}; + +typedef set statvfs_set; + static void _normalize_statvfs(struct statvfs *fsstat, @@ -67,19 +92,18 @@ int _statfs(const vector &srcmounts, struct statvfs &fsstat) { + statvfs_set fsstats; unsigned long min_bsize = ULONG_MAX; unsigned long min_frsize = ULONG_MAX; unsigned long min_namemax = ULONG_MAX; - map fsstats; - vector::const_iterator iter; - vector::const_iterator enditer; for(size_t i = 0, ei = srcmounts.size(); i != ei; i++) { int rv; struct statvfs fsstat; + rv = ::statvfs(srcmounts[i].c_str(),&fsstat); - if(rv != 0) + if(rv == -1) continue; if(min_bsize > fsstat.f_bsize) @@ -89,19 +113,22 @@ _statfs(const vector &srcmounts, if(min_namemax > fsstat.f_namemax) min_namemax = fsstat.f_namemax; - fsstats.insert(pair(fsstat.f_fsid,fsstat)); + fsstats.insert(fsstat); } - map::iterator fsstatiter = fsstats.begin(); - map::iterator endfsstatiter = fsstats.end(); + statvfs_set::const_iterator fsstatiter = fsstats.begin(); + statvfs_set::const_iterator endfsstatiter = fsstats.end(); if(fsstatiter != endfsstatiter) { - fsstat = fsstatiter->second; + fsstat = *fsstatiter; _normalize_statvfs(&fsstat,min_bsize,min_frsize,min_namemax); for(++fsstatiter;fsstatiter != endfsstatiter;++fsstatiter) { - 
_normalize_statvfs(&fsstatiter->second,min_bsize,min_frsize,min_namemax); - _merge_statvfs(&fsstat,&fsstatiter->second); + struct statvfs tmp = *fsstatiter; + + _normalize_statvfs(&tmp,min_bsize,min_frsize,min_namemax); + + _merge_statvfs(&fsstat,&tmp); } }