From 19509e33d26285ad68d71d9b2ee002584518a1b8 Mon Sep 17 00:00:00 2001 From: Antonio SJ Musumeci Date: Sun, 19 Jan 2025 21:33:05 -0600 Subject: [PATCH] Doc and mover script updates --- .../docs/faq/reliability_and_scalability.md | 16 ++- mkdocs/docs/media_and_publicity.md | 11 +- mkdocs/docs/related_projects.md | 33 ++++-- mkdocs/docs/usage_patterns.md | 105 ++++++++++++------ tools/mergerfs.percent-full-mover | 36 ++++-- tools/mergerfs.time-based-mover | 27 +++-- 6 files changed, 162 insertions(+), 66 deletions(-) diff --git a/mkdocs/docs/faq/reliability_and_scalability.md b/mkdocs/docs/faq/reliability_and_scalability.md index 131028a9..4158c8c6 100644 --- a/mkdocs/docs/faq/reliability_and_scalability.md +++ b/mkdocs/docs/faq/reliability_and_scalability.md @@ -26,6 +26,16 @@ Users have pooled everything from USB thumb drives to enterprise NVME SSDs to remote filesystems and rclone mounts. The cost of many calls can be `O(n)` meaning adding more branches to -the pool will increase the cost of certain functions but there are a -number of caches and strategies in place to limit overhead where -possible. +the pool will increase the cost of certain functions, such as reading +directories or finding files to open, but there are a number of caches +and strategies in place to limit overhead where possible. + + +## Are there any limits? + +There is no maximum capacity beyond what is imposed by the operating +system itself. Any limit is practical rather than technical. As +explained in the question about scale mergerfs is mostly limited by +the tolerated cost of aggregating branches and the cost associated +with interacting with them. If you pool slow network filesystem then +that will naturally impact performance more than low latency SSDs. 
diff --git a/mkdocs/docs/media_and_publicity.md b/mkdocs/docs/media_and_publicity.md index b2ca4192..d5589756 100644 --- a/mkdocs/docs/media_and_publicity.md +++ b/mkdocs/docs/media_and_publicity.md @@ -20,12 +20,16 @@ - 2020-08-20 - [Setting up Rclone, Mergerfs and Crontab for automated cloud storage](https://bytesized-hosting.com/pages/setting-up-rclone-mergerfs-and-crontab-for-automated-cloud-storage) - 2020-11-22 - [Introducing… MergerFS – My FREE UNRAID alternative](https://supertechfreaks.com/introducing-mergerfs-free-unraid-alternative/) - 2020-12-30 - [Perfect Media Server](https://perfectmediaserver.com) (a new site with docs fully fleshing out the 'Perfect Media Server' blog series) +- 2021-07-24 - [Building the Ultimate Linux Home Server - Part 1: Intro, MergerFS, and SnapRAID](https://blog.karaolidis.com/ultimate-home-server-part-1/) - 2021-10-31 - [Better Home Storage: MergerFS + SnapRAID on OpenMediaVault](https://blog.sakuragawa.moe/better-home-storage-mergerfs-snapraid-on-openmediavault/) - 2021-11-28 - [Linux Magazine: Come Together - Merging file systems for a simple NAS with MergerFS](https://www.linux-magazine.com/Issues/2022/254/MergerFS) - 2022-06-04 - [MergerFS + SnapRaid Study](https://crashlaker.github.io/2022/06/04/mergerfs_+_snapraid_study.html) - 2022-12-31 - [Merge Storages in CasaOS: A secret beta feature you know now](https://blog.casaos.io/blog/13.html) - 2023-02-03 - [(MergerFS + SnapRAID) is the new RAID 5](https://thenomadcode.tech/mergerfs-snapraid-is-the-new-raid-5) - 2024-02-07 - [Designing & Deploying MANS - A Hybrid NAS Approach with SnapRAID, MergerFS, and OpenZFS](https://blog.muffn.io/posts/part-3-mini-100tb-nas) +- 2024-03-11 - [Using MergerFS to combine multiple hard drives into one unified media storage](https://fullmetalbrackets.com/blog/two-drives-mergerfs/) +- 2024-12-20 - [Pooling multiple drives on my Raspberry Pi with mergerfs](https://sebi.io/posts/2024-12-20-pooling-multiple-drives-with-mergerfs/) + ## Videos @@ 
-56,16 +60,21 @@ - 2023-06-26 - [How to install and setup MergerFS](https://www.youtube.com/watch?v=n7piuhTXeG4) - 2023-07-31 - [How to recover a dead drive using Snapraid](https://www.youtube.com/watch?v=fmuiRLPcuJE) - 2024-01-05 - [OpenMediaVault MergerFS Tutorial (Portuguese)](https://www.youtube.com/watch?v=V6Yw86dRUPQ) +- 2024-02-19 - [Setup and Install MergerFS and SnapRAID (Part 1)](https://noted.lol/mergerfs-and-snapraid-setup-1/) +- 2024-02-22 - [Setup and Install MergerFS and SnapRAID (Part 2)](https://noted.lol/mergerfs-and-snapraid-setup-part-2/) - 2024-11-15 - [Meu servidor NAS - Parte 18: Recuperando um HD, recuperando o MergerFS e os próximos passos do NAS!](https://www.youtube.com/watch?v=5fy98kPzE3s) + ## Podcasts - 2019-11-04 - [Jupiter Extras: A Chat with mergerfs Developer Antonio Musumeci | Jupiter Extras 28](https://www.youtube.com/watch?v=VmJUAyyhSPk) - 2019-11-07 - [Jupiter Broadcasting: ZFS Isn’t the Only Option | Self-Hosted 5](https://www.youtube.com/watch?v=JEW7UuKhMJ8) - 2023-10-08 - [Self Hosted Episode 105 - Sleeper Storage Technology](https://selfhosted.show/105) + ## Social Media - [Reddit](https://www.reddit.com/search/?q=mergerfs&sort=new) -- [Twitter](https://twitter.com/search?q=mergerfs&src=spelling_expansion_revert_click&f=live) +- [X](https://x.com/search?q=mergerfs&src=spelling_expansion_revert_click&f=live) - [YouTube](https://www.youtube.com/results?search_query=mergerfs&sp=CAI%253D) +- [ServeTheHome Forum](https://forums.servethehome.com/index.php?search/3105813/&q=mergerfs&o=date) diff --git a/mkdocs/docs/related_projects.md b/mkdocs/docs/related_projects.md index a87f264e..7c1d4036 100644 --- a/mkdocs/docs/related_projects.md +++ b/mkdocs/docs/related_projects.md @@ -21,16 +21,33 @@ ## Software and services commonly used with mergerfs -* [snapraid](https://www.snapraid.it/) -* [rclone](https://rclone.org/) - * rclone's [union](https://rclone.org/union/) feature is based on - mergerfs policies -* 
[ZFS](https://openzfs.org/): Common to use ZFS w/ mergerfs +* [snapraid](https://www.snapraid.it/): a backup program designed for + disk arrays, storing parity information for data recovery in the + event of up to six disk failures. +* [rclone](https://rclone.org/): a command-line program to manage + files on cloud storage. It is a feature-rich alternative to cloud + vendors' web storage interfaces. rclone's + [union](https://rclone.org/union/) feature is based on mergerfs + policies. +* [ZFS](https://openzfs.org/): Common to use ZFS w/ mergerfs. ZFS for + important data and mergerfs pool for replacable media. * [UnRAID](https://unraid.net): While UnRAID has its own union filesystem it isn't uncommon to see UnRAID users leverage mergerfs - given the differences in the technologies. -* For a time there were a number of Chia miners recommending mergerfs -* [cloudboxes.io](https://cloudboxes.io/wiki/how-to/apps/set-up-mergerfs-using-ssh) + given the differences in the technologies. There is a [plugin + available by + Rysz](https://forums.unraid.net/topic/144999-plugin-mergerfs-for-unraid-support-topic/) + to ease installation and setup. +* [TrueNAS](https://www.truenas.com): Some users are requesting + mergerfs be [made part + of](https://forums.truenas.com/t/add-unionfs-or-mergerfs-and-rdam-enhancement-then-beat-all-other-nas-systems/23218) + TrueNAS. +* For a time there were a number of Chia miners recommending mergerfs. +* [cloudboxes.io](https://cloudboxes.io): VPS provider. Includes + details [on their + wiki](https://cloudboxes.io/wiki/how-to/apps/set-up-mergerfs-using-ssh): + on how to setup mergerfs. +* [QNAP](https://www.myqnap.org/product/mergerfs-apache83/): Someone + has create builds of mergerfs for different QNAP devices. 
## Distributions including mergerfs diff --git a/mkdocs/docs/usage_patterns.md b/mkdocs/docs/usage_patterns.md index 358efc73..5cdc130f 100644 --- a/mkdocs/docs/usage_patterns.md +++ b/mkdocs/docs/usage_patterns.md @@ -29,45 +29,40 @@ across filesystems (see the mergerfs.dup tool) and setting `func.open=rand`, using `symlinkify`, or using dm-cache or a similar technology to add tiered cache to the underlying device itself. -With #2 one could use dm-cache as well but there is another solution -which requires only mergerfs and a cronjob. - -1. Create 2 mergerfs pools. One which includes just the slow branches - and one which has both the fast branches (SSD,NVME,etc.) and slow - branches. The 'base' pool and the 'cache' pool. -2. The 'cache' pool should have the cache branches listed first in - the branch list. -3. The best `create` policies to use for the 'cache' pool would - probably be `ff`, `epff`, `lfs`, `msplfs`, or `eplfs`. The latter - three under the assumption that the cache filesystem(s) are far - smaller than the backing filesystems. If using path preserving - policies remember that you'll need to manually create the core - directories of those paths you wish to be cached. Be sure the - permissions are in sync. Use `mergerfs.fsck` to check / correct - them. You could also set the slow filesystems mode to `NC` though - that'd mean if the cache filesystems fill you'd get "out of space" - errors. -4. Enable `moveonenospc` and set `minfreespace` appropriately. To - make sure there is enough room on the "slow" pool you might want - to set `minfreespace` to at least as large as the size of the - largest cache filesystem if not larger. This way in the worst case - the whole of the cache filesystem(s) can be moved to the other - drives. -5. Set your programs to use the 'cache' pool. -6. Save one of the below scripts or create you're own. The script's - responsibility is to move files from the cache filesystems (not - pool) to the 'base' pool. -7. 
Use `cron` (as root) to schedule the command at whatever frequency - is appropriate for your workflow. +With #2 one could use a block cache solution as available via LVM and +dm-cache but there is another solution which requires only mergerfs, a +script to move files around, and a cron job to run said script. + +* Create two mergerfs pools. One which includes just the **slow** + branches and one which has both the **fast** branches + (SSD,NVME,etc.) and **slow** branches. The **base** pool and the + **cache** pool. +* The **cache** pool should have the cache branches listed first in + the branch list in order to to make it easier to prioritize them. +* The best `create` policies to use for the **cache** pool would + probably be `ff`, `lus`, or `lfs`. The latter two under the + assumption that the cache filesystem(s) are far smaller than the + backing filesystems. +* You can also set the **slow** filesystems mode to `NC` which would + give you the ability to use other `create` policies though that'd + mean if the cache filesystems fill you'd get "out of space" + errors. This however may be good as it would indicate the script + moving files around is not configured properly. +* Set your programs to use the **cache** pool. +* Configure the **base** pool with the `create` policy you would like + to lay out files as you like. +* Save one of the below scripts or create your own. The script's + responsibility is to move files from the **cache** branches (not + pool) to the **base** pool. +* Use `cron` (as root) to schedule the command at whatever frequency + is appropriate for your workflow. ### time based expiring -Move files from cache to base pool based only on the last time the -file was accessed. Replace `-atime` with `-amin` if you want minutes -rather than days. May want to use the `fadvise` / `--drop-cache` -version of rsync or run rsync with the tool -[nocache](https://github.com/Feh/nocache). 
+Move files from the cache filesystem to the base pool if their access +time is older than the supplied number of days.
[mergerfs.percent-full-mover](https://github.com/trapexit/mergerfs/blob/latest-release/tools/mergerfs.percent-full-mover?raw=1) + +Download: +``` +curl -o /usr/local/bin/mergerfs.percent-full-mover https://raw.githubusercontent.com/trapexit/mergerfs/refs/heads/latest-release/tools/mergerfs.percent-full-mover +``` + +crontab entry: +``` +# m h dom mon dow command +0 * * * * /usr/local/bin/mergerfs.percent-full-mover /mnt/ssd/cache00 /mnt/base-pool 80 +``` + +If you have more than one cache filesystem then simply add a cron +entry for each. + +If you want to only move files from a subdirectory then use the +subdirectories. `/mnt/ssd/cache00/foo` and `/mnt/base-pool/foo` +respectively. diff --git a/tools/mergerfs.percent-full-mover b/tools/mergerfs.percent-full-mover index aad5965c..27b8dd11 100755 --- a/tools/mergerfs.percent-full-mover +++ b/tools/mergerfs.percent-full-mover @@ -1,21 +1,37 @@ #!/usr/bin/env sh if [ $# != 3 ]; then - echo "usage: $0 " - exit 1 + echo "usage: $0 " + exit 1 fi -CACHE="${1}" -BACKING="${2}" +CACHEFS="${1}" +BASEPOOL="${2}" PERCENTAGE=${3} set -o errexit -while [ $(df --output=pcent "${CACHE}" | grep -v Use | cut -d'%' -f1) -gt ${PERCENTAGE} ] +while [ $(df "${CACHE}" | tail -n1 | awk '{print $5}' | cut -d'%' -f1) -gt ${PERCENTAGE} ] do + # Find the file with the oldest access time FILE=$(find "${CACHE}" -type f -printf '%A@ %P\n' | \ - sort | \ - head -n 1 | \ - cut -d' ' -f2-) - test -n "${FILE}" - rsync -axqHAXWESR --preallocate --relative --remove-source-files "${CACHE}/./${FILE}" "${BACKING}/" + sort | \ + head -n 1 | \ + cut -d' ' -f2-) + # If no file found, exit + test -n "${FILE}" || exit 0 + # Move file + rsync \ + --archive \ + --acls \ + --xattrs \ + --atimes \ + --hard-links \ + --one-file-system \ + --quiet \ + --preallocate \ + --remove-source-files \ + --relative \ + --log-file=/tmp/mergerfs-cache-rsync.log \ + "${CACHE}/./${FILE}" \ + "${BACKING}/" done diff --git a/tools/mergerfs.time-based-mover 
b/tools/mergerfs.time-based-mover index b2af7966..af6a8d22 100755 --- a/tools/mergerfs.time-based-mover +++ b/tools/mergerfs.time-based-mover @@ -1,13 +1,26 @@ #!/usr/bin/env sh if [ $# != 3 ]; then - echo "usage: $0 " - exit 1 + echo "usage: $0 " + exit 1 fi -CACHE="${1}" -BACKING="${2}" -N=${3} +CACHEFS="${1}" +BASEPOOL="${2}" +DAYS_OLD=${3} -find "${CACHE}" -type f -atime +${N} -printf '%P\n' | \ - rsync --files-from=- -axqHAXWES --preallocate --remove-source-files "${CACHE}/" "${BACKING}/" +find "${CACHEFS}" -type f -atime +${DAYS_OLD} -printf '%P\n' | \ + rsync \ + --files-from=- \ + --archive \ + --acls \ + --xattrs \ + --atimes \ + --hard-links \ + --one-file-system \ + --quiet \ + --preallocate \ + --remove-source-files \ + --log-file=/tmp/mergerfs-cache-rsync.log \ + "${CACHEFS}/" \ + "${BASEPOOL}/"