From 73098c97922f6f2f0f49bd1ad6820663e2d9dc1c Mon Sep 17 00:00:00 2001
From: ai8future <2287988+ai8future@users.noreply.github.com>
Date: Tue, 30 Dec 2025 23:28:50 +0100
Subject: [PATCH] filer.meta.backup: add -excludePaths flag to skip paths from backup (#7916)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* filer.meta.backup: add -excludePaths flag to skip paths from backup

Add a new -excludePaths flag that accepts comma-separated path prefixes to exclude from backup operations. This enables selective backup when certain directories (e.g., legacy buckets) should be skipped.

Usage:
  weed filer.meta.backup -filerDir=/buckets -excludePaths=/buckets/legacy1,/buckets/legacy2 -config=backup.toml

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5

* filer.meta.backup: address code review feedback for -excludePaths

Fixes based on CodeRabbit and Gemini review:
- Cache parsed exclude paths in struct (performance)
- TrimSpace and skip empty entries (handles "a,,b" and "a, b")
- Add trailing slash for directory boundary matching (prevents /buckets/legacy matching /buckets/legacy_backup)
- Validate paths start with '/' and warn if not
- Log excluded paths at startup for debugging
- Fix rename handling: check both old and new paths, handle all four combinations correctly
- Add docstring to shouldExclude()
- Update UsageLine and Long description with new flag

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5

* filer.meta.backup: address nitpick feedback

- Clarify directory boundary matching behavior in help text
- Add warning when root path '/' is excluded (would exclude everything)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5

* includePrefixes and excludePrefixes

---------

Co-authored-by: C Shaw
Co-authored-by: Claude Opus 4.5
Co-authored-by: Chris Lu
---
 weed/command/filer_meta_backup.go | 89
+++++++++++-- weed/util/path_filter.go | 128 +++++++++++++++++++ weed/util/path_filter_test.go | 201 ++++++++++++++++++++++++++++++ 3 files changed, 405 insertions(+), 13 deletions(-) create mode 100644 weed/util/path_filter.go create mode 100644 weed/util/path_filter_test.go diff --git a/weed/command/filer_meta_backup.go b/weed/command/filer_meta_backup.go index 89ef5b4bb..da4de58a5 100644 --- a/weed/command/filer_meta_backup.go +++ b/weed/command/filer_meta_backup.go @@ -26,9 +26,12 @@ type FilerMetaBackupOptions struct { grpcDialOption grpc.DialOption filerAddress *string filerDirectory *string + includePrefixes *string + excludePrefixes *string restart *bool backupFilerConfig *string + pathFilter *util.PathPrefixFilter store filer.FilerStore clientId int32 clientEpoch int32 @@ -38,20 +41,25 @@ func init() { cmdFilerMetaBackup.Run = runFilerMetaBackup // break init cycle metaBackup.filerAddress = cmdFilerMetaBackup.Flag.String("filer", "localhost:8888", "filer hostname:port") metaBackup.filerDirectory = cmdFilerMetaBackup.Flag.String("filerDir", "/", "a folder on the filer") + metaBackup.includePrefixes = cmdFilerMetaBackup.Flag.String("includePrefixes", "", "comma-separated path prefixes to include in backup (if set, only these paths are backed up)") + metaBackup.excludePrefixes = cmdFilerMetaBackup.Flag.String("excludePrefixes", "", "comma-separated path prefixes to exclude from backup") metaBackup.restart = cmdFilerMetaBackup.Flag.Bool("restart", false, "copy the full metadata before async incremental backup") metaBackup.backupFilerConfig = cmdFilerMetaBackup.Flag.String("config", "", "path to filer.toml specifying backup filer store") metaBackup.clientId = util.RandomInt32() } var cmdFilerMetaBackup = &Command{ - UsageLine: "filer.meta.backup [-filer=localhost:8888] [-filerDir=/] [-restart] -config=/path/to/backup_filer.toml", + UsageLine: "filer.meta.backup [-filer=localhost:8888] [-filerDir=/] [-includePrefixes=...] [-excludePrefixes=...] 
[-restart] -config=/path/to/backup_filer.toml", Short: "continuously backup filer meta data changes to anther filer store specified in a backup_filer.toml", - Long: `continuously backup filer meta data changes. + Long: `continuously backup filer meta data changes. The backup writes to another filer store specified in a backup_filer.toml. weed filer.meta.backup -config=/path/to/backup_filer.toml -filer="localhost:8888" weed filer.meta.backup -config=/path/to/backup_filer.toml -filer="localhost:8888" -restart +The -includePrefixes and -excludePrefixes flags accept comma-separated path prefixes. +Paths must be absolute (start with '/'). Matching is at directory boundaries. +When both match, the deeper prefix wins. `, } @@ -75,6 +83,23 @@ func runFilerMetaBackup(cmd *Command, args []string) bool { return true } + // Initialize path filter + metaBackup.pathFilter = util.NewPathPrefixFilter( + *metaBackup.includePrefixes, + *metaBackup.excludePrefixes, + func(format string, args ...interface{}) { + glog.Warningf(format, args...) + }, + ) + if metaBackup.pathFilter.HasFilters() { + if len(metaBackup.pathFilter.GetIncludePrefixes()) > 0 { + glog.V(0).Infof("including prefixes: %v", metaBackup.pathFilter.GetIncludePrefixes()) + } + if len(metaBackup.pathFilter.GetExcludePrefixes()) > 0 { + glog.V(0).Infof("excluding prefixes: %v", metaBackup.pathFilter.GetExcludePrefixes()) + } + } + missingPreviousBackup := false _, err := metaBackup.getOffset() if err != nil { @@ -127,12 +152,22 @@ func (metaBackup *FilerMetaBackupOptions) initStore(v *viper.Viper) error { return nil } +// shouldInclude checks if the given path should be included in backup +// based on the configured include/exclude path prefixes. 
+func (metaBackup *FilerMetaBackupOptions) shouldInclude(fullpath string) bool { + return metaBackup.pathFilter.ShouldInclude(fullpath) +} + func (metaBackup *FilerMetaBackupOptions) traverseMetadata() (err error) { var saveErr error traverseErr := filer_pb.TraverseBfs(metaBackup, util.FullPath(*metaBackup.filerDirectory), func(parentPath util.FullPath, entry *filer_pb.Entry) { + fullpath := string(parentPath.Child(entry.Name)) + if !metaBackup.shouldInclude(fullpath) { + return + } - println("+", parentPath.Child(entry.Name)) + println("+", fullpath) if err := metaBackup.store.InsertEntry(context.Background(), filer.FromPbEntry(string(parentPath), entry)); err != nil { saveErr = fmt.Errorf("insert entry error: %w\n", err) return @@ -167,25 +202,53 @@ func (metaBackup *FilerMetaBackupOptions) streamMetadataBackup() error { if filer_pb.IsEmpty(resp) { return nil - } else if filer_pb.IsCreate(resp) { - println("+", util.FullPath(message.NewParentPath).Child(message.NewEntry.Name)) + } + + // Compute exclusion for both old and new paths + var oldPathExcluded, newPathExcluded bool + var oldPath, newPath string + if message.OldEntry != nil { + oldPath = string(util.FullPath(resp.Directory).Child(message.OldEntry.Name)) + oldPathExcluded = !metaBackup.shouldInclude(oldPath) + } + if message.NewEntry != nil { + newPath = string(util.FullPath(message.NewParentPath).Child(message.NewEntry.Name)) + newPathExcluded = !metaBackup.shouldInclude(newPath) + } + + if filer_pb.IsCreate(resp) { + if newPathExcluded { + return nil + } + println("+", newPath) entry := filer.FromPbEntry(message.NewParentPath, message.NewEntry) return store.InsertEntry(ctx, entry) } else if filer_pb.IsDelete(resp) { - println("-", util.FullPath(resp.Directory).Child(message.OldEntry.Name)) + if oldPathExcluded { + return nil + } + println("-", oldPath) return store.DeleteEntry(ctx, util.FullPath(resp.Directory).Child(message.OldEntry.Name)) } else if filer_pb.IsUpdate(resp) { - println("~", 
util.FullPath(message.NewParentPath).Child(message.NewEntry.Name)) + if newPathExcluded { + return nil + } + println("~", newPath) entry := filer.FromPbEntry(message.NewParentPath, message.NewEntry) return store.UpdateEntry(ctx, entry) } else { - // renaming - println("-", util.FullPath(resp.Directory).Child(message.OldEntry.Name)) - if err := store.DeleteEntry(ctx, util.FullPath(resp.Directory).Child(message.OldEntry.Name)); err != nil { - return err + // renaming - handle all four combinations + if !oldPathExcluded { + println("-", oldPath) + if err := store.DeleteEntry(ctx, util.FullPath(resp.Directory).Child(message.OldEntry.Name)); err != nil { + return err + } } - println("+", util.FullPath(message.NewParentPath).Child(message.NewEntry.Name)) - return store.InsertEntry(ctx, filer.FromPbEntry(message.NewParentPath, message.NewEntry)) + if !newPathExcluded { + println("+", newPath) + return store.InsertEntry(ctx, filer.FromPbEntry(message.NewParentPath, message.NewEntry)) + } + return nil } } diff --git a/weed/util/path_filter.go b/weed/util/path_filter.go new file mode 100644 index 000000000..d74779571 --- /dev/null +++ b/weed/util/path_filter.go @@ -0,0 +1,128 @@ +package util + +import ( + "strings" +) + +// PathPrefixFilter provides filtering based on include and exclude path prefixes. +// When both include and exclude prefixes match a path, the deepest matching prefix wins. +// This enables fine-grained control like: exclude /buckets/legacy but include /buckets/legacy/important +type PathPrefixFilter struct { + includePrefixes []string // normalized with trailing / + excludePrefixes []string // normalized with trailing / +} + +// NewPathPrefixFilter creates a new PathPrefixFilter from comma-separated include and exclude prefix strings. +// Each prefix is normalized to have a trailing slash for directory boundary matching. +// Invalid prefixes (empty or not starting with /) are skipped with a warning via the provided warn function. 
+func NewPathPrefixFilter(includePrefixes, excludePrefixes string, warn func(format string, args ...interface{})) *PathPrefixFilter { + pf := &PathPrefixFilter{} + + pf.includePrefixes = parsePrefixes(includePrefixes, warn) + pf.excludePrefixes = parsePrefixes(excludePrefixes, warn) + + return pf +} + +// parsePrefixes parses a comma-separated list of prefixes and normalizes them. +func parsePrefixes(prefixList string, warn func(format string, args ...interface{})) []string { + if prefixList == "" { + return nil + } + + var result []string + for _, p := range strings.Split(prefixList, ",") { + p = strings.TrimSpace(p) + if p == "" { + continue + } + if !strings.HasPrefix(p, "/") { + if warn != nil { + warn("prefix %q does not start with '/', skipping", p) + } + continue + } + // Normalize: ensure trailing slash for directory boundary matching + if !strings.HasSuffix(p, "/") { + p = p + "/" + } + result = append(result, p) + } + return result +} + +// HasFilters returns true if any include or exclude prefixes are configured. +func (pf *PathPrefixFilter) HasFilters() bool { + return len(pf.includePrefixes) > 0 || len(pf.excludePrefixes) > 0 +} + +// ShouldInclude returns true if the path should be included based on the configured prefixes. +// +// Logic: +// - If no filters are configured, include everything. +// - Find the deepest matching prefix from either include or exclude list. +// - If the deepest match is in includePrefixes, include the path. +// - If the deepest match is in excludePrefixes, exclude the path. +// - If no match is found and includePrefixes is non-empty, exclude (explicit include required). +// - If no match is found and includePrefixes is empty, include (default allow with excludes). 
+func (pf *PathPrefixFilter) ShouldInclude(fullpath string) bool { + if !pf.HasFilters() { + return true + } + + // Normalize path for matching + checkPath := fullpath + if !strings.HasSuffix(checkPath, "/") { + checkPath = checkPath + "/" + } + + // Find deepest matching prefix from each list + includeMatch := findDeepestMatch(checkPath, pf.includePrefixes) + excludeMatch := findDeepestMatch(checkPath, pf.excludePrefixes) + + // Determine result based on which match is deeper + if includeMatch != "" && excludeMatch != "" { + // Both matched - deeper prefix wins + return len(includeMatch) >= len(excludeMatch) + } + + if includeMatch != "" { + return true + } + + if excludeMatch != "" { + return false + } + + // No match found + if len(pf.includePrefixes) > 0 { + // If includes are specified, require explicit include + return false + } + + // Default: include if only excludes are specified + return true +} + +// findDeepestMatch finds the longest prefix that matches the path. +func findDeepestMatch(path string, prefixes []string) string { + var deepest string + for _, prefix := range prefixes { + if strings.HasPrefix(path, prefix) { + if len(prefix) > len(deepest) { + deepest = prefix + } + } + } + return deepest +} + +// GetIncludePrefixes returns the configured include prefixes. +func (pf *PathPrefixFilter) GetIncludePrefixes() []string { + return pf.includePrefixes +} + +// GetExcludePrefixes returns the configured exclude prefixes. 
+func (pf *PathPrefixFilter) GetExcludePrefixes() []string { + return pf.excludePrefixes +} diff --git a/weed/util/path_filter_test.go b/weed/util/path_filter_test.go new file mode 100644 index 000000000..7d4186f2d --- /dev/null +++ b/weed/util/path_filter_test.go @@ -0,0 +1,201 @@ +package util + +import ( + "testing" +) + +func TestPathPrefixFilter_Empty(t *testing.T) { + pf := NewPathPrefixFilter("", "", nil) + + if pf.HasFilters() { + t.Error("empty filter should have no filters") + } + + // Should include everything when no filters + tests := []string{"/", "/foo", "/foo/bar", "/buckets/test"} + for _, path := range tests { + if !pf.ShouldInclude(path) { + t.Errorf("empty filter should include %q", path) + } + } +} + +func TestPathPrefixFilter_ExcludeOnly(t *testing.T) { + pf := NewPathPrefixFilter("", "/buckets/legacy,/buckets/old", nil) + + tests := []struct { + path string + include bool + }{ + {"/buckets/active", true}, + {"/buckets/active/file.txt", true}, + {"/buckets/legacy", false}, + {"/buckets/legacy/file.txt", false}, + {"/buckets/legacy_new", true}, // boundary check: not a prefix match + {"/buckets/old", false}, + {"/buckets/old/data", false}, + {"/other", true}, + } + + for _, tc := range tests { + got := pf.ShouldInclude(tc.path) + if got != tc.include { + t.Errorf("ShouldInclude(%q) = %v, want %v", tc.path, got, tc.include) + } + } +} + +func TestPathPrefixFilter_IncludeOnly(t *testing.T) { + pf := NewPathPrefixFilter("/buckets/important,/data", "", nil) + + tests := []struct { + path string + include bool + }{ + {"/buckets/important", true}, + {"/buckets/important/file.txt", true}, + {"/data", true}, + {"/data/file.txt", true}, + {"/buckets/other", false}, // not in include list + {"/other", false}, + } + + for _, tc := range tests { + got := pf.ShouldInclude(tc.path) + if got != tc.include { + t.Errorf("ShouldInclude(%q) = %v, want %v", tc.path, got, tc.include) + } + } +} + +func TestPathPrefixFilter_DeeperPrefixWins(t *testing.T) { + // 
Exclude /buckets/keep but include /buckets/keep/important + pf := NewPathPrefixFilter("/buckets/keep/important", "/buckets/keep", nil) + + tests := []struct { + path string + include bool + }{ + {"/buckets/keep", false}, + {"/buckets/keep/other", false}, + {"/buckets/keep/important", true}, // deeper include wins + {"/buckets/keep/important/file.txt", true}, // deeper include wins + {"/buckets/other", false}, // not matched, include required + } + + for _, tc := range tests { + got := pf.ShouldInclude(tc.path) + if got != tc.include { + t.Errorf("ShouldInclude(%q) = %v, want %v", tc.path, got, tc.include) + } + } +} + +func TestPathPrefixFilter_DeeperExcludeWins(t *testing.T) { + // Include /buckets but exclude /buckets/legacy + pf := NewPathPrefixFilter("/buckets", "/buckets/legacy", nil) + + tests := []struct { + path string + include bool + }{ + {"/buckets", true}, + {"/buckets/active", true}, + {"/buckets/legacy", false}, // deeper exclude wins + {"/buckets/legacy/file.txt", false}, // deeper exclude wins + {"/other", false}, // not in include list + } + + for _, tc := range tests { + got := pf.ShouldInclude(tc.path) + if got != tc.include { + t.Errorf("ShouldInclude(%q) = %v, want %v", tc.path, got, tc.include) + } + } +} + +func TestPathPrefixFilter_MultipleOverlappingPrefixes(t *testing.T) { + // Complex scenario with multiple overlapping prefixes + pf := NewPathPrefixFilter( + "/a,/a/b/c/d", // includes + "/a/b,/a/b/c/d/e", // excludes + nil, + ) + + tests := []struct { + path string + include bool + }{ + {"/a", true}, // direct include match + {"/a/x", true}, // under include /a + {"/a/b", false}, // deeper exclude /a/b beats /a + {"/a/b/x", false}, // under exclude /a/b + {"/a/b/c", false}, // under exclude /a/b + {"/a/b/c/d", true}, // deeper include /a/b/c/d beats /a/b + {"/a/b/c/d/x", true}, // under include /a/b/c/d + {"/a/b/c/d/e", false}, // deeper exclude /a/b/c/d/e beats /a/b/c/d + {"/a/b/c/d/e/f", false}, // under exclude /a/b/c/d/e + } + + for 
_, tc := range tests { + got := pf.ShouldInclude(tc.path) + if got != tc.include { + t.Errorf("ShouldInclude(%q) = %v, want %v", tc.path, got, tc.include) + } + } +} + +func TestPathPrefixFilter_InvalidPrefixes(t *testing.T) { + var warnings []string + warn := func(format string, args ...interface{}) { + warnings = append(warnings, format) + } + + pf := NewPathPrefixFilter("invalid,/valid", "also_invalid", warn) + + if len(warnings) != 2 { + t.Errorf("expected 2 warnings, got %d", len(warnings)) + } + + // Only valid prefix should be stored + if len(pf.includePrefixes) != 1 { + t.Errorf("expected 1 include prefix, got %d", len(pf.includePrefixes)) + } + if len(pf.excludePrefixes) != 0 { + t.Errorf("expected 0 exclude prefixes, got %d", len(pf.excludePrefixes)) + } +} + +func TestPathPrefixFilter_TrailingSlashNormalization(t *testing.T) { + pf := NewPathPrefixFilter("/path/to/dir", "/exclude/this/", nil) + + // Both should be normalized with trailing slash + if pf.includePrefixes[0] != "/path/to/dir/" { + t.Errorf("include prefix not normalized: %q", pf.includePrefixes[0]) + } + if pf.excludePrefixes[0] != "/exclude/this/" { + t.Errorf("exclude prefix not normalized: %q", pf.excludePrefixes[0]) + } +} + +func TestPathPrefixFilter_BoundaryMatching(t *testing.T) { + pf := NewPathPrefixFilter("", "/buckets/legacy1", nil) + + tests := []struct { + path string + include bool + }{ + {"/buckets/legacy1", false}, + {"/buckets/legacy1/file", false}, + {"/buckets/legacy1_backup", true}, // not a prefix match due to boundary + {"/buckets/legacy10", true}, // not a prefix match due to boundary + {"/buckets/legacy", true}, + } + + for _, tc := range tests { + got := pf.ShouldInclude(tc.path) + if got != tc.include { + t.Errorf("ShouldInclude(%q) = %v, want %v", tc.path, got, tc.include) + } + } +}