Browse Source

filer.meta.backup: add -excludePaths flag to skip paths from backup (#7916)

* filer.meta.backup: add -excludePaths flag to skip paths from backup

Add a new -excludePaths flag that accepts comma-separated path prefixes
to exclude from backup operations. This enables selective backup when
certain directories (e.g., legacy buckets) should be skipped.

Usage:
  weed filer.meta.backup -filerDir=/buckets -excludePaths=/buckets/legacy1,/buckets/legacy2 -config=backup.toml

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

* filer.meta.backup: address code review feedback for -excludePaths

Fixes based on CodeRabbit and Gemini review:
- Cache parsed exclude paths in struct (performance)
- TrimSpace and skip empty entries (handles "a,,b" and "a, b")
- Add trailing slash for directory boundary matching (prevents
  /buckets/legacy matching /buckets/legacy_backup)
- Validate paths start with '/' and warn if not
- Log excluded paths at startup for debugging
- Fix rename handling: check both old and new paths, handle all
  four combinations correctly
- Add docstring to shouldExclude()
- Update UsageLine and Long description with new flag

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

* filer.meta.backup: address nitpick feedback

- Clarify directory boundary matching behavior in help text
- Add warning when root path '/' is excluded (would exclude everything)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

* includePrefixes and excludePrefixes

---------

Co-authored-by: C Shaw <cliffshaw@users.noreply.github.com>
Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
Co-authored-by: Chris Lu <chris.lu@gmail.com>
pull/7920/head
ai8future 1 day ago
committed by GitHub
parent
commit
73098c9792
No known key found for this signature in database GPG Key ID: B5690EEEBB952194
  1. 89
      weed/command/filer_meta_backup.go
  2. 128
      weed/util/path_filter.go
  3. 201
      weed/util/path_filter_test.go

89
weed/command/filer_meta_backup.go

@ -26,9 +26,12 @@ type FilerMetaBackupOptions struct {
grpcDialOption grpc.DialOption
filerAddress *string
filerDirectory *string
includePrefixes *string
excludePrefixes *string
restart *bool
backupFilerConfig *string
pathFilter *util.PathPrefixFilter
store filer.FilerStore
clientId int32
clientEpoch int32
@ -38,20 +41,25 @@ func init() {
cmdFilerMetaBackup.Run = runFilerMetaBackup // break init cycle
metaBackup.filerAddress = cmdFilerMetaBackup.Flag.String("filer", "localhost:8888", "filer hostname:port")
metaBackup.filerDirectory = cmdFilerMetaBackup.Flag.String("filerDir", "/", "a folder on the filer")
metaBackup.includePrefixes = cmdFilerMetaBackup.Flag.String("includePrefixes", "", "comma-separated path prefixes to include in backup (if set, only these paths are backed up)")
metaBackup.excludePrefixes = cmdFilerMetaBackup.Flag.String("excludePrefixes", "", "comma-separated path prefixes to exclude from backup")
metaBackup.restart = cmdFilerMetaBackup.Flag.Bool("restart", false, "copy the full metadata before async incremental backup")
metaBackup.backupFilerConfig = cmdFilerMetaBackup.Flag.String("config", "", "path to filer.toml specifying backup filer store")
metaBackup.clientId = util.RandomInt32()
}
var cmdFilerMetaBackup = &Command{
UsageLine: "filer.meta.backup [-filer=localhost:8888] [-filerDir=/] [-restart] -config=/path/to/backup_filer.toml",
UsageLine: "filer.meta.backup [-filer=localhost:8888] [-filerDir=/] [-includePrefixes=...] [-excludePrefixes=...] [-restart] -config=/path/to/backup_filer.toml",
Short: "continuously backup filer meta data changes to anther filer store specified in a backup_filer.toml",
Long: `continuously backup filer meta data changes.
Long: `continuously backup filer meta data changes.
The backup writes to another filer store specified in a backup_filer.toml.
weed filer.meta.backup -config=/path/to/backup_filer.toml -filer="localhost:8888"
weed filer.meta.backup -config=/path/to/backup_filer.toml -filer="localhost:8888" -restart
The -includePrefixes and -excludePrefixes flags accept comma-separated path prefixes.
Paths must be absolute (start with '/'). Matching is at directory boundaries.
When both match, the deeper prefix wins.
`,
}
@ -75,6 +83,23 @@ func runFilerMetaBackup(cmd *Command, args []string) bool {
return true
}
// Initialize path filter
metaBackup.pathFilter = util.NewPathPrefixFilter(
*metaBackup.includePrefixes,
*metaBackup.excludePrefixes,
func(format string, args ...interface{}) {
glog.Warningf(format, args...)
},
)
if metaBackup.pathFilter.HasFilters() {
if len(metaBackup.pathFilter.GetIncludePrefixes()) > 0 {
glog.V(0).Infof("including prefixes: %v", metaBackup.pathFilter.GetIncludePrefixes())
}
if len(metaBackup.pathFilter.GetExcludePrefixes()) > 0 {
glog.V(0).Infof("excluding prefixes: %v", metaBackup.pathFilter.GetExcludePrefixes())
}
}
missingPreviousBackup := false
_, err := metaBackup.getOffset()
if err != nil {
@ -127,12 +152,22 @@ func (metaBackup *FilerMetaBackupOptions) initStore(v *viper.Viper) error {
return nil
}
// shouldInclude reports whether fullpath passes the include/exclude prefix
// filter held in metaBackup.pathFilter. It only delegates to that filter's
// ShouldInclude method.
func (metaBackup *FilerMetaBackupOptions) shouldInclude(fullpath string) bool {
	filter := metaBackup.pathFilter
	return filter.ShouldInclude(fullpath)
}
func (metaBackup *FilerMetaBackupOptions) traverseMetadata() (err error) {
var saveErr error
traverseErr := filer_pb.TraverseBfs(metaBackup, util.FullPath(*metaBackup.filerDirectory), func(parentPath util.FullPath, entry *filer_pb.Entry) {
fullpath := string(parentPath.Child(entry.Name))
if !metaBackup.shouldInclude(fullpath) {
return
}
println("+", parentPath.Child(entry.Name))
println("+", fullpath)
if err := metaBackup.store.InsertEntry(context.Background(), filer.FromPbEntry(string(parentPath), entry)); err != nil {
saveErr = fmt.Errorf("insert entry error: %w\n", err)
return
@ -167,25 +202,53 @@ func (metaBackup *FilerMetaBackupOptions) streamMetadataBackup() error {
if filer_pb.IsEmpty(resp) {
return nil
} else if filer_pb.IsCreate(resp) {
println("+", util.FullPath(message.NewParentPath).Child(message.NewEntry.Name))
}
// Compute exclusion for both old and new paths
var oldPathExcluded, newPathExcluded bool
var oldPath, newPath string
if message.OldEntry != nil {
oldPath = string(util.FullPath(resp.Directory).Child(message.OldEntry.Name))
oldPathExcluded = !metaBackup.shouldInclude(oldPath)
}
if message.NewEntry != nil {
newPath = string(util.FullPath(message.NewParentPath).Child(message.NewEntry.Name))
newPathExcluded = !metaBackup.shouldInclude(newPath)
}
if filer_pb.IsCreate(resp) {
if newPathExcluded {
return nil
}
println("+", newPath)
entry := filer.FromPbEntry(message.NewParentPath, message.NewEntry)
return store.InsertEntry(ctx, entry)
} else if filer_pb.IsDelete(resp) {
println("-", util.FullPath(resp.Directory).Child(message.OldEntry.Name))
if oldPathExcluded {
return nil
}
println("-", oldPath)
return store.DeleteEntry(ctx, util.FullPath(resp.Directory).Child(message.OldEntry.Name))
} else if filer_pb.IsUpdate(resp) {
println("~", util.FullPath(message.NewParentPath).Child(message.NewEntry.Name))
if newPathExcluded {
return nil
}
println("~", newPath)
entry := filer.FromPbEntry(message.NewParentPath, message.NewEntry)
return store.UpdateEntry(ctx, entry)
} else {
// renaming
println("-", util.FullPath(resp.Directory).Child(message.OldEntry.Name))
if err := store.DeleteEntry(ctx, util.FullPath(resp.Directory).Child(message.OldEntry.Name)); err != nil {
return err
// renaming - handle all four combinations
if !oldPathExcluded {
println("-", oldPath)
if err := store.DeleteEntry(ctx, util.FullPath(resp.Directory).Child(message.OldEntry.Name)); err != nil {
return err
}
}
println("+", util.FullPath(message.NewParentPath).Child(message.NewEntry.Name))
return store.InsertEntry(ctx, filer.FromPbEntry(message.NewParentPath, message.NewEntry))
if !newPathExcluded {
println("+", newPath)
return store.InsertEntry(ctx, filer.FromPbEntry(message.NewParentPath, message.NewEntry))
}
return nil
}
}

128
weed/util/path_filter.go

@ -0,0 +1,128 @@
package util
import (
"strings"
)
// PathPrefixFilter decides whether a path passes a set of include and exclude
// path prefixes. When prefixes from both lists match the same path, the
// deepest (longest) matching prefix decides the outcome. That allows rules
// such as: exclude /buckets/legacy but include /buckets/legacy/important.
type PathPrefixFilter struct {
	// Both lists hold prefixes normalized to end with a trailing "/".
	includePrefixes, excludePrefixes []string
}
// NewPathPrefixFilter builds a PathPrefixFilter from two comma-separated
// prefix lists, one for includes and one for excludes. Both lists go through
// parsePrefixes, include list first, which normalizes every kept prefix to end
// with a trailing slash so matching happens on directory boundaries.
// The warn callback is handed to parsePrefixes, which uses it to report
// prefixes that do not start with "/"; it may be nil.
func NewPathPrefixFilter(includePrefixes, excludePrefixes string, warn func(format string, args ...interface{})) *PathPrefixFilter {
	return &PathPrefixFilter{
		includePrefixes: parsePrefixes(includePrefixes, warn),
		excludePrefixes: parsePrefixes(excludePrefixes, warn),
	}
}
// parsePrefixes splits a comma-separated list of path prefixes and returns
// the valid ones in normalized form.
//
// Each entry is trimmed of surrounding whitespace. Empty entries are dropped
// silently. Entries that do not start with "/" are dropped and, when warn is
// non-nil, reported through it. Every kept prefix ends in exactly one "/",
// so matching happens on directory boundaries ("/a/b" never matches "/a/bc").
// It returns nil when prefixList is empty or contains no valid prefix.
func parsePrefixes(prefixList string, warn func(format string, args ...interface{})) []string {
	if prefixList == "" {
		return nil
	}
	var result []string
	for _, p := range strings.Split(prefixList, ",") {
		p = strings.TrimSpace(p)
		if p == "" {
			continue
		}
		if !strings.HasPrefix(p, "/") {
			if warn != nil {
				warn("prefix %q does not start with '/', skipping", p)
			}
			continue
		}
		// Collapse any run of trailing slashes to exactly one. The path being
		// checked is given a single trailing "/", so a prefix such as "/a//"
		// would otherwise never match anything and silently disable the rule.
		result = append(result, strings.TrimRight(p, "/")+"/")
	}
	return result
}
// HasFilters reports whether at least one include or exclude prefix is
// configured on the filter.
func (pf *PathPrefixFilter) HasFilters() bool {
	if len(pf.includePrefixes) > 0 {
		return true
	}
	return len(pf.excludePrefixes) > 0
}
// ShouldInclude reports whether fullpath passes the configured prefixes.
//
// Decision rules, in order:
//   - With no filters configured, every path is included.
//   - The deepest matching prefix is looked up in each list.
//   - If both lists match, the longer prefix decides; on equal length the
//     include wins.
//   - If only the include list matches, the path is included.
//   - If only the exclude list matches, the path is excluded.
//   - If nothing matches, the path is excluded when include prefixes exist
//     (explicit include required) and included otherwise.
func (pf *PathPrefixFilter) ShouldInclude(fullpath string) bool {
	if !pf.HasFilters() {
		return true
	}

	// Give the path a trailing slash so it compares against the
	// slash-terminated prefixes on directory boundaries.
	target := fullpath
	if !strings.HasSuffix(target, "/") {
		target += "/"
	}

	included := findDeepestMatch(target, pf.includePrefixes)
	excluded := findDeepestMatch(target, pf.excludePrefixes)

	switch {
	case included != "" && excluded != "":
		// Both lists matched: the deeper prefix decides.
		return len(included) >= len(excluded)
	case included != "":
		return true
	case excluded != "":
		return false
	default:
		// Nothing matched: an include list demands an explicit match,
		// while an exclude-only filter allows everything else.
		return len(pf.includePrefixes) == 0
	}
}
// findDeepestMatch returns the longest entry of prefixes that path starts
// with, or the empty string when no entry matches. Among matching entries of
// equal length the first one in the slice is returned.
func findDeepestMatch(path string, prefixes []string) string {
	best := ""
	for _, candidate := range prefixes {
		// A candidate that is not longer than the current best cannot win.
		if len(candidate) <= len(best) {
			continue
		}
		if strings.HasPrefix(path, candidate) {
			best = candidate
		}
	}
	return best
}
// GetIncludePrefixes exposes the include prefixes held by the filter.
// The returned slice is the filter's own slice, not a copy.
func (pf *PathPrefixFilter) GetIncludePrefixes() []string {
	prefixes := pf.includePrefixes
	return prefixes
}
// GetExcludePrefixes exposes the exclude prefixes held by the filter.
// The returned slice is the filter's own slice, not a copy.
func (pf *PathPrefixFilter) GetExcludePrefixes() []string {
	prefixes := pf.excludePrefixes
	return prefixes
}

201
weed/util/path_filter_test.go

@ -0,0 +1,201 @@
package util
import (
"testing"
)
// TestPathPrefixFilter_Empty verifies that a filter built from two empty
// lists reports no filters and lets every path through.
func TestPathPrefixFilter_Empty(t *testing.T) {
	filter := NewPathPrefixFilter("", "", nil)

	if filter.HasFilters() {
		t.Error("empty filter should have no filters")
	}

	// With nothing configured, every path must pass.
	for _, p := range []string{"/", "/foo", "/foo/bar", "/buckets/test"} {
		if filter.ShouldInclude(p) {
			continue
		}
		t.Errorf("empty filter should include %q", p)
	}
}
// TestPathPrefixFilter_ExcludeOnly checks an exclude-only filter: paths under
// an excluded prefix are rejected and everything else is allowed.
func TestPathPrefixFilter_ExcludeOnly(t *testing.T) {
	type testCase struct {
		path    string
		include bool
	}
	cases := []testCase{
		{"/buckets/active", true},
		{"/buckets/active/file.txt", true},
		{"/buckets/legacy", false},
		{"/buckets/legacy/file.txt", false},
		{"/buckets/legacy_new", true}, // sibling name, not under the excluded directory
		{"/buckets/old", false},
		{"/buckets/old/data", false},
		{"/other", true},
	}

	filter := NewPathPrefixFilter("", "/buckets/legacy,/buckets/old", nil)
	for _, c := range cases {
		if got := filter.ShouldInclude(c.path); got != c.include {
			t.Errorf("ShouldInclude(%q) = %v, want %v", c.path, got, c.include)
		}
	}
}
// TestPathPrefixFilter_IncludeOnly checks an include-only filter: only paths
// under a listed prefix are accepted.
func TestPathPrefixFilter_IncludeOnly(t *testing.T) {
	type testCase struct {
		path    string
		include bool
	}
	cases := []testCase{
		{"/buckets/important", true},
		{"/buckets/important/file.txt", true},
		{"/data", true},
		{"/data/file.txt", true},
		{"/buckets/other", false}, // absent from the include list
		{"/other", false},
	}

	filter := NewPathPrefixFilter("/buckets/important,/data", "", nil)
	for i := range cases {
		want := cases[i].include
		if got := filter.ShouldInclude(cases[i].path); got != want {
			t.Errorf("ShouldInclude(%q) = %v, want %v", cases[i].path, got, want)
		}
	}
}
// TestPathPrefixFilter_DeeperPrefixWins excludes /buckets/keep while including
// the deeper /buckets/keep/important, and expects the deeper include to win.
func TestPathPrefixFilter_DeeperPrefixWins(t *testing.T) {
	const (
		includes = "/buckets/keep/important"
		excludes = "/buckets/keep"
	)
	filter := NewPathPrefixFilter(includes, excludes, nil)

	cases := []struct {
		path    string
		include bool
	}{
		{"/buckets/keep", false},
		{"/buckets/keep/other", false},
		{"/buckets/keep/important", true},          // the deeper include decides
		{"/buckets/keep/important/file.txt", true}, // the deeper include decides
		{"/buckets/other", false},                  // no match, and an include is required
	}
	for _, c := range cases {
		if got := filter.ShouldInclude(c.path); got != c.include {
			t.Errorf("ShouldInclude(%q) = %v, want %v", c.path, got, c.include)
		}
	}
}
// TestPathPrefixFilter_DeeperExcludeWins includes /buckets while excluding the
// deeper /buckets/legacy, and expects the deeper exclude to win.
func TestPathPrefixFilter_DeeperExcludeWins(t *testing.T) {
	const (
		includes = "/buckets"
		excludes = "/buckets/legacy"
	)
	filter := NewPathPrefixFilter(includes, excludes, nil)

	cases := []struct {
		path    string
		include bool
	}{
		{"/buckets", true},
		{"/buckets/active", true},
		{"/buckets/legacy", false},          // the deeper exclude decides
		{"/buckets/legacy/file.txt", false}, // the deeper exclude decides
		{"/other", false},                   // absent from the include list
	}
	for _, c := range cases {
		if got := filter.ShouldInclude(c.path); got != c.include {
			t.Errorf("ShouldInclude(%q) = %v, want %v", c.path, got, c.include)
		}
	}
}
// TestPathPrefixFilter_MultipleOverlappingPrefixes interleaves include and
// exclude prefixes at several depths and checks that the deepest matching
// prefix decides at every level.
func TestPathPrefixFilter_MultipleOverlappingPrefixes(t *testing.T) {
	const (
		includes = "/a,/a/b/c/d"
		excludes = "/a/b,/a/b/c/d/e"
	)
	filter := NewPathPrefixFilter(includes, excludes, nil)

	cases := []struct {
		path    string
		include bool
	}{
		{"/a", true},            // matches include /a directly
		{"/a/x", true},          // below include /a
		{"/a/b", false},         // exclude /a/b is deeper than include /a
		{"/a/b/x", false},       // below exclude /a/b
		{"/a/b/c", false},       // below exclude /a/b
		{"/a/b/c/d", true},      // include /a/b/c/d is deeper than exclude /a/b
		{"/a/b/c/d/x", true},    // below include /a/b/c/d
		{"/a/b/c/d/e", false},   // exclude /a/b/c/d/e is deeper than include /a/b/c/d
		{"/a/b/c/d/e/f", false}, // below exclude /a/b/c/d/e
	}
	for _, c := range cases {
		if got := filter.ShouldInclude(c.path); got != c.include {
			t.Errorf("ShouldInclude(%q) = %v, want %v", c.path, got, c.include)
		}
	}
}
// TestPathPrefixFilter_InvalidPrefixes feeds prefixes that do not start with
// "/" and expects one warning per such prefix, with only the valid prefix
// stored on the filter.
func TestPathPrefixFilter_InvalidPrefixes(t *testing.T) {
	var recorded []string
	record := func(format string, args ...interface{}) {
		recorded = append(recorded, format)
	}

	filter := NewPathPrefixFilter("invalid,/valid", "also_invalid", record)

	if n := len(recorded); n != 2 {
		t.Errorf("expected 2 warnings, got %d", n)
	}
	// Of the three entries only "/valid" is expected to be kept.
	if n := len(filter.includePrefixes); n != 1 {
		t.Errorf("expected 1 include prefix, got %d", n)
	}
	if n := len(filter.excludePrefixes); n != 0 {
		t.Errorf("expected 0 exclude prefixes, got %d", n)
	}
}
// TestPathPrefixFilter_TrailingSlashNormalization verifies that stored
// prefixes end with a trailing slash, whether or not the input had one.
func TestPathPrefixFilter_TrailingSlashNormalization(t *testing.T) {
	pf := NewPathPrefixFilter("/path/to/dir", "/exclude/this/", nil)

	// Guard the indexing below: a parsing regression should fail the test
	// with a clear message instead of panicking with an index out of range.
	if len(pf.includePrefixes) != 1 {
		t.Fatalf("expected 1 include prefix, got %d: %q", len(pf.includePrefixes), pf.includePrefixes)
	}
	if len(pf.excludePrefixes) != 1 {
		t.Fatalf("expected 1 exclude prefix, got %d: %q", len(pf.excludePrefixes), pf.excludePrefixes)
	}

	// Both should be normalized with trailing slash
	if got := pf.includePrefixes[0]; got != "/path/to/dir/" {
		t.Errorf("include prefix not normalized: %q", got)
	}
	if got := pf.excludePrefixes[0]; got != "/exclude/this/" {
		t.Errorf("exclude prefix not normalized: %q", got)
	}
}
// TestPathPrefixFilter_BoundaryMatching checks that an excluded prefix only
// matches on directory boundaries, so names that merely share leading
// characters with it are still included.
func TestPathPrefixFilter_BoundaryMatching(t *testing.T) {
	type testCase struct {
		path    string
		include bool
	}
	cases := []testCase{
		{"/buckets/legacy1", false},
		{"/buckets/legacy1/file", false},
		{"/buckets/legacy1_backup", true}, // shares leading characters only; different directory
		{"/buckets/legacy10", true},       // shares leading characters only; different directory
		{"/buckets/legacy", true},
	}

	filter := NewPathPrefixFilter("", "/buckets/legacy1", nil)
	for _, c := range cases {
		if got := filter.ShouldInclude(c.path); got != c.include {
			t.Errorf("ShouldInclude(%q) = %v, want %v", c.path, got, c.include)
		}
	}
}
Loading…
Cancel
Save