diff --git a/weed/command/filer_backup.go b/weed/command/filer_backup.go index 02360dd0b..fb67e1b25 100644 --- a/weed/command/filer_backup.go +++ b/weed/command/filer_backup.go @@ -15,22 +15,25 @@ import ( "github.com/seaweedfs/seaweedfs/weed/security" "github.com/seaweedfs/seaweedfs/weed/util" "github.com/seaweedfs/seaweedfs/weed/util/http" + "github.com/seaweedfs/seaweedfs/weed/util/wildcard" "google.golang.org/grpc" ) type FilerBackupOptions struct { - isActivePassive *bool - filer *string - path *string - excludePaths *string - excludeFileName *string - debug *bool - proxyByFiler *bool - doDeleteFiles *bool - disableErrorRetry *bool - ignore404Error *bool - timeAgo *time.Duration - retentionDays *int + isActivePassive *bool + filer *string + path *string + excludePaths *string + excludeFileName *string // deprecated: use excludeFileNames + excludeFileNames *string + excludePathPatterns *string + debug *bool + proxyByFiler *bool + doDeleteFiles *bool + disableErrorRetry *bool + ignore404Error *bool + timeAgo *time.Duration + retentionDays *int } var ( @@ -43,7 +46,9 @@ func init() { filerBackupOptions.filer = cmdFilerBackup.Flag.String("filer", "localhost:8888", "filer of one SeaweedFS cluster") filerBackupOptions.path = cmdFilerBackup.Flag.String("filerPath", "/", "directory to sync on filer") filerBackupOptions.excludePaths = cmdFilerBackup.Flag.String("filerExcludePaths", "", "exclude directories to sync on filer") - filerBackupOptions.excludeFileName = cmdFilerBackup.Flag.String("filerExcludeFileName", "", "exclude file names that match the regexp to sync on filer") + filerBackupOptions.excludeFileName = cmdFilerBackup.Flag.String("filerExcludeFileName", "", "[DEPRECATED: use -filerExcludeFileNames] exclude file names that match the regexp") + filerBackupOptions.excludeFileNames = cmdFilerBackup.Flag.String("filerExcludeFileNames", "", "comma-separated wildcard patterns to exclude file names, e.g., \"*.tmp,._*\"") + filerBackupOptions.excludePathPatterns = cmdFilerBackup.Flag.String("filerExcludePathPatterns", "", "comma-separated wildcard patterns to exclude paths where any component matches, e.g., \".snapshot,temp*\"") filerBackupOptions.proxyByFiler = cmdFilerBackup.Flag.Bool("filerProxy", false, "read and write file chunks by filer instead of volume servers") filerBackupOptions.doDeleteFiles = cmdFilerBackup.Flag.Bool("doDeleteFiles", false, "delete files on the destination") filerBackupOptions.debug = cmdFilerBackup.Flag.Bool("debug", false, "debug mode to print out received files") @@ -72,6 +77,15 @@ func runFilerBackup(cmd *Command, args []string) bool { util.LoadSecurityConfiguration() util.LoadConfiguration("replication", true) + // Compile exclude patterns once before the retry loop — these are + // configuration errors and must not be retried. + reExcludeFileName, err := compileExcludePattern(*filerBackupOptions.excludeFileName, "exclude file name") + if err != nil { + glog.Fatalf("invalid -filerExcludeFileName: %v", err) + } + excludeFileNames := wildcard.CompileWildcardMatchers(*filerBackupOptions.excludeFileNames) + excludePathPatterns := wildcard.CompileWildcardMatchers(*filerBackupOptions.excludePathPatterns) + grpcDialOption := security.LoadClientTLS(util.GetViper(), "grpc.client") clientId := util.RandomInt32() @@ -79,7 +93,7 @@ func runFilerBackup(cmd *Command, args []string) bool { for { clientEpoch++ - err := doFilerBackup(grpcDialOption, &filerBackupOptions, clientId, clientEpoch) + err := doFilerBackup(grpcDialOption, &filerBackupOptions, reExcludeFileName, excludeFileNames, excludePathPatterns, clientId, clientEpoch) if err != nil { glog.Errorf("backup from %s: %v", *filerBackupOptions.filer, err) time.Sleep(1747 * time.Millisecond) @@ -91,7 +105,7 @@ const ( BackupKeyPrefix = "backup." ) -func doFilerBackup(grpcDialOption grpc.DialOption, backupOption *FilerBackupOptions, clientId int32, clientEpoch int32) error { +func doFilerBackup(grpcDialOption grpc.DialOption, backupOption *FilerBackupOptions, reExcludeFileName *regexp.Regexp, excludeFileNames []*wildcard.WildcardMatcher, excludePathPatterns []*wildcard.WildcardMatcher, clientId int32, clientEpoch int32) error { // find data sink dataSink := findSink(util.GetViper()) @@ -102,13 +116,6 @@ func doFilerBackup(grpcDialOption grpc.DialOption, backupOption *FilerBackupOpti sourceFiler := pb.ServerAddress(*backupOption.filer) sourcePath := *backupOption.path excludePaths := util.StringSplit(*backupOption.excludePaths, ",") - var reExcludeFileName *regexp.Regexp - if *backupOption.excludeFileName != "" { - var err error - if reExcludeFileName, err = regexp.Compile(*backupOption.excludeFileName); err != nil { - return fmt.Errorf("error compile regexp %v for exclude file name: %+v", *backupOption.excludeFileName, err) - } - } timeAgo := *backupOption.timeAgo targetPath := dataSink.GetSinkToDirectory() debug := *backupOption.debug @@ -140,7 +147,7 @@ func doFilerBackup(grpcDialOption grpc.DialOption, backupOption *FilerBackupOpti var processEventFn func(*filer_pb.SubscribeMetadataResponse) error if *backupOption.ignore404Error { - processEventFnGenerated := genProcessFunction(sourcePath, targetPath, excludePaths, reExcludeFileName, dataSink, *backupOption.doDeleteFiles, debug) + processEventFnGenerated := genProcessFunction(sourcePath, targetPath, excludePaths, reExcludeFileName, excludeFileNames, excludePathPatterns, dataSink, *backupOption.doDeleteFiles, debug) processEventFn = func(resp *filer_pb.SubscribeMetadataResponse) error { err := processEventFnGenerated(resp) if err == nil { @@ -153,7 +160,7 @@ func doFilerBackup(grpcDialOption grpc.DialOption, backupOption *FilerBackupOpti return err } } else { - processEventFn = genProcessFunction(sourcePath, targetPath, excludePaths, reExcludeFileName, dataSink, *backupOption.doDeleteFiles, debug) + processEventFn = genProcessFunction(sourcePath, targetPath, excludePaths, reExcludeFileName, excludeFileNames, excludePathPatterns, dataSink, *backupOption.doDeleteFiles, debug) } processEventFnWithOffset := pb.AddOffsetFunc(processEventFn, 3*time.Second, func(counter int64, lastTsNs int64) error { diff --git a/weed/command/filer_sync.go b/weed/command/filer_sync.go index 3294b4ed7..9bdcd6022 100644 --- a/weed/command/filer_sync.go +++ b/weed/command/filer_sync.go @@ -21,6 +21,7 @@ import ( statsCollect "github.com/seaweedfs/seaweedfs/weed/stats" "github.com/seaweedfs/seaweedfs/weed/util" "github.com/seaweedfs/seaweedfs/weed/util/grace" + "github.com/seaweedfs/seaweedfs/weed/util/wildcard" "google.golang.org/grpc" ) @@ -304,7 +305,7 @@ func doSubscribeFilerMetaChanges(clientId int32, clientEpoch int32, grpcDialOpti filerSink.SetChunkConcurrency(chunkConcurrency) filerSink.SetSourceFiler(filerSource) - persistEventFn := genProcessFunction(sourcePath, targetPath, sourceExcludePaths, nil, filerSink, doDeleteFiles, debug) + persistEventFn := genProcessFunction(sourcePath, targetPath, sourceExcludePaths, nil, nil, nil, filerSink, doDeleteFiles, debug) processEventFn := func(resp *filer_pb.SubscribeMetadataResponse) error { message := resp.EventNotification @@ -439,7 +440,7 @@ func setOffset(grpcDialOption grpc.DialOption, filer pb.ServerAddress, signature } -func genProcessFunction(sourcePath string, targetPath string, excludePaths []string, reExcludeFileName *regexp.Regexp, dataSink sink.ReplicationSink, doDeleteFiles bool, debug bool) func(resp *filer_pb.SubscribeMetadataResponse) error { +func genProcessFunction(sourcePath string, targetPath string, excludePaths []string, reExcludeFileName *regexp.Regexp, excludeFileNames []*wildcard.WildcardMatcher, excludePathPatterns []*wildcard.WildcardMatcher, dataSink sink.ReplicationSink, doDeleteFiles bool, debug bool) func(resp *filer_pb.SubscribeMetadataResponse) error { // process function processEventFn := func(resp *filer_pb.SubscribeMetadataResponse) error { message := resp.EventNotification @@ -468,9 +469,24 @@ func genProcessFunction(sourcePath string, targetPath string, excludePaths []str return nil } } - if reExcludeFileName != nil && reExcludeFileName.MatchString(message.NewEntry.Name) { + // Compute per-side exclusion so that rename events crossing an + // exclude boundary are handled as delete + create rather than + // being entirely skipped. + oldExcluded := isEntryExcluded(resp.Directory, message.OldEntry, reExcludeFileName, excludeFileNames, excludePathPatterns) + newExcluded := isEntryExcluded(message.NewParentPath, message.NewEntry, reExcludeFileName, excludeFileNames, excludePathPatterns) + + if oldExcluded && newExcluded { return nil } + if oldExcluded { + // Old side is excluded — treat as pure create of new entry. + message.OldEntry = nil + } + if newExcluded { + // New side is excluded — treat as pure delete of old entry. + message.NewEntry = nil + sourceNewKey = "" + } if dataSink.IsIncremental() { doDeleteFiles = false } @@ -578,3 +594,75 @@ func buildKey(dataSink sink.ReplicationSink, message *filer_pb.EventNotification return escapeKey(key) } + +// isEntryExcluded checks whether a single side (old or new) of an event is excluded +// by the deprecated filename regexp, the wildcard file-name matchers, or the +// wildcard path-pattern matchers. +func isEntryExcluded(dir string, entry *filer_pb.Entry, reExcludeFileName *regexp.Regexp, excludeFileNames []*wildcard.WildcardMatcher, excludePathPatterns []*wildcard.WildcardMatcher) bool { + if entry == nil { + return false + } + // deprecated regexp-based filename exclusion + if reExcludeFileName != nil && reExcludeFileName.MatchString(entry.Name) { + return true + } + // wildcard-based filename exclusion + if len(excludeFileNames) > 0 && matchesAnyWildcard(excludeFileNames, entry.Name) { + return true + } + // wildcard-based path-pattern exclusion: match against each directory + // component and the entry name itself + if len(excludePathPatterns) > 0 { + if pathContainsWildcardMatch(dir, excludePathPatterns) { + return true + } + if matchesAnyWildcard(excludePathPatterns, entry.Name) { + return true + } + } + return false +} + +// compileExcludePattern compiles a regexp pattern string, returning nil if empty. +func compileExcludePattern(pattern string, label string) (*regexp.Regexp, error) { + if pattern == "" { + return nil, nil + } + re, err := regexp.Compile(pattern) + if err != nil { + return nil, fmt.Errorf("error compile regexp %v for %s: %+v", pattern, label, err) + } + return re, nil +} + +// matchesAnyWildcard returns true if any matcher matches the value. +// Returns false when matchers is empty (unlike wildcard.MatchesAnyWildcard +// which returns true for empty matchers). +func matchesAnyWildcard(matchers []*wildcard.WildcardMatcher, value string) bool { + for _, m := range matchers { + if m != nil && m.Match(value) { + return true + } + } + return false +} + +// pathContainsWildcardMatch checks if any component of the given path matches +// any of the wildcard matchers, without allocating a slice. +func pathContainsWildcardMatch(path string, matchers []*wildcard.WildcardMatcher) bool { + for path != "" { + i := strings.IndexByte(path, '/') + var component string + if i < 0 { + component = path + path = "" + } else { + component = path[:i] + path = path[i+1:] + } + if component != "" && matchesAnyWildcard(matchers, component) { + return true + } + } + return false +}