From ecadeddcbee6863b84a6dbc168a2277243b4f743 Mon Sep 17 00:00:00 2001 From: Mmx233 <36563672+Mmx233@users.noreply.github.com> Date: Mon, 23 Mar 2026 11:59:34 -0700 Subject: [PATCH] =?UTF-8?q?fix:=20extend=20ignore404Error=20to=20match=204?= =?UTF-8?q?04=20Not=20Found=20string=20from=20S3=20sink=E2=80=A6=20(#8741)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix: extend ignore404Error to match 404 Not Found string from S3 sink errors * test: add unit tests for isIgnorable404 error matching * improve: pre-compute ignorable 404 string and simplify isIgnorable404 * test: replace init() with TestMain for global HTTP client setup --- weed/command/filer_backup.go | 29 ++++++++---- weed/command/filer_backup_test.go | 73 +++++++++++++++++++++++++++++++ 2 files changed, 93 insertions(+), 9 deletions(-) create mode 100644 weed/command/filer_backup_test.go diff --git a/weed/command/filer_backup.go b/weed/command/filer_backup.go index 120e3dbc9..02360dd0b 100644 --- a/weed/command/filer_backup.go +++ b/weed/command/filer_backup.go @@ -3,6 +3,7 @@ package command import ( "errors" "fmt" + nethttp "net/http" "regexp" "strings" "time" @@ -33,7 +34,8 @@ type FilerBackupOptions struct { } var ( - filerBackupOptions FilerBackupOptions + filerBackupOptions FilerBackupOptions + ignorable404ErrString = fmt.Sprintf("%d %s: %s", nethttp.StatusNotFound, nethttp.StatusText(nethttp.StatusNotFound), http.ErrNotFound.Error()) ) func init() { @@ -144,17 +146,10 @@ func doFilerBackup(grpcDialOption grpc.DialOption, backupOption *FilerBackupOpti if err == nil { return nil } - // ignore HTTP 404 from remote reads - if errors.Is(err, http.ErrNotFound) { + if isIgnorable404(err) { glog.V(0).Infof("got 404 error for %s, ignore it: %s", getSourceKey(resp), err.Error()) return nil } - // also ignore missing volume/lookup errors coming from LookupFileId or vid map - errStr := err.Error() - if strings.Contains(errStr, "LookupFileId") || (strings.Contains(errStr, "volume id") && strings.Contains(errStr, "not found")) { - glog.V(0).Infof("got missing-volume error for %s, ignore it: %s", getSourceKey(resp), errStr) - return nil - } return err } } else { @@ -218,3 +213,19 @@ func getSourceKey(resp *filer_pb.SubscribeMetadataResponse) string { } return "" } + +// isIgnorable404 returns true if the error represents a 404/not-found condition +// that should be silently ignored during backup. This covers: +// - errors wrapping http.ErrNotFound (direct volume server 404 via non-S3 sinks) +// - errors containing the "404 Not Found: not found" status string (S3 sink path +// where AWS SDK breaks the errors.Is unwrap chain) +// - LookupFileId or volume-id-not-found errors from the volume id map +func isIgnorable404(err error) bool { + if errors.Is(err, http.ErrNotFound) { + return true + } + errStr := err.Error() + return strings.Contains(errStr, ignorable404ErrString) || + strings.Contains(errStr, "LookupFileId") || + (strings.Contains(errStr, "volume id") && strings.Contains(errStr, "not found")) +} diff --git a/weed/command/filer_backup_test.go b/weed/command/filer_backup_test.go new file mode 100644 index 000000000..b22706d0d --- /dev/null +++ b/weed/command/filer_backup_test.go @@ -0,0 +1,73 @@ +package command + +import ( + "context" + "fmt" + "net/http" + "net/http/httptest" + "os" + "testing" + + util_http "github.com/seaweedfs/seaweedfs/weed/util/http" +) + +func TestMain(m *testing.M) { + util_http.InitGlobalHttpClient() + os.Exit(m.Run()) +} + +// readUrlError starts a test HTTP server returning the given status code +// and returns the error produced by ReadUrlAsStream. +// +// The error format is defined in ReadUrlAsStream: +// https://github.com/seaweedfs/seaweedfs/blob/3a765df2ff90839acb9acf910b73513417fa84d1/weed/util/http/http_global_client_util.go#L353 +func readUrlError(t *testing.T, statusCode int) error { + t.Helper() + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + http.Error(w, http.StatusText(statusCode), statusCode) + })) + defer server.Close() + + _, err := util_http.ReadUrlAsStream(context.Background(), + server.URL+"/437,03f591a3a2b95e?readDeleted=true", "", + nil, false, true, 0, 1024, func(data []byte) {}) + if err == nil { + t.Fatal("expected error from ReadUrlAsStream, got nil") + } + return err +} + +func TestIsIgnorable404_WrappedErrNotFound(t *testing.T) { + readErr := readUrlError(t, http.StatusNotFound) + // genProcessFunction wraps sink errors with %w: + // https://github.com/seaweedfs/seaweedfs/blob/3a765df2ff90839acb9acf910b73513417fa84d1/weed/command/filer_sync.go#L496 + genErr := fmt.Errorf("create entry1 : %w", readErr) + + if !isIgnorable404(genErr) { + t.Errorf("expected ignorable, got not: %v", genErr) + } +} + +func TestIsIgnorable404_BrokenUnwrapChain(t *testing.T) { + readErr := readUrlError(t, http.StatusNotFound) + // AWS SDK v1 wraps transport errors via awserr.New which uses origErr.Error() + // instead of %w, so errors.Is cannot unwrap through it: + // https://github.com/aws/aws-sdk-go/blob/v1.55.8/aws/corehandlers/handlers.go#L173 + // https://github.com/aws/aws-sdk-go/blob/v1.55.8/aws/awserr/types.go#L15 + awsSdkErr := fmt.Errorf("RequestError: send request failed\n"+ + "caused by: Put \"https://s3.amazonaws.com/bucket/key\": %s", readErr.Error()) + genErr := fmt.Errorf("create entry1 : %w", awsSdkErr) + + if !isIgnorable404(genErr) { + t.Errorf("expected ignorable, got not: %v", genErr) + } +} + +func TestIsIgnorable404_NonIgnorableError(t *testing.T) { + readErr := readUrlError(t, http.StatusForbidden) + genErr := fmt.Errorf("create entry1 : %w", readErr) + + if isIgnorable404(genErr) { + t.Errorf("expected not ignorable, got ignorable: %v", genErr) + } +}