From cc2edfaf68e5ea253cd1c9176868f6d131fe0b33 Mon Sep 17 00:00:00 2001
From: Chris Lu
Date: Mon, 22 Dec 2025 00:58:23 -0800
Subject: [PATCH] fix: enable RetryForever for active-active cluster sync to prevent out-of-sync (#7840)

Fixes #7230

When a cluster goes down during file replication, the chunk upload
process would fail after a limited number of retries. Once the remote
cluster came back online, those failed uploads were never retried,
leaving the clusters out-of-sync.

This change enables the RetryForever flag in the UploadOption when
replicating chunks between filers. This ensures that upload operations
will keep retrying indefinitely, and once the remote cluster comes back
online, the pending uploads will automatically succeed.

Users no longer need to manually run fs.meta.save and fs.meta.load as a
workaround for out-of-sync clusters.
---
 weed/replication/sink/filersink/fetch_write.go | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/weed/replication/sink/filersink/fetch_write.go b/weed/replication/sink/filersink/fetch_write.go
index 1f257941f..1bcb36a5f 100644
--- a/weed/replication/sink/filersink/fetch_write.go
+++ b/weed/replication/sink/filersink/fetch_write.go
@@ -2,12 +2,13 @@ package filersink
 
 import (
 	"fmt"
-	"github.com/schollz/progressbar/v3"
-	"github.com/seaweedfs/seaweedfs/weed/util"
 	"os"
 	"path/filepath"
 	"sync"
 
+	"github.com/schollz/progressbar/v3"
+	"github.com/seaweedfs/seaweedfs/weed/util"
+
 	"google.golang.org/grpc"
 
 	"github.com/seaweedfs/seaweedfs/weed/glog"
@@ -114,6 +115,7 @@ func (fs *FilerSink) fetchAndWrite(sourceChunk *filer_pb.FileChunk, path string)
 			IsInputCompressed: "gzip" == header.Get("Content-Encoding"),
 			MimeType:          header.Get("Content-Type"),
 			PairMap:           nil,
+			RetryForever:      true,
 		},
 		func(host, fileId string) string {
 			fileUrl := fmt.Sprintf("http://%s/%s", host, fileId)
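
For readers unfamiliar with the flag, below is a minimal, self-contained Go sketch of what "retry forever" means in practice. The retryForever helper, its backoff values, and the simulated failure are hypothetical illustrations, not SeaweedFS code; in the actual change, the behavior is selected by setting RetryForever on the UploadOption passed to the chunk upload path.

package main

import (
	"errors"
	"fmt"
	"time"
)

// retryForever keeps calling fn until it succeeds, sleeping with a capped
// exponential backoff between attempts. This is the idea behind an unbounded
// retry: instead of giving up after a fixed number of failures, the operation
// waits out a remote outage and completes once the peer is reachable again.
func retryForever(name string, fn func() error) {
	backoff := time.Second
	const maxBackoff = 30 * time.Second
	for attempt := 1; ; attempt++ {
		err := fn()
		if err == nil {
			return
		}
		fmt.Printf("%s: attempt %d failed: %v, retrying in %v\n", name, attempt, err, backoff)
		time.Sleep(backoff)
		if backoff < maxBackoff {
			backoff *= 2
		}
	}
}

func main() {
	// Simulate a remote filer that recovers after three failed attempts.
	remaining := 3
	retryForever("replicate chunk", func() error {
		if remaining > 0 {
			remaining--
			return errors.New("remote cluster unreachable")
		}
		return nil
	})
	fmt.Println("chunk replicated after the remote cluster came back")
}

With a bounded retry, the loop above would return an error once the attempt budget was exhausted, and the chunk would stay missing on the sink cluster; the unbounded variant is what lets the pending uploads succeed on their own when the remote cluster recovers.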