From 581392638bb3e3d2e8f24ef73d686e964056c040 Mon Sep 17 00:00:00 2001 From: chrislu Date: Sun, 14 Sep 2025 22:24:53 -0700 Subject: [PATCH] s3: add retry logic for filer consistency in versioned object operations - Add retry logic to updateLatestVersionInDirectory to handle cases where .versions directory creation succeeds but is not immediately visible - Add retry logic to getLatestObjectVersion for the same consistency issue - Use 3 retries with 50ms delays to handle filer store consistency timing - Addresses CI failures where 'filer: no entry is found in filer store' occurs after successful directory creation - Maintains CI debug logging to track retry attempts and outcomes --- weed/s3api/s3api_object_handlers_put.go | 26 ++++++++++++++++++++----- weed/s3api/s3api_object_versioning.go | 24 +++++++++++++++++++---- 2 files changed, 41 insertions(+), 9 deletions(-) diff --git a/weed/s3api/s3api_object_handlers_put.go b/weed/s3api/s3api_object_handlers_put.go index 55c3c2e13..c61b6f8a7 100644 --- a/weed/s3api/s3api_object_handlers_put.go +++ b/weed/s3api/s3api_object_handlers_put.go @@ -640,12 +640,28 @@ func (s3a *S3ApiServer) updateLatestVersionInDirectory(bucket, object, versionId glog.V(0).Infof("CI-DEBUG: updateLatestVersionInDirectory: starting update for %s/%s version %s file %s", bucket, object, versionId, versionFileName) - // Get the current .versions directory entry - versionsEntry, err := s3a.getEntry(bucketDir, versionsObjectPath) + // Get the current .versions directory entry with retry logic for filer consistency + var versionsEntry *filer_pb.Entry + var err error + maxRetries := 3 + for attempt := 1; attempt <= maxRetries; attempt++ { + versionsEntry, err = s3a.getEntry(bucketDir, versionsObjectPath) + if err == nil { + break + } + + glog.V(0).Infof("CI-DEBUG: updateLatestVersionInDirectory: attempt %d/%d failed to get .versions entry for %s/%s: %v", attempt, maxRetries, bucket, object, err) + + if attempt < maxRetries { + // Brief wait before retry to allow filer consistency + time.Sleep(time.Millisecond * 50) + } + } + if err != nil { - glog.Errorf("updateLatestVersionInDirectory: failed to get .versions directory for %s/%s: %v", bucket, object, err) - glog.V(0).Infof("CI-DEBUG: updateLatestVersionInDirectory: FAILED to get .versions entry for %s/%s: %v", bucket, object, err) - return fmt.Errorf("failed to get .versions directory: %w", err) + glog.Errorf("updateLatestVersionInDirectory: failed to get .versions directory for %s/%s after %d attempts: %v", bucket, object, maxRetries, err) + glog.V(0).Infof("CI-DEBUG: updateLatestVersionInDirectory: FAILED to get .versions entry for %s/%s after %d attempts: %v", bucket, object, maxRetries, err) + return fmt.Errorf("failed to get .versions directory after %d attempts: %w", maxRetries, err) } glog.V(0).Infof("CI-DEBUG: updateLatestVersionInDirectory: got .versions entry for %s/%s, updating metadata", bucket, object) diff --git a/weed/s3api/s3api_object_versioning.go b/weed/s3api/s3api_object_versioning.go index 792b5141d..d3ced793a 100644 --- a/weed/s3api/s3api_object_versioning.go +++ b/weed/s3api/s3api_object_versioning.go @@ -782,14 +782,30 @@ func (s3a *S3ApiServer) getLatestObjectVersion(bucket, object string) (*filer_pb glog.V(1).Infof("getLatestObjectVersion: looking for latest version of %s/%s", bucket, object) glog.V(0).Infof("CI-DEBUG: getLatestObjectVersion: starting lookup for %s/%s", bucket, object) - // Get the .versions directory entry to read latest version metadata - versionsEntry, err := s3a.getEntry(bucketDir, versionsObjectPath) + // Get the .versions directory entry to read latest version metadata with retry logic for filer consistency + var versionsEntry *filer_pb.Entry + var err error + maxRetries := 3 + for attempt := 1; attempt <= maxRetries; attempt++ { + versionsEntry, err = s3a.getEntry(bucketDir, versionsObjectPath) + if err == nil { + break + } + + glog.V(0).Infof("CI-DEBUG: getLatestObjectVersion: attempt %d/%d failed to get .versions directory for %s/%s: %v", attempt, maxRetries, bucket, object, err) + + if attempt < maxRetries { + // Brief wait before retry to allow filer consistency + time.Sleep(time.Millisecond * 50) + } + } + if err != nil { // .versions directory doesn't exist - this can happen for objects that existed // before versioning was enabled on the bucket. Fall back to checking for a // regular (non-versioned) object file. - glog.V(1).Infof("getLatestObjectVersion: no .versions directory for %s%s (error: %v), checking for pre-versioning object", bucket, object, err) - glog.V(0).Infof("CI-DEBUG: getLatestObjectVersion: no .versions directory for %s/%s (error: %v), falling back to pre-versioning", bucket, object, err) + glog.V(1).Infof("getLatestObjectVersion: no .versions directory for %s%s after %d attempts (error: %v), checking for pre-versioning object", bucket, object, maxRetries, err) + glog.V(0).Infof("CI-DEBUG: getLatestObjectVersion: no .versions directory for %s/%s after %d attempts (error: %v), falling back to pre-versioning", bucket, object, maxRetries, err) regularEntry, regularErr := s3a.getEntry(bucketDir, object) if regularErr != nil {