From d26c260041c73fd9cbc76c8acd3adb2e6f99bb30 Mon Sep 17 00:00:00 2001 From: Chris Lu Date: Mon, 15 Dec 2025 22:43:09 -0800 Subject: [PATCH] s3: fix memory leak in ListObjectVersions with early termination (#7785) * s3: fix memory leak in ListObjectVersions with early termination This fixes a critical memory leak in S3 versioned bucket listing operations: 1. Add maxCollect parameter to findVersionsRecursively() to stop collecting versions once we have enough for the response + truncation detection 2. Add early termination checks throughout the recursive traversal to prevent scanning entire buckets when only a small number of results are requested 3. Use clear() on tracking maps after collection to help GC reclaim memory 4. Create new slice with exact capacity when truncating results instead of re-slicing, which allows GC to reclaim the excess backing array memory 5. Pre-allocate result slice with reasonable initial capacity to reduce reallocations during collection Before this fix, listing versions on a bucket with many objects and versions would load ALL versions into memory before pagination, causing OOM crashes. Fixes memory exhaustion when calling ListObjectVersions on large versioned buckets. * s3: fix pre-allocation capacity to be consistent with maxCollect Address review feedback: the previous capping logic caused an inconsistency where initialCap was capped to 1000 but maxCollect was maxKeys+1, leading to unnecessary reallocations when maxKeys was 1000. Fix by: 1. Cap maxKeys to 1000 (S3 API limit) at the start of the function 2. Use maxKeys+1 directly for slice capacity, ensuring consistency with the maxCollect parameter passed to findVersionsRecursively --- weed/s3api/s3api_object_versioning.go | 45 ++++++++++++++++++++++----- 1 file changed, 37 insertions(+), 8 deletions(-) diff --git a/weed/s3api/s3api_object_versioning.go b/weed/s3api/s3api_object_versioning.go index 0174c2a4b..80bd805fe 100644 --- a/weed/s3api/s3api_object_versioning.go +++ b/weed/s3api/s3api_object_versioning.go @@ -149,7 +149,13 @@ func (s3a *S3ApiServer) createDeleteMarker(bucket, object string) (string, error // listObjectVersions lists all versions of an object func (s3a *S3ApiServer) listObjectVersions(bucket, prefix, keyMarker, versionIdMarker, delimiter string, maxKeys int) (*S3ListObjectVersionsResult, error) { - var allVersions []interface{} // Can contain VersionEntry or DeleteMarkerEntry + // S3 API limits max-keys to 1000 + if maxKeys > 1000 { + maxKeys = 1000 + } + // Pre-allocate with capacity for maxKeys+1 to reduce reallocations + // The extra 1 is for truncation detection + allVersions := make([]interface{}, 0, maxKeys+1) glog.V(1).Infof("listObjectVersions: listing versions for bucket %s, prefix '%s'", bucket, prefix) @@ -160,13 +166,18 @@ func (s3a *S3ApiServer) listObjectVersions(bucket, prefix, keyMarker, versionIdM seenVersionIds := make(map[string]bool) // Recursively find all .versions directories in the bucket + // Pass maxKeys+1 to collect one extra for truncation detection bucketPath := path.Join(s3a.option.BucketsPath, bucket) - err := s3a.findVersionsRecursively(bucketPath, "", &allVersions, processedObjects, seenVersionIds, bucket, prefix) + err := s3a.findVersionsRecursively(bucketPath, "", &allVersions, processedObjects, seenVersionIds, bucket, prefix, maxKeys+1) if err != nil { glog.Errorf("listObjectVersions: findVersionsRecursively failed: %v", err) return nil, err } + // Clear maps to help GC reclaim memory sooner + clear(processedObjects) + clear(seenVersionIds) + glog.V(1).Infof("listObjectVersions: found %d total versions", len(allVersions)) // Sort by key, then by LastModified (newest first), then by VersionId for deterministic ordering @@ -225,13 +236,12 @@ func (s3a *S3ApiServer) listObjectVersions(bucket, prefix, keyMarker, versionIdM glog.V(1).Infof("listObjectVersions: building response with %d versions (truncated: %v)", len(allVersions), result.IsTruncated) - // Limit results + // Limit results and properly release excess memory if len(allVersions) > maxKeys { - allVersions = allVersions[:maxKeys] result.IsTruncated = true - // Set next markers - switch v := allVersions[len(allVersions)-1].(type) { + // Set next markers from the last item we'll return + switch v := allVersions[maxKeys-1].(type) { case *VersionEntry: result.NextKeyMarker = v.Key result.NextVersionIdMarker = v.VersionId @@ -239,6 +249,11 @@ func (s3a *S3ApiServer) listObjectVersions(bucket, prefix, keyMarker, versionIdM result.NextKeyMarker = v.Key result.NextVersionIdMarker = v.VersionId } + + // Create a new slice with exact capacity to allow GC to reclaim excess memory + truncated := make([]interface{}, maxKeys) + copy(truncated, allVersions[:maxKeys]) + allVersions = truncated } // Always initialize empty slices so boto3 gets the expected fields even when empty @@ -263,16 +278,26 @@ func (s3a *S3ApiServer) listObjectVersions(bucket, prefix, keyMarker, versionIdM } // findVersionsRecursively searches for all .versions directories and regular files recursively -func (s3a *S3ApiServer) findVersionsRecursively(currentPath, relativePath string, allVersions *[]interface{}, processedObjects map[string]bool, seenVersionIds map[string]bool, bucket, prefix string) error { +// maxCollect limits the number of versions to collect for memory efficiency (0 = unlimited) +func (s3a *S3ApiServer) findVersionsRecursively(currentPath, relativePath string, allVersions *[]interface{}, processedObjects map[string]bool, seenVersionIds map[string]bool, bucket, prefix string, maxCollect int) error { // List entries in current directory with pagination startFrom := "" for { + // Early termination: stop if we've collected enough versions + if maxCollect > 0 && len(*allVersions) >= maxCollect { + return nil + } + entries, isLast, err := s3a.list(currentPath, "", startFrom, false, filer.PaginationSize) if err != nil { return err } for _, entry := range entries { + // Early termination check inside loop + if maxCollect > 0 && len(*allVersions) >= maxCollect { + return nil + } // Track last entry name for pagination startFrom = entry.Name @@ -390,11 +415,15 @@ func (s3a *S3ApiServer) findVersionsRecursively(currentPath, relativePath string // Recursively search subdirectories (regardless of whether they're explicit or implicit) fullPath := path.Join(currentPath, entry.Name) - err := s3a.findVersionsRecursively(fullPath, entryPath, allVersions, processedObjects, seenVersionIds, bucket, prefix) + err := s3a.findVersionsRecursively(fullPath, entryPath, allVersions, processedObjects, seenVersionIds, bucket, prefix, maxCollect) if err != nil { glog.Warningf("Error searching subdirectory %s: %v", entryPath, err) continue } + // Check if we've collected enough after recursion + if maxCollect > 0 && len(*allVersions) >= maxCollect { + return nil + } } } else { // This is a regular file - check if it's a pre-versioning object