Browse Source

s3: fix memory leak in ListObjectVersions with early termination (#7785)

* s3: fix memory leak in ListObjectVersions with early termination

This fixes a critical memory leak in S3 versioned bucket listing operations:

1. Add maxCollect parameter to findVersionsRecursively() to stop collecting
   versions once we have enough for the response + truncation detection

2. Add early termination checks throughout the recursive traversal to
   prevent scanning entire buckets when only a small number of results
   are requested

3. Use clear() on tracking maps after collection to help GC reclaim memory

4. Create new slice with exact capacity when truncating results instead of
   re-slicing, which allows GC to reclaim the excess backing array memory

5. Pre-allocate result slice with reasonable initial capacity to reduce
   reallocations during collection

Before this fix, listing versions on a bucket with many objects and versions
would load ALL versions into memory before pagination, causing OOM crashes.

Fixes memory exhaustion when calling ListObjectVersions on large versioned buckets.

* s3: fix pre-allocation capacity to be consistent with maxCollect

Address review feedback: the previous capping logic caused an inconsistency
where initialCap was capped to 1000 but maxCollect was maxKeys+1, leading to
unnecessary reallocations when maxKeys was 1000.

Fix by:
1. Cap maxKeys to 1000 (S3 API limit) at the start of the function
2. Use maxKeys+1 directly for slice capacity, ensuring consistency with
   the maxCollect parameter passed to findVersionsRecursively
pull/7786/head
Chris Lu 3 days ago
committed by GitHub
parent
commit
d26c260041
No known key found for this signature in database GPG Key ID: B5690EEEBB952194
  1. 45
      weed/s3api/s3api_object_versioning.go

45
weed/s3api/s3api_object_versioning.go

@ -149,7 +149,13 @@ func (s3a *S3ApiServer) createDeleteMarker(bucket, object string) (string, error
// listObjectVersions lists all versions of an object
func (s3a *S3ApiServer) listObjectVersions(bucket, prefix, keyMarker, versionIdMarker, delimiter string, maxKeys int) (*S3ListObjectVersionsResult, error) {
var allVersions []interface{} // Can contain VersionEntry or DeleteMarkerEntry
// S3 API limits max-keys to 1000
if maxKeys > 1000 {
maxKeys = 1000
}
// Pre-allocate with capacity for maxKeys+1 to reduce reallocations
// The extra 1 is for truncation detection
allVersions := make([]interface{}, 0, maxKeys+1)
glog.V(1).Infof("listObjectVersions: listing versions for bucket %s, prefix '%s'", bucket, prefix)
@ -160,13 +166,18 @@ func (s3a *S3ApiServer) listObjectVersions(bucket, prefix, keyMarker, versionIdM
seenVersionIds := make(map[string]bool)
// Recursively find all .versions directories in the bucket
// Pass maxKeys+1 to collect one extra for truncation detection
bucketPath := path.Join(s3a.option.BucketsPath, bucket)
err := s3a.findVersionsRecursively(bucketPath, "", &allVersions, processedObjects, seenVersionIds, bucket, prefix)
err := s3a.findVersionsRecursively(bucketPath, "", &allVersions, processedObjects, seenVersionIds, bucket, prefix, maxKeys+1)
if err != nil {
glog.Errorf("listObjectVersions: findVersionsRecursively failed: %v", err)
return nil, err
}
// Clear maps to help GC reclaim memory sooner
clear(processedObjects)
clear(seenVersionIds)
glog.V(1).Infof("listObjectVersions: found %d total versions", len(allVersions))
// Sort by key, then by LastModified (newest first), then by VersionId for deterministic ordering
@ -225,13 +236,12 @@ func (s3a *S3ApiServer) listObjectVersions(bucket, prefix, keyMarker, versionIdM
glog.V(1).Infof("listObjectVersions: building response with %d versions (truncated: %v)", len(allVersions), result.IsTruncated)
// Limit results
// Limit results and properly release excess memory
if len(allVersions) > maxKeys {
allVersions = allVersions[:maxKeys]
result.IsTruncated = true
// Set next markers
switch v := allVersions[len(allVersions)-1].(type) {
// Set next markers from the last item we'll return
switch v := allVersions[maxKeys-1].(type) {
case *VersionEntry:
result.NextKeyMarker = v.Key
result.NextVersionIdMarker = v.VersionId
@ -239,6 +249,11 @@ func (s3a *S3ApiServer) listObjectVersions(bucket, prefix, keyMarker, versionIdM
result.NextKeyMarker = v.Key
result.NextVersionIdMarker = v.VersionId
}
// Create a new slice with exact capacity to allow GC to reclaim excess memory
truncated := make([]interface{}, maxKeys)
copy(truncated, allVersions[:maxKeys])
allVersions = truncated
}
// Always initialize empty slices so boto3 gets the expected fields even when empty
@ -263,16 +278,26 @@ func (s3a *S3ApiServer) listObjectVersions(bucket, prefix, keyMarker, versionIdM
}
// findVersionsRecursively searches for all .versions directories and regular files recursively
func (s3a *S3ApiServer) findVersionsRecursively(currentPath, relativePath string, allVersions *[]interface{}, processedObjects map[string]bool, seenVersionIds map[string]bool, bucket, prefix string) error {
// maxCollect limits the number of versions to collect for memory efficiency (0 = unlimited)
func (s3a *S3ApiServer) findVersionsRecursively(currentPath, relativePath string, allVersions *[]interface{}, processedObjects map[string]bool, seenVersionIds map[string]bool, bucket, prefix string, maxCollect int) error {
// List entries in current directory with pagination
startFrom := ""
for {
// Early termination: stop if we've collected enough versions
if maxCollect > 0 && len(*allVersions) >= maxCollect {
return nil
}
entries, isLast, err := s3a.list(currentPath, "", startFrom, false, filer.PaginationSize)
if err != nil {
return err
}
for _, entry := range entries {
// Early termination check inside loop
if maxCollect > 0 && len(*allVersions) >= maxCollect {
return nil
}
// Track last entry name for pagination
startFrom = entry.Name
@ -390,11 +415,15 @@ func (s3a *S3ApiServer) findVersionsRecursively(currentPath, relativePath string
// Recursively search subdirectories (regardless of whether they're explicit or implicit)
fullPath := path.Join(currentPath, entry.Name)
err := s3a.findVersionsRecursively(fullPath, entryPath, allVersions, processedObjects, seenVersionIds, bucket, prefix)
err := s3a.findVersionsRecursively(fullPath, entryPath, allVersions, processedObjects, seenVersionIds, bucket, prefix, maxCollect)
if err != nil {
glog.Warningf("Error searching subdirectory %s: %v", entryPath, err)
continue
}
// Check if we've collected enough after recursion
if maxCollect > 0 && len(*allVersions) >= maxCollect {
return nil
}
}
} else {
// This is a regular file - check if it's a pre-versioning object

Loading…
Cancel
Save