From a4b896a2248a12a1113e9584f073ac6b8a62ee2c Mon Sep 17 00:00:00 2001 From: Chris Lu Date: Thu, 2 Apr 2026 15:59:52 -0700 Subject: [PATCH] fix(s3): skip directories before marker in ListObjectVersions pagination (#8890) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix(s3): skip directories before marker in ListObjectVersions pagination ListObjectVersions was re-traversing the entire directory tree from the beginning on every paginated request, only skipping entries at the leaf level. For buckets with millions of objects in deep hierarchies, this caused exponentially slower responses as pagination progressed. Two optimizations: 1. Use keyMarker to compute a startFrom position at each directory level, skipping directly to the relevant entry instead of scanning from the beginning (mirroring how ListObjects uses marker descent). 2. Skip recursing into subdirectories whose keys are entirely before the keyMarker. Changes per-page cost from O(entries_before_marker) to O(tree_depth). * test(s3): add integration test for deep-hierarchy version listing pagination Adds TestVersioningPaginationDeepDirectoryHierarchy which creates objects across 20 subdirectories at depth 6 (mimicking Veeam 365 backup layout) and paginates through them with small maxKeys. Verifies correctness (no duplicates, sorted order, all objects found) and checks that later pages don't take dramatically longer than earlier ones — the symptom of the pre-fix re-traversal bug. Also tests delimiter+pagination interaction across subdirectories. * test(s3): strengthen deep-hierarchy pagination assertions - Replace timing warning (t.Logf) with a failing assertion (t.Errorf) so pagination regressions actually fail the test. - Replace generic count/uniqueness/sort checks on CommonPrefixes with exact equality against the expected prefix slice, catching wrong-but- sorted results. 
* test(s3): use allKeys for exact assertion in deep-hierarchy pagination test Wire the allKeys slice (previously unused dead code) into the version listing assertion, replacing generic count/uniqueness/sort checks with an exact equality comparison against the keys that were created. --- .../s3_versioning_pagination_stress_test.go | 151 ++++++++++++++++++ ...api_object_handlers_list_versioned_test.go | 54 +++++++ weed/s3api/s3api_object_versioning.go | 47 +++++- 3 files changed, 251 insertions(+), 1 deletion(-) diff --git a/test/s3/versioning/s3_versioning_pagination_stress_test.go b/test/s3/versioning/s3_versioning_pagination_stress_test.go index ae18cac65..f22074fab 100644 --- a/test/s3/versioning/s3_versioning_pagination_stress_test.go +++ b/test/s3/versioning/s3_versioning_pagination_stress_test.go @@ -289,6 +289,157 @@ func TestVersioningPaginationMultipleObjectsManyVersions(t *testing.T) { }) } +// TestVersioningPaginationDeepDirectoryHierarchy tests that paginated ListObjectVersions +// correctly skips directory subtrees before the key-marker. This reproduces the +// real-world scenario where Veeam backup objects are spread across many subdirectories +// (e.g., Mailboxes/<MailboxId>/ItemsData/<file>) and pagination becomes exponentially +// slower as the marker advances through the tree. +// +// Run with: ENABLE_STRESS_TESTS=true go test -v -run TestVersioningPaginationDeepDirectoryHierarchy -timeout 10m +func TestVersioningPaginationDeepDirectoryHierarchy(t *testing.T) { + if os.Getenv("ENABLE_STRESS_TESTS") != "true" { + t.Skip("Skipping stress test. 
Set ENABLE_STRESS_TESTS=true to run.") + } + + client := getS3Client(t) + bucketName := getNewBucketName() + + createBucket(t, client, bucketName) + defer deleteBucket(t, client, bucketName) + + enableVersioning(t, client, bucketName) + checkVersioningStatus(t, client, bucketName, types.BucketVersioningStatusEnabled) + + // Create a deep directory structure mimicking Veeam 365 backup layout: + // Backup/Organizations/<OrgId>/Mailboxes/<MailboxId>/ItemsData/<file> + numMailboxes := 20 + filesPerMailbox := 5 + totalObjects := numMailboxes * filesPerMailbox + orgPrefix := "Backup/Organizations/org-001/Mailboxes" + + t.Logf("Creating %d objects across %d subdirectories (depth=6)...", totalObjects, numMailboxes) + startTime := time.Now() + + allKeys := make([]string, 0, totalObjects) + for i := 0; i < numMailboxes; i++ { + mailboxId := fmt.Sprintf("mbx-%03d", i) + for j := 0; j < filesPerMailbox; j++ { + key := fmt.Sprintf("%s/%s/ItemsData/file-%03d.dat", orgPrefix, mailboxId, j) + _, err := client.PutObject(context.TODO(), &s3.PutObjectInput{ + Bucket: aws.String(bucketName), + Key: aws.String(key), + Body: strings.NewReader(fmt.Sprintf("content-%d-%d", i, j)), + }) + require.NoError(t, err) + allKeys = append(allKeys, key) + } + } + t.Logf("Created %d objects in %v", totalObjects, time.Since(startTime)) + + // Test 1: Paginate through all versions with a broad prefix and small maxKeys. + // This forces multiple pages that must skip earlier subdirectory trees. 
+ t.Run("PaginateAcrossSubdirectories", func(t *testing.T) { + maxKeys := int32(10) // Force many pages to exercise marker skipping + var allVersions []types.ObjectVersion + var keyMarker, versionIdMarker *string + pageCount := 0 + pageStartTimes := make([]time.Duration, 0) + + for { + pageStart := time.Now() + resp, err := client.ListObjectVersions(context.TODO(), &s3.ListObjectVersionsInput{ + Bucket: aws.String(bucketName), + Prefix: aws.String(orgPrefix + "/"), + MaxKeys: aws.Int32(maxKeys), + KeyMarker: keyMarker, + VersionIdMarker: versionIdMarker, + }) + pageDuration := time.Since(pageStart) + require.NoError(t, err) + pageCount++ + pageStartTimes = append(pageStartTimes, pageDuration) + + allVersions = append(allVersions, resp.Versions...) + t.Logf("Page %d: %d versions in %v (marker: %v)", + pageCount, len(resp.Versions), pageDuration, + keyMarker) + + if resp.IsTruncated == nil || !*resp.IsTruncated { + break + } + keyMarker = resp.NextKeyMarker + versionIdMarker = resp.NextVersionIdMarker + } + + assert.Greater(t, pageCount, 1, "Should require multiple pages") + + // Verify listed keys exactly match the keys we created (same elements, same order) + listedKeys := make([]string, 0, len(allVersions)) + for _, v := range allVersions { + listedKeys = append(listedKeys, *v.Key) + } + assert.Equal(t, allKeys, listedKeys, + "Listed version keys should exactly match created keys") + + // Check that later pages don't take dramatically longer than earlier ones. + // Before the fix, later pages were exponentially slower because they + // re-traversed the entire tree. With the fix, all pages should be similar. 
+ if len(pageStartTimes) >= 4 { + firstQuarter := pageStartTimes[0] + lastQuarter := pageStartTimes[len(pageStartTimes)-1] + t.Logf("First page: %v, Last page: %v, Ratio: %.1fx", + firstQuarter, lastQuarter, + float64(lastQuarter)/float64(firstQuarter)) + // Allow generous 10x ratio to avoid flakiness; before the fix + // the ratio was 100x+ on large datasets + if lastQuarter > firstQuarter*10 && lastQuarter > 500*time.Millisecond { + t.Errorf("Last page took %.1fx longer than first page (%v vs %v) — possible pagination regression", + float64(lastQuarter)/float64(firstQuarter), lastQuarter, firstQuarter) + } + } + }) + + // Test 2: Paginate with delimiter to verify CommonPrefixes interaction + t.Run("PaginateWithDelimiterAcrossSubdirs", func(t *testing.T) { + maxKeys := int32(5) + var allPrefixes []string + var keyMarker *string + pageCount := 0 + + for { + resp, err := client.ListObjectVersions(context.TODO(), &s3.ListObjectVersionsInput{ + Bucket: aws.String(bucketName), + Prefix: aws.String(orgPrefix + "/"), + Delimiter: aws.String("/"), + MaxKeys: aws.Int32(maxKeys), + KeyMarker: keyMarker, + }) + require.NoError(t, err) + pageCount++ + + for _, cp := range resp.CommonPrefixes { + allPrefixes = append(allPrefixes, *cp.Prefix) + } + + if resp.IsTruncated == nil || !*resp.IsTruncated { + break + } + keyMarker = resp.NextKeyMarker + } + + assert.Greater(t, pageCount, 1, "Should require multiple pages with maxKeys=%d", maxKeys) + + // Build the exact expected prefixes + expectedPrefixes := make([]string, 0, numMailboxes) + for i := 0; i < numMailboxes; i++ { + expectedPrefixes = append(expectedPrefixes, + fmt.Sprintf("%s/mbx-%03d/", orgPrefix, i)) + } + assert.Equal(t, expectedPrefixes, allPrefixes, + "CommonPrefixes should exactly match expected mailbox prefixes") + }) +} + // listAllVersions is a helper to list all versions of a specific object using pagination func listAllVersions(t *testing.T, client *s3.Client, bucketName, objectKey string) 
[]types.ObjectVersion { var allVersions []types.ObjectVersion diff --git a/weed/s3api/s3api_object_handlers_list_versioned_test.go b/weed/s3api/s3api_object_handlers_list_versioned_test.go index c362dfe3c..fd2695e03 100644 --- a/weed/s3api/s3api_object_handlers_list_versioned_test.go +++ b/weed/s3api/s3api_object_handlers_list_versioned_test.go @@ -630,3 +630,57 @@ func TestListObjectVersions_PrefixWithLeadingSlash(t *testing.T) { }) } } + +func TestComputeStartFrom(t *testing.T) { + tests := []struct { + name string + keyMarker string + relativePath string + wantStart string + wantInclusive bool + }{ + {"empty marker", "", "", "", false}, + {"empty marker with path", "", "dir", "", false}, + {"root level file", "file1.txt", "", "file1.txt", true}, + {"root level with subpath", "Mailboxes/5ac/file1", "", "Mailboxes", true}, + {"matching subdir", "Mailboxes/5ac/file1", "Mailboxes", "5ac", true}, + {"deeper subdir", "Mailboxes/5ac/ItemsData/file1", "Mailboxes/5ac", "ItemsData", true}, + {"at leaf level", "Mailboxes/5ac/ItemsData/file1", "Mailboxes/5ac/ItemsData", "file1", true}, + {"unrelated directory", "other/path", "Mailboxes", "", false}, + {"marker equals relativePath", "Mailboxes", "Mailboxes", "", false}, + {"marker before directory", "aaa/file", "zzz", "", false}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + vc := &versionCollector{keyMarker: tt.keyMarker} + startFrom, inclusive := vc.computeStartFrom(tt.relativePath) + assert.Equal(t, tt.wantStart, startFrom) + assert.Equal(t, tt.wantInclusive, inclusive) + }) + } +} + +func TestProcessDirectorySkipsBeforeMarker(t *testing.T) { + tests := []struct { + name string + keyMarker string + entryPath string + shouldSkip bool + }{ + {"no marker", "", "dir_a", false}, + {"dir before marker", "dir_b/file", "dir_a", true}, + {"marker descends into dir", "dir_a/file", "dir_a", false}, + {"dir after marker", "dir_a/file", "dir_b", false}, + {"same prefix different suffix", "dir_a0/file", 
"dir_a", true}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + skip := tt.keyMarker != "" && + !strings.HasPrefix(tt.keyMarker, tt.entryPath+"/") && + tt.entryPath+"/" < tt.keyMarker + assert.Equal(t, tt.shouldSkip, skip) + }) + } +} diff --git a/weed/s3api/s3api_object_versioning.go b/weed/s3api/s3api_object_versioning.go index 07f3cc250..60d3ad907 100644 --- a/weed/s3api/s3api_object_versioning.go +++ b/weed/s3api/s3api_object_versioning.go @@ -427,6 +427,34 @@ func (vc *versionCollector) matchesPrefixFilter(entryPath string, isDirectory bo return isMatch || canDescend } +// computeStartFrom extracts the first path component from keyMarker that applies +// to the given directory level (relativePath), allowing the directory listing to +// skip directly to the marker position instead of scanning from the beginning. +// Returns ("", false) when no optimization is possible. +func (vc *versionCollector) computeStartFrom(relativePath string) (startFrom string, inclusive bool) { + if vc.keyMarker == "" { + return "", false + } + + var remainder string + if relativePath == "" { + remainder = vc.keyMarker + } else if strings.HasPrefix(vc.keyMarker, relativePath+"/") { + remainder = vc.keyMarker[len(relativePath)+1:] + } else { + return "", false + } + + if remainder == "" { + return "", false + } + + if idx := strings.Index(remainder, "/"); idx >= 0 { + return remainder[:idx], true + } + return remainder, true +} + // shouldSkipObjectForMarker returns true if the object should be skipped based on keyMarker func (vc *versionCollector) shouldSkipObjectForMarker(objectKey string) bool { if vc.keyMarker == "" { @@ -639,12 +667,21 @@ func (s3a *S3ApiServer) findVersionsRecursively(currentPath, relativePath string // collectVersions recursively collects versions from the given path func (vc *versionCollector) collectVersions(currentPath, relativePath string) error { startFrom := "" + inclusive := false + // On the first iteration, skip ahead to the 
marker position to avoid + // re-scanning all entries before the marker on every paginated request. + if markerStart, ok := vc.computeStartFrom(relativePath); ok && markerStart != "" { + startFrom = markerStart + inclusive = true + } for { if vc.isFull() { return nil } - entries, isLast, err := vc.s3a.list(currentPath, "", startFrom, false, filer.PaginationSize) + entries, isLast, err := vc.s3a.list(currentPath, "", startFrom, inclusive, filer.PaginationSize) + // After the first batch, use exclusive mode for standard pagination + inclusive = false if err != nil { return err } @@ -731,6 +768,14 @@ func (vc *versionCollector) processDirectory(currentPath, entryPath string, entr vc.processExplicitDirectory(entryPath, entry) } + // Skip entire subdirectory if all keys within it are before the keyMarker. + // All object keys under this directory start with entryPath+"/". If the marker + // doesn't descend into this directory and entryPath+"/" sorts before the marker, + // then every key in this subtree was already returned in a previous page. + if vc.keyMarker != "" && !strings.HasPrefix(vc.keyMarker, entryPath+"/") && entryPath+"/" < vc.keyMarker { + return nil + } + // Recursively search subdirectory fullPath := path.Join(currentPath, entry.Name) if err := vc.collectVersions(fullPath, entryPath); err != nil {