From 36b8b2147b55068f65f6c4cacc5645bc3453f173 Mon Sep 17 00:00:00 2001 From: Chris Lu Date: Sat, 13 Dec 2025 10:56:21 -0800 Subject: [PATCH] test: add integration test for versioned object listing path fix (#7731) * test: add integration test for versioned object listing path fix Add integration test that validates the fix for GitHub discussion #7573. The test verifies that: - Entry names use path.Base() to get base filename only - Path doubling bug is prevented when listing versioned objects - Logical entries are created correctly with proper attributes - .versions folder paths are handled correctly This test documents the Velero/Kopia compatibility fix and prevents regression of the path doubling bug. * test: add Velero/Kopia integration test for versioned object listing Add integration tests that simulate Velero/Kopia's exact access patterns when using S3 versioning. These tests validate the fix for GitHub discussion #7573 where versioned objects with nested paths would have their paths doubled in ListObjects responses. Tests added: - TestVeleroKopiaVersionedObjectListing: Tests various Kopia path patterns - TestVeleroKopiaGetAfterList: Verifies list-then-get workflow works - TestVeleroMultipleVersionsWithNestedPaths: Tests multi-version objects - TestVeleroListVersionsWithNestedPaths: Tests ListObjectVersions API Each test verifies: 1. Listed keys match original keys without path doubling 2. Objects can be retrieved using the listed keys 3. Content integrity is maintained Related: https://github.com/seaweedfs/seaweedfs/discussions/7573 * refactor: remove old unit test, keep only Velero integration test Remove weed/s3api/s3api_versioning_list_test.go as it was a simpler unit test that the comprehensive Velero integration test supersedes. The integration test in test/s3/versioning/s3_velero_integration_test.go provides better coverage by actually exercising the S3 API with real AWS SDK calls. * refactor: use defer for response body cleanup in test loop Use anonymous function with defer for getResp.Body.Close() to be more defensive against future code additions in the loop body. * refactor: improve hasDoubledPath function clarity and efficiency - Fix comment to accurately describe checking for repeated pairs - Tighten outer loop bound from len(parts)-2 to len(parts)-3 - Remove redundant bounds checks in the condition --- .../versioning/s3_velero_integration_test.go | 327 ++++++++++++++++++ 1 file changed, 327 insertions(+) create mode 100644 test/s3/versioning/s3_velero_integration_test.go diff --git a/test/s3/versioning/s3_velero_integration_test.go b/test/s3/versioning/s3_velero_integration_test.go new file mode 100644 index 000000000..8d66ded6b --- /dev/null +++ b/test/s3/versioning/s3_velero_integration_test.go @@ -0,0 +1,327 @@ +package s3api + +import ( + "context" + "io" + "strings" + "testing" + + "github.com/aws/aws-sdk-go-v2/aws" + "github.com/aws/aws-sdk-go-v2/service/s3" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// TestVeleroKopiaVersionedObjectListing tests the exact access patterns used by Velero/Kopia +// when backing up to a versioned S3 bucket. This test validates the fix for: +// https://github.com/seaweedfs/seaweedfs/discussions/7573 +// +// The bug was that when listing versioned objects with nested paths like +// "kopia/logpaste/kopia.blobcfg", the returned Key would be doubled: +// "kopia/logpaste/kopia/logpaste/kopia.blobcfg" instead of the correct path. +// +// This was caused by getLatestVersionEntryForListOperation setting the entry's +// Name to the full path instead of just the base filename. +func TestVeleroKopiaVersionedObjectListing(t *testing.T) { + client := getS3Client(t) + bucketName := getNewBucketName() + + // Step 1: Create bucket + createBucket(t, client, bucketName) + defer deleteBucket(t, client, bucketName) + + // Step 2: Enable versioning (required to reproduce the bug) + enableVersioning(t, client, bucketName) + + // Step 3: Create objects matching Velero/Kopia's typical path structure + // These are the actual paths Kopia uses when storing repository data + testObjects := map[string]string{ + "kopia/logpaste/kopia.blobcfg": "blob-config-content", + "kopia/logpaste/kopia.repository": "repository-content", + "kopia/logpaste/.storageconfig": "storage-config-content", + "backups/namespace1/backup.tar": "backup-content", + "restic/locks/lock-001.json": "lock-content", + "restic/index/index-001.json": "index-content", + "data/a/b/c/deeply-nested.dat": "nested-content", + "single-file.txt": "single-file-content", + } + + // Upload all test objects + for key, content := range testObjects { + _, err := client.PutObject(context.TODO(), &s3.PutObjectInput{ + Bucket: aws.String(bucketName), + Key: aws.String(key), + Body: strings.NewReader(content), + }) + require.NoError(t, err, "Failed to upload object %s", key) + } + + // Step 4: Test listing with various prefixes (mimicking Kopia's ListObjects calls) + testCases := []struct { + name string + prefix string + expectedKeys []string + description string + }{ + { + name: "kopia repository prefix", + prefix: "kopia/logpaste/", + expectedKeys: []string{ + "kopia/logpaste/.storageconfig", + "kopia/logpaste/kopia.blobcfg", + "kopia/logpaste/kopia.repository", + }, + description: "Kopia lists its repository directory to find config files", + }, + { + name: "exact config file", + prefix: "kopia/logpaste/kopia.blobcfg", + expectedKeys: []string{"kopia/logpaste/kopia.blobcfg"}, + description: "Kopia checks for specific config files", + }, + { + name: "restic index directory", + prefix: "restic/index/", + expectedKeys: []string{ + "restic/index/index-001.json", + }, + description: "Restic lists index files (similar pattern to Kopia)", + }, + { + name: "deeply nested path", + prefix: "data/a/b/c/", + expectedKeys: []string{ + "data/a/b/c/deeply-nested.dat", + }, + description: "Tests deeply nested paths don't get doubled", + }, + { + name: "backup directory", + prefix: "backups/", + expectedKeys: []string{ + "backups/namespace1/backup.tar", + }, + description: "Velero lists backup directories", + }, + { + name: "root level single file", + prefix: "single-file", + expectedKeys: []string{ + "single-file.txt", + }, + description: "Files at bucket root should work correctly", + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + // List objects with prefix + listResp, err := client.ListObjectsV2(context.TODO(), &s3.ListObjectsV2Input{ + Bucket: aws.String(bucketName), + Prefix: aws.String(tc.prefix), + }) + require.NoError(t, err) + + // Extract returned keys + var actualKeys []string + for _, obj := range listResp.Contents { + key := *obj.Key + actualKeys = append(actualKeys, key) + + // Critical assertion: verify no path doubling + // The bug would cause "kopia/logpaste/kopia.blobcfg" to become + // "kopia/logpaste/kopia/logpaste/kopia.blobcfg" + assert.False(t, hasDoubledPath(key), + "Path doubling detected! Key '%s' should not have repeated path segments", key) + + // Also verify we can actually GET the object using the listed key + getResp, err := client.GetObject(context.TODO(), &s3.GetObjectInput{ + Bucket: aws.String(bucketName), + Key: aws.String(key), + }) + require.NoError(t, err, "Should be able to GET object using listed key: %s", key) + getResp.Body.Close() + } + + // Verify we got the expected keys + assert.ElementsMatch(t, tc.expectedKeys, actualKeys, + "%s: Listed keys should match expected", tc.description) + }) + } +} + +// TestVeleroKopiaGetAfterList simulates Kopia's pattern of listing and then getting objects +// This specifically tests the bug where listed paths couldn't be retrieved +func TestVeleroKopiaGetAfterList(t *testing.T) { + client := getS3Client(t) + bucketName := getNewBucketName() + + createBucket(t, client, bucketName) + defer deleteBucket(t, client, bucketName) + enableVersioning(t, client, bucketName) + + // Create a nested object like Kopia does + objectKey := "kopia/appwrite/kopia.repository" + objectContent := "kopia-repository-config-data" + + _, err := client.PutObject(context.TODO(), &s3.PutObjectInput{ + Bucket: aws.String(bucketName), + Key: aws.String(objectKey), + Body: strings.NewReader(objectContent), + }) + require.NoError(t, err) + + // List objects (this is where the bug manifested - returned wrong Key) + listResp, err := client.ListObjectsV2(context.TODO(), &s3.ListObjectsV2Input{ + Bucket: aws.String(bucketName), + Prefix: aws.String("kopia/appwrite/"), + }) + require.NoError(t, err) + require.Len(t, listResp.Contents, 1, "Should list exactly one object") + + listedKey := *listResp.Contents[0].Key + + // The bug caused listedKey to be "kopia/appwrite/kopia/appwrite/kopia.repository" + // instead of "kopia/appwrite/kopia.repository" + assert.Equal(t, objectKey, listedKey, + "Listed key should match the original key without path doubling") + + // Now try to GET the object using the listed key + // With the bug, this would fail because the doubled path doesn't exist + getResp, err := client.GetObject(context.TODO(), &s3.GetObjectInput{ + Bucket: aws.String(bucketName), + Key: aws.String(listedKey), + }) + require.NoError(t, err, "Should be able to GET object using the key returned from ListObjects") + defer getResp.Body.Close() + + // Verify content matches + body, err := io.ReadAll(getResp.Body) + require.NoError(t, err) + assert.Equal(t, objectContent, string(body), + "Retrieved content should match original") +} + +// TestVeleroMultipleVersionsWithNestedPaths tests listing when objects have multiple versions +func TestVeleroMultipleVersionsWithNestedPaths(t *testing.T) { + client := getS3Client(t) + bucketName := getNewBucketName() + + createBucket(t, client, bucketName) + defer deleteBucket(t, client, bucketName) + enableVersioning(t, client, bucketName) + + objectKey := "kopia/logpaste/kopia.blobcfg" + + // Create multiple versions of the same object (Kopia updates config files) + versions := []string{"version-1", "version-2", "version-3"} + for _, content := range versions { + _, err := client.PutObject(context.TODO(), &s3.PutObjectInput{ + Bucket: aws.String(bucketName), + Key: aws.String(objectKey), + Body: strings.NewReader(content), + }) + require.NoError(t, err) + } + + // List objects - should return only the latest version in regular listing + listResp, err := client.ListObjectsV2(context.TODO(), &s3.ListObjectsV2Input{ + Bucket: aws.String(bucketName), + Prefix: aws.String("kopia/logpaste/"), + }) + require.NoError(t, err) + + // Should only see one object (the latest version) + require.Len(t, listResp.Contents, 1, "Should list only one object (latest version)") + + listedKey := *listResp.Contents[0].Key + assert.Equal(t, objectKey, listedKey, + "Listed key should match original without path doubling") + assert.False(t, hasDoubledPath(listedKey), + "Path should not be doubled") + + // Verify we can GET the latest version + getResp, err := client.GetObject(context.TODO(), &s3.GetObjectInput{ + Bucket: aws.String(bucketName), + Key: aws.String(listedKey), + }) + require.NoError(t, err) + defer getResp.Body.Close() + + body, err := io.ReadAll(getResp.Body) + require.NoError(t, err) + assert.Equal(t, "version-3", string(body), + "Should get the latest version content") +} + +// TestVeleroListVersionsWithNestedPaths tests ListObjectVersions with nested paths +func TestVeleroListVersionsWithNestedPaths(t *testing.T) { + client := getS3Client(t) + bucketName := getNewBucketName() + + createBucket(t, client, bucketName) + defer deleteBucket(t, client, bucketName) + enableVersioning(t, client, bucketName) + + objectKey := "backups/velero/test-backup/backup.json" + + // Create multiple versions + for i := 1; i <= 3; i++ { + _, err := client.PutObject(context.TODO(), &s3.PutObjectInput{ + Bucket: aws.String(bucketName), + Key: aws.String(objectKey), + Body: strings.NewReader(strings.Repeat("x", i*100)), + }) + require.NoError(t, err) + } + + // List all versions + listResp, err := client.ListObjectVersions(context.TODO(), &s3.ListObjectVersionsInput{ + Bucket: aws.String(bucketName), + Prefix: aws.String("backups/velero/"), + }) + require.NoError(t, err) + + // Should have 3 versions + require.Len(t, listResp.Versions, 3, "Should have 3 versions") + + // Verify all version keys are correct (not doubled) + for _, version := range listResp.Versions { + key := *version.Key + assert.Equal(t, objectKey, key, + "Version key should match original") + assert.False(t, hasDoubledPath(key), + "Version key should not have doubled path: %s", key) + + // Verify we can GET each version by its version ID + getResp, err := client.GetObject(context.TODO(), &s3.GetObjectInput{ + Bucket: aws.String(bucketName), + Key: aws.String(key), + VersionId: version.VersionId, + }) + require.NoError(t, err, "Should be able to GET version %s", *version.VersionId) + func() { + defer getResp.Body.Close() + }() + } +} + +// hasDoubledPath checks if a path has a repeated pair of segments, which indicates +// the path doubling bug. For example: "kopia/logpaste/kopia/logpaste/file.txt" would return true. +func hasDoubledPath(path string) bool { + parts := strings.Split(path, "/") + if len(parts) < 4 { + return false + } + + // Check for repeated pairs of segments, e.g., a/b/.../a/b. + for i := 0; i < len(parts)-3; i++ { + for j := i + 2; j < len(parts)-1; j++ { + if parts[i] == parts[j] && parts[i+1] == parts[j+1] { + return true + } + } + } + + return false +}