Browse Source
fix(s3): include directory markers in ListObjects without delimiter (#8704)
fix(s3): include directory markers in ListObjects without delimiter (#8704)
* fix(s3): include directory markers in ListObjects without delimiter (#8698) Directory key objects (zero-byte objects with keys ending in "/") created via PutObject were omitted from ListObjects/ListObjectsV2 results when no delimiter was specified. AWS S3 includes these as regular keys in Contents. The issue was in doListFilerEntries: when recursing into directories in non-delimiter mode, directory key objects were only emitted when prefixEndsOnDelimiter was true. Added an else branch to emit them in the general recursive case as well. * remove issue reference from inline comment * test: add child-under-marker and paginated listing coverage Extend test 6 to place a child object under the directory marker and paginate with MaxKeys=1 so the emit-then-recurse truncation path is exercised. * fix(test): skip directory markers in Spark temporary artifacts check The listing check now correctly shows directory markers (keys ending in "/") after the ListObjects fix. These 0-byte metadata objects are not data artifacts — filter them from the listing check since the HeadObject-based check already verifies their cleanup with a timeout.pull/8707/head
committed by
GitHub
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 255 additions and 3 deletions
-
244test/s3/normal/s3_list_empty_directory_test.go
-
6test/s3/spark/issue_8285_repro_test.go
-
8weed/s3api/s3api_object_handlers_list.go
@ -0,0 +1,244 @@ |
|||
package example |
|||
|
|||
import ( |
|||
"bytes" |
|||
"sort" |
|||
"testing" |
|||
|
|||
"github.com/aws/aws-sdk-go/aws" |
|||
"github.com/aws/aws-sdk-go/service/s3" |
|||
"github.com/stretchr/testify/assert" |
|||
"github.com/stretchr/testify/require" |
|||
) |
|||
|
|||
// TestS3ListObjectsEmptyDirectoryMarkers reproduces GitHub issue #8698:
|
|||
// S3 API ListObjects does not include empty directory markers (zero-byte
|
|||
// objects with keys ending in "/") created via PutObject.
|
|||
//
|
|||
// AWS S3 includes these markers as regular keys in Contents. SeaweedFS
|
|||
// was filtering them out during recursive directory listing.
|
|||
func TestS3ListObjectsEmptyDirectoryMarkers(t *testing.T) { |
|||
if testing.Short() { |
|||
t.Skip("Skipping integration test in short mode") |
|||
} |
|||
|
|||
cluster, err := startMiniCluster(t) |
|||
require.NoError(t, err) |
|||
defer cluster.Stop() |
|||
|
|||
bucketName := createTestBucket(t, cluster, "test-empty-dirs-") |
|||
|
|||
// Create a regular file at Empty/manifest.yaml
|
|||
_, err = cluster.s3Client.PutObject(&s3.PutObjectInput{ |
|||
Bucket: aws.String(bucketName), |
|||
Key: aws.String("Empty/manifest.yaml"), |
|||
Body: bytes.NewReader([]byte("name: test\nversion: 1.0\n")), |
|||
}) |
|||
require.NoError(t, err, "failed to create regular file") |
|||
|
|||
// Create an empty directory marker via PutObject (key ending in "/", zero bytes).
|
|||
// This is how `aws s3api put-object --key "Empty/empty/"` works.
|
|||
_, err = cluster.s3Client.PutObject(&s3.PutObjectInput{ |
|||
Bucket: aws.String(bucketName), |
|||
Key: aws.String("Empty/empty/"), |
|||
Body: bytes.NewReader([]byte{}), |
|||
}) |
|||
require.NoError(t, err, "failed to create empty directory marker") |
|||
|
|||
// Verify the directory marker exists via HeadObject
|
|||
_, err = cluster.s3Client.HeadObject(&s3.HeadObjectInput{ |
|||
Bucket: aws.String(bucketName), |
|||
Key: aws.String("Empty/empty/"), |
|||
}) |
|||
require.NoError(t, err, "directory marker should exist via HeadObject") |
|||
|
|||
// Test 1: ListObjectsV2 with prefix (no delimiter) — the exact scenario from the issue.
|
|||
// AWS S3 returns both "Empty/empty/" and "Empty/manifest.yaml" in Contents.
|
|||
t.Run("ListV2_WithPrefix_NoDelimiter", func(t *testing.T) { |
|||
resp, err := cluster.s3Client.ListObjectsV2(&s3.ListObjectsV2Input{ |
|||
Bucket: aws.String(bucketName), |
|||
Prefix: aws.String("Empty"), |
|||
}) |
|||
require.NoError(t, err) |
|||
|
|||
keys := collectKeys(resp.Contents) |
|||
sort.Strings(keys) |
|||
|
|||
assert.Equal(t, []string{"Empty/empty/", "Empty/manifest.yaml"}, keys, |
|||
"both the directory marker and regular file should be listed") |
|||
}) |
|||
|
|||
// Test 2: ListObjectsV1 with prefix (no delimiter).
|
|||
t.Run("ListV1_WithPrefix_NoDelimiter", func(t *testing.T) { |
|||
resp, err := cluster.s3Client.ListObjects(&s3.ListObjectsInput{ |
|||
Bucket: aws.String(bucketName), |
|||
Prefix: aws.String("Empty"), |
|||
}) |
|||
require.NoError(t, err) |
|||
|
|||
keys := collectKeysV1(resp.Contents) |
|||
sort.Strings(keys) |
|||
|
|||
assert.Equal(t, []string{"Empty/empty/", "Empty/manifest.yaml"}, keys, |
|||
"both the directory marker and regular file should be listed") |
|||
}) |
|||
|
|||
// Test 3: ListObjectsV2 without prefix and without delimiter.
|
|||
// All objects in the bucket should appear, including directory markers.
|
|||
t.Run("ListV2_NoPrefix_NoDelimiter", func(t *testing.T) { |
|||
resp, err := cluster.s3Client.ListObjectsV2(&s3.ListObjectsV2Input{ |
|||
Bucket: aws.String(bucketName), |
|||
}) |
|||
require.NoError(t, err) |
|||
|
|||
keys := collectKeys(resp.Contents) |
|||
|
|||
assert.Contains(t, keys, "Empty/empty/", "directory marker should appear in listing") |
|||
assert.Contains(t, keys, "Empty/manifest.yaml", "regular file should appear in listing") |
|||
}) |
|||
|
|||
// Test 4: ListObjectsV2 with prefix "Empty/" and delimiter "/".
|
|||
// The directory marker "Empty/empty/" should appear as a CommonPrefix,
|
|||
// and "Empty/manifest.yaml" should appear in Contents.
|
|||
t.Run("ListV2_WithPrefix_WithDelimiter", func(t *testing.T) { |
|||
resp, err := cluster.s3Client.ListObjectsV2(&s3.ListObjectsV2Input{ |
|||
Bucket: aws.String(bucketName), |
|||
Prefix: aws.String("Empty/"), |
|||
Delimiter: aws.String("/"), |
|||
}) |
|||
require.NoError(t, err) |
|||
|
|||
keys := collectKeys(resp.Contents) |
|||
prefixes := collectPrefixes(resp.CommonPrefixes) |
|||
|
|||
assert.Contains(t, keys, "Empty/manifest.yaml", "regular file should be in Contents") |
|||
assert.Contains(t, prefixes, "Empty/empty/", "directory marker should appear as CommonPrefix") |
|||
}) |
|||
|
|||
// Test 5: Multiple empty directory markers at different nesting levels.
|
|||
t.Run("ListV2_NestedEmptyDirs", func(t *testing.T) { |
|||
nestedBucket := createTestBucket(t, cluster, "test-nested-dirs-") |
|||
|
|||
// Create nested structure:
|
|||
// dir/sub1/ (empty dir marker)
|
|||
// dir/sub2/deep/ (empty dir marker at deeper level)
|
|||
// dir/file.txt (regular file)
|
|||
_, err := cluster.s3Client.PutObject(&s3.PutObjectInput{ |
|||
Bucket: aws.String(nestedBucket), |
|||
Key: aws.String("dir/sub1/"), |
|||
Body: bytes.NewReader([]byte{}), |
|||
}) |
|||
require.NoError(t, err) |
|||
|
|||
_, err = cluster.s3Client.PutObject(&s3.PutObjectInput{ |
|||
Bucket: aws.String(nestedBucket), |
|||
Key: aws.String("dir/sub2/deep/"), |
|||
Body: bytes.NewReader([]byte{}), |
|||
}) |
|||
require.NoError(t, err) |
|||
|
|||
_, err = cluster.s3Client.PutObject(&s3.PutObjectInput{ |
|||
Bucket: aws.String(nestedBucket), |
|||
Key: aws.String("dir/file.txt"), |
|||
Body: bytes.NewReader([]byte("content")), |
|||
}) |
|||
require.NoError(t, err) |
|||
|
|||
resp, err := cluster.s3Client.ListObjectsV2(&s3.ListObjectsV2Input{ |
|||
Bucket: aws.String(nestedBucket), |
|||
}) |
|||
require.NoError(t, err) |
|||
|
|||
keys := collectKeys(resp.Contents) |
|||
sort.Strings(keys) |
|||
|
|||
assert.Equal(t, []string{"dir/file.txt", "dir/sub1/", "dir/sub2/deep/"}, keys, |
|||
"all objects including nested empty directory markers should be listed") |
|||
}) |
|||
|
|||
// Test 6: Directory marker with a child object, paginated with MaxKeys=1.
|
|||
// This exercises the emit-marker-then-recurse path under truncation:
|
|||
// the marker is emitted, maxKeys drops to 0, and the child must be
|
|||
// picked up on a subsequent page.
|
|||
t.Run("ListV2_MarkerWithChild_Paginated", func(t *testing.T) { |
|||
siblingBucket := createTestBucket(t, cluster, "test-sibling-dirs-") |
|||
|
|||
// Create:
|
|||
// docs/ (directory key object)
|
|||
// docs/readme.txt (child file inside the marker)
|
|||
// readme.txt (sibling file at top level)
|
|||
_, err := cluster.s3Client.PutObject(&s3.PutObjectInput{ |
|||
Bucket: aws.String(siblingBucket), |
|||
Key: aws.String("docs/"), |
|||
Body: bytes.NewReader([]byte{}), |
|||
}) |
|||
require.NoError(t, err) |
|||
|
|||
_, err = cluster.s3Client.PutObject(&s3.PutObjectInput{ |
|||
Bucket: aws.String(siblingBucket), |
|||
Key: aws.String("docs/readme.txt"), |
|||
Body: bytes.NewReader([]byte("inside docs")), |
|||
}) |
|||
require.NoError(t, err) |
|||
|
|||
_, err = cluster.s3Client.PutObject(&s3.PutObjectInput{ |
|||
Bucket: aws.String(siblingBucket), |
|||
Key: aws.String("readme.txt"), |
|||
Body: bytes.NewReader([]byte("hello")), |
|||
}) |
|||
require.NoError(t, err) |
|||
|
|||
// Unpaginated: all three keys should appear.
|
|||
resp, err := cluster.s3Client.ListObjectsV2(&s3.ListObjectsV2Input{ |
|||
Bucket: aws.String(siblingBucket), |
|||
}) |
|||
require.NoError(t, err) |
|||
|
|||
keys := collectKeys(resp.Contents) |
|||
sort.Strings(keys) |
|||
|
|||
assert.Equal(t, []string{"docs/", "docs/readme.txt", "readme.txt"}, keys, |
|||
"directory marker, its child, and sibling file should all be listed") |
|||
|
|||
// Paginated with MaxKeys=1: collect all keys across pages.
|
|||
var allKeys []string |
|||
var token *string |
|||
for { |
|||
pageResp, pageErr := cluster.s3Client.ListObjectsV2(&s3.ListObjectsV2Input{ |
|||
Bucket: aws.String(siblingBucket), |
|||
MaxKeys: aws.Int64(1), |
|||
ContinuationToken: token, |
|||
}) |
|||
require.NoError(t, pageErr) |
|||
allKeys = append(allKeys, collectKeys(pageResp.Contents)...) |
|||
if !aws.BoolValue(pageResp.IsTruncated) { |
|||
break |
|||
} |
|||
token = pageResp.NextContinuationToken |
|||
} |
|||
sort.Strings(allKeys) |
|||
|
|||
assert.Equal(t, []string{"docs/", "docs/readme.txt", "readme.txt"}, allKeys, |
|||
"paginated listing should return all keys including marker and its child") |
|||
}) |
|||
} |
|||
|
|||
func collectKeys(contents []*s3.Object) []string { |
|||
keys := make([]string, 0, len(contents)) |
|||
for _, obj := range contents { |
|||
keys = append(keys, aws.StringValue(obj.Key)) |
|||
} |
|||
return keys |
|||
} |
|||
|
|||
func collectKeysV1(contents []*s3.Object) []string { |
|||
return collectKeys(contents) |
|||
} |
|||
|
|||
func collectPrefixes(prefixes []*s3.CommonPrefix) []string { |
|||
result := make([]string, 0, len(prefixes)) |
|||
for _, p := range prefixes { |
|||
result = append(result, aws.StringValue(p.Prefix)) |
|||
} |
|||
return result |
|||
} |
|||
Write
Preview
Loading…
Cancel
Save
Reference in new issue