From 80f3079d2a7658ce3f34418d8a1615e97ebe260e Mon Sep 17 00:00:00 2001 From: Chris Lu Date: Thu, 19 Mar 2026 15:36:11 -0700 Subject: [PATCH] fix(s3): include directory markers in ListObjects without delimiter (#8704) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix(s3): include directory markers in ListObjects without delimiter (#8698) Directory key objects (zero-byte objects with keys ending in "/") created via PutObject were omitted from ListObjects/ListObjectsV2 results when no delimiter was specified. AWS S3 includes these as regular keys in Contents. The issue was in doListFilerEntries: when recursing into directories in non-delimiter mode, directory key objects were only emitted when prefixEndsOnDelimiter was true. Added an else branch to emit them in the general recursive case as well. * remove issue reference from inline comment * test: add child-under-marker and paginated listing coverage Extend test 6 to place a child object under the directory marker and paginate with MaxKeys=1 so the emit-then-recurse truncation path is exercised. * fix(test): skip directory markers in Spark temporary artifacts check The listing check now correctly shows directory markers (keys ending in "/") after the ListObjects fix. These 0-byte metadata objects are not data artifacts — filter them from the listing check since the HeadObject-based check already verifies their cleanup with a timeout. --- .../s3/normal/s3_list_empty_directory_test.go | 244 ++++++++++++++++++ test/s3/spark/issue_8285_repro_test.go | 6 + weed/s3api/s3api_object_handlers_list.go | 8 +- 3 files changed, 255 insertions(+), 3 deletions(-) create mode 100644 test/s3/normal/s3_list_empty_directory_test.go diff --git a/test/s3/normal/s3_list_empty_directory_test.go b/test/s3/normal/s3_list_empty_directory_test.go new file mode 100644 index 000000000..b8c64180f --- /dev/null +++ b/test/s3/normal/s3_list_empty_directory_test.go @@ -0,0 +1,244 @@ +package example + +import ( + "bytes" + "sort" + "testing" + + "github.com/aws/aws-sdk-go/aws" + "github.com/aws/aws-sdk-go/service/s3" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// TestS3ListObjectsEmptyDirectoryMarkers reproduces GitHub issue #8698: +// S3 API ListObjects does not include empty directory markers (zero-byte +// objects with keys ending in "/") created via PutObject. +// +// AWS S3 includes these markers as regular keys in Contents. SeaweedFS +// was filtering them out during recursive directory listing. +func TestS3ListObjectsEmptyDirectoryMarkers(t *testing.T) { + if testing.Short() { + t.Skip("Skipping integration test in short mode") + } + + cluster, err := startMiniCluster(t) + require.NoError(t, err) + defer cluster.Stop() + + bucketName := createTestBucket(t, cluster, "test-empty-dirs-") + + // Create a regular file at Empty/manifest.yaml + _, err = cluster.s3Client.PutObject(&s3.PutObjectInput{ + Bucket: aws.String(bucketName), + Key: aws.String("Empty/manifest.yaml"), + Body: bytes.NewReader([]byte("name: test\nversion: 1.0\n")), + }) + require.NoError(t, err, "failed to create regular file") + + // Create an empty directory marker via PutObject (key ending in "/", zero bytes). + // This is how `aws s3api put-object --key "Empty/empty/"` works. + _, err = cluster.s3Client.PutObject(&s3.PutObjectInput{ + Bucket: aws.String(bucketName), + Key: aws.String("Empty/empty/"), + Body: bytes.NewReader([]byte{}), + }) + require.NoError(t, err, "failed to create empty directory marker") + + // Verify the directory marker exists via HeadObject + _, err = cluster.s3Client.HeadObject(&s3.HeadObjectInput{ + Bucket: aws.String(bucketName), + Key: aws.String("Empty/empty/"), + }) + require.NoError(t, err, "directory marker should exist via HeadObject") + + // Test 1: ListObjectsV2 with prefix (no delimiter) — the exact scenario from the issue. + // AWS S3 returns both "Empty/empty/" and "Empty/manifest.yaml" in Contents. + t.Run("ListV2_WithPrefix_NoDelimiter", func(t *testing.T) { + resp, err := cluster.s3Client.ListObjectsV2(&s3.ListObjectsV2Input{ + Bucket: aws.String(bucketName), + Prefix: aws.String("Empty"), + }) + require.NoError(t, err) + + keys := collectKeys(resp.Contents) + sort.Strings(keys) + + assert.Equal(t, []string{"Empty/empty/", "Empty/manifest.yaml"}, keys, + "both the directory marker and regular file should be listed") + }) + + // Test 2: ListObjectsV1 with prefix (no delimiter). + t.Run("ListV1_WithPrefix_NoDelimiter", func(t *testing.T) { + resp, err := cluster.s3Client.ListObjects(&s3.ListObjectsInput{ + Bucket: aws.String(bucketName), + Prefix: aws.String("Empty"), + }) + require.NoError(t, err) + + keys := collectKeysV1(resp.Contents) + sort.Strings(keys) + + assert.Equal(t, []string{"Empty/empty/", "Empty/manifest.yaml"}, keys, + "both the directory marker and regular file should be listed") + }) + + // Test 3: ListObjectsV2 without prefix and without delimiter. + // All objects in the bucket should appear, including directory markers. + t.Run("ListV2_NoPrefix_NoDelimiter", func(t *testing.T) { + resp, err := cluster.s3Client.ListObjectsV2(&s3.ListObjectsV2Input{ + Bucket: aws.String(bucketName), + }) + require.NoError(t, err) + + keys := collectKeys(resp.Contents) + + assert.Contains(t, keys, "Empty/empty/", "directory marker should appear in listing") + assert.Contains(t, keys, "Empty/manifest.yaml", "regular file should appear in listing") + }) + + // Test 4: ListObjectsV2 with prefix "Empty/" and delimiter "/". + // The directory marker "Empty/empty/" should appear as a CommonPrefix, + // and "Empty/manifest.yaml" should appear in Contents. + t.Run("ListV2_WithPrefix_WithDelimiter", func(t *testing.T) { + resp, err := cluster.s3Client.ListObjectsV2(&s3.ListObjectsV2Input{ + Bucket: aws.String(bucketName), + Prefix: aws.String("Empty/"), + Delimiter: aws.String("/"), + }) + require.NoError(t, err) + + keys := collectKeys(resp.Contents) + prefixes := collectPrefixes(resp.CommonPrefixes) + + assert.Contains(t, keys, "Empty/manifest.yaml", "regular file should be in Contents") + assert.Contains(t, prefixes, "Empty/empty/", "directory marker should appear as CommonPrefix") + }) + + // Test 5: Multiple empty directory markers at different nesting levels. + t.Run("ListV2_NestedEmptyDirs", func(t *testing.T) { + nestedBucket := createTestBucket(t, cluster, "test-nested-dirs-") + + // Create nested structure: + // dir/sub1/ (empty dir marker) + // dir/sub2/deep/ (empty dir marker at deeper level) + // dir/file.txt (regular file) + _, err := cluster.s3Client.PutObject(&s3.PutObjectInput{ + Bucket: aws.String(nestedBucket), + Key: aws.String("dir/sub1/"), + Body: bytes.NewReader([]byte{}), + }) + require.NoError(t, err) + + _, err = cluster.s3Client.PutObject(&s3.PutObjectInput{ + Bucket: aws.String(nestedBucket), + Key: aws.String("dir/sub2/deep/"), + Body: bytes.NewReader([]byte{}), + }) + require.NoError(t, err) + + _, err = cluster.s3Client.PutObject(&s3.PutObjectInput{ + Bucket: aws.String(nestedBucket), + Key: aws.String("dir/file.txt"), + Body: bytes.NewReader([]byte("content")), + }) + require.NoError(t, err) + + resp, err := cluster.s3Client.ListObjectsV2(&s3.ListObjectsV2Input{ + Bucket: aws.String(nestedBucket), + }) + require.NoError(t, err) + + keys := collectKeys(resp.Contents) + sort.Strings(keys) + + assert.Equal(t, []string{"dir/file.txt", "dir/sub1/", "dir/sub2/deep/"}, keys, + "all objects including nested empty directory markers should be listed") + }) + + // Test 6: Directory marker with a child object, paginated with MaxKeys=1. + // This exercises the emit-marker-then-recurse path under truncation: + // the marker is emitted, maxKeys drops to 0, and the child must be + // picked up on a subsequent page. + t.Run("ListV2_MarkerWithChild_Paginated", func(t *testing.T) { + siblingBucket := createTestBucket(t, cluster, "test-sibling-dirs-") + + // Create: + // docs/ (directory key object) + // docs/readme.txt (child file inside the marker) + // readme.txt (sibling file at top level) + _, err := cluster.s3Client.PutObject(&s3.PutObjectInput{ + Bucket: aws.String(siblingBucket), + Key: aws.String("docs/"), + Body: bytes.NewReader([]byte{}), + }) + require.NoError(t, err) + + _, err = cluster.s3Client.PutObject(&s3.PutObjectInput{ + Bucket: aws.String(siblingBucket), + Key: aws.String("docs/readme.txt"), + Body: bytes.NewReader([]byte("inside docs")), + }) + require.NoError(t, err) + + _, err = cluster.s3Client.PutObject(&s3.PutObjectInput{ + Bucket: aws.String(siblingBucket), + Key: aws.String("readme.txt"), + Body: bytes.NewReader([]byte("hello")), + }) + require.NoError(t, err) + + // Unpaginated: all three keys should appear. + resp, err := cluster.s3Client.ListObjectsV2(&s3.ListObjectsV2Input{ + Bucket: aws.String(siblingBucket), + }) + require.NoError(t, err) + + keys := collectKeys(resp.Contents) + sort.Strings(keys) + + assert.Equal(t, []string{"docs/", "docs/readme.txt", "readme.txt"}, keys, + "directory marker, its child, and sibling file should all be listed") + + // Paginated with MaxKeys=1: collect all keys across pages. + var allKeys []string + var token *string + for { + pageResp, pageErr := cluster.s3Client.ListObjectsV2(&s3.ListObjectsV2Input{ + Bucket: aws.String(siblingBucket), + MaxKeys: aws.Int64(1), + ContinuationToken: token, + }) + require.NoError(t, pageErr) + allKeys = append(allKeys, collectKeys(pageResp.Contents)...) + if !aws.BoolValue(pageResp.IsTruncated) { + break + } + token = pageResp.NextContinuationToken + } + sort.Strings(allKeys) + + assert.Equal(t, []string{"docs/", "docs/readme.txt", "readme.txt"}, allKeys, + "paginated listing should return all keys including marker and its child") + }) +} + +func collectKeys(contents []*s3.Object) []string { + keys := make([]string, 0, len(contents)) + for _, obj := range contents { + keys = append(keys, aws.StringValue(obj.Key)) + } + return keys +} + +func collectKeysV1(contents []*s3.Object) []string { + return collectKeys(contents) +} + +func collectPrefixes(prefixes []*s3.CommonPrefix) []string { + result := make([]string, 0, len(prefixes)) + for _, p := range prefixes { + result = append(result, aws.StringValue(p.Prefix)) + } + return result +} diff --git a/test/s3/spark/issue_8285_repro_test.go b/test/s3/spark/issue_8285_repro_test.go index a514356da..66b46439d 100644 --- a/test/s3/spark/issue_8285_repro_test.go +++ b/test/s3/spark/issue_8285_repro_test.go @@ -50,6 +50,12 @@ print("WRITE_COUNT=" + str(count)) keys := listObjectKeysByPrefix(t, env, "test", "issue-8285/") var temporaryKeys []string for _, key := range keys { + // Skip directory markers (keys ending in "/") — these are 0-byte + // metadata objects, not data artifacts. They are verified separately + // via HeadObject with a timeout below. + if strings.HasSuffix(key, "/") { + continue + } if hasTemporaryPathSegment(key) { temporaryKeys = append(temporaryKeys, key) } diff --git a/weed/s3api/s3api_object_handlers_list.go b/weed/s3api/s3api_object_handlers_list.go index 92344d83a..12f8ec849 100644 --- a/weed/s3api/s3api_object_handlers_list.go +++ b/weed/s3api/s3api_object_handlers_list.go @@ -677,15 +677,17 @@ func (s3a *S3ApiServer) doListFilerEntries(client filer_pb.SeaweedFilerClient, d } if delimiter != "/" || cursor.prefixEndsOnDelimiter { - // When delimiter is empty (recursive mode), recurse into directories but don't add them to results - // Only files and versioned objects should appear in results if cursor.prefixEndsOnDelimiter { cursor.prefixEndsOnDelimiter = false if entry.IsDirectoryKeyObject() { eachEntryFn(dir, entry) } + } else if entry.IsDirectoryKeyObject() { + // Directory key objects (created via PutObject with trailing "/") + // must appear as regular keys in recursive listing mode. + eachEntryFn(dir, entry) } - // Recurse into subdirectory - don't add the directory itself to results + // Recurse into subdirectory to list any children subNextMarker, subErr := s3a.doListFilerEntries(client, dir+"/"+entry.Name, "", cursor, "", delimiter, false, bucket, eachEntryFn) if subErr != nil { err = fmt.Errorf("doListFilerEntries2: %w", subErr)