From 8c585a9682748353c78b96e5f55238074497b35a Mon Sep 17 00:00:00 2001 From: Chris Lu Date: Mon, 1 Dec 2025 15:19:42 -0800 Subject: [PATCH] Fix S3 object tagging issue #7589 - Add X-Amz-Tagging header parsing in putToFiler function for PUT object operations - Store tags with X-Amz-Tagging- prefix in entry.Extended metadata - Add comprehensive test suite for S3 object tagging functionality - Tests cover upload tagging, API operations, special characters, and edge cases --- test/s3/tagging/Makefile | 23 ++ test/s3/tagging/README.md | 53 +++ test/s3/tagging/s3_tagging_test.go | 433 ++++++++++++++++++++++++ weed/s3api/s3api_object_handlers_put.go | 27 +- 4 files changed, 534 insertions(+), 2 deletions(-) create mode 100644 test/s3/tagging/Makefile create mode 100644 test/s3/tagging/README.md create mode 100644 test/s3/tagging/s3_tagging_test.go diff --git a/test/s3/tagging/Makefile b/test/s3/tagging/Makefile new file mode 100644 index 000000000..6d3343a70 --- /dev/null +++ b/test/s3/tagging/Makefile @@ -0,0 +1,23 @@ +# S3 Object Tagging Tests +# Tests for GitHub issue #7589: S3 object Tags query comes back empty + +.PHONY: test + +# Run all tagging tests +test: + go test -v ./... + +# Run specific test +test-upload: + go test -v -run TestObjectTaggingOnUpload + +test-special-chars: + go test -v -run TestObjectTaggingOnUploadWithSpecialCharacters + +test-api: + go test -v -run TestPutObjectTaggingAPI + +# Test configuration (override these for different environments) +S3_ENDPOINT ?= http://localhost:8333 +S3_ACCESS_KEY ?= some_access_key1 +S3_SECRET_KEY ?= some_secret_key1 diff --git a/test/s3/tagging/README.md b/test/s3/tagging/README.md new file mode 100644 index 000000000..73f2a46c4 --- /dev/null +++ b/test/s3/tagging/README.md @@ -0,0 +1,53 @@ +# S3 Object Tagging Tests + +This directory contains tests for S3 object tagging functionality. + +## Issue Reference + +These tests were created to verify the fix for [GitHub Issue #7589](https://github.com/seaweedfs/seaweedfs/issues/7589): +**S3 object Tags query comes back empty** + +## Problem Description + +When uploading an object with tags using the `X-Amz-Tagging` header, the tags were not being stored. +When querying the object tagging with `GetObjectTagging`, the response was empty. + +This was a regression between SeaweedFS 4.00 and 4.01. + +## Root Cause + +The `putToFiler` function in `s3api_object_handlers_put.go` was not parsing the `X-Amz-Tagging` header +and storing the tags in the entry's Extended metadata. The code was only copying user metadata +(headers starting with `X-Amz-Meta-`) but not object tags. + +## Fix + +Added tag parsing logic to `putToFiler` that: +1. Reads the `X-Amz-Tagging` header +2. Parses it using `url.ParseQuery()` for proper URL decoding +3. Stores each tag with the prefix `X-Amz-Tagging-` in the entry's Extended metadata + +## Running Tests + +```bash +# Run all tagging tests +cd test/s3/tagging +make test + +# Run specific test +make test-upload + +# Or using go test directly +go test -v ./... +``` + +## Test Cases + +1. **TestObjectTaggingOnUpload** - Basic test for tags sent during object upload +2. **TestObjectTaggingOnUploadWithSpecialCharacters** - Tests URL-encoded tag values +3. **TestObjectTaggingOnUploadWithEmptyValue** - Tests tags with empty values +4. **TestPutObjectTaggingAPI** - Tests the PutObjectTagging API separately +5. **TestDeleteObjectTagging** - Tests tag deletion +6. **TestTagsNotPreservedAfterObjectOverwrite** - Verifies AWS S3 behavior on overwrite +7. **TestMaximumNumberOfTags** - Tests storing the maximum 10 tags +8. **TestTagCountHeader** - Tests the x-amz-tagging-count header in HeadObject diff --git a/test/s3/tagging/s3_tagging_test.go b/test/s3/tagging/s3_tagging_test.go new file mode 100644 index 000000000..1dd4895b6 --- /dev/null +++ b/test/s3/tagging/s3_tagging_test.go @@ -0,0 +1,433 @@ +package tagging + +import ( + "context" + "fmt" + "strings" + "testing" + "time" + + "github.com/aws/aws-sdk-go-v2/aws" + "github.com/aws/aws-sdk-go-v2/config" + "github.com/aws/aws-sdk-go-v2/credentials" + "github.com/aws/aws-sdk-go-v2/service/s3" + "github.com/aws/aws-sdk-go-v2/service/s3/types" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// S3TestConfig holds configuration for S3 tests +type S3TestConfig struct { + Endpoint string + AccessKey string + SecretKey string + Region string + BucketPrefix string + UseSSL bool + SkipVerifySSL bool +} + +// getDefaultConfig returns a fresh instance of the default test configuration +func getDefaultConfig() *S3TestConfig { + return &S3TestConfig{ + Endpoint: "http://localhost:8333", // Default SeaweedFS S3 port + AccessKey: "some_access_key1", + SecretKey: "some_secret_key1", + Region: "us-east-1", + BucketPrefix: "test-tagging-", + UseSSL: false, + SkipVerifySSL: true, + } +} + +// getS3Client creates an AWS S3 client for testing +func getS3Client(t *testing.T) *s3.Client { + defaultConfig := getDefaultConfig() + cfg, err := config.LoadDefaultConfig(context.TODO(), + config.WithRegion(defaultConfig.Region), + config.WithCredentialsProvider(credentials.NewStaticCredentialsProvider( + defaultConfig.AccessKey, + defaultConfig.SecretKey, + "", + )), + config.WithEndpointResolverWithOptions(aws.EndpointResolverWithOptionsFunc( + func(service, region string, options ...interface{}) (aws.Endpoint, error) { + return aws.Endpoint{ + URL: defaultConfig.Endpoint, + SigningRegion: defaultConfig.Region, + }, nil + })), + ) + require.NoError(t, err) + + client := s3.NewFromConfig(cfg, func(o *s3.Options) { + o.UsePathStyle = true + }) + return client +} + +// createTestBucket creates a test bucket with a unique name +func createTestBucket(t *testing.T, client *s3.Client) string { + defaultConfig := getDefaultConfig() + bucketName := fmt.Sprintf("%s%d", defaultConfig.BucketPrefix, time.Now().UnixNano()) + + _, err := client.CreateBucket(context.TODO(), &s3.CreateBucketInput{ + Bucket: aws.String(bucketName), + }) + require.NoError(t, err) + + // Wait for bucket metadata to be fully processed + time.Sleep(50 * time.Millisecond) + + return bucketName +} + +// cleanupTestBucket removes the test bucket and all its contents +func cleanupTestBucket(t *testing.T, client *s3.Client, bucketName string) { + // First, delete all objects in the bucket + listResp, err := client.ListObjectsV2(context.TODO(), &s3.ListObjectsV2Input{ + Bucket: aws.String(bucketName), + }) + if err == nil { + for _, obj := range listResp.Contents { + _, err := client.DeleteObject(context.TODO(), &s3.DeleteObjectInput{ + Bucket: aws.String(bucketName), + Key: obj.Key, + }) + if err != nil { + t.Logf("Warning: failed to delete object %s: %v", *obj.Key, err) + } + } + } + + // Then delete the bucket + _, err = client.DeleteBucket(context.TODO(), &s3.DeleteBucketInput{ + Bucket: aws.String(bucketName), + }) + if err != nil { + t.Logf("Warning: failed to delete bucket %s: %v", bucketName, err) + } +} + +// TestObjectTaggingOnUpload tests that tags sent during object upload (via X-Amz-Tagging header) +// are properly stored and can be retrieved. This is the fix for GitHub issue #7589. +func TestObjectTaggingOnUpload(t *testing.T) { + client := getS3Client(t) + bucketName := createTestBucket(t, client) + defer cleanupTestBucket(t, client, bucketName) + + objectKey := "test-object-with-tags" + objectContent := "Hello, World!" + + // Put object with tags using the Tagging parameter (X-Amz-Tagging header) + _, err := client.PutObject(context.TODO(), &s3.PutObjectInput{ + Bucket: aws.String(bucketName), + Key: aws.String(objectKey), + Body: strings.NewReader(objectContent), + Tagging: aws.String("env=production&team=platform"), + }) + require.NoError(t, err, "Should be able to put object with tags") + + // Get the tags back + tagResp, err := client.GetObjectTagging(context.TODO(), &s3.GetObjectTaggingInput{ + Bucket: aws.String(bucketName), + Key: aws.String(objectKey), + }) + require.NoError(t, err, "Should be able to get object tags") + + // Verify tags were stored correctly + require.Len(t, tagResp.TagSet, 2, "Should have 2 tags") + + // Build a map for easier assertion + tagMap := make(map[string]string) + for _, tag := range tagResp.TagSet { + tagMap[*tag.Key] = *tag.Value + } + + assert.Equal(t, "production", tagMap["env"], "env tag should be 'production'") + assert.Equal(t, "platform", tagMap["team"], "team tag should be 'platform'") +} + +// TestObjectTaggingOnUploadWithSpecialCharacters tests tags with URL-encoded characters +func TestObjectTaggingOnUploadWithSpecialCharacters(t *testing.T) { + client := getS3Client(t) + bucketName := createTestBucket(t, client) + defer cleanupTestBucket(t, client, bucketName) + + objectKey := "test-object-with-special-tags" + objectContent := "Hello, World!" + + // Put object with tags containing special characters + // AWS SDK will URL-encode these automatically + _, err := client.PutObject(context.TODO(), &s3.PutObjectInput{ + Bucket: aws.String(bucketName), + Key: aws.String(objectKey), + Body: strings.NewReader(objectContent), + Tagging: aws.String("timestamp=2025-07-16 14:40:39&path=/tmp/file.txt"), + }) + require.NoError(t, err, "Should be able to put object with special character tags") + + // Get the tags back + tagResp, err := client.GetObjectTagging(context.TODO(), &s3.GetObjectTaggingInput{ + Bucket: aws.String(bucketName), + Key: aws.String(objectKey), + }) + require.NoError(t, err, "Should be able to get object tags") + + // Verify tags were stored and URL-decoded correctly + require.Len(t, tagResp.TagSet, 2, "Should have 2 tags") + + tagMap := make(map[string]string) + for _, tag := range tagResp.TagSet { + tagMap[*tag.Key] = *tag.Value + } + + assert.Equal(t, "2025-07-16 14:40:39", tagMap["timestamp"], "timestamp tag should be decoded correctly") + assert.Equal(t, "/tmp/file.txt", tagMap["path"], "path tag should be decoded correctly") +} + +// TestObjectTaggingOnUploadWithEmptyValue tests tags with empty values +func TestObjectTaggingOnUploadWithEmptyValue(t *testing.T) { + client := getS3Client(t) + bucketName := createTestBucket(t, client) + defer cleanupTestBucket(t, client, bucketName) + + objectKey := "test-object-with-empty-tag" + objectContent := "Hello, World!" + + // Put object with a tag that has an empty value + _, err := client.PutObject(context.TODO(), &s3.PutObjectInput{ + Bucket: aws.String(bucketName), + Key: aws.String(objectKey), + Body: strings.NewReader(objectContent), + Tagging: aws.String("marker=&env=dev"), + }) + require.NoError(t, err, "Should be able to put object with empty tag value") + + // Get the tags back + tagResp, err := client.GetObjectTagging(context.TODO(), &s3.GetObjectTaggingInput{ + Bucket: aws.String(bucketName), + Key: aws.String(objectKey), + }) + require.NoError(t, err, "Should be able to get object tags") + + // Verify tags were stored correctly + require.Len(t, tagResp.TagSet, 2, "Should have 2 tags") + + tagMap := make(map[string]string) + for _, tag := range tagResp.TagSet { + tagMap[*tag.Key] = *tag.Value + } + + assert.Equal(t, "", tagMap["marker"], "marker tag should have empty value") + assert.Equal(t, "dev", tagMap["env"], "env tag should be 'dev'") +} + +// TestPutObjectTaggingAPI tests the PutObjectTagging API separately from upload +func TestPutObjectTaggingAPI(t *testing.T) { + client := getS3Client(t) + bucketName := createTestBucket(t, client) + defer cleanupTestBucket(t, client, bucketName) + + objectKey := "test-object-for-tagging-api" + objectContent := "Hello, World!" + + // First, put object without tags + _, err := client.PutObject(context.TODO(), &s3.PutObjectInput{ + Bucket: aws.String(bucketName), + Key: aws.String(objectKey), + Body: strings.NewReader(objectContent), + }) + require.NoError(t, err, "Should be able to put object without tags") + + // Get tags - should be empty + tagResp, err := client.GetObjectTagging(context.TODO(), &s3.GetObjectTaggingInput{ + Bucket: aws.String(bucketName), + Key: aws.String(objectKey), + }) + require.NoError(t, err, "Should be able to get object tags") + assert.Len(t, tagResp.TagSet, 0, "Should have no tags initially") + + // Now add tags using PutObjectTagging API + _, err = client.PutObjectTagging(context.TODO(), &s3.PutObjectTaggingInput{ + Bucket: aws.String(bucketName), + Key: aws.String(objectKey), + Tagging: &types.Tagging{ + TagSet: []types.Tag{ + {Key: aws.String("env"), Value: aws.String("staging")}, + {Key: aws.String("version"), Value: aws.String("1.0")}, + }, + }, + }) + require.NoError(t, err, "Should be able to put object tags via API") + + // Get tags - should now have the tags + tagResp, err = client.GetObjectTagging(context.TODO(), &s3.GetObjectTaggingInput{ + Bucket: aws.String(bucketName), + Key: aws.String(objectKey), + }) + require.NoError(t, err, "Should be able to get object tags after PutObjectTagging") + require.Len(t, tagResp.TagSet, 2, "Should have 2 tags") + + tagMap := make(map[string]string) + for _, tag := range tagResp.TagSet { + tagMap[*tag.Key] = *tag.Value + } + + assert.Equal(t, "staging", tagMap["env"], "env tag should be 'staging'") + assert.Equal(t, "1.0", tagMap["version"], "version tag should be '1.0'") +} + +// TestDeleteObjectTagging tests the DeleteObjectTagging API +func TestDeleteObjectTagging(t *testing.T) { + client := getS3Client(t) + bucketName := createTestBucket(t, client) + defer cleanupTestBucket(t, client, bucketName) + + objectKey := "test-object-for-delete-tags" + objectContent := "Hello, World!" + + // Put object with tags + _, err := client.PutObject(context.TODO(), &s3.PutObjectInput{ + Bucket: aws.String(bucketName), + Key: aws.String(objectKey), + Body: strings.NewReader(objectContent), + Tagging: aws.String("env=production"), + }) + require.NoError(t, err, "Should be able to put object with tags") + + // Verify tags exist + tagResp, err := client.GetObjectTagging(context.TODO(), &s3.GetObjectTaggingInput{ + Bucket: aws.String(bucketName), + Key: aws.String(objectKey), + }) + require.NoError(t, err, "Should be able to get object tags") + require.Len(t, tagResp.TagSet, 1, "Should have 1 tag") + + // Delete tags + _, err = client.DeleteObjectTagging(context.TODO(), &s3.DeleteObjectTaggingInput{ + Bucket: aws.String(bucketName), + Key: aws.String(objectKey), + }) + require.NoError(t, err, "Should be able to delete object tags") + + // Verify tags are deleted + tagResp, err = client.GetObjectTagging(context.TODO(), &s3.GetObjectTaggingInput{ + Bucket: aws.String(bucketName), + Key: aws.String(objectKey), + }) + require.NoError(t, err, "Should be able to get object tags after deletion") + assert.Len(t, tagResp.TagSet, 0, "Should have no tags after deletion") +} + +// TestTagsNotPreservedAfterObjectOverwrite tests that tags are NOT preserved when an object is overwritten +// This matches AWS S3 behavior where overwriting an object replaces all metadata including tags +func TestTagsNotPreservedAfterObjectOverwrite(t *testing.T) { + client := getS3Client(t) + bucketName := createTestBucket(t, client) + defer cleanupTestBucket(t, client, bucketName) + + objectKey := "test-object-overwrite-tags" + objectContent := "Original content" + + // Put object with tags + _, err := client.PutObject(context.TODO(), &s3.PutObjectInput{ + Bucket: aws.String(bucketName), + Key: aws.String(objectKey), + Body: strings.NewReader(objectContent), + Tagging: aws.String("original=true"), + }) + require.NoError(t, err, "Should be able to put object with tags") + + // Verify original tags exist + tagResp, err := client.GetObjectTagging(context.TODO(), &s3.GetObjectTaggingInput{ + Bucket: aws.String(bucketName), + Key: aws.String(objectKey), + }) + require.NoError(t, err, "Should be able to get object tags") + require.Len(t, tagResp.TagSet, 1, "Should have 1 tag") + assert.Equal(t, "original", *tagResp.TagSet[0].Key) + + // Overwrite the object WITHOUT tags + _, err = client.PutObject(context.TODO(), &s3.PutObjectInput{ + Bucket: aws.String(bucketName), + Key: aws.String(objectKey), + Body: strings.NewReader("New content"), + }) + require.NoError(t, err, "Should be able to overwrite object") + + // Tags should be gone after overwrite (matches AWS S3 behavior) + tagResp, err = client.GetObjectTagging(context.TODO(), &s3.GetObjectTaggingInput{ + Bucket: aws.String(bucketName), + Key: aws.String(objectKey), + }) + require.NoError(t, err, "Should be able to get object tags after overwrite") + assert.Len(t, tagResp.TagSet, 0, "Tags should be cleared after object overwrite") +} + +// TestMaximumNumberOfTags tests that we can store the maximum 10 tags per object +func TestMaximumNumberOfTags(t *testing.T) { + client := getS3Client(t) + bucketName := createTestBucket(t, client) + defer cleanupTestBucket(t, client, bucketName) + + objectKey := "test-object-max-tags" + objectContent := "Hello, World!" + + // Build 10 tags (S3 max) + tags := []string{} + for i := 1; i <= 10; i++ { + tags = append(tags, fmt.Sprintf("key%d=value%d", i, i)) + } + tagging := strings.Join(tags, "&") + + // Put object with 10 tags + _, err := client.PutObject(context.TODO(), &s3.PutObjectInput{ + Bucket: aws.String(bucketName), + Key: aws.String(objectKey), + Body: strings.NewReader(objectContent), + Tagging: aws.String(tagging), + }) + require.NoError(t, err, "Should be able to put object with 10 tags") + + // Get the tags back + tagResp, err := client.GetObjectTagging(context.TODO(), &s3.GetObjectTaggingInput{ + Bucket: aws.String(bucketName), + Key: aws.String(objectKey), + }) + require.NoError(t, err, "Should be able to get object tags") + assert.Len(t, tagResp.TagSet, 10, "Should have 10 tags") +} + +// TestTagCountHeader tests that the x-amz-tagging-count header is returned in HeadObject +func TestTagCountHeader(t *testing.T) { + client := getS3Client(t) + bucketName := createTestBucket(t, client) + defer cleanupTestBucket(t, client, bucketName) + + objectKey := "test-object-tag-count" + objectContent := "Hello, World!" + + // Put object with tags + _, err := client.PutObject(context.TODO(), &s3.PutObjectInput{ + Bucket: aws.String(bucketName), + Key: aws.String(objectKey), + Body: strings.NewReader(objectContent), + Tagging: aws.String("env=prod&team=backend&version=2.0"), + }) + require.NoError(t, err, "Should be able to put object with tags") + + // Head object to get tag count + headResp, err := client.HeadObject(context.TODO(), &s3.HeadObjectInput{ + Bucket: aws.String(bucketName), + Key: aws.String(objectKey), + }) + require.NoError(t, err, "Should be able to head object") + + // Check tag count header + if headResp.TagCount != nil { + assert.Equal(t, int32(3), *headResp.TagCount, "Tag count should be 3") + } else { + t.Log("Warning: TagCount header not returned - this may be expected depending on implementation") + } +} diff --git a/weed/s3api/s3api_object_handlers_put.go b/weed/s3api/s3api_object_handlers_put.go index 100796b2e..f848790de 100644 --- a/weed/s3api/s3api_object_handlers_put.go +++ b/weed/s3api/s3api_object_handlers_put.go @@ -8,6 +8,7 @@ import ( "fmt" "io" "net/http" + "net/url" "path/filepath" "strconv" "strings" @@ -548,6 +549,28 @@ func (s3a *S3ApiServer) putToFiler(r *http.Request, filePath string, dataReader } } + // Parse and store object tags from X-Amz-Tagging header + // Fix for GitHub issue #7589: Tags sent during object upload were not being stored + if tagging := r.Header.Get(s3_constants.AmzObjectTagging); tagging != "" { + parsedTags, err := url.ParseQuery(tagging) + if err != nil { + glog.Warningf("putToFiler: Invalid S3 tag format in header '%s': %v", tagging, err) + return "", s3err.ErrInvalidTag, SSEResponseMetadata{} + } + for key, values := range parsedTags { + if len(values) > 1 { + glog.Warningf("putToFiler: Duplicate tag key '%s' in header", key) + return "", s3err.ErrInvalidTag, SSEResponseMetadata{} + } + value := "" + if len(values) > 0 { + value = values[0] + } + entry.Extended[s3_constants.AmzObjectTagging+"-"+key] = []byte(value) + } + glog.V(3).Infof("putToFiler: stored %d tags from X-Amz-Tagging header", len(parsedTags)) + } + // Set SSE-C metadata if customerKey != nil && len(sseIV) > 0 { // Store IV as RAW bytes (matches filer behavior - filer decodes base64 headers and stores raw bytes) @@ -680,9 +703,9 @@ func filerErrorToS3Error(err error) s3err.ErrorCode { if err == nil { return s3err.ErrNone } - + errString := err.Error() - + switch { case errString == constants.ErrMsgBadDigest: return s3err.ErrBadDigest