diff --git a/weed/s3api/copy_source_decode_test.go b/weed/s3api/copy_source_decode_test.go new file mode 100644 index 000000000..df081a80b --- /dev/null +++ b/weed/s3api/copy_source_decode_test.go @@ -0,0 +1,235 @@ +package s3api + +import ( + "fmt" + "net/http" + "net/http/httptest" + "net/url" + "testing" + + "github.com/gorilla/mux" + "github.com/seaweedfs/seaweedfs/weed/s3api/s3_constants" +) + +func TestCopySourceWithExclamationMark(t *testing.T) { + // Reproduce https://github.com/seaweedfs/seaweedfs/issues/8544 + testCases := []struct { + name string + rawCopySource string + expectedBucket string + expectedObject string + expectedVersion string + dstBucket string + dstObject string + shouldBeEqual bool + }{ + { + name: "encoded exclamation mark - different dest", + rawCopySource: "my-bucket/path%2Fto%2FAnother%20test%21.odt", + expectedBucket: "my-bucket", + expectedObject: "path/to/Another test!.odt", + dstBucket: "my-bucket", + dstObject: "path/to/Hello.odt", + shouldBeEqual: false, + }, + { + name: "unencoded exclamation mark - different dest", + rawCopySource: "my-bucket/path/to/Another%20test!.odt", + expectedBucket: "my-bucket", + expectedObject: "path/to/Another test!.odt", + dstBucket: "my-bucket", + dstObject: "path/to/Hello.odt", + shouldBeEqual: false, + }, + { + name: "encoded exclamation mark - same dest", + rawCopySource: "my-bucket/path%2Fto%2FAnother%20test%21.odt", + expectedBucket: "my-bucket", + expectedObject: "path/to/Another test!.odt", + dstBucket: "my-bucket", + dstObject: "path/to/Another test!.odt", + shouldBeEqual: true, + }, + { + name: "encoded path with versionId", + rawCopySource: "my-bucket/path%2Fto%2FAnother%20test%21.odt?versionId=abc123", + expectedBucket: "my-bucket", + expectedObject: "path/to/Another test!.odt", + expectedVersion: "abc123", + dstBucket: "my-bucket", + dstObject: "path/to/Hello.odt", + shouldBeEqual: false, + }, + { + name: "unencoded path with versionId", + rawCopySource: "my-bucket/path/to/Another%20test!.odt?versionId=v2", + expectedBucket: "my-bucket", + expectedObject: "path/to/Another test!.odt", + expectedVersion: "v2", + dstBucket: "my-bucket", + dstObject: "path/to/Another test!.odt", + shouldBeEqual: true, + }, + { + name: "plus sign in key with versionId", + rawCopySource: "my-bucket/path/to/file+name.odt?versionId=xyz", + expectedBucket: "my-bucket", + expectedObject: "path/to/file+name.odt", + expectedVersion: "xyz", + dstBucket: "my-bucket", + dstObject: "path/to/file+name.odt", + shouldBeEqual: true, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + cpSrcPath, err := url.PathUnescape(tc.rawCopySource) + if err != nil { + cpSrcPath = tc.rawCopySource + } + + srcBucket, srcObject, srcVersionId := pathToBucketObjectAndVersion(tc.rawCopySource, cpSrcPath) + + if srcBucket != tc.expectedBucket { + t.Errorf("expected srcBucket=%q, got %q", tc.expectedBucket, srcBucket) + } + if srcObject != tc.expectedObject { + t.Errorf("expected srcObject=%q, got %q", tc.expectedObject, srcObject) + } + if srcVersionId != tc.expectedVersion { + t.Errorf("expected versionId=%q, got %q", tc.expectedVersion, srcVersionId) + } + + isEqual := srcBucket == tc.dstBucket && srcObject == tc.dstObject + if isEqual != tc.shouldBeEqual { + t.Errorf("expected comparison result %v, got %v (srcObject=%q, dstObject=%q)", + tc.shouldBeEqual, isEqual, srcObject, tc.dstObject) + } + }) + } +} + +// TestCopySourceDecodingPlusSign verifies that + in the copy source header +// is treated as a literal plus sign (not a space), matching url.PathUnescape +// behavior. Using url.QueryUnescape would incorrectly convert + to space. +func TestCopySourceDecodingPlusSign(t *testing.T) { + // Key: "path/to/file+name.odt" + // With url.QueryUnescape, literal "+" → " " (WRONG for paths) + // With url.PathUnescape, literal "+" → "+" (CORRECT) + rawCopySource := "my-bucket/path/to/file+name.odt" + + // url.QueryUnescape would give "file name.odt" — WRONG + queryDecoded, _ := url.QueryUnescape(rawCopySource) + if queryDecoded == rawCopySource { + t.Fatal("QueryUnescape should have changed + to space") + } + + // url.PathUnescape preserves literal "+" — CORRECT + pathDecoded, _ := url.PathUnescape(rawCopySource) + if pathDecoded != rawCopySource { + t.Fatalf("PathUnescape should have preserved +, got %q", pathDecoded) + } + + _, srcObject, _ := pathToBucketObjectAndVersion(rawCopySource, pathDecoded) + if srcObject != "path/to/file+name.odt" { + t.Errorf("expected srcObject=%q, got %q", "path/to/file+name.odt", srcObject) + } +} + +// TestCopySourceRoutingWithSpecialChars tests that mux variable extraction +// correctly handles special characters like ! (%21) in both the URL path +// and the X-Amz-Copy-Source header. +func TestCopySourceRoutingWithSpecialChars(t *testing.T) { + testCases := []struct { + name string + dstURL string // URL for the PUT request (destination) + copySource string // X-Amz-Copy-Source header value + expectSameKey bool // whether srcObject should equal dstObject + }{ + { + name: "different keys, source has encoded !", + dstURL: "/my-bucket/path/to/Hello.odt", + copySource: "my-bucket/path%2Fto%2FAnother%20test%21.odt", + expectSameKey: false, + }, + { + name: "same key with !, source encoded, dest unencoded", + dstURL: "/my-bucket/path/to/Another%20test%21.odt", + copySource: "my-bucket/path%2Fto%2FAnother%20test%21.odt", + expectSameKey: true, + }, + { + name: "same key with !, both percent-encoded differently", + dstURL: "/my-bucket/path/to/Another%20test!.odt", + copySource: "my-bucket/path%2Fto%2FAnother%20test%21.odt", + expectSameKey: true, + }, + { + name: "key with + sign, source has literal +", + dstURL: "/my-bucket/path/to/file+name.odt", + copySource: "my-bucket/path/to/file+name.odt", + expectSameKey: true, + }, + { + name: "key with + sign, source has %2B", + dstURL: "/my-bucket/path/to/file+name.odt", + copySource: "my-bucket/path/to/file%2Bname.odt", + expectSameKey: true, + }, + { + name: "lowercase %2f in copy source", + dstURL: "/my-bucket/path/to/Hello.odt", + copySource: "my-bucket/path%2fto%2fAnother%20test.odt", + expectSameKey: false, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + var capturedDstBucket, capturedDstObject string + var capturedSrcBucket, capturedSrcObject string + + router := mux.NewRouter().SkipClean(true) + bucket := router.PathPrefix("/{bucket}").Subrouter() + bucket.Methods(http.MethodPut).Path("/{object:(?s).+}"). + HeadersRegexp("X-Amz-Copy-Source", `(?i).*?(\/|%2F).*?`). + HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + capturedDstBucket, capturedDstObject = s3_constants.GetBucketAndObject(r) + + rawCopySource := r.Header.Get("X-Amz-Copy-Source") + cpSrcPath, err := url.PathUnescape(rawCopySource) + if err != nil { + cpSrcPath = rawCopySource + } + capturedSrcBucket, capturedSrcObject, _ = pathToBucketObjectAndVersion(rawCopySource, cpSrcPath) + + if capturedSrcBucket == capturedDstBucket && capturedSrcObject == capturedDstObject { + w.WriteHeader(http.StatusBadRequest) + fmt.Fprintf(w, "ErrInvalidCopyDest") + } else { + w.WriteHeader(http.StatusOK) + fmt.Fprintf(w, "OK") + } + }) + + req, _ := http.NewRequest("PUT", tc.dstURL, nil) + req.Header.Set("X-Amz-Copy-Source", tc.copySource) + + rr := httptest.NewRecorder() + router.ServeHTTP(rr, req) + + if tc.expectSameKey { + if rr.Code != http.StatusBadRequest { + t.Errorf("expected same key detection (400), got %d; src=%q dst=%q", + rr.Code, capturedSrcObject, capturedDstObject) + } + } else { + if rr.Code != http.StatusOK { + t.Errorf("expected different keys (200), got %d; src=%q dst=%q", + rr.Code, capturedSrcObject, capturedDstObject) + } + } + }) + } +} diff --git a/weed/s3api/s3api_object_handlers_copy.go b/weed/s3api/s3api_object_handlers_copy.go index 7e1e38b51..4680ace8f 100644 --- a/weed/s3api/s3api_object_handlers_copy.go +++ b/weed/s3api/s3api_object_handlers_copy.go @@ -38,7 +38,9 @@ func (s3a *S3ApiServer) CopyObjectHandler(w http.ResponseWriter, r *http.Request // Copy source path. rawCopySource := r.Header.Get("X-Amz-Copy-Source") - cpSrcPath, err := url.QueryUnescape(rawCopySource) + // Use PathUnescape (not QueryUnescape) because the copy source is a path, + // not a query string. QueryUnescape would incorrectly convert '+' to space. + cpSrcPath, err := url.PathUnescape(rawCopySource) if err != nil { // Save unescaped string as is. cpSrcPath = rawCopySource @@ -438,7 +440,7 @@ func pathToBucketObjectAndVersion(rawPath, decodedPath string) (bucket, object, versionId = values.Get("versionId") rawPathNoQuery := rawPath[:idx] - if unescaped, err := url.QueryUnescape(rawPathNoQuery); err == nil { + if unescaped, err := url.PathUnescape(rawPathNoQuery); err == nil { pathForBucket = unescaped } else { pathForBucket = rawPathNoQuery @@ -470,8 +472,9 @@ func (s3a *S3ApiServer) CopyObjectPartHandler(w http.ResponseWriter, r *http.Req glog.V(4).Infof("CopyObjectPart: Raw copy source header=%q", rawCopySource) - // Try URL unescaping - AWS SDK sends URL-encoded copy sources - cpSrcPath, err := url.QueryUnescape(rawCopySource) + // Use PathUnescape (not QueryUnescape) because the copy source is a path, + // not a query string. QueryUnescape would incorrectly convert '+' to space. + cpSrcPath, err := url.PathUnescape(rawCopySource) if err != nil { // If unescaping fails, log and use original glog.V(4).Infof("CopyObjectPart: Failed to unescape copy source %q: %v, using as-is", rawCopySource, err) @@ -483,6 +486,17 @@ func (s3a *S3ApiServer) CopyObjectPartHandler(w http.ResponseWriter, r *http.Req glog.V(4).Infof("CopyObjectPart: Parsed srcBucket=%q, srcObject=%q, srcVersionId=%q", srcBucket, srcObject, srcVersionId) + if err := s3a.validateTableBucketObjectPath(dstBucket, dstObject); err != nil { + s3err.WriteErrorResponse(w, r, s3err.ErrAccessDenied) + return + } + if srcBucket != "" && srcBucket != dstBucket { + if err := s3a.validateTableBucketObjectPath(srcBucket, srcObject); err != nil { + s3err.WriteErrorResponse(w, r, s3err.ErrAccessDenied) + return + } + } + // If source object is empty or bucket is empty, reply back invalid copy source. // Note: srcObject can be "/" for root-level objects, but empty string means parsing failed if srcObject == "" || srcBucket == "" { diff --git a/weed/s3api/s3api_server.go b/weed/s3api/s3api_server.go index 8d2fe0f41..065a05d39 100644 --- a/weed/s3api/s3api_server.go +++ b/weed/s3api/s3api_server.go @@ -593,7 +593,7 @@ func (s3a *S3ApiServer) registerRouter(router *mux.Router) { // objects with query // CopyObjectPart - bucket.Methods(http.MethodPut).Path(objectPath).HeadersRegexp("X-Amz-Copy-Source", `.*?(\/|%2F).*?`).HandlerFunc(track(s3a.iam.Auth(s3a.cb.Limit(s3a.CopyObjectPartHandler, ACTION_WRITE)), "PUT")).Queries("partNumber", "{partNumber:[0-9]+}", "uploadId", "{uploadId:.*}") + bucket.Methods(http.MethodPut).Path(objectPath).HeadersRegexp("X-Amz-Copy-Source", `(?i).*?(\/|%2F).*?`).HandlerFunc(track(s3a.iam.Auth(s3a.cb.Limit(s3a.CopyObjectPartHandler, ACTION_WRITE)), "PUT")).Queries("partNumber", "{partNumber:[0-9]+}", "uploadId", "{uploadId:.*}") // PutObjectPart bucket.Methods(http.MethodPut).Path(objectPath).HandlerFunc(track(s3a.iam.Auth(s3a.cb.Limit(s3a.PutObjectPartHandler, ACTION_WRITE)), "PUT")).Queries("partNumber", "{partNumber:[0-9]+}", "uploadId", "{uploadId:.*}") // CompleteMultipartUpload @@ -647,7 +647,7 @@ func (s3a *S3ApiServer) registerRouter(router *mux.Router) { }, ACTION_READ), "GET")) // CopyObject - bucket.Methods(http.MethodPut).Path(objectPath).HeadersRegexp("X-Amz-Copy-Source", ".*?(\\/|%2F).*?").HandlerFunc(track(s3a.iam.Auth(s3a.cb.Limit(s3a.CopyObjectHandler, ACTION_WRITE)), "COPY")) + bucket.Methods(http.MethodPut).Path(objectPath).HeadersRegexp("X-Amz-Copy-Source", `(?i).*?(\/|%2F).*?`).HandlerFunc(track(s3a.iam.Auth(s3a.cb.Limit(s3a.CopyObjectHandler, ACTION_WRITE)), "COPY")) // PutObject bucket.Methods(http.MethodPut).Path(objectPath).HandlerFunc(track(s3a.iam.Auth(s3a.cb.Limit(s3a.PutObjectHandler, ACTION_WRITE)), "PUT")) // DeleteObject