diff --git a/test/s3/parquet/debug_write_dataset.py b/test/s3/parquet/debug_write_dataset.py new file mode 100644 index 000000000..41762497b --- /dev/null +++ b/test/s3/parquet/debug_write_dataset.py @@ -0,0 +1,86 @@ +#!/usr/bin/env python3 +"""Debug script to understand what pads.write_dataset creates.""" + +import sys +import pyarrow as pa +import pyarrow.dataset as pads +import s3fs + +# Create a simple test table +table = pa.table({'id': [1, 2, 3], 'value': [1.0, 2.0, 3.0]}) + +# Initialize S3 filesystem +fs = s3fs.S3FileSystem( + client_kwargs={'endpoint_url': 'http://localhost:8333'}, + key='some_access_key1', + secret='some_secret_key1', + use_listings_cache=False, +) + +# Create bucket +if not fs.exists('test-bucket'): + fs.mkdir('test-bucket') + +# Write with pads.write_dataset +test_path = 's3://test-bucket/test-write-simple/' +print(f"Writing to: {test_path}") +print(f"Table schema: {table.schema}") +print(f"Table rows: {table.num_rows}") + +try: + pads.write_dataset(table, test_path, format='parquet', filesystem=fs) + print("\nāœ“ Write succeeded") + + # List all files recursively + print(f"\nListing all files recursively under {test_path}:") + import os + base_path = 'test-bucket/test-write-simple' + def list_recursive(path, indent=0): + try: + items = fs.ls(path, detail=False) + for item in items: + is_dir = fs.isdir(item) + item_name = item.split('/')[-1] if '/' in item else item + if is_dir: + print(f"{' ' * indent}šŸ“ {item_name}/") + list_recursive(item, indent + 1) + else: + # Get file size + try: + info = fs.info(item) + size = info.get('size', 0) + print(f"{' ' * indent}šŸ“„ {item_name} ({size} bytes)") + except: + print(f"{' ' * indent}šŸ“„ {item_name}") + except Exception as e: + print(f"{' ' * indent}Error listing {path}: {e}") + + list_recursive(base_path) + + # Try to read back with different methods + print(f"\n\nTrying to read back using different methods:") + + # Method 1: pads.dataset with the same path + print(f"\n1. pads.dataset('{test_path}'):") + try: + dataset = pads.dataset(test_path, format='parquet', filesystem=fs) + result = dataset.to_table() + print(f" āœ“ Success: {result.num_rows} rows") + except Exception as e: + print(f" āœ— Failed: {e}") + + # Method 2: pads.dataset with the dir containing parquet files + print(f"\n2. pads.dataset without trailing slash:") + test_path_no_slash = 's3://test-bucket/test-write-simple' + try: + dataset = pads.dataset(test_path_no_slash, format='parquet', filesystem=fs) + result = dataset.to_table() + print(f" āœ“ Success: {result.num_rows} rows") + except Exception as e: + print(f" āœ— Failed: {e}") + +except Exception as e: + import traceback + print(f"āœ— Error: {e}") + traceback.print_exc() + sys.exit(1) diff --git a/test/s3/parquet/test_implicit_directory_fix.py b/test/s3/parquet/test_implicit_directory_fix.py index 9ac8f0346..2ed52e5d7 100755 --- a/test/s3/parquet/test_implicit_directory_fix.py +++ b/test/s3/parquet/test_implicit_directory_fix.py @@ -182,7 +182,7 @@ def test_explicit_directory_marker(fs, s3_client): logger.info("="*80) # Create an explicit directory marker - logger.info(f"\nCreating explicit directory: {BUCKET_NAME}/explicit_dir/") + logger.info(f"Creating explicit directory: {BUCKET_NAME}/explicit_dir/") try: s3_client.put_object( Bucket=BUCKET_NAME, diff --git a/weed/replication/sink/gcssink/gcs_sink.go b/weed/replication/sink/gcssink/gcs_sink.go index 6fe78b21b..1a930fd4a 100644 --- a/weed/replication/sink/gcssink/gcs_sink.go +++ b/weed/replication/sink/gcssink/gcs_sink.go @@ -3,9 +3,10 @@ package gcssink import ( "context" "fmt" - "github.com/seaweedfs/seaweedfs/weed/replication/repl_util" "os" + "github.com/seaweedfs/seaweedfs/weed/replication/repl_util" + "cloud.google.com/go/storage" "google.golang.org/api/option" @@ -83,7 +84,7 @@ func (g *GcsSink) DeleteEntry(key string, isDirectory, deleteIncludeChunks bool, } if err := g.client.Bucket(g.bucket).Object(key).Delete(context.Background()); err != nil { - return fmt.Errorf("gcs delete %s%s: %v", g.bucket, key, err) + return fmt.Errorf("gcs delete %s/%s: %v", g.bucket, key, err) } return nil diff --git a/weed/s3api/s3_constants/header.go b/weed/s3api/s3_constants/header.go index 4b34f397e..f379c91ed 100644 --- a/weed/s3api/s3_constants/header.go +++ b/weed/s3api/s3_constants/header.go @@ -144,16 +144,26 @@ func GetBucketAndObject(r *http.Request) (bucket, object string) { return } -// NormalizeObjectKey ensures the object key has a leading slash and no duplicate slashes. +// NormalizeObjectKey normalizes object keys by removing duplicate slashes and converting backslashes. // This normalizes keys from various sources (URL path, form values, etc.) to a consistent format. // It also converts Windows-style backslashes to forward slashes for cross-platform compatibility. +// Returns keys WITHOUT leading slash to match S3 API format (e.g., "foo/bar" not "/foo/bar"). +// Preserves trailing slash if present (e.g., "foo/" stays "foo/"). func NormalizeObjectKey(object string) string { + // Preserve trailing slash if present + hasTrailingSlash := strings.HasSuffix(object, "/") + // Convert Windows-style backslashes to forward slashes object = strings.ReplaceAll(object, "\\", "/") object = removeDuplicateSlashes(object) - if !strings.HasPrefix(object, "/") { - object = "/" + object + // Remove leading slash to match S3 API format + object = strings.TrimPrefix(object, "/") + + // Restore trailing slash if it was present and result is not empty + if hasTrailingSlash && object != "" && !strings.HasSuffix(object, "/") { + object = object + "/" } + return object } @@ -181,10 +191,6 @@ func GetPrefix(r *http.Request) string { query := r.URL.Query() prefix := query.Get("prefix") prefix = removeDuplicateSlashes(prefix) - if !strings.HasPrefix(prefix, "/") { - prefix = "/" + prefix - } - return prefix } diff --git a/weed/s3api/s3_constants/header_test.go b/weed/s3api/s3_constants/header_test.go index f1cb06fac..4b1c1b8ca 100644 --- a/weed/s3api/s3_constants/header_test.go +++ b/weed/s3api/s3_constants/header_test.go @@ -13,67 +13,67 @@ func TestNormalizeObjectKey(t *testing.T) { { name: "simple key", input: "file.txt", - expected: "/file.txt", + expected: "file.txt", }, { name: "key with leading slash", input: "/file.txt", - expected: "/file.txt", + expected: "file.txt", }, { name: "key with directory", input: "folder/file.txt", - expected: "/folder/file.txt", + expected: "folder/file.txt", }, { name: "key with leading slash and directory", input: "/folder/file.txt", - expected: "/folder/file.txt", + expected: "folder/file.txt", }, { name: "key with duplicate slashes", input: "folder//subfolder///file.txt", - expected: "/folder/subfolder/file.txt", + expected: "folder/subfolder/file.txt", }, { name: "Windows backslash - simple", input: "folder\\file.txt", - expected: "/folder/file.txt", + expected: "folder/file.txt", }, { name: "Windows backslash - nested", input: "folder\\subfolder\\file.txt", - expected: "/folder/subfolder/file.txt", + expected: "folder/subfolder/file.txt", }, { name: "Windows backslash - with leading slash", input: "/folder\\subfolder\\file.txt", - expected: "/folder/subfolder/file.txt", + expected: "folder/subfolder/file.txt", }, { name: "mixed slashes", input: "folder\\subfolder/another\\file.txt", - expected: "/folder/subfolder/another/file.txt", + expected: "folder/subfolder/another/file.txt", }, { name: "Windows full path style (edge case)", input: "C:\\Users\\test\\file.txt", - expected: "/C:/Users/test/file.txt", + expected: "C:/Users/test/file.txt", }, { name: "empty string", input: "", - expected: "/", + expected: "", }, { name: "just a slash", input: "/", - expected: "/", + expected: "", }, { name: "just a backslash", input: "\\", - expected: "/", + expected: "", }, } @@ -129,5 +129,3 @@ func TestRemoveDuplicateSlashes(t *testing.T) { }) } } - - diff --git a/weed/s3api/s3api_conditional_headers_test.go b/weed/s3api/s3api_conditional_headers_test.go index 834f57305..20c92af0e 100644 --- a/weed/s3api/s3api_conditional_headers_test.go +++ b/weed/s3api/s3api_conditional_headers_test.go @@ -475,7 +475,7 @@ func createTestGetRequest(bucket, object string) *http.Request { Method: "GET", Header: make(http.Header), URL: &url.URL{ - Path: fmt.Sprintf("/%s%s", bucket, object), + Path: fmt.Sprintf("/%s/%s", bucket, object), }, } } diff --git a/weed/s3api/s3api_list_normalization_test.go b/weed/s3api/s3api_list_normalization_test.go new file mode 100644 index 000000000..128457b8f --- /dev/null +++ b/weed/s3api/s3api_list_normalization_test.go @@ -0,0 +1,85 @@ +package s3api + +import ( + "testing" + + "github.com/seaweedfs/seaweedfs/weed/s3api/s3_constants" +) + +// TestPrefixNormalizationInList verifies that prefixes are normalized consistently in list operations +func TestPrefixNormalizationInList(t *testing.T) { + tests := []struct { + name string + inputPrefix string + expectedPrefix string + description string + }{ + { + name: "simple prefix", + inputPrefix: "parquet-tests/abc123/", + expectedPrefix: "parquet-tests/abc123/", + description: "Normal prefix with trailing slash", + }, + { + name: "leading slash", + inputPrefix: "/parquet-tests/abc123/", + expectedPrefix: "parquet-tests/abc123/", + description: "Prefix with leading slash should be stripped", + }, + { + name: "duplicate slashes", + inputPrefix: "parquet-tests//abc123/", + expectedPrefix: "parquet-tests/abc123/", + description: "Prefix with duplicate slashes should be cleaned", + }, + { + name: "backslashes", + inputPrefix: "parquet-tests\\abc123\\", + expectedPrefix: "parquet-tests/abc123/", + description: "Backslashes should be converted to forward slashes", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Normalize using NormalizeObjectKey (same as object keys) + normalizedPrefix := s3_constants.NormalizeObjectKey(tt.inputPrefix) + + if normalizedPrefix != tt.expectedPrefix { + t.Errorf("Prefix normalization mismatch:\n Input: %q\n Expected: %q\n Got: %q\n Desc: %s", + tt.inputPrefix, tt.expectedPrefix, normalizedPrefix, tt.description) + } + }) + } +} + +// TestListPrefixConsistency verifies that objects written and listed use consistent key formats +func TestListPrefixConsistency(t *testing.T) { + // When an object is written to "parquet-tests/123/data.parquet", + // and we list with prefix "parquet-tests/123/", + // we should find that object + + objectKey := "parquet-tests/123/data.parquet" + listPrefix := "parquet-tests/123/" + + // Normalize as would happen in PUT + normalizedObjectKey := s3_constants.NormalizeObjectKey(objectKey) + + // Check that the list prefix would match the object path + if !startsWithPrefix(normalizedObjectKey, listPrefix) { + t.Errorf("List prefix mismatch:\n Object: %q\n Prefix: %q\n Object doesn't start with prefix", + normalizedObjectKey, listPrefix) + } +} + +func startsWithPrefix(objectKey, prefix string) bool { + // Normalize the prefix using the same logic as NormalizeObjectKey + normalizedPrefix := s3_constants.NormalizeObjectKey(prefix) + + // Check if the object starts with the normalized prefix + if normalizedPrefix == "" { + return true + } + + return objectKey == normalizedPrefix || objectKey[:len(normalizedPrefix)] == normalizedPrefix +} diff --git a/weed/s3api/s3api_object_handlers.go b/weed/s3api/s3api_object_handlers.go index 054a26264..67c40d0c3 100644 --- a/weed/s3api/s3api_object_handlers.go +++ b/weed/s3api/s3api_object_handlers.go @@ -177,11 +177,12 @@ func mimeDetect(r *http.Request, dataReader io.Reader) io.ReadCloser { } func urlEscapeObject(object string) string { - t := urlPathEscape(removeDuplicateSlashes(object)) - if strings.HasPrefix(t, "/") { - return t + normalized := s3_constants.NormalizeObjectKey(object) + // Ensure leading slash for filer paths + if normalized != "" && !strings.HasPrefix(normalized, "/") { + normalized = "/" + normalized } - return "/" + t + return urlPathEscape(normalized) } func entryUrlEncode(dir string, entry string, encodingTypeUrl bool) (dirName string, entryName string, prefix string) { @@ -286,7 +287,7 @@ func (s3a *S3ApiServer) checkDirectoryObject(bucket, object string) (*filer_pb.E } bucketDir := s3a.option.BucketsPath + "/" + bucket - cleanObject := strings.TrimSuffix(strings.TrimPrefix(object, "/"), "/") + cleanObject := strings.TrimSuffix(object, "/") if cleanObject == "" { return nil, true, nil // Root level directory object, but we don't handle it @@ -437,8 +438,8 @@ func newListEntry(entry *filer_pb.Entry, key string, dir string, name string, bu func (s3a *S3ApiServer) toFilerPath(bucket, object string) string { // Returns the raw file path - no URL escaping needed // The path is used directly, not embedded in a URL - object = removeDuplicateSlashes(object) - return fmt.Sprintf("%s/%s%s", s3a.option.BucketsPath, bucket, object) + object = s3_constants.NormalizeObjectKey(object) + return fmt.Sprintf("%s/%s/%s", s3a.option.BucketsPath, bucket, object) } // hasConditionalHeaders checks if the request has any conditional headers @@ -535,7 +536,7 @@ func (s3a *S3ApiServer) GetObjectHandler(w http.ResponseWriter, r *http.Request) if versionId != "" { // Request for specific version - must look in .versions directory - glog.V(3).Infof("GetObject: requesting specific version %s for %s%s", versionId, bucket, object) + glog.V(3).Infof("GetObject: requesting specific version %s for %s/%s", versionId, bucket, object) entry, err = s3a.getSpecificObjectVersion(bucket, object, versionId) if err != nil { glog.Errorf("Failed to get specific version %s: %v", versionId, err) @@ -550,7 +551,7 @@ func (s3a *S3ApiServer) GetObjectHandler(w http.ResponseWriter, r *http.Request) // - If .versions/ doesn't exist (ErrNotFound): only null version at regular path, use it directly // - If transient error: fall back to getLatestObjectVersion which has retry logic bucketDir := s3a.option.BucketsPath + "/" + bucket - normalizedObject := removeDuplicateSlashes(object) + normalizedObject := s3_constants.NormalizeObjectKey(object) versionsDir := normalizedObject + s3_constants.VersionsFolder // Quick check (no retries) for .versions/ directory @@ -561,7 +562,7 @@ func (s3a *S3ApiServer) GetObjectHandler(w http.ResponseWriter, r *http.Request) // Use getLatestObjectVersion which will properly find the newest version entry, err = s3a.getLatestObjectVersion(bucket, object) if err != nil { - glog.Errorf("GetObject: Failed to get latest version for %s%s: %v", bucket, object, err) + glog.Errorf("GetObject: Failed to get latest version for %s/%s: %v", bucket, object, err) s3err.WriteErrorResponse(w, r, s3err.ErrNoSuchKey) return } @@ -574,16 +575,16 @@ func (s3a *S3ApiServer) GetObjectHandler(w http.ResponseWriter, r *http.Request) targetVersionId = "null" } else { // No object at regular path either - object doesn't exist - glog.Errorf("GetObject: object not found at regular path or .versions for %s%s", bucket, object) + glog.Errorf("GetObject: object not found at regular path or .versions for %s/%s", bucket, object) s3err.WriteErrorResponse(w, r, s3err.ErrNoSuchKey) return } } else { // Transient error checking .versions/, fall back to getLatestObjectVersion with retries - glog.V(2).Infof("GetObject: transient error checking .versions for %s%s: %v, falling back to getLatestObjectVersion", bucket, object, versionsErr) + glog.V(2).Infof("GetObject: transient error checking .versions for %s/%s: %v, falling back to getLatestObjectVersion", bucket, object, versionsErr) entry, err = s3a.getLatestObjectVersion(bucket, object) if err != nil { - glog.Errorf("GetObject: Failed to get latest version for %s%s: %v", bucket, object, err) + glog.Errorf("GetObject: Failed to get latest version for %s/%s: %v", bucket, object, err) s3err.WriteErrorResponse(w, r, s3err.ErrNoSuchKey) return } @@ -2148,10 +2149,10 @@ func (s3a *S3ApiServer) HeadObjectHandler(w http.ResponseWriter, r *http.Request if versionId != "" { // Request for specific version - glog.V(2).Infof("HeadObject: requesting specific version %s for %s%s", versionId, bucket, object) + glog.V(2).Infof("HeadObject: requesting specific version %s for %s/%s", versionId, bucket, object) entry, err = s3a.getSpecificObjectVersion(bucket, object, versionId) if err != nil { - glog.Errorf("Failed to get specific version %s: %v", versionId, err) + glog.Errorf("Failed to get specific version %s for %s/%s: %v", versionId, bucket, object, err) s3err.WriteErrorResponse(w, r, s3err.ErrNoSuchKey) return } @@ -2163,7 +2164,7 @@ func (s3a *S3ApiServer) HeadObjectHandler(w http.ResponseWriter, r *http.Request // - If .versions/ doesn't exist (ErrNotFound): only null version at regular path, use it directly // - If transient error: fall back to getLatestObjectVersion which has retry logic bucketDir := s3a.option.BucketsPath + "/" + bucket - normalizedObject := removeDuplicateSlashes(object) + normalizedObject := s3_constants.NormalizeObjectKey(object) versionsDir := normalizedObject + s3_constants.VersionsFolder // Quick check (no retries) for .versions/ directory @@ -2174,7 +2175,7 @@ func (s3a *S3ApiServer) HeadObjectHandler(w http.ResponseWriter, r *http.Request // Use getLatestObjectVersion which will properly find the newest version entry, err = s3a.getLatestObjectVersion(bucket, object) if err != nil { - glog.Errorf("HeadObject: Failed to get latest version for %s%s: %v", bucket, object, err) + glog.Errorf("HeadObject: Failed to get latest version for %s/%s: %v", bucket, object, err) s3err.WriteErrorResponse(w, r, s3err.ErrNoSuchKey) return } @@ -2187,16 +2188,16 @@ func (s3a *S3ApiServer) HeadObjectHandler(w http.ResponseWriter, r *http.Request targetVersionId = "null" } else { // No object at regular path either - object doesn't exist - glog.Errorf("HeadObject: object not found at regular path or .versions for %s%s", bucket, object) + glog.Errorf("HeadObject: object not found at regular path or .versions for %s/%s", bucket, object) s3err.WriteErrorResponse(w, r, s3err.ErrNoSuchKey) return } } else { // Transient error checking .versions/, fall back to getLatestObjectVersion with retries - glog.V(2).Infof("HeadObject: transient error checking .versions for %s%s: %v, falling back to getLatestObjectVersion", bucket, object, versionsErr) + glog.V(2).Infof("HeadObject: transient error checking .versions for %s/%s: %v, falling back to getLatestObjectVersion", bucket, object, versionsErr) entry, err = s3a.getLatestObjectVersion(bucket, object) if err != nil { - glog.Errorf("HeadObject: Failed to get latest version for %s%s: %v", bucket, object, err) + glog.Errorf("HeadObject: Failed to get latest version for %s/%s: %v", bucket, object, err) s3err.WriteErrorResponse(w, r, s3err.ErrNoSuchKey) return } @@ -2367,7 +2368,7 @@ func (s3a *S3ApiServer) HeadObjectHandler(w http.ResponseWriter, r *http.Request } // Detect and handle SSE - glog.V(3).Infof("HeadObjectHandler: Retrieved entry for %s%s - %d chunks", bucket, object, len(objectEntryForSSE.Chunks)) + glog.V(3).Infof("HeadObjectHandler: Retrieved entry for %s/%s - %d chunks", bucket, object, len(objectEntryForSSE.Chunks)) sseType := s3a.detectPrimarySSEType(objectEntryForSSE) glog.V(2).Infof("HeadObjectHandler: Detected SSE type: %s", sseType) if sseType != "" && sseType != "None" { @@ -2441,7 +2442,7 @@ func writeFinalResponse(w http.ResponseWriter, proxyResponse *http.Response, bod // fetchObjectEntry fetches the filer entry for an object // Returns nil if not found (not an error), or propagates other errors func (s3a *S3ApiServer) fetchObjectEntry(bucket, object string) (*filer_pb.Entry, error) { - objectPath := fmt.Sprintf("%s/%s%s", s3a.option.BucketsPath, bucket, object) + objectPath := fmt.Sprintf("%s/%s/%s", s3a.option.BucketsPath, bucket, object) fetchedEntry, fetchErr := s3a.getEntry("", objectPath) if fetchErr != nil { if errors.Is(fetchErr, filer_pb.ErrNotFound) { @@ -2455,7 +2456,7 @@ func (s3a *S3ApiServer) fetchObjectEntry(bucket, object string) (*filer_pb.Entry // fetchObjectEntryRequired fetches the filer entry for an object // Returns an error if the object is not found or any other error occurs func (s3a *S3ApiServer) fetchObjectEntryRequired(bucket, object string) (*filer_pb.Entry, error) { - objectPath := fmt.Sprintf("%s/%s%s", s3a.option.BucketsPath, bucket, object) + objectPath := fmt.Sprintf("%s/%s/%s", s3a.option.BucketsPath, bucket, object) fetchedEntry, fetchErr := s3a.getEntry("", objectPath) if fetchErr != nil { return nil, fetchErr // Return error for both not-found and other errors @@ -3367,7 +3368,7 @@ func (s3a *S3ApiServer) getMultipartInfo(entry *filer_pb.Entry, partNumber int) // This is shared by all remote object caching functions. func (s3a *S3ApiServer) buildRemoteObjectPath(bucket, object string) (dir, name string) { dir = s3a.option.BucketsPath + "/" + bucket - name = strings.TrimPrefix(removeDuplicateSlashes(object), "/") + name = s3_constants.NormalizeObjectKey(object) if idx := strings.LastIndex(name, "/"); idx > 0 { dir = dir + "/" + name[:idx] name = name[idx+1:] @@ -3433,7 +3434,7 @@ func (s3a *S3ApiServer) cacheRemoteObjectForStreaming(r *http.Request, entry *fi var dir, name string if versionId != "" && versionId != "null" { // This is a specific version - entry is located at /buckets//.versions/v_ - normalizedObject := strings.TrimPrefix(removeDuplicateSlashes(object), "/") + normalizedObject := s3_constants.NormalizeObjectKey(object) dir = s3a.option.BucketsPath + "/" + bucket + "/" + normalizedObject + s3_constants.VersionsFolder name = s3a.getVersionFileName(versionId) } else { diff --git a/weed/s3api/s3api_object_handlers_acl.go b/weed/s3api/s3api_object_handlers_acl.go index 212354a30..b0b1c3aa2 100644 --- a/weed/s3api/s3api_object_handlers_acl.go +++ b/weed/s3api/s3api_object_handlers_acl.go @@ -45,16 +45,16 @@ func (s3a *S3ApiServer) GetObjectAclHandler(w http.ResponseWriter, r *http.Reque // Handle versioned object ACL retrieval - use same logic as GetObjectHandler if versionId != "" { // Request for specific version - glog.V(2).Infof("GetObjectAclHandler: requesting ACL for specific version %s of %s%s", versionId, bucket, object) + glog.V(2).Infof("GetObjectAclHandler: requesting ACL for specific version %s of %s/%s", versionId, bucket, object) entry, err = s3a.getSpecificObjectVersion(bucket, object, versionId) } else { // Request for latest version - glog.V(2).Infof("GetObjectAclHandler: requesting ACL for latest version of %s%s", bucket, object) + glog.V(2).Infof("GetObjectAclHandler: requesting ACL for latest version of %s/%s", bucket, object) entry, err = s3a.getLatestObjectVersion(bucket, object) } if err != nil { - glog.Errorf("GetObjectAclHandler: Failed to get object version %s for %s%s: %v", versionId, bucket, object, err) + glog.Errorf("GetObjectAclHandler: Failed to get object version %s for %s/%s: %v", versionId, bucket, object, err) s3err.WriteErrorResponse(w, r, s3err.ErrNoSuchKey) return } @@ -188,16 +188,16 @@ func (s3a *S3ApiServer) PutObjectAclHandler(w http.ResponseWriter, r *http.Reque // Handle versioned object ACL modification - use same logic as GetObjectHandler if versionId != "" { // Request for specific version - glog.V(2).Infof("PutObjectAclHandler: modifying ACL for specific version %s of %s%s", versionId, bucket, object) + glog.V(2).Infof("PutObjectAclHandler: modifying ACL for specific version %s of %s/%s", versionId, bucket, object) entry, err = s3a.getSpecificObjectVersion(bucket, object, versionId) } else { // Request for latest version - glog.V(2).Infof("PutObjectAclHandler: modifying ACL for latest version of %s%s", bucket, object) + glog.V(2).Infof("PutObjectAclHandler: modifying ACL for latest version of %s/%s", bucket, object) entry, err = s3a.getLatestObjectVersion(bucket, object) } if err != nil { - glog.Errorf("PutObjectAclHandler: Failed to get object version %s for %s%s: %v", versionId, bucket, object, err) + glog.Errorf("PutObjectAclHandler: Failed to get object version %s for %s/%s: %v", versionId, bucket, object, err) s3err.WriteErrorResponse(w, r, s3err.ErrNoSuchKey) return } @@ -306,7 +306,7 @@ func (s3a *S3ApiServer) PutObjectAclHandler(w http.ResponseWriter, r *http.Reque if versioningConfigured { if versionId != "" && versionId != "null" { // Versioned object - update the specific version file in .versions directory - updateDirectory = s3a.option.BucketsPath + "/" + bucket + object + s3_constants.VersionsFolder + updateDirectory = s3a.option.BucketsPath + "/" + bucket + "/" + object + s3_constants.VersionsFolder } else { // Latest version in versioned bucket - could be null version or versioned object // Extract version ID from the entry to determine where it's stored @@ -322,7 +322,7 @@ func (s3a *S3ApiServer) PutObjectAclHandler(w http.ResponseWriter, r *http.Reque updateDirectory = s3a.option.BucketsPath + "/" + bucket } else { // Versioned object - stored in .versions directory - updateDirectory = s3a.option.BucketsPath + "/" + bucket + object + s3_constants.VersionsFolder + updateDirectory = s3a.option.BucketsPath + "/" + bucket + "/" + object + s3_constants.VersionsFolder } } } else { diff --git a/weed/s3api/s3api_object_handlers_copy.go b/weed/s3api/s3api_object_handlers_copy.go index 01cf9484b..26775f9ae 100644 --- a/weed/s3api/s3api_object_handlers_copy.go +++ b/weed/s3api/s3api_object_handlers_copy.go @@ -65,7 +65,7 @@ func (s3a *S3ApiServer) CopyObjectHandler(w http.ResponseWriter, r *http.Request replaceMeta, replaceTagging := replaceDirective(r.Header) if (srcBucket == dstBucket && srcObject == dstObject || cpSrcPath == "") && (replaceMeta || replaceTagging) { - fullPath := util.FullPath(fmt.Sprintf("%s/%s%s", s3a.option.BucketsPath, dstBucket, dstObject)) + fullPath := util.FullPath(fmt.Sprintf("%s/%s/%s", s3a.option.BucketsPath, dstBucket, dstObject)) dir, name := fullPath.DirAndName() entry, err := s3a.getEntry(dir, name) if err != nil || entry.IsDirectory { @@ -116,7 +116,7 @@ func (s3a *S3ApiServer) CopyObjectHandler(w http.ResponseWriter, r *http.Request } else if srcVersioningState == s3_constants.VersioningSuspended { // Versioning suspended - current object is stored as regular file ("null" version) // Try regular file first, fall back to latest version if needed - srcPath := util.FullPath(fmt.Sprintf("%s/%s%s", s3a.option.BucketsPath, srcBucket, srcObject)) + srcPath := util.FullPath(fmt.Sprintf("%s/%s/%s", s3a.option.BucketsPath, srcBucket, srcObject)) dir, name := srcPath.DirAndName() entry, err = s3a.getEntry(dir, name) if err != nil { @@ -126,7 +126,7 @@ func (s3a *S3ApiServer) CopyObjectHandler(w http.ResponseWriter, r *http.Request } } else { // No versioning configured - use regular retrieval - srcPath := util.FullPath(fmt.Sprintf("%s/%s%s", s3a.option.BucketsPath, srcBucket, srcObject)) + srcPath := util.FullPath(fmt.Sprintf("%s/%s/%s", s3a.option.BucketsPath, srcBucket, srcObject)) dir, name := srcPath.DirAndName() entry, err = s3a.getEntry(dir, name) } @@ -284,7 +284,7 @@ func (s3a *S3ApiServer) CopyObjectHandler(w http.ResponseWriter, r *http.Request // Calculate ETag for versioning filerEntry := &filer.Entry{ - FullPath: util.FullPath(fmt.Sprintf("%s/%s%s", s3a.option.BucketsPath, dstBucket, dstObject)), + FullPath: util.FullPath(fmt.Sprintf("%s/%s/%s", s3a.option.BucketsPath, dstBucket, dstObject)), Attr: filer.Attr{ FileSize: dstEntry.Attributes.FileSize, Mtime: time.Unix(dstEntry.Attributes.Mtime, 0), @@ -328,7 +328,7 @@ func (s3a *S3ApiServer) CopyObjectHandler(w http.ResponseWriter, r *http.Request // Remove any versioning-related metadata from source that shouldn't carry over cleanupVersioningMetadata(dstEntry.Extended) - dstPath := util.FullPath(fmt.Sprintf("%s/%s%s", s3a.option.BucketsPath, dstBucket, dstObject)) + dstPath := util.FullPath(fmt.Sprintf("%s/%s/%s", s3a.option.BucketsPath, dstBucket, dstObject)) dstDir, dstName := dstPath.DirAndName() // Check if destination exists and remove it first (S3 copy overwrites) @@ -381,7 +381,7 @@ func pathToBucketAndObject(path string) (bucket, object string) { parts := strings.SplitN(path, "/", 2) if len(parts) == 2 { bucket = parts[0] - object = "/" + parts[1] + object = parts[1] return bucket, object } else if len(parts) == 1 && parts[0] != "" { // Only bucket provided, no object @@ -497,7 +497,7 @@ func (s3a *S3ApiServer) CopyObjectPartHandler(w http.ResponseWriter, r *http.Req } else if srcVersioningState == s3_constants.VersioningSuspended { // Versioning suspended - current object is stored as regular file ("null" version) // Try regular file first, fall back to latest version if needed - srcPath := util.FullPath(fmt.Sprintf("%s/%s%s", s3a.option.BucketsPath, srcBucket, srcObject)) + srcPath := util.FullPath(fmt.Sprintf("%s/%s/%s", s3a.option.BucketsPath, srcBucket, srcObject)) dir, name := srcPath.DirAndName() entry, err = s3a.getEntry(dir, name) if err != nil { @@ -507,7 +507,7 @@ func (s3a *S3ApiServer) CopyObjectPartHandler(w http.ResponseWriter, r *http.Req } } else { // No versioning configured - use regular retrieval - srcPath := util.FullPath(fmt.Sprintf("%s/%s%s", s3a.option.BucketsPath, srcBucket, srcObject)) + srcPath := util.FullPath(fmt.Sprintf("%s/%s/%s", s3a.option.BucketsPath, srcBucket, srcObject)) dir, name := srcPath.DirAndName() entry, err = s3a.getEntry(dir, name) } diff --git a/weed/s3api/s3api_object_handlers_delete.go b/weed/s3api/s3api_object_handlers_delete.go index da0b78654..8618933df 100644 --- a/weed/s3api/s3api_object_handlers_delete.go +++ b/weed/s3api/s3api_object_handlers_delete.go @@ -121,7 +121,7 @@ func (s3a *S3ApiServer) DeleteObjectHandler(w http.ResponseWriter, r *http.Reque return } - target := util.FullPath(fmt.Sprintf("%s/%s%s", s3a.option.BucketsPath, bucket, object)) + target := util.FullPath(fmt.Sprintf("%s/%s/%s", s3a.option.BucketsPath, bucket, object)) dir, name := target.DirAndName() err := s3a.WithFilerClient(false, func(client filer_pb.SeaweedFilerClient) error { @@ -331,9 +331,9 @@ func (s3a *S3ApiServer) DeleteMultipleObjectsHandler(w http.ResponseWriter, r *h parentDirectoryPath, entryName, isDeleteData, isRecursive := "", object.Key, true, false if lastSeparator > 0 && lastSeparator+1 < len(object.Key) { entryName = object.Key[lastSeparator+1:] - parentDirectoryPath = "/" + object.Key[:lastSeparator] + parentDirectoryPath = object.Key[:lastSeparator] } - parentDirectoryPath = fmt.Sprintf("%s/%s%s", s3a.option.BucketsPath, bucket, parentDirectoryPath) + parentDirectoryPath = fmt.Sprintf("%s/%s/%s", s3a.option.BucketsPath, bucket, parentDirectoryPath) err := doDeleteEntry(client, parentDirectoryPath, entryName, isDeleteData, isRecursive) if err == nil { diff --git a/weed/s3api/s3api_object_handlers_postpolicy.go b/weed/s3api/s3api_object_handlers_postpolicy.go index e6e885848..58e2a89ac 100644 --- a/weed/s3api/s3api_object_handlers_postpolicy.go +++ b/weed/s3api/s3api_object_handlers_postpolicy.go @@ -114,7 +114,7 @@ func (s3a *S3ApiServer) PostPolicyBucketHandler(w http.ResponseWriter, r *http.R } } - filePath := fmt.Sprintf("%s/%s%s", s3a.option.BucketsPath, bucket, object) + filePath := fmt.Sprintf("%s/%s/%s", s3a.option.BucketsPath, bucket, object) // Get ContentType from post formData // Otherwise from formFile ContentType diff --git a/weed/s3api/s3api_object_handlers_postpolicy_test.go b/weed/s3api/s3api_object_handlers_postpolicy_test.go index 357fb9c7c..0e181d7a1 100644 --- a/weed/s3api/s3api_object_handlers_postpolicy_test.go +++ b/weed/s3api/s3api_object_handlers_postpolicy_test.go @@ -15,54 +15,53 @@ import ( ) // TestPostPolicyKeyNormalization tests that object keys from presigned POST -// are properly normalized with a leading slash and duplicate slashes removed. -// This addresses issue #7713 where keys without leading slashes caused -// bucket and key to be concatenated without a separator. +// are properly normalized without leading slashes and with duplicate slashes removed. +// This ensures consistent key handling across the S3 API. func TestPostPolicyKeyNormalization(t *testing.T) { tests := []struct { name string key string - expectedPrefix string // Expected path prefix after bucket + expectedObject string // Expected normalized object key }{ { name: "key without leading slash", key: "test_image.png", - expectedPrefix: "/test_image.png", + expectedObject: "test_image.png", }, { name: "key with leading slash", key: "/test_image.png", - expectedPrefix: "/test_image.png", + expectedObject: "test_image.png", }, { name: "key with path without leading slash", key: "folder/subfolder/test_image.png", - expectedPrefix: "/folder/subfolder/test_image.png", + expectedObject: "folder/subfolder/test_image.png", }, { name: "key with path with leading slash", key: "/folder/subfolder/test_image.png", - expectedPrefix: "/folder/subfolder/test_image.png", + expectedObject: "folder/subfolder/test_image.png", }, { name: "simple filename", key: "file.txt", - expectedPrefix: "/file.txt", + expectedObject: "file.txt", }, { name: "key with duplicate slashes", key: "folder//subfolder///file.txt", - expectedPrefix: "/folder/subfolder/file.txt", + expectedObject: "folder/subfolder/file.txt", }, { name: "key with leading duplicate slashes", key: "//folder/file.txt", - expectedPrefix: "/folder/file.txt", + expectedObject: "folder/file.txt", }, { name: "key with trailing slash", key: "folder/", - expectedPrefix: "/folder/", + expectedObject: "folder/", }, } @@ -71,15 +70,15 @@ func TestPostPolicyKeyNormalization(t *testing.T) { // Use the actual NormalizeObjectKey function object := s3_constants.NormalizeObjectKey(tt.key) - // Verify the normalized object has the expected prefix - assert.Equal(t, tt.expectedPrefix, object, + // Verify the normalized object matches expected + assert.Equal(t, tt.expectedObject, object, "Key should be normalized correctly") // Verify path construction would be correct bucket := "my-bucket" bucketsPath := "/buckets" - expectedPath := bucketsPath + "/" + bucket + tt.expectedPrefix - actualPath := bucketsPath + "/" + bucket + object + expectedPath := bucketsPath + "/" + bucket + "/" + tt.expectedObject + actualPath := bucketsPath + "/" + bucket + "/" + object assert.Equal(t, expectedPath, actualPath, "File path should be correctly constructed with slash between bucket and key") @@ -98,16 +97,19 @@ func TestNormalizeObjectKey(t *testing.T) { input string expected string }{ - {"empty string", "", "/"}, - {"simple file", "file.txt", "/file.txt"}, - {"with leading slash", "/file.txt", "/file.txt"}, - {"path without slash", "a/b/c.txt", "/a/b/c.txt"}, - {"path with slash", "/a/b/c.txt", "/a/b/c.txt"}, - {"duplicate slashes", "a//b///c.txt", "/a/b/c.txt"}, - {"leading duplicates", "///a/b.txt", "/a/b.txt"}, - {"all duplicates", "//a//b//", "/a/b/"}, - {"just slashes", "///", "/"}, - {"trailing slash", "folder/", "/folder/"}, + {"empty string", "", ""}, + {"simple file", "file.txt", "file.txt"}, + {"with leading slash", "/file.txt", "file.txt"}, + {"path without slash", "a/b/c.txt", "a/b/c.txt"}, + {"path with slash", "/a/b/c.txt", "a/b/c.txt"}, + {"duplicate slashes", "a//b///c.txt", "a/b/c.txt"}, + {"leading duplicates", "///a/b.txt", "a/b.txt"}, + {"all duplicates", "//a//b//", "a/b/"}, + {"just slashes", "///", ""}, + {"trailing slash", "folder/", "folder/"}, + {"backslash to forward slash", "folder\\file.txt", "folder/file.txt"}, + {"windows path", "folder\\subfolder\\file.txt", "folder/subfolder/file.txt"}, + {"mixed slashes", "a/b\\c/d", "a/b/c/d"}, } for _, tt := range tests { @@ -130,25 +132,25 @@ func TestPostPolicyFilenameSubstitution(t *testing.T) { name: "filename at end", keyTemplate: "uploads/${filename}", uploadedFilename: "photo.jpg", - expectedKey: "/uploads/photo.jpg", + expectedKey: "uploads/photo.jpg", }, { name: "filename in middle", keyTemplate: "user/files/${filename}/original", uploadedFilename: "document.pdf", - expectedKey: "/user/files/document.pdf/original", + expectedKey: "user/files/document.pdf/original", }, { name: "no substitution needed", keyTemplate: "static/file.txt", uploadedFilename: "ignored.txt", - expectedKey: "/static/file.txt", + expectedKey: "static/file.txt", }, { name: "filename only", keyTemplate: "${filename}", uploadedFilename: "myfile.png", - expectedKey: "/myfile.png", + expectedKey: "myfile.png", }, } @@ -292,7 +294,7 @@ func TestPostPolicyPathConstruction(t *testing.T) { object := s3_constants.NormalizeObjectKey(tt.formKey) // Construct path as done in PostPolicyBucketHandler - filePath := s3a.option.BucketsPath + "/" + tt.bucket + object + filePath := s3a.option.BucketsPath + "/" + tt.bucket + "/" + object assert.Equal(t, tt.expectedPath, filePath, "File path should be correctly constructed") @@ -374,7 +376,7 @@ func TestPostPolicyBucketHandlerKeyExtraction(t *testing.T) { object := s3_constants.NormalizeObjectKey(formValues.Get("Key")) // Construct path - filePath := s3a.option.BucketsPath + "/" + tt.bucket + object + filePath := s3a.option.BucketsPath + "/" + tt.bucket + "/" + object assert.Contains(t, filePath, tt.wantPathHas, "Path should contain properly separated bucket and key") diff --git a/weed/s3api/s3api_object_handlers_put.go b/weed/s3api/s3api_object_handlers_put.go index 6310e592e..e9e523138 100644 --- a/weed/s3api/s3api_object_handlers_put.go +++ b/weed/s3api/s3api_object_handlers_put.go @@ -113,8 +113,24 @@ func (s3a *S3ApiServer) PutObjectHandler(w http.ResponseWriter, r *http.Request) objectContentType := r.Header.Get("Content-Type") if strings.HasSuffix(object, "/") && r.ContentLength <= 1024 { + // Split the object into directory path and name + objectWithoutSlash := strings.TrimSuffix(object, "/") + dirName := path.Dir(objectWithoutSlash) + entryName := path.Base(objectWithoutSlash) + + if dirName == "." { + dirName = "" + } + dirName = strings.TrimPrefix(dirName, "/") + + // Construct full directory path + fullDirPath := s3a.option.BucketsPath + "/" + bucket + if dirName != "" { + fullDirPath = fullDirPath + "/" + dirName + } + if err := s3a.mkdir( - s3a.option.BucketsPath, bucket+strings.TrimSuffix(object, "/"), + fullDirPath, entryName, func(entry *filer_pb.Entry) { if objectContentType == "" { objectContentType = s3_constants.FolderMimeType @@ -883,13 +899,13 @@ func (s3a *S3ApiServer) updateIsLatestFlagsForSuspendedVersioning(bucket, object versionsObjectPath := object + s3_constants.VersionsFolder versionsDir := bucketDir + "/" + versionsObjectPath - glog.V(2).Infof("updateIsLatestFlagsForSuspendedVersioning: updating flags for %s%s", bucket, object) + glog.V(2).Infof("updateIsLatestFlagsForSuspendedVersioning: updating flags for %s/%s", bucket, object) // Check if .versions directory exists _, err := s3a.getEntry(bucketDir, versionsObjectPath) if err != nil { // No .versions directory exists, nothing to update - glog.V(2).Infof("updateIsLatestFlagsForSuspendedVersioning: no .versions directory for %s%s", bucket, object) + glog.V(2).Infof("updateIsLatestFlagsForSuspendedVersioning: no .versions directory for %s/%s", bucket, object) return nil } @@ -939,7 +955,7 @@ func (s3a *S3ApiServer) updateIsLatestFlagsForSuspendedVersioning(bucket, object return fmt.Errorf("failed to update .versions directory metadata: %v", err) } - glog.V(2).Infof("updateIsLatestFlagsForSuspendedVersioning: cleared latest version metadata for %s%s", bucket, object) + glog.V(2).Infof("updateIsLatestFlagsForSuspendedVersioning: cleared latest version metadata for %s/%s", bucket, object) } return nil diff --git a/weed/s3api/s3api_object_handlers_tagging.go b/weed/s3api/s3api_object_handlers_tagging.go index 7b6b947da..647545254 100644 --- a/weed/s3api/s3api_object_handlers_tagging.go +++ b/weed/s3api/s3api_object_handlers_tagging.go @@ -43,16 +43,16 @@ func (s3a *S3ApiServer) GetObjectTaggingHandler(w http.ResponseWriter, r *http.R // Handle versioned object tagging retrieval if versionId != "" { // Request for specific version - glog.V(2).Infof("GetObjectTaggingHandler: requesting tags for specific version %s of %s%s", versionId, bucket, object) + glog.V(2).Infof("GetObjectTaggingHandler: requesting tags for specific version %s of %s/%s", versionId, bucket, object) entry, err = s3a.getSpecificObjectVersion(bucket, object, versionId) } else { // Request for latest version - glog.V(2).Infof("GetObjectTaggingHandler: requesting tags for latest version of %s%s", bucket, object) + glog.V(2).Infof("GetObjectTaggingHandler: requesting tags for latest version of %s/%s", bucket, object) entry, err = s3a.getLatestObjectVersion(bucket, object) } if err != nil { - glog.Errorf("GetObjectTaggingHandler: Failed to get object version %s for %s%s: %v", versionId, bucket, object, err) + glog.Errorf("GetObjectTaggingHandler: Failed to get object version %s for %s/%s: %v", versionId, bucket, object, err) s3err.WriteErrorResponse(w, r, s3err.ErrNoSuchKey) return } @@ -66,7 +66,7 @@ func (s3a *S3ApiServer) GetObjectTaggingHandler(w http.ResponseWriter, r *http.R } } else { // Handle regular (non-versioned) object tagging retrieval - target := util.FullPath(fmt.Sprintf("%s/%s%s", s3a.option.BucketsPath, bucket, object)) + target := util.FullPath(fmt.Sprintf("%s/%s/%s", s3a.option.BucketsPath, bucket, object)) dir, name := target.DirAndName() tags, err := s3a.getTags(dir, name) @@ -147,16 +147,16 @@ func (s3a *S3ApiServer) PutObjectTaggingHandler(w http.ResponseWriter, r *http.R // Handle versioned object tagging modification if versionId != "" { // Request for specific version - glog.V(2).Infof("PutObjectTaggingHandler: modifying tags for specific version %s of %s%s", versionId, bucket, object) + glog.V(2).Infof("PutObjectTaggingHandler: modifying tags for specific version %s of %s/%s", versionId, bucket, object) entry, err = s3a.getSpecificObjectVersion(bucket, object, versionId) } else { // Request for latest version - glog.V(2).Infof("PutObjectTaggingHandler: modifying tags for latest version of %s%s", bucket, object) + glog.V(2).Infof("PutObjectTaggingHandler: modifying tags for latest version of %s/%s", bucket, object) entry, err = s3a.getLatestObjectVersion(bucket, object) } if err != nil { - glog.Errorf("PutObjectTaggingHandler: Failed to get object version %s for %s%s: %v", versionId, bucket, object, err) + glog.Errorf("PutObjectTaggingHandler: Failed to get object version %s for %s/%s: %v", versionId, bucket, object, err) s3err.WriteErrorResponse(w, r, s3err.ErrNoSuchKey) return } @@ -170,7 +170,7 @@ func (s3a *S3ApiServer) PutObjectTaggingHandler(w http.ResponseWriter, r *http.R } } else { // Handle regular (non-versioned) object tagging modification - target := util.FullPath(fmt.Sprintf("%s/%s%s", s3a.option.BucketsPath, bucket, object)) + target := util.FullPath(fmt.Sprintf("%s/%s/%s", s3a.option.BucketsPath, bucket, object)) dir, name := target.DirAndName() if err = s3a.setTags(dir, name, tags); err != nil { @@ -198,7 +198,7 @@ func (s3a *S3ApiServer) PutObjectTaggingHandler(w http.ResponseWriter, r *http.R updateDirectory = s3a.option.BucketsPath + "/" + bucket } else { // Versioned object - stored in .versions directory - updateDirectory = s3a.option.BucketsPath + "/" + bucket + object + s3_constants.VersionsFolder + updateDirectory = s3a.option.BucketsPath + "/" + bucket + "/" + object + s3_constants.VersionsFolder } } else { // Latest version in versioned bucket - could be null version or versioned object @@ -215,7 +215,7 @@ func (s3a *S3ApiServer) PutObjectTaggingHandler(w http.ResponseWriter, r *http.R updateDirectory = s3a.option.BucketsPath + "/" + bucket } else { // Versioned object - stored in .versions directory - updateDirectory = s3a.option.BucketsPath + "/" + bucket + object + s3_constants.VersionsFolder + updateDirectory = s3a.option.BucketsPath + "/" + bucket + "/" + object + s3_constants.VersionsFolder } } @@ -262,7 +262,7 @@ func (s3a *S3ApiServer) PutObjectTaggingHandler(w http.ResponseWriter, r *http.R func (s3a *S3ApiServer) DeleteObjectTaggingHandler(w http.ResponseWriter, r *http.Request) { bucket, object := s3_constants.GetBucketAndObject(r) - glog.V(3).Infof("DeleteObjectTaggingHandler %s %s", bucket, object) + glog.V(3).Infof("DeleteObjectTaggingHandler %s/%s", bucket, object) // Check for specific version ID in query parameters versionId := r.URL.Query().Get("versionId") @@ -285,16 +285,16 @@ func (s3a *S3ApiServer) DeleteObjectTaggingHandler(w http.ResponseWriter, r *htt // Handle versioned object tagging deletion if versionId != "" { // Request for specific version - glog.V(2).Infof("DeleteObjectTaggingHandler: deleting tags for specific version %s of %s%s", versionId, bucket, object) + glog.V(2).Infof("DeleteObjectTaggingHandler: deleting tags for specific version %s of %s/%s", versionId, bucket, object) entry, err = s3a.getSpecificObjectVersion(bucket, object, versionId) } else { // Request for latest version - glog.V(2).Infof("DeleteObjectTaggingHandler: deleting tags for latest version of %s%s", bucket, object) + glog.V(2).Infof("DeleteObjectTaggingHandler: deleting tags for latest version of %s/%s", bucket, object) entry, err = s3a.getLatestObjectVersion(bucket, object) } if err != nil { - glog.Errorf("DeleteObjectTaggingHandler: Failed to get object version %s for %s%s: %v", versionId, bucket, object, err) + glog.Errorf("DeleteObjectTaggingHandler: Failed to get object version %s for %s/%s: %v", versionId, bucket, object, err) s3err.WriteErrorResponse(w, r, s3err.ErrNoSuchKey) return } @@ -308,7 +308,7 @@ func (s3a *S3ApiServer) DeleteObjectTaggingHandler(w http.ResponseWriter, r *htt } } else { // Handle regular (non-versioned) object tagging deletion - target := util.FullPath(fmt.Sprintf("%s/%s%s", s3a.option.BucketsPath, bucket, object)) + target := util.FullPath(fmt.Sprintf("%s/%s/%s", s3a.option.BucketsPath, bucket, object)) dir, name := target.DirAndName() err := s3a.rmTags(dir, name) @@ -337,7 +337,7 @@ func (s3a *S3ApiServer) DeleteObjectTaggingHandler(w http.ResponseWriter, r *htt updateDirectory = s3a.option.BucketsPath + "/" + bucket } else { // Versioned object - stored in .versions directory - updateDirectory = s3a.option.BucketsPath + "/" + bucket + object + s3_constants.VersionsFolder + updateDirectory = s3a.option.BucketsPath + "/" + bucket + "/" + object + s3_constants.VersionsFolder } } else { // Latest version in versioned bucket - could be null version or versioned object @@ -354,7 +354,7 @@ func (s3a *S3ApiServer) DeleteObjectTaggingHandler(w http.ResponseWriter, r *htt updateDirectory = s3a.option.BucketsPath + "/" + bucket } else { // Versioned object - stored in .versions directory - updateDirectory = s3a.option.BucketsPath + "/" + bucket + object + s3_constants.VersionsFolder + updateDirectory = s3a.option.BucketsPath + "/" + bucket + "/" + object + s3_constants.VersionsFolder } } diff --git a/weed/s3api/s3api_object_versioning.go b/weed/s3api/s3api_object_versioning.go index a14af6a10..3fadc46cb 100644 --- a/weed/s3api/s3api_object_versioning.go +++ b/weed/s3api/s3api_object_versioning.go @@ -348,20 +348,19 @@ func (vc *versionCollector) isFull() bool { // matchesPrefixFilter checks if an entry path matches the prefix filter func (vc *versionCollector) matchesPrefixFilter(entryPath string, isDirectory bool) bool { - normalizedPrefix := strings.TrimPrefix(vc.prefix, "/") - if normalizedPrefix == "" { + if vc.prefix == "" { return true } // Entry matches if its path starts with the prefix - isMatch := strings.HasPrefix(entryPath, normalizedPrefix) + isMatch := strings.HasPrefix(entryPath, vc.prefix) if !isMatch && isDirectory { // Directory might match with trailing slash - isMatch = strings.HasPrefix(entryPath+"/", normalizedPrefix) + isMatch = strings.HasPrefix(entryPath+"/", vc.prefix) } // For directories, also check if we need to descend (prefix is deeper) - canDescend := isDirectory && strings.HasPrefix(normalizedPrefix, entryPath) + canDescend := isDirectory && strings.HasPrefix(vc.prefix, entryPath) return isMatch || canDescend } @@ -423,7 +422,7 @@ func (vc *versionCollector) addVersion(version *ObjectVersion, objectKey string) // processVersionsDirectory handles a .versions directory entry func (vc *versionCollector) processVersionsDirectory(entryPath string) error { objectKey := strings.TrimSuffix(entryPath, s3_constants.VersionsFolder) - normalizedObjectKey := removeDuplicateSlashes(objectKey) + normalizedObjectKey := s3_constants.NormalizeObjectKey(objectKey) // Mark as processed vc.processedObjects[objectKey] = true @@ -493,7 +492,7 @@ func (vc *versionCollector) processExplicitDirectory(entryPath string, entry *fi // processRegularFile handles a regular file entry (pre-versioning or suspended-versioning object) func (vc *versionCollector) processRegularFile(currentPath, entryPath string, entry *filer_pb.Entry) { objectKey := entryPath - normalizedObjectKey := removeDuplicateSlashes(objectKey) + normalizedObjectKey := s3_constants.NormalizeObjectKey(objectKey) // Skip files before keyMarker if vc.shouldSkipObjectForMarker(normalizedObjectKey) { @@ -780,11 +779,11 @@ func (s3a *S3ApiServer) calculateETagFromChunks(chunks []*filer_pb.FileChunk) st // getSpecificObjectVersion retrieves a specific version of an object func (s3a *S3ApiServer) getSpecificObjectVersion(bucket, object, versionId string) (*filer_pb.Entry, error) { // Normalize object path to ensure consistency with toFilerPath behavior - normalizedObject := removeDuplicateSlashes(object) + normalizedObject := s3_constants.NormalizeObjectKey(object) if versionId == "" { // Get current version - return s3a.getEntry(path.Join(s3a.option.BucketsPath, bucket), strings.TrimPrefix(normalizedObject, "/")) + return s3a.getEntry(path.Join(s3a.option.BucketsPath, bucket), normalizedObject) } if versionId == "null" { @@ -812,7 +811,7 @@ func (s3a *S3ApiServer) getSpecificObjectVersion(bucket, object, versionId strin // deleteSpecificObjectVersion deletes a specific version of an object func (s3a *S3ApiServer) deleteSpecificObjectVersion(bucket, object, versionId string) error { // Normalize object path to ensure consistency with toFilerPath behavior - normalizedObject := removeDuplicateSlashes(object) + normalizedObject := s3_constants.NormalizeObjectKey(object) if versionId == "" { return fmt.Errorf("version ID is required for version-specific deletion") @@ -821,25 +820,24 @@ func (s3a *S3ApiServer) deleteSpecificObjectVersion(bucket, object, versionId st if versionId == "null" { // Delete "null" version (pre-versioning object stored as regular file) bucketDir := s3a.option.BucketsPath + "/" + bucket - cleanObject := strings.TrimPrefix(normalizedObject, "/") // Check if the object exists - _, err := s3a.getEntry(bucketDir, cleanObject) + _, err := s3a.getEntry(bucketDir, normalizedObject) if err != nil { // Object doesn't exist - this is OK for delete operations (idempotent) - glog.V(2).Infof("deleteSpecificObjectVersion: null version object %s already deleted or doesn't exist", cleanObject) + glog.V(2).Infof("deleteSpecificObjectVersion: null version object %s already deleted or doesn't exist", normalizedObject) return nil } // Delete the regular file - deleteErr := s3a.rm(bucketDir, cleanObject, true, false) + deleteErr := s3a.rm(bucketDir, normalizedObject, true, false) if deleteErr != nil { // Check if file was already deleted by another process - if _, checkErr := s3a.getEntry(bucketDir, cleanObject); checkErr != nil { + if _, checkErr := s3a.getEntry(bucketDir, normalizedObject); checkErr != nil { // File doesn't exist anymore, deletion was successful return nil } - return fmt.Errorf("failed to delete null version %s: %v", cleanObject, deleteErr) + return fmt.Errorf("failed to delete null version %s: %v", normalizedObject, deleteErr) } return nil } @@ -864,7 +862,7 @@ func (s3a *S3ApiServer) deleteSpecificObjectVersion(bucket, object, versionId st // Check if file was already deleted by another process (race condition handling) if _, checkErr := s3a.getEntry(versionsDir, versionFile); checkErr != nil { // File doesn't exist anymore, deletion was successful (another thread deleted it) - glog.V(2).Infof("deleteSpecificObjectVersion: version %s for %s%s already deleted by another process", versionId, bucket, object) + glog.V(2).Infof("deleteSpecificObjectVersion: version %s for %s/%s already deleted by another process", versionId, bucket, object) return nil } // File still exists but deletion failed for another reason @@ -873,7 +871,7 @@ func (s3a *S3ApiServer) deleteSpecificObjectVersion(bucket, object, versionId st // If we deleted the latest version, update the .versions directory metadata to point to the new latest if isLatestVersion { - err := s3a.updateLatestVersionAfterDeletion(bucket, object) + err := s3a.updateLatestVersionAfterDeletion(bucket, normalizedObject) if err != nil { glog.Warningf("deleteSpecificObjectVersion: failed to update latest version after deletion: %v", err) // Don't return error since the deletion was successful @@ -886,8 +884,7 @@ func (s3a *S3ApiServer) deleteSpecificObjectVersion(bucket, object, versionId st // updateLatestVersionAfterDeletion finds the new latest version after deleting the current latest func (s3a *S3ApiServer) updateLatestVersionAfterDeletion(bucket, object string) error { bucketDir := s3a.option.BucketsPath + "/" + bucket - cleanObject := strings.TrimPrefix(object, "/") - versionsObjectPath := cleanObject + s3_constants.VersionsFolder + versionsObjectPath := object + s3_constants.VersionsFolder versionsDir := bucketDir + "/" + versionsObjectPath glog.V(1).Infof("updateLatestVersionAfterDeletion: updating latest version for %s/%s, listing %s", bucket, object, versionsDir) @@ -989,9 +986,7 @@ func (s3a *S3ApiServer) ListObjectVersionsHandler(w http.ResponseWriter, r *http query := r.URL.Query() originalPrefix := query.Get("prefix") // Keep original prefix for response prefix := originalPrefix // Use for internal processing - if prefix != "" && !strings.HasPrefix(prefix, "/") { - prefix = "/" + prefix - } + // Note: prefix is used for filtering relative to bucket root, so no leading slash needed keyMarker := query.Get("key-marker") versionIdMarker := query.Get("version-id-marker") @@ -1022,7 +1017,7 @@ func (s3a *S3ApiServer) ListObjectVersionsHandler(w http.ResponseWriter, r *http // getLatestObjectVersion finds the latest version of an object by reading .versions directory metadata func (s3a *S3ApiServer) getLatestObjectVersion(bucket, object string) (*filer_pb.Entry, error) { // Normalize object path to ensure consistency with toFilerPath behavior - normalizedObject := removeDuplicateSlashes(object) + normalizedObject := s3_constants.NormalizeObjectKey(object) bucketDir := s3a.option.BucketsPath + "/" + bucket versionsObjectPath := normalizedObject + s3_constants.VersionsFolder @@ -1050,12 +1045,12 @@ func (s3a *S3ApiServer) getLatestObjectVersion(bucket, object string) (*filer_pb // .versions directory doesn't exist - this can happen for objects that existed // before versioning was enabled on the bucket. Fall back to checking for a // regular (non-versioned) object file. - glog.V(1).Infof("getLatestObjectVersion: no .versions directory for %s%s after %d attempts (error: %v), checking for pre-versioning object", bucket, normalizedObject, maxRetries, err) + glog.V(1).Infof("getLatestObjectVersion: no .versions directory for %s/%s after %d attempts (error: %v), checking for pre-versioning object", bucket, normalizedObject, maxRetries, err) regularEntry, regularErr := s3a.getEntry(bucketDir, normalizedObject) if regularErr != nil { - glog.V(1).Infof("getLatestObjectVersion: no pre-versioning object found for %s%s (error: %v)", bucket, normalizedObject, regularErr) - return nil, fmt.Errorf("failed to get %s%s .versions directory and no regular object found: %w", bucket, normalizedObject, err) + glog.V(1).Infof("getLatestObjectVersion: no pre-versioning object found for %s/%s (error: %v)", bucket, normalizedObject, regularErr) + return nil, fmt.Errorf("failed to get %s/%s .versions directory and no regular object found: %w", bucket, normalizedObject, err) } glog.V(1).Infof("getLatestObjectVersion: found pre-versioning object for %s/%s", bucket, normalizedObject) @@ -1081,14 +1076,14 @@ func (s3a *S3ApiServer) getLatestObjectVersion(bucket, object string) (*filer_pb // If still no metadata after retries, fall back to pre-versioning object if versionsEntry.Extended == nil { - glog.V(2).Infof("getLatestObjectVersion: no Extended metadata in .versions directory for %s%s after retries, checking for pre-versioning object", bucket, object) + glog.V(2).Infof("getLatestObjectVersion: no Extended metadata in .versions directory for %s/%s after retries, checking for pre-versioning object", bucket, object) regularEntry, regularErr := s3a.getEntry(bucketDir, normalizedObject) if regularErr != nil { - return nil, fmt.Errorf("no version metadata in .versions directory and no regular object found for %s%s", bucket, normalizedObject) + return nil, fmt.Errorf("no version metadata in .versions directory and no regular object found for %s/%s", bucket, normalizedObject) } - glog.V(2).Infof("getLatestObjectVersion: found pre-versioning object for %s%s (no Extended metadata case)", bucket, object) + glog.V(2).Infof("getLatestObjectVersion: found pre-versioning object for %s/%s (no Extended metadata case)", bucket, object) return regularEntry, nil } } @@ -1103,10 +1098,10 @@ func (s3a *S3ApiServer) getLatestObjectVersion(bucket, object string) (*filer_pb regularEntry, regularErr := s3a.getEntry(bucketDir, normalizedObject) if regularErr != nil { - return nil, fmt.Errorf("no version metadata in .versions directory and no regular object found for %s%s", bucket, normalizedObject) + return nil, fmt.Errorf("no version metadata in .versions directory and no regular object found for %s/%s", bucket, normalizedObject) } - glog.V(2).Infof("getLatestObjectVersion: found pre-versioning object for %s%s after version deletion", bucket, object) + glog.V(2).Infof("getLatestObjectVersion: found pre-versioning object for %s/%s after version deletion", bucket, object) return regularEntry, nil } @@ -1139,7 +1134,7 @@ func (s3a *S3ApiServer) getLatestVersionEntryFromDirectoryEntry(bucket, object s return nil, fmt.Errorf("nil .versions directory entry") } - normalizedObject := removeDuplicateSlashes(object) + normalizedObject := s3_constants.NormalizeObjectKey(object) // Check if the directory entry has latest version metadata if versionsDirEntry.Extended == nil { diff --git a/weed/s3api/s3api_version_id.go b/weed/s3api/s3api_version_id.go index db347d927..5f74d36a8 100644 --- a/weed/s3api/s3api_version_id.go +++ b/weed/s3api/s3api_version_id.go @@ -6,7 +6,6 @@ import ( "fmt" "math" "strconv" - "strings" "time" "github.com/seaweedfs/seaweedfs/weed/glog" @@ -155,9 +154,8 @@ func (s3a *S3ApiServer) getVersionFileName(versionId string) string { // For new .versions directories, returns true (use new format). // For existing directories, infers format from the latest version ID. func (s3a *S3ApiServer) getVersionIdFormat(bucket, object string) bool { - cleanObject := strings.TrimPrefix(object, "/") bucketDir := s3a.option.BucketsPath + "/" + bucket - versionsPath := cleanObject + s3_constants.VersionsFolder + versionsPath := object + s3_constants.VersionsFolder // Try to get the .versions directory entry versionsEntry, err := s3a.getEntry(bucketDir, versionsPath) diff --git a/weed/storage/backend/s3_backend/s3_download.go b/weed/storage/backend/s3_backend/s3_download.go index b0d30fbdb..af6f7b4f3 100644 --- a/weed/storage/backend/s3_backend/s3_download.go +++ b/weed/storage/backend/s3_backend/s3_download.go @@ -47,7 +47,7 @@ func downloadFromS3(sess s3iface.S3API, destFileName string, sourceBucket string Key: aws.String(sourceKey), }) if err != nil { - return fileSize, fmt.Errorf("failed to download /buckets/%s%s to %s: %v", sourceBucket, sourceKey, destFileName, err) + return fileSize, fmt.Errorf("failed to download /buckets/%s/%s to %s: %v", sourceBucket, sourceKey, destFileName, err) } glog.V(1).Infof("downloaded file %s\n", destFileName)