diff --git a/weed/s3api/auth_credentials.go b/weed/s3api/auth_credentials.go index 289fbd556..0d99e43eb 100644 --- a/weed/s3api/auth_credentials.go +++ b/weed/s3api/auth_credentials.go @@ -421,8 +421,10 @@ func (iam *IdentityAccessManagement) Auth(f http.HandlerFunc, action Action) htt glog.V(3).Infof("auth error: %v", errCode) if errCode == s3err.ErrNone { + // Store the authenticated identity in request context (secure, cannot be spoofed) if identity != nil && identity.Name != "" { - r.Header.Set(s3_constants.AmzIdentityId, identity.Name) + ctx := s3_constants.SetIdentityNameInContext(r.Context(), identity.Name) + r = r.WithContext(ctx) } f(w, r) return diff --git a/weed/s3api/s3_constants/header.go b/weed/s3api/s3_constants/header.go index 1ef6f62c5..a232eb189 100644 --- a/weed/s3api/s3_constants/header.go +++ b/weed/s3api/s3_constants/header.go @@ -17,6 +17,7 @@ package s3_constants import ( + "context" "net/http" "strings" @@ -44,8 +45,6 @@ const ( AmzObjectTaggingDirective = "X-Amz-Tagging-Directive" AmzTagCount = "x-amz-tagging-count" - SeaweedFSIsDirectoryKey = "X-Seaweedfs-Is-Directory-Key" - SeaweedFSPartNumber = "X-Seaweedfs-Part-Number" SeaweedFSUploadId = "X-Seaweedfs-Upload-Id" SeaweedFSMultipartPartsCount = "X-Seaweedfs-Multipart-Parts-Count" SeaweedFSMultipartPartBoundaries = "X-Seaweedfs-Multipart-Part-Boundaries" // JSON: [{part:1,start:0,end:2,etag:"abc"},{part:2,start:2,end:3,etag:"def"}] @@ -174,3 +173,29 @@ var PassThroughHeaders = map[string]string{ func IsSeaweedFSInternalHeader(headerKey string) bool { return strings.HasPrefix(strings.ToLower(headerKey), SeaweedFSInternalPrefix) } + +// Context keys for storing authenticated identity information +type contextKey string + +const ( + contextKeyIdentityName contextKey = "s3-identity-name" +) + +// SetIdentityNameInContext stores the authenticated identity name in the request context +// This is the secure way to propagate identity - headers can be spoofed, context cannot +func SetIdentityNameInContext(ctx context.Context, identityName string) context.Context { + if identityName != "" { + return context.WithValue(ctx, contextKeyIdentityName, identityName) + } + return ctx +} + +// GetIdentityNameFromContext retrieves the authenticated identity name from the request context +// Returns empty string if no identity is set (unauthenticated request) +// This is the secure way to retrieve identity - never read from headers directly +func GetIdentityNameFromContext(r *http.Request) string { + if name, ok := r.Context().Value(contextKeyIdentityName).(string); ok { + return name + } + return "" +} diff --git a/weed/s3api/s3_metadata_util.go b/weed/s3api/s3_metadata_util.go new file mode 100644 index 000000000..37363752a --- /dev/null +++ b/weed/s3api/s3_metadata_util.go @@ -0,0 +1,94 @@ +package s3api + +import ( + "net/http" + "net/url" + "strings" + + "github.com/seaweedfs/seaweedfs/weed/glog" + "github.com/seaweedfs/seaweedfs/weed/s3api/s3_constants" + "github.com/seaweedfs/seaweedfs/weed/s3api/s3err" +) + +// ParseS3Metadata extracts S3-specific metadata from HTTP request headers +// This includes: storage class, tags, user metadata, SSE headers, and ACL headers +// Used by S3 API handlers to prepare metadata before saving to filer +// Returns an S3 error code if tag parsing fails +func ParseS3Metadata(r *http.Request, existing map[string][]byte, isReplace bool) (metadata map[string][]byte, errCode s3err.ErrorCode) { + metadata = make(map[string][]byte) + + // Copy existing metadata unless replacing + if !isReplace { + for k, v := range existing { + metadata[k] = v + } + } + + // Storage class + if sc := r.Header.Get(s3_constants.AmzStorageClass); sc != "" { + metadata[s3_constants.AmzStorageClass] = []byte(sc) + } + + // Content-Encoding (standard HTTP header used by S3) + if ce := r.Header.Get("Content-Encoding"); ce != "" { + metadata["Content-Encoding"] = []byte(ce) + } + + // Object tagging + if tags := r.Header.Get(s3_constants.AmzObjectTagging); tags != "" { + // Use url.ParseQuery for robust parsing and automatic URL decoding + parsedTags, err := url.ParseQuery(tags) + if err != nil { + // Return proper S3 error instead of silently dropping tags + glog.Warningf("Invalid S3 tag format in header '%s': %v", tags, err) + return nil, s3err.ErrInvalidTag + } + + // Validate: S3 spec does not allow duplicate tag keys + for key, values := range parsedTags { + if len(values) > 1 { + glog.Warningf("Duplicate tag key '%s' in header '%s'", key, tags) + return nil, s3err.ErrInvalidTag + } + // Tag value can be an empty string but not nil + value := "" + if len(values) > 0 { + value = values[0] + } + metadata[s3_constants.AmzObjectTagging+"-"+key] = []byte(value) + } + } + + // User-defined metadata (x-amz-meta-* headers) + for header, values := range r.Header { + if strings.HasPrefix(header, s3_constants.AmzUserMetaPrefix) { + // Go's HTTP server canonicalizes headers (e.g., x-amz-meta-foo → X-Amz-Meta-Foo) + // Per HTTP and S3 spec: multiple header values are concatenated with commas + // This ensures no metadata is lost when clients send duplicate header names + metadata[header] = []byte(strings.Join(values, ",")) + } + } + + // SSE-C headers + if algorithm := r.Header.Get(s3_constants.AmzServerSideEncryptionCustomerAlgorithm); algorithm != "" { + metadata[s3_constants.AmzServerSideEncryptionCustomerAlgorithm] = []byte(algorithm) + } + if keyMD5 := r.Header.Get(s3_constants.AmzServerSideEncryptionCustomerKeyMD5); keyMD5 != "" { + // Store as-is; SSE-C MD5 is base64 and case-sensitive + metadata[s3_constants.AmzServerSideEncryptionCustomerKeyMD5] = []byte(keyMD5) + } + + // ACL owner + acpOwner := r.Header.Get(s3_constants.ExtAmzOwnerKey) + if len(acpOwner) > 0 { + metadata[s3_constants.ExtAmzOwnerKey] = []byte(acpOwner) + } + + // ACL grants + acpGrants := r.Header.Get(s3_constants.ExtAmzAclKey) + if len(acpGrants) > 0 { + metadata[s3_constants.ExtAmzAclKey] = []byte(acpGrants) + } + + return metadata, s3err.ErrNone +} diff --git a/weed/s3api/s3api_bucket_handlers.go b/weed/s3api/s3api_bucket_handlers.go index 4222c911e..eaff6d442 100644 --- a/weed/s3api/s3api_bucket_handlers.go +++ b/weed/s3api/s3api_bucket_handlers.go @@ -59,12 +59,9 @@ func (s3a *S3ApiServer) ListBucketsHandler(w http.ResponseWriter, r *http.Reques return } - identityId := "" - if identity != nil { - identityId = identity.Name - } - // Note: For unauthenticated requests, identityId remains empty. - // We never read from request headers to prevent reflecting unvalidated user input. + // Get authenticated identity from context (secure, cannot be spoofed) + // For unauthenticated requests, this returns empty string + identityId := s3_constants.GetIdentityNameFromContext(r) var listBuckets ListAllMyBucketsList for _, entry := range entries { @@ -164,7 +161,8 @@ func (s3a *S3ApiServer) PutBucketHandler(w http.ResponseWriter, r *http.Request) } // Check if bucket already exists and handle ownership/settings - currentIdentityId := r.Header.Get(s3_constants.AmzIdentityId) + // Get authenticated identity from context (secure, cannot be spoofed) + currentIdentityId := s3_constants.GetIdentityNameFromContext(r) // Check collection existence first collectionExists := false @@ -247,11 +245,12 @@ func (s3a *S3ApiServer) PutBucketHandler(w http.ResponseWriter, r *http.Request) } fn := func(entry *filer_pb.Entry) { - if identityId := r.Header.Get(s3_constants.AmzIdentityId); identityId != "" { + // Reuse currentIdentityId from above (already retrieved from context) + if currentIdentityId != "" { if entry.Extended == nil { entry.Extended = make(map[string][]byte) } - entry.Extended[s3_constants.AmzIdentityId] = []byte(identityId) + entry.Extended[s3_constants.AmzIdentityId] = []byte(currentIdentityId) } } @@ -576,7 +575,8 @@ func (s3a *S3ApiServer) hasAccess(r *http.Request, entry *filer_pb.Entry) bool { return true } - identityId := r.Header.Get(s3_constants.AmzIdentityId) + // Get authenticated identity from context (secure, cannot be spoofed) + identityId := s3_constants.GetIdentityNameFromContext(r) if id, ok := entry.Extended[s3_constants.AmzIdentityId]; ok { if identityId != string(id) { glog.V(3).Infof("hasAccess: %s != %s (entry.Extended = %v)", identityId, id, entry.Extended) diff --git a/weed/s3api/s3api_object_handlers_multipart.go b/weed/s3api/s3api_object_handlers_multipart.go index 3ea709b31..ba9886d66 100644 --- a/weed/s3api/s3api_object_handlers_multipart.go +++ b/weed/s3api/s3api_object_handlers_multipart.go @@ -20,7 +20,6 @@ import ( "github.com/seaweedfs/seaweedfs/weed/pb/filer_pb" "github.com/seaweedfs/seaweedfs/weed/s3api/s3_constants" "github.com/seaweedfs/seaweedfs/weed/s3api/s3err" - weed_server "github.com/seaweedfs/seaweedfs/weed/server" stats_collect "github.com/seaweedfs/seaweedfs/weed/stats" ) @@ -65,7 +64,12 @@ func (s3a *S3ApiServer) NewMultipartUploadHandler(w http.ResponseWriter, r *http Metadata: make(map[string]*string), } - metadata := weed_server.SaveAmzMetaData(r, nil, false) + // Parse S3 metadata from request headers + metadata, errCode := ParseS3Metadata(r, nil, false) + if errCode != s3err.ErrNone { + s3err.WriteErrorResponse(w, r, errCode) + return + } for k, v := range metadata { createMultipartUploadInput.Metadata[k] = aws.String(string(v)) } diff --git a/weed/s3api/s3err/audit_fluent.go b/weed/s3api/s3err/audit_fluent.go index ef2459eac..5d617ce1c 100644 --- a/weed/s3api/s3err/audit_fluent.go +++ b/weed/s3api/s3err/audit_fluent.go @@ -152,7 +152,7 @@ func GetAccessLog(r *http.Request, HTTPStatusCode int, s3errCode ErrorCode) *Acc HostHeader: hostHeader, RequestID: r.Header.Get("X-Request-ID"), RemoteIP: remoteIP, - Requester: r.Header.Get(s3_constants.AmzIdentityId), + Requester: s3_constants.GetIdentityNameFromContext(r), // Get from context, not header (secure) SignatureVersion: r.Header.Get(s3_constants.AmzAuthType), UserAgent: r.Header.Get("user-agent"), HostId: hostname, diff --git a/weed/server/filer_server_handlers_read.go b/weed/server/filer_server_handlers_read.go index 1a66dd045..3db936f43 100644 --- a/weed/server/filer_server_handlers_read.go +++ b/weed/server/filer_server_handlers_read.go @@ -2,9 +2,6 @@ package weed_server import ( "context" - "encoding/base64" - "encoding/hex" - "errors" "fmt" "io" "math" @@ -15,12 +12,11 @@ import ( "strings" "time" - "github.com/seaweedfs/seaweedfs/weed/s3api/s3_constants" - "github.com/seaweedfs/seaweedfs/weed/security" - "github.com/seaweedfs/seaweedfs/weed/filer" "github.com/seaweedfs/seaweedfs/weed/glog" "github.com/seaweedfs/seaweedfs/weed/pb/filer_pb" + "github.com/seaweedfs/seaweedfs/weed/s3api/s3_constants" + "github.com/seaweedfs/seaweedfs/weed/security" "github.com/seaweedfs/seaweedfs/weed/stats" "github.com/seaweedfs/seaweedfs/weed/util" ) @@ -122,22 +118,8 @@ func (fs *FilerServer) GetOrHeadHandler(w http.ResponseWriter, r *http.Request) writeJsonQuiet(w, r, http.StatusOK, entry) return } - if entry.Attr.Mime == "" || (entry.Attr.Mime == s3_constants.FolderMimeType && r.Header.Get(s3_constants.AmzIdentityId) == "") { - // Don't return directory meta if config value is set to true - if fs.option.ExposeDirectoryData == false { - writeJsonError(w, r, http.StatusForbidden, errors.New("directory listing is disabled")) - return - } - // return index of directory for non s3 gateway - fs.listDirectoryHandler(w, r) - return - } - // inform S3 API this is a user created directory key object - w.Header().Set(s3_constants.SeaweedFSIsDirectoryKey, "true") - } - - if isForDirectory && entry.Attr.Mime != s3_constants.FolderMimeType { - w.WriteHeader(http.StatusNotFound) + // listDirectoryHandler checks ExposeDirectoryData internally + fs.listDirectoryHandler(w, r) return } @@ -160,22 +142,8 @@ func (fs *FilerServer) GetOrHeadHandler(w http.ResponseWriter, r *http.Request) return } - var etag string - if partNumber, errNum := strconv.Atoi(r.Header.Get(s3_constants.SeaweedFSPartNumber)); errNum == nil { - if len(entry.Chunks) < partNumber { - stats.FilerHandlerCounter.WithLabelValues(stats.ErrorReadChunk).Inc() - w.WriteHeader(http.StatusBadRequest) - w.Write([]byte("InvalidPart")) - return - } - w.Header().Set(s3_constants.AmzMpPartsCount, strconv.Itoa(len(entry.Chunks))) - partChunk := entry.GetChunks()[partNumber-1] - md5, _ := base64.StdEncoding.DecodeString(partChunk.ETag) - etag = hex.EncodeToString(md5) - r.Header.Set("Range", fmt.Sprintf("bytes=%d-%d", partChunk.Offset, uint64(partChunk.Offset)+partChunk.Size-1)) - } else { - etag = filer.ETagEntry(entry) - } + // Generate ETag for response + etag := filer.ETagEntry(entry) w.Header().Set("Accept-Ranges", "bytes") // mime type @@ -192,10 +160,9 @@ func (fs *FilerServer) GetOrHeadHandler(w http.ResponseWriter, r *http.Request) } // print out the header from extended properties + // Filter out xattr-* (filesystem extended attributes) and internal SeaweedFS headers for k, v := range entry.Extended { if !strings.HasPrefix(k, "xattr-") && !s3_constants.IsSeaweedFSInternalHeader(k) { - // "xattr-" prefix is set in filesys.XATTR_PREFIX - // IsSeaweedFSInternalHeader filters internal metadata that should not become HTTP headers w.Header().Set(k, string(v)) } } @@ -210,17 +177,6 @@ func (fs *FilerServer) GetOrHeadHandler(w http.ResponseWriter, r *http.Request) seaweedHeaders = append(seaweedHeaders, "Content-Disposition") w.Header().Set("Access-Control-Expose-Headers", strings.Join(seaweedHeaders, ",")) - //set tag count - tagCount := 0 - for k := range entry.Extended { - if strings.HasPrefix(k, s3_constants.AmzObjectTagging+"-") { - tagCount++ - } - } - if tagCount > 0 { - w.Header().Set(s3_constants.AmzTagCount, strconv.Itoa(tagCount)) - } - SetEtag(w, etag) filename := entry.Name() diff --git a/weed/server/filer_server_handlers_write_autochunk.go b/weed/server/filer_server_handlers_write_autochunk.go index 4a200cf43..8647bf696 100644 --- a/weed/server/filer_server_handlers_write_autochunk.go +++ b/weed/server/filer_server_handlers_write_autochunk.go @@ -7,7 +7,6 @@ import ( "fmt" "io" "net/http" - "net/url" "os" "path" "strconv" @@ -18,7 +17,6 @@ import ( "github.com/seaweedfs/seaweedfs/weed/glog" "github.com/seaweedfs/seaweedfs/weed/operation" "github.com/seaweedfs/seaweedfs/weed/pb/filer_pb" - "github.com/seaweedfs/seaweedfs/weed/s3api/s3_constants" "github.com/seaweedfs/seaweedfs/weed/storage/needle" "github.com/seaweedfs/seaweedfs/weed/util" "github.com/seaweedfs/seaweedfs/weed/util/constants" @@ -135,17 +133,8 @@ func (fs *FilerServer) doPutAutoChunk(ctx context.Context, w http.ResponseWriter if err := fs.checkPermissions(ctx, r, fileName); err != nil { return nil, nil, err } - // Disable TTL-based (creation time) deletion when S3 expiry (modification time) is enabled - soMaybeWithOutTTL := so - if so.TtlSeconds > 0 { - if s3ExpiresValue := r.Header.Get(s3_constants.SeaweedFSExpiresS3); s3ExpiresValue == "true" { - clone := *so - clone.TtlSeconds = 0 - soMaybeWithOutTTL = &clone - } - } - fileChunks, md5Hash, chunkOffset, err, smallContent := fs.uploadRequestToChunks(ctx, w, r, r.Body, chunkSize, fileName, contentType, contentLength, soMaybeWithOutTTL) + fileChunks, md5Hash, chunkOffset, err, smallContent := fs.uploadRequestToChunks(ctx, w, r, r.Body, chunkSize, fileName, contentType, contentLength, so) if err != nil { return nil, nil, err @@ -333,18 +322,13 @@ func (fs *FilerServer) saveMetaData(ctx context.Context, r *http.Request, fileNa Size: int64(entry.FileSize), } - entry.Extended = SaveAmzMetaData(r, entry.Extended, false) - if entry.TtlSec > 0 && r.Header.Get(s3_constants.SeaweedFSExpiresS3) == "true" { - entry.Extended[s3_constants.SeaweedFSExpiresS3] = []byte("true") - } + // Save standard HTTP headers as extended attributes + // Note: S3 API now writes directly to volume servers and saves metadata via gRPC + // This handler is for non-S3 clients (WebDAV, SFTP, mount, curl, etc.) for k, v := range r.Header { if len(v) > 0 && len(v[0]) > 0 { if strings.HasPrefix(k, needle.PairNamePrefix) || k == "Cache-Control" || k == "Expires" || k == "Content-Disposition" { entry.Extended[k] = []byte(v[0]) - // Log version ID header specifically for debugging - if k == "Seaweed-X-Amz-Version-Id" { - glog.V(0).Infof("filer: storing version ID header in Extended: %s=%s for path=%s", k, v[0], path) - } } if k == "Response-Content-Disposition" { entry.Extended["Content-Disposition"] = []byte(v[0]) @@ -456,73 +440,3 @@ func (fs *FilerServer) mkdir(ctx context.Context, w http.ResponseWriter, r *http } return filerResult, replyerr } - -func SaveAmzMetaData(r *http.Request, existing map[string][]byte, isReplace bool) (metadata map[string][]byte) { - - metadata = make(map[string][]byte) - if !isReplace { - for k, v := range existing { - metadata[k] = v - } - } - - if sc := r.Header.Get(s3_constants.AmzStorageClass); sc != "" { - metadata[s3_constants.AmzStorageClass] = []byte(sc) - } - - if ce := r.Header.Get("Content-Encoding"); ce != "" { - metadata["Content-Encoding"] = []byte(ce) - } - - if tags := r.Header.Get(s3_constants.AmzObjectTagging); tags != "" { - // Use url.ParseQuery for robust parsing and automatic URL decoding - parsedTags, err := url.ParseQuery(tags) - if err != nil { - glog.Errorf("Failed to parse S3 tags '%s': %v", tags, err) - } else { - for key, values := range parsedTags { - // According to S3 spec, if a key is provided multiple times, the last value is used. - // A tag value can be an empty string but not nil. - value := "" - if len(values) > 0 { - value = values[len(values)-1] - } - metadata[s3_constants.AmzObjectTagging+"-"+key] = []byte(value) - } - } - } - - for header, values := range r.Header { - if strings.HasPrefix(header, s3_constants.AmzUserMetaPrefix) { - // Go's HTTP server canonicalizes headers (e.g., x-amz-meta-foo → X-Amz-Meta-Foo) - // We store them as they come in (after canonicalization) to preserve the user's intent - for _, value := range values { - metadata[header] = []byte(value) - } - } - } - - // Handle SSE-C headers - if algorithm := r.Header.Get(s3_constants.AmzServerSideEncryptionCustomerAlgorithm); algorithm != "" { - metadata[s3_constants.AmzServerSideEncryptionCustomerAlgorithm] = []byte(algorithm) - } - if keyMD5 := r.Header.Get(s3_constants.AmzServerSideEncryptionCustomerKeyMD5); keyMD5 != "" { - // Store as-is; SSE-C MD5 is base64 and case-sensitive - metadata[s3_constants.AmzServerSideEncryptionCustomerKeyMD5] = []byte(keyMD5) - } - - //acp-owner - acpOwner := r.Header.Get(s3_constants.ExtAmzOwnerKey) - if len(acpOwner) > 0 { - metadata[s3_constants.ExtAmzOwnerKey] = []byte(acpOwner) - } - - //acp-grants - acpGrants := r.Header.Get(s3_constants.ExtAmzAclKey) - if len(acpGrants) > 0 { - metadata[s3_constants.ExtAmzAclKey] = []byte(acpGrants) - } - - return - -}