Browse Source

S3: set identity to request context, and remove obsolete code (#7523)

* list owned buckets

* simplify

* add unit tests

* no-owner buckets

* set identity id

* fallback to request header if iam is not enabled

* refactor to test

* fix comparing

* fix security vulnerability

* Update s3api_bucket_handlers.go

* Update s3api_bucket_handlers.go

* Update s3api_bucket_handlers.go

* set identity to request context

* remove SeaweedFSIsDirectoryKey

* remove obsolete

* simplify

* reuse

* refactor or remove obsolete logic on filer

* Removed the redundant check in GetOrHeadHandler

* surfacing invalid X-Amz-Tagging as a client error

* clean up

* constant

* reuse

* multiple header values

* code reuse

* err on duplicated tag key
pull/7490/merge
Chris Lu 4 days ago
committed by GitHub
parent
commit
f125a013a8
No known key found for this signature in database GPG Key ID: B5690EEEBB952194
  1. 4
      weed/s3api/auth_credentials.go
  2. 29
      weed/s3api/s3_constants/header.go
  3. 94
      weed/s3api/s3_metadata_util.go
  4. 20
      weed/s3api/s3api_bucket_handlers.go
  5. 8
      weed/s3api/s3api_object_handlers_multipart.go
  6. 2
      weed/s3api/s3err/audit_fluent.go
  7. 58
      weed/server/filer_server_handlers_read.go
  8. 94
      weed/server/filer_server_handlers_write_autochunk.go

4
weed/s3api/auth_credentials.go

@ -421,8 +421,10 @@ func (iam *IdentityAccessManagement) Auth(f http.HandlerFunc, action Action) htt
glog.V(3).Infof("auth error: %v", errCode)
if errCode == s3err.ErrNone {
// Store the authenticated identity in request context (secure, cannot be spoofed)
if identity != nil && identity.Name != "" {
r.Header.Set(s3_constants.AmzIdentityId, identity.Name)
ctx := s3_constants.SetIdentityNameInContext(r.Context(), identity.Name)
r = r.WithContext(ctx)
}
f(w, r)
return

29
weed/s3api/s3_constants/header.go

@ -17,6 +17,7 @@
package s3_constants
import (
"context"
"net/http"
"strings"
@ -44,8 +45,6 @@ const (
AmzObjectTaggingDirective = "X-Amz-Tagging-Directive"
AmzTagCount = "x-amz-tagging-count"
SeaweedFSIsDirectoryKey = "X-Seaweedfs-Is-Directory-Key"
SeaweedFSPartNumber = "X-Seaweedfs-Part-Number"
SeaweedFSUploadId = "X-Seaweedfs-Upload-Id"
SeaweedFSMultipartPartsCount = "X-Seaweedfs-Multipart-Parts-Count"
SeaweedFSMultipartPartBoundaries = "X-Seaweedfs-Multipart-Part-Boundaries" // JSON: [{part:1,start:0,end:2,etag:"abc"},{part:2,start:2,end:3,etag:"def"}]
@ -174,3 +173,29 @@ var PassThroughHeaders = map[string]string{
// IsSeaweedFSInternalHeader reports whether headerKey, after being
// lower-cased, begins with SeaweedFSInternalPrefix.
// NOTE(review): only headerKey is lower-cased here, so this presumably relies
// on SeaweedFSInternalPrefix itself being lower-case — confirm at its definition.
func IsSeaweedFSInternalHeader(headerKey string) bool {
	lowered := strings.ToLower(headerKey)
	return strings.HasPrefix(lowered, SeaweedFSInternalPrefix)
}
// Context keys for storing authenticated identity information.
// contextKey is a distinct named type, so a value stored under one of these
// keys cannot be read or overwritten through a plain string key.
type contextKey string

const (
	// contextKeyIdentityName is the key under which the authenticated
	// identity name is stored in a request context.
	contextKeyIdentityName contextKey = "s3-identity-name"
)

// SetIdentityNameInContext stores the authenticated identity name in the request context.
// This is the secure way to propagate identity - headers can be spoofed, context cannot.
//
// An empty identityName is never stored: ctx is returned unchanged in that case.
// Otherwise a derived context carrying identityName is returned.
func SetIdentityNameInContext(ctx context.Context, identityName string) context.Context {
	if identityName == "" {
		return ctx
	}
	return context.WithValue(ctx, contextKeyIdentityName, identityName)
}

// GetIdentityNameFromContext retrieves the authenticated identity name from the request context.
// It returns an empty string if no identity is set (unauthenticated request) or if r is nil.
// This is the secure way to retrieve identity - never read from headers directly.
func GetIdentityNameFromContext(r *http.Request) string {
	// Calling Context() on a nil *http.Request would panic; treat it the same
	// as a request without an identity.
	if r == nil {
		return ""
	}
	// A missing key or a value of the wrong type leaves name as "".
	name, _ := r.Context().Value(contextKeyIdentityName).(string)
	return name
}

94
weed/s3api/s3_metadata_util.go

@ -0,0 +1,94 @@
package s3api
import (
"net/http"
"net/url"
"strings"
"github.com/seaweedfs/seaweedfs/weed/glog"
"github.com/seaweedfs/seaweedfs/weed/s3api/s3_constants"
"github.com/seaweedfs/seaweedfs/weed/s3api/s3err"
)
// ParseS3Metadata extracts S3-specific metadata from HTTP request headers.
// This includes: storage class, content encoding, tags, user metadata,
// SSE-C headers, and ACL headers.
// Used by S3 API handlers to prepare metadata before saving to filer.
//
// Parameters:
//   - r: the incoming request whose headers are inspected.
//   - existing: previously stored metadata; copied into the result first
//     unless isReplace is true. The byte slices are shared, not cloned.
//   - isReplace: when true, existing is ignored and the result is built
//     from the request headers only.
//
// Returns the assembled metadata and s3err.ErrNone on success. If the
// X-Amz-Tagging header cannot be parsed, contains a duplicate tag key, or
// contains an empty tag key, it returns a nil map and s3err.ErrInvalidTag.
func ParseS3Metadata(r *http.Request, existing map[string][]byte, isReplace bool) (metadata map[string][]byte, errCode s3err.ErrorCode) {
	// Copy existing metadata unless replacing
	if isReplace {
		metadata = make(map[string][]byte)
	} else {
		metadata = make(map[string][]byte, len(existing))
		for k, v := range existing {
			metadata[k] = v
		}
	}

	// Storage class
	if sc := r.Header.Get(s3_constants.AmzStorageClass); sc != "" {
		metadata[s3_constants.AmzStorageClass] = []byte(sc)
	}

	// Content-Encoding (standard HTTP header used by S3)
	if ce := r.Header.Get("Content-Encoding"); ce != "" {
		metadata["Content-Encoding"] = []byte(ce)
	}

	// Object tagging
	if tags := r.Header.Get(s3_constants.AmzObjectTagging); tags != "" {
		// Use url.ParseQuery for robust parsing and automatic URL decoding
		parsedTags, err := url.ParseQuery(tags)
		if err != nil {
			// Return proper S3 error instead of silently dropping tags
			glog.Warningf("Invalid S3 tag format in header '%s': %v", tags, err)
			return nil, s3err.ErrInvalidTag
		}
		for key, values := range parsedTags {
			// Validate: a header such as "=value" parses to an empty key, which
			// S3 does not accept (tag keys must be at least one character) and
			// which would otherwise be stored under a bare "<tagging>-" key.
			if key == "" {
				glog.Warningf("Empty tag key in header '%s'", tags)
				return nil, s3err.ErrInvalidTag
			}
			// Validate: S3 spec does not allow duplicate tag keys
			if len(values) > 1 {
				glog.Warningf("Duplicate tag key '%s' in header '%s'", key, tags)
				return nil, s3err.ErrInvalidTag
			}
			// Tag value can be an empty string but not nil
			value := ""
			if len(values) > 0 {
				value = values[0]
			}
			metadata[s3_constants.AmzObjectTagging+"-"+key] = []byte(value)
		}
	}

	// User-defined metadata (x-amz-meta-* headers)
	for header, values := range r.Header {
		if strings.HasPrefix(header, s3_constants.AmzUserMetaPrefix) {
			// Go's HTTP server canonicalizes headers (e.g., x-amz-meta-foo → X-Amz-Meta-Foo)
			// Per HTTP and S3 spec: multiple header values are concatenated with commas
			// This ensures no metadata is lost when clients send duplicate header names
			metadata[header] = []byte(strings.Join(values, ","))
		}
	}

	// SSE-C headers
	if algorithm := r.Header.Get(s3_constants.AmzServerSideEncryptionCustomerAlgorithm); algorithm != "" {
		metadata[s3_constants.AmzServerSideEncryptionCustomerAlgorithm] = []byte(algorithm)
	}
	if keyMD5 := r.Header.Get(s3_constants.AmzServerSideEncryptionCustomerKeyMD5); keyMD5 != "" {
		// Store as-is; SSE-C MD5 is base64 and case-sensitive
		metadata[s3_constants.AmzServerSideEncryptionCustomerKeyMD5] = []byte(keyMD5)
	}

	// ACL owner
	if acpOwner := r.Header.Get(s3_constants.ExtAmzOwnerKey); len(acpOwner) > 0 {
		metadata[s3_constants.ExtAmzOwnerKey] = []byte(acpOwner)
	}

	// ACL grants
	if acpGrants := r.Header.Get(s3_constants.ExtAmzAclKey); len(acpGrants) > 0 {
		metadata[s3_constants.ExtAmzAclKey] = []byte(acpGrants)
	}

	return metadata, s3err.ErrNone
}

20
weed/s3api/s3api_bucket_handlers.go

@ -59,12 +59,9 @@ func (s3a *S3ApiServer) ListBucketsHandler(w http.ResponseWriter, r *http.Reques
return
}
identityId := ""
if identity != nil {
identityId = identity.Name
}
// Note: For unauthenticated requests, identityId remains empty.
// We never read from request headers to prevent reflecting unvalidated user input.
// Get authenticated identity from context (secure, cannot be spoofed)
// For unauthenticated requests, this returns empty string
identityId := s3_constants.GetIdentityNameFromContext(r)
var listBuckets ListAllMyBucketsList
for _, entry := range entries {
@ -164,7 +161,8 @@ func (s3a *S3ApiServer) PutBucketHandler(w http.ResponseWriter, r *http.Request)
}
// Check if bucket already exists and handle ownership/settings
currentIdentityId := r.Header.Get(s3_constants.AmzIdentityId)
// Get authenticated identity from context (secure, cannot be spoofed)
currentIdentityId := s3_constants.GetIdentityNameFromContext(r)
// Check collection existence first
collectionExists := false
@ -247,11 +245,12 @@ func (s3a *S3ApiServer) PutBucketHandler(w http.ResponseWriter, r *http.Request)
}
fn := func(entry *filer_pb.Entry) {
if identityId := r.Header.Get(s3_constants.AmzIdentityId); identityId != "" {
// Reuse currentIdentityId from above (already retrieved from context)
if currentIdentityId != "" {
if entry.Extended == nil {
entry.Extended = make(map[string][]byte)
}
entry.Extended[s3_constants.AmzIdentityId] = []byte(identityId)
entry.Extended[s3_constants.AmzIdentityId] = []byte(currentIdentityId)
}
}
@ -576,7 +575,8 @@ func (s3a *S3ApiServer) hasAccess(r *http.Request, entry *filer_pb.Entry) bool {
return true
}
identityId := r.Header.Get(s3_constants.AmzIdentityId)
// Get authenticated identity from context (secure, cannot be spoofed)
identityId := s3_constants.GetIdentityNameFromContext(r)
if id, ok := entry.Extended[s3_constants.AmzIdentityId]; ok {
if identityId != string(id) {
glog.V(3).Infof("hasAccess: %s != %s (entry.Extended = %v)", identityId, id, entry.Extended)

8
weed/s3api/s3api_object_handlers_multipart.go

@ -20,7 +20,6 @@ import (
"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
"github.com/seaweedfs/seaweedfs/weed/s3api/s3_constants"
"github.com/seaweedfs/seaweedfs/weed/s3api/s3err"
weed_server "github.com/seaweedfs/seaweedfs/weed/server"
stats_collect "github.com/seaweedfs/seaweedfs/weed/stats"
)
@ -65,7 +64,12 @@ func (s3a *S3ApiServer) NewMultipartUploadHandler(w http.ResponseWriter, r *http
Metadata: make(map[string]*string),
}
metadata := weed_server.SaveAmzMetaData(r, nil, false)
// Parse S3 metadata from request headers
metadata, errCode := ParseS3Metadata(r, nil, false)
if errCode != s3err.ErrNone {
s3err.WriteErrorResponse(w, r, errCode)
return
}
for k, v := range metadata {
createMultipartUploadInput.Metadata[k] = aws.String(string(v))
}

2
weed/s3api/s3err/audit_fluent.go

@ -152,7 +152,7 @@ func GetAccessLog(r *http.Request, HTTPStatusCode int, s3errCode ErrorCode) *Acc
HostHeader: hostHeader,
RequestID: r.Header.Get("X-Request-ID"),
RemoteIP: remoteIP,
Requester: r.Header.Get(s3_constants.AmzIdentityId),
Requester: s3_constants.GetIdentityNameFromContext(r), // Get from context, not header (secure)
SignatureVersion: r.Header.Get(s3_constants.AmzAuthType),
UserAgent: r.Header.Get("user-agent"),
HostId: hostname,

58
weed/server/filer_server_handlers_read.go

@ -2,9 +2,6 @@ package weed_server
import (
"context"
"encoding/base64"
"encoding/hex"
"errors"
"fmt"
"io"
"math"
@ -15,12 +12,11 @@ import (
"strings"
"time"
"github.com/seaweedfs/seaweedfs/weed/s3api/s3_constants"
"github.com/seaweedfs/seaweedfs/weed/security"
"github.com/seaweedfs/seaweedfs/weed/filer"
"github.com/seaweedfs/seaweedfs/weed/glog"
"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
"github.com/seaweedfs/seaweedfs/weed/s3api/s3_constants"
"github.com/seaweedfs/seaweedfs/weed/security"
"github.com/seaweedfs/seaweedfs/weed/stats"
"github.com/seaweedfs/seaweedfs/weed/util"
)
@ -122,22 +118,8 @@ func (fs *FilerServer) GetOrHeadHandler(w http.ResponseWriter, r *http.Request)
writeJsonQuiet(w, r, http.StatusOK, entry)
return
}
if entry.Attr.Mime == "" || (entry.Attr.Mime == s3_constants.FolderMimeType && r.Header.Get(s3_constants.AmzIdentityId) == "") {
// Don't return directory meta if config value is set to true
if fs.option.ExposeDirectoryData == false {
writeJsonError(w, r, http.StatusForbidden, errors.New("directory listing is disabled"))
return
}
// return index of directory for non s3 gateway
fs.listDirectoryHandler(w, r)
return
}
// inform S3 API this is a user created directory key object
w.Header().Set(s3_constants.SeaweedFSIsDirectoryKey, "true")
}
if isForDirectory && entry.Attr.Mime != s3_constants.FolderMimeType {
w.WriteHeader(http.StatusNotFound)
// listDirectoryHandler checks ExposeDirectoryData internally
fs.listDirectoryHandler(w, r)
return
}
@ -160,22 +142,8 @@ func (fs *FilerServer) GetOrHeadHandler(w http.ResponseWriter, r *http.Request)
return
}
var etag string
if partNumber, errNum := strconv.Atoi(r.Header.Get(s3_constants.SeaweedFSPartNumber)); errNum == nil {
if len(entry.Chunks) < partNumber {
stats.FilerHandlerCounter.WithLabelValues(stats.ErrorReadChunk).Inc()
w.WriteHeader(http.StatusBadRequest)
w.Write([]byte("InvalidPart"))
return
}
w.Header().Set(s3_constants.AmzMpPartsCount, strconv.Itoa(len(entry.Chunks)))
partChunk := entry.GetChunks()[partNumber-1]
md5, _ := base64.StdEncoding.DecodeString(partChunk.ETag)
etag = hex.EncodeToString(md5)
r.Header.Set("Range", fmt.Sprintf("bytes=%d-%d", partChunk.Offset, uint64(partChunk.Offset)+partChunk.Size-1))
} else {
etag = filer.ETagEntry(entry)
}
// Generate ETag for response
etag := filer.ETagEntry(entry)
w.Header().Set("Accept-Ranges", "bytes")
// mime type
@ -192,10 +160,9 @@ func (fs *FilerServer) GetOrHeadHandler(w http.ResponseWriter, r *http.Request)
}
// print out the header from extended properties
// Filter out xattr-* (filesystem extended attributes) and internal SeaweedFS headers
for k, v := range entry.Extended {
if !strings.HasPrefix(k, "xattr-") && !s3_constants.IsSeaweedFSInternalHeader(k) {
// "xattr-" prefix is set in filesys.XATTR_PREFIX
// IsSeaweedFSInternalHeader filters internal metadata that should not become HTTP headers
w.Header().Set(k, string(v))
}
}
@ -210,17 +177,6 @@ func (fs *FilerServer) GetOrHeadHandler(w http.ResponseWriter, r *http.Request)
seaweedHeaders = append(seaweedHeaders, "Content-Disposition")
w.Header().Set("Access-Control-Expose-Headers", strings.Join(seaweedHeaders, ","))
//set tag count
tagCount := 0
for k := range entry.Extended {
if strings.HasPrefix(k, s3_constants.AmzObjectTagging+"-") {
tagCount++
}
}
if tagCount > 0 {
w.Header().Set(s3_constants.AmzTagCount, strconv.Itoa(tagCount))
}
SetEtag(w, etag)
filename := entry.Name()

94
weed/server/filer_server_handlers_write_autochunk.go

@ -7,7 +7,6 @@ import (
"fmt"
"io"
"net/http"
"net/url"
"os"
"path"
"strconv"
@ -18,7 +17,6 @@ import (
"github.com/seaweedfs/seaweedfs/weed/glog"
"github.com/seaweedfs/seaweedfs/weed/operation"
"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
"github.com/seaweedfs/seaweedfs/weed/s3api/s3_constants"
"github.com/seaweedfs/seaweedfs/weed/storage/needle"
"github.com/seaweedfs/seaweedfs/weed/util"
"github.com/seaweedfs/seaweedfs/weed/util/constants"
@ -135,17 +133,8 @@ func (fs *FilerServer) doPutAutoChunk(ctx context.Context, w http.ResponseWriter
if err := fs.checkPermissions(ctx, r, fileName); err != nil {
return nil, nil, err
}
// Disable TTL-based (creation time) deletion when S3 expiry (modification time) is enabled
soMaybeWithOutTTL := so
if so.TtlSeconds > 0 {
if s3ExpiresValue := r.Header.Get(s3_constants.SeaweedFSExpiresS3); s3ExpiresValue == "true" {
clone := *so
clone.TtlSeconds = 0
soMaybeWithOutTTL = &clone
}
}
fileChunks, md5Hash, chunkOffset, err, smallContent := fs.uploadRequestToChunks(ctx, w, r, r.Body, chunkSize, fileName, contentType, contentLength, soMaybeWithOutTTL)
fileChunks, md5Hash, chunkOffset, err, smallContent := fs.uploadRequestToChunks(ctx, w, r, r.Body, chunkSize, fileName, contentType, contentLength, so)
if err != nil {
return nil, nil, err
@ -333,18 +322,13 @@ func (fs *FilerServer) saveMetaData(ctx context.Context, r *http.Request, fileNa
Size: int64(entry.FileSize),
}
entry.Extended = SaveAmzMetaData(r, entry.Extended, false)
if entry.TtlSec > 0 && r.Header.Get(s3_constants.SeaweedFSExpiresS3) == "true" {
entry.Extended[s3_constants.SeaweedFSExpiresS3] = []byte("true")
}
// Save standard HTTP headers as extended attributes
// Note: S3 API now writes directly to volume servers and saves metadata via gRPC
// This handler is for non-S3 clients (WebDAV, SFTP, mount, curl, etc.)
for k, v := range r.Header {
if len(v) > 0 && len(v[0]) > 0 {
if strings.HasPrefix(k, needle.PairNamePrefix) || k == "Cache-Control" || k == "Expires" || k == "Content-Disposition" {
entry.Extended[k] = []byte(v[0])
// Log version ID header specifically for debugging
if k == "Seaweed-X-Amz-Version-Id" {
glog.V(0).Infof("filer: storing version ID header in Extended: %s=%s for path=%s", k, v[0], path)
}
}
if k == "Response-Content-Disposition" {
entry.Extended["Content-Disposition"] = []byte(v[0])
@ -456,73 +440,3 @@ func (fs *FilerServer) mkdir(ctx context.Context, w http.ResponseWriter, r *http
}
return filerResult, replyerr
}
// SaveAmzMetaData builds the extended-attribute map for an entry from the
// request headers.
//
// Unless isReplace is true, the result starts as a shallow copy of existing.
// Header-derived values are then written on top: storage class,
// Content-Encoding, object tags, user metadata headers, SSE-C headers, and
// the ACL owner/grants headers.
//
// A tagging header that fails to parse is logged and skipped; no error is
// reported to the caller. When a tag key or a user metadata header carries
// several values, the last one is kept.
func SaveAmzMetaData(r *http.Request, existing map[string][]byte, isReplace bool) (metadata map[string][]byte) {
	metadata = make(map[string][]byte)
	if !isReplace {
		for name, raw := range existing {
			metadata[name] = raw
		}
	}

	// storeIfPresent copies a single-valued header into metadata under the
	// same name, skipping headers that are absent or empty.
	storeIfPresent := func(name string) {
		if v := r.Header.Get(name); v != "" {
			metadata[name] = []byte(v)
		}
	}

	storeIfPresent(s3_constants.AmzStorageClass)
	storeIfPresent("Content-Encoding")

	if rawTags := r.Header.Get(s3_constants.AmzObjectTagging); rawTags != "" {
		// url.ParseQuery handles both splitting and URL decoding of the tag set.
		tagSet, parseErr := url.ParseQuery(rawTags)
		if parseErr == nil {
			for tagKey, tagValues := range tagSet {
				// According to S3 spec, if a key is provided multiple times, the last value is used.
				// A tag value can be an empty string but not nil.
				last := ""
				if n := len(tagValues); n > 0 {
					last = tagValues[n-1]
				}
				metadata[s3_constants.AmzObjectTagging+"-"+tagKey] = []byte(last)
			}
		} else {
			glog.Errorf("Failed to parse S3 tags '%s': %v", rawTags, parseErr)
		}
	}

	for name, vals := range r.Header {
		if !strings.HasPrefix(name, s3_constants.AmzUserMetaPrefix) {
			continue
		}
		// Go's HTTP server canonicalizes headers (e.g., x-amz-meta-foo → X-Amz-Meta-Foo);
		// the canonical name is used as the key. With repeated headers only the
		// final value survives.
		if n := len(vals); n > 0 {
			metadata[name] = []byte(vals[n-1])
		}
	}

	// SSE-C headers; the key MD5 is stored as-is because it is base64 and case-sensitive.
	storeIfPresent(s3_constants.AmzServerSideEncryptionCustomerAlgorithm)
	storeIfPresent(s3_constants.AmzServerSideEncryptionCustomerKeyMD5)

	// acp-owner
	storeIfPresent(s3_constants.ExtAmzOwnerKey)

	// acp-grants
	storeIfPresent(s3_constants.ExtAmzAclKey)

	return metadata
}
Loading…
Cancel
Save