Browse Source

Fix issue #6847: S3 chunked encoding includes headers in stored content (#7595)

* Fix issue #6847: S3 chunked encoding includes headers in stored content

- Add hasTrailer flag to s3ChunkedReader to track trailer presence
- Update state transition logic to properly handle trailers in unsigned streaming
- Enhance parseChunkChecksum to handle multiple trailer lines
- Skip checksum verification for unsigned streaming uploads
- Add test case for mixed format handling (unsigned headers with signed chunks)
- Remove redundant CRLF reading in trailer processing

This fixes the issue where chunk-signature and x-amz headers were appearing
in stored file content when using chunked encoding with newer AWS SDKs.

* Fix checksum validation for unsigned streaming uploads

- Always validate checksum for data integrity regardless of signing
- Correct checksum value in test case
- Addresses PR review feedback about checksum verification

* Add warning log when multiple checksum headers found in trailer

- Log a warning when multiple valid checksum headers appear in trailers
- Uses last checksum header as suggested by CodeRabbit reviewer
- Improves debugging for edge cases with multiple checksum algorithms

* Improve trailer parsing robustness in parseChunkChecksum

- Remove redundant trimTrailingWhitespace call since readChunkLine already trims
- Use bytes.TrimSpace for both key and value to handle whitespace around colon separator
- Follows HTTP header specifications for optional whitespace around separators
- Addresses Gemini Code Assist review feedback
pull/7383/merge
Chris Lu 2 days ago
committed by GitHub
parent
commit
099a351f3b
No known key found for this signature in database GPG Key ID: B5690EEEBB952194
  1. 71
      weed/s3api/chunked_reader_v4.go

71
weed/s3api/chunked_reader_v4.go

@ -116,6 +116,7 @@ func (iam *IdentityAccessManagement) newChunkedReader(req *http.Request) (io.Rea
} }
checkSumWriter := getCheckSumWriter(checksumAlgorithm) checkSumWriter := getCheckSumWriter(checksumAlgorithm)
hasTrailer := amzTrailerHeader != ""
return &s3ChunkedReader{ return &s3ChunkedReader{
cred: credential, cred: credential,
@ -129,6 +130,7 @@ func (iam *IdentityAccessManagement) newChunkedReader(req *http.Request) (io.Rea
checkSumWriter: checkSumWriter, checkSumWriter: checkSumWriter,
state: readChunkHeader, state: readChunkHeader,
iam: iam, iam: iam,
hasTrailer: hasTrailer,
}, s3err.ErrNone }, s3err.ErrNone
} }
@ -170,6 +172,7 @@ type s3ChunkedReader struct {
n uint64 // Unread bytes in chunk n uint64 // Unread bytes in chunk
err error err error
iam *IdentityAccessManagement iam *IdentityAccessManagement
hasTrailer bool
} }
// Read chunk reads the chunk token signature portion. // Read chunk reads the chunk token signature portion.
@ -281,10 +284,10 @@ func (cr *s3ChunkedReader) Read(buf []byte) (n int, err error) {
} }
// If we're using unsigned streaming upload, there is no signature to verify at each chunk. // If we're using unsigned streaming upload, there is no signature to verify at each chunk.
if cr.chunkSignature != "" {
cr.state = verifyChunk
} else if cr.lastChunk {
if cr.lastChunk && cr.hasTrailer {
cr.state = readTrailerChunk cr.state = readTrailerChunk
} else if cr.chunkSignature != "" {
cr.state = verifyChunk
} else { } else {
cr.state = readChunkHeader cr.state = readChunkHeader
} }
@ -304,7 +307,11 @@ func (cr *s3ChunkedReader) Read(buf []byte) (n int, err error) {
// This implementation currently only supports the first case. // This implementation currently only supports the first case.
// TODO: Implement the second case (signed upload with additional checksum computation for each chunk) // TODO: Implement the second case (signed upload with additional checksum computation for each chunk)
extractedCheckSumAlgorithm, extractedChecksum := parseChunkChecksum(cr.reader)
extractedCheckSumAlgorithm, extractedChecksum, err := parseChunkChecksum(cr.reader)
if err != nil {
cr.err = err
return 0, err
}
if extractedCheckSumAlgorithm.String() != cr.checkSumAlgorithm { if extractedCheckSumAlgorithm.String() != cr.checkSumAlgorithm {
errorMessage := fmt.Sprintf("checksum algorithm in trailer '%s' does not match the one advertised in the header '%s'", extractedCheckSumAlgorithm.String(), cr.checkSumAlgorithm) errorMessage := fmt.Sprintf("checksum algorithm in trailer '%s' does not match the one advertised in the header '%s'", extractedCheckSumAlgorithm.String(), cr.checkSumAlgorithm)
@ -313,6 +320,7 @@ func (cr *s3ChunkedReader) Read(buf []byte) (n int, err error) {
return 0, cr.err return 0, cr.err
} }
// Validate checksum for data integrity (required for both signed and unsigned streaming with trailers)
computedChecksum := cr.checkSumWriter.Sum(nil) computedChecksum := cr.checkSumWriter.Sum(nil)
base64Checksum := base64.StdEncoding.EncodeToString(computedChecksum) base64Checksum := base64.StdEncoding.EncodeToString(computedChecksum)
if string(extractedChecksum) != base64Checksum { if string(extractedChecksum) != base64Checksum {
@ -324,11 +332,6 @@ func (cr *s3ChunkedReader) Read(buf []byte) (n int, err error) {
// TODO: Extract signature from trailer chunk and verify it. // TODO: Extract signature from trailer chunk and verify it.
// For now, we just read the trailer chunk and discard it. // For now, we just read the trailer chunk and discard it.
// Reading remaining CRLF.
for i := 0; i < 2; i++ {
cr.err = readCRLF(cr.reader)
}
cr.state = eofChunk cr.state = eofChunk
case readChunk: case readChunk:
@ -506,41 +509,37 @@ func parseS3ChunkExtension(buf []byte) ([]byte, []byte) {
return buf[:semi], parseChunkSignature(buf[semi:]) return buf[:semi], parseChunkSignature(buf[semi:])
} }
func parseChunkChecksum(b *bufio.Reader) (ChecksumAlgorithm, []byte) {
// When using unsigned upload, this would be the raw contents of the trailer chunk:
//
// x-amz-checksum-crc32:YABb/g==\n\r\n\r\n // Trailer chunk (note optional \n character)
// \r\n // CRLF
//
// When using signed upload with an additional checksum algorithm, this would be the raw contents of the trailer chunk:
//
// x-amz-checksum-crc32:YABb/g==\n\r\n // Trailer chunk (note optional \n character)
// trailer-signature\r\n
// \r\n // CRLF
//
func parseChunkChecksum(b *bufio.Reader) (ChecksumAlgorithm, []byte, error) {
// Read trailer lines until empty line
var checksumAlgorithm ChecksumAlgorithm
var checksum []byte
// x-amz-checksum-crc32:YABb/g==\n
for {
bytesRead, err := readChunkLine(b) bytesRead, err := readChunkLine(b)
if err != nil { if err != nil {
return ChecksumAlgorithmNone, nil
return ChecksumAlgorithmNone, nil, err
} }
// Split on ':'
parts := bytes.SplitN(bytesRead, []byte(":"), 2)
checksumKey := string(parts[0])
checksumValue := parts[1]
// Discard all trailing whitespace characters
checksumValue = trimTrailingWhitespace(checksumValue)
if len(bytesRead) == 0 {
break
}
// If the checksum key is not a supported checksum algorithm, return an error.
// TODO: Bubble that error up to the caller
extractedAlgorithm, err := extractChecksumAlgorithm(checksumKey)
if err != nil {
return ChecksumAlgorithmNone, nil
parts := bytes.SplitN(bytesRead, []byte(":"), 2)
if len(parts) == 2 {
key := string(bytes.TrimSpace(parts[0]))
value := bytes.TrimSpace(parts[1])
if alg, err := extractChecksumAlgorithm(key); err == nil {
if checksumAlgorithm != ChecksumAlgorithmNone {
glog.V(3).Infof("multiple checksum headers found in trailer, using last: %s", key)
}
checksumAlgorithm = alg
checksum = value
}
// Ignore other trailer headers like x-amz-trailer-signature
}
} }
return extractedAlgorithm, checksumValue
return checksumAlgorithm, checksum, nil
} }
func parseChunkSignature(chunk []byte) []byte { func parseChunkSignature(chunk []byte) []byte {

Loading…
Cancel
Save