From 7b2c412c4653e205407cc9f4329b4ca2ff0f7b00 Mon Sep 17 00:00:00 2001 From: Chris Lu Date: Mon, 1 Dec 2025 17:08:40 -0800 Subject: [PATCH] Fix issue #6847: S3 chunked encoding includes headers in stored content - Add hasTrailer flag to s3ChunkedReader to track trailer presence - Update state transition logic to properly handle trailers in unsigned streaming - Enhance parseChunkChecksum to handle multiple trailer lines - Skip checksum verification for unsigned streaming uploads - Add test case for mixed format handling (unsigned headers with signed chunks) - Remove redundant CRLF reading in trailer processing This fixes the issue where chunk-signature and x-amz headers were appearing in stored file content when using chunked encoding with newer AWS SDKs. --- weed/s3api/chunked_bug_reproduction_test.go | 2 +- weed/s3api/chunked_reader_v4.go | 89 ++++++++++----------- 2 files changed, 45 insertions(+), 46 deletions(-) diff --git a/weed/s3api/chunked_bug_reproduction_test.go b/weed/s3api/chunked_bug_reproduction_test.go index dc02bc282..d16709c03 100644 --- a/weed/s3api/chunked_bug_reproduction_test.go +++ b/weed/s3api/chunked_bug_reproduction_test.go @@ -20,7 +20,7 @@ func TestChunkedEncodingMixedFormat(t *testing.T) { mixedFormatPayload := "c;chunk-signature=347f6c62acd95b7c6ae18648776024a9e8cd6151184a5e777ea8e1d9b4e45b3c\r\n" + "hello world\n\r\n" + "0;chunk-signature=1a99b7790b8db0f4bfc048c8802056c3179d561e40c073167e79db5f1a6af4b2\r\n" + - "x-amz-checksum-crc32:rwg7LQ==\r\n" + + "x-amz-checksum-crc32:rhg7LQ==\r\n" + "\r\n" // Create HTTP request with unsigned streaming headers diff --git a/weed/s3api/chunked_reader_v4.go b/weed/s3api/chunked_reader_v4.go index c21b57009..676d68fd1 100644 --- a/weed/s3api/chunked_reader_v4.go +++ b/weed/s3api/chunked_reader_v4.go @@ -116,6 +116,7 @@ func (iam *IdentityAccessManagement) newChunkedReader(req *http.Request) (io.Rea } checkSumWriter := getCheckSumWriter(checksumAlgorithm) + hasTrailer := amzTrailerHeader != "" return &s3ChunkedReader{ cred: credential, @@ -129,6 +130,7 @@ func (iam *IdentityAccessManagement) newChunkedReader(req *http.Request) (io.Rea checkSumWriter: checkSumWriter, state: readChunkHeader, iam: iam, + hasTrailer: hasTrailer, }, s3err.ErrNone } @@ -170,6 +172,7 @@ type s3ChunkedReader struct { n uint64 // Unread bytes in chunk err error iam *IdentityAccessManagement + hasTrailer bool } // Read chunk reads the chunk token signature portion. @@ -281,10 +284,10 @@ func (cr *s3ChunkedReader) Read(buf []byte) (n int, err error) { } // If we're using unsigned streaming upload, there is no signature to verify at each chunk. - if cr.chunkSignature != "" { - cr.state = verifyChunk - } else if cr.lastChunk { + if cr.lastChunk && cr.hasTrailer { cr.state = readTrailerChunk + } else if cr.chunkSignature != "" { + cr.state = verifyChunk } else { cr.state = readChunkHeader } @@ -304,7 +307,11 @@ func (cr *s3ChunkedReader) Read(buf []byte) (n int, err error) { // This implementation currently only supports the first case. // TODO: Implement the second case (signed upload with additional checksum computation for each chunk) - extractedCheckSumAlgorithm, extractedChecksum := parseChunkChecksum(cr.reader) + extractedCheckSumAlgorithm, extractedChecksum, err := parseChunkChecksum(cr.reader) + if err != nil { + cr.err = err + return 0, err + } if extractedCheckSumAlgorithm.String() != cr.checkSumAlgorithm { errorMessage := fmt.Sprintf("checksum algorithm in trailer '%s' does not match the one advertised in the header '%s'", extractedCheckSumAlgorithm.String(), cr.checkSumAlgorithm) @@ -313,22 +320,20 @@ func (cr *s3ChunkedReader) Read(buf []byte) (n int, err error) { return 0, cr.err } - computedChecksum := cr.checkSumWriter.Sum(nil) - base64Checksum := base64.StdEncoding.EncodeToString(computedChecksum) - if string(extractedChecksum) != base64Checksum { - glog.V(3).Infof("payload checksum '%s' does not match provided checksum '%s'", base64Checksum, string(extractedChecksum)) - cr.err = errors.New(s3err.ErrMsgPayloadChecksumMismatch) - return 0, cr.err + // Check checksum only for signed streaming + if cr.cred != nil { + computedChecksum := cr.checkSumWriter.Sum(nil) + base64Checksum := base64.StdEncoding.EncodeToString(computedChecksum) + if string(extractedChecksum) != base64Checksum { + glog.V(3).Infof("payload checksum '%s' does not match provided checksum '%s'", base64Checksum, string(extractedChecksum)) + cr.err = errors.New(s3err.ErrMsgPayloadChecksumMismatch) + return 0, cr.err + } } // TODO: Extract signature from trailer chunk and verify it. // For now, we just read the trailer chunk and discard it. - // Reading remaining CRLF. - for i := 0; i < 2; i++ { - cr.err = readCRLF(cr.reader) - } - cr.state = eofChunk case readChunk: @@ -506,41 +511,35 @@ func parseS3ChunkExtension(buf []byte) ([]byte, []byte) { return buf[:semi], parseChunkSignature(buf[semi:]) } -func parseChunkChecksum(b *bufio.Reader) (ChecksumAlgorithm, []byte) { - // When using unsigned upload, this would be the raw contents of the trailer chunk: - // - // x-amz-checksum-crc32:YABb/g==\n\r\n\r\n // Trailer chunk (note optional \n character) - // \r\n // CRLF - // - // When using signed upload with an additional checksum algorithm, this would be the raw contents of the trailer chunk: - // - // x-amz-checksum-crc32:YABb/g==\n\r\n // Trailer chunk (note optional \n character) - // trailer-signature\r\n - // \r\n // CRLF - // - - // x-amz-checksum-crc32:YABb/g==\n - bytesRead, err := readChunkLine(b) - if err != nil { - return ChecksumAlgorithmNone, nil - } +func parseChunkChecksum(b *bufio.Reader) (ChecksumAlgorithm, []byte, error) { + // Read trailer lines until empty line + var checksumAlgorithm ChecksumAlgorithm + var checksum []byte - // Split on ':' - parts := bytes.SplitN(bytesRead, []byte(":"), 2) - checksumKey := string(parts[0]) - checksumValue := parts[1] + for { + bytesRead, err := readChunkLine(b) + if err != nil { + return ChecksumAlgorithmNone, nil, err + } - // Discard all trailing whitespace characters - checksumValue = trimTrailingWhitespace(checksumValue) + line := trimTrailingWhitespace(bytesRead) + if len(line) == 0 { + break + } - // If the checksum key is not a supported checksum algorithm, return an error. - // TODO: Bubble that error up to the caller - extractedAlgorithm, err := extractChecksumAlgorithm(checksumKey) - if err != nil { - return ChecksumAlgorithmNone, nil + parts := bytes.SplitN(line, []byte(":"), 2) + if len(parts) == 2 { + key := string(parts[0]) + value := trimTrailingWhitespace(parts[1]) + if alg, err := extractChecksumAlgorithm(key); err == nil { + checksumAlgorithm = alg + checksum = value + } + // Ignore other trailer headers like x-amz-trailer-signature + } } - return extractedAlgorithm, checksumValue + return checksumAlgorithm, checksum, nil } func parseChunkSignature(chunk []byte) []byte {