You can not select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
					
					
						
							203 lines
						
					
					
						
							5.0 KiB
						
					
					
				
			
		
		
		
			
			
			
		
		
	
	
							203 lines
						
					
					
						
							5.0 KiB
						
					
					
				
								package compression
							 | 
						|
								
							 | 
						|
								import (
							 | 
						|
									"bytes"
							 | 
						|
									"compress/gzip"
							 | 
						|
									"fmt"
							 | 
						|
									"io"
							 | 
						|
								
							 | 
						|
									"github.com/golang/snappy"
							 | 
						|
									"github.com/klauspost/compress/zstd"
							 | 
						|
									"github.com/pierrec/lz4/v4"
							 | 
						|
								)
							 | 
						|
								
							 | 
						|
								// nopCloser adapts a plain io.Reader into an io.ReadCloser. Reads are
								// forwarded to the embedded Reader; Close does nothing.
								type nopCloser struct {
									io.Reader
								}

								// Close implements io.Closer. There is nothing to release, so it always
								// returns nil.
								func (nc nopCloser) Close() error {
									return nil
								}
							 | 
						|
								
							 | 
						|
								// CompressionCodec identifies the compression codec used in a Kafka record
								// batch. The numeric value is what is stored in the low three bits of the
								// batch attributes (see ExtractCompressionCodec / SetCompressionCodec).
								type CompressionCodec int8

								// Codec identifiers, as encoded in the record batch attributes.
								const (
									None   CompressionCodec = 0 // records are stored uncompressed
									Gzip   CompressionCodec = 1
									Snappy CompressionCodec = 2
									Lz4    CompressionCodec = 3
									Zstd   CompressionCodec = 4
								)

								// codecAttributeMask selects the low three attribute bits that hold the
								// compression codec.
								const codecAttributeMask int16 = 0x07

								// String returns the lowercase codec name ("none", "gzip", "snappy", "lz4",
								// "zstd"). Values outside the declared constants are rendered as
								// "unknown(<n>)" so they remain visible in logs and error messages.
								func (c CompressionCodec) String() string {
									switch c {
									case None:
										return "none"
									case Gzip:
										return "gzip"
									case Snappy:
										return "snappy"
									case Lz4:
										return "lz4"
									case Zstd:
										return "zstd"
									default:
										return fmt.Sprintf("unknown(%d)", c)
									}
								}

								// IsValid reports whether c is one of the declared codecs (None through
								// Zstd, inclusive).
								func (c CompressionCodec) IsValid() bool {
									return c >= None && c <= Zstd
								}

								// ExtractCompressionCodec returns the codec stored in the low three bits of
								// the record batch attributes. The result is not validated; use IsValid to
								// check that it is a known codec.
								func ExtractCompressionCodec(attributes int16) CompressionCodec {
									return CompressionCodec(attributes & codecAttributeMask)
								}

								// SetCompressionCodec returns attributes with its low three bits replaced
								// by codec; all other attribute bits are preserved.
								//
								// The codec is masked to the three-bit field before being merged. Without
								// the mask, a negative codec would sign-extend to int16 and set every
								// attribute bit, and a codec above 7 would overwrite neighbouring flags.
								func SetCompressionCodec(attributes int16, codec CompressionCodec) int16 {
									return (attributes &^ codecAttributeMask) | (int16(codec) & codecAttributeMask)
								}
							 | 
						|
								
							 | 
						|
								// Compress compresses data using the specified codec
							 | 
						|
								func Compress(codec CompressionCodec, data []byte) ([]byte, error) {
							 | 
						|
									if codec == None {
							 | 
						|
										return data, nil
							 | 
						|
									}
							 | 
						|
								
							 | 
						|
									var buf bytes.Buffer
							 | 
						|
									var writer io.WriteCloser
							 | 
						|
									var err error
							 | 
						|
								
							 | 
						|
									switch codec {
							 | 
						|
									case Gzip:
							 | 
						|
										writer = gzip.NewWriter(&buf)
							 | 
						|
									case Snappy:
							 | 
						|
										// Snappy doesn't have a streaming writer, so we compress directly
							 | 
						|
										compressed := snappy.Encode(nil, data)
							 | 
						|
										if compressed == nil {
							 | 
						|
											compressed = []byte{}
							 | 
						|
										}
							 | 
						|
										return compressed, nil
							 | 
						|
									case Lz4:
							 | 
						|
										writer = lz4.NewWriter(&buf)
							 | 
						|
									case Zstd:
							 | 
						|
										writer, err = zstd.NewWriter(&buf)
							 | 
						|
										if err != nil {
							 | 
						|
											return nil, fmt.Errorf("failed to create zstd writer: %w", err)
							 | 
						|
										}
							 | 
						|
									default:
							 | 
						|
										return nil, fmt.Errorf("unsupported compression codec: %s", codec)
							 | 
						|
									}
							 | 
						|
								
							 | 
						|
									if _, err := writer.Write(data); err != nil {
							 | 
						|
										writer.Close()
							 | 
						|
										return nil, fmt.Errorf("failed to write compressed data: %w", err)
							 | 
						|
									}
							 | 
						|
								
							 | 
						|
									if err := writer.Close(); err != nil {
							 | 
						|
										return nil, fmt.Errorf("failed to close compressor: %w", err)
							 | 
						|
									}
							 | 
						|
								
							 | 
						|
									return buf.Bytes(), nil
							 | 
						|
								}
							 | 
						|
								
							 | 
						|
								// Decompress decompresses data using the specified codec
							 | 
						|
								func Decompress(codec CompressionCodec, data []byte) ([]byte, error) {
							 | 
						|
									if codec == None {
							 | 
						|
										return data, nil
							 | 
						|
									}
							 | 
						|
								
							 | 
						|
									var reader io.ReadCloser
							 | 
						|
									var err error
							 | 
						|
								
							 | 
						|
									buf := bytes.NewReader(data)
							 | 
						|
								
							 | 
						|
									switch codec {
							 | 
						|
									case Gzip:
							 | 
						|
										reader, err = gzip.NewReader(buf)
							 | 
						|
										if err != nil {
							 | 
						|
											return nil, fmt.Errorf("failed to create gzip reader: %w", err)
							 | 
						|
										}
							 | 
						|
									case Snappy:
							 | 
						|
										// Snappy doesn't have a streaming reader, so we decompress directly
							 | 
						|
										decompressed, err := snappy.Decode(nil, data)
							 | 
						|
										if err != nil {
							 | 
						|
											return nil, fmt.Errorf("failed to decompress snappy data: %w", err)
							 | 
						|
										}
							 | 
						|
										if decompressed == nil {
							 | 
						|
											decompressed = []byte{}
							 | 
						|
										}
							 | 
						|
										return decompressed, nil
							 | 
						|
									case Lz4:
							 | 
						|
										lz4Reader := lz4.NewReader(buf)
							 | 
						|
										// lz4.Reader doesn't implement Close, so we wrap it
							 | 
						|
										reader = &nopCloser{Reader: lz4Reader}
							 | 
						|
									case Zstd:
							 | 
						|
										zstdReader, err := zstd.NewReader(buf)
							 | 
						|
										if err != nil {
							 | 
						|
											return nil, fmt.Errorf("failed to create zstd reader: %w", err)
							 | 
						|
										}
							 | 
						|
										defer zstdReader.Close()
							 | 
						|
								
							 | 
						|
										var result bytes.Buffer
							 | 
						|
										if _, err := io.Copy(&result, zstdReader); err != nil {
							 | 
						|
											return nil, fmt.Errorf("failed to decompress zstd data: %w", err)
							 | 
						|
										}
							 | 
						|
										decompressed := result.Bytes()
							 | 
						|
										if decompressed == nil {
							 | 
						|
											decompressed = []byte{}
							 | 
						|
										}
							 | 
						|
										return decompressed, nil
							 | 
						|
									default:
							 | 
						|
										return nil, fmt.Errorf("unsupported compression codec: %s", codec)
							 | 
						|
									}
							 | 
						|
								
							 | 
						|
									defer reader.Close()
							 | 
						|
								
							 | 
						|
									var result bytes.Buffer
							 | 
						|
									if _, err := io.Copy(&result, reader); err != nil {
							 | 
						|
										return nil, fmt.Errorf("failed to decompress data: %w", err)
							 | 
						|
									}
							 | 
						|
								
							 | 
						|
									decompressed := result.Bytes()
							 | 
						|
									if decompressed == nil {
							 | 
						|
										decompressed = []byte{}
							 | 
						|
									}
							 | 
						|
									return decompressed, nil
							 | 
						|
								}
							 | 
						|
								
							 | 
						|
								// CompressRecordBatch compresses the records portion of a Kafka record batch
							 | 
						|
								// This function compresses only the records data, not the entire batch header
							 | 
						|
								func CompressRecordBatch(codec CompressionCodec, recordsData []byte) ([]byte, int16, error) {
							 | 
						|
									if codec == None {
							 | 
						|
										return recordsData, 0, nil
							 | 
						|
									}
							 | 
						|
								
							 | 
						|
									compressed, err := Compress(codec, recordsData)
							 | 
						|
									if err != nil {
							 | 
						|
										return nil, 0, fmt.Errorf("failed to compress record batch: %w", err)
							 | 
						|
									}
							 | 
						|
								
							 | 
						|
									attributes := int16(codec)
							 | 
						|
									return compressed, attributes, nil
							 | 
						|
								}
							 | 
						|
								
							 | 
						|
								// DecompressRecordBatch decompresses the records portion of a Kafka record batch
							 | 
						|
								func DecompressRecordBatch(attributes int16, compressedData []byte) ([]byte, error) {
							 | 
						|
									codec := ExtractCompressionCodec(attributes)
							 | 
						|
								
							 | 
						|
									if codec == None {
							 | 
						|
										return compressedData, nil
							 | 
						|
									}
							 | 
						|
								
							 | 
						|
									decompressed, err := Decompress(codec, compressedData)
							 | 
						|
									if err != nil {
							 | 
						|
										return nil, fmt.Errorf("failed to decompress record batch: %w", err)
							 | 
						|
									}
							 | 
						|
								
							 | 
						|
									return decompressed, nil
							 | 
						|
								}
							 |