package protocol

import (
	"bytes"
	"encoding/binary"
	"fmt"
	"hash/crc32"
	"testing"
	"time"

	"github.com/seaweedfs/seaweedfs/weed/mq/kafka/integration"
)
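
// The tests below index into the batch with fixed byte offsets. For reference,
// the Kafka record batch (magic v2) header as built by constructTestBatch is:
//
//	 0- 7  base offset            (int64)
//	 8-11  batch length           (int32, counts everything after this field)
//	12-15  partition leader epoch (int32)
//	16     magic                  (0x02)
//	17-20  CRC-32C                (computed over bytes 21..end)
//	21-22  attributes
//	23-26  last offset delta
//	27-34  base timestamp         (ms)
//	35-42  max timestamp          (ms)
//	43-50  producer ID
//	51-52  producer epoch
//	53-56  base sequence
//	57-60  record count
//	61-    records (varint-encoded fields)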

// TestBatchConstruction tests that our batch construction produces a valid CRC
func TestBatchConstruction(t *testing.T) {
	// Create test data
	key := []byte("test-key")
	value := []byte("test-value")
	timestamp := time.Now()

	// Build batch using our implementation
	batch := constructTestBatch(0, timestamp, key, value)

	t.Logf("Batch size: %d bytes", len(batch))
	t.Logf("Batch hex:\n%s", hexDumpTest(batch))

	// Extract and verify the CRC
	if len(batch) < 21 {
		t.Fatalf("Batch too short: %d bytes", len(batch))
	}

	storedCRC := binary.BigEndian.Uint32(batch[17:21])
	t.Logf("Stored CRC: 0x%08x", storedCRC)

	// Recalculate the CRC from the data it covers (byte 21 to the end)
	crcData := batch[21:]
	calculatedCRC := crc32.Checksum(crcData, crc32.MakeTable(crc32.Castagnoli))
	t.Logf("Calculated CRC: 0x%08x (over %d bytes)", calculatedCRC, len(crcData))

	if storedCRC != calculatedCRC {
		t.Errorf("CRC mismatch: stored=0x%08x calculated=0x%08x", storedCRC, calculatedCRC)

		// Debug: show the bytes the CRC is calculated over
		t.Log("CRC data (first 100 bytes):")
		dumpSize := 100
		if len(crcData) < dumpSize {
			dumpSize = len(crcData)
		}
		for i := 0; i < dumpSize; i += 16 {
			end := i + 16
			if end > dumpSize {
				end = dumpSize
			}
			t.Logf(" %04d: %x", i, crcData[i:end])
		}
	} else {
		t.Log("CRC verification PASSED")
	}

	// Log the header fields for inspection
	t.Log("\n=== Batch Structure ===")
	verifyField(t, "Base Offset", batch[0:8], binary.BigEndian.Uint64(batch[0:8]))
	verifyField(t, "Batch Length", batch[8:12], binary.BigEndian.Uint32(batch[8:12]))
	verifyField(t, "Leader Epoch", batch[12:16], int32(binary.BigEndian.Uint32(batch[12:16])))
	verifyField(t, "Magic", batch[16:17], batch[16])
	verifyField(t, "CRC", batch[17:21], binary.BigEndian.Uint32(batch[17:21]))
	verifyField(t, "Attributes", batch[21:23], binary.BigEndian.Uint16(batch[21:23]))
	verifyField(t, "Last Offset Delta", batch[23:27], binary.BigEndian.Uint32(batch[23:27]))
	verifyField(t, "Base Timestamp", batch[27:35], binary.BigEndian.Uint64(batch[27:35]))
	verifyField(t, "Max Timestamp", batch[35:43], binary.BigEndian.Uint64(batch[35:43]))
	verifyField(t, "Record Count", batch[57:61], binary.BigEndian.Uint32(batch[57:61]))

	// Verify the batch length field is correct
	expectedBatchLength := uint32(len(batch) - 12)
	actualBatchLength := binary.BigEndian.Uint32(batch[8:12])
	if expectedBatchLength != actualBatchLength {
		t.Errorf("Batch length mismatch: expected=%d actual=%d", expectedBatchLength, actualBatchLength)
	} else {
		t.Logf("Batch length correct: %d", actualBatchLength)
	}
}

// TestMultipleRecordsBatch builds two independent single-record batches and
// verifies each CRC (true multi-record batches are not exercised here)
func TestMultipleRecordsBatch(t *testing.T) {
	timestamp := time.Now()

	// We can't easily test multiple records per batch without the full
	// implementation, so verify that two single-record batches each have the
	// expected structure and a valid CRC
	batch1 := constructTestBatch(0, timestamp, []byte("key1"), []byte("value1"))
	batch2 := constructTestBatch(1, timestamp, []byte("key2"), []byte("value2"))

	t.Logf("Batch 1 size: %d, CRC: 0x%08x", len(batch1), binary.BigEndian.Uint32(batch1[17:21]))
	t.Logf("Batch 2 size: %d, CRC: 0x%08x", len(batch2), binary.BigEndian.Uint32(batch2[17:21]))

	// Verify both batches have valid CRCs
	for i, batch := range [][]byte{batch1, batch2} {
		storedCRC := binary.BigEndian.Uint32(batch[17:21])
		calculatedCRC := crc32.Checksum(batch[21:], crc32.MakeTable(crc32.Castagnoli))

		if storedCRC != calculatedCRC {
			t.Errorf("Batch %d CRC mismatch: stored=0x%08x calculated=0x%08x", i+1, storedCRC, calculatedCRC)
		} else {
			t.Logf("Batch %d CRC valid", i+1)
		}
	}
}

// TestVarintEncoding tests our varint encoding implementation
func TestVarintEncoding(t *testing.T) {
	testCases := []struct {
		value    int64
		expected []byte
	}{
		{0, []byte{0x00}},
		{1, []byte{0x02}},
		{-1, []byte{0x01}},
		{5, []byte{0x0a}},
		{-5, []byte{0x09}},
		{127, []byte{0xfe, 0x01}},
		{128, []byte{0x80, 0x02}},
		{-127, []byte{0xfd, 0x01}},
		{-128, []byte{0xff, 0x01}},
	}

	for _, tc := range testCases {
		result := encodeVarint(tc.value)
		if !bytes.Equal(result, tc.expected) {
			t.Errorf("encodeVarint(%d) = %x, expected %x", tc.value, result, tc.expected)
		} else {
			t.Logf("encodeVarint(%d) = %x", tc.value, result)
		}
	}
}
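
// The expected bytes above follow the zigzag varint convention Kafka uses for
// record-level fields, which matches Go's binary.PutVarint. A minimal
// cross-check sketch, assuming encodeVarint (defined elsewhere in this
// package) is meant to produce the same encoding as the standard library:
func TestVarintMatchesStdlib(t *testing.T) {
	for _, v := range []int64{0, 1, -1, 5, -5, 127, 128, -127, -128, 300, -300} {
		buf := make([]byte, binary.MaxVarintLen64)
		n := binary.PutVarint(buf, v) // zigzag then base-128 varint
		if got := encodeVarint(v); !bytes.Equal(got, buf[:n]) {
			t.Errorf("encodeVarint(%d) = %x, stdlib zigzag varint = %x", v, got, buf[:n])
		}
	}
}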

// constructTestBatch builds a batch using our implementation
func constructTestBatch(baseOffset int64, timestamp time.Time, key, value []byte) []byte {
	batch := make([]byte, 0, 256)

	// Base offset (0-7)
	baseOffsetBytes := make([]byte, 8)
	binary.BigEndian.PutUint64(baseOffsetBytes, uint64(baseOffset))
	batch = append(batch, baseOffsetBytes...)

	// Batch length placeholder (8-11), filled in once the batch is complete
	batchLengthPos := len(batch)
	batch = append(batch, 0, 0, 0, 0)

	// Partition leader epoch (12-15): -1
	batch = append(batch, 0xFF, 0xFF, 0xFF, 0xFF)

	// Magic (16): record batch format v2
	batch = append(batch, 0x02)

	// CRC placeholder (17-20), filled in once the batch is complete
	crcPos := len(batch)
	batch = append(batch, 0, 0, 0, 0)

	// Attributes (21-22): no compression, not transactional
	batch = append(batch, 0, 0)

	// Last offset delta (23-26): 0 for a single record
	batch = append(batch, 0, 0, 0, 0)

	// Base timestamp (27-34), milliseconds since epoch
	timestampMs := timestamp.UnixMilli()
	timestampBytes := make([]byte, 8)
	binary.BigEndian.PutUint64(timestampBytes, uint64(timestampMs))
	batch = append(batch, timestampBytes...)

	// Max timestamp (35-42): same as base for a single record
	batch = append(batch, timestampBytes...)

	// Producer ID (43-50): -1
	batch = append(batch, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF)

	// Producer epoch (51-52): -1
	batch = append(batch, 0xFF, 0xFF)

	// Base sequence (53-56): -1
	batch = append(batch, 0xFF, 0xFF, 0xFF, 0xFF)

	// Record count (57-60): exactly one record
	recordCountBytes := make([]byte, 4)
	binary.BigEndian.PutUint32(recordCountBytes, 1)
	batch = append(batch, recordCountBytes...)
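
	// Kafka v2 records encode all variable-size fields with zigzag varints;
	// a key or value length of -1 encodes null.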

	// Build record (61+)
	recordBody := []byte{}

	// Attributes
	recordBody = append(recordBody, 0)

	// Timestamp delta
	recordBody = append(recordBody, encodeVarint(0)...)

	// Offset delta
	recordBody = append(recordBody, encodeVarint(0)...)

	// Key length and key
	if key == nil {
		recordBody = append(recordBody, encodeVarint(-1)...)
	} else {
		recordBody = append(recordBody, encodeVarint(int64(len(key)))...)
		recordBody = append(recordBody, key...)
	}

	// Value length and value
	if value == nil {
		recordBody = append(recordBody, encodeVarint(-1)...)
	} else {
		recordBody = append(recordBody, encodeVarint(int64(len(value)))...)
		recordBody = append(recordBody, value...)
	}

	// Headers count
	recordBody = append(recordBody, encodeVarint(0)...)

	// Prepend the record length
	recordLength := int64(len(recordBody))
	batch = append(batch, encodeVarint(recordLength)...)
	batch = append(batch, recordBody...)

	// Fill in the batch length (everything after the length field itself)
	batchLength := uint32(len(batch) - 12)
	binary.BigEndian.PutUint32(batch[batchLengthPos:], batchLength)

	// Calculate the CRC-32C over bytes 21..end and fill it in
	crcData := batch[21:]
	crc := crc32.Checksum(crcData, crc32.MakeTable(crc32.Castagnoli))
	binary.BigEndian.PutUint32(batch[crcPos:], crc)

	return batch
}

// verifyField logs a header field's raw bytes and decoded value
func verifyField(t *testing.T, name string, raw []byte, value interface{}) {
	t.Logf(" %s: %x (value: %v)", name, raw, value)
}

// hexDumpTest formats bytes as a hex dump
func hexDumpTest(data []byte) string {
	var buf bytes.Buffer
	for i := 0; i < len(data); i += 16 {
		end := i + 16
		if end > len(data) {
			end = len(data)
		}
		buf.WriteString(fmt.Sprintf(" %04d: %x\n", i, data[i:end]))
	}
	return buf.String()
}

// TestClientSideCRCValidation mimics what a Kafka client does
func TestClientSideCRCValidation(t *testing.T) {
	// Build a batch
	batch := constructTestBatch(0, time.Now(), []byte("test-key"), []byte("test-value"))

	t.Logf("Constructed batch: %d bytes", len(batch))

	// Now pretend we're a Kafka client receiving this batch.
	// Step 1: Read the batch header to get the CRC
	if len(batch) < 21 {
		t.Fatalf("Batch too short for client to read CRC")
	}

	clientReadCRC := binary.BigEndian.Uint32(batch[17:21])
	t.Logf("Client read CRC from header: 0x%08x", clientReadCRC)

	// Step 2: Calculate the CRC over the data (from byte 21 onwards)
	clientCalculatedCRC := crc32.Checksum(batch[21:], crc32.MakeTable(crc32.Castagnoli))
	t.Logf("Client calculated CRC: 0x%08x", clientCalculatedCRC)

	// Step 3: Compare
	if clientReadCRC != clientCalculatedCRC {
		t.Errorf("CLIENT WOULD REJECT: CRC mismatch: read=0x%08x calculated=0x%08x",
			clientReadCRC, clientCalculatedCRC)
		t.Log("This is the error consumers are seeing!")
	} else {
		t.Log("CLIENT WOULD ACCEPT: CRC valid")
	}
}

// TestConcurrentBatchConstruction tests for race conditions when batches are
// built concurrently
func TestConcurrentBatchConstruction(t *testing.T) {
	timestamp := time.Now()

	// Build multiple batches concurrently
	const numBatches = 10
	results := make(chan bool, numBatches)

	for i := 0; i < numBatches; i++ {
		go func(id int) {
			batch := constructTestBatch(int64(id), timestamp,
				[]byte(fmt.Sprintf("key-%d", id)),
				[]byte(fmt.Sprintf("value-%d", id)))

			// Validate CRC
			storedCRC := binary.BigEndian.Uint32(batch[17:21])
			calculatedCRC := crc32.Checksum(batch[21:], crc32.MakeTable(crc32.Castagnoli))

			results <- (storedCRC == calculatedCRC)
		}(i)
	}

	// Check all results (they arrive in arbitrary order)
	allValid := true
	for i := 0; i < numBatches; i++ {
		if !<-results {
			allValid = false
			t.Errorf("a concurrently built batch has an invalid CRC (result %d of %d)", i+1, numBatches)
		}
	}

	if allValid {
		t.Logf("All %d concurrent batches have valid CRCs", numBatches)
	}
}

// TestProductionBatchConstruction tests the actual production code
func TestProductionBatchConstruction(t *testing.T) {
	// Create a mock SMQ record
	mockRecord := &mockSMQRecord{
		key:       []byte("prod-key"),
		value:     []byte("prod-value"),
		timestamp: time.Now().UnixNano(),
	}

	// Create a mock handler
	mockHandler := &Handler{}

	// Create fetcher
	fetcher := NewMultiBatchFetcher(mockHandler)

	// Construct a batch using the production code
	batch := fetcher.constructSingleRecordBatch("test-topic", 0, []integration.SMQRecord{mockRecord})

	t.Logf("Production batch size: %d bytes", len(batch))

	// Validate CRC
	if len(batch) < 21 {
		t.Fatalf("Production batch too short: %d bytes", len(batch))
	}

	storedCRC := binary.BigEndian.Uint32(batch[17:21])
	calculatedCRC := crc32.Checksum(batch[21:], crc32.MakeTable(crc32.Castagnoli))

	t.Logf("Production batch CRC: stored=0x%08x calculated=0x%08x", storedCRC, calculatedCRC)

	if storedCRC != calculatedCRC {
		t.Errorf("PRODUCTION CODE CRC INVALID: stored=0x%08x calculated=0x%08x", storedCRC, calculatedCRC)
		t.Log("This means the production constructSingleRecordBatch has a bug!")
	} else {
		t.Log("PRODUCTION CODE CRC VALID")
	}
}

// mockSMQRecord implements the SMQRecord interface for testing
type mockSMQRecord struct {
	key       []byte
	value     []byte
	timestamp int64
}

func (m *mockSMQRecord) GetKey() []byte      { return m.key }
func (m *mockSMQRecord) GetValue() []byte    { return m.value }
func (m *mockSMQRecord) GetTimestamp() int64 { return m.timestamp }
func (m *mockSMQRecord) GetOffset() int64    { return 0 }
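
// Compile-time check that the mock satisfies the integration.SMQRecord
// interface consumed by constructSingleRecordBatch above; it simply makes the
// existing interface requirement explicit.
var _ integration.SMQRecord = (*mockSMQRecord)(nil)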