You can not select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
					
					
						
							368 lines
						
					
					
						
							11 KiB
						
					
					
				
			
		
		
		
			
			
			
		
		
	
	
							368 lines
						
					
					
						
							11 KiB
						
					
					
				| package protocol | |
| 
 | |
| import ( | |
| 	"bytes" | |
| 	"encoding/binary" | |
| 	"fmt" | |
| 	"hash/crc32" | |
| 	"testing" | |
| 	"time" | |
| 
 | |
| 	"github.com/seaweedfs/seaweedfs/weed/mq/kafka/integration" | |
| ) | |
| 
 | |
| // TestBatchConstruction tests that our batch construction produces valid CRC | |
| func TestBatchConstruction(t *testing.T) { | |
| 	// Create test data | |
| 	key := []byte("test-key") | |
| 	value := []byte("test-value") | |
| 	timestamp := time.Now() | |
| 
 | |
| 	// Build batch using our implementation | |
| 	batch := constructTestBatch(0, timestamp, key, value) | |
| 
 | |
| 	t.Logf("Batch size: %d bytes", len(batch)) | |
| 	t.Logf("Batch hex:\n%s", hexDumpTest(batch)) | |
| 
 | |
| 	// Extract and verify CRC | |
| 	if len(batch) < 21 { | |
| 		t.Fatalf("Batch too short: %d bytes", len(batch)) | |
| 	} | |
| 
 | |
| 	storedCRC := binary.BigEndian.Uint32(batch[17:21]) | |
| 	t.Logf("Stored CRC: 0x%08x", storedCRC) | |
| 
 | |
| 	// Recalculate CRC from the data | |
| 	crcData := batch[21:] | |
| 	calculatedCRC := crc32.Checksum(crcData, crc32.MakeTable(crc32.Castagnoli)) | |
| 	t.Logf("Calculated CRC: 0x%08x (over %d bytes)", calculatedCRC, len(crcData)) | |
| 
 | |
| 	if storedCRC != calculatedCRC { | |
| 		t.Errorf("CRC mismatch: stored=0x%08x calculated=0x%08x", storedCRC, calculatedCRC) | |
| 
 | |
| 		// Debug: show what bytes the CRC is calculated over | |
| 		t.Logf("CRC data (first 100 bytes):") | |
| 		dumpSize := 100 | |
| 		if len(crcData) < dumpSize { | |
| 			dumpSize = len(crcData) | |
| 		} | |
| 		for i := 0; i < dumpSize; i += 16 { | |
| 			end := i + 16 | |
| 			if end > dumpSize { | |
| 				end = dumpSize | |
| 			} | |
| 			t.Logf("  %04d: %x", i, crcData[i:end]) | |
| 		} | |
| 	} else { | |
| 		t.Log("CRC verification PASSED") | |
| 	} | |
| 
 | |
| 	// Verify batch structure | |
| 	t.Log("\n=== Batch Structure ===") | |
| 	verifyField(t, "Base Offset", batch[0:8], binary.BigEndian.Uint64(batch[0:8])) | |
| 	verifyField(t, "Batch Length", batch[8:12], binary.BigEndian.Uint32(batch[8:12])) | |
| 	verifyField(t, "Leader Epoch", batch[12:16], int32(binary.BigEndian.Uint32(batch[12:16]))) | |
| 	verifyField(t, "Magic", batch[16:17], batch[16]) | |
| 	verifyField(t, "CRC", batch[17:21], binary.BigEndian.Uint32(batch[17:21])) | |
| 	verifyField(t, "Attributes", batch[21:23], binary.BigEndian.Uint16(batch[21:23])) | |
| 	verifyField(t, "Last Offset Delta", batch[23:27], binary.BigEndian.Uint32(batch[23:27])) | |
| 	verifyField(t, "Base Timestamp", batch[27:35], binary.BigEndian.Uint64(batch[27:35])) | |
| 	verifyField(t, "Max Timestamp", batch[35:43], binary.BigEndian.Uint64(batch[35:43])) | |
| 	verifyField(t, "Record Count", batch[57:61], binary.BigEndian.Uint32(batch[57:61])) | |
| 
 | |
| 	// Verify the batch length field is correct | |
| 	expectedBatchLength := uint32(len(batch) - 12) | |
| 	actualBatchLength := binary.BigEndian.Uint32(batch[8:12]) | |
| 	if expectedBatchLength != actualBatchLength { | |
| 		t.Errorf("Batch length mismatch: expected=%d actual=%d", expectedBatchLength, actualBatchLength) | |
| 	} else { | |
| 		t.Logf("Batch length correct: %d", actualBatchLength) | |
| 	} | |
| } | |
| 
 | |
| // TestMultipleRecordsBatch tests batch construction with multiple records | |
| func TestMultipleRecordsBatch(t *testing.T) { | |
| 	timestamp := time.Now() | |
| 
 | |
| 	// We can't easily test multiple records without the full implementation | |
| 	// So let's test that our single record batch matches expected structure | |
|  | |
| 	batch1 := constructTestBatch(0, timestamp, []byte("key1"), []byte("value1")) | |
| 	batch2 := constructTestBatch(1, timestamp, []byte("key2"), []byte("value2")) | |
| 
 | |
| 	t.Logf("Batch 1 size: %d, CRC: 0x%08x", len(batch1), binary.BigEndian.Uint32(batch1[17:21])) | |
| 	t.Logf("Batch 2 size: %d, CRC: 0x%08x", len(batch2), binary.BigEndian.Uint32(batch2[17:21])) | |
| 
 | |
| 	// Verify both batches have valid CRCs | |
| 	for i, batch := range [][]byte{batch1, batch2} { | |
| 		storedCRC := binary.BigEndian.Uint32(batch[17:21]) | |
| 		calculatedCRC := crc32.Checksum(batch[21:], crc32.MakeTable(crc32.Castagnoli)) | |
| 
 | |
| 		if storedCRC != calculatedCRC { | |
| 			t.Errorf("Batch %d CRC mismatch: stored=0x%08x calculated=0x%08x", i+1, storedCRC, calculatedCRC) | |
| 		} else { | |
| 			t.Logf("Batch %d CRC valid", i+1) | |
| 		} | |
| 	} | |
| } | |
| 
 | |
| // TestVarintEncoding tests our varint encoding implementation | |
| func TestVarintEncoding(t *testing.T) { | |
| 	testCases := []struct { | |
| 		value    int64 | |
| 		expected []byte | |
| 	}{ | |
| 		{0, []byte{0x00}}, | |
| 		{1, []byte{0x02}}, | |
| 		{-1, []byte{0x01}}, | |
| 		{5, []byte{0x0a}}, | |
| 		{-5, []byte{0x09}}, | |
| 		{127, []byte{0xfe, 0x01}}, | |
| 		{128, []byte{0x80, 0x02}}, | |
| 		{-127, []byte{0xfd, 0x01}}, | |
| 		{-128, []byte{0xff, 0x01}}, | |
| 	} | |
| 
 | |
| 	for _, tc := range testCases { | |
| 		result := encodeVarint(tc.value) | |
| 		if !bytes.Equal(result, tc.expected) { | |
| 			t.Errorf("encodeVarint(%d) = %x, expected %x", tc.value, result, tc.expected) | |
| 		} else { | |
| 			t.Logf("encodeVarint(%d) = %x", tc.value, result) | |
| 		} | |
| 	} | |
| } | |
| 
 | |
| // constructTestBatch builds a batch using our implementation | |
| func constructTestBatch(baseOffset int64, timestamp time.Time, key, value []byte) []byte { | |
| 	batch := make([]byte, 0, 256) | |
| 
 | |
| 	// Base offset (0-7) | |
| 	baseOffsetBytes := make([]byte, 8) | |
| 	binary.BigEndian.PutUint64(baseOffsetBytes, uint64(baseOffset)) | |
| 	batch = append(batch, baseOffsetBytes...) | |
| 
 | |
| 	// Batch length placeholder (8-11) | |
| 	batchLengthPos := len(batch) | |
| 	batch = append(batch, 0, 0, 0, 0) | |
| 
 | |
| 	// Partition leader epoch (12-15) | |
| 	batch = append(batch, 0xFF, 0xFF, 0xFF, 0xFF) | |
| 
 | |
| 	// Magic (16) | |
| 	batch = append(batch, 0x02) | |
| 
 | |
| 	// CRC placeholder (17-20) | |
| 	crcPos := len(batch) | |
| 	batch = append(batch, 0, 0, 0, 0) | |
| 
 | |
| 	// Attributes (21-22) | |
| 	batch = append(batch, 0, 0) | |
| 
 | |
| 	// Last offset delta (23-26) | |
| 	batch = append(batch, 0, 0, 0, 0) | |
| 
 | |
| 	// Base timestamp (27-34) | |
| 	timestampMs := timestamp.UnixMilli() | |
| 	timestampBytes := make([]byte, 8) | |
| 	binary.BigEndian.PutUint64(timestampBytes, uint64(timestampMs)) | |
| 	batch = append(batch, timestampBytes...) | |
| 
 | |
| 	// Max timestamp (35-42) | |
| 	batch = append(batch, timestampBytes...) | |
| 
 | |
| 	// Producer ID (43-50) | |
| 	batch = append(batch, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF) | |
| 
 | |
| 	// Producer epoch (51-52) | |
| 	batch = append(batch, 0xFF, 0xFF) | |
| 
 | |
| 	// Base sequence (53-56) | |
| 	batch = append(batch, 0xFF, 0xFF, 0xFF, 0xFF) | |
| 
 | |
| 	// Record count (57-60) | |
| 	recordCountBytes := make([]byte, 4) | |
| 	binary.BigEndian.PutUint32(recordCountBytes, 1) | |
| 	batch = append(batch, recordCountBytes...) | |
| 
 | |
| 	// Build record (61+) | |
| 	recordBody := []byte{} | |
| 
 | |
| 	// Attributes | |
| 	recordBody = append(recordBody, 0) | |
| 
 | |
| 	// Timestamp delta | |
| 	recordBody = append(recordBody, encodeVarint(0)...) | |
| 
 | |
| 	// Offset delta | |
| 	recordBody = append(recordBody, encodeVarint(0)...) | |
| 
 | |
| 	// Key length and key | |
| 	if key == nil { | |
| 		recordBody = append(recordBody, encodeVarint(-1)...) | |
| 	} else { | |
| 		recordBody = append(recordBody, encodeVarint(int64(len(key)))...) | |
| 		recordBody = append(recordBody, key...) | |
| 	} | |
| 
 | |
| 	// Value length and value | |
| 	if value == nil { | |
| 		recordBody = append(recordBody, encodeVarint(-1)...) | |
| 	} else { | |
| 		recordBody = append(recordBody, encodeVarint(int64(len(value)))...) | |
| 		recordBody = append(recordBody, value...) | |
| 	} | |
| 
 | |
| 	// Headers count | |
| 	recordBody = append(recordBody, encodeVarint(0)...) | |
| 
 | |
| 	// Prepend record length | |
| 	recordLength := int64(len(recordBody)) | |
| 	batch = append(batch, encodeVarint(recordLength)...) | |
| 	batch = append(batch, recordBody...) | |
| 
 | |
| 	// Fill in batch length | |
| 	batchLength := uint32(len(batch) - 12) | |
| 	binary.BigEndian.PutUint32(batch[batchLengthPos:], batchLength) | |
| 
 | |
| 	// Calculate CRC | |
| 	crcData := batch[21:] | |
| 	crc := crc32.Checksum(crcData, crc32.MakeTable(crc32.Castagnoli)) | |
| 	binary.BigEndian.PutUint32(batch[crcPos:], crc) | |
| 
 | |
| 	return batch | |
| } | |
| 
 | |
| // verifyField logs a field's value | |
| func verifyField(t *testing.T, name string, bytes []byte, value interface{}) { | |
| 	t.Logf("  %s: %x (value: %v)", name, bytes, value) | |
| } | |
| 
 | |
| // hexDump formats bytes as hex dump | |
| func hexDumpTest(data []byte) string { | |
| 	var buf bytes.Buffer | |
| 	for i := 0; i < len(data); i += 16 { | |
| 		end := i + 16 | |
| 		if end > len(data) { | |
| 			end = len(data) | |
| 		} | |
| 		buf.WriteString(fmt.Sprintf("  %04d: %x\n", i, data[i:end])) | |
| 	} | |
| 	return buf.String() | |
| } | |
| 
 | |
| // TestClientSideCRCValidation mimics what a Kafka client does | |
| func TestClientSideCRCValidation(t *testing.T) { | |
| 	// Build a batch | |
| 	batch := constructTestBatch(0, time.Now(), []byte("test-key"), []byte("test-value")) | |
| 
 | |
| 	t.Logf("Constructed batch: %d bytes", len(batch)) | |
| 
 | |
| 	// Now pretend we're a Kafka client receiving this batch | |
| 	// Step 1: Read the batch header to get the CRC | |
| 	if len(batch) < 21 { | |
| 		t.Fatalf("Batch too short for client to read CRC") | |
| 	} | |
| 
 | |
| 	clientReadCRC := binary.BigEndian.Uint32(batch[17:21]) | |
| 	t.Logf("Client read CRC from header: 0x%08x", clientReadCRC) | |
| 
 | |
| 	// Step 2: Calculate CRC over the data (from byte 21 onwards) | |
| 	clientCalculatedCRC := crc32.Checksum(batch[21:], crc32.MakeTable(crc32.Castagnoli)) | |
| 	t.Logf("Client calculated CRC: 0x%08x", clientCalculatedCRC) | |
| 
 | |
| 	// Step 3: Compare | |
| 	if clientReadCRC != clientCalculatedCRC { | |
| 		t.Errorf("CLIENT WOULD REJECT: CRC mismatch: read=0x%08x calculated=0x%08x", | |
| 			clientReadCRC, clientCalculatedCRC) | |
| 		t.Log("This is the error consumers are seeing!") | |
| 	} else { | |
| 		t.Log("CLIENT WOULD ACCEPT: CRC valid") | |
| 	} | |
| } | |
| 
 | |
| // TestConcurrentBatchConstruction tests if there are race conditions | |
| func TestConcurrentBatchConstruction(t *testing.T) { | |
| 	timestamp := time.Now() | |
| 
 | |
| 	// Build multiple batches concurrently | |
| 	const numBatches = 10 | |
| 	results := make(chan bool, numBatches) | |
| 
 | |
| 	for i := 0; i < numBatches; i++ { | |
| 		go func(id int) { | |
| 			batch := constructTestBatch(int64(id), timestamp, | |
| 				[]byte(fmt.Sprintf("key-%d", id)), | |
| 				[]byte(fmt.Sprintf("value-%d", id))) | |
| 
 | |
| 			// Validate CRC | |
| 			storedCRC := binary.BigEndian.Uint32(batch[17:21]) | |
| 			calculatedCRC := crc32.Checksum(batch[21:], crc32.MakeTable(crc32.Castagnoli)) | |
| 
 | |
| 			results <- (storedCRC == calculatedCRC) | |
| 		}(i) | |
| 	} | |
| 
 | |
| 	// Check all results | |
| 	allValid := true | |
| 	for i := 0; i < numBatches; i++ { | |
| 		if !<-results { | |
| 			allValid = false | |
| 			t.Errorf("Batch %d has invalid CRC", i) | |
| 		} | |
| 	} | |
| 
 | |
| 	if allValid { | |
| 		t.Logf("All %d concurrent batches have valid CRCs", numBatches) | |
| 	} | |
| } | |
| 
 | |
| // TestProductionBatchConstruction tests the actual production code | |
| func TestProductionBatchConstruction(t *testing.T) { | |
| 	// Create a mock SMQ record | |
| 	mockRecord := &mockSMQRecord{ | |
| 		key:       []byte("prod-key"), | |
| 		value:     []byte("prod-value"), | |
| 		timestamp: time.Now().UnixNano(), | |
| 	} | |
| 
 | |
| 	// Create a mock handler | |
| 	mockHandler := &Handler{} | |
| 
 | |
| 	// Create fetcher | |
| 	fetcher := NewMultiBatchFetcher(mockHandler) | |
| 
 | |
| 	// Construct batch using production code | |
| 	batch := fetcher.constructSingleRecordBatch("test-topic", 0, []integration.SMQRecord{mockRecord}) | |
| 
 | |
| 	t.Logf("Production batch size: %d bytes", len(batch)) | |
| 
 | |
| 	// Validate CRC | |
| 	if len(batch) < 21 { | |
| 		t.Fatalf("Production batch too short: %d bytes", len(batch)) | |
| 	} | |
| 
 | |
| 	storedCRC := binary.BigEndian.Uint32(batch[17:21]) | |
| 	calculatedCRC := crc32.Checksum(batch[21:], crc32.MakeTable(crc32.Castagnoli)) | |
| 
 | |
| 	t.Logf("Production batch CRC: stored=0x%08x calculated=0x%08x", storedCRC, calculatedCRC) | |
| 
 | |
| 	if storedCRC != calculatedCRC { | |
| 		t.Errorf("PRODUCTION CODE CRC INVALID: stored=0x%08x calculated=0x%08x", storedCRC, calculatedCRC) | |
| 		t.Log("This means the production constructSingleRecordBatch has a bug!") | |
| 	} else { | |
| 		t.Log("PRODUCTION CODE CRC VALID") | |
| 	} | |
| } | |
| 
// mockSMQRecord implements the SMQRecord interface for testing
type mockSMQRecord struct {
	key       []byte // record key returned by GetKey
	value     []byte // record value returned by GetValue
	timestamp int64  // timestamp returned by GetTimestamp (callers pass UnixNano here)
}

func (m *mockSMQRecord) GetKey() []byte      { return m.key }
func (m *mockSMQRecord) GetValue() []byte    { return m.value }
func (m *mockSMQRecord) GetTimestamp() int64 { return m.timestamp }

// GetOffset always reports offset zero; the mock does not model log position.
func (m *mockSMQRecord) GetOffset() int64 { return 0 }