package protocol

import (
	"fmt"
	"testing"
	"time"
)

// TestOffsetCommitFetchPattern verifies the critical pattern:
// 1. Consumer reads messages 0-N
// 2. Consumer commits offset N
// 3. Consumer fetches messages starting from N+1
// 4. No message loss or duplication
//
// This tests for the root cause of the "consumer stalling" issue where
// consumers stop fetching after certain offsets.
func TestOffsetCommitFetchPattern(t *testing.T) {
	t.Skip("Integration test - requires mock broker setup")

	// Setup
	const (
		topic        = "test-topic"
		partition    = int32(0)
		messageCount = 1000
		batchSize    = 50
		groupID      = "test-group"
	)

	// Mock store for offsets
	offsetStore := make(map[string]int64)
	offsetKey := fmt.Sprintf("%s/%s/%d", groupID, topic, partition)

	// Simulate message production
	messages := make([][]byte, messageCount)
	for i := 0; i < messageCount; i++ {
		messages[i] = []byte(fmt.Sprintf("message-%d", i))
	}

	// Test: Sequential consumption with offset commits
	t.Run("SequentialConsumption", func(t *testing.T) {
		consumedOffsets := make(map[int64]bool)
		nextOffset := int64(0)

		for nextOffset < int64(messageCount) {
			// Step 1: Fetch batch of messages starting from nextOffset
			endOffset := nextOffset + int64(batchSize)
			if endOffset > int64(messageCount) {
				endOffset = int64(messageCount)
			}

			fetchedCount := endOffset - nextOffset
			if fetchedCount <= 0 {
				t.Fatalf("Fetch returned no messages at offset %d (HWM=%d)", nextOffset, messageCount)
			}

			// Simulate fetching messages
			for i := nextOffset; i < endOffset; i++ {
				if consumedOffsets[i] {
					t.Errorf("DUPLICATE: Message at offset %d already consumed", i)
				}
				consumedOffsets[i] = true
			}

			// Step 2: Commit the last offset in this batch
			lastConsumedOffset := endOffset - 1
			offsetStore[offsetKey] = lastConsumedOffset
			t.Logf("Batch %d: Consumed offsets %d-%d, committed offset %d",
				nextOffset/int64(batchSize), nextOffset, lastConsumedOffset, lastConsumedOffset)

			// Step 3: Verify offset is correctly stored
			storedOffset, exists := offsetStore[offsetKey]
			if !exists || storedOffset != lastConsumedOffset {
				t.Errorf("Offset not stored correctly: stored=%v, expected=%d", storedOffset, lastConsumedOffset)
			}

			// Step 4: Next fetch should start from lastConsumedOffset + 1
			nextOffset = lastConsumedOffset + 1
		}

		// Verify all messages were consumed exactly once
		if len(consumedOffsets) != messageCount {
			t.Errorf("Not all messages consumed: got %d, expected %d", len(consumedOffsets), messageCount)
		}

		for i := 0; i < messageCount; i++ {
			if !consumedOffsets[int64(i)] {
				t.Errorf("Message at offset %d not consumed", i)
			}
		}
	})

	t.Logf("✅ Sequential consumption pattern verified successfully")
}

// TestOffsetFetchAfterCommit verifies that after committing offset N,
// the next fetch returns offset N+1 onwards (not empty, not an error).
func TestOffsetFetchAfterCommit(t *testing.T) {
	t.Skip("Integration test - requires mock broker setup")

	t.Run("FetchAfterCommit", func(t *testing.T) {
		type FetchRequest struct {
			partition int32
			offset    int64
		}

		type FetchResponse struct {
			records    []byte
			nextOffset int64
		}

		// Simulate: Commit offset 163, then fetch offset 164
		committedOffset := int64(163)
		nextFetchOffset := committedOffset + 1

		t.Logf("After committing offset %d, fetching from offset %d", committedOffset, nextFetchOffset)

		// This is where consumers get stuck: they commit offset 163, then fetch
		// 164+, but receive an empty response.

		// Expected: Fetch(164) returns records starting from offset 164
		// Actual bug: Fetch(164) returns empty and the consumer stops fetching
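
		// Sketch (assumption, not the real broker API): use the local
		// FetchRequest/FetchResponse types to state the expected shape of a
		// correct response in code: fetching committedOffset+1 must return
		// records and advance nextOffset, never come back empty.
		req := FetchRequest{partition: 0, offset: nextFetchOffset}
		resp := FetchResponse{records: []byte("record-164"), nextOffset: req.offset + 1}
		if len(resp.records) == 0 {
			t.Errorf("Fetch at offset %d (below HWM) must not return an empty record set", req.offset)
		}
		if resp.nextOffset != nextFetchOffset+1 {
			t.Errorf("Expected nextOffset %d after fetching offset %d, got %d",
				nextFetchOffset+1, nextFetchOffset, resp.nextOffset)
		}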

		if nextFetchOffset > committedOffset+100 {
			t.Errorf("POTENTIAL BUG: Fetch offset %d is way beyond committed offset %d",
				nextFetchOffset, committedOffset)
		}

		t.Logf("✅ Offset fetch request looks correct: committed=%d, next_fetch=%d",
			committedOffset, nextFetchOffset)
	})
}

// TestOffsetPersistencePattern verifies that offsets are correctly
// persisted and recovered across restarts.
func TestOffsetPersistencePattern(t *testing.T) {
	t.Skip("Integration test - requires mock broker setup")

	t.Run("OffsetRecovery", func(t *testing.T) {
		const (
			groupID   = "test-group"
			topic     = "test-topic"
			partition = int32(0)
		)

		offsetStore := make(map[string]int64)
		offsetKey := fmt.Sprintf("%s/%s/%d", groupID, topic, partition)

		// Scenario 1: First consumer session
		// Consume messages 0-99, commit offset 99
		offsetStore[offsetKey] = 99
		t.Logf("Session 1: Committed offset 99")

		// Scenario 2: Consumer restarts (consumer group rebalancing)
		// Should recover offset 99 from storage
		recoveredOffset, exists := offsetStore[offsetKey]
		if !exists || recoveredOffset != 99 {
			t.Errorf("Failed to recover offset: expected 99, got %v", recoveredOffset)
		}

		// Scenario 3: Continue consuming from offset 100
		// This is where the bug manifests. The consumer might:
		// A) correctly fetch from 100,
		// B) try to fetch from 99 again (a duplicate), or
		// C) get stuck and not fetch at all.
		// Case A is checked here; case B is guarded against in the sketch below.
		nextOffset := recoveredOffset + 1
		if nextOffset != 100 {
			t.Errorf("Incorrect next offset after recovery: expected 100, got %d", nextOffset)
		}
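
		// Sketch (assumption): state failure mode B from the comment above as an
		// explicit guard; mode C (no fetch issued at all) can only be observed
		// against a live broker, not in this offline sketch.
		if nextOffset == recoveredOffset {
			t.Errorf("Duplicate delivery: next fetch would re-read committed offset %d", recoveredOffset)
		}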
										t.Logf("✅ Offset recovery pattern works: recovered %d, next fetch at %d", recoveredOffset, nextOffset)
							 | 
						|
									})
							 | 
						|
								}
							 | 
						|
								
							 | 
						|

// TestOffsetCommitConsistency verifies that offset commits are atomic
// and don't cause partial updates.
func TestOffsetCommitConsistency(t *testing.T) {
	t.Skip("Integration test - requires mock broker setup")

	t.Run("AtomicCommit", func(t *testing.T) {
		type OffsetCommit struct {
			Group     string
			Topic     string
			Partition int32
			Offset    int64
			Timestamp int64
		}

		commits := []OffsetCommit{
			{"group1", "topic1", 0, 100, time.Now().UnixNano()},
			{"group1", "topic1", 1, 150, time.Now().UnixNano()},
			{"group1", "topic1", 2, 120, time.Now().UnixNano()},
		}

		// All commits should succeed or all fail (atomicity)
		for _, commit := range commits {
			key := fmt.Sprintf("%s/%s/%d", commit.Group, commit.Topic, commit.Partition)
			t.Logf("Committing %s at offset %d", key, commit.Offset)

			// Verify the offset is correctly persisted
			// (in a real test, this would read back from SMQ storage;
			// a stand-in check follows below)
		}
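
		// Sketch (assumption): model SMQ persistence with an in-memory map and
		// check the batch after the fact; a partial write would show up as a
		// missing or stale entry for one of the keys.
		store := make(map[string]int64)
		for _, commit := range commits {
			store[fmt.Sprintf("%s/%s/%d", commit.Group, commit.Topic, commit.Partition)] = commit.Offset
		}
		for _, commit := range commits {
			key := fmt.Sprintf("%s/%s/%d", commit.Group, commit.Topic, commit.Partition)
			if got, ok := store[key]; !ok || got != commit.Offset {
				t.Errorf("Partial commit for %s: got %d, want %d", key, got, commit.Offset)
			}
		}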
										t.Logf("✅ Offset commit consistency verified")
							 | 
						|
									})
							 | 
						|
								}
							 | 
						|
								
							 | 
						|

// TestFetchEmptyPartitionHandling tests what happens when fetching
// from a partition with no more messages.
func TestFetchEmptyPartitionHandling(t *testing.T) {
	t.Skip("Integration test - requires mock broker setup")

	t.Run("EmptyPartitionBehavior", func(t *testing.T) {
		const (
			topic      = "test-topic"
			partition  = int32(0)
			lastOffset = int64(999) // Messages 0-999 exist, so the HWM is 1000
		)

		// Test 1: A fetch at the HWM should return empty (not an error).
		// Expected: Fetch(1000, HWM=1000) returns empty; this is normal and the
		// consumer should retry.

		// Test 2: A fetch at or beyond the HWM should not fail; the broker should
		// wait for new messages (long-poll). Expected: Fetch(1000, HWM=1000) waits,
		// and the consumer should NOT give up.

		// Test 3: After a new message arrives, the fetch should succeed.
		// Expected: Fetch(1000, HWM=1001) returns 1 message.
		// These cases are sketched in code below.
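
		// Sketch (assumption): a toy fetch closure standing in for the broker
		// call, modeling the HWM semantics described above. The returned count is
		// the number of available records; an empty result is not an error.
		fetch := func(offset, hwm int64) (int, error) {
			if offset >= hwm {
				return 0, nil // nothing to read yet; consumer should keep polling
			}
			return int(hwm - offset), nil
		}

		hwm := lastOffset + 1 // offsets 0-999 exist, so HWM is 1000

		// Tests 1/2: a fetch at the HWM is empty but not an error
		if n, err := fetch(hwm, hwm); err != nil || n != 0 {
			t.Errorf("Fetch at HWM: got n=%d err=%v, want 0 records and no error", n, err)
		}

		// Test 3: one new message (HWM=1001) becomes fetchable
		if n, _ := fetch(hwm, hwm+1); n != 1 {
			t.Errorf("Fetch after new message: got %d records, want 1", n)
		}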
										t.Logf("✅ Empty partition handling verified")
							 | 
						|
									})
							 | 
						|
								}
							 | 
						|
								
							 | 
						|

// TestLongPollWithOffsetCommit verifies long-poll semantics work correctly
// with offset commits (no throttling confusion).
func TestLongPollWithOffsetCommit(t *testing.T) {
	t.Skip("Integration test - requires mock broker setup")

	t.Run("LongPollNoThrottling", func(t *testing.T) {
		// Critical: long-poll duration should NOT be reported as throttleTimeMs
		// This was bug 8969b4509

		const maxWaitTime = 5 * time.Second

		// Simulate long-poll wait (no data available)
		time.Sleep(100 * time.Millisecond) // Broker waits up to maxWaitTime

		// throttleTimeMs should be 0 (NOT elapsed duration!)
		throttleTimeMs := int32(0) // CORRECT
		// throttleTimeMs := int32(elapsed / time.Millisecond) // WRONG (previous bug)

		if throttleTimeMs > 0 {
			t.Errorf("Long-poll elapsed time should NOT be reported as throttle: %d ms", throttleTimeMs)
		}
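
		// Sketch (assumption): measure an explicit wait to make the buggy vs.
		// correct computation concrete; the broker's real wait loop is not used here.
		start := time.Now()
		time.Sleep(10 * time.Millisecond) // stand-in for the broker's long-poll wait
		elapsed := time.Since(start)
		buggyThrottle := int32(elapsed / time.Millisecond) // the old bug: elapsed time reported as throttle
		t.Logf("elapsed=%v: correct throttleTimeMs=0, buggy value would have been %d ms", elapsed, buggyThrottle)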
										t.Logf("✅ Long-poll not confused with throttling")
							 | 
						|
									})
							 | 
						|
								}
							 |