You can not select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
					
					
						
							125 lines
						
					
					
						
							4.9 KiB
						
					
					
				
			
		
		
		
			
			
			
		
		
	
	
							125 lines
						
					
					
						
							4.9 KiB
						
					
					
				
								package protocol
							 | 
						|
								
							 | 
						|
								import (
							 | 
						|
									"testing"
							 | 
						|
								)
							 | 
						|
								
							 | 
						|
								// TestSyncGroup_RaceCondition_BugDocumentation documents the original race condition bug
							 | 
						|
								// This test documents the bug where non-leader in Stable state would trigger server-side assignment
							 | 
						|
								func TestSyncGroup_RaceCondition_BugDocumentation(t *testing.T) {
							 | 
						|
									// Original bug scenario:
							 | 
						|
									// 1. Consumer 1 (leader) joins, gets all 15 partitions
							 | 
						|
									// 2. Consumer 2 joins, triggers rebalance
							 | 
						|
									// 3. Consumer 1 commits offsets during cleanup
							 | 
						|
									// 4. Consumer 1 calls SyncGroup with client-side assignments, group moves to Stable
							 | 
						|
									// 5. Consumer 2 calls SyncGroup (late arrival), group is already Stable
							 | 
						|
									// 6. BUG: Consumer 2 falls into "else" branch, triggers server-side assignment
							 | 
						|
									// 7. Consumer 2 gets 10 partitions via server-side assignment
							 | 
						|
									// 8. Result: Some partitions (e.g., partition 2) assigned to BOTH consumers
							 | 
						|
									// 9. Consumer 2 fetches offsets, gets offset 0 (no committed offsets yet)
							 | 
						|
									// 10. Consumer 2 re-reads messages from offset 0 -> DUPLICATES (66.7%)!
							 | 
						|
								
							 | 
						|
									// ORIGINAL BUGGY CODE (joingroup.go lines 887-905):
							 | 
						|
									// } else if group.State == consumer.GroupStateCompletingRebalance || group.State == consumer.GroupStatePreparingRebalance {
							 | 
						|
									//     // Non-leader member waiting for leader to provide assignments
							 | 
						|
									//     glog.Infof("[SYNCGROUP] Non-leader %s waiting for leader assignments in group %s (state=%s)",
							 | 
						|
									//         request.MemberID, request.GroupID, group.State)
							 | 
						|
									// } else {
							 | 
						|
									//     // BUG: This branch was triggered when non-leader arrived in Stable state!
							 | 
						|
									//     glog.Warningf("[SYNCGROUP] Using server-side assignment for group %s (Leader=%s State=%s)",
							 | 
						|
									//         request.GroupID, group.Leader, group.State)
							 | 
						|
									//     topicPartitions := h.getTopicPartitions(group)
							 | 
						|
									//     group.AssignPartitions(topicPartitions)  // <- Duplicate assignment!
							 | 
						|
									// }
							 | 
						|
								
							 | 
						|
									// FIXED CODE (joingroup.go lines 887-906):
							 | 
						|
									// } else if request.MemberID != group.Leader && len(request.GroupAssignments) == 0 {
							 | 
						|
									//     // Non-leader member requesting its assignment
							 | 
						|
									//     // CRITICAL FIX: Non-leader members should ALWAYS wait for leader's client-side assignments
							 | 
						|
									//     // This is the correct behavior for Sarama and other client-side assignment protocols
							 | 
						|
									//     glog.Infof("[SYNCGROUP] Non-leader %s waiting for/retrieving assignment in group %s (state=%s)",
							 | 
						|
									//         request.MemberID, request.GroupID, group.State)
							 | 
						|
									//     // Assignment will be retrieved from member.Assignment below
							 | 
						|
									// } else {
							 | 
						|
									//     // This branch should only be reached for server-side assignment protocols
							 | 
						|
									//     // (not Sarama's client-side assignment)
							 | 
						|
									// }
							 | 
						|
								
							 | 
						|
									t.Log("Original bug: Non-leader in Stable state would trigger server-side assignment")
							 | 
						|
									t.Log("This caused duplicate partition assignments and message re-reads (66.7% duplicates)")
							 | 
						|
									t.Log("Fix: Check if member is non-leader with empty assignments, regardless of group state")
							 | 
						|
								}
							 | 
						|
								
							 | 
						|
								// TestSyncGroup_FixVerification verifies the fix logic
							 | 
						|
								func TestSyncGroup_FixVerification(t *testing.T) {
							 | 
						|
									testCases := []struct {
							 | 
						|
										name           string
							 | 
						|
										isLeader       bool
							 | 
						|
										hasAssignments bool
							 | 
						|
										shouldWait     bool
							 | 
						|
										shouldAssign   bool
							 | 
						|
										description    string
							 | 
						|
									}{
							 | 
						|
										{
							 | 
						|
											name:           "Leader with assignments",
							 | 
						|
											isLeader:       true,
							 | 
						|
											hasAssignments: true,
							 | 
						|
											shouldWait:     false,
							 | 
						|
											shouldAssign:   false,
							 | 
						|
											description:    "Leader provides client-side assignments, processes them",
							 | 
						|
										},
							 | 
						|
										{
							 | 
						|
											name:           "Non-leader without assignments (PreparingRebalance)",
							 | 
						|
											isLeader:       false,
							 | 
						|
											hasAssignments: false,
							 | 
						|
											shouldWait:     true,
							 | 
						|
											shouldAssign:   false,
							 | 
						|
											description:    "Non-leader waits for leader to provide assignments",
							 | 
						|
										},
							 | 
						|
										{
							 | 
						|
											name:           "Non-leader without assignments (Stable) - THE BUG CASE",
							 | 
						|
											isLeader:       false,
							 | 
						|
											hasAssignments: false,
							 | 
						|
											shouldWait:     true,
							 | 
						|
											shouldAssign:   false,
							 | 
						|
											description:    "Non-leader retrieves assignment from leader (already processed)",
							 | 
						|
										},
							 | 
						|
										{
							 | 
						|
											name:           "Leader without assignments",
							 | 
						|
											isLeader:       true,
							 | 
						|
											hasAssignments: false,
							 | 
						|
											shouldWait:     false,
							 | 
						|
											shouldAssign:   true,
							 | 
						|
											description:    "Edge case: server-side assignment (should not happen with Sarama)",
							 | 
						|
										},
							 | 
						|
									}
							 | 
						|
								
							 | 
						|
									for _, tc := range testCases {
							 | 
						|
										t.Run(tc.name, func(t *testing.T) {
							 | 
						|
											// Simulate the fixed logic
							 | 
						|
											memberID := "consumer-1"
							 | 
						|
											leaderID := "consumer-1"
							 | 
						|
											if !tc.isLeader {
							 | 
						|
												memberID = "consumer-2"
							 | 
						|
											}
							 | 
						|
								
							 | 
						|
											groupAssignmentsCount := 0
							 | 
						|
											if tc.hasAssignments {
							 | 
						|
												groupAssignmentsCount = 2 // Leader provides assignments for 2 members
							 | 
						|
											}
							 | 
						|
								
							 | 
						|
											// THE FIX: Check if non-leader with no assignments
							 | 
						|
											isNonLeaderWaiting := (memberID != leaderID) && (groupAssignmentsCount == 0)
							 | 
						|
								
							 | 
						|
											if tc.shouldWait && !isNonLeaderWaiting {
							 | 
						|
												t.Errorf("%s: Expected to wait, but logic says no", tc.description)
							 | 
						|
											}
							 | 
						|
											if !tc.shouldWait && isNonLeaderWaiting {
							 | 
						|
												t.Errorf("%s: Expected not to wait, but logic says yes", tc.description)
							 | 
						|
											}
							 | 
						|
								
							 | 
						|
											t.Logf("✓ %s: isLeader=%v hasAssignments=%v shouldWait=%v",
							 | 
						|
												tc.description, tc.isLeader, tc.hasAssignments, tc.shouldWait)
							 | 
						|
										})
							 | 
						|
									}
							 | 
						|
								}
							 |