You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

125 lines
4.9 KiB

package protocol
import (
"testing"
)
// TestSyncGroup_RaceCondition_BugDocumentation is a narrative-only test: it
// performs no assertions and exists to keep a written record of the original
// SyncGroup race, in which a non-leader member that reached SyncGroup while the
// group was already Stable triggered server-side assignment.
func TestSyncGroup_RaceCondition_BugDocumentation(t *testing.T) {
	// Sequence of events in the original bug:
	//
	//  1. Consumer 1 (leader) joins and gets all 15 partitions.
	//  2. Consumer 2 joins, which triggers a rebalance.
	//  3. Consumer 1 commits offsets during cleanup.
	//  4. Consumer 1 calls SyncGroup with client-side assignments; the group
	//     moves to Stable.
	//  5. Consumer 2 calls SyncGroup (late arrival); the group is already Stable.
	//  6. BUG: Consumer 2 falls into the "else" branch, which triggers
	//     server-side assignment.
	//  7. Consumer 2 gets 10 partitions via server-side assignment.
	//  8. Result: some partitions (e.g., partition 2) are assigned to BOTH
	//     consumers.
	//  9. Consumer 2 fetches offsets and gets offset 0 (no committed offsets yet).
	// 10. Consumer 2 re-reads messages from offset 0 -> DUPLICATES (66.7%)!
	//
	// ORIGINAL BUGGY CODE (joingroup.go lines 887-905):
	//
	//	} else if group.State == consumer.GroupStateCompletingRebalance || group.State == consumer.GroupStatePreparingRebalance {
	//		// Non-leader member waiting for leader to provide assignments
	//		glog.Infof("[SYNCGROUP] Non-leader %s waiting for leader assignments in group %s (state=%s)",
	//			request.MemberID, request.GroupID, group.State)
	//	} else {
	//		// BUG: This branch was triggered when non-leader arrived in Stable state!
	//		glog.Warningf("[SYNCGROUP] Using server-side assignment for group %s (Leader=%s State=%s)",
	//			request.GroupID, group.Leader, group.State)
	//		topicPartitions := h.getTopicPartitions(group)
	//		group.AssignPartitions(topicPartitions) // <- Duplicate assignment!
	//	}
	//
	// FIXED CODE (joingroup.go lines 887-906):
	//
	//	} else if request.MemberID != group.Leader && len(request.GroupAssignments) == 0 {
	//		// Non-leader member requesting its assignment
	//		// CRITICAL FIX: Non-leader members should ALWAYS wait for leader's client-side assignments
	//		// This is the correct behavior for Sarama and other client-side assignment protocols
	//		glog.Infof("[SYNCGROUP] Non-leader %s waiting for/retrieving assignment in group %s (state=%s)",
	//			request.MemberID, request.GroupID, group.State)
	//		// Assignment will be retrieved from member.Assignment below
	//	} else {
	//		// This branch should only be reached for server-side assignment protocols
	//		// (not Sarama's client-side assignment)
	//	}

	// Emit the one-line summaries of the bug, its impact, and the fix, in order.
	summary := []string{
		"Original bug: Non-leader in Stable state would trigger server-side assignment",
		"This caused duplicate partition assignments and message re-reads (66.7% duplicates)",
		"Fix: Check if member is non-leader with empty assignments, regardless of group state",
	}
	for _, line := range summary {
		t.Log(line)
	}
}
// TestSyncGroup_FixVerification checks the branch-selection rule of the
// SyncGroup fix against a table of leader / assignment combinations.
//
// The test does not call the real handler. It re-creates the decision locally
// from a member ID, a leader ID and an assignment count, then compares the
// selected branch with both expectations in each row: shouldWait (the member
// waits for / retrieves the leader's assignment) and shouldAssign (the
// server-side assignment fallback is taken).
func TestSyncGroup_FixVerification(t *testing.T) {
	testCases := []struct {
		name           string
		isLeader       bool
		hasAssignments bool
		shouldWait     bool
		shouldAssign   bool
		description    string
	}{
		{
			name:           "Leader with assignments",
			isLeader:       true,
			hasAssignments: true,
			shouldWait:     false,
			shouldAssign:   false,
			description:    "Leader provides client-side assignments, processes them",
		},
		{
			name:           "Non-leader without assignments (PreparingRebalance)",
			isLeader:       false,
			hasAssignments: false,
			shouldWait:     true,
			shouldAssign:   false,
			description:    "Non-leader waits for leader to provide assignments",
		},
		{
			name:           "Non-leader without assignments (Stable) - THE BUG CASE",
			isLeader:       false,
			hasAssignments: false,
			shouldWait:     true,
			shouldAssign:   false,
			description:    "Non-leader retrieves assignment from leader (already processed)",
		},
		{
			name:           "Leader without assignments",
			isLeader:       true,
			hasAssignments: false,
			shouldWait:     false,
			shouldAssign:   true,
			description:    "Edge case: server-side assignment (should not happen with Sarama)",
		},
	}

	const leaderID = "consumer-1"

	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			// Simulate the request that the fixed logic would see.
			memberID := leaderID
			if !tc.isLeader {
				memberID = "consumer-2"
			}
			groupAssignmentsCount := 0
			if tc.hasAssignments {
				groupAssignmentsCount = 2 // Leader provides assignments for 2 members
			}

			// NOTE(review): assumes the branch preceding the fixed `else if`
			// handles a leader that supplied assignments, as the first table
			// row's description implies — confirm against joingroup.go.
			isLeaderWithAssignments := memberID == leaderID && groupAssignmentsCount > 0

			// THE FIX: a non-leader with no assignments waits, whatever the group state.
			isNonLeaderWaiting := memberID != leaderID && groupAssignmentsCount == 0

			// Anything that matches neither branch falls through to the
			// server-side assignment fallback.
			usesServerSideAssignment := !isLeaderWithAssignments && !isNonLeaderWaiting

			if tc.shouldWait && !isNonLeaderWaiting {
				t.Errorf("%s: Expected to wait, but logic says no", tc.description)
			}
			if !tc.shouldWait && isNonLeaderWaiting {
				t.Errorf("%s: Expected not to wait, but logic says yes", tc.description)
			}

			// The shouldAssign column was previously never checked.
			if tc.shouldAssign && !usesServerSideAssignment {
				t.Errorf("%s: Expected server-side assignment, but logic says no", tc.description)
			}
			if !tc.shouldAssign && usesServerSideAssignment {
				t.Errorf("%s: Expected no server-side assignment, but logic says yes", tc.description)
			}

			// Only print the success marker when every expectation held.
			if !t.Failed() {
				t.Logf("✓ %s: isLeader=%v hasAssignments=%v shouldWait=%v",
					tc.description, tc.isLeader, tc.hasAssignments, tc.shouldWait)
			}
		})
	}
}