You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
125 lines
4.9 KiB
125 lines
4.9 KiB
package protocol
|
|
|
|
import (
|
|
"testing"
|
|
)
|
|
|
|
// TestSyncGroup_RaceCondition_BugDocumentation documents the original race condition bug.
//
// It records the scenario in which a non-leader member that called SyncGroup
// while the group was already Stable would trigger server-side assignment.
// The test makes no assertions: it only keeps the before/after handler logic
// next to the tests and logs a short summary of the bug and its fix.
func TestSyncGroup_RaceCondition_BugDocumentation(t *testing.T) {
	// Original bug scenario:
	//  1. Consumer 1 (leader) joins, gets all 15 partitions
	//  2. Consumer 2 joins, triggers rebalance
	//  3. Consumer 1 commits offsets during cleanup
	//  4. Consumer 1 calls SyncGroup with client-side assignments, group moves to Stable
	//  5. Consumer 2 calls SyncGroup (late arrival), group is already Stable
	//  6. BUG: Consumer 2 falls into "else" branch, triggers server-side assignment
	//  7. Consumer 2 gets 10 partitions via server-side assignment
	//  8. Result: Some partitions (e.g., partition 2) assigned to BOTH consumers
	//  9. Consumer 2 fetches offsets, gets offset 0 (no committed offsets yet)
	// 10. Consumer 2 re-reads messages from offset 0 -> DUPLICATES (66.7%)!

	// ORIGINAL BUGGY CODE (joingroup.go lines 887-905):
	//
	//	} else if group.State == consumer.GroupStateCompletingRebalance || group.State == consumer.GroupStatePreparingRebalance {
	//		// Non-leader member waiting for leader to provide assignments
	//		glog.Infof("[SYNCGROUP] Non-leader %s waiting for leader assignments in group %s (state=%s)",
	//			request.MemberID, request.GroupID, group.State)
	//	} else {
	//		// BUG: This branch was triggered when non-leader arrived in Stable state!
	//		glog.Warningf("[SYNCGROUP] Using server-side assignment for group %s (Leader=%s State=%s)",
	//			request.GroupID, group.Leader, group.State)
	//		topicPartitions := h.getTopicPartitions(group)
	//		group.AssignPartitions(topicPartitions) // <- Duplicate assignment!
	//	}

	// FIXED CODE (joingroup.go lines 887-906):
	//
	//	} else if request.MemberID != group.Leader && len(request.GroupAssignments) == 0 {
	//		// Non-leader member requesting its assignment
	//		// CRITICAL FIX: Non-leader members should ALWAYS wait for leader's client-side assignments
	//		// This is the correct behavior for Sarama and other client-side assignment protocols
	//		glog.Infof("[SYNCGROUP] Non-leader %s waiting for/retrieving assignment in group %s (state=%s)",
	//			request.MemberID, request.GroupID, group.State)
	//		// Assignment will be retrieved from member.Assignment below
	//	} else {
	//		// This branch should only be reached for server-side assignment protocols
	//		// (not Sarama's client-side assignment)
	//	}

	t.Log("Original bug: Non-leader in Stable state would trigger server-side assignment")
	t.Log("This caused duplicate partition assignments and message re-reads (66.7% duplicates)")
	t.Log("Fix: Check if member is non-leader with empty assignments, regardless of group state")
}
|
|
|
|
// TestSyncGroup_FixVerification verifies the fix logic.
//
// The test does not call the SyncGroup handler. It re-creates the branch
// condition introduced by the fix (a non-leader member that sends no group
// assignments must wait for the leader's assignments) and checks it against
// a table of leader/assignment combinations. Each case states whether the
// member is expected to wait (shouldWait) and whether server-side assignment
// is expected to be used (shouldAssign).
func TestSyncGroup_FixVerification(t *testing.T) {
	testCases := []struct {
		name           string
		isLeader       bool
		hasAssignments bool
		shouldWait     bool
		shouldAssign   bool
		description    string
	}{
		{
			name:           "Leader with assignments",
			isLeader:       true,
			hasAssignments: true,
			shouldWait:     false,
			shouldAssign:   false,
			description:    "Leader provides client-side assignments, processes them",
		},
		{
			name:           "Non-leader without assignments (PreparingRebalance)",
			isLeader:       false,
			hasAssignments: false,
			shouldWait:     true,
			shouldAssign:   false,
			description:    "Non-leader waits for leader to provide assignments",
		},
		{
			name:           "Non-leader without assignments (Stable) - THE BUG CASE",
			isLeader:       false,
			hasAssignments: false,
			shouldWait:     true,
			shouldAssign:   false,
			description:    "Non-leader retrieves assignment from leader (already processed)",
		},
		{
			name:           "Leader without assignments",
			isLeader:       true,
			hasAssignments: false,
			shouldWait:     false,
			shouldAssign:   true,
			description:    "Edge case: server-side assignment (should not happen with Sarama)",
		},
	}

	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			// Simulate the fixed logic: the leader is always "consumer-1";
			// a non-leader case uses a different member ID.
			memberID := "consumer-1"
			leaderID := "consumer-1"
			if !tc.isLeader {
				memberID = "consumer-2"
			}

			groupAssignmentsCount := 0
			if tc.hasAssignments {
				groupAssignmentsCount = 2 // Leader provides assignments for 2 members
			}

			// THE FIX: a non-leader with no assignments waits, regardless of group state.
			isNonLeaderWaiting := (memberID != leaderID) && (groupAssignmentsCount == 0)

			// NOTE(review): assumes the handler processes a leader that supplies
			// assignments in a branch placed before the non-leader check, so that
			// only the remaining combinations reach the server-side assignment
			// "else" branch - confirm against joingroup.go.
			isLeaderAssigning := (memberID == leaderID) && (groupAssignmentsCount > 0)
			usesServerSideAssignment := !isLeaderAssigning && !isNonLeaderWaiting

			if tc.shouldWait && !isNonLeaderWaiting {
				t.Errorf("%s: Expected to wait, but logic says no", tc.description)
			}
			if !tc.shouldWait && isNonLeaderWaiting {
				t.Errorf("%s: Expected not to wait, but logic says yes", tc.description)
			}
			// Previously shouldAssign was declared in the table but never verified.
			if tc.shouldAssign != usesServerSideAssignment {
				t.Errorf("%s: Expected server-side assignment=%v, but logic says %v",
					tc.description, tc.shouldAssign, usesServerSideAssignment)
			}

			// Do not print the success marker for a case that has already failed.
			if t.Failed() {
				return
			}

			t.Logf("✓ %s: isLeader=%v hasAssignments=%v shouldWait=%v",
				tc.description, tc.isLeader, tc.hasAssignments, tc.shouldWait)
		})
	}
}
|