You cannot select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
					
					
						
							399 lines
						
					
					
						
							12 KiB
						
					
					
				
			
		
		
		
			
			
			
		
		
	
	
							399 lines
						
					
					
						
							12 KiB
						
					
					
				
								package consumer
							 | 
						|
								
							 | 
						|
								import (
							 | 
						|
									"crypto/sha256"
							 | 
						|
									"fmt"
							 | 
						|
									"sync"
							 | 
						|
									"time"
							 | 
						|
								)
							 | 
						|
								
							 | 
						|
								// GroupState enumerates the lifecycle states of a consumer group.
								type GroupState int

								// Group lifecycle states, numbered from zero in declaration order.
								const (
									GroupStateEmpty GroupState = iota
									GroupStatePreparingRebalance
									GroupStateCompletingRebalance
									GroupStateStable
									GroupStateDead
								)

								// String returns the human-readable name of the state. Any value
								// outside the declared constants yields "Unknown".
								func (gs GroupState) String() string {
									// Name table indexed by the state's numeric value.
									names := [...]string{
										GroupStateEmpty:               "Empty",
										GroupStatePreparingRebalance:  "PreparingRebalance",
										GroupStateCompletingRebalance: "CompletingRebalance",
										GroupStateStable:              "Stable",
										GroupStateDead:                "Dead",
									}
									if gs < 0 || int(gs) >= len(names) {
										return "Unknown"
									}
									return names[gs]
								}
							 | 
						|
								
							 | 
						|
								// MemberState enumerates the states of a single group member.
								type MemberState int

								// Member states, numbered from zero in declaration order.
								const (
									MemberStateUnknown MemberState = iota
									MemberStatePending
									MemberStateStable
									MemberStateLeaving
								)

								// String returns the human-readable name of the state. Both
								// MemberStateUnknown and any undeclared value yield "Unknown".
								func (ms MemberState) String() string {
									// Name table indexed by the state's numeric value.
									names := [...]string{
										MemberStateUnknown: "Unknown",
										MemberStatePending: "Pending",
										MemberStateStable:  "Stable",
										MemberStateLeaving: "Leaving",
									}
									if ms < 0 || int(ms) >= len(names) {
										return "Unknown"
									}
									return names[ms]
								}
							 | 
						|
								
							 | 
						|
								// GroupMember represents a consumer in a consumer group
							 | 
						|
								type GroupMember struct {
							 | 
						|
									ID               string                // Member ID (generated by gateway)
							 | 
						|
									ClientID         string                // Client ID from consumer
							 | 
						|
									ClientHost       string                // Client host/IP
							 | 
						|
									GroupInstanceID  *string               // Static membership instance ID (optional)
							 | 
						|
									SessionTimeout   int32                 // Session timeout in milliseconds
							 | 
						|
									RebalanceTimeout int32                 // Rebalance timeout in milliseconds
							 | 
						|
									Subscription     []string              // Subscribed topics
							 | 
						|
									Assignment       []PartitionAssignment // Assigned partitions
							 | 
						|
									Metadata         []byte                // Protocol-specific metadata
							 | 
						|
									State            MemberState           // Current member state
							 | 
						|
									LastHeartbeat    time.Time             // Last heartbeat timestamp
							 | 
						|
									JoinedAt         time.Time             // When member joined group
							 | 
						|
								}
							 | 
						|
								
							 | 
						|
								// PartitionAssignment identifies a single topic partition assigned
								// to a group member.
								type PartitionAssignment struct {
									// Topic is the topic name.
									Topic string
									// Partition is the partition number within Topic.
									Partition int32
								}
							 | 
						|
								
							 | 
						|
								// ConsumerGroup represents a Kafka consumer group
							 | 
						|
								type ConsumerGroup struct {
							 | 
						|
									ID               string                            // Group ID
							 | 
						|
									State            GroupState                        // Current group state
							 | 
						|
									Generation       int32                             // Generation ID (incremented on rebalance)
							 | 
						|
									Protocol         string                            // Assignment protocol (e.g., "range", "roundrobin")
							 | 
						|
									Leader           string                            // Leader member ID
							 | 
						|
									Members          map[string]*GroupMember           // Group members by member ID
							 | 
						|
									StaticMembers    map[string]string                 // Static instance ID -> member ID mapping
							 | 
						|
									SubscribedTopics map[string]bool                   // Topics subscribed by group
							 | 
						|
									OffsetCommits    map[string]map[int32]OffsetCommit // Topic -> Partition -> Offset
							 | 
						|
									CreatedAt        time.Time                         // Group creation time
							 | 
						|
									LastActivity     time.Time                         // Last activity (join, heartbeat, etc.)
							 | 
						|
								
							 | 
						|
									Mu sync.RWMutex // Protects group state
							 | 
						|
								}
							 | 
						|
								
							 | 
						|
								// OffsetCommit records a committed offset for a topic partition.
								type OffsetCommit struct {
									// Offset is the committed offset.
									Offset int64
									// Metadata is optional metadata stored with the commit.
									Metadata string
									// Timestamp is the time of the commit.
									Timestamp time.Time
								}
							 | 
						|
								
							 | 
						|
								// GroupCoordinator manages consumer groups
							 | 
						|
								type GroupCoordinator struct {
							 | 
						|
									groups   map[string]*ConsumerGroup // Group ID -> Group
							 | 
						|
									groupsMu sync.RWMutex              // Protects groups map
							 | 
						|
								
							 | 
						|
									// Configuration
							 | 
						|
									sessionTimeoutMin  int32 // Minimum session timeout (ms)
							 | 
						|
									sessionTimeoutMax  int32 // Maximum session timeout (ms)
							 | 
						|
									rebalanceTimeoutMs int32 // Default rebalance timeout (ms)
							 | 
						|
								
							 | 
						|
									// Timeout management
							 | 
						|
									rebalanceTimeoutManager *RebalanceTimeoutManager
							 | 
						|
								
							 | 
						|
									// Cleanup
							 | 
						|
									cleanupTicker *time.Ticker
							 | 
						|
									stopChan      chan struct{}
							 | 
						|
									stopOnce      sync.Once
							 | 
						|
								}
							 | 
						|
								
							 | 
						|
								// NewGroupCoordinator creates a new consumer group coordinator
							 | 
						|
								func NewGroupCoordinator() *GroupCoordinator {
							 | 
						|
									gc := &GroupCoordinator{
							 | 
						|
										groups:             make(map[string]*ConsumerGroup),
							 | 
						|
										sessionTimeoutMin:  6000,   // 6 seconds
							 | 
						|
										sessionTimeoutMax:  300000, // 5 minutes
							 | 
						|
										rebalanceTimeoutMs: 300000, // 5 minutes
							 | 
						|
										stopChan:           make(chan struct{}),
							 | 
						|
									}
							 | 
						|
								
							 | 
						|
									// Initialize rebalance timeout manager
							 | 
						|
									gc.rebalanceTimeoutManager = NewRebalanceTimeoutManager(gc)
							 | 
						|
								
							 | 
						|
									// Start cleanup routine
							 | 
						|
									gc.cleanupTicker = time.NewTicker(30 * time.Second)
							 | 
						|
									go gc.cleanupRoutine()
							 | 
						|
								
							 | 
						|
									return gc
							 | 
						|
								}
							 | 
						|
								
							 | 
						|
								// GetOrCreateGroup returns an existing group or creates a new one
							 | 
						|
								func (gc *GroupCoordinator) GetOrCreateGroup(groupID string) *ConsumerGroup {
							 | 
						|
									gc.groupsMu.Lock()
							 | 
						|
									defer gc.groupsMu.Unlock()
							 | 
						|
								
							 | 
						|
									group, exists := gc.groups[groupID]
							 | 
						|
									if !exists {
							 | 
						|
										group = &ConsumerGroup{
							 | 
						|
											ID:               groupID,
							 | 
						|
											State:            GroupStateEmpty,
							 | 
						|
											Generation:       0,
							 | 
						|
											Members:          make(map[string]*GroupMember),
							 | 
						|
											StaticMembers:    make(map[string]string),
							 | 
						|
											SubscribedTopics: make(map[string]bool),
							 | 
						|
											OffsetCommits:    make(map[string]map[int32]OffsetCommit),
							 | 
						|
											CreatedAt:        time.Now(),
							 | 
						|
											LastActivity:     time.Now(),
							 | 
						|
										}
							 | 
						|
										gc.groups[groupID] = group
							 | 
						|
									}
							 | 
						|
								
							 | 
						|
									return group
							 | 
						|
								}
							 | 
						|
								
							 | 
						|
								// GetGroup returns an existing group or nil if not found
							 | 
						|
								func (gc *GroupCoordinator) GetGroup(groupID string) *ConsumerGroup {
							 | 
						|
									gc.groupsMu.RLock()
							 | 
						|
									defer gc.groupsMu.RUnlock()
							 | 
						|
								
							 | 
						|
									return gc.groups[groupID]
							 | 
						|
								}
							 | 
						|
								
							 | 
						|
								// RemoveGroup removes a group from the coordinator
							 | 
						|
								func (gc *GroupCoordinator) RemoveGroup(groupID string) {
							 | 
						|
									gc.groupsMu.Lock()
							 | 
						|
									defer gc.groupsMu.Unlock()
							 | 
						|
								
							 | 
						|
									delete(gc.groups, groupID)
							 | 
						|
								}
							 | 
						|
								
							 | 
						|
								// ListGroups returns all current group IDs
							 | 
						|
								func (gc *GroupCoordinator) ListGroups() []string {
							 | 
						|
									gc.groupsMu.RLock()
							 | 
						|
									defer gc.groupsMu.RUnlock()
							 | 
						|
								
							 | 
						|
									groups := make([]string, 0, len(gc.groups))
							 | 
						|
									for groupID := range gc.groups {
							 | 
						|
										groups = append(groups, groupID)
							 | 
						|
									}
							 | 
						|
									return groups
							 | 
						|
								}
							 | 
						|
								
							 | 
						|
								// FindStaticMember finds a member by static instance ID
							 | 
						|
								func (gc *GroupCoordinator) FindStaticMember(group *ConsumerGroup, instanceID string) *GroupMember {
							 | 
						|
									if instanceID == "" {
							 | 
						|
										return nil
							 | 
						|
									}
							 | 
						|
								
							 | 
						|
									group.Mu.RLock()
							 | 
						|
									defer group.Mu.RUnlock()
							 | 
						|
								
							 | 
						|
									if memberID, exists := group.StaticMembers[instanceID]; exists {
							 | 
						|
										return group.Members[memberID]
							 | 
						|
									}
							 | 
						|
									return nil
							 | 
						|
								}
							 | 
						|
								
							 | 
						|
								// FindStaticMemberLocked finds a member by static instance ID (assumes group is already locked)
							 | 
						|
								func (gc *GroupCoordinator) FindStaticMemberLocked(group *ConsumerGroup, instanceID string) *GroupMember {
							 | 
						|
									if instanceID == "" {
							 | 
						|
										return nil
							 | 
						|
									}
							 | 
						|
								
							 | 
						|
									if memberID, exists := group.StaticMembers[instanceID]; exists {
							 | 
						|
										return group.Members[memberID]
							 | 
						|
									}
							 | 
						|
									return nil
							 | 
						|
								}
							 | 
						|
								
							 | 
						|
								// RegisterStaticMember registers a static member in the group
							 | 
						|
								func (gc *GroupCoordinator) RegisterStaticMember(group *ConsumerGroup, member *GroupMember) {
							 | 
						|
									if member.GroupInstanceID == nil || *member.GroupInstanceID == "" {
							 | 
						|
										return
							 | 
						|
									}
							 | 
						|
								
							 | 
						|
									group.Mu.Lock()
							 | 
						|
									defer group.Mu.Unlock()
							 | 
						|
								
							 | 
						|
									group.StaticMembers[*member.GroupInstanceID] = member.ID
							 | 
						|
								}
							 | 
						|
								
							 | 
						|
								// RegisterStaticMemberLocked registers a static member in the group (assumes group is already locked)
							 | 
						|
								func (gc *GroupCoordinator) RegisterStaticMemberLocked(group *ConsumerGroup, member *GroupMember) {
							 | 
						|
									if member.GroupInstanceID == nil || *member.GroupInstanceID == "" {
							 | 
						|
										return
							 | 
						|
									}
							 | 
						|
								
							 | 
						|
									group.StaticMembers[*member.GroupInstanceID] = member.ID
							 | 
						|
								}
							 | 
						|
								
							 | 
						|
								// UnregisterStaticMember removes a static member from the group
							 | 
						|
								func (gc *GroupCoordinator) UnregisterStaticMember(group *ConsumerGroup, instanceID string) {
							 | 
						|
									if instanceID == "" {
							 | 
						|
										return
							 | 
						|
									}
							 | 
						|
								
							 | 
						|
									group.Mu.Lock()
							 | 
						|
									defer group.Mu.Unlock()
							 | 
						|
								
							 | 
						|
									delete(group.StaticMembers, instanceID)
							 | 
						|
								}
							 | 
						|
								
							 | 
						|
								// UnregisterStaticMemberLocked removes a static member from the group (assumes group is already locked)
							 | 
						|
								func (gc *GroupCoordinator) UnregisterStaticMemberLocked(group *ConsumerGroup, instanceID string) {
							 | 
						|
									if instanceID == "" {
							 | 
						|
										return
							 | 
						|
									}
							 | 
						|
								
							 | 
						|
									delete(group.StaticMembers, instanceID)
							 | 
						|
								}
							 | 
						|
								
							 | 
						|
								// IsStaticMember checks if a member is using static membership
							 | 
						|
								func (gc *GroupCoordinator) IsStaticMember(member *GroupMember) bool {
							 | 
						|
									return member.GroupInstanceID != nil && *member.GroupInstanceID != ""
							 | 
						|
								}
							 | 
						|
								
							 | 
						|
								// GenerateMemberID creates a deterministic member ID based on client info
							 | 
						|
								func (gc *GroupCoordinator) GenerateMemberID(clientID, clientHost string) string {
							 | 
						|
									// EXPERIMENT: Use simpler member ID format like real Kafka brokers
							 | 
						|
									// Real Kafka uses format like: "consumer-1-uuid" or "consumer-groupId-uuid"
							 | 
						|
									hash := fmt.Sprintf("%x", sha256.Sum256([]byte(clientID+"-"+clientHost)))
							 | 
						|
									return fmt.Sprintf("consumer-%s", hash[:16]) // Shorter, simpler format
							 | 
						|
								}
							 | 
						|
								
							 | 
						|
								// ValidateSessionTimeout checks if session timeout is within acceptable range
							 | 
						|
								func (gc *GroupCoordinator) ValidateSessionTimeout(timeout int32) bool {
							 | 
						|
									return timeout >= gc.sessionTimeoutMin && timeout <= gc.sessionTimeoutMax
							 | 
						|
								}
							 | 
						|
								
							 | 
						|
								// cleanupRoutine periodically cleans up dead groups and expired members
							 | 
						|
								func (gc *GroupCoordinator) cleanupRoutine() {
							 | 
						|
									for {
							 | 
						|
										select {
							 | 
						|
										case <-gc.cleanupTicker.C:
							 | 
						|
											gc.performCleanup()
							 | 
						|
										case <-gc.stopChan:
							 | 
						|
											return
							 | 
						|
										}
							 | 
						|
									}
							 | 
						|
								}
							 | 
						|
								
							 | 
						|
								// performCleanup removes expired members and empty groups
							 | 
						|
								func (gc *GroupCoordinator) performCleanup() {
							 | 
						|
									now := time.Now()
							 | 
						|
								
							 | 
						|
									// Use rebalance timeout manager for more sophisticated timeout handling
							 | 
						|
									gc.rebalanceTimeoutManager.CheckRebalanceTimeouts()
							 | 
						|
								
							 | 
						|
									gc.groupsMu.Lock()
							 | 
						|
									defer gc.groupsMu.Unlock()
							 | 
						|
								
							 | 
						|
									for groupID, group := range gc.groups {
							 | 
						|
										group.Mu.Lock()
							 | 
						|
								
							 | 
						|
										// Check for expired members (session timeout)
							 | 
						|
										expiredMembers := make([]string, 0)
							 | 
						|
										for memberID, member := range group.Members {
							 | 
						|
											sessionDuration := time.Duration(member.SessionTimeout) * time.Millisecond
							 | 
						|
											timeSinceHeartbeat := now.Sub(member.LastHeartbeat)
							 | 
						|
											if timeSinceHeartbeat > sessionDuration {
							 | 
						|
												expiredMembers = append(expiredMembers, memberID)
							 | 
						|
											}
							 | 
						|
										}
							 | 
						|
								
							 | 
						|
										// Remove expired members
							 | 
						|
										for _, memberID := range expiredMembers {
							 | 
						|
											delete(group.Members, memberID)
							 | 
						|
											if group.Leader == memberID {
							 | 
						|
												group.Leader = ""
							 | 
						|
											}
							 | 
						|
										}
							 | 
						|
								
							 | 
						|
										// Update group state based on member count
							 | 
						|
										if len(group.Members) == 0 {
							 | 
						|
											if group.State != GroupStateEmpty {
							 | 
						|
												group.State = GroupStateEmpty
							 | 
						|
												group.Generation++
							 | 
						|
											}
							 | 
						|
								
							 | 
						|
											// Mark group for deletion if empty for too long (30 minutes)
							 | 
						|
											if now.Sub(group.LastActivity) > 30*time.Minute {
							 | 
						|
												group.State = GroupStateDead
							 | 
						|
											}
							 | 
						|
										}
							 | 
						|
								
							 | 
						|
										// Check for stuck rebalances and force completion if necessary
							 | 
						|
										maxRebalanceDuration := 10 * time.Minute // Maximum time allowed for rebalancing
							 | 
						|
										if gc.rebalanceTimeoutManager.IsRebalanceStuck(group, maxRebalanceDuration) {
							 | 
						|
											gc.rebalanceTimeoutManager.ForceCompleteRebalance(group)
							 | 
						|
										}
							 | 
						|
								
							 | 
						|
										group.Mu.Unlock()
							 | 
						|
								
							 | 
						|
										// Remove dead groups
							 | 
						|
										if group.State == GroupStateDead {
							 | 
						|
											delete(gc.groups, groupID)
							 | 
						|
										}
							 | 
						|
									}
							 | 
						|
								}
							 | 
						|
								
							 | 
						|
								// Close shuts down the group coordinator
							 | 
						|
								func (gc *GroupCoordinator) Close() {
							 | 
						|
									gc.stopOnce.Do(func() {
							 | 
						|
										close(gc.stopChan)
							 | 
						|
										if gc.cleanupTicker != nil {
							 | 
						|
											gc.cleanupTicker.Stop()
							 | 
						|
										}
							 | 
						|
									})
							 | 
						|
								}
							 | 
						|
								
							 | 
						|
								// GetGroupStats returns statistics about the group coordinator
							 | 
						|
								func (gc *GroupCoordinator) GetGroupStats() map[string]interface{} {
							 | 
						|
									gc.groupsMu.RLock()
							 | 
						|
									defer gc.groupsMu.RUnlock()
							 | 
						|
								
							 | 
						|
									stats := map[string]interface{}{
							 | 
						|
										"total_groups": len(gc.groups),
							 | 
						|
										"group_states": make(map[string]int),
							 | 
						|
									}
							 | 
						|
								
							 | 
						|
									stateCount := make(map[GroupState]int)
							 | 
						|
									totalMembers := 0
							 | 
						|
								
							 | 
						|
									for _, group := range gc.groups {
							 | 
						|
										group.Mu.RLock()
							 | 
						|
										stateCount[group.State]++
							 | 
						|
										totalMembers += len(group.Members)
							 | 
						|
										group.Mu.RUnlock()
							 | 
						|
									}
							 | 
						|
								
							 | 
						|
									stats["total_members"] = totalMembers
							 | 
						|
									for state, count := range stateCount {
							 | 
						|
										stats["group_states"].(map[string]int)[state.String()] = count
							 | 
						|
									}
							 | 
						|
								
							 | 
						|
									return stats
							 | 
						|
								}
							 | 
						|
								
							 | 
						|
								// GetRebalanceStatus returns the rebalance status for a specific group
							 | 
						|
								func (gc *GroupCoordinator) GetRebalanceStatus(groupID string) *RebalanceStatus {
							 | 
						|
									return gc.rebalanceTimeoutManager.GetRebalanceStatus(groupID)
							 | 
						|
								}
							 |