package consumer

import (
	"crypto/sha256"
	"fmt"
	"sync"
	"time"
)

// GroupState represents the state of a consumer group.
type GroupState int

// Consumer group states, numbered from zero in declaration order.
const (
	GroupStateEmpty GroupState = iota
	GroupStatePreparingRebalance
	GroupStateCompletingRebalance
	GroupStateStable
	GroupStateDead
)

// String returns the display name of the group state, or "Unknown" for a
// value outside the declared constants.
func (gs GroupState) String() string {
	switch gs {
	case GroupStateEmpty:
		return "Empty"
	case GroupStatePreparingRebalance:
		return "PreparingRebalance"
	case GroupStateCompletingRebalance:
		return "CompletingRebalance"
	case GroupStateStable:
		return "Stable"
	case GroupStateDead:
		return "Dead"
	default:
		return "Unknown"
	}
}

// MemberState represents the state of a group member.
type MemberState int

// Group member states, numbered from zero in declaration order.
const (
	MemberStateUnknown MemberState = iota
	MemberStatePending
	MemberStateStable
	MemberStateLeaving
)

// String returns the display name of the member state, or "Unknown" for a
// value outside the declared constants.
func (ms MemberState) String() string {
	switch ms {
	case MemberStateUnknown:
		return "Unknown"
	case MemberStatePending:
		return "Pending"
	case MemberStateStable:
		return "Stable"
	case MemberStateLeaving:
		return "Leaving"
	default:
		return "Unknown"
	}
}

// GroupMember represents a consumer in a consumer group.
type GroupMember struct {
	ID               string                // Member ID (generated by gateway)
	ClientID         string                // Client ID from consumer
	ClientHost       string                // Client host/IP
	GroupInstanceID  *string               // Static membership instance ID (optional)
	SessionTimeout   int32                 // Session timeout in milliseconds
	RebalanceTimeout int32                 // Rebalance timeout in milliseconds
	Subscription     []string              // Subscribed topics
	Assignment       []PartitionAssignment // Assigned partitions
	Metadata         []byte                // Protocol-specific metadata
	State            MemberState           // Current member state
	LastHeartbeat    time.Time             // Last heartbeat timestamp
	JoinedAt         time.Time             // When member joined group
}

// PartitionAssignment represents partition assignment for a member.
type PartitionAssignment struct {
	Topic     string
	Partition int32
}

// ConsumerGroup represents a Kafka consumer group.
//
// Mu guards the group's fields. A ConsumerGroup contains a mutex and must be
// passed by pointer, never copied.
type ConsumerGroup struct {
	ID               string                            // Group ID
	State            GroupState                        // Current group state
	Generation       int32                             // Generation ID (incremented on rebalance)
	Protocol         string                            // Assignment protocol (e.g., "range", "roundrobin")
	Leader           string                            // Leader member ID
	Members          map[string]*GroupMember           // Group members by member ID
	StaticMembers    map[string]string                 // Static instance ID -> member ID mapping
	SubscribedTopics map[string]bool                   // Topics subscribed by group
	OffsetCommits    map[string]map[int32]OffsetCommit // Topic -> Partition -> Offset
	CreatedAt        time.Time                         // Group creation time
	LastActivity     time.Time                         // Last activity (join, heartbeat, etc.)
	Mu               sync.RWMutex                      // Protects group state
}

// OffsetCommit represents a committed offset for a topic partition.
type OffsetCommit struct {
	Offset    int64     // Committed offset
	Metadata  string    // Optional metadata
	Timestamp time.Time // Commit timestamp
}

// GroupCoordinator manages consumer groups.
type GroupCoordinator struct {
	groups   map[string]*ConsumerGroup // Group ID -> Group
	groupsMu sync.RWMutex              // Protects groups map

	// Configuration
	sessionTimeoutMin  int32 // Minimum session timeout (ms)
	sessionTimeoutMax  int32 // Maximum session timeout (ms)
	rebalanceTimeoutMs int32 // Default rebalance timeout (ms)

	// Timeout management
	rebalanceTimeoutManager *RebalanceTimeoutManager

	// Cleanup
	cleanupTicker *time.Ticker
	stopChan      chan struct{}
	stopOnce      sync.Once
}

// NewGroupCoordinator creates a new consumer group coordinator.
//
// It starts a background goroutine that runs a cleanup pass every 30 seconds;
// call Close to stop that goroutine and its ticker.
func NewGroupCoordinator() *GroupCoordinator {
	gc := &GroupCoordinator{
		groups:             make(map[string]*ConsumerGroup),
		sessionTimeoutMin:  6000,   // 6 seconds
		sessionTimeoutMax:  300000, // 5 minutes
		rebalanceTimeoutMs: 300000, // 5 minutes
		stopChan:           make(chan struct{}),
	}

	// The manager is handed the coordinator, so it is built only after gc exists.
	gc.rebalanceTimeoutManager = NewRebalanceTimeoutManager(gc)

	// The ticker must be assigned before the goroutine starts: cleanupRoutine
	// reads gc.cleanupTicker without synchronization.
	gc.cleanupTicker = time.NewTicker(30 * time.Second)
	go gc.cleanupRoutine()

	return gc
}

// GetOrCreateGroup returns the group registered under groupID, creating and
// registering an empty one (state Empty, generation 0) if none exists.
func (gc *GroupCoordinator) GetOrCreateGroup(groupID string) *ConsumerGroup {
	gc.groupsMu.Lock()
	defer gc.groupsMu.Unlock()

	if group, ok := gc.groups[groupID]; ok {
		return group
	}

	// One timestamp so CreatedAt and LastActivity start out identical.
	now := time.Now()
	group := &ConsumerGroup{
		ID:               groupID,
		State:            GroupStateEmpty,
		Generation:       0,
		Members:          make(map[string]*GroupMember),
		StaticMembers:    make(map[string]string),
		SubscribedTopics: make(map[string]bool),
		OffsetCommits:    make(map[string]map[int32]OffsetCommit),
		CreatedAt:        now,
		LastActivity:     now,
	}
	gc.groups[groupID] = group
	return group
}

// GetGroup returns an existing group or nil if not found.
func (gc *GroupCoordinator) GetGroup(groupID string) *ConsumerGroup {
	gc.groupsMu.RLock()
	defer gc.groupsMu.RUnlock()

	return gc.groups[groupID]
}

// RemoveGroup removes a group from the coordinator. It is a no-op when no
// group is registered under groupID.
func (gc *GroupCoordinator) RemoveGroup(groupID string) {
	gc.groupsMu.Lock()
	defer gc.groupsMu.Unlock()

	delete(gc.groups, groupID)
}

// ListGroups returns all current group IDs, in unspecified (map iteration)
// order.
func (gc *GroupCoordinator) ListGroups() []string {
	gc.groupsMu.RLock()
	defer gc.groupsMu.RUnlock()

	ids := make([]string, 0, len(gc.groups))
	for id := range gc.groups {
		ids = append(ids, id)
	}
	return ids
}

// FindStaticMember finds a member by static instance ID while holding the
// group's read lock. It returns nil when instanceID is empty, is not
// registered, or maps to a member ID that is not in group.Members.
func (gc *GroupCoordinator) FindStaticMember(group *ConsumerGroup, instanceID string) *GroupMember {
	if instanceID == "" {
		return nil
	}

	group.Mu.RLock()
	defer group.Mu.RUnlock()

	return gc.FindStaticMemberLocked(group, instanceID)
}

// FindStaticMemberLocked finds a member by static instance ID (assumes group
// is already locked). It returns nil under the same conditions as
// FindStaticMember.
func (gc *GroupCoordinator) FindStaticMemberLocked(group *ConsumerGroup, instanceID string) *GroupMember {
	if instanceID == "" {
		return nil
	}

	memberID, ok := group.StaticMembers[instanceID]
	if !ok {
		return nil
	}
	return group.Members[memberID]
}

// RegisterStaticMember records member's instance ID -> member ID mapping
// while holding the group's write lock. Members without a non-empty
// GroupInstanceID are ignored.
func (gc *GroupCoordinator) RegisterStaticMember(group *ConsumerGroup, member *GroupMember) {
	// Checked before locking so dynamic members never take the write lock.
	if !gc.IsStaticMember(member) {
		return
	}

	group.Mu.Lock()
	defer group.Mu.Unlock()

	gc.RegisterStaticMemberLocked(group, member)
}

// RegisterStaticMemberLocked records member's instance ID -> member ID
// mapping (assumes group is already locked). Members without a non-empty
// GroupInstanceID are ignored.
func (gc *GroupCoordinator) RegisterStaticMemberLocked(group *ConsumerGroup, member *GroupMember) {
	if !gc.IsStaticMember(member) {
		return
	}

	group.StaticMembers[*member.GroupInstanceID] = member.ID
}

// UnregisterStaticMember removes the mapping for instanceID while holding the
// group's write lock. An empty instanceID is ignored.
func (gc *GroupCoordinator) UnregisterStaticMember(group *ConsumerGroup, instanceID string) {
	if instanceID == "" {
		return
	}

	group.Mu.Lock()
	defer group.Mu.Unlock()

	gc.UnregisterStaticMemberLocked(group, instanceID)
}

// UnregisterStaticMemberLocked removes the mapping for instanceID (assumes
// group is already locked). An empty instanceID is ignored.
func (gc *GroupCoordinator) UnregisterStaticMemberLocked(group *ConsumerGroup, instanceID string) {
	if instanceID == "" {
		return
	}

	delete(group.StaticMembers, instanceID)
}

// IsStaticMember reports whether member carries a non-empty GroupInstanceID.
// member itself must be non-nil.
func (gc *GroupCoordinator) IsStaticMember(member *GroupMember) bool {
	return member.GroupInstanceID != nil && *member.GroupInstanceID != ""
}

// GenerateMemberID creates a deterministic member ID based on client info.
//
// The result is "consumer-" followed by the first 16 hex characters of
// SHA-256(clientID + "-" + clientHost), so equal inputs always produce the
// same ID.
//
// NOTE(review): two consumers that share both client ID and host therefore get
// the same member ID; confirm callers do not rely on uniqueness.
func (gc *GroupCoordinator) GenerateMemberID(clientID, clientHost string) string {
	sum := sha256.Sum256([]byte(clientID + "-" + clientHost))
	// The first 8 digest bytes format as exactly 16 hex characters.
	return fmt.Sprintf("consumer-%x", sum[:8])
}

// ValidateSessionTimeout reports whether timeout (milliseconds) lies within
// the coordinator's inclusive [sessionTimeoutMin, sessionTimeoutMax] range.
func (gc *GroupCoordinator) ValidateSessionTimeout(timeout int32) bool {
	return timeout >= gc.sessionTimeoutMin && timeout <= gc.sessionTimeoutMax
}

// cleanupRoutine runs performCleanup on every tick of cleanupTicker and
// returns once stopChan is closed.
func (gc *GroupCoordinator) cleanupRoutine() {
	for {
		select {
		case <-gc.cleanupTicker.C:
			gc.performCleanup()
		case <-gc.stopChan:
			return
		}
	}
}

// performCleanup removes expired members and dead groups.
//
// Lock order is groupsMu first, then each group's Mu (inside sweepGroup).
func (gc *GroupCoordinator) performCleanup() {
	now := time.Now()

	// Runs before groupsMu is taken, as in the original ordering.
	// NOTE(review): presumably because the manager acquires coordinator locks
	// itself; confirm against RebalanceTimeoutManager.
	gc.rebalanceTimeoutManager.CheckRebalanceTimeouts()

	gc.groupsMu.Lock()
	defer gc.groupsMu.Unlock()

	// Deleting entries from a map while ranging over it is permitted in Go.
	for groupID, group := range gc.groups {
		if gc.sweepGroup(group, now) {
			delete(gc.groups, groupID)
		}
	}
}

// sweepGroup runs one cleanup pass over a single group while holding its write
// lock and reports whether the group ended the pass in GroupStateDead.
//
// The verdict is taken under the lock; reading group.State after unlocking
// would race with concurrent writers.
func (gc *GroupCoordinator) sweepGroup(group *ConsumerGroup, now time.Time) bool {
	const (
		emptyGroupRetention  = 30 * time.Minute // how long an idle, empty group is kept
		maxRebalanceDuration = 10 * time.Minute // maximum time allowed for rebalancing
	)

	group.Mu.Lock()
	defer group.Mu.Unlock()

	// Drop members whose last heartbeat is older than their session timeout.
	for memberID, member := range group.Members {
		sessionTimeout := time.Duration(member.SessionTimeout) * time.Millisecond
		if now.Sub(member.LastHeartbeat) <= sessionTimeout {
			continue
		}

		delete(group.Members, memberID)
		if group.Leader == memberID {
			group.Leader = ""
		}

		// Also drop the static-instance mapping so it does not keep pointing
		// at a removed member. Only delete it if it still refers to this
		// member; another member may have taken over the instance ID.
		if gc.IsStaticMember(member) {
			instanceID := *member.GroupInstanceID
			if group.StaticMembers[instanceID] == memberID {
				delete(group.StaticMembers, instanceID)
			}
		}
	}

	if len(group.Members) == 0 {
		// A group that just lost its last member becomes Empty with a new
		// generation.
		if group.State != GroupStateEmpty {
			group.State = GroupStateEmpty
			group.Generation++
		}
		// An empty group with no activity for the retention period is dead.
		if now.Sub(group.LastActivity) > emptyGroupRetention {
			group.State = GroupStateDead
		}
	}

	// Check for stuck rebalances and force completion if necessary. Both
	// calls are made with group.Mu held, as before.
	if gc.rebalanceTimeoutManager.IsRebalanceStuck(group, maxRebalanceDuration) {
		gc.rebalanceTimeoutManager.ForceCompleteRebalance(group)
	}

	return group.State == GroupStateDead
}

// Close shuts down the group coordinator: it closes stopChan, which ends the
// cleanup goroutine, and stops the cleanup ticker. It is safe to call more
// than once; only the first call has any effect.
func (gc *GroupCoordinator) Close() {
	gc.stopOnce.Do(func() {
		close(gc.stopChan)
		if gc.cleanupTicker != nil {
			gc.cleanupTicker.Stop()
		}
	})
}

// GetGroupStats returns statistics about the group coordinator.
//
// The returned map has three keys: "total_groups" (int), "total_members"
// (int) and "group_states" (map[string]int, state name -> number of groups).
// States with no groups are absent from "group_states".
func (gc *GroupCoordinator) GetGroupStats() map[string]interface{} {
	gc.groupsMu.RLock()
	defer gc.groupsMu.RUnlock()

	groupStates := make(map[string]int)
	totalMembers := 0
	for _, group := range gc.groups {
		group.Mu.RLock()
		groupStates[group.State.String()]++
		totalMembers += len(group.Members)
		group.Mu.RUnlock()
	}

	return map[string]interface{}{
		"total_groups":  len(gc.groups),
		"group_states":  groupStates,
		"total_members": totalMembers,
	}
}

// GetRebalanceStatus returns the rebalance status for a specific group, as
// reported by the rebalance timeout manager.
func (gc *GroupCoordinator) GetRebalanceStatus(groupID string) *RebalanceStatus {
	return gc.rebalanceTimeoutManager.GetRebalanceStatus(groupID)
}