You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

399 lines
12 KiB

package consumer
import (
"crypto/sha256"
"fmt"
"sync"
"time"
)
// GroupState enumerates the states a consumer group can be in.
type GroupState int

// Consumer group states, numbered from zero in declaration order.
const (
	GroupStateEmpty GroupState = iota
	GroupStatePreparingRebalance
	GroupStateCompletingRebalance
	GroupStateStable
	GroupStateDead
)

// String returns the display name of the state. Any value outside the
// declared constants is reported as "Unknown".
func (gs GroupState) String() string {
	names := [...]string{
		GroupStateEmpty:               "Empty",
		GroupStatePreparingRebalance:  "PreparingRebalance",
		GroupStateCompletingRebalance: "CompletingRebalance",
		GroupStateStable:              "Stable",
		GroupStateDead:                "Dead",
	}
	if gs < GroupStateEmpty || int(gs) >= len(names) {
		return "Unknown"
	}
	return names[gs]
}
// MemberState enumerates the states a single group member can be in.
type MemberState int

// Member states, numbered from zero in declaration order.
const (
	MemberStateUnknown MemberState = iota
	MemberStatePending
	MemberStateStable
	MemberStateLeaving
)

// String returns the display name of the state. MemberStateUnknown and any
// value outside the declared constants are both reported as "Unknown".
func (ms MemberState) String() string {
	switch ms {
	case MemberStatePending:
		return "Pending"
	case MemberStateStable:
		return "Stable"
	case MemberStateLeaving:
		return "Leaving"
	}
	return "Unknown"
}
// GroupMember is the coordinator's record of one consumer inside a consumer
// group. Records are stored in ConsumerGroup.Members, keyed by ID.
type GroupMember struct {
ID string // Member ID (generated by gateway)
ClientID string // Client ID from consumer
ClientHost string // Client host/IP
// A nil pointer or an empty string means the member does not use static
// membership (see IsStaticMember).
GroupInstanceID *string // Static membership instance ID (optional)
// performCleanup removes the member once the time elapsed since
// LastHeartbeat exceeds this value.
SessionTimeout int32 // Session timeout in milliseconds
RebalanceTimeout int32 // Rebalance timeout in milliseconds
Subscription []string // Subscribed topics
Assignment []PartitionAssignment // Assigned partitions
Metadata []byte // Protocol-specific metadata
State MemberState // Current member state
LastHeartbeat time.Time // Last heartbeat timestamp
JoinedAt time.Time // When member joined group
}
// PartitionAssignment identifies one topic partition assigned to a group
// member. It is the element type of GroupMember.Assignment.
type PartitionAssignment struct {
// Topic is the topic name.
Topic string
// Partition is the partition number within Topic.
Partition int32
}
// ConsumerGroup holds the coordinator-side state of one Kafka consumer group.
//
// Mu guards the fields of the group. The coordinator helpers whose names end
// in "Locked" expect the caller to already hold Mu; their unsuffixed
// counterparts acquire it themselves.
type ConsumerGroup struct {
ID string // Group ID
State GroupState // Current group state
// performCleanup also increments Generation when the last member of a
// non-empty group expires.
Generation int32 // Generation ID (incremented on rebalance)
Protocol string // Assignment protocol (e.g., "range", "roundrobin")
// performCleanup resets Leader to "" when the leader's session expires.
Leader string // Leader member ID
Members map[string]*GroupMember // Group members by member ID
StaticMembers map[string]string // Static instance ID -> member ID mapping
SubscribedTopics map[string]bool // Topics subscribed by group
OffsetCommits map[string]map[int32]OffsetCommit // Topic -> Partition -> Offset
CreatedAt time.Time // Group creation time
// performCleanup marks a member-less group dead once LastActivity is more
// than 30 minutes in the past.
LastActivity time.Time // Last activity (join, heartbeat, etc.)
Mu sync.RWMutex // Protects group state
}
// OffsetCommit is one committed offset for a topic partition. Values are
// stored in ConsumerGroup.OffsetCommits, indexed by topic and then partition.
type OffsetCommit struct {
Offset int64 // Committed offset
Metadata string // Optional metadata
Timestamp time.Time // Commit timestamp
}
// GroupCoordinator owns the table of consumer groups and the background
// cleanup goroutine that expires members and removes dead groups.
//
// Create one with NewGroupCoordinator and release it with Close.
// Lock ordering: performCleanup acquires groupsMu before any ConsumerGroup.Mu.
type GroupCoordinator struct {
groups map[string]*ConsumerGroup // Group ID -> Group
groupsMu sync.RWMutex // Protects groups map
// Configuration
// ValidateSessionTimeout accepts values in [sessionTimeoutMin, sessionTimeoutMax].
sessionTimeoutMin int32 // Minimum session timeout (ms)
sessionTimeoutMax int32 // Maximum session timeout (ms)
rebalanceTimeoutMs int32 // Default rebalance timeout (ms)
// Timeout management
// Consulted by performCleanup and GetRebalanceStatus.
rebalanceTimeoutManager *RebalanceTimeoutManager
// Cleanup
// cleanupTicker drives cleanupRoutine; NewGroupCoordinator sets it to a
// 30-second ticker.
cleanupTicker *time.Ticker
// stopChan is closed by Close to make cleanupRoutine return.
stopChan chan struct{}
// stopOnce ensures the shutdown steps in Close run only once.
stopOnce sync.Once
}
// NewGroupCoordinator builds a coordinator with an empty group table and the
// default timeout limits, attaches a rebalance timeout manager to it, and
// starts the background cleanup goroutine, which is driven by a 30-second
// ticker. Call Close to stop that goroutine.
func NewGroupCoordinator() *GroupCoordinator {
	const cleanupInterval = 30 * time.Second

	gc := new(GroupCoordinator)
	gc.groups = make(map[string]*ConsumerGroup)
	gc.sessionTimeoutMin = 6000    // 6 seconds
	gc.sessionTimeoutMax = 300000  // 5 minutes
	gc.rebalanceTimeoutMs = 300000 // 5 minutes
	gc.stopChan = make(chan struct{})

	// The manager receives the coordinator itself, so it can only be created
	// once the struct exists.
	gc.rebalanceTimeoutManager = NewRebalanceTimeoutManager(gc)

	// Background cleanup: the ticker must be in place before the goroutine
	// starts selecting on it.
	gc.cleanupTicker = time.NewTicker(cleanupInterval)
	go gc.cleanupRoutine()

	return gc
}
// GetOrCreateGroup looks up groupID and returns the registered group. When no
// entry exists it registers and returns a fresh group in GroupStateEmpty with
// generation zero, empty maps, and CreatedAt/LastActivity set to the current
// time. The lookup and the insert happen under the groups write lock.
func (gc *GroupCoordinator) GetOrCreateGroup(groupID string) *ConsumerGroup {
	gc.groupsMu.Lock()
	defer gc.groupsMu.Unlock()

	if existing, found := gc.groups[groupID]; found {
		return existing
	}

	created := &ConsumerGroup{
		ID:               groupID,
		State:            GroupStateEmpty,
		Members:          map[string]*GroupMember{},
		StaticMembers:    map[string]string{},
		SubscribedTopics: map[string]bool{},
		OffsetCommits:    map[string]map[int32]OffsetCommit{},
		CreatedAt:        time.Now(),
		LastActivity:     time.Now(),
	}
	gc.groups[groupID] = created
	return created
}
// GetGroup returns the group registered under groupID, or nil when there is
// none. The lookup is done under the groups read lock.
func (gc *GroupCoordinator) GetGroup(groupID string) *ConsumerGroup {
	gc.groupsMu.RLock()
	found := gc.groups[groupID]
	gc.groupsMu.RUnlock()
	return found
}
// RemoveGroup deletes the entry for groupID from the coordinator's table
// under the groups write lock. Removing an unknown ID is a no-op.
func (gc *GroupCoordinator) RemoveGroup(groupID string) {
	gc.groupsMu.Lock()
	delete(gc.groups, groupID)
	gc.groupsMu.Unlock()
}
// ListGroups returns the IDs of every registered group as a new, non-nil
// slice. The IDs come from a map iteration, so their order is unspecified.
func (gc *GroupCoordinator) ListGroups() []string {
	gc.groupsMu.RLock()
	defer gc.groupsMu.RUnlock()

	ids := make([]string, len(gc.groups))
	next := 0
	for id := range gc.groups {
		ids[next] = id
		next++
	}
	return ids
}
// FindStaticMember resolves a static instance ID to its group member while
// holding the group's read lock. It returns nil for an empty instanceID, for
// an instance ID that is not registered, and for a registration whose member
// ID is not present in group.Members.
func (gc *GroupCoordinator) FindStaticMember(group *ConsumerGroup, instanceID string) *GroupMember {
	if len(instanceID) == 0 {
		return nil
	}

	group.Mu.RLock()
	defer group.Mu.RUnlock()

	memberID, registered := group.StaticMembers[instanceID]
	if !registered {
		return nil
	}
	return group.Members[memberID]
}
// FindStaticMemberLocked resolves a static instance ID to its group member
// without taking any lock; the caller must already hold group.Mu. It returns
// nil for an empty or unregistered instanceID, and for a registration whose
// member ID is not present in group.Members.
func (gc *GroupCoordinator) FindStaticMemberLocked(group *ConsumerGroup, instanceID string) *GroupMember {
	if instanceID != "" {
		if memberID, registered := group.StaticMembers[instanceID]; registered {
			return group.Members[memberID]
		}
	}
	return nil
}
// RegisterStaticMember records member.ID under the member's static instance
// ID in group.StaticMembers, holding the group's write lock while it does so.
// Members whose GroupInstanceID is nil or empty are ignored. An existing
// entry for the same instance ID is overwritten.
func (gc *GroupCoordinator) RegisterStaticMember(group *ConsumerGroup, member *GroupMember) {
	instanceID := member.GroupInstanceID
	if instanceID == nil || len(*instanceID) == 0 {
		return
	}

	group.Mu.Lock()
	defer group.Mu.Unlock()

	group.StaticMembers[*instanceID] = member.ID
}
// RegisterStaticMemberLocked records member.ID under the member's static
// instance ID in group.StaticMembers without taking any lock; the caller must
// already hold group.Mu. Members whose GroupInstanceID is nil or empty are
// ignored.
func (gc *GroupCoordinator) RegisterStaticMemberLocked(group *ConsumerGroup, member *GroupMember) {
	if instanceID := member.GroupInstanceID; instanceID != nil && *instanceID != "" {
		group.StaticMembers[*instanceID] = member.ID
	}
}
// UnregisterStaticMember drops the instanceID entry from group.StaticMembers
// while holding the group's write lock. An empty instanceID returns without
// locking; an unregistered one is a no-op.
func (gc *GroupCoordinator) UnregisterStaticMember(group *ConsumerGroup, instanceID string) {
	if len(instanceID) == 0 {
		return
	}

	group.Mu.Lock()
	delete(group.StaticMembers, instanceID)
	group.Mu.Unlock()
}
// UnregisterStaticMemberLocked drops the instanceID entry from
// group.StaticMembers without taking any lock; the caller must already hold
// group.Mu. An empty or unregistered instanceID is a no-op.
func (gc *GroupCoordinator) UnregisterStaticMemberLocked(group *ConsumerGroup, instanceID string) {
	if instanceID != "" {
		delete(group.StaticMembers, instanceID)
	}
}
// IsStaticMember reports whether member carries a non-empty static group
// instance ID. A nil GroupInstanceID and a pointer to "" both count as
// not static.
func (gc *GroupCoordinator) IsStaticMember(member *GroupMember) bool {
	instanceID := member.GroupInstanceID
	if instanceID == nil {
		return false
	}
	return len(*instanceID) > 0
}
// GenerateMemberID derives a member ID from the client ID and client host.
// The result is "consumer-" followed by the first 16 hex characters of the
// SHA-256 digest of clientID + "-" + clientHost, so identical inputs always
// produce the identical ID.
func (gc *GroupCoordinator) GenerateMemberID(clientID, clientHost string) string {
	// EXPERIMENT: keep the ID short and simple, in the spirit of the
	// "consumer-1-uuid" / "consumer-groupId-uuid" IDs real Kafka brokers use.
	digest := sha256.Sum256([]byte(clientID + "-" + clientHost))
	hexDigest := fmt.Sprintf("%x", digest)
	shortDigest := hexDigest[:16]
	return fmt.Sprintf("consumer-%s", shortDigest)
}
// ValidateSessionTimeout reports whether timeout lies within the
// coordinator's inclusive range [sessionTimeoutMin, sessionTimeoutMax].
func (gc *GroupCoordinator) ValidateSessionTimeout(timeout int32) bool {
	if timeout < gc.sessionTimeoutMin {
		return false
	}
	return timeout <= gc.sessionTimeoutMax
}
// cleanupRoutine is the body of the background cleanup goroutine. It runs
// performCleanup once for every tick received from cleanupTicker and returns
// as soon as stopChan is observed closed.
func (gc *GroupCoordinator) cleanupRoutine() {
	for {
		// Block until either a shutdown request or the next tick arrives.
		select {
		case <-gc.stopChan:
			return
		case <-gc.cleanupTicker.C:
		}
		gc.performCleanup()
	}
}
// performCleanup runs one maintenance pass over every registered group.
//
// It first asks the rebalance timeout manager to check rebalance timeouts.
// Then, holding groupsMu for writing, it sweeps each group in turn and
// removes from the coordinator every group that ends its sweep in
// GroupStateDead.
func (gc *GroupCoordinator) performCleanup() {
	now := time.Now()

	// Use rebalance timeout manager for more sophisticated timeout handling.
	gc.rebalanceTimeoutManager.CheckRebalanceTimeouts()

	gc.groupsMu.Lock()
	defer gc.groupsMu.Unlock()

	for groupID, group := range gc.groups {
		if gc.sweepGroup(group, now) {
			// Deleting the current key while ranging over a map is permitted.
			delete(gc.groups, groupID)
		}
	}
}

// sweepGroup expires timed-out members of a single group, updates the group
// state, forces completion of a stuck rebalance, and reports whether the
// group is dead and should be removed from the coordinator.
//
// The whole sweep, including the final read of group.State, happens while
// group.Mu is held, so the result cannot race with other goroutines that
// change the group under that lock.
func (gc *GroupCoordinator) sweepGroup(group *ConsumerGroup, now time.Time) bool {
	const (
		// How long a member-less group may stay inactive before it is marked dead.
		emptyGroupRetention = 30 * time.Minute
		// Maximum time allowed for rebalancing before completion is forced.
		maxRebalanceDuration = 10 * time.Minute
	)

	group.Mu.Lock()
	defer group.Mu.Unlock()

	// Drop every member whose last heartbeat is older than its session timeout.
	for memberID, member := range group.Members {
		sessionTimeout := time.Duration(member.SessionTimeout) * time.Millisecond
		if now.Sub(member.LastHeartbeat) <= sessionTimeout {
			continue
		}

		delete(group.Members, memberID)

		// Also drop the static-membership registration, but only when it
		// still points at this member: the instance ID may have been
		// re-registered for a newer member ID.
		if instanceID := member.GroupInstanceID; instanceID != nil {
			if mapped, ok := group.StaticMembers[*instanceID]; ok && mapped == memberID {
				delete(group.StaticMembers, *instanceID)
			}
		}

		if group.Leader == memberID {
			group.Leader = ""
		}
	}
	// NOTE(review): when some members expire but others remain, nothing here
	// moves the group into a rebalance or refreshes SubscribedTopics; confirm
	// that is handled elsewhere.

	// Update group state based on member count.
	if len(group.Members) == 0 {
		if group.State != GroupStateEmpty {
			group.State = GroupStateEmpty
			group.Generation++
		}
		// Mark the group for deletion if it has been empty for too long.
		if now.Sub(group.LastActivity) > emptyGroupRetention {
			group.State = GroupStateDead
		}
	}

	// Check for a stuck rebalance and force completion if necessary.
	// NOTE(review): both manager calls run with group.Mu held, so they must
	// not try to acquire group.Mu themselves; confirm against the manager.
	if gc.rebalanceTimeoutManager.IsRebalanceStuck(group, maxRebalanceDuration) {
		gc.rebalanceTimeoutManager.ForceCompleteRebalance(group)
	}

	// Evaluated before the deferred unlock runs, and after any forced
	// completion, so the verdict reflects the final state of this sweep.
	return group.State == GroupStateDead
}
// Close stops the coordinator's background cleanup: it closes stopChan, which
// makes cleanupRoutine return, and stops the cleanup ticker when one is set.
// The shutdown runs through stopOnce, so calling Close repeatedly is safe.
func (gc *GroupCoordinator) Close() {
	shutdown := func() {
		close(gc.stopChan)
		if ticker := gc.cleanupTicker; ticker != nil {
			ticker.Stop()
		}
	}
	gc.stopOnce.Do(shutdown)
}
// GetGroupStats returns a snapshot of coordinator-wide statistics:
//
//	"total_groups"  int            - number of registered groups
//	"total_members" int            - members summed over all groups
//	"group_states"  map[string]int - number of groups per state name
//
// The groups read lock is held for the whole call and each group's read lock
// while that group is inspected.
//
// Groups are tallied directly by state name. States that share a name (every
// out-of-range value stringifies as "Unknown") are therefore summed rather
// than overwriting one another in map-iteration order.
func (gc *GroupCoordinator) GetGroupStats() map[string]interface{} {
	gc.groupsMu.RLock()
	defer gc.groupsMu.RUnlock()

	groupStates := make(map[string]int)
	totalMembers := 0
	for _, group := range gc.groups {
		group.Mu.RLock()
		groupStates[group.State.String()]++
		totalMembers += len(group.Members)
		group.Mu.RUnlock()
	}

	return map[string]interface{}{
		"total_groups":  len(gc.groups),
		"group_states":  groupStates,
		"total_members": totalMembers,
	}
}
// GetRebalanceStatus asks the rebalance timeout manager for the rebalance
// status of groupID and returns its answer unchanged.
func (gc *GroupCoordinator) GetRebalanceStatus(groupID string) *RebalanceStatus {
	manager := gc.rebalanceTimeoutManager
	status := manager.GetRebalanceStatus(groupID)
	return status
}