You can not select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
					
					
						
							218 lines
						
					
					
						
							7.2 KiB
						
					
					
				
			
		
		
		
			
			
			
		
		
	
	
							218 lines
						
					
					
						
							7.2 KiB
						
					
					
				
								package consumer
							 | 
						|
								
							 | 
						|
								import (
							 | 
						|
									"time"
							 | 
						|
								)
							 | 
						|
								
							 | 
						|
								// RebalanceTimeoutManager handles rebalance timeout logic and member eviction
							 | 
						|
								type RebalanceTimeoutManager struct {
							 | 
						|
									coordinator *GroupCoordinator
							 | 
						|
								}
							 | 
						|
								
							 | 
						|
								// NewRebalanceTimeoutManager creates a new rebalance timeout manager
							 | 
						|
								func NewRebalanceTimeoutManager(coordinator *GroupCoordinator) *RebalanceTimeoutManager {
							 | 
						|
									return &RebalanceTimeoutManager{
							 | 
						|
										coordinator: coordinator,
							 | 
						|
									}
							 | 
						|
								}
							 | 
						|
								
							 | 
						|
								// CheckRebalanceTimeouts checks for members that have exceeded rebalance timeouts
							 | 
						|
								func (rtm *RebalanceTimeoutManager) CheckRebalanceTimeouts() {
							 | 
						|
									now := time.Now()
							 | 
						|
									rtm.coordinator.groupsMu.RLock()
							 | 
						|
									defer rtm.coordinator.groupsMu.RUnlock()
							 | 
						|
								
							 | 
						|
									for _, group := range rtm.coordinator.groups {
							 | 
						|
										group.Mu.Lock()
							 | 
						|
								
							 | 
						|
										// Only check timeouts for groups in rebalancing states
							 | 
						|
										if group.State == GroupStatePreparingRebalance || group.State == GroupStateCompletingRebalance {
							 | 
						|
											rtm.checkGroupRebalanceTimeout(group, now)
							 | 
						|
										}
							 | 
						|
								
							 | 
						|
										group.Mu.Unlock()
							 | 
						|
									}
							 | 
						|
								}
							 | 
						|
								
							 | 
						|
								// checkGroupRebalanceTimeout checks and handles rebalance timeout for a specific group
							 | 
						|
								func (rtm *RebalanceTimeoutManager) checkGroupRebalanceTimeout(group *ConsumerGroup, now time.Time) {
							 | 
						|
									expiredMembers := make([]string, 0)
							 | 
						|
								
							 | 
						|
									for memberID, member := range group.Members {
							 | 
						|
										// Check if member has exceeded its rebalance timeout
							 | 
						|
										rebalanceTimeout := time.Duration(member.RebalanceTimeout) * time.Millisecond
							 | 
						|
										if rebalanceTimeout == 0 {
							 | 
						|
											// Use default rebalance timeout if not specified
							 | 
						|
											rebalanceTimeout = time.Duration(rtm.coordinator.rebalanceTimeoutMs) * time.Millisecond
							 | 
						|
										}
							 | 
						|
								
							 | 
						|
										// For members in pending state during rebalance, check against join time
							 | 
						|
										if member.State == MemberStatePending {
							 | 
						|
											if now.Sub(member.JoinedAt) > rebalanceTimeout {
							 | 
						|
												expiredMembers = append(expiredMembers, memberID)
							 | 
						|
											}
							 | 
						|
										}
							 | 
						|
								
							 | 
						|
										// Also check session timeout as a fallback
							 | 
						|
										sessionTimeout := time.Duration(member.SessionTimeout) * time.Millisecond
							 | 
						|
										if now.Sub(member.LastHeartbeat) > sessionTimeout {
							 | 
						|
											expiredMembers = append(expiredMembers, memberID)
							 | 
						|
										}
							 | 
						|
									}
							 | 
						|
								
							 | 
						|
									// Remove expired members and trigger rebalance if necessary
							 | 
						|
									if len(expiredMembers) > 0 {
							 | 
						|
										rtm.evictExpiredMembers(group, expiredMembers)
							 | 
						|
									}
							 | 
						|
								}
							 | 
						|
								
							 | 
						|
								// evictExpiredMembers removes expired members and updates group state
							 | 
						|
								func (rtm *RebalanceTimeoutManager) evictExpiredMembers(group *ConsumerGroup, expiredMembers []string) {
							 | 
						|
									for _, memberID := range expiredMembers {
							 | 
						|
										delete(group.Members, memberID)
							 | 
						|
								
							 | 
						|
										// If the leader was evicted, clear leader
							 | 
						|
										if group.Leader == memberID {
							 | 
						|
											group.Leader = ""
							 | 
						|
										}
							 | 
						|
									}
							 | 
						|
								
							 | 
						|
									// Update group state based on remaining members
							 | 
						|
									if len(group.Members) == 0 {
							 | 
						|
										group.State = GroupStateEmpty
							 | 
						|
										group.Generation++
							 | 
						|
										group.Leader = ""
							 | 
						|
									} else {
							 | 
						|
										// If we were in the middle of rebalancing, restart the process
							 | 
						|
										if group.State == GroupStatePreparingRebalance || group.State == GroupStateCompletingRebalance {
							 | 
						|
											// Select new leader if needed
							 | 
						|
											if group.Leader == "" {
							 | 
						|
												for memberID := range group.Members {
							 | 
						|
													group.Leader = memberID
							 | 
						|
													break
							 | 
						|
												}
							 | 
						|
											}
							 | 
						|
								
							 | 
						|
											// Reset to preparing rebalance to restart the process
							 | 
						|
											group.State = GroupStatePreparingRebalance
							 | 
						|
											group.Generation++
							 | 
						|
								
							 | 
						|
											// Mark remaining members as pending
							 | 
						|
											for _, member := range group.Members {
							 | 
						|
												member.State = MemberStatePending
							 | 
						|
											}
							 | 
						|
										}
							 | 
						|
									}
							 | 
						|
								
							 | 
						|
									group.LastActivity = time.Now()
							 | 
						|
								}
							 | 
						|
								
							 | 
						|
								// IsRebalanceStuck checks if a group has been stuck in rebalancing for too long
							 | 
						|
								func (rtm *RebalanceTimeoutManager) IsRebalanceStuck(group *ConsumerGroup, maxRebalanceDuration time.Duration) bool {
							 | 
						|
									if group.State != GroupStatePreparingRebalance && group.State != GroupStateCompletingRebalance {
							 | 
						|
										return false
							 | 
						|
									}
							 | 
						|
								
							 | 
						|
									return time.Since(group.LastActivity) > maxRebalanceDuration
							 | 
						|
								}
							 | 
						|
								
							 | 
						|
								// ForceCompleteRebalance forces completion of a stuck rebalance
							 | 
						|
								func (rtm *RebalanceTimeoutManager) ForceCompleteRebalance(group *ConsumerGroup) {
							 | 
						|
									group.Mu.Lock()
							 | 
						|
									defer group.Mu.Unlock()
							 | 
						|
								
							 | 
						|
									// If stuck in preparing rebalance, move to completing
							 | 
						|
									if group.State == GroupStatePreparingRebalance {
							 | 
						|
										group.State = GroupStateCompletingRebalance
							 | 
						|
										group.LastActivity = time.Now()
							 | 
						|
										return
							 | 
						|
									}
							 | 
						|
								
							 | 
						|
									// If stuck in completing rebalance, force to stable
							 | 
						|
									if group.State == GroupStateCompletingRebalance {
							 | 
						|
										group.State = GroupStateStable
							 | 
						|
										for _, member := range group.Members {
							 | 
						|
											member.State = MemberStateStable
							 | 
						|
										}
							 | 
						|
										group.LastActivity = time.Now()
							 | 
						|
										return
							 | 
						|
									}
							 | 
						|
								}
							 | 
						|
								
							 | 
						|
								// GetRebalanceStatus returns the current rebalance status for a group
							 | 
						|
								func (rtm *RebalanceTimeoutManager) GetRebalanceStatus(groupID string) *RebalanceStatus {
							 | 
						|
									group := rtm.coordinator.GetGroup(groupID)
							 | 
						|
									if group == nil {
							 | 
						|
										return nil
							 | 
						|
									}
							 | 
						|
								
							 | 
						|
									group.Mu.RLock()
							 | 
						|
									defer group.Mu.RUnlock()
							 | 
						|
								
							 | 
						|
									status := &RebalanceStatus{
							 | 
						|
										GroupID:           groupID,
							 | 
						|
										State:             group.State,
							 | 
						|
										Generation:        group.Generation,
							 | 
						|
										MemberCount:       len(group.Members),
							 | 
						|
										Leader:            group.Leader,
							 | 
						|
										LastActivity:      group.LastActivity,
							 | 
						|
										IsRebalancing:     group.State == GroupStatePreparingRebalance || group.State == GroupStateCompletingRebalance,
							 | 
						|
										RebalanceDuration: time.Since(group.LastActivity),
							 | 
						|
									}
							 | 
						|
								
							 | 
						|
									// Calculate member timeout status
							 | 
						|
									now := time.Now()
							 | 
						|
									for memberID, member := range group.Members {
							 | 
						|
										memberStatus := MemberTimeoutStatus{
							 | 
						|
											MemberID:         memberID,
							 | 
						|
											State:            member.State,
							 | 
						|
											LastHeartbeat:    member.LastHeartbeat,
							 | 
						|
											JoinedAt:         member.JoinedAt,
							 | 
						|
											SessionTimeout:   time.Duration(member.SessionTimeout) * time.Millisecond,
							 | 
						|
											RebalanceTimeout: time.Duration(member.RebalanceTimeout) * time.Millisecond,
							 | 
						|
										}
							 | 
						|
								
							 | 
						|
										// Calculate time until session timeout
							 | 
						|
										sessionTimeRemaining := memberStatus.SessionTimeout - now.Sub(member.LastHeartbeat)
							 | 
						|
										if sessionTimeRemaining < 0 {
							 | 
						|
											sessionTimeRemaining = 0
							 | 
						|
										}
							 | 
						|
										memberStatus.SessionTimeRemaining = sessionTimeRemaining
							 | 
						|
								
							 | 
						|
										// Calculate time until rebalance timeout
							 | 
						|
										rebalanceTimeRemaining := memberStatus.RebalanceTimeout - now.Sub(member.JoinedAt)
							 | 
						|
										if rebalanceTimeRemaining < 0 {
							 | 
						|
											rebalanceTimeRemaining = 0
							 | 
						|
										}
							 | 
						|
										memberStatus.RebalanceTimeRemaining = rebalanceTimeRemaining
							 | 
						|
								
							 | 
						|
										status.Members = append(status.Members, memberStatus)
							 | 
						|
									}
							 | 
						|
								
							 | 
						|
									return status
							 | 
						|
								}
							 | 
						|
								
							 | 
						|
								// RebalanceStatus represents the current status of a group's rebalance
							 | 
						|
								type RebalanceStatus struct {
							 | 
						|
									GroupID           string                `json:"group_id"`
							 | 
						|
									State             GroupState            `json:"state"`
							 | 
						|
									Generation        int32                 `json:"generation"`
							 | 
						|
									MemberCount       int                   `json:"member_count"`
							 | 
						|
									Leader            string                `json:"leader"`
							 | 
						|
									LastActivity      time.Time             `json:"last_activity"`
							 | 
						|
									IsRebalancing     bool                  `json:"is_rebalancing"`
							 | 
						|
									RebalanceDuration time.Duration         `json:"rebalance_duration"`
							 | 
						|
									Members           []MemberTimeoutStatus `json:"members"`
							 | 
						|
								}
							 | 
						|
								
							 | 
						|
								// MemberTimeoutStatus represents timeout status for a group member
							 | 
						|
								type MemberTimeoutStatus struct {
							 | 
						|
									MemberID               string        `json:"member_id"`
							 | 
						|
									State                  MemberState   `json:"state"`
							 | 
						|
									LastHeartbeat          time.Time     `json:"last_heartbeat"`
							 | 
						|
									JoinedAt               time.Time     `json:"joined_at"`
							 | 
						|
									SessionTimeout         time.Duration `json:"session_timeout"`
							 | 
						|
									RebalanceTimeout       time.Duration `json:"rebalance_timeout"`
							 | 
						|
									SessionTimeRemaining   time.Duration `json:"session_time_remaining"`
							 | 
						|
									RebalanceTimeRemaining time.Duration `json:"rebalance_time_remaining"`
							 | 
						|
								}
							 |