package gateway

import (
	"context"
	"encoding/json"
	"fmt"
	"hash/fnv"
	"io"
	"sort"
	"strings"
	"sync"
	"time"

	"github.com/seaweedfs/seaweedfs/weed/cluster"
	"github.com/seaweedfs/seaweedfs/weed/filer"
	"github.com/seaweedfs/seaweedfs/weed/filer_client"
	"github.com/seaweedfs/seaweedfs/weed/glog"
	"github.com/seaweedfs/seaweedfs/weed/mq/kafka/protocol"
	"github.com/seaweedfs/seaweedfs/weed/pb"
	"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
	"github.com/seaweedfs/seaweedfs/weed/pb/master_pb"
	"google.golang.org/grpc"
)

// CoordinatorRegistry manages consumer group coordinator assignments.
// Only the gateway leader maintains this registry.
type CoordinatorRegistry struct {
	// Leader election
	leaderLock       *cluster.LiveLock
	isLeader         bool
	leaderMutex      sync.RWMutex
	leadershipChange chan string // Notifies when leadership changes

	// No in-memory assignments - read/write directly to filer.
	// assignmentsMutex is still needed for coordinating file operations.
	assignmentsMutex sync.RWMutex

	// Gateway registry
	activeGateways map[string]*GatewayInfo // gatewayAddress -> info
	gatewaysMutex  sync.RWMutex

	// Configuration
	gatewayAddress        string
	lockClient            *cluster.LockClient
	filerClientAccessor   *filer_client.FilerClientAccessor
	filerDiscoveryService *filer_client.FilerDiscoveryService

	// Control
	stopChan chan struct{}
	wg       sync.WaitGroup
}

// Remove local CoordinatorAssignment - use protocol.CoordinatorAssignment instead

// GatewayInfo represents an active gateway instance
type GatewayInfo struct {
	Address       string
	NodeID        int32
	RegisteredAt  time.Time
	LastHeartbeat time.Time
	IsHealthy     bool
}

const (
	GatewayLeaderLockKey = "kafka-gateway-leader"
	HeartbeatInterval    = 10 * time.Second
	GatewayTimeout       = 30 * time.Second

	// Filer path for coordinator assignment persistence
	CoordinatorAssignmentsDir = "/topics/kafka/.meta/coordinators"
)
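
// Illustrative note (added, not in the original source): with the directory
// above, an assignment for a hypothetical consumer group "orders-workers" is
// stored as the single file
//
//	/topics/kafka/.meta/coordinators/orders-workers_assignments.json
//
// following the "<consumer-group>_assignments.json" naming convention used by
// the persistence helpers at the bottom of this file.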
								
// NewCoordinatorRegistry creates a new coordinator registry
func NewCoordinatorRegistry(gatewayAddress string, masters []pb.ServerAddress, grpcDialOption grpc.DialOption) *CoordinatorRegistry {
	// Create filer discovery service that will periodically refresh filers from all masters
	filerDiscoveryService := filer_client.NewFilerDiscoveryService(masters, grpcDialOption)

	// Manually discover filers from each master until we find one
	var seedFiler pb.ServerAddress
	for _, master := range masters {
		// Use the same discovery logic as filer_discovery.go
		grpcAddr := master.ToGrpcAddress()
		conn, err := grpc.NewClient(grpcAddr, grpcDialOption)
		if err != nil {
			continue
		}

		client := master_pb.NewSeaweedClient(conn)
		ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
		resp, err := client.ListClusterNodes(ctx, &master_pb.ListClusterNodesRequest{
			ClientType: cluster.FilerType,
		})
		cancel()
		conn.Close()

		if err == nil && len(resp.ClusterNodes) > 0 {
			// Found a filer - use its HTTP address (WithFilerClient will convert to gRPC automatically)
			seedFiler = pb.ServerAddress(resp.ClusterNodes[0].Address)
			glog.V(1).Infof("Using filer %s as seed for distributed locking (discovered from master %s)", seedFiler, master)
			break
		}
	}

	lockClient := cluster.NewLockClient(grpcDialOption, seedFiler)

	registry := &CoordinatorRegistry{
		activeGateways:        make(map[string]*GatewayInfo),
		gatewayAddress:        gatewayAddress,
		lockClient:            lockClient,
		stopChan:              make(chan struct{}),
		leadershipChange:      make(chan string, 10), // Buffered channel for leadership notifications
		filerDiscoveryService: filerDiscoveryService,
	}

	// Create filer client accessor that uses dynamic filer discovery
	registry.filerClientAccessor = &filer_client.FilerClientAccessor{
		GetGrpcDialOption: func() grpc.DialOption {
			return grpcDialOption
		},
		GetFilers: func() []pb.ServerAddress {
			return registry.filerDiscoveryService.GetFilers()
		},
	}

	return registry
}
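
// Hypothetical wiring sketch (added for illustration, not part of the original
// file): how a gateway process might construct and run the registry. The
// addresses and dial option below are placeholders; insecure.NewCredentials
// comes from google.golang.org/grpc/credentials/insecure.
//
//	masters := []pb.ServerAddress{"master1:9333", "master2:9333"}
//	cr := NewCoordinatorRegistry("gateway1:9093", masters,
//		grpc.WithTransportCredentials(insecure.NewCredentials()))
//	if err := cr.Start(); err != nil {
//		glog.Fatalf("coordinator registry: %v", err)
//	}
//	defer cr.Stop()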
								
// Start begins the coordinator registry operations
func (cr *CoordinatorRegistry) Start() error {
	glog.V(1).Infof("Starting coordinator registry for gateway %s", cr.gatewayAddress)

	// Start filer discovery service first
	if err := cr.filerDiscoveryService.Start(); err != nil {
		return fmt.Errorf("failed to start filer discovery service: %w", err)
	}

	// Start leader election
	cr.startLeaderElection()

	// Start heartbeat loop to keep this gateway healthy
	cr.startHeartbeatLoop()

	// Start cleanup goroutine
	cr.startCleanupLoop()

	// Register this gateway
	cr.registerGateway(cr.gatewayAddress)

	return nil
}

// Stop shuts down the coordinator registry
func (cr *CoordinatorRegistry) Stop() error {
	glog.V(1).Infof("Stopping coordinator registry for gateway %s", cr.gatewayAddress)

	close(cr.stopChan)
	cr.wg.Wait()

	// Release leader lock if held
	if cr.leaderLock != nil {
		cr.leaderLock.Stop()
	}

	// Stop filer discovery service
	if err := cr.filerDiscoveryService.Stop(); err != nil {
		glog.Warningf("Failed to stop filer discovery service: %v", err)
	}

	return nil
}

// startLeaderElection starts the leader election process
func (cr *CoordinatorRegistry) startLeaderElection() {
	cr.wg.Add(1)
	go func() {
		defer cr.wg.Done()

		// Start long-lived lock for leader election
		cr.leaderLock = cr.lockClient.StartLongLivedLock(
			GatewayLeaderLockKey,
			cr.gatewayAddress,
			cr.onLeadershipChange,
		)

		// Wait for shutdown
		<-cr.stopChan

		// The leader lock will be stopped when Stop() is called
	}()
}

// onLeadershipChange handles leadership changes
func (cr *CoordinatorRegistry) onLeadershipChange(newLeader string) {
	cr.leaderMutex.Lock()
	defer cr.leaderMutex.Unlock()

	wasLeader := cr.isLeader
	cr.isLeader = (newLeader == cr.gatewayAddress)

	if cr.isLeader && !wasLeader {
		glog.V(0).Infof("Gateway %s became the coordinator registry leader", cr.gatewayAddress)
		cr.onBecameLeader()
	} else if !cr.isLeader && wasLeader {
		glog.V(0).Infof("Gateway %s lost coordinator registry leadership to %s", cr.gatewayAddress, newLeader)
		cr.onLostLeadership()
	}

	// Notify waiting goroutines about leadership change
	select {
	case cr.leadershipChange <- newLeader:
		// Notification sent
	default:
		// Channel full, skip notification (shouldn't happen with buffered channel)
	}
}

// onBecameLeader handles becoming the leader
func (cr *CoordinatorRegistry) onBecameLeader() {
	// Assignments are now read directly from files - no need to load into memory
	glog.V(1).Info("Leader election complete - coordinator assignments will be read from filer as needed")

	// Clear gateway registry since it's ephemeral (gateways need to re-register)
	cr.gatewaysMutex.Lock()
	cr.activeGateways = make(map[string]*GatewayInfo)
	cr.gatewaysMutex.Unlock()

	// Re-register this gateway
	cr.registerGateway(cr.gatewayAddress)
}

// onLostLeadership handles losing leadership
func (cr *CoordinatorRegistry) onLostLeadership() {
	// No in-memory assignments to clear - assignments are stored in filer
	glog.V(1).Info("Lost leadership - no longer managing coordinator assignments")
}

// IsLeader returns whether this gateway is the coordinator registry leader
func (cr *CoordinatorRegistry) IsLeader() bool {
	cr.leaderMutex.RLock()
	defer cr.leaderMutex.RUnlock()
	return cr.isLeader
}

// GetLeaderAddress returns the current leader's address
func (cr *CoordinatorRegistry) GetLeaderAddress() string {
	if cr.leaderLock != nil {
		return cr.leaderLock.LockOwner()
	}
	return ""
}

// WaitForLeader waits for a leader to be elected, with timeout
func (cr *CoordinatorRegistry) WaitForLeader(timeout time.Duration) (string, error) {
	// Check if there's already a leader
	if leader := cr.GetLeaderAddress(); leader != "" {
		return leader, nil
	}

	// Check if this instance is the leader
	if cr.IsLeader() {
		return cr.gatewayAddress, nil
	}

	// Wait for leadership change notification
	deadline := time.Now().Add(timeout)
	for {
		select {
		case leader := <-cr.leadershipChange:
			if leader != "" {
				return leader, nil
			}
		case <-time.After(time.Until(deadline)):
			return "", fmt.Errorf("timeout waiting for leader election after %v", timeout)
		}

		// Double-check in case we missed a notification
		if leader := cr.GetLeaderAddress(); leader != "" {
			return leader, nil
		}
		if cr.IsLeader() {
			return cr.gatewayAddress, nil
		}

		if time.Now().After(deadline) {
			break
		}
	}

	return "", fmt.Errorf("timeout waiting for leader election after %v", timeout)
}
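
// Illustrative call site (an assumption, not from the original file): a
// FindCoordinator-style handler would typically block briefly for an elected
// leader, then either serve the lookup locally or forward it.
//
//	leader, err := cr.WaitForLeader(5 * time.Second)
//	if err != nil {
//		return fmt.Errorf("coordinator registry has no leader: %w", err)
//	}
//	if cr.IsLeader() {
//		// handle the lookup locally via AssignCoordinator / GetCoordinator
//	} else {
//		// forward the request to the gateway at `leader`
//	}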
								
// AssignCoordinator assigns a coordinator for a consumer group using a balanced strategy.
// The coordinator is selected deterministically via consistent hashing of the
// consumer group across the set of healthy gateways. This spreads groups evenly
// and avoids hot-spotting on the first requester.
func (cr *CoordinatorRegistry) AssignCoordinator(consumerGroup string, requestingGateway string) (*protocol.CoordinatorAssignment, error) {
	if !cr.IsLeader() {
		return nil, fmt.Errorf("not the coordinator registry leader")
	}

	// First check if requesting gateway is healthy without holding assignments lock
	if !cr.isGatewayHealthy(requestingGateway) {
		return nil, fmt.Errorf("requesting gateway %s is not healthy", requestingGateway)
	}

	// Lock assignments mutex to coordinate file operations
	cr.assignmentsMutex.Lock()
	defer cr.assignmentsMutex.Unlock()

	// Check if coordinator already assigned by trying to load from file
	existing, err := cr.loadCoordinatorAssignment(consumerGroup)
	if err == nil && existing != nil {
		// Assignment exists, check if coordinator is still healthy
		if cr.isGatewayHealthy(existing.CoordinatorAddr) {
			glog.V(2).Infof("Consumer group %s already has healthy coordinator %s", consumerGroup, existing.CoordinatorAddr)
			return existing, nil
		} else {
			glog.V(1).Infof("Existing coordinator %s for group %s is unhealthy, reassigning", existing.CoordinatorAddr, consumerGroup)
			// Delete the existing assignment file
			if delErr := cr.deleteCoordinatorAssignment(consumerGroup); delErr != nil {
				glog.Warningf("Failed to delete stale assignment for group %s: %v", consumerGroup, delErr)
			}
		}
	}

	// Choose a balanced coordinator via consistent hashing across healthy gateways
	chosenAddr, nodeID, err := cr.chooseCoordinatorAddrForGroup(consumerGroup)
	if err != nil {
		return nil, err
	}

	assignment := &protocol.CoordinatorAssignment{
		ConsumerGroup:     consumerGroup,
		CoordinatorAddr:   chosenAddr,
		CoordinatorNodeID: nodeID,
		AssignedAt:        time.Now(),
		LastHeartbeat:     time.Now(),
	}

	// Persist the new assignment to individual file
	if err := cr.saveCoordinatorAssignment(consumerGroup, assignment); err != nil {
		return nil, fmt.Errorf("failed to persist coordinator assignment for group %s: %w", consumerGroup, err)
	}

	glog.V(1).Infof("Assigned coordinator %s (node %d) for consumer group %s via consistent hashing", chosenAddr, nodeID, consumerGroup)
	return assignment, nil
}
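
// Sketch of the persisted assignment document (illustrative; the exact JSON
// field names depend on any struct tags declared on
// protocol.CoordinatorAssignment, which are not shown in this file):
//
//	{
//	  "ConsumerGroup":     "orders-workers",
//	  "CoordinatorAddr":   "gateway2:9093",
//	  "CoordinatorNodeID": 1404911437,
//	  "AssignedAt":        "2024-01-01T00:00:00Z",
//	  "LastHeartbeat":     "2024-01-01T00:00:00Z"
//	}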
								
// GetCoordinator returns the coordinator for a consumer group
func (cr *CoordinatorRegistry) GetCoordinator(consumerGroup string) (*protocol.CoordinatorAssignment, error) {
	if !cr.IsLeader() {
		return nil, fmt.Errorf("not the coordinator registry leader")
	}

	// Load assignment directly from file
	assignment, err := cr.loadCoordinatorAssignment(consumerGroup)
	if err != nil {
		return nil, fmt.Errorf("no coordinator assigned for consumer group %s: %w", consumerGroup, err)
	}

	return assignment, nil
}

// RegisterGateway registers a gateway instance
func (cr *CoordinatorRegistry) RegisterGateway(gatewayAddress string) error {
	if !cr.IsLeader() {
		return fmt.Errorf("not the coordinator registry leader")
	}

	cr.registerGateway(gatewayAddress)
	return nil
}

// registerGateway is the internal method to register a gateway
func (cr *CoordinatorRegistry) registerGateway(gatewayAddress string) {
	cr.gatewaysMutex.Lock()
	defer cr.gatewaysMutex.Unlock()

	nodeID := generateDeterministicNodeID(gatewayAddress)

	cr.activeGateways[gatewayAddress] = &GatewayInfo{
		Address:       gatewayAddress,
		NodeID:        nodeID,
		RegisteredAt:  time.Now(),
		LastHeartbeat: time.Now(),
		IsHealthy:     true,
	}

	glog.V(1).Infof("Registered gateway %s with deterministic node ID %d", gatewayAddress, nodeID)
}

// HeartbeatGateway updates the heartbeat for a gateway
func (cr *CoordinatorRegistry) HeartbeatGateway(gatewayAddress string) error {
	if !cr.IsLeader() {
		return fmt.Errorf("not the coordinator registry leader")
	}

	cr.gatewaysMutex.Lock()

	if gateway, exists := cr.activeGateways[gatewayAddress]; exists {
		gateway.LastHeartbeat = time.Now()
		gateway.IsHealthy = true
		cr.gatewaysMutex.Unlock()
		glog.V(3).Infof("Updated heartbeat for gateway %s", gatewayAddress)
	} else {
		// Auto-register unknown gateway - unlock first to avoid double unlock
		cr.gatewaysMutex.Unlock()
		cr.registerGateway(gatewayAddress)
	}

	return nil
}

// isGatewayHealthy checks if a gateway is healthy
func (cr *CoordinatorRegistry) isGatewayHealthy(gatewayAddress string) bool {
	cr.gatewaysMutex.RLock()
	defer cr.gatewaysMutex.RUnlock()

	return cr.isGatewayHealthyUnsafe(gatewayAddress)
}

// isGatewayHealthyUnsafe checks if a gateway is healthy without acquiring locks.
// Caller must hold gatewaysMutex.RLock() or gatewaysMutex.Lock().
func (cr *CoordinatorRegistry) isGatewayHealthyUnsafe(gatewayAddress string) bool {
	gateway, exists := cr.activeGateways[gatewayAddress]
	if !exists {
		return false
	}

	return gateway.IsHealthy && time.Since(gateway.LastHeartbeat) < GatewayTimeout
}

// getGatewayNodeID returns the node ID for a gateway
func (cr *CoordinatorRegistry) getGatewayNodeID(gatewayAddress string) int32 {
	cr.gatewaysMutex.RLock()
	defer cr.gatewaysMutex.RUnlock()

	return cr.getGatewayNodeIDUnsafe(gatewayAddress)
}

// getGatewayNodeIDUnsafe returns the node ID for a gateway without acquiring locks.
// Caller must hold gatewaysMutex.RLock() or gatewaysMutex.Lock().
func (cr *CoordinatorRegistry) getGatewayNodeIDUnsafe(gatewayAddress string) int32 {
	if gateway, exists := cr.activeGateways[gatewayAddress]; exists {
		return gateway.NodeID
	}

	return 1 // Default node ID
}

// getHealthyGatewaysSorted returns a stable-sorted list of healthy gateway addresses.
func (cr *CoordinatorRegistry) getHealthyGatewaysSorted() []string {
	cr.gatewaysMutex.RLock()
	defer cr.gatewaysMutex.RUnlock()

	addresses := make([]string, 0, len(cr.activeGateways))
	for addr, info := range cr.activeGateways {
		if info.IsHealthy && time.Since(info.LastHeartbeat) < GatewayTimeout {
			addresses = append(addresses, addr)
		}
	}

	sort.Strings(addresses)
	return addresses
}

// chooseCoordinatorAddrForGroup selects a coordinator address using consistent hashing.
func (cr *CoordinatorRegistry) chooseCoordinatorAddrForGroup(consumerGroup string) (string, int32, error) {
	healthy := cr.getHealthyGatewaysSorted()
	if len(healthy) == 0 {
		return "", 0, fmt.Errorf("no healthy gateways available for coordinator assignment")
	}
	idx := hashStringToIndex(consumerGroup, len(healthy))
	addr := healthy[idx]
	return addr, cr.getGatewayNodeID(addr), nil
}

// hashStringToIndex hashes a string to an index in [0, modulo).
func hashStringToIndex(s string, modulo int) int {
	if modulo <= 0 {
		return 0
	}
	h := fnv.New32a()
	_, _ = h.Write([]byte(s))
	return int(h.Sum32() % uint32(modulo))
}
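
// Worked example (illustrative): suppose the sorted healthy set is
// ["gw1:9093", "gw2:9093", "gw3:9093"]. FNV-1a is deterministic, so
// hashStringToIndex("orders-workers", 3) yields the same index on every
// leader, and a freshly assigned group always lands on the same gateway.
// Note that this is hash-mod-N: when a gateway joins or leaves, the modulo
// changes and some groups map to a new index; assignments that point at a
// gateway that has since become unhealthy are repaired by cleanupStaleEntries.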
								
// generateDeterministicNodeID generates a stable node ID based on gateway address
func generateDeterministicNodeID(gatewayAddress string) int32 {
	h := fnv.New32a()
	_, _ = h.Write([]byte(gatewayAddress))
	// Use only positive values and avoid 0
	return int32(h.Sum32()&0x7fffffff) + 1
}
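
// Note added for illustration: the ID is derived purely from the address, so
// a gateway keeps its node ID across restarts and every leader computes the
// same ID without coordination. Being a 32-bit hash, distinct addresses can
// in principle collide; uniqueness is probabilistic rather than guaranteed.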
								
// startHeartbeatLoop starts the heartbeat loop for this gateway
func (cr *CoordinatorRegistry) startHeartbeatLoop() {
	cr.wg.Add(1)
	go func() {
		defer cr.wg.Done()

		ticker := time.NewTicker(HeartbeatInterval / 2) // Send heartbeats more frequently than timeout
		defer ticker.Stop()

		for {
			select {
			case <-cr.stopChan:
				return
			case <-ticker.C:
				if cr.IsLeader() {
					// Send heartbeat for this gateway to keep it healthy
					if err := cr.HeartbeatGateway(cr.gatewayAddress); err != nil {
						glog.V(2).Infof("Failed to send heartbeat for gateway %s: %v", cr.gatewayAddress, err)
					}
				}
			}
		}
	}()
}

// startCleanupLoop starts the cleanup loop for stale assignments and gateways
func (cr *CoordinatorRegistry) startCleanupLoop() {
	cr.wg.Add(1)
	go func() {
		defer cr.wg.Done()

		ticker := time.NewTicker(HeartbeatInterval)
		defer ticker.Stop()

		for {
			select {
			case <-cr.stopChan:
				return
			case <-ticker.C:
				if cr.IsLeader() {
					cr.cleanupStaleEntries()
				}
			}
		}
	}()
}

// cleanupStaleEntries removes stale gateways and assignments
func (cr *CoordinatorRegistry) cleanupStaleEntries() {
	now := time.Now()

	// First, identify stale gateways
	var staleGateways []string
	cr.gatewaysMutex.Lock()
	for addr, gateway := range cr.activeGateways {
		if now.Sub(gateway.LastHeartbeat) > GatewayTimeout {
			staleGateways = append(staleGateways, addr)
		}
	}
	// Remove stale gateways
	for _, addr := range staleGateways {
		glog.V(1).Infof("Removing stale gateway %s", addr)
		delete(cr.activeGateways, addr)
	}
	cr.gatewaysMutex.Unlock()

	// Then, identify assignments with unhealthy coordinators and reassign them
	cr.assignmentsMutex.Lock()
	defer cr.assignmentsMutex.Unlock()

	// Get list of all consumer groups with assignments
	consumerGroups, err := cr.listAllCoordinatorAssignments()
	if err != nil {
		glog.Warningf("Failed to list coordinator assignments during cleanup: %v", err)
		return
	}

	for _, group := range consumerGroups {
		// Load assignment from file
		assignment, err := cr.loadCoordinatorAssignment(group)
		if err != nil {
			glog.Warningf("Failed to load assignment for group %s during cleanup: %v", group, err)
			continue
		}

		// Check if coordinator is healthy
		if !cr.isGatewayHealthy(assignment.CoordinatorAddr) {
			glog.V(1).Infof("Coordinator %s for group %s is unhealthy, attempting reassignment", assignment.CoordinatorAddr, group)

			// Try to reassign to a healthy gateway
			newAddr, newNodeID, err := cr.chooseCoordinatorAddrForGroup(group)
			if err != nil {
				// No healthy gateways available, remove the assignment for now
				glog.Warningf("No healthy gateways available for reassignment of group %s, removing assignment", group)
				if delErr := cr.deleteCoordinatorAssignment(group); delErr != nil {
					glog.Warningf("Failed to delete assignment for group %s: %v", group, delErr)
				}
			} else if newAddr != assignment.CoordinatorAddr {
				// Reassign to the new healthy coordinator
				newAssignment := &protocol.CoordinatorAssignment{
					ConsumerGroup:     group,
					CoordinatorAddr:   newAddr,
					CoordinatorNodeID: newNodeID,
					AssignedAt:        time.Now(),
					LastHeartbeat:     time.Now(),
				}

				// Save new assignment to file
				if saveErr := cr.saveCoordinatorAssignment(group, newAssignment); saveErr != nil {
					glog.Warningf("Failed to save reassignment for group %s: %v", group, saveErr)
				} else {
					glog.V(0).Infof("Reassigned coordinator for group %s from unhealthy %s to healthy %s",
						group, assignment.CoordinatorAddr, newAddr)
				}
			}
		}
	}
}

// GetStats returns registry statistics
func (cr *CoordinatorRegistry) GetStats() map[string]interface{} {
	// Read counts separately to avoid holding locks while calling IsLeader()
	cr.gatewaysMutex.RLock()
	gatewayCount := len(cr.activeGateways)
	cr.gatewaysMutex.RUnlock()

	// Count assignments from files
	var assignmentCount int
	if cr.IsLeader() {
		consumerGroups, err := cr.listAllCoordinatorAssignments()
		if err != nil {
			glog.Warningf("Failed to count coordinator assignments: %v", err)
			assignmentCount = -1 // Indicate error
		} else {
			assignmentCount = len(consumerGroups)
		}
	} else {
		assignmentCount = 0 // Non-leader doesn't track assignments
	}

	return map[string]interface{}{
		"is_leader":       cr.IsLeader(),
		"leader_address":  cr.GetLeaderAddress(),
		"active_gateways": gatewayCount,
		"assignments":     assignmentCount,
		"gateway_address": cr.gatewayAddress,
	}
}
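
// Sample return value (illustrative values, keys taken from the map above):
//
//	map[string]interface{}{
//		"is_leader":       true,
//		"leader_address":  "gateway1:9093",
//		"active_gateways": 3,
//		"assignments":     12,
//		"gateway_address": "gateway1:9093",
//	}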
								
// Persistence methods for coordinator assignments

// saveCoordinatorAssignment saves a single coordinator assignment to its individual file
func (cr *CoordinatorRegistry) saveCoordinatorAssignment(consumerGroup string, assignment *protocol.CoordinatorAssignment) error {
	if !cr.IsLeader() {
		// Only the leader should save assignments
		return nil
	}

	return cr.filerClientAccessor.WithFilerClient(false, func(client filer_pb.SeaweedFilerClient) error {
		// Convert assignment to JSON
		assignmentData, err := json.Marshal(assignment)
		if err != nil {
			return fmt.Errorf("failed to marshal assignment for group %s: %w", consumerGroup, err)
		}

		// Save to individual file: /topics/kafka/.meta/coordinators/<consumer-group>_assignments.json
		fileName := fmt.Sprintf("%s_assignments.json", consumerGroup)
		return filer.SaveInsideFiler(client, CoordinatorAssignmentsDir, fileName, assignmentData)
	})
}

// loadCoordinatorAssignment loads a single coordinator assignment from its individual file
func (cr *CoordinatorRegistry) loadCoordinatorAssignment(consumerGroup string) (*protocol.CoordinatorAssignment, error) {
	return cr.loadCoordinatorAssignmentWithClient(consumerGroup, cr.filerClientAccessor)
}

// loadCoordinatorAssignmentWithClient loads a single coordinator assignment using the provided client
func (cr *CoordinatorRegistry) loadCoordinatorAssignmentWithClient(consumerGroup string, clientAccessor *filer_client.FilerClientAccessor) (*protocol.CoordinatorAssignment, error) {
	var assignment *protocol.CoordinatorAssignment

	err := clientAccessor.WithFilerClient(false, func(client filer_pb.SeaweedFilerClient) error {
		// Load from individual file: /topics/kafka/.meta/coordinators/<consumer-group>_assignments.json
		fileName := fmt.Sprintf("%s_assignments.json", consumerGroup)
		data, err := filer.ReadInsideFiler(client, CoordinatorAssignmentsDir, fileName)
		if err != nil {
			return fmt.Errorf("assignment file not found for group %s: %w", consumerGroup, err)
		}

		// Parse JSON
		if err := json.Unmarshal(data, &assignment); err != nil {
			return fmt.Errorf("failed to unmarshal assignment for group %s: %w", consumerGroup, err)
		}

		return nil
	})

	if err != nil {
		return nil, err
	}

	return assignment, nil
}

// listAllCoordinatorAssignments lists all coordinator assignment files
func (cr *CoordinatorRegistry) listAllCoordinatorAssignments() ([]string, error) {
	var consumerGroups []string

	err := cr.filerClientAccessor.WithFilerClient(false, func(client filer_pb.SeaweedFilerClient) error {
		request := &filer_pb.ListEntriesRequest{
			Directory: CoordinatorAssignmentsDir,
		}

		stream, streamErr := client.ListEntries(context.Background(), request)
		if streamErr != nil {
			// The directory might not exist yet, which is okay
			return nil
		}

		for {
			resp, recvErr := stream.Recv()
			if recvErr != nil {
				if recvErr == io.EOF {
					break
				}
				return fmt.Errorf("failed to receive entry: %w", recvErr)
			}

			// Only include assignment files (ending with _assignments.json)
			if resp.Entry != nil && !resp.Entry.IsDirectory &&
				strings.HasSuffix(resp.Entry.Name, "_assignments.json") {
				// Extract consumer group name by removing the _assignments.json suffix
				consumerGroup := strings.TrimSuffix(resp.Entry.Name, "_assignments.json")
				consumerGroups = append(consumerGroups, consumerGroup)
			}
		}

		return nil
	})

	if err != nil {
		return nil, fmt.Errorf("failed to list coordinator assignments: %w", err)
	}

	return consumerGroups, nil
}
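
// Worked example (hypothetical file names): if the coordinators directory
// contains orders-workers_assignments.json, billing_assignments.json, and an
// unrelated README, the method returns the group names "orders-workers" and
// "billing"; the README is skipped because it lacks the _assignments.json
// suffix.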
								
// deleteCoordinatorAssignment removes a coordinator assignment file
func (cr *CoordinatorRegistry) deleteCoordinatorAssignment(consumerGroup string) error {
	if !cr.IsLeader() {
		return nil
	}

	return cr.filerClientAccessor.WithFilerClient(false, func(client filer_pb.SeaweedFilerClient) error {
		fileName := fmt.Sprintf("%s_assignments.json", consumerGroup)
		filePath := fmt.Sprintf("%s/%s", CoordinatorAssignmentsDir, fileName)

		_, err := client.DeleteEntry(context.Background(), &filer_pb.DeleteEntryRequest{
			Directory: CoordinatorAssignmentsDir,
			Name:      fileName,
		})

		if err != nil {
			return fmt.Errorf("failed to delete assignment file %s: %w", filePath, err)
		}

		return nil
	})
}

// ReassignCoordinator manually reassigns a coordinator for a consumer group.
// This can be called when a coordinator gateway becomes unavailable.
func (cr *CoordinatorRegistry) ReassignCoordinator(consumerGroup string) (*protocol.CoordinatorAssignment, error) {
	if !cr.IsLeader() {
		return nil, fmt.Errorf("not the coordinator registry leader")
	}

	cr.assignmentsMutex.Lock()
	defer cr.assignmentsMutex.Unlock()

	// Check if assignment exists by loading from file
	existing, err := cr.loadCoordinatorAssignment(consumerGroup)
	if err != nil {
		return nil, fmt.Errorf("no existing assignment for consumer group %s: %w", consumerGroup, err)
	}

	// Choose a new coordinator
	newAddr, newNodeID, err := cr.chooseCoordinatorAddrForGroup(consumerGroup)
	if err != nil {
		return nil, fmt.Errorf("failed to choose new coordinator: %w", err)
	}

	// Create new assignment
	newAssignment := &protocol.CoordinatorAssignment{
		ConsumerGroup:     consumerGroup,
		CoordinatorAddr:   newAddr,
		CoordinatorNodeID: newNodeID,
		AssignedAt:        time.Now(),
		LastHeartbeat:     time.Now(),
	}

	// Persist the new assignment to individual file
	if err := cr.saveCoordinatorAssignment(consumerGroup, newAssignment); err != nil {
		return nil, fmt.Errorf("failed to persist coordinator reassignment for group %s: %w", consumerGroup, err)
	}

	glog.V(0).Infof("Manually reassigned coordinator for group %s from %s to %s",
		consumerGroup, existing.CoordinatorAddr, newAddr)

	return newAssignment, nil
}