You can not select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
					
					
						
							4149 lines
						
					
					
						
							144 KiB
						
					
					
				
			
		
		
		
			
			
			
		
		
	
	
							4149 lines
						
					
					
						
							144 KiB
						
					
					
				| package protocol | |
| 
 | |
| import ( | |
| 	"bufio" | |
| 	"bytes" | |
| 	"context" | |
| 	"encoding/binary" | |
| 	"fmt" | |
| 	"io" | |
| 	"net" | |
| 	"os" | |
| 	"strconv" | |
| 	"strings" | |
| 	"sync" | |
| 	"time" | |
| 
 | |
| 	"github.com/seaweedfs/seaweedfs/weed/glog" | |
| 	"github.com/seaweedfs/seaweedfs/weed/mq/kafka/consumer" | |
| 	"github.com/seaweedfs/seaweedfs/weed/mq/kafka/consumer_offset" | |
| 	"github.com/seaweedfs/seaweedfs/weed/mq/kafka/integration" | |
| 	"github.com/seaweedfs/seaweedfs/weed/mq/kafka/schema" | |
| 	mqschema "github.com/seaweedfs/seaweedfs/weed/mq/schema" | |
| 	"github.com/seaweedfs/seaweedfs/weed/pb" | |
| 	"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb" | |
| 	"github.com/seaweedfs/seaweedfs/weed/pb/mq_pb" | |
| 	"github.com/seaweedfs/seaweedfs/weed/pb/schema_pb" | |
| 	"github.com/seaweedfs/seaweedfs/weed/security" | |
| 	"github.com/seaweedfs/seaweedfs/weed/util" | |
| ) | |
| 
 | |
| // GetAdvertisedAddress returns the host:port that should be advertised to clients | |
| // This handles the Docker networking issue where internal IPs aren't reachable by external clients | |
| func (h *Handler) GetAdvertisedAddress(gatewayAddr string) (string, int) { | |
| 	host, port := "localhost", 9093 | |
| 
 | |
| 	// Try to parse the gateway address if provided to get the port | |
| 	if gatewayAddr != "" { | |
| 		if _, gatewayPort, err := net.SplitHostPort(gatewayAddr); err == nil { | |
| 			if gatewayPortInt, err := strconv.Atoi(gatewayPort); err == nil { | |
| 				port = gatewayPortInt // Only use the port, not the host | |
| 			} | |
| 		} | |
| 	} | |
| 
 | |
| 	// Override with environment variable if set, otherwise always use localhost for external clients | |
| 	if advertisedHost := os.Getenv("KAFKA_ADVERTISED_HOST"); advertisedHost != "" { | |
| 		host = advertisedHost | |
| 	} else { | |
| 		host = "localhost" | |
| 	} | |
| 
 | |
| 	return host, port | |
| } | |
| 
 | |
// TopicInfo holds basic information about a topic.
type TopicInfo struct {
	Name       string // topic name
	Partitions int32  // number of partitions
	CreatedAt  int64  // creation timestamp; NOTE(review): unit (seconds vs. nanoseconds) is not visible here - confirm
}
| 
 | |
// TopicPartitionKey uniquely identifies a topic partition.
// It contains only comparable fields, so it can be used as a map key.
type TopicPartitionKey struct {
	Topic     string // topic name
	Partition int32  // partition index within the topic
}
| 
 | |
// contextKey is a dedicated type for context keys, so that keys defined in this
// package cannot collide with string keys defined by other packages.
type contextKey string

const (
	// connContextKey is the context key under which a *ConnectionContext is stored.
	connContextKey contextKey = "connectionContext"
)
| 
 | |
// kafkaRequest represents a single Kafka API request queued for processing.
// HandleConn routes requests over its control/data channels to a worker, which
// passes them to processRequestSync.
type kafkaRequest struct {
	correlationID uint32             // client-supplied ID echoed back in the matching response
	apiKey        uint16             // Kafka API key (see the APIKey constants)
	apiVersion    uint16             // version of the API the client is using
	requestBody   []byte             // request payload; NOTE(review): presumably the bytes following the request header - confirm
	ctx           context.Context    // context associated with this request
	connContext   *ConnectionContext // Per-connection context to avoid race conditions
}
| 
 | |
// kafkaResponse represents the outcome of processing one kafkaRequest.
// The response writer in HandleConn matches it to its request by correlationID.
type kafkaResponse struct {
	correlationID uint32 // copied from the originating request
	apiKey        uint16 // copied from the originating request
	apiVersion    uint16 // copied from the originating request
	response      []byte // encoded response body handed to writeResponseWithHeader
	err           error  // processing error; when non-nil the response writer logs it and writes nothing
}
| 
 | |
const (
	// DefaultKafkaNamespace is the default SeaweedMQ namespace under which
	// Kafka topics are placed.
	DefaultKafkaNamespace = "kafka"
)
| 
 | |
// APIKey represents a Kafka API key. Using a distinct type instead of a bare
// uint16 gives better type safety when switching on request kinds.
type APIKey uint16

// Kafka API keys known to this package. The numeric values are the api_key
// numbers of the Kafka wire protocol; the gaps are keys not declared here.
const (
	APIKeyProduce         APIKey = 0
	APIKeyFetch           APIKey = 1
	APIKeyListOffsets     APIKey = 2
	APIKeyMetadata        APIKey = 3
	APIKeyOffsetCommit    APIKey = 8
	APIKeyOffsetFetch     APIKey = 9
	APIKeyFindCoordinator APIKey = 10
	APIKeyJoinGroup       APIKey = 11
	APIKeyHeartbeat       APIKey = 12
	APIKeyLeaveGroup      APIKey = 13
	APIKeySyncGroup       APIKey = 14
	APIKeyDescribeGroups  APIKey = 15
	APIKeyListGroups      APIKey = 16
	APIKeyApiVersions     APIKey = 18
	APIKeyCreateTopics    APIKey = 19
	APIKeyDeleteTopics    APIKey = 20
	APIKeyInitProducerId  APIKey = 22
	APIKeyDescribeConfigs APIKey = 32
	APIKeyDescribeCluster APIKey = 60
)
| 
 | |
// SeaweedMQHandlerInterface defines the interface for SeaweedMQ integration.
//
// Handler stores an implementation in its seaweedMQHandler field. The production
// constructor supplies the integration package's broker handler, while
// NewTestHandlerWithMock accepts any implementation, which makes this interface
// the seam for unit-test mocks.
type SeaweedMQHandlerInterface interface {
	// Topic management.
	TopicExists(topic string) bool
	ListTopics() []string
	CreateTopic(topic string, partitions int32) error
	// CreateTopicWithSchemas is the schema-aware variant of CreateTopic, taking
	// separate record types for the key and the value.
	CreateTopicWithSchemas(name string, partitions int32, keyRecordType *schema_pb.RecordType, valueRecordType *schema_pb.RecordType) error
	DeleteTopic(topic string) error
	// GetTopicInfo returns the topic's info and a bool.
	// NOTE(review): the bool presumably reports whether the topic was found - confirm.
	GetTopicInfo(topic string) (*integration.KafkaTopicInfo, bool)
	// Ledger methods REMOVED - SMQ handles Kafka offsets natively
	// ProduceRecord and ProduceRecordValue publish one record to a topic partition.
	// NOTE(review): the returned int64 is presumably the assigned offset - confirm.
	ProduceRecord(ctx context.Context, topicName string, partitionID int32, key, value []byte) (int64, error)
	ProduceRecordValue(ctx context.Context, topicName string, partitionID int32, key []byte, recordValueBytes []byte) (int64, error)
	// GetStoredRecords retrieves records from SMQ storage (optional - for advanced implementations)
	// ctx is used to control the fetch timeout (should match Kafka fetch request's MaxWaitTime)
	GetStoredRecords(ctx context.Context, topic string, partition int32, fromOffset int64, maxRecords int) ([]integration.SMQRecord, error)
	// GetEarliestOffset returns the earliest available offset for a topic partition
	GetEarliestOffset(topic string, partition int32) (int64, error)
	// GetLatestOffset returns the latest available offset for a topic partition
	GetLatestOffset(topic string, partition int32) (int64, error)
	// WithFilerClient executes a function with a filer client for accessing SeaweedMQ metadata
	WithFilerClient(streamingMode bool, fn func(client filer_pb.SeaweedFilerClient) error) error
	// GetBrokerAddresses returns the discovered SMQ broker addresses for Metadata responses
	GetBrokerAddresses() []string
	// CreatePerConnectionBrokerClient creates an isolated BrokerClient for each TCP connection
	CreatePerConnectionBrokerClient() (*integration.BrokerClient, error)
	// SetProtocolHandler sets the protocol handler reference for connection context access
	SetProtocolHandler(handler integration.ProtocolHandler)
	// Close releases the integration's resources; Handler.Close calls it last.
	Close() error
}
| 
 | |
// ConsumerOffsetStorage defines the interface for storing consumer offsets.
// It is used by the OffsetCommit and OffsetFetch protocol handlers. The
// production constructor wires in a filer-backed implementation, while
// NewTestHandlerWithMock leaves the field nil.
type ConsumerOffsetStorage interface {
	// CommitOffset records offset (with free-form metadata) for a group's topic partition.
	CommitOffset(group, topic string, partition int32, offset int64, metadata string) error
	// FetchOffset returns the stored offset and metadata for a group's topic partition.
	// NOTE(review): the value returned when nothing has been committed is not visible here - confirm.
	FetchOffset(group, topic string, partition int32) (int64, string, error)
	// FetchAllOffsets returns every stored offset of a group, keyed by topic partition.
	FetchAllOffsets(group string) (map[TopicPartition]OffsetMetadata, error)
	// DeleteGroup removes the stored offsets of a group.
	DeleteGroup(group string) error
	// Close releases the storage's resources.
	Close() error
}
| 
 | |
// TopicPartition uniquely identifies a topic partition for offset storage.
// It is the key type of the map returned by ConsumerOffsetStorage.FetchAllOffsets.
type TopicPartition struct {
	Topic     string // topic name
	Partition int32  // partition index within the topic
}
| 
 | |
// OffsetMetadata contains an offset and its associated metadata.
// It is the value type of the map returned by ConsumerOffsetStorage.FetchAllOffsets.
type OffsetMetadata struct {
	Offset   int64  // stored offset
	Metadata string // free-form metadata string stored alongside the offset
}
| 
 | |
// TopicSchemaConfig holds the schema configuration for a topic.
// Handler caches one instance per topic name in topicSchemaConfigs.
type TopicSchemaConfig struct {
	// Value schema configuration
	ValueSchemaID     uint32        // ID of the value schema
	ValueSchemaFormat schema.Format // format of the value schema

	// Key schema configuration (optional)
	KeySchemaID     uint32        // ID of the key schema; meaningful only when HasKeySchema is true
	KeySchemaFormat schema.Format // format of the key schema; meaningful only when HasKeySchema is true
	HasKeySchema    bool          // indicates if key schema is configured
}
| 
 | |
// SchemaID returns the value schema ID (ValueSchemaID).
// It is a legacy accessor kept for backward compatibility.
func (c *TopicSchemaConfig) SchemaID() uint32 {
	return c.ValueSchemaID
}
| 
 | |
// SchemaFormat returns the value schema format (ValueSchemaFormat).
// It is a legacy accessor kept for backward compatibility.
func (c *TopicSchemaConfig) SchemaFormat() schema.Format {
	return c.ValueSchemaFormat
}
| 
 | |
| // getTopicSchemaFormat returns the schema format string for a topic | |
| func (h *Handler) getTopicSchemaFormat(topic string) string { | |
| 	h.topicSchemaConfigMu.RLock() | |
| 	defer h.topicSchemaConfigMu.RUnlock() | |
| 
 | |
| 	if config, exists := h.topicSchemaConfigs[topic]; exists { | |
| 		return config.ValueSchemaFormat.String() | |
| 	} | |
| 	return "" // Empty string means schemaless or format unknown | |
| } | |
| 
 | |
// Handler processes Kafka protocol requests from clients using SeaweedMQ.
//
// A Handler is shared by all client connections: HandleConn is invoked once per
// connection and keeps its per-connection state in connContexts. Use
// NewSeaweedMQBrokerHandler (or the WithDefaults variant) for production and
// NewTestHandlerWithMock for unit tests.
type Handler struct {
	// SeaweedMQ integration
	seaweedMQHandler SeaweedMQHandlerInterface

	// SMQ offset storage removed - using ConsumerOffsetStorage instead

	// Consumer offset storage for Kafka protocol OffsetCommit/OffsetFetch.
	// Nil for handlers built by NewTestHandlerWithMock.
	consumerOffsetStorage ConsumerOffsetStorage

	// Consumer group coordination
	groupCoordinator *consumer.GroupCoordinator

	// Response caching to reduce CPU usage for repeated requests.
	// Only the production constructor sets these; they are nil otherwise.
	metadataCache    *ResponseCache
	coordinatorCache *ResponseCache

	// Coordinator registry for distributed coordinator assignment
	// (set through SetCoordinatorRegistry).
	coordinatorRegistry CoordinatorRegistryInterface

	// Schema management (optional, for schematized topics)
	schemaManager *schema.Manager
	useSchema     bool
	brokerClient  *schema.BrokerClient // closed by Close when non-nil

	// Topic schema configuration cache, keyed by topic name.
	// Access is guarded by topicSchemaConfigMu.
	topicSchemaConfigs  map[string]*TopicSchemaConfig
	topicSchemaConfigMu sync.RWMutex

	// Track registered schemas to prevent duplicate registrations.
	// NOTE(review): presumably guarded by registeredSchemasMu - confirm at the use sites.
	registeredSchemas   map[string]bool // key: "topic:schemaID" or "topic-key:schemaID"
	registeredSchemasMu sync.RWMutex

	// NOTE(review): not assigned by any constructor visible in this part of the
	// file - confirm where it is set.
	filerClient filer_pb.SeaweedFilerClient

	// SMQ broker addresses discovered from masters for Metadata responses.
	// Used by GetSMQBrokerAddresses only as a fallback when seaweedMQHandler
	// reports no addresses.
	smqBrokerAddresses []string

	// Gateway address for coordinator registry (set through SetGatewayAddress)
	gatewayAddress string

	// Connection contexts stored per connection ID (thread-safe)
	// Replaces the race-prone shared connContext field
	connContexts sync.Map // map[string]*ConnectionContext

	// Schema Registry URL for delayed initialization
	schemaRegistryURL string

	// Default partition count for auto-created topics
	defaultPartitions int32
}
| 
 | |
| // NewHandler creates a basic Kafka handler with in-memory storage | |
| // WARNING: This is for testing ONLY - never use in production! | |
| // For production use with persistent storage, use NewSeaweedMQBrokerHandler instead | |
| func NewHandler() *Handler { | |
| 	// Production safety check - prevent accidental production use | |
| 	// Comment out for testing: os.Getenv can be used for runtime checks | |
| 	panic("NewHandler() with in-memory storage should NEVER be used in production! Use NewSeaweedMQBrokerHandler() with SeaweedMQ masters for production, or NewTestHandler() for tests.") | |
| } | |
| 
 | |
| // NewTestHandler and NewSimpleTestHandler moved to handler_test.go (test-only file) | |
|  | |
| // All test-related types and implementations moved to handler_test.go (test-only file) | |
|  | |
| // NewTestHandlerWithMock creates a test handler with a custom SeaweedMQHandlerInterface | |
| // This is useful for unit tests that need a handler but don't want to connect to real SeaweedMQ | |
| func NewTestHandlerWithMock(mockHandler SeaweedMQHandlerInterface) *Handler { | |
| 	return &Handler{ | |
| 		seaweedMQHandler:      mockHandler, | |
| 		consumerOffsetStorage: nil, // Unit tests don't need offset storage | |
| 		groupCoordinator:      consumer.NewGroupCoordinator(), | |
| 		registeredSchemas:     make(map[string]bool), | |
| 		topicSchemaConfigs:    make(map[string]*TopicSchemaConfig), | |
| 		defaultPartitions:     1, | |
| 	} | |
| } | |
| 
 | |
| // NewSeaweedMQBrokerHandler creates a new handler with SeaweedMQ broker integration | |
| func NewSeaweedMQBrokerHandler(masters string, filerGroup string, clientHost string) (*Handler, error) { | |
| 	return NewSeaweedMQBrokerHandlerWithDefaults(masters, filerGroup, clientHost, 4) // Default to 4 partitions | |
| } | |
| 
 | |
| // NewSeaweedMQBrokerHandlerWithDefaults creates a new handler with SeaweedMQ broker integration and custom defaults | |
| func NewSeaweedMQBrokerHandlerWithDefaults(masters string, filerGroup string, clientHost string, defaultPartitions int32) (*Handler, error) { | |
| 	// Set up SeaweedMQ integration | |
| 	smqHandler, err := integration.NewSeaweedMQBrokerHandler(masters, filerGroup, clientHost) | |
| 	if err != nil { | |
| 		return nil, err | |
| 	} | |
| 
 | |
| 	// Use the shared filer client accessor from SeaweedMQHandler | |
| 	sharedFilerAccessor := smqHandler.GetFilerClientAccessor() | |
| 	if sharedFilerAccessor == nil { | |
| 		return nil, fmt.Errorf("no shared filer client accessor available from SMQ handler") | |
| 	} | |
| 
 | |
| 	// Create consumer offset storage (for OffsetCommit/OffsetFetch protocol) | |
| 	// Use filer-based storage for persistence across restarts | |
| 	consumerOffsetStorage := newOffsetStorageAdapter( | |
| 		consumer_offset.NewFilerStorage(sharedFilerAccessor), | |
| 	) | |
| 
 | |
| 	// Create response caches to reduce CPU usage | |
| 	// Metadata cache: 5 second TTL (Schema Registry polls frequently) | |
| 	// Coordinator cache: 10 second TTL (less frequent, more stable) | |
| 	metadataCache := NewResponseCache(5 * time.Second) | |
| 	coordinatorCache := NewResponseCache(10 * time.Second) | |
| 
 | |
| 	// Start cleanup loops | |
| 	metadataCache.StartCleanupLoop(30 * time.Second) | |
| 	coordinatorCache.StartCleanupLoop(60 * time.Second) | |
| 
 | |
| 	handler := &Handler{ | |
| 		seaweedMQHandler:      smqHandler, | |
| 		consumerOffsetStorage: consumerOffsetStorage, | |
| 		groupCoordinator:      consumer.NewGroupCoordinator(), | |
| 		smqBrokerAddresses:    nil, // Will be set by SetSMQBrokerAddresses() when server starts | |
| 		registeredSchemas:     make(map[string]bool), | |
| 		defaultPartitions:     defaultPartitions, | |
| 		metadataCache:         metadataCache, | |
| 		coordinatorCache:      coordinatorCache, | |
| 	} | |
| 
 | |
| 	// Set protocol handler reference in SMQ handler for connection context access | |
| 	smqHandler.SetProtocolHandler(handler) | |
| 
 | |
| 	return handler, nil | |
| } | |
| 
 | |
| // AddTopicForTesting creates a topic for testing purposes | |
| // This delegates to the underlying SeaweedMQ handler | |
| func (h *Handler) AddTopicForTesting(topicName string, partitions int32) { | |
| 	if h.seaweedMQHandler != nil { | |
| 		h.seaweedMQHandler.CreateTopic(topicName, partitions) | |
| 	} | |
| } | |
| 
 | |
| // Delegate methods to SeaweedMQ handler | |
|  | |
| // GetOrCreateLedger method REMOVED - SMQ handles Kafka offsets natively | |
|  | |
| // GetLedger method REMOVED - SMQ handles Kafka offsets natively | |
|  | |
| // Close shuts down the handler and all connections | |
| func (h *Handler) Close() error { | |
| 	// Close group coordinator | |
| 	if h.groupCoordinator != nil { | |
| 		h.groupCoordinator.Close() | |
| 	} | |
| 
 | |
| 	// Close broker client if present | |
| 	if h.brokerClient != nil { | |
| 		if err := h.brokerClient.Close(); err != nil { | |
| 			glog.Warningf("Failed to close broker client: %v", err) | |
| 		} | |
| 	} | |
| 
 | |
| 	// Close SeaweedMQ handler if present | |
| 	if h.seaweedMQHandler != nil { | |
| 		return h.seaweedMQHandler.Close() | |
| 	} | |
| 	return nil | |
| } | |
| 
 | |
// SetSMQBrokerAddresses updates the SMQ broker addresses used in Metadata responses.
// The slice is stored as given (not copied). GetSMQBrokerAddresses returns it
// only when the SeaweedMQ handler reports no addresses of its own.
func (h *Handler) SetSMQBrokerAddresses(brokerAddresses []string) {
	h.smqBrokerAddresses = brokerAddresses
}
| 
 | |
| // GetSMQBrokerAddresses returns the SMQ broker addresses | |
| func (h *Handler) GetSMQBrokerAddresses() []string { | |
| 	// First try to get from the SeaweedMQ handler (preferred) | |
| 	if h.seaweedMQHandler != nil { | |
| 		if brokerAddresses := h.seaweedMQHandler.GetBrokerAddresses(); len(brokerAddresses) > 0 { | |
| 			return brokerAddresses | |
| 		} | |
| 	} | |
| 
 | |
| 	// Fallback to manually set addresses | |
| 	if len(h.smqBrokerAddresses) > 0 { | |
| 		return h.smqBrokerAddresses | |
| 	} | |
| 
 | |
| 	// No brokers configured - return empty slice | |
| 	// This will cause proper error handling in callers | |
| 	return []string{} | |
| } | |
| 
 | |
| // GetGatewayAddress returns the current gateway address as a string (for coordinator registry) | |
| func (h *Handler) GetGatewayAddress() string { | |
| 	if h.gatewayAddress != "" { | |
| 		return h.gatewayAddress | |
| 	} | |
| 	// No gateway address configured - return empty string | |
| 	// Callers should handle this as a configuration error | |
| 	return "" | |
| } | |
| 
 | |
// SetGatewayAddress sets the gateway address for the coordinator registry.
// The value is stored as given and returned by GetGatewayAddress.
func (h *Handler) SetGatewayAddress(address string) {
	h.gatewayAddress = address
}
| 
 | |
// SetCoordinatorRegistry sets the coordinator registry for this handler,
// replacing any previously set registry.
func (h *Handler) SetCoordinatorRegistry(registry CoordinatorRegistryInterface) {
	h.coordinatorRegistry = registry
}
| 
 | |
// GetCoordinatorRegistry returns the coordinator registry, or nil when none has
// been set through SetCoordinatorRegistry.
func (h *Handler) GetCoordinatorRegistry() CoordinatorRegistryInterface {
	return h.coordinatorRegistry
}
| 
 | |
| // isDataPlaneAPI returns true if the API key is a data plane operation (Fetch, Produce) | |
| // Data plane operations can be slow and may block on I/O | |
| func isDataPlaneAPI(apiKey uint16) bool { | |
| 	switch APIKey(apiKey) { | |
| 	case APIKeyProduce: | |
| 		return true | |
| 	case APIKeyFetch: | |
| 		return true | |
| 	default: | |
| 		return false | |
| 	} | |
| } | |
| 
 | |
| // GetConnectionContext returns the current connection context converted to integration.ConnectionContext | |
| // This implements the integration.ProtocolHandler interface | |
| // | |
| // NOTE: Since this method doesn't receive a context parameter, it returns a "best guess" connection context. | |
| // In single-connection scenarios (like tests), this works correctly. In high-concurrency scenarios with many | |
| // simultaneous connections, this may return a connection context from a different connection. | |
| // For a proper fix, the integration.ProtocolHandler interface would need to be updated to pass context.Context. | |
| func (h *Handler) GetConnectionContext() *integration.ConnectionContext { | |
| 	// Try to find any active connection context | |
| 	// In most cases (single connection, or low concurrency), this will return the correct context | |
| 	var connCtx *ConnectionContext | |
| 	h.connContexts.Range(func(key, value interface{}) bool { | |
| 		if ctx, ok := value.(*ConnectionContext); ok { | |
| 			connCtx = ctx | |
| 			return false // Stop iteration after finding first context | |
| 		} | |
| 		return true | |
| 	}) | |
| 
 | |
| 	if connCtx == nil { | |
| 		return nil | |
| 	} | |
| 
 | |
| 	// Convert protocol.ConnectionContext to integration.ConnectionContext | |
| 	return &integration.ConnectionContext{ | |
| 		ClientID:      connCtx.ClientID, | |
| 		ConsumerGroup: connCtx.ConsumerGroup, | |
| 		MemberID:      connCtx.MemberID, | |
| 		BrokerClient:  connCtx.BrokerClient, | |
| 	} | |
| } | |
| 
 | |
| // HandleConn processes a single client connection | |
| func (h *Handler) HandleConn(ctx context.Context, conn net.Conn) error { | |
| 	connectionID := fmt.Sprintf("%s->%s", conn.RemoteAddr(), conn.LocalAddr()) | |
| 
 | |
| 	// Record connection metrics | |
| 	RecordConnectionMetrics() | |
| 
 | |
| 	// Create cancellable context for this connection | |
| 	// This ensures all requests are cancelled when the connection closes | |
| 	ctx, cancel := context.WithCancel(ctx) | |
| 	defer cancel() | |
| 
 | |
| 	// CRITICAL: Create per-connection BrokerClient for isolated gRPC streams | |
| 	// This prevents different connections from interfering with each other's Fetch requests | |
| 	// In mock/unit test mode, this may not be available, so we continue without it | |
| 	var connBrokerClient *integration.BrokerClient | |
| 	connBrokerClient, err := h.seaweedMQHandler.CreatePerConnectionBrokerClient() | |
| 	if err != nil { | |
| 		// Continue without broker client for unit test/mock mode | |
| 		connBrokerClient = nil | |
| 	} | |
| 
 | |
| 	// RACE CONDITION FIX: Create connection-local context and pass through request pipeline | |
| 	// Store in thread-safe map to enable lookup from methods that don't have direct access | |
| 	connContext := &ConnectionContext{ | |
| 		RemoteAddr:   conn.RemoteAddr(), | |
| 		LocalAddr:    conn.LocalAddr(), | |
| 		ConnectionID: connectionID, | |
| 		BrokerClient: connBrokerClient, | |
| 	} | |
| 
 | |
| 	// Store in thread-safe map for later retrieval | |
| 	h.connContexts.Store(connectionID, connContext) | |
| 
 | |
| 	defer func() { | |
| 		// Close all partition readers first | |
| 		cleanupPartitionReaders(connContext) | |
| 		// Close the per-connection broker client | |
| 		if connBrokerClient != nil { | |
| 			if closeErr := connBrokerClient.Close(); closeErr != nil { | |
| 				glog.Errorf("[%s] Error closing BrokerClient: %v", connectionID, closeErr) | |
| 			} | |
| 		} | |
| 		// Remove connection context from map | |
| 		h.connContexts.Delete(connectionID) | |
| 		RecordDisconnectionMetrics() | |
| 		conn.Close() | |
| 	}() | |
| 
 | |
| 	r := bufio.NewReader(conn) | |
| 	w := bufio.NewWriter(conn) | |
| 	defer w.Flush() | |
| 
 | |
| 	// Use default timeout config | |
| 	timeoutConfig := DefaultTimeoutConfig() | |
| 
 | |
| 	// Track consecutive read timeouts to detect stale/CLOSE_WAIT connections | |
| 	consecutiveTimeouts := 0 | |
| 	const maxConsecutiveTimeouts = 3 // Give up after 3 timeouts in a row | |
|  | |
| 	// CRITICAL: Separate control plane from data plane | |
| 	// Control plane: Metadata, Heartbeat, JoinGroup, etc. (must be fast, never block) | |
| 	// Data plane: Fetch, Produce (can be slow, may block on I/O) | |
| 	// | |
| 	// Architecture: | |
| 	// - Main loop routes requests to appropriate channel based on API key | |
| 	// - Control goroutine processes control messages (fast, sequential) | |
| 	// - Data goroutine processes data messages (can be slow) | |
| 	// - Response writer handles responses in order using correlation IDs | |
| 	controlChan := make(chan *kafkaRequest, 10) | |
| 	dataChan := make(chan *kafkaRequest, 10) | |
| 	responseChan := make(chan *kafkaResponse, 100) | |
| 	var wg sync.WaitGroup | |
| 
 | |
| 	// Response writer - maintains request/response order per connection | |
| 	// CRITICAL: While we process requests concurrently (control/data plane), | |
| 	// we MUST track the order requests arrive and send responses in that same order. | |
| 	// Solution: Track received correlation IDs in a queue, send responses in that queue order. | |
| 	correlationQueue := make([]uint32, 0, 100) | |
| 	correlationQueueMu := &sync.Mutex{} | |
| 
 | |
| 	wg.Add(1) | |
| 	go func() { | |
| 		defer wg.Done() | |
| 		glog.V(4).Infof("[%s] Response writer started", connectionID) | |
| 		defer glog.V(4).Infof("[%s] Response writer exiting", connectionID) | |
| 		pendingResponses := make(map[uint32]*kafkaResponse) | |
| 		nextToSend := 0 // Index in correlationQueue | |
|  | |
| 		for { | |
| 			select { | |
| 			case resp, ok := <-responseChan: | |
| 				if !ok { | |
| 					// responseChan closed, exit | |
| 					return | |
| 				} | |
| 				glog.V(4).Infof("[%s] Response writer received correlation=%d from responseChan", connectionID, resp.correlationID) | |
| 				correlationQueueMu.Lock() | |
| 				pendingResponses[resp.correlationID] = resp | |
| 
 | |
| 				// Send all responses we can in queue order | |
| 				for nextToSend < len(correlationQueue) { | |
| 					expectedID := correlationQueue[nextToSend] | |
| 					readyResp, exists := pendingResponses[expectedID] | |
| 					if !exists { | |
| 						// Response not ready yet, stop sending | |
| 						glog.V(3).Infof("[%s] Response writer: waiting for correlation=%d (nextToSend=%d, queueLen=%d)", connectionID, expectedID, nextToSend, len(correlationQueue)) | |
| 						break | |
| 					} | |
| 
 | |
| 					// Send this response | |
| 					if readyResp.err != nil { | |
| 						glog.Errorf("[%s] Error processing correlation=%d: %v", connectionID, readyResp.correlationID, readyResp.err) | |
| 					} else { | |
| 						glog.V(4).Infof("[%s] Response writer: about to write correlation=%d (%d bytes)", connectionID, readyResp.correlationID, len(readyResp.response)) | |
| 						if writeErr := h.writeResponseWithHeader(w, readyResp.correlationID, readyResp.apiKey, readyResp.apiVersion, readyResp.response, timeoutConfig.WriteTimeout); writeErr != nil { | |
| 							glog.Errorf("[%s] Response writer: WRITE ERROR correlation=%d: %v - EXITING", connectionID, readyResp.correlationID, writeErr) | |
| 							glog.Errorf("[%s] Write error correlation=%d: %v", connectionID, readyResp.correlationID, writeErr) | |
| 							correlationQueueMu.Unlock() | |
| 							return | |
| 						} | |
| 						glog.V(4).Infof("[%s] Response writer: successfully wrote correlation=%d", connectionID, readyResp.correlationID) | |
| 					} | |
| 
 | |
| 					// Remove from pending and advance | |
| 					delete(pendingResponses, expectedID) | |
| 					nextToSend++ | |
| 				} | |
| 				correlationQueueMu.Unlock() | |
| 			case <-ctx.Done(): | |
| 				// Context cancelled, exit immediately to prevent deadlock | |
| 				glog.V(4).Infof("[%s] Response writer: context cancelled, exiting", connectionID) | |
| 				return | |
| 			} | |
| 		} | |
| 	}() | |
| 
 | |
| 	// Control plane processor - fast operations, never blocks | |
| 	wg.Add(1) | |
| 	go func() { | |
| 		defer wg.Done() | |
| 		for { | |
| 			select { | |
| 			case req, ok := <-controlChan: | |
| 				if !ok { | |
| 					// Channel closed, exit | |
| 					return | |
| 				} | |
| 				glog.V(4).Infof("[%s] Control plane processing correlation=%d, apiKey=%d", connectionID, req.correlationID, req.apiKey) | |
| 
 | |
| 				// CRITICAL: Wrap request processing with panic recovery to prevent deadlocks | |
| 				// If processRequestSync panics, we MUST still send a response to avoid blocking the response writer | |
| 				var response []byte | |
| 				var err error | |
| 				func() { | |
| 					defer func() { | |
| 						if r := recover(); r != nil { | |
| 							glog.Errorf("[%s] PANIC in control plane correlation=%d: %v", connectionID, req.correlationID, r) | |
| 							err = fmt.Errorf("internal server error: panic in request handler: %v", r) | |
| 						} | |
| 					}() | |
| 					response, err = h.processRequestSync(req) | |
| 				}() | |
| 
 | |
| 				glog.V(4).Infof("[%s] Control plane completed correlation=%d, sending to responseChan", connectionID, req.correlationID) | |
| 				select { | |
| 				case responseChan <- &kafkaResponse{ | |
| 					correlationID: req.correlationID, | |
| 					apiKey:        req.apiKey, | |
| 					apiVersion:    req.apiVersion, | |
| 					response:      response, | |
| 					err:           err, | |
| 				}: | |
| 					glog.V(4).Infof("[%s] Control plane sent correlation=%d to responseChan", connectionID, req.correlationID) | |
| 				case <-ctx.Done(): | |
| 					// Connection closed, stop processing | |
| 					return | |
| 				case <-time.After(5 * time.Second): | |
| 					glog.Errorf("[%s] DEADLOCK: Control plane timeout sending correlation=%d to responseChan (buffer full?)", connectionID, req.correlationID) | |
| 				} | |
| 			case <-ctx.Done(): | |
| 				// Context cancelled, drain remaining requests before exiting | |
| 				glog.V(4).Infof("[%s] Control plane: context cancelled, draining remaining requests", connectionID) | |
| 				for { | |
| 					select { | |
| 					case req, ok := <-controlChan: | |
| 						if !ok { | |
| 							return | |
| 						} | |
| 						// Process remaining requests with a short timeout | |
| 						glog.V(3).Infof("[%s] Control plane: processing drained request correlation=%d", connectionID, req.correlationID) | |
| 						response, err := h.processRequestSync(req) | |
| 						select { | |
| 						case responseChan <- &kafkaResponse{ | |
| 							correlationID: req.correlationID, | |
| 							apiKey:        req.apiKey, | |
| 							apiVersion:    req.apiVersion, | |
| 							response:      response, | |
| 							err:           err, | |
| 						}: | |
| 							glog.V(3).Infof("[%s] Control plane: sent drained response correlation=%d", connectionID, req.correlationID) | |
| 						case <-time.After(1 * time.Second): | |
| 							glog.Warningf("[%s] Control plane: timeout sending drained response correlation=%d, discarding", connectionID, req.correlationID) | |
| 							return | |
| 						} | |
| 					default: | |
| 						// Channel empty, safe to exit | |
| 						glog.V(4).Infof("[%s] Control plane: drain complete, exiting", connectionID) | |
| 						return | |
| 					} | |
| 				} | |
| 			} | |
| 		} | |
| 	}() | |
| 
 | |
| 	// Data plane processor - can block on I/O | |
| 	wg.Add(1) | |
| 	go func() { | |
| 		defer wg.Done() | |
| 		for { | |
| 			select { | |
| 			case req, ok := <-dataChan: | |
| 				if !ok { | |
| 					// Channel closed, exit | |
| 					return | |
| 				} | |
| 				glog.V(4).Infof("[%s] Data plane processing correlation=%d, apiKey=%d", connectionID, req.correlationID, req.apiKey) | |
| 
 | |
| 				// CRITICAL: Wrap request processing with panic recovery to prevent deadlocks | |
| 				// If processRequestSync panics, we MUST still send a response to avoid blocking the response writer | |
| 				var response []byte | |
| 				var err error | |
| 				func() { | |
| 					defer func() { | |
| 						if r := recover(); r != nil { | |
| 							glog.Errorf("[%s] PANIC in data plane correlation=%d: %v", connectionID, req.correlationID, r) | |
| 							err = fmt.Errorf("internal server error: panic in request handler: %v", r) | |
| 						} | |
| 					}() | |
| 					response, err = h.processRequestSync(req) | |
| 				}() | |
| 
 | |
| 				glog.V(4).Infof("[%s] Data plane completed correlation=%d, sending to responseChan", connectionID, req.correlationID) | |
| 				// Use select with context to avoid sending on closed channel | |
| 				select { | |
| 				case responseChan <- &kafkaResponse{ | |
| 					correlationID: req.correlationID, | |
| 					apiKey:        req.apiKey, | |
| 					apiVersion:    req.apiVersion, | |
| 					response:      response, | |
| 					err:           err, | |
| 				}: | |
| 					glog.V(4).Infof("[%s] Data plane sent correlation=%d to responseChan", connectionID, req.correlationID) | |
| 				case <-ctx.Done(): | |
| 					// Connection closed, stop processing | |
| 					return | |
| 				case <-time.After(5 * time.Second): | |
| 					glog.Errorf("[%s] DEADLOCK: Data plane timeout sending correlation=%d to responseChan (buffer full?)", connectionID, req.correlationID) | |
| 				} | |
| 			case <-ctx.Done(): | |
| 				// Context cancelled, drain remaining requests before exiting | |
| 				glog.V(4).Infof("[%s] Data plane: context cancelled, draining remaining requests", connectionID) | |
| 				for { | |
| 					select { | |
| 					case req, ok := <-dataChan: | |
| 						if !ok { | |
| 							return | |
| 						} | |
| 						// Process remaining requests with a short timeout | |
| 						glog.V(3).Infof("[%s] Data plane: processing drained request correlation=%d", connectionID, req.correlationID) | |
| 						response, err := h.processRequestSync(req) | |
| 						select { | |
| 						case responseChan <- &kafkaResponse{ | |
| 							correlationID: req.correlationID, | |
| 							apiKey:        req.apiKey, | |
| 							apiVersion:    req.apiVersion, | |
| 							response:      response, | |
| 							err:           err, | |
| 						}: | |
| 							glog.V(3).Infof("[%s] Data plane: sent drained response correlation=%d", connectionID, req.correlationID) | |
| 						case <-time.After(1 * time.Second): | |
| 							glog.Warningf("[%s] Data plane: timeout sending drained response correlation=%d, discarding", connectionID, req.correlationID) | |
| 							return | |
| 						} | |
| 					default: | |
| 						// Channel empty, safe to exit | |
| 						glog.V(4).Infof("[%s] Data plane: drain complete, exiting", connectionID) | |
| 						return | |
| 					} | |
| 				} | |
| 			} | |
| 		} | |
| 	}() | |
| 
 | |
| 	defer func() { | |
| 		// CRITICAL: Close channels in correct order to avoid panics | |
| 		// 1. Close input channels to stop accepting new requests | |
| 		close(controlChan) | |
| 		close(dataChan) | |
| 		// 2. Wait for worker goroutines to finish processing and sending responses | |
| 		wg.Wait() | |
| 		// 3. NOW close responseChan to signal response writer to exit | |
| 		close(responseChan) | |
| 	}() | |
| 
 | |
| 	for { | |
| 		// Check if context is cancelled | |
| 		select { | |
| 		case <-ctx.Done(): | |
| 			return ctx.Err() | |
| 		default: | |
| 		} | |
| 
 | |
| 		// Set a read deadline for the connection based on context or default timeout | |
| 		var readDeadline time.Time | |
| 		var timeoutDuration time.Duration | |
| 
 | |
| 		if deadline, ok := ctx.Deadline(); ok { | |
| 			readDeadline = deadline | |
| 			timeoutDuration = time.Until(deadline) | |
| 		} else { | |
| 			// Use configurable read timeout instead of hardcoded 5 seconds | |
| 			timeoutDuration = timeoutConfig.ReadTimeout | |
| 			readDeadline = time.Now().Add(timeoutDuration) | |
| 		} | |
| 
 | |
| 		if err := conn.SetReadDeadline(readDeadline); err != nil { | |
| 			return fmt.Errorf("set read deadline: %w", err) | |
| 		} | |
| 
 | |
| 		// Check context before reading | |
| 		select { | |
| 		case <-ctx.Done(): | |
| 			// Give a small delay to ensure proper cleanup | |
| 			time.Sleep(100 * time.Millisecond) | |
| 			return ctx.Err() | |
| 		default: | |
| 			// If context is close to being cancelled, set a very short timeout | |
| 			if deadline, ok := ctx.Deadline(); ok { | |
| 				timeUntilDeadline := time.Until(deadline) | |
| 				if timeUntilDeadline < 2*time.Second && timeUntilDeadline > 0 { | |
| 					shortDeadline := time.Now().Add(500 * time.Millisecond) | |
| 					if err := conn.SetReadDeadline(shortDeadline); err == nil { | |
| 					} | |
| 				} | |
| 			} | |
| 		} | |
| 
 | |
| 		// Read message size (4 bytes) | |
| 		var sizeBytes [4]byte | |
| 		if _, err := io.ReadFull(r, sizeBytes[:]); err != nil { | |
| 			if err == io.EOF { | |
| 				return nil | |
| 			} | |
| 			if netErr, ok := err.(net.Error); ok && netErr.Timeout() { | |
| 				// CRITICAL FIX: Track consecutive timeouts to detect CLOSE_WAIT connections | |
| 				// When remote peer closes, connection enters CLOSE_WAIT and reads keep timing out | |
| 				// After several consecutive timeouts with no data, assume connection is dead | |
| 				consecutiveTimeouts++ | |
| 				if consecutiveTimeouts >= maxConsecutiveTimeouts { | |
| 					return nil | |
| 				} | |
| 				// Idle timeout while waiting for next request; keep connection open | |
| 				continue | |
| 			} | |
| 			return fmt.Errorf("read message size: %w", err) | |
| 		} | |
| 
 | |
| 		// Successfully read data, reset timeout counter | |
| 		consecutiveTimeouts = 0 | |
| 
 | |
| 		// Successfully read the message size | |
| 		size := binary.BigEndian.Uint32(sizeBytes[:]) | |
| 		// Debug("Read message size: %d bytes", size) | |
| 		if size == 0 || size > 1024*1024 { // 1MB limit | |
| 			// Use standardized error for message size limit | |
| 			// Send error response for message too large | |
| 			errorResponse := BuildErrorResponse(0, ErrorCodeMessageTooLarge) // correlation ID 0 since we can't parse it yet | |
| 			if writeErr := h.writeResponseWithCorrelationID(w, 0, errorResponse, timeoutConfig.WriteTimeout); writeErr != nil { | |
| 			} | |
| 			return fmt.Errorf("message size %d exceeds limit", size) | |
| 		} | |
| 
 | |
| 		// Set read deadline for message body | |
| 		if err := conn.SetReadDeadline(time.Now().Add(timeoutConfig.ReadTimeout)); err != nil { | |
| 		} | |
| 
 | |
| 		// Read the message | |
| 		messageBuf := make([]byte, size) | |
| 		if _, err := io.ReadFull(r, messageBuf); err != nil { | |
| 			_ = HandleTimeoutError(err, "read") // errorCode | |
| 			return fmt.Errorf("read message: %w", err) | |
| 		} | |
| 
 | |
| 		// Parse at least the basic header to get API key and correlation ID | |
| 		if len(messageBuf) < 8 { | |
| 			return fmt.Errorf("message too short") | |
| 		} | |
| 
 | |
| 		apiKey := binary.BigEndian.Uint16(messageBuf[0:2]) | |
| 		apiVersion := binary.BigEndian.Uint16(messageBuf[2:4]) | |
| 		correlationID := binary.BigEndian.Uint32(messageBuf[4:8]) | |
| 
 | |
| 		// Debug("Parsed header - API Key: %d (%s), Version: %d, Correlation: %d", apiKey, getAPIName(APIKey(apiKey)), apiVersion, correlationID) | |
|  | |
| 		// Validate API version against what we support | |
| 		if err := h.validateAPIVersion(apiKey, apiVersion); err != nil { | |
| 			glog.Errorf("API VERSION VALIDATION FAILED: Key=%d (%s), Version=%d, error=%v", apiKey, getAPIName(APIKey(apiKey)), apiVersion, err) | |
| 			// Return proper Kafka error response for unsupported version | |
| 			response, writeErr := h.buildUnsupportedVersionResponse(correlationID, apiKey, apiVersion) | |
| 			if writeErr != nil { | |
| 				return fmt.Errorf("build error response: %w", writeErr) | |
| 			} | |
| 			// CRITICAL: Send error response through response queue to maintain sequential ordering | |
| 			// This prevents deadlocks in the response writer which expects all correlation IDs in sequence | |
| 			select { | |
| 			case responseChan <- &kafkaResponse{ | |
| 				correlationID: correlationID, | |
| 				apiKey:        apiKey, | |
| 				apiVersion:    apiVersion, | |
| 				response:      response, | |
| 				err:           nil, | |
| 			}: | |
| 				// Error response queued successfully, continue reading next request | |
| 				continue | |
| 			case <-ctx.Done(): | |
| 				return ctx.Err() | |
| 			} | |
| 		} | |
| 
 | |
| 		// CRITICAL DEBUG: Log that validation passed | |
| 		glog.V(4).Infof("API VERSION VALIDATION PASSED: Key=%d (%s), Version=%d, Correlation=%d - proceeding to header parsing", | |
| 			apiKey, getAPIName(APIKey(apiKey)), apiVersion, correlationID) | |
| 
 | |
| 		// Extract request body - special handling for ApiVersions requests | |
| 		var requestBody []byte | |
| 		if apiKey == uint16(APIKeyApiVersions) && apiVersion >= 3 { | |
| 			// ApiVersions v3+ uses client_software_name + client_software_version, not client_id | |
| 			bodyOffset := 8 // Skip api_key(2) + api_version(2) + correlation_id(4) | |
|  | |
| 			// Skip client_software_name (compact string) | |
| 			if len(messageBuf) > bodyOffset { | |
| 				clientNameLen := int(messageBuf[bodyOffset]) // compact string length | |
| 				if clientNameLen > 0 { | |
| 					clientNameLen-- // compact strings encode length+1 | |
| 					bodyOffset += 1 + clientNameLen | |
| 				} else { | |
| 					bodyOffset += 1 // just the length byte for null/empty | |
| 				} | |
| 			} | |
| 
 | |
| 			// Skip client_software_version (compact string) | |
| 			if len(messageBuf) > bodyOffset { | |
| 				clientVersionLen := int(messageBuf[bodyOffset]) // compact string length | |
| 				if clientVersionLen > 0 { | |
| 					clientVersionLen-- // compact strings encode length+1 | |
| 					bodyOffset += 1 + clientVersionLen | |
| 				} else { | |
| 					bodyOffset += 1 // just the length byte for null/empty | |
| 				} | |
| 			} | |
| 
 | |
| 			// Skip tagged fields (should be 0x00 for ApiVersions) | |
| 			if len(messageBuf) > bodyOffset { | |
| 				bodyOffset += 1 // tagged fields byte | |
| 			} | |
| 
 | |
| 			requestBody = messageBuf[bodyOffset:] | |
| 		} else { | |
| 			// Parse header using flexible version utilities for other APIs | |
| 			header, parsedRequestBody, parseErr := ParseRequestHeader(messageBuf) | |
| 			if parseErr != nil { | |
| 				// CRITICAL: Log the parsing error for debugging | |
| 				glog.Errorf("REQUEST HEADER PARSING FAILED: API=%d (%s) v%d, correlation=%d, error=%v, msgLen=%d", | |
| 					apiKey, getAPIName(APIKey(apiKey)), apiVersion, correlationID, parseErr, len(messageBuf)) | |
| 
 | |
| 				// Fall back to basic header parsing if flexible version parsing fails | |
|  | |
| 				// Basic header parsing fallback (original logic) | |
| 				bodyOffset := 8 | |
| 				if len(messageBuf) < bodyOffset+2 { | |
| 					glog.Errorf("FALLBACK PARSING FAILED: missing client_id length, msgLen=%d", len(messageBuf)) | |
| 					return fmt.Errorf("invalid header: missing client_id length") | |
| 				} | |
| 				clientIDLen := int16(binary.BigEndian.Uint16(messageBuf[bodyOffset : bodyOffset+2])) | |
| 				bodyOffset += 2 | |
| 				if clientIDLen >= 0 { | |
| 					if len(messageBuf) < bodyOffset+int(clientIDLen) { | |
| 						glog.Errorf("FALLBACK PARSING FAILED: client_id truncated, clientIDLen=%d, msgLen=%d", clientIDLen, len(messageBuf)) | |
| 						return fmt.Errorf("invalid header: client_id truncated") | |
| 					} | |
| 					bodyOffset += int(clientIDLen) | |
| 				} | |
| 				requestBody = messageBuf[bodyOffset:] | |
| 				glog.V(2).Infof("FALLBACK PARSING SUCCESS: API=%d (%s) v%d, bodyLen=%d", apiKey, getAPIName(APIKey(apiKey)), apiVersion, len(requestBody)) | |
| 			} else { | |
| 				// Use the successfully parsed request body | |
| 				requestBody = parsedRequestBody | |
| 
 | |
| 				// Validate parsed header matches what we already extracted | |
| 				if header.APIKey != apiKey || header.APIVersion != apiVersion || header.CorrelationID != correlationID { | |
| 					// Fall back to basic parsing rather than failing | |
| 					bodyOffset := 8 | |
| 					if len(messageBuf) < bodyOffset+2 { | |
| 						return fmt.Errorf("invalid header: missing client_id length") | |
| 					} | |
| 					clientIDLen := int16(binary.BigEndian.Uint16(messageBuf[bodyOffset : bodyOffset+2])) | |
| 					bodyOffset += 2 | |
| 					if clientIDLen >= 0 { | |
| 						if len(messageBuf) < bodyOffset+int(clientIDLen) { | |
| 							return fmt.Errorf("invalid header: client_id truncated") | |
| 						} | |
| 						bodyOffset += int(clientIDLen) | |
| 					} | |
| 					requestBody = messageBuf[bodyOffset:] | |
| 				} else if header.ClientID != nil { | |
| 					// Store client ID in connection context for use in fetch requests | |
| 					connContext.ClientID = *header.ClientID | |
| 				} | |
| 			} | |
| 		} | |
| 
 | |
| 		// CRITICAL: Route request to appropriate processor | |
| 		// Control plane: Fast, never blocks (Metadata, Heartbeat, etc.) | |
| 		// Data plane: Can be slow (Fetch, Produce) | |
|  | |
| 		// Attach connection context to the Go context for retrieval in nested calls | |
| 		ctxWithConn := context.WithValue(ctx, connContextKey, connContext) | |
| 
 | |
| 		req := &kafkaRequest{ | |
| 			correlationID: correlationID, | |
| 			apiKey:        apiKey, | |
| 			apiVersion:    apiVersion, | |
| 			requestBody:   requestBody, | |
| 			ctx:           ctxWithConn, | |
| 			connContext:   connContext, // Pass per-connection context to avoid race conditions | |
| 		} | |
| 
 | |
| 		// Route to appropriate channel based on API key | |
| 		var targetChan chan *kafkaRequest | |
| 		if isDataPlaneAPI(apiKey) { | |
| 			targetChan = dataChan | |
| 		} else { | |
| 			targetChan = controlChan | |
| 		} | |
| 
 | |
| 		// CRITICAL: Only add to correlation queue AFTER successful channel send | |
| 		// If we add before and the channel blocks, the correlation ID is in the queue | |
| 		// but the request never gets processed, causing response writer deadlock | |
| 		select { | |
| 		case targetChan <- req: | |
| 			// Request queued successfully - NOW add to correlation tracking | |
| 			correlationQueueMu.Lock() | |
| 			correlationQueue = append(correlationQueue, correlationID) | |
| 			correlationQueueMu.Unlock() | |
| 		case <-ctx.Done(): | |
| 			return ctx.Err() | |
| 		case <-time.After(10 * time.Second): | |
| 			// Channel full for too long - this shouldn't happen with proper backpressure | |
| 			glog.Errorf("[%s] CRITICAL: Failed to queue correlation=%d after 10s timeout - channel full!", connectionID, correlationID) | |
| 			return fmt.Errorf("request queue full: correlation=%d", correlationID) | |
| 		} | |
| 	} | |
| } | |
| 
 | |
| // processRequestSync processes a single Kafka API request synchronously and returns the response | |
| func (h *Handler) processRequestSync(req *kafkaRequest) ([]byte, error) { | |
| 	// Record request start time for latency tracking | |
| 	requestStart := time.Now() | |
| 	apiName := getAPIName(APIKey(req.apiKey)) | |
| 
 | |
| 	// Debug: Log API calls at verbose level 2 (disabled by default) | |
| 	glog.V(4).Infof("[API] %s (key=%d, ver=%d, corr=%d)", | |
| 		apiName, req.apiKey, req.apiVersion, req.correlationID) | |
| 
 | |
| 	var response []byte | |
| 	var err error | |
| 
 | |
| 	switch APIKey(req.apiKey) { | |
| 	case APIKeyApiVersions: | |
| 		response, err = h.handleApiVersions(req.correlationID, req.apiVersion) | |
| 
 | |
| 	case APIKeyMetadata: | |
| 		response, err = h.handleMetadata(req.correlationID, req.apiVersion, req.requestBody) | |
| 
 | |
| 	case APIKeyListOffsets: | |
| 		response, err = h.handleListOffsets(req.correlationID, req.apiVersion, req.requestBody) | |
| 
 | |
| 	case APIKeyCreateTopics: | |
| 		response, err = h.handleCreateTopics(req.correlationID, req.apiVersion, req.requestBody) | |
| 
 | |
| 	case APIKeyDeleteTopics: | |
| 		response, err = h.handleDeleteTopics(req.correlationID, req.requestBody) | |
| 
 | |
| 	case APIKeyProduce: | |
| 		response, err = h.handleProduce(req.ctx, req.correlationID, req.apiVersion, req.requestBody) | |
| 
 | |
| 	case APIKeyFetch: | |
| 		response, err = h.handleFetch(req.ctx, req.correlationID, req.apiVersion, req.requestBody) | |
| 
 | |
| 	case APIKeyJoinGroup: | |
| 		response, err = h.handleJoinGroup(req.connContext, req.correlationID, req.apiVersion, req.requestBody) | |
| 
 | |
| 	case APIKeySyncGroup: | |
| 		response, err = h.handleSyncGroup(req.correlationID, req.apiVersion, req.requestBody) | |
| 
 | |
| 	case APIKeyOffsetCommit: | |
| 		response, err = h.handleOffsetCommit(req.correlationID, req.apiVersion, req.requestBody) | |
| 
 | |
| 	case APIKeyOffsetFetch: | |
| 		response, err = h.handleOffsetFetch(req.correlationID, req.apiVersion, req.requestBody) | |
| 
 | |
| 	case APIKeyFindCoordinator: | |
| 		response, err = h.handleFindCoordinator(req.correlationID, req.apiVersion, req.requestBody) | |
| 
 | |
| 	case APIKeyHeartbeat: | |
| 		response, err = h.handleHeartbeat(req.correlationID, req.apiVersion, req.requestBody) | |
| 
 | |
| 	case APIKeyLeaveGroup: | |
| 		response, err = h.handleLeaveGroup(req.correlationID, req.apiVersion, req.requestBody) | |
| 
 | |
| 	case APIKeyDescribeGroups: | |
| 		response, err = h.handleDescribeGroups(req.correlationID, req.apiVersion, req.requestBody) | |
| 
 | |
| 	case APIKeyListGroups: | |
| 		response, err = h.handleListGroups(req.correlationID, req.apiVersion, req.requestBody) | |
| 
 | |
| 	case APIKeyDescribeConfigs: | |
| 		response, err = h.handleDescribeConfigs(req.correlationID, req.apiVersion, req.requestBody) | |
| 
 | |
| 	case APIKeyDescribeCluster: | |
| 		response, err = h.handleDescribeCluster(req.correlationID, req.apiVersion, req.requestBody) | |
| 
 | |
| 	case APIKeyInitProducerId: | |
| 		response, err = h.handleInitProducerId(req.correlationID, req.apiVersion, req.requestBody) | |
| 
 | |
| 	default: | |
| 		glog.Warningf("Unsupported API key: %d (%s) v%d - Correlation: %d", req.apiKey, apiName, req.apiVersion, req.correlationID) | |
| 		err = fmt.Errorf("unsupported API key: %d (version %d)", req.apiKey, req.apiVersion) | |
| 	} | |
| 
 | |
| 	glog.V(2).Infof("processRequestSync: Switch completed for correlation=%d, about to record metrics", req.correlationID) | |
| 	// Record metrics | |
| 	requestLatency := time.Since(requestStart) | |
| 	if err != nil { | |
| 		RecordErrorMetrics(req.apiKey, requestLatency) | |
| 	} else { | |
| 		RecordRequestMetrics(req.apiKey, requestLatency) | |
| 	} | |
| 	glog.V(2).Infof("processRequestSync: Metrics recorded for correlation=%d, about to return", req.correlationID) | |
| 
 | |
| 	return response, err | |
| } | |
| 
 | |
| // ApiKeyInfo represents supported API key information | |
| type ApiKeyInfo struct { | |
| 	ApiKey     APIKey | |
| 	MinVersion uint16 | |
| 	MaxVersion uint16 | |
| } | |
| 
 | |
| // SupportedApiKeys defines all supported API keys and their version ranges | |
| var SupportedApiKeys = []ApiKeyInfo{ | |
| 	{APIKeyApiVersions, 0, 4},     // ApiVersions - support up to v4 for Kafka 8.0.0 compatibility | |
| 	{APIKeyMetadata, 0, 7},        // Metadata - support up to v7 | |
| 	{APIKeyProduce, 0, 7},         // Produce | |
| 	{APIKeyFetch, 0, 7},           // Fetch | |
| 	{APIKeyListOffsets, 0, 2},     // ListOffsets | |
| 	{APIKeyCreateTopics, 0, 5},    // CreateTopics | |
| 	{APIKeyDeleteTopics, 0, 4},    // DeleteTopics | |
| 	{APIKeyFindCoordinator, 0, 3}, // FindCoordinator - v3+ supports flexible responses | |
| 	{APIKeyJoinGroup, 0, 6},       // JoinGroup | |
| 	{APIKeySyncGroup, 0, 5},       // SyncGroup | |
| 	{APIKeyOffsetCommit, 0, 2},    // OffsetCommit | |
| 	{APIKeyOffsetFetch, 0, 5},     // OffsetFetch | |
| 	{APIKeyHeartbeat, 0, 4},       // Heartbeat | |
| 	{APIKeyLeaveGroup, 0, 4},      // LeaveGroup | |
| 	{APIKeyDescribeGroups, 0, 5},  // DescribeGroups | |
| 	{APIKeyListGroups, 0, 4},      // ListGroups | |
| 	{APIKeyDescribeConfigs, 0, 4}, // DescribeConfigs | |
| 	{APIKeyInitProducerId, 0, 4},  // InitProducerId - support up to v4 for transactional producers | |
| 	{APIKeyDescribeCluster, 0, 1}, // DescribeCluster - for AdminClient compatibility (KIP-919) | |
| } | |
| 
 | |
| func (h *Handler) handleApiVersions(correlationID uint32, apiVersion uint16) ([]byte, error) { | |
| 	// Send correct flexible or non-flexible response based on API version | |
| 	// This fixes the AdminClient "collection size 2184558" error by using proper varint encoding | |
| 	response := make([]byte, 0, 512) | |
| 
 | |
| 	// NOTE: Correlation ID is handled by writeResponseWithCorrelationID | |
| 	// Do NOT include it in the response body | |
|  | |
| 	// === RESPONSE BODY === | |
| 	// Error code (2 bytes) - always fixed-length | |
| 	response = append(response, 0, 0) // No error | |
|  | |
| 	// API Keys Array - CRITICAL FIX: Use correct encoding based on version | |
| 	if apiVersion >= 3 { | |
| 		// FLEXIBLE FORMAT: Compact array with varint length - THIS FIXES THE ADMINCLIENT BUG! | |
| 		response = append(response, CompactArrayLength(uint32(len(SupportedApiKeys)))...) | |
| 
 | |
| 		// Add API key entries with per-element tagged fields | |
| 		for _, api := range SupportedApiKeys { | |
| 			response = append(response, byte(api.ApiKey>>8), byte(api.ApiKey))         // api_key (2 bytes) | |
| 			response = append(response, byte(api.MinVersion>>8), byte(api.MinVersion)) // min_version (2 bytes) | |
| 			response = append(response, byte(api.MaxVersion>>8), byte(api.MaxVersion)) // max_version (2 bytes) | |
| 			response = append(response, 0x00)                                          // Per-element tagged fields (varint: empty) | |
| 		} | |
| 
 | |
| 	} else { | |
| 		// NON-FLEXIBLE FORMAT: Regular array with fixed 4-byte length | |
| 		response = append(response, 0, 0, 0, byte(len(SupportedApiKeys))) // Array length (4 bytes) | |
|  | |
| 		// Add API key entries without tagged fields | |
| 		for _, api := range SupportedApiKeys { | |
| 			response = append(response, byte(api.ApiKey>>8), byte(api.ApiKey))         // api_key (2 bytes) | |
| 			response = append(response, byte(api.MinVersion>>8), byte(api.MinVersion)) // min_version (2 bytes) | |
| 			response = append(response, byte(api.MaxVersion>>8), byte(api.MaxVersion)) // max_version (2 bytes) | |
| 		} | |
| 	} | |
| 
 | |
| 	// Throttle time (for v1+) - always fixed-length | |
| 	if apiVersion >= 1 { | |
| 		response = append(response, 0, 0, 0, 0) // throttle_time_ms = 0 (4 bytes) | |
| 	} | |
| 
 | |
| 	// Response-level tagged fields (for v3+ flexible versions) | |
| 	if apiVersion >= 3 { | |
| 		response = append(response, 0x00) // Empty response-level tagged fields (varint: single byte 0) | |
| 	} | |
| 
 | |
| 	return response, nil | |
| } | |
| 
 | |
| // handleMetadataV0 implements the Metadata API response in version 0 format. | |
| // v0 response layout: | |
| // correlation_id(4) + brokers(ARRAY) + topics(ARRAY) | |
| // broker: node_id(4) + host(STRING) + port(4) | |
| // topic: error_code(2) + name(STRING) + partitions(ARRAY) | |
| // partition: error_code(2) + partition_id(4) + leader(4) + replicas(ARRAY<int32>) + isr(ARRAY<int32>) | |
| func (h *Handler) HandleMetadataV0(correlationID uint32, requestBody []byte) ([]byte, error) { | |
| 	response := make([]byte, 0, 256) | |
| 
 | |
| 	// NOTE: Correlation ID is handled by writeResponseWithCorrelationID | |
| 	// Do NOT include it in the response body | |
|  | |
| 	// Brokers array length (4 bytes) - 1 broker (this gateway) | |
| 	response = append(response, 0, 0, 0, 1) | |
| 
 | |
| 	// Broker 0: node_id(4) + host(STRING) + port(4) | |
| 	response = append(response, 0, 0, 0, 1) // node_id = 1 (consistent with partitions) | |
|  | |
| 	// Get advertised address for client connections | |
| 	host, port := h.GetAdvertisedAddress(h.GetGatewayAddress()) | |
| 
 | |
| 	// Host (STRING: 2 bytes length + bytes) - validate length fits in uint16 | |
| 	if len(host) > 65535 { | |
| 		return nil, fmt.Errorf("host name too long: %d bytes", len(host)) | |
| 	} | |
| 	hostLen := uint16(len(host)) | |
| 	response = append(response, byte(hostLen>>8), byte(hostLen)) | |
| 	response = append(response, []byte(host)...) | |
| 
 | |
| 	// Port (4 bytes) - validate port range | |
| 	if port < 0 || port > 65535 { | |
| 		return nil, fmt.Errorf("invalid port number: %d", port) | |
| 	} | |
| 	portBytes := make([]byte, 4) | |
| 	binary.BigEndian.PutUint32(portBytes, uint32(port)) | |
| 	response = append(response, portBytes...) | |
| 
 | |
| 	// Parse requested topics (empty means all) | |
| 	requestedTopics := h.parseMetadataTopics(requestBody) | |
| 	glog.V(3).Infof("[METADATA v0] Requested topics: %v (empty=all)", requestedTopics) | |
| 
 | |
| 	// Determine topics to return using SeaweedMQ handler | |
| 	var topicsToReturn []string | |
| 	if len(requestedTopics) == 0 { | |
| 		topicsToReturn = h.seaweedMQHandler.ListTopics() | |
| 	} else { | |
| 		for _, name := range requestedTopics { | |
| 			if h.seaweedMQHandler.TopicExists(name) { | |
| 				topicsToReturn = append(topicsToReturn, name) | |
| 			} | |
| 		} | |
| 	} | |
| 
 | |
| 	// Topics array length (4 bytes) | |
| 	topicsCountBytes := make([]byte, 4) | |
| 	binary.BigEndian.PutUint32(topicsCountBytes, uint32(len(topicsToReturn))) | |
| 	response = append(response, topicsCountBytes...) | |
| 
 | |
| 	// Topic entries | |
| 	for _, topicName := range topicsToReturn { | |
| 		// error_code(2) = 0 | |
| 		response = append(response, 0, 0) | |
| 
 | |
| 		// name (STRING) | |
| 		nameBytes := []byte(topicName) | |
| 		nameLen := uint16(len(nameBytes)) | |
| 		response = append(response, byte(nameLen>>8), byte(nameLen)) | |
| 		response = append(response, nameBytes...) | |
| 
 | |
| 		// Get actual partition count from topic info | |
| 		topicInfo, exists := h.seaweedMQHandler.GetTopicInfo(topicName) | |
| 		partitionCount := h.GetDefaultPartitions() // Use configurable default | |
| 		if exists && topicInfo != nil { | |
| 			partitionCount = topicInfo.Partitions | |
| 		} | |
| 
 | |
| 		// partitions array length (4 bytes) | |
| 		partitionsBytes := make([]byte, 4) | |
| 		binary.BigEndian.PutUint32(partitionsBytes, uint32(partitionCount)) | |
| 		response = append(response, partitionsBytes...) | |
| 
 | |
| 		// Create partition entries for each partition | |
| 		for partitionID := int32(0); partitionID < partitionCount; partitionID++ { | |
| 			// partition: error_code(2) + partition_id(4) + leader(4) | |
| 			response = append(response, 0, 0) // error_code | |
|  | |
| 			// partition_id (4 bytes) | |
| 			partitionIDBytes := make([]byte, 4) | |
| 			binary.BigEndian.PutUint32(partitionIDBytes, uint32(partitionID)) | |
| 			response = append(response, partitionIDBytes...) | |
| 
 | |
| 			response = append(response, 0, 0, 0, 1) // leader = 1 (this broker) | |
|  | |
| 			// replicas: array length(4) + one broker id (1) | |
| 			response = append(response, 0, 0, 0, 1) | |
| 			response = append(response, 0, 0, 0, 1) | |
| 
 | |
| 			// isr: array length(4) + one broker id (1) | |
| 			response = append(response, 0, 0, 0, 1) | |
| 			response = append(response, 0, 0, 0, 1) | |
| 		} | |
| 	} | |
| 
 | |
| 	for range topicsToReturn { | |
| 	} | |
| 	return response, nil | |
| } | |
| 
 | |
| func (h *Handler) HandleMetadataV1(correlationID uint32, requestBody []byte) ([]byte, error) { | |
| 	// Simplified Metadata v1 implementation - based on working v0 + v1 additions | |
| 	// v1 adds: ControllerID (after brokers), Rack (for brokers), IsInternal (for topics) | |
|  | |
| 	// Parse requested topics (empty means all) | |
| 	requestedTopics := h.parseMetadataTopics(requestBody) | |
| 	glog.V(3).Infof("[METADATA v1] Requested topics: %v (empty=all)", requestedTopics) | |
| 
 | |
| 	// Determine topics to return using SeaweedMQ handler | |
| 	var topicsToReturn []string | |
| 	if len(requestedTopics) == 0 { | |
| 		topicsToReturn = h.seaweedMQHandler.ListTopics() | |
| 	} else { | |
| 		for _, name := range requestedTopics { | |
| 			if h.seaweedMQHandler.TopicExists(name) { | |
| 				topicsToReturn = append(topicsToReturn, name) | |
| 			} | |
| 		} | |
| 	} | |
| 
 | |
| 	// Build response using same approach as v0 but with v1 additions | |
| 	response := make([]byte, 0, 256) | |
| 
 | |
| 	// NOTE: Correlation ID is handled by writeResponseWithHeader | |
| 	// Do NOT include it in the response body | |
|  | |
| 	// Brokers array length (4 bytes) - 1 broker (this gateway) | |
| 	response = append(response, 0, 0, 0, 1) | |
| 
 | |
| 	// Broker 0: node_id(4) + host(STRING) + port(4) + rack(STRING) | |
| 	response = append(response, 0, 0, 0, 1) // node_id = 1 | |
|  | |
| 	// Get advertised address for client connections | |
| 	host, port := h.GetAdvertisedAddress(h.GetGatewayAddress()) | |
| 
 | |
| 	// Host (STRING: 2 bytes length + bytes) - validate length fits in uint16 | |
| 	if len(host) > 65535 { | |
| 		return nil, fmt.Errorf("host name too long: %d bytes", len(host)) | |
| 	} | |
| 	hostLen := uint16(len(host)) | |
| 	response = append(response, byte(hostLen>>8), byte(hostLen)) | |
| 	response = append(response, []byte(host)...) | |
| 
 | |
| 	// Port (4 bytes) - validate port range | |
| 	if port < 0 || port > 65535 { | |
| 		return nil, fmt.Errorf("invalid port number: %d", port) | |
| 	} | |
| 	portBytes := make([]byte, 4) | |
| 	binary.BigEndian.PutUint32(portBytes, uint32(port)) | |
| 	response = append(response, portBytes...) | |
| 
 | |
| 	// Rack (STRING: 2 bytes length + bytes) - v1 addition, non-nullable empty string | |
| 	response = append(response, 0, 0) // empty string | |
|  | |
| 	// ControllerID (4 bytes) - v1 addition | |
| 	response = append(response, 0, 0, 0, 1) // controller_id = 1 | |
|  | |
| 	// Topics array length (4 bytes) | |
| 	topicsCountBytes := make([]byte, 4) | |
| 	binary.BigEndian.PutUint32(topicsCountBytes, uint32(len(topicsToReturn))) | |
| 	response = append(response, topicsCountBytes...) | |
| 
 | |
| 	// Topics | |
| 	for _, topicName := range topicsToReturn { | |
| 		// error_code (2 bytes) | |
| 		response = append(response, 0, 0) | |
| 
 | |
| 		// topic name (STRING: 2 bytes length + bytes) | |
| 		topicLen := uint16(len(topicName)) | |
| 		response = append(response, byte(topicLen>>8), byte(topicLen)) | |
| 		response = append(response, []byte(topicName)...) | |
| 
 | |
| 		// is_internal (1 byte) - v1 addition | |
| 		response = append(response, 0) // false | |
|  | |
| 		// Get actual partition count from topic info | |
| 		topicInfo, exists := h.seaweedMQHandler.GetTopicInfo(topicName) | |
| 		partitionCount := h.GetDefaultPartitions() // Use configurable default | |
| 		if exists && topicInfo != nil { | |
| 			partitionCount = topicInfo.Partitions | |
| 		} | |
| 
 | |
| 		// partitions array length (4 bytes) | |
| 		partitionsBytes := make([]byte, 4) | |
| 		binary.BigEndian.PutUint32(partitionsBytes, uint32(partitionCount)) | |
| 		response = append(response, partitionsBytes...) | |
| 
 | |
| 		// Create partition entries for each partition | |
| 		for partitionID := int32(0); partitionID < partitionCount; partitionID++ { | |
| 			// partition: error_code(2) + partition_id(4) + leader_id(4) + replicas(ARRAY) + isr(ARRAY) | |
| 			response = append(response, 0, 0) // error_code | |
|  | |
| 			// partition_id (4 bytes) | |
| 			partitionIDBytes := make([]byte, 4) | |
| 			binary.BigEndian.PutUint32(partitionIDBytes, uint32(partitionID)) | |
| 			response = append(response, partitionIDBytes...) | |
| 
 | |
| 			response = append(response, 0, 0, 0, 1) // leader_id = 1 | |
|  | |
| 			// replicas: array length(4) + one broker id (1) | |
| 			response = append(response, 0, 0, 0, 1) | |
| 			response = append(response, 0, 0, 0, 1) | |
| 
 | |
| 			// isr: array length(4) + one broker id (1) | |
| 			response = append(response, 0, 0, 0, 1) | |
| 			response = append(response, 0, 0, 0, 1) | |
| 		} | |
| 	} | |
| 
 | |
| 	return response, nil | |
| } | |
| 
 | |
| // HandleMetadataV2 implements Metadata API v2 with ClusterID field | |
| func (h *Handler) HandleMetadataV2(correlationID uint32, requestBody []byte) ([]byte, error) { | |
| 	// Metadata v2 adds ClusterID field (nullable string) | |
| 	// v2 response layout: correlation_id(4) + brokers(ARRAY) + cluster_id(NULLABLE_STRING) + controller_id(4) + topics(ARRAY) | |
|  | |
| 	// Parse requested topics (empty means all) | |
| 	requestedTopics := h.parseMetadataTopics(requestBody) | |
| 	glog.V(3).Infof("[METADATA v2] Requested topics: %v (empty=all)", requestedTopics) | |
| 
 | |
| 	// Determine topics to return using SeaweedMQ handler | |
| 	var topicsToReturn []string | |
| 	if len(requestedTopics) == 0 { | |
| 		topicsToReturn = h.seaweedMQHandler.ListTopics() | |
| 	} else { | |
| 		for _, name := range requestedTopics { | |
| 			if h.seaweedMQHandler.TopicExists(name) { | |
| 				topicsToReturn = append(topicsToReturn, name) | |
| 			} | |
| 		} | |
| 	} | |
| 
 | |
| 	var buf bytes.Buffer | |
| 
 | |
| 	// Correlation ID (4 bytes) | |
| 	// NOTE: Correlation ID is handled by writeResponseWithCorrelationID | |
| 	// Do NOT include it in the response body | |
|  | |
| 	// Brokers array (4 bytes length + brokers) - 1 broker (this gateway) | |
| 	binary.Write(&buf, binary.BigEndian, int32(1)) | |
| 
 | |
| 	// Get advertised address for client connections | |
| 	host, port := h.GetAdvertisedAddress(h.GetGatewayAddress()) | |
| 
 | |
| 	nodeID := int32(1) // Single gateway node | |
|  | |
| 	// Broker: node_id(4) + host(STRING) + port(4) + rack(STRING) + cluster_id(NULLABLE_STRING) | |
| 	binary.Write(&buf, binary.BigEndian, nodeID) | |
| 
 | |
| 	// Host (STRING: 2 bytes length + data) - validate length fits in int16 | |
| 	if len(host) > 32767 { | |
| 		return nil, fmt.Errorf("host name too long: %d bytes", len(host)) | |
| 	} | |
| 	binary.Write(&buf, binary.BigEndian, int16(len(host))) | |
| 	buf.WriteString(host) | |
| 
 | |
| 	// Port (4 bytes) - validate port range | |
| 	if port < 0 || port > 65535 { | |
| 		return nil, fmt.Errorf("invalid port number: %d", port) | |
| 	} | |
| 	binary.Write(&buf, binary.BigEndian, int32(port)) | |
| 
 | |
| 	// Rack (STRING: 2 bytes length + data) - v1+ addition, non-nullable | |
| 	binary.Write(&buf, binary.BigEndian, int16(0)) // Empty string | |
|  | |
| 	// ClusterID (NULLABLE_STRING: 2 bytes length + data) - v2 addition | |
| 	// Schema Registry requires a non-null cluster ID | |
| 	clusterID := "seaweedfs-kafka-gateway" | |
| 	binary.Write(&buf, binary.BigEndian, int16(len(clusterID))) | |
| 	buf.WriteString(clusterID) | |
| 
 | |
| 	// ControllerID (4 bytes) - v1+ addition | |
| 	binary.Write(&buf, binary.BigEndian, int32(1)) | |
| 
 | |
| 	// Topics array (4 bytes length + topics) | |
| 	binary.Write(&buf, binary.BigEndian, int32(len(topicsToReturn))) | |
| 
 | |
| 	for _, topicName := range topicsToReturn { | |
| 		// ErrorCode (2 bytes) | |
| 		binary.Write(&buf, binary.BigEndian, int16(0)) | |
| 
 | |
| 		// Name (STRING: 2 bytes length + data) | |
| 		binary.Write(&buf, binary.BigEndian, int16(len(topicName))) | |
| 		buf.WriteString(topicName) | |
| 
 | |
| 		// IsInternal (1 byte) - v1+ addition | |
| 		buf.WriteByte(0) // false | |
|  | |
| 		// Get actual partition count from topic info | |
| 		topicInfo, exists := h.seaweedMQHandler.GetTopicInfo(topicName) | |
| 		partitionCount := h.GetDefaultPartitions() // Use configurable default | |
| 		if exists && topicInfo != nil { | |
| 			partitionCount = topicInfo.Partitions | |
| 		} | |
| 
 | |
| 		// Partitions array (4 bytes length + partitions) | |
| 		binary.Write(&buf, binary.BigEndian, partitionCount) | |
| 
 | |
| 		// Create partition entries for each partition | |
| 		for partitionID := int32(0); partitionID < partitionCount; partitionID++ { | |
| 			binary.Write(&buf, binary.BigEndian, int16(0))    // ErrorCode | |
| 			binary.Write(&buf, binary.BigEndian, partitionID) // PartitionIndex | |
| 			binary.Write(&buf, binary.BigEndian, int32(1))    // LeaderID | |
|  | |
| 			// ReplicaNodes array (4 bytes length + nodes) | |
| 			binary.Write(&buf, binary.BigEndian, int32(1)) // 1 replica | |
| 			binary.Write(&buf, binary.BigEndian, int32(1)) // NodeID 1 | |
|  | |
| 			// IsrNodes array (4 bytes length + nodes) | |
| 			binary.Write(&buf, binary.BigEndian, int32(1)) // 1 ISR node | |
| 			binary.Write(&buf, binary.BigEndian, int32(1)) // NodeID 1 | |
| 		} | |
| 	} | |
| 
 | |
| 	response := buf.Bytes() | |
| 
 | |
| 	return response, nil | |
| } | |
| 
 | |
| // HandleMetadataV3V4 implements Metadata API v3/v4 with ThrottleTimeMs field | |
| func (h *Handler) HandleMetadataV3V4(correlationID uint32, requestBody []byte) ([]byte, error) { | |
| 	// Metadata v3/v4 adds ThrottleTimeMs field at the beginning | |
| 	// v3/v4 response layout: correlation_id(4) + throttle_time_ms(4) + brokers(ARRAY) + cluster_id(NULLABLE_STRING) + controller_id(4) + topics(ARRAY) | |
|  | |
| 	// Parse requested topics (empty means all) | |
| 	requestedTopics := h.parseMetadataTopics(requestBody) | |
| 	glog.V(3).Infof("[METADATA v3/v4] Requested topics: %v (empty=all)", requestedTopics) | |
| 
 | |
| 	// Determine topics to return using SeaweedMQ handler | |
| 	var topicsToReturn []string | |
| 	if len(requestedTopics) == 0 { | |
| 		topicsToReturn = h.seaweedMQHandler.ListTopics() | |
| 	} else { | |
| 		for _, name := range requestedTopics { | |
| 			if h.seaweedMQHandler.TopicExists(name) { | |
| 				topicsToReturn = append(topicsToReturn, name) | |
| 			} | |
| 		} | |
| 	} | |
| 
 | |
| 	var buf bytes.Buffer | |
| 
 | |
| 	// Correlation ID (4 bytes) | |
| 	// NOTE: Correlation ID is handled by writeResponseWithCorrelationID | |
| 	// Do NOT include it in the response body | |
|  | |
| 	// ThrottleTimeMs (4 bytes) - v3+ addition | |
| 	binary.Write(&buf, binary.BigEndian, int32(0)) // No throttling | |
|  | |
| 	// Brokers array (4 bytes length + brokers) - 1 broker (this gateway) | |
| 	binary.Write(&buf, binary.BigEndian, int32(1)) | |
| 
 | |
| 	// Get advertised address for client connections | |
| 	host, port := h.GetAdvertisedAddress(h.GetGatewayAddress()) | |
| 
 | |
| 	nodeID := int32(1) // Single gateway node | |
|  | |
| 	// Broker: node_id(4) + host(STRING) + port(4) + rack(STRING) + cluster_id(NULLABLE_STRING) | |
| 	binary.Write(&buf, binary.BigEndian, nodeID) | |
| 
 | |
| 	// Host (STRING: 2 bytes length + data) - validate length fits in int16 | |
| 	if len(host) > 32767 { | |
| 		return nil, fmt.Errorf("host name too long: %d bytes", len(host)) | |
| 	} | |
| 	binary.Write(&buf, binary.BigEndian, int16(len(host))) | |
| 	buf.WriteString(host) | |
| 
 | |
| 	// Port (4 bytes) - validate port range | |
| 	if port < 0 || port > 65535 { | |
| 		return nil, fmt.Errorf("invalid port number: %d", port) | |
| 	} | |
| 	binary.Write(&buf, binary.BigEndian, int32(port)) | |
| 
 | |
| 	// Rack (STRING: 2 bytes length + data) - v1+ addition, non-nullable | |
| 	binary.Write(&buf, binary.BigEndian, int16(0)) // Empty string | |
|  | |
| 	// ClusterID (NULLABLE_STRING: 2 bytes length + data) - v2+ addition | |
| 	// Schema Registry requires a non-null cluster ID | |
| 	clusterID := "seaweedfs-kafka-gateway" | |
| 	binary.Write(&buf, binary.BigEndian, int16(len(clusterID))) | |
| 	buf.WriteString(clusterID) | |
| 
 | |
| 	// ControllerID (4 bytes) - v1+ addition | |
| 	binary.Write(&buf, binary.BigEndian, int32(1)) | |
| 
 | |
| 	// Topics array (4 bytes length + topics) | |
| 	binary.Write(&buf, binary.BigEndian, int32(len(topicsToReturn))) | |
| 
 | |
| 	for _, topicName := range topicsToReturn { | |
| 		// ErrorCode (2 bytes) | |
| 		binary.Write(&buf, binary.BigEndian, int16(0)) | |
| 
 | |
| 		// Name (STRING: 2 bytes length + data) | |
| 		binary.Write(&buf, binary.BigEndian, int16(len(topicName))) | |
| 		buf.WriteString(topicName) | |
| 
 | |
| 		// IsInternal (1 byte) - v1+ addition | |
| 		buf.WriteByte(0) // false | |
|  | |
| 		// Get actual partition count from topic info | |
| 		topicInfo, exists := h.seaweedMQHandler.GetTopicInfo(topicName) | |
| 		partitionCount := h.GetDefaultPartitions() // Use configurable default | |
| 		if exists && topicInfo != nil { | |
| 			partitionCount = topicInfo.Partitions | |
| 		} | |
| 
 | |
| 		// Partitions array (4 bytes length + partitions) | |
| 		binary.Write(&buf, binary.BigEndian, partitionCount) | |
| 
 | |
| 		// Create partition entries for each partition | |
| 		for partitionID := int32(0); partitionID < partitionCount; partitionID++ { | |
| 			binary.Write(&buf, binary.BigEndian, int16(0))    // ErrorCode | |
| 			binary.Write(&buf, binary.BigEndian, partitionID) // PartitionIndex | |
| 			binary.Write(&buf, binary.BigEndian, int32(1))    // LeaderID | |
|  | |
| 			// ReplicaNodes array (4 bytes length + nodes) | |
| 			binary.Write(&buf, binary.BigEndian, int32(1)) // 1 replica | |
| 			binary.Write(&buf, binary.BigEndian, int32(1)) // NodeID 1 | |
|  | |
| 			// IsrNodes array (4 bytes length + nodes) | |
| 			binary.Write(&buf, binary.BigEndian, int32(1)) // 1 ISR node | |
| 			binary.Write(&buf, binary.BigEndian, int32(1)) // NodeID 1 | |
| 		} | |
| 	} | |
| 
 | |
| 	response := buf.Bytes() | |
| 
 | |
| 	return response, nil | |
| } | |
| 
 | |
| // HandleMetadataV5V6 implements Metadata API v5/v6 with OfflineReplicas field | |
| func (h *Handler) HandleMetadataV5V6(correlationID uint32, requestBody []byte) ([]byte, error) { | |
| 	return h.handleMetadataV5ToV8(correlationID, requestBody, 5) | |
| } | |
| 
 | |
| // HandleMetadataV7 implements Metadata API v7 with LeaderEpoch field (REGULAR FORMAT, NOT FLEXIBLE) | |
| func (h *Handler) HandleMetadataV7(correlationID uint32, requestBody []byte) ([]byte, error) { | |
| 	// CRITICAL: Metadata v7 uses REGULAR arrays/strings (like v5/v6), NOT compact format | |
| 	// Only v9+ uses compact format (flexible responses) | |
| 	return h.handleMetadataV5ToV8(correlationID, requestBody, 7) | |
| } | |
| 
 | |
| // handleMetadataV5ToV8 handles Metadata v5-v8 with regular (non-compact) encoding | |
| // v5/v6: adds OfflineReplicas field to partitions | |
| // v7: adds LeaderEpoch field to partitions | |
| // v8: adds ClusterAuthorizedOperations field | |
| // All use REGULAR arrays/strings (NOT compact) - only v9+ uses compact format | |
| func (h *Handler) handleMetadataV5ToV8(correlationID uint32, requestBody []byte, apiVersion int) ([]byte, error) { | |
| 	// v5-v8 response layout: throttle_time_ms(4) + brokers(ARRAY) + cluster_id(NULLABLE_STRING) + controller_id(4) + topics(ARRAY) [+ cluster_authorized_operations(4) for v8] | |
| 	// Each partition includes: error_code(2) + partition_index(4) + leader_id(4) [+ leader_epoch(4) for v7+] + replica_nodes(ARRAY) + isr_nodes(ARRAY) + offline_replicas(ARRAY) | |
|  | |
| 	// Parse requested topics (empty means all) | |
| 	requestedTopics := h.parseMetadataTopics(requestBody) | |
| 	glog.V(3).Infof("[METADATA v%d] Requested topics: %v (empty=all)", apiVersion, requestedTopics) | |
| 
 | |
| 	// Determine topics to return using SeaweedMQ handler | |
| 	var topicsToReturn []string | |
| 	if len(requestedTopics) == 0 { | |
| 		topicsToReturn = h.seaweedMQHandler.ListTopics() | |
| 	} else { | |
| 		// FIXED: Proper topic existence checking (removed the hack) | |
| 		// Now that CreateTopics v5 works, we use proper Kafka workflow: | |
| 		// 1. Check which requested topics actually exist | |
| 		// 2. Auto-create system topics if they don't exist | |
| 		// 3. Only return existing topics in metadata | |
| 		// 4. Client will call CreateTopics for non-existent topics | |
| 		// 5. Then request metadata again to see the created topics | |
| 		for _, topic := range requestedTopics { | |
| 			if isSystemTopic(topic) { | |
| 				// Always try to auto-create system topics during metadata requests | |
| 				glog.V(3).Infof("[METADATA v%d] Ensuring system topic %s exists during metadata request", apiVersion, topic) | |
| 				if !h.seaweedMQHandler.TopicExists(topic) { | |
| 					glog.V(3).Infof("[METADATA v%d] Auto-creating system topic %s during metadata request", apiVersion, topic) | |
| 					if err := h.createTopicWithSchemaSupport(topic, 1); err != nil { | |
| 						glog.V(0).Infof("[METADATA v%d] Failed to auto-create system topic %s: %v", apiVersion, topic, err) | |
| 						// Continue without adding to topicsToReturn - client will get UNKNOWN_TOPIC_OR_PARTITION | |
| 					} else { | |
| 						glog.V(3).Infof("[METADATA v%d] Successfully auto-created system topic %s", apiVersion, topic) | |
| 					} | |
| 				} else { | |
| 					glog.V(3).Infof("[METADATA v%d] System topic %s already exists", apiVersion, topic) | |
| 				} | |
| 				topicsToReturn = append(topicsToReturn, topic) | |
| 			} else if h.seaweedMQHandler.TopicExists(topic) { | |
| 				topicsToReturn = append(topicsToReturn, topic) | |
| 			} | |
| 		} | |
| 		glog.V(3).Infof("[METADATA v%d] Returning topics: %v (requested: %v)", apiVersion, topicsToReturn, requestedTopics) | |
| 	} | |
| 
 | |
| 	var buf bytes.Buffer | |
| 
 | |
| 	// Correlation ID (4 bytes) | |
| 	// NOTE: Correlation ID is handled by writeResponseWithCorrelationID | |
| 	// Do NOT include it in the response body | |
|  | |
| 	// ThrottleTimeMs (4 bytes) - v3+ addition | |
| 	binary.Write(&buf, binary.BigEndian, int32(0)) // No throttling | |
|  | |
| 	// Brokers array (4 bytes length + brokers) - 1 broker (this gateway) | |
| 	binary.Write(&buf, binary.BigEndian, int32(1)) | |
| 
 | |
| 	// Get advertised address for client connections | |
| 	host, port := h.GetAdvertisedAddress(h.GetGatewayAddress()) | |
| 
 | |
| 	nodeID := int32(1) // Single gateway node | |
|  | |
| 	// Broker: node_id(4) + host(STRING) + port(4) + rack(STRING) + cluster_id(NULLABLE_STRING) | |
| 	binary.Write(&buf, binary.BigEndian, nodeID) | |
| 
 | |
| 	// Host (STRING: 2 bytes length + data) - validate length fits in int16 | |
| 	if len(host) > 32767 { | |
| 		return nil, fmt.Errorf("host name too long: %d bytes", len(host)) | |
| 	} | |
| 	binary.Write(&buf, binary.BigEndian, int16(len(host))) | |
| 	buf.WriteString(host) | |
| 
 | |
| 	// Port (4 bytes) - validate port range | |
| 	if port < 0 || port > 65535 { | |
| 		return nil, fmt.Errorf("invalid port number: %d", port) | |
| 	} | |
| 	binary.Write(&buf, binary.BigEndian, int32(port)) | |
| 
 | |
| 	// Rack (STRING: 2 bytes length + data) - v1+ addition, non-nullable | |
| 	binary.Write(&buf, binary.BigEndian, int16(0)) // Empty string | |
|  | |
| 	// ClusterID (NULLABLE_STRING: 2 bytes length + data) - v2+ addition | |
| 	// Schema Registry requires a non-null cluster ID | |
| 	clusterID := "seaweedfs-kafka-gateway" | |
| 	binary.Write(&buf, binary.BigEndian, int16(len(clusterID))) | |
| 	buf.WriteString(clusterID) | |
| 
 | |
| 	// ControllerID (4 bytes) - v1+ addition | |
| 	binary.Write(&buf, binary.BigEndian, int32(1)) | |
| 
 | |
| 	// Topics array (4 bytes length + topics) | |
| 	binary.Write(&buf, binary.BigEndian, int32(len(topicsToReturn))) | |
| 
 | |
| 	for _, topicName := range topicsToReturn { | |
| 		// ErrorCode (2 bytes) | |
| 		binary.Write(&buf, binary.BigEndian, int16(0)) | |
| 
 | |
| 		// Name (STRING: 2 bytes length + data) | |
| 		binary.Write(&buf, binary.BigEndian, int16(len(topicName))) | |
| 		buf.WriteString(topicName) | |
| 
 | |
| 		// IsInternal (1 byte) - v1+ addition | |
| 		buf.WriteByte(0) // false | |
|  | |
| 		// Get actual partition count from topic info | |
| 		topicInfo, exists := h.seaweedMQHandler.GetTopicInfo(topicName) | |
| 		partitionCount := h.GetDefaultPartitions() // Use configurable default | |
| 		if exists && topicInfo != nil { | |
| 			partitionCount = topicInfo.Partitions | |
| 		} | |
| 
 | |
| 		// Partitions array (4 bytes length + partitions) | |
| 		binary.Write(&buf, binary.BigEndian, partitionCount) | |
| 
 | |
| 		// Create partition entries for each partition | |
| 		for partitionID := int32(0); partitionID < partitionCount; partitionID++ { | |
| 			binary.Write(&buf, binary.BigEndian, int16(0))    // ErrorCode | |
| 			binary.Write(&buf, binary.BigEndian, partitionID) // PartitionIndex | |
| 			binary.Write(&buf, binary.BigEndian, int32(1))    // LeaderID | |
|  | |
| 			// LeaderEpoch (4 bytes) - v7+ addition | |
| 			if apiVersion >= 7 { | |
| 				binary.Write(&buf, binary.BigEndian, int32(0)) // Leader epoch 0 | |
| 			} | |
| 
 | |
| 			// ReplicaNodes array (4 bytes length + nodes) | |
| 			binary.Write(&buf, binary.BigEndian, int32(1)) // 1 replica | |
| 			binary.Write(&buf, binary.BigEndian, int32(1)) // NodeID 1 | |
|  | |
| 			// IsrNodes array (4 bytes length + nodes) | |
| 			binary.Write(&buf, binary.BigEndian, int32(1)) // 1 ISR node | |
| 			binary.Write(&buf, binary.BigEndian, int32(1)) // NodeID 1 | |
|  | |
| 			// OfflineReplicas array (4 bytes length + nodes) - v5+ addition | |
| 			binary.Write(&buf, binary.BigEndian, int32(0)) // No offline replicas | |
| 		} | |
| 	} | |
| 
 | |
| 	// ClusterAuthorizedOperations (4 bytes) - v8+ addition | |
| 	if apiVersion >= 8 { | |
| 		binary.Write(&buf, binary.BigEndian, int32(-2147483648)) // All operations allowed (bit mask) | |
| 	} | |
| 
 | |
| 	response := buf.Bytes() | |
| 
 | |
| 	return response, nil | |
| } | |
| 
 | |
| func (h *Handler) parseMetadataTopics(requestBody []byte) []string { | |
| 	// Support both v0/v1 parsing: v1 payload starts directly with topics array length (int32), | |
| 	// while older assumptions may have included a client_id string first. | |
| 	if len(requestBody) < 4 { | |
| 		return []string{} | |
| 	} | |
| 
 | |
| 	// Try path A: interpret first 4 bytes as topics_count | |
| 	offset := 0 | |
| 	topicsCount := binary.BigEndian.Uint32(requestBody[offset : offset+4]) | |
| 	if topicsCount == 0xFFFFFFFF { // -1 means all topics | |
| 		return []string{} | |
| 	} | |
| 	if topicsCount <= 1000000 { // sane bound | |
| 		offset += 4 | |
| 		topics := make([]string, 0, topicsCount) | |
| 		for i := uint32(0); i < topicsCount && offset+2 <= len(requestBody); i++ { | |
| 			nameLen := int(binary.BigEndian.Uint16(requestBody[offset : offset+2])) | |
| 			offset += 2 | |
| 			if offset+nameLen > len(requestBody) { | |
| 				break | |
| 			} | |
| 			topics = append(topics, string(requestBody[offset:offset+nameLen])) | |
| 			offset += nameLen | |
| 		} | |
| 		return topics | |
| 	} | |
| 
 | |
| 	// Path B: assume leading client_id string then topics_count | |
| 	if len(requestBody) < 6 { | |
| 		return []string{} | |
| 	} | |
| 	clientIDLen := int(binary.BigEndian.Uint16(requestBody[0:2])) | |
| 	offset = 2 + clientIDLen | |
| 	if len(requestBody) < offset+4 { | |
| 		return []string{} | |
| 	} | |
| 	topicsCount = binary.BigEndian.Uint32(requestBody[offset : offset+4]) | |
| 	offset += 4 | |
| 	if topicsCount == 0xFFFFFFFF { | |
| 		return []string{} | |
| 	} | |
| 	topics := make([]string, 0, topicsCount) | |
| 	for i := uint32(0); i < topicsCount && offset+2 <= len(requestBody); i++ { | |
| 		nameLen := int(binary.BigEndian.Uint16(requestBody[offset : offset+2])) | |
| 		offset += 2 | |
| 		if offset+nameLen > len(requestBody) { | |
| 			break | |
| 		} | |
| 		topics = append(topics, string(requestBody[offset:offset+nameLen])) | |
| 		offset += nameLen | |
| 	} | |
| 	return topics | |
| } | |
| 
 | |
| func (h *Handler) handleListOffsets(correlationID uint32, apiVersion uint16, requestBody []byte) ([]byte, error) { | |
| 
 | |
| 	// Parse minimal request to understand what's being asked (header already stripped) | |
| 	offset := 0 | |
| 
 | |
| 	// v1+ has replica_id(4) | |
| 	if apiVersion >= 1 { | |
| 		if len(requestBody) < offset+4 { | |
| 			return nil, fmt.Errorf("ListOffsets v%d request missing replica_id", apiVersion) | |
| 		} | |
| 		_ = int32(binary.BigEndian.Uint32(requestBody[offset : offset+4])) // replicaID | |
| 		offset += 4 | |
| 	} | |
| 
 | |
| 	// v2+ adds isolation_level(1) | |
| 	if apiVersion >= 2 { | |
| 		if len(requestBody) < offset+1 { | |
| 			return nil, fmt.Errorf("ListOffsets v%d request missing isolation_level", apiVersion) | |
| 		} | |
| 		_ = requestBody[offset] // isolationLevel | |
| 		offset += 1 | |
| 	} | |
| 
 | |
| 	if len(requestBody) < offset+4 { | |
| 		return nil, fmt.Errorf("ListOffsets request missing topics count") | |
| 	} | |
| 
 | |
| 	topicsCount := binary.BigEndian.Uint32(requestBody[offset : offset+4]) | |
| 	offset += 4 | |
| 
 | |
| 	response := make([]byte, 0, 256) | |
| 
 | |
| 	// NOTE: Correlation ID is handled by writeResponseWithHeader | |
| 	// Do NOT include it in the response body | |
|  | |
| 	// Throttle time (4 bytes, 0 = no throttling) - v2+ only | |
| 	if apiVersion >= 2 { | |
| 		response = append(response, 0, 0, 0, 0) | |
| 	} | |
| 
 | |
| 	// Topics count (will be updated later with actual count) | |
| 	topicsCountBytes := make([]byte, 4) | |
| 	topicsCountOffset := len(response) // Remember where to update the count | |
| 	binary.BigEndian.PutUint32(topicsCountBytes, topicsCount) | |
| 	response = append(response, topicsCountBytes...) | |
| 
 | |
| 	// Track how many topics we actually process | |
| 	actualTopicsCount := uint32(0) | |
| 
 | |
| 	// Process each requested topic | |
| 	for i := uint32(0); i < topicsCount && offset < len(requestBody); i++ { | |
| 		if len(requestBody) < offset+2 { | |
| 			break | |
| 		} | |
| 
 | |
| 		// Parse topic name | |
| 		topicNameSize := binary.BigEndian.Uint16(requestBody[offset : offset+2]) | |
| 		offset += 2 | |
| 
 | |
| 		if len(requestBody) < offset+int(topicNameSize)+4 { | |
| 			break | |
| 		} | |
| 
 | |
| 		topicName := requestBody[offset : offset+int(topicNameSize)] | |
| 		offset += int(topicNameSize) | |
| 
 | |
| 		// Parse partitions count for this topic | |
| 		partitionsCount := binary.BigEndian.Uint32(requestBody[offset : offset+4]) | |
| 		offset += 4 | |
| 
 | |
| 		// Response: topic_name_size(2) + topic_name + partitions_array | |
| 		response = append(response, byte(topicNameSize>>8), byte(topicNameSize)) | |
| 		response = append(response, topicName...) | |
| 
 | |
| 		partitionsCountBytes := make([]byte, 4) | |
| 		binary.BigEndian.PutUint32(partitionsCountBytes, partitionsCount) | |
| 		response = append(response, partitionsCountBytes...) | |
| 
 | |
| 		// Process each partition | |
| 		for j := uint32(0); j < partitionsCount && offset+12 <= len(requestBody); j++ { | |
| 			// Parse partition request: partition_id(4) + timestamp(8) | |
| 			partitionID := binary.BigEndian.Uint32(requestBody[offset : offset+4]) | |
| 			timestamp := int64(binary.BigEndian.Uint64(requestBody[offset+4 : offset+12])) | |
| 			offset += 12 | |
| 
 | |
| 			// Response: partition_id(4) + error_code(2) + timestamp(8) + offset(8) | |
| 			partitionIDBytes := make([]byte, 4) | |
| 			binary.BigEndian.PutUint32(partitionIDBytes, partitionID) | |
| 			response = append(response, partitionIDBytes...) | |
| 
 | |
| 			// Error code (0 = no error) | |
| 			response = append(response, 0, 0) | |
| 
 | |
| 			// Use direct SMQ reading - no ledgers needed | |
| 			// SMQ handles offset management internally | |
| 			var responseTimestamp int64 | |
| 			var responseOffset int64 | |
| 
 | |
| 			switch timestamp { | |
| 			case -2: // earliest offset | |
| 				// Get the actual earliest offset from SMQ | |
| 				earliestOffset, err := h.seaweedMQHandler.GetEarliestOffset(string(topicName), int32(partitionID)) | |
| 				if err != nil { | |
| 					responseOffset = 0 // fallback to 0 | |
| 				} else { | |
| 					responseOffset = earliestOffset | |
| 				} | |
| 				responseTimestamp = 0 // No specific timestamp for earliest | |
|  | |
| 			case -1: // latest offset | |
| 				// Get the actual latest offset from SMQ | |
| 				if h.seaweedMQHandler == nil { | |
| 					responseOffset = 0 | |
| 				} else { | |
| 					latestOffset, err := h.seaweedMQHandler.GetLatestOffset(string(topicName), int32(partitionID)) | |
| 					if err != nil { | |
| 						responseOffset = 0 // fallback to 0 | |
| 					} else { | |
| 						responseOffset = latestOffset | |
| 					} | |
| 				} | |
| 				responseTimestamp = 0 // No specific timestamp for latest | |
| 			default: // specific timestamp - find offset by timestamp | |
| 				// For timestamp-based lookup, we need to implement this properly | |
| 				// For now, return 0 as fallback | |
| 				responseOffset = 0 | |
| 				responseTimestamp = timestamp | |
| 			} | |
| 
 | |
| 			// Ensure we never return a timestamp as offset - this was the bug! | |
| 			if responseOffset > 1000000000 { // If offset looks like a timestamp | |
| 				responseOffset = 0 | |
| 			} | |
| 
 | |
| 			timestampBytes := make([]byte, 8) | |
| 			binary.BigEndian.PutUint64(timestampBytes, uint64(responseTimestamp)) | |
| 			response = append(response, timestampBytes...) | |
| 
 | |
| 			offsetBytes := make([]byte, 8) | |
| 			binary.BigEndian.PutUint64(offsetBytes, uint64(responseOffset)) | |
| 			response = append(response, offsetBytes...) | |
| 		} | |
| 
 | |
| 		// Successfully processed this topic | |
| 		actualTopicsCount++ | |
| 	} | |
| 
 | |
| 	// CRITICAL FIX: Update the topics count in the response header with the actual count | |
| 	// This prevents ErrIncompleteResponse when request parsing fails mid-way | |
| 	if actualTopicsCount != topicsCount { | |
| 		binary.BigEndian.PutUint32(response[topicsCountOffset:topicsCountOffset+4], actualTopicsCount) | |
| 	} | |
| 
 | |
| 	return response, nil | |
| } | |
| 
 | |
| func (h *Handler) handleCreateTopics(correlationID uint32, apiVersion uint16, requestBody []byte) ([]byte, error) { | |
| 
 | |
| 	if len(requestBody) < 2 { | |
| 		return nil, fmt.Errorf("CreateTopics request too short") | |
| 	} | |
| 
 | |
| 	// Parse based on API version | |
| 	switch apiVersion { | |
| 	case 0, 1: | |
| 		response, err := h.handleCreateTopicsV0V1(correlationID, requestBody) | |
| 		return response, err | |
| 	case 2, 3, 4: | |
| 		// kafka-go sends v2-4 in regular format, not compact | |
| 		response, err := h.handleCreateTopicsV2To4(correlationID, requestBody) | |
| 		return response, err | |
| 	case 5: | |
| 		// v5+ uses flexible format with compact arrays | |
| 		response, err := h.handleCreateTopicsV2Plus(correlationID, apiVersion, requestBody) | |
| 		return response, err | |
| 	default: | |
| 		return nil, fmt.Errorf("unsupported CreateTopics API version: %d", apiVersion) | |
| 	} | |
| } | |
| 
 | |
| // handleCreateTopicsV2To4 handles CreateTopics API versions 2-4 (auto-detect regular vs compact format) | |
| func (h *Handler) handleCreateTopicsV2To4(correlationID uint32, requestBody []byte) ([]byte, error) { | |
| 	// Auto-detect format: kafka-go sends regular format, tests send compact format | |
| 	if len(requestBody) < 1 { | |
| 		return nil, fmt.Errorf("CreateTopics v2-4 request too short") | |
| 	} | |
| 
 | |
| 	// Detect format by checking first byte | |
| 	// Compact format: first byte is compact array length (usually 0x02 for 1 topic) | |
| 	// Regular format: first 4 bytes are regular array count (usually 0x00000001 for 1 topic) | |
| 	isCompactFormat := false | |
| 	if len(requestBody) >= 4 { | |
| 		// Check if this looks like a regular 4-byte array count | |
| 		regularCount := binary.BigEndian.Uint32(requestBody[0:4]) | |
| 		// If the "regular count" is very large (> 1000), it's probably compact format | |
| 		// Also check if first byte is small (typical compact array length) | |
| 		if regularCount > 1000 || (requestBody[0] <= 10 && requestBody[0] > 0) { | |
| 			isCompactFormat = true | |
| 		} | |
| 	} else if requestBody[0] <= 10 && requestBody[0] > 0 { | |
| 		isCompactFormat = true | |
| 	} | |
| 
 | |
| 	if isCompactFormat { | |
| 		// Delegate to the compact format handler | |
| 		response, err := h.handleCreateTopicsV2Plus(correlationID, 2, requestBody) | |
| 		return response, err | |
| 	} | |
| 
 | |
| 	// Handle regular format | |
| 	offset := 0 | |
| 	if len(requestBody) < offset+4 { | |
| 		return nil, fmt.Errorf("CreateTopics v2-4 request too short for topics array") | |
| 	} | |
| 
 | |
| 	topicsCount := binary.BigEndian.Uint32(requestBody[offset : offset+4]) | |
| 	offset += 4 | |
| 
 | |
| 	// Parse topics | |
| 	topics := make([]struct { | |
| 		name        string | |
| 		partitions  uint32 | |
| 		replication uint16 | |
| 	}, 0, topicsCount) | |
| 	for i := uint32(0); i < topicsCount; i++ { | |
| 		if len(requestBody) < offset+2 { | |
| 			return nil, fmt.Errorf("CreateTopics v2-4: truncated topic name length") | |
| 		} | |
| 		nameLen := binary.BigEndian.Uint16(requestBody[offset : offset+2]) | |
| 		offset += 2 | |
| 		if len(requestBody) < offset+int(nameLen) { | |
| 			return nil, fmt.Errorf("CreateTopics v2-4: truncated topic name") | |
| 		} | |
| 		topicName := string(requestBody[offset : offset+int(nameLen)]) | |
| 		offset += int(nameLen) | |
| 
 | |
| 		if len(requestBody) < offset+4 { | |
| 			return nil, fmt.Errorf("CreateTopics v2-4: truncated num_partitions") | |
| 		} | |
| 		numPartitions := binary.BigEndian.Uint32(requestBody[offset : offset+4]) | |
| 		offset += 4 | |
| 
 | |
| 		if len(requestBody) < offset+2 { | |
| 			return nil, fmt.Errorf("CreateTopics v2-4: truncated replication_factor") | |
| 		} | |
| 		replication := binary.BigEndian.Uint16(requestBody[offset : offset+2]) | |
| 		offset += 2 | |
| 
 | |
| 		// Assignments array (array of partition assignments) - skip contents | |
| 		if len(requestBody) < offset+4 { | |
| 			return nil, fmt.Errorf("CreateTopics v2-4: truncated assignments count") | |
| 		} | |
| 		assignments := binary.BigEndian.Uint32(requestBody[offset : offset+4]) | |
| 		offset += 4 | |
| 		for j := uint32(0); j < assignments; j++ { | |
| 			// partition_id (int32) + replicas (array int32) | |
| 			if len(requestBody) < offset+4 { | |
| 				return nil, fmt.Errorf("CreateTopics v2-4: truncated assignment partition id") | |
| 			} | |
| 			offset += 4 | |
| 			if len(requestBody) < offset+4 { | |
| 				return nil, fmt.Errorf("CreateTopics v2-4: truncated replicas count") | |
| 			} | |
| 			replicasCount := binary.BigEndian.Uint32(requestBody[offset : offset+4]) | |
| 			offset += 4 | |
| 			// skip replica ids | |
| 			offset += int(replicasCount) * 4 | |
| 		} | |
| 
 | |
| 		// Configs array (array of (name,value) strings) - skip contents | |
| 		if len(requestBody) < offset+4 { | |
| 			return nil, fmt.Errorf("CreateTopics v2-4: truncated configs count") | |
| 		} | |
| 		configs := binary.BigEndian.Uint32(requestBody[offset : offset+4]) | |
| 		offset += 4 | |
| 		for j := uint32(0); j < configs; j++ { | |
| 			// name (string) | |
| 			if len(requestBody) < offset+2 { | |
| 				return nil, fmt.Errorf("CreateTopics v2-4: truncated config name length") | |
| 			} | |
| 			nameLen := binary.BigEndian.Uint16(requestBody[offset : offset+2]) | |
| 			offset += 2 + int(nameLen) | |
| 			// value (nullable string) | |
| 			if len(requestBody) < offset+2 { | |
| 				return nil, fmt.Errorf("CreateTopics v2-4: truncated config value length") | |
| 			} | |
| 			valueLen := int16(binary.BigEndian.Uint16(requestBody[offset : offset+2])) | |
| 			offset += 2 | |
| 			if valueLen >= 0 { | |
| 				offset += int(valueLen) | |
| 			} | |
| 		} | |
| 
 | |
| 		topics = append(topics, struct { | |
| 			name        string | |
| 			partitions  uint32 | |
| 			replication uint16 | |
| 		}{topicName, numPartitions, replication}) | |
| 	} | |
| 
 | |
| 	// timeout_ms | |
| 	if len(requestBody) >= offset+4 { | |
| 		_ = binary.BigEndian.Uint32(requestBody[offset : offset+4]) | |
| 		offset += 4 | |
| 	} | |
| 	// validate_only (boolean) | |
| 	if len(requestBody) >= offset+1 { | |
| 		_ = requestBody[offset] | |
| 		offset += 1 | |
| 	} | |
| 
 | |
| 	// Build response | |
| 	response := make([]byte, 0, 128) | |
| 	// NOTE: Correlation ID is handled by writeResponseWithHeader | |
| 	// Do NOT include it in the response body | |
| 	// throttle_time_ms (4 bytes) | |
| 	response = append(response, 0, 0, 0, 0) | |
| 	// topics array count (int32) | |
| 	countBytes := make([]byte, 4) | |
| 	binary.BigEndian.PutUint32(countBytes, uint32(len(topics))) | |
| 	response = append(response, countBytes...) | |
| 	// per-topic responses | |
| 	for _, t := range topics { | |
| 		// topic name (string) | |
| 		nameLen := make([]byte, 2) | |
| 		binary.BigEndian.PutUint16(nameLen, uint16(len(t.name))) | |
| 		response = append(response, nameLen...) | |
| 		response = append(response, []byte(t.name)...) | |
| 		// error_code (int16) | |
| 		var errCode uint16 = 0 | |
| 		if h.seaweedMQHandler.TopicExists(t.name) { | |
| 			errCode = 36 // TOPIC_ALREADY_EXISTS | |
| 		} else if t.partitions == 0 { | |
| 			errCode = 37 // INVALID_PARTITIONS | |
| 		} else if t.replication == 0 { | |
| 			errCode = 38 // INVALID_REPLICATION_FACTOR | |
| 		} else { | |
| 			// Use schema-aware topic creation | |
| 			if err := h.createTopicWithSchemaSupport(t.name, int32(t.partitions)); err != nil { | |
| 				errCode = 1 // UNKNOWN_SERVER_ERROR | |
| 			} | |
| 		} | |
| 		eb := make([]byte, 2) | |
| 		binary.BigEndian.PutUint16(eb, errCode) | |
| 		response = append(response, eb...) | |
| 		// error_message (nullable string) -> null | |
| 		response = append(response, 0xFF, 0xFF) | |
| 	} | |
| 
 | |
| 	return response, nil | |
| } | |
| 
 | |
| func (h *Handler) handleCreateTopicsV0V1(correlationID uint32, requestBody []byte) ([]byte, error) { | |
| 
 | |
| 	if len(requestBody) < 4 { | |
| 		return nil, fmt.Errorf("CreateTopics v0/v1 request too short") | |
| 	} | |
| 
 | |
| 	offset := 0 | |
| 
 | |
| 	// Parse topics array (regular array format: count + topics) | |
| 	topicsCount := binary.BigEndian.Uint32(requestBody[offset : offset+4]) | |
| 	offset += 4 | |
| 
 | |
| 	// Build response | |
| 	response := make([]byte, 0, 256) | |
| 
 | |
| 	// NOTE: Correlation ID is handled by writeResponseWithHeader | |
| 	// Do NOT include it in the response body | |
|  | |
| 	// Topics array count (4 bytes in v0/v1) | |
| 	topicsCountBytes := make([]byte, 4) | |
| 	binary.BigEndian.PutUint32(topicsCountBytes, topicsCount) | |
| 	response = append(response, topicsCountBytes...) | |
| 
 | |
| 	// Process each topic | |
| 	for i := uint32(0); i < topicsCount && offset < len(requestBody); i++ { | |
| 		// Parse topic name (regular string: length + bytes) | |
| 		if len(requestBody) < offset+2 { | |
| 			break | |
| 		} | |
| 		topicNameLength := binary.BigEndian.Uint16(requestBody[offset : offset+2]) | |
| 		offset += 2 | |
| 
 | |
| 		if len(requestBody) < offset+int(topicNameLength) { | |
| 			break | |
| 		} | |
| 		topicName := string(requestBody[offset : offset+int(topicNameLength)]) | |
| 		offset += int(topicNameLength) | |
| 
 | |
| 		// Parse num_partitions (4 bytes) | |
| 		if len(requestBody) < offset+4 { | |
| 			break | |
| 		} | |
| 		numPartitions := binary.BigEndian.Uint32(requestBody[offset : offset+4]) | |
| 		offset += 4 | |
| 
 | |
| 		// Parse replication_factor (2 bytes) | |
| 		if len(requestBody) < offset+2 { | |
| 			break | |
| 		} | |
| 		replicationFactor := binary.BigEndian.Uint16(requestBody[offset : offset+2]) | |
| 		offset += 2 | |
| 
 | |
| 		// Parse assignments array (4 bytes count, then assignments) | |
| 		if len(requestBody) < offset+4 { | |
| 			break | |
| 		} | |
| 		assignmentsCount := binary.BigEndian.Uint32(requestBody[offset : offset+4]) | |
| 		offset += 4 | |
| 
 | |
| 		// Skip assignments for now (simplified) | |
| 		for j := uint32(0); j < assignmentsCount && offset < len(requestBody); j++ { | |
| 			// Skip partition_id (4 bytes) | |
| 			if len(requestBody) >= offset+4 { | |
| 				offset += 4 | |
| 			} | |
| 			// Skip replicas array (4 bytes count + replica_ids) | |
| 			if len(requestBody) >= offset+4 { | |
| 				replicasCount := binary.BigEndian.Uint32(requestBody[offset : offset+4]) | |
| 				offset += 4 | |
| 				offset += int(replicasCount) * 4 // Skip replica IDs | |
| 			} | |
| 		} | |
| 
 | |
| 		// Parse configs array (4 bytes count, then configs) | |
| 		if len(requestBody) >= offset+4 { | |
| 			configsCount := binary.BigEndian.Uint32(requestBody[offset : offset+4]) | |
| 			offset += 4 | |
| 
 | |
| 			// Skip configs (simplified) | |
| 			for j := uint32(0); j < configsCount && offset < len(requestBody); j++ { | |
| 				// Skip config name (string: 2 bytes length + bytes) | |
| 				if len(requestBody) >= offset+2 { | |
| 					configNameLength := binary.BigEndian.Uint16(requestBody[offset : offset+2]) | |
| 					offset += 2 + int(configNameLength) | |
| 				} | |
| 				// Skip config value (string: 2 bytes length + bytes) | |
| 				if len(requestBody) >= offset+2 { | |
| 					configValueLength := binary.BigEndian.Uint16(requestBody[offset : offset+2]) | |
| 					offset += 2 + int(configValueLength) | |
| 				} | |
| 			} | |
| 		} | |
| 
 | |
| 		// Build response for this topic | |
| 		// Topic name (string: length + bytes) | |
| 		topicNameLengthBytes := make([]byte, 2) | |
| 		binary.BigEndian.PutUint16(topicNameLengthBytes, uint16(len(topicName))) | |
| 		response = append(response, topicNameLengthBytes...) | |
| 		response = append(response, []byte(topicName)...) | |
| 
 | |
| 		// Determine error code and message | |
| 		var errorCode uint16 = 0 | |
| 
 | |
| 		// Apply defaults for invalid values | |
| 		if numPartitions <= 0 { | |
| 			numPartitions = uint32(h.GetDefaultPartitions()) // Use configurable default | |
| 		} | |
| 		if replicationFactor <= 0 { | |
| 			replicationFactor = 1 // Default to 1 replica | |
| 		} | |
| 
 | |
| 		// Use SeaweedMQ integration | |
| 		if h.seaweedMQHandler.TopicExists(topicName) { | |
| 			errorCode = 36 // TOPIC_ALREADY_EXISTS | |
| 		} else { | |
| 			// Create the topic in SeaweedMQ with schema support | |
| 			if err := h.createTopicWithSchemaSupport(topicName, int32(numPartitions)); err != nil { | |
| 				errorCode = 1 // UNKNOWN_SERVER_ERROR | |
| 			} | |
| 		} | |
| 
 | |
| 		// Error code (2 bytes) | |
| 		errorCodeBytes := make([]byte, 2) | |
| 		binary.BigEndian.PutUint16(errorCodeBytes, errorCode) | |
| 		response = append(response, errorCodeBytes...) | |
| 	} | |
| 
 | |
| 	// Parse timeout_ms (4 bytes) - at the end of request | |
| 	if len(requestBody) >= offset+4 { | |
| 		_ = binary.BigEndian.Uint32(requestBody[offset : offset+4]) // timeoutMs | |
| 		offset += 4 | |
| 	} | |
| 
 | |
| 	// Parse validate_only (1 byte) - only in v1 | |
| 	if len(requestBody) >= offset+1 { | |
| 		_ = requestBody[offset] != 0 // validateOnly | |
| 	} | |
| 
 | |
| 	return response, nil | |
| } | |
| 
 | |
| // handleCreateTopicsV2Plus handles CreateTopics API versions 2+ (flexible versions with compact arrays/strings) | |
| // For simplicity and consistency with existing response builder, this parses the flexible request, | |
| // converts it into the non-flexible v2-v4 body format, and reuses handleCreateTopicsV2To4 to build the response. | |
| func (h *Handler) handleCreateTopicsV2Plus(correlationID uint32, apiVersion uint16, requestBody []byte) ([]byte, error) { | |
| 	offset := 0 | |
| 
 | |
| 	// ADMIN CLIENT COMPATIBILITY FIX: | |
| 	// AdminClient's CreateTopics v5 request DOES start with top-level tagged fields (usually empty) | |
| 	// Parse them first, then the topics compact array | |
|  | |
| 	// Parse top-level tagged fields first (usually 0x00 for empty) | |
| 	_, consumed, err := DecodeTaggedFields(requestBody[offset:]) | |
| 	if err != nil { | |
| 		// Don't fail - AdminClient might not always include tagged fields properly | |
| 		// Just log and continue with topics parsing | |
| 	} else { | |
| 		offset += consumed | |
| 	} | |
| 
 | |
| 	// Topics (compact array) - Now correctly positioned after tagged fields | |
| 	topicsCount, consumed, err := DecodeCompactArrayLength(requestBody[offset:]) | |
| 	if err != nil { | |
| 		return nil, fmt.Errorf("CreateTopics v%d: decode topics compact array: %w", apiVersion, err) | |
| 	} | |
| 	offset += consumed | |
| 
 | |
| 	type topicSpec struct { | |
| 		name        string | |
| 		partitions  uint32 | |
| 		replication uint16 | |
| 	} | |
| 	topics := make([]topicSpec, 0, topicsCount) | |
| 
 | |
| 	for i := uint32(0); i < topicsCount; i++ { | |
| 		// Topic name (compact string) | |
| 		name, consumed, err := DecodeFlexibleString(requestBody[offset:]) | |
| 		if err != nil { | |
| 			return nil, fmt.Errorf("CreateTopics v%d: decode topic[%d] name: %w", apiVersion, i, err) | |
| 		} | |
| 		offset += consumed | |
| 
 | |
| 		if len(requestBody) < offset+6 { | |
| 			return nil, fmt.Errorf("CreateTopics v%d: truncated partitions/replication for topic[%d]", apiVersion, i) | |
| 		} | |
| 
 | |
| 		partitions := binary.BigEndian.Uint32(requestBody[offset : offset+4]) | |
| 		offset += 4 | |
| 		replication := binary.BigEndian.Uint16(requestBody[offset : offset+2]) | |
| 		offset += 2 | |
| 
 | |
| 		// ADMIN CLIENT COMPATIBILITY: AdminClient uses little-endian for replication factor | |
| 		// This violates Kafka protocol spec but we need to handle it for compatibility | |
| 		if replication == 256 { | |
| 			replication = 1 // AdminClient sent 0x01 0x00, intended as little-endian 1 | |
| 		} | |
| 
 | |
| 		// Apply defaults for invalid values | |
| 		if partitions <= 0 { | |
| 			partitions = uint32(h.GetDefaultPartitions()) // Use configurable default | |
| 		} | |
| 		if replication <= 0 { | |
| 			replication = 1 // Default to 1 replica | |
| 		} | |
| 
 | |
| 		// FIX 2: Assignments (compact array) - this was missing! | |
| 		assignCount, consumed, err := DecodeCompactArrayLength(requestBody[offset:]) | |
| 		if err != nil { | |
| 			return nil, fmt.Errorf("CreateTopics v%d: decode topic[%d] assignments array: %w", apiVersion, i, err) | |
| 		} | |
| 		offset += consumed | |
| 
 | |
| 		// Skip assignment entries (partition_id + replicas array) | |
| 		for j := uint32(0); j < assignCount; j++ { | |
| 			// partition_id (int32) | |
| 			if len(requestBody) < offset+4 { | |
| 				return nil, fmt.Errorf("CreateTopics v%d: truncated assignment[%d] partition_id", apiVersion, j) | |
| 			} | |
| 			offset += 4 | |
| 
 | |
| 			// replicas (compact array of int32) | |
| 			replicasCount, consumed, err := DecodeCompactArrayLength(requestBody[offset:]) | |
| 			if err != nil { | |
| 				return nil, fmt.Errorf("CreateTopics v%d: decode assignment[%d] replicas: %w", apiVersion, j, err) | |
| 			} | |
| 			offset += consumed | |
| 
 | |
| 			// Skip replica broker IDs (int32 each) | |
| 			if len(requestBody) < offset+int(replicasCount)*4 { | |
| 				return nil, fmt.Errorf("CreateTopics v%d: truncated assignment[%d] replicas", apiVersion, j) | |
| 			} | |
| 			offset += int(replicasCount) * 4 | |
| 
 | |
| 			// Assignment tagged fields | |
| 			_, consumed, err = DecodeTaggedFields(requestBody[offset:]) | |
| 			if err != nil { | |
| 				return nil, fmt.Errorf("CreateTopics v%d: decode assignment[%d] tagged fields: %w", apiVersion, j, err) | |
| 			} | |
| 			offset += consumed | |
| 		} | |
| 
 | |
| 		// Configs (compact array) - skip entries | |
| 		cfgCount, consumed, err := DecodeCompactArrayLength(requestBody[offset:]) | |
| 		if err != nil { | |
| 			return nil, fmt.Errorf("CreateTopics v%d: decode topic[%d] configs array: %w", apiVersion, i, err) | |
| 		} | |
| 		offset += consumed | |
| 
 | |
| 		for j := uint32(0); j < cfgCount; j++ { | |
| 			// name (compact string) | |
| 			_, consumed, err := DecodeFlexibleString(requestBody[offset:]) | |
| 			if err != nil { | |
| 				return nil, fmt.Errorf("CreateTopics v%d: decode topic[%d] config[%d] name: %w", apiVersion, i, j, err) | |
| 			} | |
| 			offset += consumed | |
| 
 | |
| 			// value (nullable compact string) | |
| 			_, consumed, err = DecodeFlexibleString(requestBody[offset:]) | |
| 			if err != nil { | |
| 				return nil, fmt.Errorf("CreateTopics v%d: decode topic[%d] config[%d] value: %w", apiVersion, i, j, err) | |
| 			} | |
| 			offset += consumed | |
| 
 | |
| 			// tagged fields for each config | |
| 			_, consumed, err = DecodeTaggedFields(requestBody[offset:]) | |
| 			if err != nil { | |
| 				return nil, fmt.Errorf("CreateTopics v%d: decode topic[%d] config[%d] tagged fields: %w", apiVersion, i, j, err) | |
| 			} | |
| 			offset += consumed | |
| 		} | |
| 
 | |
| 		// Tagged fields for topic | |
| 		_, consumed, err = DecodeTaggedFields(requestBody[offset:]) | |
| 		if err != nil { | |
| 			return nil, fmt.Errorf("CreateTopics v%d: decode topic[%d] tagged fields: %w", apiVersion, i, err) | |
| 		} | |
| 		offset += consumed | |
| 
 | |
| 		topics = append(topics, topicSpec{name: name, partitions: partitions, replication: replication}) | |
| 	} | |
| 
 | |
| 	for range topics { | |
| 	} | |
| 
 | |
| 	// timeout_ms (int32) | |
| 	if len(requestBody) < offset+4 { | |
| 		return nil, fmt.Errorf("CreateTopics v%d: missing timeout_ms", apiVersion) | |
| 	} | |
| 	timeoutMs := binary.BigEndian.Uint32(requestBody[offset : offset+4]) | |
| 	offset += 4 | |
| 
 | |
| 	// validate_only (boolean) | |
| 	if len(requestBody) < offset+1 { | |
| 		return nil, fmt.Errorf("CreateTopics v%d: missing validate_only flag", apiVersion) | |
| 	} | |
| 	validateOnly := requestBody[offset] != 0 | |
| 	offset += 1 | |
| 
 | |
| 	// Remaining bytes after parsing - could be additional fields | |
| 	if offset < len(requestBody) { | |
| 	} | |
| 
 | |
| 	// Reconstruct a non-flexible v2-like request body and reuse existing handler | |
| 	// Format: topics(ARRAY) + timeout_ms(INT32) + validate_only(BOOLEAN) | |
| 	var legacyBody []byte | |
| 
 | |
| 	// topics count (int32) | |
| 	legacyBody = append(legacyBody, 0, 0, 0, byte(len(topics))) | |
| 	if len(topics) > 0 { | |
| 		legacyBody[len(legacyBody)-1] = byte(len(topics)) | |
| 	} | |
| 
 | |
| 	for _, t := range topics { | |
| 		// topic name (STRING) | |
| 		nameLen := uint16(len(t.name)) | |
| 		legacyBody = append(legacyBody, byte(nameLen>>8), byte(nameLen)) | |
| 		legacyBody = append(legacyBody, []byte(t.name)...) | |
| 
 | |
| 		// num_partitions (INT32) | |
| 		legacyBody = append(legacyBody, byte(t.partitions>>24), byte(t.partitions>>16), byte(t.partitions>>8), byte(t.partitions)) | |
| 
 | |
| 		// replication_factor (INT16) | |
| 		legacyBody = append(legacyBody, byte(t.replication>>8), byte(t.replication)) | |
| 
 | |
| 		// assignments array (INT32 count = 0) | |
| 		legacyBody = append(legacyBody, 0, 0, 0, 0) | |
| 
 | |
| 		// configs array (INT32 count = 0) | |
| 		legacyBody = append(legacyBody, 0, 0, 0, 0) | |
| 	} | |
| 
 | |
| 	// timeout_ms | |
| 	legacyBody = append(legacyBody, byte(timeoutMs>>24), byte(timeoutMs>>16), byte(timeoutMs>>8), byte(timeoutMs)) | |
| 
 | |
| 	// validate_only | |
| 	if validateOnly { | |
| 		legacyBody = append(legacyBody, 1) | |
| 	} else { | |
| 		legacyBody = append(legacyBody, 0) | |
| 	} | |
| 
 | |
| 	// Build response directly instead of delegating to avoid circular dependency | |
| 	response := make([]byte, 0, 128) | |
| 
 | |
| 	// NOTE: Correlation ID and header tagged fields are handled by writeResponseWithHeader | |
| 	// Do NOT include them in the response body | |
|  | |
| 	// throttle_time_ms (4 bytes) - first field in CreateTopics response body | |
| 	response = append(response, 0, 0, 0, 0) | |
| 
 | |
| 	// topics (compact array) - V5 FLEXIBLE FORMAT | |
| 	topicCount := len(topics) | |
| 
 | |
| 	// Debug: log response size at each step | |
| 	debugResponseSize := func(step string) { | |
| 	} | |
| 	debugResponseSize("After correlation ID and throttle_time_ms") | |
| 
 | |
| 	// Compact array: length is encoded as UNSIGNED_VARINT(actualLength + 1) | |
| 	response = append(response, EncodeUvarint(uint32(topicCount+1))...) | |
| 	debugResponseSize("After topics array length") | |
| 
 | |
| 	// For each topic | |
| 	for _, t := range topics { | |
| 		// name (compact string): length is encoded as UNSIGNED_VARINT(actualLength + 1) | |
| 		nameBytes := []byte(t.name) | |
| 		response = append(response, EncodeUvarint(uint32(len(nameBytes)+1))...) | |
| 		response = append(response, nameBytes...) | |
| 
 | |
| 		// TopicId - Not present in v5, only added in v7+ | |
| 		// v5 CreateTopics response does not include TopicId field | |
|  | |
| 		// error_code (int16) | |
| 		var errCode uint16 = 0 | |
| 
 | |
| 		// ADMIN CLIENT COMPATIBILITY: Apply defaults before error checking | |
| 		actualPartitions := t.partitions | |
| 		if actualPartitions == 0 { | |
| 			actualPartitions = 1 // Default to 1 partition if 0 requested | |
| 		} | |
| 		actualReplication := t.replication | |
| 		if actualReplication == 0 { | |
| 			actualReplication = 1 // Default to 1 replication if 0 requested | |
| 		} | |
| 
 | |
| 		// ADMIN CLIENT COMPATIBILITY: Always return success for existing topics | |
| 		// AdminClient expects topic creation to succeed, even if topic already exists | |
| 		if h.seaweedMQHandler.TopicExists(t.name) { | |
| 			errCode = 0 // SUCCESS - AdminClient can handle this gracefully | |
| 		} else { | |
| 			// Use corrected values for error checking and topic creation with schema support | |
| 			if err := h.createTopicWithSchemaSupport(t.name, int32(actualPartitions)); err != nil { | |
| 				errCode = 1 // UNKNOWN_SERVER_ERROR | |
| 			} | |
| 		} | |
| 		eb := make([]byte, 2) | |
| 		binary.BigEndian.PutUint16(eb, errCode) | |
| 		response = append(response, eb...) | |
| 
 | |
| 		// error_message (compact nullable string) - ADMINCLIENT 7.4.0-CE COMPATIBILITY FIX | |
| 		// For "_schemas" topic, send null for byte-level compatibility with Java reference | |
| 		// For other topics, send empty string to avoid NPE in AdminClient response handling | |
| 		if t.name == "_schemas" { | |
| 			response = append(response, 0) // Null = 0 | |
| 		} else { | |
| 			response = append(response, 1) // Empty string = 1 (0 chars + 1) | |
| 		} | |
| 
 | |
| 		// ADDED FOR V5: num_partitions (int32) | |
| 		// ADMIN CLIENT COMPATIBILITY: Use corrected values from error checking logic | |
| 		partBytes := make([]byte, 4) | |
| 		binary.BigEndian.PutUint32(partBytes, actualPartitions) | |
| 		response = append(response, partBytes...) | |
| 
 | |
| 		// ADDED FOR V5: replication_factor (int16) | |
| 		replBytes := make([]byte, 2) | |
| 		binary.BigEndian.PutUint16(replBytes, actualReplication) | |
| 		response = append(response, replBytes...) | |
| 
 | |
| 		// configs (compact nullable array) - ADDED FOR V5 | |
| 		// ADMINCLIENT 7.4.0-CE NPE FIX: Send empty configs array instead of null | |
| 		// AdminClient 7.4.0-ce has NPE when configs=null but were requested | |
| 		// Empty array = 1 (0 configs + 1), still achieves ~30-byte response | |
| 		response = append(response, 1) // Empty configs array = 1 (0 configs + 1) | |
|  | |
| 		// Tagged fields for each topic - V5 format per Kafka source | |
| 		// Count tagged fields (topicConfigErrorCode only if != 0) | |
| 		topicConfigErrorCode := uint16(0) // No error | |
| 		numTaggedFields := 0 | |
| 		if topicConfigErrorCode != 0 { | |
| 			numTaggedFields = 1 | |
| 		} | |
| 
 | |
| 		// Write tagged fields count | |
| 		response = append(response, EncodeUvarint(uint32(numTaggedFields))...) | |
| 
 | |
| 		// Write tagged fields (only if topicConfigErrorCode != 0) | |
| 		if topicConfigErrorCode != 0 { | |
| 			// Tag 0: TopicConfigErrorCode | |
| 			response = append(response, EncodeUvarint(0)...) // Tag number 0 | |
| 			response = append(response, EncodeUvarint(2)...) // Length (int16 = 2 bytes) | |
| 			topicConfigErrBytes := make([]byte, 2) | |
| 			binary.BigEndian.PutUint16(topicConfigErrBytes, topicConfigErrorCode) | |
| 			response = append(response, topicConfigErrBytes...) | |
| 		} | |
| 
 | |
| 		debugResponseSize(fmt.Sprintf("After topic '%s'", t.name)) | |
| 	} | |
| 
 | |
| 	// Top-level tagged fields for v5 flexible response (empty) | |
| 	response = append(response, 0) // Empty tagged fields = 0 | |
| 	debugResponseSize("Final response") | |
| 
 | |
| 	return response, nil | |
| } | |
| 
 | |
| func (h *Handler) handleDeleteTopics(correlationID uint32, requestBody []byte) ([]byte, error) { | |
| 	// Parse minimal DeleteTopics request | |
| 	// Request format: client_id + timeout(4) + topics_array | |
|  | |
| 	if len(requestBody) < 6 { // client_id_size(2) + timeout(4) | |
| 		return nil, fmt.Errorf("DeleteTopics request too short") | |
| 	} | |
| 
 | |
| 	// Skip client_id | |
| 	clientIDSize := binary.BigEndian.Uint16(requestBody[0:2]) | |
| 	offset := 2 + int(clientIDSize) | |
| 
 | |
| 	if len(requestBody) < offset+8 { // timeout(4) + topics_count(4) | |
| 		return nil, fmt.Errorf("DeleteTopics request missing data") | |
| 	} | |
| 
 | |
| 	// Skip timeout | |
| 	offset += 4 | |
| 
 | |
| 	topicsCount := binary.BigEndian.Uint32(requestBody[offset : offset+4]) | |
| 	offset += 4 | |
| 
 | |
| 	response := make([]byte, 0, 256) | |
| 
 | |
| 	// NOTE: Correlation ID is handled by writeResponseWithHeader | |
| 	// Do NOT include it in the response body | |
|  | |
| 	// Throttle time (4 bytes, 0 = no throttling) | |
| 	response = append(response, 0, 0, 0, 0) | |
| 
 | |
| 	// Topics count (same as request) | |
| 	topicsCountBytes := make([]byte, 4) | |
| 	binary.BigEndian.PutUint32(topicsCountBytes, topicsCount) | |
| 	response = append(response, topicsCountBytes...) | |
| 
 | |
| 	// Process each topic (using SeaweedMQ handler) | |
|  | |
| 	for i := uint32(0); i < topicsCount && offset < len(requestBody); i++ { | |
| 		if len(requestBody) < offset+2 { | |
| 			break | |
| 		} | |
| 
 | |
| 		// Parse topic name | |
| 		topicNameSize := binary.BigEndian.Uint16(requestBody[offset : offset+2]) | |
| 		offset += 2 | |
| 
 | |
| 		if len(requestBody) < offset+int(topicNameSize) { | |
| 			break | |
| 		} | |
| 
 | |
| 		topicName := string(requestBody[offset : offset+int(topicNameSize)]) | |
| 		offset += int(topicNameSize) | |
| 
 | |
| 		// Response: topic_name + error_code(2) + error_message | |
| 		response = append(response, byte(topicNameSize>>8), byte(topicNameSize)) | |
| 		response = append(response, []byte(topicName)...) | |
| 
 | |
| 		// Check if topic exists and delete it | |
| 		var errorCode uint16 = 0 | |
| 		var errorMessage string = "" | |
| 
 | |
| 		// Use SeaweedMQ integration | |
| 		if !h.seaweedMQHandler.TopicExists(topicName) { | |
| 			errorCode = 3 // UNKNOWN_TOPIC_OR_PARTITION | |
| 			errorMessage = "Unknown topic" | |
| 		} else { | |
| 			// Delete the topic from SeaweedMQ | |
| 			if err := h.seaweedMQHandler.DeleteTopic(topicName); err != nil { | |
| 				errorCode = 1 // UNKNOWN_SERVER_ERROR | |
| 				errorMessage = err.Error() | |
| 			} | |
| 		} | |
| 
 | |
| 		// Error code | |
| 		response = append(response, byte(errorCode>>8), byte(errorCode)) | |
| 
 | |
| 		// Error message (nullable string) | |
| 		if errorMessage == "" { | |
| 			response = append(response, 0xFF, 0xFF) // null string | |
| 		} else { | |
| 			errorMsgLen := uint16(len(errorMessage)) | |
| 			response = append(response, byte(errorMsgLen>>8), byte(errorMsgLen)) | |
| 			response = append(response, []byte(errorMessage)...) | |
| 		} | |
| 	} | |
| 
 | |
| 	return response, nil | |
| } | |
| 
 | |
| // validateAPIVersion checks if we support the requested API version | |
| func (h *Handler) validateAPIVersion(apiKey, apiVersion uint16) error { | |
| 	supportedVersions := map[APIKey][2]uint16{ | |
| 		APIKeyApiVersions:     {0, 4}, // ApiVersions: v0-v4 (Kafka 8.0.0 compatibility) | |
| 		APIKeyMetadata:        {0, 7}, // Metadata: v0-v7 | |
| 		APIKeyProduce:         {0, 7}, // Produce: v0-v7 | |
| 		APIKeyFetch:           {0, 7}, // Fetch: v0-v7 | |
| 		APIKeyListOffsets:     {0, 2}, // ListOffsets: v0-v2 | |
| 		APIKeyCreateTopics:    {0, 5}, // CreateTopics: v0-v5 (updated to match implementation) | |
| 		APIKeyDeleteTopics:    {0, 4}, // DeleteTopics: v0-v4 | |
| 		APIKeyFindCoordinator: {0, 3}, // FindCoordinator: v0-v3 (v3+ uses flexible format) | |
| 		APIKeyJoinGroup:       {0, 6}, // JoinGroup: cap to v6 (first flexible version) | |
| 		APIKeySyncGroup:       {0, 5}, // SyncGroup: v0-v5 | |
| 		APIKeyOffsetCommit:    {0, 2}, // OffsetCommit: v0-v2 | |
| 		APIKeyOffsetFetch:     {0, 5}, // OffsetFetch: v0-v5 (updated to match implementation) | |
| 		APIKeyHeartbeat:       {0, 4}, // Heartbeat: v0-v4 | |
| 		APIKeyLeaveGroup:      {0, 4}, // LeaveGroup: v0-v4 | |
| 		APIKeyDescribeGroups:  {0, 5}, // DescribeGroups: v0-v5 | |
| 		APIKeyListGroups:      {0, 4}, // ListGroups: v0-v4 | |
| 		APIKeyDescribeConfigs: {0, 4}, // DescribeConfigs: v0-v4 | |
| 		APIKeyInitProducerId:  {0, 4}, // InitProducerId: v0-v4 | |
| 		APIKeyDescribeCluster: {0, 1}, // DescribeCluster: v0-v1 (KIP-919, AdminClient compatibility) | |
| 	} | |
| 
 | |
| 	if versionRange, exists := supportedVersions[APIKey(apiKey)]; exists { | |
| 		minVer, maxVer := versionRange[0], versionRange[1] | |
| 		if apiVersion < minVer || apiVersion > maxVer { | |
| 			return fmt.Errorf("unsupported API version %d for API key %d (supported: %d-%d)", | |
| 				apiVersion, apiKey, minVer, maxVer) | |
| 		} | |
| 		return nil | |
| 	} | |
| 
 | |
| 	return fmt.Errorf("unsupported API key: %d", apiKey) | |
| } | |
| 
 | |
| // buildUnsupportedVersionResponse creates a proper Kafka error response | |
| func (h *Handler) buildUnsupportedVersionResponse(correlationID uint32, apiKey, apiVersion uint16) ([]byte, error) { | |
| 	errorMsg := fmt.Sprintf("Unsupported version %d for API key", apiVersion) | |
| 	return BuildErrorResponseWithMessage(correlationID, ErrorCodeUnsupportedVersion, errorMsg), nil | |
| } | |
| 
 | |
| // handleMetadata routes to the appropriate version-specific handler | |
| func (h *Handler) handleMetadata(correlationID uint32, apiVersion uint16, requestBody []byte) ([]byte, error) { | |
| 	switch apiVersion { | |
| 	case 0: | |
| 		return h.HandleMetadataV0(correlationID, requestBody) | |
| 	case 1: | |
| 		return h.HandleMetadataV1(correlationID, requestBody) | |
| 	case 2: | |
| 		return h.HandleMetadataV2(correlationID, requestBody) | |
| 	case 3, 4: | |
| 		return h.HandleMetadataV3V4(correlationID, requestBody) | |
| 	case 5, 6: | |
| 		return h.HandleMetadataV5V6(correlationID, requestBody) | |
| 	case 7: | |
| 		return h.HandleMetadataV7(correlationID, requestBody) | |
| 	default: | |
| 		// For versions > 7, use the V7 handler (flexible format) | |
| 		if apiVersion > 7 { | |
| 			return h.HandleMetadataV7(correlationID, requestBody) | |
| 		} | |
| 		return nil, fmt.Errorf("metadata version %d not implemented yet", apiVersion) | |
| 	} | |
| } | |
| 
 | |
| // getAPIName returns a human-readable name for Kafka API keys (for debugging) | |
| func getAPIName(apiKey APIKey) string { | |
| 	switch apiKey { | |
| 	case APIKeyProduce: | |
| 		return "Produce" | |
| 	case APIKeyFetch: | |
| 		return "Fetch" | |
| 	case APIKeyListOffsets: | |
| 		return "ListOffsets" | |
| 	case APIKeyMetadata: | |
| 		return "Metadata" | |
| 	case APIKeyOffsetCommit: | |
| 		return "OffsetCommit" | |
| 	case APIKeyOffsetFetch: | |
| 		return "OffsetFetch" | |
| 	case APIKeyFindCoordinator: | |
| 		return "FindCoordinator" | |
| 	case APIKeyJoinGroup: | |
| 		return "JoinGroup" | |
| 	case APIKeyHeartbeat: | |
| 		return "Heartbeat" | |
| 	case APIKeyLeaveGroup: | |
| 		return "LeaveGroup" | |
| 	case APIKeySyncGroup: | |
| 		return "SyncGroup" | |
| 	case APIKeyDescribeGroups: | |
| 		return "DescribeGroups" | |
| 	case APIKeyListGroups: | |
| 		return "ListGroups" | |
| 	case APIKeyApiVersions: | |
| 		return "ApiVersions" | |
| 	case APIKeyCreateTopics: | |
| 		return "CreateTopics" | |
| 	case APIKeyDeleteTopics: | |
| 		return "DeleteTopics" | |
| 	case APIKeyDescribeConfigs: | |
| 		return "DescribeConfigs" | |
| 	case APIKeyInitProducerId: | |
| 		return "InitProducerId" | |
| 	case APIKeyDescribeCluster: | |
| 		return "DescribeCluster" | |
| 	default: | |
| 		return "Unknown" | |
| 	} | |
| } | |
| 
 | |
| // handleDescribeConfigs handles DescribeConfigs API requests (API key 32) | |
| func (h *Handler) handleDescribeConfigs(correlationID uint32, apiVersion uint16, requestBody []byte) ([]byte, error) { | |
| 
 | |
| 	// Parse request to extract resources | |
| 	resources, err := h.parseDescribeConfigsRequest(requestBody, apiVersion) | |
| 	if err != nil { | |
| 		glog.Errorf("DescribeConfigs parsing error: %v", err) | |
| 		return nil, fmt.Errorf("failed to parse DescribeConfigs request: %w", err) | |
| 	} | |
| 
 | |
| 	isFlexible := apiVersion >= 4 | |
| 	if !isFlexible { | |
| 		// Legacy (non-flexible) response for v0-3 | |
| 		response := make([]byte, 0, 2048) | |
| 
 | |
| 		// NOTE: Correlation ID is handled by writeResponseWithHeader | |
| 		// Do NOT include it in the response body | |
|  | |
| 		// Throttle time (0ms) | |
| 		throttleBytes := make([]byte, 4) | |
| 		binary.BigEndian.PutUint32(throttleBytes, 0) | |
| 		response = append(response, throttleBytes...) | |
| 
 | |
| 		// Resources array length | |
| 		resourcesBytes := make([]byte, 4) | |
| 		binary.BigEndian.PutUint32(resourcesBytes, uint32(len(resources))) | |
| 		response = append(response, resourcesBytes...) | |
| 
 | |
| 		// For each resource, return appropriate configs | |
| 		for _, resource := range resources { | |
| 			resourceResponse := h.buildDescribeConfigsResourceResponse(resource, apiVersion) | |
| 			response = append(response, resourceResponse...) | |
| 		} | |
| 
 | |
| 		return response, nil | |
| 	} | |
| 
 | |
| 	// Flexible response for v4+ | |
| 	response := make([]byte, 0, 2048) | |
| 
 | |
| 	// NOTE: Correlation ID is handled by writeResponseWithHeader | |
| 	// Do NOT include it in the response body | |
|  | |
| 	// throttle_time_ms (4 bytes) | |
| 	response = append(response, 0, 0, 0, 0) | |
| 
 | |
| 	// Results (compact array) | |
| 	response = append(response, EncodeUvarint(uint32(len(resources)+1))...) | |
| 
 | |
| 	for _, res := range resources { | |
| 		// ErrorCode (int16) = 0 | |
| 		response = append(response, 0, 0) | |
| 		// ErrorMessage (compact nullable string) = null (0) | |
| 		response = append(response, 0) | |
| 		// ResourceType (int8) | |
| 		response = append(response, byte(res.ResourceType)) | |
| 		// ResourceName (compact string) | |
| 		nameBytes := []byte(res.ResourceName) | |
| 		response = append(response, EncodeUvarint(uint32(len(nameBytes)+1))...) | |
| 		response = append(response, nameBytes...) | |
| 
 | |
| 		// Build configs for this resource | |
| 		var cfgs []ConfigEntry | |
| 		if res.ResourceType == 2 { // Topic | |
| 			cfgs = h.getTopicConfigs(res.ResourceName, res.ConfigNames) | |
| 			// Ensure cleanup.policy is compact for _schemas | |
| 			if res.ResourceName == "_schemas" { | |
| 				replaced := false | |
| 				for i := range cfgs { | |
| 					if cfgs[i].Name == "cleanup.policy" { | |
| 						cfgs[i].Value = "compact" | |
| 						replaced = true | |
| 						break | |
| 					} | |
| 				} | |
| 				if !replaced { | |
| 					cfgs = append(cfgs, ConfigEntry{Name: "cleanup.policy", Value: "compact"}) | |
| 				} | |
| 			} | |
| 		} else if res.ResourceType == 4 { // Broker | |
| 			cfgs = h.getBrokerConfigs(res.ConfigNames) | |
| 		} else { | |
| 			cfgs = []ConfigEntry{} | |
| 		} | |
| 
 | |
| 		// Configs (compact array) | |
| 		response = append(response, EncodeUvarint(uint32(len(cfgs)+1))...) | |
| 
 | |
| 		for _, cfg := range cfgs { | |
| 			// name (compact string) | |
| 			cb := []byte(cfg.Name) | |
| 			response = append(response, EncodeUvarint(uint32(len(cb)+1))...) | |
| 			response = append(response, cb...) | |
| 
 | |
| 			// value (compact nullable string) | |
| 			vb := []byte(cfg.Value) | |
| 			if len(vb) == 0 { | |
| 				response = append(response, 0) // null | |
| 			} else { | |
| 				response = append(response, EncodeUvarint(uint32(len(vb)+1))...) | |
| 				response = append(response, vb...) | |
| 			} | |
| 
 | |
| 			// readOnly (bool) | |
| 			if cfg.ReadOnly { | |
| 				response = append(response, 1) | |
| 			} else { | |
| 				response = append(response, 0) | |
| 			} | |
| 
 | |
| 			// configSource (int8): DEFAULT_CONFIG = 5 | |
| 			response = append(response, byte(5)) | |
| 
 | |
| 			// isSensitive (bool) | |
| 			if cfg.Sensitive { | |
| 				response = append(response, 1) | |
| 			} else { | |
| 				response = append(response, 0) | |
| 			} | |
| 
 | |
| 			// synonyms (compact array) - empty | |
| 			response = append(response, 1) | |
| 
 | |
| 			// config_type (int8) - STRING = 1 | |
| 			response = append(response, byte(1)) | |
| 
 | |
| 			// documentation (compact nullable string) - null | |
| 			response = append(response, 0) | |
| 
 | |
| 			// per-config tagged fields (empty) | |
| 			response = append(response, 0) | |
| 		} | |
| 
 | |
| 		// Per-result tagged fields (empty) | |
| 		response = append(response, 0) | |
| 	} | |
| 
 | |
| 	// Top-level tagged fields (empty) | |
| 	response = append(response, 0) | |
| 
 | |
| 	return response, nil | |
| } | |
| 
 | |
| // isFlexibleResponse determines if an API response should use flexible format (with header tagged fields) | |
| // Based on Kafka protocol specifications: most APIs become flexible at v3+, but some differ | |
| func isFlexibleResponse(apiKey uint16, apiVersion uint16) bool { | |
| 	// Reference: kafka-go/protocol/response.go:119 and sarama/response_header.go:21 | |
| 	// Flexible responses have headerVersion >= 1, which adds tagged fields after correlation ID | |
|  | |
| 	switch APIKey(apiKey) { | |
| 	case APIKeyProduce: | |
| 		return apiVersion >= 9 | |
| 	case APIKeyFetch: | |
| 		return apiVersion >= 12 | |
| 	case APIKeyMetadata: | |
| 		// Metadata v9+ uses flexible responses (v7-8 use compact arrays/strings but NOT flexible headers) | |
| 		return apiVersion >= 9 | |
| 	case APIKeyOffsetCommit: | |
| 		return apiVersion >= 8 | |
| 	case APIKeyOffsetFetch: | |
| 		return apiVersion >= 6 | |
| 	case APIKeyFindCoordinator: | |
| 		return apiVersion >= 3 | |
| 	case APIKeyJoinGroup: | |
| 		return apiVersion >= 6 | |
| 	case APIKeyHeartbeat: | |
| 		return apiVersion >= 4 | |
| 	case APIKeyLeaveGroup: | |
| 		return apiVersion >= 4 | |
| 	case APIKeySyncGroup: | |
| 		return apiVersion >= 4 | |
| 	case APIKeyApiVersions: | |
| 		// CRITICAL: AdminClient compatibility requires header version 0 (no tagged fields) | |
| 		// Even though ApiVersions v3+ technically supports flexible responses, AdminClient | |
| 		// expects the header to NOT include tagged fields. This is a known quirk. | |
| 		return false // Always use non-flexible header for ApiVersions | |
| 	case APIKeyCreateTopics: | |
| 		return apiVersion >= 5 | |
| 	case APIKeyDeleteTopics: | |
| 		return apiVersion >= 4 | |
| 	case APIKeyInitProducerId: | |
| 		return apiVersion >= 2 // Flexible from v2+ (KIP-360) | |
| 	case APIKeyDescribeConfigs: | |
| 		return apiVersion >= 4 | |
| 	case APIKeyDescribeCluster: | |
| 		return true // All versions (0+) are flexible | |
| 	default: | |
| 		// For unknown APIs, assume non-flexible (safer default) | |
| 		return false | |
| 	} | |
| } | |
| 
 | |
| // writeResponseWithHeader writes a Kafka response following the wire protocol: | |
| // [Size: 4 bytes][Correlation ID: 4 bytes][Tagged Fields (if flexible)][Body] | |
| func (h *Handler) writeResponseWithHeader(w *bufio.Writer, correlationID uint32, apiKey uint16, apiVersion uint16, responseBody []byte, timeout time.Duration) error { | |
| 	// Kafka wire protocol format (from kafka-go/protocol/response.go:116-138 and sarama/response_header.go:10-27): | |
| 	// [4 bytes: size = len(everything after this)] | |
| 	// [4 bytes: correlation ID] | |
| 	// [varint: header tagged fields (0x00 for empty) - ONLY for flexible responses with headerVersion >= 1] | |
| 	// [N bytes: response body] | |
|  | |
| 	// Determine if this response should be flexible | |
| 	isFlexible := isFlexibleResponse(apiKey, apiVersion) | |
| 
 | |
| 	// Calculate total size: correlation ID (4) + tagged fields (1 if flexible) + body | |
| 	totalSize := 4 + len(responseBody) | |
| 	if isFlexible { | |
| 		totalSize += 1 // Add 1 byte for empty tagged fields (0x00) | |
| 	} | |
| 
 | |
| 	// Build complete response in memory for hex dump logging | |
| 	fullResponse := make([]byte, 0, 4+totalSize) | |
| 
 | |
| 	// Write size | |
| 	sizeBuf := make([]byte, 4) | |
| 	binary.BigEndian.PutUint32(sizeBuf, uint32(totalSize)) | |
| 	fullResponse = append(fullResponse, sizeBuf...) | |
| 
 | |
| 	// Write correlation ID | |
| 	correlationBuf := make([]byte, 4) | |
| 	binary.BigEndian.PutUint32(correlationBuf, correlationID) | |
| 	fullResponse = append(fullResponse, correlationBuf...) | |
| 
 | |
| 	// Write header-level tagged fields for flexible responses | |
| 	if isFlexible { | |
| 		// Empty tagged fields = 0x00 (varint 0) | |
| 		fullResponse = append(fullResponse, 0x00) | |
| 	} | |
| 
 | |
| 	// Write response body | |
| 	fullResponse = append(fullResponse, responseBody...) | |
| 
 | |
| 	// Write to connection | |
| 	if _, err := w.Write(fullResponse); err != nil { | |
| 		return fmt.Errorf("write response: %w", err) | |
| 	} | |
| 
 | |
| 	// Flush | |
| 	if err := w.Flush(); err != nil { | |
| 		return fmt.Errorf("flush response: %w", err) | |
| 	} | |
| 
 | |
| 	return nil | |
| } | |
| 
 | |
| // writeResponseWithCorrelationID is deprecated - use writeResponseWithHeader instead | |
| // Kept for compatibility with direct callers that don't have API info | |
| func (h *Handler) writeResponseWithCorrelationID(w *bufio.Writer, correlationID uint32, responseBody []byte, timeout time.Duration) error { | |
| 	// Assume non-flexible for backward compatibility | |
| 	return h.writeResponseWithHeader(w, correlationID, 0, 0, responseBody, timeout) | |
| } | |
| 
 | |
| // writeResponseWithTimeout writes a Kafka response with timeout handling | |
| // DEPRECATED: Use writeResponseWithCorrelationID instead | |
| func (h *Handler) writeResponseWithTimeout(w *bufio.Writer, response []byte, timeout time.Duration) error { | |
| 	// This old function expects response to include correlation ID at the start | |
| 	// For backward compatibility with any remaining callers | |
|  | |
| 	// Write response size (4 bytes) | |
| 	responseSizeBytes := make([]byte, 4) | |
| 	binary.BigEndian.PutUint32(responseSizeBytes, uint32(len(response))) | |
| 
 | |
| 	if _, err := w.Write(responseSizeBytes); err != nil { | |
| 		return fmt.Errorf("write response size: %w", err) | |
| 	} | |
| 
 | |
| 	// Write response data | |
| 	if _, err := w.Write(response); err != nil { | |
| 		return fmt.Errorf("write response data: %w", err) | |
| 	} | |
| 
 | |
| 	// Flush the buffer | |
| 	if err := w.Flush(); err != nil { | |
| 		return fmt.Errorf("flush response: %w", err) | |
| 	} | |
| 
 | |
| 	return nil | |
| } | |
| 
 | |
| // EnableSchemaManagement enables schema management with the given configuration | |
| func (h *Handler) EnableSchemaManagement(config schema.ManagerConfig) error { | |
| 	manager, err := schema.NewManagerWithHealthCheck(config) | |
| 	if err != nil { | |
| 		return fmt.Errorf("failed to create schema manager: %w", err) | |
| 	} | |
| 
 | |
| 	h.schemaManager = manager | |
| 	h.useSchema = true | |
| 
 | |
| 	return nil | |
| } | |
| 
 | |
| // EnableBrokerIntegration enables mq.broker integration for schematized messages | |
| func (h *Handler) EnableBrokerIntegration(brokers []string) error { | |
| 	if !h.IsSchemaEnabled() { | |
| 		return fmt.Errorf("schema management must be enabled before broker integration") | |
| 	} | |
| 
 | |
| 	brokerClient := schema.NewBrokerClient(schema.BrokerClientConfig{ | |
| 		Brokers:       brokers, | |
| 		SchemaManager: h.schemaManager, | |
| 	}) | |
| 
 | |
| 	h.brokerClient = brokerClient | |
| 	return nil | |
| } | |
| 
 | |
| // DisableSchemaManagement disables schema management and broker integration | |
| func (h *Handler) DisableSchemaManagement() { | |
| 	if h.brokerClient != nil { | |
| 		h.brokerClient.Close() | |
| 		h.brokerClient = nil | |
| 	} | |
| 	h.schemaManager = nil | |
| 	h.useSchema = false | |
| } | |
| 
 | |
| // SetSchemaRegistryURL sets the Schema Registry URL for delayed initialization | |
| func (h *Handler) SetSchemaRegistryURL(url string) { | |
| 	h.schemaRegistryURL = url | |
| } | |
| 
 | |
| // SetDefaultPartitions sets the default partition count for auto-created topics | |
| func (h *Handler) SetDefaultPartitions(partitions int32) { | |
| 	h.defaultPartitions = partitions | |
| } | |
| 
 | |
| // GetDefaultPartitions returns the default partition count for auto-created topics | |
| func (h *Handler) GetDefaultPartitions() int32 { | |
| 	if h.defaultPartitions <= 0 { | |
| 		return 4 // Fallback default | |
| 	} | |
| 	return h.defaultPartitions | |
| } | |
| 
 | |
| // IsSchemaEnabled returns whether schema management is enabled | |
| func (h *Handler) IsSchemaEnabled() bool { | |
| 	// Try to initialize schema management if not already done | |
| 	if !h.useSchema && h.schemaRegistryURL != "" { | |
| 		h.tryInitializeSchemaManagement() | |
| 	} | |
| 	return h.useSchema && h.schemaManager != nil | |
| } | |
| 
 | |
| // tryInitializeSchemaManagement attempts to initialize schema management | |
| // This is called lazily when schema functionality is first needed | |
| func (h *Handler) tryInitializeSchemaManagement() { | |
| 	if h.useSchema || h.schemaRegistryURL == "" { | |
| 		return // Already initialized or no URL provided | |
| 	} | |
| 
 | |
| 	schemaConfig := schema.ManagerConfig{ | |
| 		RegistryURL: h.schemaRegistryURL, | |
| 	} | |
| 
 | |
| 	if err := h.EnableSchemaManagement(schemaConfig); err != nil { | |
| 		return | |
| 	} | |
| 
 | |
| } | |
| 
 | |
| // IsBrokerIntegrationEnabled returns true if broker integration is enabled | |
| func (h *Handler) IsBrokerIntegrationEnabled() bool { | |
| 	return h.IsSchemaEnabled() && h.brokerClient != nil | |
| } | |
| 
 | |
| // commitOffsetToSMQ commits offset using SMQ storage | |
| func (h *Handler) commitOffsetToSMQ(key ConsumerOffsetKey, offsetValue int64, metadata string) error { | |
| 	// Use new consumer offset storage if available, fall back to SMQ storage | |
| 	if h.consumerOffsetStorage != nil { | |
| 		return h.consumerOffsetStorage.CommitOffset(key.ConsumerGroup, key.Topic, key.Partition, offsetValue, metadata) | |
| 	} | |
| 
 | |
| 	// No SMQ offset storage - only use consumer offset storage | |
| 	return fmt.Errorf("offset storage not initialized") | |
| } | |
| 
 | |
| // fetchOffsetFromSMQ fetches offset using SMQ storage | |
| func (h *Handler) fetchOffsetFromSMQ(key ConsumerOffsetKey) (int64, string, error) { | |
| 	// Use new consumer offset storage if available, fall back to SMQ storage | |
| 	if h.consumerOffsetStorage != nil { | |
| 		return h.consumerOffsetStorage.FetchOffset(key.ConsumerGroup, key.Topic, key.Partition) | |
| 	} | |
| 
 | |
| 	// SMQ offset storage removed - no fallback | |
| 	return -1, "", fmt.Errorf("offset storage not initialized") | |
| } | |
| 
 | |
// DescribeConfigsResource describes one resource named in a DescribeConfigs
// request, as decoded by parseDescribeConfigsRequest.
type DescribeConfigsResource struct {
	ResourceType int8     // resource type code from the request: 2 = Topic, 4 = Broker
	ResourceName string   // resource name exactly as sent in the request
	ConfigNames  []string // requested config keys; empty means return all configs
}
| 
 | |
| // parseDescribeConfigsRequest parses a DescribeConfigs request body | |
| func (h *Handler) parseDescribeConfigsRequest(requestBody []byte, apiVersion uint16) ([]DescribeConfigsResource, error) { | |
| 	if len(requestBody) < 1 { | |
| 		return nil, fmt.Errorf("request too short") | |
| 	} | |
| 
 | |
| 	offset := 0 | |
| 
 | |
| 	// DescribeConfigs v4+ uses flexible protocol (compact arrays with varint) | |
| 	isFlexible := apiVersion >= 4 | |
| 
 | |
| 	var resourcesLength uint32 | |
| 	if isFlexible { | |
| 		// Debug: log the first 8 bytes of the request body | |
| 		debugBytes := requestBody[offset:] | |
| 		if len(debugBytes) > 8 { | |
| 			debugBytes = debugBytes[:8] | |
| 		} | |
| 
 | |
| 		// FIX: Skip top-level tagged fields for DescribeConfigs v4+ flexible protocol | |
| 		// The request body starts with tagged fields count (usually 0x00 = empty) | |
| 		_, consumed, err := DecodeTaggedFields(requestBody[offset:]) | |
| 		if err != nil { | |
| 			return nil, fmt.Errorf("DescribeConfigs v%d: decode top-level tagged fields: %w", apiVersion, err) | |
| 		} | |
| 		offset += consumed | |
| 
 | |
| 		// Resources (compact array) - Now correctly positioned after tagged fields | |
| 		resourcesLength, consumed, err = DecodeCompactArrayLength(requestBody[offset:]) | |
| 		if err != nil { | |
| 			return nil, fmt.Errorf("decode resources compact array: %w", err) | |
| 		} | |
| 		offset += consumed | |
| 	} else { | |
| 		// Regular array: length is int32 | |
| 		if len(requestBody) < 4 { | |
| 			return nil, fmt.Errorf("request too short for regular array") | |
| 		} | |
| 		resourcesLength = binary.BigEndian.Uint32(requestBody[offset : offset+4]) | |
| 		offset += 4 | |
| 	} | |
| 
 | |
| 	// Validate resources length to prevent panic | |
| 	if resourcesLength > 100 { // Reasonable limit | |
| 		return nil, fmt.Errorf("invalid resources length: %d", resourcesLength) | |
| 	} | |
| 
 | |
| 	resources := make([]DescribeConfigsResource, 0, resourcesLength) | |
| 
 | |
| 	for i := uint32(0); i < resourcesLength; i++ { | |
| 		if offset+1 > len(requestBody) { | |
| 			return nil, fmt.Errorf("insufficient data for resource type") | |
| 		} | |
| 
 | |
| 		// Resource type (1 byte) | |
| 		resourceType := int8(requestBody[offset]) | |
| 		offset++ | |
| 
 | |
| 		// Resource name (string - compact for v4+, regular for v0-3) | |
| 		var resourceName string | |
| 		if isFlexible { | |
| 			// Compact string: length is encoded as UNSIGNED_VARINT(actualLength + 1) | |
| 			name, consumed, err := DecodeFlexibleString(requestBody[offset:]) | |
| 			if err != nil { | |
| 				return nil, fmt.Errorf("decode resource name compact string: %w", err) | |
| 			} | |
| 			resourceName = name | |
| 			offset += consumed | |
| 		} else { | |
| 			// Regular string: length is int16 | |
| 			if offset+2 > len(requestBody) { | |
| 				return nil, fmt.Errorf("insufficient data for resource name length") | |
| 			} | |
| 			nameLength := int(binary.BigEndian.Uint16(requestBody[offset : offset+2])) | |
| 			offset += 2 | |
| 
 | |
| 			// Validate name length to prevent panic | |
| 			if nameLength < 0 || nameLength > 1000 { // Reasonable limit | |
| 				return nil, fmt.Errorf("invalid resource name length: %d", nameLength) | |
| 			} | |
| 
 | |
| 			if offset+nameLength > len(requestBody) { | |
| 				return nil, fmt.Errorf("insufficient data for resource name") | |
| 			} | |
| 			resourceName = string(requestBody[offset : offset+nameLength]) | |
| 			offset += nameLength | |
| 		} | |
| 
 | |
| 		// Config names array (compact for v4+, regular for v0-3) | |
| 		var configNames []string | |
| 		if isFlexible { | |
| 			// Compact array: length is encoded as UNSIGNED_VARINT(actualLength + 1) | |
| 			// For nullable arrays, 0 means null, 1 means empty | |
| 			configNamesCount, consumed, err := DecodeCompactArrayLength(requestBody[offset:]) | |
| 			if err != nil { | |
| 				return nil, fmt.Errorf("decode config names compact array: %w", err) | |
| 			} | |
| 			offset += consumed | |
| 
 | |
| 			// Parse each config name as compact string (if not null) | |
| 			if configNamesCount > 0 { | |
| 				for j := uint32(0); j < configNamesCount; j++ { | |
| 					configName, consumed, err := DecodeFlexibleString(requestBody[offset:]) | |
| 					if err != nil { | |
| 						return nil, fmt.Errorf("decode config name[%d] compact string: %w", j, err) | |
| 					} | |
| 					offset += consumed | |
| 					configNames = append(configNames, configName) | |
| 				} | |
| 			} | |
| 		} else { | |
| 			// Regular array: length is int32 | |
| 			if offset+4 > len(requestBody) { | |
| 				return nil, fmt.Errorf("insufficient data for config names length") | |
| 			} | |
| 			configNamesLength := int32(binary.BigEndian.Uint32(requestBody[offset : offset+4])) | |
| 			offset += 4 | |
| 
 | |
| 			// Validate config names length to prevent panic | |
| 			// Note: -1 means null/empty array in Kafka protocol | |
| 			if configNamesLength < -1 || configNamesLength > 1000 { // Reasonable limit | |
| 				return nil, fmt.Errorf("invalid config names length: %d", configNamesLength) | |
| 			} | |
| 
 | |
| 			// Handle null array case | |
| 			if configNamesLength == -1 { | |
| 				configNamesLength = 0 | |
| 			} | |
| 
 | |
| 			configNames = make([]string, 0, configNamesLength) | |
| 			for j := int32(0); j < configNamesLength; j++ { | |
| 				if offset+2 > len(requestBody) { | |
| 					return nil, fmt.Errorf("insufficient data for config name length") | |
| 				} | |
| 				configNameLength := int(binary.BigEndian.Uint16(requestBody[offset : offset+2])) | |
| 				offset += 2 | |
| 
 | |
| 				// Validate config name length to prevent panic | |
| 				if configNameLength < 0 || configNameLength > 500 { // Reasonable limit | |
| 					return nil, fmt.Errorf("invalid config name length: %d", configNameLength) | |
| 				} | |
| 
 | |
| 				if offset+configNameLength > len(requestBody) { | |
| 					return nil, fmt.Errorf("insufficient data for config name") | |
| 				} | |
| 				configName := string(requestBody[offset : offset+configNameLength]) | |
| 				offset += configNameLength | |
| 
 | |
| 				configNames = append(configNames, configName) | |
| 			} | |
| 		} | |
| 
 | |
| 		resources = append(resources, DescribeConfigsResource{ | |
| 			ResourceType: resourceType, | |
| 			ResourceName: resourceName, | |
| 			ConfigNames:  configNames, | |
| 		}) | |
| 	} | |
| 
 | |
| 	return resources, nil | |
| } | |
| 
 | |
| // buildDescribeConfigsResourceResponse builds the response for a single resource | |
| func (h *Handler) buildDescribeConfigsResourceResponse(resource DescribeConfigsResource, apiVersion uint16) []byte { | |
| 	response := make([]byte, 0, 512) | |
| 
 | |
| 	// Error code (0 = no error) | |
| 	errorCodeBytes := make([]byte, 2) | |
| 	binary.BigEndian.PutUint16(errorCodeBytes, 0) | |
| 	response = append(response, errorCodeBytes...) | |
| 
 | |
| 	// Error message (null string = -1 length) | |
| 	errorMsgBytes := make([]byte, 2) | |
| 	binary.BigEndian.PutUint16(errorMsgBytes, 0xFFFF) // -1 as uint16 | |
| 	response = append(response, errorMsgBytes...) | |
| 
 | |
| 	// Resource type | |
| 	response = append(response, byte(resource.ResourceType)) | |
| 
 | |
| 	// Resource name | |
| 	nameBytes := make([]byte, 2+len(resource.ResourceName)) | |
| 	binary.BigEndian.PutUint16(nameBytes[0:2], uint16(len(resource.ResourceName))) | |
| 	copy(nameBytes[2:], []byte(resource.ResourceName)) | |
| 	response = append(response, nameBytes...) | |
| 
 | |
| 	// Get configs for this resource | |
| 	configs := h.getConfigsForResource(resource) | |
| 
 | |
| 	// Config entries array length | |
| 	configCountBytes := make([]byte, 4) | |
| 	binary.BigEndian.PutUint32(configCountBytes, uint32(len(configs))) | |
| 	response = append(response, configCountBytes...) | |
| 
 | |
| 	// Add each config entry | |
| 	for _, config := range configs { | |
| 		configBytes := h.buildConfigEntry(config, apiVersion) | |
| 		response = append(response, configBytes...) | |
| 	} | |
| 
 | |
| 	return response | |
| } | |
| 
 | |
// ConfigEntry is a single configuration value reported in a DescribeConfigs
// response; buildConfigEntry serializes it to the wire format.
type ConfigEntry struct {
	Name      string // config key, e.g. "retention.ms"
	Value     string // config value, carried as a string
	ReadOnly  bool   // written as the read-only flag
	IsDefault bool   // written as the is-default flag (emitted only for API version 0)
	Sensitive bool   // written as the sensitive flag
}
| 
 | |
| // getConfigsForResource returns appropriate configs for a resource | |
| func (h *Handler) getConfigsForResource(resource DescribeConfigsResource) []ConfigEntry { | |
| 	switch resource.ResourceType { | |
| 	case 2: // Topic | |
| 		return h.getTopicConfigs(resource.ResourceName, resource.ConfigNames) | |
| 	case 4: // Broker | |
| 		return h.getBrokerConfigs(resource.ConfigNames) | |
| 	default: | |
| 		return []ConfigEntry{} | |
| 	} | |
| } | |
| 
 | |
| // getTopicConfigs returns topic-level configurations | |
| func (h *Handler) getTopicConfigs(topicName string, requestedConfigs []string) []ConfigEntry { | |
| 	// Default topic configs that admin clients commonly request | |
| 	allConfigs := map[string]ConfigEntry{ | |
| 		"cleanup.policy": { | |
| 			Name:      "cleanup.policy", | |
| 			Value:     "delete", | |
| 			ReadOnly:  false, | |
| 			IsDefault: true, | |
| 			Sensitive: false, | |
| 		}, | |
| 		"retention.ms": { | |
| 			Name:      "retention.ms", | |
| 			Value:     "604800000", // 7 days in milliseconds | |
| 			ReadOnly:  false, | |
| 			IsDefault: true, | |
| 			Sensitive: false, | |
| 		}, | |
| 		"retention.bytes": { | |
| 			Name:      "retention.bytes", | |
| 			Value:     "-1", // Unlimited | |
| 			ReadOnly:  false, | |
| 			IsDefault: true, | |
| 			Sensitive: false, | |
| 		}, | |
| 		"segment.ms": { | |
| 			Name:      "segment.ms", | |
| 			Value:     "86400000", // 1 day in milliseconds | |
| 			ReadOnly:  false, | |
| 			IsDefault: true, | |
| 			Sensitive: false, | |
| 		}, | |
| 		"max.message.bytes": { | |
| 			Name:      "max.message.bytes", | |
| 			Value:     "1048588", // ~1MB | |
| 			ReadOnly:  false, | |
| 			IsDefault: true, | |
| 			Sensitive: false, | |
| 		}, | |
| 		"min.insync.replicas": { | |
| 			Name:      "min.insync.replicas", | |
| 			Value:     "1", | |
| 			ReadOnly:  false, | |
| 			IsDefault: true, | |
| 			Sensitive: false, | |
| 		}, | |
| 	} | |
| 
 | |
| 	// If specific configs requested, filter to those | |
| 	if len(requestedConfigs) > 0 { | |
| 		filteredConfigs := make([]ConfigEntry, 0, len(requestedConfigs)) | |
| 		for _, configName := range requestedConfigs { | |
| 			if config, exists := allConfigs[configName]; exists { | |
| 				filteredConfigs = append(filteredConfigs, config) | |
| 			} | |
| 		} | |
| 		return filteredConfigs | |
| 	} | |
| 
 | |
| 	// Return all configs | |
| 	configs := make([]ConfigEntry, 0, len(allConfigs)) | |
| 	for _, config := range allConfigs { | |
| 		configs = append(configs, config) | |
| 	} | |
| 	return configs | |
| } | |
| 
 | |
| // getBrokerConfigs returns broker-level configurations | |
| func (h *Handler) getBrokerConfigs(requestedConfigs []string) []ConfigEntry { | |
| 	// Default broker configs that admin clients commonly request | |
| 	allConfigs := map[string]ConfigEntry{ | |
| 		"log.retention.hours": { | |
| 			Name:      "log.retention.hours", | |
| 			Value:     "168", // 7 days | |
| 			ReadOnly:  false, | |
| 			IsDefault: true, | |
| 			Sensitive: false, | |
| 		}, | |
| 		"log.segment.bytes": { | |
| 			Name:      "log.segment.bytes", | |
| 			Value:     "1073741824", // 1GB | |
| 			ReadOnly:  false, | |
| 			IsDefault: true, | |
| 			Sensitive: false, | |
| 		}, | |
| 		"num.network.threads": { | |
| 			Name:      "num.network.threads", | |
| 			Value:     "3", | |
| 			ReadOnly:  true, | |
| 			IsDefault: true, | |
| 			Sensitive: false, | |
| 		}, | |
| 		"num.io.threads": { | |
| 			Name:      "num.io.threads", | |
| 			Value:     "8", | |
| 			ReadOnly:  true, | |
| 			IsDefault: true, | |
| 			Sensitive: false, | |
| 		}, | |
| 	} | |
| 
 | |
| 	// If specific configs requested, filter to those | |
| 	if len(requestedConfigs) > 0 { | |
| 		filteredConfigs := make([]ConfigEntry, 0, len(requestedConfigs)) | |
| 		for _, configName := range requestedConfigs { | |
| 			if config, exists := allConfigs[configName]; exists { | |
| 				filteredConfigs = append(filteredConfigs, config) | |
| 			} | |
| 		} | |
| 		return filteredConfigs | |
| 	} | |
| 
 | |
| 	// Return all configs | |
| 	configs := make([]ConfigEntry, 0, len(allConfigs)) | |
| 	for _, config := range allConfigs { | |
| 		configs = append(configs, config) | |
| 	} | |
| 	return configs | |
| } | |
| 
 | |
| // buildConfigEntry builds the wire format for a single config entry | |
| func (h *Handler) buildConfigEntry(config ConfigEntry, apiVersion uint16) []byte { | |
| 	entry := make([]byte, 0, 256) | |
| 
 | |
| 	// Config name | |
| 	nameBytes := make([]byte, 2+len(config.Name)) | |
| 	binary.BigEndian.PutUint16(nameBytes[0:2], uint16(len(config.Name))) | |
| 	copy(nameBytes[2:], []byte(config.Name)) | |
| 	entry = append(entry, nameBytes...) | |
| 
 | |
| 	// Config value | |
| 	valueBytes := make([]byte, 2+len(config.Value)) | |
| 	binary.BigEndian.PutUint16(valueBytes[0:2], uint16(len(config.Value))) | |
| 	copy(valueBytes[2:], []byte(config.Value)) | |
| 	entry = append(entry, valueBytes...) | |
| 
 | |
| 	// Read only flag | |
| 	if config.ReadOnly { | |
| 		entry = append(entry, 1) | |
| 	} else { | |
| 		entry = append(entry, 0) | |
| 	} | |
| 
 | |
| 	// Is default flag (only for version 0) | |
| 	if apiVersion == 0 { | |
| 		if config.IsDefault { | |
| 			entry = append(entry, 1) | |
| 		} else { | |
| 			entry = append(entry, 0) | |
| 		} | |
| 	} | |
| 
 | |
| 	// Config source (for versions 1-3) | |
| 	if apiVersion >= 1 && apiVersion <= 3 { | |
| 		// ConfigSource: 1 = DYNAMIC_TOPIC_CONFIG, 2 = DYNAMIC_BROKER_CONFIG, 4 = STATIC_BROKER_CONFIG, 5 = DEFAULT_CONFIG | |
| 		configSource := int8(5) // DEFAULT_CONFIG for all our configs since they're defaults | |
| 		entry = append(entry, byte(configSource)) | |
| 	} | |
| 
 | |
| 	// Sensitive flag | |
| 	if config.Sensitive { | |
| 		entry = append(entry, 1) | |
| 	} else { | |
| 		entry = append(entry, 0) | |
| 	} | |
| 
 | |
| 	// Config synonyms (for versions 1-3) | |
| 	if apiVersion >= 1 && apiVersion <= 3 { | |
| 		// Empty synonyms array (4 bytes for array length = 0) | |
| 		synonymsLength := make([]byte, 4) | |
| 		binary.BigEndian.PutUint32(synonymsLength, 0) | |
| 		entry = append(entry, synonymsLength...) | |
| 	} | |
| 
 | |
| 	// Config type (for version 3 only) | |
| 	if apiVersion == 3 { | |
| 		configType := int8(1) // STRING type for all our configs | |
| 		entry = append(entry, byte(configType)) | |
| 	} | |
| 
 | |
| 	// Config documentation (for version 3 only) | |
| 	if apiVersion == 3 { | |
| 		// Null documentation (length = -1) | |
| 		docLength := make([]byte, 2) | |
| 		binary.BigEndian.PutUint16(docLength, 0xFFFF) // -1 as uint16 | |
| 		entry = append(entry, docLength...) | |
| 	} | |
| 
 | |
| 	return entry | |
| } | |
| 
 | |
| // registerSchemasViaBrokerAPI registers both key and value schemas via the broker's ConfigureTopic API | |
| // Only the gateway leader performs the registration to avoid concurrent updates. | |
| func (h *Handler) registerSchemasViaBrokerAPI(topicName string, valueRecordType *schema_pb.RecordType, keyRecordType *schema_pb.RecordType) error { | |
| 	if valueRecordType == nil && keyRecordType == nil { | |
| 		return nil | |
| 	} | |
| 
 | |
| 	// Check coordinator registry for multi-gateway deployments | |
| 	// In single-gateway mode, coordinator registry may not be initialized - that's OK | |
| 	if reg := h.GetCoordinatorRegistry(); reg != nil { | |
| 		// Multi-gateway mode - check if we're the leader | |
| 		isLeader := reg.IsLeader() | |
| 
 | |
| 		if !isLeader { | |
| 			// Not leader - in production multi-gateway setups, skip to avoid conflicts | |
| 			// In single-gateway setups where leader election fails, log warning but proceed | |
| 			// This ensures schema registration works even if distributed locking has issues | |
| 			// Note: Schema registration is idempotent, so duplicate registrations are safe | |
| 		} else { | |
| 		} | |
| 	} else { | |
| 		// No coordinator registry - definitely single-gateway mode | |
| 	} | |
| 
 | |
| 	// Require SeaweedMQ integration to access broker | |
| 	if h.seaweedMQHandler == nil { | |
| 		return fmt.Errorf("no SeaweedMQ handler available for broker access") | |
| 	} | |
| 
 | |
| 	// Get broker addresses | |
| 	brokerAddresses := h.seaweedMQHandler.GetBrokerAddresses() | |
| 	if len(brokerAddresses) == 0 { | |
| 		return fmt.Errorf("no broker addresses available") | |
| 	} | |
| 
 | |
| 	// Use the first available broker | |
| 	brokerAddress := brokerAddresses[0] | |
| 
 | |
| 	// Load security configuration | |
| 	util.LoadSecurityConfiguration() | |
| 	grpcDialOption := security.LoadClientTLS(util.GetViper(), "grpc.mq") | |
| 
 | |
| 	// Get current topic configuration to preserve partition count | |
| 	seaweedTopic := &schema_pb.Topic{ | |
| 		Namespace: DefaultKafkaNamespace, | |
| 		Name:      topicName, | |
| 	} | |
| 
 | |
| 	return pb.WithBrokerGrpcClient(false, brokerAddress, grpcDialOption, func(client mq_pb.SeaweedMessagingClient) error { | |
| 		// First get current configuration | |
| 		getResp, err := client.GetTopicConfiguration(context.Background(), &mq_pb.GetTopicConfigurationRequest{ | |
| 			Topic: seaweedTopic, | |
| 		}) | |
| 		if err != nil { | |
| 			// Convert dual schemas to flat schema format | |
| 			var flatSchema *schema_pb.RecordType | |
| 			var keyColumns []string | |
| 			if keyRecordType != nil || valueRecordType != nil { | |
| 				flatSchema, keyColumns = mqschema.CombineFlatSchemaFromKeyValue(keyRecordType, valueRecordType) | |
| 			} | |
| 
 | |
| 			// If topic doesn't exist, create it with configurable default partition count | |
| 			// Get schema format from topic config if available | |
| 			schemaFormat := h.getTopicSchemaFormat(topicName) | |
| 			_, err := client.ConfigureTopic(context.Background(), &mq_pb.ConfigureTopicRequest{ | |
| 				Topic:             seaweedTopic, | |
| 				PartitionCount:    h.GetDefaultPartitions(), // Use configurable default | |
| 				MessageRecordType: flatSchema, | |
| 				KeyColumns:        keyColumns, | |
| 				SchemaFormat:      schemaFormat, | |
| 			}) | |
| 			return err | |
| 		} | |
| 
 | |
| 		// Convert dual schemas to flat schema format for update | |
| 		var flatSchema *schema_pb.RecordType | |
| 		var keyColumns []string | |
| 		if keyRecordType != nil || valueRecordType != nil { | |
| 			flatSchema, keyColumns = mqschema.CombineFlatSchemaFromKeyValue(keyRecordType, valueRecordType) | |
| 		} | |
| 
 | |
| 		// Update existing topic with new schema | |
| 		// Get schema format from topic config if available | |
| 		schemaFormat := h.getTopicSchemaFormat(topicName) | |
| 		_, err = client.ConfigureTopic(context.Background(), &mq_pb.ConfigureTopicRequest{ | |
| 			Topic:             seaweedTopic, | |
| 			PartitionCount:    getResp.PartitionCount, | |
| 			MessageRecordType: flatSchema, | |
| 			KeyColumns:        keyColumns, | |
| 			Retention:         getResp.Retention, | |
| 			SchemaFormat:      schemaFormat, | |
| 		}) | |
| 		return err | |
| 	}) | |
| } | |
| 
 | |
| // handleInitProducerId handles InitProducerId API requests (API key 22) | |
| // This API is used to initialize a producer for transactional or idempotent operations | |
| func (h *Handler) handleInitProducerId(correlationID uint32, apiVersion uint16, requestBody []byte) ([]byte, error) { | |
| 
 | |
| 	// InitProducerId Request Format (varies by version): | |
| 	// v0-v1: transactional_id(NULLABLE_STRING) + transaction_timeout_ms(INT32) | |
| 	// v2+: transactional_id(NULLABLE_STRING) + transaction_timeout_ms(INT32) + producer_id(INT64) + producer_epoch(INT16) | |
| 	// v4+: Uses flexible format with tagged fields | |
|  | |
| 	offset := 0 | |
| 
 | |
| 	// Parse transactional_id (NULLABLE_STRING or COMPACT_NULLABLE_STRING for flexible versions) | |
| 	var transactionalId *string | |
| 	if apiVersion >= 4 { | |
| 		// Flexible version - use compact nullable string | |
| 		if len(requestBody) < offset+1 { | |
| 			return nil, fmt.Errorf("InitProducerId request too short for transactional_id") | |
| 		} | |
| 
 | |
| 		length := int(requestBody[offset]) | |
| 		offset++ | |
| 
 | |
| 		if length == 0 { | |
| 			// Null string | |
| 			transactionalId = nil | |
| 		} else { | |
| 			// Non-null string (length is encoded as length+1 in compact format) | |
| 			actualLength := length - 1 | |
| 			if len(requestBody) < offset+actualLength { | |
| 				return nil, fmt.Errorf("InitProducerId request transactional_id too short") | |
| 			} | |
| 			if actualLength > 0 { | |
| 				id := string(requestBody[offset : offset+actualLength]) | |
| 				transactionalId = &id | |
| 				offset += actualLength | |
| 			} else { | |
| 				// Empty string | |
| 				id := "" | |
| 				transactionalId = &id | |
| 			} | |
| 		} | |
| 	} else { | |
| 		// Non-flexible version - use regular nullable string | |
| 		if len(requestBody) < offset+2 { | |
| 			return nil, fmt.Errorf("InitProducerId request too short for transactional_id length") | |
| 		} | |
| 
 | |
| 		length := int(binary.BigEndian.Uint16(requestBody[offset : offset+2])) | |
| 		offset += 2 | |
| 
 | |
| 		if length == 0xFFFF { | |
| 			// Null string (-1 as uint16) | |
| 			transactionalId = nil | |
| 		} else { | |
| 			if len(requestBody) < offset+length { | |
| 				return nil, fmt.Errorf("InitProducerId request transactional_id too short") | |
| 			} | |
| 			if length > 0 { | |
| 				id := string(requestBody[offset : offset+length]) | |
| 				transactionalId = &id | |
| 				offset += length | |
| 			} else { | |
| 				// Empty string | |
| 				id := "" | |
| 				transactionalId = &id | |
| 			} | |
| 		} | |
| 	} | |
| 	_ = transactionalId // Used for logging/tracking, but not in core logic yet | |
|  | |
| 	// Parse transaction_timeout_ms (INT32) | |
| 	if len(requestBody) < offset+4 { | |
| 		return nil, fmt.Errorf("InitProducerId request too short for transaction_timeout_ms") | |
| 	} | |
| 	_ = binary.BigEndian.Uint32(requestBody[offset : offset+4]) // transactionTimeoutMs | |
| 	offset += 4 | |
| 
 | |
| 	// For v2+, there might be additional fields, but we'll ignore them for now | |
| 	// as we're providing a basic implementation | |
|  | |
| 	// Build response | |
| 	response := make([]byte, 0, 64) | |
| 
 | |
| 	// NOTE: Correlation ID is handled by writeResponseWithHeader | |
| 	// Do NOT include it in the response body | |
| 	// Note: Header tagged fields are also handled by writeResponseWithHeader for flexible versions | |
|  | |
| 	// InitProducerId Response Format: | |
| 	// throttle_time_ms(INT32) + error_code(INT16) + producer_id(INT64) + producer_epoch(INT16) | |
| 	// + tagged_fields (for flexible versions) | |
|  | |
| 	// Throttle time (4 bytes) - v1+ | |
| 	if apiVersion >= 1 { | |
| 		response = append(response, 0, 0, 0, 0) // No throttling | |
| 	} | |
| 
 | |
| 	// Error code (2 bytes) - SUCCESS | |
| 	response = append(response, 0, 0) // No error | |
|  | |
| 	// Producer ID (8 bytes) - generate a simple producer ID | |
| 	// In a real implementation, this would be managed by a transaction coordinator | |
| 	producerId := int64(1000) // Simple fixed producer ID for now | |
| 	producerIdBytes := make([]byte, 8) | |
| 	binary.BigEndian.PutUint64(producerIdBytes, uint64(producerId)) | |
| 	response = append(response, producerIdBytes...) | |
| 
 | |
| 	// Producer epoch (2 bytes) - start with epoch 0 | |
| 	response = append(response, 0, 0) // Epoch 0 | |
|  | |
| 	// For flexible versions (v4+), add response body tagged fields | |
| 	if apiVersion >= 4 { | |
| 		response = append(response, 0x00) // Empty response body tagged fields | |
| 	} | |
| 
 | |
| 	return response, nil | |
| } | |
| 
 | |
| // createTopicWithSchemaSupport creates a topic with optional schema integration | |
| // This function creates topics with schema support when schema management is enabled | |
| func (h *Handler) createTopicWithSchemaSupport(topicName string, partitions int32) error { | |
| 
 | |
| 	// For system topics like _schemas, __consumer_offsets, etc., use default schema | |
| 	if isSystemTopic(topicName) { | |
| 		return h.createTopicWithDefaultFlexibleSchema(topicName, partitions) | |
| 	} | |
| 
 | |
| 	// Check if Schema Registry URL is configured | |
| 	if h.schemaRegistryURL != "" { | |
| 
 | |
| 		// Try to initialize schema management if not already done | |
| 		if h.schemaManager == nil { | |
| 			h.tryInitializeSchemaManagement() | |
| 		} | |
| 
 | |
| 		// If schema manager is still nil after initialization attempt, Schema Registry is unavailable | |
| 		if h.schemaManager == nil { | |
| 			return fmt.Errorf("Schema Registry is configured at %s but unavailable - cannot create topic %s without schema validation", h.schemaRegistryURL, topicName) | |
| 		} | |
| 
 | |
| 		// Schema Registry is available - try to fetch existing schema | |
| 		keyRecordType, valueRecordType, err := h.fetchSchemaForTopic(topicName) | |
| 		if err != nil { | |
| 			// Check if this is a connection error vs schema not found | |
| 			if h.isSchemaRegistryConnectionError(err) { | |
| 				return fmt.Errorf("Schema Registry is unavailable: %w", err) | |
| 			} | |
| 			// Schema not found - this is an error when schema management is enforced | |
| 			return fmt.Errorf("schema is required for topic %s but no schema found in Schema Registry", topicName) | |
| 		} | |
| 
 | |
| 		if keyRecordType != nil || valueRecordType != nil { | |
| 			// Create topic with schema from Schema Registry | |
| 			return h.seaweedMQHandler.CreateTopicWithSchemas(topicName, partitions, keyRecordType, valueRecordType) | |
| 		} | |
| 
 | |
| 		// No schemas found - this is an error when schema management is enforced | |
| 		return fmt.Errorf("schema is required for topic %s but no schema found in Schema Registry", topicName) | |
| 	} | |
| 
 | |
| 	// Schema Registry URL not configured - create topic without schema (backward compatibility) | |
| 	return h.seaweedMQHandler.CreateTopic(topicName, partitions) | |
| } | |
| 
 | |
| // createTopicWithDefaultFlexibleSchema creates a topic with a flexible default schema | |
| // that can handle both Avro and JSON messages when schema management is enabled | |
| func (h *Handler) createTopicWithDefaultFlexibleSchema(topicName string, partitions int32) error { | |
| 	// CRITICAL FIX: System topics like _schemas should be PLAIN Kafka topics without schema management | |
| 	// Schema Registry uses _schemas to STORE schemas, so it can't have schema management itself | |
| 	// This was causing issues with Schema Registry bootstrap | |
|  | |
| 	glog.V(1).Infof("Creating system topic %s as PLAIN topic (no schema management)", topicName) | |
| 	return h.seaweedMQHandler.CreateTopic(topicName, partitions) | |
| } | |
| 
 | |
| // fetchSchemaForTopic attempts to fetch schema information for a topic from Schema Registry | |
| // Returns key and value RecordTypes if schemas are found | |
| func (h *Handler) fetchSchemaForTopic(topicName string) (*schema_pb.RecordType, *schema_pb.RecordType, error) { | |
| 	if h.schemaManager == nil { | |
| 		return nil, nil, fmt.Errorf("schema manager not available") | |
| 	} | |
| 
 | |
| 	var keyRecordType *schema_pb.RecordType | |
| 	var valueRecordType *schema_pb.RecordType | |
| 	var lastConnectionError error | |
| 
 | |
| 	// Try to fetch value schema using standard Kafka naming convention: <topic>-value | |
| 	valueSubject := topicName + "-value" | |
| 	cachedSchema, err := h.schemaManager.GetLatestSchema(valueSubject) | |
| 	if err != nil { | |
| 		// Check if this is a connection error (Schema Registry unavailable) | |
| 		if h.isSchemaRegistryConnectionError(err) { | |
| 			lastConnectionError = err | |
| 		} | |
| 		// Not found or connection error - continue to check key schema | |
| 	} else if cachedSchema != nil { | |
| 
 | |
| 		// Convert schema to RecordType | |
| 		recordType, err := h.convertSchemaToRecordType(cachedSchema.Schema, cachedSchema.LatestID) | |
| 		if err == nil { | |
| 			valueRecordType = recordType | |
| 			// Store schema configuration for later use | |
| 			h.storeTopicSchemaConfig(topicName, cachedSchema.LatestID, schema.FormatAvro) | |
| 		} else { | |
| 		} | |
| 	} | |
| 
 | |
| 	// Try to fetch key schema (optional) | |
| 	keySubject := topicName + "-key" | |
| 	cachedKeySchema, keyErr := h.schemaManager.GetLatestSchema(keySubject) | |
| 	if keyErr != nil { | |
| 		if h.isSchemaRegistryConnectionError(keyErr) { | |
| 			lastConnectionError = keyErr | |
| 		} | |
| 		// Not found or connection error - key schema is optional | |
| 	} else if cachedKeySchema != nil { | |
| 
 | |
| 		// Convert schema to RecordType | |
| 		recordType, err := h.convertSchemaToRecordType(cachedKeySchema.Schema, cachedKeySchema.LatestID) | |
| 		if err == nil { | |
| 			keyRecordType = recordType | |
| 			// Store key schema configuration for later use | |
| 			h.storeTopicKeySchemaConfig(topicName, cachedKeySchema.LatestID, schema.FormatAvro) | |
| 		} else { | |
| 		} | |
| 	} | |
| 
 | |
| 	// If we encountered connection errors, fail fast | |
| 	if lastConnectionError != nil && keyRecordType == nil && valueRecordType == nil { | |
| 		return nil, nil, fmt.Errorf("Schema Registry is unavailable: %w", lastConnectionError) | |
| 	} | |
| 
 | |
| 	// Return error if no schemas found (but Schema Registry was reachable) | |
| 	if keyRecordType == nil && valueRecordType == nil { | |
| 		return nil, nil, fmt.Errorf("no schemas found for topic %s", topicName) | |
| 	} | |
| 
 | |
| 	return keyRecordType, valueRecordType, nil | |
| } | |
| 
 | |
| // isSchemaRegistryConnectionError determines if an error is due to Schema Registry being unavailable | |
| // vs a schema not being found (404) | |
| func (h *Handler) isSchemaRegistryConnectionError(err error) bool { | |
| 	if err == nil { | |
| 		return false | |
| 	} | |
| 
 | |
| 	errStr := err.Error() | |
| 
 | |
| 	// Connection errors (network issues, DNS resolution, etc.) | |
| 	if strings.Contains(errStr, "failed to fetch") && | |
| 		(strings.Contains(errStr, "connection refused") || | |
| 			strings.Contains(errStr, "no such host") || | |
| 			strings.Contains(errStr, "timeout") || | |
| 			strings.Contains(errStr, "network is unreachable")) { | |
| 		return true | |
| 	} | |
| 
 | |
| 	// HTTP 5xx errors (server errors) | |
| 	if strings.Contains(errStr, "schema registry error 5") { | |
| 		return true | |
| 	} | |
| 
 | |
| 	// HTTP 404 errors are "schema not found", not connection errors | |
| 	if strings.Contains(errStr, "schema registry error 404") { | |
| 		return false | |
| 	} | |
| 
 | |
| 	// Other HTTP errors (401, 403, etc.) should be treated as connection/config issues | |
| 	if strings.Contains(errStr, "schema registry error") { | |
| 		return true | |
| 	} | |
| 
 | |
| 	return false | |
| } | |
| 
 | |
| // convertSchemaToRecordType converts a schema string to a RecordType | |
| func (h *Handler) convertSchemaToRecordType(schemaStr string, schemaID uint32) (*schema_pb.RecordType, error) { | |
| 	// Get the cached schema to determine format | |
| 	cachedSchema, err := h.schemaManager.GetSchemaByID(schemaID) | |
| 	if err != nil { | |
| 		return nil, fmt.Errorf("failed to get cached schema: %w", err) | |
| 	} | |
| 
 | |
| 	// Create appropriate decoder and infer RecordType based on format | |
| 	switch cachedSchema.Format { | |
| 	case schema.FormatAvro: | |
| 		// Create Avro decoder and infer RecordType | |
| 		decoder, err := schema.NewAvroDecoder(schemaStr) | |
| 		if err != nil { | |
| 			return nil, fmt.Errorf("failed to create Avro decoder: %w", err) | |
| 		} | |
| 		return decoder.InferRecordType() | |
| 
 | |
| 	case schema.FormatJSONSchema: | |
| 		// Create JSON Schema decoder and infer RecordType | |
| 		decoder, err := schema.NewJSONSchemaDecoder(schemaStr) | |
| 		if err != nil { | |
| 			return nil, fmt.Errorf("failed to create JSON Schema decoder: %w", err) | |
| 		} | |
| 		return decoder.InferRecordType() | |
| 
 | |
| 	case schema.FormatProtobuf: | |
| 		// For Protobuf, we need the binary descriptor, not string | |
| 		// This is a limitation - Protobuf schemas in Schema Registry are typically stored as binary descriptors | |
| 		return nil, fmt.Errorf("Protobuf schema conversion from string not supported - requires binary descriptor") | |
| 
 | |
| 	default: | |
| 		return nil, fmt.Errorf("unsupported schema format: %v", cachedSchema.Format) | |
| 	} | |
| } | |
| 
 | |
// isSystemTopic reports whether topicName is treated as a Kafka system topic.
//
// A topic qualifies if it is one of the explicitly listed well-known names or,
// more generally, if its name begins with an underscore. The empty name is not
// a system topic.
func isSystemTopic(topicName string) bool {
	// Well-known system topics, listed explicitly for documentation value.
	switch topicName {
	case "_schemas",
		"__consumer_offsets",
		"__transaction_state",
		"_confluent-ksql-default__command_topic",
		"_confluent-metrics":
		return true
	}

	// Common convention: any topic with a leading underscore is a system topic.
	if topicName == "" {
		return false
	}
	return topicName[0] == '_'
}
| 
 | |
| // getConnectionContextFromRequest extracts the connection context from the request context | |
| func (h *Handler) getConnectionContextFromRequest(ctx context.Context) *ConnectionContext { | |
| 	if connCtx, ok := ctx.Value(connContextKey).(*ConnectionContext); ok { | |
| 		return connCtx | |
| 	} | |
| 	return nil | |
| } | |
| 
 | |
| // getOrCreatePartitionReader gets an existing partition reader or creates a new one | |
| // This maintains persistent readers per connection that stream forward, eliminating | |
| // repeated offset lookups and reducing broker CPU load | |
| func (h *Handler) getOrCreatePartitionReader(ctx context.Context, connCtx *ConnectionContext, key TopicPartitionKey, startOffset int64) *partitionReader { | |
| 	// Try to get existing reader | |
| 	if val, ok := connCtx.partitionReaders.Load(key); ok { | |
| 		return val.(*partitionReader) | |
| 	} | |
| 
 | |
| 	// Create new reader | |
| 	reader := newPartitionReader(ctx, h, connCtx, key.Topic, key.Partition, startOffset) | |
| 
 | |
| 	// Store it (handle race condition where another goroutine created one) | |
| 	if actual, loaded := connCtx.partitionReaders.LoadOrStore(key, reader); loaded { | |
| 		// Another goroutine created it first, close ours and use theirs | |
| 		reader.close() | |
| 		return actual.(*partitionReader) | |
| 	} | |
| 
 | |
| 	return reader | |
| } | |
| 
 | |
| // cleanupPartitionReaders closes all partition readers for a connection | |
| // Called when connection is closing | |
| func cleanupPartitionReaders(connCtx *ConnectionContext) { | |
| 	if connCtx == nil { | |
| 		return | |
| 	} | |
| 
 | |
| 	connCtx.partitionReaders.Range(func(key, value interface{}) bool { | |
| 		if reader, ok := value.(*partitionReader); ok { | |
| 			reader.close() | |
| 		} | |
| 		return true // Continue iteration | |
| 	}) | |
| 
 | |
| 	glog.V(4).Infof("[%s] Cleaned up partition readers", connCtx.ConnectionID) | |
| }
 |