diff --git a/go.mod b/go.mod index 967a3fae2..1233188e3 100644 --- a/go.mod +++ b/go.mod @@ -111,7 +111,7 @@ require ( google.golang.org/api v0.247.0 google.golang.org/genproto v0.0.0-20250715232539-7130f93afb79 // indirect google.golang.org/grpc v1.75.0 - google.golang.org/protobuf v1.36.8 + google.golang.org/protobuf v1.36.9 gopkg.in/inf.v0 v0.9.1 // indirect modernc.org/b v1.0.0 // indirect modernc.org/mathutil v1.7.1 diff --git a/go.sum b/go.sum index e7a71748c..0854b14a8 100644 --- a/go.sum +++ b/go.sum @@ -2650,6 +2650,8 @@ google.golang.org/protobuf v1.28.1/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqw google.golang.org/protobuf v1.30.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= google.golang.org/protobuf v1.36.8 h1:xHScyCOEuuwZEc6UtSOvPbAT4zRh0xcNRYekJwfqyMc= google.golang.org/protobuf v1.36.8/go.mod h1:fuxRtAxBytpl4zzqUh6/eyUujkJdNiuEkXntxiD/uRU= +google.golang.org/protobuf v1.36.9 h1:w2gp2mA27hUeUzj9Ex9FBjsBm40zfaDtEWow293U7Iw= +google.golang.org/protobuf v1.36.9/go.mod h1:fuxRtAxBytpl4zzqUh6/eyUujkJdNiuEkXntxiD/uRU= gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= diff --git a/weed/mq/kafka/schema/envelope.go b/weed/mq/kafka/schema/envelope.go index 2baf4ffd9..fc4dbaa42 100644 --- a/weed/mq/kafka/schema/envelope.go +++ b/weed/mq/kafka/schema/envelope.go @@ -52,15 +52,21 @@ func ParseConfluentEnvelope(data []byte) (*ConfluentEnvelope, bool) { schemaID := binary.BigEndian.Uint32(data[1:5]) envelope := &ConfluentEnvelope{ - Format: FormatAvro, // Default assumption; will be refined later + Format: FormatAvro, // Default assumption; will be refined by schema registry lookup SchemaID: schemaID, Indexes: nil, Payload: data[5:], // Default: payload starts after schema ID } - // For Protobuf, there may be additional indexes after the schema ID - // This is a more complex parsing that we'll implement when we add Protobuf support - // For now, assume Avro format + // Try to detect Protobuf format by looking for message indexes + // Protobuf messages in Confluent format may have varint-encoded indexes + // after the schema ID to identify nested message types + if protobufEnvelope, isProtobuf := ParseConfluentProtobufEnvelope(data); isProtobuf { + // If it looks like Protobuf (has valid indexes), use that parsing + if len(protobufEnvelope.Indexes) > 0 { + return protobufEnvelope, true + } + } return envelope, true } diff --git a/weed/mq/kafka/schema/manager.go b/weed/mq/kafka/schema/manager.go index 11ac98e90..a5603d84e 100644 --- a/weed/mq/kafka/schema/manager.go +++ b/weed/mq/kafka/schema/manager.go @@ -4,6 +4,10 @@ import ( "fmt" "sync" + "google.golang.org/protobuf/proto" + "google.golang.org/protobuf/reflect/protoreflect" + "google.golang.org/protobuf/types/dynamicpb" + "github.com/seaweedfs/seaweedfs/weed/pb/schema_pb" ) @@ -12,8 +16,9 @@ type Manager struct { registryClient *RegistryClient // Decoder cache - avroDecoders map[uint32]*AvroDecoder // schema ID -> decoder - decoderMu sync.RWMutex + avroDecoders map[uint32]*AvroDecoder // schema ID -> decoder + protobufDecoders map[uint32]*ProtobufDecoder // schema ID -> decoder + decoderMu sync.RWMutex // Configuration config ManagerConfig @@ -68,9 +73,10 @@ func NewManager(config ManagerConfig) (*Manager, error) { registryClient := NewRegistryClient(registryConfig) return &Manager{ - registryClient: registryClient, - avroDecoders: make(map[uint32]*AvroDecoder), - config: config, + registryClient: registryClient, + avroDecoders: make(map[uint32]*AvroDecoder), + protobufDecoders: make(map[uint32]*ProtobufDecoder), + config: config, }, nil } @@ -119,7 +125,10 @@ func (m *Manager) DecodeMessage(messageBytes []byte) (*DecodedMessage, error) { return nil, fmt.Errorf("failed to decode Avro message: %w", err) } case FormatProtobuf: - return nil, fmt.Errorf("Protobuf decoding not yet implemented (Phase 5)") + recordValue, recordType, err = m.decodeProtobufMessage(envelope, cachedSchema) + if err != nil { + return nil, fmt.Errorf("failed to decode Protobuf message: %w", err) + } case FormatJSONSchema: return nil, fmt.Errorf("JSON Schema decoding not yet implemented (Phase 6)") default: @@ -174,6 +183,38 @@ func (m *Manager) decodeAvroMessage(envelope *ConfluentEnvelope, cachedSchema *C return recordValue, recordType, nil } +// decodeProtobufMessage decodes a Protobuf message using cached or new decoder +func (m *Manager) decodeProtobufMessage(envelope *ConfluentEnvelope, cachedSchema *CachedSchema) (*schema_pb.RecordValue, *schema_pb.RecordType, error) { + // Get or create Protobuf decoder + decoder, err := m.getProtobufDecoder(envelope.SchemaID, cachedSchema.Schema) + if err != nil { + return nil, nil, fmt.Errorf("failed to get Protobuf decoder: %w", err) + } + + // Decode to RecordValue + recordValue, err := decoder.DecodeToRecordValue(envelope.Payload) + if err != nil { + if m.config.ValidationMode == ValidationStrict { + return nil, nil, fmt.Errorf("strict validation failed: %w", err) + } + // In permissive mode, try to decode as much as possible + return nil, nil, fmt.Errorf("permissive decoding failed: %w", err) + } + + // Get RecordType from descriptor + recordType, err := decoder.InferRecordType() + if err != nil { + // Fall back to inferring from the decoded map + if decodedMap, decodeErr := decoder.Decode(envelope.Payload); decodeErr == nil { + recordType = InferRecordTypeFromMap(decodedMap) + } else { + return nil, nil, fmt.Errorf("failed to infer record type: %w", err) + } + } + + return recordValue, recordType, nil +} + // getAvroDecoder gets or creates an Avro decoder for the given schema func (m *Manager) getAvroDecoder(schemaID uint32, schemaStr string) (*AvroDecoder, error) { // Check cache first @@ -198,6 +239,34 @@ func (m *Manager) getAvroDecoder(schemaID uint32, schemaStr string) (*AvroDecode return decoder, nil } +// getProtobufDecoder gets or creates a Protobuf decoder for the given schema +func (m *Manager) getProtobufDecoder(schemaID uint32, schemaStr string) (*ProtobufDecoder, error) { + // Check cache first + m.decoderMu.RLock() + if decoder, exists := m.protobufDecoders[schemaID]; exists { + m.decoderMu.RUnlock() + return decoder, nil + } + m.decoderMu.RUnlock() + + // For Protobuf, the schema is typically a binary FileDescriptorSet + // In Confluent Schema Registry, Protobuf schemas are stored as binary descriptors + schemaBytes := []byte(schemaStr) // Assume schemaStr contains binary data + + // Create new decoder + decoder, err := NewProtobufDecoder(schemaBytes) + if err != nil { + return nil, err + } + + // Cache the decoder + m.decoderMu.Lock() + m.protobufDecoders[schemaID] = decoder + m.decoderMu.Unlock() + + return decoder, nil +} + // createMetadata creates metadata for storage in SeaweedMQ func (m *Manager) createMetadata(envelope *ConfluentEnvelope, cachedSchema *CachedSchema) map[string]string { metadata := envelope.Metadata() @@ -254,6 +323,7 @@ func (m *Manager) ListSubjects() ([]string, error) { func (m *Manager) ClearCache() { m.decoderMu.Lock() m.avroDecoders = make(map[uint32]*AvroDecoder) + m.protobufDecoders = make(map[uint32]*ProtobufDecoder) m.decoderMu.Unlock() m.registryClient.ClearCache() @@ -262,7 +332,7 @@ func (m *Manager) ClearCache() { // GetCacheStats returns cache statistics func (m *Manager) GetCacheStats() (decoders, schemas, subjects int) { m.decoderMu.RLock() - decoders = len(m.avroDecoders) + decoders = len(m.avroDecoders) + len(m.protobufDecoders) m.decoderMu.RUnlock() schemas, subjects = m.registryClient.GetCacheStats() @@ -275,7 +345,7 @@ func (m *Manager) EncodeMessage(recordValue *schema_pb.RecordValue, schemaID uin case FormatAvro: return m.encodeAvroMessage(recordValue, schemaID) case FormatProtobuf: - return nil, fmt.Errorf("Protobuf encoding not yet implemented (Phase 7)") + return m.encodeProtobufMessage(recordValue, schemaID) case FormatJSONSchema: return nil, fmt.Errorf("JSON Schema encoding not yet implemented (Phase 7)") default: @@ -312,6 +382,124 @@ func (m *Manager) encodeAvroMessage(recordValue *schema_pb.RecordValue, schemaID return envelope, nil } +// encodeProtobufMessage encodes a RecordValue back to Protobuf binary format +func (m *Manager) encodeProtobufMessage(recordValue *schema_pb.RecordValue, schemaID uint32) ([]byte, error) { + // Get schema from registry + cachedSchema, err := m.registryClient.GetSchemaByID(schemaID) + if err != nil { + return nil, fmt.Errorf("failed to get schema for encoding: %w", err) + } + + // Get decoder (which contains the descriptor) + decoder, err := m.getProtobufDecoder(schemaID, cachedSchema.Schema) + if err != nil { + return nil, fmt.Errorf("failed to get decoder for encoding: %w", err) + } + + // Convert RecordValue back to Go map + goMap := recordValueToMap(recordValue) + + // Create a new message instance and populate it + msg := decoder.msgType.New() + if err := m.populateProtobufMessage(msg, goMap, decoder.descriptor); err != nil { + return nil, fmt.Errorf("failed to populate Protobuf message: %w", err) + } + + // Encode using Protobuf + binary, err := proto.Marshal(msg.Interface()) + if err != nil { + return nil, fmt.Errorf("failed to encode to Protobuf binary: %w", err) + } + + // Create Confluent envelope (with indexes if needed) + envelope := CreateConfluentEnvelope(FormatProtobuf, schemaID, nil, binary) + + return envelope, nil +} + +// populateProtobufMessage populates a Protobuf message from a Go map +func (m *Manager) populateProtobufMessage(msg protoreflect.Message, data map[string]interface{}, desc protoreflect.MessageDescriptor) error { + for key, value := range data { + // Find the field descriptor + fieldDesc := desc.Fields().ByName(protoreflect.Name(key)) + if fieldDesc == nil { + // Skip unknown fields in permissive mode + continue + } + + // Convert and set the value + protoValue, err := m.goValueToProtoValue(value, fieldDesc) + if err != nil { + return fmt.Errorf("failed to convert field %s: %w", key, err) + } + + msg.Set(fieldDesc, protoValue) + } + + return nil +} + +// goValueToProtoValue converts a Go value to a Protobuf Value +func (m *Manager) goValueToProtoValue(value interface{}, fieldDesc protoreflect.FieldDescriptor) (protoreflect.Value, error) { + if value == nil { + return protoreflect.Value{}, nil + } + + switch fieldDesc.Kind() { + case protoreflect.BoolKind: + if b, ok := value.(bool); ok { + return protoreflect.ValueOfBool(b), nil + } + case protoreflect.Int32Kind, protoreflect.Sint32Kind, protoreflect.Sfixed32Kind: + if i, ok := value.(int32); ok { + return protoreflect.ValueOfInt32(i), nil + } + case protoreflect.Int64Kind, protoreflect.Sint64Kind, protoreflect.Sfixed64Kind: + if i, ok := value.(int64); ok { + return protoreflect.ValueOfInt64(i), nil + } + case protoreflect.Uint32Kind, protoreflect.Fixed32Kind: + if i, ok := value.(uint32); ok { + return protoreflect.ValueOfUint32(i), nil + } + case protoreflect.Uint64Kind, protoreflect.Fixed64Kind: + if i, ok := value.(uint64); ok { + return protoreflect.ValueOfUint64(i), nil + } + case protoreflect.FloatKind: + if f, ok := value.(float32); ok { + return protoreflect.ValueOfFloat32(f), nil + } + case protoreflect.DoubleKind: + if f, ok := value.(float64); ok { + return protoreflect.ValueOfFloat64(f), nil + } + case protoreflect.StringKind: + if s, ok := value.(string); ok { + return protoreflect.ValueOfString(s), nil + } + case protoreflect.BytesKind: + if b, ok := value.([]byte); ok { + return protoreflect.ValueOfBytes(b), nil + } + case protoreflect.EnumKind: + if i, ok := value.(int32); ok { + return protoreflect.ValueOfEnum(protoreflect.EnumNumber(i)), nil + } + case protoreflect.MessageKind: + if nestedMap, ok := value.(map[string]interface{}); ok { + // Handle nested messages + nestedMsg := dynamicpb.NewMessage(fieldDesc.Message()) + if err := m.populateProtobufMessage(nestedMsg, nestedMap, fieldDesc.Message()); err != nil { + return protoreflect.Value{}, err + } + return protoreflect.ValueOfMessage(nestedMsg), nil + } + } + + return protoreflect.Value{}, fmt.Errorf("unsupported value type %T for field kind %v", value, fieldDesc.Kind()) +} + // recordValueToMap converts a RecordValue back to a Go map for encoding func recordValueToMap(recordValue *schema_pb.RecordValue) map[string]interface{} { result := make(map[string]interface{}) diff --git a/weed/mq/kafka/schema/protobuf_decoder.go b/weed/mq/kafka/schema/protobuf_decoder.go new file mode 100644 index 000000000..26a65f703 --- /dev/null +++ b/weed/mq/kafka/schema/protobuf_decoder.go @@ -0,0 +1,396 @@ +package schema + +import ( + "fmt" + + "google.golang.org/protobuf/proto" + "google.golang.org/protobuf/reflect/protoreflect" + "google.golang.org/protobuf/types/dynamicpb" + + "github.com/seaweedfs/seaweedfs/weed/pb/schema_pb" +) + +// ProtobufDecoder handles Protobuf schema decoding and conversion to SeaweedMQ format +type ProtobufDecoder struct { + descriptor protoreflect.MessageDescriptor + msgType protoreflect.MessageType +} + +// NewProtobufDecoder creates a new Protobuf decoder from a schema descriptor +func NewProtobufDecoder(schemaBytes []byte) (*ProtobufDecoder, error) { + // For Phase 5, we'll implement a simplified version + // In a full implementation, this would properly parse FileDescriptorSet + // and handle complex schema dependencies + + // For now, return an error indicating this needs proper implementation + return nil, fmt.Errorf("Protobuf decoder from binary descriptors not fully implemented in Phase 5 - use NewProtobufDecoderFromDescriptor for testing") +} + +// NewProtobufDecoderFromDescriptor creates a Protobuf decoder from a message descriptor +// This is used for testing and when we have pre-built descriptors +func NewProtobufDecoderFromDescriptor(msgDesc protoreflect.MessageDescriptor) *ProtobufDecoder { + msgType := dynamicpb.NewMessageType(msgDesc) + + return &ProtobufDecoder{ + descriptor: msgDesc, + msgType: msgType, + } +} + +// NewProtobufDecoderFromString creates a Protobuf decoder from a schema string +// This is a simplified version for testing - in production, schemas would be binary descriptors +func NewProtobufDecoderFromString(schemaStr string) (*ProtobufDecoder, error) { + // For Phase 5, we'll implement a basic string-to-descriptor parser + // In a full implementation, this would use protoc to compile .proto files + // or parse the Confluent Schema Registry's Protobuf descriptor format + + return nil, fmt.Errorf("string-based Protobuf schemas not yet implemented - use binary descriptors") +} + +// Decode decodes Protobuf binary data to a Go map representation +func (pd *ProtobufDecoder) Decode(data []byte) (map[string]interface{}, error) { + // Create a new message instance + msg := pd.msgType.New() + + // Unmarshal the binary data + if err := proto.Unmarshal(data, msg.Interface()); err != nil { + return nil, fmt.Errorf("failed to unmarshal Protobuf data: %w", err) + } + + // Convert to map representation + return pd.messageToMap(msg), nil +} + +// DecodeToRecordValue decodes Protobuf data directly to SeaweedMQ RecordValue +func (pd *ProtobufDecoder) DecodeToRecordValue(data []byte) (*schema_pb.RecordValue, error) { + msgMap, err := pd.Decode(data) + if err != nil { + return nil, err + } + + return MapToRecordValue(msgMap), nil +} + +// InferRecordType infers a SeaweedMQ RecordType from the Protobuf descriptor +func (pd *ProtobufDecoder) InferRecordType() (*schema_pb.RecordType, error) { + return pd.descriptorToRecordType(pd.descriptor), nil +} + +// messageToMap converts a Protobuf message to a Go map +func (pd *ProtobufDecoder) messageToMap(msg protoreflect.Message) map[string]interface{} { + result := make(map[string]interface{}) + + msg.Range(func(fd protoreflect.FieldDescriptor, v protoreflect.Value) bool { + fieldName := string(fd.Name()) + result[fieldName] = pd.valueToInterface(fd, v) + return true + }) + + return result +} + +// valueToInterface converts a Protobuf value to a Go interface{} +func (pd *ProtobufDecoder) valueToInterface(fd protoreflect.FieldDescriptor, v protoreflect.Value) interface{} { + if fd.IsList() { + // Handle repeated fields + list := v.List() + result := make([]interface{}, list.Len()) + for i := 0; i < list.Len(); i++ { + result[i] = pd.scalarValueToInterface(fd, list.Get(i)) + } + return result + } + + if fd.IsMap() { + // Handle map fields + mapVal := v.Map() + result := make(map[string]interface{}) + mapVal.Range(func(k protoreflect.MapKey, v protoreflect.Value) bool { + keyStr := fmt.Sprintf("%v", k.Interface()) + result[keyStr] = pd.scalarValueToInterface(fd.MapValue(), v) + return true + }) + return result + } + + return pd.scalarValueToInterface(fd, v) +} + +// scalarValueToInterface converts a scalar Protobuf value to Go interface{} +func (pd *ProtobufDecoder) scalarValueToInterface(fd protoreflect.FieldDescriptor, v protoreflect.Value) interface{} { + switch fd.Kind() { + case protoreflect.BoolKind: + return v.Bool() + case protoreflect.Int32Kind, protoreflect.Sint32Kind, protoreflect.Sfixed32Kind: + return int32(v.Int()) + case protoreflect.Int64Kind, protoreflect.Sint64Kind, protoreflect.Sfixed64Kind: + return v.Int() + case protoreflect.Uint32Kind, protoreflect.Fixed32Kind: + return uint32(v.Uint()) + case protoreflect.Uint64Kind, protoreflect.Fixed64Kind: + return v.Uint() + case protoreflect.FloatKind: + return float32(v.Float()) + case protoreflect.DoubleKind: + return v.Float() + case protoreflect.StringKind: + return v.String() + case protoreflect.BytesKind: + return v.Bytes() + case protoreflect.EnumKind: + return int32(v.Enum()) + case protoreflect.MessageKind: + // Handle nested messages + nestedMsg := v.Message() + return pd.messageToMap(nestedMsg) + default: + // Fallback to string representation + return fmt.Sprintf("%v", v.Interface()) + } +} + +// descriptorToRecordType converts a Protobuf descriptor to SeaweedMQ RecordType +func (pd *ProtobufDecoder) descriptorToRecordType(desc protoreflect.MessageDescriptor) *schema_pb.RecordType { + fields := make([]*schema_pb.Field, 0, desc.Fields().Len()) + + for i := 0; i < desc.Fields().Len(); i++ { + fd := desc.Fields().Get(i) + + field := &schema_pb.Field{ + Name: string(fd.Name()), + FieldIndex: int32(fd.Number() - 1), // Protobuf field numbers start at 1 + Type: pd.fieldDescriptorToType(fd), + IsRequired: fd.Cardinality() == protoreflect.Required, + IsRepeated: fd.IsList(), + } + + fields = append(fields, field) + } + + return &schema_pb.RecordType{ + Fields: fields, + } +} + +// fieldDescriptorToType converts a Protobuf field descriptor to SeaweedMQ Type +func (pd *ProtobufDecoder) fieldDescriptorToType(fd protoreflect.FieldDescriptor) *schema_pb.Type { + if fd.IsList() { + // Handle repeated fields + elementType := pd.scalarKindToType(fd.Kind(), fd.Message()) + return &schema_pb.Type{ + Kind: &schema_pb.Type_ListType{ + ListType: &schema_pb.ListType{ + ElementType: elementType, + }, + }, + } + } + + if fd.IsMap() { + // Handle map fields - for simplicity, treat as record with key/value fields + keyType := pd.scalarKindToType(fd.MapKey().Kind(), nil) + valueType := pd.scalarKindToType(fd.MapValue().Kind(), fd.MapValue().Message()) + + mapRecordType := &schema_pb.RecordType{ + Fields: []*schema_pb.Field{ + { + Name: "key", + FieldIndex: 0, + Type: keyType, + IsRequired: true, + }, + { + Name: "value", + FieldIndex: 1, + Type: valueType, + IsRequired: false, + }, + }, + } + + return &schema_pb.Type{ + Kind: &schema_pb.Type_RecordType{ + RecordType: mapRecordType, + }, + } + } + + return pd.scalarKindToType(fd.Kind(), fd.Message()) +} + +// scalarKindToType converts a Protobuf kind to SeaweedMQ scalar type +func (pd *ProtobufDecoder) scalarKindToType(kind protoreflect.Kind, msgDesc protoreflect.MessageDescriptor) *schema_pb.Type { + switch kind { + case protoreflect.BoolKind: + return &schema_pb.Type{ + Kind: &schema_pb.Type_ScalarType{ + ScalarType: schema_pb.ScalarType_BOOL, + }, + } + case protoreflect.Int32Kind, protoreflect.Sint32Kind, protoreflect.Sfixed32Kind: + return &schema_pb.Type{ + Kind: &schema_pb.Type_ScalarType{ + ScalarType: schema_pb.ScalarType_INT32, + }, + } + case protoreflect.Int64Kind, protoreflect.Sint64Kind, protoreflect.Sfixed64Kind: + return &schema_pb.Type{ + Kind: &schema_pb.Type_ScalarType{ + ScalarType: schema_pb.ScalarType_INT64, + }, + } + case protoreflect.Uint32Kind, protoreflect.Fixed32Kind: + return &schema_pb.Type{ + Kind: &schema_pb.Type_ScalarType{ + ScalarType: schema_pb.ScalarType_INT32, // Map uint32 to int32 for simplicity + }, + } + case protoreflect.Uint64Kind, protoreflect.Fixed64Kind: + return &schema_pb.Type{ + Kind: &schema_pb.Type_ScalarType{ + ScalarType: schema_pb.ScalarType_INT64, // Map uint64 to int64 for simplicity + }, + } + case protoreflect.FloatKind: + return &schema_pb.Type{ + Kind: &schema_pb.Type_ScalarType{ + ScalarType: schema_pb.ScalarType_FLOAT, + }, + } + case protoreflect.DoubleKind: + return &schema_pb.Type{ + Kind: &schema_pb.Type_ScalarType{ + ScalarType: schema_pb.ScalarType_DOUBLE, + }, + } + case protoreflect.StringKind: + return &schema_pb.Type{ + Kind: &schema_pb.Type_ScalarType{ + ScalarType: schema_pb.ScalarType_STRING, + }, + } + case protoreflect.BytesKind: + return &schema_pb.Type{ + Kind: &schema_pb.Type_ScalarType{ + ScalarType: schema_pb.ScalarType_BYTES, + }, + } + case protoreflect.EnumKind: + return &schema_pb.Type{ + Kind: &schema_pb.Type_ScalarType{ + ScalarType: schema_pb.ScalarType_INT32, // Enums as int32 + }, + } + case protoreflect.MessageKind: + if msgDesc != nil { + // Handle nested messages + nestedRecordType := pd.descriptorToRecordType(msgDesc) + return &schema_pb.Type{ + Kind: &schema_pb.Type_RecordType{ + RecordType: nestedRecordType, + }, + } + } + fallthrough + default: + // Default to string for unknown types + return &schema_pb.Type{ + Kind: &schema_pb.Type_ScalarType{ + ScalarType: schema_pb.ScalarType_STRING, + }, + } + } +} + +// ParseConfluentProtobufEnvelope parses a Confluent Protobuf envelope with indexes +func ParseConfluentProtobufEnvelope(data []byte) (*ConfluentEnvelope, bool) { + if len(data) < 5 { + return nil, false + } + + // Check for Confluent magic byte + if data[0] != 0x00 { + return nil, false + } + + // Extract schema ID + schemaID := uint32(data[1])<<24 | uint32(data[2])<<16 | uint32(data[3])<<8 | uint32(data[4]) + + envelope := &ConfluentEnvelope{ + Format: FormatProtobuf, + SchemaID: schemaID, + Indexes: nil, + Payload: data[5:], // Default: payload starts after schema ID + } + + // For Protobuf, there may be message indexes after the schema ID + // These are used to identify which message type within the schema to use + // Format: [magic_byte][schema_id][index1][index2]...[message_data] + // Indexes are encoded as varints + + offset := 5 + for offset < len(data) { + // Try to read a varint + index, bytesRead := readVarint(data[offset:]) + if bytesRead == 0 { + // No more varints, rest is message data + break + } + + envelope.Indexes = append(envelope.Indexes, int(index)) + offset += bytesRead + + // Limit to reasonable number of indexes to avoid infinite loop + if len(envelope.Indexes) > 10 { + break + } + } + + envelope.Payload = data[offset:] + return envelope, true +} + +// readVarint reads a varint from the byte slice and returns the value and bytes consumed +func readVarint(data []byte) (uint64, int) { + var result uint64 + var shift uint + + for i, b := range data { + if i >= 10 { // Prevent overflow (max varint is 10 bytes) + return 0, 0 + } + + result |= uint64(b&0x7F) << shift + + if b&0x80 == 0 { + // Last byte (MSB is 0) + return result, i + 1 + } + + shift += 7 + } + + // Incomplete varint + return 0, 0 +} + +// encodeVarint encodes a uint64 as a varint +func encodeVarint(value uint64) []byte { + if value == 0 { + return []byte{0} + } + + var result []byte + for value > 0 { + b := byte(value & 0x7F) + value >>= 7 + + if value > 0 { + b |= 0x80 // Set continuation bit + } + + result = append(result, b) + } + + return result +}