You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

359 lines
11 KiB

package schema
import (
"encoding/json"
"fmt"
"github.com/jhump/protoreflect/desc/protoparse"
"google.golang.org/protobuf/proto"
"google.golang.org/protobuf/reflect/protodesc"
"google.golang.org/protobuf/reflect/protoreflect"
"google.golang.org/protobuf/types/dynamicpb"
"github.com/seaweedfs/seaweedfs/weed/pb/schema_pb"
)
// ProtobufDecoder handles Protobuf schema decoding and conversion to SeaweedMQ format.
//
// A decoder is bound to exactly one Protobuf message descriptor. Payloads are
// decoded into instances of that message and the SeaweedMQ record schema is
// derived from the same descriptor.
type ProtobufDecoder struct {
// descriptor is the message descriptor the decoder was built from; it is
// the input InferRecordType uses to derive the SeaweedMQ RecordType.
descriptor protoreflect.MessageDescriptor
// msgType is the dynamic message type built from descriptor; Decode calls
// its New method to obtain a fresh message for every payload.
msgType protoreflect.MessageType
}
// NewProtobufDecoder builds a decoder from a binary schema descriptor.
//
// The bytes are handed to the package's descriptor parser with an empty
// message name, so which message gets selected is decided by the parser
// (the Schema Registry does not supply a message name at this stage).
//
// An error is returned when the descriptor cannot be parsed or when the
// parsed schema carries no message descriptor.
func NewProtobufDecoder(schemaBytes []byte) (*ProtobufDecoder, error) {
	descParser := NewProtobufDescriptorParser()

	// Empty message name: no explicit message is requested from the parser.
	parsed, parseErr := descParser.ParseBinaryDescriptor(schemaBytes, "")
	if parseErr != nil {
		return nil, fmt.Errorf("failed to parse binary descriptor: %w", parseErr)
	}

	msgDesc := parsed.MessageDescriptor
	if msgDesc != nil {
		return NewProtobufDecoderFromDescriptor(msgDesc), nil
	}
	return nil, fmt.Errorf("no message descriptor found in schema")
}
// NewProtobufDecoderFromDescriptor wraps an already-resolved message
// descriptor in a decoder. It is the common tail of the other constructors
// and is also convenient for tests that build descriptors by hand.
//
// The descriptor is stored as-is and a dynamic message type is derived from
// it so payloads can be instantiated without generated Go code.
func NewProtobufDecoderFromDescriptor(msgDesc protoreflect.MessageDescriptor) *ProtobufDecoder {
	decoder := new(ProtobufDecoder)
	decoder.descriptor = msgDesc
	decoder.msgType = dynamicpb.NewMessageType(msgDesc)
	return decoder
}
// NewProtobufDecoderFromString builds a decoder from the text of a .proto
// file, the format in which the Schema Registry delivers Protobuf schemas.
//
// The text is parsed as a single in-memory file named "schema.proto",
// converted to a protoreflect file descriptor, and the first top-level
// message declared in that file becomes the decoder's message type.
//
// An error is returned when parsing fails, when the descriptor cannot be
// built, or when the file declares no top-level message.
//
// NOTE(review): a nil resolver is passed to protodesc.NewFile, so schemas
// containing import statements presumably fail at that step - confirm.
func NewProtobufDecoderFromString(schemaStr string) (*ProtobufDecoder, error) {
	const virtualFile = "schema.proto"

	// Serve the schema text to protoparse from memory instead of from disk.
	sources := map[string]string{virtualFile: schemaStr}
	textParser := protoparse.Parser{Accessor: protoparse.FileContentsFromMap(sources)}

	parsed, err := textParser.ParseFiles(virtualFile)
	if err != nil {
		return nil, fmt.Errorf("failed to parse .proto schema: %w", err)
	}
	if len(parsed) == 0 {
		return nil, fmt.Errorf("no file descriptors found in schema")
	}

	// Bridge from the protoparse descriptor to a protoreflect one via the
	// plain FileDescriptorProto message.
	reflFile, err := protodesc.NewFile(parsed[0].AsFileDescriptorProto(), nil)
	if err != nil {
		return nil, fmt.Errorf("failed to create file descriptor: %w", err)
	}

	topLevel := reflFile.Messages()
	if topLevel.Len() == 0 {
		return nil, fmt.Errorf("no message types found in schema")
	}

	// By convention here, the first declared message is the payload type.
	return NewProtobufDecoderFromDescriptor(topLevel.Get(0)), nil
}
// Decode turns a payload into a generic Go map keyed by field name.
//
// The payload is first interpreted as Protobuf binary for the decoder's
// message type. If that fails, it is retried as a JSON object, which keeps
// producers working that do not emit Protobuf binary yet; the JSON map is
// returned unchanged. When both attempts fail, the Protobuf error (not the
// JSON one) is wrapped and returned.
func (pd *ProtobufDecoder) Decode(data []byte) (map[string]interface{}, error) {
	msg := pd.msgType.New()

	pbErr := proto.Unmarshal(data, msg.Interface())
	if pbErr == nil {
		return pd.messageToMap(msg), nil
	}

	// Compatibility fallback only; proper Protobuf binary is preferred.
	var fallback map[string]interface{}
	if json.Unmarshal(data, &fallback) == nil {
		return fallback, nil
	}

	return nil, fmt.Errorf("failed to unmarshal Protobuf data: %w", pbErr)
}
// DecodeToRecordValue decodes a payload with Decode and converts the
// resulting map into a SeaweedMQ RecordValue. Any error from Decode is
// passed through unchanged.
func (pd *ProtobufDecoder) DecodeToRecordValue(data []byte) (*schema_pb.RecordValue, error) {
	decoded, decodeErr := pd.Decode(data)
	if decodeErr != nil {
		return nil, decodeErr
	}

	record := MapToRecordValue(decoded)
	return record, nil
}
// InferRecordType infers a SeaweedMQ RecordType from the Protobuf message
// descriptor the decoder was built with.
//
// It returns an error, instead of panicking on a nil interface, when it is
// called on a nil decoder or on a decoder that holds no message descriptor
// (for example a zero-value ProtobufDecoder).
func (pd *ProtobufDecoder) InferRecordType() (*schema_pb.RecordType, error) {
	if pd == nil || pd.descriptor == nil {
		return nil, fmt.Errorf("protobuf decoder has no message descriptor")
	}
	return pd.descriptorToRecordType(pd.descriptor), nil
}
// messageToMap flattens a Protobuf message into a Go map keyed by the short
// field name. Only the fields that msg.Range visits end up in the map; each
// value is converted with valueToInterface.
func (pd *ProtobufDecoder) messageToMap(msg protoreflect.Message) map[string]interface{} {
	fields := map[string]interface{}{}

	collect := func(fd protoreflect.FieldDescriptor, val protoreflect.Value) bool {
		fields[string(fd.Name())] = pd.valueToInterface(fd, val)
		return true // never stop early: every visited field is wanted
	}
	msg.Range(collect)

	return fields
}
// valueToInterface converts the value of one message field into plain Go
// data, dispatching on the field's shape:
//
//   - repeated field -> []interface{} with one converted entry per element
//   - map field      -> map[string]interface{}; keys are rendered with their
//     default fmt formatting, values use the map's value descriptor
//   - anything else  -> a single value via scalarValueToInterface
func (pd *ProtobufDecoder) valueToInterface(fd protoreflect.FieldDescriptor, v protoreflect.Value) interface{} {
	switch {
	case fd.IsList():
		items := v.List()
		count := items.Len()
		converted := make([]interface{}, 0, count)
		for idx := 0; idx < count; idx++ {
			converted = append(converted, pd.scalarValueToInterface(fd, items.Get(idx)))
		}
		return converted

	case fd.IsMap():
		source := v.Map()
		entries := make(map[string]interface{})
		source.Range(func(key protoreflect.MapKey, elem protoreflect.Value) bool {
			// Map values are described by the synthetic entry's value field,
			// not by the map field itself.
			entries[fmt.Sprint(key.Interface())] = pd.scalarValueToInterface(fd.MapValue(), elem)
			return true
		})
		return entries

	default:
		return pd.scalarValueToInterface(fd, v)
	}
}
// scalarValueToInterface converts one non-repeated Protobuf value into the
// Go value matching the field's declared kind.
//
// 32-bit kinds are narrowed to int32/uint32/float32, 64-bit kinds keep the
// width protoreflect returns, enums are reported by number as int32, and
// nested messages become maps via messageToMap. Any kind not listed below is
// rendered as its default fmt string.
func (pd *ProtobufDecoder) scalarValueToInterface(fd protoreflect.FieldDescriptor, v protoreflect.Value) interface{} {
	var converted interface{}

	switch fd.Kind() {
	// Composite and textual kinds.
	case protoreflect.MessageKind:
		converted = pd.messageToMap(v.Message())
	case protoreflect.StringKind:
		converted = v.String()
	case protoreflect.BytesKind:
		converted = v.Bytes()
	case protoreflect.BoolKind:
		converted = v.Bool()
	case protoreflect.EnumKind:
		converted = int32(v.Enum())

	// Signed integers.
	case protoreflect.Int32Kind, protoreflect.Sint32Kind, protoreflect.Sfixed32Kind:
		converted = int32(v.Int())
	case protoreflect.Int64Kind, protoreflect.Sint64Kind, protoreflect.Sfixed64Kind:
		converted = v.Int()

	// Unsigned integers.
	case protoreflect.Uint32Kind, protoreflect.Fixed32Kind:
		converted = uint32(v.Uint())
	case protoreflect.Uint64Kind, protoreflect.Fixed64Kind:
		converted = v.Uint()

	// Floating point.
	case protoreflect.FloatKind:
		converted = float32(v.Float())
	case protoreflect.DoubleKind:
		converted = v.Float()

	default:
		// Unhandled kind: fall back to a string rendering of the raw value.
		converted = fmt.Sprintf("%v", v.Interface())
	}

	return converted
}
// descriptorToRecordType converts a Protobuf message descriptor into a
// SeaweedMQ RecordType with one Field per declared Protobuf field.
//
// Self-referential message types (a message that contains itself directly
// or through other messages) are supported: the conversion tracks the
// messages it is currently expanding and stops descending when it meets one
// of them again, instead of recursing until the stack overflows.
func (pd *ProtobufDecoder) descriptorToRecordType(desc protoreflect.MessageDescriptor) *schema_pb.RecordType {
	return pd.recordTypeGuarded(desc, make(map[protoreflect.FullName]bool))
}

// fieldDescriptorToType converts a Protobuf field descriptor to a SeaweedMQ
// Type: repeated fields become list types, map fields become a two-field
// key/value record, and everything else is mapped by its kind.
func (pd *ProtobufDecoder) fieldDescriptorToType(fd protoreflect.FieldDescriptor) *schema_pb.Type {
	return pd.fieldTypeGuarded(fd, make(map[protoreflect.FullName]bool))
}

// scalarKindToType converts a Protobuf kind to a SeaweedMQ Type. msgDesc is
// only consulted for message kinds and may be nil otherwise; a message kind
// without a descriptor, and any unknown kind, maps to STRING.
func (pd *ProtobufDecoder) scalarKindToType(kind protoreflect.Kind, msgDesc protoreflect.MessageDescriptor) *schema_pb.Type {
	return pd.kindTypeGuarded(kind, msgDesc, make(map[protoreflect.FullName]bool))
}

// recordTypeGuarded is the cycle-aware implementation of
// descriptorToRecordType. inProgress holds the full names of the messages
// currently being expanded on the call path; desc is added for the duration
// of the call and removed again on return, so a message used by several
// sibling fields is still expanded for each of them.
func (pd *ProtobufDecoder) recordTypeGuarded(desc protoreflect.MessageDescriptor, inProgress map[protoreflect.FullName]bool) *schema_pb.RecordType {
	name := desc.FullName()
	inProgress[name] = true
	defer delete(inProgress, name)

	declared := desc.Fields()
	fields := make([]*schema_pb.Field, 0, declared.Len())
	for i := 0; i < declared.Len(); i++ {
		fd := declared.Get(i)
		fields = append(fields, &schema_pb.Field{
			Name:       string(fd.Name()),
			FieldIndex: int32(fd.Number() - 1), // Protobuf field numbers start at 1
			Type:       pd.fieldTypeGuarded(fd, inProgress),
			IsRequired: fd.Cardinality() == protoreflect.Required,
			IsRepeated: fd.IsList(),
		})
	}

	return &schema_pb.RecordType{Fields: fields}
}

// fieldTypeGuarded is the cycle-aware implementation of
// fieldDescriptorToType; inProgress is passed through to nested message
// conversions.
func (pd *ProtobufDecoder) fieldTypeGuarded(fd protoreflect.FieldDescriptor, inProgress map[protoreflect.FullName]bool) *schema_pb.Type {
	switch {
	case fd.IsList():
		// Repeated field: a list whose element type follows the field's kind.
		return &schema_pb.Type{
			Kind: &schema_pb.Type_ListType{
				ListType: &schema_pb.ListType{
					ElementType: pd.kindTypeGuarded(fd.Kind(), fd.Message(), inProgress),
				},
			},
		}

	case fd.IsMap():
		// Map field: for simplicity, modelled as a record with a required
		// "key" field and an optional "value" field.
		keyType := pd.kindTypeGuarded(fd.MapKey().Kind(), nil, inProgress)
		valueType := pd.kindTypeGuarded(fd.MapValue().Kind(), fd.MapValue().Message(), inProgress)
		return &schema_pb.Type{
			Kind: &schema_pb.Type_RecordType{
				RecordType: &schema_pb.RecordType{
					Fields: []*schema_pb.Field{
						{
							Name:       "key",
							FieldIndex: 0,
							Type:       keyType,
							IsRequired: true,
						},
						{
							Name:       "value",
							FieldIndex: 1,
							Type:       valueType,
							IsRequired: false,
						},
					},
				},
			},
		}

	default:
		return pd.kindTypeGuarded(fd.Kind(), fd.Message(), inProgress)
	}
}

// kindTypeGuarded is the cycle-aware implementation of scalarKindToType.
//
// A message kind whose descriptor is already being expanded on the current
// call path is emitted as a record type without fields: the type stays a
// record, but the recursion ends there.
// NOTE(review): an empty record was chosen as the cut-off marker so the
// kind still matches the nested maps Decode produces - confirm downstream
// consumers accept it.
func (pd *ProtobufDecoder) kindTypeGuarded(kind protoreflect.Kind, msgDesc protoreflect.MessageDescriptor, inProgress map[protoreflect.FullName]bool) *schema_pb.Type {
	// STRING is the fallback for unknown kinds and for message kinds that
	// come without a descriptor.
	scalarType := schema_pb.ScalarType_STRING

	switch kind {
	case protoreflect.MessageKind:
		if msgDesc != nil {
			nested := &schema_pb.RecordType{}
			if !inProgress[msgDesc.FullName()] {
				nested = pd.recordTypeGuarded(msgDesc, inProgress)
			}
			return &schema_pb.Type{
				Kind: &schema_pb.Type_RecordType{
					RecordType: nested,
				},
			}
		}
	case protoreflect.BoolKind:
		scalarType = schema_pb.ScalarType_BOOL
	case protoreflect.Int32Kind, protoreflect.Sint32Kind, protoreflect.Sfixed32Kind:
		scalarType = schema_pb.ScalarType_INT32
	case protoreflect.Int64Kind, protoreflect.Sint64Kind, protoreflect.Sfixed64Kind:
		scalarType = schema_pb.ScalarType_INT64
	case protoreflect.Uint32Kind, protoreflect.Fixed32Kind:
		scalarType = schema_pb.ScalarType_INT32 // Map uint32 to int32 for simplicity
	case protoreflect.Uint64Kind, protoreflect.Fixed64Kind:
		scalarType = schema_pb.ScalarType_INT64 // Map uint64 to int64 for simplicity
	case protoreflect.FloatKind:
		scalarType = schema_pb.ScalarType_FLOAT
	case protoreflect.DoubleKind:
		scalarType = schema_pb.ScalarType_DOUBLE
	case protoreflect.StringKind:
		scalarType = schema_pb.ScalarType_STRING
	case protoreflect.BytesKind:
		scalarType = schema_pb.ScalarType_BYTES
	case protoreflect.EnumKind:
		scalarType = schema_pb.ScalarType_INT32 // Enums as int32
	}

	return &schema_pb.Type{
		Kind: &schema_pb.Type_ScalarType{
			ScalarType: scalarType,
		},
	}
}