You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

245 lines
8.2 KiB

package schema
import (
	"crypto/sha256"
	"fmt"

	"google.golang.org/protobuf/proto"
	"google.golang.org/protobuf/reflect/protoreflect"
	"google.golang.org/protobuf/types/descriptorpb"
)
// ProtobufSchema represents a parsed Protobuf schema together with the
// message type that was looked up in it.
type ProtobufSchema struct {
// FileDescriptorSet is the descriptor set unmarshalled from the binary payload.
FileDescriptorSet *descriptorpb.FileDescriptorSet
// MessageDescriptor is the resolved descriptor for MessageName.
// ParseBinaryDescriptor stores nil here when the lookup fails.
MessageDescriptor protoreflect.MessageDescriptor
// MessageName is the message name that was requested from the parser.
MessageName string
// PackageName is the package of the file in which the message was located;
// it is empty when the message was not found in any file.
PackageName string
// Dependencies holds the distinct dependency entries collected from every
// file in FileDescriptorSet.
Dependencies []string
}
// ProtobufDescriptorParser handles parsing of Confluent Schema Registry Protobuf descriptors.
//
// NOTE(review): the methods visible in this file read and write the cache map
// without any synchronization — confirm a parser is never shared across
// goroutines, or guard it with a mutex.
type ProtobufDescriptorParser struct {
// Cache for parsed descriptors to avoid re-parsing. Keys are strings built by
// ParseBinaryDescriptor from the descriptor bytes and the message name.
descriptorCache map[string]*ProtobufSchema
}
// NewProtobufDescriptorParser returns a parser whose descriptor cache is
// allocated and empty.
func NewProtobufDescriptorParser() *ProtobufDescriptorParser {
	parser := new(ProtobufDescriptorParser)
	parser.descriptorCache = map[string]*ProtobufSchema{}
	return parser
}
// ParseBinaryDescriptor parses a Confluent Schema Registry Protobuf binary
// descriptor and looks up messageName in it.
//
// Parameters:
//   - binaryData: a serialized FileDescriptorSet, as typically returned by the
//     schema registry.
//   - messageName: the name of the message to locate in the descriptor set.
//
// Results are cached per (descriptor content, messageName) pair. The cache key
// is derived from a SHA-256 digest of the complete payload, so two different
// descriptor sets that merely share their leading bytes can never be confused
// with each other.
//
// Returns the parsed schema on success. Returns an error when the payload
// cannot be unmarshalled, fails validation, or the message cannot be resolved.
// A failed lookup still caches the parsed FileDescriptorSet; a later call with
// the same inputs skips unmarshalling and validation and re-runs only the
// lookup, so it reports exactly the same error as the first call did.
func (p *ProtobufDescriptorParser) ParseBinaryDescriptor(binaryData []byte, messageName string) (*ProtobufSchema, error) {
	// Hash the whole payload: keying on a short prefix would alias distinct
	// descriptor sets that happen to start with the same bytes.
	cacheKey := fmt.Sprintf("%x:%s", sha256.Sum256(binaryData), messageName)

	if cached, exists := p.descriptorCache[cacheKey]; exists {
		if cached.MessageDescriptor != nil {
			return cached, nil
		}
		// The entry was cached without a resolved descriptor. Repeat the lookup on
		// the already-parsed descriptor set so the caller gets the genuine
		// failure reason (not found / nested / unresolved) instead of a guess.
		messageDesc, packageName, err := p.findMessageDescriptor(cached.FileDescriptorSet, messageName)
		if err != nil {
			return nil, fmt.Errorf("failed to find message descriptor for %s: %w", messageName, err)
		}
		cached.MessageDescriptor = messageDesc
		cached.PackageName = packageName
		return cached, nil
	}

	// Parse the FileDescriptorSet from binary data.
	var fileDescriptorSet descriptorpb.FileDescriptorSet
	if err := proto.Unmarshal(binaryData, &fileDescriptorSet); err != nil {
		return nil, fmt.Errorf("failed to unmarshal FileDescriptorSet: %w", err)
	}

	// Validate the descriptor set.
	if err := p.validateDescriptorSet(&fileDescriptorSet); err != nil {
		return nil, fmt.Errorf("invalid descriptor set: %w", err)
	}

	// A parser created as a zero value has a nil cache map; writing to it would
	// panic, so allocate it lazily before the first store.
	if p.descriptorCache == nil {
		p.descriptorCache = make(map[string]*ProtobufSchema)
	}

	// Find the target message descriptor.
	messageDesc, packageName, err := p.findMessageDescriptor(&fileDescriptorSet, messageName)
	if err != nil {
		// For Phase E1, we still cache the FileDescriptorSet even if message
		// resolution fails, so the same binary data is not re-parsed.
		p.descriptorCache[cacheKey] = &ProtobufSchema{
			FileDescriptorSet: &fileDescriptorSet,
			MessageDescriptor: nil, // Not resolved in Phase E1
			MessageName:       messageName,
			PackageName:       packageName,
			Dependencies:      p.extractDependencies(&fileDescriptorSet),
		}
		return nil, fmt.Errorf("failed to find message descriptor for %s: %w", messageName, err)
	}

	// Build and cache the fully resolved schema.
	schema := &ProtobufSchema{
		FileDescriptorSet: &fileDescriptorSet,
		MessageDescriptor: messageDesc,
		MessageName:       messageName,
		PackageName:       packageName,
		Dependencies:      p.extractDependencies(&fileDescriptorSet),
	}
	p.descriptorCache[cacheKey] = schema
	return schema, nil
}
// validateDescriptorSet runs basic sanity checks on fds: the set must contain
// at least one file, and every file must carry both a name and a package.
// It returns an error describing the first violation, or nil when all checks pass.
func (p *ProtobufDescriptorParser) validateDescriptorSet(fds *descriptorpb.FileDescriptorSet) error {
	files := fds.File
	if len(files) == 0 {
		return fmt.Errorf("FileDescriptorSet contains no files")
	}

	for idx := range files {
		fd := files[idx]
		switch {
		case fd.Name == nil:
			return fmt.Errorf("file descriptor %d has no name", idx)
		case fd.Package == nil:
			// Name is known to be non-nil here because the previous case did not match.
			return fmt.Errorf("file descriptor %s has no package", *fd.Name)
		}
	}
	return nil
}
// findMessageDescriptor searches the files of fds for a message called
// messageName, checking each top-level message first and then its nested
// messages.
//
// This is the simplified Phase E1 implementation: it never returns a usable
// descriptor. When the message is found it returns a nil descriptor, the
// package name of the containing file, and a "not fully implemented" error.
// When the message is not found it returns a nil descriptor, an empty package
// name, and a "not found" error.
//
// A complete implementation would build a descriptor registry from the
// FileDescriptorSet, resolve imports and dependencies, handle nested types and
// packages correctly, and support fully qualified message names.
func (p *ProtobufDescriptorParser) findMessageDescriptor(fds *descriptorpb.FileDescriptorSet, messageName string) (protoreflect.MessageDescriptor, string, error) {
	for _, fd := range fds.File {
		var pkg string
		if fd.Package != nil {
			pkg = *fd.Package
		}

		for _, msg := range fd.MessageType {
			// Direct hit on a top-level message of this file.
			topLevelMatch := msg.Name != nil && *msg.Name == messageName
			if topLevelMatch {
				// Phase E1 placeholder; proper resolution is planned for Phase E2.
				return nil, pkg, fmt.Errorf("message descriptor resolution not fully implemented in Phase E1 - found message %s in package %s", messageName, pkg)
			}

			// Otherwise look through everything nested below this message.
			if p.searchNestedMessages(msg, messageName) == nil {
				continue
			}
			return nil, pkg, fmt.Errorf("nested message descriptor resolution not fully implemented in Phase E1 - found nested message %s", messageName)
		}
	}

	return nil, "", fmt.Errorf("message %s not found in descriptor set", messageName)
}
// searchNestedMessages walks the nested types of messageType depth-first and
// returns the first one whose name equals targetName, or nil if none matches.
// Each nested type is checked itself before its own nested types are searched.
func (p *ProtobufDescriptorParser) searchNestedMessages(messageType *descriptorpb.DescriptorProto, targetName string) *descriptorpb.DescriptorProto {
	children := messageType.NestedType
	for i := 0; i < len(children); i++ {
		child := children[i]
		if name := child.Name; name != nil && *name == targetName {
			return child
		}
		// Descend into this child before moving on to its siblings.
		if hit := p.searchNestedMessages(child, targetName); hit != nil {
			return hit
		}
	}
	return nil
}
// extractDependencies returns the distinct dependency entries declared by the
// files in fds.
//
// Entries are returned in the order they are first encountered while walking
// the files, so the result is deterministic for a given descriptor set
// (collecting them by ranging over a map would yield a different order on
// every call). The returned slice is never nil; it is empty when no file
// declares a dependency.
func (p *ProtobufDescriptorParser) extractDependencies(fds *descriptorpb.FileDescriptorSet) []string {
	seen := make(map[string]struct{})
	dependencies := make([]string, 0)

	for _, file := range fds.File {
		for _, dep := range file.Dependency {
			if _, dup := seen[dep]; dup {
				continue
			}
			seen[dep] = struct{}{}
			dependencies = append(dependencies, dep)
		}
	}
	return dependencies
}
// GetMessageFields is meant to return information about the fields of the
// schema's message. In Phase E1 it is a stub: it always returns a nil slice
// and a "not implemented" error. Real extraction is deferred to Phase E2,
// once proper descriptor resolution exists.
func (s *ProtobufSchema) GetMessageFields() ([]FieldInfo, error) {
	var noFields []FieldInfo
	notImplemented := fmt.Errorf("field information extraction not implemented in Phase E1")
	return noFields, notImplemented
}
// FieldInfo represents information about a single Protobuf field.
// Nothing in this file populates it yet; GetMessageFields is still a stub.
type FieldInfo struct {
// Name is the field name; GetFieldByName matches on it.
Name string
// Number is the field number; GetFieldByNumber matches on it.
Number int32
// Type is the field's type as a string.
// NOTE(review): presumably the scalar/kind name — confirm once populated.
Type string
Label string // one of: optional, required, repeated
TypeName string // referenced type name, for message/enum fields
}
// GetFieldByName looks up a field of the schema's message by its name.
// It returns a pointer to a copy of the first matching FieldInfo, the error
// from GetMessageFields if the field list cannot be obtained, or a
// "not found" error when no field has that name.
func (s *ProtobufSchema) GetFieldByName(fieldName string) (*FieldInfo, error) {
	all, err := s.GetMessageFields()
	if err != nil {
		return nil, err
	}

	for i := range all {
		if all[i].Name != fieldName {
			continue
		}
		// Hand back a copy so the caller does not alias the slice element.
		match := all[i]
		return &match, nil
	}
	return nil, fmt.Errorf("field %s not found", fieldName)
}
// GetFieldByNumber looks up a field of the schema's message by its field
// number. It returns a pointer to a copy of the first matching FieldInfo, the
// error from GetMessageFields if the field list cannot be obtained, or a
// "not found" error when no field carries that number.
func (s *ProtobufSchema) GetFieldByNumber(fieldNumber int32) (*FieldInfo, error) {
	all, err := s.GetMessageFields()
	if err != nil {
		return nil, err
	}

	for i := range all {
		if all[i].Number != fieldNumber {
			continue
		}
		// Hand back a copy so the caller does not alias the slice element.
		match := all[i]
		return &match, nil
	}
	return nil, fmt.Errorf("field number %d not found", fieldNumber)
}
// ValidateMessage is meant to check that messageData conforms to the schema.
// In Phase E1 it is a stub: the input is not inspected and a
// "not implemented" error is always returned. Real validation is deferred to
// Phase E2.
func (s *ProtobufSchema) ValidateMessage(messageData []byte) error {
	notImplemented := fmt.Errorf("message validation not implemented in Phase E1")
	return notImplemented
}
// ClearCache discards every cached descriptor by replacing the cache with a
// freshly allocated, empty map.
func (p *ProtobufDescriptorParser) ClearCache() {
	p.descriptorCache = map[string]*ProtobufSchema{}
}
// GetCacheStats reports statistics about the descriptor cache. The returned
// map has a single entry, "cached_descriptors", holding the number of entries
// currently in the cache as an int.
func (p *ProtobufDescriptorParser) GetCacheStats() map[string]interface{} {
	stats := make(map[string]interface{}, 1)
	stats["cached_descriptors"] = len(p.descriptorCache)
	return stats
}
// min returns the smaller of the two integers a and b; when they are equal
// that common value is returned.
func min(a, b int) int {
	if b < a {
		return b
	}
	return a
}