You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

65 lines
1.9 KiB

  1. package schema
  2. import (
  3. "fmt"
  4. parquet "github.com/parquet-go/parquet-go"
  5. "github.com/seaweedfs/seaweedfs/weed/pb/schema_pb"
  6. )
  7. func ToParquetSchema(topicName string, recordType *schema_pb.RecordType) (*parquet.Schema, error) {
  8. rootNode, err := toParquetFieldTypeRecord(recordType)
  9. if err != nil {
  10. return nil, fmt.Errorf("failed to convert record type to parquet schema: %v", err)
  11. }
  12. return parquet.NewSchema(topicName, rootNode), nil
  13. }
  14. func toParquetFieldType(field *schema_pb.Field) (parquet.Node, error) {
  15. var (
  16. dataType parquet.Node
  17. err error
  18. )
  19. switch field.Type.Kind.(type) {
  20. case *schema_pb.Type_ScalarType:
  21. dataType, err = toParquetFieldTypeScalar(field.Type.GetScalarType())
  22. case *schema_pb.Type_RecordType:
  23. dataType, err = toParquetFieldTypeRecord(field.Type.GetRecordType())
  24. default:
  25. return nil, fmt.Errorf("unknown field type: %T", field.Type.Kind)
  26. }
  27. return dataType, err
  28. }
  29. func toParquetFieldTypeScalar(scalarType schema_pb.ScalarType) (parquet.Node, error) {
  30. switch scalarType {
  31. case schema_pb.ScalarType_BOOLEAN:
  32. return parquet.Leaf(parquet.BooleanType), nil
  33. case schema_pb.ScalarType_INTEGER:
  34. return parquet.Leaf(parquet.Int32Type), nil
  35. case schema_pb.ScalarType_LONG:
  36. return parquet.Leaf(parquet.Int64Type), nil
  37. case schema_pb.ScalarType_FLOAT:
  38. return parquet.Leaf(parquet.FloatType), nil
  39. case schema_pb.ScalarType_DOUBLE:
  40. return parquet.Leaf(parquet.DoubleType), nil
  41. case schema_pb.ScalarType_BYTES:
  42. return parquet.Leaf(parquet.ByteArrayType), nil
  43. case schema_pb.ScalarType_STRING:
  44. return parquet.String(), nil
  45. default:
  46. return nil, fmt.Errorf("unknown scalar type: %v", scalarType)
  47. }
  48. }
  49. func toParquetFieldTypeRecord(recordType *schema_pb.RecordType) (parquet.Node, error) {
  50. recordNode := parquet.Group{}
  51. for _, field := range recordType.Fields {
  52. parquetFieldType, err := toParquetFieldType(field)
  53. if err != nil {
  54. return nil, err
  55. }
  56. recordNode[field.Name] = parquetFieldType
  57. }
  58. return recordNode, nil
  59. }