You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

93 lines
3.4 KiB

9 months ago
9 months ago
9 months ago
9 months ago
  1. package schema
  2. import (
  3. "fmt"
  4. parquet "github.com/parquet-go/parquet-go"
  5. "github.com/seaweedfs/seaweedfs/weed/pb/schema_pb"
  6. )
  7. func rowBuilderVisit(rowBuilder *parquet.RowBuilder, fieldType *schema_pb.Type, fieldValue *schema_pb.Value, columnIndex int) error {
  8. switch fieldType.Kind.(type) {
  9. case *schema_pb.Type_ScalarType:
  10. parquetValue, err := toParquetValue(fieldValue)
  11. if err != nil {
  12. return err
  13. }
  14. rowBuilder.Add(columnIndex, parquetValue)
  15. case *schema_pb.Type_ListType:
  16. elementType := fieldType.GetListType().ElementType
  17. for _, value := range fieldValue.GetListValue().Values {
  18. if err := rowBuilderVisit(rowBuilder, elementType, value, columnIndex); err != nil {
  19. return err
  20. }
  21. }
  22. rowBuilder.Next(columnIndex)
  23. }
  24. return nil
  25. }
  26. func AddRecordValue(rowBuilder *parquet.RowBuilder, recordType *schema_pb.RecordType, recordValue *schema_pb.RecordValue) error {
  27. visitor := func(fieldType *schema_pb.Type, fieldValue *schema_pb.Value, index int) error {
  28. return rowBuilderVisit(rowBuilder, fieldType, fieldValue, index)
  29. }
  30. fieldType := &schema_pb.Type{Kind: &schema_pb.Type_RecordType{RecordType: recordType}}
  31. fieldValue := &schema_pb.Value{Kind: &schema_pb.Value_RecordValue{RecordValue: recordValue}}
  32. return visitValue(fieldType, fieldValue, visitor)
  33. }
  34. // typeValueVisitor is a function that is called for each value in a schema_pb.Value
  35. // Find the column index.
  36. // intended to be used in RowBuilder.Add(columnIndex, value)
  37. type typeValueVisitor func(fieldType *schema_pb.Type, fieldValue *schema_pb.Value, index int) error
  38. func visitValue(fieldType *schema_pb.Type, fieldValue *schema_pb.Value, visitor typeValueVisitor) (err error) {
  39. _, err = doVisitValue(fieldType, fieldValue, 0, visitor)
  40. return
  41. }
  42. // endIndex is exclusive
  43. // same logic as RowBuilder.configure in row_builder.go
  44. func doVisitValue(fieldType *schema_pb.Type, fieldValue *schema_pb.Value, columnIndex int, visitor typeValueVisitor) (endIndex int, err error) {
  45. switch fieldType.Kind.(type) {
  46. case *schema_pb.Type_ScalarType:
  47. return columnIndex+1, visitor(fieldType, fieldValue, columnIndex)
  48. case *schema_pb.Type_ListType:
  49. return columnIndex+1, visitor(fieldType, fieldValue, columnIndex)
  50. case *schema_pb.Type_RecordType:
  51. for _, field := range fieldType.GetRecordType().Fields {
  52. fieldValue, found := fieldValue.GetRecordValue().Fields[field.Name]
  53. if !found {
  54. // TODO check this if no such field found
  55. return columnIndex, nil
  56. }
  57. endIndex, err = doVisitValue(field.Type, fieldValue, columnIndex, visitor)
  58. if err != nil {
  59. return
  60. }
  61. columnIndex = endIndex
  62. }
  63. return
  64. }
  65. return
  66. }
  67. func toParquetValue(value *schema_pb.Value) (parquet.Value, error) {
  68. switch value.Kind.(type) {
  69. case *schema_pb.Value_BoolValue:
  70. return parquet.BooleanValue(value.GetBoolValue()), nil
  71. case *schema_pb.Value_Int32Value:
  72. return parquet.Int32Value(value.GetInt32Value()), nil
  73. case *schema_pb.Value_Int64Value:
  74. return parquet.Int64Value(value.GetInt64Value()), nil
  75. case *schema_pb.Value_FloatValue:
  76. return parquet.FloatValue(value.GetFloatValue()), nil
  77. case *schema_pb.Value_DoubleValue:
  78. return parquet.DoubleValue(value.GetDoubleValue()), nil
  79. case *schema_pb.Value_BytesValue:
  80. return parquet.ByteArrayValue(value.GetBytesValue()), nil
  81. case *schema_pb.Value_StringValue:
  82. return parquet.ByteArrayValue([]byte(value.GetStringValue())), nil
  83. default:
  84. return parquet.NullValue(), fmt.Errorf("unknown value type: %T", value.Kind)
  85. }
  86. }