You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

89 lines
3.8 KiB

10 months ago
10 months ago
10 months ago
10 months ago
10 months ago
10 months ago
10 months ago
10 months ago
10 months ago
10 months ago
10 months ago
10 months ago
10 months ago
10 months ago
10 months ago
10 months ago
10 months ago
10 months ago
10 months ago
10 months ago
10 months ago
10 months ago
10 months ago
10 months ago
10 months ago
10 months ago
10 months ago
10 months ago
  1. package schema
  2. import (
  3. "fmt"
  4. "github.com/parquet-go/parquet-go"
  5. "github.com/seaweedfs/seaweedfs/weed/pb/schema_pb"
  6. )
  7. // ToRecordValue converts a parquet.Row to a schema_pb.RecordValue
  8. // This does not work or did not test with nested structures.
  9. // Using this may fail to convert the parquet.Row to schema_pb.RecordValue
  10. func ToRecordValue(recordType *schema_pb.RecordType, row parquet.Row) (*schema_pb.RecordValue, error) {
  11. parquetLevels, err := ToParquetLevels(recordType)
  12. if err != nil {
  13. return nil, err
  14. }
  15. values := []parquet.Value(row)
  16. recordValue, _, err := toRecordValue(recordType, parquetLevels, values, 0)
  17. if err != nil {
  18. return nil, err
  19. }
  20. return recordValue.GetRecordValue(), nil
  21. }
  22. func ToValue(t *schema_pb.Type, levels *ParquetLevels, values []parquet.Value, valueIndex int) (value *schema_pb.Value, endValueIndex int, err error) {
  23. switch t.Kind.(type) {
  24. case *schema_pb.Type_ScalarType:
  25. return toScalarValue(t.GetScalarType(), levels, values, valueIndex)
  26. case *schema_pb.Type_ListType:
  27. return toListValue(t.GetListType(), levels, values, valueIndex)
  28. case *schema_pb.Type_RecordType:
  29. return toRecordValue(t.GetRecordType(), levels, values, valueIndex)
  30. }
  31. return nil, valueIndex, fmt.Errorf("unsupported type: %v", t)
  32. }
  33. func toRecordValue(recordType *schema_pb.RecordType, levels *ParquetLevels, values []parquet.Value, valueIndex int) (*schema_pb.Value, int, error) {
  34. recordValue := schema_pb.RecordValue{Fields: make(map[string]*schema_pb.Value)}
  35. for _, field := range recordType.Fields {
  36. fieldLevels := levels.levels[field.Name]
  37. fieldValue, endValueIndex, err := ToValue(field.Type, fieldLevels, values, valueIndex)
  38. if err != nil {
  39. return nil, 0, err
  40. }
  41. valueIndex = endValueIndex
  42. recordValue.Fields[field.Name] = fieldValue
  43. }
  44. return &schema_pb.Value{Kind: &schema_pb.Value_RecordValue{RecordValue: &recordValue}}, valueIndex, nil
  45. }
  46. func toListValue(listType *schema_pb.ListType, levels *ParquetLevels, values []parquet.Value, valueIndex int) (listValue *schema_pb.Value, endValueIndex int, err error) {
  47. listValues := make([]*schema_pb.Value, 0)
  48. var value *schema_pb.Value
  49. for ;valueIndex < len(values); {
  50. if values[valueIndex].Column() != levels.startColumnIndex {
  51. break
  52. }
  53. value, valueIndex, err = ToValue(listType.ElementType, levels, values, valueIndex)
  54. if err != nil {
  55. return nil, valueIndex, err
  56. }
  57. listValues = append(listValues, value)
  58. }
  59. return &schema_pb.Value{Kind: &schema_pb.Value_ListValue{ListValue: &schema_pb.ListValue{Values: listValues}}}, valueIndex, nil
  60. }
  61. func toScalarValue(scalarType schema_pb.ScalarType, levels *ParquetLevels, values []parquet.Value, valueIndex int) (*schema_pb.Value, int, error) {
  62. value := values[valueIndex]
  63. if value.Column() != levels.startColumnIndex {
  64. return nil, valueIndex, nil
  65. }
  66. switch scalarType {
  67. case schema_pb.ScalarType_BOOLEAN:
  68. return &schema_pb.Value{Kind: &schema_pb.Value_BoolValue{BoolValue: value.Boolean()}}, valueIndex+1, nil
  69. case schema_pb.ScalarType_INTEGER:
  70. return &schema_pb.Value{Kind: &schema_pb.Value_Int32Value{Int32Value: value.Int32()}}, valueIndex+1, nil
  71. case schema_pb.ScalarType_LONG:
  72. return &schema_pb.Value{Kind: &schema_pb.Value_Int64Value{Int64Value: value.Int64()}}, valueIndex+1, nil
  73. case schema_pb.ScalarType_FLOAT:
  74. return &schema_pb.Value{Kind: &schema_pb.Value_FloatValue{FloatValue: value.Float()}}, valueIndex+1, nil
  75. case schema_pb.ScalarType_DOUBLE:
  76. return &schema_pb.Value{Kind: &schema_pb.Value_DoubleValue{DoubleValue: value.Double()}}, valueIndex+1, nil
  77. case schema_pb.ScalarType_BYTES:
  78. return &schema_pb.Value{Kind: &schema_pb.Value_BytesValue{BytesValue: value.ByteArray()}}, valueIndex+1, nil
  79. case schema_pb.ScalarType_STRING:
  80. return &schema_pb.Value{Kind: &schema_pb.Value_StringValue{StringValue: string(value.ByteArray())}}, valueIndex+1, nil
  81. }
  82. return nil, valueIndex, fmt.Errorf("unsupported scalar type: %v", scalarType)
  83. }