You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

86 lines
3.7 KiB

10 months ago
9 months ago
10 months ago
10 months ago
10 months ago
10 months ago
10 months ago
10 months ago
10 months ago
10 months ago
10 months ago
10 months ago
10 months ago
10 months ago
10 months ago
10 months ago
10 months ago
10 months ago
10 months ago
10 months ago
10 months ago
10 months ago
10 months ago
10 months ago
10 months ago
10 months ago
10 months ago
10 months ago
  1. package schema
  2. import (
  3. "fmt"
  4. "github.com/parquet-go/parquet-go"
  5. "github.com/seaweedfs/seaweedfs/weed/pb/schema_pb"
  6. )
  7. // ToRecordValue converts a parquet.Row to a schema_pb.RecordValue
  8. // This does not work or did not test with nested structures.
  9. // Using this may fail to convert the parquet.Row to schema_pb.RecordValue
  10. func ToRecordValue(recordType *schema_pb.RecordType, row parquet.Row) (*schema_pb.RecordValue, error) {
  11. values := []parquet.Value(row)
  12. recordValue, _, _, err := toRecordValue(recordType, values, 0, 0)
  13. if err != nil {
  14. return nil, err
  15. }
  16. return recordValue.GetRecordValue(), nil
  17. }
  18. func ToValue(t *schema_pb.Type, values []parquet.Value, valueIndex, columnIndex int) (value *schema_pb.Value, endValueIndex, endColumnIndex int, err error) {
  19. switch t.Kind.(type) {
  20. case *schema_pb.Type_ScalarType:
  21. value, err = toScalarValue(t.GetScalarType(), values, valueIndex, columnIndex)
  22. return value, valueIndex + 1, columnIndex + 1, err
  23. case *schema_pb.Type_ListType:
  24. return toListValue(t.GetListType(), values, valueIndex, columnIndex)
  25. case *schema_pb.Type_RecordType:
  26. return toRecordValue(t.GetRecordType(), values, valueIndex, columnIndex)
  27. }
  28. return nil, 0, 0, fmt.Errorf("unsupported type: %v", t)
  29. }
  30. func toRecordValue(recordType *schema_pb.RecordType, values []parquet.Value, valueIndex, columnIndex int) (*schema_pb.Value, int, int, error) {
  31. recordValue := schema_pb.RecordValue{Fields: make(map[string]*schema_pb.Value)}
  32. for _, field := range recordType.Fields {
  33. fieldValue, endValueIndex, endColumnIndex, err := ToValue(field.Type, values, valueIndex, columnIndex)
  34. if err != nil {
  35. return nil, 0, 0, err
  36. }
  37. columnIndex = endColumnIndex
  38. valueIndex = endValueIndex
  39. recordValue.Fields[field.Name] = fieldValue
  40. }
  41. return &schema_pb.Value{Kind: &schema_pb.Value_RecordValue{RecordValue: &recordValue}}, valueIndex, columnIndex, nil
  42. }
  43. func toListValue(listType *schema_pb.ListType, values []parquet.Value, valueIndex, columnIndex int) (listValue *schema_pb.Value, endValueIndex, endColumnIndex int, err error) {
  44. listValues := make([]*schema_pb.Value, 0)
  45. var value *schema_pb.Value
  46. for ;valueIndex < len(values); {
  47. if values[valueIndex].Column() != columnIndex {
  48. break
  49. }
  50. value, valueIndex, endColumnIndex, err = ToValue(listType.ElementType, values, valueIndex, columnIndex)
  51. if err != nil {
  52. return nil, 0,0, err
  53. }
  54. listValues = append(listValues, value)
  55. }
  56. return &schema_pb.Value{Kind: &schema_pb.Value_ListValue{ListValue: &schema_pb.ListValue{Values: listValues}}}, valueIndex, endColumnIndex, nil
  57. }
  58. func toScalarValue(scalarType schema_pb.ScalarType, values []parquet.Value, valueIndex, columnIndex int) (*schema_pb.Value, error) {
  59. value := values[valueIndex]
  60. if value.Column() != columnIndex {
  61. return nil, nil
  62. }
  63. switch scalarType {
  64. case schema_pb.ScalarType_BOOLEAN:
  65. return &schema_pb.Value{Kind: &schema_pb.Value_BoolValue{BoolValue: value.Boolean()}}, nil
  66. case schema_pb.ScalarType_INTEGER:
  67. return &schema_pb.Value{Kind: &schema_pb.Value_Int32Value{Int32Value: value.Int32()}}, nil
  68. case schema_pb.ScalarType_LONG:
  69. return &schema_pb.Value{Kind: &schema_pb.Value_Int64Value{Int64Value: value.Int64()}}, nil
  70. case schema_pb.ScalarType_FLOAT:
  71. return &schema_pb.Value{Kind: &schema_pb.Value_FloatValue{FloatValue: value.Float()}}, nil
  72. case schema_pb.ScalarType_DOUBLE:
  73. return &schema_pb.Value{Kind: &schema_pb.Value_DoubleValue{DoubleValue: value.Double()}}, nil
  74. case schema_pb.ScalarType_BYTES:
  75. return &schema_pb.Value{Kind: &schema_pb.Value_BytesValue{BytesValue: value.ByteArray()}}, nil
  76. case schema_pb.ScalarType_STRING:
  77. return &schema_pb.Value{Kind: &schema_pb.Value_StringValue{StringValue: string(value.ByteArray())}}, nil
  78. }
  79. return nil, fmt.Errorf("unsupported scalar type: %v", scalarType)
  80. }