You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

85 lines
3.8 KiB

10 months ago
9 months ago
10 months ago
10 months ago
10 months ago
10 months ago
10 months ago
10 months ago
10 months ago
10 months ago
10 months ago
10 months ago
10 months ago
10 months ago
10 months ago
10 months ago
10 months ago
10 months ago
10 months ago
9 months ago
9 months ago
10 months ago
10 months ago
10 months ago
10 months ago
  1. package schema
  2. import (
  3. "fmt"
  4. "github.com/parquet-go/parquet-go"
  5. "github.com/seaweedfs/seaweedfs/weed/pb/schema_pb"
  6. )
  7. // ToRecordValue converts a parquet.Row to a schema_pb.RecordValue
  8. // This does not work or did not test with nested structures.
  9. // Using this may fail to convert the parquet.Row to schema_pb.RecordValue
  10. func ToRecordValue(recordType *schema_pb.RecordType, parquetLevels *ParquetLevels, row parquet.Row) (*schema_pb.RecordValue, error) {
  11. values := []parquet.Value(row)
  12. recordValue, _, err := toRecordValue(recordType, parquetLevels, values, 0)
  13. if err != nil {
  14. return nil, err
  15. }
  16. return recordValue.GetRecordValue(), nil
  17. }
  18. func ToValue(t *schema_pb.Type, levels *ParquetLevels, values []parquet.Value, valueIndex int) (value *schema_pb.Value, endValueIndex int, err error) {
  19. switch t.Kind.(type) {
  20. case *schema_pb.Type_ScalarType:
  21. return toScalarValue(t.GetScalarType(), levels, values, valueIndex)
  22. case *schema_pb.Type_ListType:
  23. return toListValue(t.GetListType(), levels, values, valueIndex)
  24. case *schema_pb.Type_RecordType:
  25. return toRecordValue(t.GetRecordType(), levels, values, valueIndex)
  26. }
  27. return nil, valueIndex, fmt.Errorf("unsupported type: %v", t)
  28. }
  29. func toRecordValue(recordType *schema_pb.RecordType, levels *ParquetLevels, values []parquet.Value, valueIndex int) (*schema_pb.Value, int, error) {
  30. recordValue := schema_pb.RecordValue{Fields: make(map[string]*schema_pb.Value)}
  31. for _, field := range recordType.Fields {
  32. fieldLevels := levels.levels[field.Name]
  33. fieldValue, endValueIndex, err := ToValue(field.Type, fieldLevels, values, valueIndex)
  34. if err != nil {
  35. return nil, 0, err
  36. }
  37. valueIndex = endValueIndex
  38. recordValue.Fields[field.Name] = fieldValue
  39. }
  40. return &schema_pb.Value{Kind: &schema_pb.Value_RecordValue{RecordValue: &recordValue}}, valueIndex, nil
  41. }
  42. func toListValue(listType *schema_pb.ListType, levels *ParquetLevels, values []parquet.Value, valueIndex int) (listValue *schema_pb.Value, endValueIndex int, err error) {
  43. listValues := make([]*schema_pb.Value, 0)
  44. var value *schema_pb.Value
  45. for ;valueIndex < len(values); {
  46. if values[valueIndex].Column() != levels.startColumnIndex {
  47. break
  48. }
  49. value, valueIndex, err = ToValue(listType.ElementType, levels, values, valueIndex)
  50. if err != nil {
  51. return nil, valueIndex, err
  52. }
  53. listValues = append(listValues, value)
  54. }
  55. return &schema_pb.Value{Kind: &schema_pb.Value_ListValue{ListValue: &schema_pb.ListValue{Values: listValues}}}, valueIndex, nil
  56. }
  57. func toScalarValue(scalarType schema_pb.ScalarType, levels *ParquetLevels, values []parquet.Value, valueIndex int) (*schema_pb.Value, int, error) {
  58. value := values[valueIndex]
  59. if value.Column() != levels.startColumnIndex {
  60. return nil, valueIndex, nil
  61. }
  62. switch scalarType {
  63. case schema_pb.ScalarType_BOOL:
  64. return &schema_pb.Value{Kind: &schema_pb.Value_BoolValue{BoolValue: value.Boolean()}}, valueIndex+1, nil
  65. case schema_pb.ScalarType_INT32:
  66. return &schema_pb.Value{Kind: &schema_pb.Value_Int32Value{Int32Value: value.Int32()}}, valueIndex+1, nil
  67. case schema_pb.ScalarType_INT64:
  68. return &schema_pb.Value{Kind: &schema_pb.Value_Int64Value{Int64Value: value.Int64()}}, valueIndex+1, nil
  69. case schema_pb.ScalarType_FLOAT:
  70. return &schema_pb.Value{Kind: &schema_pb.Value_FloatValue{FloatValue: value.Float()}}, valueIndex+1, nil
  71. case schema_pb.ScalarType_DOUBLE:
  72. return &schema_pb.Value{Kind: &schema_pb.Value_DoubleValue{DoubleValue: value.Double()}}, valueIndex+1, nil
  73. case schema_pb.ScalarType_BYTES:
  74. return &schema_pb.Value{Kind: &schema_pb.Value_BytesValue{BytesValue: value.ByteArray()}}, valueIndex+1, nil
  75. case schema_pb.ScalarType_STRING:
  76. return &schema_pb.Value{Kind: &schema_pb.Value_StringValue{StringValue: string(value.ByteArray())}}, valueIndex+1, nil
  77. }
  78. return nil, valueIndex, fmt.Errorf("unsupported scalar type: %v", scalarType)
  79. }