You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

206 lines
5.5 KiB

package schema
import (
"fmt"
"sort"
"strings"
"github.com/seaweedfs/seaweedfs/weed/pb/schema_pb"
)
// SplitFlatSchemaToKeyValue partitions a flat RecordType into a key RecordType
// and a value RecordType.
//
// Fields whose Name appears in keyColumns go to the key schema; every other
// field goes to the value schema. Within each output the fields keep their
// relative order from flatSchema and FieldIndex is renumbered starting at 0.
// Type, IsRepeated and IsRequired are copied from the source field.
//
// A nil flatSchema yields (nil, nil, nil). An output that would contain no
// fields is returned as nil rather than as an empty RecordType. If any entry
// of keyColumns does not name a field of flatSchema, an error listing the
// missing columns (in keyColumns order) is returned and both schemas are nil.
func SplitFlatSchemaToKeyValue(flatSchema *schema_pb.RecordType, keyColumns []string) (*schema_pb.RecordType, *schema_pb.RecordType, error) {
	if flatSchema == nil {
		return nil, nil, nil
	}

	// Set of requested key column names for O(1) membership tests.
	keyColumnSet := make(map[string]bool, len(keyColumns))
	for _, col := range keyColumns {
		keyColumnSet[col] = true
	}

	// matchedKeys records which requested key columns were actually seen in
	// the schema. Comparing counts is not enough: a schema with duplicate
	// field names can produce len(keyFields) == len(keyColumns) while a
	// requested column is still absent.
	matchedKeys := make(map[string]bool, len(keyColumns))

	var keyFields []*schema_pb.Field
	var valueFields []*schema_pb.Field

	for _, field := range flatSchema.Fields {
		// Pick the destination slice; the new FieldIndex is the field's
		// position within that destination.
		dest := &valueFields
		if keyColumnSet[field.Name] {
			dest = &keyFields
			matchedKeys[field.Name] = true
		}
		*dest = append(*dest, &schema_pb.Field{
			Name:       field.Name,
			FieldIndex: int32(len(*dest)),
			Type:       field.Type,
			IsRepeated: field.IsRepeated,
			IsRequired: field.IsRequired,
		})
	}

	// Validate that every requested key column was found.
	var missingCols []string
	for _, col := range keyColumns {
		if !matchedKeys[col] {
			missingCols = append(missingCols, col)
		}
	}
	if len(missingCols) > 0 {
		return nil, nil, fmt.Errorf("key columns not found in schema: %v", missingCols)
	}

	var keyRecordType *schema_pb.RecordType
	if len(keyFields) > 0 {
		keyRecordType = &schema_pb.RecordType{Fields: keyFields}
	}
	var valueRecordType *schema_pb.RecordType
	if len(valueFields) > 0 {
		valueRecordType = &schema_pb.RecordType{Fields: valueFields}
	}
	return keyRecordType, valueRecordType, nil
}
// CombineFlatSchemaFromKeyValue merges a key schema and a value schema into a
// single flat RecordType and reports which column names came from the key.
//
// Key fields are emitted first, followed by value fields; FieldIndex is
// renumbered to each field's position in the merged list. A value field whose
// name equals a key field name is emitted as "value_" + name. Either input may
// be nil. When the merged list is empty the returned RecordType is nil.
func CombineFlatSchemaFromKeyValue(keySchema *schema_pb.RecordType, valueSchema *schema_pb.RecordType) (*schema_pb.RecordType, []string) {
	var (
		merged   []*schema_pb.Field
		keyNames []string
	)

	// push appends a copy of src under the given name, indexed by its
	// position in the merged list.
	push := func(name string, src *schema_pb.Field) {
		merged = append(merged, &schema_pb.Field{
			Name:       name,
			FieldIndex: int32(len(merged)),
			Type:       src.Type,
			IsRepeated: src.IsRepeated,
			IsRequired: src.IsRequired,
		})
	}

	// Names already claimed by key fields, used to detect clashes below.
	taken := make(map[string]struct{})

	if keySchema != nil {
		for _, kf := range keySchema.Fields {
			push(kf.Name, kf)
			keyNames = append(keyNames, kf.Name)
			taken[kf.Name] = struct{}{}
		}
	}

	if valueSchema != nil {
		for _, vf := range valueSchema.Fields {
			name := vf.Name
			// Not expected in well-formed schemas; disambiguate rather than fail.
			if _, clash := taken[name]; clash {
				name = "value_" + name
			}
			push(name, vf)
		}
	}

	if len(merged) == 0 {
		return nil, keyNames
	}
	return &schema_pb.RecordType{Fields: merged}, keyNames
}
// ExtractKeyColumnsFromCombinedSchema infers key columns from a combined
// schema in which key fields carry a "key_" name prefix (the convention
// attributed to CreateCombinedRecordType, which is not visible in this file).
//
// Every field is copied into the flat schema in its original order with
// FieldIndex renumbered from 0; fields named with the "key_" prefix have the
// prefix stripped and their stripped name added to keyColumns. The returned
// keyColumns are sorted lexicographically. A nil combinedSchema yields
// (nil, nil); a schema with no fields yields a nil flatSchema.
func ExtractKeyColumnsFromCombinedSchema(combinedSchema *schema_pb.RecordType) (flatSchema *schema_pb.RecordType, keyColumns []string) {
	if combinedSchema == nil {
		return nil, nil
	}

	const keyPrefix = "key_"

	var fields []*schema_pb.Field
	for _, src := range combinedSchema.Fields {
		name := src.Name
		if strings.HasPrefix(name, keyPrefix) {
			// Key field: drop the prefix and remember the bare name.
			name = strings.TrimPrefix(name, keyPrefix)
			keyColumns = append(keyColumns, name)
		}
		fields = append(fields, &schema_pb.Field{
			Name:       name,
			FieldIndex: int32(len(fields)),
			Type:       src.Type,
			IsRepeated: src.IsRepeated,
			IsRequired: src.IsRequired,
		})
	}

	// Key columns are reported in sorted order, not schema order.
	sort.Strings(keyColumns)

	if len(fields) == 0 {
		return nil, keyColumns
	}
	return &schema_pb.RecordType{Fields: fields}, keyColumns
}
// ValidateKeyColumns reports an error if any name in keyColumns does not match
// the Name of a field in schema. The error lists the unmatched names in the
// order they appear in keyColumns. A nil schema or an empty keyColumns is
// treated as valid and returns nil.
func ValidateKeyColumns(schema *schema_pb.RecordType, keyColumns []string) error {
	if schema == nil {
		return nil
	}
	if len(keyColumns) == 0 {
		return nil
	}

	// Index the schema's field names once so each key column is a single lookup.
	known := make(map[string]struct{}, len(schema.Fields))
	for _, f := range schema.Fields {
		known[f.Name] = struct{}{}
	}

	var absent []string
	for _, col := range keyColumns {
		if _, ok := known[col]; ok {
			continue
		}
		absent = append(absent, col)
	}

	if len(absent) == 0 {
		return nil
	}
	return fmt.Errorf("key columns not found in schema: %v", absent)
}