@@ -13,6 +13,7 @@ import (

	"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
	"github.com/seaweedfs/seaweedfs/weed/pb/schema_pb"
	"github.com/seaweedfs/seaweedfs/weed/query/sqltypes"
	"github.com/seaweedfs/seaweedfs/weed/util"
	"github.com/seaweedfs/seaweedfs/weed/util/http"
	"github.com/xwb1989/sqlparser"
)

@@ -1096,6 +1097,19 @@ func (e *SQLEngine) executeAggregationQuery(ctx context.Context, hybridScanner *
		startTimeNs, stopTimeNs = e.extractTimeFilters(stmt.Where.Expr)
	}

	// 🚀 FAST PATH: Try to use parquet statistics for optimization.
	// This can be ~130x faster than scanning all data.
	if stmt.Where == nil { // Only optimize when there is no WHERE clause
		fastResult, canOptimize := e.tryFastParquetAggregation(ctx, hybridScanner, aggregations)
		if canOptimize {
			fmt.Printf("✅ Using fast parquet statistics for aggregation (skipped full scan)\n")
			return fastResult, nil
		}
	}

	// SLOW PATH: Fall back to a full table scan
	fmt.Printf("⚠️ Using full table scan for aggregation (parquet optimization not applicable)\n")

	// Build scan options for the full table scan (aggregations need all data)
	hybridScanOptions := HybridScanOptions{
		StartTimeNs: startTimeNs,
@@ -1340,6 +1354,292 @@ func (e *SQLEngine) compareValues(value1 *schema_pb.Value, value2 *schema_pb.Val
	return 0
}
// tryFastParquetAggregation attempts to compute aggregations using parquet metadata instead of a full scan.
// Returns (result, canOptimize) where canOptimize=true means the fast path was used.
func (e *SQLEngine) tryFastParquetAggregation(ctx context.Context, hybridScanner *HybridMessageScanner, aggregations []AggregationSpec) (*QueryResult, bool) {
	// Check that all aggregations are optimizable with parquet statistics
	for _, spec := range aggregations {
		if !e.canUseParquetStatsForAggregation(spec) {
			return nil, false
		}
	}

	// Get all partitions for this topic
	partitions, err := e.discoverTopicPartitions(hybridScanner.topic.Namespace, hybridScanner.topic.Name)
	if err != nil {
		return nil, false
	}

	// Collect parquet statistics from all partitions
	allFileStats := make(map[string][]*ParquetFileStats) // partitionPath -> file stats
	totalRowCount := int64(0)

	for _, partition := range partitions {
		partitionPath := fmt.Sprintf("/topics/%s/%s/%s", hybridScanner.topic.Namespace, hybridScanner.topic.Name, partition)

		fileStats, err := hybridScanner.ReadParquetStatistics(partitionPath)
		if err != nil {
			// If we can't read stats from any partition, fall back to a full scan
			return nil, false
		}

		if len(fileStats) > 0 {
			allFileStats[partitionPath] = fileStats
			for _, fileStat := range fileStats {
				totalRowCount += fileStat.RowCount
			}
		}
	}

	// If no parquet files were found, we can't optimize
	if len(allFileStats) == 0 || totalRowCount == 0 {
		return nil, false
	}

	// Compute aggregations using parquet statistics
	aggResults := make([]AggregationResult, len(aggregations))

	for i, spec := range aggregations {
		switch spec.Function {
		case "COUNT":
			if spec.Column == "*" {
				// COUNT(*) = sum of all file row counts
				aggResults[i].Count = totalRowCount
			} else {
				// COUNT(column) - for now, assume all rows have non-null values
				// TODO: Use null counts from parquet stats for more accuracy
				aggResults[i].Count = totalRowCount
			}

		case "MIN":
			// Find the global minimum across all files and partitions
			var globalMin interface{}
			var globalMinValue *schema_pb.Value

			for _, fileStats := range allFileStats {
				for _, fileStat := range fileStats {
					if colStats, exists := fileStat.ColumnStats[spec.Column]; exists {
						if globalMinValue == nil || e.compareValues(colStats.MinValue, globalMinValue) < 0 {
							globalMinValue = colStats.MinValue
							globalMin = e.extractRawValue(colStats.MinValue)
						}
					}
				}
			}

			// Handle system columns that aren't in parquet column stats
			if globalMin == nil {
				globalMin = e.getSystemColumnGlobalMin(spec.Column, allFileStats)
			}

			aggResults[i].Min = globalMin

		case "MAX":
			// Find the global maximum across all files and partitions
			var globalMax interface{}
			var globalMaxValue *schema_pb.Value

			for _, fileStats := range allFileStats {
				for _, fileStat := range fileStats {
					if colStats, exists := fileStat.ColumnStats[spec.Column]; exists {
						if globalMaxValue == nil || e.compareValues(colStats.MaxValue, globalMaxValue) > 0 {
							globalMaxValue = colStats.MaxValue
							globalMax = e.extractRawValue(colStats.MaxValue)
						}
					}
				}
			}

			// Handle system columns that aren't in parquet column stats
			if globalMax == nil {
				globalMax = e.getSystemColumnGlobalMax(spec.Column, allFileStats)
			}

			aggResults[i].Max = globalMax

		default:
			// SUM, AVG are not easily optimizable with the current parquet stats
			return nil, false
		}
	}

	// Build the result using fast parquet statistics
	columns := make([]string, len(aggregations))
	row := make([]sqltypes.Value, len(aggregations))

	for i, spec := range aggregations {
		columns[i] = spec.Alias
		row[i] = e.formatAggregationResult(spec, aggResults[i])
	}

	result := &QueryResult{
		Columns: columns,
		Rows:    [][]sqltypes.Value{row},
	}

	return result, true
}
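
// Example (hypothetical topic name): a query such as
//
//	SELECT COUNT(*), MIN(_timestamp_ns), MAX(_timestamp_ns) FROM events
//
// reaches this function with no WHERE clause and only COUNT/MIN/MAX specs, so
// it can be answered from per-file parquet statistics without reading any rows.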

// canUseParquetStatsForAggregation determines if an aggregation can be optimized with parquet stats
func (e *SQLEngine) canUseParquetStatsForAggregation(spec AggregationSpec) bool {
	switch spec.Function {
	case "COUNT":
		return spec.Column == "*" || e.isSystemColumn(spec.Column) || e.isRegularColumn(spec.Column)
	case "MIN", "MAX":
		return e.isSystemColumn(spec.Column) || e.isRegularColumn(spec.Column)
	case "SUM", "AVG":
		// These require scanning actual values, not just min/max
		return false
	default:
		return false
	}
}
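
// For example, COUNT(*), MIN(_timestamp_ns), and MAX(user_id) all qualify for
// the fast path, while SUM(amount) and AVG(amount) force a full scan
// (user_id and amount are hypothetical column names).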

// isSystemColumn checks if a column is a system column (_timestamp_ns, _key, _source)
func (e *SQLEngine) isSystemColumn(columnName string) bool {
	lowerName := strings.ToLower(columnName)
	return lowerName == "_timestamp_ns" || lowerName == "timestamp_ns" ||
		lowerName == "_key" || lowerName == "key" ||
		lowerName == "_source" || lowerName == "source"
}

// isRegularColumn checks if a column might be a regular data column (placeholder)
func (e *SQLEngine) isRegularColumn(columnName string) bool {
	// For now, assume any non-system column is a regular column
	return !e.isSystemColumn(columnName)
}

// getSystemColumnGlobalMin computes the global min for system columns using file metadata
func (e *SQLEngine) getSystemColumnGlobalMin(columnName string, allFileStats map[string][]*ParquetFileStats) interface{} {
	lowerName := strings.ToLower(columnName)

	switch lowerName {
	case "_timestamp_ns", "timestamp_ns":
		// For timestamps, find the earliest timestamp across all files.
		// This should match what's in the Extended["min"] metadata.
		var minTimestamp *int64
		for _, fileStats := range allFileStats {
			for _, fileStat := range fileStats {
				// Extract the timestamp from the filename (format: YYYY-MM-DD-HH-MM-SS.parquet)
				timestamp := e.extractTimestampFromFilename(fileStat.FileName)
				if timestamp != 0 {
					if minTimestamp == nil || timestamp < *minTimestamp {
						minTimestamp = &timestamp
					}
				}
			}
		}
		if minTimestamp != nil {
			return *minTimestamp
		}

	case "_key", "key":
		// For keys, we'd need to read the actual parquet column stats.
		// Fall back to scanning if they're not available in our current stats.
		return nil

	case "_source", "source":
		// Source is always "parquet_archive" for parquet files
		return "parquet_archive"
	}

	return nil
}

// getSystemColumnGlobalMax computes the global max for system columns using file metadata
func (e *SQLEngine) getSystemColumnGlobalMax(columnName string, allFileStats map[string][]*ParquetFileStats) interface{} {
	lowerName := strings.ToLower(columnName)

	switch lowerName {
	case "_timestamp_ns", "timestamp_ns":
		// For timestamps, find the latest timestamp across all files
		var maxTimestamp *int64
		for _, fileStats := range allFileStats {
			for _, fileStat := range fileStats {
				// Extract the timestamp from the filename (format: YYYY-MM-DD-HH-MM-SS.parquet)
				timestamp := e.extractTimestampFromFilename(fileStat.FileName)
				if timestamp != 0 {
					if maxTimestamp == nil || timestamp > *maxTimestamp {
						maxTimestamp = &timestamp
					}
				}
			}
		}
		if maxTimestamp != nil {
			return *maxTimestamp
		}

	case "_key", "key":
		// For keys, we'd need to read the actual parquet column stats
		return nil

	case "_source", "source":
		// Source is always "parquet_archive" for parquet files
		return "parquet_archive"
	}

	return nil
}

// extractTimestampFromFilename extracts the timestamp from a parquet filename.
// Format: YYYY-MM-DD-HH-MM-SS.parquet
func (e *SQLEngine) extractTimestampFromFilename(filename string) int64 {
	// Remove the .parquet extension
	filename = strings.TrimSuffix(filename, ".parquet")

	// Parse the timestamp format: 2006-01-02-15-04-05
	t, err := time.Parse("2006-01-02-15-04-05", filename)
	if err != nil {
		return 0
	}

	return t.UnixNano()
}
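
// Example: a file named "2025-01-15-10-30-00.parquet" yields the UnixNano of
// 2025-01-15T10:30:00 UTC (time.Parse assumes UTC when the layout carries no
// zone), while a filename that doesn't match the layout yields 0.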

// discoverTopicPartitions discovers all partitions for a given topic
func (e *SQLEngine) discoverTopicPartitions(namespace, topicName string) ([]string, error) {
	// Use the same discovery logic as in hybrid_message_scanner.go
	topicPath := fmt.Sprintf("/topics/%s/%s", namespace, topicName)

	// Get the FilerClient from the BrokerClient
	filerClient, err := e.catalog.brokerClient.GetFilerClient()
	if err != nil {
		return nil, err
	}

	var partitions []string
	err = filer_pb.ReadDirAllEntries(context.Background(), filerClient, util.FullPath(topicPath), "", func(entry *filer_pb.Entry, isLast bool) error {
		if !entry.IsDirectory {
			return nil
		}

		// Check if this looks like a partition directory (format: vYYYY-MM-DD-HH-MM-SS)
		if strings.HasPrefix(entry.Name, "v") && len(entry.Name) == 20 {
			// This is a time-based partition directory.
			// Look for numeric subdirectories (partition IDs).
			partitionBasePath := fmt.Sprintf("%s/%s", topicPath, entry.Name)
			err := filer_pb.ReadDirAllEntries(context.Background(), filerClient, util.FullPath(partitionBasePath), "", func(subEntry *filer_pb.Entry, isLast bool) error {
				if subEntry.IsDirectory {
					// Check if this is a numeric partition directory (format: 0000-XXXX)
					if len(subEntry.Name) >= 4 {
						partitionPath := fmt.Sprintf("%s/%s", entry.Name, subEntry.Name)
						partitions = append(partitions, partitionPath)
					}
				}
				return nil
			})
			if err != nil {
				return err
			}
		}
		return nil
	})

	return partitions, err
}
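
// Example (hypothetical layout): a topic stored under
// /topics/test/events/v2025-01-15-10-30-00/0000-0630 yields the partition
// string "v2025-01-15-10-30-00/0000-0630", which tryFastParquetAggregation
// joins back onto /topics/<namespace>/<topic> before reading statistics.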

func (e *SQLEngine) formatAggregationResult(spec AggregationSpec, result AggregationResult) sqltypes.Value {
	switch spec.Function {
	case "COUNT":