package engine

import (
	"context"
	"fmt"
	"strings"

	"github.com/seaweedfs/seaweedfs/weed/mq/topic"
	"github.com/seaweedfs/seaweedfs/weed/pb/schema_pb"
	"github.com/seaweedfs/seaweedfs/weed/query/sqltypes"
)

// AggregationSpec defines an aggregation function to be computed
type AggregationSpec struct {
	Function string // COUNT, SUM, AVG, MIN, MAX
	Column   string // Column name, or "*" for COUNT(*)
	Alias    string // Optional alias for the result column
	Distinct bool   // Support for DISTINCT keyword
}
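
// For illustration only: a statement like
//
//	SELECT COUNT(*), MAX(ts) AS latest FROM events
//
// would be described by specs along these lines (the column and alias names
// are hypothetical):
//
//	[]AggregationSpec{
//		{Function: "COUNT", Column: "*"},
//		{Function: "MAX", Column: "ts", Alias: "latest"},
//	}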

// AggregationResult holds the computed result of an aggregation
type AggregationResult struct {
	Count int64
	Sum   float64
	Min   interface{}
	Max   interface{}
}

// AggregationStrategy represents the strategy for executing aggregations
type AggregationStrategy struct {
	CanUseFastPath   bool
	Reason           string
	UnsupportedSpecs []AggregationSpec
}

// TopicDataSources represents the data sources available for a topic
type TopicDataSources struct {
	ParquetFiles      map[string][]*ParquetFileStats // partitionPath -> parquet file stats
	ParquetRowCount   int64
	LiveLogRowCount   int64
	LiveLogFilesCount int // Total count of live log files across all partitions
	PartitionsCount   int
}
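
// Note: the fast COUNT(*) path below relies on the invariant that the total
// row count for a topic equals ParquetRowCount + LiveLogRowCount, where live
// log rows have already been deduplicated against files compacted into
// parquet (see CollectDataSources).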

// FastPathOptimizer handles fast path aggregation optimization decisions
type FastPathOptimizer struct {
	engine *SQLEngine
}

// NewFastPathOptimizer creates a new fast path optimizer
func NewFastPathOptimizer(engine *SQLEngine) *FastPathOptimizer {
	return &FastPathOptimizer{engine: engine}
}

// DetermineStrategy analyzes aggregations and determines if fast path can be used
func (opt *FastPathOptimizer) DetermineStrategy(aggregations []AggregationSpec) AggregationStrategy {
	strategy := AggregationStrategy{
		CanUseFastPath:   true,
		Reason:           "all_aggregations_supported",
		UnsupportedSpecs: []AggregationSpec{},
	}

	for _, spec := range aggregations {
		if !opt.engine.canUseParquetStatsForAggregation(spec) {
			strategy.CanUseFastPath = false
			strategy.Reason = "unsupported_aggregation_functions"
			strategy.UnsupportedSpecs = append(strategy.UnsupportedSpecs, spec)
		}
	}

	return strategy
}
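
// A minimal sketch of the decision (the specs are hypothetical): mixing a
// fast-path-eligible COUNT(*) with a SUM forces the slow path, because SUM
// cannot be answered from parquet min/max statistics:
//
//	strategy := optimizer.DetermineStrategy([]AggregationSpec{
//		{Function: "COUNT", Column: "*"},
//		{Function: "SUM", Column: "amount"},
//	})
//	// strategy.CanUseFastPath == false
//	// strategy.Reason == "unsupported_aggregation_functions"
//	// strategy.UnsupportedSpecs holds the SUM spec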

// CollectDataSources gathers information about available data sources for a topic
func (opt *FastPathOptimizer) CollectDataSources(ctx context.Context, hybridScanner *HybridMessageScanner) (*TopicDataSources, error) {
	dataSources := &TopicDataSources{
		ParquetFiles:      make(map[string][]*ParquetFileStats),
		ParquetRowCount:   0,
		LiveLogRowCount:   0,
		LiveLogFilesCount: 0,
		PartitionsCount:   0,
	}

	// Discover partitions for the topic
	relativePartitions, err := opt.engine.discoverTopicPartitions(hybridScanner.topic.Namespace, hybridScanner.topic.Name)
	if err != nil {
		return dataSources, DataSourceError{
			Source: "partition_discovery",
			Cause:  err,
		}
	}

	topicBasePath := fmt.Sprintf("/topics/%s/%s", hybridScanner.topic.Namespace, hybridScanner.topic.Name)

	// Collect stats from each partition
	for _, relPartition := range relativePartitions {
		partitionPath := fmt.Sprintf("%s/%s", topicBasePath, relPartition)

		// Read parquet file statistics
		parquetStats, err := hybridScanner.ReadParquetStatistics(partitionPath)
		if err == nil && len(parquetStats) > 0 {
			dataSources.ParquetFiles[partitionPath] = parquetStats
			for _, stat := range parquetStats {
				dataSources.ParquetRowCount += stat.RowCount
			}
		}

		// Count live log rows, excluding rows already converted to parquet
		parquetSources := opt.engine.extractParquetSourceFiles(dataSources.ParquetFiles[partitionPath])
		liveLogCount, _ := opt.engine.countLiveLogRowsExcludingParquetSources(ctx, partitionPath, parquetSources)
		dataSources.LiveLogRowCount += liveLogCount

		// Count live log files for partition
		partition := topic.Partition{
			RangeStart: 0,    // This will be properly set in a full implementation
			RangeStop:  1000, // This will be properly set in a full implementation
		}
		liveLogFileCount, err := hybridScanner.countLiveLogFiles(partition)
		if err == nil {
			dataSources.LiveLogFilesCount += liveLogFileCount
		}
	}

	dataSources.PartitionsCount = len(relativePartitions)

	return dataSources, nil
}
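
// A minimal usage sketch, assuming an engine and scanner already exist
// (the path layout shown is how this file builds partition paths):
//
//	optimizer := NewFastPathOptimizer(engine)
//	dataSources, err := optimizer.CollectDataSources(ctx, scanner)
//	// On success, dataSources.ParquetFiles is keyed by partition paths of
//	// the form "/topics/<namespace>/<topic>/<partition>".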

// AggregationComputer handles the computation of aggregations using fast path
type AggregationComputer struct {
	engine *SQLEngine
}

// NewAggregationComputer creates a new aggregation computer
func NewAggregationComputer(engine *SQLEngine) *AggregationComputer {
	return &AggregationComputer{engine: engine}
}

// ComputeFastPathAggregations computes aggregations using parquet statistics and live log data
func (comp *AggregationComputer) ComputeFastPathAggregations(
	ctx context.Context,
	aggregations []AggregationSpec,
	dataSources *TopicDataSources,
	partitions []string,
) ([]AggregationResult, error) {

	aggResults := make([]AggregationResult, len(aggregations))

	for i, spec := range aggregations {
		switch spec.Function {
		case "COUNT":
			if spec.Column == "*" {
				aggResults[i].Count = dataSources.ParquetRowCount + dataSources.LiveLogRowCount
			} else {
				// For specific columns, we might need to account for NULLs in the future
				aggResults[i].Count = dataSources.ParquetRowCount + dataSources.LiveLogRowCount
			}

		case "MIN":
			globalMin, err := comp.computeGlobalMin(spec, dataSources, partitions)
			if err != nil {
				return nil, AggregationError{
					Operation: spec.Function,
					Column:    spec.Column,
					Cause:     err,
				}
			}
			aggResults[i].Min = globalMin

		case "MAX":
			globalMax, err := comp.computeGlobalMax(spec, dataSources, partitions)
			if err != nil {
				return nil, AggregationError{
					Operation: spec.Function,
					Column:    spec.Column,
					Cause:     err,
				}
			}
			aggResults[i].Max = globalMax

		default:
			return nil, OptimizationError{
				Strategy: "fast_path_aggregation",
				Reason:   fmt.Sprintf("unsupported aggregation function: %s", spec.Function),
			}
		}
	}

	return aggResults, nil
}
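
// As a worked example with illustrative numbers: given 1,000 parquet rows
// and 250 deduplicated live log rows, a COUNT(*) spec yields Count == 1250
// without reading any row data.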

// computeGlobalMin computes the global minimum value across all data sources
func (comp *AggregationComputer) computeGlobalMin(spec AggregationSpec, dataSources *TopicDataSources, partitions []string) (interface{}, error) {
	var globalMin interface{}
	var globalMinValue *schema_pb.Value
	hasParquetStats := false

	// Step 1: Get minimum from parquet statistics
	for _, fileStats := range dataSources.ParquetFiles {
		for _, fileStat := range fileStats {
			// Try case-insensitive column lookup
			var colStats *ParquetColumnStats
			var found bool

			// First try exact match
			if stats, exists := fileStat.ColumnStats[spec.Column]; exists {
				colStats = stats
				found = true
			} else {
				// Try case-insensitive lookup
				for colName, stats := range fileStat.ColumnStats {
					if strings.EqualFold(colName, spec.Column) {
						colStats = stats
						found = true
						break
					}
				}
			}

			if found && colStats != nil && colStats.MinValue != nil {
				if globalMinValue == nil || comp.engine.compareValues(colStats.MinValue, globalMinValue) < 0 {
					globalMinValue = colStats.MinValue
					extractedValue := comp.engine.extractRawValue(colStats.MinValue)
					if extractedValue != nil {
						globalMin = extractedValue
						hasParquetStats = true
					}
				}
			}
		}
	}

	// Step 2: Get minimum from live log data (only if live logs exist)
	if dataSources.LiveLogRowCount > 0 {
		for _, partition := range partitions {
			partitionParquetSources := make(map[string]bool)
			if partitionFileStats, exists := dataSources.ParquetFiles[partition]; exists {
				partitionParquetSources = comp.engine.extractParquetSourceFiles(partitionFileStats)
			}

			liveLogMin, _, err := comp.engine.computeLiveLogMinMax(partition, spec.Column, partitionParquetSources)
			if err != nil {
				continue // Skip partitions with errors
			}

			if liveLogMin != nil {
				if globalMin == nil {
					globalMin = liveLogMin
					// Record the schema value too, so later iterations compare
					// against this baseline instead of a nil globalMinValue
					globalMinValue = comp.engine.convertRawValueToSchemaValue(liveLogMin)
				} else {
					liveLogSchemaValue := comp.engine.convertRawValueToSchemaValue(liveLogMin)
					if liveLogSchemaValue != nil && comp.engine.compareValues(liveLogSchemaValue, globalMinValue) < 0 {
						globalMin = liveLogMin
						globalMinValue = liveLogSchemaValue
					}
				}
			}
		}
	}

	// Step 3: Handle system columns if no regular data found
	if globalMin == nil && !hasParquetStats {
		globalMin = comp.engine.getSystemColumnGlobalMin(spec.Column, dataSources.ParquetFiles)
	}

	return globalMin, nil
}

// computeGlobalMax computes the global maximum value across all data sources
func (comp *AggregationComputer) computeGlobalMax(spec AggregationSpec, dataSources *TopicDataSources, partitions []string) (interface{}, error) {
	var globalMax interface{}
	var globalMaxValue *schema_pb.Value
	hasParquetStats := false

	// Step 1: Get maximum from parquet statistics
	for _, fileStats := range dataSources.ParquetFiles {
		for _, fileStat := range fileStats {
			// Try case-insensitive column lookup
			var colStats *ParquetColumnStats
			var found bool

			// First try exact match
			if stats, exists := fileStat.ColumnStats[spec.Column]; exists {
				colStats = stats
				found = true
			} else {
				// Try case-insensitive lookup
				for colName, stats := range fileStat.ColumnStats {
					if strings.EqualFold(colName, spec.Column) {
						colStats = stats
						found = true
						break
					}
				}
			}

			if found && colStats != nil && colStats.MaxValue != nil {
				if globalMaxValue == nil || comp.engine.compareValues(colStats.MaxValue, globalMaxValue) > 0 {
					globalMaxValue = colStats.MaxValue
					extractedValue := comp.engine.extractRawValue(colStats.MaxValue)
					if extractedValue != nil {
						globalMax = extractedValue
						hasParquetStats = true
					}
				}
			}
		}
	}

	// Step 2: Get maximum from live log data (only if live logs exist)
	if dataSources.LiveLogRowCount > 0 {
		for _, partition := range partitions {
			partitionParquetSources := make(map[string]bool)
			if partitionFileStats, exists := dataSources.ParquetFiles[partition]; exists {
				partitionParquetSources = comp.engine.extractParquetSourceFiles(partitionFileStats)
			}

			_, liveLogMax, err := comp.engine.computeLiveLogMinMax(partition, spec.Column, partitionParquetSources)
			if err != nil {
				continue // Skip partitions with errors
			}

			if liveLogMax != nil {
				if globalMax == nil {
					globalMax = liveLogMax
					// Record the schema value too, so later iterations compare
					// against this baseline instead of a nil globalMaxValue
					globalMaxValue = comp.engine.convertRawValueToSchemaValue(liveLogMax)
				} else {
					liveLogSchemaValue := comp.engine.convertRawValueToSchemaValue(liveLogMax)
					if liveLogSchemaValue != nil && comp.engine.compareValues(liveLogSchemaValue, globalMaxValue) > 0 {
						globalMax = liveLogMax
						globalMaxValue = liveLogSchemaValue
					}
				}
			}
		}
	}

	// Step 3: Handle system columns if no regular data found
	if globalMax == nil && !hasParquetStats {
		globalMax = comp.engine.getSystemColumnGlobalMax(spec.Column, dataSources.ParquetFiles)
	}

	return globalMax, nil
}
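
// Worked example with illustrative values: if parquet column statistics
// report a minimum of 5 for a column and the live log scan finds 3, the
// global minimum resolves to 3; computeGlobalMax merges symmetrically,
// preferring the larger value from either source.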

// executeAggregationQuery handles SELECT queries with aggregation functions
func (e *SQLEngine) executeAggregationQuery(ctx context.Context, hybridScanner *HybridMessageScanner, aggregations []AggregationSpec, stmt *SelectStatement) (*QueryResult, error) {
	return e.executeAggregationQueryWithPlan(ctx, hybridScanner, aggregations, stmt, nil)
}

// executeAggregationQueryWithPlan handles SELECT queries with aggregation functions and populates execution plan
func (e *SQLEngine) executeAggregationQueryWithPlan(ctx context.Context, hybridScanner *HybridMessageScanner, aggregations []AggregationSpec, stmt *SelectStatement, plan *QueryExecutionPlan) (*QueryResult, error) {
	// Parse WHERE clause for filtering
	var predicate func(*schema_pb.RecordValue) bool
	var err error
	if stmt.Where != nil {
		predicate, err = e.buildPredicate(stmt.Where.Expr)
		if err != nil {
			return &QueryResult{Error: err}, err
		}
	}

	// Extract time filters for optimization
	startTimeNs, stopTimeNs := int64(0), int64(0)
	if stmt.Where != nil {
		startTimeNs, stopTimeNs = e.extractTimeFilters(stmt.Where.Expr)
	}

	// FAST PATH: Try to use parquet statistics for optimization
	// This can be ~130x faster than scanning all data
	if stmt.Where == nil { // Only optimize when no complex WHERE clause
		fastResult, canOptimize := e.tryFastParquetAggregation(ctx, hybridScanner, aggregations)
		if canOptimize {
			if isDebugMode(ctx) {
				fmt.Printf("Using fast hybrid statistics for aggregation (parquet stats + live log counts)\n")
			}
			return fastResult, nil
		}
	}

	// SLOW PATH: Fall back to full table scan
	if isDebugMode(ctx) {
		fmt.Printf("Using full table scan for aggregation (parquet optimization not applicable)\n")
	}

	// Build scan options for full table scan (aggregations need all data)
	hybridScanOptions := HybridScanOptions{
		StartTimeNs: startTimeNs,
		StopTimeNs:  stopTimeNs,
		Limit:       0, // No limit for aggregations - need all data
		Predicate:   predicate,
	}

	// Execute the hybrid scan to get all matching records
	var results []HybridScanResult
	if plan != nil {
		// EXPLAIN mode - capture broker buffer stats
		var stats *HybridScanStats
		results, stats, err = hybridScanner.ScanWithStats(ctx, hybridScanOptions)
		if err != nil {
			return &QueryResult{Error: err}, err
		}

		// Populate plan with broker buffer information
		if stats != nil {
			plan.BrokerBufferQueried = stats.BrokerBufferQueried
			plan.BrokerBufferMessages = stats.BrokerBufferMessages
			plan.BufferStartIndex = stats.BufferStartIndex

			// Add broker_buffer to data sources if buffer was queried
			if stats.BrokerBufferQueried {
				// Check if broker_buffer is already in data sources
				hasBrokerBuffer := false
				for _, source := range plan.DataSources {
					if source == "broker_buffer" {
						hasBrokerBuffer = true
						break
					}
				}
				if !hasBrokerBuffer {
					plan.DataSources = append(plan.DataSources, "broker_buffer")
				}
			}
		}
	} else {
		// Normal mode - just get results
		results, err = hybridScanner.Scan(ctx, hybridScanOptions)
		if err != nil {
			return &QueryResult{Error: err}, err
		}
	}

	// Compute aggregations
	aggResults := e.computeAggregations(results, aggregations)

	// Build result set
	columns := make([]string, len(aggregations))
	row := make([]sqltypes.Value, len(aggregations))

	for i, spec := range aggregations {
		columns[i] = spec.Alias
		row[i] = e.formatAggregationResult(spec, aggResults[i])
	}

	return &QueryResult{
		Columns: columns,
		Rows:    [][]sqltypes.Value{row},
	}, nil
}
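
// Callers that want EXPLAIN-style detail pass a non-nil plan; a sketch,
// assuming an engine, scanner, specs, and statement are in scope:
//
//	plan := &QueryExecutionPlan{}
//	result, err := e.executeAggregationQueryWithPlan(ctx, scanner, specs, stmt, plan)
//	// When the slow path runs, plan.BrokerBufferQueried and plan.DataSources
//	// describe which sources the scan touched.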

// tryFastParquetAggregation attempts to compute aggregations using a hybrid approach:
// - Use parquet metadata for parquet files
// - Count deduplicated live log rows for live data
// - Combine both for accurate results per partition
// Returns (result, canOptimize) where canOptimize=true means the hybrid fast path was used
func (e *SQLEngine) tryFastParquetAggregation(ctx context.Context, hybridScanner *HybridMessageScanner, aggregations []AggregationSpec) (*QueryResult, bool) {
	// Use the new modular components
	optimizer := NewFastPathOptimizer(e)
	computer := NewAggregationComputer(e)

	// Step 1: Determine strategy
	strategy := optimizer.DetermineStrategy(aggregations)
	if !strategy.CanUseFastPath {
		return nil, false
	}

	// Step 2: Collect data sources
	dataSources, err := optimizer.CollectDataSources(ctx, hybridScanner)
	if err != nil {
		return nil, false
	}

	// Build partition list for aggregation computer
	relativePartitions, err := e.discoverTopicPartitions(hybridScanner.topic.Namespace, hybridScanner.topic.Name)
	if err != nil {
		return nil, false
	}

	topicBasePath := fmt.Sprintf("/topics/%s/%s", hybridScanner.topic.Namespace, hybridScanner.topic.Name)
	partitions := make([]string, len(relativePartitions))
	for i, relPartition := range relativePartitions {
		partitions[i] = fmt.Sprintf("%s/%s", topicBasePath, relPartition)
	}

	// Debug: Show the hybrid optimization results (only in explain mode)
	if isDebugMode(ctx) && (dataSources.ParquetRowCount > 0 || dataSources.LiveLogRowCount > 0) {
		partitionsWithLiveLogs := 0
		if dataSources.LiveLogRowCount > 0 {
			partitionsWithLiveLogs = 1 // Simplified for now
		}
		fmt.Printf("Hybrid fast aggregation with deduplication: %d parquet rows + %d deduplicated live log rows from %d partitions\n",
			dataSources.ParquetRowCount, dataSources.LiveLogRowCount, partitionsWithLiveLogs)
	}

	// Step 3: Compute aggregations using fast path
	aggResults, err := computer.ComputeFastPathAggregations(ctx, aggregations, dataSources, partitions)
	if err != nil {
		return nil, false
	}

	// Step 4: Build final query result
	columns := make([]string, len(aggregations))
	row := make([]sqltypes.Value, len(aggregations))

	for i, spec := range aggregations {
		columns[i] = spec.Alias
		row[i] = e.formatAggregationResult(spec, aggResults[i])
	}

	result := &QueryResult{
		Columns: columns,
		Rows:    [][]sqltypes.Value{row},
	}

	return result, true
}

// computeAggregations computes aggregation results from a full table scan
func (e *SQLEngine) computeAggregations(results []HybridScanResult, aggregations []AggregationSpec) []AggregationResult {
	aggResults := make([]AggregationResult, len(aggregations))

	for i, spec := range aggregations {
		switch spec.Function {
		case "COUNT":
			if spec.Column == "*" {
				aggResults[i].Count = int64(len(results))
			} else {
				count := int64(0)
				for _, result := range results {
					if value := e.findColumnValue(result, spec.Column); value != nil && !e.isNullValue(value) {
						count++
					}
				}
				aggResults[i].Count = count
			}

		case "SUM":
			sum := float64(0)
			for _, result := range results {
				if value := e.findColumnValue(result, spec.Column); value != nil {
					if numValue := e.convertToNumber(value); numValue != nil {
						sum += *numValue
					}
				}
			}
			aggResults[i].Sum = sum

		case "AVG":
			sum := float64(0)
			count := int64(0)
			for _, result := range results {
				if value := e.findColumnValue(result, spec.Column); value != nil {
					if numValue := e.convertToNumber(value); numValue != nil {
						sum += *numValue
						count++
					}
				}
			}
			if count > 0 {
				aggResults[i].Sum = sum / float64(count) // Store average in Sum field
				aggResults[i].Count = count
			}

		case "MIN":
			var min interface{}
			var minValue *schema_pb.Value
			for _, result := range results {
				if value := e.findColumnValue(result, spec.Column); value != nil {
					if minValue == nil || e.compareValues(value, minValue) < 0 {
						minValue = value
						min = e.extractRawValue(value)
					}
				}
			}
			aggResults[i].Min = min

		case "MAX":
			var max interface{}
			var maxValue *schema_pb.Value
			for _, result := range results {
				if value := e.findColumnValue(result, spec.Column); value != nil {
					if maxValue == nil || e.compareValues(value, maxValue) > 0 {
						maxValue = value
						max = e.extractRawValue(value)
					}
				}
			}
			aggResults[i].Max = max
		}
	}

	return aggResults
}

// canUseParquetStatsForAggregation determines if an aggregation can be optimized with parquet stats
func (e *SQLEngine) canUseParquetStatsForAggregation(spec AggregationSpec) bool {
	switch spec.Function {
	case "COUNT":
		return spec.Column == "*" || e.isSystemColumn(spec.Column) || e.isRegularColumn(spec.Column)
	case "MIN", "MAX":
		return e.isSystemColumn(spec.Column) || e.isRegularColumn(spec.Column)
	case "SUM", "AVG":
		// These require scanning actual values, not just min/max
		return false
	default:
		return false
	}
}
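
// For example, COUNT(*), MIN(ts), and MAX(id) are all fast-path eligible,
// while SUM(amount) and AVG(amount) always force a full scan (the column
// names here are purely illustrative).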