package engine

import (
	"context"
	"fmt"
	"strings"

	"github.com/seaweedfs/seaweedfs/weed/mq/topic"
	"github.com/seaweedfs/seaweedfs/weed/pb/schema_pb"
	"github.com/seaweedfs/seaweedfs/weed/query/sqltypes"
)

// AggregationSpec defines an aggregation function to be computed
type AggregationSpec struct {
	Function string // COUNT, SUM, AVG, MIN, MAX
	Column   string // Column name, or "*" for COUNT(*)
	Alias    string // Optional alias for the result column
	Distinct bool   // Support for DISTINCT keyword
}
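
// For illustration only: a statement like
//
//	SELECT COUNT(*), MAX(ts) AS latest FROM events
//
// would be described by specs along these lines (the column and alias names
// are hypothetical):
//
//	[]AggregationSpec{
//		{Function: "COUNT", Column: "*"},
//		{Function: "MAX", Column: "ts", Alias: "latest"},
//	}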

// AggregationResult holds the computed result of an aggregation
type AggregationResult struct {
	Count int64
	Sum   float64
	Min   interface{}
	Max   interface{}
}

// AggregationStrategy represents the strategy for executing aggregations
type AggregationStrategy struct {
	CanUseFastPath   bool
	Reason           string
	UnsupportedSpecs []AggregationSpec
}

// TopicDataSources represents the data sources available for a topic
type TopicDataSources struct {
	ParquetFiles      map[string][]*ParquetFileStats // partitionPath -> parquet file stats
	ParquetRowCount   int64
	LiveLogRowCount   int64
	LiveLogFilesCount int // Total count of live log files across all partitions
	PartitionsCount   int
}
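
// Note: the fast COUNT(*) path below relies on the invariant that the total
// row count for a topic equals ParquetRowCount + LiveLogRowCount, where live
// log rows have already been deduplicated against files compacted into
// parquet (see CollectDataSources).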

// FastPathOptimizer handles fast path aggregation optimization decisions
type FastPathOptimizer struct {
	engine *SQLEngine
}

// NewFastPathOptimizer creates a new fast path optimizer
func NewFastPathOptimizer(engine *SQLEngine) *FastPathOptimizer {
	return &FastPathOptimizer{engine: engine}
}

// DetermineStrategy analyzes aggregations and determines if fast path can be used
func (opt *FastPathOptimizer) DetermineStrategy(aggregations []AggregationSpec) AggregationStrategy {
	strategy := AggregationStrategy{
		CanUseFastPath:   true,
		Reason:           "all_aggregations_supported",
		UnsupportedSpecs: []AggregationSpec{},
	}

	for _, spec := range aggregations {
		if !opt.engine.canUseParquetStatsForAggregation(spec) {
			strategy.CanUseFastPath = false
			strategy.Reason = "unsupported_aggregation_functions"
			strategy.UnsupportedSpecs = append(strategy.UnsupportedSpecs, spec)
		}
	}

	return strategy
}
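
// A minimal sketch of the decision (the specs are hypothetical): mixing a
// fast-path-eligible COUNT(*) with a SUM forces the slow path, because SUM
// cannot be answered from parquet min/max statistics:
//
//	strategy := optimizer.DetermineStrategy([]AggregationSpec{
//		{Function: "COUNT", Column: "*"},
//		{Function: "SUM", Column: "amount"},
//	})
//	// strategy.CanUseFastPath == false
//	// strategy.Reason == "unsupported_aggregation_functions"
//	// strategy.UnsupportedSpecs holds the SUM spec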

// CollectDataSources gathers information about available data sources for a topic
func (opt *FastPathOptimizer) CollectDataSources(ctx context.Context, hybridScanner *HybridMessageScanner) (*TopicDataSources, error) {
	dataSources := &TopicDataSources{
		ParquetFiles:      make(map[string][]*ParquetFileStats),
		ParquetRowCount:   0,
		LiveLogRowCount:   0,
		LiveLogFilesCount: 0,
		PartitionsCount:   0,
	}

	// Discover partitions for the topic
	relativePartitions, err := opt.engine.discoverTopicPartitions(hybridScanner.topic.Namespace, hybridScanner.topic.Name)
	if err != nil {
		return dataSources, DataSourceError{
			Source: "partition_discovery",
			Cause:  err,
		}
	}

	topicBasePath := fmt.Sprintf("/topics/%s/%s", hybridScanner.topic.Namespace, hybridScanner.topic.Name)

	// Collect stats from each partition
	for _, relPartition := range relativePartitions {
		partitionPath := fmt.Sprintf("%s/%s", topicBasePath, relPartition)

		// Read parquet file statistics
		parquetStats, err := hybridScanner.ReadParquetStatistics(partitionPath)
		if err == nil && len(parquetStats) > 0 {
			dataSources.ParquetFiles[partitionPath] = parquetStats
			for _, stat := range parquetStats {
				dataSources.ParquetRowCount += stat.RowCount
			}
		}

		// Count live log rows, excluding rows already converted to parquet
		parquetSources := opt.engine.extractParquetSourceFiles(dataSources.ParquetFiles[partitionPath])
		liveLogCount, _ := opt.engine.countLiveLogRowsExcludingParquetSources(ctx, partitionPath, parquetSources)
		dataSources.LiveLogRowCount += liveLogCount

		// Count live log files for partition
		partition := topic.Partition{
			RangeStart: 0,    // This will be properly set in a full implementation
			RangeStop:  1000, // This will be properly set in a full implementation
		}
		liveLogFileCount, err := hybridScanner.countLiveLogFiles(partition)
		if err == nil {
			dataSources.LiveLogFilesCount += liveLogFileCount
		}
	}

	dataSources.PartitionsCount = len(relativePartitions)

	return dataSources, nil
}
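
// A minimal usage sketch, assuming an engine and scanner already exist
// (the path layout shown is how this file builds partition paths):
//
//	optimizer := NewFastPathOptimizer(engine)
//	dataSources, err := optimizer.CollectDataSources(ctx, scanner)
//	// On success, dataSources.ParquetFiles is keyed by partition paths of
//	// the form "/topics/<namespace>/<topic>/<partition>".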

// AggregationComputer handles the computation of aggregations using fast path
type AggregationComputer struct {
	engine *SQLEngine
}

// NewAggregationComputer creates a new aggregation computer
func NewAggregationComputer(engine *SQLEngine) *AggregationComputer {
	return &AggregationComputer{engine: engine}
}

// ComputeFastPathAggregations computes aggregations using parquet statistics and live log data
func (comp *AggregationComputer) ComputeFastPathAggregations(
	ctx context.Context,
	aggregations []AggregationSpec,
	dataSources *TopicDataSources,
	partitions []string,
) ([]AggregationResult, error) {

	aggResults := make([]AggregationResult, len(aggregations))

	for i, spec := range aggregations {
		switch spec.Function {
		case "COUNT":
			if spec.Column == "*" {
				aggResults[i].Count = dataSources.ParquetRowCount + dataSources.LiveLogRowCount
			} else {
				// For specific columns, we might need to account for NULLs in the future
				aggResults[i].Count = dataSources.ParquetRowCount + dataSources.LiveLogRowCount
			}

		case "MIN":
			globalMin, err := comp.computeGlobalMin(spec, dataSources, partitions)
			if err != nil {
				return nil, AggregationError{
					Operation: spec.Function,
					Column:    spec.Column,
					Cause:     err,
				}
			}
			aggResults[i].Min = globalMin

		case "MAX":
			globalMax, err := comp.computeGlobalMax(spec, dataSources, partitions)
			if err != nil {
				return nil, AggregationError{
					Operation: spec.Function,
					Column:    spec.Column,
					Cause:     err,
				}
			}
			aggResults[i].Max = globalMax

		default:
			return nil, OptimizationError{
				Strategy: "fast_path_aggregation",
				Reason:   fmt.Sprintf("unsupported aggregation function: %s", spec.Function),
			}
		}
	}

	return aggResults, nil
}
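
// As a worked example with illustrative numbers: given 1,000 parquet rows
// and 250 deduplicated live log rows, a COUNT(*) spec yields Count == 1250
// without reading any row data.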

// computeGlobalMin computes the global minimum value across all data sources
func (comp *AggregationComputer) computeGlobalMin(spec AggregationSpec, dataSources *TopicDataSources, partitions []string) (interface{}, error) {
	var globalMin interface{}
	var globalMinValue *schema_pb.Value
	hasParquetStats := false

	// Step 1: Get minimum from parquet statistics
	for _, fileStats := range dataSources.ParquetFiles {
		for _, fileStat := range fileStats {
			// Try case-insensitive column lookup
			var colStats *ParquetColumnStats
			var found bool

			// First try exact match
			if stats, exists := fileStat.ColumnStats[spec.Column]; exists {
				colStats = stats
				found = true
			} else {
				// Try case-insensitive lookup
				for colName, stats := range fileStat.ColumnStats {
					if strings.EqualFold(colName, spec.Column) {
						colStats = stats
						found = true
						break
					}
				}
			}

			if found && colStats != nil && colStats.MinValue != nil {
				if globalMinValue == nil || comp.engine.compareValues(colStats.MinValue, globalMinValue) < 0 {
					globalMinValue = colStats.MinValue
					extractedValue := comp.engine.extractRawValue(colStats.MinValue)
					if extractedValue != nil {
						globalMin = extractedValue
						hasParquetStats = true
					}
				}
			}
		}
	}

	// Step 2: Get minimum from live log data (only if live logs exist)
	if dataSources.LiveLogRowCount > 0 {
		for _, partition := range partitions {
			partitionParquetSources := make(map[string]bool)
			if partitionFileStats, exists := dataSources.ParquetFiles[partition]; exists {
				partitionParquetSources = comp.engine.extractParquetSourceFiles(partitionFileStats)
			}

			liveLogMin, _, err := comp.engine.computeLiveLogMinMax(partition, spec.Column, partitionParquetSources)
			if err != nil {
				continue // Skip partitions with errors
			}

			if liveLogMin != nil {
				if globalMin == nil {
					globalMin = liveLogMin
					// Record the schema value too, so later iterations compare
					// against this baseline instead of a nil globalMinValue
					globalMinValue = comp.engine.convertRawValueToSchemaValue(liveLogMin)
				} else {
					liveLogSchemaValue := comp.engine.convertRawValueToSchemaValue(liveLogMin)
					if liveLogSchemaValue != nil && comp.engine.compareValues(liveLogSchemaValue, globalMinValue) < 0 {
						globalMin = liveLogMin
						globalMinValue = liveLogSchemaValue
					}
				}
			}
		}
	}

	// Step 3: Handle system columns if no regular data found
	if globalMin == nil && !hasParquetStats {
		globalMin = comp.engine.getSystemColumnGlobalMin(spec.Column, dataSources.ParquetFiles)
	}

	return globalMin, nil
}

// computeGlobalMax computes the global maximum value across all data sources
func (comp *AggregationComputer) computeGlobalMax(spec AggregationSpec, dataSources *TopicDataSources, partitions []string) (interface{}, error) {
	var globalMax interface{}
	var globalMaxValue *schema_pb.Value
	hasParquetStats := false

	// Step 1: Get maximum from parquet statistics
	for _, fileStats := range dataSources.ParquetFiles {
		for _, fileStat := range fileStats {
			// Try case-insensitive column lookup
			var colStats *ParquetColumnStats
			var found bool

			// First try exact match
			if stats, exists := fileStat.ColumnStats[spec.Column]; exists {
				colStats = stats
				found = true
			} else {
				// Try case-insensitive lookup
				for colName, stats := range fileStat.ColumnStats {
					if strings.EqualFold(colName, spec.Column) {
						colStats = stats
						found = true
						break
					}
				}
			}

			if found && colStats != nil && colStats.MaxValue != nil {
				if globalMaxValue == nil || comp.engine.compareValues(colStats.MaxValue, globalMaxValue) > 0 {
					globalMaxValue = colStats.MaxValue
					extractedValue := comp.engine.extractRawValue(colStats.MaxValue)
					if extractedValue != nil {
						globalMax = extractedValue
						hasParquetStats = true
					}
				}
			}
		}
	}

	// Step 2: Get maximum from live log data (only if live logs exist)
	if dataSources.LiveLogRowCount > 0 {
		for _, partition := range partitions {
			partitionParquetSources := make(map[string]bool)
			if partitionFileStats, exists := dataSources.ParquetFiles[partition]; exists {
				partitionParquetSources = comp.engine.extractParquetSourceFiles(partitionFileStats)
			}

			_, liveLogMax, err := comp.engine.computeLiveLogMinMax(partition, spec.Column, partitionParquetSources)
			if err != nil {
				continue // Skip partitions with errors
			}

			if liveLogMax != nil {
				if globalMax == nil {
					globalMax = liveLogMax
					// Record the schema value too, so later iterations compare
					// against this baseline instead of a nil globalMaxValue
					globalMaxValue = comp.engine.convertRawValueToSchemaValue(liveLogMax)
				} else {
					liveLogSchemaValue := comp.engine.convertRawValueToSchemaValue(liveLogMax)
					if liveLogSchemaValue != nil && comp.engine.compareValues(liveLogSchemaValue, globalMaxValue) > 0 {
						globalMax = liveLogMax
						globalMaxValue = liveLogSchemaValue
					}
				}
			}
		}
	}

	// Step 3: Handle system columns if no regular data found
	if globalMax == nil && !hasParquetStats {
		globalMax = comp.engine.getSystemColumnGlobalMax(spec.Column, dataSources.ParquetFiles)
	}

	return globalMax, nil
}
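
// Worked example with illustrative values: if parquet column statistics
// report a minimum of 5 for a column and the live log scan finds 3, the
// global minimum resolves to 3; computeGlobalMax merges symmetrically,
// preferring the larger value from either source.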

// executeAggregationQuery handles SELECT queries with aggregation functions
func (e *SQLEngine) executeAggregationQuery(ctx context.Context, hybridScanner *HybridMessageScanner, aggregations []AggregationSpec, stmt *SelectStatement) (*QueryResult, error) {
	return e.executeAggregationQueryWithPlan(ctx, hybridScanner, aggregations, stmt, nil)
}

// executeAggregationQueryWithPlan handles SELECT queries with aggregation functions and populates execution plan
func (e *SQLEngine) executeAggregationQueryWithPlan(ctx context.Context, hybridScanner *HybridMessageScanner, aggregations []AggregationSpec, stmt *SelectStatement, plan *QueryExecutionPlan) (*QueryResult, error) {
	// Parse WHERE clause for filtering
	var predicate func(*schema_pb.RecordValue) bool
	var err error
	if stmt.Where != nil {
		predicate, err = e.buildPredicate(stmt.Where.Expr)
		if err != nil {
			return &QueryResult{Error: err}, err
		}
	}

	// Extract time filters for optimization
	startTimeNs, stopTimeNs := int64(0), int64(0)
	if stmt.Where != nil {
		startTimeNs, stopTimeNs = e.extractTimeFilters(stmt.Where.Expr)
	}

	// FAST PATH: Try to use parquet statistics for optimization
	// This can be ~130x faster than scanning all data
	if stmt.Where == nil { // Only optimize when no complex WHERE clause
		fastResult, canOptimize := e.tryFastParquetAggregation(ctx, hybridScanner, aggregations)
		if canOptimize {
			if isDebugMode(ctx) {
				fmt.Printf("Using fast hybrid statistics for aggregation (parquet stats + live log counts)\n")
			}
			return fastResult, nil
		}
	}

	// SLOW PATH: Fall back to full table scan
	if isDebugMode(ctx) {
		fmt.Printf("Using full table scan for aggregation (parquet optimization not applicable)\n")
	}

	// Build scan options for full table scan (aggregations need all data)
	hybridScanOptions := HybridScanOptions{
		StartTimeNs: startTimeNs,
		StopTimeNs:  stopTimeNs,
		Limit:       0, // No limit for aggregations - need all data
		Predicate:   predicate,
	}

	// Execute the hybrid scan to get all matching records
	var results []HybridScanResult
	if plan != nil {
		// EXPLAIN mode - capture broker buffer stats
		var stats *HybridScanStats
		results, stats, err = hybridScanner.ScanWithStats(ctx, hybridScanOptions)
		if err != nil {
			return &QueryResult{Error: err}, err
		}

		// Populate plan with broker buffer information
		if stats != nil {
			plan.BrokerBufferQueried = stats.BrokerBufferQueried
			plan.BrokerBufferMessages = stats.BrokerBufferMessages
			plan.BufferStartIndex = stats.BufferStartIndex

			// Add broker_buffer to data sources if buffer was queried
			if stats.BrokerBufferQueried {
				// Check if broker_buffer is already in data sources
				hasBrokerBuffer := false
				for _, source := range plan.DataSources {
					if source == "broker_buffer" {
						hasBrokerBuffer = true
						break
					}
				}
				if !hasBrokerBuffer {
					plan.DataSources = append(plan.DataSources, "broker_buffer")
				}
			}
		}
	} else {
		// Normal mode - just get results
		results, err = hybridScanner.Scan(ctx, hybridScanOptions)
		if err != nil {
			return &QueryResult{Error: err}, err
		}
	}

	// Compute aggregations
	aggResults := e.computeAggregations(results, aggregations)

	// Build result set
	columns := make([]string, len(aggregations))
	row := make([]sqltypes.Value, len(aggregations))

	for i, spec := range aggregations {
		columns[i] = spec.Alias
		row[i] = e.formatAggregationResult(spec, aggResults[i])
	}

	return &QueryResult{
		Columns: columns,
		Rows:    [][]sqltypes.Value{row},
	}, nil
}
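
// Callers that want EXPLAIN-style detail pass a non-nil plan; a sketch,
// assuming an engine, scanner, specs, and statement are in scope:
//
//	plan := &QueryExecutionPlan{}
//	result, err := e.executeAggregationQueryWithPlan(ctx, scanner, specs, stmt, plan)
//	// When the slow path runs, plan.BrokerBufferQueried and plan.DataSources
//	// describe which sources the scan touched.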

// tryFastParquetAggregation attempts to compute aggregations using a hybrid approach:
// - Use parquet metadata for parquet files
// - Count deduplicated live log rows for live data
// - Combine both for accurate results per partition
// Returns (result, canOptimize) where canOptimize=true means the hybrid fast path was used
func (e *SQLEngine) tryFastParquetAggregation(ctx context.Context, hybridScanner *HybridMessageScanner, aggregations []AggregationSpec) (*QueryResult, bool) {
	// Use the new modular components
	optimizer := NewFastPathOptimizer(e)
	computer := NewAggregationComputer(e)

	// Step 1: Determine strategy
	strategy := optimizer.DetermineStrategy(aggregations)
	if !strategy.CanUseFastPath {
		return nil, false
	}

	// Step 2: Collect data sources
	dataSources, err := optimizer.CollectDataSources(ctx, hybridScanner)
	if err != nil {
		return nil, false
	}

	// Build partition list for aggregation computer
	relativePartitions, err := e.discoverTopicPartitions(hybridScanner.topic.Namespace, hybridScanner.topic.Name)
	if err != nil {
		return nil, false
	}

	topicBasePath := fmt.Sprintf("/topics/%s/%s", hybridScanner.topic.Namespace, hybridScanner.topic.Name)
	partitions := make([]string, len(relativePartitions))
	for i, relPartition := range relativePartitions {
		partitions[i] = fmt.Sprintf("%s/%s", topicBasePath, relPartition)
	}

	// Debug: Show the hybrid optimization results (only in explain mode)
	if isDebugMode(ctx) && (dataSources.ParquetRowCount > 0 || dataSources.LiveLogRowCount > 0) {
		partitionsWithLiveLogs := 0
		if dataSources.LiveLogRowCount > 0 {
			partitionsWithLiveLogs = 1 // Simplified for now
		}
		fmt.Printf("Hybrid fast aggregation with deduplication: %d parquet rows + %d deduplicated live log rows from %d partitions\n",
			dataSources.ParquetRowCount, dataSources.LiveLogRowCount, partitionsWithLiveLogs)
	}

	// Step 3: Compute aggregations using fast path
	aggResults, err := computer.ComputeFastPathAggregations(ctx, aggregations, dataSources, partitions)
	if err != nil {
		return nil, false
	}

	// Step 4: Build final query result
	columns := make([]string, len(aggregations))
	row := make([]sqltypes.Value, len(aggregations))

	for i, spec := range aggregations {
		columns[i] = spec.Alias
		row[i] = e.formatAggregationResult(spec, aggResults[i])
	}

	result := &QueryResult{
		Columns: columns,
		Rows:    [][]sqltypes.Value{row},
	}

	return result, true
}

// computeAggregations computes aggregation results from a full table scan
func (e *SQLEngine) computeAggregations(results []HybridScanResult, aggregations []AggregationSpec) []AggregationResult {
	aggResults := make([]AggregationResult, len(aggregations))

	for i, spec := range aggregations {
		switch spec.Function {
		case "COUNT":
			if spec.Column == "*" {
				aggResults[i].Count = int64(len(results))
			} else {
				count := int64(0)
				for _, result := range results {
					if value := e.findColumnValue(result, spec.Column); value != nil && !e.isNullValue(value) {
						count++
					}
				}
				aggResults[i].Count = count
			}

		case "SUM":
			sum := float64(0)
			for _, result := range results {
				if value := e.findColumnValue(result, spec.Column); value != nil {
					if numValue := e.convertToNumber(value); numValue != nil {
						sum += *numValue
					}
				}
			}
			aggResults[i].Sum = sum

		case "AVG":
			sum := float64(0)
			count := int64(0)
			for _, result := range results {
				if value := e.findColumnValue(result, spec.Column); value != nil {
					if numValue := e.convertToNumber(value); numValue != nil {
						sum += *numValue
						count++
					}
				}
			}
			if count > 0 {
				aggResults[i].Sum = sum / float64(count) // Store average in Sum field
				aggResults[i].Count = count
			}

		case "MIN":
			var min interface{}
			var minValue *schema_pb.Value
			for _, result := range results {
				if value := e.findColumnValue(result, spec.Column); value != nil {
					if minValue == nil || e.compareValues(value, minValue) < 0 {
						minValue = value
						min = e.extractRawValue(value)
					}
				}
			}
			aggResults[i].Min = min

		case "MAX":
			var max interface{}
			var maxValue *schema_pb.Value
			for _, result := range results {
				if value := e.findColumnValue(result, spec.Column); value != nil {
					if maxValue == nil || e.compareValues(value, maxValue) > 0 {
						maxValue = value
						max = e.extractRawValue(value)
					}
				}
			}
			aggResults[i].Max = max
		}
	}

	return aggResults
}

// canUseParquetStatsForAggregation determines if an aggregation can be optimized with parquet stats
func (e *SQLEngine) canUseParquetStatsForAggregation(spec AggregationSpec) bool {
	switch spec.Function {
	case "COUNT":
		return spec.Column == "*" || e.isSystemColumn(spec.Column) || e.isRegularColumn(spec.Column)
	case "MIN", "MAX":
		return e.isSystemColumn(spec.Column) || e.isRegularColumn(spec.Column)
	case "SUM", "AVG":
		// These require scanning actual values, not just min/max
		return false
	default:
		return false
	}
}
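
// For example, COUNT(*), MIN(ts), and MAX(id) are all fast-path eligible,
// while SUM(amount) and AVG(amount) always force a full scan (the column
// names here are purely illustrative).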