package engine

import (
	"context"
	"testing"

	"github.com/seaweedfs/seaweedfs/weed/pb/schema_pb"
	"github.com/stretchr/testify/assert"
)

// TestFastPathCountFixRealistic tests the specific scenario mentioned in the bug report:
// the fast path returning 0 for COUNT(*) when the slow path returns 1803.
//
// The fixture describes two partitions holding three parquet files (800 + 500 + 300
// rows) plus 203 live-log rows, and the subtests run COUNT(*), MIN(id) and MAX(id)
// through ComputeFastPathAggregations against it.
func TestFastPathCountFixRealistic(t *testing.T) {
	engine := NewMockSQLEngine()

	// Set up debug mode to see our new logging.
	// NOTE(review): the plain string key is kept on purpose; presumably the engine
	// looks the flag up with the same string key, so switching to a typed key here
	// would silently disable it — confirm against the reader before changing.
	ctx := context.WithValue(context.Background(), "debug", true)

	// Create realistic data sources that mimic a scenario with 1803 rows.
	dataSources := &TopicDataSources{
		ParquetFiles: map[string][]*ParquetFileStats{
			"/topics/test/large-topic/0000-1023": {
				{
					RowCount: 800,
					ColumnStats: map[string]*ParquetColumnStats{
						"id": {
							ColumnName: "id",
							MinValue:   &schema_pb.Value{Kind: &schema_pb.Value_Int64Value{Int64Value: 1}},
							MaxValue:   &schema_pb.Value{Kind: &schema_pb.Value_Int64Value{Int64Value: 800}},
							NullCount:  0,
							RowCount:   800,
						},
					},
				},
				{
					RowCount: 500,
					ColumnStats: map[string]*ParquetColumnStats{
						"id": {
							ColumnName: "id",
							MinValue:   &schema_pb.Value{Kind: &schema_pb.Value_Int64Value{Int64Value: 801}},
							MaxValue:   &schema_pb.Value{Kind: &schema_pb.Value_Int64Value{Int64Value: 1300}},
							NullCount:  0,
							RowCount:   500,
						},
					},
				},
			},
			"/topics/test/large-topic/1024-2047": {
				{
					RowCount: 300,
					ColumnStats: map[string]*ParquetColumnStats{
						"id": {
							ColumnName: "id",
							MinValue:   &schema_pb.Value{Kind: &schema_pb.Value_Int64Value{Int64Value: 1301}},
							MaxValue:   &schema_pb.Value{Kind: &schema_pb.Value_Int64Value{Int64Value: 1600}},
							NullCount:  0,
							RowCount:   300,
						},
					},
				},
			},
		},
		ParquetRowCount:   1600, // 800 + 500 + 300
		LiveLogRowCount:   203,  // Additional live log data
		PartitionsCount:   2,
		LiveLogFilesCount: 15,
	}

	partitions := []string{
		"/topics/test/large-topic/0000-1023",
		"/topics/test/large-topic/1024-2047",
	}

	t.Run("COUNT(*) should return correct total (1803)", func(t *testing.T) {
		computer := NewAggregationComputer(engine.SQLEngine)

		aggregations := []AggregationSpec{
			{Function: FuncCOUNT, Column: "*", Alias: "COUNT(*)"},
		}

		results, err := computer.ComputeFastPathAggregations(ctx, aggregations, dataSources, partitions)

		// assert.* only records a failure and keeps going, so bail out explicitly:
		// indexing results below would otherwise panic instead of failing cleanly.
		if !assert.NoError(t, err, "Fast path aggregation should not error") {
			return
		}
		if !assert.Len(t, results, 1, "Should return one result") {
			return
		}

		// This is the key test - before our fix, this was returning 0.
		expectedCount := int64(1803) // 1600 (parquet) + 203 (live log)
		actualCount := results[0].Count

		assert.Equal(t, expectedCount, actualCount,
			"COUNT(*) should return %d (1600 parquet + 203 live log), but got %d",
			expectedCount, actualCount)
	})

	t.Run("MIN/MAX should work with multiple partitions", func(t *testing.T) {
		computer := NewAggregationComputer(engine.SQLEngine)

		aggregations := []AggregationSpec{
			{Function: FuncMIN, Column: "id", Alias: "MIN(id)"},
			{Function: FuncMAX, Column: "id", Alias: "MAX(id)"},
		}

		results, err := computer.ComputeFastPathAggregations(ctx, aggregations, dataSources, partitions)

		// Stop before indexing if the call failed or returned the wrong number of
		// results; results[0] / results[1] would panic on a short slice.
		if !assert.NoError(t, err, "Fast path aggregation should not error") {
			return
		}
		if !assert.Len(t, results, 2, "Should return two results") {
			return
		}

		// MIN should be the lowest across all parquet files.
		assert.Equal(t, int64(1), results[0].Min, "MIN should be 1")

		// MAX should be the highest across all parquet files.
		assert.Equal(t, int64(1600), results[1].Max, "MAX should be 1600")
	})
}

// TestFastPathDataSourceDiscoveryLogging checks that a freshly initialized
// TopicDataSources value has a non-nil ParquetFiles map and non-negative counters.
// It only validates the structure; it does not exercise the engine or any logging.
func TestFastPathDataSourceDiscoveryLogging(t *testing.T) {
	// This test verifies that our enhanced data source collection structure is correct.
	t.Run("DataSources structure validation", func(t *testing.T) {
		// Test the TopicDataSources structure initialization.
		dataSources := &TopicDataSources{
			ParquetFiles:      make(map[string][]*ParquetFileStats),
			ParquetRowCount:   0,
			LiveLogRowCount:   0,
			LiveLogFilesCount: 0,
			PartitionsCount:   0,
		}

		assert.NotNil(t, dataSources, "Data sources should not be nil")
		assert.NotNil(t, dataSources.ParquetFiles, "ParquetFiles map should be initialized")
		assert.GreaterOrEqual(t, dataSources.PartitionsCount, 0, "PartitionsCount should be non-negative")
		assert.GreaterOrEqual(t, dataSources.ParquetRowCount, int64(0), "ParquetRowCount should be non-negative")
		assert.GreaterOrEqual(t, dataSources.LiveLogRowCount, int64(0), "LiveLogRowCount should be non-negative")
	})
}

// TestFastPathValidationLogic tests the enhanced validation we added: the COUNT
// result must equal ParquetRowCount + LiveLogRowCount of the data sources.
// The comparison is re-implemented inline here (it simulates, rather than calls,
// the check in tryFastParquetAggregation).
func TestFastPathValidationLogic(t *testing.T) {
	t.Run("Validation catches data source vs computation mismatch", func(t *testing.T) {
		// Create a scenario where data sources and computation might be inconsistent.
		dataSources := &TopicDataSources{
			ParquetFiles:    make(map[string][]*ParquetFileStats),
			ParquetRowCount: 1000, // Data sources say 1000 rows
			LiveLogRowCount: 0,
			PartitionsCount: 1,
		}

		// But aggregation result says different count (simulating the original bug).
		aggResults := []AggregationResult{
			{Count: 0}, // Bug: returns 0 when data sources show 1000
		}

		// This simulates the validation logic from tryFastParquetAggregation.
		totalRows := dataSources.ParquetRowCount + dataSources.LiveLogRowCount
		countResult := aggResults[0].Count

		// Our validation should catch this mismatch.
		assert.NotEqual(t, totalRows, countResult,
			"This test simulates the bug: data sources show %d but COUNT returns %d",
			totalRows, countResult)

		// In the real code, this would trigger a fallback to slow path.
		validationPassed := (countResult == totalRows)
		assert.False(t, validationPassed, "Validation should fail for inconsistent data")
	})

	t.Run("Validation passes for consistent data", func(t *testing.T) {
		// Create a scenario where everything is consistent.
		dataSources := &TopicDataSources{
			ParquetFiles:    make(map[string][]*ParquetFileStats),
			ParquetRowCount: 1000,
			LiveLogRowCount: 803,
			PartitionsCount: 1,
		}

		// Aggregation result matches data sources.
		aggResults := []AggregationResult{
			{Count: 1803}, // Correct: matches 1000 + 803
		}

		totalRows := dataSources.ParquetRowCount + dataSources.LiveLogRowCount
		countResult := aggResults[0].Count

		// Our validation should pass this.
		assert.Equal(t, totalRows, countResult,
			"Validation should pass when data sources (%d) match COUNT result (%d)",
			totalRows, countResult)

		validationPassed := (countResult == totalRows)
		assert.True(t, validationPassed, "Validation should pass for consistent data")
	})
}