You cannot select more than 25 topics.
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
					
					
						
							193 lines
						
					
					
						
							6.6 KiB
						
					
					
				
			
		
		
		
			
			
			
		
		
	
	
							193 lines
						
					
					
						
							6.6 KiB
						
					
					
				| package engine | |
| 
 | |
| import ( | |
| 	"context" | |
| 	"testing" | |
| 
 | |
| 	"github.com/seaweedfs/seaweedfs/weed/pb/schema_pb" | |
| 	"github.com/stretchr/testify/assert" | |
| ) | |
| 
 | |
| // TestFastPathCountFixRealistic tests the specific scenario mentioned in the bug report: | |
| // Fast path returning 0 for COUNT(*) when slow path returns 1803 | |
| func TestFastPathCountFixRealistic(t *testing.T) { | |
| 	engine := NewMockSQLEngine() | |
| 
 | |
| 	// Set up debug mode to see our new logging | |
| 	ctx := context.WithValue(context.Background(), "debug", true) | |
| 
 | |
| 	// Create realistic data sources that mimic a scenario with 1803 rows | |
| 	dataSources := &TopicDataSources{ | |
| 		ParquetFiles: map[string][]*ParquetFileStats{ | |
| 			"/topics/test/large-topic/0000-1023": { | |
| 				{ | |
| 					RowCount: 800, | |
| 					ColumnStats: map[string]*ParquetColumnStats{ | |
| 						"id": { | |
| 							ColumnName: "id", | |
| 							MinValue:   &schema_pb.Value{Kind: &schema_pb.Value_Int64Value{Int64Value: 1}}, | |
| 							MaxValue:   &schema_pb.Value{Kind: &schema_pb.Value_Int64Value{Int64Value: 800}}, | |
| 							NullCount:  0, | |
| 							RowCount:   800, | |
| 						}, | |
| 					}, | |
| 				}, | |
| 				{ | |
| 					RowCount: 500, | |
| 					ColumnStats: map[string]*ParquetColumnStats{ | |
| 						"id": { | |
| 							ColumnName: "id", | |
| 							MinValue:   &schema_pb.Value{Kind: &schema_pb.Value_Int64Value{Int64Value: 801}}, | |
| 							MaxValue:   &schema_pb.Value{Kind: &schema_pb.Value_Int64Value{Int64Value: 1300}}, | |
| 							NullCount:  0, | |
| 							RowCount:   500, | |
| 						}, | |
| 					}, | |
| 				}, | |
| 			}, | |
| 			"/topics/test/large-topic/1024-2047": { | |
| 				{ | |
| 					RowCount: 300, | |
| 					ColumnStats: map[string]*ParquetColumnStats{ | |
| 						"id": { | |
| 							ColumnName: "id", | |
| 							MinValue:   &schema_pb.Value{Kind: &schema_pb.Value_Int64Value{Int64Value: 1301}}, | |
| 							MaxValue:   &schema_pb.Value{Kind: &schema_pb.Value_Int64Value{Int64Value: 1600}}, | |
| 							NullCount:  0, | |
| 							RowCount:   300, | |
| 						}, | |
| 					}, | |
| 				}, | |
| 			}, | |
| 		}, | |
| 		ParquetRowCount:   1600, // 800 + 500 + 300 | |
| 		LiveLogRowCount:   203,  // Additional live log data | |
| 		PartitionsCount:   2, | |
| 		LiveLogFilesCount: 15, | |
| 	} | |
| 
 | |
| 	partitions := []string{ | |
| 		"/topics/test/large-topic/0000-1023", | |
| 		"/topics/test/large-topic/1024-2047", | |
| 	} | |
| 
 | |
| 	t.Run("COUNT(*) should return correct total (1803)", func(t *testing.T) { | |
| 		computer := NewAggregationComputer(engine.SQLEngine) | |
| 
 | |
| 		aggregations := []AggregationSpec{ | |
| 			{Function: FuncCOUNT, Column: "*", Alias: "COUNT(*)"}, | |
| 		} | |
| 
 | |
| 		results, err := computer.ComputeFastPathAggregations(ctx, aggregations, dataSources, partitions) | |
| 
 | |
| 		assert.NoError(t, err, "Fast path aggregation should not error") | |
| 		assert.Len(t, results, 1, "Should return one result") | |
| 
 | |
| 		// This is the key test - before our fix, this was returning 0 | |
| 		expectedCount := int64(1803) // 1600 (parquet) + 203 (live log) | |
| 		actualCount := results[0].Count | |
| 
 | |
| 		assert.Equal(t, expectedCount, actualCount, | |
| 			"COUNT(*) should return %d (1600 parquet + 203 live log), but got %d", | |
| 			expectedCount, actualCount) | |
| 	}) | |
| 
 | |
| 	t.Run("MIN/MAX should work with multiple partitions", func(t *testing.T) { | |
| 		computer := NewAggregationComputer(engine.SQLEngine) | |
| 
 | |
| 		aggregations := []AggregationSpec{ | |
| 			{Function: FuncMIN, Column: "id", Alias: "MIN(id)"}, | |
| 			{Function: FuncMAX, Column: "id", Alias: "MAX(id)"}, | |
| 		} | |
| 
 | |
| 		results, err := computer.ComputeFastPathAggregations(ctx, aggregations, dataSources, partitions) | |
| 
 | |
| 		assert.NoError(t, err, "Fast path aggregation should not error") | |
| 		assert.Len(t, results, 2, "Should return two results") | |
| 
 | |
| 		// MIN should be the lowest across all parquet files | |
| 		assert.Equal(t, int64(1), results[0].Min, "MIN should be 1") | |
| 
 | |
| 		// MAX should be the highest across all parquet files | |
| 		assert.Equal(t, int64(1600), results[1].Max, "MAX should be 1600") | |
| 	}) | |
| } | |
| 
 | |
| // TestFastPathDataSourceDiscoveryLogging tests that our debug logging works correctly | |
| func TestFastPathDataSourceDiscoveryLogging(t *testing.T) { | |
| 	// This test verifies that our enhanced data source collection structure is correct | |
|  | |
| 	t.Run("DataSources structure validation", func(t *testing.T) { | |
| 		// Test the TopicDataSources structure initialization | |
| 		dataSources := &TopicDataSources{ | |
| 			ParquetFiles:      make(map[string][]*ParquetFileStats), | |
| 			ParquetRowCount:   0, | |
| 			LiveLogRowCount:   0, | |
| 			LiveLogFilesCount: 0, | |
| 			PartitionsCount:   0, | |
| 		} | |
| 
 | |
| 		assert.NotNil(t, dataSources, "Data sources should not be nil") | |
| 		assert.NotNil(t, dataSources.ParquetFiles, "ParquetFiles map should be initialized") | |
| 		assert.GreaterOrEqual(t, dataSources.PartitionsCount, 0, "PartitionsCount should be non-negative") | |
| 		assert.GreaterOrEqual(t, dataSources.ParquetRowCount, int64(0), "ParquetRowCount should be non-negative") | |
| 		assert.GreaterOrEqual(t, dataSources.LiveLogRowCount, int64(0), "LiveLogRowCount should be non-negative") | |
| 	}) | |
| } | |
| 
 | |
| // TestFastPathValidationLogic tests the enhanced validation we added | |
| func TestFastPathValidationLogic(t *testing.T) { | |
| 	t.Run("Validation catches data source vs computation mismatch", func(t *testing.T) { | |
| 		// Create a scenario where data sources and computation might be inconsistent | |
| 		dataSources := &TopicDataSources{ | |
| 			ParquetFiles:    make(map[string][]*ParquetFileStats), | |
| 			ParquetRowCount: 1000, // Data sources say 1000 rows | |
| 			LiveLogRowCount: 0, | |
| 			PartitionsCount: 1, | |
| 		} | |
| 
 | |
| 		// But aggregation result says different count (simulating the original bug) | |
| 		aggResults := []AggregationResult{ | |
| 			{Count: 0}, // Bug: returns 0 when data sources show 1000 | |
| 		} | |
| 
 | |
| 		// This simulates the validation logic from tryFastParquetAggregation | |
| 		totalRows := dataSources.ParquetRowCount + dataSources.LiveLogRowCount | |
| 		countResult := aggResults[0].Count | |
| 
 | |
| 		// Our validation should catch this mismatch | |
| 		assert.NotEqual(t, totalRows, countResult, | |
| 			"This test simulates the bug: data sources show %d but COUNT returns %d", | |
| 			totalRows, countResult) | |
| 
 | |
| 		// In the real code, this would trigger a fallback to slow path | |
| 		validationPassed := (countResult == totalRows) | |
| 		assert.False(t, validationPassed, "Validation should fail for inconsistent data") | |
| 	}) | |
| 
 | |
| 	t.Run("Validation passes for consistent data", func(t *testing.T) { | |
| 		// Create a scenario where everything is consistent | |
| 		dataSources := &TopicDataSources{ | |
| 			ParquetFiles:    make(map[string][]*ParquetFileStats), | |
| 			ParquetRowCount: 1000, | |
| 			LiveLogRowCount: 803, | |
| 			PartitionsCount: 1, | |
| 		} | |
| 
 | |
| 		// Aggregation result matches data sources | |
| 		aggResults := []AggregationResult{ | |
| 			{Count: 1803}, // Correct: matches 1000 + 803 | |
| 		} | |
| 
 | |
| 		totalRows := dataSources.ParquetRowCount + dataSources.LiveLogRowCount | |
| 		countResult := aggResults[0].Count | |
| 
 | |
| 		// Our validation should pass this | |
| 		assert.Equal(t, totalRows, countResult, | |
| 			"Validation should pass when data sources (%d) match COUNT result (%d)", | |
| 			totalRows, countResult) | |
| 
 | |
| 		validationPassed := (countResult == totalRows) | |
| 		assert.True(t, validationPassed, "Validation should pass for consistent data") | |
| 	}) | |
| }
 |