From d60c542ecc4851b5a3f0e0c038213f5ab8fe56f2 Mon Sep 17 00:00:00 2001 From: chrislu Date: Wed, 3 Sep 2025 07:11:18 -0700 Subject: [PATCH] feat: Replace pg_query_go with lightweight SQL parser (no CGO required) - Remove github.com/pganalyze/pg_query_go/v6 dependency to avoid CGO requirement - Implement lightweight SQL parser for basic SELECT, SHOW, and DDL statements - Fix operator precedence in WHERE clause parsing (handle AND/OR before comparisons) - Support INTEGER, FLOAT, and STRING literals in WHERE conditions - All SQL engine tests passing with new parser - PostgreSQL integration tests can now build without CGO The lightweight parser handles the essential SQL features needed for the SeaweedFS query engine while maintaining compatibility and avoiding CGO dependencies that caused Docker build issues. --- go.mod | 1 - go.sum | 3 - test/postgres/Dockerfile.seaweedfs | 8 +- weed/query/engine/engine.go | 335 +++++++++++++++++------------ 4 files changed, 205 insertions(+), 142 deletions(-) diff --git a/go.mod b/go.mod index 03f0a2b45..c919d47c0 100644 --- a/go.mod +++ b/go.mod @@ -147,7 +147,6 @@ require ( github.com/minio/crc64nvme v1.1.1 github.com/orcaman/concurrent-map/v2 v2.0.1 github.com/parquet-go/parquet-go v0.25.1 - github.com/pganalyze/pg_query_go/v6 v6.1.0 github.com/pkg/sftp v1.13.9 github.com/rabbitmq/amqp091-go v1.10.0 github.com/rclone/rclone v1.70.3 diff --git a/go.sum b/go.sum index fb6733d8b..32f1173e4 100644 --- a/go.sum +++ b/go.sum @@ -1437,8 +1437,6 @@ github.com/pengsrc/go-shared v0.2.1-0.20190131101655-1999055a4a14 h1:XeOYlK9W1uC github.com/pengsrc/go-shared v0.2.1-0.20190131101655-1999055a4a14/go.mod h1:jVblp62SafmidSkvWrXyxAme3gaTfEtWwRPGz5cpvHg= github.com/peterh/liner v1.2.2 h1:aJ4AOodmL+JxOZZEL2u9iJf8omNRpqHc/EbrK+3mAXw= github.com/peterh/liner v1.2.2/go.mod h1:xFwJyiKIXJZUKItq5dGHZSTBRAuG/CpeNpWLyiNRNwI= -github.com/pganalyze/pg_query_go/v6 v6.1.0 h1:jG5ZLhcVgL1FAw4C/0VNQaVmX1SUJx71wBGdtTtBvls= -github.com/pganalyze/pg_query_go/v6 v6.1.0/go.mod h1:nvTHIuoud6e1SfrUaFwHqT0i4b5Nr+1rPWVds3B5+50= github.com/philhofer/fwd v1.1.2/go.mod h1:qkPdfjR2SIEbspLqpe1tO4n5yICnr2DY7mqEx2tUTP0= github.com/philhofer/fwd v1.2.0 h1:e6DnBTl7vGY+Gz322/ASL4Gyp1FspeMvx1RNDoToZuM= github.com/philhofer/fwd v1.2.0/go.mod h1:RqIHx9QI14HlwKwm98g9Re5prTQ6LdeRQn+gXJFxsJM= @@ -2538,7 +2536,6 @@ google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQ google.golang.org/protobuf v1.28.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= google.golang.org/protobuf v1.28.1/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= google.golang.org/protobuf v1.30.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= -google.golang.org/protobuf v1.31.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= google.golang.org/protobuf v1.36.8 h1:xHScyCOEuuwZEc6UtSOvPbAT4zRh0xcNRYekJwfqyMc= google.golang.org/protobuf v1.36.8/go.mod h1:fuxRtAxBytpl4zzqUh6/eyUujkJdNiuEkXntxiD/uRU= gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw= diff --git a/test/postgres/Dockerfile.seaweedfs b/test/postgres/Dockerfile.seaweedfs index 2b5fe70d6..49ff74930 100644 --- a/test/postgres/Dockerfile.seaweedfs +++ b/test/postgres/Dockerfile.seaweedfs @@ -1,7 +1,7 @@ FROM golang:1.24-alpine AS builder -# Install build dependencies including gcc for CGO (required by pg_query_go) -RUN apk add --no-cache git make gcc musl-dev +# Install git and other build dependencies +RUN apk add --no-cache git make # Set working directory WORKDIR /app @@ -13,8 +13,8 @@ RUN go mod download # Copy source code COPY . . -# Build the weed binary with CGO enabled (required for pg_query_go) -RUN CGO_ENABLED=1 GOOS=linux go build -ldflags "-s -w" -o weed ./weed/ +# Build the weed binary without CGO +RUN CGO_ENABLED=0 GOOS=linux go build -ldflags "-s -w" -o weed ./weed/ # Final stage - minimal runtime image FROM alpine:latest diff --git a/weed/query/engine/engine.go b/weed/query/engine/engine.go index a84b02951..53aadbcaf 100644 --- a/weed/query/engine/engine.go +++ b/weed/query/engine/engine.go @@ -11,7 +11,6 @@ import ( "strings" "time" - pg_query "github.com/pganalyze/pg_query_go/v6" "github.com/seaweedfs/seaweedfs/weed/filer" "github.com/seaweedfs/seaweedfs/weed/mq/schema" "github.com/seaweedfs/seaweedfs/weed/pb/filer_pb" @@ -202,12 +201,12 @@ const ( NotEqualStr = "!=" ) -// ParseSQL uses PostgreSQL parser to parse SQL statements +// ParseSQL uses a lightweight parser to parse SQL statements func ParseSQL(sql string) (Statement, error) { sql = strings.TrimSpace(sql) sqlUpper := strings.ToUpper(sql) - // Handle SHOW statements first (before PostgreSQL parser since pg doesn't support SHOW) + // Handle SHOW statements if strings.HasPrefix(sqlUpper, "SHOW DATABASES") || strings.HasPrefix(sqlUpper, "SHOW SCHEMAS") { return &ShowStatement{Type: "databases"}, nil } @@ -229,7 +228,7 @@ func ParseSQL(sql string) (Statement, error) { return stmt, nil } - // Handle DDL statements by parsing SQL text patterns (before PostgreSQL parser) + // Handle DDL statements if strings.HasPrefix(sqlUpper, "CREATE TABLE") { return parseCreateTableFromSQL(sql) } @@ -240,96 +239,223 @@ func ParseSQL(sql string) (Statement, error) { return parseAlterTableFromSQL(sql) } - // Parse with pg_query_go for SELECT and other standard PostgreSQL statements - result, err := pg_query.Parse(sql) - if err != nil { - return nil, fmt.Errorf("PostgreSQL parse error: %v", err) - } - - if len(result.Stmts) == 0 { - return nil, fmt.Errorf("no statements parsed") - } - - // Convert first statement - stmt := result.Stmts[0] - // Handle SELECT statements - if selectStmt := stmt.Stmt.GetSelectStmt(); selectStmt != nil { - return convertSelectStatement(selectStmt), nil + if strings.HasPrefix(sqlUpper, "SELECT") { + return parseSelectStatement(sql) } - return nil, fmt.Errorf("unsupported statement type") + return nil, fmt.Errorf("unsupported statement type: %s", sqlUpper) } -// Conversion helpers -func convertSelectStatement(stmt *pg_query.SelectStmt) *SelectStatement { +// parseSelectStatement parses SELECT statements using a lightweight parser +func parseSelectStatement(sql string) (*SelectStatement, error) { s := &SelectStatement{ SelectExprs: []SelectExpr{}, From: []TableExpr{}, } - // Convert SELECT expressions - for _, target := range stmt.GetTargetList() { - if resTarget := target.GetResTarget(); resTarget != nil { - if resTarget.GetVal().GetColumnRef() != nil { - // This is likely SELECT * + sqlUpper := strings.ToUpper(sql) + + // Find SELECT clause + selectIdx := strings.Index(sqlUpper, "SELECT") + if selectIdx == -1 { + return nil, fmt.Errorf("SELECT keyword not found") + } + + // Find FROM clause + fromIdx := strings.Index(sqlUpper, "FROM") + var selectClause string + if fromIdx != -1 { + selectClause = sql[selectIdx+6 : fromIdx] // Skip "SELECT" + } else { + selectClause = sql[selectIdx+6:] // No FROM clause + } + + // Parse SELECT expressions + selectClause = strings.TrimSpace(selectClause) + if selectClause == "*" { + s.SelectExprs = append(s.SelectExprs, &StarExpr{}) + } else { + // Split by commas and parse each expression + parts := strings.Split(selectClause, ",") + for _, part := range parts { + part = strings.TrimSpace(part) + if part == "*" { s.SelectExprs = append(s.SelectExprs, &StarExpr{}) } else { + // Handle column names and functions expr := &AliasedExpr{} - if resTarget.GetName() != "" { - expr.As = aliasValue(resTarget.GetName()) + if strings.Contains(strings.ToUpper(part), "(") && strings.Contains(part, ")") { + // Function expression + funcExpr := &FuncExpr{Name: stringValue(extractFunctionName(part))} + expr.Expr = funcExpr } else { - expr.As = aliasValue("") + // Column name + colExpr := &ColName{Name: stringValue(part)} + expr.Expr = colExpr } s.SelectExprs = append(s.SelectExprs, expr) } } } - // Convert FROM clause - for _, fromExpr := range stmt.GetFromClause() { - if rangeVar := fromExpr.GetRangeVar(); rangeVar != nil { - tableName := TableName{ - Name: stringValue(rangeVar.GetRelname()), - Qualifier: stringValue(rangeVar.GetSchemaname()), + // Parse FROM clause + if fromIdx != -1 { + remaining := sql[fromIdx+4:] // Skip "FROM" + + // Find WHERE clause + whereIdx := strings.Index(strings.ToUpper(remaining), "WHERE") + var fromClause string + if whereIdx != -1 { + fromClause = remaining[:whereIdx] + } else { + // Find LIMIT clause + limitIdx := strings.Index(strings.ToUpper(remaining), "LIMIT") + if limitIdx != -1 { + fromClause = remaining[:limitIdx] + } else { + fromClause = remaining + } + } + + fromClause = strings.TrimSpace(fromClause) + tableName := TableName{ + Name: stringValue(fromClause), + Qualifier: stringValue(""), // Initialize to empty string to avoid nil pointer + } + s.From = append(s.From, &AliasedTableExpr{Expr: tableName}) + + // Parse WHERE clause + if whereIdx != -1 { + whereClause := remaining[whereIdx+5:] // Skip "WHERE" + + // Find LIMIT clause + limitIdx := strings.Index(strings.ToUpper(whereClause), "LIMIT") + if limitIdx != -1 { + whereClause = whereClause[:limitIdx] + } + + whereClause = strings.TrimSpace(whereClause) + if whereClause != "" { + whereExpr, err := parseSimpleWhereExpression(whereClause) + if err != nil { + return nil, fmt.Errorf("failed to parse WHERE clause: %v", err) + } + s.Where = &WhereClause{Expr: whereExpr} + } + } + + // Parse LIMIT clause + limitIdx := strings.Index(strings.ToUpper(remaining), "LIMIT") + if limitIdx != -1 { + limitClause := remaining[limitIdx+5:] // Skip "LIMIT" + limitClause = strings.TrimSpace(limitClause) + + if _, err := strconv.Atoi(limitClause); err == nil { + s.Limit = &LimitClause{ + Rowcount: &SQLVal{ + Type: IntVal, + Val: []byte(limitClause), + }, + } } - s.From = append(s.From, &AliasedTableExpr{Expr: tableName}) } } - // Convert WHERE clause - if stmt.GetWhereClause() != nil { - s.Where = &WhereClause{ - Expr: convertExpressionNode(stmt.GetWhereClause()), - } - } + return s, nil +} - // Convert LIMIT clause - if stmt.GetLimitCount() != nil { - s.Limit = &LimitClause{ - Rowcount: convertExpressionNode(stmt.GetLimitCount()), - } +// extractFunctionName extracts the function name from a function call expression +func extractFunctionName(expr string) string { + parenIdx := strings.Index(expr, "(") + if parenIdx == -1 { + return expr } - - return s + return strings.TrimSpace(expr[:parenIdx]) } -// convertExpressionNode converts PostgreSQL parser expression nodes to our internal ExprNode types -func convertExpressionNode(node *pg_query.Node) ExprNode { - if node == nil { - return nil +// parseSimpleWhereExpression parses a simple WHERE expression +func parseSimpleWhereExpression(whereClause string) (ExprNode, error) { + whereClause = strings.TrimSpace(whereClause) + + // Handle AND/OR expressions first (higher precedence) + if strings.Contains(strings.ToUpper(whereClause), " AND ") { + // Use original case for parsing but ToUpper for detection + originalParts := strings.SplitN(whereClause, " AND ", 2) + if len(originalParts) != 2 { + originalParts = strings.SplitN(whereClause, " and ", 2) + } + if len(originalParts) == 2 { + left, err := parseSimpleWhereExpression(strings.TrimSpace(originalParts[0])) + if err != nil { + return nil, err + } + right, err := parseSimpleWhereExpression(strings.TrimSpace(originalParts[1])) + if err != nil { + return nil, err + } + return &AndExpr{Left: left, Right: right}, nil + } } - - // Handle A_Expr nodes (comparison operations: >, <, =, etc.) - if aExpr := node.GetAExpr(); aExpr != nil { - left := convertExpressionNode(aExpr.GetLexpr()) - right := convertExpressionNode(aExpr.GetRexpr()) - - // Convert operator name - operator := "" - if len(aExpr.GetName()) > 0 { - opName := aExpr.GetName()[0].GetString_().GetSval() - switch opName { + + if strings.Contains(strings.ToUpper(whereClause), " OR ") { + // Use original case for parsing but ToUpper for detection + originalParts := strings.SplitN(whereClause, " OR ", 2) + if len(originalParts) != 2 { + originalParts = strings.SplitN(whereClause, " or ", 2) + } + if len(originalParts) == 2 { + left, err := parseSimpleWhereExpression(strings.TrimSpace(originalParts[0])) + if err != nil { + return nil, err + } + right, err := parseSimpleWhereExpression(strings.TrimSpace(originalParts[1])) + if err != nil { + return nil, err + } + return &OrExpr{Left: left, Right: right}, nil + } + } + + // Handle simple comparison operations + operators := []string{">=", "<=", "!=", "<>", "=", ">", "<"} + + for _, op := range operators { + if idx := strings.Index(whereClause, op); idx != -1 { + left := strings.TrimSpace(whereClause[:idx]) + right := strings.TrimSpace(whereClause[idx+len(op):]) + + // Parse left side (should be a column name) + leftExpr := &ColName{Name: stringValue(left)} + + // Parse right side (should be a value) + var rightExpr ExprNode + if strings.HasPrefix(right, "'") && strings.HasSuffix(right, "'") { + // String literal + rightExpr = &SQLVal{ + Type: StrVal, + Val: []byte(strings.Trim(right, "'")), + } + } else if _, err := strconv.ParseInt(right, 10, 64); err == nil { + // Integer literal + rightExpr = &SQLVal{ + Type: IntVal, + Val: []byte(right), + } + } else if _, err := strconv.ParseFloat(right, 64); err == nil { + // Float literal + rightExpr = &SQLVal{ + Type: FloatVal, + Val: []byte(right), + } + } else { + // Assume it's a column name + rightExpr = &ColName{Name: stringValue(right)} + } + + // Convert operator to internal representation + var operator string + switch op { case ">": operator = GreaterThanStr case "<": @@ -340,80 +466,21 @@ func convertExpressionNode(node *pg_query.Node) ExprNode { operator = LessEqualStr case "=": operator = EqualStr - case "<>", "!=": + case "!=", "<>": operator = NotEqualStr default: - operator = opName - } - } - - return &ComparisonExpr{ - Left: left, - Right: right, - Operator: operator, - } - } - - // Handle BoolExpr nodes (AND, OR operations) - if boolExpr := node.GetBoolExpr(); boolExpr != nil { - args := boolExpr.GetArgs() - if len(args) >= 2 { - left := convertExpressionNode(args[0]) - right := convertExpressionNode(args[1]) - - switch boolExpr.GetBoolop() { - case pg_query.BoolExprType_AND_EXPR: - return &AndExpr{ - Left: left, - Right: right, - } - case pg_query.BoolExprType_OR_EXPR: - return &OrExpr{ - Left: left, - Right: right, - } - } - } - } - - // Handle constants - if aConst := node.GetAConst(); aConst != nil { - if aConst.GetIval() != nil { - return &SQLVal{ - Type: IntVal, - Val: []byte(fmt.Sprintf("%d", aConst.GetIval().GetIval())), - } - } - if aConst.GetSval() != nil { - return &SQLVal{ - Type: StrVal, - Val: []byte(aConst.GetSval().GetSval()), - } - } - if aConst.GetFval() != nil { - return &SQLVal{ - Type: FloatVal, - Val: []byte(aConst.GetFval().GetFval()), - } - } - } - - // Handle column references - if columnRef := node.GetColumnRef(); columnRef != nil { - fields := columnRef.GetFields() - if len(fields) > 0 { - // Extract column name from the first field - if stringNode := fields[0].GetString_(); stringNode != nil { - return &ColName{ - Name: stringValue(stringNode.GetSval()), - } + operator = op } + + return &ComparisonExpr{ + Left: leftExpr, + Right: rightExpr, + Operator: operator, + }, nil } } - - // Return nil for unsupported expression types instead of a placeholder - // This will help identify what still needs to be implemented - return nil + + return nil, fmt.Errorf("unsupported WHERE expression: %s", whereClause) } func parseCreateTableFromSQL(sql string) (*DDLStatement, error) {