Browse Source

feat: Replace pg_query_go with lightweight SQL parser (no CGO required)

- Remove github.com/pganalyze/pg_query_go/v6 dependency to avoid CGO requirement
- Implement lightweight SQL parser for basic SELECT, SHOW, and DDL statements
- Fix operator precedence in WHERE clause parsing (handle AND/OR before comparisons)
- Support INTEGER, FLOAT, and STRING literals in WHERE conditions
- All SQL engine tests passing with new parser
- PostgreSQL integration tests can now build without CGO

The lightweight parser handles the essential SQL features needed for the
SeaweedFS query engine while maintaining compatibility and avoiding CGO
dependencies that caused Docker build issues.
pull/7185/head
chrislu 1 month ago
parent
commit
d60c542ecc
  1. 1
      go.mod
  2. 3
      go.sum
  3. 8
      test/postgres/Dockerfile.seaweedfs
  4. 335
      weed/query/engine/engine.go

1
go.mod

@ -147,7 +147,6 @@ require (
github.com/minio/crc64nvme v1.1.1
github.com/orcaman/concurrent-map/v2 v2.0.1
github.com/parquet-go/parquet-go v0.25.1
github.com/pganalyze/pg_query_go/v6 v6.1.0
github.com/pkg/sftp v1.13.9
github.com/rabbitmq/amqp091-go v1.10.0
github.com/rclone/rclone v1.70.3

3
go.sum

@ -1437,8 +1437,6 @@ github.com/pengsrc/go-shared v0.2.1-0.20190131101655-1999055a4a14 h1:XeOYlK9W1uC
github.com/pengsrc/go-shared v0.2.1-0.20190131101655-1999055a4a14/go.mod h1:jVblp62SafmidSkvWrXyxAme3gaTfEtWwRPGz5cpvHg=
github.com/peterh/liner v1.2.2 h1:aJ4AOodmL+JxOZZEL2u9iJf8omNRpqHc/EbrK+3mAXw=
github.com/peterh/liner v1.2.2/go.mod h1:xFwJyiKIXJZUKItq5dGHZSTBRAuG/CpeNpWLyiNRNwI=
github.com/pganalyze/pg_query_go/v6 v6.1.0 h1:jG5ZLhcVgL1FAw4C/0VNQaVmX1SUJx71wBGdtTtBvls=
github.com/pganalyze/pg_query_go/v6 v6.1.0/go.mod h1:nvTHIuoud6e1SfrUaFwHqT0i4b5Nr+1rPWVds3B5+50=
github.com/philhofer/fwd v1.1.2/go.mod h1:qkPdfjR2SIEbspLqpe1tO4n5yICnr2DY7mqEx2tUTP0=
github.com/philhofer/fwd v1.2.0 h1:e6DnBTl7vGY+Gz322/ASL4Gyp1FspeMvx1RNDoToZuM=
github.com/philhofer/fwd v1.2.0/go.mod h1:RqIHx9QI14HlwKwm98g9Re5prTQ6LdeRQn+gXJFxsJM=
@ -2538,7 +2536,6 @@ google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQ
google.golang.org/protobuf v1.28.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I=
google.golang.org/protobuf v1.28.1/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I=
google.golang.org/protobuf v1.30.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I=
google.golang.org/protobuf v1.31.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I=
google.golang.org/protobuf v1.36.8 h1:xHScyCOEuuwZEc6UtSOvPbAT4zRh0xcNRYekJwfqyMc=
google.golang.org/protobuf v1.36.8/go.mod h1:fuxRtAxBytpl4zzqUh6/eyUujkJdNiuEkXntxiD/uRU=
gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw=

8
test/postgres/Dockerfile.seaweedfs

@ -1,7 +1,7 @@
FROM golang:1.24-alpine AS builder
# Install build dependencies including gcc for CGO (required by pg_query_go)
RUN apk add --no-cache git make gcc musl-dev
# Install git and other build dependencies
RUN apk add --no-cache git make
# Set working directory
WORKDIR /app
@ -13,8 +13,8 @@ RUN go mod download
# Copy source code
COPY . .
# Build the weed binary with CGO enabled (required for pg_query_go)
RUN CGO_ENABLED=1 GOOS=linux go build -ldflags "-s -w" -o weed ./weed/
# Build the weed binary without CGO
RUN CGO_ENABLED=0 GOOS=linux go build -ldflags "-s -w" -o weed ./weed/
# Final stage - minimal runtime image
FROM alpine:latest

335
weed/query/engine/engine.go

@ -11,7 +11,6 @@ import (
"strings"
"time"
pg_query "github.com/pganalyze/pg_query_go/v6"
"github.com/seaweedfs/seaweedfs/weed/filer"
"github.com/seaweedfs/seaweedfs/weed/mq/schema"
"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
@ -202,12 +201,12 @@ const (
NotEqualStr = "!="
)
// ParseSQL uses PostgreSQL parser to parse SQL statements
// ParseSQL uses a lightweight parser to parse SQL statements
func ParseSQL(sql string) (Statement, error) {
sql = strings.TrimSpace(sql)
sqlUpper := strings.ToUpper(sql)
// Handle SHOW statements first (before PostgreSQL parser since pg doesn't support SHOW)
// Handle SHOW statements
if strings.HasPrefix(sqlUpper, "SHOW DATABASES") || strings.HasPrefix(sqlUpper, "SHOW SCHEMAS") {
return &ShowStatement{Type: "databases"}, nil
}
@ -229,7 +228,7 @@ func ParseSQL(sql string) (Statement, error) {
return stmt, nil
}
// Handle DDL statements by parsing SQL text patterns (before PostgreSQL parser)
// Handle DDL statements
if strings.HasPrefix(sqlUpper, "CREATE TABLE") {
return parseCreateTableFromSQL(sql)
}
@ -240,96 +239,223 @@ func ParseSQL(sql string) (Statement, error) {
return parseAlterTableFromSQL(sql)
}
// Parse with pg_query_go for SELECT and other standard PostgreSQL statements
result, err := pg_query.Parse(sql)
if err != nil {
return nil, fmt.Errorf("PostgreSQL parse error: %v", err)
}
if len(result.Stmts) == 0 {
return nil, fmt.Errorf("no statements parsed")
}
// Convert first statement
stmt := result.Stmts[0]
// Handle SELECT statements
if selectStmt := stmt.Stmt.GetSelectStmt(); selectStmt != nil {
return convertSelectStatement(selectStmt), nil
if strings.HasPrefix(sqlUpper, "SELECT") {
return parseSelectStatement(sql)
}
return nil, fmt.Errorf("unsupported statement type")
return nil, fmt.Errorf("unsupported statement type: %s", sqlUpper)
}
// Conversion helpers
func convertSelectStatement(stmt *pg_query.SelectStmt) *SelectStatement {
// parseSelectStatement parses SELECT statements using a lightweight parser
func parseSelectStatement(sql string) (*SelectStatement, error) {
s := &SelectStatement{
SelectExprs: []SelectExpr{},
From: []TableExpr{},
}
// Convert SELECT expressions
for _, target := range stmt.GetTargetList() {
if resTarget := target.GetResTarget(); resTarget != nil {
if resTarget.GetVal().GetColumnRef() != nil {
// This is likely SELECT *
sqlUpper := strings.ToUpper(sql)
// Find SELECT clause
selectIdx := strings.Index(sqlUpper, "SELECT")
if selectIdx == -1 {
return nil, fmt.Errorf("SELECT keyword not found")
}
// Find FROM clause
fromIdx := strings.Index(sqlUpper, "FROM")
var selectClause string
if fromIdx != -1 {
selectClause = sql[selectIdx+6 : fromIdx] // Skip "SELECT"
} else {
selectClause = sql[selectIdx+6:] // No FROM clause
}
// Parse SELECT expressions
selectClause = strings.TrimSpace(selectClause)
if selectClause == "*" {
s.SelectExprs = append(s.SelectExprs, &StarExpr{})
} else {
// Split by commas and parse each expression
parts := strings.Split(selectClause, ",")
for _, part := range parts {
part = strings.TrimSpace(part)
if part == "*" {
s.SelectExprs = append(s.SelectExprs, &StarExpr{})
} else {
// Handle column names and functions
expr := &AliasedExpr{}
if resTarget.GetName() != "" {
expr.As = aliasValue(resTarget.GetName())
if strings.Contains(strings.ToUpper(part), "(") && strings.Contains(part, ")") {
// Function expression
funcExpr := &FuncExpr{Name: stringValue(extractFunctionName(part))}
expr.Expr = funcExpr
} else {
expr.As = aliasValue("")
// Column name
colExpr := &ColName{Name: stringValue(part)}
expr.Expr = colExpr
}
s.SelectExprs = append(s.SelectExprs, expr)
}
}
}
// Convert FROM clause
for _, fromExpr := range stmt.GetFromClause() {
if rangeVar := fromExpr.GetRangeVar(); rangeVar != nil {
tableName := TableName{
Name: stringValue(rangeVar.GetRelname()),
Qualifier: stringValue(rangeVar.GetSchemaname()),
// Parse FROM clause
if fromIdx != -1 {
remaining := sql[fromIdx+4:] // Skip "FROM"
// Find WHERE clause
whereIdx := strings.Index(strings.ToUpper(remaining), "WHERE")
var fromClause string
if whereIdx != -1 {
fromClause = remaining[:whereIdx]
} else {
// Find LIMIT clause
limitIdx := strings.Index(strings.ToUpper(remaining), "LIMIT")
if limitIdx != -1 {
fromClause = remaining[:limitIdx]
} else {
fromClause = remaining
}
}
fromClause = strings.TrimSpace(fromClause)
tableName := TableName{
Name: stringValue(fromClause),
Qualifier: stringValue(""), // Initialize to empty string to avoid nil pointer
}
s.From = append(s.From, &AliasedTableExpr{Expr: tableName})
// Parse WHERE clause
if whereIdx != -1 {
whereClause := remaining[whereIdx+5:] // Skip "WHERE"
// Find LIMIT clause
limitIdx := strings.Index(strings.ToUpper(whereClause), "LIMIT")
if limitIdx != -1 {
whereClause = whereClause[:limitIdx]
}
whereClause = strings.TrimSpace(whereClause)
if whereClause != "" {
whereExpr, err := parseSimpleWhereExpression(whereClause)
if err != nil {
return nil, fmt.Errorf("failed to parse WHERE clause: %v", err)
}
s.Where = &WhereClause{Expr: whereExpr}
}
}
// Parse LIMIT clause
limitIdx := strings.Index(strings.ToUpper(remaining), "LIMIT")
if limitIdx != -1 {
limitClause := remaining[limitIdx+5:] // Skip "LIMIT"
limitClause = strings.TrimSpace(limitClause)
if _, err := strconv.Atoi(limitClause); err == nil {
s.Limit = &LimitClause{
Rowcount: &SQLVal{
Type: IntVal,
Val: []byte(limitClause),
},
}
}
s.From = append(s.From, &AliasedTableExpr{Expr: tableName})
}
}
// Convert WHERE clause
if stmt.GetWhereClause() != nil {
s.Where = &WhereClause{
Expr: convertExpressionNode(stmt.GetWhereClause()),
}
}
return s, nil
}
// Convert LIMIT clause
if stmt.GetLimitCount() != nil {
s.Limit = &LimitClause{
Rowcount: convertExpressionNode(stmt.GetLimitCount()),
}
// extractFunctionName extracts the function name from a function call expression
func extractFunctionName(expr string) string {
parenIdx := strings.Index(expr, "(")
if parenIdx == -1 {
return expr
}
return s
return strings.TrimSpace(expr[:parenIdx])
}
// convertExpressionNode converts PostgreSQL parser expression nodes to our internal ExprNode types
func convertExpressionNode(node *pg_query.Node) ExprNode {
if node == nil {
return nil
// parseSimpleWhereExpression parses a simple WHERE expression
func parseSimpleWhereExpression(whereClause string) (ExprNode, error) {
whereClause = strings.TrimSpace(whereClause)
// Handle AND/OR expressions first (higher precedence)
if strings.Contains(strings.ToUpper(whereClause), " AND ") {
// Use original case for parsing but ToUpper for detection
originalParts := strings.SplitN(whereClause, " AND ", 2)
if len(originalParts) != 2 {
originalParts = strings.SplitN(whereClause, " and ", 2)
}
if len(originalParts) == 2 {
left, err := parseSimpleWhereExpression(strings.TrimSpace(originalParts[0]))
if err != nil {
return nil, err
}
right, err := parseSimpleWhereExpression(strings.TrimSpace(originalParts[1]))
if err != nil {
return nil, err
}
return &AndExpr{Left: left, Right: right}, nil
}
}
// Handle A_Expr nodes (comparison operations: >, <, =, etc.)
if aExpr := node.GetAExpr(); aExpr != nil {
left := convertExpressionNode(aExpr.GetLexpr())
right := convertExpressionNode(aExpr.GetRexpr())
// Convert operator name
operator := ""
if len(aExpr.GetName()) > 0 {
opName := aExpr.GetName()[0].GetString_().GetSval()
switch opName {
if strings.Contains(strings.ToUpper(whereClause), " OR ") {
// Use original case for parsing but ToUpper for detection
originalParts := strings.SplitN(whereClause, " OR ", 2)
if len(originalParts) != 2 {
originalParts = strings.SplitN(whereClause, " or ", 2)
}
if len(originalParts) == 2 {
left, err := parseSimpleWhereExpression(strings.TrimSpace(originalParts[0]))
if err != nil {
return nil, err
}
right, err := parseSimpleWhereExpression(strings.TrimSpace(originalParts[1]))
if err != nil {
return nil, err
}
return &OrExpr{Left: left, Right: right}, nil
}
}
// Handle simple comparison operations
operators := []string{">=", "<=", "!=", "<>", "=", ">", "<"}
for _, op := range operators {
if idx := strings.Index(whereClause, op); idx != -1 {
left := strings.TrimSpace(whereClause[:idx])
right := strings.TrimSpace(whereClause[idx+len(op):])
// Parse left side (should be a column name)
leftExpr := &ColName{Name: stringValue(left)}
// Parse right side (should be a value)
var rightExpr ExprNode
if strings.HasPrefix(right, "'") && strings.HasSuffix(right, "'") {
// String literal
rightExpr = &SQLVal{
Type: StrVal,
Val: []byte(strings.Trim(right, "'")),
}
} else if _, err := strconv.ParseInt(right, 10, 64); err == nil {
// Integer literal
rightExpr = &SQLVal{
Type: IntVal,
Val: []byte(right),
}
} else if _, err := strconv.ParseFloat(right, 64); err == nil {
// Float literal
rightExpr = &SQLVal{
Type: FloatVal,
Val: []byte(right),
}
} else {
// Assume it's a column name
rightExpr = &ColName{Name: stringValue(right)}
}
// Convert operator to internal representation
var operator string
switch op {
case ">":
operator = GreaterThanStr
case "<":
@ -340,80 +466,21 @@ func convertExpressionNode(node *pg_query.Node) ExprNode {
operator = LessEqualStr
case "=":
operator = EqualStr
case "<>", "!=":
case "!=", "<>":
operator = NotEqualStr
default:
operator = opName
}
}
return &ComparisonExpr{
Left: left,
Right: right,
Operator: operator,
}
}
// Handle BoolExpr nodes (AND, OR operations)
if boolExpr := node.GetBoolExpr(); boolExpr != nil {
args := boolExpr.GetArgs()
if len(args) >= 2 {
left := convertExpressionNode(args[0])
right := convertExpressionNode(args[1])
switch boolExpr.GetBoolop() {
case pg_query.BoolExprType_AND_EXPR:
return &AndExpr{
Left: left,
Right: right,
}
case pg_query.BoolExprType_OR_EXPR:
return &OrExpr{
Left: left,
Right: right,
}
}
}
}
// Handle constants
if aConst := node.GetAConst(); aConst != nil {
if aConst.GetIval() != nil {
return &SQLVal{
Type: IntVal,
Val: []byte(fmt.Sprintf("%d", aConst.GetIval().GetIval())),
}
}
if aConst.GetSval() != nil {
return &SQLVal{
Type: StrVal,
Val: []byte(aConst.GetSval().GetSval()),
}
}
if aConst.GetFval() != nil {
return &SQLVal{
Type: FloatVal,
Val: []byte(aConst.GetFval().GetFval()),
}
}
}
// Handle column references
if columnRef := node.GetColumnRef(); columnRef != nil {
fields := columnRef.GetFields()
if len(fields) > 0 {
// Extract column name from the first field
if stringNode := fields[0].GetString_(); stringNode != nil {
return &ColName{
Name: stringValue(stringNode.GetSval()),
}
operator = op
}
return &ComparisonExpr{
Left: leftExpr,
Right: rightExpr,
Operator: operator,
}, nil
}
}
// Return nil for unsupported expression types instead of a placeholder
// This will help identify what still needs to be implemented
return nil
return nil, fmt.Errorf("unsupported WHERE expression: %s", whereClause)
}
func parseCreateTableFromSQL(sql string) (*DDLStatement, error) {

Loading…
Cancel
Save