name: Spark Integration Tests

on:
  push:
    paths:
      - 'test/java/spark/**'
      - 'other/java/hdfs2/**'
      - 'other/java/hdfs3/**'
      - 'other/java/client/**'
      - '.github/workflows/spark-integration-tests.yml'
  pull_request:
    paths:
      - 'test/java/spark/**'
      - 'other/java/hdfs2/**'
      - 'other/java/hdfs3/**'
      - 'other/java/client/**'
      - '.github/workflows/spark-integration-tests.yml'
  workflow_dispatch:

permissions:
  contents: read
  checks: write
  pull-requests: write

jobs:
  spark-integration-tests:
    name: Spark Integration Tests
    runs-on: ubuntu-latest
    timeout-minutes: 45

    steps:
      # ========================================
      # SETUP & BUILD
      # ========================================
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up JDK 11
        uses: actions/setup-java@v4
        with:
          java-version: '11'
          distribution: 'temurin'
          cache: maven

      - name: Set up Go
        uses: actions/setup-go@v5
        with:
          go-version: '1.24'

      - name: Build SeaweedFS binary
        run: |
          echo "Building SeaweedFS binary (statically linked for Alpine)..."
          cd weed
          CGO_ENABLED=0 go build -o ../docker/weed
          cd ../docker
          ls -la weed filer.toml entrypoint.sh
          file weed
          echo "OK SeaweedFS binary built"

      - name: Build SeaweedFS Java dependencies
        run: |
          echo "Building Java client..."
          cd other/java/client
          mvn clean install -U -DskipTests -Dgpg.skip=true -Dcentral.publishing.skip=true
          echo "OK Java client built"
          cd ../../..

          echo "Building HDFS2 client..."
          cd other/java/hdfs2
          mvn clean install -U -DskipTests -Dgpg.skip=true -Dcentral.publishing.skip=true
          echo "OK HDFS2 client built"
          cd ../../..

          echo "Building HDFS3 client..."
          cd other/java/hdfs3
          mvn clean install -U -DskipTests -Dgpg.skip=true -Dcentral.publishing.skip=true
          echo "OK HDFS3 client built"

          echo ""
          echo "All Java dependencies installed to ~/.m2/repository"

      # ========================================
      # SPARK INTEGRATION TESTS (DOCKER)
      # ========================================
      - name: Start SeaweedFS services for tests
        working-directory: test/java/spark
        run: |
          echo "=== Starting SeaweedFS Services for Tests ==="
          docker compose down -v || true
          docker compose build --no-cache
          docker compose up -d seaweedfs-master seaweedfs-volume seaweedfs-filer

          echo "Waiting for services..."
          for i in {1..30}; do
            if curl -f http://localhost:8888/ > /dev/null 2>&1; then
              echo "OK SeaweedFS filer is ready!"
              break
            fi
            if [ $i -eq 30 ]; then
              echo "FAILED Services failed to start"
              docker compose ps -a
              docker compose logs
              exit 1
            fi
            echo "Waiting... ($i/30)"
            sleep 2
          done

          curl -f http://localhost:9333/cluster/status || exit 1
          echo "OK All services healthy"

      - name: Prepare Maven repository for Docker
        working-directory: test/java/spark
        run: |
          echo "Copying Maven artifacts for Docker container..."
          mkdir -p .m2/repository/com
          cp -r ~/.m2/repository/com/seaweedfs .m2/repository/com/
          echo "OK Maven artifacts ready"

      - name: Run Spark integration tests
        working-directory: test/java/spark
        continue-on-error: true
        id: test-run
        run: |
          echo "=== Running Spark Integration Tests ==="

          # Run tests in detached mode
          docker compose up -d spark-tests

          echo "Real-time monitoring: will download the file the moment the EOF error appears..."

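          # Note: the monitor below greps the test logs for the specific
          # "Still have: 78 bytes left" EOFException being investigated; if the
          # missing byte count changes, this pattern needs to be updated too.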
          # Monitor logs and download chunk data DIRECTLY from volume server
          (
            while docker ps | grep -q seaweedfs-spark-tests; do
              # Check if EOF error has appeared
              if docker compose logs spark-tests 2>&1 | grep -q "EOFException.*Still have: 78 bytes left"; then
                echo ""
                echo "=== EOF ERROR DETECTED! Extracting chunk data ==="

                # Get the last file that was read before the error
                # Look for the entry with chunks info right before the error
                FULL_LOG=$(docker compose logs spark-tests 2>&1)

                # Extract chunk file_id (format: "7,d0364fd01")
                # Look for the line "file_id: " but NOT "source_file_id: "
                CHUNK_ID=$(echo "$FULL_LOG" | grep -B 50 "EOFException" | grep ' file_id: "' | tail -1 | grep -oP '"\K[^"]+')

                echo "Found chunk ID: $CHUNK_ID"

                if [ -n "$CHUNK_ID" ]; then
                  # Download directly from volume server (data persists even after filer metadata deleted)
                  echo "Downloading chunk from volume server: http://localhost:8080/$CHUNK_ID"
                  curl -v -o test.parquet "http://localhost:8080/$CHUNK_ID"

                  if [ -f test.parquet ] && [ -s test.parquet ]; then
                    FILE_SIZE=$(stat --format=%s test.parquet 2>/dev/null || stat -f%z test.parquet 2>/dev/null)
                    echo "SUCCESS: Downloaded $FILE_SIZE bytes from volume!"
                    ls -lh test.parquet

                    # Quick analysis
                    echo ""
                    echo "Installing parquet-tools..."
                    pip3 install -q parquet-tools

                    echo ""
                    echo "=== File Header (first 100 bytes) ==="
                    hexdump -C test.parquet | head -10

                    echo ""
                    echo "=== File Footer (last 200 bytes) ==="
                    tail -c 200 test.parquet | hexdump -C

                    echo ""
                    echo "=== Magic bytes check ==="
                    echo "First 4 bytes (should be PAR1):"
                    head -c 4 test.parquet | xxd
                    echo "Last 4 bytes (should be PAR1):"
                    tail -c 4 test.parquet | xxd

                    echo ""
                    echo "=== Parquet metadata ==="
                    parquet-tools inspect test.parquet || echo "parquet-tools inspect failed"

                    echo ""
                    echo "=== Try reading data ==="
                    parquet-tools show test.parquet || echo "parquet-tools show failed"

                    echo ""
                    echo "=== CRITICAL ANALYSIS: Where are the missing 78 bytes? ==="
                    echo "Actual file size: $FILE_SIZE bytes"

                    # Parse footer to find what size Parquet thinks the file should be
                    echo ""
                    echo "Reading footer length (last 8 bytes)..."
                    FOOTER_LEN_HEX=$(tail -c 8 test.parquet | head -c 4 | xxd -p)
                    echo "Footer length (hex): $FOOTER_LEN_HEX"

                    # Get the highest offset from column metadata
                    echo ""
                    echo "Examining column chunk offsets from metadata..."
                    parquet-tools meta test.parquet > meta.txt 2>&1 || true
                    cat meta.txt

                    echo ""
                    echo "Analyzing offset pattern..."
                    grep -i "offset" meta.txt || echo "No offset info"

                    echo ""
                    echo "Expected file size based on Parquet metadata:"
                    echo "  If Parquet reader expects $((FILE_SIZE + 78)) bytes,"
                    echo "  then column chunks claim offsets beyond actual data"

                    echo ""
                    echo "=== Download the file as artifact for local analysis ==="
                    ls -lh test.parquet
                  else
                    echo "FAILED: Could not download chunk"
                  fi
                else
                  echo "ERROR: Could not extract chunk ID from logs"
                fi
                break
              fi
              sleep 1
            done
          ) &
          MONITOR_PID=$!

          # Wait for tests to complete
          docker wait seaweedfs-spark-tests
          TEST_EXIT_CODE=$(docker inspect seaweedfs-spark-tests --format='{{.State.ExitCode}}')

          # Give monitor time to finish
          sleep 3
          kill $MONITOR_PID 2>/dev/null || true

          # Show full logs
          echo ""
          echo "=== Test Logs ==="
          docker compose logs spark-tests | tail -100

          echo ""
          echo "Tests completed with exit code: $TEST_EXIT_CODE"
          echo "exit_code=$TEST_EXIT_CODE" >> $GITHUB_OUTPUT
          exit $TEST_EXIT_CODE

      - name: Examine Parquet file
        if: steps.test-run.outcome == 'failure'
        working-directory: test/java/spark
        run: |
          echo "=== Examining Parquet file for analysis ==="

          # Check if file was already downloaded
          if [ ! -f test.parquet ] || [ ! -s test.parquet ]; then
            echo "ERROR: test.parquet not found or empty"
            echo "File was not successfully downloaded during test run"
            exit 1
          fi

          echo "Found test.parquet, proceeding with analysis..."

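          # For reference: a valid Parquet file both starts and ends with the 4-byte
          # magic "PAR1", and the 4 bytes just before the trailing magic hold the
          # little-endian footer (metadata) length.
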
          # Install parquet-tools
          pip3 install parquet-tools

          echo ""
          echo "=== File Size ==="
          ls -lh test.parquet
          FILE_SIZE=$(stat -f%z test.parquet 2>/dev/null || stat -c%s test.parquet)
          echo "Actual file size: $FILE_SIZE bytes"

          echo ""
          echo "=== File Header (first 100 bytes) ==="
          hexdump -C test.parquet | head -10

          echo ""
          echo "=== File Footer (last 200 bytes) ==="
          tail -c 200 test.parquet | hexdump -C

          echo ""
          echo "=== Magic Bytes Check ==="
          echo "First 4 bytes (should be PAR1):"
          head -c 4 test.parquet | xxd
          echo "Last 4 bytes (should be PAR1):"
          tail -c 4 test.parquet | xxd

          echo ""
          echo "=== Parquet Metadata ==="
          parquet-tools inspect test.parquet || echo "parquet-tools failed"

          echo ""
          echo "=== Try Reading with Parquet Tools ==="
          parquet-tools show test.parquet || echo "Failed to read file"

          echo ""
          echo "=== File Validation ==="
          if head -c 4 test.parquet | grep -q "PAR1"; then
            echo "OK Valid Parquet header"
          else
            echo "FAILED INVALID Parquet header"
          fi

          if tail -c 4 test.parquet | grep -q "PAR1"; then
            echo "OK Valid Parquet trailer"
          else
            echo "FAILED INVALID Parquet trailer"
          fi

      - name: Stop test services
        if: always()
        working-directory: test/java/spark
        run: docker compose down -v

      - name: Upload test results
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: spark-test-results
          path: test/java/spark/target/surefire-reports/
          retention-days: 30

      - name: Upload Parquet file for analysis
        if: failure()
        uses: actions/upload-artifact@v4
        with:
          name: failed-parquet-file
          path: test/java/spark/test.parquet
          retention-days: 7
          if-no-files-found: ignore

      - name: Publish test report
        if: always()
        uses: dorny/test-reporter@v1
        with:
          name: Spark Test Results
          path: test/java/spark/target/surefire-reports/*.xml
          reporter: java-junit
          fail-on-error: true

      - name: Check test results
        if: steps.test-run.outcome == 'failure'
        run: |
          echo "ERROR Tests failed with exit code: ${{ steps.test-run.outputs.exit_code }}"
          echo "But file analysis was completed above."
          exit 1

      # ========================================
      # SPARK EXAMPLE (HOST-BASED)
      # ========================================
      - name: Cache Apache Spark
        if: github.event_name == 'push' || github.event_name == 'workflow_dispatch'
        id: cache-spark
        uses: actions/cache@v4
        with:
          path: spark-3.5.0-bin-hadoop3
          key: spark-3.5.0-hadoop3

      - name: Download Apache Spark
        if: (github.event_name == 'push' || github.event_name == 'workflow_dispatch') && steps.cache-spark.outputs.cache-hit != 'true'
        run: |
          echo "Downloading Apache Spark 3.5.0..."
          wget -q https://archive.apache.org/dist/spark/spark-3.5.0/spark-3.5.0-bin-hadoop3.tgz
          tar xzf spark-3.5.0-bin-hadoop3.tgz
          echo "OK Spark downloaded"

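      # The example steps below reuse the same docker compose stack and readiness
      # probe as the test phase above; like the rest of this section, they only run
      # on push or manual dispatch (workflow_dispatch).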
      - name: Start SeaweedFS services for example
        if: github.event_name == 'push' || github.event_name == 'workflow_dispatch'
        working-directory: test/java/spark
        run: |
          echo "=== Starting SeaweedFS Services for Example ==="
          docker compose down -v || true
          docker compose build --no-cache
          docker compose up -d seaweedfs-master seaweedfs-volume seaweedfs-filer

          echo "Waiting for services..."
          for i in {1..30}; do
            if curl -f http://localhost:8888/ > /dev/null 2>&1; then
              echo "OK SeaweedFS filer is ready!"
              break
            fi
            if [ $i -eq 30 ]; then
              echo "FAILED Services failed to start"
              docker compose ps -a
              docker compose logs
              exit 1
            fi
            echo "Waiting... ($i/30)"
            sleep 2
          done

          curl -f http://localhost:9333/cluster/status || exit 1
          echo "OK All services healthy"

      - name: Build project for example
        if: github.event_name == 'push' || github.event_name == 'workflow_dispatch'
        working-directory: test/java/spark
        run: mvn clean package -DskipTests

      - name: Run Spark example application
        if: github.event_name == 'push' || github.event_name == 'workflow_dispatch'
        working-directory: test/java/spark
        run: |
          echo "=== Running Spark Example Application ==="
          export SPARK_HOME=$(pwd)/../../../spark-3.5.0-bin-hadoop3

          $SPARK_HOME/bin/spark-submit \
            --class seaweed.spark.SparkSeaweedFSExample \
            --master local[2] \
            --conf spark.hadoop.fs.seaweedfs.impl=seaweed.hdfs.SeaweedFileSystem \
            --conf spark.hadoop.fs.seaweed.filer.host=localhost \
            --conf spark.hadoop.fs.seaweed.filer.port=8888 \
            --conf spark.hadoop.fs.seaweed.filer.port.grpc=18888 \
            --conf spark.hadoop.fs.seaweed.replication="" \
            target/seaweedfs-spark-integration-tests-1.0-SNAPSHOT.jar \
            seaweedfs://localhost:8888/ci-spark-output

          echo "OK Example completed"

      - name: Verify example output
        if: github.event_name == 'push' || github.event_name == 'workflow_dispatch'
        run: |
          echo "Verifying output..."
          curl -s http://localhost:8888/ci-spark-output/ || echo "Output listing unavailable"

      - name: Stop example services
        if: always() && (github.event_name == 'push' || github.event_name == 'workflow_dispatch')
        working-directory: test/java/spark
        run: docker compose down -v

      # ========================================
      # DIAGNOSTICS
      # ========================================
      - name: Display diagnostics on failure
        if: failure()
        working-directory: test/java/spark
        run: |
          echo "=== Container Status ==="
          docker compose ps -a

          echo ""
          echo "=== Master Logs ==="
          docker compose logs seaweedfs-master

          echo ""
          echo "=== Volume Logs ==="
          docker compose logs seaweedfs-volume

          echo ""
          echo "=== Filer Logs ==="
          docker compose logs seaweedfs-filer

          echo ""
          echo "=== Volume List ==="
          docker compose exec -T seaweedfs-master weed shell <