---
# CI workflow: builds the SeaweedFS binary + Java/HDFS clients once (build-deps),
# then runs the Spark integration test suite and a Spark example application
# against a Docker-Compose-hosted SeaweedFS cluster (master/volume/filer).
name: Spark Integration Tests

on:
  push:
    branches: [ master, main ]
    paths:
      - 'test/java/spark/**'
      - 'other/java/hdfs2/**'
      - 'other/java/hdfs3/**'
      - 'other/java/client/**'
      - '.github/workflows/spark-integration-tests.yml'
  pull_request:
    branches: [ master, main ]
    paths:
      - 'test/java/spark/**'
      - 'other/java/hdfs2/**'
      - 'other/java/hdfs3/**'
      - 'other/java/client/**'
      - '.github/workflows/spark-integration-tests.yml'
  workflow_dispatch:

# checks/pull-requests write access is needed by dorny/test-reporter below.
permissions:
  contents: read
  checks: write
  pull-requests: write

jobs:
  build-deps:
    name: Build SeaweedFS Dependencies
    runs-on: ubuntu-latest
    timeout-minutes: 15
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up JDK 11
        uses: actions/setup-java@v4
        with:
          java-version: '11'
          distribution: 'temurin'
          cache: maven

      - name: Set up Go
        uses: actions/setup-go@v5
        with:
          go-version: '1.24'

      - name: Build SeaweedFS binary
        run: |
          echo "Building SeaweedFS binary (statically linked for Alpine)..."
          cd weed
          CGO_ENABLED=0 go build -o ../docker/weed
          cd ../docker
          # Dockerfile.local expects these files in the build context
          ls -la weed filer.toml entrypoint.sh
          file weed
          echo "✓ SeaweedFS binary built and ready for Docker build"

      - name: Build SeaweedFS Java dependencies
        run: |
          echo "Building Java client (required by HDFS clients)..."
          cd other/java/client
          mvn clean install -DskipTests -Dgpg.skip=true -Dcentral.publishing.skip=true
          echo "✓ Java client built and installed to local Maven repo"
          cd ../../..
          echo "Building HDFS2 client (depends on Java client)..."
          cd other/java/hdfs2
          mvn clean install -DskipTests -Dgpg.skip=true -Dcentral.publishing.skip=true
          echo "✓ HDFS2 client built"
          cd ../../..
          echo "Building HDFS3 client (depends on Java client)..."
          cd other/java/hdfs3
          mvn clean install -DskipTests -Dgpg.skip=true -Dcentral.publishing.skip=true
          echo "✓ HDFS3 client built"

      - name: Prepare artifacts for upload
        run: |
          echo "Preparing artifacts for upload..."
          mkdir -p artifacts/docker
          mkdir -p artifacts/.m2/repository/com
          cp docker/weed artifacts/docker/
          cp -r ~/.m2/repository/com/seaweedfs artifacts/.m2/repository/com/
          echo "✓ Artifacts prepared"

      - name: Upload build artifacts
        uses: actions/upload-artifact@v4
        with:
          name: seaweedfs-build
          path: artifacts/
          retention-days: 1

  spark-tests:
    name: Spark Integration Tests
    runs-on: ubuntu-latest
    needs: build-deps
    timeout-minutes: 30
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up JDK 11
        uses: actions/setup-java@v4
        with:
          java-version: '11'
          distribution: 'temurin'

      - name: Download build artifacts
        uses: actions/download-artifact@v4
        with:
          name: seaweedfs-build
          path: build-artifacts

      - name: Restore Maven repository
        run: |
          echo "Restoring Maven artifacts to ~/.m2/repository..."
          mkdir -p ~/.m2/repository/com
          if [ -d "build-artifacts/.m2/repository/com/seaweedfs" ]; then
            cp -r build-artifacts/.m2/repository/com/seaweedfs ~/.m2/repository/com/
            echo "✓ Maven artifacts restored"
          else
            echo "⚠️ Warning: Maven artifacts not found in download"
            ls -la build-artifacts/
          fi

      - name: Prepare SeaweedFS binary
        run: |
          echo "Copying SeaweedFS binary to docker directory..."
          ls -la build-artifacts/docker/
          cp build-artifacts/docker/weed docker/
          chmod +x docker/weed
          ls -la docker/weed
          file docker/weed
          echo "✓ Binary is ready"

      - name: Start SeaweedFS services
        working-directory: test/java/spark
        run: |
          echo "Cleaning up any existing Docker Compose resources..."
          docker compose down -v || true
          echo "Starting SeaweedFS with Docker Compose..."
          docker compose build --no-cache
          docker compose up -d seaweedfs-master seaweedfs-volume seaweedfs-filer
          echo "Waiting for SeaweedFS filer to be ready..."
          for i in {1..30}; do
            if curl -f http://localhost:8888/ > /dev/null 2>&1; then
              echo "✓ SeaweedFS filer is ready!"
              break
            fi
            if [ $i -eq 30 ]; then
              echo "✗ SeaweedFS failed to start after 60 seconds"
              echo ""
              echo "=== Container Status ==="
              docker compose ps -a
              echo ""
              echo "=== Master Container Logs ==="
              docker compose logs seaweedfs-master
              echo ""
              echo "=== Volume Container Logs ==="
              docker compose logs seaweedfs-volume
              echo ""
              echo "=== Filer Container Logs ==="
              docker compose logs seaweedfs-filer
              echo ""
              echo "=== Inspecting master container ==="
              docker compose exec -T seaweedfs-master ls -la /usr/bin/weed || echo "Failed to inspect container"
              exit 1
            fi
            echo "Waiting... ($i/30)"
            sleep 2
          done

          # Verify all services
          echo "Verifying SeaweedFS services..."
          curl -f http://localhost:9333/cluster/status || exit 1
          curl -f http://localhost:8888/ || exit 1

          # Check volume server registration
          echo "Checking volume server status..."
          curl -s http://localhost:9333/dir/status | jq '.' || echo "jq not available"
          echo "✓ Volume server registered, volumes will be created on-demand during tests"

      - name: Build Spark integration tests
        working-directory: test/java/spark
        run: |
          echo "Building Spark integration tests..."
          mvn clean package -DskipTests
          echo "✓ Build completed"

      - name: Run Spark integration tests
        working-directory: test/java/spark
        env:
          # Values quoted so the YAML parser keeps them as strings
          # (env vars are strings; unquoted `true` would parse as a boolean).
          SEAWEEDFS_TEST_ENABLED: "true"
          SEAWEEDFS_FILER_HOST: localhost
          SEAWEEDFS_FILER_PORT: "8888"
          SEAWEEDFS_FILER_GRPC_PORT: "18888"
        run: |
          echo "Running Spark integration tests..."
          mvn test -B
          echo "✓ Tests completed"

      - name: Upload test results
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: spark-test-results
          path: test/java/spark/target/surefire-reports/
          retention-days: 30

      - name: Publish test report
        if: always()
        uses: dorny/test-reporter@v1
        with:
          name: Spark Test Results
          path: test/java/spark/target/surefire-reports/*.xml
          reporter: java-junit
          fail-on-error: true

      - name: Display SeaweedFS logs on failure
        if: failure()
        working-directory: test/java/spark
        run: |
          echo "=== SeaweedFS Master Logs ==="
          docker compose logs seaweedfs-master
          echo ""
          echo "=== SeaweedFS Volume Logs ==="
          docker compose logs seaweedfs-volume
          echo ""
          echo "=== SeaweedFS Filer Logs ==="
          docker compose logs seaweedfs-filer
          echo ""
          echo "=== Volume List (via weed shell) ==="
          # NOTE(review): the heredoc body below was truncated in the mangled
          # source ("weed shell <" with no body) — reconstructed as a
          # volume.list query; confirm against the original workflow.
          docker compose exec -T seaweedfs-master weed shell <<EOF || true
          volume.list
          EOF

  # NOTE(review): the source was truncated between the job above and the
  # Spark-download step below ("... >> $GITHUB_ENV"). This job's id, name,
  # and the checkout/JDK/artifact-restore steps are reconstructed from the
  # surviving step bodies — confirm against the original workflow file.
  spark-example:
    name: Spark Example Application
    runs-on: ubuntu-latest
    needs: build-deps
    timeout-minutes: 30
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up JDK 11
        uses: actions/setup-java@v4
        with:
          java-version: '11'
          distribution: 'temurin'

      - name: Download build artifacts
        uses: actions/download-artifact@v4
        with:
          name: seaweedfs-build
          path: build-artifacts

      - name: Restore Maven repository
        run: |
          mkdir -p ~/.m2/repository/com
          cp -r build-artifacts/.m2/repository/com/seaweedfs ~/.m2/repository/com/

      - name: Prepare SeaweedFS binary
        run: |
          cp build-artifacts/docker/weed docker/
          chmod +x docker/weed

      - name: Install Spark
        run: |
          # NOTE(review): download/extract commands reconstructed — only the
          # two lines exporting SPARK_HOME and PATH survived the truncation.
          curl -fsSL -o spark-3.5.0-bin-hadoop3.tgz https://archive.apache.org/dist/spark/spark-3.5.0/spark-3.5.0-bin-hadoop3.tgz
          tar -xzf spark-3.5.0-bin-hadoop3.tgz
          echo "SPARK_HOME=$(pwd)/spark-3.5.0-bin-hadoop3" >> $GITHUB_ENV
          echo "$(pwd)/spark-3.5.0-bin-hadoop3/bin" >> $GITHUB_PATH

      - name: Start SeaweedFS services
        working-directory: test/java/spark
        run: |
          echo "Cleaning up any existing Docker Compose resources..."
          docker compose down -v || true
          echo "Starting SeaweedFS with Docker Compose..."
          docker compose build --no-cache
          docker compose up -d seaweedfs-master seaweedfs-volume seaweedfs-filer
          echo "Waiting for SeaweedFS filer to be ready..."
          for i in {1..30}; do
            if curl -f http://localhost:8888/ > /dev/null 2>&1; then
              echo "✓ SeaweedFS filer is ready!"
              break
            fi
            if [ $i -eq 30 ]; then
              echo "✗ SeaweedFS failed to start after 60 seconds"
              echo ""
              echo "=== Container Status ==="
              docker compose ps -a
              echo ""
              echo "=== Master Container Logs ==="
              docker compose logs seaweedfs-master
              echo ""
              echo "=== Volume Container Logs ==="
              docker compose logs seaweedfs-volume
              echo ""
              echo "=== Filer Container Logs ==="
              docker compose logs seaweedfs-filer
              exit 1
            fi
            echo "Waiting... ($i/30)"
            sleep 2
          done

          # Verify all services
          echo "Verifying SeaweedFS services..."
          curl -f http://localhost:9333/cluster/status || exit 1
          curl -f http://localhost:8888/ || exit 1

          # Check volume server registration
          echo "Checking volume server status..."
          curl -s http://localhost:9333/dir/status | jq '.' || echo "jq not available"
          echo "✓ Volume server registered, volumes will be created on-demand during tests"

      - name: Build project
        working-directory: test/java/spark
        run: mvn clean package -DskipTests

      - name: Run Spark example application
        working-directory: test/java/spark
        run: |
          echo "Running Spark example application..."
          spark-submit \
            --class seaweed.spark.SparkSeaweedFSExample \
            --master local[2] \
            --conf spark.hadoop.fs.seaweedfs.impl=seaweed.hdfs.SeaweedFileSystem \
            --conf spark.hadoop.fs.seaweed.filer.host=localhost \
            --conf spark.hadoop.fs.seaweed.filer.port=8888 \
            --conf spark.hadoop.fs.seaweed.filer.port.grpc=18888 \
            --conf spark.hadoop.fs.seaweed.replication="" \
            target/seaweedfs-spark-integration-tests-1.0-SNAPSHOT.jar \
            seaweedfs://localhost:8888/ci-spark-output
          echo "✓ Example application completed successfully"

      - name: Verify output
        run: |
          echo "Verifying Spark output in SeaweedFS..."
          curl -s http://localhost:8888/ci-spark-output/ || echo "Output directory listing not available"

      - name: Display SeaweedFS diagnostics on failure
        if: failure()
        working-directory: test/java/spark
        run: |
          echo "=== SeaweedFS Master Logs ==="
          docker compose logs seaweedfs-master
          echo ""
          echo "=== SeaweedFS Volume Logs ==="
          docker compose logs seaweedfs-volume
          echo ""
          echo "=== SeaweedFS Filer Logs ==="
          docker compose logs seaweedfs-filer
          echo ""
          echo "=== Volume List (via weed shell) ==="
          # NOTE(review): source ends mid-heredoc here ("weed shell <");
          # body reconstructed to mirror the equivalent step in spark-tests.
          docker compose exec -T seaweedfs-master weed shell <<EOF || true
          volume.list
          EOF