name: Spark Integration Tests

on:
  push:
    branches: [ master, main ]
    paths:
      - 'test/java/spark/**'
      - 'other/java/hdfs2/**'
      - 'other/java/hdfs3/**'
      - 'other/java/client/**'
      - '.github/workflows/spark-integration-tests.yml'
  pull_request:
    branches: [ master, main ]
    paths:
      - 'test/java/spark/**'
      - 'other/java/hdfs2/**'
      - 'other/java/hdfs3/**'
      - 'other/java/client/**'
      - '.github/workflows/spark-integration-tests.yml'
  workflow_dispatch:

permissions:
  contents: read
  checks: write
  pull-requests: write

jobs:
  build-deps:
    name: Build SeaweedFS Dependencies
    runs-on: ubuntu-latest
    timeout-minutes: 15
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up JDK 11
        uses: actions/setup-java@v4
        with:
          java-version: '11'
          distribution: 'temurin'
          cache: maven

      - name: Set up Go
        uses: actions/setup-go@v5
        with:
          go-version: '1.24'

      - name: Build SeaweedFS binary
        run: |
          echo "Building SeaweedFS binary..."
          cd weed
          go build -o ../docker/weed
          cd ../docker
          # Dockerfile.local expects these files in the build context
          ls -la weed filer.toml entrypoint.sh
          echo "✓ SeaweedFS binary built and ready for Docker build"

      - name: Build SeaweedFS Java dependencies
        run: |
          echo "Building Java client (required by HDFS clients)..."
          cd other/java/client
          mvn clean install -DskipTests -Dgpg.skip=true
          echo "✓ Java client built and installed to local Maven repo"
          cd ../../..

          echo "Building HDFS2 client (depends on Java client)..."
          cd other/java/hdfs2
          mvn clean install -DskipTests -Dgpg.skip=true
          echo "✓ HDFS2 client built"
          cd ../../..

          echo "Building HDFS3 client (depends on Java client)..."
          cd other/java/hdfs3
          mvn clean install -DskipTests -Dgpg.skip=true
          echo "✓ HDFS3 client built"

      - name: Prepare artifacts for upload
        run: |
          echo "Preparing artifacts for upload..."
          mkdir -p artifacts/docker
          mkdir -p artifacts/.m2/repository/com
          cp docker/weed artifacts/docker/
          cp -r ~/.m2/repository/com/seaweedfs artifacts/.m2/repository/com/
          echo "✓ Artifacts prepared"

      - name: Upload build artifacts
        uses: actions/upload-artifact@v4
        with:
          name: seaweedfs-build
          path: artifacts/
          retention-days: 1

  spark-tests:
    name: Spark Integration Tests
    runs-on: ubuntu-latest
    needs: build-deps
    timeout-minutes: 30
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up JDK 11
        uses: actions/setup-java@v4
        with:
          java-version: '11'
          distribution: 'temurin'

      - name: Download build artifacts
        uses: actions/download-artifact@v4
        with:
          name: seaweedfs-build
          path: build-artifacts

      - name: Restore Maven repository
        run: |
          echo "Restoring Maven artifacts to ~/.m2/repository..."
          mkdir -p ~/.m2/repository/com
          if [ -d "build-artifacts/.m2/repository/com/seaweedfs" ]; then
            cp -r build-artifacts/.m2/repository/com/seaweedfs ~/.m2/repository/com/
            echo "✓ Maven artifacts restored"
          else
            echo "⚠️ Warning: Maven artifacts not found in download"
            ls -la build-artifacts/
          fi

      - name: Prepare SeaweedFS binary
        run: |
          echo "Copying SeaweedFS binary to docker directory..."
          ls -la build-artifacts/docker/
          cp build-artifacts/docker/weed docker/
          chmod +x docker/weed
          ls -la docker/weed
          file docker/weed
          echo "✓ Binary is ready"

      - name: Start SeaweedFS services
        working-directory: test/java/spark
        run: |
          echo "Starting SeaweedFS with Docker Compose..."
          docker compose build --no-cache
          docker compose up -d seaweedfs-master seaweedfs-volume seaweedfs-filer

          echo "Waiting for SeaweedFS filer to be ready..."
          for i in {1..30}; do
            if curl -f http://localhost:8888/ > /dev/null 2>&1; then
              echo "✓ SeaweedFS filer is ready!"
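              # Filer answered; stop polling and fall through to the health checks below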
              break
            fi
            if [ $i -eq 30 ]; then
              echo "✗ SeaweedFS failed to start after 60 seconds"
              docker compose logs
              exit 1
            fi
            echo "Waiting... ($i/30)"
            sleep 2
          done

          # Verify all services
          echo "Verifying SeaweedFS services..."
          curl -f http://localhost:9333/cluster/status || exit 1
          curl -f http://localhost:8888/ || exit 1
          echo "✓ All SeaweedFS services are healthy"

      - name: Build Spark integration tests
        working-directory: test/java/spark
        run: |
          echo "Building Spark integration tests..."
          mvn clean package -DskipTests
          echo "✓ Build completed"

      - name: Run Spark integration tests
        working-directory: test/java/spark
        env:
          SEAWEEDFS_TEST_ENABLED: true
          SEAWEEDFS_FILER_HOST: localhost
          SEAWEEDFS_FILER_PORT: 8888
          SEAWEEDFS_FILER_GRPC_PORT: 18888
        run: |
          echo "Running Spark integration tests..."
          mvn test -B
          echo "✓ Tests completed"

      - name: Upload test results
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: spark-test-results
          path: test/java/spark/target/surefire-reports/
          retention-days: 30

      - name: Publish test report
        if: always()
        uses: dorny/test-reporter@v1
        with:
          name: Spark Test Results
          path: test/java/spark/target/surefire-reports/*.xml
          reporter: java-junit
          fail-on-error: true

      - name: Display SeaweedFS logs on failure
        if: failure()
        working-directory: test/java/spark
        run: |
          echo "=== SeaweedFS Master Logs ==="
          docker compose logs seaweedfs-master
          echo ""
          echo "=== SeaweedFS Volume Logs ==="
          docker compose logs seaweedfs-volume
          echo ""
          echo "=== SeaweedFS Filer Logs ==="
          docker compose logs seaweedfs-filer

      - name: Stop SeaweedFS services
        if: always()
        working-directory: test/java/spark
        run: |
          echo "Stopping SeaweedFS services..."
          docker compose down -v
          echo "✓ Cleanup completed"

  spark-example:
    name: Run Spark Example Application
    runs-on: ubuntu-latest
    needs: [build-deps, spark-tests]
    timeout-minutes: 20
    if: github.event_name == 'push' || github.event_name == 'workflow_dispatch'
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up JDK 11
        uses: actions/setup-java@v4
        with:
          java-version: '11'
          distribution: 'temurin'

      - name: Download build artifacts
        uses: actions/download-artifact@v4
        with:
          name: seaweedfs-build
          path: build-artifacts

      - name: Restore Maven repository
        run: |
          echo "Restoring Maven artifacts to ~/.m2/repository..."
          mkdir -p ~/.m2/repository/com
          if [ -d "build-artifacts/.m2/repository/com/seaweedfs" ]; then
            cp -r build-artifacts/.m2/repository/com/seaweedfs ~/.m2/repository/com/
            echo "✓ Maven artifacts restored"
          else
            echo "⚠️ Warning: Maven artifacts not found in download"
            ls -la build-artifacts/
          fi

      - name: Prepare SeaweedFS binary
        run: |
          echo "Copying SeaweedFS binary to docker directory..."
          cp build-artifacts/docker/weed docker/
          chmod +x docker/weed
          ls -la docker/weed
          echo "✓ Binary is ready"

      - name: Cache Apache Spark
        id: cache-spark
        uses: actions/cache@v4
        with:
          path: spark-3.5.0-bin-hadoop3
          key: spark-3.5.0-hadoop3

      - name: Download Apache Spark
        if: steps.cache-spark.outputs.cache-hit != 'true'
        run: |
          echo "Downloading Apache Spark 3.5.0..."
          wget -q https://archive.apache.org/dist/spark/spark-3.5.0/spark-3.5.0-bin-hadoop3.tgz
          tar xzf spark-3.5.0-bin-hadoop3.tgz
          echo "✓ Spark downloaded and extracted"

      - name: Set Spark environment
        run: |
          echo "SPARK_HOME=$(pwd)/spark-3.5.0-bin-hadoop3" >> $GITHUB_ENV
          echo "$(pwd)/spark-3.5.0-bin-hadoop3/bin" >> $GITHUB_PATH

      - name: Start SeaweedFS services
        working-directory: test/java/spark
        run: |
          echo "Starting SeaweedFS with Docker Compose..."
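          # Note: this job runs on a fresh runner, so no image from the spark-tests job is present here.
          # "docker compose up" builds the local SeaweedFS image on first use (assuming the compose
          # services declare a build section, as the spark-tests job's "docker compose build" implies).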
          docker compose up -d seaweedfs-master seaweedfs-volume seaweedfs-filer

          echo "Waiting for SeaweedFS filer to be ready..."
          for i in {1..30}; do
            if curl -f http://localhost:8888/ > /dev/null 2>&1; then
              echo "✓ SeaweedFS filer is ready!"
              break
            fi
            if [ $i -eq 30 ]; then
              echo "✗ SeaweedFS failed to start after 60 seconds"
              docker compose logs
              exit 1
            fi
            echo "Waiting... ($i/30)"
            sleep 2
          done

          # Verify all services
          echo "Verifying SeaweedFS services..."
          curl -f http://localhost:9333/cluster/status || exit 1
          curl -f http://localhost:8888/ || exit 1
          echo "✓ All SeaweedFS services are healthy"

      - name: Build project
        working-directory: test/java/spark
        run: mvn clean package -DskipTests

      - name: Run Spark example application
        working-directory: test/java/spark
        run: |
          echo "Running Spark example application..."
          spark-submit \
            --class seaweed.spark.SparkSeaweedFSExample \
            --master local[2] \
            --conf spark.hadoop.fs.seaweedfs.impl=seaweed.hdfs.SeaweedFileSystem \
            --conf spark.hadoop.fs.seaweed.filer.host=localhost \
            --conf spark.hadoop.fs.seaweed.filer.port=8888 \
            --conf spark.hadoop.fs.seaweed.filer.port.grpc=18888 \
            --conf spark.hadoop.fs.seaweed.replication="" \
            target/seaweedfs-spark-integration-tests-1.0-SNAPSHOT.jar \
            seaweedfs://localhost:8888/ci-spark-output
          echo "✓ Example application completed successfully"

      - name: Verify output
        run: |
          echo "Verifying Spark output in SeaweedFS..."
          curl -s http://localhost:8888/ci-spark-output/ || echo "Output directory listing not available"

      - name: Stop SeaweedFS services
        if: always()
        working-directory: test/java/spark
        run: docker compose down -v

  summary:
    name: Test Summary
    runs-on: ubuntu-latest
    needs: [spark-tests, spark-example]
    if: always()
    steps:
      - name: Check test results
        run: |
          echo "=== Spark Integration Tests Summary ==="
          echo "Spark Tests: ${{ needs.spark-tests.result }}"
          echo "Spark Example: ${{ needs.spark-example.result }}"

          if [ "${{ needs.spark-tests.result }}" != "success" ]; then
            echo "❌ Spark integration tests failed"
            exit 1
          fi

          if [ "${{ needs.spark-example.result }}" != "success" ] && [ "${{ needs.spark-example.result }}" != "skipped" ]; then
            echo "⚠️ Spark example application had issues"
          fi

          echo "✅ All critical tests passed"