From d9ab0721b977b5e64ef487cf6d67c64c5c9f62d5 Mon Sep 17 00:00:00 2001 From: chrislu Date: Sun, 23 Nov 2025 11:29:33 -0800 Subject: [PATCH] refactor: merge workflow jobs into single job Benefits: - Eliminates artifact upload/download complexity - Maven artifacts stay in ~/.m2 throughout - Simpler debugging (all logs in one place) - Faster execution (no transfer overhead) - More reliable (no artifact transfer failures) Structure: 1. Build SeaweedFS binary + Java dependencies 2. Run Spark integration tests (Docker) 3. Run Spark example (host-based, push/dispatch only) 4. Upload results & diagnostics Trade-off: Example runs sequentially after tests instead of parallel, but overall runtime is likely faster without artifact transfers. --- .github/workflows/spark-integration-tests.yml | 370 ++++-------------- 1 file changed, 73 insertions(+), 297 deletions(-) diff --git a/.github/workflows/spark-integration-tests.yml b/.github/workflows/spark-integration-tests.yml index b9ac7e5b3..9d2524d33 100644 --- a/.github/workflows/spark-integration-tests.yml +++ b/.github/workflows/spark-integration-tests.yml @@ -25,12 +25,15 @@ permissions: pull-requests: write jobs: - build-deps: - name: Build SeaweedFS Dependencies + spark-integration-tests: + name: Spark Integration Tests runs-on: ubuntu-latest - timeout-minutes: 15 + timeout-minutes: 45 steps: + # ======================================== + # SETUP & BUILD + # ======================================== - name: Checkout code uses: actions/checkout@v4 @@ -52,200 +55,81 @@ jobs: cd weed CGO_ENABLED=0 go build -o ../docker/weed cd ../docker - # Dockerfile.local expects these files in the build context ls -la weed filer.toml entrypoint.sh file weed - echo "✓ SeaweedFS binary built and ready for Docker build" + echo "✓ SeaweedFS binary built" - name: Build SeaweedFS Java dependencies run: | - echo "Building Java client (required by HDFS clients)..." + echo "Building Java client..." cd other/java/client mvn clean install -U -DskipTests -Dgpg.skip=true -Dcentral.publishing.skip=true - echo "✓ Java client built and installed to local Maven repo" - - # Verify the JAR contains updated logging code - echo "Verifying JAR contains updated code..." - JAR_PATH=~/.m2/repository/com/seaweedfs/seaweedfs-client/3.80/seaweedfs-client-3.80.jar - if [ -f "$JAR_PATH" ]; then - echo "Checking for updated logging in SeaweedOutputStream..." - if javap -cp "$JAR_PATH" -c seaweedfs.client.SeaweedOutputStream | grep -q "totalPosition"; then - echo "✓ JAR contains updated logging code" - else - echo "⚠️ Warning: JAR may not contain updated code" - echo "Listing class methods:" - javap -cp "$JAR_PATH" seaweedfs.client.SeaweedOutputStream | head -20 - fi - fi + echo "✓ Java client built" cd ../../.. - echo "Building HDFS2 client (depends on Java client)..." + echo "Building HDFS2 client..." cd other/java/hdfs2 mvn clean install -U -DskipTests -Dgpg.skip=true -Dcentral.publishing.skip=true echo "✓ HDFS2 client built" cd ../../.. - echo "Building HDFS3 client (depends on Java client)..." + echo "Building HDFS3 client..." cd other/java/hdfs3 mvn clean install -U -DskipTests -Dgpg.skip=true -Dcentral.publishing.skip=true echo "✓ HDFS3 client built" + echo "" + echo "All Java dependencies installed to ~/.m2/repository" - - name: Prepare artifacts for upload - run: | - echo "Preparing artifacts for upload..." - mkdir -p artifacts/docker - mkdir -p artifacts/.m2/repository/com - - echo "Copying SeaweedFS binary..." - cp docker/weed artifacts/docker/ - ls -lh artifacts/docker/weed - - echo "Copying Maven artifacts..." - echo "Source: ~/.m2/repository/com/seaweedfs" - ls -la ~/.m2/repository/com/seaweedfs/ || echo "ERROR: Maven artifacts not found!" - - if [ -d ~/.m2/repository/com/seaweedfs ]; then - cp -r ~/.m2/repository/com/seaweedfs artifacts/.m2/repository/com/ - echo "✓ Maven artifacts copied" - echo "Verifying copied artifacts:" - find artifacts/.m2/repository/com/seaweedfs -name "*3.80.1-SNAPSHOT*" | head -10 - else - echo "ERROR: Maven repository not found at ~/.m2/repository/com/seaweedfs" - echo "Contents of ~/.m2/repository/com:" - ls -la ~/.m2/repository/com/ || echo "Directory doesn't exist" - exit 1 - fi - - echo "Final artifact structure:" - find artifacts -type f | head -20 - echo "✓ Artifacts prepared" - - - name: Upload build artifacts - uses: actions/upload-artifact@v4 - with: - name: seaweedfs-build - path: artifacts/ - retention-days: 1 - - spark-tests: - name: Spark Integration Tests - runs-on: ubuntu-latest - needs: build-deps - timeout-minutes: 30 - - steps: - - name: Checkout code - uses: actions/checkout@v4 - - - name: Set up JDK 11 - uses: actions/setup-java@v4 - with: - java-version: '11' - distribution: 'temurin' - - - name: Download build artifacts - uses: actions/download-artifact@v4 - with: - name: seaweedfs-build - path: build-artifacts - - - name: Restore Maven repository - run: | - echo "Restoring Maven artifacts to ~/.m2/repository..." - mkdir -p ~/.m2/repository/com - if [ -d "build-artifacts/.m2/repository/com/seaweedfs" ]; then - cp -r build-artifacts/.m2/repository/com/seaweedfs ~/.m2/repository/com/ - echo "✓ Maven artifacts restored" - else - echo "⚠️ Warning: Maven artifacts not found in download" - ls -la build-artifacts/ - fi - - - name: Prepare SeaweedFS binary - run: | - echo "Copying SeaweedFS binary to docker directory..." - ls -la build-artifacts/docker/ - cp build-artifacts/docker/weed docker/ - chmod +x docker/weed - ls -la docker/weed - file docker/weed - echo "✓ Binary is ready" - - - name: Start SeaweedFS services + # ======================================== + # SPARK INTEGRATION TESTS (DOCKER) + # ======================================== + - name: Start SeaweedFS services for tests working-directory: test/java/spark run: | - echo "Cleaning up any existing Docker Compose resources..." + echo "=== Starting SeaweedFS Services for Tests ===" docker compose down -v || true - - echo "Starting SeaweedFS with Docker Compose..." docker compose build --no-cache docker compose up -d seaweedfs-master seaweedfs-volume seaweedfs-filer - echo "Waiting for SeaweedFS filer to be ready..." + echo "Waiting for services..." for i in {1..30}; do if curl -f http://localhost:8888/ > /dev/null 2>&1; then echo "✓ SeaweedFS filer is ready!" break fi if [ $i -eq 30 ]; then - echo "✗ SeaweedFS failed to start after 60 seconds" - echo "" - echo "=== Container Status ===" + echo "✗ Services failed to start" docker compose ps -a - echo "" - echo "=== Master Container Logs ===" - docker compose logs seaweedfs-master - echo "" - echo "=== Volume Container Logs ===" - docker compose logs seaweedfs-volume - echo "" - echo "=== Filer Container Logs ===" - docker compose logs seaweedfs-filer - echo "" - echo "=== Inspecting master container ===" - docker compose exec -T seaweedfs-master ls -la /usr/bin/weed || echo "Failed to inspect container" + docker compose logs exit 1 fi echo "Waiting... ($i/30)" sleep 2 done - # Verify all services - echo "Verifying SeaweedFS services..." curl -f http://localhost:9333/cluster/status || exit 1 - curl -f http://localhost:8888/ || exit 1 - - # Check volume server registration - echo "Checking volume server status..." - curl -s http://localhost:9333/dir/status | jq '.' || echo "jq not available" - echo "✓ Volume server registered, volumes will be created on-demand during tests" + echo "✓ All services healthy" - name: Prepare Maven repository for Docker working-directory: test/java/spark run: | - echo "Copying Maven artifacts into workspace for Docker access..." + echo "Copying Maven artifacts for Docker container..." mkdir -p .m2/repository/com cp -r ~/.m2/repository/com/seaweedfs .m2/repository/com/ - - echo "Verifying copied artifacts:" - find .m2/repository/com/seaweedfs -name "*3.80.1-SNAPSHOT*" | sort - echo "" - echo "Directory size:" - du -sh .m2/repository/com/seaweedfs - - - name: Run Spark integration tests in Docker + echo "✓ Maven artifacts ready" + + - name: Run Spark integration tests working-directory: test/java/spark run: | - echo "Running Spark integration tests in Docker container..." - echo "Maven repository path on host: $HOME/.m2" - echo "Will be mounted as /root/.m2 in container" - echo "" - echo "Docker networks:" - docker network ls - echo "Starting spark-tests container (will start dependencies automatically)..." + echo "=== Running Spark Integration Tests ===" docker compose up --abort-on-container-exit --exit-code-from spark-tests spark-tests echo "✓ Tests completed" + - name: Stop test services + if: always() + working-directory: test/java/spark + run: docker compose down -v + - name: Upload test results if: always() uses: actions/upload-artifact@v4 @@ -263,82 +147,11 @@ jobs: reporter: java-junit fail-on-error: true - - name: Display SeaweedFS logs on failure - if: failure() - working-directory: test/java/spark - run: | - echo "=== SeaweedFS Master Logs ===" - docker compose logs seaweedfs-master - echo "" - echo "=== SeaweedFS Volume Logs ===" - docker compose logs seaweedfs-volume - echo "" - echo "=== SeaweedFS Filer Logs ===" - docker compose logs seaweedfs-filer - echo "" - echo "=== Volume List (via weed shell) ===" - docker compose exec -T seaweedfs-master weed shell <> $GITHUB_ENV - echo "$(pwd)/spark-3.5.0-bin-hadoop3/bin" >> $GITHUB_PATH + echo "✓ Spark downloaded" - - name: Start SeaweedFS services + - name: Start SeaweedFS services for example + if: github.event_name == 'push' || github.event_name == 'workflow_dispatch' working-directory: test/java/spark run: | - echo "Cleaning up any existing Docker Compose resources..." + echo "=== Starting SeaweedFS Services for Example ===" docker compose down -v || true - - echo "Starting SeaweedFS with Docker Compose..." docker compose build --no-cache docker compose up -d seaweedfs-master seaweedfs-volume seaweedfs-filer - echo "Waiting for SeaweedFS filer to be ready..." + echo "Waiting for services..." for i in {1..30}; do if curl -f http://localhost:8888/ > /dev/null 2>&1; then echo "✓ SeaweedFS filer is ready!" break fi if [ $i -eq 30 ]; then - echo "✗ SeaweedFS failed to start after 60 seconds" - echo "" - echo "=== Container Status ===" + echo "✗ Services failed to start" docker compose ps -a - echo "" - echo "=== Master Container Logs ===" - docker compose logs seaweedfs-master - echo "" - echo "=== Volume Container Logs ===" - docker compose logs seaweedfs-volume - echo "" - echo "=== Filer Container Logs ===" - docker compose logs seaweedfs-filer + docker compose logs exit 1 fi echo "Waiting... ($i/30)" sleep 2 done - # Verify all services - echo "Verifying SeaweedFS services..." curl -f http://localhost:9333/cluster/status || exit 1 - curl -f http://localhost:8888/ || exit 1 - - # Check volume server registration - echo "Checking volume server status..." - curl -s http://localhost:9333/dir/status | jq '.' || echo "jq not available" - echo "✓ Volume server registered, volumes will be created on-demand during tests" + echo "✓ All services healthy" - - name: Build project + - name: Build project for example + if: github.event_name == 'push' || github.event_name == 'workflow_dispatch' working-directory: test/java/spark run: mvn clean package -DskipTests - name: Run Spark example application + if: github.event_name == 'push' || github.event_name == 'workflow_dispatch' working-directory: test/java/spark run: | - echo "Running Spark example application..." - spark-submit \ + echo "=== Running Spark Example Application ===" + export SPARK_HOME=$(pwd)/../../../spark-3.5.0-bin-hadoop3 + $SPARK_HOME/bin/spark-submit \ --class seaweed.spark.SparkSeaweedFSExample \ --master local[2] \ --conf spark.hadoop.fs.seaweedfs.impl=seaweed.hdfs.SeaweedFileSystem \ @@ -422,28 +215,40 @@ jobs: --conf spark.hadoop.fs.seaweed.replication="" \ target/seaweedfs-spark-integration-tests-1.0-SNAPSHOT.jar \ seaweedfs://localhost:8888/ci-spark-output - echo "✓ Example application completed successfully" + echo "✓ Example completed" - - name: Verify output + - name: Verify example output + if: github.event_name == 'push' || github.event_name == 'workflow_dispatch' run: | - echo "Verifying Spark output in SeaweedFS..." - curl -s http://localhost:8888/ci-spark-output/ || echo "Output directory listing not available" + echo "Verifying output..." + curl -s http://localhost:8888/ci-spark-output/ || echo "Output listing unavailable" + + - name: Stop example services + if: always() && (github.event_name == 'push' || github.event_name == 'workflow_dispatch') + working-directory: test/java/spark + run: docker compose down -v - - name: Display SeaweedFS diagnostics on failure + # ======================================== + # DIAGNOSTICS + # ======================================== + - name: Display diagnostics on failure if: failure() working-directory: test/java/spark run: | - echo "=== SeaweedFS Master Logs ===" + echo "=== Container Status ===" + docker compose ps -a + echo "" + echo "=== Master Logs ===" docker compose logs seaweedfs-master echo "" - echo "=== SeaweedFS Volume Logs ===" + echo "=== Volume Logs ===" docker compose logs seaweedfs-volume echo "" - echo "=== SeaweedFS Filer Logs ===" + echo "=== Filer Logs ===" docker compose logs seaweedfs-filer echo "" - echo "=== Volume List (via weed shell) ===" - docker compose exec -T seaweedfs-master weed shell <