
fix: keep containers running during file download

REAL ROOT CAUSE: --abort-on-container-exit stops ALL containers immediately
when the test container exits, including the filer. So we couldn't download
files because the filer was already stopped.

SOLUTION: Run tests in detached mode, wait for completion, then download
while filer is still running.

Changes:
1. docker compose up -d spark-tests (detached mode)
2. docker wait seaweedfs-spark-tests (wait for completion)
3. docker inspect to get exit code
4. docker compose logs to show test output
5. Download file while all services still running
6. Then exit with test exit code
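
The wait-then-collect-exit-code flow can be sketched without Docker, using a plain background job as a stand-in for the detached test container (the `sh -c 'exit 3'` "container" and its exit code are invented for illustration; the real workflow uses the `docker` commands listed above):

```shell
#!/bin/sh
# Stand-in sketch: a background job plays the role of the detached
# spark-tests container. In the real workflow the equivalents are
# `docker compose up -d`, `docker wait`, and `docker inspect`.
sh -c 'exit 3' &        # hypothetical failing "test container"
TEST_PID=$!

TEST_EXIT_CODE=0
wait "$TEST_PID" || TEST_EXIT_CODE=$?   # analogous to: docker wait + docker inspect
echo "Tests completed with exit code: $TEST_EXIT_CODE"

# Nothing has been torn down at this point, so other "services"
# (the filer, in the workflow) are still up and can serve downloads.
```

The key property is that collecting the exit code does not stop anything else, unlike --abort-on-container-exit, which tears down every service the moment one container exits.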

Improved grep pattern to be more specific:
  part-[a-f0-9-]+\.c000\.snappy\.parquet
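
Against a hypothetical filer directory listing (the HTML snippet and filename below are invented to match Spark's part-file naming), the tightened pattern extracts just the Parquet part file:

```shell
#!/bin/sh
# Hypothetical HTML of the kind a directory listing returns;
# the filename is made up for illustration.
LISTING='<a href="part-00000-a1b2c3d4-e5f6-7890-abcd-ef0123456789.c000.snappy.parquet">download</a>'

PARQUET_FILE=$(printf '%s' "$LISTING" \
  | grep -oE 'part-[a-f0-9-]+\.c000\.snappy\.parquet' \
  | head -1)
echo "Found file: '$PARQUET_FILE'"
```

Unlike the earlier `part-[^"]*\.parquet`, this cannot over-match into surrounding markup or pick up non-Parquet part files, and it uses POSIX `grep -E` instead of the GNU-only `grep -P`.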

This MUST work - filer is guaranteed to be running during download!
pull/7526/head
chrislu committed 1 week ago, commit 8ea2646084
.github/workflows/spark-integration-tests.yml (37 changed lines)


@@ -122,33 +122,46 @@ jobs:
         id: test-run
         run: |
           echo "=== Running Spark Integration Tests ==="
-          docker compose up --abort-on-container-exit --exit-code-from spark-tests spark-tests
-          TEST_EXIT_CODE=$?
-          echo "exit_code=$TEST_EXIT_CODE" >> $GITHUB_OUTPUT
+          # Run tests in detached mode, then wait for completion
+          docker compose up -d spark-tests
+          # Wait for test container to finish and capture exit code
+          echo "Waiting for tests to complete..."
+          docker wait seaweedfs-spark-tests
+          TEST_EXIT_CODE=$(docker inspect seaweedfs-spark-tests --format='{{.State.ExitCode}}')
+          echo "Tests completed with exit code: $TEST_EXIT_CODE"
+          echo "exit_code=$TEST_EXIT_CODE" >> $GITHUB_OUTPUT
+          # Show test logs
+          docker compose logs spark-tests
-          # If tests failed, download file IMMEDIATELY before containers stop
-          if [ $TEST_EXIT_CODE -ne 0 ]; then
+          # If tests failed, download file NOW (while filer is still running)
+          if [ "$TEST_EXIT_CODE" -ne 0 ]; then
             echo ""
-            echo "=== Tests failed, downloading Parquet file NOW (before containers stop) ==="
+            echo "=== Tests failed, downloading Parquet file NOW ==="
             # List directory to see what's there
             echo "Listing files in /test-spark/employees/:"
             curl -s http://localhost:8888/test-spark/employees/ || echo "Failed to list directory"
-            # Quick check and download
-            PARQUET_FILE=$(curl -s http://localhost:8888/test-spark/employees/ | grep -oP 'part-[^"]*\.parquet' | head -1)
-            echo "Found file: $PARQUET_FILE"
+            # Find parquet file
+            PARQUET_FILE=$(curl -s http://localhost:8888/test-spark/employees/ | grep -oE 'part-[a-f0-9-]+\.c000\.snappy\.parquet' | head -1)
+            echo "Found file: '$PARQUET_FILE'"
             if [ -n "$PARQUET_FILE" ]; then
               echo "Downloading: $PARQUET_FILE"
               curl -o test.parquet "http://localhost:8888/test-spark/employees/$PARQUET_FILE"
               if [ -f test.parquet ] && [ -s test.parquet ]; then
-                echo "SUCCESS: Downloaded $(stat -f%z test.parquet 2>/dev/null || stat -c%s test.parquet) bytes"
+                FILE_SIZE=$(stat --format=%s test.parquet 2>/dev/null || stat -f%z test.parquet 2>/dev/null)
+                echo "SUCCESS: Downloaded $FILE_SIZE bytes"
                 ls -lh test.parquet
               else
                 echo "FAILED: File not downloaded"
               fi
             else
               echo "ERROR: No Parquet files found"
               echo "Directory listing:"
               curl -s http://localhost:8888/test-spark/employees/
             fi
           fi
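
The two-flag `stat` fallback in the hunk exists because GNU coreutils `stat` (Linux runners) takes `--format=%s` while BSD `stat` (macOS) takes `-f%z`. A self-contained illustration with a throwaway file (the `/tmp/size-demo.bin` path is made up for the example):

```shell
#!/bin/sh
# GNU stat uses --format=%s, BSD stat uses -f%z; try GNU first, fall back.
printf 'hello world' > /tmp/size-demo.bin
FILE_SIZE=$(stat --format=%s /tmp/size-demo.bin 2>/dev/null \
  || stat -f%z /tmp/size-demo.bin 2>/dev/null)
echo "Downloaded $FILE_SIZE bytes"
rm -f /tmp/size-demo.bin
```

Capturing the size into FILE_SIZE first (rather than nesting the fallback inside an echo, as the old code did) also keeps the quoting readable.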
