name: Spark Integration Tests

on:
  push:
    paths:
      - 'test/java/spark/**'
      - 'other/java/hdfs2/**'
      - 'other/java/hdfs3/**'
      - 'other/java/client/**'
      - '.github/workflows/spark-integration-tests.yml'
  pull_request:
    paths:
      - 'test/java/spark/**'
      - 'other/java/hdfs2/**'
      - 'other/java/hdfs3/**'
      - 'other/java/client/**'
      - '.github/workflows/spark-integration-tests.yml'
  workflow_dispatch:

permissions:
  contents: read
  checks: write
  pull-requests: write
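
# The checks/pull-requests write permissions let the dorny/test-reporter step
# below publish check runs and annotations on pull requests.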
jobs:
  spark-integration-tests:
    name: Spark Integration Tests
    runs-on: ubuntu-latest
    timeout-minutes: 45

    steps:
      # ========================================
      # SETUP & BUILD
      # ========================================
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up JDK 11
        uses: actions/setup-java@v4
        with:
          java-version: '11'
          distribution: 'temurin'
          cache: maven

      - name: Set up Go
        uses: actions/setup-go@v5
        with:
          go-version: '1.24'

      - name: Build SeaweedFS binary
        run: |
          echo "Building SeaweedFS binary (statically linked for Alpine)..."
          cd weed
          CGO_ENABLED=0 go build -o ../docker/weed
          cd ../docker
          ls -la weed filer.toml entrypoint.sh
          file weed
          echo "OK SeaweedFS binary built"

      - name: Build SeaweedFS Java dependencies
        run: |
          echo "Building Java client..."
          cd other/java/client
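          # -Dgpg.skip and -Dcentral.publishing.skip stop Maven from trying to
          # sign or publish the artifacts; CI only needs them installed to ~/.m2.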
          mvn clean install -U -DskipTests -Dgpg.skip=true -Dcentral.publishing.skip=true
          echo "OK Java client built"
          cd ../../..

          echo "Building HDFS2 client..."
          cd other/java/hdfs2
          mvn clean install -U -DskipTests -Dgpg.skip=true -Dcentral.publishing.skip=true
          echo "OK HDFS2 client built"
          cd ../../..

          echo "Building HDFS3 client..."
          cd other/java/hdfs3
          mvn clean install -U -DskipTests -Dgpg.skip=true -Dcentral.publishing.skip=true
          echo "OK HDFS3 client built"
          echo ""
          echo "All Java dependencies installed to ~/.m2/repository"

      # ========================================
      # SPARK INTEGRATION TESTS (DOCKER)
      # ========================================
      - name: Start SeaweedFS services for tests
        working-directory: test/java/spark
        run: |
          echo "=== Starting SeaweedFS Services for Tests ==="
          docker compose down -v || true
          docker compose build --no-cache
          docker compose up -d seaweedfs-master seaweedfs-volume seaweedfs-filer

          echo "Waiting for services..."
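          # Poll the filer HTTP port for up to ~60s (30 tries x 2s).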
          for i in {1..30}; do
            if curl -f http://localhost:8888/ > /dev/null 2>&1; then
              echo "OK SeaweedFS filer is ready!"
              break
            fi
            if [ $i -eq 30 ]; then
              echo "FAILED Services failed to start"
              docker compose ps -a
              docker compose logs
              exit 1
            fi
            echo "Waiting... ($i/30)"
            sleep 2
          done

          curl -f http://localhost:9333/cluster/status || exit 1
          echo "OK All services healthy"

      - name: Prepare Maven repository for Docker
        working-directory: test/java/spark
        run: |
          echo "Copying Maven artifacts for Docker container..."
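          # The compose file is assumed to bind-mount ./.m2 into the test
          # container, so the freshly built SeaweedFS artifacts resolve there.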
          mkdir -p .m2/repository/com
          cp -r ~/.m2/repository/com/seaweedfs .m2/repository/com/
          echo "OK Maven artifacts ready"
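
      # continue-on-error lets the analysis and artifact-upload steps below run
      # even when the tests fail; the "Check test results" step re-raises the failure.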
      - name: Run Spark integration tests
        working-directory: test/java/spark
        continue-on-error: true
        id: test-run
        run: |
          echo "=== Running Spark Integration Tests ==="
          # Run tests in detached mode
          docker compose up -d spark-tests

          echo "Real-time monitoring: will download the file the instant the EOF error appears..."

          # Monitor logs and download chunk data DIRECTLY from the volume server
          (
            while docker ps | grep -q seaweedfs-spark-tests; do
              # Check if the EOF error has appeared
              if docker compose logs spark-tests 2>&1 | grep -q "EOFException.*Still have: 78 bytes left"; then
                echo ""
                echo "=== EOF ERROR DETECTED! Extracting chunk data ==="

                # Get the full log and extract the EXACT file causing the error
                FULL_LOG=$(docker compose logs spark-tests 2>&1)

                # Extract the failing filename from the EOF error message
                # The error message format: "...seaweedfs://seaweedfs-filer:8888/test-spark/employees/part-xxx.parquet..."
                FAILING_FILE=$(echo "$FULL_LOG" | grep -B 5 "EOFException.*78 bytes" | grep "seaweedfs://" | grep -oP 'part-[a-f0-9-]+\.c000\.snappy\.parquet' | head -1)
                echo "Failing file: $FAILING_FILE"

                if [ -z "$FAILING_FILE" ]; then
                  echo "ERROR: Could not extract failing filename from error message"
                  echo "Searching for error message pattern..."
                  echo "$FULL_LOG" | grep -A 2 "EOFException.*78 bytes" | head -20
                  break
                fi

                # Now find the chunk info for THIS SPECIFIC FILE
                # The file is being READ when the error occurs, so look for SeaweedInputStream opening it
                echo "Searching logs for when $FAILING_FILE was opened for reading..."

                # Find all instances where this file is mentioned and get nearby chunk info
                # Strategy: search for the filename, then look for "chunks {" blocks near it
                CHUNK_CONTEXT=$(echo "$FULL_LOG" | grep -A 100 "new path:.*$FAILING_FILE")

                if [ -n "$CHUNK_CONTEXT" ]; then
                  echo "Found read context for file"
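                  # grep -oP '"\K[^"]+' keeps only the text inside the quotes, i.e. the chunk file_id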
                  CHUNK_ID=$(echo "$CHUNK_CONTEXT" | head -30 | grep ' file_id: "' | head -1 | grep -oP '"\K[^"]+')
                else
                  echo "No read context, trying write context..."
                  # Maybe it's in the write logs
                  CHUNK_CONTEXT=$(echo "$FULL_LOG" | grep -B 50 -A 20 "$FAILING_FILE")
                  CHUNK_ID=$(echo "$CHUNK_CONTEXT" | grep ' file_id: "' | head -1 | grep -oP '"\K[^"]+')
                fi
                echo "Found chunk ID: $CHUNK_ID"

                if [ -n "$CHUNK_ID" ]; then
                  # Download directly from the volume server (chunk data persists there even after the filer metadata is deleted)
                  echo "Downloading chunk from volume server: http://localhost:8080/$CHUNK_ID"
                  curl -v -o test.parquet "http://localhost:8080/$CHUNK_ID"

                  if [ -f test.parquet ] && [ -s test.parquet ]; then
                    FILE_SIZE=$(stat --format=%s test.parquet 2>/dev/null || stat -f%z test.parquet 2>/dev/null)
                    echo "SUCCESS: Downloaded $FILE_SIZE bytes from volume!"
                    ls -lh test.parquet

                    # Quick analysis
                    echo ""
                    echo "Installing parquet-tools..."
                    pip3 install -q parquet-tools

                    echo ""
                    echo "=== File Header (first 100 bytes) ==="
                    hexdump -C test.parquet | head -10

                    echo ""
                    echo "=== File Footer (last 200 bytes) ==="
                    tail -c 200 test.parquet | hexdump -C

                    echo ""
                    echo "=== Magic bytes check ==="
                    echo "First 4 bytes (should be PAR1):"
                    head -c 4 test.parquet | xxd
                    echo "Last 4 bytes (should be PAR1):"
                    tail -c 4 test.parquet | xxd

                    echo ""
                    echo "=== Parquet metadata ==="
                    parquet-tools inspect test.parquet || echo "parquet-tools inspect failed"

                    echo ""
                    echo "=== Try reading data ==="
                    parquet-tools show test.parquet || echo "parquet-tools show failed"

                    echo ""
                    echo "=== CRITICAL ANALYSIS: Where are the missing 78 bytes? ==="
                    echo "Actual file size: $FILE_SIZE bytes"

                    # Parse the footer to find what size Parquet thinks the file should be
                    echo ""
                    echo "Reading footer length (last 8 bytes)..."
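                    # The last 8 bytes of a Parquet file are a 4-byte little-endian
                    # footer length plus the "PAR1" magic, so read this hex value byte-reversed.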
                    FOOTER_LEN_HEX=$(tail -c 8 test.parquet | head -c 4 | xxd -p)
                    echo "Footer length (hex): $FOOTER_LEN_HEX"

                    # Get the highest offset from the column metadata
                    echo ""
                    echo "Examining column chunk offsets from metadata..."
                    parquet-tools meta test.parquet > meta.txt 2>&1 || true
                    cat meta.txt

                    echo ""
                    echo "Analyzing offset pattern..."
                    grep -i "offset" meta.txt || echo "No offset info"

                    echo ""
                    echo "Expected file size based on Parquet metadata:"
                    echo "  If the Parquet reader expects $((FILE_SIZE + 78)) bytes,"
                    echo "  then the column chunks claim offsets beyond the actual data"

                    echo ""
                    echo "=== Download the file as artifact for local analysis ==="
                    ls -lh test.parquet
                  else
                    echo "FAILED: Could not download chunk"
                  fi
                else
                  echo "ERROR: Could not extract chunk ID from logs"
                fi
                break
              fi
              sleep 1
            done
          ) &
          MONITOR_PID=$!

          # Wait for tests to complete
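          # docker wait blocks until the container exits; the exit code is then
          # read back via docker inspect.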
          docker wait seaweedfs-spark-tests
          TEST_EXIT_CODE=$(docker inspect seaweedfs-spark-tests --format='{{.State.ExitCode}}')

          # Give the monitor time to finish
          sleep 3
          kill $MONITOR_PID 2>/dev/null || true

          # Show the full logs
          echo ""
          echo "=== Test Logs ==="
          docker compose logs spark-tests | tail -100

          echo ""
          echo "Tests completed with exit code: $TEST_EXIT_CODE"
          echo "exit_code=$TEST_EXIT_CODE" >> $GITHUB_OUTPUT

          exit $TEST_EXIT_CODE

      - name: Examine Parquet file
        if: steps.test-run.outcome == 'failure'
        working-directory: test/java/spark
        run: |
          echo "=== Examining Parquet file for analysis ==="

          # Check whether the file was already downloaded by the monitor
          if [ ! -f test.parquet ] || [ ! -s test.parquet ]; then
            echo "ERROR: test.parquet not found or empty"
            echo "File was not successfully downloaded during the test run"
            exit 1
          fi

          echo "Found test.parquet, proceeding with analysis..."

          # Install parquet-tools
          pip3 install parquet-tools

          echo ""
          echo "=== File Size ==="
          ls -lh test.parquet
          FILE_SIZE=$(stat -f%z test.parquet 2>/dev/null || stat -c%s test.parquet)
          echo "Actual file size: $FILE_SIZE bytes"

          echo ""
          echo "=== File Header (first 100 bytes) ==="
          hexdump -C test.parquet | head -10

          echo ""
          echo "=== File Footer (last 200 bytes) ==="
          tail -c 200 test.parquet | hexdump -C

          echo ""
          echo "=== Magic Bytes Check ==="
          echo "First 4 bytes (should be PAR1):"
          head -c 4 test.parquet | xxd
          echo "Last 4 bytes (should be PAR1):"
          tail -c 4 test.parquet | xxd

          echo ""
          echo "=== Parquet Metadata ==="
          parquet-tools inspect test.parquet || echo "parquet-tools failed"

          echo ""
          echo "=== Try Reading with Parquet Tools ==="
          parquet-tools show test.parquet || echo "Failed to read file"

          echo ""
          echo "=== File Validation ==="
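          # A structurally intact Parquet file starts and ends with the 4-byte "PAR1" magic.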
          if head -c 4 test.parquet | grep -q "PAR1"; then
            echo "OK Valid Parquet header"
          else
            echo "FAILED INVALID Parquet header"
          fi

          if tail -c 4 test.parquet | grep -q "PAR1"; then
            echo "OK Valid Parquet trailer"
          else
            echo "FAILED INVALID Parquet trailer"
          fi

      - name: Stop test services
        if: always()
        working-directory: test/java/spark
        run: docker compose down -v

      - name: Upload test results
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: spark-test-results
          path: test/java/spark/target/surefire-reports/
          retention-days: 30

      - name: Upload Parquet file for analysis
        if: failure()
        uses: actions/upload-artifact@v4
        with:
          name: failed-parquet-file
          path: test/java/spark/test.parquet
          retention-days: 7
          if-no-files-found: ignore

      - name: Publish test report
        if: always()
        uses: dorny/test-reporter@v1
        with:
          name: Spark Test Results
          path: test/java/spark/target/surefire-reports/*.xml
          reporter: java-junit
          fail-on-error: true

      - name: Check test results
        if: steps.test-run.outcome == 'failure'
        run: |
          echo "ERROR Tests failed with exit code: ${{ steps.test-run.outputs.exit_code }}"
          echo "But file analysis was completed above."
          exit 1

      # ========================================
      # SPARK EXAMPLE (HOST-BASED)
      # ========================================
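      # The host-based example below runs only on push/workflow_dispatch,
      # presumably to keep pull-request runs limited to the Docker-based tests.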
      - name: Cache Apache Spark
        if: github.event_name == 'push' || github.event_name == 'workflow_dispatch'
        id: cache-spark
        uses: actions/cache@v4
        with:
          path: spark-3.5.0-bin-hadoop3
          key: spark-3.5.0-hadoop3

      - name: Download Apache Spark
        if: (github.event_name == 'push' || github.event_name == 'workflow_dispatch') && steps.cache-spark.outputs.cache-hit != 'true'
        run: |
          echo "Downloading Apache Spark 3.5.0..."
          wget -q https://archive.apache.org/dist/spark/spark-3.5.0/spark-3.5.0-bin-hadoop3.tgz
          tar xzf spark-3.5.0-bin-hadoop3.tgz
          echo "OK Spark downloaded"

      - name: Start SeaweedFS services for example
        if: github.event_name == 'push' || github.event_name == 'workflow_dispatch'
        working-directory: test/java/spark
        run: |
          echo "=== Starting SeaweedFS Services for Example ==="
          docker compose down -v || true
          docker compose build --no-cache
          docker compose up -d seaweedfs-master seaweedfs-volume seaweedfs-filer

          echo "Waiting for services..."
          for i in {1..30}; do
            if curl -f http://localhost:8888/ > /dev/null 2>&1; then
              echo "OK SeaweedFS filer is ready!"
              break
            fi
            if [ $i -eq 30 ]; then
              echo "FAILED Services failed to start"
              docker compose ps -a
              docker compose logs
              exit 1
            fi
            echo "Waiting... ($i/30)"
            sleep 2
          done

          curl -f http://localhost:9333/cluster/status || exit 1
          echo "OK All services healthy"

      - name: Build project for example
        if: github.event_name == 'push' || github.event_name == 'workflow_dispatch'
        working-directory: test/java/spark
        run: mvn clean package -DskipTests

      - name: Run Spark example application
        if: github.event_name == 'push' || github.event_name == 'workflow_dispatch'
        working-directory: test/java/spark
        run: |
          echo "=== Running Spark Example Application ==="
          export SPARK_HOME=$(pwd)/../../../spark-3.5.0-bin-hadoop3
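          # fs.seaweed.filer.port.grpc follows SeaweedFS's usual convention of
          # gRPC port = HTTP port + 10000 (8888 -> 18888).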
          $SPARK_HOME/bin/spark-submit \
            --class seaweed.spark.SparkSeaweedFSExample \
            --master local[2] \
            --conf spark.hadoop.fs.seaweedfs.impl=seaweed.hdfs.SeaweedFileSystem \
            --conf spark.hadoop.fs.seaweed.filer.host=localhost \
            --conf spark.hadoop.fs.seaweed.filer.port=8888 \
            --conf spark.hadoop.fs.seaweed.filer.port.grpc=18888 \
            --conf spark.hadoop.fs.seaweed.replication="" \
            target/seaweedfs-spark-integration-tests-1.0-SNAPSHOT.jar \
            seaweedfs://localhost:8888/ci-spark-output
          echo "OK Example completed"

      - name: Verify example output
        if: github.event_name == 'push' || github.event_name == 'workflow_dispatch'
        run: |
          echo "Verifying output..."
          curl -s http://localhost:8888/ci-spark-output/ || echo "Output listing unavailable"

      - name: Stop example services
        if: always() && (github.event_name == 'push' || github.event_name == 'workflow_dispatch')
        working-directory: test/java/spark
        run: docker compose down -v

      # ========================================
      # DIAGNOSTICS
      # ========================================
      - name: Display diagnostics on failure
        if: failure()
        working-directory: test/java/spark
        run: |
          echo "=== Container Status ==="
          docker compose ps -a
          echo ""
          echo "=== Master Logs ==="
          docker compose logs seaweedfs-master
          echo ""
          echo "=== Volume Logs ==="
          docker compose logs seaweedfs-volume
          echo ""
          echo "=== Filer Logs ==="
          docker compose logs seaweedfs-filer
          echo ""
          echo "=== Volume List ==="
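          # Run volume.list through a scripted "weed shell" session on the master.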
          docker compose exec -T seaweedfs-master weed shell <<EOF || echo "Failed"
          volume.list
          exit
          EOF
          echo ""
          echo "=== Cluster Status ==="
          curl -s http://localhost:9333/dir/status | jq '.' || curl -s http://localhost:9333/dir/status