#!/bin/bash
#
# Run the Spark test and capture the Parquet file before cleanup.
#
# Flow:
#   1. Start the SeaweedFS services with docker compose.
#   2. Run SparkSQLTest#testCreateTableAndQuery in the background, logging to
#      /tmp/spark-test-capture.log.
#   3. While the test is running, poll the HTTP listing of
#      /test-spark/employees/ on localhost:8888 for part-*.parquet names.
#   4. Download the first non-empty Parquet file to /tmp, print its metadata
#      plus a hex dump of its header and footer, signal the test process and
#      exit 0.
#   5. If the test ends before any file was captured, print the last 50 lines
#      of the test log.
#
# Usage: run from the directory containing the docker compose project.
# Requires: docker (compose plugin), curl, hexdump; pip3/python3 for
#           parquet-tools.

set -u

readonly LISTING_URL="http://localhost:8888/test-spark/employees/"
readonly TEST_LOG="/tmp/spark-test-capture.log"
readonly DOWNLOAD_DIR="/tmp"

#######################################
# Print metadata and raw header/footer bytes of a captured Parquet file.
# Arguments: $1 - path to the local Parquet file
# Outputs:   human-readable report on stdout
#######################################
inspect_parquet() {
  local path=$1

  echo "Installing parquet-tools..."
  # Best effort: the install may fail simply because the tool is already there.
  pip3 install -q parquet-tools 2>/dev/null \
    || echo "parquet-tools might already be installed"

  echo ""
  echo "=== Parquet File Metadata ==="
  python3 -m parquet_tools meta "$path" || echo "parquet-tools failed"

  echo ""
  echo "=== File Header (first 100 bytes) ==="
  # Dump exactly 100 bytes so the output matches the heading above.
  head -c 100 "$path" | hexdump -C

  echo ""
  echo "=== File Footer (last 100 bytes) ==="
  tail -c 100 "$path" | hexdump -C
}

#######################################
# Entry point: start services, run the test, capture and inspect the file.
# Returns: exits 0 after a successful capture; exits 1 if the services
#          cannot be started; otherwise falls through after printing the log.
#######################################
main() {
  local test_pid names name local_path size

  echo "Starting SeaweedFS services..."
  if ! docker compose up -d seaweedfs-master seaweedfs-volume seaweedfs-filer; then
    echo "ERROR: failed to start SeaweedFS services" >&2
    exit 1
  fi
  # Fixed grace period for the services to come up.
  sleep 10

  echo "Running Spark test in background..."
  docker compose run --rm -e SEAWEEDFS_TEST_ENABLED=true spark-tests \
    bash -c "mvn test -Dtest=SparkSQLTest#testCreateTableAndQuery 2>&1" \
    > "$TEST_LOG" &
  test_pid=$!

  echo "Monitoring for Parquet file creation..."
  # kill -0 only probes whether the background job is still alive.
  while kill -0 "$test_pid" 2>/dev/null; do
    # Extract part-*.parquet names from the listing. sort -u drops duplicates
    # in case the same name appears more than once in the response. An
    # unreachable server or an empty listing simply yields an empty string.
    names=$(curl -s "$LISTING_URL" 2>/dev/null \
      | grep -o 'part-[^"]*\.parquet' \
      | sort -u)

    if [[ -n "$names" ]]; then
      echo "Found Parquet file(s)!"
      # Word splitting is intentional: one file name per whitespace-separated
      # token.
      # shellcheck disable=SC2086
      for name in $names; do
        echo "Downloading: $name"
        # Use only the basename locally so a listed name can never point
        # outside the download directory.
        local_path="${DOWNLOAD_DIR}/${name##*/}"

        # -f: treat HTTP errors as failures instead of saving the error body
        # as if it were the Parquet file; a failed fetch is retried on the
        # next poll.
        if ! curl -sf "${LISTING_URL}${name}" -o "$local_path"; then
          echo "Download of $name failed, will retry" >&2
          continue
        fi

        # wc -c is portable across GNU and BSD userlands; strip the padding
        # some implementations emit and default to 0 if the file is missing.
        size=$(wc -c < "$local_path" 2>/dev/null | tr -d '[:space:]')
        size=${size:-0}
        echo "Downloaded $name: $size bytes"

        if [[ -f "$local_path" ]] && (( size > 0 )); then
          echo "SUCCESS: Captured $name"
          inspect_parquet "$local_path"
          # The file is captured; signal the background test process.
          kill "$test_pid" 2>/dev/null
          exit 0
        fi
      done
    fi

    sleep 0.5
  done

  echo "Test completed, checking logs..."
  tail -n 50 "$TEST_LOG"
}

main "$@"