You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 
 

50 lines
2.0 KiB

#!/bin/bash
# Run the Spark SQL test and capture the Parquet file it writes before cleanup.
#
# Flow:
#   1. Start the SeaweedFS master, volume, and filer services via docker compose.
#   2. Launch SparkSQLTest#testCreateTableAndQuery in the background, sending
#      its output to /tmp/spark-test-capture.log.
#   3. Poll the filer listing for /test-spark/employees/ while the test runs.
#   4. On the first non-empty part-*.parquet download: print its metadata and
#      header/footer hex dumps, stop the test, and exit 0.
#   5. If the test ends without a capture, print the tail of the test log.
#
# Uses: docker compose, curl, hexdump, pip3/python3 (for parquet-tools).

# Abort on references to unset variables instead of silently expanding to "".
set -u

FILER_DIR_URL="http://localhost:8888/test-spark/employees"
TEST_LOG="/tmp/spark-test-capture.log"

echo "Starting SeaweedFS services..."
if ! docker compose up -d seaweedfs-master seaweedfs-volume seaweedfs-filer; then
  # Without the storage services there is nothing to poll; fail fast.
  echo "Failed to start SeaweedFS services" >&2
  exit 1
fi
# Fixed grace period for the services before the test is launched.
sleep 10

echo "Running Spark test in background..."
docker compose run --rm -e SEAWEEDFS_TEST_ENABLED=true spark-tests \
  bash -c "mvn test -Dtest=SparkSQLTest#testCreateTableAndQuery 2>&1" \
  > "$TEST_LOG" &
TEST_PID=$!

echo "Monitoring for Parquet file creation..."
# kill -0 sends no signal; it only checks that the background job still exists.
while kill -0 "$TEST_PID" 2>/dev/null; do
  # Extract part-*.parquet names from the directory listing. A failed request
  # or a listing without matches leaves FILES empty.
  FILES=$(curl -s "$FILER_DIR_URL/" 2>/dev/null | grep -o 'part-[^"]*\.parquet' || true)
  if [[ -n "$FILES" ]]; then
    echo "Found Parquet file(s)!"
    # Word splitting is intentional here: grep -o emits one name per line.
    for FILE in $FILES; do
      # Keep only the last path component so the download lands directly in /tmp.
      LOCAL_PATH="/tmp/${FILE##*/}"
      echo "Downloading: $FILE"
      # -f makes curl fail on HTTP errors instead of saving the error body,
      # which would otherwise be mistaken for a non-empty Parquet file.
      # The shell redirect always truncates, so no stale data survives a retry.
      if ! curl -sf "$FILER_DIR_URL/$FILE" > "$LOCAL_PATH"; then
        echo "Download of $FILE failed, will retry" >&2
        continue
      fi
      # wc -c works on both GNU and BSD userlands (stat's format flags differ);
      # some wc implementations pad the count, so strip whitespace.
      FILE_SIZE=$(wc -c < "$LOCAL_PATH")
      FILE_SIZE=${FILE_SIZE//[[:space:]]/}
      FILE_SIZE=${FILE_SIZE:-0}
      echo "Downloaded $FILE: $FILE_SIZE bytes"
      if [[ -f "$LOCAL_PATH" ]] && (( FILE_SIZE > 0 )); then
        echo "SUCCESS: Captured $FILE"
        echo "Installing parquet-tools..."
        pip3 install -q parquet-tools 2>/dev/null || echo "parquet-tools might already be installed"
        echo ""
        echo "=== Parquet File Metadata ==="
        python3 -m parquet_tools meta "$LOCAL_PATH" || echo "parquet-tools failed"
        echo ""
        echo "=== File Header (first 100 bytes) ==="
        # Cut to 100 bytes before dumping so the output matches the label.
        head -c 100 "$LOCAL_PATH" | hexdump -C
        echo ""
        echo "=== File Footer (last 100 bytes) ==="
        tail -c 100 "$LOCAL_PATH" | hexdump -C
        # The capture is complete; the still-running test is no longer needed.
        kill "$TEST_PID" 2>/dev/null
        exit 0
      fi
    done
  fi
  sleep 0.5
done

echo "Test completed, checking logs..."
tail -n 50 "$TEST_LOG"