You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
50 lines
2.0 KiB
50 lines
2.0 KiB
#!/bin/bash
#
# Run the Spark SQL test against SeaweedFS and capture the Parquet file it
# writes before the test run finishes.
#
# Flow:
#   1. Start the SeaweedFS master, volume and filer containers.
#   2. Launch the Spark test in the background, logging to $TEST_LOG.
#   3. While the test is alive, poll the filer listing for part-*.parquet.
#   4. On the first non-empty download: print Parquet metadata plus a hex
#      dump of the first and last 100 bytes, stop the test, and exit 0.
#   5. If the test ends without a capture, print the tail of its log.
#
# Requires: docker (with the compose plugin), curl, hexdump, python3/pip3.

# -u catches typos in variable names; pipefail surfaces failures inside
# pipelines. -e is deliberately not set: the diagnostic steps below are
# best-effort and must not abort the capture.
set -uo pipefail

readonly FILER_DIR_URL="http://localhost:8888/test-spark/employees"
readonly TEST_LOG="/tmp/spark-test-capture.log"
readonly DOWNLOAD_DIR="/tmp"

echo "Starting SeaweedFS services..."
if ! docker compose up -d seaweedfs-master seaweedfs-volume seaweedfs-filer; then
  # Nothing below can work without the services, so stop here.
  echo "Failed to start SeaweedFS services" >&2
  exit 1
fi
# Fixed grace period for the services to come up before the test starts.
sleep 10

echo "Running Spark test in background..."
docker compose run --rm -e SEAWEEDFS_TEST_ENABLED=true spark-tests \
  bash -c "mvn test -Dtest=SparkSQLTest#testCreateTableAndQuery 2>&1" \
  > "$TEST_LOG" &
test_pid=$!

echo "Monitoring for Parquet file creation..."
# kill -0 sends no signal; it only reports whether the process still exists.
while kill -0 "$test_pid" 2>/dev/null; do
  # Check if the employees directory exists and lists any Parquet parts.
  # No match (or an unreachable filer) simply yields an empty list.
  # sort -u guards against a name appearing more than once in the response.
  listing=$(curl -s "$FILER_DIR_URL/" 2>/dev/null \
    | grep -o 'part-[^"]*\.parquet' \
    | sort -u) || listing=""

  if [[ -n "$listing" ]]; then
    echo "Found Parquet file(s)!"

    # Intentional word splitting: one file name per whitespace-separated
    # word of the listing.
    for name in $listing; do
      local_path="$DOWNLOAD_DIR/$name"

      echo "Downloading: $name"
      # --fail makes curl return non-zero on an HTTP error instead of
      # saving the error page as if it were the Parquet file.
      if ! curl -sf -o "$local_path" "$FILER_DIR_URL/$name"; then
        echo "Download failed: $name" >&2
        continue
      fi

      # BSD/macOS stat first, then GNU stat; fall back to 0 so the numeric
      # comparison below never sees an empty value.
      file_size=$(stat -f%z "$local_path" 2>/dev/null \
        || stat --format=%s "$local_path" 2>/dev/null \
        || echo 0)
      echo "Downloaded $name: $file_size bytes"

      if [[ -f "$local_path" ]] && (( file_size > 0 )); then
        echo "SUCCESS: Captured $name"

        echo "Installing parquet-tools..."
        pip3 install -q parquet-tools 2>/dev/null \
          || echo "parquet-tools might already be installed"

        echo ""
        echo "=== Parquet File Metadata ==="
        python3 -m parquet_tools meta "$local_path" \
          || echo "parquet-tools failed"

        echo ""
        echo "=== File Header (first 100 bytes) ==="
        # head -c keeps the dump to exactly the 100 bytes the label promises.
        head -c 100 "$local_path" | hexdump -C

        echo ""
        echo "=== File Footer (last 100 bytes) ==="
        tail -c 100 "$local_path" | hexdump -C

        # The capture is done; the rest of the test run is not needed.
        kill "$test_pid" 2>/dev/null
        exit 0
      fi
    done
  fi

  sleep 0.5
done

# Reached only when the test process ended before any file was captured.
echo "Test completed, checking logs..."
tail -50 "$TEST_LOG"