You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

60 lines
2.0 KiB

#!/bin/bash
#
# Diagnostic: checks whether a Parquet file written to SeaweedFS by Spark can
# be read by parquet-tools.
#
# Steps:
#   1. Run SparkSQLTest#testCreateTableAndQuery inside the `spark-tests`
#      compose service so that a Parquet file gets written.
#   2. List /test-spark/employees/ on the filer and pick the first *.parquet.
#   3. Download that file to /tmp/test.parquet through http://localhost:8888.
#   4. Inspect and read it with parquet-tools, then print a conclusion.
#
# Requires: docker compose, curl, and pip3 (only if parquet-tools is missing).
# Writes:   /tmp/write_test.log and /tmp/test.parquet.
# Exit:     1 if no Parquet file is found, the download fails, or
#           parquet-tools is unavailable; 0 otherwise. The final conclusion is
#           informational only and does not change the exit status.

set -euo pipefail

readonly WRITE_LOG=/tmp/write_test.log
readonly LOCAL_PARQUET=/tmp/test.parquet
readonly FILER_DIR_URL="http://localhost:8888/test-spark/employees"

# Prints an error message to stderr and aborts the script with status 1.
die() {
  printf 'ERROR: %s\n' "$*" >&2
  exit 1
}

echo "=== Testing if Parquet file written by Spark can be read by parquet-tools ==="

# Run the test to write a Parquet file.
# Best effort: the result of the test run is deliberately ignored; only the
# file it leaves behind matters here. Output is kept in the log for debugging.
echo "1. Writing Parquet file with Spark..."
docker compose run --rm -e SEAWEEDFS_TEST_ENABLED=true spark-tests bash -c '
cd /workspace
mvn test -Dtest=SparkSQLTest#testCreateTableAndQuery -q 2>&1 | tail -5
' > "$WRITE_LOG" 2>&1 || true

# Find the Parquet file that was written.
# stderr is merged into the captured output, so "Creating" status lines from
# docker compose are filtered out before picking the first *.parquet name.
# `|| true` keeps an empty result (grep found nothing) from tripping
# `set -e`/pipefail; the emptiness check below reports it properly.
echo "2. Finding Parquet file..."
parquet_file=$(
  docker compose run --rm spark-tests bash -c '
curl -s "http://seaweedfs-filer:8888/test-spark/employees/?pretty=y" | grep -oP "\"name\":\s*\"\K[^\"]+\.parquet" | head -1
' 2>&1 | grep -v "Creating" | grep "\.parquet" | head -1
) || true

if [[ -z "$parquet_file" ]]; then
  die "No Parquet file found!"
fi
echo "Found file: $parquet_file"

# Download the file.
# Remove any copy left by a previous run so it cannot be mistaken for a fresh
# download, and use --fail so an HTTP error page is never saved as if it were
# the Parquet payload.
echo "3. Downloading file from SeaweedFS..."
rm -f -- "$LOCAL_PARQUET"
if ! curl -fsS "$FILER_DIR_URL/$parquet_file" -o "$LOCAL_PARQUET" \
  || [[ ! -s "$LOCAL_PARQUET" ]]; then
  die "Failed to download file!"
fi

# wc -c is portable across GNU and BSD; some implementations pad the count
# with spaces, so strip whitespace before printing.
file_size=$(wc -c < "$LOCAL_PARQUET")
echo "Downloaded ${file_size//[[:space:]]/} bytes"

# Try to read with parquet-tools, installing it only when it is missing.
echo "4. Reading with parquet-tools..."
if ! command -v parquet-tools > /dev/null 2>&1; then
  # Installation failure is detected by the availability check below.
  pip3 install -q parquet-tools || true
fi
# Without this check a missing tool would be misreported as a corrupted file.
command -v parquet-tools > /dev/null 2>&1 \
  || die "parquet-tools is not available on PATH (pip3 install failed?)"

echo ""
echo "=== Parquet Metadata ==="
parquet-tools inspect "$LOCAL_PARQUET" 2>&1 || echo "FAILED to inspect"

echo ""
echo "=== Try to read data ==="
# Remember the outcome so the conclusion does not need to re-read the file.
show_status=0
parquet-tools show "$LOCAL_PARQUET" 2>&1 || show_status=$?
if ((show_status != 0)); then
  echo "FAILED to read data"
fi

echo ""
echo "=== Conclusion ==="
if ((show_status == 0)); then
  echo "✅ SUCCESS: File can be read by parquet-tools!"
  echo "The file itself is VALID Parquet format."
  echo "The issue is specific to how Spark reads it back."
else
  echo "❌ FAILED: File cannot be read by parquet-tools"
  echo "The file is CORRUPTED or has invalid Parquet format."
fi