seaweedfs/test/java/spark/test_with_readers.sh


								#!/bin/bash
								#
								# Checks whether the Parquet file written by the Spark test can be read by
								# independent Parquet readers. The script runs
								# SparkSQLTest#testCreateTableAndQuery in the spark-tests compose service,
								# takes the chunk ID of the written file from the test log, downloads that
								# chunk from http://localhost:8080 and tries to open it with pyarrow,
								# pandas and DuckDB.

								# Abort as soon as a command fails.
								set -o errexit

								# Banner followed by one blank line.
								printf '%s\n\n' "=== Testing Parquet file with multiple readers ==="


								# Start services.
								# The output is captured first so that a failing `docker compose up` is
								# noticed: in a pipeline only the status of the last command (grep)
								# would count. The filter is followed by `|| true` because `grep -v`
								# exits 1 when it filters out every line, which under `set -e` would
								# abort the script exactly when all containers are already running.
								if ! compose_output=$(docker compose up -d 2>&1); then
								    printf '%s\n' "$compose_output"
								    echo "ERROR: docker compose up failed" >&2
								    exit 1
								fi
								if [ -n "$compose_output" ]; then
								    printf '%s\n' "$compose_output" | grep -v "Running" || true
								fi

								# Short pause before the services are used.
								sleep 2


								# Run test and capture chunk ID
								echo "1. Writing Parquet file and capturing chunk ID..."

								# The test runs in the background. Its output is sent through a process
								# substitution rather than a pipeline so that $! is the PID of
								# `docker compose run` itself. With `cmd | tee | tail &`, $! is the PID
								# of the trailing `tail`, so a later `kill $TEST_PID` would only signal
								# tail. The output is still copied to /tmp/test_output.log, and its last
								# 20 lines are still printed once the output stream ends.
								# Inside the container, a failing `cd` now stops the command instead of
								# letting mvn run from the wrong directory.
								docker compose run --rm -e SEAWEEDFS_TEST_ENABLED=true spark-tests bash -c '
								cd /workspace || exit 1
								mvn test -Dtest=SparkSQLTest#testCreateTableAndQuery 2>&1
								' > >(tee /tmp/test_output.log | tail -20) 2>&1 &
								TEST_PID=$!


								# Wait for the file to be written
								echo "2. Waiting for file write..."

								# Print the chunk list from the first "PARQUET FILE WRITTEN TO EMPLOYEES"
								# line of a log file: the text between "CHUNKS: [" and the next "]".
								#   $1 - path of the log file
								# Prints nothing when the file does not exist or has no such line.
								# awk is used instead of `grep -oP` because -P is a GNU grep extension.
								# The offset 9 below is the length of the marker "CHUNKS: [".
								extract_chunk_id() {
								    [ -f "$1" ] || return 0
								    awk '
								        /PARQUET FILE WRITTEN TO EMPLOYEES/ {
								            start = index($0, "CHUNKS: [")
								            if (start == 0) next
								            rest = substr($0, start + 9)
								            stop = index(rest, "]")
								            if (stop > 0) rest = substr(rest, 1, stop - 1)
								            if (rest != "") { print rest; exit }
								        }
								    ' "$1"
								}

								# Extract chunk ID from logs.
								# The log is polled once per second for at most 15 seconds instead of
								# sleeping a fixed time, so the script continues as soon as the line
								# appears. The first look happens after one second, which gives tee time
								# to truncate a log left over from an earlier run.
								CHUNK_ID=""
								waited=0
								while [ "$waited" -lt 15 ]; do
								    sleep 1
								    waited=$((waited + 1))
								    # `|| true`: a read error must not abort the script under `set -e`.
								    CHUNK_ID=$(extract_chunk_id /tmp/test_output.log) || true
								    if [ -n "$CHUNK_ID" ]; then
								        break
								    fi
								    if [ "$waited" -eq 10 ]; then
								        echo "Waiting more..."
								    fi
								done

								if [ -z "$CHUNK_ID" ]; then
								    echo "ERROR: Could not find chunk ID in logs"
								    echo "Log excerpt:"
								    grep -E "PARQUET|CHUNKS|employees" /tmp/test_output.log | tail -20
								    # Stop the background test before giving up.
								    if [ -n "${TEST_PID:-}" ]; then
								        kill "$TEST_PID" 2>/dev/null || true
								    fi
								    exit 1
								fi

								echo "Found chunk ID: $CHUNK_ID"


								# Download directly from volume server
								echo "3. Downloading from volume server..."

								# Remove a file left by an earlier run so that a failed download cannot
								# be mistaken for a fresh one.
								rm -f /tmp/test.parquet

								# --fail makes curl exit non-zero on an HTTP error instead of saving the
								# error body as if it were the file; -S keeps curl's own error message
								# visible despite -s. Running curl inside `if` keeps `set -e` from
								# aborting the script before the error message below is printed.
								download_ok=true
								if ! curl -sS --fail "http://localhost:8080/$CHUNK_ID" -o /tmp/test.parquet; then
								    download_ok=false
								fi

								# -s is false for a missing file as well as for an empty one.
								if [ "$download_ok" = false ] || [ ! -s /tmp/test.parquet ]; then
								    echo "ERROR: Download failed!"
								    # Stop the background test, as the chunk-ID failure path does.
								    if [ -n "${TEST_PID:-}" ]; then
								        kill "$TEST_PID" 2>/dev/null || true
								    fi
								    exit 1
								fi

								# wc -c works the same on GNU and BSD systems; tr removes the padding
								# some wc implementations put in front of the number.
								FILE_SIZE=$(wc -c < /tmp/test.parquet | tr -d ' ')
								echo "Downloaded: $FILE_SIZE bytes"
								echo ""


								# Kill test process
								# Signal and reap the background test only when a PID was recorded: with
								# an empty TEST_PID an unquoted `wait $TEST_PID` becomes a bare `wait`,
								# which waits for every background job. Failures are ignored on purpose,
								# because the test may already have finished.
								if [ -n "${TEST_PID:-}" ]; then
								    kill "$TEST_PID" 2>/dev/null || true
								    wait "$TEST_PID" 2>/dev/null || true
								fi


								# Test with readers
								echo "=== Testing with Multiple Parquet Readers ==="
								echo ""

								# Print 4 bytes from one end of a file as lowercase hex, no separators.
								#   $1 - "head" or "tail": which end of the file to read
								#   $2 - path of the file
								# od is used for the hex dump because it is specified by POSIX, whereas
								# xxd is a separate tool that may not be installed. tr removes the
								# blanks and the newline od puts around the byte values.
								parquet_magic_hex() {
								    "$1" -c 4 "$2" | od -An -tx1 | tr -d ' \t\n'
								}

								# Check magic bytes
								echo "1. Magic Bytes:"
								FIRST=$(parquet_magic_hex head /tmp/test.parquet)
								LAST=$(parquet_magic_hex tail /tmp/test.parquet)
								echo "   First 4 bytes: $FIRST"
								echo "   Last 4 bytes: $LAST"

								# 50 41 52 31 are the ASCII codes of "PAR1". A mismatch is only
								# reported; the readers below still run.
								if [ "$FIRST" = "50415231" ] && [ "$LAST" = "50415231" ]; then
								    echo "   ✅ Valid PAR1 magic"
								else
								    echo "   ❌ Invalid magic!"
								fi
								echo ""


								# Python pyarrow
								echo "2. Python pyarrow:"

								# Read /tmp/test.parquet with pyarrow and print its size and rows.
								# - The program is passed through a `<<-` here-document, which strips
								#   leading tabs, so the indentation of this script never reaches
								#   Python (tab-indented text given to -c fails with IndentationError).
								#   The quoted delimiter disables shell expansion inside the program.
								# - The import sits inside the try block so that a missing module is
								#   reported as a failed reader instead of ending python3 with an error
								#   status, which under `set -e` would stop the script before the
								#   remaining readers run.
								# - The trailing `||` covers the case where python3 itself fails.
								python3 - <<-'PY' 2>&1 || echo "   ❌ FAILED: python3 exited with an error"
								try:
								    import pyarrow.parquet as pq
								    table = pq.read_table('/tmp/test.parquet')
								    print(f'   ✅ Read {table.num_rows} rows, {table.num_columns} columns')
								    print(f'   Data: {table.to_pandas().to_dict("records")}')
								except Exception as e:
								    print(f'   ❌ FAILED: {e}')
								PY
								echo ""


								# Pandas
								echo "3. Pandas:"

								# Read /tmp/test.parquet with pandas and print the row count and frame.
								# - The program is passed through a `<<-` here-document, which strips
								#   leading tabs, so the indentation of this script never reaches
								#   Python (tab-indented text given to -c fails with IndentationError).
								#   The quoted delimiter disables shell expansion inside the program.
								# - The import sits inside the try block so that a missing module is
								#   reported as a failed reader instead of ending python3 with an error
								#   status, which under `set -e` would stop the script before the
								#   remaining steps run.
								# - The trailing `||` covers the case where python3 itself fails.
								python3 - <<-'PY' 2>&1 || echo "   ❌ FAILED: python3 exited with an error"
								try:
								    import pandas as pd
								    df = pd.read_parquet('/tmp/test.parquet')
								    print(f'   ✅ Read {len(df)} rows')
								    print(f'   Data:\n{df}')
								except Exception as e:
								    print(f'   ❌ FAILED: {e}')
								PY
								echo ""


								# DuckDB
								echo "4. DuckDB:"

								# Query /tmp/test.parquet with an in-memory DuckDB connection and print
								# the fetched rows.
								# - The program is passed through a `<<-` here-document, which strips
								#   leading tabs, so the indentation of this script never reaches
								#   Python (tab-indented text given to -c fails with IndentationError).
								#   The quoted delimiter disables shell expansion, so the double quotes
								#   around the file path need no escaping.
								# - The import sits inside the try block so that a missing module is
								#   reported as a failed reader instead of ending python3 with an error
								#   status, which under `set -e` would stop the script before the
								#   summary is printed.
								# - The trailing `||` covers the case where python3 itself fails.
								python3 - <<-'PY' 2>&1 || echo "   ❌ FAILED: python3 exited with an error"
								try:
								    import duckdb
								    conn = duckdb.connect(':memory:')
								    result = conn.execute('SELECT * FROM "/tmp/test.parquet"').fetchall()
								    print(f'   ✅ Read {len(result)} rows')
								    print(f'   Data: {result}')
								except Exception as e:
								    print(f'   ❌ FAILED: {e}')
								PY
								echo ""


								# Closing report: repeat the size of the downloaded file and say how
								# the reader results printed earlier are to be interpreted.
								printf '%s\n' \
								    "=== Summary ===" \
								    "File: $FILE_SIZE bytes" \
								    "If readers succeeded: File is VALID ✅" \
								    "If readers failed: Footer metadata is corrupted ❌"