#!/bin/bash
set -e

echo "=== Testing Parquet file with multiple readers ==="
echo ""

# Start services.
# The "Running" lines printed for containers that are already up are filtered
# out. `grep -v` exits with status 1 when it filters out *every* line, and the
# pipeline's status is grep's status, so under `set -e` a fully-running stack
# would abort the script right here. `|| true` tolerates that case.
# Note: this pipeline never reflected docker's own exit status (no pipefail),
# so a failed `docker compose up` is still only detected by the later steps.
docker compose up -d 2>&1 | grep -v "Running" || true
# Short fixed pause before the test container is launched.
sleep 2

# Run the Spark test in the background, capturing its output.
# The combined stdout/stderr of the test goes to /tmp/test_output.log (read by
# the chunk-ID extraction below) and its last 20 lines are echoed once the
# output stream closes.
#
# The log pipeline is attached through process substitution rather than
# `cmd | tee | tail &`: after a background pipeline, `$!` is the PID of the
# LAST stage (tail), so `kill $TEST_PID` would only kill the tail and leave the
# docker job running. With this form `$!` is the docker compose command itself.
echo "1. Writing Parquet file and capturing chunk ID..."
docker compose run --rm -e SEAWEEDFS_TEST_ENABLED=true spark-tests bash -c '
cd /workspace
mvn test -Dtest=SparkSQLTest#testCreateTableAndQuery 2>&1
' > >(tee /tmp/test_output.log | tail -20) 2>&1 &
TEST_PID=$!

# Wait for the file to be written
echo "2. Waiting for file write..."
sleep 10

# Print the chunk list from the first "PARQUET FILE WRITTEN TO EMPLOYEES" line
# of /tmp/test_output.log that carries one, i.e. the text between "CHUNKS: ["
# and the next "]". Prints nothing when no such line exists yet.
#
# awk is used instead of `grep -oP` because -P (PCRE) is a GNU grep extension;
# BSD/macOS grep rejects it, which made the chunk ID impossible to find there.
# `|| true` keeps an awk error (e.g. log file not created yet) from aborting
# the script under `set -e`; the caller handles the resulting empty value.
extract_chunk_id() {
    awk '
        /PARQUET FILE WRITTEN TO EMPLOYEES/ && match($0, /CHUNKS: \[[^]]+/) {
            # Skip the 9-character "CHUNKS: [" prefix of the match.
            print substr($0, RSTART + 9, RLENGTH - 9)
            exit
        }
    ' /tmp/test_output.log || true
}

# Extract chunk ID from logs
CHUNK_ID=$(extract_chunk_id)

# One retry after a further pause in case the write was slow.
if [ -z "$CHUNK_ID" ]; then
    echo "Waiting more..."
    sleep 5
    CHUNK_ID=$(extract_chunk_id)
fi

# Still nothing: show the relevant log lines, stop the background test, fail.
if [ -z "$CHUNK_ID" ]; then
    echo "ERROR: Could not find chunk ID in logs"
    echo "Log excerpt:"
    grep -E "PARQUET|CHUNKS|employees" /tmp/test_output.log | tail -20
    kill "$TEST_PID" 2>/dev/null || true
    exit 1
fi

echo "Found chunk ID: $CHUNK_ID"

# Download directly from volume server
echo "3. Downloading from volume server..."

# Remove any file left behind by a previous run so that a failed download
# cannot be mistaken for a successful one by the size check below.
rm -f /tmp/test.parquet

# --fail makes curl return non-zero on HTTP errors (4xx/5xx) instead of saving
# the server's error page as if it were the Parquet file. The curl call sits
# inside the `if` so that `set -e` does not abort before the error is reported.
# `-s` on the file also covers the "file does not exist" case.
if ! curl -s --fail "http://localhost:8080/$CHUNK_ID" -o /tmp/test.parquet \
    || [ ! -s /tmp/test.parquet ]; then
    echo "ERROR: Download failed!"
    # Do not leave the background test job running on the failure path.
    kill "$TEST_PID" 2>/dev/null || true
    exit 1
fi

# `wc -c` is POSIX, so no BSD/GNU `stat` flag juggling is needed; some
# implementations pad the count with spaces, hence the `tr`.
FILE_SIZE=$(wc -c < /tmp/test.parquet | tr -d '[:space:]')
echo "Downloaded: $FILE_SIZE bytes"
echo ""

# Stop the background test job, then reap it.
# Both steps are best-effort: their error output is discarded and a failure
# (for example because the job has already exited) never aborts the script.
for job_action in kill wait; do
    "$job_action" $TEST_PID 2>/dev/null || true
done

# Reader checks against the downloaded file start here.
printf '%s\n' "=== Testing with Multiple Parquet Readers ===" ""

# Magic bytes: hex-dump the first and last four bytes of the file and compare
# both against 50415231 (ASCII "PAR1"). The result is only reported; a
# mismatch does not stop the script.
printf '%s\n' "1. Magic Bytes:"
first_hex=$(head -c 4 /tmp/test.parquet | xxd -p)
last_hex=$(tail -c 4 /tmp/test.parquet | xxd -p)
printf '   First 4 bytes: %s\n' "$first_hex"
printf '   Last 4 bytes: %s\n' "$last_hex"
if [[ "$first_hex" != "50415231" || "$last_hex" != "50415231" ]]; then
    echo "   ❌ Invalid magic!"
else
    echo "   ✅ Valid PAR1 magic"
fi
printf '\n'

# Python pyarrow
# Reads /tmp/test.parquet with pyarrow and prints the row/column counts and
# the records, or a FAILED line with the error.
#
# The import sits inside the `try` so that a missing or broken pyarrow install
# is reported as a failure of this reader. Previously the resulting non-zero
# exit tripped `set -e` and aborted the script before the other readers ran.
# The trailing `|| echo` does the same for any other non-zero exit (python3
# missing, interpreter crash). The quoted heredoc passes the Python source
# through verbatim, so no shell escaping is needed inside it.
echo "2. Python pyarrow:"
python3 - <<'PY' 2>&1 || echo "   ❌ FAILED: python3 exited with status $?"
try:
    import pyarrow.parquet as pq
    table = pq.read_table('/tmp/test.parquet')
    print(f'   ✅ Read {table.num_rows} rows, {table.num_columns} columns')
    print(f'   Data: {table.to_pandas().to_dict("records")}')
except Exception as e:
    print(f'   ❌ FAILED: {e}')
PY
echo ""

# Pandas
# Reads /tmp/test.parquet with pandas.read_parquet and prints the row count
# and the frame, or a FAILED line with the error.
#
# The import sits inside the `try` so that a missing or broken pandas install
# is reported as a failure of this reader. Previously the resulting non-zero
# exit tripped `set -e` and aborted the script before the remaining reader ran.
# The trailing `|| echo` does the same for any other non-zero exit (python3
# missing, interpreter crash). The quoted heredoc passes the Python source
# through verbatim, so `\n` reaches Python unchanged.
echo "3. Pandas:"
python3 - <<'PY' 2>&1 || echo "   ❌ FAILED: python3 exited with status $?"
try:
    import pandas as pd
    df = pd.read_parquet('/tmp/test.parquet')
    print(f'   ✅ Read {len(df)} rows')
    print(f'   Data:\n{df}')
except Exception as e:
    print(f'   ❌ FAILED: {e}')
PY
echo ""

# DuckDB
# Queries /tmp/test.parquet through an in-memory DuckDB connection and prints
# the row count and the rows, or a FAILED line with the error.
#
# The import sits inside the `try` so that a missing or broken duckdb install
# is reported as a failure of this reader. Previously the resulting non-zero
# exit tripped `set -e` and aborted the script before the summary was printed.
# The trailing `|| echo` does the same for any other non-zero exit (python3
# missing, interpreter crash). The quoted heredoc passes the Python source
# through verbatim, so the SQL's double quotes need no shell escaping.
echo "4. DuckDB:"
python3 - <<'PY' 2>&1 || echo "   ❌ FAILED: python3 exited with status $?"
try:
    import duckdb
    conn = duckdb.connect(':memory:')
    result = conn.execute('SELECT * FROM "/tmp/test.parquet"').fetchall()
    print(f'   ✅ Read {len(result)} rows')
    print(f'   Data: {result}')
except Exception as e:
    print(f'   ❌ FAILED: {e}')
PY
echo ""

# Closing summary: the downloaded file size plus a legend for interpreting
# the reader results printed above.
printf '%s\n' \
    "=== Summary ===" \
    "File: $FILE_SIZE bytes" \
    "If readers succeeded: File is VALID ✅" \
    "If readers failed: Footer metadata is corrupted ❌"