diff --git a/.github/workflows/spark-integration-tests.yml b/.github/workflows/spark-integration-tests.yml index ca23cc2eb..fe489e506 100644 --- a/.github/workflows/spark-integration-tests.yml +++ b/.github/workflows/spark-integration-tests.yml @@ -131,11 +131,11 @@ jobs: ( DOWNLOADED=false while docker ps | grep -q seaweedfs-spark-tests; do - # Check if we've reached the SQL test (where employees files are created) - if docker compose logs spark-tests 2>&1 | grep -q "Running seaweed.spark.SparkSQLTest"; then + # Check if an employees Parquet file has been written (we log this explicitly) + if docker compose logs spark-tests 2>&1 | grep -q "PARQUET FILE WRITTEN TO EMPLOYEES"; then if [ "$DOWNLOADED" = "false" ]; then echo "" - echo "=== SparkSQLTest started! Polling for employees file creation ===" + echo "=== EMPLOYEES FILE WRITTEN! Downloading immediately ===" # Poll for files to appear (max 30 seconds) for i in {1..30}; do diff --git a/other/java/client/src/main/java/seaweedfs/client/SeaweedOutputStream.java b/other/java/client/src/main/java/seaweedfs/client/SeaweedOutputStream.java index 222e7d9dc..3dd5106b1 100644 --- a/other/java/client/src/main/java/seaweedfs/client/SeaweedOutputStream.java +++ b/other/java/client/src/main/java/seaweedfs/client/SeaweedOutputStream.java @@ -249,6 +249,12 @@ public class SeaweedOutputStream extends OutputStream { LOG.info( "[DEBUG-2024] close END: path={} finalPosition={} totalBytesWritten={} writeCalls={} (buffer had {} bytes)", path, position, totalBytesWritten, writeCallCount, bufferPosBeforeFlush); + + // Special logging for employees directory files (to help CI download timing) + if (path.contains("/test-spark/employees/") && path.endsWith(".parquet")) { + String filename = path.substring(path.lastIndexOf('/') + 1); + LOG.warn("=== PARQUET FILE WRITTEN TO EMPLOYEES: {} ({} bytes) ===", filename, position); + } } finally { lastError = new IOException("Stream is closed!"); ByteBufferPool.release(buffer);