From b35463c8b43524082fd7d5cd7e90c8b5e8c87cf8 Mon Sep 17 00:00:00 2001 From: chrislu Date: Sat, 22 Nov 2025 13:04:44 -0800 Subject: [PATCH] spark: fix flaky test by sorting DataFrame before first() - In testLargeDataset(), add orderBy("value") before calling first() - Parquet files don't guarantee row order, so first() on unordered DataFrame can return any row, making assertions flaky - Sorting by 'value' ensures the first row is always the one with value=0, making the test deterministic and reliable --- .../spark/src/test/java/seaweed/spark/SparkReadWriteTest.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/java/spark/src/test/java/seaweed/spark/SparkReadWriteTest.java b/test/java/spark/src/test/java/seaweed/spark/SparkReadWriteTest.java index 3c49a551f..e69b3038a 100644 --- a/test/java/spark/src/test/java/seaweed/spark/SparkReadWriteTest.java +++ b/test/java/spark/src/test/java/seaweed/spark/SparkReadWriteTest.java @@ -168,8 +168,8 @@ public class SparkReadWriteTest extends SparkTestBase { Dataset<Row> readDf = spark.read().parquet(outputPath); assertEquals(10000, readDf.count()); - // Verify some data - Row firstRow = readDf.first(); + // Verify some data (sort to ensure deterministic order) + Row firstRow = readDf.orderBy("value").first(); assertEquals(0L, firstRow.getLong(0)); assertEquals(0L, firstRow.getLong(1)); }