From 48a2ddf6f866b1d3ff312d0e00ffa6a1b10dd49c Mon Sep 17 00:00:00 2001
From: chrislu
Date: Sun, 23 Nov 2025 12:24:52 -0800
Subject: [PATCH] debug: track ALL writes to Parquet files
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CRITICAL FINDING from previous run:
- getPos() was NEVER called by Parquet/Hadoop!
- This eliminates the position-tracking mismatch hypothesis
- Bytes are genuinely not reaching our write() method

Added detailed write() logging to track:
- Every write call for .parquet files
- Cumulative totalBytesWritten after each write
- Buffer state during writes

This will show the exact write pattern and reveal:

A) If Parquet writes 762 bytes but only 684 reach us → FSDataOutputStream buffering issue
B) If Parquet only writes 684 bytes → Parquet calculates the size incorrectly
C) The number and size of write() calls for a typical Parquet file

Expected patterns:
- Parquet typically writes in chunks: header, data pages, footer
- For small files: might be 2-3 write calls
- Footer should be ~78 bytes if that's what's missing

The next run will show the EXACT write sequence.
--- .../src/main/java/seaweedfs/client/SeaweedOutputStream.java | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/other/java/client/src/main/java/seaweedfs/client/SeaweedOutputStream.java b/other/java/client/src/main/java/seaweedfs/client/SeaweedOutputStream.java index 07ce080de..c0c5863e5 100644 --- a/other/java/client/src/main/java/seaweedfs/client/SeaweedOutputStream.java +++ b/other/java/client/src/main/java/seaweedfs/client/SeaweedOutputStream.java @@ -149,6 +149,7 @@ public class SeaweedOutputStream extends OutputStream { @Override public void write(final int byteVal) throws IOException { + LOG.debug("[DEBUG-2024] ✍️ write(int): 1 byte, path={}", path); write(new byte[] { (byte) (byteVal & 0xFF) }); } @@ -166,6 +167,11 @@ public class SeaweedOutputStream extends OutputStream { } totalBytesWritten += length; + if (path.contains("parquet")) { + LOG.info("[DEBUG-2024] ✍️ write({} bytes): totalSoFar={} position={} bufferPos={}, file={}", + length, totalBytesWritten, position, buffer.position(), + path.substring(path.lastIndexOf('/') + 1)); + } // System.out.println(path + " write [" + (outputIndex + off) + "," + // ((outputIndex + off) + length) + ")");