From 65c3ead62f511645d47f5539e6d24aa762d4894f Mon Sep 17 00:00:00 2001
From: chrislu
Date: Sun, 23 Nov 2025 13:28:38 -0800
Subject: [PATCH] debug: enhance logging to capture footer writes and getPos calls

Added targeted logging to answer the key question:
"Are the missing 78 bytes the Parquet footer that never got written?"

Changes:
1. Log ALL writes from call 220 onward (likely footer-related)
   - Previous: only logged writes >= 20 bytes
   - Now: also log small writes near end marked [FOOTER?]

2. Enhanced getPos() logging with writeCalls context
   - Shows relationship between getPos() and actual writes
   - Helps identify if Parquet calculates size before writing footer

This will reveal:
A) What the last ~14 write calls contain (footer structure)
B) If getPos() is called before/during footer writes
C) If there's a mismatch between calculated size and actual writes

Expected pattern if footer is missing:
- Large writes up to ~600 bytes (data pages)
- Small writes for metadata
- getPos() called to calculate footer offset
- Footer writes (78 bytes) that either:
  * Never happen (bug in Parquet)
  * Get lost in FSDataOutputStream
  * Are written but lost in flush

Next run will show the exact write sequence!
--- .../seaweedfs/client/SeaweedOutputStream.java | 23 +++++++++++++------ 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/other/java/client/src/main/java/seaweedfs/client/SeaweedOutputStream.java b/other/java/client/src/main/java/seaweedfs/client/SeaweedOutputStream.java index ed7f1d1d8..54a15bfba 100644 --- a/other/java/client/src/main/java/seaweedfs/client/SeaweedOutputStream.java +++ b/other/java/client/src/main/java/seaweedfs/client/SeaweedOutputStream.java @@ -99,8 +99,10 @@ public class SeaweedOutputStream extends OutputStream { */ public synchronized long getPos() { long currentPos = position + buffer.position(); - LOG.info("[DEBUG-2024] 📍 getPos() called: flushedPosition={} bufferPosition={} returning={} path={}", - position, buffer.position(), currentPos, path); + if (path.contains("parquet")) { + LOG.info("[DEBUG-2024] 📍 getPos() called: flushedPosition={} bufferPosition={} returning={} totalBytesWritten={} writeCalls={}", + position, buffer.position(), currentPos, totalBytesWritten, writeCallCount); + } return currentPos; } @@ -168,11 +170,18 @@ public class SeaweedOutputStream extends OutputStream { totalBytesWritten += length; writeCallCount++; - // Only log significant writes to avoid flooding logs with byte-by-byte writes - if (path.contains("parquet") && length >= 20) { - LOG.info("[DEBUG-2024] ✍️ write({} bytes): totalSoFar={} writeCalls={} position={} bufferPos={}, file={}", - length, totalBytesWritten, writeCallCount, position, buffer.position(), - path.substring(path.lastIndexOf('/') + 1)); + // Log significant writes AND writes near the end (potential footer) + if (path.contains("parquet")) { + if (length >= 20) { + LOG.info("[DEBUG-2024] ✍️ write({} bytes): totalSoFar={} writeCalls={} position={} bufferPos={}, file={}", + length, totalBytesWritten, writeCallCount, position, buffer.position(), + path.substring(path.lastIndexOf('/') + 1)); + } else if (writeCallCount >= 220) { + // Log all small writes after call 220 (likely 
footer writes) + LOG.info("[DEBUG-2024] ✍️ write({} bytes): totalSoFar={} writeCalls={} position={} bufferPos={} [FOOTER?], file={}", + length, totalBytesWritten, writeCallCount, position, buffer.position(), + path.substring(path.lastIndexOf('/') + 1)); + } } // System.out.println(path + " write [" + (outputIndex + off) + "," +