From 885354bb19ba95e72bf6ba25bbdb776a9d852a32 Mon Sep 17 00:00:00 2001 From: chrislu Date: Sun, 23 Nov 2025 12:46:36 -0800 Subject: [PATCH] fix: reduce write() logging verbosity, add summary stats Previous run showed Parquet writes byte-by-byte (hundreds of 1-byte writes), flooding logs and getting truncated. This prevented seeing the full picture. Changes: 1. Only log writes >= 20 bytes (skip byte-by-byte metadata writes) 2. Track writeCallCount to see total number of write() invocations 3. Show writeCallCount in close() summary logs This will show: - Large data writes clearly (26, 34, 41, 67 bytes, etc.) - Total bytes written vs total calls (e.g., 684 bytes in 200+ calls) - Whether ALL bytes Parquet wrote actually reached close() If totalBytesWritten=684 at close, Parquet only sent 684 bytes. If totalBytesWritten=762 at close, Parquet sent all 762 bytes but we lost 78. Next run will definitively answer: Does Parquet write 684 or 762 bytes total? --- .../seaweedfs/client/SeaweedOutputStream.java | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/other/java/client/src/main/java/seaweedfs/client/SeaweedOutputStream.java b/other/java/client/src/main/java/seaweedfs/client/SeaweedOutputStream.java index 8b51555b5..8d371090b 100644 --- a/other/java/client/src/main/java/seaweedfs/client/SeaweedOutputStream.java +++ b/other/java/client/src/main/java/seaweedfs/client/SeaweedOutputStream.java @@ -35,6 +35,7 @@ public class SeaweedOutputStream extends OutputStream { private String replication = ""; private String collection = ""; private long totalBytesWritten = 0; // Track total bytes for debugging + private long writeCallCount = 0; // Track number of write() calls public SeaweedOutputStream(FilerClient filerClient, final String fullpath) { this(filerClient, fullpath, ""); @@ -149,7 +150,6 @@ public class SeaweedOutputStream extends OutputStream { @Override public void write(final int byteVal) throws IOException { - LOG.debug("[DEBUG-2024] ✍️ write(int): 1 byte, path={}", path); write(new byte[] { (byte) (byteVal & 0xFF) }); } @@ -167,9 +167,11 @@ public class SeaweedOutputStream extends OutputStream { } totalBytesWritten += length; - if (path.contains("parquet")) { - LOG.info("[DEBUG-2024] ✍️ write({} bytes): totalSoFar={} position={} bufferPos={}, file={}", - length, totalBytesWritten, position, buffer.position(), + writeCallCount++; + // Only log significant writes to avoid flooding logs with byte-by-byte writes + if (path.contains("parquet") && length >= 20) { + LOG.info("[DEBUG-2024] ✍️ write({} bytes): totalSoFar={} writeCalls={} position={} bufferPos={}, file={}", + length, totalBytesWritten, writeCallCount, position, buffer.position(), path.substring(path.lastIndexOf('/') + 1)); } @@ -226,13 +228,13 @@ public class SeaweedOutputStream extends OutputStream { } int bufferPosBeforeFlush = buffer.position(); - LOG.info("[DEBUG-2024] 🔒 close START: path={} position={} buffer.position()={} totalBytesWritten={}", - path, position, bufferPosBeforeFlush, totalBytesWritten); + LOG.info("[DEBUG-2024] 🔒 close START: path={} position={} buffer.position()={} totalBytesWritten={} writeCalls={}", + path, position, bufferPosBeforeFlush, totalBytesWritten, writeCallCount); try { flushInternal(); threadExecutor.shutdown(); - LOG.info("[DEBUG-2024] ✅ close END: path={} finalPosition={} totalBytesWritten={} (buffer had {} bytes)", - path, position, totalBytesWritten, bufferPosBeforeFlush); + LOG.info("[DEBUG-2024] ✅ close END: path={} finalPosition={} totalBytesWritten={} writeCalls={} (buffer had {} bytes)", + path, position, totalBytesWritten, writeCallCount, bufferPosBeforeFlush); } finally { lastError = new IOException("Stream is closed!"); ByteBufferPool.release(buffer);