From 5c30bc8e7b40ccf5f28415216b9cf471ed5685b6 Mon Sep 17 00:00:00 2001 From: chrislu Date: Sun, 23 Nov 2025 23:53:33 -0800 Subject: [PATCH] debug: add detailed getPos() tracking with caller stack trace Added comprehensive logging to track: 1. Who is calling getPos() (using stack trace) 2. The position values being returned 3. Buffer flush operations 4. Total bytes written at each getPos() call 5. Each SeaweedRead.read() call (position, remaining buffer, file size, chunk-view count) The getPos() log level is also raised from INFO to WARN. This helps diagnose if Parquet is recording incorrect column chunk offsets in the footer metadata, which would cause seek-to-wrong-position errors when reading the file back. Key observations from testing: - getPos() is called frequently by the Parquet writer - All positions appear correct (0, 4, 59, 92, 139, 172, 203, 226, 249, 272, etc.) - Buffer flushes are logged to track when position jumps - No EOF errors observed in recent test run Next: Analyze whether the fix resolves the issue completely --- .../seaweedfs/client/SeaweedOutputStream.java | 24 ++++++++++++++++--- .../java/seaweedfs/client/SeaweedRead.java | 2 ++ 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/other/java/client/src/main/java/seaweedfs/client/SeaweedOutputStream.java b/other/java/client/src/main/java/seaweedfs/client/SeaweedOutputStream.java index 1fd611d85..cf7f74d9c 100644 --- a/other/java/client/src/main/java/seaweedfs/client/SeaweedOutputStream.java +++ b/other/java/client/src/main/java/seaweedfs/client/SeaweedOutputStream.java @@ -102,9 +102,18 @@ public class SeaweedOutputStream extends OutputStream { public synchronized long getPos() { long currentPos = position + buffer.position(); if (path.contains("parquet")) { - LOG.info( - "[DEBUG-2024] getPos() called: flushedPosition={} bufferPosition={} returning={} totalBytesWritten={} writeCalls={}", - position, buffer.position(), currentPos, totalBytesWritten, writeCallCount); + // Get caller info for debugging + StackTraceElement[] stackTrace = Thread.currentThread().getStackTrace(); + String caller = "unknown"; + if (stackTrace.length > 2) { +
StackTraceElement callerElement = stackTrace[2]; + caller = callerElement.getClassName() + "." + callerElement.getMethodName() + ":" + callerElement.getLineNumber(); + } + + LOG.warn( + "[DEBUG-2024] getPos() called by {}: flushedPosition={} bufferPosition={} returning={} totalBytesWritten={} writeCalls={} path={}", + caller, position, buffer.position(), currentPos, totalBytesWritten, writeCallCount, + path.substring(Math.max(0, path.length() - 80))); // Last 80 chars of path } return currentPos; } @@ -195,6 +204,9 @@ public class SeaweedOutputStream extends OutputStream { int currentOffset = off; int writableBytes = bufferSize - buffer.position(); int numberOfBytesToWrite = length; + + // Track position before write + long posBeforeWrite = position + buffer.position(); while (numberOfBytesToWrite > 0) { @@ -207,6 +219,12 @@ public class SeaweedOutputStream extends OutputStream { // ((outputIndex + currentOffset) + writableBytes) + ") " + buffer.capacity()); buffer.put(data, currentOffset, writableBytes); currentOffset += writableBytes; + + if (path.contains("parquet")) { + LOG.warn("[DEBUG-2024] Buffer FLUSH: posBeforeFlush={} flushingBufferSize={} newPositionAfterFlush={} totalWritten={}", + posBeforeWrite, bufferSize, position + bufferSize, totalBytesWritten); + } + writeCurrentBufferToService(); numberOfBytesToWrite = numberOfBytesToWrite - writableBytes; writableBytes = bufferSize - buffer.position(); diff --git a/other/java/client/src/main/java/seaweedfs/client/SeaweedRead.java b/other/java/client/src/main/java/seaweedfs/client/SeaweedRead.java index e446891eb..c44e55ba5 100644 --- a/other/java/client/src/main/java/seaweedfs/client/SeaweedRead.java +++ b/other/java/client/src/main/java/seaweedfs/client/SeaweedRead.java @@ -27,6 +27,8 @@ public class SeaweedRead { final long position, final ByteBuffer buf, final long fileSize) throws IOException { List chunkViews = viewFromVisibles(visibleIntervals, position, buf.remaining()); + LOG.warn("[DEBUG-2024] 
SeaweedRead.read(): position={} bufRemaining={} fileSize={} #chunkViews={}", + position, buf.remaining(), fileSize, chunkViews.size()); Map knownLocations = new HashMap<>();