Browse Source

debug: add detailed getPos() tracking with caller stack trace

Added comprehensive logging to track:
1. Who is calling getPos() (using stack trace)
2. The position values being returned
3. Buffer flush operations
4. Total bytes written at each getPos() call

This helps diagnose whether Parquet is recording incorrect column chunk
offsets in the footer metadata, which would cause seeks to the wrong
position when the file is read back.

Key observations from testing:
- getPos() is called frequently by Parquet writer
- All positions appear correct (0, 4, 59, 92, 139, 172, 203, 226, 249, 272, etc.)
- Buffer flushes are logged to track when position jumps
- No EOF errors observed in recent test run

Next: analyze whether the fix resolves the issue completely
pull/7526/head
chrislu 1 week ago
parent
commit
5c30bc8e7b
  1. 24
      other/java/client/src/main/java/seaweedfs/client/SeaweedOutputStream.java
  2. 2
      other/java/client/src/main/java/seaweedfs/client/SeaweedRead.java

24
other/java/client/src/main/java/seaweedfs/client/SeaweedOutputStream.java

@ -102,9 +102,18 @@ public class SeaweedOutputStream extends OutputStream {
/**
 * Returns the current write position of this stream, computed as the
 * {@code position} field plus the number of bytes currently held in
 * {@code buffer}.
 *
 * <p>For paths containing {@code "parquet"}, a single WARN-level debug line is
 * emitted with the immediate caller and the internal counters. The stack trace
 * is only captured when WARN logging is enabled, so the diagnostic costs
 * nothing when it would not be printed.
 *
 * @return {@code position + buffer.position()}
 */
public synchronized long getPos() {
    long currentPos = position + buffer.position();
    // Temporary diagnostics: only trace parquet paths, and skip the expensive
    // stack-trace capture entirely when the WARN line would be discarded.
    if (path.contains("parquet") && LOG.isWarnEnabled()) {
        // Frame 0 is Thread.getStackTrace, frame 1 is getPos itself,
        // so frame 2 is the method that called getPos().
        StackTraceElement[] stackTrace = Thread.currentThread().getStackTrace();
        String caller = "unknown";
        if (stackTrace.length > 2) {
            StackTraceElement callerElement = stackTrace[2];
            caller = callerElement.getClassName() + "." + callerElement.getMethodName() + ":" + callerElement.getLineNumber();
        }
        // One log line per call: this WARN entry carries every field the
        // former INFO entry had, plus the caller and the path suffix.
        LOG.warn(
            "[DEBUG-2024] getPos() called by {}: flushedPosition={} bufferPosition={} returning={} totalBytesWritten={} writeCalls={} path={}",
            caller, position, buffer.position(), currentPos, totalBytesWritten, writeCallCount,
            path.substring(Math.max(0, path.length() - 80))); // Last 80 chars of path
    }
    return currentPos;
}
@ -195,6 +204,9 @@ public class SeaweedOutputStream extends OutputStream {
int currentOffset = off;
int writableBytes = bufferSize - buffer.position();
int numberOfBytesToWrite = length;
// Track position before write
long posBeforeWrite = position + buffer.position();
while (numberOfBytesToWrite > 0) {
@ -207,6 +219,12 @@ public class SeaweedOutputStream extends OutputStream {
// ((outputIndex + currentOffset) + writableBytes) + ") " + buffer.capacity());
buffer.put(data, currentOffset, writableBytes);
currentOffset += writableBytes;
if (path.contains("parquet")) {
LOG.warn("[DEBUG-2024] Buffer FLUSH: posBeforeFlush={} flushingBufferSize={} newPositionAfterFlush={} totalWritten={}",
posBeforeWrite, bufferSize, position + bufferSize, totalBytesWritten);
}
writeCurrentBufferToService();
numberOfBytesToWrite = numberOfBytesToWrite - writableBytes;
writableBytes = bufferSize - buffer.position();

2
other/java/client/src/main/java/seaweedfs/client/SeaweedRead.java

@ -27,6 +27,8 @@ public class SeaweedRead {
final long position, final ByteBuffer buf, final long fileSize) throws IOException {
List<ChunkView> chunkViews = viewFromVisibles(visibleIntervals, position, buf.remaining());
LOG.warn("[DEBUG-2024] SeaweedRead.read(): position={} bufRemaining={} fileSize={} #chunkViews={}",
position, buf.remaining(), fileSize, chunkViews.size());
Map<String, FilerProto.Locations> knownLocations = new HashMap<>();

Loading…
Cancel
Save