From d5b2a3a34520d5b55e4190f15c003eba8782eb40 Mon Sep 17 00:00:00 2001 From: pingqiu Date: Mon, 30 Mar 2026 20:26:27 -0700 Subject: [PATCH] fix: WALTailLSN is now an LSN boundary, ScanWALEntries uses durable checkpoint Finding 1: WALTailLSN semantic fix - StatusSnapshot().WALTailLSN now reads super.WALCheckpointLSN (an LSN) - Was: wal.Tail() which returns a physical byte offset - Entries with LSN > WALTailLSN are guaranteed in the WAL Finding 2: ScanWALEntries replay-source fix - ScanWALEntries passes super.WALCheckpointLSN as the recycled boundary - Was: flusher.CheckpointLSN() which in V1 equals CommittedLSN - The flusher's live checkpoint may advance in memory, but entries above the durable superblock checkpoint are still physically in the WAL - Normal catch-up (replica at 70, committed at 100) now works because fromLSN=71 > super.WALCheckpointLSN (which is the last persisted checkpoint, not the live flusher state) Co-Authored-By: Claude Opus 4.6 (1M context) --- weed/storage/blockvol/blockvol.go | 33 +++++++++++++++--------- weed/storage/blockvol/v2bridge/reader.go | 6 ++--- 2 files changed, 24 insertions(+), 15 deletions(-) diff --git a/weed/storage/blockvol/blockvol.go b/weed/storage/blockvol/blockvol.go index 6e0e6a832..ef042bc57 100644 --- a/weed/storage/blockvol/blockvol.go +++ b/weed/storage/blockvol/blockvol.go @@ -877,30 +877,32 @@ type V2StatusSnapshot struct { // Each field reads from the authoritative source: // // WALHeadLSN ← nextLSN - 1 (last written LSN) -// WALTailLSN ← wal.Tail() (oldest retained WAL entry) -// CommittedLSN ← flusher.CheckpointLSN() (barrier-confirmed + flushed) +// WALTailLSN ← super.WALCheckpointLSN (LSN boundary, not byte offset) +// CommittedLSN ← flusher.CheckpointLSN() (V1 interim: barrier-confirmed + flushed) // CheckpointLSN ← super.WALCheckpointLSN (durable base image) -// CheckpointTrusted ← super.Valid (superblock integrity check) +// CheckpointTrusted ← super.Validate() == nil (superblock integrity) func (v *BlockVol) StatusSnapshot() V2StatusSnapshot { headLSN := v.nextLSN.Load() if headLSN > 0 { headLSN-- } - var walTail uint64 - if v.wal != nil { - walTail = v.wal.Tail() - } + // WALTailLSN: the oldest retained LSN boundary for recovery classification. + // Entries with LSN > WALTailLSN are guaranteed in the WAL. + // Entries with LSN <= WALTailLSN have been checkpointed and WAL space + // may be reused. This is an LSN (not a physical byte offset). + walTailLSN := v.super.WALCheckpointLSN - var checkpointLSN uint64 + // CommittedLSN: V1 interim mapping. committed = checkpointed after flush. + var committedLSN uint64 if v.flusher != nil { - checkpointLSN = v.flusher.CheckpointLSN() + committedLSN = v.flusher.CheckpointLSN() } return V2StatusSnapshot{ WALHeadLSN: headLSN, - WALTailLSN: walTail, - CommittedLSN: checkpointLSN, // V1: committed = checkpointed after flush + WALTailLSN: walTailLSN, + CommittedLSN: committedLSN, CheckpointLSN: v.super.WALCheckpointLSN, CheckpointTrusted: v.super.Validate() == nil, } @@ -935,11 +937,18 @@ func (v *BlockVol) SetV2RetentionFloor(fn func() (uint64, bool)) { // ScanWALEntries reads WAL entries from fromLSN using the real ScanFrom mechanism. // This is the entry point for the V2 bridge executor's catch-up path. +// +// Uses super.WALCheckpointLSN as the recycled boundary (not flusher.CheckpointLSN). +// The superblock checkpoint is the durable boundary persisted to disk. +// The flusher's live checkpointLSN may have advanced further in memory +// but entries between super.WALCheckpointLSN and headLSN are still in the WAL. func (v *BlockVol) ScanWALEntries(fromLSN uint64, fn func(*WALEntry) error) error { if v.wal == nil { return fmt.Errorf("WAL not initialized") } - return v.wal.ScanFrom(v.fd, v.super.WALOffset, v.flusher.CheckpointLSN(), fromLSN, fn) + // Use the durable superblock checkpoint as the recycled boundary. + // Entries with LSN > super.WALCheckpointLSN are guaranteed in the WAL. + return v.wal.ScanFrom(v.fd, v.super.WALOffset, v.super.WALCheckpointLSN, fromLSN, fn) } // ReplicaReceiverAddrInfo holds canonical addresses from the replica receiver. diff --git a/weed/storage/blockvol/v2bridge/reader.go b/weed/storage/blockvol/v2bridge/reader.go index 3d63fa0d6..ec28f28b0 100644 --- a/weed/storage/blockvol/v2bridge/reader.go +++ b/weed/storage/blockvol/v2bridge/reader.go @@ -39,10 +39,10 @@ func NewReader(vol *blockvol.BlockVol) *Reader { // Each field maps to a specific blockvol source: // // WALHeadLSN ← vol.nextLSN - 1 (last written LSN) -// WALTailLSN ← vol.wal.Tail() (oldest retained WAL entry) -// CommittedLSN ← vol.flusher.CheckpointLSN() (last flushed = committed) +// WALTailLSN ← vol.super.WALCheckpointLSN (LSN boundary, not byte offset) +// CommittedLSN ← vol.flusher.CheckpointLSN() (V1 interim: committed = checkpointed) // CheckpointLSN ← vol.super.WALCheckpointLSN -// CheckpointTrusted ← vol.super.Valid (superblock integrity) +// CheckpointTrusted ← vol.super.Validate() == nil (superblock integrity) // // Note: CommittedLSN maps to CheckpointLSN in the current V1 model where // barrier-confirmed = flusher-checkpointed. In V2, these may diverge when