Browse Source

fix: WALTailLSN is now an LSN boundary, ScanWALEntries uses durable checkpoint

Finding 1: WALTailLSN semantic fix
- StatusSnapshot().WALTailLSN now reads super.WALCheckpointLSN (an LSN)
- Was: wal.Tail() which returns a physical byte offset
- Entries with LSN > WALTailLSN are guaranteed to still be in the WAL

Finding 2: ScanWALEntries replay-source fix
- ScanWALEntries passes super.WALCheckpointLSN as the recycled boundary
- Was: flusher.CheckpointLSN() which in V1 equals CommittedLSN
- The flusher's live checkpoint may advance in memory, but entries above
  the durable superblock checkpoint are still physically in the WAL
- Normal catch-up (replica at 70, committed at 100) now works because
  fromLSN=71 > super.WALCheckpointLSN (which is the last persisted
  checkpoint, not the live flusher state)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
feature/sw-block
pingqiu 2 days ago
parent
commit
d5b2a3a345
  1. 33
      weed/storage/blockvol/blockvol.go
  2. 6
      weed/storage/blockvol/v2bridge/reader.go

33
weed/storage/blockvol/blockvol.go

@ -877,30 +877,32 @@ type V2StatusSnapshot struct {
// Each field reads from the authoritative source:
//
// WALHeadLSN ← nextLSN - 1 (last written LSN)
// WALTailLSN ← wal.Tail() (oldest retained WAL entry)
// CommittedLSN ← flusher.CheckpointLSN() (barrier-confirmed + flushed)
// WALTailLSN ← super.WALCheckpointLSN (LSN boundary, not byte offset)
// CommittedLSN ← flusher.CheckpointLSN() (V1 interim: barrier-confirmed + flushed)
// CheckpointLSN ← super.WALCheckpointLSN (durable base image)
// CheckpointTrusted ← super.Valid (superblock integrity check)
// CheckpointTrusted ← super.Validate() == nil (superblock integrity)
func (v *BlockVol) StatusSnapshot() V2StatusSnapshot {
headLSN := v.nextLSN.Load()
if headLSN > 0 {
headLSN--
}
var walTail uint64
if v.wal != nil {
walTail = v.wal.Tail()
}
// WALTailLSN: the oldest retained LSN boundary for recovery classification.
// Entries with LSN > WALTailLSN are guaranteed in the WAL.
// Entries with LSN <= WALTailLSN have been checkpointed and WAL space
// may be reused. This is an LSN (not a physical byte offset).
walTailLSN := v.super.WALCheckpointLSN
var checkpointLSN uint64
// CommittedLSN: V1 interim mapping. committed = checkpointed after flush.
var committedLSN uint64
if v.flusher != nil {
checkpointLSN = v.flusher.CheckpointLSN()
committedLSN = v.flusher.CheckpointLSN()
}
return V2StatusSnapshot{
WALHeadLSN: headLSN,
WALTailLSN: walTail,
CommittedLSN: checkpointLSN, // V1: committed = checkpointed after flush
WALTailLSN: walTailLSN,
CommittedLSN: committedLSN,
CheckpointLSN: v.super.WALCheckpointLSN,
CheckpointTrusted: v.super.Validate() == nil,
}
@ -935,11 +937,18 @@ func (v *BlockVol) SetV2RetentionFloor(fn func() (uint64, bool)) {
// ScanWALEntries reads WAL entries from fromLSN using the real ScanFrom mechanism.
// This is the entry point for the V2 bridge executor's catch-up path.
//
// The recycled boundary handed to ScanFrom is super.WALCheckpointLSN, the
// durable checkpoint persisted in the superblock, not flusher.CheckpointLSN().
// The flusher's live checkpointLSN may have advanced further in memory, but
// entries between super.WALCheckpointLSN and headLSN are still in the WAL.
//
// fn is passed through to ScanFrom unchanged. An error is returned when the
// WAL has not been initialized; otherwise the result of ScanFrom is returned.
func (v *BlockVol) ScanWALEntries(fromLSN uint64, fn func(*WALEntry) error) error {
	if v.wal == nil {
		return fmt.Errorf("WAL not initialized")
	}
	// Entries with LSN > super.WALCheckpointLSN are guaranteed in the WAL,
	// so the durable superblock checkpoint is the only boundary consulted here.
	return v.wal.ScanFrom(v.fd, v.super.WALOffset, v.super.WALCheckpointLSN, fromLSN, fn)
}
// ReplicaReceiverAddrInfo holds canonical addresses from the replica receiver.

6
weed/storage/blockvol/v2bridge/reader.go

@ -39,10 +39,10 @@ func NewReader(vol *blockvol.BlockVol) *Reader {
// Each field maps to a specific blockvol source:
//
// WALHeadLSN ← vol.nextLSN - 1 (last written LSN)
// WALTailLSN ← vol.wal.Tail() (oldest retained WAL entry)
// CommittedLSN ← vol.flusher.CheckpointLSN() (last flushed = committed)
// WALTailLSN ← vol.super.WALCheckpointLSN (LSN boundary, not byte offset)
// CommittedLSN ← vol.flusher.CheckpointLSN() (V1 interim: committed = checkpointed)
// CheckpointLSN ← vol.super.WALCheckpointLSN
// CheckpointTrusted ← vol.super.Valid (superblock integrity)
// CheckpointTrusted ← vol.super.Validate() == nil (superblock integrity)
//
// Note: CommittedLSN maps to CheckpointLSN in the current V1 model where
// barrier-confirmed = flusher-checkpointed. In V2, these may diverge when

Loading…
Cancel
Save