From 942a0b7da74f71617b73cae4bad48197e84ae555 Mon Sep 17 00:00:00 2001 From: pingqiu Date: Sat, 28 Mar 2026 18:52:11 -0700 Subject: [PATCH] fix: strengthen IsRecoverable contiguity check and StateAt snapshot correctness IsRecoverable now verifies three conditions: - startExclusive >= tailLSN (not recycled) - endInclusive <= headLSN (within WAL) - all LSNs in range exist contiguously (no holes) StateAt now uses base snapshot captured during AdvanceTail: - returns nil for LSNs before snapshot boundary (unreconstructable) - correctly includes block state from recycled entries via snapshot 5 new tests: end-beyond-head, missing entries, state after tail advance, nil before snapshot, block last written before tail. Co-Authored-By: Claude Opus 4.6 (1M context) --- sw-block/prototype/enginev2/p3_test.go | 97 +++++++++++++++++++++++ sw-block/prototype/enginev2/walhistory.go | 68 ++++++++++++++-- 2 files changed, 159 insertions(+), 6 deletions(-) diff --git a/sw-block/prototype/enginev2/p3_test.go b/sw-block/prototype/enginev2/p3_test.go index a5910d3f8..7a1a96cde 100644 --- a/sw-block/prototype/enginev2/p3_test.go +++ b/sw-block/prototype/enginev2/p3_test.go @@ -112,6 +112,103 @@ func TestWAL_StateAt_HistoricalCorrectness(t *testing.T) { } } +// --- IsRecoverable: strengthened checks --- + +func TestWAL_IsRecoverable_FalseWhenEndBeyondHead(t *testing.T) { + w := NewWALHistory() + for i := uint64(1); i <= 50; i++ { + w.Append(WALEntry{LSN: i}) + } + // End is 100 but head is only 50. + if w.IsRecoverable(0, 100) { + t.Fatal("should be false when endInclusive > headLSN") + } +} + +func TestWAL_IsRecoverable_FalseWhenEntriesMissing(t *testing.T) { + w := NewWALHistory() + // Append non-contiguous: 1, 2, 3, 5, 6 (missing 4). + w.Append(WALEntry{LSN: 1}) + w.Append(WALEntry{LSN: 2}) + w.Append(WALEntry{LSN: 3}) + w.entries = append(w.entries, WALEntry{LSN: 5}) // skip 4 + w.entries = append(w.entries, WALEntry{LSN: 6}) + w.headLSN = 6 + + // Range 0→6 has a hole at LSN 4. + if w.IsRecoverable(0, 6) { + t.Fatal("should be false when entries are non-contiguous") + } + // Range 0→3 is contiguous. + if !w.IsRecoverable(0, 3) { + t.Fatal("range 0→3 should be recoverable (contiguous)") + } +} + +// --- StateAt: correctness after tail advancement --- + +func TestWAL_StateAt_CorrectAfterTailAdvance(t *testing.T) { + w := NewWALHistory() + w.Append(WALEntry{LSN: 1, Block: 1, Value: 100}) + w.Append(WALEntry{LSN: 2, Block: 2, Value: 200}) + w.Append(WALEntry{LSN: 3, Block: 1, Value: 300}) // overwrites block 1 + w.Append(WALEntry{LSN: 4, Block: 3, Value: 400}) + w.Append(WALEntry{LSN: 5, Block: 2, Value: 500}) // overwrites block 2 + + w.AdvanceTail(3) // recycle LSNs 1-3, snapshot captures block1=300, block2=200 + + // StateAt(5): should include snapshot state + retained entries. + s := w.StateAt(5) + if s[1] != 300 { // from snapshot (LSN 3) + t.Fatalf("block 1=%d, want 300", s[1]) + } + if s[2] != 500 { // from retained entry (LSN 5) + t.Fatalf("block 2=%d, want 500", s[2]) + } + if s[3] != 400 { // from retained entry (LSN 4) + t.Fatalf("block 3=%d, want 400", s[3]) + } +} + +func TestWAL_StateAt_ReturnsNilBeforeSnapshot(t *testing.T) { + w := NewWALHistory() + for i := uint64(1); i <= 10; i++ { + w.Append(WALEntry{LSN: i, Block: i, Value: i * 10}) + } + w.AdvanceTail(5) // snapshot at LSN 5 + + // StateAt(3) — before snapshot boundary, entries recycled. + s := w.StateAt(3) + if s != nil { + t.Fatal("StateAt before snapshot should return nil") + } + + // StateAt(5) — at snapshot boundary, valid. + s = w.StateAt(5) + if s == nil { + t.Fatal("StateAt at snapshot boundary should work") + } +} + +func TestWAL_StateAt_BlockLastWrittenBeforeTail(t *testing.T) { + w := NewWALHistory() + w.Append(WALEntry{LSN: 1, Block: 99, Value: 42}) // block 99 only written once + w.Append(WALEntry{LSN: 2, Block: 1, Value: 100}) + w.Append(WALEntry{LSN: 3, Block: 1, Value: 200}) + + w.AdvanceTail(2) // snapshot captures block99=42, block1=100 + + // StateAt(3): block 99's last write was LSN 1 (recycled), but + // captured in snapshot. Must still be present. + s := w.StateAt(3) + if s[99] != 42 { + t.Fatalf("block 99=%d, want 42 (from snapshot of recycled entry)", s[99]) + } + if s[1] != 200 { + t.Fatalf("block 1=%d, want 200 (from retained entry)", s[1]) + } +} + // --- Recoverability proof: "why is catch-up allowed?" --- func TestRecoverability_Provable_GapWithinRetention(t *testing.T) { diff --git a/sw-block/prototype/enginev2/walhistory.go b/sw-block/prototype/enginev2/walhistory.go index b4e771ed8..5ca6275b8 100644 --- a/sw-block/prototype/enginev2/walhistory.go +++ b/sw-block/prototype/enginev2/walhistory.go @@ -19,6 +19,11 @@ type WALHistory struct { headLSN uint64 // highest LSN written tailLSN uint64 // oldest retained LSN (exclusive: entries with LSN > tailLSN are kept) committedLSN uint64 // lineage-safe boundary + + // Base snapshot: block→value state at tailLSN. Captured when tail advances. + // Required for correct StateAt() after entries are recycled. + baseSnapshot map[uint64]uint64 + baseSnapshotLSN uint64 } // NewWALHistory creates an empty WAL history. @@ -47,14 +52,26 @@ func (w *WALHistory) Commit(lsn uint64) error { return nil } -// AdvanceTail recycles entries at or below lsn. After this, entries with -// LSN <= lsn are no longer available for catch-up recovery. +// AdvanceTail recycles entries at or below lsn. Before recycling, +// captures a base snapshot of block state at the new tail boundary +// so that StateAt() remains correct after entries are gone. func (w *WALHistory) AdvanceTail(lsn uint64) { if lsn <= w.tailLSN { return } + // Capture base snapshot: replay all entries up to new tail. + if w.baseSnapshot == nil { + w.baseSnapshot = map[uint64]uint64{} + } + for _, e := range w.entries { + if e.LSN > lsn { + break + } + w.baseSnapshot[e.Block] = e.Value + } + w.baseSnapshotLSN = lsn w.tailLSN = lsn - // Remove recycled entries from storage. + // Remove recycled entries. kept := w.entries[:0] for _, e := range w.entries { if e.LSN > lsn { @@ -101,8 +118,33 @@ func (w *WALHistory) EntriesInRange(startExclusive, endInclusive uint64) ([]WALE // IsRecoverable checks whether all entries from startExclusive+1 to // endInclusive are retained in the WAL. This is the executable proof // of "why catch-up is allowed." +// +// Verifies three conditions: +// 1. startExclusive >= tailLSN (gap start not recycled) +// 2. endInclusive <= headLSN (gap end within WAL) +// 3. All LSNs in (startExclusive, endInclusive] exist contiguously func (w *WALHistory) IsRecoverable(startExclusive, endInclusive uint64) bool { - return startExclusive >= w.tailLSN + if startExclusive < w.tailLSN { + return false // start is in recycled region + } + if endInclusive > w.headLSN { + return false // end is beyond WAL head + } + // Verify contiguous coverage. + expect := startExclusive + 1 + for _, e := range w.entries { + if e.LSN <= startExclusive { + continue + } + if e.LSN > endInclusive { + break + } + if e.LSN != expect { + return false // gap in retained entries + } + expect++ + } + return expect > endInclusive // all LSNs covered } // MakeHandshakeResult generates a HandshakeResult from the WAL state @@ -132,10 +174,24 @@ func (w *WALHistory) CommittedLSN() uint64 { return w.committedLSN } // Len returns the number of retained entries. func (w *WALHistory) Len() int { return len(w.entries) } -// StateAt replays entries up to lsn and returns block→value state. -// Used to verify historical data correctness after recovery. +// StateAt returns block→value state at a given LSN. +// +// If lsn >= baseSnapshotLSN, starts from the base snapshot and replays +// retained entries up to lsn. This is correct even after tail advancement +// because the snapshot captures all block state from recycled entries. +// +// If lsn < baseSnapshotLSN, the required entries have been recycled and +// the state cannot be reconstructed — returns nil. func (w *WALHistory) StateAt(lsn uint64) map[uint64]uint64 { + if w.baseSnapshotLSN > 0 && lsn < w.baseSnapshotLSN { + return nil // state unreconstructable: entries recycled + } + // Start from base snapshot (if any). state := map[uint64]uint64{} + for k, v := range w.baseSnapshot { + state[k] = v + } + // Replay retained entries up to lsn. for _, e := range w.entries { if e.LSN > lsn { break