Browse Source

fix: true mid-executor invalidation test via OnStep hook

CatchUpExecutor.OnStep: optional callback fired between executor-managed
progress steps. Enables deterministic fault injection (epoch bump)
between steps without racing or manual sender calls.

E2_EpochBump_MidExecutorLoop:
- Executor runs 5 progress steps
- OnStep hook bumps epoch after step 1 (after 2 successful steps)
- Executor's own loop detects invalidation at step 2's check
- Resources released by executor's release path (not manual cancel)
- Log shows session_invalidated + exec_resources_released

This closes the remaining FC2 gap: invalidation is now detected
and cleaned up by the executor itself, not by external code.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
feature/sw-block
pingqiu 1 day ago
parent
commit
4df61f290b
  1. 10
      sw-block/engine/replication/executor.go
  2. 51
      sw-block/engine/replication/validation_test.go

10
sw-block/engine/replication/executor.go

@ -16,6 +16,11 @@ type CatchUpExecutor struct {
replicaID string
sessID uint64
released bool
// OnStep is an optional callback invoked between executor-managed steps.
// Used for deterministic fault injection in tests (e.g., epoch bump).
// step is the 0-based index of the completed step.
OnStep func(step int)
}
// NewCatchUpExecutor creates an executor from a plan. The plan's resources
@ -68,6 +73,11 @@ func (e *CatchUpExecutor) Execute(progressLSNs []uint64, startTick uint64) error
return err
}
// Fire step callback (test hook for fault injection).
if e.OnStep != nil {
e.OnStep(i)
}
// Check budget after each step.
v, err := s.CheckBudget(e.sessID, tick)
if err != nil {

51
sw-block/engine/replication/validation_test.go

@ -99,9 +99,10 @@ func TestP3_E1_ChangedAddress_OldPlanCancelledByDriver(t *testing.T) {
// --- E2 / FC2: Epoch bump during active executor step ---
func TestP3_E2_EpochBump_AfterExecutorProgress(t *testing.T) {
// True mid-execution: executor makes progress, THEN epoch bumps,
// THEN next step fails.
func TestP3_E2_EpochBump_MidExecutorLoop(t *testing.T) {
// True mid-execution through executor's own loop:
// executor makes progress steps, epoch bumps BETWEEN executor-managed
// steps via OnStep hook, executor detects invalidation at next step.
storage := newMockStorage(RetainedHistory{
HeadLSN: 100, TailLSN: 0, CommittedLSN: 100,
})
@ -116,39 +117,29 @@ func TestP3_E2_EpochBump_AfterExecutorProgress(t *testing.T) {
})
plan, _ := driver.PlanRecovery("r1", 50)
s := driver.Orchestrator.Registry.Sender("r1")
sessID := plan.SessionID
// Manually drive the executor steps to place the epoch bump BETWEEN steps.
// Step 1: begin catch-up.
s.BeginCatchUp(sessID, 0)
// Step 2: first progress step succeeds.
s.RecordCatchUpProgress(sessID, 60, 1)
// Step 3: second progress step succeeds.
s.RecordCatchUpProgress(sessID, 70, 2)
exec := NewCatchUpExecutor(driver, plan)
// EPOCH BUMPS between progress steps (real mid-execution).
driver.Orchestrator.InvalidateEpoch(2)
driver.Orchestrator.UpdateSenderEpoch("r1", 2)
// OnStep hook: bump epoch after step 1 (executor has made real progress).
exec.OnStep = func(step int) {
if step == 1 { // after second progress step succeeds
driver.Orchestrator.InvalidateEpoch(2)
driver.Orchestrator.UpdateSenderEpoch("r1", 2)
}
}
// Step 4: third progress step fails — session invalidated.
err := s.RecordCatchUpProgress(sessID, 80, 3)
// Executor runs 5 steps. After step 1, epoch bumps.
// Step 2's invalidation check catches the stale session.
err := exec.Execute([]uint64{60, 70, 80, 90, 100}, 0)
if err == nil {
t.Fatal("E2: progress after mid-execution epoch bump must fail")
t.Fatal("E2: executor must fail when epoch bumps between its managed steps")
}
// Executor cancel releases resources.
exec := NewCatchUpExecutor(driver, plan)
exec.Cancel("epoch_bump_after_progress")
// WAL pin released.
// Resources released by executor's own release path.
if len(storage.pinnedWAL) != 0 {
t.Fatal("E2: WAL pin must be released after mid-execution epoch bump")
t.Fatal("E2: WAL pin must be released by executor after mid-loop invalidation")
}
// E5: Log shows per-replica invalidation + resource release.
// E5: Log shows per-replica invalidation + executor resource release.
hasInvalidation := false
hasRelease := false
for _, e := range driver.Orchestrator.Log.EventsFor("r1") {
@ -160,10 +151,10 @@ func TestP3_E2_EpochBump_AfterExecutorProgress(t *testing.T) {
}
}
if !hasInvalidation {
t.Fatal("E2/E5: log must show session invalidation with epoch cause")
t.Fatal("E2/E5: log must show session invalidation")
}
if !hasRelease {
t.Fatal("E2/E5: log must show resource release on cancellation")
t.Fatal("E2/E5: log must show executor resource release")
}
}

Loading…
Cancel
Save