Browse Source

feat: add storage/control adapters and recovery driver (Phase 06 P0/P1)

Phase 06 module boundaries:

adapter.go — StorageAdapter + ControlPlaneAdapter interfaces:
- GetRetainedHistory: real WAL retention state
- PinSnapshot / ReleaseSnapshot: rebuild resource management
- PinWALRetention / ReleaseWALRetention: catch-up resource management
- HandleHeartbeat / HandleFailover: control-plane event conversion

driver.go — RecoveryDriver replaces synchronous convenience:
- PlanRecovery: connect + handshake from storage state + acquire resources
- PlanRebuild: acquire snapshot + WAL pins for rebuild
- ReleasePlan: release all acquired resources

Convenience flow classification:
- ProcessAssignment, UpdateSenderEpoch, InvalidateEpoch → stepwise engine tasks
- ExecuteRecovery → planner (connect + classify)
- CompleteCatchUp, CompleteRebuild → TEST-ONLY convenience

7 new tests (driver_test.go):
- CatchUp plan + execute with WAL pin
- ZeroGap plan (no resources pinned)
- NeedsRebuild → rebuild plan with resource acquisition
- WAL pin failure → logged + error
- Snapshot pin failure → logged + error
- ReplicaAhead truncation through driver
- Cross-layer: storage proves recoverability, engine consumes proof

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
feature/sw-block
pingqiu 1 day ago
parent
commit
f73a3fdab2
  1. 71
      sw-block/engine/replication/adapter.go
  2. 160
      sw-block/engine/replication/driver.go
  3. 344
      sw-block/engine/replication/driver_test.go

71
sw-block/engine/replication/adapter.go

@@ -0,0 +1,71 @@
package replication
// === Phase 06: Storage and Control-Plane Adapter Interfaces ===
//
// These interfaces define the boundary between the engine replication core
// and external systems (storage backend, coordinator/control plane).
// The engine consumes these interfaces — it does not reach into storage
// or control-plane internals directly.
// StorageAdapter is the engine's view of the storage backend. It supplies
// real retained-history and checkpoint state, and lets the engine hold
// (pin) snapshots and WAL ranges so they stay available for the duration
// of a recovery, grounding decisions in actual data rather than
// reconstructed test inputs.
type StorageAdapter interface {
	// GetRetainedHistory reports the backend's current WAL retention
	// state, reflecting the actual TailLSN, HeadLSN, CommittedLSN, and
	// checkpoint.
	GetRetainedHistory() RetainedHistory

	// PinSnapshot takes a hold on the checkpoint/base image at
	// checkpointLSN for rebuild use; a pinned snapshot must not be
	// garbage-collected. An error is returned when no valid snapshot
	// exists at that LSN.
	PinSnapshot(checkpointLSN uint64) (SnapshotPin, error)

	// ReleaseSnapshot drops a hold previously taken via PinSnapshot.
	ReleaseSnapshot(pin SnapshotPin)

	// PinWALRetention holds WAL entries from startLSN onward so the tail
	// cannot be reclaimed past the range a catch-up still needs. The
	// engine calls this before starting catch-up.
	PinWALRetention(startLSN uint64) (RetentionPin, error)

	// ReleaseWALRetention drops a hold previously taken via
	// PinWALRetention.
	ReleaseWALRetention(pin RetentionPin)
}
// SnapshotPin is a handle to a pinned snapshot/checkpoint reference.
type SnapshotPin struct {
	LSN   uint64 // LSN of the pinned snapshot
	PinID uint64 // unique identifier for this pin
	Valid bool   // set by the adapter when the pin is granted
}
// RetentionPin is a handle to a held WAL retention range.
type RetentionPin struct {
	StartLSN uint64 // first LSN of the held range
	PinID    uint64 // unique identifier for this pin
	Valid    bool   // set by the adapter when the pin is granted
}
// ControlPlaneAdapter translates external control-plane events
// (heartbeats, failovers) into AssignmentIntent values that the
// orchestrator can apply.
type ControlPlaneAdapter interface {
	// HandleHeartbeat ingests a heartbeat from one volume server and
	// returns whatever assignment updates should be applied as a result.
	HandleHeartbeat(serverID string, volumes []VolumeHeartbeat) []AssignmentIntent

	// HandleFailover reacts to a failover event and returns new
	// assignments for the replicas the dead server hosted.
	HandleFailover(deadServerID string) []AssignmentIntent
}
// VolumeHeartbeat carries a single volume's state inside a heartbeat.
type VolumeHeartbeat struct {
	VolumeID    string // volume being reported
	ReplicaID   string // replica hosted on the reporting server
	Epoch       uint64 // replication epoch the replica reports
	FlushedLSN  uint64 // LSN the replica reports as flushed
	State       string // replica state as reported by the server
	DataAddr    string // advertised data-plane address
	CtrlAddr    string // advertised control-plane address
	AddrVersion uint64 // version of the advertised addresses
}

160
sw-block/engine/replication/driver.go

@@ -0,0 +1,160 @@
package replication
import "fmt"
// === Phase 06: Execution Driver ===
//
// Convenience flow classification (Phase 06 P0):
//
// ProcessAssignment → stepwise engine task (real entry point)
// ExecuteRecovery → planner (connect + classify outcome)
// CompleteCatchUp → TEST-ONLY convenience (bundles plan+execute+complete)
// CompleteRebuild → TEST-ONLY convenience (bundles plan+execute+complete)
// UpdateSenderEpoch → stepwise engine task
// InvalidateEpoch → stepwise engine task
//
// The real engine flow splits catch-up and rebuild into:
// 1. Plan: acquire resources (pin WAL or snapshot)
// 2. Execute: stream entries stepwise (not one-shot)
// 3. Complete: release resources, transition to InSync
//
// RecoveryDriver is the Phase 06 replacement for the synchronous
// convenience helpers. It plans, acquires resources, and provides
// a stepwise execution interface.
// RecoveryPlan captures a planned recovery operation together with any
// storage resources acquired for it. A pin field is non-nil only when
// that resource was actually acquired during planning.
type RecoveryPlan struct {
	ReplicaID string
	SessionID uint64
	Outcome   RecoveryOutcome
	Proof     *RecoverabilityProof

	// Resource pins (non-nil when resources are acquired).
	RetentionPin *RetentionPin // WAL hold taken for catch-up
	SnapshotPin  *SnapshotPin  // snapshot hold taken for rebuild

	// Targets.
	CatchUpTarget uint64        // catch-up: LSN the replica must reach
	TruncateLSN   uint64        // non-zero when truncation is required first
	RebuildSource RebuildSource // where rebuild data comes from
}
// RecoveryDriver plans and executes recovery operations from real
// storage-adapter inputs. It supersedes the synchronous convenience
// helpers (CompleteCatchUp, CompleteRebuild) with a resource-aware,
// stepwise execution model.
type RecoveryDriver struct {
	Orchestrator *RecoveryOrchestrator // engine core driving recovery sessions
	Storage      StorageAdapter        // source of real retention state and pins
}
// NewRecoveryDriver builds a driver around the given storage adapter,
// wiring in a freshly constructed orchestrator.
func NewRecoveryDriver(storage StorageAdapter) *RecoveryDriver {
	d := &RecoveryDriver{Orchestrator: NewRecoveryOrchestrator()}
	d.Storage = storage
	return d
}
// PlanRecovery connects and handshakes from real storage state, classifies
// the outcome, and acquires whatever resources that outcome needs (a WAL
// retention pin for catch-up; nothing for zero-gap or needs-rebuild).
// The returned plan is executed stepwise by the caller and should
// eventually be passed to ReleasePlan.
func (d *RecoveryDriver) PlanRecovery(replicaID string, replicaFlushedLSN uint64) (*RecoveryPlan, error) {
	hist := d.Storage.GetRetainedHistory()

	res := d.Orchestrator.ExecuteRecovery(replicaID, replicaFlushedLSN, &hist)
	if res.Error != nil {
		return nil, res.Error
	}

	p := &RecoveryPlan{
		ReplicaID: replicaID,
		SessionID: d.Orchestrator.Registry.Sender(replicaID).SessionID(),
		Outcome:   res.Outcome,
		Proof:     res.Proof,
	}

	if res.Outcome != OutcomeCatchUp {
		// Zero-gap was already completed inside ExecuteRecovery, and a
		// rebuild needs an assignment first — neither acquires resources.
		return p, nil
	}

	// Catch-up: hold the WAL range before streaming starts.
	pin, err := d.Storage.PinWALRetention(replicaFlushedLSN)
	if err != nil {
		d.Orchestrator.Log.Record(replicaID, p.SessionID, "wal_pin_failed", err.Error())
		return nil, fmt.Errorf("WAL retention pin failed: %w", err)
	}
	p.RetentionPin = &pin
	p.CatchUpTarget = hist.CommittedLSN

	// A replica flushed past the committed history must truncate first.
	if proof := hist.ProveRecoverability(replicaFlushedLSN); proof.Reason == "replica_ahead_needs_truncation" {
		p.TruncateLSN = hist.CommittedLSN
	}

	d.Orchestrator.Log.Record(replicaID, p.SessionID, "plan_catchup",
		fmt.Sprintf("target=%d pin=%d truncate=%d", p.CatchUpTarget, pin.PinID, p.TruncateLSN))
	return p, nil
}
// PlanRebuild acquires rebuild resources from real storage state: a
// snapshot pin plus a WAL pin for tail replay when the snapshot+tail path
// is usable, or no pins at all when a full base transfer is required.
// Called after a rebuild assignment; release via ReleasePlan.
func (d *RecoveryDriver) PlanRebuild(replicaID string) (*RecoveryPlan, error) {
	hist := d.Storage.GetRetainedHistory()
	source, snapLSN := hist.RebuildSourceDecision()

	p := &RecoveryPlan{
		ReplicaID:     replicaID,
		SessionID:     d.Orchestrator.Registry.Sender(replicaID).SessionID(),
		Outcome:       OutcomeNeedsRebuild,
		RebuildSource: source,
	}

	if source != RebuildSnapshotTail {
		// Full base transfer: nothing to pin up front.
		d.Orchestrator.Log.Record(replicaID, p.SessionID, "plan_rebuild_full_base", "")
		return p, nil
	}

	// Hold the base image for the duration of the rebuild.
	snapPin, err := d.Storage.PinSnapshot(snapLSN)
	if err != nil {
		d.Orchestrator.Log.Record(replicaID, p.SessionID, "snapshot_pin_failed", err.Error())
		return nil, fmt.Errorf("snapshot pin failed: %w", err)
	}
	p.SnapshotPin = &snapPin

	// Hold the WAL from the snapshot LSN so the tail replay stays available.
	retPin, err := d.Storage.PinWALRetention(snapLSN)
	if err != nil {
		// Roll back the snapshot hold so a failed plan leaks nothing.
		d.Storage.ReleaseSnapshot(snapPin)
		d.Orchestrator.Log.Record(replicaID, p.SessionID, "wal_pin_failed", err.Error())
		return nil, fmt.Errorf("WAL retention pin failed: %w", err)
	}
	p.RetentionPin = &retPin

	d.Orchestrator.Log.Record(replicaID, p.SessionID, "plan_rebuild_snapshot_tail",
		fmt.Sprintf("snapshot=%d", snapLSN))
	return p, nil
}
// ReleasePlan releases any resources acquired by a plan. It is idempotent
// (each pin is cleared as it is released) and tolerates a nil plan, so
// callers may release unconditionally even on planning-error paths
// (PlanRecovery/PlanRebuild return a nil plan on error).
func (d *RecoveryDriver) ReleasePlan(plan *RecoveryPlan) {
	if plan == nil {
		// Nothing was planned, so nothing is held.
		return
	}
	if plan.RetentionPin != nil {
		d.Storage.ReleaseWALRetention(*plan.RetentionPin)
		plan.RetentionPin = nil
	}
	if plan.SnapshotPin != nil {
		d.Storage.ReleaseSnapshot(*plan.SnapshotPin)
		plan.SnapshotPin = nil
	}
}

344
sw-block/engine/replication/driver_test.go

@@ -0,0 +1,344 @@
package replication
import (
"fmt"
"sync/atomic"
"testing"
)
// ============================================================
// Phase 06 P0/P1: Recovery driver tests with mock storage adapter
// ============================================================
// --- Mock storage adapter ---

// mockStorage is an in-memory StorageAdapter for driver tests. It tracks
// outstanding pins by ID and can be told to refuse pin requests.
type mockStorage struct {
	history   RetainedHistory
	nextPinID atomic.Uint64

	pinnedSnaps map[uint64]bool // live snapshot pins, keyed by PinID
	pinnedWAL   map[uint64]bool // live WAL retention pins, keyed by PinID

	failSnapshotPin bool // when set, PinSnapshot refuses
	failWALPin      bool // when set, PinWALRetention refuses
}
// newMockStorage returns a mock adapter serving the given history with no
// pins outstanding.
func newMockStorage(history RetainedHistory) *mockStorage {
	m := &mockStorage{history: history}
	m.pinnedSnaps = make(map[uint64]bool)
	m.pinnedWAL = make(map[uint64]bool)
	return m
}
// GetRetainedHistory returns the fixed history this mock was built with.
func (m *mockStorage) GetRetainedHistory() RetainedHistory {
	return m.history
}
// PinSnapshot grants a snapshot pin with a fresh ID, or refuses when the
// test has set failSnapshotPin.
func (m *mockStorage) PinSnapshot(lsn uint64) (SnapshotPin, error) {
	if m.failSnapshotPin {
		return SnapshotPin{}, fmt.Errorf("snapshot pin refused")
	}
	pin := SnapshotPin{LSN: lsn, PinID: m.nextPinID.Add(1), Valid: true}
	m.pinnedSnaps[pin.PinID] = true
	return pin, nil
}
// ReleaseSnapshot forgets a snapshot pin; releasing twice is harmless.
func (m *mockStorage) ReleaseSnapshot(pin SnapshotPin) {
	delete(m.pinnedSnaps, pin.PinID)
}
// PinWALRetention grants a WAL retention pin with a fresh ID, or refuses
// when the test has set failWALPin.
func (m *mockStorage) PinWALRetention(startLSN uint64) (RetentionPin, error) {
	if m.failWALPin {
		return RetentionPin{}, fmt.Errorf("WAL retention pin refused")
	}
	pin := RetentionPin{StartLSN: startLSN, PinID: m.nextPinID.Add(1), Valid: true}
	m.pinnedWAL[pin.PinID] = true
	return pin, nil
}
// ReleaseWALRetention forgets a WAL pin; releasing twice is harmless.
func (m *mockStorage) ReleaseWALRetention(pin RetentionPin) {
	delete(m.pinnedWAL, pin.PinID)
}
// --- Plan + execute: catch-up ---

// TestDriver_PlanRecovery_CatchUp plans a catch-up, verifies a WAL pin was
// taken, then executes the catch-up and releases the plan's resources.
func TestDriver_PlanRecovery_CatchUp(t *testing.T) {
	store := newMockStorage(RetainedHistory{HeadLSN: 100, TailLSN: 30, CommittedLSN: 100})
	drv := NewRecoveryDriver(store)
	drv.Orchestrator.ProcessAssignment(AssignmentIntent{
		Replicas: []ReplicaAssignment{
			{ReplicaID: "r1", Endpoint: Endpoint{DataAddr: "r1:9333", Version: 1}},
		},
		Epoch:           1,
		RecoveryTargets: map[string]SessionKind{"r1": SessionCatchUp},
	})

	p, err := drv.PlanRecovery("r1", 70)
	if err != nil {
		t.Fatal(err)
	}
	if p.Outcome != OutcomeCatchUp {
		t.Fatalf("outcome=%s", p.Outcome)
	}
	if p.RetentionPin == nil {
		t.Fatal("WAL retention should be pinned")
	}
	if p.CatchUpTarget != 100 {
		t.Fatalf("target=%d", p.CatchUpTarget)
	}
	if !p.Proof.Recoverable {
		t.Fatalf("proof: %s", p.Proof.Reason)
	}

	// WAL is pinned.
	if got := len(store.pinnedWAL); got != 1 {
		t.Fatalf("expected 1 WAL pin, got %d", got)
	}

	// Execute catch-up via orchestrator, then release resources.
	drv.Orchestrator.CompleteCatchUp("r1", CatchUpOptions{TargetLSN: p.CatchUpTarget})
	drv.ReleasePlan(p)
	if len(store.pinnedWAL) != 0 {
		t.Fatal("WAL pin should be released")
	}
}
// --- Plan + execute: zero-gap ---

// TestDriver_PlanRecovery_ZeroGap verifies that a fully caught-up replica
// completes immediately without pinning any resources.
func TestDriver_PlanRecovery_ZeroGap(t *testing.T) {
	store := newMockStorage(RetainedHistory{HeadLSN: 100, TailLSN: 0, CommittedLSN: 100})
	drv := NewRecoveryDriver(store)
	drv.Orchestrator.ProcessAssignment(AssignmentIntent{
		Replicas: []ReplicaAssignment{
			{ReplicaID: "r1", Endpoint: Endpoint{DataAddr: "r1:9333", Version: 1}},
		},
		Epoch:           1,
		RecoveryTargets: map[string]SessionKind{"r1": SessionCatchUp},
	})

	p, err := drv.PlanRecovery("r1", 100)
	if err != nil {
		t.Fatal(err)
	}
	if p.Outcome != OutcomeZeroGap {
		t.Fatalf("outcome=%s", p.Outcome)
	}

	// Zero-gap: no resources pinned.
	if p.RetentionPin != nil {
		t.Fatal("zero-gap should not pin WAL")
	}

	// Already completed.
	sender := drv.Orchestrator.Registry.Sender("r1")
	if sender.State() != StateInSync {
		t.Fatalf("state=%s", sender.State())
	}
}
// --- Plan + execute: needs rebuild ---

// TestDriver_PlanRecovery_NeedsRebuild_ThenRebuild walks the full path:
// catch-up planning classifies as needs-rebuild, a rebuild is assigned,
// the rebuild plan picks full-base (checkpoint behind the WAL tail), and
// the rebuild completes to InSync.
func TestDriver_PlanRecovery_NeedsRebuild_ThenRebuild(t *testing.T) {
	store := newMockStorage(RetainedHistory{
		HeadLSN: 100, TailLSN: 60, CommittedLSN: 100,
		CheckpointLSN: 50, CheckpointTrusted: true,
	})
	drv := NewRecoveryDriver(store)
	assign := func(kind SessionKind) {
		drv.Orchestrator.ProcessAssignment(AssignmentIntent{
			Replicas: []ReplicaAssignment{
				{ReplicaID: "r1", Endpoint: Endpoint{DataAddr: "r1:9333", Version: 1}},
			},
			Epoch:           1,
			RecoveryTargets: map[string]SessionKind{"r1": kind},
		})
	}
	assign(SessionCatchUp)

	// Plan: catch-up fails.
	p, err := drv.PlanRecovery("r1", 30)
	if err != nil {
		t.Fatal(err)
	}
	if p.Outcome != OutcomeNeedsRebuild {
		t.Fatalf("outcome=%s", p.Outcome)
	}

	// Rebuild assignment, then plan rebuild with resource acquisition.
	assign(SessionRebuild)
	rp, err := drv.PlanRebuild("r1")
	if err != nil {
		t.Fatal(err)
	}
	// Checkpoint at 50, tail at 60 → unreplayable → full base.
	if rp.RebuildSource != RebuildFullBase {
		t.Fatalf("source=%s (checkpoint at 50 but tail at 60)", rp.RebuildSource)
	}

	// Execute rebuild via orchestrator.
	drv.Orchestrator.CompleteRebuild("r1", &store.history)
	if st := drv.Orchestrator.Registry.Sender("r1").State(); st != StateInSync {
		t.Fatalf("state=%s", st)
	}
}
// --- Resource failure: WAL pin refused ---

// TestDriver_PlanRecovery_WALPinFailure: a refused WAL pin fails the plan
// and leaves a wal_pin_failed entry in the orchestrator log.
func TestDriver_PlanRecovery_WALPinFailure(t *testing.T) {
	store := newMockStorage(RetainedHistory{HeadLSN: 100, TailLSN: 30, CommittedLSN: 100})
	store.failWALPin = true
	drv := NewRecoveryDriver(store)
	drv.Orchestrator.ProcessAssignment(AssignmentIntent{
		Replicas: []ReplicaAssignment{
			{ReplicaID: "r1", Endpoint: Endpoint{DataAddr: "r1:9333", Version: 1}},
		},
		Epoch:           1,
		RecoveryTargets: map[string]SessionKind{"r1": SessionCatchUp},
	})

	if _, err := drv.PlanRecovery("r1", 70); err == nil {
		t.Fatal("should fail when WAL pin is refused")
	}

	// Log should show the failure.
	var hasFailure bool
	for _, e := range drv.Orchestrator.Log.EventsFor("r1") {
		hasFailure = hasFailure || e.Event == "wal_pin_failed"
	}
	if !hasFailure {
		t.Fatal("log should contain wal_pin_failed")
	}
}
// --- Resource failure: snapshot pin refused → fallback ---

// TestDriver_PlanRebuild_SnapshotPinFailure: a refused snapshot pin fails
// the rebuild plan and leaves a snapshot_pin_failed entry in the log.
func TestDriver_PlanRebuild_SnapshotPinFailure(t *testing.T) {
	store := newMockStorage(RetainedHistory{
		HeadLSN: 100, TailLSN: 30, CommittedLSN: 100,
		CheckpointLSN: 50, CheckpointTrusted: true,
	})
	store.failSnapshotPin = true
	drv := NewRecoveryDriver(store)
	drv.Orchestrator.ProcessAssignment(AssignmentIntent{
		Replicas: []ReplicaAssignment{
			{ReplicaID: "r1", Endpoint: Endpoint{DataAddr: "r1:9333", Version: 1}},
		},
		Epoch:           1,
		RecoveryTargets: map[string]SessionKind{"r1": SessionRebuild},
	})

	if _, err := drv.PlanRebuild("r1"); err == nil {
		t.Fatal("should fail when snapshot pin is refused")
	}

	var hasFailure bool
	for _, e := range drv.Orchestrator.Log.EventsFor("r1") {
		hasFailure = hasFailure || e.Event == "snapshot_pin_failed"
	}
	if !hasFailure {
		t.Fatal("log should contain snapshot_pin_failed")
	}
}
// --- Replica-ahead with truncation through driver ---

// TestDriver_PlanRecovery_ReplicaAhead_Truncation: a replica flushed past
// the committed LSN gets a catch-up plan carrying a truncation target.
func TestDriver_PlanRecovery_ReplicaAhead_Truncation(t *testing.T) {
	store := newMockStorage(RetainedHistory{HeadLSN: 100, TailLSN: 0, CommittedLSN: 100})
	drv := NewRecoveryDriver(store)
	drv.Orchestrator.ProcessAssignment(AssignmentIntent{
		Replicas: []ReplicaAssignment{
			{ReplicaID: "r1", Endpoint: Endpoint{DataAddr: "r1:9333", Version: 1}},
		},
		Epoch:           1,
		RecoveryTargets: map[string]SessionKind{"r1": SessionCatchUp},
	})

	p, err := drv.PlanRecovery("r1", 105) // replica ahead
	if err != nil {
		t.Fatal(err)
	}
	if p.Outcome != OutcomeCatchUp {
		t.Fatalf("outcome=%s", p.Outcome)
	}
	if p.TruncateLSN != 100 {
		t.Fatalf("truncate=%d, want 100", p.TruncateLSN)
	}

	// Execute with truncation.
	opts := CatchUpOptions{TargetLSN: p.CatchUpTarget, TruncateLSN: p.TruncateLSN}
	if err := drv.Orchestrator.CompleteCatchUp("r1", opts); err != nil {
		t.Fatalf("catch-up with truncation: %v", err)
	}
	drv.ReleasePlan(p)
}
// --- Cross-layer contract: storage proves recoverability ---

// TestDriver_CrossLayer_StorageProvesRecoverability: the engine asks "is
// this recoverable?" and the storage adapter answers from real state —
// not from test-reconstructed inputs.
func TestDriver_CrossLayer_StorageProvesRecoverability(t *testing.T) {
	store := newMockStorage(RetainedHistory{
		HeadLSN: 100, TailLSN: 50, CommittedLSN: 100,
		CheckpointLSN: 40, CheckpointTrusted: true,
	})
	drv := NewRecoveryDriver(store)
	drv.Orchestrator.ProcessAssignment(AssignmentIntent{
		Replicas: []ReplicaAssignment{
			{ReplicaID: "r1", Endpoint: Endpoint{DataAddr: "r1:9333", Version: 1}},
		},
		Epoch:           1,
		RecoveryTargets: map[string]SessionKind{"r1": SessionCatchUp},
	})

	// Engine asks storage for recoverability proof (gap 60→100).
	hist := store.GetRetainedHistory()
	proof := hist.ProveRecoverability(60)
	if !proof.Recoverable {
		t.Fatalf("storage should prove recoverable: %s", proof.Reason)
	}

	// Engine asks for rebuild source decision.
	// Checkpoint at 40, tail at 50 → checkpoint < tail → unreplayable.
	source, snapLSN := hist.RebuildSourceDecision()
	if source != RebuildFullBase {
		t.Fatalf("source=%s snap=%d (checkpoint 40 < tail 50)", source, snapLSN)
	}

	// Failure is observable: log from PlanRecovery.
	p, _ := drv.PlanRecovery("r1", 60)
	if p.Proof == nil || !p.Proof.Recoverable {
		t.Fatal("plan should carry proof from storage")
	}
	drv.ReleasePlan(p)
}
Loading…
Cancel
Save