You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 
 

117 lines
3.8 KiB

package replication
import "fmt"
// RetainedHistory represents the primary's WAL retention state as seen
// by the recovery decision path. It answers "why is recovery allowed?"
// with executable proof, not just policy assertions.
//
// This is the engine-level equivalent of the prototype's WALHistory —
// it provides the recoverability inputs that ClassifyRecoveryOutcome
// and rebuild-source selection consume.
//
// The methods on this type only read these fields; none of them mutate
// the receiver.
type RetainedHistory struct {
// HeadLSN is the highest LSN written to the primary WAL.
// IsRecoverable rejects any range whose inclusive end exceeds it.
HeadLSN uint64
// TailLSN is the oldest retained LSN boundary (exclusive).
// Entries with LSN > TailLSN are available for catch-up.
// Entries with LSN <= TailLSN have been recycled.
// IsRecoverable rejects any range whose exclusive start is below it.
// MakeHandshakeResult reports TailLSN+1 as the retention start, or 0
// when TailLSN is 0.
TailLSN uint64
// CommittedLSN is the lineage-safe boundary — the highest LSN
// acknowledged as durable by the commit protocol.
// ProveRecoverability and RebuildSourceDecision use it as the
// inclusive end of the range that must be replayable.
CommittedLSN uint64
// CheckpointLSN is the highest LSN with a durable base image.
// Used for rebuild-source decision: if CheckpointLSN > 0 and
// the checkpoint is trusted, snapshot+tail rebuild is possible
// (provided the WAL from CheckpointLSN to CommittedLSN is retained).
CheckpointLSN uint64
// CheckpointTrusted indicates whether the checkpoint base image
// is known to be consistent and usable for rebuild.
// RebuildSourceDecision never selects snapshot+tail when this is false.
CheckpointTrusted bool
}
// MakeHandshakeResult builds the HandshakeResult the primary hands to a
// replica that reported replicaFlushedLSN as its flushed position.
//
// The result echoes replicaFlushedLSN, carries the primary's CommittedLSN,
// and sets RetentionStartLSN to the first LSN still held in the WAL:
// TailLSN+1, because TailLSN is an exclusive boundary. A TailLSN of 0 is
// reported as a retention start of 0 rather than 1.
func (rh *RetainedHistory) MakeHandshakeResult(replicaFlushedLSN uint64) HandshakeResult {
	// Starts at 0, which is exactly the value wanted for a zero tail.
	var firstRetained uint64
	if rh.TailLSN != 0 {
		firstRetained = rh.TailLSN + 1
	}

	return HandshakeResult{
		ReplicaFlushedLSN: replicaFlushedLSN,
		CommittedLSN:      rh.CommittedLSN,
		RetentionStartLSN: firstRetained,
	}
}
// IsRecoverable reports whether every entry in the half-open range
// (startExclusive, endInclusive] is still present in the retained WAL.
//
// The range is covered when it begins at or after the exclusive tail
// boundary (entries with LSN > TailLSN are retained) and ends no later
// than HeadLSN. The two bounds are checked independently; no ordering
// between startExclusive and endInclusive is enforced.
func (rh *RetainedHistory) IsRecoverable(startExclusive, endInclusive uint64) bool {
	startsAfterTail := startExclusive >= rh.TailLSN
	endsWithinHead := endInclusive <= rh.HeadLSN
	return startsAfterTail && endsWithinHead
}
// RebuildSourceDecision picks the rebuild source implied by the current
// retained history.
//
// It returns (RebuildSnapshotTail, CheckpointLSN) only when all of the
// following hold:
//   - the checkpoint is marked trusted,
//   - CheckpointLSN is non-zero, and
//   - the WAL range (CheckpointLSN, CommittedLSN] is replayable, i.e.
//     CheckpointLSN >= TailLSN and CommittedLSN <= HeadLSN.
//
// In every other case it returns (RebuildFullBase, 0).
func (rh *RetainedHistory) RebuildSourceDecision() (source RebuildSource, snapshotLSN uint64) {
	switch {
	case !rh.CheckpointTrusted, rh.CheckpointLSN == 0:
		// No usable checkpoint to start from.
		return RebuildFullBase, 0
	case !rh.IsRecoverable(rh.CheckpointLSN, rh.CommittedLSN):
		// Checkpoint exists, but the WAL tail after it is not fully retained.
		return RebuildFullBase, 0
	}
	return RebuildSnapshotTail, rh.CheckpointLSN
}
// RecoverabilityProof explains why a gap is or is not recoverable.
//
// ProveRecoverability fills it with the replica's reported position, a
// copy of the RetainedHistory boundaries the decision was based on, the
// verdict, and a textual reason for that verdict.
type RecoverabilityProof struct {
// ReplicaFlushedLSN is the flushed LSN the replica reported.
ReplicaFlushedLSN uint64
// CommittedLSN is the primary's CommittedLSN at decision time.
CommittedLSN uint64
// TailLSN is the primary's TailLSN at decision time.
TailLSN uint64
// HeadLSN is the primary's HeadLSN at decision time.
HeadLSN uint64
// Recoverable is the verdict: true when the replica does not need a rebuild
// according to ProveRecoverability.
Recoverable bool
// Reason describes the verdict. ProveRecoverability emits "zero_gap",
// "replica_ahead_needs_truncation", or a formatted string beginning with
// "gap_within_retention:" or "gap_beyond_retention:".
Reason string
}
// ProveRecoverability produces an explicit RecoverabilityProof for a replica
// that reported replicaFlushedLSN as its flushed position.
//
// The verdict is decided in this order:
//   - replica == CommittedLSN: recoverable, reason "zero_gap".
//   - replica > CommittedLSN: recoverable, reason
//     "replica_ahead_needs_truncation".
//   - (replica, CommittedLSN] is fully retained per IsRecoverable:
//     recoverable, reason "gap_within_retention: ...".
//   - otherwise: not recoverable, reason "gap_beyond_retention: ...".
//
// The proof always carries a copy of CommittedLSN, TailLSN and HeadLSN so the
// decision can be audited later.
func (rh *RetainedHistory) ProveRecoverability(replicaFlushedLSN uint64) RecoverabilityProof {
	committed := rh.CommittedLSN

	ok := true
	var why string
	switch {
	case replicaFlushedLSN == committed:
		why = "zero_gap"
	case replicaFlushedLSN > committed:
		why = "replica_ahead_needs_truncation"
	case rh.IsRecoverable(replicaFlushedLSN, committed):
		why = fmt.Sprintf("gap_within_retention: need LSN %d-%d, tail=%d head=%d",
			replicaFlushedLSN+1, committed, rh.TailLSN, rh.HeadLSN)
	default:
		// NOTE(review): this branch is also reached when CommittedLSN exceeds
		// HeadLSN, even though the message only mentions the tail.
		ok = false
		why = fmt.Sprintf("gap_beyond_retention: need LSN %d but tail=%d",
			replicaFlushedLSN+1, rh.TailLSN)
	}

	return RecoverabilityProof{
		ReplicaFlushedLSN: replicaFlushedLSN,
		CommittedLSN:      committed,
		TailLSN:           rh.TailLSN,
		HeadLSN:           rh.HeadLSN,
		Recoverable:       ok,
		Reason:            why,
	}
}