You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
323 lines
9.1 KiB
323 lines
9.1 KiB
package replication
|
|
|
|
import "testing"
|
|
|
|
// ============================================================
|
|
// Phase 05 Slice 1: Engine ownership/fencing tests
|
|
// ============================================================
|
|
|
|
// Helper: build ReplicaAssignment list from map.
|
|
func replicas(m map[string]Endpoint) []ReplicaAssignment {
|
|
var out []ReplicaAssignment
|
|
for id, ep := range m {
|
|
out = append(out, ReplicaAssignment{ReplicaID: id, Endpoint: ep})
|
|
}
|
|
return out
|
|
}
|
|
|
|
// --- Changed-address invalidation (A10) ---
|
|
|
|
func TestEngine_ChangedDataAddr_PreservesSenderIdentity(t *testing.T) {
|
|
// THE core V2 test: DataAddr changes but stable ReplicaID stays.
|
|
// Sender must survive. Session must be invalidated (endpoint changed).
|
|
r := NewRegistry()
|
|
r.ApplyAssignment(AssignmentIntent{
|
|
Replicas: []ReplicaAssignment{
|
|
{ReplicaID: "replica-1", Endpoint: Endpoint{DataAddr: "10.0.0.1:9333", CtrlAddr: "10.0.0.1:9334", Version: 1}},
|
|
},
|
|
Epoch: 1,
|
|
RecoveryTargets: map[string]SessionKind{"replica-1": SessionCatchUp},
|
|
})
|
|
|
|
s := r.Sender("replica-1")
|
|
sessID := s.SessionID()
|
|
s.BeginConnect(sessID)
|
|
|
|
// DataAddr changes (replica restarted on different port/IP).
|
|
r.Reconcile([]ReplicaAssignment{
|
|
{ReplicaID: "replica-1", Endpoint: Endpoint{DataAddr: "10.0.0.2:9333", CtrlAddr: "10.0.0.2:9334", Version: 2}},
|
|
}, 1)
|
|
|
|
// Sender identity preserved (same pointer, same ReplicaID).
|
|
if r.Sender("replica-1") != s {
|
|
t.Fatal("sender identity must be preserved across DataAddr change")
|
|
}
|
|
// Session invalidated (endpoint changed).
|
|
if s.HasActiveSession() {
|
|
t.Fatal("session should be invalidated by DataAddr change")
|
|
}
|
|
// Endpoint updated.
|
|
if s.Endpoint().DataAddr != "10.0.0.2:9333" {
|
|
t.Fatalf("endpoint not updated: %s", s.Endpoint().DataAddr)
|
|
}
|
|
|
|
// New session can be attached on the updated endpoint.
|
|
result := r.ApplyAssignment(AssignmentIntent{
|
|
Replicas: []ReplicaAssignment{
|
|
{ReplicaID: "replica-1", Endpoint: Endpoint{DataAddr: "10.0.0.2:9333", CtrlAddr: "10.0.0.2:9334", Version: 2}},
|
|
},
|
|
Epoch: 1,
|
|
RecoveryTargets: map[string]SessionKind{"replica-1": SessionCatchUp},
|
|
})
|
|
if len(result.SessionsCreated) != 1 {
|
|
t.Fatalf("should create new session: %v", result)
|
|
}
|
|
if s.SessionID() == sessID {
|
|
t.Fatal("new session should have different ID")
|
|
}
|
|
t.Logf("DataAddr changed: sender preserved, old session invalidated, new session attached")
|
|
}
|
|
|
|
func TestEngine_ChangedCtrlAddr_InvalidatesSession(t *testing.T) {
|
|
r := NewRegistry()
|
|
r.ApplyAssignment(AssignmentIntent{
|
|
Replicas: replicas(map[string]Endpoint{
|
|
"r1": {DataAddr: "10.0.0.1:9333", CtrlAddr: "10.0.0.1:9334", Version: 1},
|
|
}),
|
|
Epoch: 1,
|
|
RecoveryTargets: map[string]SessionKind{"r1": SessionCatchUp},
|
|
})
|
|
|
|
s := r.Sender("r1")
|
|
sessID := s.SessionID()
|
|
s.BeginConnect(sessID)
|
|
|
|
r.Reconcile(replicas(map[string]Endpoint{
|
|
"r1": {DataAddr: "10.0.0.1:9333", CtrlAddr: "10.0.0.1:9445", Version: 2},
|
|
}), 1)
|
|
|
|
if s.HasActiveSession() {
|
|
t.Fatal("CtrlAddr change should invalidate session")
|
|
}
|
|
if s.State() != StateDisconnected {
|
|
t.Fatalf("state=%s", s.State())
|
|
}
|
|
}
|
|
|
|
// --- Stale-session rejection (A3) ---
|
|
|
|
func TestEngine_StaleSessionID_RejectedAtAllAPIs(t *testing.T) {
|
|
s := NewSender("r1", Endpoint{DataAddr: "r1:9333", Version: 1}, 1)
|
|
staleID, _ := s.AttachSession(1, SessionCatchUp)
|
|
|
|
s.UpdateEpoch(2)
|
|
s.AttachSession(2, SessionCatchUp)
|
|
|
|
if err := s.BeginConnect(staleID); err == nil {
|
|
t.Fatal("stale BeginConnect should reject")
|
|
}
|
|
if err := s.RecordHandshake(staleID, 0, 10); err == nil {
|
|
t.Fatal("stale RecordHandshake should reject")
|
|
}
|
|
if err := s.BeginCatchUp(staleID); err == nil {
|
|
t.Fatal("stale BeginCatchUp should reject")
|
|
}
|
|
if err := s.RecordCatchUpProgress(staleID, 5); err == nil {
|
|
t.Fatal("stale RecordCatchUpProgress should reject")
|
|
}
|
|
if s.CompleteSessionByID(staleID) {
|
|
t.Fatal("stale CompleteSessionByID should reject")
|
|
}
|
|
}
|
|
|
|
func TestEngine_StaleCompletion_AfterSupersede(t *testing.T) {
|
|
s := NewSender("r1", Endpoint{DataAddr: "r1:9333", Version: 1}, 1)
|
|
id1, _ := s.AttachSession(1, SessionCatchUp)
|
|
|
|
s.UpdateEpoch(2)
|
|
s.AttachSession(2, SessionCatchUp)
|
|
|
|
if s.CompleteSessionByID(id1) {
|
|
t.Fatal("stale completion must be rejected")
|
|
}
|
|
if !s.HasActiveSession() {
|
|
t.Fatal("new session should be active")
|
|
}
|
|
}
|
|
|
|
// --- Epoch-bump invalidation (A3) ---
|
|
|
|
func TestEngine_EpochBump_InvalidatesAllSessions(t *testing.T) {
|
|
r := NewRegistry()
|
|
r.ApplyAssignment(AssignmentIntent{
|
|
Replicas: replicas(map[string]Endpoint{
|
|
"r1": {DataAddr: "r1:9333", Version: 1},
|
|
"r2": {DataAddr: "r2:9333", Version: 1},
|
|
}),
|
|
Epoch: 1,
|
|
RecoveryTargets: map[string]SessionKind{
|
|
"r1": SessionCatchUp,
|
|
"r2": SessionCatchUp,
|
|
},
|
|
})
|
|
|
|
count := r.InvalidateEpoch(2)
|
|
if count != 2 {
|
|
t.Fatalf("invalidated=%d, want 2", count)
|
|
}
|
|
}
|
|
|
|
func TestEngine_EpochBump_StaleAssignment_Rejected(t *testing.T) {
|
|
r := NewRegistry()
|
|
r.ApplyAssignment(AssignmentIntent{
|
|
Replicas: replicas(map[string]Endpoint{"r1": {DataAddr: "r1:9333", Version: 1}}),
|
|
Epoch: 2,
|
|
})
|
|
|
|
result := r.ApplyAssignment(AssignmentIntent{
|
|
Replicas: replicas(map[string]Endpoint{"r1": {DataAddr: "r1:9333", Version: 1}}),
|
|
Epoch: 1,
|
|
RecoveryTargets: map[string]SessionKind{"r1": SessionCatchUp},
|
|
})
|
|
|
|
if len(result.SessionsFailed) != 1 {
|
|
t.Fatalf("stale epoch should fail: %v", result)
|
|
}
|
|
}
|
|
|
|
// --- Rebuild exclusivity ---
|
|
|
|
func TestEngine_Rebuild_CatchUpAPIs_Rejected(t *testing.T) {
|
|
s := NewSender("r1", Endpoint{DataAddr: "r1:9333", Version: 1}, 1)
|
|
sessID, _ := s.AttachSession(1, SessionRebuild)
|
|
s.BeginConnect(sessID)
|
|
s.RecordHandshake(sessID, 0, 100)
|
|
|
|
if err := s.BeginCatchUp(sessID); err == nil {
|
|
t.Fatal("rebuild: BeginCatchUp should reject")
|
|
}
|
|
if s.CompleteSessionByID(sessID) {
|
|
t.Fatal("rebuild: catch-up completion should reject")
|
|
}
|
|
}
|
|
|
|
func TestEngine_Rebuild_FullLifecycle(t *testing.T) {
|
|
s := NewSender("r1", Endpoint{DataAddr: "r1:9333", Version: 1}, 1)
|
|
sessID, _ := s.AttachSession(1, SessionRebuild)
|
|
|
|
s.BeginConnect(sessID)
|
|
s.RecordHandshake(sessID, 0, 100)
|
|
s.SelectRebuildSource(sessID, 50, true, 100)
|
|
s.BeginRebuildTransfer(sessID)
|
|
s.RecordRebuildTransferProgress(sessID, 50)
|
|
s.BeginRebuildTailReplay(sessID)
|
|
s.RecordRebuildTailProgress(sessID, 100)
|
|
|
|
if err := s.CompleteRebuild(sessID); err != nil {
|
|
t.Fatalf("rebuild: %v", err)
|
|
}
|
|
if s.State() != StateInSync {
|
|
t.Fatalf("state=%s", s.State())
|
|
}
|
|
}
|
|
|
|
// --- Bounded catch-up ---
|
|
|
|
func TestEngine_FrozenTarget_RejectsChase(t *testing.T) {
|
|
s := NewSender("r1", Endpoint{DataAddr: "r1:9333", Version: 1}, 1)
|
|
sessID, _ := s.AttachSession(1, SessionCatchUp)
|
|
|
|
s.BeginConnect(sessID)
|
|
s.RecordHandshake(sessID, 0, 50)
|
|
s.BeginCatchUp(sessID)
|
|
|
|
if err := s.RecordCatchUpProgress(sessID, 51); err == nil {
|
|
t.Fatal("beyond frozen target should be rejected")
|
|
}
|
|
}
|
|
|
|
func TestEngine_BudgetViolation_Escalates(t *testing.T) {
|
|
s := NewSender("r1", Endpoint{DataAddr: "r1:9333", Version: 1}, 1)
|
|
sessID, _ := s.AttachSession(1, SessionCatchUp, WithBudget(CatchUpBudget{MaxDurationTicks: 5}))
|
|
|
|
s.BeginConnect(sessID)
|
|
s.RecordHandshake(sessID, 0, 100)
|
|
s.BeginCatchUp(sessID, 0)
|
|
s.RecordCatchUpProgress(sessID, 10)
|
|
|
|
v, _ := s.CheckBudget(sessID, 10)
|
|
if v != BudgetDurationExceeded {
|
|
t.Fatalf("budget=%s", v)
|
|
}
|
|
if s.State() != StateNeedsRebuild {
|
|
t.Fatalf("state=%s", s.State())
|
|
}
|
|
}
|
|
|
|
// --- Encapsulation ---
|
|
|
|
func TestEngine_Encapsulation_SnapshotIsReadOnly(t *testing.T) {
|
|
s := NewSender("r1", Endpoint{DataAddr: "r1:9333", Version: 1}, 1)
|
|
sessID, _ := s.AttachSession(1, SessionCatchUp)
|
|
|
|
snap := s.SessionSnapshot()
|
|
snap.Phase = PhaseCompleted
|
|
snap.Active = false
|
|
|
|
if !s.HasActiveSession() {
|
|
t.Fatal("snapshot mutation should not affect sender")
|
|
}
|
|
if err := s.BeginConnect(sessID); err != nil {
|
|
t.Fatalf("execution should still work: %v", err)
|
|
}
|
|
}
|
|
|
|
// --- E2E ---
|
|
|
|
func TestEngine_E2E_ThreeReplicas_ThreeOutcomes(t *testing.T) {
|
|
r := NewRegistry()
|
|
r.ApplyAssignment(AssignmentIntent{
|
|
Replicas: replicas(map[string]Endpoint{
|
|
"r1": {DataAddr: "r1:9333", Version: 1},
|
|
"r2": {DataAddr: "r2:9333", Version: 1},
|
|
"r3": {DataAddr: "r3:9333", Version: 1},
|
|
}),
|
|
Epoch: 1,
|
|
RecoveryTargets: map[string]SessionKind{
|
|
"r1": SessionCatchUp,
|
|
"r2": SessionCatchUp,
|
|
"r3": SessionCatchUp,
|
|
},
|
|
})
|
|
|
|
// r1: zero-gap.
|
|
r1 := r.Sender("r1")
|
|
id1 := r1.SessionID()
|
|
r1.BeginConnect(id1)
|
|
o1, _ := r1.RecordHandshakeWithOutcome(id1, HandshakeResult{
|
|
ReplicaFlushedLSN: 100, CommittedLSN: 100, RetentionStartLSN: 50,
|
|
})
|
|
if o1 != OutcomeZeroGap {
|
|
t.Fatalf("r1: %s", o1)
|
|
}
|
|
r1.CompleteSessionByID(id1)
|
|
|
|
// r2: catch-up.
|
|
r2 := r.Sender("r2")
|
|
id2 := r2.SessionID()
|
|
r2.BeginConnect(id2)
|
|
o2, _ := r2.RecordHandshakeWithOutcome(id2, HandshakeResult{
|
|
ReplicaFlushedLSN: 70, CommittedLSN: 100, RetentionStartLSN: 50,
|
|
})
|
|
if o2 != OutcomeCatchUp {
|
|
t.Fatalf("r2: %s", o2)
|
|
}
|
|
r2.BeginCatchUp(id2)
|
|
r2.RecordCatchUpProgress(id2, 100)
|
|
r2.CompleteSessionByID(id2)
|
|
|
|
// r3: needs rebuild.
|
|
r3 := r.Sender("r3")
|
|
id3 := r3.SessionID()
|
|
r3.BeginConnect(id3)
|
|
o3, _ := r3.RecordHandshakeWithOutcome(id3, HandshakeResult{
|
|
ReplicaFlushedLSN: 10, CommittedLSN: 100, RetentionStartLSN: 50,
|
|
})
|
|
if o3 != OutcomeNeedsRebuild {
|
|
t.Fatalf("r3: %s", o3)
|
|
}
|
|
|
|
if r.InSyncCount() != 2 {
|
|
t.Fatalf("in_sync=%d", r.InSyncCount())
|
|
}
|
|
}
|