package multi_master

import (
	"testing"
	"time"

	"github.com/seaweedfs/seaweedfs/weed/pb"
)

const (
	// Election timeout is 3s in our cluster config; allow a generous margin.
	leaderElectionTimeout = 20 * time.Second
)

// TestLeaderDownAndRecoverQuickly verifies that when the leader is stopped and
// restarted quickly, the cluster re-elects a leader and the restarted node
// rejoins as a follower. TopologyId must be consistent across all nodes.
func TestLeaderDownAndRecoverQuickly(t *testing.T) {
	mc := StartMasterCluster(t)

	// Record initial state.
	leaderIdx, leaderAddr := mc.FindLeader()
	if leaderIdx < 0 {
		t.Fatal("no leader found after cluster start")
	}
	t.Logf("initial leader: node %d at %s", leaderIdx, leaderAddr)

	topologyId, err := mc.GetTopologyId(leaderIdx)
	if err != nil || topologyId == "" {
		t.Fatalf("failed to get initial TopologyId: %v", err)
	}
	t.Logf("initial TopologyId: %s", topologyId)

	// Stop the leader.
	mc.StopNode(leaderIdx)
	t.Logf("stopped leader node %d", leaderIdx)

	// Wait for a new leader from the remaining 2 nodes.
	newLeaderIdx, newLeaderAddr, err := mc.WaitForNewLeader(leaderAddr, leaderElectionTimeout)
	if err != nil {
		mc.DumpLogs()
		t.Fatalf("new leader not elected after stopping old leader: %v", err)
	}
	t.Logf("new leader: node %d at %s", newLeaderIdx, newLeaderAddr)

	// Restart the old leader quickly.
	mc.StartNode(leaderIdx)
	if err := mc.WaitForNodeReady(leaderIdx, waitTimeout); err != nil {
		mc.DumpLogs()
		t.Fatalf("restarted node %d not ready: %v", leaderIdx, err)
	}
	t.Logf("restarted node %d", leaderIdx)

	// Give raft time to settle.
	time.Sleep(3 * time.Second)

	// Verify leader is stable.
	finalLeaderIdx, _ := mc.FindLeader()
	if finalLeaderIdx < 0 {
		mc.DumpLogs()
		t.Fatal("no leader after restarting old leader node")
	}

	// Verify TopologyId is consistent across all nodes.
	assertTopologyIdConsistent(t, mc, topologyId)
}
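
// waitForStableLeader is an illustrative sketch (not called by the tests in
// this file) of how the fixed settle sleeps above could be replaced with
// polling: it returns once the same node has been observed as leader on two
// consecutive polls, or gives up when the timeout elapses. The 500ms poll
// interval is an assumption, not derived from the cluster config.
func waitForStableLeader(mc *MasterCluster, timeout time.Duration) (int, bool) {
	deadline := time.Now().Add(timeout)
	lastIdx := -1
	for time.Now().Before(deadline) {
		idx, _ := mc.FindLeader()
		if idx >= 0 && idx == lastIdx {
			// Same leader seen on two consecutive polls: treat as stable.
			return idx, true
		}
		lastIdx = idx
		time.Sleep(500 * time.Millisecond)
	}
	return -1, false
}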

// TestLeaderDownSlowRecover verifies that when the leader goes down and takes
// a long time to come back, the remaining 2 nodes elect a new leader and the
// cluster continues to function. When the slow node returns, it rejoins.
func TestLeaderDownSlowRecover(t *testing.T) {
	mc := StartMasterCluster(t)

	leaderIdx, leaderAddr := mc.FindLeader()
	if leaderIdx < 0 {
		t.Fatal("no leader found")
	}
	topologyId, err := mc.GetTopologyId(leaderIdx)
	if err != nil || topologyId == "" {
		t.Fatalf("failed to get initial TopologyId: %v", err)
	}
	t.Logf("initial leader: node %d, TopologyId: %s", leaderIdx, topologyId)

	// Stop the leader.
	mc.StopNode(leaderIdx)

	// Wait for a new leader.
	newLeaderIdx, _, err := mc.WaitForNewLeader(leaderAddr, leaderElectionTimeout)
	if err != nil {
		mc.DumpLogs()
		t.Fatalf("new leader not elected: %v", err)
	}
	t.Logf("new leader: node %d", newLeaderIdx)

	// Verify the cluster still functions with only 2 of 3 nodes (quorum is 2).
	cs, err := mc.GetClusterStatus(newLeaderIdx)
	if err != nil {
		mc.DumpLogs()
		t.Fatalf("cannot get cluster status from new leader: %v", err)
	}
	if !cs.IsLeader {
		t.Fatalf("node %d claims not to be leader", newLeaderIdx)
	}

	// Simulate slow recovery: wait significantly longer than the election timeout.
	t.Log("simulating slow recovery (10 seconds)...")
	time.Sleep(10 * time.Second)

	// Verify the leader is still stable during the outage.
	stableLeaderIdx, _ := mc.FindLeader()
	if stableLeaderIdx < 0 {
		mc.DumpLogs()
		t.Fatal("leader lost during extended outage of one node")
	}

	// Restart the downed node.
	mc.StartNode(leaderIdx)
	if err := mc.WaitForNodeReady(leaderIdx, waitTimeout); err != nil {
		mc.DumpLogs()
		t.Fatalf("slow-recovered node %d not ready: %v", leaderIdx, err)
	}

	time.Sleep(3 * time.Second)
	assertTopologyIdConsistent(t, mc, topologyId)
}

// TestTwoMastersDownAndRestart verifies that when 2 of 3 masters go down
// (losing quorum), the cluster cannot elect a leader. When both restart,
// a leader is elected and TopologyId is preserved.
func TestTwoMastersDownAndRestart(t *testing.T) {
	mc := StartMasterCluster(t)

	leaderIdx, _ := mc.FindLeader()
	if leaderIdx < 0 {
		t.Fatal("no leader found")
	}
	topologyId, err := mc.GetTopologyId(leaderIdx)
	if err != nil || topologyId == "" {
		t.Fatalf("failed to get initial TopologyId: %v", err)
	}
	t.Logf("initial TopologyId: %s", topologyId)

	// Determine which 2 nodes to stop (the leader plus one follower).
	down1 := leaderIdx
	down2 := (leaderIdx + 1) % 3
	survivor := (leaderIdx + 2) % 3
	t.Logf("stopping nodes %d and %d, keeping node %d", down1, down2, survivor)

	mc.StopNode(down1)
	mc.StopNode(down2)

	// The surviving node alone cannot form a quorum, so no leader is expected.
	// Wait long enough for any stale leadership to expire (the election
	// timeout is 3s in our config, and the quorum check fires once per
	// election timeout).
	time.Sleep(5 * time.Second)
	soloLeaderIdx, _ := mc.FindLeader()
	if soloLeaderIdx >= 0 {
		// It's possible the survivor briefly thinks it's leader before
		// stepping down. Give it time to realize it lost quorum.
		time.Sleep(5 * time.Second)
		soloLeaderIdx, _ = mc.FindLeader()
	}
	if soloLeaderIdx >= 0 {
		mc.DumpLogs()
		t.Fatalf("expected no leader with only 1 of 3 nodes, but node %d claims leadership", soloLeaderIdx)
	}

	// Restart both downed nodes.
	mc.StartNode(down1)
	mc.StartNode(down2)
	for _, i := range []int{down1, down2} {
		if err := mc.WaitForNodeReady(i, waitTimeout); err != nil {
			mc.DumpLogs()
			t.Fatalf("restarted node %d not ready: %v", i, err)
		}
	}

	// Wait for leader election.
	if err := mc.WaitForLeader(leaderElectionTimeout); err != nil {
		mc.DumpLogs()
		t.Fatalf("no leader after restarting 2 downed nodes: %v", err)
	}

	time.Sleep(3 * time.Second)
	assertTopologyIdConsistent(t, mc, topologyId)
}
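
// expectNoLeader is an illustrative sketch (not called by the tests in this
// file) of the quorum-loss check above as a reusable helper: it allows a
// grace period for a stale leader to step down, then requires that no node
// claims leadership. The 1-second poll interval is an assumption.
func expectNoLeader(t *testing.T, mc *MasterCluster, grace time.Duration) {
	t.Helper()
	deadline := time.Now().Add(grace)
	for {
		idx, _ := mc.FindLeader()
		if idx < 0 {
			return // no node claims leadership
		}
		if time.Now().After(deadline) {
			t.Fatalf("expected no leader, but node %d still claims leadership", idx)
		}
		time.Sleep(time.Second)
	}
}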

// TestAllMastersDownAndRestart verifies that when all 3 masters are stopped
// and restarted, the cluster elects a leader and all nodes agree on a
// TopologyId. With RaftResumeState=false (default), raft state is cleared on
// restart. The TopologyId is recovered from snapshots when available; on a
// short-lived cluster that hasn't taken snapshots on all nodes, a new
// TopologyId may be generated, but all nodes must still agree.
func TestAllMastersDownAndRestart(t *testing.T) {
	mc := StartMasterCluster(t)

	leaderIdx, _ := mc.FindLeader()
	if leaderIdx < 0 {
		t.Fatal("no leader found")
	}
	topologyId, err := mc.GetTopologyId(leaderIdx)
	if err != nil || topologyId == "" {
		t.Fatalf("no TopologyId on initial leader: %v", err)
	}
	t.Logf("initial TopologyId: %s", topologyId)

	// Stop all nodes.
	for i := range 3 {
		mc.StopNode(i)
	}
	t.Log("all nodes stopped")

	time.Sleep(2 * time.Second)

	// Restart all nodes.
	for i := range 3 {
		mc.StartNode(i)
	}
	for i := range 3 {
		if err := mc.WaitForNodeReady(i, waitTimeout); err != nil {
			mc.DumpLogs()
			t.Fatalf("node %d not ready after full restart: %v", i, err)
		}
	}

	// Wait for a leader.
	if err := mc.WaitForLeader(leaderElectionTimeout); err != nil {
		mc.DumpLogs()
		t.Fatalf("no leader after full cluster restart: %v", err)
	}

	newLeaderIdx, _ := mc.FindLeader()
	t.Logf("leader after full restart: node %d", newLeaderIdx)

	time.Sleep(3 * time.Second)

	// All nodes must agree on a TopologyId (it may differ from the original
	// if snapshots were not yet taken on all nodes before shutdown).
	newTopologyId, err := mc.GetTopologyId(newLeaderIdx)
	if err != nil || newTopologyId == "" {
		mc.DumpLogs()
		t.Fatal("no TopologyId after full restart")
	}
	if newTopologyId == topologyId {
		t.Logf("TopologyId preserved across full restart: %s", topologyId)
	} else {
		t.Logf("TopologyId changed (expected for a short-lived cluster without snapshots): %s -> %s", topologyId, newTopologyId)
	}
	assertTopologyIdConsistent(t, mc, newTopologyId)
}
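
// restartAllNodes is an illustrative sketch (not called by the tests above)
// that factors out the stop-all/start-all/wait cycle used by
// TestAllMastersDownAndRestart. The 2-second pause between stop and start
// mirrors the test above; it is an assumption, not a cluster requirement.
func restartAllNodes(t *testing.T, mc *MasterCluster) {
	t.Helper()
	for i := range 3 {
		mc.StopNode(i)
	}
	time.Sleep(2 * time.Second)
	for i := range 3 {
		mc.StartNode(i)
	}
	for i := range 3 {
		if err := mc.WaitForNodeReady(i, waitTimeout); err != nil {
			mc.DumpLogs()
			t.Fatalf("node %d not ready after restart: %v", i, err)
		}
	}
}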

// TestLeaderConsistencyAcrossNodes verifies that all nodes agree on who the
// leader is and report the same TopologyId.
func TestLeaderConsistencyAcrossNodes(t *testing.T) {
	mc := StartMasterCluster(t)

	// Allow cluster to stabilize.
	time.Sleep(3 * time.Second)

	leaderIdx, leaderAddr := mc.FindLeader()
	if leaderIdx < 0 {
		t.Fatal("no leader found")
	}
	t.Logf("leader: node %d at %s", leaderIdx, leaderAddr)

	// Every node should agree on the leader.
	for i := range 3 {
		cs, err := mc.GetClusterStatus(i)
		if err != nil {
			t.Fatalf("node %d cluster/status error: %v", i, err)
		}
		if i == leaderIdx {
			if !cs.IsLeader {
				t.Errorf("node %d should be leader but IsLeader=false", i)
			}
		} else {
			if cs.IsLeader {
				t.Errorf("node %d should not be leader but IsLeader=true", i)
			}
			// cs.Leader is a ServerAddress like "127.0.0.1:10000.20000";
			// convert to HTTP address for comparison with leaderAddr.
			leaderHttp := pb.ServerAddress(cs.Leader).ToHttpAddress()
			if leaderHttp != leaderAddr {
				t.Errorf("node %d reports leader %q (http: %s), expected %q", i, cs.Leader, leaderHttp, leaderAddr)
			}
		}
	}

	// All nodes should have the same TopologyId.
	topologyId, _ := mc.GetTopologyId(leaderIdx)
	if topologyId == "" {
		t.Fatal("leader has no TopologyId")
	}
	assertTopologyIdConsistent(t, mc, topologyId)
}

// assertTopologyIdConsistent verifies that all running nodes report the
// expected TopologyId.
func assertTopologyIdConsistent(t *testing.T, mc *MasterCluster, expectedId string) {
	t.Helper()
	for i := range 3 {
		if !mc.IsNodeRunning(i) {
			continue
		}
		id, err := mc.GetTopologyId(i)
		if err != nil {
			t.Errorf("node %d: failed to get TopologyId: %v", i, err)
			continue
		}
		if id != expectedId {
			t.Errorf("node %d: TopologyId=%q, expected %q", i, id, expectedId)
		}
	}
}
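
// dumpTopologyIds is an illustrative debugging sketch (not called by the
// tests above): it logs the TopologyId reported by every running node, which
// can make assertTopologyIdConsistent failures easier to diagnose.
func dumpTopologyIds(t *testing.T, mc *MasterCluster) {
	t.Helper()
	for i := range 3 {
		if !mc.IsNodeRunning(i) {
			t.Logf("node %d: not running", i)
			continue
		}
		id, err := mc.GetTopologyId(i)
		if err != nil {
			t.Logf("node %d: TopologyId error: %v", i, err)
			continue
		}
		t.Logf("node %d: TopologyId=%q", i, id)
	}
}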