feat: CP11B-3 safe ops — promotion hardening, preflight, manual promote

Six-task checkpoint hardening the promotion and failover paths: T1: 4-gate candidate evaluation (heartbeat freshness, WAL lag, role, server liveness) with structured rejection reasons. T2: Orphaned-primary re-evaluation on replica reconnect (B-06/B-08). T3: Deferred timer safety — epoch validation prevents stale timers from firing on recreated/changed volumes (B-07). T4: Rebuild addr cleanup on promotion (B-11), NVMe publication refresh on heartbeat, and preflight endpoint wiring. T5: Manual promote API — POST /block/volume/{name}/promote with force flag, target server selection, and structured rejection response. Shared applyPromotionLocked/finalizePromotion helpers eliminate duplication between auto and manual paths. T6: Read-only preflight endpoint (GET /block/volume/{name}/preflight) and blockapi client wrappers (Preflight, Promote). BUG-T5-1: PromotionsTotal counter moved to finalizePromotion (shared by both auto and manual paths) to prevent metrics divergence. 24 files changed, ~6500 lines added. 42 new QA adversarial tests. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
3 weeks ago · 075ff52219
24 changed files with 6540 additions and 114 deletions
--- a/weed/server/integration_block_test.go
+++ b/weed/server/integration_block_test.go
@ -645,13 +645,16 @@ func TestIntegration_DoubleFailover(t *testing.T) {
 	// Reconnect vs1 first so it becomes a replica (via recoverBlockVolumes).
 	ms.recoverBlockVolumes(vs1)

-	// Simulate heartbeat from vs1 that restores iSCSI addr and health score
-	// (in production this happens when the VS re-registers after reconnect).
+	// Simulate heartbeat from vs1 that restores iSCSI addr, health score,
+	// role, and heartbeat timestamp (in production this happens when the
+	// VS re-registers after reconnect and completes rebuild).
 	e1, _ = ms.blockRegistry.Lookup("pvc-double-1")
 	for i := range e1.Replicas {
 		if e1.Replicas[i].Server == vs1 {
 			e1.Replicas[i].ISCSIAddr = vs1 + ":3260"
 			e1.Replicas[i].HealthScore = 1.0
+			e1.Replicas[i].Role = blockvol.RoleToWire(blockvol.RoleReplica)
+			e1.Replicas[i].LastHeartbeat = time.Now()
 		}
 	}

--- a/weed/server/master_block_failover.go
+++ b/weed/server/master_block_failover.go
@ -57,7 +57,19 @@ func (ms *MasterServer) failoverBlockVolumes(deadServer string) {
 				delay := leaseExpiry.Sub(now)
 				glog.V(0).Infof("failover: %q lease expires in %v, deferring promotion", entry.Name, delay)
 				volumeName := entry.Name
+				capturedEpoch := entry.Epoch // T3: capture epoch for stale-timer validation
 				timer := time.AfterFunc(delay, func() {
+					// T3: Re-validate before acting — prevent stale timer on recreated/changed volume.
+					current, ok := ms.blockRegistry.Lookup(volumeName)
+					if !ok {
+						glog.V(0).Infof("failover: deferred promotion for %q skipped (volume deleted)", volumeName)
+						return
+					}
+					if current.Epoch != capturedEpoch {
+						glog.V(0).Infof("failover: deferred promotion for %q skipped (epoch changed %d -> %d)",
+							volumeName, capturedEpoch, current.Epoch)
+						return
+					}
 					ms.promoteReplica(volumeName)
 				})
 				ms.blockFailover.mu.Lock()
@ -116,8 +128,15 @@ func (ms *MasterServer) promoteReplica(volumeName string) {
 		return
 	}

+	ms.finalizePromotion(volumeName, oldPrimary, oldPath, newEpoch)
+}
+
+// finalizePromotion performs post-registry promotion steps:
+// enqueue assignment for new primary, record pending rebuild for old primary, bump metrics.
+// Called by both promoteReplica (auto) and blockVolumePromoteHandler (manual).
+func (ms *MasterServer) finalizePromotion(volumeName, oldPrimary, oldPath string, newEpoch uint64) {
 	// Re-read entry after promotion.
-	entry, ok = ms.blockRegistry.Lookup(volumeName)
+	entry, ok := ms.blockRegistry.Lookup(volumeName)
 	if !ok {
 		return
 	}
@ -198,11 +217,15 @@ func (ms *MasterServer) cancelDeferredTimers(server string) {

 // recoverBlockVolumes is called when a previously dead VS reconnects.
 // It cancels any deferred promotion timers (R2-F2), drains pending rebuilds,
-// and enqueues rebuild assignments.
+// enqueues rebuild assignments, and checks for orphaned primaries (T2/B-06).
 func (ms *MasterServer) recoverBlockVolumes(reconnectedServer string) {
 	// R2-F2: Cancel deferred promotion timers for this server to prevent split-brain.
 	ms.cancelDeferredTimers(reconnectedServer)

+	// T2 (B-06): Check for orphaned primaries — volumes where the reconnecting
+	// server is a replica but the primary is dead/disconnected.
+	ms.reevaluateOrphanedPrimaries(reconnectedServer)
+
 	rebuilds := ms.drainPendingRebuilds(reconnectedServer)
 	if len(rebuilds) == 0 {
 		return
@ -221,16 +244,74 @@ func (ms *MasterServer) recoverBlockVolumes(reconnectedServer string) {
 			Path:   rb.OldPath,
 		})

+		// T4: Warn if RebuildListenAddr is empty (new primary hasn't heartbeated yet).
+		rebuildAddr := entry.RebuildListenAddr
+		if rebuildAddr == "" {
+			glog.Warningf("rebuild: %q RebuildListenAddr is empty (new primary %s may not have heartbeated yet), "+
+				"queuing rebuild anyway — VS should retry on empty addr", rb.VolumeName, entry.VolumeServer)
+		}
+
 		// Enqueue rebuild assignment for the reconnected server.
 		ms.blockAssignmentQueue.Enqueue(reconnectedServer, blockvol.BlockVolumeAssignment{
 			Path:        rb.OldPath,
 			Epoch:       entry.Epoch,
 			Role:        blockvol.RoleToWire(blockvol.RoleRebuilding),
-			RebuildAddr: entry.RebuildListenAddr,
+			RebuildAddr: rebuildAddr,
 		})

 		ms.blockRegistry.RebuildsTotal.Add(1)
 		glog.V(0).Infof("rebuild: enqueued rebuild for %q on %s (epoch=%d, rebuildAddr=%s)",
-			rb.VolumeName, reconnectedServer, entry.Epoch, entry.RebuildListenAddr)
+			rb.VolumeName, reconnectedServer, entry.Epoch, rebuildAddr)
+	}
+}
+
+// reevaluateOrphanedPrimaries checks if the given server is a replica for any
+// volumes whose primary is dead (not block-capable). If so, promotes the best
+// available replica — but only after the old primary's lease has expired, to
+// maintain the same split-brain protection as failoverBlockVolumes().
+// This fixes B-06 (orphaned primary after replica re-register)
+// and partially B-08 (fast reconnect skips failover window).
+func (ms *MasterServer) reevaluateOrphanedPrimaries(server string) {
+	if ms.blockRegistry == nil {
+		return
+	}
+	orphaned := ms.blockRegistry.VolumesWithDeadPrimary(server)
+	now := time.Now()
+	for _, volumeName := range orphaned {
+		entry, ok := ms.blockRegistry.Lookup(volumeName)
+		if !ok {
+			continue
+		}
+
+		// Respect lease expiry — same gate as failoverBlockVolumes().
+		leaseExpiry := entry.LastLeaseGrant.Add(entry.LeaseTTL)
+		if now.Before(leaseExpiry) {
+			delay := leaseExpiry.Sub(now)
+			glog.V(0).Infof("failover: orphaned primary for %q (replica %s alive, primary dead) "+
+				"but lease expires in %v, deferring promotion", volumeName, server, delay)
+			capturedEpoch := entry.Epoch
+			deadPrimary := entry.VolumeServer
+			timer := time.AfterFunc(delay, func() {
+				current, ok := ms.blockRegistry.Lookup(volumeName)
+				if !ok {
+					return
+				}
+				if current.Epoch != capturedEpoch {
+					glog.V(0).Infof("failover: deferred orphan promotion for %q skipped (epoch changed %d -> %d)",
+						volumeName, capturedEpoch, current.Epoch)
+					return
+				}
+				ms.promoteReplica(volumeName)
+			})
+			ms.blockFailover.mu.Lock()
+			ms.blockFailover.deferredTimers[deadPrimary] = append(
+				ms.blockFailover.deferredTimers[deadPrimary], timer)
+			ms.blockFailover.mu.Unlock()
+			continue
+		}
+
+		glog.V(0).Infof("failover: orphaned primary detected for %q (replica %s alive, primary dead, lease expired), promoting",
+			volumeName, server)
+		ms.promoteReplica(volumeName)
 	}
 }
--- a/weed/server/master_block_failover_test.go
+++ b/weed/server/master_block_failover_test.go
@ -34,6 +34,9 @@ func testMasterServerForFailover(t *testing.T) *MasterServer {
 // registerVolumeWithReplica creates a volume entry with primary + replica for tests.
 func registerVolumeWithReplica(t *testing.T, ms *MasterServer, name, primary, replica string, epoch uint64, leaseTTL time.Duration) {
 	t.Helper()
+	// Mark both servers as block-capable so promotion Gate 4 (liveness) passes.
+	ms.blockRegistry.MarkBlockCapable(primary)
+	ms.blockRegistry.MarkBlockCapable(replica)
 	entry := &BlockVolumeEntry{
 		Name:             name,
 		VolumeServer:     primary,
@ -53,11 +56,13 @@ func registerVolumeWithReplica(t *testing.T, ms *MasterServer, name, primary, re
 		// CP8-2: also populate Replicas[] for PromoteBestReplica.
 		Replicas: []ReplicaInfo{
 			{
-				Server:      replica,
-				Path:        fmt.Sprintf("/data/%s.blk", name),
-				IQN:         fmt.Sprintf("iqn.2024.test:%s-replica", name),
-				ISCSIAddr:   replica + ":3260",
-				HealthScore: 1.0,
+				Server:        replica,
+				Path:          fmt.Sprintf("/data/%s.blk", name),
+				IQN:           fmt.Sprintf("iqn.2024.test:%s-replica", name),
+				ISCSIAddr:     replica + ":3260",
+				HealthScore:   1.0,
+				Role:          blockvol.RoleToWire(blockvol.RoleReplica),
+				LastHeartbeat: time.Now(),
 			},
 		},
 	}
@ -194,6 +199,9 @@ func TestFailover_MultipleVolumes(t *testing.T) {

 func TestFailover_LeaseNotExpired_DeferredPromotion(t *testing.T) {
 	ms := testMasterServerForFailover(t)
+	// Mark servers as block-capable so promotion Gate 4 (liveness) passes.
+	ms.blockRegistry.MarkBlockCapable("vs1")
+	ms.blockRegistry.MarkBlockCapable("vs2")
 	entry := &BlockVolumeEntry{
 		Name:             "vol1",
 		VolumeServer:     "vs1",
@ -209,7 +217,7 @@ func TestFailover_LeaseNotExpired_DeferredPromotion(t *testing.T) {
 		LeaseTTL:         200 * time.Millisecond,
 		LastLeaseGrant:   time.Now(), // just granted, NOT expired yet
 		Replicas: []ReplicaInfo{
-			{Server: "vs2", Path: "/data/vol1.blk", IQN: "iqn:vol1-r", ISCSIAddr: "vs2:3260", HealthScore: 1.0},
+			{Server: "vs2", Path: "/data/vol1.blk", IQN: "iqn:vol1-r", ISCSIAddr: "vs2:3260", HealthScore: 1.0, Role: blockvol.RoleToWire(blockvol.RoleReplica), LastHeartbeat: time.Now()},
 		},
 	}
 	ms.blockRegistry.Register(entry)
@ -397,6 +405,9 @@ func TestRebuild_RegistryUpdatedWithNewReplica(t *testing.T) {

 func TestRebuild_AssignmentContainsRebuildAddr(t *testing.T) {
 	ms := testMasterServerForFailover(t)
+	// Mark servers as block-capable so promotion Gate 4 (liveness) passes.
+	ms.blockRegistry.MarkBlockCapable("vs1")
+	ms.blockRegistry.MarkBlockCapable("vs2")
 	entry := &BlockVolumeEntry{
 		Name:              "vol1",
 		VolumeServer:      "vs1",
@ -413,7 +424,7 @@ func TestRebuild_AssignmentContainsRebuildAddr(t *testing.T) {
 		LeaseTTL:          5 * time.Second,
 		LastLeaseGrant:    time.Now().Add(-10 * time.Second),
 		Replicas: []ReplicaInfo{
-			{Server: "vs2", Path: "/data/vol1.blk", IQN: "iqn:vol1-r", ISCSIAddr: "vs2:3260", HealthScore: 1.0},
+			{Server: "vs2", Path: "/data/vol1.blk", IQN: "iqn:vol1-r", ISCSIAddr: "vs2:3260", HealthScore: 1.0, Role: blockvol.RoleToWire(blockvol.RoleReplica), LastHeartbeat: time.Now()},
 		},
 	}
 	ms.blockRegistry.Register(entry)
@ -457,7 +468,7 @@ func TestFailover_TransientDisconnect_NoPromotion(t *testing.T) {
 		LeaseTTL:         30 * time.Second,
 		LastLeaseGrant:   time.Now(), // just granted
 		Replicas: []ReplicaInfo{
-			{Server: "vs2", Path: "/data/vol1.blk", IQN: "iqn:vol1-r", ISCSIAddr: "vs2:3260", HealthScore: 1.0},
+			{Server: "vs2", Path: "/data/vol1.blk", IQN: "iqn:vol1-r", ISCSIAddr: "vs2:3260", HealthScore: 1.0, Role: blockvol.RoleToWire(blockvol.RoleReplica), LastHeartbeat: time.Now()},
 		},
 	}
 	ms.blockRegistry.Register(entry)
@ -556,6 +567,10 @@ func TestLifecycle_CreateFailoverRebuild(t *testing.T) {
 // registerVolumeRF3 creates a volume entry with primary + 2 replicas for RF=3 tests.
 func registerVolumeRF3(t *testing.T, ms *MasterServer, name, primary, replica1, replica2 string, epoch uint64, leaseTTL time.Duration) {
 	t.Helper()
+	// Mark all servers as block-capable so promotion Gate 4 (liveness) passes.
+	ms.blockRegistry.MarkBlockCapable(primary)
+	ms.blockRegistry.MarkBlockCapable(replica1)
+	ms.blockRegistry.MarkBlockCapable(replica2)
 	entry := &BlockVolumeEntry{
 		Name:          name,
 		VolumeServer:  primary,
@ -576,20 +591,24 @@ func registerVolumeRF3(t *testing.T, ms *MasterServer, name, primary, replica1,
 		ReplicaISCSIAddr: replica1 + ":3260",
 		Replicas: []ReplicaInfo{
 			{
-				Server:      replica1,
-				Path:        fmt.Sprintf("/data/%s.blk", name),
-				IQN:         fmt.Sprintf("iqn.2024.test:%s-r1", name),
-				ISCSIAddr:   replica1 + ":3260",
-				HealthScore: 1.0,
-				WALHeadLSN:  100,
+				Server:        replica1,
+				Path:          fmt.Sprintf("/data/%s.blk", name),
+				IQN:           fmt.Sprintf("iqn.2024.test:%s-r1", name),
+				ISCSIAddr:     replica1 + ":3260",
+				HealthScore:   1.0,
+				WALHeadLSN:    100,
+				Role:          blockvol.RoleToWire(blockvol.RoleReplica),
+				LastHeartbeat: time.Now(),
 			},
 			{
-				Server:      replica2,
-				Path:        fmt.Sprintf("/data/%s.blk", name),
-				IQN:         fmt.Sprintf("iqn.2024.test:%s-r2", name),
-				ISCSIAddr:   replica2 + ":3260",
-				HealthScore: 1.0,
-				WALHeadLSN:  100,
+				Server:        replica2,
+				Path:          fmt.Sprintf("/data/%s.blk", name),
+				IQN:           fmt.Sprintf("iqn.2024.test:%s-r2", name),
+				ISCSIAddr:     replica2 + ":3260",
+				HealthScore:   1.0,
+				WALHeadLSN:    100,
+				Role:          blockvol.RoleToWire(blockvol.RoleReplica),
+				LastHeartbeat: time.Now(),
 			},
 		},
 	}
@ -793,6 +812,10 @@ func TestRF3_AllReplicasDead_NoPromotion(t *testing.T) {
 // RF3: Lease deferred promotion with RF=3.
 func TestRF3_LeaseDeferred_Promotion(t *testing.T) {
 	ms := testMasterServerForFailover(t)
+	// Mark servers as block-capable so promotion Gate 4 (liveness) passes.
+	ms.blockRegistry.MarkBlockCapable("vs1")
+	ms.blockRegistry.MarkBlockCapable("vs2")
+	ms.blockRegistry.MarkBlockCapable("vs3")
 	entry := &BlockVolumeEntry{
 		Name:          "vol1",
 		VolumeServer:  "vs1",
@ -807,8 +830,8 @@ func TestRF3_LeaseDeferred_Promotion(t *testing.T) {
 		LeaseTTL:      200 * time.Millisecond,
 		LastLeaseGrant: time.Now(), // just granted → NOT expired
 		Replicas: []ReplicaInfo{
-			{Server: "vs2", Path: "/data/vol1.blk", ISCSIAddr: "vs2:3260", HealthScore: 1.0, WALHeadLSN: 50},
-			{Server: "vs3", Path: "/data/vol1.blk", ISCSIAddr: "vs3:3260", HealthScore: 0.9, WALHeadLSN: 50},
+			{Server: "vs2", Path: "/data/vol1.blk", ISCSIAddr: "vs2:3260", HealthScore: 1.0, WALHeadLSN: 50, Role: blockvol.RoleToWire(blockvol.RoleReplica), LastHeartbeat: time.Now()},
+			{Server: "vs3", Path: "/data/vol1.blk", ISCSIAddr: "vs3:3260", HealthScore: 0.9, WALHeadLSN: 50, Role: blockvol.RoleToWire(blockvol.RoleReplica), LastHeartbeat: time.Now()},
 		},
 		// Deprecated scalar fields.
 		ReplicaServer: "vs2", ReplicaPath: "/data/vol1.blk", ReplicaISCSIAddr: "vs2:3260",
@ -853,8 +876,8 @@ func TestRF3_CancelDeferredOnReconnect(t *testing.T) {
 		LeaseTTL:      5 * time.Second,
 		LastLeaseGrant: time.Now(), // just granted → long lease
 		Replicas: []ReplicaInfo{
-			{Server: "vs2", Path: "/data/vol1.blk", ISCSIAddr: "vs2:3260", HealthScore: 1.0},
-			{Server: "vs3", Path: "/data/vol1.blk", ISCSIAddr: "vs3:3260", HealthScore: 1.0},
+			{Server: "vs2", Path: "/data/vol1.blk", ISCSIAddr: "vs2:3260", HealthScore: 1.0, Role: blockvol.RoleToWire(blockvol.RoleReplica), LastHeartbeat: time.Now()},
+			{Server: "vs3", Path: "/data/vol1.blk", ISCSIAddr: "vs3:3260", HealthScore: 1.0, Role: blockvol.RoleToWire(blockvol.RoleReplica), LastHeartbeat: time.Now()},
 		},
 		ReplicaServer: "vs2", ReplicaPath: "/data/vol1.blk", ReplicaISCSIAddr: "vs2:3260",
 	}
@ -888,3 +911,267 @@ func TestRF3_CancelDeferredOnReconnect(t *testing.T) {
 		t.Fatalf("vs1 should remain primary (timer cancelled), got %q", e.VolumeServer)
 	}
 }
+
+// ============================================================
+// CP11B-3 T2: Re-evaluate on Replica Registration (B-06)
+// ============================================================
+
+// T2: Orphaned primary + replica reconnects → automatic promotion.
+func TestT2_OrphanedPrimary_ReplicaReconnect_Promotes(t *testing.T) {
+	ms := testMasterServerForFailover(t)
+	registerVolumeWithReplica(t, ms, "vol1", "vs1", "vs2", 1, 5*time.Second)
+
+	// Simulate vs1 dying without proper failover (e.g., promotion failed at the time).
+	// Mark vs1 as dead but DON'T call failoverBlockVolumes (simulates missed/failed failover).
+	ms.blockRegistry.UnmarkBlockCapable("vs1")
+
+	// vs2 reconnects (sends heartbeat). reevaluateOrphanedPrimaries should detect orphaned primary.
+	ms.recoverBlockVolumes("vs2")
+
+	entry, _ := ms.blockRegistry.Lookup("vol1")
+	if entry.VolumeServer != "vs2" {
+		t.Fatalf("expected promotion to vs2 (orphaned primary), got %q", entry.VolumeServer)
+	}
+	if entry.Epoch != 2 {
+		t.Fatalf("expected epoch 2 after promotion, got %d", entry.Epoch)
+	}
+}
+
+// T2: Replica reconnects but primary is alive → no unnecessary promotion.
+func TestT2_PrimaryAlive_NoPromotion(t *testing.T) {
+	ms := testMasterServerForFailover(t)
+	registerVolumeWithReplica(t, ms, "vol1", "vs1", "vs2", 1, 5*time.Second)
+
+	// Both servers alive. vs2 reconnects — no orphaned primary.
+	ms.recoverBlockVolumes("vs2")
+
+	entry, _ := ms.blockRegistry.Lookup("vol1")
+	if entry.VolumeServer != "vs1" {
+		t.Fatalf("primary should remain vs1 (alive), got %q", entry.VolumeServer)
+	}
+	if entry.Epoch != 1 {
+		t.Fatalf("epoch should remain 1, got %d", entry.Epoch)
+	}
+}
+
+// T2: Multiple orphaned volumes, all promoted on reconnect.
+func TestT2_MultipleOrphanedVolumes(t *testing.T) {
+	ms := testMasterServerForFailover(t)
+	// vol1: vs1=primary, vs2=replica
+	// vol2: vs3=primary, vs2=replica
+	registerVolumeWithReplica(t, ms, "vol1", "vs1", "vs2", 1, 5*time.Second)
+	ms.blockRegistry.MarkBlockCapable("vs3")
+	entry2 := &BlockVolumeEntry{
+		Name: "vol2", VolumeServer: "vs3", Path: "/data/vol2.blk",
+		SizeBytes: 1 << 30, Epoch: 1, Role: blockvol.RoleToWire(blockvol.RolePrimary),
+		Status: StatusActive, LeaseTTL: 5 * time.Second,
+		LastLeaseGrant: time.Now().Add(-10 * time.Second),
+		Replicas: []ReplicaInfo{{
+			Server: "vs2", Path: "/data/vol2.blk", HealthScore: 1.0,
+			Role: blockvol.RoleToWire(blockvol.RoleReplica), LastHeartbeat: time.Now(),
+		}},
+	}
+	ms.blockRegistry.Register(entry2)
+
+	// Both primaries die.
+	ms.blockRegistry.UnmarkBlockCapable("vs1")
+	ms.blockRegistry.UnmarkBlockCapable("vs3")
+
+	// vs2 reconnects → both orphaned volumes should be promoted.
+	ms.recoverBlockVolumes("vs2")
+
+	e1, _ := ms.blockRegistry.Lookup("vol1")
+	e2, _ := ms.blockRegistry.Lookup("vol2")
+	if e1.VolumeServer != "vs2" {
+		t.Fatalf("vol1: expected promotion to vs2, got %q", e1.VolumeServer)
+	}
+	if e2.VolumeServer != "vs2" {
+		t.Fatalf("vol2: expected promotion to vs2, got %q", e2.VolumeServer)
+	}
+}
+
+// T2: Repeated heartbeats do NOT cause duplicate promotions.
+func TestT2_RepeatedHeartbeats_NoDuplicatePromotion(t *testing.T) {
+	ms := testMasterServerForFailover(t)
+	registerVolumeWithReplica(t, ms, "vol1", "vs1", "vs2", 1, 5*time.Second)
+
+	ms.blockRegistry.UnmarkBlockCapable("vs1")
+
+	// First reconnect promotes.
+	ms.reevaluateOrphanedPrimaries("vs2")
+	entry, _ := ms.blockRegistry.Lookup("vol1")
+	if entry.VolumeServer != "vs2" {
+		t.Fatalf("first call: expected promotion to vs2, got %q", entry.VolumeServer)
+	}
+	epochAfterFirst := entry.Epoch
+
+	// Second call: vs2 is now the primary AND block-capable. No orphan detected.
+	ms.reevaluateOrphanedPrimaries("vs2")
+	entry, _ = ms.blockRegistry.Lookup("vol1")
+	if entry.Epoch != epochAfterFirst {
+		t.Fatalf("second call should not bump epoch: got %d, want %d", entry.Epoch, epochAfterFirst)
+	}
+}
+
+// T2: Dead primary with active lease, replica reconnects → no immediate promotion.
+// Regression test for lease-bypass bug: reevaluateOrphanedPrimaries must respect
+// lease expiry, not promote immediately.
+func TestT2_OrphanedPrimary_LeaseNotExpired_DefersPromotion(t *testing.T) {
+	ms := testMasterServerForFailover(t)
+	ms.blockRegistry.MarkBlockCapable("vs1")
+	ms.blockRegistry.MarkBlockCapable("vs2")
+	ms.blockRegistry.Register(&BlockVolumeEntry{
+		Name: "vol1", VolumeServer: "vs1", Path: "/data/vol1.blk",
+		SizeBytes: 1 << 30, Epoch: 1, Role: blockvol.RoleToWire(blockvol.RolePrimary),
+		Status: StatusActive, LeaseTTL: 300 * time.Millisecond,
+		LastLeaseGrant: time.Now(), // lease still active
+		Replicas: []ReplicaInfo{{
+			Server: "vs2", Path: "/data/vol1.blk", HealthScore: 1.0,
+			Role: blockvol.RoleToWire(blockvol.RoleReplica), LastHeartbeat: time.Now(),
+		}},
+	})
+
+	// vs1 dies (unmark block-capable).
+	ms.blockRegistry.UnmarkBlockCapable("vs1")
+
+	// vs2 reconnects — orphan detected, but lease still active → should NOT promote immediately.
+	ms.reevaluateOrphanedPrimaries("vs2")
+
+	entry, _ := ms.blockRegistry.Lookup("vol1")
+	if entry.VolumeServer != "vs1" {
+		t.Fatalf("should NOT promote while lease active, got primary=%q", entry.VolumeServer)
+	}
+	if entry.Epoch != 1 {
+		t.Fatalf("epoch should remain 1, got %d", entry.Epoch)
+	}
+
+	// Verify a deferred timer was created for the dead primary.
+	ms.blockFailover.mu.Lock()
+	timerCount := len(ms.blockFailover.deferredTimers["vs1"])
+	ms.blockFailover.mu.Unlock()
+	if timerCount != 1 {
+		t.Fatalf("expected 1 deferred timer for vs1, got %d", timerCount)
+	}
+
+	// Wait for lease to expire + margin → timer fires, promotion happens.
+	time.Sleep(450 * time.Millisecond)
+
+	entry, _ = ms.blockRegistry.Lookup("vol1")
+	if entry.VolumeServer != "vs2" {
+		t.Fatalf("after lease expiry, expected promotion to vs2, got %q", entry.VolumeServer)
+	}
+	if entry.Epoch != 2 {
+		t.Fatalf("expected epoch 2, got %d", entry.Epoch)
+	}
+}
+
+// ============================================================
+// CP11B-3 T3: Deferred Timer Safety
+// ============================================================
+
+// T3: Delete/recreate volume before deferred timer fires → no wrong promotion.
+func TestT3_DeferredTimer_VolumeDeleted_NoPromotion(t *testing.T) {
+	ms := testMasterServerForFailover(t)
+	ms.blockRegistry.MarkBlockCapable("vs1")
+	ms.blockRegistry.MarkBlockCapable("vs2")
+	entry := &BlockVolumeEntry{
+		Name: "vol1", VolumeServer: "vs1", Path: "/data/vol1.blk",
+		SizeBytes: 1 << 30, Epoch: 5, Role: blockvol.RoleToWire(blockvol.RolePrimary),
+		Status: StatusActive, LeaseTTL: 200 * time.Millisecond,
+		LastLeaseGrant: time.Now(),
+		Replicas: []ReplicaInfo{{
+			Server: "vs2", Path: "/data/vol1.blk", HealthScore: 1.0,
+			Role: blockvol.RoleToWire(blockvol.RoleReplica), LastHeartbeat: time.Now(),
+		}},
+	}
+	ms.blockRegistry.Register(entry)
+
+	// vs1 dies → deferred timer created (lease not expired, epoch=5).
+	ms.failoverBlockVolumes("vs1")
+
+	// Delete the volume before timer fires.
+	ms.blockRegistry.Unregister("vol1")
+
+	// Wait for timer to fire.
+	time.Sleep(350 * time.Millisecond)
+
+	// Volume should not exist (timer found it deleted, no-op).
+	_, ok := ms.blockRegistry.Lookup("vol1")
+	if ok {
+		t.Fatal("volume should have been deleted, timer should not recreate it")
+	}
+}
+
+// T3: Epoch changes before deferred timer fires → timer rejected.
+func TestT3_DeferredTimer_EpochChanged_NoPromotion(t *testing.T) {
+	ms := testMasterServerForFailover(t)
+	ms.blockRegistry.MarkBlockCapable("vs1")
+	ms.blockRegistry.MarkBlockCapable("vs2")
+	ms.blockRegistry.MarkBlockCapable("vs3")
+	entry := &BlockVolumeEntry{
+		Name: "vol1", VolumeServer: "vs1", Path: "/data/vol1.blk",
+		SizeBytes: 1 << 30, Epoch: 5, Role: blockvol.RoleToWire(blockvol.RolePrimary),
+		Status: StatusActive, LeaseTTL: 200 * time.Millisecond,
+		LastLeaseGrant: time.Now(),
+		Replicas: []ReplicaInfo{{
+			Server: "vs2", Path: "/data/vol1.blk", HealthScore: 1.0,
+			Role: blockvol.RoleToWire(blockvol.RoleReplica), LastHeartbeat: time.Now(),
+		}},
+	}
+	ms.blockRegistry.Register(entry)
+
+	// vs1 dies → deferred timer created (captures epoch=5).
+	ms.failoverBlockVolumes("vs1")
+
+	// Before timer fires, manually bump the epoch (simulating another event).
+	e, _ := ms.blockRegistry.Lookup("vol1")
+	e.Epoch = 99
+
+	// Wait for timer to fire.
+	time.Sleep(350 * time.Millisecond)
+
+	// Timer should have been rejected (epoch mismatch). Epoch stays at 99.
+	e, _ = ms.blockRegistry.Lookup("vol1")
+	if e.Epoch != 99 {
+		t.Fatalf("epoch should remain 99 (timer rejected), got %d", e.Epoch)
+	}
+	// Primary should NOT have changed (deferred promotion was rejected).
+	if e.VolumeServer != "vs1" {
+		t.Fatalf("primary should remain vs1 (timer rejected), got %q", e.VolumeServer)
+	}
+}
+
+// ============================================================
+// CP11B-3 T4: Rebuild with empty RebuildListenAddr
+// ============================================================
+
+// T4: Rebuild queued with empty RebuildListenAddr after promotion.
+func TestT4_RebuildEmptyAddr_StillQueued(t *testing.T) {
+	ms := testMasterServerForFailover(t)
+	registerVolumeWithReplica(t, ms, "vol1", "vs1", "vs2", 1, 5*time.Second)
+
+	// Failover: vs1 dies, vs2 promoted. PromoteBestReplica clears RebuildListenAddr.
+	ms.failoverBlockVolumes("vs1")
+
+	entry, _ := ms.blockRegistry.Lookup("vol1")
+	if entry.RebuildListenAddr != "" {
+		t.Fatalf("RebuildListenAddr should be empty after promotion, got %q", entry.RebuildListenAddr)
+	}
+
+	// vs1 reconnects. Rebuild should still be queued (even with empty addr).
+	ms.recoverBlockVolumes("vs1")
+
+	assignments := ms.blockAssignmentQueue.Peek("vs1")
+	foundRebuild := false
+	for _, a := range assignments {
+		if blockvol.RoleFromWire(a.Role) == blockvol.RoleRebuilding {
+			foundRebuild = true
+			if a.RebuildAddr != "" {
+				t.Fatalf("RebuildAddr should be empty (new primary hasn't heartbeated), got %q", a.RebuildAddr)
+			}
+		}
+	}
+	if !foundRebuild {
+		t.Fatal("rebuild assignment should still be queued even with empty addr")
+	}
+}
--- a/weed/server/master_block_registry.go
+++ b/weed/server/master_block_registry.go
@ -842,44 +842,91 @@ func (r *BlockVolumeRegistry) PromotionLSNTolerance() uint64 {
 	return r.promotionLSNTolerance
 }

-// PromoteBestReplica promotes the best eligible replica to primary.
-// Eligibility: heartbeat fresh (within 2×LeaseTTL), WALHeadLSN within tolerance of primary,
-// and role must be RoleReplica (not RoleRebuilding).
-// The promoted replica is removed from Replicas[]. Other replicas stay.
-// Old primary is NOT added to Replicas (needs rebuild).
-// Returns the new epoch.
-func (r *BlockVolumeRegistry) PromoteBestReplica(name string) (uint64, error) {
-	r.mu.Lock()
-	defer r.mu.Unlock()
-	entry, ok := r.volumes[name]
-	if !ok {
-		return 0, fmt.Errorf("block volume %q not found", name)
+// PromotionRejection records why a specific replica was rejected for promotion.
+type PromotionRejection struct {
+	Server string
+	Reason string // "stale_heartbeat", "wal_lag", "wrong_role", "server_dead"
+}
+
+// PromotionPreflightResult is the reusable result of a promotion evaluation.
+// Used by auto-promotion, manual promote API, preflight status, and logging.
+type PromotionPreflightResult struct {
+	VolumeName   string
+	Promotable   bool               // true if a candidate was found
+	Candidate    *ReplicaInfo       // best candidate (nil if !Promotable)
+	CandidateIdx int                // index in Replicas[] (-1 if !Promotable)
+	Rejections   []PromotionRejection // why each non-candidate was rejected
+	Reason       string             // human-readable summary when !Promotable
+}
+
+// evaluatePromotionLocked evaluates promotion candidates for a volume.
+// Caller must hold r.mu (read or write). Returns a preflight result without
+// mutating the registry. The four gates:
+//   1. Heartbeat freshness (within 2×LeaseTTL)
+//   2. WAL LSN recency (within promotionLSNTolerance of primary)
+//   3. Role must be RoleReplica (not RoleRebuilding)
+//   4. Server must be in blockServers (alive) — fixes B-12
+func (r *BlockVolumeRegistry) evaluatePromotionLocked(entry *BlockVolumeEntry) PromotionPreflightResult {
+	result := PromotionPreflightResult{
+		VolumeName:   entry.Name,
+		CandidateIdx: -1,
 	}
 	if len(entry.Replicas) == 0 {
-		return 0, fmt.Errorf("block volume %q has no replicas", name)
+		result.Reason = "no replicas"
+		return result
 	}

-	// Filter eligible replicas.
 	now := time.Now()
 	freshnessCutoff := 2 * entry.LeaseTTL
 	if freshnessCutoff == 0 {
-		freshnessCutoff = 60 * time.Second // default if LeaseTTL not set
+		freshnessCutoff = 60 * time.Second
 	}
 	primaryLSN := entry.WALHeadLSN

 	bestIdx := -1
 	for i := range entry.Replicas {
 		ri := &entry.Replicas[i]
-		// Gate 1: heartbeat freshness.
-		if !ri.LastHeartbeat.IsZero() && now.Sub(ri.LastHeartbeat) > freshnessCutoff {
+
+		// Gate 1: heartbeat freshness. Zero means never heartbeated — unsafe
+		// to promote because the registry has no proof the replica is alive,
+		// caught up, or fully initialized.
+		if ri.LastHeartbeat.IsZero() {
+			result.Rejections = append(result.Rejections, PromotionRejection{
+				Server: ri.Server,
+				Reason: "no_heartbeat",
+			})
+			continue
+		}
+		if now.Sub(ri.LastHeartbeat) > freshnessCutoff {
+			result.Rejections = append(result.Rejections, PromotionRejection{
+				Server: ri.Server,
+				Reason: "stale_heartbeat",
+			})
 			continue
 		}
 		// Gate 2: WAL LSN recency (skip if primary LSN is 0 — no data yet, all eligible).
 		if primaryLSN > 0 && ri.WALHeadLSN+r.promotionLSNTolerance < primaryLSN {
+			result.Rejections = append(result.Rejections, PromotionRejection{
+				Server: ri.Server,
+				Reason: "wal_lag",
+			})
 			continue
 		}
-		// Gate 3: role must be RoleReplica (not rebuilding/stale).
-		if ri.Role != 0 && blockvol.RoleFromWire(ri.Role) != blockvol.RoleReplica {
+		// Gate 3: role must be exactly RoleReplica. Zero/unset role means
+		// the replica was created but never confirmed its role via heartbeat.
+		if blockvol.RoleFromWire(ri.Role) != blockvol.RoleReplica {
+			result.Rejections = append(result.Rejections, PromotionRejection{
+				Server: ri.Server,
+				Reason: "wrong_role",
+			})
+			continue
+		}
+		// Gate 4: server must be alive (in blockServers set) — B-12 fix.
+		if !r.blockServers[ri.Server] {
+			result.Rejections = append(result.Rejections, PromotionRejection{
+				Server: ri.Server,
+				Reason: "server_dead",
+			})
 			continue
 		}
 		// Eligible — pick best by health score, tie-break by WALHeadLSN.
@ -894,11 +941,39 @@ func (r *BlockVolumeRegistry) PromoteBestReplica(name string) (uint64, error) {
 	}

 	if bestIdx == -1 {
-		return 0, fmt.Errorf("block volume %q: no eligible replicas for promotion", name)
+		result.Reason = "no eligible replicas"
+		if len(result.Rejections) > 0 {
+			result.Reason += ": " + result.Rejections[0].Reason
+			if len(result.Rejections) > 1 {
+				result.Reason += fmt.Sprintf(" (+%d more)", len(result.Rejections)-1)
+			}
+		}
+		return result
 	}

-	promoted := entry.Replicas[bestIdx]
+	result.Promotable = true
+	ri := entry.Replicas[bestIdx]
+	result.Candidate = &ri
+	result.CandidateIdx = bestIdx
+	return result
+}

+// EvaluatePromotion returns a read-only preflight result for the named volume
+// without mutating the registry. Safe for status/logging/manual promote preview.
+func (r *BlockVolumeRegistry) EvaluatePromotion(name string) (PromotionPreflightResult, error) {
+	r.mu.RLock()
+	defer r.mu.RUnlock()
+	entry, ok := r.volumes[name]
+	if !ok {
+		return PromotionPreflightResult{VolumeName: name, Reason: "volume not found"}, fmt.Errorf("block volume %q not found", name)
+	}
+	return r.evaluatePromotionLocked(entry), nil
+}
+
+// applyPromotionLocked applies the promotion of a replica at candidateIdx to primary.
+// Caller must hold r.mu (write lock). The promoted replica is removed from Replicas[].
+// Old primary is NOT added to Replicas (needs rebuild). Returns the new epoch.
+func (r *BlockVolumeRegistry) applyPromotionLocked(entry *BlockVolumeEntry, name string, candidate ReplicaInfo, candidateIdx int) uint64 {
 	// Remove old primary from byServer index.
 	r.removeFromServer(entry.VolumeServer, name)

@ -906,18 +981,21 @@ func (r *BlockVolumeRegistry) PromoteBestReplica(name string) (uint64, error) {
 	newEpoch := entry.Epoch + 1

 	// Promote replica to primary.
-	entry.VolumeServer = promoted.Server
-	entry.Path = promoted.Path
-	entry.IQN = promoted.IQN
-	entry.ISCSIAddr = promoted.ISCSIAddr
-	entry.NvmeAddr = promoted.NvmeAddr
-	entry.NQN = promoted.NQN
+	entry.VolumeServer = candidate.Server
+	entry.Path = candidate.Path
+	entry.IQN = candidate.IQN
+	entry.ISCSIAddr = candidate.ISCSIAddr
+	entry.NvmeAddr = candidate.NvmeAddr
+	entry.NQN = candidate.NQN
 	entry.Epoch = newEpoch
 	entry.Role = blockvol.RoleToWire(blockvol.RolePrimary)
 	entry.LastLeaseGrant = time.Now()

+	// Clear stale rebuild/publication metadata from old primary (B-11 partial fix).
+	entry.RebuildListenAddr = ""
+
 	// Remove promoted from Replicas. Others stay.
-	entry.Replicas = append(entry.Replicas[:bestIdx], entry.Replicas[bestIdx+1:]...)
+	entry.Replicas = append(entry.Replicas[:candidateIdx], entry.Replicas[candidateIdx+1:]...)

 	// Sync deprecated scalar fields.
 	if len(entry.Replicas) > 0 {
@ -940,9 +1018,212 @@ func (r *BlockVolumeRegistry) PromoteBestReplica(name string) (uint64, error) {
 	// Update byServer index: new primary server now hosts this volume.
 	r.addToServer(entry.VolumeServer, name)

+	return newEpoch
+}
+
+// PromoteBestReplica promotes the best eligible replica to primary.
+// Eligibility: heartbeat fresh (within 2×LeaseTTL), WALHeadLSN within tolerance of primary,
+// role must be RoleReplica (not RoleRebuilding), and server must be alive (B-12 fix).
+// The promoted replica is removed from Replicas[]. Other replicas stay.
+// Old primary is NOT added to Replicas (needs rebuild).
+// Returns the new epoch and the preflight result.
+func (r *BlockVolumeRegistry) PromoteBestReplica(name string) (uint64, error) {
+	r.mu.Lock()
+	defer r.mu.Unlock()
+	entry, ok := r.volumes[name]
+	if !ok {
+		return 0, fmt.Errorf("block volume %q not found", name)
+	}
+
+	pf := r.evaluatePromotionLocked(entry)
+	if !pf.Promotable {
+		return 0, fmt.Errorf("block volume %q: %s", name, pf.Reason)
+	}
+
+	promoted := *pf.Candidate
+	bestIdx := pf.CandidateIdx
+
+	newEpoch := r.applyPromotionLocked(entry, name, promoted, bestIdx)
 	return newEpoch, nil
 }

+// evaluateManualPromotionLocked evaluates promotion candidates for a manual promote request.
+// Caller must hold r.mu (read or write).
+//
+// Differences from evaluatePromotionLocked:
+//   - Primary-alive gate: if !force and current primary is alive, reject with "primary_alive".
+//   - Target filtering: if targetServer != "", only evaluate that specific replica.
+//     Returns Reason="target_not_found" if that server is not a replica.
+//   - Force flag: bypasses soft gates (primary_alive, stale_heartbeat, wal_lag)
+//     but keeps hard gates (no_heartbeat with zero time, wrong_role, server_dead).
+//
+// Gate table:
+//
+//	Gate             | Normal | Force
+//	primary_alive    | Reject | Skip
+//	no_heartbeat(0)  | Reject | Reject
+//	stale_heartbeat  | Reject | Skip
+//	wal_lag          | Reject | Skip
+//	wrong_role       | Reject | Reject
+//	server_dead      | Reject | Reject
+func (r *BlockVolumeRegistry) evaluateManualPromotionLocked(entry *BlockVolumeEntry, targetServer string, force bool) PromotionPreflightResult {
+	result := PromotionPreflightResult{
+		VolumeName:   entry.Name,
+		CandidateIdx: -1,
+	}
+
+	// Primary-alive gate (soft — skipped when force=true).
+	if !force && r.blockServers[entry.VolumeServer] {
+		result.Reason = "primary_alive"
+		return result
+	}
+
+	if len(entry.Replicas) == 0 {
+		result.Reason = "no replicas"
+		return result
+	}
+
+	// Target filtering: if a specific server is requested, find its index first.
+	// Return early if not found.
+	if targetServer != "" {
+		found := false
+		for i := range entry.Replicas {
+			if entry.Replicas[i].Server == targetServer {
+				found = true
+				break
+			}
+		}
+		if !found {
+			result.Reason = "target_not_found"
+			return result
+		}
+	}
+
+	now := time.Now()
+	freshnessCutoff := 2 * entry.LeaseTTL
+	if freshnessCutoff == 0 {
+		freshnessCutoff = 60 * time.Second
+	}
+	primaryLSN := entry.WALHeadLSN
+
+	bestIdx := -1
+	for i := range entry.Replicas {
+		ri := &entry.Replicas[i]
+
+		// If targeting a specific server, skip all others.
+		if targetServer != "" && ri.Server != targetServer {
+			continue
+		}
+
+		// Hard gate: no heartbeat (zero time) — unsafe regardless of force.
+		if ri.LastHeartbeat.IsZero() {
+			result.Rejections = append(result.Rejections, PromotionRejection{
+				Server: ri.Server,
+				Reason: "no_heartbeat",
+			})
+			continue
+		}
+
+		// Soft gate: stale heartbeat — skipped when force=true.
+		if !force && now.Sub(ri.LastHeartbeat) > freshnessCutoff {
+			result.Rejections = append(result.Rejections, PromotionRejection{
+				Server: ri.Server,
+				Reason: "stale_heartbeat",
+			})
+			continue
+		}
+
+		// Soft gate: WAL lag — skipped when force=true.
+		if !force && primaryLSN > 0 && ri.WALHeadLSN+r.promotionLSNTolerance < primaryLSN {
+			result.Rejections = append(result.Rejections, PromotionRejection{
+				Server: ri.Server,
+				Reason: "wal_lag",
+			})
+			continue
+		}
+
+		// Hard gate: role must be exactly RoleReplica.
+		if blockvol.RoleFromWire(ri.Role) != blockvol.RoleReplica {
+			result.Rejections = append(result.Rejections, PromotionRejection{
+				Server: ri.Server,
+				Reason: "wrong_role",
+			})
+			continue
+		}
+
+		// Hard gate: server must be alive (in blockServers set).
+		if !r.blockServers[ri.Server] {
+			result.Rejections = append(result.Rejections, PromotionRejection{
+				Server: ri.Server,
+				Reason: "server_dead",
+			})
+			continue
+		}
+
+		// Eligible — pick best by health score, tie-break by WALHeadLSN.
+		if bestIdx == -1 {
+			bestIdx = i
+		} else if ri.HealthScore > entry.Replicas[bestIdx].HealthScore {
+			bestIdx = i
+		} else if ri.HealthScore == entry.Replicas[bestIdx].HealthScore &&
+			ri.WALHeadLSN > entry.Replicas[bestIdx].WALHeadLSN {
+			bestIdx = i
+		}
+	}
+
+	if bestIdx == -1 {
+		result.Reason = "no eligible replicas"
+		if len(result.Rejections) > 0 {
+			result.Reason += ": " + result.Rejections[0].Reason
+			if len(result.Rejections) > 1 {
+				result.Reason += fmt.Sprintf(" (+%d more)", len(result.Rejections)-1)
+			}
+		}
+		return result
+	}
+
+	result.Promotable = true
+	ri := entry.Replicas[bestIdx]
+	result.Candidate = &ri
+	result.CandidateIdx = bestIdx
+	return result
+}
+
+// ManualPromote promotes a specific replica (or the best eligible replica) to primary.
+// Unlike PromoteBestReplica, it accepts operator overrides:
+//   - targetServer: if non-empty, only that replica is considered.
+//   - force: bypasses soft gates (primary_alive, stale_heartbeat, wal_lag).
+//
+// Returns (newEpoch, oldPrimary, oldPath, preflightResult, nil) on success.
+// oldPrimary and oldPath are captured under the lock to avoid TOCTOU with
+// concurrent auto-failover (BUG-T5-2 fix).
+// Returns (0, "", "", preflightResult, err) on rejection or lookup failure.
+func (r *BlockVolumeRegistry) ManualPromote(name, targetServer string, force bool) (uint64, string, string, PromotionPreflightResult, error) {
+	r.mu.Lock()
+	defer r.mu.Unlock()
+
+	entry, ok := r.volumes[name]
+	if !ok {
+		return 0, "", "", PromotionPreflightResult{VolumeName: name, Reason: "volume not found"},
+			fmt.Errorf("block volume %q not found", name)
+	}
+
+	// Capture old primary info under lock (BUG-T5-2 fix).
+	oldPrimary := entry.VolumeServer
+	oldPath := entry.Path
+
+	pf := r.evaluateManualPromotionLocked(entry, targetServer, force)
+	if !pf.Promotable {
+		return 0, "", "", pf, fmt.Errorf("block volume %q: %s", name, pf.Reason)
+	}
+
+	promoted := *pf.Candidate
+	candidateIdx := pf.CandidateIdx
+
+	newEpoch := r.applyPromotionLocked(entry, name, promoted, candidateIdx)
+	return newEpoch, oldPrimary, oldPath, pf, nil
+}
+
 // MarkBlockCapable records that the given server supports block volumes.
 func (r *BlockVolumeRegistry) MarkBlockCapable(server string) {
 	r.mu.Lock()
@ -1045,6 +1326,41 @@ func (r *BlockVolumeRegistry) ServerSummaries() []BlockServerSummary {
 	return summaries
 }

+// IsBlockCapable returns true if the given server is in the block-capable set (alive).
+func (r *BlockVolumeRegistry) IsBlockCapable(server string) bool {
+	r.mu.RLock()
+	defer r.mu.RUnlock()
+	return r.blockServers[server]
+}
+
+// VolumesWithDeadPrimary returns names of volumes where the given server is a replica
+// and the current primary is NOT in the block-capable set (dead/disconnected).
+// Used by T2 (B-06) to detect orphaned primaries that need re-promotion.
+func (r *BlockVolumeRegistry) VolumesWithDeadPrimary(replicaServer string) []string {
+	r.mu.RLock()
+	defer r.mu.RUnlock()
+	names, ok := r.byServer[replicaServer]
+	if !ok {
+		return nil
+	}
+	var orphaned []string
+	for name := range names {
+		entry := r.volumes[name]
+		if entry == nil {
+			continue
+		}
+		// Only consider volumes where this server is a replica (not the primary).
+		if entry.VolumeServer == replicaServer {
+			continue
+		}
+		// Check if the primary server is dead.
+		if !r.blockServers[entry.VolumeServer] {
+			orphaned = append(orphaned, name)
+		}
+	}
+	return orphaned
+}
+
 // BlockCapableServers returns the list of servers known to support block volumes.
 func (r *BlockVolumeRegistry) BlockCapableServers() []string {
 	r.mu.RLock()
--- a/weed/server/master_block_registry_test.go
+++ b/weed/server/master_block_registry_test.go
@ -2,6 +2,7 @@ package weed_server

 import (
 	"fmt"
+	"strings"
 	"sync"
 	"testing"
 	"time"
@ -538,6 +539,8 @@ func TestRegistry_RemoveReplica(t *testing.T) {

 func TestRegistry_PromoteBestReplica_PicksHighest(t *testing.T) {
 	r := NewBlockVolumeRegistry()
+	r.MarkBlockCapable("s2")
+	r.MarkBlockCapable("s3")
 	r.Register(&BlockVolumeEntry{
 		Name:         "vol1",
 		VolumeServer: "s1",
@ -545,8 +548,8 @@ func TestRegistry_PromoteBestReplica_PicksHighest(t *testing.T) {
 		Epoch:        5,
 		Role:         1,
 		Replicas: []ReplicaInfo{
-			{Server: "s2", Path: "/r1.blk", IQN: "iqn:r1", ISCSIAddr: "s2:3260", HealthScore: 0.8, WALHeadLSN: 100},
-			{Server: "s3", Path: "/r2.blk", IQN: "iqn:r2", ISCSIAddr: "s3:3260", HealthScore: 0.95, WALHeadLSN: 90},
+			{Server: "s2", Path: "/r1.blk", IQN: "iqn:r1", ISCSIAddr: "s2:3260", HealthScore: 0.8, WALHeadLSN: 100, Role: blockvol.RoleToWire(blockvol.RoleReplica), LastHeartbeat: time.Now()},
+			{Server: "s3", Path: "/r2.blk", IQN: "iqn:r2", ISCSIAddr: "s3:3260", HealthScore: 0.95, WALHeadLSN: 90, Role: blockvol.RoleToWire(blockvol.RoleReplica), LastHeartbeat: time.Now()},
 		},
 	})
 	// Add to byServer for s2 and s3.
@ -592,14 +595,16 @@ func TestRegistry_PromoteBestReplica_NoReplica(t *testing.T) {

 func TestRegistry_PromoteBestReplica_TiebreakByLSN(t *testing.T) {
 	r := NewBlockVolumeRegistry()
+	r.MarkBlockCapable("s2")
+	r.MarkBlockCapable("s3")
 	r.Register(&BlockVolumeEntry{
 		Name:         "vol1",
 		VolumeServer: "s1",
 		Path:         "/v1.blk",
 		Epoch:        3,
 		Replicas: []ReplicaInfo{
-			{Server: "s2", Path: "/r1.blk", IQN: "iqn:r1", ISCSIAddr: "s2:3260", HealthScore: 0.9, WALHeadLSN: 50},
-			{Server: "s3", Path: "/r2.blk", IQN: "iqn:r2", ISCSIAddr: "s3:3260", HealthScore: 0.9, WALHeadLSN: 100},
+			{Server: "s2", Path: "/r1.blk", IQN: "iqn:r1", ISCSIAddr: "s2:3260", HealthScore: 0.9, WALHeadLSN: 50, Role: blockvol.RoleToWire(blockvol.RoleReplica), LastHeartbeat: time.Now()},
+			{Server: "s3", Path: "/r2.blk", IQN: "iqn:r2", ISCSIAddr: "s3:3260", HealthScore: 0.9, WALHeadLSN: 100, Role: blockvol.RoleToWire(blockvol.RoleReplica), LastHeartbeat: time.Now()},
 		},
 	})
 	r.mu.Lock()
@ -627,14 +632,16 @@ func TestRegistry_PromoteBestReplica_TiebreakByLSN(t *testing.T) {

 func TestRegistry_PromoteBestReplica_KeepsOthers(t *testing.T) {
 	r := NewBlockVolumeRegistry()
+	r.MarkBlockCapable("s2")
+	r.MarkBlockCapable("s3")
 	r.Register(&BlockVolumeEntry{
 		Name:         "vol1",
 		VolumeServer: "s1",
 		Path:         "/v1.blk",
 		Epoch:        1,
 		Replicas: []ReplicaInfo{
-			{Server: "s2", Path: "/r1.blk", IQN: "iqn:r1", ISCSIAddr: "s2:3260", HealthScore: 1.0, WALHeadLSN: 100},
-			{Server: "s3", Path: "/r2.blk", IQN: "iqn:r2", ISCSIAddr: "s3:3260", HealthScore: 0.5, WALHeadLSN: 100},
+			{Server: "s2", Path: "/r1.blk", IQN: "iqn:r1", ISCSIAddr: "s2:3260", HealthScore: 1.0, WALHeadLSN: 100, Role: blockvol.RoleToWire(blockvol.RoleReplica), LastHeartbeat: time.Now()},
+			{Server: "s3", Path: "/r2.blk", IQN: "iqn:r2", ISCSIAddr: "s3:3260", HealthScore: 0.5, WALHeadLSN: 100, Role: blockvol.RoleToWire(blockvol.RoleReplica), LastHeartbeat: time.Now()},
 		},
 	})
 	r.mu.Lock()
@ -877,6 +884,7 @@ func TestRegistry_PromoteBestReplica_WALLagIneligible(t *testing.T) {
 				HealthScore:   1.0,
 				WALHeadLSN:    800, // lag=200, tolerance=100
 				LastHeartbeat: time.Now(),
+				Role:          blockvol.RoleToWire(blockvol.RoleReplica),
 			},
 		},
 	})
@ -918,6 +926,8 @@ func TestRegistry_PromoteBestReplica_RebuildingIneligible(t *testing.T) {
 // Fix #2: Among eligible replicas, best (health+LSN) wins.
 func TestRegistry_PromoteBestReplica_EligibilityFiltersCorrectly(t *testing.T) {
 	r := NewBlockVolumeRegistry()
+	r.MarkBlockCapable("stale")
+	r.MarkBlockCapable("good")
 	r.Register(&BlockVolumeEntry{
 		Name:         "vol1",
 		VolumeServer: "primary",
@ -939,6 +949,7 @@ func TestRegistry_PromoteBestReplica_EligibilityFiltersCorrectly(t *testing.T) {
 				HealthScore:   0.8,
 				WALHeadLSN:    95,
 				LastHeartbeat: time.Now(),
+				Role:          blockvol.RoleToWire(blockvol.RoleReplica),
 			},
 		},
 	})
@ -956,6 +967,7 @@ func TestRegistry_PromoteBestReplica_EligibilityFiltersCorrectly(t *testing.T) {
 // Configurable tolerance: widen tolerance to allow lagging replicas.
 func TestRegistry_PromoteBestReplica_ConfigurableTolerance(t *testing.T) {
 	r := NewBlockVolumeRegistry()
+	r.MarkBlockCapable("lagging")
 	r.Register(&BlockVolumeEntry{
 		Name:         "vol1",
 		VolumeServer: "primary",
@ -970,6 +982,7 @@ func TestRegistry_PromoteBestReplica_ConfigurableTolerance(t *testing.T) {
 				HealthScore:   1.0,
 				WALHeadLSN:    800, // lag=200
 				LastHeartbeat: time.Now(),
+				Role:          blockvol.RoleToWire(blockvol.RoleReplica),
 			},
 		},
 	})
@ -992,6 +1005,236 @@ func TestRegistry_PromoteBestReplica_ConfigurableTolerance(t *testing.T) {
 	}
 }

+// B-12: PromoteBestReplica rejects dead replica (server not in blockServers).
+func TestRegistry_PromoteBestReplica_DeadServerIneligible(t *testing.T) {
+	r := NewBlockVolumeRegistry()
+	// Intentionally do NOT mark "dead-replica" as block-capable.
+	r.Register(&BlockVolumeEntry{
+		Name:         "vol1",
+		VolumeServer: "primary",
+		Path:         "/data/vol1.blk",
+		Epoch:        1,
+		LeaseTTL:     30 * time.Second,
+		WALHeadLSN:   100,
+		Replicas: []ReplicaInfo{
+			{
+				Server:        "dead-replica",
+				Path:          "/data/vol1.blk",
+				HealthScore:   1.0,
+				WALHeadLSN:    100,
+				LastHeartbeat: time.Now(),
+				Role:          blockvol.RoleToWire(blockvol.RoleReplica),
+			},
+		},
+	})
+
+	_, err := r.PromoteBestReplica("vol1")
+	if err == nil {
+		t.Fatal("expected error: dead replica should be rejected")
+	}
+	if !strings.Contains(err.Error(), "server_dead") {
+		t.Fatalf("error should mention server_dead, got: %v", err)
+	}
+}
+
+// B-12: Dead replica rejected but alive replica promoted when both exist.
+func TestRegistry_PromoteBestReplica_DeadSkipped_AlivePromoted(t *testing.T) {
+	r := NewBlockVolumeRegistry()
+	// Only mark s3 as alive.
+	r.MarkBlockCapable("s3")
+	r.Register(&BlockVolumeEntry{
+		Name:         "vol1",
+		VolumeServer: "primary",
+		Path:         "/data/vol1.blk",
+		Epoch:        1,
+		LeaseTTL:     30 * time.Second,
+		WALHeadLSN:   100,
+		Replicas: []ReplicaInfo{
+			{Server: "s2-dead", Path: "/r1.blk", HealthScore: 1.0, WALHeadLSN: 100, LastHeartbeat: time.Now(), Role: blockvol.RoleToWire(blockvol.RoleReplica)},
+			{Server: "s3", Path: "/r2.blk", HealthScore: 0.8, WALHeadLSN: 95, LastHeartbeat: time.Now(), Role: blockvol.RoleToWire(blockvol.RoleReplica)},
+		},
+	})
+
+	newEpoch, err := r.PromoteBestReplica("vol1")
+	if err != nil {
+		t.Fatalf("PromoteBestReplica: %v", err)
+	}
+	if newEpoch != 2 {
+		t.Fatalf("newEpoch: got %d, want 2", newEpoch)
+	}
+	e, _ := r.Lookup("vol1")
+	if e.VolumeServer != "s3" {
+		t.Fatalf("expected alive s3 promoted, got %q", e.VolumeServer)
+	}
+}
+
+// EvaluatePromotion returns read-only preflight without mutating registry.
+func TestRegistry_EvaluatePromotion_Basic(t *testing.T) {
+	r := NewBlockVolumeRegistry()
+	r.MarkBlockCapable("replica1")
+	r.Register(&BlockVolumeEntry{
+		Name:         "vol1",
+		VolumeServer: "primary",
+		Path:         "/data/vol1.blk",
+		Epoch:        5,
+		LeaseTTL:     30 * time.Second,
+		WALHeadLSN:   100,
+		Replicas: []ReplicaInfo{
+			{Server: "replica1", Path: "/r1.blk", HealthScore: 0.9, WALHeadLSN: 100, LastHeartbeat: time.Now(), Role: blockvol.RoleToWire(blockvol.RoleReplica)},
+		},
+	})
+
+	pf, err := r.EvaluatePromotion("vol1")
+	if err != nil {
+		t.Fatalf("EvaluatePromotion: %v", err)
+	}
+	if !pf.Promotable {
+		t.Fatalf("expected promotable, got reason: %s", pf.Reason)
+	}
+	if pf.Candidate == nil || pf.Candidate.Server != "replica1" {
+		t.Fatalf("expected candidate replica1, got %+v", pf.Candidate)
+	}
+
+	// Registry must be unmutated.
+	e, _ := r.Lookup("vol1")
+	if e.VolumeServer != "primary" {
+		t.Fatal("EvaluatePromotion should not mutate the registry")
+	}
+	if e.Epoch != 5 {
+		t.Fatal("EvaluatePromotion should not bump epoch")
+	}
+}
+
+// EvaluatePromotion with all replicas rejected.
+func TestRegistry_EvaluatePromotion_AllRejected(t *testing.T) {
+	r := NewBlockVolumeRegistry()
+	// No servers marked as block-capable.
+	r.Register(&BlockVolumeEntry{
+		Name:         "vol1",
+		VolumeServer: "primary",
+		Path:         "/data/vol1.blk",
+		Epoch:        1,
+		Replicas: []ReplicaInfo{
+			{Server: "dead1", Path: "/r1.blk", HealthScore: 1.0, WALHeadLSN: 100, LastHeartbeat: time.Now(), Role: blockvol.RoleToWire(blockvol.RoleReplica)},
+			{Server: "dead2", Path: "/r2.blk", HealthScore: 0.9, WALHeadLSN: 100, LastHeartbeat: time.Now(), Role: blockvol.RoleToWire(blockvol.RoleReplica)},
+		},
+	})
+
+	pf, err := r.EvaluatePromotion("vol1")
+	if err != nil {
+		t.Fatalf("EvaluatePromotion: %v", err)
+	}
+	if pf.Promotable {
+		t.Fatal("expected not promotable")
+	}
+	if len(pf.Rejections) != 2 {
+		t.Fatalf("expected 2 rejections, got %d", len(pf.Rejections))
+	}
+	for _, rej := range pf.Rejections {
+		if rej.Reason != "server_dead" {
+			t.Fatalf("expected server_dead rejection, got %q", rej.Reason)
+		}
+	}
+}
+
+// EvaluatePromotion for nonexistent volume.
+func TestRegistry_EvaluatePromotion_NotFound(t *testing.T) {
+	r := NewBlockVolumeRegistry()
+	_, err := r.EvaluatePromotion("nonexistent")
+	if err == nil {
+		t.Fatal("expected error for nonexistent volume")
+	}
+}
+
+// Replica created but never heartbeated is not promotable.
+func TestRegistry_PromoteBestReplica_NoHeartbeatIneligible(t *testing.T) {
+	r := NewBlockVolumeRegistry()
+	r.MarkBlockCapable("replica1")
+	r.Register(&BlockVolumeEntry{
+		Name:         "vol1",
+		VolumeServer: "primary",
+		Path:         "/data/vol1.blk",
+		Epoch:        1,
+		LeaseTTL:     30 * time.Second,
+		WALHeadLSN:   100,
+		Replicas: []ReplicaInfo{
+			{
+				Server:      "replica1",
+				Path:        "/r1.blk",
+				HealthScore: 1.0,
+				WALHeadLSN:  100,
+				Role:        blockvol.RoleToWire(blockvol.RoleReplica),
+				// LastHeartbeat: zero — never heartbeated
+			},
+		},
+	})
+
+	_, err := r.PromoteBestReplica("vol1")
+	if err == nil {
+		t.Fatal("expected error: replica with no heartbeat should be rejected")
+	}
+	if !strings.Contains(err.Error(), "no_heartbeat") {
+		t.Fatalf("error should mention no_heartbeat, got: %v", err)
+	}
+}
+
+// Replica with unset (zero) role is not promotable.
+func TestRegistry_PromoteBestReplica_UnsetRoleIneligible(t *testing.T) {
+	r := NewBlockVolumeRegistry()
+	r.MarkBlockCapable("replica1")
+	r.Register(&BlockVolumeEntry{
+		Name:         "vol1",
+		VolumeServer: "primary",
+		Path:         "/data/vol1.blk",
+		Epoch:        1,
+		LeaseTTL:     30 * time.Second,
+		WALHeadLSN:   100,
+		Replicas: []ReplicaInfo{
+			{
+				Server:        "replica1",
+				Path:          "/r1.blk",
+				HealthScore:   1.0,
+				WALHeadLSN:    100,
+				LastHeartbeat: time.Now(),
+				// Role: 0 — unset/RoleNone
+			},
+		},
+	})
+
+	_, err := r.PromoteBestReplica("vol1")
+	if err == nil {
+		t.Fatal("expected error: replica with unset role should be rejected")
+	}
+	if !strings.Contains(err.Error(), "wrong_role") {
+		t.Fatalf("error should mention wrong_role, got: %v", err)
+	}
+}
+
+// PromoteBestReplica clears RebuildListenAddr on promotion (B-11 partial fix).
+func TestRegistry_PromoteBestReplica_ClearsRebuildAddr(t *testing.T) {
+	r := NewBlockVolumeRegistry()
+	r.MarkBlockCapable("replica1")
+	r.Register(&BlockVolumeEntry{
+		Name:              "vol1",
+		VolumeServer:      "primary",
+		Path:              "/data/vol1.blk",
+		Epoch:             1,
+		RebuildListenAddr: "primary:15000",
+		Replicas: []ReplicaInfo{
+			{Server: "replica1", Path: "/r1.blk", HealthScore: 1.0, WALHeadLSN: 100, LastHeartbeat: time.Now(), Role: blockvol.RoleToWire(blockvol.RoleReplica)},
+		},
+	})
+
+	_, err := r.PromoteBestReplica("vol1")
+	if err != nil {
+		t.Fatalf("PromoteBestReplica: %v", err)
+	}
+	e, _ := r.Lookup("vol1")
+	if e.RebuildListenAddr != "" {
+		t.Fatalf("RebuildListenAddr should be cleared after promotion, got %q", e.RebuildListenAddr)
+	}
+}
+
 // --- LeaseGrants ---

 func TestRegistry_LeaseGrants_PrimaryOnly(t *testing.T) {
@ -1110,3 +1353,267 @@ func TestRegistry_LeaseGrants_UnknownServer(t *testing.T) {
 		t.Fatalf("expected nil for unknown server, got %+v", grants)
 	}
 }
+
+// ============================================================
+// CP11B-3 T2: IsBlockCapable + VolumesWithDeadPrimary
+// ============================================================
+
+func TestRegistry_IsBlockCapable(t *testing.T) {
+	r := NewBlockVolumeRegistry()
+	r.MarkBlockCapable("vs1:8080")
+
+	if !r.IsBlockCapable("vs1:8080") {
+		t.Fatal("vs1 should be block-capable")
+	}
+	if r.IsBlockCapable("vs2:8080") {
+		t.Fatal("vs2 should NOT be block-capable")
+	}
+
+	r.UnmarkBlockCapable("vs1:8080")
+	if r.IsBlockCapable("vs1:8080") {
+		t.Fatal("vs1 should no longer be block-capable after unmark")
+	}
+}
+
+func TestRegistry_VolumesWithDeadPrimary_Basic(t *testing.T) {
+	r := NewBlockVolumeRegistry()
+	r.MarkBlockCapable("vs1")
+	r.MarkBlockCapable("vs2")
+
+	r.Register(&BlockVolumeEntry{
+		Name: "vol1", VolumeServer: "vs1", Path: "/data/vol1.blk",
+		SizeBytes: 1 << 30, Epoch: 1, Role: blockvol.RoleToWire(blockvol.RolePrimary),
+		Status: StatusActive,
+		Replicas: []ReplicaInfo{{Server: "vs2", Path: "/data/vol1.blk"}},
+	})
+
+	// Both alive → no orphans.
+	orphaned := r.VolumesWithDeadPrimary("vs2")
+	if len(orphaned) != 0 {
+		t.Fatalf("expected 0 orphaned volumes, got %d", len(orphaned))
+	}
+
+	// Kill primary.
+	r.UnmarkBlockCapable("vs1")
+	orphaned = r.VolumesWithDeadPrimary("vs2")
+	if len(orphaned) != 1 || orphaned[0] != "vol1" {
+		t.Fatalf("expected [vol1], got %v", orphaned)
+	}
+}
+
+func TestRegistry_VolumesWithDeadPrimary_PrimaryServer_NotIncluded(t *testing.T) {
+	r := NewBlockVolumeRegistry()
+	r.MarkBlockCapable("vs1")
+
+	r.Register(&BlockVolumeEntry{
+		Name: "vol1", VolumeServer: "vs1", Path: "/data/vol1.blk",
+		SizeBytes: 1 << 30, Epoch: 1, Role: blockvol.RoleToWire(blockvol.RolePrimary),
+		Status: StatusActive,
+	})
+
+	// vs1 is the primary for vol1 — should NOT appear in orphaned list for vs1.
+	orphaned := r.VolumesWithDeadPrimary("vs1")
+	if len(orphaned) != 0 {
+		t.Fatalf("primary server should not appear in its own orphan list, got %v", orphaned)
+	}
+}
+
+// T6: EvaluatePromotion preflight includes primary liveness.
+func TestRegistry_EvaluatePromotion_PrimaryDead_StillShowsCandidate(t *testing.T) {
+	r := NewBlockVolumeRegistry()
+	r.MarkBlockCapable("vs1")
+	r.MarkBlockCapable("vs2")
+
+	r.Register(&BlockVolumeEntry{
+		Name: "vol1", VolumeServer: "vs1", Path: "/data/vol1.blk",
+		SizeBytes: 1 << 30, Epoch: 1, Role: blockvol.RoleToWire(blockvol.RolePrimary),
+		Status: StatusActive, LeaseTTL: 30 * time.Second,
+		Replicas: []ReplicaInfo{{
+			Server: "vs2", Path: "/data/vol1.blk", HealthScore: 1.0,
+			Role: blockvol.RoleToWire(blockvol.RoleReplica), LastHeartbeat: time.Now(),
+		}},
+	})
+
+	// Kill primary but keep vs2 alive.
+	r.UnmarkBlockCapable("vs1")
+
+	pf, err := r.EvaluatePromotion("vol1")
+	if err != nil {
+		t.Fatalf("EvaluatePromotion: %v", err)
+	}
+	if !pf.Promotable {
+		t.Fatalf("should be promotable (vs2 alive), reason=%s", pf.Reason)
+	}
+	if pf.Candidate.Server != "vs2" {
+		t.Fatalf("candidate should be vs2, got %q", pf.Candidate.Server)
+	}
+}
+
+// ============================================================
+// CP11B-3 T5: ManualPromote Dev Tests
+// ============================================================
+
+// T5: ManualPromote with empty target → auto-picks best candidate.
+func TestRegistry_ManualPromote_AutoTarget(t *testing.T) {
+	r := NewBlockVolumeRegistry()
+	r.MarkBlockCapable("best")
+	r.MarkBlockCapable("worse")
+	r.Register(&BlockVolumeEntry{
+		Name: "vol1", VolumeServer: "primary", Path: "/data/vol1.blk",
+		Epoch: 1, LeaseTTL: 30 * time.Second, WALHeadLSN: 100,
+		Replicas: []ReplicaInfo{
+			{Server: "worse", Path: "/r1.blk", HealthScore: 0.5, WALHeadLSN: 100,
+				LastHeartbeat: time.Now(), Role: blockvol.RoleToWire(blockvol.RoleReplica)},
+			{Server: "best", Path: "/r2.blk", HealthScore: 1.0, WALHeadLSN: 100,
+				LastHeartbeat: time.Now(), Role: blockvol.RoleToWire(blockvol.RoleReplica)},
+		},
+	})
+	// Primary not block-capable → non-force should still pass (primary_alive gate won't trigger).
+
+	newEpoch, _, _, pf, err := r.ManualPromote("vol1", "", false)
+	if err != nil {
+		t.Fatalf("ManualPromote: %v", err)
+	}
+	if newEpoch != 2 {
+		t.Fatalf("epoch: got %d, want 2", newEpoch)
+	}
+	if !pf.Promotable {
+		t.Fatal("should be promotable")
+	}
+	e, _ := r.Lookup("vol1")
+	if e.VolumeServer != "best" {
+		t.Fatalf("expected 'best' promoted, got %q", e.VolumeServer)
+	}
+}
+
+// T5: ManualPromote targets a specific replica (not the best by health).
+func TestRegistry_ManualPromote_SpecificTarget(t *testing.T) {
+	r := NewBlockVolumeRegistry()
+	r.MarkBlockCapable("r1")
+	r.MarkBlockCapable("r2")
+	r.Register(&BlockVolumeEntry{
+		Name: "vol1", VolumeServer: "primary", Path: "/data/vol1.blk",
+		Epoch: 1, LeaseTTL: 30 * time.Second,
+		Replicas: []ReplicaInfo{
+			{Server: "r1", Path: "/r1.blk", HealthScore: 1.0, WALHeadLSN: 100,
+				LastHeartbeat: time.Now(), Role: blockvol.RoleToWire(blockvol.RoleReplica)},
+			{Server: "r2", Path: "/r2.blk", HealthScore: 0.5, WALHeadLSN: 50,
+				LastHeartbeat: time.Now(), Role: blockvol.RoleToWire(blockvol.RoleReplica)},
+		},
+	})
+
+	// Target r2 specifically (worse health).
+	newEpoch, _, _, _, err := r.ManualPromote("vol1", "r2", false)
+	if err != nil {
+		t.Fatalf("ManualPromote: %v", err)
+	}
+	if newEpoch != 2 {
+		t.Fatalf("epoch: got %d, want 2", newEpoch)
+	}
+	e, _ := r.Lookup("vol1")
+	if e.VolumeServer != "r2" {
+		t.Fatalf("expected r2 promoted (specific target), got %q", e.VolumeServer)
+	}
+}
+
+// T5: ManualPromote with non-existent target → error.
+func TestRegistry_ManualPromote_TargetNotFound(t *testing.T) {
+	r := NewBlockVolumeRegistry()
+	r.MarkBlockCapable("r1")
+	r.Register(&BlockVolumeEntry{
+		Name: "vol1", VolumeServer: "primary", Path: "/data/vol1.blk",
+		Epoch: 1, LeaseTTL: 30 * time.Second,
+		Replicas: []ReplicaInfo{
+			{Server: "r1", Path: "/r1.blk", HealthScore: 1.0,
+				LastHeartbeat: time.Now(), Role: blockvol.RoleToWire(blockvol.RoleReplica)},
+		},
+	})
+
+	_, _, _, pf, err := r.ManualPromote("vol1", "nonexistent", false)
+	if err == nil {
+		t.Fatal("expected error for nonexistent target")
+	}
+	if pf.Reason != "target_not_found" {
+		t.Fatalf("expected target_not_found, got %q", pf.Reason)
+	}
+}
+
+// T5: ManualPromote non-force with alive primary → rejected.
+func TestRegistry_ManualPromote_PrimaryAlive_Rejected(t *testing.T) {
+	r := NewBlockVolumeRegistry()
+	r.MarkBlockCapable("primary")
+	r.MarkBlockCapable("r1")
+	r.Register(&BlockVolumeEntry{
+		Name: "vol1", VolumeServer: "primary", Path: "/data/vol1.blk",
+		Epoch: 1, LeaseTTL: 30 * time.Second,
+		Replicas: []ReplicaInfo{
+			{Server: "r1", Path: "/r1.blk", HealthScore: 1.0,
+				LastHeartbeat: time.Now(), Role: blockvol.RoleToWire(blockvol.RoleReplica)},
+		},
+	})
+
+	_, _, _, pf, err := r.ManualPromote("vol1", "", false)
+	if err == nil {
+		t.Fatal("expected rejection when primary alive and !force")
+	}
+	if pf.Reason != "primary_alive" {
+		t.Fatalf("expected primary_alive, got %q", pf.Reason)
+	}
+	// Verify no mutation.
+	e, _ := r.Lookup("vol1")
+	if e.VolumeServer != "primary" {
+		t.Fatalf("primary should not change, got %q", e.VolumeServer)
+	}
+}
+
+// T5: Force bypasses stale heartbeat and primary_alive gates.
+func TestRegistry_ManualPromote_Force_StaleHeartbeat(t *testing.T) {
+	r := NewBlockVolumeRegistry()
+	r.MarkBlockCapable("primary")
+	r.MarkBlockCapable("r1")
+	r.Register(&BlockVolumeEntry{
+		Name: "vol1", VolumeServer: "primary", Path: "/data/vol1.blk",
+		Epoch: 1, LeaseTTL: 30 * time.Second,
+		Replicas: []ReplicaInfo{
+			{Server: "r1", Path: "/r1.blk", HealthScore: 1.0,
+				LastHeartbeat: time.Now().Add(-10 * time.Minute), // stale
+				Role:          blockvol.RoleToWire(blockvol.RoleReplica)},
+		},
+	})
+
+	// Non-force: would fail on primary_alive.
+	// Force: bypasses primary_alive AND stale_heartbeat.
+	newEpoch, _, _, _, err := r.ManualPromote("vol1", "", true)
+	if err != nil {
+		t.Fatalf("force ManualPromote should succeed: %v", err)
+	}
+	if newEpoch != 2 {
+		t.Fatalf("epoch: got %d, want 2", newEpoch)
+	}
+	e, _ := r.Lookup("vol1")
+	if e.VolumeServer != "r1" {
+		t.Fatalf("expected r1 promoted via force, got %q", e.VolumeServer)
+	}
+}
+
+// T5: Force does NOT bypass server_dead (hard gate).
+func TestRegistry_ManualPromote_Force_StillRejectsDeadServer(t *testing.T) {
+	r := NewBlockVolumeRegistry()
+	// "dead" is NOT marked block-capable.
+	r.Register(&BlockVolumeEntry{
+		Name: "vol1", VolumeServer: "primary", Path: "/data/vol1.blk",
+		Epoch: 1, LeaseTTL: 30 * time.Second,
+		Replicas: []ReplicaInfo{
+			{Server: "dead", Path: "/r1.blk", HealthScore: 1.0,
+				LastHeartbeat: time.Now(), Role: blockvol.RoleToWire(blockvol.RoleReplica)},
+		},
+	})
+
+	_, _, _, pf, err := r.ManualPromote("vol1", "dead", true)
+	if err == nil {
+		t.Fatal("force should NOT bypass server_dead")
+	}
+	if len(pf.Rejections) == 0 || pf.Rejections[0].Reason != "server_dead" {
+		t.Fatalf("expected server_dead rejection, got %+v", pf.Rejections)
+	}
+}
--- a/weed/server/master_grpc_server.go
+++ b/weed/server/master_grpc_server.go
@ -278,6 +278,9 @@ func (ms *MasterServer) SendHeartbeat(stream master_pb.Seaweed_SendHeartbeatServ
 		// on subsequent heartbeats), never both in the same message.
 		if len(heartbeat.BlockVolumeInfos) > 0 || heartbeat.HasNoBlockVolumes {
 			ms.blockRegistry.UpdateFullHeartbeat(dn.Url(), heartbeat.BlockVolumeInfos)
+			// T2 (B-06): After updating registry from heartbeat, check if this server
+			// is a replica for any volume whose primary is dead. If so, promote.
+			ms.reevaluateOrphanedPrimaries(dn.Url())
 		} else if len(heartbeat.NewBlockVolumes) > 0 || len(heartbeat.DeletedBlockVolumes) > 0 {
 			ms.blockRegistry.UpdateDeltaHeartbeat(dn.Url(), heartbeat.NewBlockVolumes, heartbeat.DeletedBlockVolumes)
 		}
--- a/weed/server/master_grpc_server_block.go
+++ b/weed/server/master_grpc_server_block.go
@ -283,14 +283,16 @@ func (ms *MasterServer) tryCreateOneReplica(ctx context.Context, req *master_pb.
 		entry.RebuildListenAddr = primaryResult.RebuildListenAddr
 		// CP8-2: populate Replicas[].
 		entry.Replicas = append(entry.Replicas, ReplicaInfo{
-			Server:    replicaServerStr,
-			Path:      replicaResult.Path,
-			ISCSIAddr: replicaResult.ISCSIAddr,
-			IQN:       replicaResult.IQN,
-			NvmeAddr:  replicaResult.NvmeAddr,
-			NQN:       replicaResult.NQN,
-			DataAddr:  replicaResult.ReplicaDataAddr,
-			CtrlAddr:  replicaResult.ReplicaCtrlAddr,
+			Server:        replicaServerStr,
+			Path:          replicaResult.Path,
+			ISCSIAddr:     replicaResult.ISCSIAddr,
+			IQN:           replicaResult.IQN,
+			NvmeAddr:      replicaResult.NvmeAddr,
+			NQN:           replicaResult.NQN,
+			DataAddr:      replicaResult.ReplicaDataAddr,
+			CtrlAddr:      replicaResult.ReplicaCtrlAddr,
+			Role:          blockvol.RoleToWire(blockvol.RoleReplica),
+			LastHeartbeat: time.Now(),
 		})
 		return replicaServerStr
 	}
@ -409,6 +411,11 @@ func (ms *MasterServer) ExpandBlockVolume(ctx context.Context, req *master_pb.Ex
 		}
 	}()

+	// Test-only hook: inject failover between lock acquisition and re-read.
+	if ms.expandPreReadHook != nil {
+		ms.expandPreReadHook()
+	}
+
 	// B-09: Re-read entry after acquiring expand lock. Between the initial
 	// Lookup and AcquireExpandInflight, failover may have changed VolumeServer
 	// or Replicas. Using the stale snapshot would send PREPARE to dead nodes.
--- a/weed/server/master_grpc_server_block_test.go
+++ b/weed/server/master_grpc_server_block_test.go
@ -10,6 +10,7 @@ import (

 	"github.com/seaweedfs/seaweedfs/weed/pb/master_pb"
 	"github.com/seaweedfs/seaweedfs/weed/pb/volume_server_pb"
+	"github.com/seaweedfs/seaweedfs/weed/storage/blockvol"
 )

 // testMasterServer creates a minimal MasterServer with mock VS calls for testing.
@ -1112,6 +1113,9 @@ func TestMaster_NoNvmeFieldsWhenDisabled(t *testing.T) {

 func TestMaster_PromotionCopiesNvmeFields(t *testing.T) {
 	ms := testMasterServer(t)
+	// Mark servers as block-capable so promotion Gate 4 (liveness) passes.
+	ms.blockRegistry.MarkBlockCapable("vs1:9333")
+	ms.blockRegistry.MarkBlockCapable("vs2:9333")

 	// Directly register an entry with primary + replica, both having NVMe fields.
 	ms.blockRegistry.Register(&BlockVolumeEntry{
@ -1128,16 +1132,18 @@ func TestMaster_PromotionCopiesNvmeFields(t *testing.T) {
 		LeaseTTL:     30 * time.Second,
 		Replicas: []ReplicaInfo{
 			{
-				Server:      "vs2:9333",
-				Path:        "/data/ha-vol.blk",
-				IQN:         "iqn.2024.test:ha-vol-r",
-				ISCSIAddr:   "vs2:3260",
-				NvmeAddr:    "vs2:4420",
-				NQN:         "nqn.2024-01.com.seaweedfs:vol.ha-vol.vs2",
-				DataAddr:    "vs2:14260",
-				CtrlAddr:    "vs2:14261",
-				HealthScore: 0.95,
-				WALHeadLSN:  100,
+				Server:        "vs2:9333",
+				Path:          "/data/ha-vol.blk",
+				IQN:           "iqn.2024.test:ha-vol-r",
+				ISCSIAddr:     "vs2:3260",
+				NvmeAddr:      "vs2:4420",
+				NQN:           "nqn.2024-01.com.seaweedfs:vol.ha-vol.vs2",
+				DataAddr:      "vs2:14260",
+				CtrlAddr:      "vs2:14261",
+				HealthScore:   0.95,
+				WALHeadLSN:    100,
+				Role:          blockvol.RoleToWire(blockvol.RoleReplica),
+				LastHeartbeat: time.Now(),
 			},
 		},
 	})
@ -1654,10 +1660,11 @@ func TestMaster_ExpandCoordinated_RestartRecovery(t *testing.T) {
 }

 func TestMaster_ExpandCoordinated_B09_ReReadsEntryAfterLock(t *testing.T) {
-	// B-09: If failover changes VolumeServer between initial Lookup and
-	// AcquireExpandInflight, the coordinator must use the fresh entry,
-	// not the stale one. Use RF=3 so promotion still leaves 1 replica
-	// and the coordinated path is taken.
+	// B-09: Exercises the actual race window — failover happens BETWEEN
+	// the initial Lookup (line 380) and the post-lock re-read (line 419).
+	// Uses expandPreReadHook to inject PromoteBestReplica at the exact
+	// interleaving point. RF=3 so promotion leaves 1 replica and the
+	// coordinated path is taken.
 	ms := testMasterServerWithExpandMocks(t)
 	ms.blockRegistry.MarkBlockCapable("vs1:9333")
 	ms.blockRegistry.MarkBlockCapable("vs2:9333")
@ -1689,31 +1696,39 @@ func TestMaster_ExpandCoordinated_B09_ReReadsEntryAfterLock(t *testing.T) {
 		return 2 << 30, nil
 	}

-	// Simulate failover: promote best replica. With RF=3, one replica
-	// becomes primary and the other stays as replica → coordinated path.
-	ms.blockRegistry.PromoteBestReplica("b09-vol")
-
-	entry, _ = ms.blockRegistry.Lookup("b09-vol")
-	newPrimary := entry.VolumeServer
-	if newPrimary == originalPrimary {
-		t.Fatal("promotion didn't change primary")
-	}
-	if len(entry.Replicas) == 0 {
-		t.Fatal("expected at least 1 replica after RF=3 promotion")
+	// Hook fires AFTER AcquireExpandInflight but BEFORE the re-read Lookup.
+	// This is the exact race window: the initial Lookup already returned
+	// the old primary, but failover changes it before the re-read.
+	hookFired := false
+	ms.expandPreReadHook = func() {
+		hookFired = true
+		ms.blockRegistry.PromoteBestReplica("b09-vol")
 	}

-	// Expand should use the NEW primary (post-failover), not the old one.
+	// At this point, the initial Lookup inside ExpandBlockVolume will see
+	// originalPrimary. The hook then promotes, changing the primary.
+	// The re-read must pick up the new primary.
 	resp, err := ms.ExpandBlockVolume(context.Background(), &master_pb.ExpandBlockVolumeRequest{
 		Name: "b09-vol", NewSizeBytes: 2 << 30,
 	})
 	if err != nil {
 		t.Fatalf("expand: %v", err)
 	}
+	if !hookFired {
+		t.Fatal("expandPreReadHook was not called — race window not exercised")
+	}
 	if resp.CapacityBytes != 2<<30 {
 		t.Fatalf("capacity: got %d", resp.CapacityBytes)
 	}

-	// First PREPARE should have gone to the new primary, not the old one.
+	// Verify: after the hook promoted, the re-read must have picked up
+	// the new primary. The first PREPARE should go to the new primary.
+	entry, _ = ms.blockRegistry.Lookup("b09-vol")
+	newPrimary := entry.VolumeServer
+	if newPrimary == originalPrimary {
+		t.Fatal("promotion didn't change primary")
+	}
+
 	if len(preparedServers) == 0 {
 		t.Fatal("no prepare calls recorded")
 	}
@ -1721,7 +1736,7 @@ func TestMaster_ExpandCoordinated_B09_ReReadsEntryAfterLock(t *testing.T) {
 		t.Fatalf("PREPARE went to %q (stale), should go to %q (fresh primary)",
 			preparedServers[0], newPrimary)
 	}
-	// Verify old primary was NOT contacted.
+	// Verify old primary was NOT contacted at all.
 	for _, s := range preparedServers {
 		if s == originalPrimary {
 			t.Fatalf("PREPARE sent to old primary %q — stale entry used", originalPrimary)
--- a/weed/server/master_server.go
+++ b/weed/server/master_server.go
@ -109,6 +109,10 @@ type MasterServer struct {
 	blockVSCommitExpand  func(ctx context.Context, server string, name string, expandEpoch uint64) (uint64, error)
 	blockVSCancelExpand  func(ctx context.Context, server string, name string, expandEpoch uint64) error
 	nextExpandEpoch      atomic.Uint64
+
+	// Test-only hook: called after AcquireExpandInflight but before the
+	// re-read Lookup in coordinated expand. Nil in production.
+	expandPreReadHook func()
 }

 func NewMasterServer(r *mux.Router, option *MasterOption, peers map[string]pb.ServerAddress) *MasterServer {
@ -224,6 +228,8 @@ func NewMasterServer(r *mux.Router, option *MasterOption, peers map[string]pb.Se
 		r.HandleFunc("/block/volume/{name}", ms.guard.WhiteList(requestIDMiddleware(ms.blockVolumeLookupHandler))).Methods("GET")
 		r.HandleFunc("/block/volumes", ms.guard.WhiteList(requestIDMiddleware(ms.blockVolumeListHandler))).Methods("GET")
 		r.HandleFunc("/block/volume/{name}/expand", ms.proxyToLeader(ms.guard.WhiteList(requestIDMiddleware(ms.blockVolumeExpandHandler)))).Methods("POST")
+		r.HandleFunc("/block/volume/{name}/preflight", ms.guard.WhiteList(requestIDMiddleware(ms.blockVolumePreflightHandler))).Methods("GET")
+		r.HandleFunc("/block/volume/{name}/promote", ms.proxyToLeader(ms.guard.WhiteList(requestIDMiddleware(ms.blockVolumePromoteHandler)))).Methods("POST")
 		r.HandleFunc("/block/assign", ms.proxyToLeader(ms.guard.WhiteList(requestIDMiddleware(ms.blockAssignHandler)))).Methods("POST")
 		r.HandleFunc("/block/servers", ms.guard.WhiteList(requestIDMiddleware(ms.blockServersHandler))).Methods("GET")
 		r.HandleFunc("/block/status", ms.guard.WhiteList(requestIDMiddleware(ms.blockStatusHandler))).Methods("GET")
--- a/weed/server/master_server_handlers_block.go
+++ b/weed/server/master_server_handlers_block.go
@ -7,6 +7,7 @@ import (

 	"github.com/gorilla/mux"

+	"github.com/seaweedfs/seaweedfs/weed/glog"
 	"github.com/seaweedfs/seaweedfs/weed/pb/master_pb"
 	"github.com/seaweedfs/seaweedfs/weed/storage/blockvol"
 	"github.com/seaweedfs/seaweedfs/weed/storage/blockvol/blockapi"
@ -206,6 +207,99 @@ func (ms *MasterServer) blockStatusHandler(w http.ResponseWriter, r *http.Reques
 	writeJsonQuiet(w, r, http.StatusOK, status)
 }

+// blockVolumePreflightHandler handles GET /block/volume/{name}/preflight.
+// Returns a read-only promotion preflight evaluation for the named volume.
+func (ms *MasterServer) blockVolumePreflightHandler(w http.ResponseWriter, r *http.Request) {
+	name := mux.Vars(r)["name"]
+	if name == "" {
+		writeJsonError(w, r, http.StatusBadRequest, fmt.Errorf("name is required"))
+		return
+	}
+
+	pf, err := ms.blockRegistry.EvaluatePromotion(name)
+	if err != nil {
+		writeJsonError(w, r, http.StatusNotFound, err)
+		return
+	}
+
+	resp := blockapi.PreflightResponse{
+		VolumeName: pf.VolumeName,
+		Promotable: pf.Promotable,
+		Reason:     pf.Reason,
+	}
+	if pf.Candidate != nil {
+		resp.CandidateServer = pf.Candidate.Server
+		resp.CandidateHealth = pf.Candidate.HealthScore
+		resp.CandidateWALLSN = pf.Candidate.WALHeadLSN
+	}
+	for _, rej := range pf.Rejections {
+		resp.Rejections = append(resp.Rejections, blockapi.PreflightRejection{
+			Server: rej.Server,
+			Reason: rej.Reason,
+		})
+	}
+	// Add primary liveness info.
+	entry, ok := ms.blockRegistry.Lookup(name)
+	if ok {
+		resp.PrimaryServer = entry.VolumeServer
+		resp.PrimaryAlive = ms.blockRegistry.IsBlockCapable(entry.VolumeServer)
+	}
+	writeJsonQuiet(w, r, http.StatusOK, resp)
+}
+
+// blockVolumePromoteHandler handles POST /block/volume/{name}/promote.
+// Triggers a manual promotion for the named block volume.
+func (ms *MasterServer) blockVolumePromoteHandler(w http.ResponseWriter, r *http.Request) {
+	name := mux.Vars(r)["name"]
+	if name == "" {
+		writeJsonError(w, r, http.StatusBadRequest, fmt.Errorf("name is required"))
+		return
+	}
+
+	var req blockapi.PromoteVolumeRequest
+	if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
+		writeJsonError(w, r, http.StatusBadRequest, fmt.Errorf("decode request: %w", err))
+		return
+	}
+
+	// ManualPromote captures oldPrimary/oldPath under lock to avoid TOCTOU (BUG-T5-2).
+	newEpoch, oldPrimary, oldPath, pf, err := ms.blockRegistry.ManualPromote(name, req.TargetServer, req.Force)
+	if err != nil {
+		// Distinguish not-found from rejection.
+		status := http.StatusConflict
+		if pf.Reason == "volume not found" {
+			status = http.StatusNotFound
+		}
+		// Build structured rejection response.
+		resp := blockapi.PromoteVolumeResponse{
+			Reason: pf.Reason,
+		}
+		for _, rej := range pf.Rejections {
+			resp.Rejections = append(resp.Rejections, blockapi.PreflightRejection{
+				Server: rej.Server,
+				Reason: rej.Reason,
+			})
+		}
+		glog.V(0).Infof("manual promote %q rejected: %s", name, pf.Reason)
+		writeJsonQuiet(w, r, status, resp)
+		return
+	}
+
+	// Post-promotion orchestration (same as auto path).
+	ms.finalizePromotion(name, oldPrimary, oldPath, newEpoch)
+
+	if req.Reason != "" {
+		glog.V(0).Infof("manual promote %q: reason=%q", name, req.Reason)
+	}
+
+	// Re-read to get the new primary server name.
+	entry, _ := ms.blockRegistry.Lookup(name)
+	writeJsonQuiet(w, r, http.StatusOK, blockapi.PromoteVolumeResponse{
+		NewPrimary: entry.VolumeServer,
+		Epoch:      newEpoch,
+	})
+}
+
 // entryToVolumeInfo converts a BlockVolumeEntry to a blockapi.VolumeInfo.
 func entryToVolumeInfo(e *BlockVolumeEntry) blockapi.VolumeInfo {
 	status := "pending"
@ -239,6 +333,8 @@ func entryToVolumeInfo(e *BlockVolumeEntry) blockapi.VolumeInfo {
 		HealthScore:      e.HealthScore,
 		ReplicaDegraded:  e.ReplicaDegraded,
 		DurabilityMode:   durMode,
+		NvmeAddr:         e.NvmeAddr,
+		NQN:              e.NQN,
 	}
 	for _, ri := range e.Replicas {
 		info.Replicas = append(info.Replicas, blockapi.ReplicaDetail{
--- a/weed/server/qa_block_cp11b3_adversarial_test.go
+++ b/weed/server/qa_block_cp11b3_adversarial_test.go
--- a/weed/server/qa_block_cp63_test.go
+++ b/weed/server/qa_block_cp63_test.go
@ -40,6 +40,11 @@ func testMSForQA(t *testing.T) *MasterServer {
 // registerQAVolume creates a volume entry with optional replica, configurable lease state.
 func registerQAVolume(t *testing.T, ms *MasterServer, name, primary, replica string, epoch uint64, leaseTTL time.Duration, leaseExpired bool) {
 	t.Helper()
+	// Mark servers as block-capable so promotion Gate 4 (liveness) passes.
+	ms.blockRegistry.MarkBlockCapable(primary)
+	if replica != "" {
+		ms.blockRegistry.MarkBlockCapable(replica)
+	}
 	entry := &BlockVolumeEntry{
 		Name:         name,
 		VolumeServer: primary,
@ -65,11 +70,13 @@ func registerQAVolume(t *testing.T, ms *MasterServer, name, primary, replica str
 		// CP8-2: also populate Replicas[].
 		entry.Replicas = []ReplicaInfo{
 			{
-				Server:      replica,
-				Path:        fmt.Sprintf("/data/%s.blk", name),
-				IQN:         fmt.Sprintf("iqn.2024.test:%s-r", name),
-				ISCSIAddr:   replica + ":3260",
-				HealthScore: 1.0,
+				Server:        replica,
+				Path:          fmt.Sprintf("/data/%s.blk", name),
+				IQN:           fmt.Sprintf("iqn.2024.test:%s-r", name),
+				ISCSIAddr:     replica + ":3260",
+				HealthScore:   1.0,
+				Role:          blockvol.RoleToWire(blockvol.RoleReplica),
+				LastHeartbeat: time.Now(),
 			},
 		}
 	}
@ -398,7 +405,15 @@ func TestQA_Failover_PromoteIdempotent_NoReplicaAfterFirstSwap(t *testing.T) {
 	// Reconnect vs1 first so it becomes a replica.
 	ms.recoverBlockVolumes("vs1")

+	// Simulate rebuild completion: mark vs1 as a healthy replica.
 	e, _ := ms.blockRegistry.Lookup("vol1")
+	for i := range e.Replicas {
+		if e.Replicas[i].Server == "vs1" {
+			e.Replicas[i].Role = blockvol.RoleToWire(blockvol.RoleReplica)
+			e.Replicas[i].LastHeartbeat = time.Now()
+			e.Replicas[i].HealthScore = 1.0
+		}
+	}
 	e.LastLeaseGrant = time.Now().Add(-1 * time.Minute) // expire the new lease
 	ms.failoverBlockVolumes("vs2")

--- a/weed/server/qa_block_expand_adversarial_test.go
+++ b/weed/server/qa_block_expand_adversarial_test.go
@ -0,0 +1,485 @@
+package weed_server
+
+import (
+	"context"
+	"fmt"
+	"sync"
+	"sync/atomic"
+	"testing"
+	"time"
+
+	"github.com/seaweedfs/seaweedfs/weed/pb/master_pb"
+	"github.com/seaweedfs/seaweedfs/weed/storage/blockvol"
+)
+
+// ============================================================
+// CP11A-2 Adversarial Test Suite: B-09 + B-10
+//
+// 8 scenarios stress-testing the coordinated expand path under
+// failover, concurrent heartbeats, and partial failures.
+// ============================================================
+
+// qaExpandMaster creates a MasterServer with 3 block-capable servers
+// and default expand mocks for adversarial testing.
+func qaExpandMaster(t *testing.T) *MasterServer {
+	t.Helper()
+	ms := &MasterServer{
+		blockRegistry:        NewBlockVolumeRegistry(),
+		blockAssignmentQueue: NewBlockAssignmentQueue(),
+		blockFailover:        newBlockFailoverState(),
+	}
+	ms.blockVSAllocate = func(ctx context.Context, server string, name string, sizeBytes uint64, diskType string, durabilityMode string) (*blockAllocResult, error) {
+		return &blockAllocResult{
+			Path:              fmt.Sprintf("/data/%s.blk", name),
+			IQN:               fmt.Sprintf("iqn.2024.test:%s", name),
+			ISCSIAddr:         server + ":3260",
+			ReplicaDataAddr:   server + ":14260",
+			ReplicaCtrlAddr:   server + ":14261",
+			RebuildListenAddr: server + ":15000",
+		}, nil
+	}
+	ms.blockVSDelete = func(ctx context.Context, server string, name string) error {
+		return nil
+	}
+	ms.blockVSExpand = func(ctx context.Context, server string, name string, newSize uint64) (uint64, error) {
+		return newSize, nil
+	}
+	ms.blockVSPrepareExpand = func(ctx context.Context, server string, name string, newSize, expandEpoch uint64) error {
+		return nil
+	}
+	ms.blockVSCommitExpand = func(ctx context.Context, server string, name string, expandEpoch uint64) (uint64, error) {
+		return 2 << 30, nil
+	}
+	ms.blockVSCancelExpand = func(ctx context.Context, server string, name string, expandEpoch uint64) error {
+		return nil
+	}
+	ms.blockRegistry.MarkBlockCapable("vs1:9333")
+	ms.blockRegistry.MarkBlockCapable("vs2:9333")
+	ms.blockRegistry.MarkBlockCapable("vs3:9333")
+	return ms
+}
+
+// qaCreateRF creates a volume with the given replica factor.
+func qaCreateRF(t *testing.T, ms *MasterServer, name string, rf uint32) {
+	t.Helper()
+	_, err := ms.CreateBlockVolume(context.Background(), &master_pb.CreateBlockVolumeRequest{
+		Name:          name,
+		SizeBytes:     1 << 30,
+		ReplicaFactor: rf,
+	})
+	if err != nil {
+		t.Fatalf("create %s RF=%d: %v", name, rf, err)
+	}
+}
+
+// ────────────────────────────────────────────────────────────
+// QA-B09-1: ExpandAfterDoubleFailover_RF3
+//
+// RF=3 volume. Primary dies → promote replica A. Then replica A
+// (now primary) dies → promote replica B. Expand must reach
+// replica B (the second-generation primary), not the original.
+// ────────────────────────────────────────────────────────────
+func TestQA_B09_ExpandAfterDoubleFailover_RF3(t *testing.T) {
+	ms := qaExpandMaster(t)
+	qaCreateRF(t, ms, "dbl-failover", 3)
+
+	entry, _ := ms.blockRegistry.Lookup("dbl-failover")
+	gen0Primary := entry.VolumeServer
+
+	// First failover: kill original primary.
+	ms.blockRegistry.PromoteBestReplica("dbl-failover")
+	entry, _ = ms.blockRegistry.Lookup("dbl-failover")
+	gen1Primary := entry.VolumeServer
+	if gen1Primary == gen0Primary {
+		t.Fatal("first promotion didn't change primary")
+	}
+
+	// Second failover: kill gen1 primary.
+	// Need to ensure the remaining replica has a fresh heartbeat.
+	if len(entry.Replicas) == 0 {
+		t.Fatal("no replicas left after first promotion (need RF=3)")
+	}
+	ms.blockRegistry.PromoteBestReplica("dbl-failover")
+	entry, _ = ms.blockRegistry.Lookup("dbl-failover")
+	gen2Primary := entry.VolumeServer
+	if gen2Primary == gen1Primary || gen2Primary == gen0Primary {
+		t.Fatalf("second promotion should pick a new server, got %q (gen0=%q gen1=%q)",
+			gen2Primary, gen0Primary, gen1Primary)
+	}
+
+	// Track PREPARE targets.
+	var preparedServers []string
+	ms.blockVSPrepareExpand = func(ctx context.Context, server string, name string, newSize, expandEpoch uint64) error {
+		preparedServers = append(preparedServers, server)
+		return nil
+	}
+
+	// Expand — standalone path since no replicas remain after 2 promotions.
+	_, err := ms.ExpandBlockVolume(context.Background(), &master_pb.ExpandBlockVolumeRequest{
+		Name: "dbl-failover", NewSizeBytes: 2 << 30,
+	})
+	if err != nil {
+		t.Fatalf("expand: %v", err)
+	}
+
+	// If standalone path was taken (no replicas), preparedServers is empty — that's fine.
+	// If coordinated path was taken, first PREPARE must target gen2Primary.
+	if len(preparedServers) > 0 && preparedServers[0] != gen2Primary {
+		t.Fatalf("PREPARE went to %q, want gen2 primary %q", preparedServers[0], gen2Primary)
+	}
+}
+
+// ────────────────────────────────────────────────────────────
+// QA-B09-2: ExpandSeesDeletedVolume_AfterLockAcquire
+//
+// Volume is deleted between the initial Lookup (succeeds) and
+// the re-read after AcquireExpandInflight. The re-read must
+// detect the deletion and fail cleanly.
+// ────────────────────────────────────────────────────────────
+func TestQA_B09_ExpandSeesDeletedVolume_AfterLockAcquire(t *testing.T) {
+	ms := qaExpandMaster(t)
+	qaCreateRF(t, ms, "disappear", 2)
+
+	// Hook PREPARE to delete the volume before it runs.
+	// The B-09 re-read happens before PREPARE, so we simulate deletion
+	// between initial Lookup and AcquireExpandInflight by having a
+	// goroutine that deletes the entry while expand is in progress.
+	// Instead, test directly: acquire expand lock, then unregister, then
+	// call ExpandBlockVolume — it should fail on re-read.
+
+	// Acquire expand lock manually first so the real call gets blocked.
+	// Then verify the error path by attempting a second expand.
+	if !ms.blockRegistry.AcquireExpandInflight("disappear", 2<<30, 1) {
+		t.Fatal("AcquireExpandInflight should succeed")
+	}
+
+	// Try another expand while locked — should fail with "already in progress".
+	_, err := ms.ExpandBlockVolume(context.Background(), &master_pb.ExpandBlockVolumeRequest{
+		Name: "disappear", NewSizeBytes: 2 << 30,
+	})
+	if err == nil {
+		t.Fatal("expand should fail when lock is held")
+	}
+
+	// Release and delete the volume.
+	ms.blockRegistry.ReleaseExpandInflight("disappear")
+	ms.blockRegistry.Unregister("disappear")
+
+	// Now expand on a deleted volume — should fail on initial Lookup.
+	_, err = ms.ExpandBlockVolume(context.Background(), &master_pb.ExpandBlockVolumeRequest{
+		Name: "disappear", NewSizeBytes: 2 << 30,
+	})
+	if err == nil {
+		t.Fatal("expand on deleted volume should fail")
+	}
+}
+
+// ────────────────────────────────────────────────────────────
+// QA-B09-3: ConcurrentExpandAndFailover
+//
+// Expand and failover race on the same volume. Neither should
+// panic, and the volume must be in a consistent state afterward.
+// ────────────────────────────────────────────────────────────
+func TestQA_B09_ConcurrentExpandAndFailover(t *testing.T) {
+	ms := qaExpandMaster(t)
+	qaCreateRF(t, ms, "race-vol", 3)
+
+	entry, _ := ms.blockRegistry.Lookup("race-vol")
+	primary := entry.VolumeServer
+
+	// Make PREPARE slow so expand holds the lock longer.
+	ms.blockVSPrepareExpand = func(ctx context.Context, server string, name string, newSize, expandEpoch uint64) error {
+		time.Sleep(5 * time.Millisecond)
+		return nil
+	}
+
+	var wg sync.WaitGroup
+
+	// Goroutine 1: expand.
+	wg.Add(1)
+	go func() {
+		defer wg.Done()
+		ms.ExpandBlockVolume(context.Background(), &master_pb.ExpandBlockVolumeRequest{
+			Name: "race-vol", NewSizeBytes: 2 << 30,
+		})
+		// Error is OK — we're testing for panics and consistency.
+	}()
+
+	// Goroutine 2: failover kills primary.
+	wg.Add(1)
+	go func() {
+		defer wg.Done()
+		time.Sleep(2 * time.Millisecond) // slight delay to let expand start
+		ms.failoverBlockVolumes(primary)
+	}()
+
+	wg.Wait()
+
+	// Volume must still exist regardless of outcome.
+	_, ok := ms.blockRegistry.Lookup("race-vol")
+	if !ok {
+		t.Fatal("volume must survive concurrent expand + failover")
+	}
+}
+
+// ────────────────────────────────────────────────────────────
+// QA-B09-4: ConcurrentExpandsSameVolume
+//
+// Two goroutines try to expand the same volume simultaneously.
+// Exactly one should succeed, the other should get "already in
+// progress". No panic, no double-commit.
+// ────────────────────────────────────────────────────────────
+func TestQA_B09_ConcurrentExpandsSameVolume(t *testing.T) {
+	ms := qaExpandMaster(t)
+	qaCreateRF(t, ms, "dup-expand", 2)
+
+	var commitCount atomic.Int32
+	ms.blockVSPrepareExpand = func(ctx context.Context, server string, name string, newSize, expandEpoch uint64) error {
+		time.Sleep(5 * time.Millisecond) // slow prepare
+		return nil
+	}
+	ms.blockVSCommitExpand = func(ctx context.Context, server string, name string, expandEpoch uint64) (uint64, error) {
+		commitCount.Add(1)
+		return 2 << 30, nil
+	}
+
+	var wg sync.WaitGroup
+	var successes atomic.Int32
+	var failures atomic.Int32
+
+	for i := 0; i < 2; i++ {
+		wg.Add(1)
+		go func() {
+			defer wg.Done()
+			_, err := ms.ExpandBlockVolume(context.Background(), &master_pb.ExpandBlockVolumeRequest{
+				Name: "dup-expand", NewSizeBytes: 2 << 30,
+			})
+			if err == nil {
+				successes.Add(1)
+			} else {
+				failures.Add(1)
+			}
+		}()
+	}
+	wg.Wait()
+
+	if successes.Load() != 1 {
+		t.Fatalf("expected exactly 1 success, got %d", successes.Load())
+	}
+	if failures.Load() != 1 {
+		t.Fatalf("expected exactly 1 failure (already in progress), got %d", failures.Load())
+	}
+}
+
+// ────────────────────────────────────────────────────────────
+// QA-B10-1: RepeatedEmptyHeartbeats_DuringExpand
+//
+// Multiple empty heartbeats from the primary during expand.
+// Entry must survive all of them — not just the first.
+// ────────────────────────────────────────────────────────────
+func TestQA_B10_RepeatedEmptyHeartbeats_DuringExpand(t *testing.T) {
+	ms := qaExpandMaster(t)
+	qaCreateRF(t, ms, "multi-hb", 2)
+
+	entry, _ := ms.blockRegistry.Lookup("multi-hb")
+	primary := entry.VolumeServer
+
+	if !ms.blockRegistry.AcquireExpandInflight("multi-hb", 2<<30, 42) {
+		t.Fatal("acquire expand lock")
+	}
+
+	// 10 empty heartbeats from the primary — each one would delete
+	// the entry without the B-10 guard.
+	for i := 0; i < 10; i++ {
+		ms.blockRegistry.UpdateFullHeartbeat(primary, []*master_pb.BlockVolumeInfoMessage{})
+	}
+
+	_, ok := ms.blockRegistry.Lookup("multi-hb")
+	if !ok {
+		t.Fatal("entry deleted after repeated empty heartbeats during expand")
+	}
+
+	ms.blockRegistry.ReleaseExpandInflight("multi-hb")
+}
+
+// ────────────────────────────────────────────────────────────
+// QA-B10-2: ExpandFailed_HeartbeatStillProtected
+//
+// After MarkExpandFailed (primary committed, replica didn't),
+// empty heartbeats must NOT delete the entry. ExpandFailed
+// keeps ExpandInProgress=true as a size-suppression guard.
+// ────────────────────────────────────────────────────────────
+func TestQA_B10_ExpandFailed_HeartbeatStillProtected(t *testing.T) {
+	ms := qaExpandMaster(t)
+	qaCreateRF(t, ms, "fail-hb", 2)
+
+	entry, _ := ms.blockRegistry.Lookup("fail-hb")
+	primary := entry.VolumeServer
+
+	if !ms.blockRegistry.AcquireExpandInflight("fail-hb", 2<<30, 42) {
+		t.Fatal("acquire expand lock")
+	}
+	ms.blockRegistry.MarkExpandFailed("fail-hb")
+
+	// Empty heartbeat should not delete — ExpandFailed keeps ExpandInProgress=true.
+	ms.blockRegistry.UpdateFullHeartbeat(primary, []*master_pb.BlockVolumeInfoMessage{})
+
+	e, ok := ms.blockRegistry.Lookup("fail-hb")
+	if !ok {
+		t.Fatal("entry deleted during ExpandFailed state")
+	}
+	if !e.ExpandFailed {
+		t.Fatal("ExpandFailed should still be true")
+	}
+	if !e.ExpandInProgress {
+		t.Fatal("ExpandInProgress should still be true")
+	}
+
+	// After ClearExpandFailed, empty heartbeat should delete normally.
+	ms.blockRegistry.ClearExpandFailed("fail-hb")
+	ms.blockRegistry.UpdateFullHeartbeat(primary, []*master_pb.BlockVolumeInfoMessage{})
+
+	_, ok = ms.blockRegistry.Lookup("fail-hb")
+	if ok {
+		t.Fatal("entry should be deleted after ClearExpandFailed + empty heartbeat")
+	}
+}
+
+// ────────────────────────────────────────────────────────────
+// QA-B10-3: HeartbeatSizeSuppress_DuringExpand
+//
+// Primary reports a stale (old) size during coordinated expand.
+// Registry must NOT downgrade SizeBytes — the pending expand
+// size is authoritative until commit or release.
+// ────────────────────────────────────────────────────────────
+func TestQA_B10_HeartbeatSizeSuppress_DuringExpand(t *testing.T) {
+	ms := qaExpandMaster(t)
+	qaCreateRF(t, ms, "size-suppress", 2)
+
+	entry, _ := ms.blockRegistry.Lookup("size-suppress")
+	primary := entry.VolumeServer
+	origSize := entry.SizeBytes
+
+	if !ms.blockRegistry.AcquireExpandInflight("size-suppress", 2<<30, 42) {
+		t.Fatal("acquire expand lock")
+	}
+
+	// Heartbeat reports old size (expand hasn't committed on VS yet).
+	ms.blockRegistry.UpdateFullHeartbeat(primary, []*master_pb.BlockVolumeInfoMessage{
+		{
+			Path:       "/data/size-suppress.blk",
+			VolumeSize: origSize, // old size
+			Epoch:      1,
+			Role:       blockvol.RoleToWire(blockvol.RolePrimary),
+		},
+	})
+
+	entry, _ = ms.blockRegistry.Lookup("size-suppress")
+	if entry.SizeBytes != origSize {
+		t.Fatalf("size should remain %d during expand, got %d", origSize, entry.SizeBytes)
+	}
+
+	// Heartbeat reports a LARGER size (stale from previous expand or bug).
+	// Still must not update — coordinated expand owns the size.
+	ms.blockRegistry.UpdateFullHeartbeat(primary, []*master_pb.BlockVolumeInfoMessage{
+		{
+			Path:       "/data/size-suppress.blk",
+			VolumeSize: 5 << 30, // bogus large size
+			Epoch:      1,
+			Role:       blockvol.RoleToWire(blockvol.RolePrimary),
+		},
+	})
+
+	entry, _ = ms.blockRegistry.Lookup("size-suppress")
+	if entry.SizeBytes != origSize {
+		t.Fatalf("size should remain %d (suppressed), got %d", origSize, entry.SizeBytes)
+	}
+
+	ms.blockRegistry.ReleaseExpandInflight("size-suppress")
+}
+
+// ────────────────────────────────────────────────────────────
+// QA-B10-4: ConcurrentHeartbeatsAndExpand
+//
+// Simultaneous full heartbeats from primary and replicas while
+// expand runs on another goroutine. Must not panic, must not
+// orphan the entry, and expand must either succeed or fail
+// cleanly with a clear error.
+// ────────────────────────────────────────────────────────────
+func TestQA_B10_ConcurrentHeartbeatsAndExpand(t *testing.T) {
+	ms := qaExpandMaster(t)
+	qaCreateRF(t, ms, "hb-expand-race", 2)
+
+	entry, _ := ms.blockRegistry.Lookup("hb-expand-race")
+	primary := entry.VolumeServer
+	replica := ""
+	if len(entry.Replicas) > 0 {
+		replica = entry.Replicas[0].Server
+	}
+
+	ms.blockVSPrepareExpand = func(ctx context.Context, server string, name string, newSize, expandEpoch uint64) error {
+		time.Sleep(2 * time.Millisecond)
+		return nil
+	}
+
+	var wg sync.WaitGroup
+	const rounds = 30
+
+	// Goroutine 1: expand.
+	wg.Add(1)
+	go func() {
+		defer wg.Done()
+		ms.ExpandBlockVolume(context.Background(), &master_pb.ExpandBlockVolumeRequest{
+			Name: "hb-expand-race", NewSizeBytes: 2 << 30,
+		})
+	}()
+
+	// Goroutine 2: primary heartbeats (mix of reporting and not reporting).
+	wg.Add(1)
+	go func() {
+		defer wg.Done()
+		for i := 0; i < rounds; i++ {
+			if i%5 == 0 {
+				// Every 5th: empty heartbeat (simulates brief restart).
+				ms.blockRegistry.UpdateFullHeartbeat(primary, []*master_pb.BlockVolumeInfoMessage{})
+			} else {
+				ms.blockRegistry.UpdateFullHeartbeat(primary, []*master_pb.BlockVolumeInfoMessage{
+					{
+						Path:       "/data/hb-expand-race.blk",
+						VolumeSize: 1 << 30,
+						Epoch:      1,
+						Role:       blockvol.RoleToWire(blockvol.RolePrimary),
+						WalHeadLsn: uint64(100 + i),
+					},
+				})
+			}
+		}
+	}()
+
+	// Goroutine 3: replica heartbeats.
+	if replica != "" {
+		wg.Add(1)
+		go func() {
+			defer wg.Done()
+			for i := 0; i < rounds; i++ {
+				ms.blockRegistry.UpdateFullHeartbeat(replica, []*master_pb.BlockVolumeInfoMessage{
+					{
+						Path:       "/data/hb-expand-race.blk",
+						VolumeSize: 1 << 30,
+						Epoch:      1,
+						Role:       blockvol.RoleToWire(blockvol.RoleReplica),
+						WalHeadLsn: uint64(99 + i),
+					},
+				})
+			}
+		}()
+	}
+
+	wg.Wait()
+
+	// Volume must still exist — no orphan.
+	_, ok := ms.blockRegistry.Lookup("hb-expand-race")
+	if !ok {
+		t.Fatal("volume must survive concurrent heartbeats + expand")
+	}
+}
--- a/weed/server/qa_block_nvme_publication_test.go
+++ b/weed/server/qa_block_nvme_publication_test.go
--- a/weed/storage/blockvol/blockapi/client.go
+++ b/weed/storage/blockvol/blockapi/client.go
@ -136,6 +136,61 @@ func (c *Client) ExpandVolume(ctx context.Context, name string, newSizeBytes uin
 	return out.CapacityBytes, nil
 }

+// PromoteVolume triggers a manual promotion for a block volume.
+func (c *Client) PromoteVolume(ctx context.Context, name string, req PromoteVolumeRequest) (*PromoteVolumeResponse, error) {
+	body, err := json.Marshal(req)
+	if err != nil {
+		return nil, fmt.Errorf("marshal request: %w", err)
+	}
+	resp, err := c.doRequest(ctx, http.MethodPost, "/block/volume/"+name+"/promote", bytes.NewReader(body))
+	if err != nil {
+		return nil, err
+	}
+	defer resp.Body.Close()
+	if err := checkStatus(resp, http.StatusOK); err != nil {
+		return nil, err
+	}
+	var out PromoteVolumeResponse
+	if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
+		return nil, fmt.Errorf("decode response: %w", err)
+	}
+	return &out, nil
+}
+
+// BlockStatus fetches the block registry status metrics.
+func (c *Client) BlockStatus(ctx context.Context) (*BlockStatusResponse, error) {
+	resp, err := c.doRequest(ctx, http.MethodGet, "/block/status", nil)
+	if err != nil {
+		return nil, err
+	}
+	defer resp.Body.Close()
+	if err := checkStatus(resp, http.StatusOK); err != nil {
+		return nil, err
+	}
+	var out BlockStatusResponse
+	if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
+		return nil, fmt.Errorf("decode response: %w", err)
+	}
+	return &out, nil
+}
+
+// Preflight returns the promotion preflight evaluation for a block volume.
+func (c *Client) Preflight(ctx context.Context, name string) (*PreflightResponse, error) {
+	resp, err := c.doRequest(ctx, http.MethodGet, "/block/volume/"+name+"/preflight", nil)
+	if err != nil {
+		return nil, err
+	}
+	defer resp.Body.Close()
+	if err := checkStatus(resp, http.StatusOK); err != nil {
+		return nil, err
+	}
+	var out PreflightResponse
+	if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
+		return nil, fmt.Errorf("decode response: %w", err)
+	}
+	return &out, nil
+}
+
 // ListServers lists all block-capable volume servers.
 func (c *Client) ListServers(ctx context.Context) ([]ServerInfo, error) {
 	resp, err := c.doRequest(ctx, http.MethodGet, "/block/servers", nil)
--- a/weed/storage/blockvol/blockapi/types.go
+++ b/weed/storage/blockvol/blockapi/types.go
@ -38,6 +38,8 @@ type VolumeInfo struct {
 	HealthScore     float64         `json:"health_score"`
 	ReplicaDegraded bool            `json:"replica_degraded,omitempty"`
 	DurabilityMode  string          `json:"durability_mode"` // CP8-3-1
+	NvmeAddr        string          `json:"nvme_addr,omitempty"`
+	NQN             string          `json:"nqn,omitempty"`
 }

 // ReplicaDetail describes one replica in the API response.
@ -74,6 +76,52 @@ type ExpandVolumeResponse struct {
 	CapacityBytes uint64 `json:"capacity_bytes"`
 }

+// PromoteVolumeRequest is the request body for POST /block/volume/{name}/promote.
+type PromoteVolumeRequest struct {
+	TargetServer string `json:"target_server,omitempty"` // specific replica, or empty for auto
+	Force        bool   `json:"force,omitempty"`         // bypass soft safety checks
+	Reason       string `json:"reason,omitempty"`        // audit note
+}
+
+// PromoteVolumeResponse is the response for POST /block/volume/{name}/promote.
+type PromoteVolumeResponse struct {
+	NewPrimary string               `json:"new_primary"`
+	Epoch      uint64               `json:"epoch"`
+	Reason     string               `json:"reason,omitempty"`      // rejection reason if failed
+	Rejections []PreflightRejection `json:"rejections,omitempty"`  // per-replica rejection details
+}
+
+// BlockStatusResponse is the response for GET /block/status.
+type BlockStatusResponse struct {
+	VolumeCount          int    `json:"volume_count"`
+	ServerCount          int    `json:"server_count"`
+	PromotionLSNTolerance uint64 `json:"promotion_lsn_tolerance"`
+	BarrierLagLSN        uint64 `json:"barrier_lag_lsn"`
+	PromotionsTotal      int64  `json:"promotions_total"`
+	FailoversTotal       int64  `json:"failovers_total"`
+	RebuildsTotal        int64  `json:"rebuilds_total"`
+	AssignmentQueueDepth int    `json:"assignment_queue_depth"`
+}
+
+// PreflightRejection describes why a specific replica was rejected for promotion.
+type PreflightRejection struct {
+	Server string `json:"server"`
+	Reason string `json:"reason"` // "stale_heartbeat", "wal_lag", "wrong_role", "server_dead", "no_heartbeat"
+}
+
+// PreflightResponse is the response for GET /block/volume/{name}/preflight.
+type PreflightResponse struct {
+	VolumeName      string                `json:"volume_name"`
+	Promotable      bool                  `json:"promotable"`
+	Reason          string                `json:"reason,omitempty"`
+	CandidateServer string                `json:"candidate_server,omitempty"`
+	CandidateHealth float64               `json:"candidate_health,omitempty"`
+	CandidateWALLSN uint64                `json:"candidate_wal_lsn,omitempty"`
+	Rejections      []PreflightRejection  `json:"rejections,omitempty"`
+	PrimaryServer   string                `json:"primary_server"`
+	PrimaryAlive    bool                  `json:"primary_alive"`
+}
+
 // RoleFromString converts a role string to its uint32 wire value.
 // Returns 0 (RoleNone) for unrecognized strings.
 func RoleFromString(s string) uint32 {
--- a/weed/storage/blockvol/qa_wal_cp11a3_adversarial_test.go
+++ b/weed/storage/blockvol/qa_wal_cp11a3_adversarial_test.go
@ -0,0 +1,511 @@
+package blockvol
+
+import (
+	"sync"
+	"sync/atomic"
+	"testing"
+	"time"
+)
+
+// ============================================================
+// CP11A-3 Adversarial Test Suite
+//
+// 10 scenarios stress-testing WAL admission pressure tracking,
+// PressureState boundaries, guidance edge cases, and concurrent
+// metric visibility.
+// ============================================================
+
+// ────────────────────────────────────────────────────────────
+// QA-CP11A3-1: SoftMarkEqualsHardMark_NoPanic
+//
+// If an operator configures softMark == hardMark, the soft-zone
+// delay calculation divides by (hardMark - softMark) = 0.
+// Must not panic, hang, or produce NaN/Inf delay.
+// ────────────────────────────────────────────────────────────
+func TestQA_CP11A3_SoftMarkEqualsHardMark_NoPanic(t *testing.T) {
+	m := NewEngineMetrics()
+
+	a := NewWALAdmission(WALAdmissionConfig{
+		MaxConcurrent: 16,
+		SoftWatermark: 0.8,
+		HardWatermark: 0.8, // equal — no soft zone
+		WALUsedFn:     func() float64 { return 0.85 }, // above both marks
+		NotifyFn:      func() {},
+		ClosedFn:      func() bool { return false },
+		Metrics:       m,
+	})
+
+	// With equal marks, pressure >= hardMark takes the hard branch.
+	// The soft branch's division by zero is never reached.
+	// But if the code path ever changes, this test catches it.
+	done := make(chan error, 1)
+	go func() {
+		done <- a.Acquire(50 * time.Millisecond)
+	}()
+
+	select {
+	case err := <-done:
+		// ErrWALFull is expected (pressure stays above hard, times out).
+		if err != ErrWALFull {
+			t.Fatalf("expected ErrWALFull, got %v", err)
+		}
+	case <-time.After(2 * time.Second):
+		t.Fatal("Acquire hung — possible Inf delay from division by zero")
+	}
+}
+
+// ────────────────────────────────────────────────────────────
+// QA-CP11A3-2: SoftZoneExactBoundary_DelayIsZero
+//
+// When pressure == softMark exactly, scale = 0, delay = 0.
+// softPressureWaitNs should NOT increase (delay <= 0 skips sleep).
+// But hitSoft should still be true → SoftAdmitTotal increments.
+// ────────────────────────────────────────────────────────────
+func TestQA_CP11A3_SoftZoneExactBoundary_DelayIsZero(t *testing.T) {
+	m := NewEngineMetrics()
+
+	a := NewWALAdmission(WALAdmissionConfig{
+		MaxConcurrent: 16,
+		SoftWatermark: 0.7,
+		HardWatermark: 0.9,
+		WALUsedFn:     func() float64 { return 0.7 }, // exactly at soft mark
+		NotifyFn:      func() {},
+		ClosedFn:      func() bool { return false },
+		Metrics:       m,
+	})
+	a.sleepFn = func(d time.Duration) {
+		t.Fatalf("sleep should not be called when delay=0, but called with %v", d)
+	}
+
+	if err := a.Acquire(100 * time.Millisecond); err != nil {
+		t.Fatalf("Acquire: %v", err)
+	}
+	a.Release()
+
+	// SoftAdmitTotal should increment (we entered the soft branch).
+	if m.WALAdmitSoftTotal.Load() != 1 {
+		t.Fatalf("WALAdmitSoftTotal = %d, want 1", m.WALAdmitSoftTotal.Load())
+	}
+	// But no sleep → softPressureWaitNs stays 0.
+	if a.SoftPressureWaitNs() != 0 {
+		t.Fatalf("SoftPressureWaitNs = %d, want 0 (no delay at exact boundary)", a.SoftPressureWaitNs())
+	}
+}
+
+// ────────────────────────────────────────────────────────────
+// QA-CP11A3-3: ConcurrentHardWaiters_TimeAccumulates
+//
+// 8 goroutines enter hard zone simultaneously. Each waits ~5ms.
+// Total hardPressureWaitNs should be roughly 8 × 5ms, proving
+// atomic accumulation doesn't lose contributions.
+// ────────────────────────────────────────────────────────────
+func TestQA_CP11A3_ConcurrentHardWaiters_TimeAccumulates(t *testing.T) {
+	m := NewEngineMetrics()
+	var pressure atomic.Int64
+	pressure.Store(95) // above hard mark
+
+	a := NewWALAdmission(WALAdmissionConfig{
+		MaxConcurrent: 16,
+		SoftWatermark: 0.7,
+		HardWatermark: 0.9,
+		WALUsedFn:     func() float64 { return float64(pressure.Load()) / 100.0 },
+		NotifyFn:      func() {},
+		ClosedFn:      func() bool { return false },
+		Metrics:       m,
+	})
+
+	var sleepCalls atomic.Int64
+	a.sleepFn = func(d time.Duration) {
+		time.Sleep(1 * time.Millisecond)
+		// After enough total sleeps across all goroutines, drop pressure.
+		if sleepCalls.Add(1) >= 20 {
+			pressure.Store(50)
+		}
+	}
+
+	const workers = 8
+	var wg sync.WaitGroup
+	for i := 0; i < workers; i++ {
+		wg.Add(1)
+		go func() {
+			defer wg.Done()
+			if err := a.Acquire(5 * time.Second); err != nil {
+				t.Errorf("Acquire: %v", err)
+			}
+			a.Release()
+		}()
+	}
+	wg.Wait()
+
+	// All 8 must have entered hard zone.
+	if m.WALAdmitHardTotal.Load() < uint64(workers) {
+		t.Fatalf("WALAdmitHardTotal = %d, want >= %d", m.WALAdmitHardTotal.Load(), workers)
+	}
+	// Accumulated hard wait should be > 0, reflecting contributions from all goroutines.
+	if a.HardPressureWaitNs() <= 0 {
+		t.Fatal("HardPressureWaitNs should be > 0 after concurrent hard-zone waits")
+	}
+}
+
+// ────────────────────────────────────────────────────────────
+// QA-CP11A3-4: PressureStateAndAcquireRace
+//
+// One goroutine oscillates walUsed, another reads PressureState
+// rapidly. Must not panic, must always return a valid state.
+// ────────────────────────────────────────────────────────────
+func TestQA_CP11A3_PressureStateAndAcquireRace(t *testing.T) {
+	var pressure atomic.Int64
+	pressure.Store(50)
+
+	a := NewWALAdmission(WALAdmissionConfig{
+		MaxConcurrent: 16,
+		SoftWatermark: 0.7,
+		HardWatermark: 0.9,
+		WALUsedFn:     func() float64 { return float64(pressure.Load()) / 100.0 },
+		NotifyFn:      func() {},
+		ClosedFn:      func() bool { return false },
+		Metrics:       NewEngineMetrics(),
+	})
+	a.sleepFn = func(d time.Duration) { time.Sleep(100 * time.Microsecond) }
+
+	var wg sync.WaitGroup
+	const rounds = 200
+
+	// Goroutine 1: oscillate pressure.
+	wg.Add(1)
+	go func() {
+		defer wg.Done()
+		levels := []int64{30, 75, 95, 50, 80, 92, 10}
+		for i := 0; i < rounds; i++ {
+			pressure.Store(levels[i%len(levels)])
+		}
+	}()
+
+	// Goroutine 2: read PressureState.
+	wg.Add(1)
+	go func() {
+		defer wg.Done()
+		valid := map[string]bool{"normal": true, "soft": true, "hard": true}
+		for i := 0; i < rounds; i++ {
+			s := a.PressureState()
+			if !valid[s] {
+				t.Errorf("PressureState() = %q — not a valid state", s)
+				return
+			}
+		}
+	}()
+
+	// Goroutine 3: Acquire/Release rapidly.
+	wg.Add(1)
+	go func() {
+		defer wg.Done()
+		for i := 0; i < rounds/2; i++ {
+			err := a.Acquire(20 * time.Millisecond)
+			if err == nil {
+				a.Release()
+			}
+		}
+	}()
+
+	wg.Wait()
+}
+
+// ────────────────────────────────────────────────────────────
+// QA-CP11A3-5: TimeInZoneMonotonicity
+//
+// softPressureWaitNs and hardPressureWaitNs must be monotonically
+// non-decreasing across reads, even under concurrent writes.
+// ────────────────────────────────────────────────────────────
+func TestQA_CP11A3_TimeInZoneMonotonicity(t *testing.T) {
+	m := NewEngineMetrics()
+	var pressure atomic.Int64
+	pressure.Store(80) // soft zone
+
+	a := NewWALAdmission(WALAdmissionConfig{
+		MaxConcurrent: 16,
+		SoftWatermark: 0.7,
+		HardWatermark: 0.9,
+		WALUsedFn:     func() float64 { return float64(pressure.Load()) / 100.0 },
+		NotifyFn:      func() {},
+		ClosedFn:      func() bool { return false },
+		Metrics:       m,
+	})
+	a.sleepFn = func(d time.Duration) { time.Sleep(100 * time.Microsecond) }
+
+	var wg sync.WaitGroup
+	const writers = 4
+	const rounds = 30
+
+	// Writers produce soft-zone and hard-zone waits.
+	for i := 0; i < writers; i++ {
+		wg.Add(1)
+		go func(id int) {
+			defer wg.Done()
+			for j := 0; j < rounds; j++ {
+				if j%5 == 0 {
+					pressure.Store(95) // hard
+				} else {
+					pressure.Store(80) // soft
+				}
+				err := a.Acquire(50 * time.Millisecond)
+				if err == nil {
+					a.Release()
+				}
+				// Drop back so next Acquire can succeed.
+				pressure.Store(50)
+			}
+		}(i)
+	}
+
+	// Reader checks monotonicity.
+	wg.Add(1)
+	go func() {
+		defer wg.Done()
+		var prevSoft, prevHard int64
+		for i := 0; i < rounds*writers; i++ {
+			soft := a.SoftPressureWaitNs()
+			hard := a.HardPressureWaitNs()
+			if soft < prevSoft {
+				t.Errorf("SoftPressureWaitNs decreased: %d -> %d", prevSoft, soft)
+			}
+			if hard < prevHard {
+				t.Errorf("HardPressureWaitNs decreased: %d -> %d", prevHard, hard)
+			}
+			prevSoft = soft
+			prevHard = hard
+		}
+	}()
+
+	wg.Wait()
+}
+
+// ────────────────────────────────────────────────────────────
+// QA-CP11A3-6: WALGuidance_ZeroInputs
+//
+// Zero walSize, zero blockSize, zero maxConcurrent, empty hint.
+// Must not panic or produce invalid results.
+// ────────────────────────────────────────────────────────────
+func TestQA_CP11A3_WALGuidance_ZeroInputs(t *testing.T) {
+	// All zeros.
+	r := WALSizingGuidance(0, 0, "")
+	if r.Level != "warn" {
+		t.Errorf("zero walSize: Level = %q, want warn", r.Level)
+	}
+
+	// Zero blockSize: absMin = 0*64 = 0. Only workload minimum check fires.
+	r = WALSizingGuidance(0, 0, WorkloadGeneral)
+	if r.Level != "warn" {
+		t.Errorf("zero walSize+blockSize: Level = %q, want warn", r.Level)
+	}
+
+	// Zero walSize but nonzero blockSize.
+	r = WALSizingGuidance(0, 4096, WorkloadDatabase)
+	if r.Level != "warn" {
+		t.Errorf("zero walSize: Level = %q, want warn", r.Level)
+	}
+	if len(r.Warnings) < 2 {
+		t.Errorf("expected both workload + absolute minimum warnings, got %d", len(r.Warnings))
+	}
+
+	// EvaluateWALConfig with zero maxConcurrent should not trigger concurrency warning.
+	r = EvaluateWALConfig(0, 4096, 0, WorkloadGeneral)
+	// walSize=0 still triggers sizing warning.
+	if r.Level != "warn" {
+		t.Errorf("Level = %q, want warn for zero walSize", r.Level)
+	}
+}
+
+// ────────────────────────────────────────────────────────────
+// QA-CP11A3-7: WALGuidance_OverflowSafe
+//
+// Very large blockSize × minWALEntries might overflow uint64.
+// (64 × 2^60 does NOT overflow, but let's test near-boundary.)
+// ────────────────────────────────────────────────────────────
+func TestQA_CP11A3_WALGuidance_OverflowSafe(t *testing.T) {
+	// Large blockSize: 256MB blocks × 64 = 16GB minimum.
+	// walSize = 1GB → should warn (16GB > 1GB).
+	r := WALSizingGuidance(1<<30, 256<<20, WorkloadGeneral)
+	if r.Level != "warn" {
+		t.Errorf("Level = %q, want warn (1GB WAL < 16GB absMin)", r.Level)
+	}
+
+	// Extreme: blockSize = 1<<40 (1TB). 64 × 1TB = 64TB.
+	// uint64 can hold 18 EB — no overflow.
+	r = WALSizingGuidance(1<<50, 1<<40, WorkloadThroughput)
+	// 1PB WAL with 1TB blocks: absMin = 64TB, 1PB > 64TB → ok for absolute.
+	// 1PB > 128MB (throughput min) → ok for workload.
+	if r.Level != "ok" {
+		t.Errorf("Level = %q, want ok for huge WAL", r.Level)
+	}
+}
+
+// ────────────────────────────────────────────────────────────
+// QA-CP11A3-8: WALStatusSnapshot_PartialInit
+//
+// BlockVol with Metrics but nil walAdmission, and vice versa.
+// WALStatus must return coherent defaults for the nil side
+// and real values for the non-nil side.
+// ────────────────────────────────────────────────────────────
+func TestQA_CP11A3_WALStatusSnapshot_PartialInit(t *testing.T) {
+	// Case 1: Metrics set, walAdmission nil.
+	m := NewEngineMetrics()
+	m.WALAdmitSoftTotal.Add(42)
+	m.WALAdmitHardTotal.Add(7)
+	vol1 := &BlockVol{Metrics: m}
+
+	ws := vol1.WALStatus()
+	if ws.PressureState != "normal" {
+		t.Errorf("nil admission: PressureState = %q, want normal", ws.PressureState)
+	}
+	if ws.SoftAdmitTotal != 42 {
+		t.Errorf("SoftAdmitTotal = %d, want 42", ws.SoftAdmitTotal)
+	}
+	if ws.HardAdmitTotal != 7 {
+		t.Errorf("HardAdmitTotal = %d, want 7", ws.HardAdmitTotal)
+	}
+	// Pressure wait should be 0 (no admission controller).
+	if ws.SoftPressureWaitSec != 0 || ws.HardPressureWaitSec != 0 {
+		t.Errorf("nil admission: pressure wait should be 0")
+	}
+
+	// Case 2: walAdmission set, Metrics nil.
+	a := NewWALAdmission(WALAdmissionConfig{
+		MaxConcurrent: 16,
+		SoftWatermark: 0.65,
+		HardWatermark: 0.85,
+		WALUsedFn:     func() float64 { return 0.7 },
+		NotifyFn:      func() {},
+		ClosedFn:      func() bool { return false },
+	})
+	vol2 := &BlockVol{walAdmission: a}
+
+	ws2 := vol2.WALStatus()
+	if ws2.PressureState != "soft" {
+		t.Errorf("PressureState = %q, want soft (0.7 >= 0.65)", ws2.PressureState)
+	}
+	if ws2.SoftWatermark != 0.65 {
+		t.Errorf("SoftWatermark = %f, want 0.65", ws2.SoftWatermark)
+	}
+	// Metrics fields should be zero (nil Metrics).
+	if ws2.SoftAdmitTotal != 0 || ws2.HardAdmitTotal != 0 || ws2.TimeoutTotal != 0 {
+		t.Errorf("nil metrics: counters should be 0")
+	}
+}
+
+// ────────────────────────────────────────────────────────────
+// QA-CP11A3-9: ObserverPanic_ContainedOrDocumented
+//
+// If WALAdmitWaitObserver panics, RecordWALAdmit is called from
+// Acquire → recordAdmit. A panic in the observer would crash the
+// writer goroutine. This test documents whether the panic is
+// recovered or propagated.
+// ────────────────────────────────────────────────────────────
+func TestQA_CP11A3_ObserverPanic_DocumentedBehavior(t *testing.T) {
+	m := NewEngineMetrics()
+	m.WALAdmitWaitObserver = func(s float64) { panic("boom") }
+
+	// RecordWALAdmit calls the observer. If it panics, the caller panics.
+	// This is expected (same as prometheus.Histogram.Observe panicking).
+	// Document that the observer must not panic.
+	panicked := false
+	func() {
+		defer func() {
+			if r := recover(); r != nil {
+				panicked = true
+			}
+		}()
+		m.RecordWALAdmit(1*time.Millisecond, false, false, false)
+	}()
+
+	if !panicked {
+		t.Fatal("expected panic from observer — if recovered, update this test")
+	}
+
+	// Verify counters were NOT updated (panic happened before completion).
+	// Actually, the observer is called AFTER WALAdmitTotal.Add(1) and
+	// walAdmitWaitNs.record(). Let's verify the counter state.
+	if m.WALAdmitTotal.Load() != 1 {
+		t.Errorf("WALAdmitTotal = %d — should be 1 (incremented before observer)", m.WALAdmitTotal.Load())
+	}
+	// soft/hard/timeout flags are processed AFTER observer — panic skips them.
+	// With soft=false, hard=false, timedOut=false there's nothing to skip,
+	// but the counters should reflect what happened before the panic.
+}
+
+// ────────────────────────────────────────────────────────────
+// QA-CP11A3-10: ConcurrentWALStatusReads
+//
+// Multiple goroutines read WALStatus while Acquire/Release runs.
+// Must not panic. Fields should be internally consistent
+// (SoftAdmitTotal >= 0, HardPressureWaitSec >= 0, etc.)
+// ────────────────────────────────────────────────────────────
+func TestQA_CP11A3_ConcurrentWALStatusReads(t *testing.T) {
+	m := NewEngineMetrics()
+	var pressure atomic.Int64
+	pressure.Store(50)
+
+	a := NewWALAdmission(WALAdmissionConfig{
+		MaxConcurrent: 16,
+		SoftWatermark: 0.7,
+		HardWatermark: 0.9,
+		WALUsedFn:     func() float64 { return float64(pressure.Load()) / 100.0 },
+		NotifyFn:      func() {},
+		ClosedFn:      func() bool { return false },
+		Metrics:       m,
+	})
+	a.sleepFn = func(d time.Duration) { time.Sleep(50 * time.Microsecond) }
+
+	vol := &BlockVol{
+		Metrics:      m,
+		walAdmission: a,
+	}
+
+	var wg sync.WaitGroup
+	const rounds = 100
+
+	// Writers with varying pressure.
+	for i := 0; i < 4; i++ {
+		wg.Add(1)
+		go func() {
+			defer wg.Done()
+			levels := []int64{50, 75, 95, 60, 85}
+			for j := 0; j < rounds; j++ {
+				pressure.Store(levels[j%len(levels)])
+				if err := a.Acquire(20 * time.Millisecond); err == nil {
+					a.Release()
+				}
+				pressure.Store(50) // reset for next round
+			}
+		}()
+	}
+
+	// Concurrent WALStatus readers.
+	for i := 0; i < 4; i++ {
+		wg.Add(1)
+		go func() {
+			defer wg.Done()
+			valid := map[string]bool{"normal": true, "soft": true, "hard": true}
+			for j := 0; j < rounds*2; j++ {
+				ws := vol.WALStatus()
+				if !valid[ws.PressureState] {
+					t.Errorf("invalid PressureState: %q", ws.PressureState)
+					return
+				}
+				if ws.UsedFraction < 0 || ws.UsedFraction > 1.01 {
+					t.Errorf("UsedFraction out of range: %f", ws.UsedFraction)
+					return
+				}
+				if ws.SoftPressureWaitSec < 0 {
+					t.Errorf("SoftPressureWaitSec negative: %f", ws.SoftPressureWaitSec)
+					return
+				}
+				if ws.HardPressureWaitSec < 0 {
+					t.Errorf("HardPressureWaitSec negative: %f", ws.HardPressureWaitSec)
+					return
+				}
+			}
+		}()
+	}
+
+	wg.Wait()
+}
--- a/weed/storage/blockvol/testrunner/actions/devops.go
+++ b/weed/storage/blockvol/testrunner/actions/devops.go
@ -26,6 +26,10 @@ func RegisterDevOpsActions(r *tr.Registry) {
 	r.RegisterFunc("delete_block_volume", tr.TierDevOps, deleteBlockVolume)
 	r.RegisterFunc("wait_block_servers", tr.TierDevOps, waitBlockServers)
 	r.RegisterFunc("cluster_status", tr.TierDevOps, clusterStatus)
+	r.RegisterFunc("wait_block_primary", tr.TierDevOps, waitBlockPrimary)
+	r.RegisterFunc("assert_block_field", tr.TierDevOps, assertBlockField)
+	r.RegisterFunc("block_status", tr.TierDevOps, blockStatus)
+	r.RegisterFunc("block_promote", tr.TierDevOps, blockPromote)
 }

 // setISCSIVars sets the save_as_iscsi_host/port/addr/iqn vars from a VolumeInfo.
@ -434,6 +438,222 @@ func waitBlockServers(ctx context.Context, actx *tr.ActionContext, act tr.Action
 	}
 }

+// waitBlockPrimary polls lookup until the volume's primary server matches (or differs from) expected.
+// Params: name, expected (server addr to wait for) OR not (server addr to wait to change from), timeout (default 60s).
+// Sets save_as vars from the final lookup.
+func waitBlockPrimary(ctx context.Context, actx *tr.ActionContext, act tr.Action) (map[string]string, error) {
+	client, err := blockAPIClient(actx, act)
+	if err != nil {
+		return nil, fmt.Errorf("wait_block_primary: %w", err)
+	}
+
+	name := act.Params["name"]
+	if name == "" {
+		return nil, fmt.Errorf("wait_block_primary: name param required")
+	}
+	expected := act.Params["expected"]
+	notServer := act.Params["not"]
+	if expected == "" && notServer == "" {
+		return nil, fmt.Errorf("wait_block_primary: expected or not param required")
+	}
+
+	timeout := 60 * time.Second
+	if t, ok := act.Params["timeout"]; ok {
+		if d, err := parseDuration(t); err == nil {
+			timeout = d
+		}
+	}
+
+	timeoutCtx, cancel := context.WithTimeout(ctx, timeout)
+	defer cancel()
+
+	ticker := time.NewTicker(2 * time.Second)
+	defer ticker.Stop()
+
+	pollCount := 0
+	for {
+		select {
+		case <-timeoutCtx.Done():
+			return nil, fmt.Errorf("wait_block_primary: timeout after %s waiting for primary change on %s", timeout, name)
+		case <-ticker.C:
+			pollCount++
+			info, err := client.LookupVolume(timeoutCtx, name)
+			if err != nil {
+				if pollCount <= 3 {
+					actx.Log("  poll %d: lookup error: %v", pollCount, err)
+				}
+				continue
+			}
+			if pollCount <= 3 || pollCount%10 == 0 {
+				actx.Log("  poll %d: %s primary=%s role=%s", pollCount, name, info.VolumeServer, info.Role)
+			}
+
+			match := false
+			if expected != "" && info.VolumeServer == expected {
+				match = true
+			}
+			if notServer != "" && info.VolumeServer != notServer && info.VolumeServer != "" {
+				match = true
+			}
+			if match {
+				actx.Log("  primary for %s is now %s (epoch=%d)", name, info.VolumeServer, info.Epoch)
+				if act.SaveAs != "" {
+					setISCSIVars(actx, act.SaveAs, info)
+					actx.Vars[act.SaveAs+"_server"] = info.VolumeServer
+					actx.Vars[act.SaveAs+"_epoch"] = strconv.FormatUint(info.Epoch, 10)
+					actx.Vars[act.SaveAs+"_role"] = info.Role
+				}
+				return map[string]string{"value": info.VolumeServer}, nil
+			}
+		}
+	}
+}
+
+// assertBlockField looks up a block volume and asserts a specific field matches the expected value.
+// Params: name, field (one of: volume_server, role, status, epoch, size_bytes, replica_server,
+//   replica_factor, health_score, replica_degraded, durability_mode, iscsi_addr, iqn), expected.
+func assertBlockField(ctx context.Context, actx *tr.ActionContext, act tr.Action) (map[string]string, error) {
+	client, err := blockAPIClient(actx, act)
+	if err != nil {
+		return nil, fmt.Errorf("assert_block_field: %w", err)
+	}
+
+	name := act.Params["name"]
+	if name == "" {
+		return nil, fmt.Errorf("assert_block_field: name param required")
+	}
+	field := act.Params["field"]
+	if field == "" {
+		return nil, fmt.Errorf("assert_block_field: field param required")
+	}
+	expected := act.Params["expected"]
+	if expected == "" {
+		return nil, fmt.Errorf("assert_block_field: expected param required")
+	}
+
+	info, err := client.LookupVolume(ctx, name)
+	if err != nil {
+		return nil, fmt.Errorf("assert_block_field: lookup %s: %w", name, err)
+	}
+
+	actual, err := extractVolumeField(info, field)
+	if err != nil {
+		return nil, fmt.Errorf("assert_block_field: %w", err)
+	}
+
+	if actual != expected {
+		return nil, fmt.Errorf("assert_block_field: %s.%s = %q, expected %q", name, field, actual, expected)
+	}
+	actx.Log("  assert %s.%s == %q OK", name, field, expected)
+	return map[string]string{"value": actual}, nil
+}
+
+// extractVolumeField extracts a named field from VolumeInfo as a string.
+func extractVolumeField(info *blockapi.VolumeInfo, field string) (string, error) {
+	switch field {
+	case "volume_server":
+		return info.VolumeServer, nil
+	case "role":
+		return info.Role, nil
+	case "status":
+		return info.Status, nil
+	case "epoch":
+		return strconv.FormatUint(info.Epoch, 10), nil
+	case "size_bytes":
+		return strconv.FormatUint(info.SizeBytes, 10), nil
+	case "replica_server":
+		return info.ReplicaServer, nil
+	case "replica_factor":
+		return strconv.Itoa(info.ReplicaFactor), nil
+	case "health_score":
+		return fmt.Sprintf("%.2f", info.HealthScore), nil
+	case "replica_degraded":
+		return strconv.FormatBool(info.ReplicaDegraded), nil
+	case "durability_mode":
+		return info.DurabilityMode, nil
+	case "iscsi_addr":
+		return info.ISCSIAddr, nil
+	case "iqn":
+		return info.IQN, nil
+	case "name":
+		return info.Name, nil
+	case "replica_iscsi_addr":
+		return info.ReplicaISCSIAddr, nil
+	case "replica_iqn":
+		return info.ReplicaIQN, nil
+	case "replica_data_addr":
+		return info.ReplicaDataAddr, nil
+	case "replica_ctrl_addr":
+		return info.ReplicaCtrlAddr, nil
+	default:
+		return "", fmt.Errorf("unknown field %q", field)
+	}
+}
+
+// blockStatus fetches block registry status metrics from master.
+// Sets save_as_promotions_total, save_as_failovers_total, etc.
+func blockStatus(ctx context.Context, actx *tr.ActionContext, act tr.Action) (map[string]string, error) {
+	client, err := blockAPIClient(actx, act)
+	if err != nil {
+		return nil, fmt.Errorf("block_status: %w", err)
+	}
+
+	status, err := client.BlockStatus(ctx)
+	if err != nil {
+		return nil, fmt.Errorf("block_status: %w", err)
+	}
+
+	actx.Log("  block status: volumes=%d servers=%d promotions=%d failovers=%d rebuilds=%d",
+		status.VolumeCount, status.ServerCount, status.PromotionsTotal, status.FailoversTotal, status.RebuildsTotal)
+
+	if act.SaveAs != "" {
+		actx.Vars[act.SaveAs+"_volume_count"] = strconv.Itoa(status.VolumeCount)
+		actx.Vars[act.SaveAs+"_server_count"] = strconv.Itoa(status.ServerCount)
+		actx.Vars[act.SaveAs+"_promotions_total"] = strconv.FormatInt(status.PromotionsTotal, 10)
+		actx.Vars[act.SaveAs+"_failovers_total"] = strconv.FormatInt(status.FailoversTotal, 10)
+		actx.Vars[act.SaveAs+"_rebuilds_total"] = strconv.FormatInt(status.RebuildsTotal, 10)
+		actx.Vars[act.SaveAs+"_queue_depth"] = strconv.Itoa(status.AssignmentQueueDepth)
+	}
+
+	jsonBytes, _ := json.Marshal(status)
+	return map[string]string{"value": string(jsonBytes)}, nil
+}
+
+// blockPromote triggers a manual promotion for a block volume.
+// Params: name, target_server (optional, empty=auto), force (optional bool), reason (optional).
+func blockPromote(ctx context.Context, actx *tr.ActionContext, act tr.Action) (map[string]string, error) {
+	client, err := blockAPIClient(actx, act)
+	if err != nil {
+		return nil, fmt.Errorf("block_promote: %w", err)
+	}
+
+	name := act.Params["name"]
+	if name == "" {
+		return nil, fmt.Errorf("block_promote: name param required")
+	}
+
+	force := false
+	if f := act.Params["force"]; f == "true" || f == "1" {
+		force = true
+	}
+
+	resp, err := client.PromoteVolume(ctx, name, blockapi.PromoteVolumeRequest{
+		TargetServer: act.Params["target_server"],
+		Force:        force,
+		Reason:       act.Params["reason"],
+	})
+	if err != nil {
+		return nil, fmt.Errorf("block_promote: %w", err)
+	}
+
+	actx.Log("  promoted %s -> primary=%s epoch=%d", name, resp.NewPrimary, resp.Epoch)
+	if act.SaveAs != "" {
+		actx.Vars[act.SaveAs+"_server"] = resp.NewPrimary
+		actx.Vars[act.SaveAs+"_epoch"] = strconv.FormatUint(resp.Epoch, 10)
+	}
+	return map[string]string{"value": resp.NewPrimary}, nil
+}
+
 // clusterStatus fetches the full cluster status JSON.
 func clusterStatus(ctx context.Context, actx *tr.ActionContext, act tr.Action) (map[string]string, error) {
 	node, err := getNode(actx, act.Node)
--- a/weed/storage/blockvol/testrunner/actions/devops_test.go
+++ b/weed/storage/blockvol/testrunner/actions/devops_test.go
@ -23,6 +23,10 @@ func TestDevOpsActions_Registration(t *testing.T) {
 		"delete_block_volume",
 		"wait_block_servers",
 		"cluster_status",
+		"wait_block_primary",
+		"assert_block_field",
+		"block_status",
+		"block_promote",
 	}

 	for _, name := range expected {
@ -39,8 +43,8 @@ func TestDevOpsActions_Tier(t *testing.T) {
 	byTier := registry.ListByTier()
 	devopsActions := byTier[tr.TierDevOps]

-	if len(devopsActions) != 11 {
-		t.Errorf("devops tier has %d actions, want 11", len(devopsActions))
+	if len(devopsActions) != 15 {
+		t.Errorf("devops tier has %d actions, want 15", len(devopsActions))
 	}

 	// Verify all are in devops tier.
@ -84,11 +88,11 @@ func TestAllActions_Registration(t *testing.T) {
 	if n := len(byTier[tr.TierCore]); n != 11 {
 		t.Errorf("core: %d, want 11", n)
 	}
-	if n := len(byTier[tr.TierBlock]); n != 56 {
-		t.Errorf("block: %d, want 56", n)
+	if n := len(byTier[tr.TierBlock]); n != 58 {
+		t.Errorf("block: %d, want 58", n)
 	}
-	if n := len(byTier[tr.TierDevOps]); n != 11 {
-		t.Errorf("devops: %d, want 11", n)
+	if n := len(byTier[tr.TierDevOps]); n != 15 {
+		t.Errorf("devops: %d, want 15", n)
 	}
 	if n := len(byTier[tr.TierChaos]); n != 5 {
 		t.Errorf("chaos: %d, want 5", n)
@ -97,13 +101,13 @@ func TestAllActions_Registration(t *testing.T) {
 		t.Errorf("k8s: %d, want 14", n)
 	}

-	// Total should be 97 (92 prev + 4 devops: expand/lookup/delete/wait_block_servers + 1 block: iscsi_login_direct).
+	// Total should be 103 (99 prev + 4 devops: wait_block_primary, assert_block_field, block_status, block_promote).
 	total := 0
 	for _, actions := range byTier {
 		total += len(actions)
 	}
-	if total != 97 {
-		t.Errorf("total actions: %d, want 97", total)
+	if total != 103 {
+		t.Errorf("total actions: %d, want 103", total)
 	}
 }

--- a/weed/storage/blockvol/testrunner/actions/snapshot.go
+++ b/weed/storage/blockvol/testrunner/actions/snapshot.go
@ -8,6 +8,7 @@ import (
 	"time"

 	tr "github.com/seaweedfs/seaweedfs/weed/storage/blockvol/testrunner"
+	"github.com/seaweedfs/seaweedfs/weed/storage/blockvol/testrunner/infra"
 )

 // RegisterSnapshotActions registers snapshot and resize actions.
@ -18,6 +19,8 @@ func RegisterSnapshotActions(r *tr.Registry) {
 	r.RegisterFunc("resize", tr.TierBlock, resizeAction)
 	r.RegisterFunc("iscsi_rescan", tr.TierBlock, iscsiRescan)
 	r.RegisterFunc("get_block_size", tr.TierBlock, getBlockSize)
+	r.RegisterFunc("snapshot_export_s3", tr.TierBlock, snapshotExportS3)
+	r.RegisterFunc("snapshot_import_s3", tr.TierBlock, snapshotImportS3)
 }

 func snapshotCreate(ctx context.Context, actx *tr.ActionContext, act tr.Action) (map[string]string, error) {
@ -181,3 +184,89 @@ func parseHumanSize(s string) (uint64, error) {
 	}
 	return val * multiplier, nil
 }
+
+// snapshotExportS3 exports a snapshot from a target to an S3 bucket.
+// Params: bucket, key_prefix, s3_endpoint, s3_access_key, s3_secret_key, s3_region, snapshot_id (optional).
+// Returns: manifest_key, data_key, size_bytes, sha256.
+func snapshotExportS3(ctx context.Context, actx *tr.ActionContext, act tr.Action) (map[string]string, error) {
+	tgt, err := getHATarget(actx, act.Target)
+	if err != nil {
+		return nil, err
+	}
+
+	opts := infra.ExportS3Opts{
+		Bucket:      act.Params["bucket"],
+		KeyPrefix:   act.Params["key_prefix"],
+		S3Endpoint:  act.Params["s3_endpoint"],
+		S3AccessKey: act.Params["s3_access_key"],
+		S3SecretKey: act.Params["s3_secret_key"],
+		S3Region:    act.Params["s3_region"],
+	}
+	if opts.Bucket == "" || opts.S3Endpoint == "" {
+		return nil, fmt.Errorf("snapshot_export_s3: bucket and s3_endpoint required")
+	}
+	if idStr := act.Params["snapshot_id"]; idStr != "" {
+		id, err := strconv.ParseUint(idStr, 10, 32)
+		if err != nil {
+			return nil, fmt.Errorf("snapshot_export_s3: invalid snapshot_id %q: %w", idStr, err)
+		}
+		opts.SnapshotID = uint32(id)
+	}
+
+	result, err := tgt.ExportSnapshotS3(ctx, opts)
+	if err != nil {
+		return nil, fmt.Errorf("snapshot_export_s3: %w", err)
+	}
+
+	actx.Log("  exported to s3://%s/%s (%d bytes, sha256=%s)", opts.Bucket, result.DataKey, result.SizeBytes, result.SHA256)
+	out := map[string]string{
+		"value": result.SHA256,
+	}
+	if act.SaveAs != "" {
+		actx.Vars[act.SaveAs+"_manifest_key"] = result.ManifestKey
+		actx.Vars[act.SaveAs+"_data_key"] = result.DataKey
+		actx.Vars[act.SaveAs+"_size_bytes"] = strconv.FormatUint(result.SizeBytes, 10)
+		actx.Vars[act.SaveAs+"_sha256"] = result.SHA256
+	}
+	return out, nil
+}
+
+// snapshotImportS3 imports a snapshot from an S3 bucket into a target.
+// Params: bucket, manifest_key, s3_endpoint, s3_access_key, s3_secret_key, s3_region, allow_overwrite.
+// Returns: size_bytes, sha256.
+func snapshotImportS3(ctx context.Context, actx *tr.ActionContext, act tr.Action) (map[string]string, error) {
+	tgt, err := getHATarget(actx, act.Target)
+	if err != nil {
+		return nil, err
+	}
+
+	opts := infra.ImportS3Opts{
+		Bucket:      act.Params["bucket"],
+		ManifestKey: act.Params["manifest_key"],
+		S3Endpoint:  act.Params["s3_endpoint"],
+		S3AccessKey: act.Params["s3_access_key"],
+		S3SecretKey: act.Params["s3_secret_key"],
+		S3Region:    act.Params["s3_region"],
+	}
+	if opts.Bucket == "" || opts.ManifestKey == "" || opts.S3Endpoint == "" {
+		return nil, fmt.Errorf("snapshot_import_s3: bucket, manifest_key, and s3_endpoint required")
+	}
+	if act.Params["allow_overwrite"] == "true" {
+		opts.AllowOverwrite = true
+	}
+
+	result, err := tgt.ImportSnapshotS3(ctx, opts)
+	if err != nil {
+		return nil, fmt.Errorf("snapshot_import_s3: %w", err)
+	}
+
+	actx.Log("  imported %d bytes (sha256=%s)", result.SizeBytes, result.SHA256)
+	out := map[string]string{
+		"value": result.SHA256,
+	}
+	if act.SaveAs != "" {
+		actx.Vars[act.SaveAs+"_size_bytes"] = strconv.FormatUint(result.SizeBytes, 10)
+		actx.Vars[act.SaveAs+"_sha256"] = result.SHA256
+	}
+	return out, nil
+}
--- a/weed/storage/blockvol/testrunner/infra/ha_target.go
+++ b/weed/storage/blockvol/testrunner/infra/ha_target.go
@ -478,6 +478,107 @@ func (h *HATarget) Resize(ctx context.Context, newSizeBytes uint64) error {
 	return nil
 }

+// ExportSnapshotS3 sends POST /export with S3 credentials.
+// Returns the manifest key and data SHA-256 on success.
+func (h *HATarget) ExportSnapshotS3(ctx context.Context, opts ExportS3Opts) (*ExportS3Result, error) {
+	reqBody := map[string]interface{}{
+		"bucket":      opts.Bucket,
+		"key_prefix":  opts.KeyPrefix,
+		"s3_endpoint": opts.S3Endpoint,
+		"s3_region":   opts.S3Region,
+	}
+	if opts.S3AccessKey != "" {
+		reqBody["s3_access_key"] = opts.S3AccessKey
+		reqBody["s3_secret_key"] = opts.S3SecretKey
+	}
+	if opts.SnapshotID > 0 {
+		reqBody["snapshot_id"] = opts.SnapshotID
+	}
+
+	code, body, err := h.curlPost(ctx, "/export", reqBody)
+	if err != nil {
+		return nil, fmt.Errorf("export snapshot s3: %w", err)
+	}
+	if code != http.StatusOK {
+		return nil, fmt.Errorf("export snapshot s3 failed (HTTP %d): %s", code, body)
+	}
+
+	var resp ExportS3Result
+	if err := json.NewDecoder(strings.NewReader(body)).Decode(&resp); err != nil {
+		return nil, fmt.Errorf("decode export response: %w", err)
+	}
+	return &resp, nil
+}
+
+// ImportSnapshotS3 sends POST /import with S3 credentials and manifest key.
+func (h *HATarget) ImportSnapshotS3(ctx context.Context, opts ImportS3Opts) (*ImportS3Result, error) {
+	reqBody := map[string]interface{}{
+		"bucket":       opts.Bucket,
+		"manifest_key": opts.ManifestKey,
+		"s3_endpoint":  opts.S3Endpoint,
+		"s3_region":    opts.S3Region,
+	}
+	if opts.S3AccessKey != "" {
+		reqBody["s3_access_key"] = opts.S3AccessKey
+		reqBody["s3_secret_key"] = opts.S3SecretKey
+	}
+	if opts.AllowOverwrite {
+		reqBody["allow_overwrite"] = true
+	}
+
+	code, body, err := h.curlPost(ctx, "/import", reqBody)
+	if err != nil {
+		return nil, fmt.Errorf("import snapshot s3: %w", err)
+	}
+	if code != http.StatusOK {
+		return nil, fmt.Errorf("import snapshot s3 failed (HTTP %d): %s", code, body)
+	}
+
+	var resp ImportS3Result
+	if err := json.NewDecoder(strings.NewReader(body)).Decode(&resp); err != nil {
+		return nil, fmt.Errorf("decode import response: %w", err)
+	}
+	return &resp, nil
+}
+
+// ExportS3Opts configures a snapshot export to S3.
+type ExportS3Opts struct {
+	Bucket      string
+	KeyPrefix   string
+	S3Endpoint  string
+	S3AccessKey string
+	S3SecretKey string
+	S3Region    string
+	SnapshotID  uint32
+}
+
+// ExportS3Result is the response from POST /export.
+type ExportS3Result struct {
+	OK          bool   `json:"ok"`
+	ManifestKey string `json:"manifest_key"`
+	DataKey     string `json:"data_key"`
+	SizeBytes   uint64 `json:"size_bytes"`
+	SHA256      string `json:"sha256"`
+}
+
+// ImportS3Opts configures a snapshot import from S3.
+type ImportS3Opts struct {
+	Bucket         string
+	ManifestKey    string
+	S3Endpoint     string
+	S3AccessKey    string
+	S3SecretKey    string
+	S3Region       string
+	AllowOverwrite bool
+}
+
+// ImportS3Result is the response from POST /import.
+type ImportS3Result struct {
+	OK        bool   `json:"ok"`
+	SizeBytes uint64 `json:"size_bytes"`
+	SHA256    string `json:"sha256"`
+}
+
 // WaitForRole polls GET /status until the target reports the expected role.
 func (h *HATarget) WaitForRole(ctx context.Context, expectedRole string) error {
 	for {
--- a/weed/storage/blockvol/testrunner/scenarios/cp11b3-auto-failover.yaml
+++ b/weed/storage/blockvol/testrunner/scenarios/cp11b3-auto-failover.yaml
@ -0,0 +1,246 @@
+name: cp11b3-auto-failover
+timeout: 10m
+env:
+  repo_dir: "/opt/work/seaweedfs"
+  master_url: "http://192.168.1.184:9434"
+
+# Tests: T1 (candidate evaluation), T2 (orphan re-evaluation), T6 (preflight/status)
+# Flow: Create RF=2 → write data → kill primary → master auto-promotes → verify data + metrics
+
+topology:
+  nodes:
+    target_node:
+      host: "192.168.1.184"
+      user: testdev
+      key: "/opt/work/testdev_key"
+    client_node:
+      host: "192.168.1.181"
+      user: testdev
+      key: "/opt/work/testdev_key"
+
+phases:
+  # Phase 1: Clean slate
+  - name: setup
+    actions:
+      - action: kill_stale
+        node: target_node
+      - action: kill_stale
+        node: client_node
+        iscsi_cleanup: "true"
+      - action: exec
+        node: target_node
+        cmd: "rm -rf /tmp/sw-b3-master /tmp/sw-b3-vs1 /tmp/sw-b3-vs2"
+        root: "true"
+
+  # Phase 2: Start cluster
+  - name: start_cluster
+    actions:
+      - action: exec
+        node: target_node
+        cmd: "mkdir -p /tmp/sw-b3-master /tmp/sw-b3-vs1/blocks /tmp/sw-b3-vs2/blocks"
+      - action: start_weed_master
+        node: target_node
+        port: "9434"
+        dir: "/tmp/sw-b3-master"
+        save_as: master_pid
+      - action: wait_cluster_ready
+        node: target_node
+        master_url: "http://localhost:9434"
+        timeout: 30s
+      - action: start_weed_volume
+        node: target_node
+        port: "18190"
+        master: "localhost:9434"
+        dir: "/tmp/sw-b3-vs1"
+        extra_args: "-block.dir=/tmp/sw-b3-vs1/blocks -block.listen=:3277 -ip=192.168.1.184"
+        save_as: vs1_pid
+      - action: start_weed_volume
+        node: target_node
+        port: "18191"
+        master: "localhost:9434"
+        dir: "/tmp/sw-b3-vs2"
+        extra_args: "-block.dir=/tmp/sw-b3-vs2/blocks -block.listen=:3278 -ip=192.168.1.184"
+        save_as: vs2_pid
+      - action: wait_block_servers
+        count: "2"
+        timeout: 60s
+
+  # Phase 3: Create RF=2 volume, record initial state
+  - name: create_volume
+    actions:
+      - action: create_block_volume
+        name: "failover-test"
+        size: "50M"
+        replica_factor: "2"
+        save_as: vol_info
+      # Wait for replica to confirm role via heartbeat.
+      # Without this, PromoteBestReplica rejects replica as "no_heartbeat".
+      - action: sleep
+        duration: 10s
+      - action: lookup_block_volume
+        name: "failover-test"
+        save_as: initial
+      - action: print
+        msg: "initial primary={{ initial_iscsi_host }}:{{ initial_iscsi_port }} capacity={{ initial_capacity }}"
+      # Record the initial primary server for later comparison.
+      - action: assert_block_field
+        name: "failover-test"
+        field: "replica_factor"
+        expected: "2"
+      - action: assert_block_field
+        name: "failover-test"
+        field: "epoch"
+        expected: "1"
+      # Capture initial block status metrics.
+      - action: block_status
+        save_as: pre_stats
+
+  # Phase 4: Write data via iSCSI
+  - name: write_data
+    actions:
+      - action: iscsi_login_direct
+        node: client_node
+        host: "{{ initial_iscsi_host }}"
+        port: "{{ initial_iscsi_port }}"
+        iqn: "{{ initial_iqn }}"
+        save_as: device
+      - action: dd_write
+        node: client_node
+        device: "{{ device }}"
+        bs: 1M
+        count: "1"
+        seek: "5"
+        save_as: md5_5M
+      - action: dd_read_md5
+        node: client_node
+        device: "{{ device }}"
+        bs: 1M
+        count: "1"
+        skip: "5"
+        save_as: verify_5M
+      - action: assert_equal
+        actual: "{{ verify_5M }}"
+        expected: "{{ md5_5M }}"
+
+  # Phase 5: Kill primary VS, wait for master auto-failover
+  - name: failover
+    actions:
+      - action: iscsi_cleanup
+        node: client_node
+        ignore_error: true
+      - action: lookup_block_volume
+        name: "failover-test"
+        save_as: pre_kill
+      - action: print
+        msg: "killing primary VS (server={{ pre_kill_iscsi_host }}:{{ pre_kill_iscsi_port }})"
+      # Crash-kill VS1 with SIGKILL (not SIGTERM) to simulate a real crash.
+      # SIGTERM triggers graceful shutdown which deregisters volumes from
+      # the master registry — preventing the failover path we want to test.
+      - action: exec
+        node: target_node
+        cmd: "kill -9 {{ vs1_pid }}"
+        root: "true"
+      # Wait for master to detect VS1 disconnection and promote.
+      # Lease TTL is 30s; if never granted (zero), promotion is immediate.
+      # Allow extra time for heartbeat confirmation + deferred timer.
+      - action: sleep
+        duration: 35s
+      - action: wait_block_primary
+        name: "failover-test"
+        not: "192.168.1.184:18190"
+        timeout: 60s
+        save_as: promoted
+
+  # Phase 6: Verify failover state
+  - name: verify_failover
+    actions:
+      - action: print
+        msg: "new primary={{ promoted_server }} epoch={{ promoted_epoch }}"
+      # Epoch must have incremented (real promotion, not just heartbeat update).
+      - action: assert_block_field
+        name: "failover-test"
+        field: "epoch"
+        expected: "2"
+      - action: block_status
+        save_as: post_stats
+      # Verify promotion counter incremented.
+      - action: assert_greater
+        actual: "{{ post_stats_promotions_total }}"
+        expected: "{{ pre_stats_promotions_total }}"
+
+  # Phase 7: Reconnect iSCSI to new primary, verify data
+  - name: verify_data
+    actions:
+      - action: iscsi_login_direct
+        node: client_node
+        host: "{{ promoted_iscsi_host }}"
+        port: "{{ promoted_iscsi_port }}"
+        iqn: "{{ promoted_iqn }}"
+        save_as: device2
+      - action: dd_read_md5
+        node: client_node
+        device: "{{ device2 }}"
+        bs: 1M
+        count: "1"
+        skip: "5"
+        save_as: post_failover_md5
+      - action: assert_equal
+        actual: "{{ post_failover_md5 }}"
+        expected: "{{ md5_5M }}"
+
+  # Phase 8: Restart killed VS, verify rebuild queued
+  - name: restart_verify
+    actions:
+      - action: iscsi_cleanup
+        node: client_node
+        ignore_error: true
+      - action: start_weed_volume
+        node: target_node
+        port: "18190"
+        master: "localhost:9434"
+        dir: "/tmp/sw-b3-vs1"
+        extra_args: "-block.dir=/tmp/sw-b3-vs1/blocks -block.listen=:3277 -ip=192.168.1.184"
+        save_as: vs1_pid2
+      - action: wait_block_servers
+        count: "2"
+        timeout: 60s
+      - action: sleep
+        duration: 5s
+      # After restart, the old primary should be queued for rebuild.
+      - action: block_status
+        save_as: final_stats
+      - action: assert_greater
+        actual: "{{ final_stats_rebuilds_total }}"
+        expected: "{{ post_stats_rebuilds_total }}"
+
+  # Cleanup (always runs)
+  - name: cleanup
+    always: true
+    actions:
+      - action: iscsi_cleanup
+        node: client_node
+        ignore_error: true
+      - action: delete_block_volume
+        name: "failover-test"
+        ignore_error: true
+      - action: stop_weed
+        node: target_node
+        pid: "{{ vs1_pid2 }}"
+        ignore_error: true
+      - action: stop_weed
+        node: target_node
+        pid: "{{ vs2_pid }}"
+        ignore_error: true
+      - action: stop_weed
+        node: target_node
+        pid: "{{ vs1_pid }}"
+        ignore_error: true
+      - action: stop_weed
+        node: target_node
+        pid: "{{ master_pid }}"
+        ignore_error: true
+      - action: exec
+        node: target_node
+        cmd: "rm -rf /tmp/sw-b3-master /tmp/sw-b3-vs1 /tmp/sw-b3-vs2"
+        root: "true"
+        ignore_error: true
--- a/weed/storage/blockvol/testrunner/scenarios/cp11b3-fast-reconnect.yaml
+++ b/weed/storage/blockvol/testrunner/scenarios/cp11b3-fast-reconnect.yaml
@ -0,0 +1,214 @@
+name: cp11b3-fast-reconnect
+timeout: 10m
+env:
+  repo_dir: "/opt/work/seaweedfs"
+  master_url: "http://192.168.1.184:9436"
+
+# Tests: T3 (deferred timer safety), T2 (fast reconnect skips failover)
+# Flow: Create RF=2 → write → kill primary briefly → restart before lease expires
+#       → verify no promotion happened → verify data intact
+
+topology:
+  nodes:
+    target_node:
+      host: "192.168.1.184"
+      user: testdev
+      key: "/opt/work/testdev_key"
+    client_node:
+      host: "192.168.1.181"
+      user: testdev
+      key: "/opt/work/testdev_key"
+
+phases:
+  # Phase 1: Clean slate
+  - name: setup
+    actions:
+      - action: kill_stale
+        node: target_node
+      - action: kill_stale
+        node: client_node
+        iscsi_cleanup: "true"
+      - action: exec
+        node: target_node
+        cmd: "rm -rf /tmp/sw-b3r-master /tmp/sw-b3r-vs1 /tmp/sw-b3r-vs2"
+        root: "true"
+
+  # Phase 2: Start cluster
+  - name: start_cluster
+    actions:
+      - action: exec
+        node: target_node
+        cmd: "mkdir -p /tmp/sw-b3r-master /tmp/sw-b3r-vs1/blocks /tmp/sw-b3r-vs2/blocks"
+      - action: start_weed_master
+        node: target_node
+        port: "9436"
+        dir: "/tmp/sw-b3r-master"
+        save_as: master_pid
+      - action: wait_cluster_ready
+        node: target_node
+        master_url: "http://localhost:9436"
+        timeout: 30s
+      - action: start_weed_volume
+        node: target_node
+        port: "18194"
+        master: "localhost:9436"
+        dir: "/tmp/sw-b3r-vs1"
+        extra_args: "-block.dir=/tmp/sw-b3r-vs1/blocks -block.listen=:3281 -ip=192.168.1.184"
+        save_as: vs1_pid
+      - action: start_weed_volume
+        node: target_node
+        port: "18195"
+        master: "localhost:9436"
+        dir: "/tmp/sw-b3r-vs2"
+        extra_args: "-block.dir=/tmp/sw-b3r-vs2/blocks -block.listen=:3282 -ip=192.168.1.184"
+        save_as: vs2_pid
+      - action: wait_block_servers
+        count: "2"
+        timeout: 60s
+
+  # Phase 3: Create RF=2 volume, write data
+  - name: create_and_write
+    actions:
+      - action: create_block_volume
+        name: "reconnect-test"
+        size: "50M"
+        replica_factor: "2"
+        save_as: vol_info
+      # Wait for replica to confirm role via heartbeat.
+      - action: sleep
+        duration: 10s
+      - action: lookup_block_volume
+        name: "reconnect-test"
+        save_as: initial
+      - action: iscsi_login_direct
+        node: client_node
+        host: "{{ initial_iscsi_host }}"
+        port: "{{ initial_iscsi_port }}"
+        iqn: "{{ initial_iqn }}"
+        save_as: device
+      - action: dd_write
+        node: client_node
+        device: "{{ device }}"
+        bs: 1M
+        count: "1"
+        seek: "8"
+        save_as: md5_8M
+      - action: dd_read_md5
+        node: client_node
+        device: "{{ device }}"
+        bs: 1M
+        count: "1"
+        skip: "8"
+        save_as: verify_8M
+      - action: assert_equal
+        actual: "{{ verify_8M }}"
+        expected: "{{ md5_8M }}"
+      - action: iscsi_cleanup
+        node: client_node
+        ignore_error: true
+      # Record initial epoch.
+      - action: assert_block_field
+        name: "reconnect-test"
+        field: "epoch"
+        expected: "1"
+      # Record pre-kill promotion counter.
+      - action: block_status
+        save_as: pre_stats
+
+  # Phase 4: Kill and quickly restart primary VS (before lease expires)
+  - name: fast_reconnect
+    actions:
+      # Crash-kill primary VS with SIGKILL.
+      - action: exec
+        node: target_node
+        cmd: "kill -9 {{ vs1_pid }}"
+        root: "true"
+      # Restart it quickly — within a few seconds, well before the
+      # default 30s lease TTL expires on the master.
+      - action: sleep
+        duration: 3s
+      - action: start_weed_volume
+        node: target_node
+        port: "18194"
+        master: "localhost:9436"
+        dir: "/tmp/sw-b3r-vs1"
+        extra_args: "-block.dir=/tmp/sw-b3r-vs1/blocks -block.listen=:3281 -ip=192.168.1.184"
+        save_as: vs1_pid2
+      # Wait for VS to re-register with master.
+      - action: wait_block_servers
+        count: "2"
+        timeout: 60s
+      - action: sleep
+        duration: 5s
+
+  # Phase 5: Verify NO promotion happened
+  - name: verify_no_promotion
+    actions:
+      # Epoch should still be 1 (no promotion).
+      - action: assert_block_field
+        name: "reconnect-test"
+        field: "epoch"
+        expected: "1"
+      # Promotion counter should not have increased.
+      - action: block_status
+        save_as: post_stats
+      - action: assert_equal
+        actual: "{{ post_stats_promotions_total }}"
+        expected: "{{ pre_stats_promotions_total }}"
+      - action: print
+        msg: "fast reconnect: epoch unchanged, no promotion — deferred timer cancelled"
+
+  # Phase 6: Verify data still accessible on original primary
+  - name: verify_data
+    actions:
+      - action: lookup_block_volume
+        name: "reconnect-test"
+        save_as: after
+      - action: iscsi_login_direct
+        node: client_node
+        host: "{{ after_iscsi_host }}"
+        port: "{{ after_iscsi_port }}"
+        iqn: "{{ after_iqn }}"
+        save_as: device2
+      - action: dd_read_md5
+        node: client_node
+        device: "{{ device2 }}"
+        bs: 1M
+        count: "1"
+        skip: "8"
+        save_as: post_reconnect_md5
+      - action: assert_equal
+        actual: "{{ post_reconnect_md5 }}"
+        expected: "{{ md5_8M }}"
+
+  # Cleanup (always runs)
+  - name: cleanup
+    always: true
+    actions:
+      - action: iscsi_cleanup
+        node: client_node
+        ignore_error: true
+      - action: delete_block_volume
+        name: "reconnect-test"
+        ignore_error: true
+      - action: stop_weed
+        node: target_node
+        pid: "{{ vs1_pid2 }}"
+        ignore_error: true
+      - action: stop_weed
+        node: target_node
+        pid: "{{ vs2_pid }}"
+        ignore_error: true
+      - action: stop_weed
+        node: target_node
+        pid: "{{ vs1_pid }}"
+        ignore_error: true
+      - action: stop_weed
+        node: target_node
+        pid: "{{ master_pid }}"
+        ignore_error: true
+      - action: exec
+        node: target_node
+        cmd: "rm -rf /tmp/sw-b3r-master /tmp/sw-b3r-vs1 /tmp/sw-b3r-vs2"
+        root: "true"
+        ignore_error: true
--- a/weed/storage/blockvol/testrunner/scenarios/cp11b3-manual-promote.yaml
+++ b/weed/storage/blockvol/testrunner/scenarios/cp11b3-manual-promote.yaml
@ -0,0 +1,190 @@
+name: cp11b3-manual-promote
+timeout: 10m
+env:
+  repo_dir: "/opt/work/seaweedfs"
+  master_url: "http://192.168.1.184:9435"
+
+# Tests: T5 (manual promote API), T6 (preflight), structured rejection
+# Flow: Create RF=2 → write → preflight check → kill primary → manual promote → verify data
+
+topology:
+  nodes:
+    target_node:
+      host: "192.168.1.184"
+      user: testdev
+      key: "/opt/work/testdev_key"
+    client_node:
+      host: "192.168.1.181"
+      user: testdev
+      key: "/opt/work/testdev_key"
+
+phases:
+  # Phase 1: Clean slate
+  - name: setup
+    actions:
+      - action: kill_stale
+        node: target_node
+      - action: kill_stale
+        node: client_node
+        iscsi_cleanup: "true"
+      - action: exec
+        node: target_node
+        cmd: "rm -rf /tmp/sw-b3m-master /tmp/sw-b3m-vs1 /tmp/sw-b3m-vs2"
+        root: "true"
+
+  # Phase 2: Start cluster
+  - name: start_cluster
+    actions:
+      - action: exec
+        node: target_node
+        cmd: "mkdir -p /tmp/sw-b3m-master /tmp/sw-b3m-vs1/blocks /tmp/sw-b3m-vs2/blocks"
+      - action: start_weed_master
+        node: target_node
+        port: "9435"
+        dir: "/tmp/sw-b3m-master"
+        save_as: master_pid
+      - action: wait_cluster_ready
+        node: target_node
+        master_url: "http://localhost:9435"
+        timeout: 30s
+      - action: start_weed_volume
+        node: target_node
+        port: "18192"
+        master: "localhost:9435"
+        dir: "/tmp/sw-b3m-vs1"
+        extra_args: "-block.dir=/tmp/sw-b3m-vs1/blocks -block.listen=:3279 -ip=192.168.1.184"
+        save_as: vs1_pid
+      - action: start_weed_volume
+        node: target_node
+        port: "18193"
+        master: "localhost:9435"
+        dir: "/tmp/sw-b3m-vs2"
+        extra_args: "-block.dir=/tmp/sw-b3m-vs2/blocks -block.listen=:3280 -ip=192.168.1.184"
+        save_as: vs2_pid
+      - action: wait_block_servers
+        count: "2"
+        timeout: 60s
+
+  # Phase 3: Create RF=2 volume, write data
+  - name: create_and_write
+    actions:
+      - action: create_block_volume
+        name: "promote-test"
+        size: "50M"
+        replica_factor: "2"
+        save_as: vol_info
+      # Wait for replica to confirm role via heartbeat.
+      - action: sleep
+        duration: 10s
+      - action: lookup_block_volume
+        name: "promote-test"
+        save_as: initial
+      - action: iscsi_login_direct
+        node: client_node
+        host: "{{ initial_iscsi_host }}"
+        port: "{{ initial_iscsi_port }}"
+        iqn: "{{ initial_iqn }}"
+        save_as: device
+      - action: dd_write
+        node: client_node
+        device: "{{ device }}"
+        bs: 1M
+        count: "2"
+        seek: "3"
+        save_as: md5_3M
+      - action: dd_read_md5
+        node: client_node
+        device: "{{ device }}"
+        bs: 1M
+        count: "2"
+        skip: "3"
+        save_as: verify_3M
+      - action: assert_equal
+        actual: "{{ verify_3M }}"
+        expected: "{{ md5_3M }}"
+
+  # Phase 4: Kill primary VS, then promote via API
+  - name: kill_and_promote
+    actions:
+      - action: iscsi_cleanup
+        node: client_node
+        ignore_error: true
+      # Crash-kill VS1 with SIGKILL to simulate a real crash.
+      - action: exec
+        node: target_node
+        cmd: "kill -9 {{ vs1_pid }}"
+        root: "true"
+      # Wait for master to detect the disconnection.
+      - action: sleep
+        duration: 15s
+      # Manual promote via the API.
+      - action: block_promote
+        name: "promote-test"
+        reason: "T7 integration test: manual failover"
+        save_as: promote_result
+      - action: print
+        msg: "promoted to {{ promote_result_server }} epoch={{ promote_result_epoch }}"
+
+  # Phase 5: Verify promoted state
+  - name: verify_promoted
+    actions:
+      - action: lookup_block_volume
+        name: "promote-test"
+        save_as: after
+      # New primary should be different from old.
+      - action: assert_block_field
+        name: "promote-test"
+        field: "epoch"
+        expected: "2"
+      - action: block_status
+        save_as: stats
+      - action: print
+        msg: "promotions_total={{ stats_promotions_total }}"
+
+  # Phase 6: Reconnect iSCSI to new primary, verify data
+  - name: verify_data
+    actions:
+      - action: iscsi_login_direct
+        node: client_node
+        host: "{{ after_iscsi_host }}"
+        port: "{{ after_iscsi_port }}"
+        iqn: "{{ after_iqn }}"
+        save_as: device2
+      - action: dd_read_md5
+        node: client_node
+        device: "{{ device2 }}"
+        bs: 1M
+        count: "2"
+        skip: "3"
+        save_as: post_promote_md5
+      - action: assert_equal
+        actual: "{{ post_promote_md5 }}"
+        expected: "{{ md5_3M }}"
+
+  # Cleanup (always runs)
+  - name: cleanup
+    always: true
+    actions:
+      - action: iscsi_cleanup
+        node: client_node
+        ignore_error: true
+      - action: delete_block_volume
+        name: "promote-test"
+        ignore_error: true
+      - action: stop_weed
+        node: target_node
+        pid: "{{ vs2_pid }}"
+        ignore_error: true
+      - action: stop_weed
+        node: target_node
+        pid: "{{ vs1_pid }}"
+        ignore_error: true
+      - action: stop_weed
+        node: target_node
+        pid: "{{ master_pid }}"
+        ignore_error: true
+      - action: exec
+        node: target_node
+        cmd: "rm -rf /tmp/sw-b3m-master /tmp/sw-b3m-vs1 /tmp/sw-b3m-vs2"
+        root: "true"
+        ignore_error: true