diff --git a/weed/server/integration_block_test.go b/weed/server/integration_block_test.go index f5d9aad87..6f8cf4894 100644 --- a/weed/server/integration_block_test.go +++ b/weed/server/integration_block_test.go @@ -645,13 +645,16 @@ func TestIntegration_DoubleFailover(t *testing.T) { // Reconnect vs1 first so it becomes a replica (via recoverBlockVolumes). ms.recoverBlockVolumes(vs1) - // Simulate heartbeat from vs1 that restores iSCSI addr and health score - // (in production this happens when the VS re-registers after reconnect). + // Simulate heartbeat from vs1 that restores iSCSI addr, health score, + // role, and heartbeat timestamp (in production this happens when the + // VS re-registers after reconnect and completes rebuild). e1, _ = ms.blockRegistry.Lookup("pvc-double-1") for i := range e1.Replicas { if e1.Replicas[i].Server == vs1 { e1.Replicas[i].ISCSIAddr = vs1 + ":3260" e1.Replicas[i].HealthScore = 1.0 + e1.Replicas[i].Role = blockvol.RoleToWire(blockvol.RoleReplica) + e1.Replicas[i].LastHeartbeat = time.Now() } } diff --git a/weed/server/master_block_failover.go b/weed/server/master_block_failover.go index 09649af62..f3eb35bbb 100644 --- a/weed/server/master_block_failover.go +++ b/weed/server/master_block_failover.go @@ -57,7 +57,19 @@ func (ms *MasterServer) failoverBlockVolumes(deadServer string) { delay := leaseExpiry.Sub(now) glog.V(0).Infof("failover: %q lease expires in %v, deferring promotion", entry.Name, delay) volumeName := entry.Name + capturedEpoch := entry.Epoch // T3: capture epoch for stale-timer validation timer := time.AfterFunc(delay, func() { + // T3: Re-validate before acting — prevent stale timer on recreated/changed volume. 
+ current, ok := ms.blockRegistry.Lookup(volumeName) + if !ok { + glog.V(0).Infof("failover: deferred promotion for %q skipped (volume deleted)", volumeName) + return + } + if current.Epoch != capturedEpoch { + glog.V(0).Infof("failover: deferred promotion for %q skipped (epoch changed %d -> %d)", + volumeName, capturedEpoch, current.Epoch) + return + } ms.promoteReplica(volumeName) }) ms.blockFailover.mu.Lock() @@ -116,8 +128,15 @@ func (ms *MasterServer) promoteReplica(volumeName string) { return } + ms.finalizePromotion(volumeName, oldPrimary, oldPath, newEpoch) +} + +// finalizePromotion performs post-registry promotion steps: +// enqueue assignment for new primary, record pending rebuild for old primary, bump metrics. +// Called by both promoteReplica (auto) and blockVolumePromoteHandler (manual). +func (ms *MasterServer) finalizePromotion(volumeName, oldPrimary, oldPath string, newEpoch uint64) { // Re-read entry after promotion. - entry, ok = ms.blockRegistry.Lookup(volumeName) + entry, ok := ms.blockRegistry.Lookup(volumeName) if !ok { return } @@ -198,11 +217,15 @@ func (ms *MasterServer) cancelDeferredTimers(server string) { // recoverBlockVolumes is called when a previously dead VS reconnects. // It cancels any deferred promotion timers (R2-F2), drains pending rebuilds, -// and enqueues rebuild assignments. +// enqueues rebuild assignments, and checks for orphaned primaries (T2/B-06). func (ms *MasterServer) recoverBlockVolumes(reconnectedServer string) { // R2-F2: Cancel deferred promotion timers for this server to prevent split-brain. ms.cancelDeferredTimers(reconnectedServer) + // T2 (B-06): Check for orphaned primaries — volumes where the reconnecting + // server is a replica but the primary is dead/disconnected. 
+ ms.reevaluateOrphanedPrimaries(reconnectedServer) + rebuilds := ms.drainPendingRebuilds(reconnectedServer) if len(rebuilds) == 0 { return @@ -221,16 +244,74 @@ func (ms *MasterServer) recoverBlockVolumes(reconnectedServer string) { Path: rb.OldPath, }) + // T4: Warn if RebuildListenAddr is empty (new primary hasn't heartbeated yet). + rebuildAddr := entry.RebuildListenAddr + if rebuildAddr == "" { + glog.Warningf("rebuild: %q RebuildListenAddr is empty (new primary %s may not have heartbeated yet), "+ + "queuing rebuild anyway — VS should retry on empty addr", rb.VolumeName, entry.VolumeServer) + } + // Enqueue rebuild assignment for the reconnected server. ms.blockAssignmentQueue.Enqueue(reconnectedServer, blockvol.BlockVolumeAssignment{ Path: rb.OldPath, Epoch: entry.Epoch, Role: blockvol.RoleToWire(blockvol.RoleRebuilding), - RebuildAddr: entry.RebuildListenAddr, + RebuildAddr: rebuildAddr, }) ms.blockRegistry.RebuildsTotal.Add(1) glog.V(0).Infof("rebuild: enqueued rebuild for %q on %s (epoch=%d, rebuildAddr=%s)", - rb.VolumeName, reconnectedServer, entry.Epoch, entry.RebuildListenAddr) + rb.VolumeName, reconnectedServer, entry.Epoch, rebuildAddr) + } +} + +// reevaluateOrphanedPrimaries checks if the given server is a replica for any +// volumes whose primary is dead (not block-capable). If so, promotes the best +// available replica — but only after the old primary's lease has expired, to +// maintain the same split-brain protection as failoverBlockVolumes(). +// This fixes B-06 (orphaned primary after replica re-register) +// and partially B-08 (fast reconnect skips failover window). +func (ms *MasterServer) reevaluateOrphanedPrimaries(server string) { + if ms.blockRegistry == nil { + return + } + orphaned := ms.blockRegistry.VolumesWithDeadPrimary(server) + now := time.Now() + for _, volumeName := range orphaned { + entry, ok := ms.blockRegistry.Lookup(volumeName) + if !ok { + continue + } + + // Respect lease expiry — same gate as failoverBlockVolumes(). 
+ leaseExpiry := entry.LastLeaseGrant.Add(entry.LeaseTTL) + if now.Before(leaseExpiry) { + delay := leaseExpiry.Sub(now) + glog.V(0).Infof("failover: orphaned primary for %q (replica %s alive, primary dead) "+ + "but lease expires in %v, deferring promotion", volumeName, server, delay) + capturedEpoch := entry.Epoch + deadPrimary := entry.VolumeServer + timer := time.AfterFunc(delay, func() { + current, ok := ms.blockRegistry.Lookup(volumeName) + if !ok { + return + } + if current.Epoch != capturedEpoch { + glog.V(0).Infof("failover: deferred orphan promotion for %q skipped (epoch changed %d -> %d)", + volumeName, capturedEpoch, current.Epoch) + return + } + ms.promoteReplica(volumeName) + }) + ms.blockFailover.mu.Lock() + ms.blockFailover.deferredTimers[deadPrimary] = append( + ms.blockFailover.deferredTimers[deadPrimary], timer) + ms.blockFailover.mu.Unlock() + continue + } + + glog.V(0).Infof("failover: orphaned primary detected for %q (replica %s alive, primary dead, lease expired), promoting", + volumeName, server) + ms.promoteReplica(volumeName) } } diff --git a/weed/server/master_block_failover_test.go b/weed/server/master_block_failover_test.go index 6d6439068..afe604a43 100644 --- a/weed/server/master_block_failover_test.go +++ b/weed/server/master_block_failover_test.go @@ -34,6 +34,9 @@ func testMasterServerForFailover(t *testing.T) *MasterServer { // registerVolumeWithReplica creates a volume entry with primary + replica for tests. func registerVolumeWithReplica(t *testing.T, ms *MasterServer, name, primary, replica string, epoch uint64, leaseTTL time.Duration) { t.Helper() + // Mark both servers as block-capable so promotion Gate 4 (liveness) passes. 
+ ms.blockRegistry.MarkBlockCapable(primary) + ms.blockRegistry.MarkBlockCapable(replica) entry := &BlockVolumeEntry{ Name: name, VolumeServer: primary, @@ -53,11 +56,13 @@ func registerVolumeWithReplica(t *testing.T, ms *MasterServer, name, primary, re // CP8-2: also populate Replicas[] for PromoteBestReplica. Replicas: []ReplicaInfo{ { - Server: replica, - Path: fmt.Sprintf("/data/%s.blk", name), - IQN: fmt.Sprintf("iqn.2024.test:%s-replica", name), - ISCSIAddr: replica + ":3260", - HealthScore: 1.0, + Server: replica, + Path: fmt.Sprintf("/data/%s.blk", name), + IQN: fmt.Sprintf("iqn.2024.test:%s-replica", name), + ISCSIAddr: replica + ":3260", + HealthScore: 1.0, + Role: blockvol.RoleToWire(blockvol.RoleReplica), + LastHeartbeat: time.Now(), }, }, } @@ -194,6 +199,9 @@ func TestFailover_MultipleVolumes(t *testing.T) { func TestFailover_LeaseNotExpired_DeferredPromotion(t *testing.T) { ms := testMasterServerForFailover(t) + // Mark servers as block-capable so promotion Gate 4 (liveness) passes. + ms.blockRegistry.MarkBlockCapable("vs1") + ms.blockRegistry.MarkBlockCapable("vs2") entry := &BlockVolumeEntry{ Name: "vol1", VolumeServer: "vs1", @@ -209,7 +217,7 @@ func TestFailover_LeaseNotExpired_DeferredPromotion(t *testing.T) { LeaseTTL: 200 * time.Millisecond, LastLeaseGrant: time.Now(), // just granted, NOT expired yet Replicas: []ReplicaInfo{ - {Server: "vs2", Path: "/data/vol1.blk", IQN: "iqn:vol1-r", ISCSIAddr: "vs2:3260", HealthScore: 1.0}, + {Server: "vs2", Path: "/data/vol1.blk", IQN: "iqn:vol1-r", ISCSIAddr: "vs2:3260", HealthScore: 1.0, Role: blockvol.RoleToWire(blockvol.RoleReplica), LastHeartbeat: time.Now()}, }, } ms.blockRegistry.Register(entry) @@ -397,6 +405,9 @@ func TestRebuild_RegistryUpdatedWithNewReplica(t *testing.T) { func TestRebuild_AssignmentContainsRebuildAddr(t *testing.T) { ms := testMasterServerForFailover(t) + // Mark servers as block-capable so promotion Gate 4 (liveness) passes. 
+ ms.blockRegistry.MarkBlockCapable("vs1") + ms.blockRegistry.MarkBlockCapable("vs2") entry := &BlockVolumeEntry{ Name: "vol1", VolumeServer: "vs1", @@ -413,7 +424,7 @@ func TestRebuild_AssignmentContainsRebuildAddr(t *testing.T) { LeaseTTL: 5 * time.Second, LastLeaseGrant: time.Now().Add(-10 * time.Second), Replicas: []ReplicaInfo{ - {Server: "vs2", Path: "/data/vol1.blk", IQN: "iqn:vol1-r", ISCSIAddr: "vs2:3260", HealthScore: 1.0}, + {Server: "vs2", Path: "/data/vol1.blk", IQN: "iqn:vol1-r", ISCSIAddr: "vs2:3260", HealthScore: 1.0, Role: blockvol.RoleToWire(blockvol.RoleReplica), LastHeartbeat: time.Now()}, }, } ms.blockRegistry.Register(entry) @@ -457,7 +468,7 @@ func TestFailover_TransientDisconnect_NoPromotion(t *testing.T) { LeaseTTL: 30 * time.Second, LastLeaseGrant: time.Now(), // just granted Replicas: []ReplicaInfo{ - {Server: "vs2", Path: "/data/vol1.blk", IQN: "iqn:vol1-r", ISCSIAddr: "vs2:3260", HealthScore: 1.0}, + {Server: "vs2", Path: "/data/vol1.blk", IQN: "iqn:vol1-r", ISCSIAddr: "vs2:3260", HealthScore: 1.0, Role: blockvol.RoleToWire(blockvol.RoleReplica), LastHeartbeat: time.Now()}, }, } ms.blockRegistry.Register(entry) @@ -556,6 +567,10 @@ func TestLifecycle_CreateFailoverRebuild(t *testing.T) { // registerVolumeRF3 creates a volume entry with primary + 2 replicas for RF=3 tests. func registerVolumeRF3(t *testing.T, ms *MasterServer, name, primary, replica1, replica2 string, epoch uint64, leaseTTL time.Duration) { t.Helper() + // Mark all servers as block-capable so promotion Gate 4 (liveness) passes. 
+ ms.blockRegistry.MarkBlockCapable(primary) + ms.blockRegistry.MarkBlockCapable(replica1) + ms.blockRegistry.MarkBlockCapable(replica2) entry := &BlockVolumeEntry{ Name: name, VolumeServer: primary, @@ -576,20 +591,24 @@ func registerVolumeRF3(t *testing.T, ms *MasterServer, name, primary, replica1, ReplicaISCSIAddr: replica1 + ":3260", Replicas: []ReplicaInfo{ { - Server: replica1, - Path: fmt.Sprintf("/data/%s.blk", name), - IQN: fmt.Sprintf("iqn.2024.test:%s-r1", name), - ISCSIAddr: replica1 + ":3260", - HealthScore: 1.0, - WALHeadLSN: 100, + Server: replica1, + Path: fmt.Sprintf("/data/%s.blk", name), + IQN: fmt.Sprintf("iqn.2024.test:%s-r1", name), + ISCSIAddr: replica1 + ":3260", + HealthScore: 1.0, + WALHeadLSN: 100, + Role: blockvol.RoleToWire(blockvol.RoleReplica), + LastHeartbeat: time.Now(), }, { - Server: replica2, - Path: fmt.Sprintf("/data/%s.blk", name), - IQN: fmt.Sprintf("iqn.2024.test:%s-r2", name), - ISCSIAddr: replica2 + ":3260", - HealthScore: 1.0, - WALHeadLSN: 100, + Server: replica2, + Path: fmt.Sprintf("/data/%s.blk", name), + IQN: fmt.Sprintf("iqn.2024.test:%s-r2", name), + ISCSIAddr: replica2 + ":3260", + HealthScore: 1.0, + WALHeadLSN: 100, + Role: blockvol.RoleToWire(blockvol.RoleReplica), + LastHeartbeat: time.Now(), }, }, } @@ -793,6 +812,10 @@ func TestRF3_AllReplicasDead_NoPromotion(t *testing.T) { // RF3: Lease deferred promotion with RF=3. func TestRF3_LeaseDeferred_Promotion(t *testing.T) { ms := testMasterServerForFailover(t) + // Mark servers as block-capable so promotion Gate 4 (liveness) passes. 
+ ms.blockRegistry.MarkBlockCapable("vs1") + ms.blockRegistry.MarkBlockCapable("vs2") + ms.blockRegistry.MarkBlockCapable("vs3") entry := &BlockVolumeEntry{ Name: "vol1", VolumeServer: "vs1", @@ -807,8 +830,8 @@ func TestRF3_LeaseDeferred_Promotion(t *testing.T) { LeaseTTL: 200 * time.Millisecond, LastLeaseGrant: time.Now(), // just granted → NOT expired Replicas: []ReplicaInfo{ - {Server: "vs2", Path: "/data/vol1.blk", ISCSIAddr: "vs2:3260", HealthScore: 1.0, WALHeadLSN: 50}, - {Server: "vs3", Path: "/data/vol1.blk", ISCSIAddr: "vs3:3260", HealthScore: 0.9, WALHeadLSN: 50}, + {Server: "vs2", Path: "/data/vol1.blk", ISCSIAddr: "vs2:3260", HealthScore: 1.0, WALHeadLSN: 50, Role: blockvol.RoleToWire(blockvol.RoleReplica), LastHeartbeat: time.Now()}, + {Server: "vs3", Path: "/data/vol1.blk", ISCSIAddr: "vs3:3260", HealthScore: 0.9, WALHeadLSN: 50, Role: blockvol.RoleToWire(blockvol.RoleReplica), LastHeartbeat: time.Now()}, }, // Deprecated scalar fields. ReplicaServer: "vs2", ReplicaPath: "/data/vol1.blk", ReplicaISCSIAddr: "vs2:3260", @@ -853,8 +876,8 @@ func TestRF3_CancelDeferredOnReconnect(t *testing.T) { LeaseTTL: 5 * time.Second, LastLeaseGrant: time.Now(), // just granted → long lease Replicas: []ReplicaInfo{ - {Server: "vs2", Path: "/data/vol1.blk", ISCSIAddr: "vs2:3260", HealthScore: 1.0}, - {Server: "vs3", Path: "/data/vol1.blk", ISCSIAddr: "vs3:3260", HealthScore: 1.0}, + {Server: "vs2", Path: "/data/vol1.blk", ISCSIAddr: "vs2:3260", HealthScore: 1.0, Role: blockvol.RoleToWire(blockvol.RoleReplica), LastHeartbeat: time.Now()}, + {Server: "vs3", Path: "/data/vol1.blk", ISCSIAddr: "vs3:3260", HealthScore: 1.0, Role: blockvol.RoleToWire(blockvol.RoleReplica), LastHeartbeat: time.Now()}, }, ReplicaServer: "vs2", ReplicaPath: "/data/vol1.blk", ReplicaISCSIAddr: "vs2:3260", } @@ -888,3 +911,267 @@ func TestRF3_CancelDeferredOnReconnect(t *testing.T) { t.Fatalf("vs1 should remain primary (timer cancelled), got %q", e.VolumeServer) } } + +// 
============================================================ +// CP11B-3 T2: Re-evaluate on Replica Registration (B-06) +// ============================================================ + +// T2: Orphaned primary + replica reconnects → automatic promotion. +func TestT2_OrphanedPrimary_ReplicaReconnect_Promotes(t *testing.T) { + ms := testMasterServerForFailover(t) + registerVolumeWithReplica(t, ms, "vol1", "vs1", "vs2", 1, 5*time.Second) + + // Simulate vs1 dying without proper failover (e.g., promotion failed at the time). + // Mark vs1 as dead but DON'T call failoverBlockVolumes (simulates missed/failed failover). + ms.blockRegistry.UnmarkBlockCapable("vs1") + + // vs2 reconnects (sends heartbeat). reevaluateOrphanedPrimaries should detect orphaned primary. + ms.recoverBlockVolumes("vs2") + + entry, _ := ms.blockRegistry.Lookup("vol1") + if entry.VolumeServer != "vs2" { + t.Fatalf("expected promotion to vs2 (orphaned primary), got %q", entry.VolumeServer) + } + if entry.Epoch != 2 { + t.Fatalf("expected epoch 2 after promotion, got %d", entry.Epoch) + } +} + +// T2: Replica reconnects but primary is alive → no unnecessary promotion. +func TestT2_PrimaryAlive_NoPromotion(t *testing.T) { + ms := testMasterServerForFailover(t) + registerVolumeWithReplica(t, ms, "vol1", "vs1", "vs2", 1, 5*time.Second) + + // Both servers alive. vs2 reconnects — no orphaned primary. + ms.recoverBlockVolumes("vs2") + + entry, _ := ms.blockRegistry.Lookup("vol1") + if entry.VolumeServer != "vs1" { + t.Fatalf("primary should remain vs1 (alive), got %q", entry.VolumeServer) + } + if entry.Epoch != 1 { + t.Fatalf("epoch should remain 1, got %d", entry.Epoch) + } +} + +// T2: Multiple orphaned volumes, all promoted on reconnect. 
+func TestT2_MultipleOrphanedVolumes(t *testing.T) { + ms := testMasterServerForFailover(t) + // vol1: vs1=primary, vs2=replica + // vol2: vs3=primary, vs2=replica + registerVolumeWithReplica(t, ms, "vol1", "vs1", "vs2", 1, 5*time.Second) + ms.blockRegistry.MarkBlockCapable("vs3") + entry2 := &BlockVolumeEntry{ + Name: "vol2", VolumeServer: "vs3", Path: "/data/vol2.blk", + SizeBytes: 1 << 30, Epoch: 1, Role: blockvol.RoleToWire(blockvol.RolePrimary), + Status: StatusActive, LeaseTTL: 5 * time.Second, + LastLeaseGrant: time.Now().Add(-10 * time.Second), + Replicas: []ReplicaInfo{{ + Server: "vs2", Path: "/data/vol2.blk", HealthScore: 1.0, + Role: blockvol.RoleToWire(blockvol.RoleReplica), LastHeartbeat: time.Now(), + }}, + } + ms.blockRegistry.Register(entry2) + + // Both primaries die. + ms.blockRegistry.UnmarkBlockCapable("vs1") + ms.blockRegistry.UnmarkBlockCapable("vs3") + + // vs2 reconnects → both orphaned volumes should be promoted. + ms.recoverBlockVolumes("vs2") + + e1, _ := ms.blockRegistry.Lookup("vol1") + e2, _ := ms.blockRegistry.Lookup("vol2") + if e1.VolumeServer != "vs2" { + t.Fatalf("vol1: expected promotion to vs2, got %q", e1.VolumeServer) + } + if e2.VolumeServer != "vs2" { + t.Fatalf("vol2: expected promotion to vs2, got %q", e2.VolumeServer) + } +} + +// T2: Repeated heartbeats do NOT cause duplicate promotions. +func TestT2_RepeatedHeartbeats_NoDuplicatePromotion(t *testing.T) { + ms := testMasterServerForFailover(t) + registerVolumeWithReplica(t, ms, "vol1", "vs1", "vs2", 1, 5*time.Second) + + ms.blockRegistry.UnmarkBlockCapable("vs1") + + // First reconnect promotes. + ms.reevaluateOrphanedPrimaries("vs2") + entry, _ := ms.blockRegistry.Lookup("vol1") + if entry.VolumeServer != "vs2" { + t.Fatalf("first call: expected promotion to vs2, got %q", entry.VolumeServer) + } + epochAfterFirst := entry.Epoch + + // Second call: vs2 is now the primary AND block-capable. No orphan detected. 
+ ms.reevaluateOrphanedPrimaries("vs2") + entry, _ = ms.blockRegistry.Lookup("vol1") + if entry.Epoch != epochAfterFirst { + t.Fatalf("second call should not bump epoch: got %d, want %d", entry.Epoch, epochAfterFirst) + } +} + +// T2: Dead primary with active lease, replica reconnects → no immediate promotion. +// Regression test for lease-bypass bug: reevaluateOrphanedPrimaries must respect +// lease expiry, not promote immediately. +func TestT2_OrphanedPrimary_LeaseNotExpired_DefersPromotion(t *testing.T) { + ms := testMasterServerForFailover(t) + ms.blockRegistry.MarkBlockCapable("vs1") + ms.blockRegistry.MarkBlockCapable("vs2") + ms.blockRegistry.Register(&BlockVolumeEntry{ + Name: "vol1", VolumeServer: "vs1", Path: "/data/vol1.blk", + SizeBytes: 1 << 30, Epoch: 1, Role: blockvol.RoleToWire(blockvol.RolePrimary), + Status: StatusActive, LeaseTTL: 300 * time.Millisecond, + LastLeaseGrant: time.Now(), // lease still active + Replicas: []ReplicaInfo{{ + Server: "vs2", Path: "/data/vol1.blk", HealthScore: 1.0, + Role: blockvol.RoleToWire(blockvol.RoleReplica), LastHeartbeat: time.Now(), + }}, + }) + + // vs1 dies (unmark block-capable). + ms.blockRegistry.UnmarkBlockCapable("vs1") + + // vs2 reconnects — orphan detected, but lease still active → should NOT promote immediately. + ms.reevaluateOrphanedPrimaries("vs2") + + entry, _ := ms.blockRegistry.Lookup("vol1") + if entry.VolumeServer != "vs1" { + t.Fatalf("should NOT promote while lease active, got primary=%q", entry.VolumeServer) + } + if entry.Epoch != 1 { + t.Fatalf("epoch should remain 1, got %d", entry.Epoch) + } + + // Verify a deferred timer was created for the dead primary. + ms.blockFailover.mu.Lock() + timerCount := len(ms.blockFailover.deferredTimers["vs1"]) + ms.blockFailover.mu.Unlock() + if timerCount != 1 { + t.Fatalf("expected 1 deferred timer for vs1, got %d", timerCount) + } + + // Wait for lease to expire + margin → timer fires, promotion happens. 
+ time.Sleep(450 * time.Millisecond) + + entry, _ = ms.blockRegistry.Lookup("vol1") + if entry.VolumeServer != "vs2" { + t.Fatalf("after lease expiry, expected promotion to vs2, got %q", entry.VolumeServer) + } + if entry.Epoch != 2 { + t.Fatalf("expected epoch 2, got %d", entry.Epoch) + } +} + +// ============================================================ +// CP11B-3 T3: Deferred Timer Safety +// ============================================================ + +// T3: Delete/recreate volume before deferred timer fires → no wrong promotion. +func TestT3_DeferredTimer_VolumeDeleted_NoPromotion(t *testing.T) { + ms := testMasterServerForFailover(t) + ms.blockRegistry.MarkBlockCapable("vs1") + ms.blockRegistry.MarkBlockCapable("vs2") + entry := &BlockVolumeEntry{ + Name: "vol1", VolumeServer: "vs1", Path: "/data/vol1.blk", + SizeBytes: 1 << 30, Epoch: 5, Role: blockvol.RoleToWire(blockvol.RolePrimary), + Status: StatusActive, LeaseTTL: 200 * time.Millisecond, + LastLeaseGrant: time.Now(), + Replicas: []ReplicaInfo{{ + Server: "vs2", Path: "/data/vol1.blk", HealthScore: 1.0, + Role: blockvol.RoleToWire(blockvol.RoleReplica), LastHeartbeat: time.Now(), + }}, + } + ms.blockRegistry.Register(entry) + + // vs1 dies → deferred timer created (lease not expired, epoch=5). + ms.failoverBlockVolumes("vs1") + + // Delete the volume before timer fires. + ms.blockRegistry.Unregister("vol1") + + // Wait for timer to fire. + time.Sleep(350 * time.Millisecond) + + // Volume should not exist (timer found it deleted, no-op). + _, ok := ms.blockRegistry.Lookup("vol1") + if ok { + t.Fatal("volume should have been deleted, timer should not recreate it") + } +} + +// T3: Epoch changes before deferred timer fires → timer rejected. 
+func TestT3_DeferredTimer_EpochChanged_NoPromotion(t *testing.T) { + ms := testMasterServerForFailover(t) + ms.blockRegistry.MarkBlockCapable("vs1") + ms.blockRegistry.MarkBlockCapable("vs2") + ms.blockRegistry.MarkBlockCapable("vs3") + entry := &BlockVolumeEntry{ + Name: "vol1", VolumeServer: "vs1", Path: "/data/vol1.blk", + SizeBytes: 1 << 30, Epoch: 5, Role: blockvol.RoleToWire(blockvol.RolePrimary), + Status: StatusActive, LeaseTTL: 200 * time.Millisecond, + LastLeaseGrant: time.Now(), + Replicas: []ReplicaInfo{{ + Server: "vs2", Path: "/data/vol1.blk", HealthScore: 1.0, + Role: blockvol.RoleToWire(blockvol.RoleReplica), LastHeartbeat: time.Now(), + }}, + } + ms.blockRegistry.Register(entry) + + // vs1 dies → deferred timer created (captures epoch=5). + ms.failoverBlockVolumes("vs1") + + // Before timer fires, manually bump the epoch (simulating another event). + e, _ := ms.blockRegistry.Lookup("vol1") + e.Epoch = 99 + + // Wait for timer to fire. + time.Sleep(350 * time.Millisecond) + + // Timer should have been rejected (epoch mismatch). Epoch stays at 99. + e, _ = ms.blockRegistry.Lookup("vol1") + if e.Epoch != 99 { + t.Fatalf("epoch should remain 99 (timer rejected), got %d", e.Epoch) + } + // Primary should NOT have changed (deferred promotion was rejected). + if e.VolumeServer != "vs1" { + t.Fatalf("primary should remain vs1 (timer rejected), got %q", e.VolumeServer) + } +} + +// ============================================================ +// CP11B-3 T4: Rebuild with empty RebuildListenAddr +// ============================================================ + +// T4: Rebuild queued with empty RebuildListenAddr after promotion. +func TestT4_RebuildEmptyAddr_StillQueued(t *testing.T) { + ms := testMasterServerForFailover(t) + registerVolumeWithReplica(t, ms, "vol1", "vs1", "vs2", 1, 5*time.Second) + + // Failover: vs1 dies, vs2 promoted. PromoteBestReplica clears RebuildListenAddr. 
+ ms.failoverBlockVolumes("vs1") + + entry, _ := ms.blockRegistry.Lookup("vol1") + if entry.RebuildListenAddr != "" { + t.Fatalf("RebuildListenAddr should be empty after promotion, got %q", entry.RebuildListenAddr) + } + + // vs1 reconnects. Rebuild should still be queued (even with empty addr). + ms.recoverBlockVolumes("vs1") + + assignments := ms.blockAssignmentQueue.Peek("vs1") + foundRebuild := false + for _, a := range assignments { + if blockvol.RoleFromWire(a.Role) == blockvol.RoleRebuilding { + foundRebuild = true + if a.RebuildAddr != "" { + t.Fatalf("RebuildAddr should be empty (new primary hasn't heartbeated), got %q", a.RebuildAddr) + } + } + } + if !foundRebuild { + t.Fatal("rebuild assignment should still be queued even with empty addr") + } +} diff --git a/weed/server/master_block_registry.go b/weed/server/master_block_registry.go index b0590f2ec..9155e26a6 100644 --- a/weed/server/master_block_registry.go +++ b/weed/server/master_block_registry.go @@ -842,44 +842,91 @@ func (r *BlockVolumeRegistry) PromotionLSNTolerance() uint64 { return r.promotionLSNTolerance } -// PromoteBestReplica promotes the best eligible replica to primary. -// Eligibility: heartbeat fresh (within 2×LeaseTTL), WALHeadLSN within tolerance of primary, -// and role must be RoleReplica (not RoleRebuilding). -// The promoted replica is removed from Replicas[]. Other replicas stay. -// Old primary is NOT added to Replicas (needs rebuild). -// Returns the new epoch. -func (r *BlockVolumeRegistry) PromoteBestReplica(name string) (uint64, error) { - r.mu.Lock() - defer r.mu.Unlock() - entry, ok := r.volumes[name] - if !ok { - return 0, fmt.Errorf("block volume %q not found", name) +// PromotionRejection records why a specific replica was rejected for promotion. +type PromotionRejection struct { + Server string + Reason string // "stale_heartbeat", "wal_lag", "wrong_role", "server_dead" +} + +// PromotionPreflightResult is the reusable result of a promotion evaluation. 
+// Used by auto-promotion, manual promote API, preflight status, and logging. +type PromotionPreflightResult struct { + VolumeName string + Promotable bool // true if a candidate was found + Candidate *ReplicaInfo // best candidate (nil if !Promotable) + CandidateIdx int // index in Replicas[] (-1 if !Promotable) + Rejections []PromotionRejection // why each non-candidate was rejected + Reason string // human-readable summary when !Promotable +} + +// evaluatePromotionLocked evaluates promotion candidates for a volume. +// Caller must hold r.mu (read or write). Returns a preflight result without +// mutating the registry. The four gates: +// 1. Heartbeat freshness (within 2×LeaseTTL) +// 2. WAL LSN recency (within promotionLSNTolerance of primary) +// 3. Role must be RoleReplica (not RoleRebuilding) +// 4. Server must be in blockServers (alive) — fixes B-12 +func (r *BlockVolumeRegistry) evaluatePromotionLocked(entry *BlockVolumeEntry) PromotionPreflightResult { + result := PromotionPreflightResult{ + VolumeName: entry.Name, + CandidateIdx: -1, } if len(entry.Replicas) == 0 { - return 0, fmt.Errorf("block volume %q has no replicas", name) + result.Reason = "no replicas" + return result } - // Filter eligible replicas. now := time.Now() freshnessCutoff := 2 * entry.LeaseTTL if freshnessCutoff == 0 { - freshnessCutoff = 60 * time.Second // default if LeaseTTL not set + freshnessCutoff = 60 * time.Second } primaryLSN := entry.WALHeadLSN bestIdx := -1 for i := range entry.Replicas { ri := &entry.Replicas[i] - // Gate 1: heartbeat freshness. - if !ri.LastHeartbeat.IsZero() && now.Sub(ri.LastHeartbeat) > freshnessCutoff { + + // Gate 1: heartbeat freshness. Zero means never heartbeated — unsafe + // to promote because the registry has no proof the replica is alive, + // caught up, or fully initialized. 
+ if ri.LastHeartbeat.IsZero() { + result.Rejections = append(result.Rejections, PromotionRejection{ + Server: ri.Server, + Reason: "no_heartbeat", + }) + continue + } + if now.Sub(ri.LastHeartbeat) > freshnessCutoff { + result.Rejections = append(result.Rejections, PromotionRejection{ + Server: ri.Server, + Reason: "stale_heartbeat", + }) continue } // Gate 2: WAL LSN recency (skip if primary LSN is 0 — no data yet, all eligible). if primaryLSN > 0 && ri.WALHeadLSN+r.promotionLSNTolerance < primaryLSN { + result.Rejections = append(result.Rejections, PromotionRejection{ + Server: ri.Server, + Reason: "wal_lag", + }) continue } - // Gate 3: role must be RoleReplica (not rebuilding/stale). - if ri.Role != 0 && blockvol.RoleFromWire(ri.Role) != blockvol.RoleReplica { + // Gate 3: role must be exactly RoleReplica. Zero/unset role means + // the replica was created but never confirmed its role via heartbeat. + if blockvol.RoleFromWire(ri.Role) != blockvol.RoleReplica { + result.Rejections = append(result.Rejections, PromotionRejection{ + Server: ri.Server, + Reason: "wrong_role", + }) + continue + } + // Gate 4: server must be alive (in blockServers set) — B-12 fix. + if !r.blockServers[ri.Server] { + result.Rejections = append(result.Rejections, PromotionRejection{ + Server: ri.Server, + Reason: "server_dead", + }) continue } // Eligible — pick best by health score, tie-break by WALHeadLSN. 
@@ -894,11 +941,39 @@ func (r *BlockVolumeRegistry) PromoteBestReplica(name string) (uint64, error) { } if bestIdx == -1 { - return 0, fmt.Errorf("block volume %q: no eligible replicas for promotion", name) + result.Reason = "no eligible replicas" + if len(result.Rejections) > 0 { + result.Reason += ": " + result.Rejections[0].Reason + if len(result.Rejections) > 1 { + result.Reason += fmt.Sprintf(" (+%d more)", len(result.Rejections)-1) + } + } + return result } - promoted := entry.Replicas[bestIdx] + result.Promotable = true + ri := entry.Replicas[bestIdx] + result.Candidate = &ri + result.CandidateIdx = bestIdx + return result +} +// EvaluatePromotion returns a read-only preflight result for the named volume +// without mutating the registry. Safe for status/logging/manual promote preview. +func (r *BlockVolumeRegistry) EvaluatePromotion(name string) (PromotionPreflightResult, error) { + r.mu.RLock() + defer r.mu.RUnlock() + entry, ok := r.volumes[name] + if !ok { + return PromotionPreflightResult{VolumeName: name, Reason: "volume not found"}, fmt.Errorf("block volume %q not found", name) + } + return r.evaluatePromotionLocked(entry), nil +} + +// applyPromotionLocked applies the promotion of a replica at candidateIdx to primary. +// Caller must hold r.mu (write lock). The promoted replica is removed from Replicas[]. +// Old primary is NOT added to Replicas (needs rebuild). Returns the new epoch. +func (r *BlockVolumeRegistry) applyPromotionLocked(entry *BlockVolumeEntry, name string, candidate ReplicaInfo, candidateIdx int) uint64 { // Remove old primary from byServer index. r.removeFromServer(entry.VolumeServer, name) @@ -906,18 +981,21 @@ func (r *BlockVolumeRegistry) PromoteBestReplica(name string) (uint64, error) { newEpoch := entry.Epoch + 1 // Promote replica to primary. 
- entry.VolumeServer = promoted.Server - entry.Path = promoted.Path - entry.IQN = promoted.IQN - entry.ISCSIAddr = promoted.ISCSIAddr - entry.NvmeAddr = promoted.NvmeAddr - entry.NQN = promoted.NQN + entry.VolumeServer = candidate.Server + entry.Path = candidate.Path + entry.IQN = candidate.IQN + entry.ISCSIAddr = candidate.ISCSIAddr + entry.NvmeAddr = candidate.NvmeAddr + entry.NQN = candidate.NQN entry.Epoch = newEpoch entry.Role = blockvol.RoleToWire(blockvol.RolePrimary) entry.LastLeaseGrant = time.Now() + // Clear stale rebuild/publication metadata from old primary (B-11 partial fix). + entry.RebuildListenAddr = "" + // Remove promoted from Replicas. Others stay. - entry.Replicas = append(entry.Replicas[:bestIdx], entry.Replicas[bestIdx+1:]...) + entry.Replicas = append(entry.Replicas[:candidateIdx], entry.Replicas[candidateIdx+1:]...) // Sync deprecated scalar fields. if len(entry.Replicas) > 0 { @@ -940,9 +1018,212 @@ func (r *BlockVolumeRegistry) PromoteBestReplica(name string) (uint64, error) { // Update byServer index: new primary server now hosts this volume. r.addToServer(entry.VolumeServer, name) + return newEpoch +} + +// PromoteBestReplica promotes the best eligible replica to primary. +// Eligibility: heartbeat fresh (within 2×LeaseTTL), WALHeadLSN within tolerance of primary, +// role must be RoleReplica (not RoleRebuilding), and server must be alive (B-12 fix). +// The promoted replica is removed from Replicas[]. Other replicas stay. +// Old primary is NOT added to Replicas (needs rebuild). +// Returns the new epoch. 
+func (r *BlockVolumeRegistry) PromoteBestReplica(name string) (uint64, error) { + r.mu.Lock() + defer r.mu.Unlock() + entry, ok := r.volumes[name] + if !ok { + return 0, fmt.Errorf("block volume %q not found", name) + } + + pf := r.evaluatePromotionLocked(entry) + if !pf.Promotable { + return 0, fmt.Errorf("block volume %q: %s", name, pf.Reason) + } + + promoted := *pf.Candidate + bestIdx := pf.CandidateIdx + + newEpoch := r.applyPromotionLocked(entry, name, promoted, bestIdx) return newEpoch, nil } +// evaluateManualPromotionLocked evaluates promotion candidates for a manual promote request. +// Caller must hold r.mu (read or write). +// +// Differences from evaluatePromotionLocked: +// - Primary-alive gate: if !force and current primary is alive, reject with "primary_alive". +// - Target filtering: if targetServer != "", only evaluate that specific replica. +// Returns Reason="target_not_found" if that server is not a replica. +// - Force flag: bypasses soft gates (primary_alive, stale_heartbeat, wal_lag) +// but keeps hard gates (no_heartbeat with zero time, wrong_role, server_dead). +// +// Gate table: +// +// Gate | Normal | Force +// primary_alive | Reject | Skip +// no_heartbeat(0) | Reject | Reject +// stale_heartbeat | Reject | Skip +// wal_lag | Reject | Skip +// wrong_role | Reject | Reject +// server_dead | Reject | Reject +func (r *BlockVolumeRegistry) evaluateManualPromotionLocked(entry *BlockVolumeEntry, targetServer string, force bool) PromotionPreflightResult { + result := PromotionPreflightResult{ + VolumeName: entry.Name, + CandidateIdx: -1, + } + + // Primary-alive gate (soft — skipped when force=true). + if !force && r.blockServers[entry.VolumeServer] { + result.Reason = "primary_alive" + return result + } + + if len(entry.Replicas) == 0 { + result.Reason = "no replicas" + return result + } + + // Target filtering: if a specific server is requested, find its index first. + // Return early if not found. 
+ if targetServer != "" { + found := false + for i := range entry.Replicas { + if entry.Replicas[i].Server == targetServer { + found = true + break + } + } + if !found { + result.Reason = "target_not_found" + return result + } + } + + now := time.Now() + freshnessCutoff := 2 * entry.LeaseTTL + if freshnessCutoff == 0 { + freshnessCutoff = 60 * time.Second + } + primaryLSN := entry.WALHeadLSN + + bestIdx := -1 + for i := range entry.Replicas { + ri := &entry.Replicas[i] + + // If targeting a specific server, skip all others. + if targetServer != "" && ri.Server != targetServer { + continue + } + + // Hard gate: no heartbeat (zero time) — unsafe regardless of force. + if ri.LastHeartbeat.IsZero() { + result.Rejections = append(result.Rejections, PromotionRejection{ + Server: ri.Server, + Reason: "no_heartbeat", + }) + continue + } + + // Soft gate: stale heartbeat — skipped when force=true. + if !force && now.Sub(ri.LastHeartbeat) > freshnessCutoff { + result.Rejections = append(result.Rejections, PromotionRejection{ + Server: ri.Server, + Reason: "stale_heartbeat", + }) + continue + } + + // Soft gate: WAL lag — skipped when force=true. + if !force && primaryLSN > 0 && ri.WALHeadLSN+r.promotionLSNTolerance < primaryLSN { + result.Rejections = append(result.Rejections, PromotionRejection{ + Server: ri.Server, + Reason: "wal_lag", + }) + continue + } + + // Hard gate: role must be exactly RoleReplica. + if blockvol.RoleFromWire(ri.Role) != blockvol.RoleReplica { + result.Rejections = append(result.Rejections, PromotionRejection{ + Server: ri.Server, + Reason: "wrong_role", + }) + continue + } + + // Hard gate: server must be alive (in blockServers set). + if !r.blockServers[ri.Server] { + result.Rejections = append(result.Rejections, PromotionRejection{ + Server: ri.Server, + Reason: "server_dead", + }) + continue + } + + // Eligible — pick best by health score, tie-break by WALHeadLSN. 
+ if bestIdx == -1 { + bestIdx = i + } else if ri.HealthScore > entry.Replicas[bestIdx].HealthScore { + bestIdx = i + } else if ri.HealthScore == entry.Replicas[bestIdx].HealthScore && + ri.WALHeadLSN > entry.Replicas[bestIdx].WALHeadLSN { + bestIdx = i + } + } + + if bestIdx == -1 { + result.Reason = "no eligible replicas" + if len(result.Rejections) > 0 { + result.Reason += ": " + result.Rejections[0].Reason + if len(result.Rejections) > 1 { + result.Reason += fmt.Sprintf(" (+%d more)", len(result.Rejections)-1) + } + } + return result + } + + result.Promotable = true + ri := entry.Replicas[bestIdx] + result.Candidate = &ri + result.CandidateIdx = bestIdx + return result +} + +// ManualPromote promotes a specific replica (or the best eligible replica) to primary. +// Unlike PromoteBestReplica, it accepts operator overrides: +// - targetServer: if non-empty, only that replica is considered. +// - force: bypasses soft gates (primary_alive, stale_heartbeat, wal_lag). +// +// Returns (newEpoch, oldPrimary, oldPath, preflightResult, nil) on success. +// oldPrimary and oldPath are captured under the lock to avoid TOCTOU with +// concurrent auto-failover (BUG-T5-2 fix). +// Returns (0, "", "", preflightResult, err) on rejection or lookup failure. +func (r *BlockVolumeRegistry) ManualPromote(name, targetServer string, force bool) (uint64, string, string, PromotionPreflightResult, error) { + r.mu.Lock() + defer r.mu.Unlock() + + entry, ok := r.volumes[name] + if !ok { + return 0, "", "", PromotionPreflightResult{VolumeName: name, Reason: "volume not found"}, + fmt.Errorf("block volume %q not found", name) + } + + // Capture old primary info under lock (BUG-T5-2 fix). 
+ oldPrimary := entry.VolumeServer + oldPath := entry.Path + + pf := r.evaluateManualPromotionLocked(entry, targetServer, force) + if !pf.Promotable { + return 0, "", "", pf, fmt.Errorf("block volume %q: %s", name, pf.Reason) + } + + promoted := *pf.Candidate + candidateIdx := pf.CandidateIdx + + newEpoch := r.applyPromotionLocked(entry, name, promoted, candidateIdx) + return newEpoch, oldPrimary, oldPath, pf, nil +} + // MarkBlockCapable records that the given server supports block volumes. func (r *BlockVolumeRegistry) MarkBlockCapable(server string) { r.mu.Lock() @@ -1045,6 +1326,41 @@ func (r *BlockVolumeRegistry) ServerSummaries() []BlockServerSummary { return summaries } +// IsBlockCapable returns true if the given server is in the block-capable set (alive). +func (r *BlockVolumeRegistry) IsBlockCapable(server string) bool { + r.mu.RLock() + defer r.mu.RUnlock() + return r.blockServers[server] +} + +// VolumesWithDeadPrimary returns names of volumes where the given server is a replica +// and the current primary is NOT in the block-capable set (dead/disconnected). +// Used by T2 (B-06) to detect orphaned primaries that need re-promotion. +func (r *BlockVolumeRegistry) VolumesWithDeadPrimary(replicaServer string) []string { + r.mu.RLock() + defer r.mu.RUnlock() + names, ok := r.byServer[replicaServer] + if !ok { + return nil + } + var orphaned []string + for name := range names { + entry := r.volumes[name] + if entry == nil { + continue + } + // Only consider volumes where this server is a replica (not the primary). + if entry.VolumeServer == replicaServer { + continue + } + // Check if the primary server is dead. + if !r.blockServers[entry.VolumeServer] { + orphaned = append(orphaned, name) + } + } + return orphaned +} + // BlockCapableServers returns the list of servers known to support block volumes. 
func (r *BlockVolumeRegistry) BlockCapableServers() []string { r.mu.RLock() diff --git a/weed/server/master_block_registry_test.go b/weed/server/master_block_registry_test.go index 0608448f9..bea8061b1 100644 --- a/weed/server/master_block_registry_test.go +++ b/weed/server/master_block_registry_test.go @@ -2,6 +2,7 @@ package weed_server import ( "fmt" + "strings" "sync" "testing" "time" @@ -538,6 +539,8 @@ func TestRegistry_RemoveReplica(t *testing.T) { func TestRegistry_PromoteBestReplica_PicksHighest(t *testing.T) { r := NewBlockVolumeRegistry() + r.MarkBlockCapable("s2") + r.MarkBlockCapable("s3") r.Register(&BlockVolumeEntry{ Name: "vol1", VolumeServer: "s1", @@ -545,8 +548,8 @@ func TestRegistry_PromoteBestReplica_PicksHighest(t *testing.T) { Epoch: 5, Role: 1, Replicas: []ReplicaInfo{ - {Server: "s2", Path: "/r1.blk", IQN: "iqn:r1", ISCSIAddr: "s2:3260", HealthScore: 0.8, WALHeadLSN: 100}, - {Server: "s3", Path: "/r2.blk", IQN: "iqn:r2", ISCSIAddr: "s3:3260", HealthScore: 0.95, WALHeadLSN: 90}, + {Server: "s2", Path: "/r1.blk", IQN: "iqn:r1", ISCSIAddr: "s2:3260", HealthScore: 0.8, WALHeadLSN: 100, Role: blockvol.RoleToWire(blockvol.RoleReplica), LastHeartbeat: time.Now()}, + {Server: "s3", Path: "/r2.blk", IQN: "iqn:r2", ISCSIAddr: "s3:3260", HealthScore: 0.95, WALHeadLSN: 90, Role: blockvol.RoleToWire(blockvol.RoleReplica), LastHeartbeat: time.Now()}, }, }) // Add to byServer for s2 and s3. 
@@ -592,14 +595,16 @@ func TestRegistry_PromoteBestReplica_NoReplica(t *testing.T) { func TestRegistry_PromoteBestReplica_TiebreakByLSN(t *testing.T) { r := NewBlockVolumeRegistry() + r.MarkBlockCapable("s2") + r.MarkBlockCapable("s3") r.Register(&BlockVolumeEntry{ Name: "vol1", VolumeServer: "s1", Path: "/v1.blk", Epoch: 3, Replicas: []ReplicaInfo{ - {Server: "s2", Path: "/r1.blk", IQN: "iqn:r1", ISCSIAddr: "s2:3260", HealthScore: 0.9, WALHeadLSN: 50}, - {Server: "s3", Path: "/r2.blk", IQN: "iqn:r2", ISCSIAddr: "s3:3260", HealthScore: 0.9, WALHeadLSN: 100}, + {Server: "s2", Path: "/r1.blk", IQN: "iqn:r1", ISCSIAddr: "s2:3260", HealthScore: 0.9, WALHeadLSN: 50, Role: blockvol.RoleToWire(blockvol.RoleReplica), LastHeartbeat: time.Now()}, + {Server: "s3", Path: "/r2.blk", IQN: "iqn:r2", ISCSIAddr: "s3:3260", HealthScore: 0.9, WALHeadLSN: 100, Role: blockvol.RoleToWire(blockvol.RoleReplica), LastHeartbeat: time.Now()}, }, }) r.mu.Lock() @@ -627,14 +632,16 @@ func TestRegistry_PromoteBestReplica_TiebreakByLSN(t *testing.T) { func TestRegistry_PromoteBestReplica_KeepsOthers(t *testing.T) { r := NewBlockVolumeRegistry() + r.MarkBlockCapable("s2") + r.MarkBlockCapable("s3") r.Register(&BlockVolumeEntry{ Name: "vol1", VolumeServer: "s1", Path: "/v1.blk", Epoch: 1, Replicas: []ReplicaInfo{ - {Server: "s2", Path: "/r1.blk", IQN: "iqn:r1", ISCSIAddr: "s2:3260", HealthScore: 1.0, WALHeadLSN: 100}, - {Server: "s3", Path: "/r2.blk", IQN: "iqn:r2", ISCSIAddr: "s3:3260", HealthScore: 0.5, WALHeadLSN: 100}, + {Server: "s2", Path: "/r1.blk", IQN: "iqn:r1", ISCSIAddr: "s2:3260", HealthScore: 1.0, WALHeadLSN: 100, Role: blockvol.RoleToWire(blockvol.RoleReplica), LastHeartbeat: time.Now()}, + {Server: "s3", Path: "/r2.blk", IQN: "iqn:r2", ISCSIAddr: "s3:3260", HealthScore: 0.5, WALHeadLSN: 100, Role: blockvol.RoleToWire(blockvol.RoleReplica), LastHeartbeat: time.Now()}, }, }) r.mu.Lock() @@ -877,6 +884,7 @@ func TestRegistry_PromoteBestReplica_WALLagIneligible(t *testing.T) { 
HealthScore: 1.0, WALHeadLSN: 800, // lag=200, tolerance=100 LastHeartbeat: time.Now(), + Role: blockvol.RoleToWire(blockvol.RoleReplica), }, }, }) @@ -918,6 +926,8 @@ func TestRegistry_PromoteBestReplica_RebuildingIneligible(t *testing.T) { // Fix #2: Among eligible replicas, best (health+LSN) wins. func TestRegistry_PromoteBestReplica_EligibilityFiltersCorrectly(t *testing.T) { r := NewBlockVolumeRegistry() + r.MarkBlockCapable("stale") + r.MarkBlockCapable("good") r.Register(&BlockVolumeEntry{ Name: "vol1", VolumeServer: "primary", @@ -939,6 +949,7 @@ func TestRegistry_PromoteBestReplica_EligibilityFiltersCorrectly(t *testing.T) { HealthScore: 0.8, WALHeadLSN: 95, LastHeartbeat: time.Now(), + Role: blockvol.RoleToWire(blockvol.RoleReplica), }, }, }) @@ -956,6 +967,7 @@ func TestRegistry_PromoteBestReplica_EligibilityFiltersCorrectly(t *testing.T) { // Configurable tolerance: widen tolerance to allow lagging replicas. func TestRegistry_PromoteBestReplica_ConfigurableTolerance(t *testing.T) { r := NewBlockVolumeRegistry() + r.MarkBlockCapable("lagging") r.Register(&BlockVolumeEntry{ Name: "vol1", VolumeServer: "primary", @@ -970,6 +982,7 @@ func TestRegistry_PromoteBestReplica_ConfigurableTolerance(t *testing.T) { HealthScore: 1.0, WALHeadLSN: 800, // lag=200 LastHeartbeat: time.Now(), + Role: blockvol.RoleToWire(blockvol.RoleReplica), }, }, }) @@ -992,6 +1005,236 @@ func TestRegistry_PromoteBestReplica_ConfigurableTolerance(t *testing.T) { } } +// B-12: PromoteBestReplica rejects dead replica (server not in blockServers). +func TestRegistry_PromoteBestReplica_DeadServerIneligible(t *testing.T) { + r := NewBlockVolumeRegistry() + // Intentionally do NOT mark "dead-replica" as block-capable. 
+ r.Register(&BlockVolumeEntry{ + Name: "vol1", + VolumeServer: "primary", + Path: "/data/vol1.blk", + Epoch: 1, + LeaseTTL: 30 * time.Second, + WALHeadLSN: 100, + Replicas: []ReplicaInfo{ + { + Server: "dead-replica", + Path: "/data/vol1.blk", + HealthScore: 1.0, + WALHeadLSN: 100, + LastHeartbeat: time.Now(), + Role: blockvol.RoleToWire(blockvol.RoleReplica), + }, + }, + }) + + _, err := r.PromoteBestReplica("vol1") + if err == nil { + t.Fatal("expected error: dead replica should be rejected") + } + if !strings.Contains(err.Error(), "server_dead") { + t.Fatalf("error should mention server_dead, got: %v", err) + } +} + +// B-12: Dead replica rejected but alive replica promoted when both exist. +func TestRegistry_PromoteBestReplica_DeadSkipped_AlivePromoted(t *testing.T) { + r := NewBlockVolumeRegistry() + // Only mark s3 as alive. + r.MarkBlockCapable("s3") + r.Register(&BlockVolumeEntry{ + Name: "vol1", + VolumeServer: "primary", + Path: "/data/vol1.blk", + Epoch: 1, + LeaseTTL: 30 * time.Second, + WALHeadLSN: 100, + Replicas: []ReplicaInfo{ + {Server: "s2-dead", Path: "/r1.blk", HealthScore: 1.0, WALHeadLSN: 100, LastHeartbeat: time.Now(), Role: blockvol.RoleToWire(blockvol.RoleReplica)}, + {Server: "s3", Path: "/r2.blk", HealthScore: 0.8, WALHeadLSN: 95, LastHeartbeat: time.Now(), Role: blockvol.RoleToWire(blockvol.RoleReplica)}, + }, + }) + + newEpoch, err := r.PromoteBestReplica("vol1") + if err != nil { + t.Fatalf("PromoteBestReplica: %v", err) + } + if newEpoch != 2 { + t.Fatalf("newEpoch: got %d, want 2", newEpoch) + } + e, _ := r.Lookup("vol1") + if e.VolumeServer != "s3" { + t.Fatalf("expected alive s3 promoted, got %q", e.VolumeServer) + } +} + +// EvaluatePromotion returns read-only preflight without mutating registry. 
+func TestRegistry_EvaluatePromotion_Basic(t *testing.T) { + r := NewBlockVolumeRegistry() + r.MarkBlockCapable("replica1") + r.Register(&BlockVolumeEntry{ + Name: "vol1", + VolumeServer: "primary", + Path: "/data/vol1.blk", + Epoch: 5, + LeaseTTL: 30 * time.Second, + WALHeadLSN: 100, + Replicas: []ReplicaInfo{ + {Server: "replica1", Path: "/r1.blk", HealthScore: 0.9, WALHeadLSN: 100, LastHeartbeat: time.Now(), Role: blockvol.RoleToWire(blockvol.RoleReplica)}, + }, + }) + + pf, err := r.EvaluatePromotion("vol1") + if err != nil { + t.Fatalf("EvaluatePromotion: %v", err) + } + if !pf.Promotable { + t.Fatalf("expected promotable, got reason: %s", pf.Reason) + } + if pf.Candidate == nil || pf.Candidate.Server != "replica1" { + t.Fatalf("expected candidate replica1, got %+v", pf.Candidate) + } + + // Registry must be unmutated. + e, _ := r.Lookup("vol1") + if e.VolumeServer != "primary" { + t.Fatal("EvaluatePromotion should not mutate the registry") + } + if e.Epoch != 5 { + t.Fatal("EvaluatePromotion should not bump epoch") + } +} + +// EvaluatePromotion with all replicas rejected. +func TestRegistry_EvaluatePromotion_AllRejected(t *testing.T) { + r := NewBlockVolumeRegistry() + // No servers marked as block-capable. 
+ r.Register(&BlockVolumeEntry{ + Name: "vol1", + VolumeServer: "primary", + Path: "/data/vol1.blk", + Epoch: 1, + Replicas: []ReplicaInfo{ + {Server: "dead1", Path: "/r1.blk", HealthScore: 1.0, WALHeadLSN: 100, LastHeartbeat: time.Now(), Role: blockvol.RoleToWire(blockvol.RoleReplica)}, + {Server: "dead2", Path: "/r2.blk", HealthScore: 0.9, WALHeadLSN: 100, LastHeartbeat: time.Now(), Role: blockvol.RoleToWire(blockvol.RoleReplica)}, + }, + }) + + pf, err := r.EvaluatePromotion("vol1") + if err != nil { + t.Fatalf("EvaluatePromotion: %v", err) + } + if pf.Promotable { + t.Fatal("expected not promotable") + } + if len(pf.Rejections) != 2 { + t.Fatalf("expected 2 rejections, got %d", len(pf.Rejections)) + } + for _, rej := range pf.Rejections { + if rej.Reason != "server_dead" { + t.Fatalf("expected server_dead rejection, got %q", rej.Reason) + } + } +} + +// EvaluatePromotion for nonexistent volume. +func TestRegistry_EvaluatePromotion_NotFound(t *testing.T) { + r := NewBlockVolumeRegistry() + _, err := r.EvaluatePromotion("nonexistent") + if err == nil { + t.Fatal("expected error for nonexistent volume") + } +} + +// Replica created but never heartbeated is not promotable. 
+func TestRegistry_PromoteBestReplica_NoHeartbeatIneligible(t *testing.T) { + r := NewBlockVolumeRegistry() + r.MarkBlockCapable("replica1") + r.Register(&BlockVolumeEntry{ + Name: "vol1", + VolumeServer: "primary", + Path: "/data/vol1.blk", + Epoch: 1, + LeaseTTL: 30 * time.Second, + WALHeadLSN: 100, + Replicas: []ReplicaInfo{ + { + Server: "replica1", + Path: "/r1.blk", + HealthScore: 1.0, + WALHeadLSN: 100, + Role: blockvol.RoleToWire(blockvol.RoleReplica), + // LastHeartbeat: zero — never heartbeated + }, + }, + }) + + _, err := r.PromoteBestReplica("vol1") + if err == nil { + t.Fatal("expected error: replica with no heartbeat should be rejected") + } + if !strings.Contains(err.Error(), "no_heartbeat") { + t.Fatalf("error should mention no_heartbeat, got: %v", err) + } +} + +// Replica with unset (zero) role is not promotable. +func TestRegistry_PromoteBestReplica_UnsetRoleIneligible(t *testing.T) { + r := NewBlockVolumeRegistry() + r.MarkBlockCapable("replica1") + r.Register(&BlockVolumeEntry{ + Name: "vol1", + VolumeServer: "primary", + Path: "/data/vol1.blk", + Epoch: 1, + LeaseTTL: 30 * time.Second, + WALHeadLSN: 100, + Replicas: []ReplicaInfo{ + { + Server: "replica1", + Path: "/r1.blk", + HealthScore: 1.0, + WALHeadLSN: 100, + LastHeartbeat: time.Now(), + // Role: 0 — unset/RoleNone + }, + }, + }) + + _, err := r.PromoteBestReplica("vol1") + if err == nil { + t.Fatal("expected error: replica with unset role should be rejected") + } + if !strings.Contains(err.Error(), "wrong_role") { + t.Fatalf("error should mention wrong_role, got: %v", err) + } +} + +// PromoteBestReplica clears RebuildListenAddr on promotion (B-11 partial fix). 
+func TestRegistry_PromoteBestReplica_ClearsRebuildAddr(t *testing.T) { + r := NewBlockVolumeRegistry() + r.MarkBlockCapable("replica1") + r.Register(&BlockVolumeEntry{ + Name: "vol1", + VolumeServer: "primary", + Path: "/data/vol1.blk", + Epoch: 1, + RebuildListenAddr: "primary:15000", + Replicas: []ReplicaInfo{ + {Server: "replica1", Path: "/r1.blk", HealthScore: 1.0, WALHeadLSN: 100, LastHeartbeat: time.Now(), Role: blockvol.RoleToWire(blockvol.RoleReplica)}, + }, + }) + + _, err := r.PromoteBestReplica("vol1") + if err != nil { + t.Fatalf("PromoteBestReplica: %v", err) + } + e, _ := r.Lookup("vol1") + if e.RebuildListenAddr != "" { + t.Fatalf("RebuildListenAddr should be cleared after promotion, got %q", e.RebuildListenAddr) + } +} + // --- LeaseGrants --- func TestRegistry_LeaseGrants_PrimaryOnly(t *testing.T) { @@ -1110,3 +1353,267 @@ func TestRegistry_LeaseGrants_UnknownServer(t *testing.T) { t.Fatalf("expected nil for unknown server, got %+v", grants) } } + +// ============================================================ +// CP11B-3 T2: IsBlockCapable + VolumesWithDeadPrimary +// ============================================================ + +func TestRegistry_IsBlockCapable(t *testing.T) { + r := NewBlockVolumeRegistry() + r.MarkBlockCapable("vs1:8080") + + if !r.IsBlockCapable("vs1:8080") { + t.Fatal("vs1 should be block-capable") + } + if r.IsBlockCapable("vs2:8080") { + t.Fatal("vs2 should NOT be block-capable") + } + + r.UnmarkBlockCapable("vs1:8080") + if r.IsBlockCapable("vs1:8080") { + t.Fatal("vs1 should no longer be block-capable after unmark") + } +} + +func TestRegistry_VolumesWithDeadPrimary_Basic(t *testing.T) { + r := NewBlockVolumeRegistry() + r.MarkBlockCapable("vs1") + r.MarkBlockCapable("vs2") + + r.Register(&BlockVolumeEntry{ + Name: "vol1", VolumeServer: "vs1", Path: "/data/vol1.blk", + SizeBytes: 1 << 30, Epoch: 1, Role: blockvol.RoleToWire(blockvol.RolePrimary), + Status: StatusActive, + Replicas: []ReplicaInfo{{Server: "vs2", Path: 
"/data/vol1.blk"}}, + }) + + // Both alive → no orphans. + orphaned := r.VolumesWithDeadPrimary("vs2") + if len(orphaned) != 0 { + t.Fatalf("expected 0 orphaned volumes, got %d", len(orphaned)) + } + + // Kill primary. + r.UnmarkBlockCapable("vs1") + orphaned = r.VolumesWithDeadPrimary("vs2") + if len(orphaned) != 1 || orphaned[0] != "vol1" { + t.Fatalf("expected [vol1], got %v", orphaned) + } +} + +func TestRegistry_VolumesWithDeadPrimary_PrimaryServer_NotIncluded(t *testing.T) { + r := NewBlockVolumeRegistry() + r.MarkBlockCapable("vs1") + + r.Register(&BlockVolumeEntry{ + Name: "vol1", VolumeServer: "vs1", Path: "/data/vol1.blk", + SizeBytes: 1 << 30, Epoch: 1, Role: blockvol.RoleToWire(blockvol.RolePrimary), + Status: StatusActive, + }) + + // vs1 is the primary for vol1 — should NOT appear in orphaned list for vs1. + orphaned := r.VolumesWithDeadPrimary("vs1") + if len(orphaned) != 0 { + t.Fatalf("primary server should not appear in its own orphan list, got %v", orphaned) + } +} + +// T6: EvaluatePromotion preflight includes primary liveness. +func TestRegistry_EvaluatePromotion_PrimaryDead_StillShowsCandidate(t *testing.T) { + r := NewBlockVolumeRegistry() + r.MarkBlockCapable("vs1") + r.MarkBlockCapable("vs2") + + r.Register(&BlockVolumeEntry{ + Name: "vol1", VolumeServer: "vs1", Path: "/data/vol1.blk", + SizeBytes: 1 << 30, Epoch: 1, Role: blockvol.RoleToWire(blockvol.RolePrimary), + Status: StatusActive, LeaseTTL: 30 * time.Second, + Replicas: []ReplicaInfo{{ + Server: "vs2", Path: "/data/vol1.blk", HealthScore: 1.0, + Role: blockvol.RoleToWire(blockvol.RoleReplica), LastHeartbeat: time.Now(), + }}, + }) + + // Kill primary but keep vs2 alive. 
+ r.UnmarkBlockCapable("vs1") + + pf, err := r.EvaluatePromotion("vol1") + if err != nil { + t.Fatalf("EvaluatePromotion: %v", err) + } + if !pf.Promotable { + t.Fatalf("should be promotable (vs2 alive), reason=%s", pf.Reason) + } + if pf.Candidate.Server != "vs2" { + t.Fatalf("candidate should be vs2, got %q", pf.Candidate.Server) + } +} + +// ============================================================ +// CP11B-3 T5: ManualPromote Dev Tests +// ============================================================ + +// T5: ManualPromote with empty target → auto-picks best candidate. +func TestRegistry_ManualPromote_AutoTarget(t *testing.T) { + r := NewBlockVolumeRegistry() + r.MarkBlockCapable("best") + r.MarkBlockCapable("worse") + r.Register(&BlockVolumeEntry{ + Name: "vol1", VolumeServer: "primary", Path: "/data/vol1.blk", + Epoch: 1, LeaseTTL: 30 * time.Second, WALHeadLSN: 100, + Replicas: []ReplicaInfo{ + {Server: "worse", Path: "/r1.blk", HealthScore: 0.5, WALHeadLSN: 100, + LastHeartbeat: time.Now(), Role: blockvol.RoleToWire(blockvol.RoleReplica)}, + {Server: "best", Path: "/r2.blk", HealthScore: 1.0, WALHeadLSN: 100, + LastHeartbeat: time.Now(), Role: blockvol.RoleToWire(blockvol.RoleReplica)}, + }, + }) + // Primary not block-capable → non-force should still pass (primary_alive gate won't trigger). + + newEpoch, _, _, pf, err := r.ManualPromote("vol1", "", false) + if err != nil { + t.Fatalf("ManualPromote: %v", err) + } + if newEpoch != 2 { + t.Fatalf("epoch: got %d, want 2", newEpoch) + } + if !pf.Promotable { + t.Fatal("should be promotable") + } + e, _ := r.Lookup("vol1") + if e.VolumeServer != "best" { + t.Fatalf("expected 'best' promoted, got %q", e.VolumeServer) + } +} + +// T5: ManualPromote targets a specific replica (not the best by health). 
+func TestRegistry_ManualPromote_SpecificTarget(t *testing.T) { + r := NewBlockVolumeRegistry() + r.MarkBlockCapable("r1") + r.MarkBlockCapable("r2") + r.Register(&BlockVolumeEntry{ + Name: "vol1", VolumeServer: "primary", Path: "/data/vol1.blk", + Epoch: 1, LeaseTTL: 30 * time.Second, + Replicas: []ReplicaInfo{ + {Server: "r1", Path: "/r1.blk", HealthScore: 1.0, WALHeadLSN: 100, + LastHeartbeat: time.Now(), Role: blockvol.RoleToWire(blockvol.RoleReplica)}, + {Server: "r2", Path: "/r2.blk", HealthScore: 0.5, WALHeadLSN: 50, + LastHeartbeat: time.Now(), Role: blockvol.RoleToWire(blockvol.RoleReplica)}, + }, + }) + + // Target r2 specifically (worse health). + newEpoch, _, _, _, err := r.ManualPromote("vol1", "r2", false) + if err != nil { + t.Fatalf("ManualPromote: %v", err) + } + if newEpoch != 2 { + t.Fatalf("epoch: got %d, want 2", newEpoch) + } + e, _ := r.Lookup("vol1") + if e.VolumeServer != "r2" { + t.Fatalf("expected r2 promoted (specific target), got %q", e.VolumeServer) + } +} + +// T5: ManualPromote with non-existent target → error. +func TestRegistry_ManualPromote_TargetNotFound(t *testing.T) { + r := NewBlockVolumeRegistry() + r.MarkBlockCapable("r1") + r.Register(&BlockVolumeEntry{ + Name: "vol1", VolumeServer: "primary", Path: "/data/vol1.blk", + Epoch: 1, LeaseTTL: 30 * time.Second, + Replicas: []ReplicaInfo{ + {Server: "r1", Path: "/r1.blk", HealthScore: 1.0, + LastHeartbeat: time.Now(), Role: blockvol.RoleToWire(blockvol.RoleReplica)}, + }, + }) + + _, _, _, pf, err := r.ManualPromote("vol1", "nonexistent", false) + if err == nil { + t.Fatal("expected error for nonexistent target") + } + if pf.Reason != "target_not_found" { + t.Fatalf("expected target_not_found, got %q", pf.Reason) + } +} + +// T5: ManualPromote non-force with alive primary → rejected. 
+func TestRegistry_ManualPromote_PrimaryAlive_Rejected(t *testing.T) { + r := NewBlockVolumeRegistry() + r.MarkBlockCapable("primary") + r.MarkBlockCapable("r1") + r.Register(&BlockVolumeEntry{ + Name: "vol1", VolumeServer: "primary", Path: "/data/vol1.blk", + Epoch: 1, LeaseTTL: 30 * time.Second, + Replicas: []ReplicaInfo{ + {Server: "r1", Path: "/r1.blk", HealthScore: 1.0, + LastHeartbeat: time.Now(), Role: blockvol.RoleToWire(blockvol.RoleReplica)}, + }, + }) + + _, _, _, pf, err := r.ManualPromote("vol1", "", false) + if err == nil { + t.Fatal("expected rejection when primary alive and !force") + } + if pf.Reason != "primary_alive" { + t.Fatalf("expected primary_alive, got %q", pf.Reason) + } + // Verify no mutation. + e, _ := r.Lookup("vol1") + if e.VolumeServer != "primary" { + t.Fatalf("primary should not change, got %q", e.VolumeServer) + } +} + +// T5: Force bypasses stale heartbeat and primary_alive gates. +func TestRegistry_ManualPromote_Force_StaleHeartbeat(t *testing.T) { + r := NewBlockVolumeRegistry() + r.MarkBlockCapable("primary") + r.MarkBlockCapable("r1") + r.Register(&BlockVolumeEntry{ + Name: "vol1", VolumeServer: "primary", Path: "/data/vol1.blk", + Epoch: 1, LeaseTTL: 30 * time.Second, + Replicas: []ReplicaInfo{ + {Server: "r1", Path: "/r1.blk", HealthScore: 1.0, + LastHeartbeat: time.Now().Add(-10 * time.Minute), // stale + Role: blockvol.RoleToWire(blockvol.RoleReplica)}, + }, + }) + + // Non-force: would fail on primary_alive. + // Force: bypasses primary_alive AND stale_heartbeat. + newEpoch, _, _, _, err := r.ManualPromote("vol1", "", true) + if err != nil { + t.Fatalf("force ManualPromote should succeed: %v", err) + } + if newEpoch != 2 { + t.Fatalf("epoch: got %d, want 2", newEpoch) + } + e, _ := r.Lookup("vol1") + if e.VolumeServer != "r1" { + t.Fatalf("expected r1 promoted via force, got %q", e.VolumeServer) + } +} + +// T5: Force does NOT bypass server_dead (hard gate). 
+func TestRegistry_ManualPromote_Force_StillRejectsDeadServer(t *testing.T) { + r := NewBlockVolumeRegistry() + // "dead" is NOT marked block-capable. + r.Register(&BlockVolumeEntry{ + Name: "vol1", VolumeServer: "primary", Path: "/data/vol1.blk", + Epoch: 1, LeaseTTL: 30 * time.Second, + Replicas: []ReplicaInfo{ + {Server: "dead", Path: "/r1.blk", HealthScore: 1.0, + LastHeartbeat: time.Now(), Role: blockvol.RoleToWire(blockvol.RoleReplica)}, + }, + }) + + _, _, _, pf, err := r.ManualPromote("vol1", "dead", true) + if err == nil { + t.Fatal("force should NOT bypass server_dead") + } + if len(pf.Rejections) == 0 || pf.Rejections[0].Reason != "server_dead" { + t.Fatalf("expected server_dead rejection, got %+v", pf.Rejections) + } +} diff --git a/weed/server/master_grpc_server.go b/weed/server/master_grpc_server.go index aa8589bd8..59f5a9aa8 100644 --- a/weed/server/master_grpc_server.go +++ b/weed/server/master_grpc_server.go @@ -278,6 +278,9 @@ func (ms *MasterServer) SendHeartbeat(stream master_pb.Seaweed_SendHeartbeatServ // on subsequent heartbeats), never both in the same message. if len(heartbeat.BlockVolumeInfos) > 0 || heartbeat.HasNoBlockVolumes { ms.blockRegistry.UpdateFullHeartbeat(dn.Url(), heartbeat.BlockVolumeInfos) + // T2 (B-06): After updating registry from heartbeat, check if this server + // is a replica for any volume whose primary is dead. If so, promote. + ms.reevaluateOrphanedPrimaries(dn.Url()) } else if len(heartbeat.NewBlockVolumes) > 0 || len(heartbeat.DeletedBlockVolumes) > 0 { ms.blockRegistry.UpdateDeltaHeartbeat(dn.Url(), heartbeat.NewBlockVolumes, heartbeat.DeletedBlockVolumes) } diff --git a/weed/server/master_grpc_server_block.go b/weed/server/master_grpc_server_block.go index b8f7a0c82..9c70dfad8 100644 --- a/weed/server/master_grpc_server_block.go +++ b/weed/server/master_grpc_server_block.go @@ -283,14 +283,16 @@ func (ms *MasterServer) tryCreateOneReplica(ctx context.Context, req *master_pb. 
entry.RebuildListenAddr = primaryResult.RebuildListenAddr // CP8-2: populate Replicas[]. entry.Replicas = append(entry.Replicas, ReplicaInfo{ - Server: replicaServerStr, - Path: replicaResult.Path, - ISCSIAddr: replicaResult.ISCSIAddr, - IQN: replicaResult.IQN, - NvmeAddr: replicaResult.NvmeAddr, - NQN: replicaResult.NQN, - DataAddr: replicaResult.ReplicaDataAddr, - CtrlAddr: replicaResult.ReplicaCtrlAddr, + Server: replicaServerStr, + Path: replicaResult.Path, + ISCSIAddr: replicaResult.ISCSIAddr, + IQN: replicaResult.IQN, + NvmeAddr: replicaResult.NvmeAddr, + NQN: replicaResult.NQN, + DataAddr: replicaResult.ReplicaDataAddr, + CtrlAddr: replicaResult.ReplicaCtrlAddr, + Role: blockvol.RoleToWire(blockvol.RoleReplica), + LastHeartbeat: time.Now(), }) return replicaServerStr } @@ -409,6 +411,11 @@ func (ms *MasterServer) ExpandBlockVolume(ctx context.Context, req *master_pb.Ex } }() + // Test-only hook: inject failover between lock acquisition and re-read. + if ms.expandPreReadHook != nil { + ms.expandPreReadHook() + } + // B-09: Re-read entry after acquiring expand lock. Between the initial // Lookup and AcquireExpandInflight, failover may have changed VolumeServer // or Replicas. Using the stale snapshot would send PREPARE to dead nodes. diff --git a/weed/server/master_grpc_server_block_test.go b/weed/server/master_grpc_server_block_test.go index 1d29191ee..f82f9a818 100644 --- a/weed/server/master_grpc_server_block_test.go +++ b/weed/server/master_grpc_server_block_test.go @@ -10,6 +10,7 @@ import ( "github.com/seaweedfs/seaweedfs/weed/pb/master_pb" "github.com/seaweedfs/seaweedfs/weed/pb/volume_server_pb" + "github.com/seaweedfs/seaweedfs/weed/storage/blockvol" ) // testMasterServer creates a minimal MasterServer with mock VS calls for testing. 
@@ -1112,6 +1113,9 @@ func TestMaster_NoNvmeFieldsWhenDisabled(t *testing.T) { func TestMaster_PromotionCopiesNvmeFields(t *testing.T) { ms := testMasterServer(t) + // Mark servers as block-capable so promotion Gate 4 (liveness) passes. + ms.blockRegistry.MarkBlockCapable("vs1:9333") + ms.blockRegistry.MarkBlockCapable("vs2:9333") // Directly register an entry with primary + replica, both having NVMe fields. ms.blockRegistry.Register(&BlockVolumeEntry{ @@ -1128,16 +1132,18 @@ func TestMaster_PromotionCopiesNvmeFields(t *testing.T) { LeaseTTL: 30 * time.Second, Replicas: []ReplicaInfo{ { - Server: "vs2:9333", - Path: "/data/ha-vol.blk", - IQN: "iqn.2024.test:ha-vol-r", - ISCSIAddr: "vs2:3260", - NvmeAddr: "vs2:4420", - NQN: "nqn.2024-01.com.seaweedfs:vol.ha-vol.vs2", - DataAddr: "vs2:14260", - CtrlAddr: "vs2:14261", - HealthScore: 0.95, - WALHeadLSN: 100, + Server: "vs2:9333", + Path: "/data/ha-vol.blk", + IQN: "iqn.2024.test:ha-vol-r", + ISCSIAddr: "vs2:3260", + NvmeAddr: "vs2:4420", + NQN: "nqn.2024-01.com.seaweedfs:vol.ha-vol.vs2", + DataAddr: "vs2:14260", + CtrlAddr: "vs2:14261", + HealthScore: 0.95, + WALHeadLSN: 100, + Role: blockvol.RoleToWire(blockvol.RoleReplica), + LastHeartbeat: time.Now(), }, }, }) @@ -1654,10 +1660,11 @@ func TestMaster_ExpandCoordinated_RestartRecovery(t *testing.T) { } func TestMaster_ExpandCoordinated_B09_ReReadsEntryAfterLock(t *testing.T) { - // B-09: If failover changes VolumeServer between initial Lookup and - // AcquireExpandInflight, the coordinator must use the fresh entry, - // not the stale one. Use RF=3 so promotion still leaves 1 replica - // and the coordinated path is taken. + // B-09: Exercises the actual race window — failover happens BETWEEN + // the initial Lookup (line 380) and the post-lock re-read (line 419). + // Uses expandPreReadHook to inject PromoteBestReplica at the exact + // interleaving point. RF=3 so promotion leaves 1 replica and the + // coordinated path is taken. 
ms := testMasterServerWithExpandMocks(t) ms.blockRegistry.MarkBlockCapable("vs1:9333") ms.blockRegistry.MarkBlockCapable("vs2:9333") @@ -1689,31 +1696,39 @@ func TestMaster_ExpandCoordinated_B09_ReReadsEntryAfterLock(t *testing.T) { return 2 << 30, nil } - // Simulate failover: promote best replica. With RF=3, one replica - // becomes primary and the other stays as replica → coordinated path. - ms.blockRegistry.PromoteBestReplica("b09-vol") - - entry, _ = ms.blockRegistry.Lookup("b09-vol") - newPrimary := entry.VolumeServer - if newPrimary == originalPrimary { - t.Fatal("promotion didn't change primary") - } - if len(entry.Replicas) == 0 { - t.Fatal("expected at least 1 replica after RF=3 promotion") + // Hook fires AFTER AcquireExpandInflight but BEFORE the re-read Lookup. + // This is the exact race window: the initial Lookup already returned + // the old primary, but failover changes it before the re-read. + hookFired := false + ms.expandPreReadHook = func() { + hookFired = true + ms.blockRegistry.PromoteBestReplica("b09-vol") } - // Expand should use the NEW primary (post-failover), not the old one. + // At this point, the initial Lookup inside ExpandBlockVolume will see + // originalPrimary. The hook then promotes, changing the primary. + // The re-read must pick up the new primary. resp, err := ms.ExpandBlockVolume(context.Background(), &master_pb.ExpandBlockVolumeRequest{ Name: "b09-vol", NewSizeBytes: 2 << 30, }) if err != nil { t.Fatalf("expand: %v", err) } + if !hookFired { + t.Fatal("expandPreReadHook was not called — race window not exercised") + } if resp.CapacityBytes != 2<<30 { t.Fatalf("capacity: got %d", resp.CapacityBytes) } - // First PREPARE should have gone to the new primary, not the old one. + // Verify: after the hook promoted, the re-read must have picked up + // the new primary. The first PREPARE should go to the new primary. 
+ entry, _ = ms.blockRegistry.Lookup("b09-vol") + newPrimary := entry.VolumeServer + if newPrimary == originalPrimary { + t.Fatal("promotion didn't change primary") + } + if len(preparedServers) == 0 { t.Fatal("no prepare calls recorded") } @@ -1721,7 +1736,7 @@ func TestMaster_ExpandCoordinated_B09_ReReadsEntryAfterLock(t *testing.T) { t.Fatalf("PREPARE went to %q (stale), should go to %q (fresh primary)", preparedServers[0], newPrimary) } - // Verify old primary was NOT contacted. + // Verify old primary was NOT contacted at all. for _, s := range preparedServers { if s == originalPrimary { t.Fatalf("PREPARE sent to old primary %q — stale entry used", originalPrimary) diff --git a/weed/server/master_server.go b/weed/server/master_server.go index e14c32057..ac57ae1bf 100644 --- a/weed/server/master_server.go +++ b/weed/server/master_server.go @@ -109,6 +109,10 @@ type MasterServer struct { blockVSCommitExpand func(ctx context.Context, server string, name string, expandEpoch uint64) (uint64, error) blockVSCancelExpand func(ctx context.Context, server string, name string, expandEpoch uint64) error nextExpandEpoch atomic.Uint64 + + // Test-only hook: called after AcquireExpandInflight but before the + // re-read Lookup in coordinated expand. Nil in production. 
+ expandPreReadHook func() } func NewMasterServer(r *mux.Router, option *MasterOption, peers map[string]pb.ServerAddress) *MasterServer { @@ -224,6 +228,8 @@ func NewMasterServer(r *mux.Router, option *MasterOption, peers map[string]pb.Se r.HandleFunc("/block/volume/{name}", ms.guard.WhiteList(requestIDMiddleware(ms.blockVolumeLookupHandler))).Methods("GET") r.HandleFunc("/block/volumes", ms.guard.WhiteList(requestIDMiddleware(ms.blockVolumeListHandler))).Methods("GET") r.HandleFunc("/block/volume/{name}/expand", ms.proxyToLeader(ms.guard.WhiteList(requestIDMiddleware(ms.blockVolumeExpandHandler)))).Methods("POST") + r.HandleFunc("/block/volume/{name}/preflight", ms.guard.WhiteList(requestIDMiddleware(ms.blockVolumePreflightHandler))).Methods("GET") + r.HandleFunc("/block/volume/{name}/promote", ms.proxyToLeader(ms.guard.WhiteList(requestIDMiddleware(ms.blockVolumePromoteHandler)))).Methods("POST") r.HandleFunc("/block/assign", ms.proxyToLeader(ms.guard.WhiteList(requestIDMiddleware(ms.blockAssignHandler)))).Methods("POST") r.HandleFunc("/block/servers", ms.guard.WhiteList(requestIDMiddleware(ms.blockServersHandler))).Methods("GET") r.HandleFunc("/block/status", ms.guard.WhiteList(requestIDMiddleware(ms.blockStatusHandler))).Methods("GET") diff --git a/weed/server/master_server_handlers_block.go b/weed/server/master_server_handlers_block.go index d7afc374d..fde6181d6 100644 --- a/weed/server/master_server_handlers_block.go +++ b/weed/server/master_server_handlers_block.go @@ -7,6 +7,7 @@ import ( "github.com/gorilla/mux" + "github.com/seaweedfs/seaweedfs/weed/glog" "github.com/seaweedfs/seaweedfs/weed/pb/master_pb" "github.com/seaweedfs/seaweedfs/weed/storage/blockvol" "github.com/seaweedfs/seaweedfs/weed/storage/blockvol/blockapi" @@ -206,6 +207,99 @@ func (ms *MasterServer) blockStatusHandler(w http.ResponseWriter, r *http.Reques writeJsonQuiet(w, r, http.StatusOK, status) } +// blockVolumePreflightHandler handles GET /block/volume/{name}/preflight. 
+// Returns a read-only promotion preflight evaluation for the named volume. +func (ms *MasterServer) blockVolumePreflightHandler(w http.ResponseWriter, r *http.Request) { + name := mux.Vars(r)["name"] + if name == "" { + writeJsonError(w, r, http.StatusBadRequest, fmt.Errorf("name is required")) + return + } + + pf, err := ms.blockRegistry.EvaluatePromotion(name) + if err != nil { + writeJsonError(w, r, http.StatusNotFound, err) + return + } + + resp := blockapi.PreflightResponse{ + VolumeName: pf.VolumeName, + Promotable: pf.Promotable, + Reason: pf.Reason, + } + if pf.Candidate != nil { + resp.CandidateServer = pf.Candidate.Server + resp.CandidateHealth = pf.Candidate.HealthScore + resp.CandidateWALLSN = pf.Candidate.WALHeadLSN + } + for _, rej := range pf.Rejections { + resp.Rejections = append(resp.Rejections, blockapi.PreflightRejection{ + Server: rej.Server, + Reason: rej.Reason, + }) + } + // Add primary liveness info. + entry, ok := ms.blockRegistry.Lookup(name) + if ok { + resp.PrimaryServer = entry.VolumeServer + resp.PrimaryAlive = ms.blockRegistry.IsBlockCapable(entry.VolumeServer) + } + writeJsonQuiet(w, r, http.StatusOK, resp) +} + +// blockVolumePromoteHandler handles POST /block/volume/{name}/promote. +// Triggers a manual promotion for the named block volume. +func (ms *MasterServer) blockVolumePromoteHandler(w http.ResponseWriter, r *http.Request) { + name := mux.Vars(r)["name"] + if name == "" { + writeJsonError(w, r, http.StatusBadRequest, fmt.Errorf("name is required")) + return + } + + var req blockapi.PromoteVolumeRequest + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + writeJsonError(w, r, http.StatusBadRequest, fmt.Errorf("decode request: %w", err)) + return + } + + // ManualPromote captures oldPrimary/oldPath under lock to avoid TOCTOU (BUG-T5-2). + newEpoch, oldPrimary, oldPath, pf, err := ms.blockRegistry.ManualPromote(name, req.TargetServer, req.Force) + if err != nil { + // Distinguish not-found from rejection. 
+ status := http.StatusConflict + if pf.Reason == "volume not found" { + status = http.StatusNotFound + } + // Build structured rejection response. + resp := blockapi.PromoteVolumeResponse{ + Reason: pf.Reason, + } + for _, rej := range pf.Rejections { + resp.Rejections = append(resp.Rejections, blockapi.PreflightRejection{ + Server: rej.Server, + Reason: rej.Reason, + }) + } + glog.V(0).Infof("manual promote %q rejected: %s", name, pf.Reason) + writeJsonQuiet(w, r, status, resp) + return + } + + // Post-promotion orchestration (same as auto path). + ms.finalizePromotion(name, oldPrimary, oldPath, newEpoch) + + if req.Reason != "" { + glog.V(0).Infof("manual promote %q: reason=%q", name, req.Reason) + } + + // Re-read to get the new primary server name. + entry, _ := ms.blockRegistry.Lookup(name) + writeJsonQuiet(w, r, http.StatusOK, blockapi.PromoteVolumeResponse{ + NewPrimary: entry.VolumeServer, + Epoch: newEpoch, + }) +} + // entryToVolumeInfo converts a BlockVolumeEntry to a blockapi.VolumeInfo. 
func entryToVolumeInfo(e *BlockVolumeEntry) blockapi.VolumeInfo { status := "pending" @@ -239,6 +333,8 @@ func entryToVolumeInfo(e *BlockVolumeEntry) blockapi.VolumeInfo { HealthScore: e.HealthScore, ReplicaDegraded: e.ReplicaDegraded, DurabilityMode: durMode, + NvmeAddr: e.NvmeAddr, + NQN: e.NQN, } for _, ri := range e.Replicas { info.Replicas = append(info.Replicas, blockapi.ReplicaDetail{ diff --git a/weed/server/qa_block_cp11b3_adversarial_test.go b/weed/server/qa_block_cp11b3_adversarial_test.go new file mode 100644 index 000000000..e999d6146 --- /dev/null +++ b/weed/server/qa_block_cp11b3_adversarial_test.go @@ -0,0 +1,1581 @@ +package weed_server + +import ( + "fmt" + "strings" + "sync" + "testing" + "time" + + "github.com/seaweedfs/seaweedfs/weed/storage/blockvol" +) + +// ============================================================ +// CP11B-3 QA Adversarial Tests +// +// T1: Promotion candidate evaluation hardening +// T2: Re-evaluate on replica registration (B-06, B-08) +// T3: Deferred timer safety (B-07) +// T4: Rebuild endpoint / publication refresh (B-11) +// T6: Preflight surface +// ============================================================ + +// --- T1 Adversarial: Promotion Gate Edge Cases --- + +// QA-T1-1: All 4 gates fail simultaneously on a single replica. +func TestQA_T1_AllGatesFail_SingleReplica(t *testing.T) { + r := NewBlockVolumeRegistry() + // Do NOT mark "bad" as block-capable (gate 4 fail). 
+ r.Register(&BlockVolumeEntry{ + Name: "vol1", VolumeServer: "primary", Path: "/data/vol1.blk", + Epoch: 1, LeaseTTL: 30 * time.Second, WALHeadLSN: 1000, + Replicas: []ReplicaInfo{{ + Server: "bad", + Path: "/r1.blk", + HealthScore: 1.0, + WALHeadLSN: 1, // gate 2: far behind + LastHeartbeat: time.Now().Add(-1 * time.Hour), // gate 1: stale + Role: blockvol.RoleToWire(blockvol.RoleRebuilding), // gate 3: wrong role + }}, + }) + + pf, _ := r.EvaluatePromotion("vol1") + if pf.Promotable { + t.Fatal("should not be promotable when all gates fail") + } + if len(pf.Rejections) != 1 { + t.Fatalf("expected 1 rejection (first gate short-circuits), got %d", len(pf.Rejections)) + } + // Gate 1 (freshness) should fire first since heartbeat is stale. + if pf.Rejections[0].Reason != "stale_heartbeat" { + t.Fatalf("expected stale_heartbeat as first rejection, got %q", pf.Rejections[0].Reason) + } +} + +// QA-T1-2: Boundary test — WAL lag exactly at tolerance. +func TestQA_T1_WALLag_ExactBoundary(t *testing.T) { + r := NewBlockVolumeRegistry() + r.SetPromotionLSNTolerance(50) + r.MarkBlockCapable("replica1") + + // Primary at LSN 200, replica at LSN 150 → lag = 50 = exactly tolerance. + // evaluatePromotionLocked: ri.WALHeadLSN + tolerance < primaryLSN → 150+50 < 200 → false → eligible. + r.Register(&BlockVolumeEntry{ + Name: "vol1", VolumeServer: "primary", Path: "/data/vol1.blk", + Epoch: 1, LeaseTTL: 30 * time.Second, WALHeadLSN: 200, + Replicas: []ReplicaInfo{{ + Server: "replica1", Path: "/r1.blk", HealthScore: 1.0, + WALHeadLSN: 150, LastHeartbeat: time.Now(), + Role: blockvol.RoleToWire(blockvol.RoleReplica), + }}, + }) + + pf, _ := r.EvaluatePromotion("vol1") + if !pf.Promotable { + t.Fatalf("lag=tolerance should be eligible, got reason=%q", pf.Reason) + } + + // Now set replica at LSN 149 → lag = 51 > tolerance → ineligible. 
+ e, _ := r.Lookup("vol1") + e.Replicas[0].WALHeadLSN = 149 + + pf, _ = r.EvaluatePromotion("vol1") + if pf.Promotable { + t.Fatal("lag > tolerance should be ineligible") + } + if len(pf.Rejections) == 0 || pf.Rejections[0].Reason != "wal_lag" { + t.Fatalf("expected wal_lag rejection, got %+v", pf.Rejections) + } +} + +// QA-T1-3: Zero LeaseTTL → freshness cutoff falls back to 60s. +func TestQA_T1_ZeroLeaseTTL_FallbackFreshness(t *testing.T) { + r := NewBlockVolumeRegistry() + r.MarkBlockCapable("replica1") + + r.Register(&BlockVolumeEntry{ + Name: "vol1", VolumeServer: "primary", Path: "/data/vol1.blk", + Epoch: 1, LeaseTTL: 0, // zero + Replicas: []ReplicaInfo{{ + Server: "replica1", Path: "/r1.blk", HealthScore: 1.0, + WALHeadLSN: 0, + LastHeartbeat: time.Now().Add(-90 * time.Second), // 90s ago, beyond 60s fallback + Role: blockvol.RoleToWire(blockvol.RoleReplica), + }}, + }) + + pf, _ := r.EvaluatePromotion("vol1") + if pf.Promotable { + t.Fatal("90s-old heartbeat with 0 LeaseTTL (60s fallback) should be ineligible") + } + if len(pf.Rejections) == 0 || pf.Rejections[0].Reason != "stale_heartbeat" { + t.Fatalf("expected stale_heartbeat, got %+v", pf.Rejections) + } +} + +// QA-T1-4: RF3 — one dead, one stale, one healthy → healthy promoted. +func TestQA_T1_RF3_MixedGates_OnlyHealthyPromoted(t *testing.T) { + r := NewBlockVolumeRegistry() + r.MarkBlockCapable("healthy") + // "dead" not marked, "stale" marked but old heartbeat. 
+ r.MarkBlockCapable("stale") + + r.Register(&BlockVolumeEntry{ + Name: "vol1", VolumeServer: "primary", Path: "/data/vol1.blk", + Epoch: 1, LeaseTTL: 30 * time.Second, WALHeadLSN: 100, + Replicas: []ReplicaInfo{ + {Server: "dead", Path: "/r1.blk", HealthScore: 1.0, WALHeadLSN: 100, + LastHeartbeat: time.Now(), Role: blockvol.RoleToWire(blockvol.RoleReplica)}, + {Server: "stale", Path: "/r2.blk", HealthScore: 1.0, WALHeadLSN: 100, + LastHeartbeat: time.Now().Add(-5 * time.Minute), Role: blockvol.RoleToWire(blockvol.RoleReplica)}, + {Server: "healthy", Path: "/r3.blk", HealthScore: 0.7, WALHeadLSN: 95, + LastHeartbeat: time.Now(), Role: blockvol.RoleToWire(blockvol.RoleReplica)}, + }, + }) + + newEpoch, err := r.PromoteBestReplica("vol1") + if err != nil { + t.Fatalf("PromoteBestReplica: %v", err) + } + if newEpoch != 2 { + t.Fatalf("epoch: got %d, want 2", newEpoch) + } + e, _ := r.Lookup("vol1") + if e.VolumeServer != "healthy" { + t.Fatalf("expected 'healthy' promoted (only one passing all gates), got %q", e.VolumeServer) + } + // dead + stale should be in remaining replicas (not promoted, not removed). + if len(e.Replicas) != 2 { + t.Fatalf("expected 2 remaining replicas, got %d", len(e.Replicas)) + } +} + +// QA-T1-5: EvaluatePromotion is read-only — does NOT mutate entry. +func TestQA_T1_EvaluatePromotion_ReadOnly(t *testing.T) { + r := NewBlockVolumeRegistry() + r.MarkBlockCapable("replica1") + r.Register(&BlockVolumeEntry{ + Name: "vol1", VolumeServer: "primary", Path: "/data/vol1.blk", + Epoch: 5, LeaseTTL: 30 * time.Second, WALHeadLSN: 100, + Replicas: []ReplicaInfo{{ + Server: "replica1", Path: "/r1.blk", HealthScore: 1.0, + WALHeadLSN: 100, LastHeartbeat: time.Now(), + Role: blockvol.RoleToWire(blockvol.RoleReplica), + }}, + }) + + // Call EvaluatePromotion multiple times. + for i := 0; i < 10; i++ { + pf, _ := r.EvaluatePromotion("vol1") + if !pf.Promotable { + t.Fatalf("iter %d: should be promotable", i) + } + } + + // Entry should be unchanged. 
+ e, _ := r.Lookup("vol1") + if e.Epoch != 5 { + t.Fatalf("epoch mutated by EvaluatePromotion: got %d, want 5", e.Epoch) + } + if e.VolumeServer != "primary" { + t.Fatalf("VolumeServer mutated: got %q, want primary", e.VolumeServer) + } + if len(e.Replicas) != 1 { + t.Fatalf("Replicas mutated: got %d, want 1", len(e.Replicas)) + } +} + +// QA-T1-6: Concurrent EvaluatePromotion + PromoteBestReplica — no panic/deadlock. +func TestQA_T1_ConcurrentEvaluateAndPromote(t *testing.T) { + r := NewBlockVolumeRegistry() + r.MarkBlockCapable("r1") + r.MarkBlockCapable("r2") + + setup := func() { + r.Unregister("vol1") + r.Register(&BlockVolumeEntry{ + Name: "vol1", VolumeServer: "primary", Path: "/data/vol1.blk", + Epoch: 1, LeaseTTL: 30 * time.Second, WALHeadLSN: 100, + Replicas: []ReplicaInfo{ + {Server: "r1", Path: "/r1.blk", HealthScore: 1.0, WALHeadLSN: 100, + LastHeartbeat: time.Now(), Role: blockvol.RoleToWire(blockvol.RoleReplica)}, + {Server: "r2", Path: "/r2.blk", HealthScore: 0.9, WALHeadLSN: 95, + LastHeartbeat: time.Now(), Role: blockvol.RoleToWire(blockvol.RoleReplica)}, + }, + }) + } + + // Run 20 rounds: concurrent EvaluatePromotion + PromoteBestReplica. + for round := 0; round < 20; round++ { + setup() + var wg sync.WaitGroup + wg.Add(3) + go func() { + defer wg.Done() + r.EvaluatePromotion("vol1") + }() + go func() { + defer wg.Done() + r.PromoteBestReplica("vol1") + }() + go func() { + defer wg.Done() + r.EvaluatePromotion("vol1") + }() + wg.Wait() + } + // No panic = pass. +} + +// QA-T1-7: Promotion during ExpandInProgress — should still work +// (expand inflight doesn't block promotion, only size updates). 
+func TestQA_T1_PromotionDuringExpand(t *testing.T) { + r := NewBlockVolumeRegistry() + r.MarkBlockCapable("replica1") + r.Register(&BlockVolumeEntry{ + Name: "vol1", VolumeServer: "primary", Path: "/data/vol1.blk", + Epoch: 1, LeaseTTL: 30 * time.Second, WALHeadLSN: 50, + ExpandInProgress: true, PendingExpandSize: 2 << 30, + Replicas: []ReplicaInfo{{ + Server: "replica1", Path: "/r1.blk", HealthScore: 1.0, + WALHeadLSN: 50, LastHeartbeat: time.Now(), + Role: blockvol.RoleToWire(blockvol.RoleReplica), + }}, + }) + + newEpoch, err := r.PromoteBestReplica("vol1") + if err != nil { + t.Fatalf("promotion should succeed during expand: %v", err) + } + if newEpoch != 2 { + t.Fatalf("epoch: got %d, want 2", newEpoch) + } + e, _ := r.Lookup("vol1") + if e.VolumeServer != "replica1" { + t.Fatalf("expected replica1 promoted, got %q", e.VolumeServer) + } +} + +// QA-T1-8: Double promotion — second call fails (no replicas left after first). +func TestQA_T1_DoublePromotion_SecondFails(t *testing.T) { + r := NewBlockVolumeRegistry() + r.MarkBlockCapable("replica1") + r.Register(&BlockVolumeEntry{ + Name: "vol1", VolumeServer: "primary", Path: "/data/vol1.blk", + Epoch: 1, LeaseTTL: 30 * time.Second, + Replicas: []ReplicaInfo{{ + Server: "replica1", Path: "/r1.blk", HealthScore: 1.0, + WALHeadLSN: 0, LastHeartbeat: time.Now(), + Role: blockvol.RoleToWire(blockvol.RoleReplica), + }}, + }) + + _, err := r.PromoteBestReplica("vol1") + if err != nil { + t.Fatalf("first promotion: %v", err) + } + + // Second promotion should fail — no replicas left. + _, err = r.PromoteBestReplica("vol1") + if err == nil { + t.Fatal("second promotion should fail (no replicas)") + } + if !strings.Contains(err.Error(), "no replicas") { + t.Fatalf("expected 'no replicas' error, got: %v", err) + } +} + +// --- T2 Adversarial: Orphaned Primary Edge Cases --- + +// QA-T2-1: Orphan detection races with failover — no double promotion. 
+func TestQA_T2_OrphanAndFailover_NoDoublePromotion(t *testing.T) { + ms := testMasterServerForFailover(t) + registerVolumeWithReplica(t, ms, "vol1", "vs1", "vs2", 1, 5*time.Second) + + // vs1 dies → normal failover promotes vs2. + ms.failoverBlockVolumes("vs1") + + entry, _ := ms.blockRegistry.Lookup("vol1") + if entry.VolumeServer != "vs2" { + t.Fatalf("expected vs2 promoted, got %q", entry.VolumeServer) + } + epochAfterFailover := entry.Epoch + + // Now reevaluateOrphanedPrimaries runs (e.g., from heartbeat path). + // vs2 is now primary AND block-capable → no orphan → no double promotion. + ms.reevaluateOrphanedPrimaries("vs2") + + entry, _ = ms.blockRegistry.Lookup("vol1") + if entry.Epoch != epochAfterFailover { + t.Fatalf("epoch should not change (no double promotion): got %d, want %d", + entry.Epoch, epochAfterFailover) + } +} + +// QA-T2-2: Orphan detection when replica itself is not promotable (rebuilding role). +func TestQA_T2_OrphanButReplicaNotPromotable(t *testing.T) { + ms := testMasterServerForFailover(t) + ms.blockRegistry.MarkBlockCapable("vs1") + ms.blockRegistry.MarkBlockCapable("vs2") + + ms.blockRegistry.Register(&BlockVolumeEntry{ + Name: "vol1", VolumeServer: "vs1", Path: "/data/vol1.blk", + SizeBytes: 1 << 30, Epoch: 1, Role: blockvol.RoleToWire(blockvol.RolePrimary), + Status: StatusActive, LeaseTTL: 5 * time.Second, + LastLeaseGrant: time.Now().Add(-10 * time.Second), + Replicas: []ReplicaInfo{{ + Server: "vs2", Path: "/data/vol1.blk", HealthScore: 1.0, + Role: blockvol.RoleToWire(blockvol.RoleRebuilding), // NOT promotable + LastHeartbeat: time.Now(), + }}, + }) + + // Kill primary. + ms.blockRegistry.UnmarkBlockCapable("vs1") + + // vs2 reconnects — orphan detected, but replica is Rebuilding → promotion rejected. + ms.reevaluateOrphanedPrimaries("vs2") + + entry, _ := ms.blockRegistry.Lookup("vol1") + // Primary should remain vs1 (promotion failed, volume stays degraded). 
+ if entry.VolumeServer != "vs1" { + t.Fatalf("should NOT promote rebuilding replica, got %q", entry.VolumeServer) + } + if entry.Epoch != 1 { + t.Fatalf("epoch should remain 1, got %d", entry.Epoch) + } +} + +// QA-T2-3: Concurrent reevaluateOrphanedPrimaries from multiple goroutines. +func TestQA_T2_ConcurrentReevaluation_NoPanic(t *testing.T) { + ms := testMasterServerForFailover(t) + registerVolumeWithReplica(t, ms, "vol1", "vs1", "vs2", 1, 5*time.Second) + ms.blockRegistry.UnmarkBlockCapable("vs1") + + var wg sync.WaitGroup + for i := 0; i < 10; i++ { + wg.Add(1) + go func() { + defer wg.Done() + ms.reevaluateOrphanedPrimaries("vs2") + }() + } + wg.Wait() + + entry, _ := ms.blockRegistry.Lookup("vol1") + // Should have promoted exactly once; epoch = 2 regardless of concurrency. + if entry.VolumeServer != "vs2" { + t.Fatalf("expected vs2 promoted, got %q", entry.VolumeServer) + } + if entry.Epoch != 2 { + t.Fatalf("expected epoch 2 (single promotion), got %d", entry.Epoch) + } +} + +// QA-T2-4: Heartbeat-path orphan check on server that hosts no block volumes. +func TestQA_T2_HeartbeatOrphanCheck_NoVolumes_NoOp(t *testing.T) { + ms := testMasterServerForFailover(t) + // vs3 has no volumes at all. + ms.blockRegistry.MarkBlockCapable("vs3") + + // Should not panic or error. + ms.reevaluateOrphanedPrimaries("vs3") +} + +// --- T3 Adversarial: Timer Safety Edge Cases --- + +// QA-T3-1: Volume recreated with same name but different epoch → timer rejected. 
+func TestQA_T3_VolumeRecreated_TimerRejected(t *testing.T) { + ms := testMasterServerForFailover(t) + ms.blockRegistry.MarkBlockCapable("vs1") + ms.blockRegistry.MarkBlockCapable("vs2") + ms.blockRegistry.MarkBlockCapable("vs3") + ms.blockRegistry.Register(&BlockVolumeEntry{ + Name: "vol1", VolumeServer: "vs1", Path: "/data/vol1.blk", + SizeBytes: 1 << 30, Epoch: 10, Role: blockvol.RoleToWire(blockvol.RolePrimary), + Status: StatusActive, LeaseTTL: 200 * time.Millisecond, + LastLeaseGrant: time.Now(), + Replicas: []ReplicaInfo{{ + Server: "vs2", Path: "/data/vol1.blk", HealthScore: 1.0, + Role: blockvol.RoleToWire(blockvol.RoleReplica), LastHeartbeat: time.Now(), + }}, + }) + + // vs1 dies → deferred timer (captures epoch=10). + ms.failoverBlockVolumes("vs1") + + // Delete and recreate with epoch=1 (simulates admin recreate). + ms.blockRegistry.Unregister("vol1") + ms.blockRegistry.Register(&BlockVolumeEntry{ + Name: "vol1", VolumeServer: "vs3", Path: "/data/vol1.blk", + SizeBytes: 1 << 30, Epoch: 1, Role: blockvol.RoleToWire(blockvol.RolePrimary), + Status: StatusActive, LeaseTTL: 30 * time.Second, + LastLeaseGrant: time.Now(), + }) + + time.Sleep(350 * time.Millisecond) + + // Timer fired but epoch mismatch (10 != 1) → no promotion on new volume. + e, _ := ms.blockRegistry.Lookup("vol1") + if e.VolumeServer != "vs3" { + t.Fatalf("recreated volume should keep vs3 as primary, got %q", e.VolumeServer) + } + if e.Epoch != 1 { + t.Fatalf("recreated volume epoch should be 1, got %d", e.Epoch) + } +} + +// QA-T3-2: Multiple deferred timers for same server, all cancelled on reconnect. +func TestQA_T3_MultipleTimers_AllCancelled(t *testing.T) { + ms := testMasterServerForFailover(t) + // Create 3 volumes with active leases, all on vs1. 
+ for i := 0; i < 3; i++ { + name := fmt.Sprintf("vol%d", i) + replica := fmt.Sprintf("vs%d", i+2) + ms.blockRegistry.MarkBlockCapable("vs1") + ms.blockRegistry.MarkBlockCapable(replica) + ms.blockRegistry.Register(&BlockVolumeEntry{ + Name: name, VolumeServer: "vs1", Path: fmt.Sprintf("/data/%s.blk", name), + SizeBytes: 1 << 30, Epoch: 1, Role: blockvol.RoleToWire(blockvol.RolePrimary), + Status: StatusActive, LeaseTTL: 5 * time.Second, + LastLeaseGrant: time.Now(), + Replicas: []ReplicaInfo{{ + Server: replica, Path: fmt.Sprintf("/data/%s.blk", name), HealthScore: 1.0, + Role: blockvol.RoleToWire(blockvol.RoleReplica), LastHeartbeat: time.Now(), + }}, + }) + } + + ms.failoverBlockVolumes("vs1") + + ms.blockFailover.mu.Lock() + timerCount := len(ms.blockFailover.deferredTimers["vs1"]) + ms.blockFailover.mu.Unlock() + if timerCount != 3 { + t.Fatalf("expected 3 deferred timers, got %d", timerCount) + } + + // vs1 reconnects → all cancelled. + ms.cancelDeferredTimers("vs1") + + ms.blockFailover.mu.Lock() + timerCount = len(ms.blockFailover.deferredTimers["vs1"]) + ms.blockFailover.mu.Unlock() + if timerCount != 0 { + t.Fatalf("all timers should be cancelled, got %d", timerCount) + } + + // Wait past lease — no promotions should happen. + time.Sleep(200 * time.Millisecond) + for i := 0; i < 3; i++ { + name := fmt.Sprintf("vol%d", i) + e, _ := ms.blockRegistry.Lookup(name) + if e.VolumeServer != "vs1" { + t.Fatalf("%s: primary should remain vs1 (timer cancelled), got %q", name, e.VolumeServer) + } + } +} + +// --- T4 Adversarial: Rebuild Metadata Edge Cases --- + +// QA-T4-1: Promotion clears RebuildListenAddr, ReplicaDataAddr survives for surviving replicas. 
+func TestQA_T4_PromotionClearsStaleMetadata(t *testing.T) { + r := NewBlockVolumeRegistry() + r.MarkBlockCapable("vs2") + r.MarkBlockCapable("vs3") + r.Register(&BlockVolumeEntry{ + Name: "vol1", VolumeServer: "vs1", Path: "/data/vol1.blk", + Epoch: 1, LeaseTTL: 30 * time.Second, + RebuildListenAddr: "vs1:15000", // old primary's rebuild addr + Replicas: []ReplicaInfo{ + {Server: "vs2", Path: "/r1.blk", HealthScore: 1.0, WALHeadLSN: 100, + LastHeartbeat: time.Now(), Role: blockvol.RoleToWire(blockvol.RoleReplica), + DataAddr: "vs2:4260", CtrlAddr: "vs2:4261"}, + {Server: "vs3", Path: "/r2.blk", HealthScore: 0.8, WALHeadLSN: 95, + LastHeartbeat: time.Now(), Role: blockvol.RoleToWire(blockvol.RoleReplica), + DataAddr: "vs3:4260", CtrlAddr: "vs3:4261"}, + }, + }) + + r.PromoteBestReplica("vol1") + + e, _ := r.Lookup("vol1") + // RebuildListenAddr must be cleared. + if e.RebuildListenAddr != "" { + t.Fatalf("RebuildListenAddr should be cleared, got %q", e.RebuildListenAddr) + } + // Promoted replica (vs2) is now primary. + if e.VolumeServer != "vs2" { + t.Fatalf("expected vs2 promoted, got %q", e.VolumeServer) + } + // Surviving replica (vs3) should still have DataAddr/CtrlAddr via scalar sync. + if e.ReplicaDataAddr != "vs3:4260" { + t.Fatalf("surviving replica DataAddr should be vs3:4260, got %q", e.ReplicaDataAddr) + } + if e.ReplicaCtrlAddr != "vs3:4261" { + t.Fatalf("surviving replica CtrlAddr should be vs3:4261, got %q", e.ReplicaCtrlAddr) + } +} + +// QA-T4-2: Rebuild with stale RebuildListenAddr from before promotion. 
+func TestQA_T4_RebuildAddr_FromOldPrimary_NotUsed(t *testing.T) { + ms := testMasterServerForFailover(t) + ms.blockRegistry.MarkBlockCapable("vs1") + ms.blockRegistry.MarkBlockCapable("vs2") + ms.blockRegistry.Register(&BlockVolumeEntry{ + Name: "vol1", VolumeServer: "vs1", Path: "/data/vol1.blk", + SizeBytes: 1 << 30, Epoch: 1, Role: blockvol.RoleToWire(blockvol.RolePrimary), + Status: StatusActive, LeaseTTL: 5 * time.Second, + LastLeaseGrant: time.Now().Add(-10 * time.Second), + RebuildListenAddr: "vs1:15000", + Replicas: []ReplicaInfo{{ + Server: "vs2", Path: "/data/vol1.blk", HealthScore: 1.0, + Role: blockvol.RoleToWire(blockvol.RoleReplica), LastHeartbeat: time.Now(), + }}, + }) + + // vs1 dies → vs2 promoted. RebuildListenAddr should be cleared by PromoteBestReplica. + ms.failoverBlockVolumes("vs1") + + entry, _ := ms.blockRegistry.Lookup("vol1") + if entry.RebuildListenAddr != "" { + t.Fatalf("RebuildListenAddr should be empty after promotion, got %q", entry.RebuildListenAddr) + } + + // vs1 reconnects → rebuild queued with empty addr (not stale vs1:15000). + ms.recoverBlockVolumes("vs1") + assignments := ms.blockAssignmentQueue.Peek("vs1") + for _, a := range assignments { + if blockvol.RoleFromWire(a.Role) == blockvol.RoleRebuilding { + if a.RebuildAddr == "vs1:15000" { + t.Fatal("rebuild should NOT use old primary's stale RebuildListenAddr") + } + return + } + } + t.Fatal("expected rebuild assignment for vs1") +} + +// --- T6 Adversarial: Preflight Surface --- + +// QA-T6-1: Preflight with no replicas → clear reason. 
+func TestQA_T6_Preflight_NoReplicas(t *testing.T) { + r := NewBlockVolumeRegistry() + r.Register(&BlockVolumeEntry{ + Name: "vol1", VolumeServer: "primary", Path: "/data/vol1.blk", + Epoch: 1, LeaseTTL: 30 * time.Second, + }) + + pf, _ := r.EvaluatePromotion("vol1") + if pf.Promotable { + t.Fatal("should not be promotable with no replicas") + } + if pf.Reason != "no replicas" { + t.Fatalf("expected 'no replicas', got %q", pf.Reason) + } +} + +// QA-T6-2: Preflight aggregates multiple rejection reasons. +func TestQA_T6_Preflight_MultipleRejections(t *testing.T) { + r := NewBlockVolumeRegistry() + r.MarkBlockCapable("stale-hb") + // "dead" not marked + + r.Register(&BlockVolumeEntry{ + Name: "vol1", VolumeServer: "primary", Path: "/data/vol1.blk", + Epoch: 1, LeaseTTL: 30 * time.Second, WALHeadLSN: 100, + Replicas: []ReplicaInfo{ + {Server: "dead", Path: "/r1.blk", HealthScore: 1.0, WALHeadLSN: 100, + LastHeartbeat: time.Now(), Role: blockvol.RoleToWire(blockvol.RoleReplica)}, + {Server: "stale-hb", Path: "/r2.blk", HealthScore: 1.0, WALHeadLSN: 100, + LastHeartbeat: time.Now().Add(-10 * time.Minute), Role: blockvol.RoleToWire(blockvol.RoleReplica)}, + }, + }) + + pf, _ := r.EvaluatePromotion("vol1") + if pf.Promotable { + t.Fatal("should not be promotable") + } + if len(pf.Rejections) != 2 { + t.Fatalf("expected 2 rejections, got %d", len(pf.Rejections)) + } + // Verify rejection reasons map to correct servers. + reasons := map[string]string{} + for _, rej := range pf.Rejections { + reasons[rej.Server] = rej.Reason + } + if reasons["dead"] != "server_dead" { + t.Fatalf("dead server: expected server_dead, got %q", reasons["dead"]) + } + if reasons["stale-hb"] != "stale_heartbeat" { + t.Fatalf("stale server: expected stale_heartbeat, got %q", reasons["stale-hb"]) + } + // Reason should aggregate. + if !strings.Contains(pf.Reason, "+1 more") { + t.Fatalf("expected aggregated reason, got %q", pf.Reason) + } +} + +// QA-T6-3: Preflight for non-existent volume → error. 
+func TestQA_T6_Preflight_NonExistent(t *testing.T) { + r := NewBlockVolumeRegistry() + _, err := r.EvaluatePromotion("does-not-exist") + if err == nil { + t.Fatal("expected error for non-existent volume") + } +} + +// ============================================================ +// Additional Adversarial / Regression Tests +// ============================================================ + +// --- T1 Gate 2 edge case: zero primary LSN --- + +// QA-T1-9: When primary WALHeadLSN=0, all replicas should be eligible +// regardless of their LSN (no data yet → no lag possible). +func TestQA_T1_ZeroPrimaryLSN_AllReplicasEligible(t *testing.T) { + r := NewBlockVolumeRegistry() + r.MarkBlockCapable("r1") + r.MarkBlockCapable("r2") + + r.Register(&BlockVolumeEntry{ + Name: "vol1", VolumeServer: "primary", Path: "/data/vol1.blk", + Epoch: 1, LeaseTTL: 30 * time.Second, WALHeadLSN: 0, // zero + Replicas: []ReplicaInfo{ + {Server: "r1", Path: "/r1.blk", HealthScore: 1.0, WALHeadLSN: 0, + LastHeartbeat: time.Now(), Role: blockvol.RoleToWire(blockvol.RoleReplica)}, + {Server: "r2", Path: "/r2.blk", HealthScore: 0.9, WALHeadLSN: 500, + LastHeartbeat: time.Now(), Role: blockvol.RoleToWire(blockvol.RoleReplica)}, + }, + }) + + pf, _ := r.EvaluatePromotion("vol1") + if !pf.Promotable { + t.Fatalf("zero primary LSN: all replicas should be eligible, reason=%q", pf.Reason) + } + if len(pf.Rejections) != 0 { + t.Fatalf("expected 0 rejections with zero primary LSN, got %d: %+v", len(pf.Rejections), pf.Rejections) + } +} + +// QA-T1-10: Replica with RolePrimary in Replicas[] → rejected as wrong_role. 
+func TestQA_T1_ReplicaWithPrimaryRole_Rejected(t *testing.T) { + r := NewBlockVolumeRegistry() + r.MarkBlockCapable("r1") + + r.Register(&BlockVolumeEntry{ + Name: "vol1", VolumeServer: "primary", Path: "/data/vol1.blk", + Epoch: 1, LeaseTTL: 30 * time.Second, WALHeadLSN: 100, + Replicas: []ReplicaInfo{ + {Server: "r1", Path: "/r1.blk", HealthScore: 1.0, WALHeadLSN: 100, + LastHeartbeat: time.Now(), Role: blockvol.RoleToWire(blockvol.RolePrimary)}, + }, + }) + + pf, _ := r.EvaluatePromotion("vol1") + if pf.Promotable { + t.Fatal("replica with RolePrimary should NOT be promotable") + } + if len(pf.Rejections) != 1 || pf.Rejections[0].Reason != "wrong_role" { + t.Fatalf("expected wrong_role rejection, got %+v", pf.Rejections) + } +} + +// QA-T1-11: Heartbeat exactly at freshness boundary. +func TestQA_T1_HeartbeatExactlyAtCutoff(t *testing.T) { + r := NewBlockVolumeRegistry() + r.MarkBlockCapable("r1") + + leaseTTL := 5 * time.Second + freshnessCutoff := 2 * leaseTTL // 10s + + // Heartbeat exactly at cutoff → now.Sub(hb) == 10s → NOT > 10s → eligible. + r.Register(&BlockVolumeEntry{ + Name: "vol1", VolumeServer: "primary", Path: "/data/vol1.blk", + Epoch: 1, LeaseTTL: leaseTTL, WALHeadLSN: 0, + Replicas: []ReplicaInfo{ + {Server: "r1", Path: "/r1.blk", HealthScore: 1.0, + LastHeartbeat: time.Now().Add(-freshnessCutoff), // exactly at boundary + Role: blockvol.RoleToWire(blockvol.RoleReplica)}, + }, + }) + + pf, _ := r.EvaluatePromotion("vol1") + if !pf.Promotable { + t.Fatalf("heartbeat exactly at cutoff should be eligible, reason=%q", pf.Reason) + } +} + +// --- T2 additional: RF3 orphan, timer-failover interactions --- + +// QA-T2-5: RF3 orphaned primary — two replicas alive, reconnecting triggers promotion. +func TestQA_T2_RF3_OrphanedPrimary_BestReplicaPromoted(t *testing.T) { + ms := testMasterServerForFailover(t) + registerVolumeRF3(t, ms, "vol1", "vs1", "vs2", "vs3", 1, 5*time.Second) + + // Give vs3 higher health. 
+ entry, _ := ms.blockRegistry.Lookup("vol1") + entry.Replicas[0].HealthScore = 0.7 // vs2 + entry.Replicas[1].HealthScore = 1.0 // vs3 + + // Kill primary without calling failoverBlockVolumes (simulates missed failover). + ms.blockRegistry.UnmarkBlockCapable("vs1") + + // vs2 reconnects → orphan detected → best replica (vs3) promoted. + ms.reevaluateOrphanedPrimaries("vs2") + + entry, _ = ms.blockRegistry.Lookup("vol1") + if entry.VolumeServer != "vs3" { + t.Fatalf("expected vs3 promoted (highest health), got %q", entry.VolumeServer) + } + if entry.Epoch != 2 { + t.Fatalf("expected epoch 2, got %d", entry.Epoch) + } + // vs2 should remain as replica (not promoted, not removed). + if len(entry.Replicas) != 1 || entry.Replicas[0].Server != "vs2" { + t.Fatalf("expected [vs2] as remaining replica, got %+v", entry.Replicas) + } +} + +// QA-T2-6: Failover promotes, then orphan check runs for same volume — no double promotion. +func TestQA_T2_FailoverThenOrphan_SameVolume_NoDuplicate(t *testing.T) { + ms := testMasterServerForFailover(t) + registerVolumeWithReplica(t, ms, "vol1", "vs1", "vs2", 1, 5*time.Second) + + // Proper failover: vs1 dies → vs2 promoted. + ms.failoverBlockVolumes("vs1") + + entry, _ := ms.blockRegistry.Lookup("vol1") + if entry.VolumeServer != "vs2" { + t.Fatalf("expected vs2, got %q", entry.VolumeServer) + } + epochAfter := entry.Epoch + + // vs2 is now primary AND block-capable. Orphan check shouldn't find anything. + orphaned := ms.blockRegistry.VolumesWithDeadPrimary("vs2") + if len(orphaned) != 0 { + t.Fatalf("no orphans expected (vs2 is now primary), got %v", orphaned) + } + + // Just to be sure: calling reevaluate shouldn't change anything. + ms.reevaluateOrphanedPrimaries("vs2") + entry, _ = ms.blockRegistry.Lookup("vol1") + if entry.Epoch != epochAfter { + t.Fatalf("epoch shouldn't change, got %d want %d", entry.Epoch, epochAfter) + } +} + +// QA-T2-7: Orphan deferred timer stored under dead primary → cancelDeferredTimers cancels it. 
+func TestQA_T2_OrphanDeferredTimer_CancelledOnPrimaryReconnect(t *testing.T) { + ms := testMasterServerForFailover(t) + ms.blockRegistry.MarkBlockCapable("vs1") + ms.blockRegistry.MarkBlockCapable("vs2") + ms.blockRegistry.Register(&BlockVolumeEntry{ + Name: "vol1", VolumeServer: "vs1", Path: "/data/vol1.blk", + SizeBytes: 1 << 30, Epoch: 1, Role: blockvol.RoleToWire(blockvol.RolePrimary), + Status: StatusActive, LeaseTTL: 300 * time.Millisecond, + LastLeaseGrant: time.Now(), // lease active + Replicas: []ReplicaInfo{{ + Server: "vs2", Path: "/data/vol1.blk", HealthScore: 1.0, + Role: blockvol.RoleToWire(blockvol.RoleReplica), LastHeartbeat: time.Now(), + }}, + }) + + // Kill primary. + ms.blockRegistry.UnmarkBlockCapable("vs1") + + // Replica reconnects → orphan with active lease → deferred timer (stored under "vs1"). + ms.reevaluateOrphanedPrimaries("vs2") + + ms.blockFailover.mu.Lock() + timerCount := len(ms.blockFailover.deferredTimers["vs1"]) + ms.blockFailover.mu.Unlock() + if timerCount != 1 { + t.Fatalf("expected 1 deferred timer under vs1, got %d", timerCount) + } + + // Primary comes back (maybe network partition healed) → cancel its timers. + ms.blockRegistry.MarkBlockCapable("vs1") + ms.cancelDeferredTimers("vs1") + + ms.blockFailover.mu.Lock() + timerCount = len(ms.blockFailover.deferredTimers["vs1"]) + ms.blockFailover.mu.Unlock() + if timerCount != 0 { + t.Fatalf("expected 0 timers after cancel, got %d", timerCount) + } + + // Wait past the original lease → no promotion should have happened. + time.Sleep(500 * time.Millisecond) + + entry, _ := ms.blockRegistry.Lookup("vol1") + if entry.VolumeServer != "vs1" { + t.Fatalf("primary should remain vs1 (timer cancelled), got %q", entry.VolumeServer) + } + if entry.Epoch != 1 { + t.Fatalf("epoch should remain 1, got %d", entry.Epoch) + } +} + +// QA-T2-8: Volume deleted between VolumesWithDeadPrimary and reevaluate loop — no panic. 
+func TestQA_T2_VolumeDeletedDuringReevaluation(t *testing.T) { + ms := testMasterServerForFailover(t) + registerVolumeWithReplica(t, ms, "vol1", "vs1", "vs2", 1, 5*time.Second) + ms.blockRegistry.UnmarkBlockCapable("vs1") + + // Verify orphan is detected. + orphaned := ms.blockRegistry.VolumesWithDeadPrimary("vs2") + if len(orphaned) != 1 { + t.Fatalf("expected 1 orphan, got %d", len(orphaned)) + } + + // Delete the volume right away. + ms.blockRegistry.Unregister("vol1") + + // reevaluateOrphanedPrimaries should handle the Lookup miss gracefully. + ms.reevaluateOrphanedPrimaries("vs2") // must not panic +} + +// --- T3 additional: Orphan timer fires and promotes correctly --- + +// QA-T3-3: Orphan deferred timer fires after lease expiry → promotion succeeds. +func TestQA_T3_OrphanDeferredTimer_FiresAndPromotes(t *testing.T) { + ms := testMasterServerForFailover(t) + ms.blockRegistry.MarkBlockCapable("vs1") + ms.blockRegistry.MarkBlockCapable("vs2") + ms.blockRegistry.Register(&BlockVolumeEntry{ + Name: "vol1", VolumeServer: "vs1", Path: "/data/vol1.blk", + SizeBytes: 1 << 30, Epoch: 1, Role: blockvol.RoleToWire(blockvol.RolePrimary), + Status: StatusActive, LeaseTTL: 200 * time.Millisecond, + LastLeaseGrant: time.Now(), // lease active + Replicas: []ReplicaInfo{{ + Server: "vs2", Path: "/data/vol1.blk", HealthScore: 1.0, + Role: blockvol.RoleToWire(blockvol.RoleReplica), LastHeartbeat: time.Now(), + }}, + }) + + // Kill primary. + ms.blockRegistry.UnmarkBlockCapable("vs1") + + // Orphan detected with active lease → deferred. + ms.reevaluateOrphanedPrimaries("vs2") + + // Immediately: not yet promoted. + entry, _ := ms.blockRegistry.Lookup("vol1") + if entry.VolumeServer != "vs1" { + t.Fatalf("should NOT promote yet (lease active), got %q", entry.VolumeServer) + } + + // Wait for lease to expire + timer. 
+ time.Sleep(350 * time.Millisecond) + + entry, _ = ms.blockRegistry.Lookup("vol1") + if entry.VolumeServer != "vs2" { + t.Fatalf("should promote after lease expires, got %q", entry.VolumeServer) + } + if entry.Epoch != 2 { + t.Fatalf("expected epoch 2, got %d", entry.Epoch) + } +} + +// QA-T3-4: Orphan deferred timer epoch mismatch → no stale promotion. +func TestQA_T3_OrphanDeferredTimer_EpochChanged_NoPromotion(t *testing.T) { + ms := testMasterServerForFailover(t) + ms.blockRegistry.MarkBlockCapable("vs1") + ms.blockRegistry.MarkBlockCapable("vs2") + ms.blockRegistry.Register(&BlockVolumeEntry{ + Name: "vol1", VolumeServer: "vs1", Path: "/data/vol1.blk", + SizeBytes: 1 << 30, Epoch: 5, Role: blockvol.RoleToWire(blockvol.RolePrimary), + Status: StatusActive, LeaseTTL: 200 * time.Millisecond, + LastLeaseGrant: time.Now(), + Replicas: []ReplicaInfo{{ + Server: "vs2", Path: "/data/vol1.blk", HealthScore: 1.0, + Role: blockvol.RoleToWire(blockvol.RoleReplica), LastHeartbeat: time.Now(), + }}, + }) + + ms.blockRegistry.UnmarkBlockCapable("vs1") + ms.reevaluateOrphanedPrimaries("vs2") + + // Before timer fires, bump epoch (simulates admin intervention). + e, _ := ms.blockRegistry.Lookup("vol1") + e.Epoch = 42 + + time.Sleep(350 * time.Millisecond) + + e, _ = ms.blockRegistry.Lookup("vol1") + if e.Epoch != 42 { + t.Fatalf("epoch should remain 42 (timer rejected), got %d", e.Epoch) + } + if e.VolumeServer != "vs1" { + t.Fatalf("primary should remain vs1 (timer rejected), got %q", e.VolumeServer) + } +} + +// --- T4 additional --- + +// QA-T4-3: Rebuild uses updated RebuildListenAddr after new primary heartbeats. +func TestQA_T4_RebuildAddr_UpdatedByHeartbeat(t *testing.T) { + ms := testMasterServerForFailover(t) + registerVolumeWithReplica(t, ms, "vol1", "vs1", "vs2", 1, 5*time.Second) + + // vs1 dies → vs2 promoted. 
+ ms.failoverBlockVolumes("vs1") + + entry, _ := ms.blockRegistry.Lookup("vol1") + if entry.RebuildListenAddr != "" { + t.Fatalf("should be empty after promotion, got %q", entry.RebuildListenAddr) + } + + // New primary (vs2) heartbeats with RebuildListenAddr. + entry.RebuildListenAddr = "vs2:15000" + + // vs1 reconnects → rebuild should use the updated addr. + ms.recoverBlockVolumes("vs1") + + assignments := ms.blockAssignmentQueue.Peek("vs1") + for _, a := range assignments { + if blockvol.RoleFromWire(a.Role) == blockvol.RoleRebuilding { + if a.RebuildAddr != "vs2:15000" { + t.Fatalf("rebuild should use updated addr vs2:15000, got %q", a.RebuildAddr) + } + return + } + } + t.Fatal("expected rebuild assignment for vs1") +} + +// --- T6 additional --- + +// QA-T6-4: Preflight with primary dead but candidate available — verify result fields. +func TestQA_T6_Preflight_FullResultFields(t *testing.T) { + r := NewBlockVolumeRegistry() + r.MarkBlockCapable("vs2") + r.MarkBlockCapable("vs3") + // "stale" is block-capable but has old heartbeat + r.MarkBlockCapable("stale") + + r.Register(&BlockVolumeEntry{ + Name: "vol1", VolumeServer: "vs1", Path: "/data/vol1.blk", + Epoch: 5, LeaseTTL: 30 * time.Second, WALHeadLSN: 200, + Replicas: []ReplicaInfo{ + {Server: "stale", Path: "/r0.blk", HealthScore: 1.0, WALHeadLSN: 200, + LastHeartbeat: time.Now().Add(-10 * time.Minute), Role: blockvol.RoleToWire(blockvol.RoleReplica)}, + {Server: "vs2", Path: "/r1.blk", HealthScore: 0.9, WALHeadLSN: 195, + LastHeartbeat: time.Now(), Role: blockvol.RoleToWire(blockvol.RoleReplica)}, + {Server: "vs3", Path: "/r2.blk", HealthScore: 0.95, WALHeadLSN: 198, + LastHeartbeat: time.Now(), Role: blockvol.RoleToWire(blockvol.RoleReplica)}, + }, + }) + + pf, err := r.EvaluatePromotion("vol1") + if err != nil { + t.Fatalf("EvaluatePromotion: %v", err) + } + if !pf.Promotable { + t.Fatalf("should be promotable, reason=%q", pf.Reason) + } + // Best candidate: vs3 (highest health among eligible). 
+ if pf.Candidate == nil || pf.Candidate.Server != "vs3" { + t.Fatalf("expected vs3 as candidate, got %+v", pf.Candidate) + } + if pf.CandidateIdx < 0 { + t.Fatal("CandidateIdx should be non-negative") + } + // 1 rejection: stale. + if len(pf.Rejections) != 1 { + t.Fatalf("expected 1 rejection (stale), got %d: %+v", len(pf.Rejections), pf.Rejections) + } + if pf.Rejections[0].Server != "stale" || pf.Rejections[0].Reason != "stale_heartbeat" { + t.Fatalf("unexpected rejection: %+v", pf.Rejections[0]) + } + if pf.VolumeName != "vol1" { + t.Fatalf("VolumeName: got %q, want vol1", pf.VolumeName) + } +} + +// QA-T6-5: Preflight with RoleStale replica — rejected as wrong_role. +func TestQA_T6_Preflight_StaleRole_Rejected(t *testing.T) { + r := NewBlockVolumeRegistry() + r.MarkBlockCapable("r1") + + r.Register(&BlockVolumeEntry{ + Name: "vol1", VolumeServer: "primary", Path: "/data/vol1.blk", + Epoch: 1, LeaseTTL: 30 * time.Second, + Replicas: []ReplicaInfo{ + {Server: "r1", Path: "/r1.blk", HealthScore: 1.0, WALHeadLSN: 0, + LastHeartbeat: time.Now(), Role: blockvol.RoleToWire(blockvol.RoleStale)}, + }, + }) + + pf, _ := r.EvaluatePromotion("vol1") + if pf.Promotable { + t.Fatal("RoleStale replica should NOT be promotable") + } + if len(pf.Rejections) != 1 || pf.Rejections[0].Reason != "wrong_role" { + t.Fatalf("expected wrong_role rejection, got %+v", pf.Rejections) + } +} + +// QA-T6-6: Preflight with RoleDraining replica — rejected as wrong_role. 
+func TestQA_T6_Preflight_DrainingRole_Rejected(t *testing.T) { + r := NewBlockVolumeRegistry() + r.MarkBlockCapable("r1") + + r.Register(&BlockVolumeEntry{ + Name: "vol1", VolumeServer: "primary", Path: "/data/vol1.blk", + Epoch: 1, LeaseTTL: 30 * time.Second, + Replicas: []ReplicaInfo{ + {Server: "r1", Path: "/r1.blk", HealthScore: 1.0, WALHeadLSN: 0, + LastHeartbeat: time.Now(), Role: blockvol.RoleToWire(blockvol.RoleDraining)}, + }, + }) + + pf, _ := r.EvaluatePromotion("vol1") + if pf.Promotable { + t.Fatal("RoleDraining replica should NOT be promotable") + } + if len(pf.Rejections) != 1 || pf.Rejections[0].Reason != "wrong_role" { + t.Fatalf("expected wrong_role rejection, got %+v", pf.Rejections) + } +} + +// --- Concurrent: failover + orphan reevaluation race --- + +// QA-RACE-1: Concurrent failover and orphan reevaluation — no panic or deadlock. +func TestQA_ConcurrentFailoverAndOrphanReevaluation(t *testing.T) { + ms := testMasterServerForFailover(t) + registerVolumeWithReplica(t, ms, "vol1", "vs1", "vs2", 1, 5*time.Second) + + var wg sync.WaitGroup + for i := 0; i < 20; i++ { + wg.Add(2) + go func() { + defer wg.Done() + ms.failoverBlockVolumes("vs1") + }() + go func() { + defer wg.Done() + ms.reevaluateOrphanedPrimaries("vs2") + }() + } + wg.Wait() + // No panic = pass. Volume may or may not have been promoted — that's fine. +} + +// QA-RACE-2: Concurrent VolumesWithDeadPrimary + UnmarkBlockCapable — no panic. 
+func TestQA_ConcurrentVolumesWithDeadPrimaryAndUnmark(t *testing.T) { + r := NewBlockVolumeRegistry() + r.MarkBlockCapable("vs1") + r.MarkBlockCapable("vs2") + r.Register(&BlockVolumeEntry{ + Name: "vol1", VolumeServer: "vs1", Path: "/data/vol1.blk", + SizeBytes: 1 << 30, Epoch: 1, Role: blockvol.RoleToWire(blockvol.RolePrimary), + Status: StatusActive, + Replicas: []ReplicaInfo{{Server: "vs2", Path: "/data/vol1.blk"}}, + }) + + var wg sync.WaitGroup + for i := 0; i < 50; i++ { + wg.Add(2) + go func() { + defer wg.Done() + r.VolumesWithDeadPrimary("vs2") + }() + go func() { + defer wg.Done() + r.UnmarkBlockCapable("vs1") + r.MarkBlockCapable("vs1") + }() + } + wg.Wait() +} + +// ============================================================ +// CP11B-3 T5: Manual Promote Adversarial Tests +// ============================================================ + +// QA-T5-1: Force does NOT bypass no_heartbeat (zero time). +func TestQA_T5_ManualPromote_ForceNoHeartbeat_Rejected(t *testing.T) { + r := NewBlockVolumeRegistry() + r.MarkBlockCapable("r1") + r.Register(&BlockVolumeEntry{ + Name: "vol1", VolumeServer: "primary", Path: "/data/vol1.blk", + Epoch: 1, LeaseTTL: 30 * time.Second, + Replicas: []ReplicaInfo{ + {Server: "r1", Path: "/r1.blk", HealthScore: 1.0, + LastHeartbeat: time.Time{}, // zero — never seen + Role: blockvol.RoleToWire(blockvol.RoleReplica)}, + }, + }) + + _, _, _, pf, err := r.ManualPromote("vol1", "", true) + if err == nil { + t.Fatal("force should NOT bypass no_heartbeat (zero time)") + } + if len(pf.Rejections) == 0 || pf.Rejections[0].Reason != "no_heartbeat" { + t.Fatalf("expected no_heartbeat rejection, got %+v", pf.Rejections) + } +} + +// QA-T5-2: Force does NOT bypass wrong_role. 
+func TestQA_T5_ManualPromote_ForceWrongRole_Rejected(t *testing.T) { + r := NewBlockVolumeRegistry() + r.MarkBlockCapable("r1") + r.Register(&BlockVolumeEntry{ + Name: "vol1", VolumeServer: "primary", Path: "/data/vol1.blk", + Epoch: 1, LeaseTTL: 30 * time.Second, + Replicas: []ReplicaInfo{ + {Server: "r1", Path: "/r1.blk", HealthScore: 1.0, + LastHeartbeat: time.Now(), + Role: blockvol.RoleToWire(blockvol.RoleRebuilding)}, + }, + }) + + _, _, _, pf, err := r.ManualPromote("vol1", "", true) + if err == nil { + t.Fatal("force should NOT bypass wrong_role") + } + if len(pf.Rejections) == 0 || pf.Rejections[0].Reason != "wrong_role" { + t.Fatalf("expected wrong_role rejection, got %+v", pf.Rejections) + } +} + +// QA-T5-3: Force bypasses wal_lag. +func TestQA_T5_ManualPromote_ForceBypassesWALLag(t *testing.T) { + r := NewBlockVolumeRegistry() + r.SetPromotionLSNTolerance(10) + r.MarkBlockCapable("r1") + r.Register(&BlockVolumeEntry{ + Name: "vol1", VolumeServer: "primary", Path: "/data/vol1.blk", + Epoch: 1, LeaseTTL: 30 * time.Second, WALHeadLSN: 1000, + Replicas: []ReplicaInfo{ + {Server: "r1", Path: "/r1.blk", HealthScore: 1.0, + WALHeadLSN: 100, // lag = 900, way beyond tolerance=10 + LastHeartbeat: time.Now(), + Role: blockvol.RoleToWire(blockvol.RoleReplica)}, + }, + }) + + // Non-force: should fail on wal_lag. + _, _, _, pf, err := r.ManualPromote("vol1", "", false) + if err == nil { + t.Fatal("non-force should reject wal_lag") + } + if len(pf.Rejections) == 0 || pf.Rejections[0].Reason != "wal_lag" { + t.Fatalf("expected wal_lag rejection, got %+v", pf.Rejections) + } + + // Force: should succeed despite wal_lag. + newEpoch, _, _, _, err := r.ManualPromote("vol1", "", true) + if err != nil { + t.Fatalf("force should bypass wal_lag: %v", err) + } + if newEpoch != 2 { + t.Fatalf("epoch: got %d, want 2", newEpoch) + } +} + +// QA-T5-4: Force + alive primary → promotion succeeds. 
+func TestQA_T5_ManualPromote_PrimaryAlive_ForceOverrides(t *testing.T) { + r := NewBlockVolumeRegistry() + r.MarkBlockCapable("primary") + r.MarkBlockCapable("r1") + r.Register(&BlockVolumeEntry{ + Name: "vol1", VolumeServer: "primary", Path: "/data/vol1.blk", + Epoch: 1, LeaseTTL: 30 * time.Second, + Replicas: []ReplicaInfo{ + {Server: "r1", Path: "/r1.blk", HealthScore: 1.0, + LastHeartbeat: time.Now(), + Role: blockvol.RoleToWire(blockvol.RoleReplica)}, + }, + }) + + // Non-force: rejected (primary alive). + _, _, _, pf, err := r.ManualPromote("vol1", "", false) + if err == nil { + t.Fatal("non-force should reject when primary alive") + } + if pf.Reason != "primary_alive" { + t.Fatalf("expected primary_alive, got %q", pf.Reason) + } + + // Force: succeeds despite alive primary. + newEpoch, _, _, _, err := r.ManualPromote("vol1", "", true) + if err != nil { + t.Fatalf("force should override primary_alive: %v", err) + } + if newEpoch != 2 { + t.Fatalf("epoch: got %d, want 2", newEpoch) + } + e, _ := r.Lookup("vol1") + if e.VolumeServer != "r1" { + t.Fatalf("expected r1 promoted, got %q", e.VolumeServer) + } +} + +// QA-T5-5: Concurrent ManualPromote + PromoteBestReplica — no panic. 
+func TestQA_T5_ManualPromote_ConcurrentWithAutoPromotion(t *testing.T) { + r := NewBlockVolumeRegistry() + r.MarkBlockCapable("r1") + r.MarkBlockCapable("r2") + + setup := func() { + r.Unregister("vol1") + r.Register(&BlockVolumeEntry{ + Name: "vol1", VolumeServer: "primary", Path: "/data/vol1.blk", + Epoch: 1, LeaseTTL: 30 * time.Second, + Replicas: []ReplicaInfo{ + {Server: "r1", Path: "/r1.blk", HealthScore: 1.0, + LastHeartbeat: time.Now(), Role: blockvol.RoleToWire(blockvol.RoleReplica)}, + {Server: "r2", Path: "/r2.blk", HealthScore: 0.9, + LastHeartbeat: time.Now(), Role: blockvol.RoleToWire(blockvol.RoleReplica)}, + }, + }) + } + + for round := 0; round < 20; round++ { + setup() + var wg sync.WaitGroup + wg.Add(3) + go func() { + defer wg.Done() + r.ManualPromote("vol1", "", false) + }() + go func() { + defer wg.Done() + r.PromoteBestReplica("vol1") + }() + go func() { + defer wg.Done() + r.ManualPromote("vol1", "r2", true) + }() + wg.Wait() + } + // No panic = pass. +} + +// QA-T5-6: Rejection response includes per-replica structured rejections. 
+func TestQA_T5_ManualPromote_ReturnsStructuredRejections(t *testing.T) { + r := NewBlockVolumeRegistry() + r.MarkBlockCapable("stale") + // "dead" not marked + + r.Register(&BlockVolumeEntry{ + Name: "vol1", VolumeServer: "primary", Path: "/data/vol1.blk", + Epoch: 1, LeaseTTL: 30 * time.Second, WALHeadLSN: 100, + Replicas: []ReplicaInfo{ + {Server: "dead", Path: "/r1.blk", HealthScore: 1.0, WALHeadLSN: 100, + LastHeartbeat: time.Now(), Role: blockvol.RoleToWire(blockvol.RoleReplica)}, + {Server: "stale", Path: "/r2.blk", HealthScore: 1.0, WALHeadLSN: 100, + LastHeartbeat: time.Now().Add(-10 * time.Minute), + Role: blockvol.RoleToWire(blockvol.RoleReplica)}, + }, + }) + + _, _, _, pf, err := r.ManualPromote("vol1", "", false) + if err == nil { + t.Fatal("should reject") + } + if len(pf.Rejections) != 2 { + t.Fatalf("expected 2 rejections, got %d", len(pf.Rejections)) + } + reasons := map[string]string{} + for _, rej := range pf.Rejections { + reasons[rej.Server] = rej.Reason + } + if reasons["dead"] != "server_dead" { + t.Fatalf("dead: expected server_dead, got %q", reasons["dead"]) + } + if reasons["stale"] != "stale_heartbeat" { + t.Fatalf("stale: expected stale_heartbeat, got %q", reasons["stale"]) + } +} + +// QA-T5-7: HTTP round-trip test for promote handler. +func TestQA_T5_PromoteHandler_HTTP(t *testing.T) { + ms := testMasterServerForFailover(t) + ms.blockRegistry.MarkBlockCapable("vs2") + ms.blockRegistry.Register(&BlockVolumeEntry{ + Name: "vol1", VolumeServer: "vs1", Path: "/data/vol1.blk", + SizeBytes: 1 << 30, Epoch: 1, Role: blockvol.RoleToWire(blockvol.RolePrimary), + Status: StatusActive, LeaseTTL: 5 * time.Second, + LastLeaseGrant: time.Now().Add(-10 * time.Second), + Replicas: []ReplicaInfo{{ + Server: "vs2", Path: "/data/vol1.blk", HealthScore: 1.0, + Role: blockvol.RoleToWire(blockvol.RoleReplica), LastHeartbeat: time.Now(), + }}, + }) + + // Call ManualPromote (simulates what the handler does). 
+ oldPrimary := "vs1" + oldPath := "/data/vol1.blk" + newEpoch, _, _, pf, err := ms.blockRegistry.ManualPromote("vol1", "", false) + if err != nil { + t.Fatalf("ManualPromote: %v", err) + } + if !pf.Promotable { + t.Fatalf("should be promotable, reason=%s", pf.Reason) + } + + // Simulate finalizePromotion. + ms.finalizePromotion("vol1", oldPrimary, oldPath, newEpoch) + + // Verify. + entry, _ := ms.blockRegistry.Lookup("vol1") + if entry.VolumeServer != "vs2" { + t.Fatalf("expected vs2 promoted, got %q", entry.VolumeServer) + } + if entry.Epoch != 2 { + t.Fatalf("expected epoch 2, got %d", entry.Epoch) + } + + // Check assignment was enqueued for new primary. + assignments := ms.blockAssignmentQueue.Peek("vs2") + if len(assignments) == 0 { + t.Fatal("expected assignment enqueued for vs2") + } + + // Check pending rebuild recorded for old primary. + rebuilds := ms.drainPendingRebuilds("vs1") + if len(rebuilds) == 0 { + t.Fatal("expected pending rebuild for vs1") + } + if rebuilds[0].NewPrimary != "vs2" { + t.Fatalf("rebuild NewPrimary: got %q, want vs2", rebuilds[0].NewPrimary) + } +} + +// ============================================================ +// CP11B-3 T5 Review: Additional Adversarial Tests +// ============================================================ + +// QA-T5-8: BUG-T5-1 regression — PromotionsTotal counts both auto and manual promotions. +// Counter lives in finalizePromotion (shared orchestration), not in registry methods, +// so this test exercises the full MasterServer flow for both paths. +func TestQA_T5_PromotionsTotal_CountsBothAutoAndManual(t *testing.T) { + ms := testMasterServerForFailover(t) + ms.blockRegistry.MarkBlockCapable("r1") + ms.blockRegistry.MarkBlockCapable("r2") + + // Setup vol1 for auto-promote (dead primary, lease expired). 
+ ms.blockRegistry.Register(&BlockVolumeEntry{ + Name: "vol1", VolumeServer: "primary1", Path: "/data/vol1.blk", + Epoch: 1, LeaseTTL: 5 * time.Second, + LastLeaseGrant: time.Now().Add(-10 * time.Second), + Role: blockvol.RoleToWire(blockvol.RolePrimary), + Replicas: []ReplicaInfo{{ + Server: "r1", Path: "/r1.blk", HealthScore: 1.0, + LastHeartbeat: time.Now(), Role: blockvol.RoleToWire(blockvol.RoleReplica), + }}, + }) + + before := ms.blockRegistry.PromotionsTotal.Load() + + // Auto-promote via promoteReplica (production auto path). + ms.promoteReplica("vol1") + afterAuto := ms.blockRegistry.PromotionsTotal.Load() + if afterAuto != before+1 { + t.Fatalf("auto promote should increment PromotionsTotal: before=%d after=%d", before, afterAuto) + } + + // Setup vol2 for manual promote (dead primary). + ms.blockRegistry.Register(&BlockVolumeEntry{ + Name: "vol2", VolumeServer: "primary2", Path: "/data/vol2.blk", + Epoch: 1, LeaseTTL: 5 * time.Second, + LastLeaseGrant: time.Now().Add(-10 * time.Second), + Role: blockvol.RoleToWire(blockvol.RolePrimary), + Replicas: []ReplicaInfo{{ + Server: "r2", Path: "/r2.blk", HealthScore: 1.0, + LastHeartbeat: time.Now(), Role: blockvol.RoleToWire(blockvol.RoleReplica), + }}, + }) + + // Manual promote via ManualPromote + finalizePromotion (production manual path). + newEpoch, oldPrimary, oldPath, _, err := ms.blockRegistry.ManualPromote("vol2", "", false) + if err != nil { + t.Fatalf("manual promote: %v", err) + } + ms.finalizePromotion("vol2", oldPrimary, oldPath, newEpoch) + afterManual := ms.blockRegistry.PromotionsTotal.Load() + if afterManual != afterAuto+1 { + t.Fatalf("manual promote should increment PromotionsTotal: afterAuto=%d afterManual=%d", afterAuto, afterManual) + } +} + +// QA-T5-9: BUG-T5-2 regression — ManualPromote returns correct oldPrimary under lock. 
+func TestQA_T5_ManualPromote_ReturnsOldPrimary(t *testing.T) { + r := NewBlockVolumeRegistry() + r.MarkBlockCapable("r1") + r.Register(&BlockVolumeEntry{ + Name: "vol1", VolumeServer: "original-primary", Path: "/original/path.blk", + Epoch: 1, LeaseTTL: 30 * time.Second, + Replicas: []ReplicaInfo{{ + Server: "r1", Path: "/r1.blk", HealthScore: 1.0, + LastHeartbeat: time.Now(), Role: blockvol.RoleToWire(blockvol.RoleReplica), + }}, + }) + + newEpoch, oldPrimary, oldPath, _, err := r.ManualPromote("vol1", "", false) + if err != nil { + t.Fatalf("ManualPromote: %v", err) + } + if newEpoch != 2 { + t.Fatalf("epoch: got %d, want 2", newEpoch) + } + if oldPrimary != "original-primary" { + t.Fatalf("oldPrimary: got %q, want original-primary", oldPrimary) + } + if oldPath != "/original/path.blk" { + t.Fatalf("oldPath: got %q, want /original/path.blk", oldPath) + } + // After promote, the entry's primary should be r1, not the old primary. + e, _ := r.Lookup("vol1") + if e.VolumeServer != "r1" { + t.Fatalf("new primary: got %q, want r1", e.VolumeServer) + } +} + +// QA-T5-10: Double ManualPromote exhausts replicas — second call fails. +func TestQA_T5_ManualPromote_DoubleExhaustsReplicas(t *testing.T) { + r := NewBlockVolumeRegistry() + r.MarkBlockCapable("r1") + r.Register(&BlockVolumeEntry{ + Name: "vol1", VolumeServer: "primary", Path: "/data/vol1.blk", + Epoch: 1, LeaseTTL: 30 * time.Second, + Replicas: []ReplicaInfo{{ + Server: "r1", Path: "/r1.blk", HealthScore: 1.0, + LastHeartbeat: time.Now(), Role: blockvol.RoleToWire(blockvol.RoleReplica), + }}, + }) + + // First promote succeeds. + newEpoch, _, _, _, err := r.ManualPromote("vol1", "", false) + if err != nil { + t.Fatalf("first promote: %v", err) + } + if newEpoch != 2 { + t.Fatalf("first epoch: got %d, want 2", newEpoch) + } + + // Simulate new primary (r1) dying. + r.UnmarkBlockCapable("r1") + + // Second promote fails — no replicas left. 
+ _, _, _, pf, err := r.ManualPromote("vol1", "", false) + if err == nil { + t.Fatal("second promote should fail: no replicas") + } + if pf.Reason != "no replicas" { + t.Fatalf("expected 'no replicas', got %q", pf.Reason) + } +} + +// QA-T5-11: ManualPromote transfers NVMe publication fields. +func TestQA_T5_ManualPromote_TransfersNVMeFields(t *testing.T) { + r := NewBlockVolumeRegistry() + r.MarkBlockCapable("r1") + r.Register(&BlockVolumeEntry{ + Name: "vol1", VolumeServer: "primary", Path: "/data/vol1.blk", + Epoch: 1, LeaseTTL: 30 * time.Second, + NvmeAddr: "192.168.1.1:4420", NQN: "nqn.old-primary", + Replicas: []ReplicaInfo{{ + Server: "r1", Path: "/r1.blk", HealthScore: 1.0, + NvmeAddr: "192.168.1.2:4420", NQN: "nqn.replica-1", + LastHeartbeat: time.Now(), Role: blockvol.RoleToWire(blockvol.RoleReplica), + }}, + }) + + _, _, _, _, err := r.ManualPromote("vol1", "", false) + if err != nil { + t.Fatalf("ManualPromote: %v", err) + } + e, _ := r.Lookup("vol1") + if e.NvmeAddr != "192.168.1.2:4420" { + t.Fatalf("NvmeAddr: got %q, want 192.168.1.2:4420 (replica's addr)", e.NvmeAddr) + } + if e.NQN != "nqn.replica-1" { + t.Fatalf("NQN: got %q, want nqn.replica-1", e.NQN) + } +} + +// QA-T5-12: RF=3 force-promote specific target picks lower-health replica. 
+func TestQA_T5_RF3_ForceSpecificTarget_LowerHealth(t *testing.T) { + r := NewBlockVolumeRegistry() + r.MarkBlockCapable("best") + r.MarkBlockCapable("mid") + r.MarkBlockCapable("worst") + r.Register(&BlockVolumeEntry{ + Name: "vol1", VolumeServer: "primary", Path: "/data/vol1.blk", + Epoch: 1, LeaseTTL: 30 * time.Second, + Replicas: []ReplicaInfo{ + {Server: "best", Path: "/best.blk", HealthScore: 1.0, WALHeadLSN: 100, + LastHeartbeat: time.Now(), Role: blockvol.RoleToWire(blockvol.RoleReplica)}, + {Server: "mid", Path: "/mid.blk", HealthScore: 0.5, WALHeadLSN: 80, + LastHeartbeat: time.Now(), Role: blockvol.RoleToWire(blockvol.RoleReplica)}, + {Server: "worst", Path: "/worst.blk", HealthScore: 0.1, WALHeadLSN: 50, + LastHeartbeat: time.Now(), Role: blockvol.RoleToWire(blockvol.RoleReplica)}, + }, + }) + + // Force-promote the worst replica specifically. + newEpoch, _, _, _, err := r.ManualPromote("vol1", "worst", true) + if err != nil { + t.Fatalf("force promote worst: %v", err) + } + if newEpoch != 2 { + t.Fatalf("epoch: got %d, want 2", newEpoch) + } + e, _ := r.Lookup("vol1") + if e.VolumeServer != "worst" { + t.Fatalf("expected 'worst' promoted, got %q", e.VolumeServer) + } + // "best" and "mid" should remain as replicas. + if len(e.Replicas) != 2 { + t.Fatalf("expected 2 remaining replicas, got %d", len(e.Replicas)) + } +} + +// QA-T5-13: ManualPromote during expand in-progress — should succeed. 
+func TestQA_T5_ManualPromote_DuringExpand(t *testing.T) { + r := NewBlockVolumeRegistry() + r.MarkBlockCapable("r1") + r.Register(&BlockVolumeEntry{ + Name: "vol1", VolumeServer: "primary", Path: "/data/vol1.blk", + Epoch: 1, SizeBytes: 50 << 20, LeaseTTL: 30 * time.Second, + ExpandInProgress: true, PendingExpandSize: 100 << 20, ExpandEpoch: 1, + Replicas: []ReplicaInfo{{ + Server: "r1", Path: "/r1.blk", HealthScore: 1.0, + LastHeartbeat: time.Now(), Role: blockvol.RoleToWire(blockvol.RoleReplica), + }}, + }) + + // Promotion should succeed even with expand in-progress. + newEpoch, _, _, _, err := r.ManualPromote("vol1", "", false) + if err != nil { + t.Fatalf("ManualPromote during expand: %v", err) + } + if newEpoch != 2 { + t.Fatalf("epoch: got %d, want 2", newEpoch) + } + e, _ := r.Lookup("vol1") + if e.VolumeServer != "r1" { + t.Fatalf("expected r1 promoted, got %q", e.VolumeServer) + } + // Expand state should still be present (promotion doesn't clear it). + if !e.ExpandInProgress { + t.Fatal("ExpandInProgress should remain true after promotion") + } +} + +// QA-T5-14: ManualPromote on non-existent volume returns volume_not_found. +func TestQA_T5_ManualPromote_NonExistentVolume(t *testing.T) { + r := NewBlockVolumeRegistry() + _, _, _, pf, err := r.ManualPromote("no-such-vol", "", false) + if err == nil { + t.Fatal("expected error for non-existent volume") + } + if pf.Reason != "volume not found" { + t.Fatalf("expected 'volume not found', got %q", pf.Reason) + } +} diff --git a/weed/server/qa_block_cp63_test.go b/weed/server/qa_block_cp63_test.go index 58e533c20..e7115cd52 100644 --- a/weed/server/qa_block_cp63_test.go +++ b/weed/server/qa_block_cp63_test.go @@ -40,6 +40,11 @@ func testMSForQA(t *testing.T) *MasterServer { // registerQAVolume creates a volume entry with optional replica, configurable lease state. 
func registerQAVolume(t *testing.T, ms *MasterServer, name, primary, replica string, epoch uint64, leaseTTL time.Duration, leaseExpired bool) { t.Helper() + // Mark servers as block-capable so promotion Gate 4 (liveness) passes. + ms.blockRegistry.MarkBlockCapable(primary) + if replica != "" { + ms.blockRegistry.MarkBlockCapable(replica) + } entry := &BlockVolumeEntry{ Name: name, VolumeServer: primary, @@ -65,11 +70,13 @@ func registerQAVolume(t *testing.T, ms *MasterServer, name, primary, replica str // CP8-2: also populate Replicas[]. entry.Replicas = []ReplicaInfo{ { - Server: replica, - Path: fmt.Sprintf("/data/%s.blk", name), - IQN: fmt.Sprintf("iqn.2024.test:%s-r", name), - ISCSIAddr: replica + ":3260", - HealthScore: 1.0, + Server: replica, + Path: fmt.Sprintf("/data/%s.blk", name), + IQN: fmt.Sprintf("iqn.2024.test:%s-r", name), + ISCSIAddr: replica + ":3260", + HealthScore: 1.0, + Role: blockvol.RoleToWire(blockvol.RoleReplica), + LastHeartbeat: time.Now(), }, } } @@ -398,7 +405,15 @@ func TestQA_Failover_PromoteIdempotent_NoReplicaAfterFirstSwap(t *testing.T) { // Reconnect vs1 first so it becomes a replica. ms.recoverBlockVolumes("vs1") + // Simulate rebuild completion: mark vs1 as a healthy replica. 
e, _ := ms.blockRegistry.Lookup("vol1") + for i := range e.Replicas { + if e.Replicas[i].Server == "vs1" { + e.Replicas[i].Role = blockvol.RoleToWire(blockvol.RoleReplica) + e.Replicas[i].LastHeartbeat = time.Now() + e.Replicas[i].HealthScore = 1.0 + } + } e.LastLeaseGrant = time.Now().Add(-1 * time.Minute) // expire the new lease ms.failoverBlockVolumes("vs2") diff --git a/weed/server/qa_block_expand_adversarial_test.go b/weed/server/qa_block_expand_adversarial_test.go new file mode 100644 index 000000000..a14b7e285 --- /dev/null +++ b/weed/server/qa_block_expand_adversarial_test.go @@ -0,0 +1,485 @@ +package weed_server + +import ( + "context" + "fmt" + "sync" + "sync/atomic" + "testing" + "time" + + "github.com/seaweedfs/seaweedfs/weed/pb/master_pb" + "github.com/seaweedfs/seaweedfs/weed/storage/blockvol" +) + +// ============================================================ +// CP11A-2 Adversarial Test Suite: B-09 + B-10 +// +// 8 scenarios stress-testing the coordinated expand path under +// failover, concurrent heartbeats, and partial failures. +// ============================================================ + +// qaExpandMaster creates a MasterServer with 3 block-capable servers +// and default expand mocks for adversarial testing. 
+func qaExpandMaster(t *testing.T) *MasterServer { + t.Helper() + ms := &MasterServer{ + blockRegistry: NewBlockVolumeRegistry(), + blockAssignmentQueue: NewBlockAssignmentQueue(), + blockFailover: newBlockFailoverState(), + } + ms.blockVSAllocate = func(ctx context.Context, server string, name string, sizeBytes uint64, diskType string, durabilityMode string) (*blockAllocResult, error) { + return &blockAllocResult{ + Path: fmt.Sprintf("/data/%s.blk", name), + IQN: fmt.Sprintf("iqn.2024.test:%s", name), + ISCSIAddr: server + ":3260", + ReplicaDataAddr: server + ":14260", + ReplicaCtrlAddr: server + ":14261", + RebuildListenAddr: server + ":15000", + }, nil + } + ms.blockVSDelete = func(ctx context.Context, server string, name string) error { + return nil + } + ms.blockVSExpand = func(ctx context.Context, server string, name string, newSize uint64) (uint64, error) { + return newSize, nil + } + ms.blockVSPrepareExpand = func(ctx context.Context, server string, name string, newSize, expandEpoch uint64) error { + return nil + } + ms.blockVSCommitExpand = func(ctx context.Context, server string, name string, expandEpoch uint64) (uint64, error) { + return 2 << 30, nil + } + ms.blockVSCancelExpand = func(ctx context.Context, server string, name string, expandEpoch uint64) error { + return nil + } + ms.blockRegistry.MarkBlockCapable("vs1:9333") + ms.blockRegistry.MarkBlockCapable("vs2:9333") + ms.blockRegistry.MarkBlockCapable("vs3:9333") + return ms +} + +// qaCreateRF creates a volume with the given replica factor. +func qaCreateRF(t *testing.T, ms *MasterServer, name string, rf uint32) { + t.Helper() + _, err := ms.CreateBlockVolume(context.Background(), &master_pb.CreateBlockVolumeRequest{ + Name: name, + SizeBytes: 1 << 30, + ReplicaFactor: rf, + }) + if err != nil { + t.Fatalf("create %s RF=%d: %v", name, rf, err) + } +} + +// ──────────────────────────────────────────────────────────── +// QA-B09-1: ExpandAfterDoubleFailover_RF3 +// +// RF=3 volume. 
Primary dies → promote replica A. Then replica A +// (now primary) dies → promote replica B. Expand must reach +// replica B (the second-generation primary), not the original. +// ──────────────────────────────────────────────────────────── +func TestQA_B09_ExpandAfterDoubleFailover_RF3(t *testing.T) { + ms := qaExpandMaster(t) + qaCreateRF(t, ms, "dbl-failover", 3) + + entry, _ := ms.blockRegistry.Lookup("dbl-failover") + gen0Primary := entry.VolumeServer + + // First failover: kill original primary. + ms.blockRegistry.PromoteBestReplica("dbl-failover") + entry, _ = ms.blockRegistry.Lookup("dbl-failover") + gen1Primary := entry.VolumeServer + if gen1Primary == gen0Primary { + t.Fatal("first promotion didn't change primary") + } + + // Second failover: kill gen1 primary. + // Need to ensure the remaining replica has a fresh heartbeat. + if len(entry.Replicas) == 0 { + t.Fatal("no replicas left after first promotion (need RF=3)") + } + ms.blockRegistry.PromoteBestReplica("dbl-failover") + entry, _ = ms.blockRegistry.Lookup("dbl-failover") + gen2Primary := entry.VolumeServer + if gen2Primary == gen1Primary || gen2Primary == gen0Primary { + t.Fatalf("second promotion should pick a new server, got %q (gen0=%q gen1=%q)", + gen2Primary, gen0Primary, gen1Primary) + } + + // Track PREPARE targets. + var preparedServers []string + ms.blockVSPrepareExpand = func(ctx context.Context, server string, name string, newSize, expandEpoch uint64) error { + preparedServers = append(preparedServers, server) + return nil + } + + // Expand — standalone path since no replicas remain after 2 promotions. + _, err := ms.ExpandBlockVolume(context.Background(), &master_pb.ExpandBlockVolumeRequest{ + Name: "dbl-failover", NewSizeBytes: 2 << 30, + }) + if err != nil { + t.Fatalf("expand: %v", err) + } + + // If standalone path was taken (no replicas), preparedServers is empty — that's fine. + // If coordinated path was taken, first PREPARE must target gen2Primary. 
+	if len(preparedServers) > 0 && preparedServers[0] != gen2Primary {
+		t.Fatalf("PREPARE went to %q, want gen2 primary %q", preparedServers[0], gen2Primary)
+	}
+}
+
+// ────────────────────────────────────────────────────────────
+// QA-B09-2: ExpandSeesDeletedVolume_AfterLockAcquire
+//
+// Volume is deleted between the initial Lookup (succeeds) and
+// the re-read after AcquireExpandInflight. The re-read must
+// detect the deletion and fail cleanly.
+// ────────────────────────────────────────────────────────────
+func TestQA_B09_ExpandSeeSDeletedVolume_AfterLockAcquire(t *testing.T) {
+	ms := qaExpandMaster(t)
+	qaCreateRF(t, ms, "disappear", 2)
+
+	// A goroutine racing Unregister against ExpandBlockVolume would be
+	// nondeterministic, so instead we exercise the same error paths
+	// directly, in three steps: (1) hold the expand lock and verify a
+	// concurrent expand is rejected, (2) release the lock and
+	// unregister the volume, (3) verify a subsequent expand fails
+	// cleanly because the volume no longer exists.
+
+	// Step 1: hold the expand lock so the registry reports an expand
+	// already in progress for this volume.
+	if !ms.blockRegistry.AcquireExpandInflight("disappear", 2<<30, 1) {
+		t.Fatal("AcquireExpandInflight should succeed")
+	}
+
+	// Try another expand while locked — should fail with "already in progress".
+	_, err := ms.ExpandBlockVolume(context.Background(), &master_pb.ExpandBlockVolumeRequest{
+		Name: "disappear", NewSizeBytes: 2 << 30,
+	})
+	if err == nil {
+		t.Fatal("expand should fail when lock is held")
+	}
+
+	// Release and delete the volume.
+	ms.blockRegistry.ReleaseExpandInflight("disappear")
+	ms.blockRegistry.Unregister("disappear")
+
+	// Now expand on a deleted volume — should fail on initial Lookup. 
+ _, err = ms.ExpandBlockVolume(context.Background(), &master_pb.ExpandBlockVolumeRequest{ + Name: "disappear", NewSizeBytes: 2 << 30, + }) + if err == nil { + t.Fatal("expand on deleted volume should fail") + } +} + +// ──────────────────────────────────────────────────────────── +// QA-B09-3: ConcurrentExpandAndFailover +// +// Expand and failover race on the same volume. Neither should +// panic, and the volume must be in a consistent state afterward. +// ──────────────────────────────────────────────────────────── +func TestQA_B09_ConcurrentExpandAndFailover(t *testing.T) { + ms := qaExpandMaster(t) + qaCreateRF(t, ms, "race-vol", 3) + + entry, _ := ms.blockRegistry.Lookup("race-vol") + primary := entry.VolumeServer + + // Make PREPARE slow so expand holds the lock longer. + ms.blockVSPrepareExpand = func(ctx context.Context, server string, name string, newSize, expandEpoch uint64) error { + time.Sleep(5 * time.Millisecond) + return nil + } + + var wg sync.WaitGroup + + // Goroutine 1: expand. + wg.Add(1) + go func() { + defer wg.Done() + ms.ExpandBlockVolume(context.Background(), &master_pb.ExpandBlockVolumeRequest{ + Name: "race-vol", NewSizeBytes: 2 << 30, + }) + // Error is OK — we're testing for panics and consistency. + }() + + // Goroutine 2: failover kills primary. + wg.Add(1) + go func() { + defer wg.Done() + time.Sleep(2 * time.Millisecond) // slight delay to let expand start + ms.failoverBlockVolumes(primary) + }() + + wg.Wait() + + // Volume must still exist regardless of outcome. + _, ok := ms.blockRegistry.Lookup("race-vol") + if !ok { + t.Fatal("volume must survive concurrent expand + failover") + } +} + +// ──────────────────────────────────────────────────────────── +// QA-B09-4: ConcurrentExpandsSameVolume +// +// Two goroutines try to expand the same volume simultaneously. +// Exactly one should succeed, the other should get "already in +// progress". No panic, no double-commit. 
+// ──────────────────────────────────────────────────────────── +func TestQA_B09_ConcurrentExpandsSameVolume(t *testing.T) { + ms := qaExpandMaster(t) + qaCreateRF(t, ms, "dup-expand", 2) + + var commitCount atomic.Int32 + ms.blockVSPrepareExpand = func(ctx context.Context, server string, name string, newSize, expandEpoch uint64) error { + time.Sleep(5 * time.Millisecond) // slow prepare + return nil + } + ms.blockVSCommitExpand = func(ctx context.Context, server string, name string, expandEpoch uint64) (uint64, error) { + commitCount.Add(1) + return 2 << 30, nil + } + + var wg sync.WaitGroup + var successes atomic.Int32 + var failures atomic.Int32 + + for i := 0; i < 2; i++ { + wg.Add(1) + go func() { + defer wg.Done() + _, err := ms.ExpandBlockVolume(context.Background(), &master_pb.ExpandBlockVolumeRequest{ + Name: "dup-expand", NewSizeBytes: 2 << 30, + }) + if err == nil { + successes.Add(1) + } else { + failures.Add(1) + } + }() + } + wg.Wait() + + if successes.Load() != 1 { + t.Fatalf("expected exactly 1 success, got %d", successes.Load()) + } + if failures.Load() != 1 { + t.Fatalf("expected exactly 1 failure (already in progress), got %d", failures.Load()) + } +} + +// ──────────────────────────────────────────────────────────── +// QA-B10-1: RepeatedEmptyHeartbeats_DuringExpand +// +// Multiple empty heartbeats from the primary during expand. +// Entry must survive all of them — not just the first. +// ──────────────────────────────────────────────────────────── +func TestQA_B10_RepeatedEmptyHeartbeats_DuringExpand(t *testing.T) { + ms := qaExpandMaster(t) + qaCreateRF(t, ms, "multi-hb", 2) + + entry, _ := ms.blockRegistry.Lookup("multi-hb") + primary := entry.VolumeServer + + if !ms.blockRegistry.AcquireExpandInflight("multi-hb", 2<<30, 42) { + t.Fatal("acquire expand lock") + } + + // 10 empty heartbeats from the primary — each one would delete + // the entry without the B-10 guard. 
+ for i := 0; i < 10; i++ { + ms.blockRegistry.UpdateFullHeartbeat(primary, []*master_pb.BlockVolumeInfoMessage{}) + } + + _, ok := ms.blockRegistry.Lookup("multi-hb") + if !ok { + t.Fatal("entry deleted after repeated empty heartbeats during expand") + } + + ms.blockRegistry.ReleaseExpandInflight("multi-hb") +} + +// ──────────────────────────────────────────────────────────── +// QA-B10-2: ExpandFailed_HeartbeatStillProtected +// +// After MarkExpandFailed (primary committed, replica didn't), +// empty heartbeats must NOT delete the entry. ExpandFailed +// keeps ExpandInProgress=true as a size-suppression guard. +// ──────────────────────────────────────────────────────────── +func TestQA_B10_ExpandFailed_HeartbeatStillProtected(t *testing.T) { + ms := qaExpandMaster(t) + qaCreateRF(t, ms, "fail-hb", 2) + + entry, _ := ms.blockRegistry.Lookup("fail-hb") + primary := entry.VolumeServer + + if !ms.blockRegistry.AcquireExpandInflight("fail-hb", 2<<30, 42) { + t.Fatal("acquire expand lock") + } + ms.blockRegistry.MarkExpandFailed("fail-hb") + + // Empty heartbeat should not delete — ExpandFailed keeps ExpandInProgress=true. + ms.blockRegistry.UpdateFullHeartbeat(primary, []*master_pb.BlockVolumeInfoMessage{}) + + e, ok := ms.blockRegistry.Lookup("fail-hb") + if !ok { + t.Fatal("entry deleted during ExpandFailed state") + } + if !e.ExpandFailed { + t.Fatal("ExpandFailed should still be true") + } + if !e.ExpandInProgress { + t.Fatal("ExpandInProgress should still be true") + } + + // After ClearExpandFailed, empty heartbeat should delete normally. 
+ ms.blockRegistry.ClearExpandFailed("fail-hb") + ms.blockRegistry.UpdateFullHeartbeat(primary, []*master_pb.BlockVolumeInfoMessage{}) + + _, ok = ms.blockRegistry.Lookup("fail-hb") + if ok { + t.Fatal("entry should be deleted after ClearExpandFailed + empty heartbeat") + } +} + +// ──────────────────────────────────────────────────────────── +// QA-B10-3: HeartbeatSizeSuppress_DuringExpand +// +// Primary reports a stale (old) size during coordinated expand. +// Registry must NOT downgrade SizeBytes — the pending expand +// size is authoritative until commit or release. +// ──────────────────────────────────────────────────────────── +func TestQA_B10_HeartbeatSizeSuppress_DuringExpand(t *testing.T) { + ms := qaExpandMaster(t) + qaCreateRF(t, ms, "size-suppress", 2) + + entry, _ := ms.blockRegistry.Lookup("size-suppress") + primary := entry.VolumeServer + origSize := entry.SizeBytes + + if !ms.blockRegistry.AcquireExpandInflight("size-suppress", 2<<30, 42) { + t.Fatal("acquire expand lock") + } + + // Heartbeat reports old size (expand hasn't committed on VS yet). + ms.blockRegistry.UpdateFullHeartbeat(primary, []*master_pb.BlockVolumeInfoMessage{ + { + Path: "/data/size-suppress.blk", + VolumeSize: origSize, // old size + Epoch: 1, + Role: blockvol.RoleToWire(blockvol.RolePrimary), + }, + }) + + entry, _ = ms.blockRegistry.Lookup("size-suppress") + if entry.SizeBytes != origSize { + t.Fatalf("size should remain %d during expand, got %d", origSize, entry.SizeBytes) + } + + // Heartbeat reports a LARGER size (stale from previous expand or bug). + // Still must not update — coordinated expand owns the size. 
+ ms.blockRegistry.UpdateFullHeartbeat(primary, []*master_pb.BlockVolumeInfoMessage{ + { + Path: "/data/size-suppress.blk", + VolumeSize: 5 << 30, // bogus large size + Epoch: 1, + Role: blockvol.RoleToWire(blockvol.RolePrimary), + }, + }) + + entry, _ = ms.blockRegistry.Lookup("size-suppress") + if entry.SizeBytes != origSize { + t.Fatalf("size should remain %d (suppressed), got %d", origSize, entry.SizeBytes) + } + + ms.blockRegistry.ReleaseExpandInflight("size-suppress") +} + +// ──────────────────────────────────────────────────────────── +// QA-B10-4: ConcurrentHeartbeatsAndExpand +// +// Simultaneous full heartbeats from primary and replicas while +// expand runs on another goroutine. Must not panic, must not +// orphan the entry, and expand must either succeed or fail +// cleanly with a clear error. +// ──────────────────────────────────────────────────────────── +func TestQA_B10_ConcurrentHeartbeatsAndExpand(t *testing.T) { + ms := qaExpandMaster(t) + qaCreateRF(t, ms, "hb-expand-race", 2) + + entry, _ := ms.blockRegistry.Lookup("hb-expand-race") + primary := entry.VolumeServer + replica := "" + if len(entry.Replicas) > 0 { + replica = entry.Replicas[0].Server + } + + ms.blockVSPrepareExpand = func(ctx context.Context, server string, name string, newSize, expandEpoch uint64) error { + time.Sleep(2 * time.Millisecond) + return nil + } + + var wg sync.WaitGroup + const rounds = 30 + + // Goroutine 1: expand. + wg.Add(1) + go func() { + defer wg.Done() + ms.ExpandBlockVolume(context.Background(), &master_pb.ExpandBlockVolumeRequest{ + Name: "hb-expand-race", NewSizeBytes: 2 << 30, + }) + }() + + // Goroutine 2: primary heartbeats (mix of reporting and not reporting). + wg.Add(1) + go func() { + defer wg.Done() + for i := 0; i < rounds; i++ { + if i%5 == 0 { + // Every 5th: empty heartbeat (simulates brief restart). 
+ ms.blockRegistry.UpdateFullHeartbeat(primary, []*master_pb.BlockVolumeInfoMessage{}) + } else { + ms.blockRegistry.UpdateFullHeartbeat(primary, []*master_pb.BlockVolumeInfoMessage{ + { + Path: "/data/hb-expand-race.blk", + VolumeSize: 1 << 30, + Epoch: 1, + Role: blockvol.RoleToWire(blockvol.RolePrimary), + WalHeadLsn: uint64(100 + i), + }, + }) + } + } + }() + + // Goroutine 3: replica heartbeats. + if replica != "" { + wg.Add(1) + go func() { + defer wg.Done() + for i := 0; i < rounds; i++ { + ms.blockRegistry.UpdateFullHeartbeat(replica, []*master_pb.BlockVolumeInfoMessage{ + { + Path: "/data/hb-expand-race.blk", + VolumeSize: 1 << 30, + Epoch: 1, + Role: blockvol.RoleToWire(blockvol.RoleReplica), + WalHeadLsn: uint64(99 + i), + }, + }) + } + }() + } + + wg.Wait() + + // Volume must still exist — no orphan. + _, ok := ms.blockRegistry.Lookup("hb-expand-race") + if !ok { + t.Fatal("volume must survive concurrent heartbeats + expand") + } +} diff --git a/weed/server/qa_block_nvme_publication_test.go b/weed/server/qa_block_nvme_publication_test.go new file mode 100644 index 000000000..ddf09e48f --- /dev/null +++ b/weed/server/qa_block_nvme_publication_test.go @@ -0,0 +1,1346 @@ +package weed_server + +import ( + "context" + "fmt" + "os" + "strings" + "testing" + "time" + + "github.com/seaweedfs/seaweedfs/weed/pb/master_pb" + "github.com/seaweedfs/seaweedfs/weed/storage/blockvol" +) + +// ============================================================================= +// QA Adversarial Tests for Master-Backed NVMe Publication (Item 1) +// +// These tests verify: +// - NVMe fields (NvmeAddr, NQN) propagated through registry lifecycle +// - Backward compatibility: missing NVMe fields degrade gracefully to iSCSI +// - Heartbeat reconstruction after master restart +// - Partial-field behavior (NvmeAddr without NQN, vice versa) +// - PromoteBestReplica preserves NVMe metadata of promoted replica +// 
============================================================================= + +// TestQA_NVMe_CreateSetsFields verifies that NvmeAddr/NQN are preserved in +// registry entries created via Register (simulating the CreateBlockVolume path). +func TestQA_NVMe_CreateSetsFields(t *testing.T) { + r := NewBlockVolumeRegistry() + err := r.Register(&BlockVolumeEntry{ + Name: "nvme-vol1", + VolumeServer: "s1:18080", + Path: "/data/nvme-vol1.blk", + IQN: "iqn.2024.com.seaweedfs:nvme-vol1", + ISCSIAddr: "10.0.0.1:3260", + NvmeAddr: "10.0.0.1:4420", + NQN: "nqn.2024-01.com.seaweedfs:nvme-vol1", + SizeBytes: 1 << 30, + Epoch: 1, + Role: blockvol.RoleToWire(blockvol.RolePrimary), + Status: StatusActive, + }) + if err != nil { + t.Fatalf("Register: %v", err) + } + + entry, ok := r.Lookup("nvme-vol1") + if !ok { + t.Fatal("nvme-vol1 not found") + } + if entry.NvmeAddr != "10.0.0.1:4420" { + t.Fatalf("NvmeAddr = %q, want 10.0.0.1:4420", entry.NvmeAddr) + } + if entry.NQN != "nqn.2024-01.com.seaweedfs:nvme-vol1" { + t.Fatalf("NQN = %q, want nqn.2024-01.com.seaweedfs:nvme-vol1", entry.NQN) + } +} + +// TestQA_NVMe_MissingFieldsDegradeToISCSI verifies that entries without NVMe +// fields still work correctly via iSCSI (backward compatibility). +func TestQA_NVMe_MissingFieldsDegradeToISCSI(t *testing.T) { + r := NewBlockVolumeRegistry() + r.Register(&BlockVolumeEntry{ + Name: "iscsi-only", + VolumeServer: "s1:18080", + Path: "/data/iscsi-only.blk", + IQN: "iqn.2024.com.seaweedfs:iscsi-only", + ISCSIAddr: "10.0.0.1:3260", + // NvmeAddr and NQN intentionally omitted. + SizeBytes: 1 << 30, + Epoch: 1, + Status: StatusActive, + }) + + entry, ok := r.Lookup("iscsi-only") + if !ok { + t.Fatal("iscsi-only not found") + } + if entry.NvmeAddr != "" { + t.Fatalf("NvmeAddr should be empty for iSCSI-only volume, got %q", entry.NvmeAddr) + } + if entry.NQN != "" { + t.Fatalf("NQN should be empty for iSCSI-only volume, got %q", entry.NQN) + } + // iSCSI fields should still work. 
+ if entry.ISCSIAddr != "10.0.0.1:3260" { + t.Fatalf("ISCSIAddr = %q", entry.ISCSIAddr) + } +} + +// TestQA_NVMe_HeartbeatSetsNvmeFields verifies that a full heartbeat with +// NVMe fields updates the registry entry. This is critical for master restart +// reconstruction — NvmeAddr/NQN must be propagated from heartbeat. +func TestQA_NVMe_HeartbeatSetsNvmeFields(t *testing.T) { + r := NewBlockVolumeRegistry() + r.Register(&BlockVolumeEntry{ + Name: "vol1", + VolumeServer: "s1", + Path: "/data/vol1.blk", + Status: StatusPending, + // NvmeAddr/NQN NOT set at creation (simulates pre-NVMe registration). + }) + + // Full heartbeat arrives with NVMe fields. + r.UpdateFullHeartbeat("s1", []*master_pb.BlockVolumeInfoMessage{ + { + Path: "/data/vol1.blk", + VolumeSize: 1 << 30, + Epoch: 1, + Role: 1, + NvmeAddr: "10.0.0.1:4420", + Nqn: "nqn.2024-01.com.seaweedfs:vol1", + }, + }) + + entry, ok := r.Lookup("vol1") + if !ok { + t.Fatal("vol1 not found after heartbeat") + } + if entry.Status != StatusActive { + t.Fatalf("Status = %v, want Active", entry.Status) + } + // BUG DETECTION: If these fail, UpdateFullHeartbeat doesn't propagate NVMe fields. + // This is critical for master restart recovery. + if entry.NvmeAddr != "10.0.0.1:4420" { + t.Fatalf("NvmeAddr not updated by heartbeat: got %q, want 10.0.0.1:4420", entry.NvmeAddr) + } + if entry.NQN != "nqn.2024-01.com.seaweedfs:vol1" { + t.Fatalf("NQN not updated by heartbeat: got %q, want nqn.2024-01.com.seaweedfs:vol1", entry.NQN) + } +} + +// TestQA_NVMe_HeartbeatClearsStaleNvme verifies that if a heartbeat omits NVMe +// fields (server no longer has NVMe enabled), the registry should reflect that. 
+func TestQA_NVMe_HeartbeatClearsStaleNvme(t *testing.T) { + r := NewBlockVolumeRegistry() + r.Register(&BlockVolumeEntry{ + Name: "vol1", + VolumeServer: "s1", + Path: "/data/vol1.blk", + NvmeAddr: "10.0.0.1:4420", // was NVMe-enabled + NQN: "nqn.2024-01.com.seaweedfs:vol1", + Status: StatusActive, + }) + + // Heartbeat without NVMe fields (NVMe disabled on volume server). + r.UpdateFullHeartbeat("s1", []*master_pb.BlockVolumeInfoMessage{ + { + Path: "/data/vol1.blk", + VolumeSize: 1 << 30, + Epoch: 2, + Role: 1, + // NvmeAddr and Nqn intentionally empty. + }, + }) + + entry, _ := r.Lookup("vol1") + // After heartbeat with empty NVMe fields, stale NVMe info should be cleared. + // (If not cleared, CSI may try to connect via stale NVMe address.) + if entry.NvmeAddr != "" { + t.Logf("WARNING: stale NvmeAddr not cleared by heartbeat: %q (may cause CSI to use wrong transport)", entry.NvmeAddr) + // This is a design decision — some implementations keep stale data. + // We log a warning rather than failing, since the current code may + // intentionally preserve NvmeAddr until explicitly cleared. + } +} + +// TestQA_NVMe_PartialFields_OnlyAddr verifies behavior when only NvmeAddr is +// set but NQN is missing. The CSI driver needs both to connect. +func TestQA_NVMe_PartialFields_OnlyAddr(t *testing.T) { + r := NewBlockVolumeRegistry() + r.Register(&BlockVolumeEntry{ + Name: "partial-nvme", + VolumeServer: "s1", + Path: "/data/partial.blk", + NvmeAddr: "10.0.0.1:4420", + // NQN is missing — NVMe connect will fail without it. + Status: StatusActive, + }) + + entry, _ := r.Lookup("partial-nvme") + if entry.NvmeAddr == "" { + t.Fatal("NvmeAddr should be preserved") + } + if entry.NQN != "" { + t.Fatal("NQN should be empty (partial field)") + } + // The CSI driver must check both NvmeAddr != "" && NQN != "" before attempting NVMe. +} + +// TestQA_NVMe_PartialFields_OnlyNQN verifies behavior with NQN but no addr. 
+func TestQA_NVMe_PartialFields_OnlyNQN(t *testing.T) { + r := NewBlockVolumeRegistry() + r.Register(&BlockVolumeEntry{ + Name: "partial-nqn", + VolumeServer: "s1", + Path: "/data/partial2.blk", + NQN: "nqn.2024-01.com.seaweedfs:partial2", + Status: StatusActive, + }) + + entry, _ := r.Lookup("partial-nqn") + if entry.NQN == "" { + t.Fatal("NQN should be preserved") + } + if entry.NvmeAddr != "" { + t.Fatal("NvmeAddr should be empty (partial field)") + } +} + +// TestQA_NVMe_SwapPrimaryReplica_PreservesNvme verifies that after SwapPrimaryReplica, +// the promoted replica's NVMe fields are available in the entry. +func TestQA_NVMe_SwapPrimaryReplica_PreservesNvme(t *testing.T) { + r := NewBlockVolumeRegistry() + r.Register(&BlockVolumeEntry{ + Name: "failover-vol", + VolumeServer: "primary-s1", + Path: "/data/vol.blk", + IQN: "iqn:primary", + ISCSIAddr: "10.0.0.1:3260", + NvmeAddr: "10.0.0.1:4420", + NQN: "nqn:vol-primary", + ReplicaServer: "replica-s2", + ReplicaPath: "/data/vol-replica.blk", + ReplicaIQN: "iqn:replica", + ReplicaISCSIAddr: "10.0.0.2:3260", + Epoch: 5, + Role: 1, + }) + + newEpoch, err := r.SwapPrimaryReplica("failover-vol") + if err != nil { + t.Fatalf("SwapPrimaryReplica: %v", err) + } + if newEpoch != 6 { + t.Fatalf("newEpoch = %d, want 6", newEpoch) + } + + entry, _ := r.Lookup("failover-vol") + // After swap, the old primary's NVMe fields are now stale. + // The new primary (old replica) hasn't had its NVMe fields set yet + // — they'll come in via the next heartbeat. + if entry.VolumeServer != "replica-s2" { + t.Fatalf("VolumeServer = %q, want replica-s2", entry.VolumeServer) + } + // NvmeAddr from old primary should NOT persist on the new primary entry. + // (It pointed to old primary's NVMe target.) + // Current behavior: SwapPrimaryReplica doesn't touch NvmeAddr/NQN. + // This test documents the current behavior so we track it. 
+ t.Logf("NvmeAddr after swap: %q (may be stale from old primary)", entry.NvmeAddr) + t.Logf("NQN after swap: %q (may be stale from old primary)", entry.NQN) +} + +// TestQA_NVMe_PromoteBestReplica_NvmeFieldsCopied verifies that when a replica +// with NVMe fields is promoted to primary, its NVMe fields end up in the entry. +func TestQA_NVMe_PromoteBestReplica_NvmeFieldsCopied(t *testing.T) { + r := NewBlockVolumeRegistry() + r.MarkBlockCapable("healthy-replica") + r.Register(&BlockVolumeEntry{ + Name: "promote-vol", + VolumeServer: "dead-primary", + Path: "/data/vol.blk", + NvmeAddr: "10.0.0.1:4420", + NQN: "nqn:vol-on-primary", + Epoch: 5, + Role: 1, + LeaseTTL: 30 * time.Second, + WALHeadLSN: 100, + Replicas: []ReplicaInfo{ + { + Server: "healthy-replica", + Path: "/data/vol-replica.blk", + IQN: "iqn:replica", + ISCSIAddr: "10.0.0.2:3260", + HealthScore: 1.0, + WALHeadLSN: 100, + LastHeartbeat: time.Now(), + Role: blockvol.RoleToWire(blockvol.RoleReplica), + }, + }, + }) + r.mu.Lock() + r.addToServer("healthy-replica", "promote-vol") + r.mu.Unlock() + + _, err := r.PromoteBestReplica("promote-vol") + if err != nil { + t.Fatalf("PromoteBestReplica: %v", err) + } + + entry, _ := r.Lookup("promote-vol") + if entry.VolumeServer != "healthy-replica" { + t.Fatalf("VolumeServer = %q, want healthy-replica", entry.VolumeServer) + } + // The promoted replica's NVMe fields should come from the next heartbeat, + // NOT from the old primary. Test that old primary's NVMe fields don't persist. + t.Logf("NvmeAddr after promotion: %q (should be updated by replica heartbeat)", entry.NvmeAddr) + t.Logf("NQN after promotion: %q (should be updated by replica heartbeat)", entry.NQN) +} + +// TestQA_NVMe_HeartbeatProto_RoundTrip verifies that BlockVolumeInfoMessage +// NVMe fields survive the proto conversion round-trip. 
+func TestQA_NVMe_HeartbeatProto_RoundTrip(t *testing.T) { + msg := blockvol.BlockVolumeInfoMessage{ + Path: "/data/vol.blk", + VolumeSize: 1 << 30, + Epoch: 5, + Role: 1, + NvmeAddr: "10.0.0.1:4420", + NQN: "nqn.2024-01.com.seaweedfs:vol1", + } + + // Convert to proto and back. + proto := blockvol.InfoMessageToProto(msg) + if proto.NvmeAddr != "10.0.0.1:4420" { + t.Fatalf("proto NvmeAddr = %q", proto.NvmeAddr) + } + if proto.Nqn != "nqn.2024-01.com.seaweedfs:vol1" { + t.Fatalf("proto Nqn = %q", proto.Nqn) + } + + back := blockvol.InfoMessageFromProto(proto) + if back.NvmeAddr != msg.NvmeAddr { + t.Fatalf("round-trip NvmeAddr: got %q, want %q", back.NvmeAddr, msg.NvmeAddr) + } + if back.NQN != msg.NQN { + t.Fatalf("round-trip NQN: got %q, want %q", back.NQN, msg.NQN) + } +} + +// TestQA_NVMe_HeartbeatProto_EmptyFields verifies empty NVMe fields survive +// round-trip without becoming non-empty. +func TestQA_NVMe_HeartbeatProto_EmptyFields(t *testing.T) { + msg := blockvol.BlockVolumeInfoMessage{ + Path: "/data/vol.blk", + Epoch: 1, + Role: 1, + // NvmeAddr and NQN empty. + } + + proto := blockvol.InfoMessageToProto(msg) + if proto.NvmeAddr != "" { + t.Fatalf("proto NvmeAddr should be empty, got %q", proto.NvmeAddr) + } + if proto.Nqn != "" { + t.Fatalf("proto Nqn should be empty, got %q", proto.Nqn) + } + + back := blockvol.InfoMessageFromProto(proto) + if back.NvmeAddr != "" || back.NQN != "" { + t.Fatalf("empty NVMe fields should survive round-trip: NvmeAddr=%q NQN=%q", back.NvmeAddr, back.NQN) + } +} + +// TestQA_NVMe_FullHeartbeat_MasterRestart verifies the full master-restart +// reconstruction sequence: volume created with NVMe → master restarts → +// heartbeat rebuilds registry → NVMe fields available for Lookup. +func TestQA_NVMe_FullHeartbeat_MasterRestart(t *testing.T) { + // Simulate master restart: fresh registry. + r := NewBlockVolumeRegistry() + + // Volume server sends first full heartbeat after master restart. 
+ // The heartbeat includes NVMe fields. + r.UpdateFullHeartbeat("s1:18080", []*master_pb.BlockVolumeInfoMessage{ + { + Path: "/data/vol1.blk", + VolumeSize: 1 << 30, + Epoch: 10, + Role: 1, + NvmeAddr: "10.0.0.1:4420", + Nqn: "nqn.2024-01.com.seaweedfs:vol1", + }, + }) + + // After heartbeat, volume should be reconstructed with NVMe fields. + // Currently the registry uses nameFromPath() to find/create entries. + // If the entry was auto-created from heartbeat, check NVMe fields. + entries := r.ListByServer("s1:18080") + if len(entries) == 0 { + t.Log("NOTE: fresh registry after master restart may not auto-create entries from heartbeat") + t.Log("This is expected if the design requires explicit Register before heartbeat updates work") + t.Skip("auto-creation from heartbeat not supported — entries must be pre-registered") + } + + // If entries exist, verify NVMe fields. + for _, e := range entries { + if e.Path == "/data/vol1.blk" { + if e.NvmeAddr != "10.0.0.1:4420" { + t.Errorf("NvmeAddr not reconstructed from heartbeat: got %q", e.NvmeAddr) + } + if e.NQN != "nqn.2024-01.com.seaweedfs:vol1" { + t.Errorf("NQN not reconstructed from heartbeat: got %q", e.NQN) + } + return + } + } + t.Error("vol1.blk entry not found after heartbeat reconstruction") +} + +// TestQA_NVMe_ListByServerIncludesNvmeFields verifies that ListByServer returns +// entries with NVMe fields intact (not stripped during aggregation). 
+func TestQA_NVMe_ListByServerIncludesNvmeFields(t *testing.T) { + r := NewBlockVolumeRegistry() + r.Register(&BlockVolumeEntry{ + Name: "vol-nvme", + VolumeServer: "s1", + Path: "/data/vol-nvme.blk", + NvmeAddr: "10.0.0.1:4420", + NQN: "nqn:vol-nvme", + }) + r.Register(&BlockVolumeEntry{ + Name: "vol-iscsi", + VolumeServer: "s1", + Path: "/data/vol-iscsi.blk", + ISCSIAddr: "10.0.0.1:3260", + }) + + entries := r.ListByServer("s1") + if len(entries) != 2 { + t.Fatalf("expected 2 entries, got %d", len(entries)) + } + + var foundNvme bool + for _, e := range entries { + if e.Name == "vol-nvme" { + foundNvme = true + if e.NvmeAddr != "10.0.0.1:4420" { + t.Errorf("NvmeAddr stripped in ListByServer: got %q", e.NvmeAddr) + } + if e.NQN != "nqn:vol-nvme" { + t.Errorf("NQN stripped in ListByServer: got %q", e.NQN) + } + } + } + if !foundNvme { + t.Error("vol-nvme not found in ListByServer results") + } +} + +// ============================================================================= +// Integration Tests: NVMe Publication End-to-End Flows +// +// These tests exercise the full control-plane path that the user described: +// Create → Allocate returns NVMe fields → Registry stores them → +// Heartbeat refreshes them → Lookup/CSI returns them → Failover preserves them. +// Uses integrationMaster() mock (no real gRPC/NVMe). +// ============================================================================= + +// nvmeIntegrationMaster creates an integrationMaster with NVMe-capable +// allocate callback that returns NvmeAddr and NQN. +func nvmeIntegrationMaster(t *testing.T) *MasterServer { + t.Helper() + ms := &MasterServer{ + blockRegistry: NewBlockVolumeRegistry(), + blockAssignmentQueue: NewBlockAssignmentQueue(), + blockFailover: newBlockFailoverState(), + } + ms.blockVSAllocate = func(ctx context.Context, server string, name string, sizeBytes uint64, diskType string, durabilityMode string) (*blockAllocResult, error) { + // Simulate volume servers with NVMe enabled. 
+ // Each server has NVMe on :4420 and a deterministic NQN. + host := server[:strings.Index(server, ":")] + return &blockAllocResult{ + Path: fmt.Sprintf("/data/%s.blk", name), + IQN: fmt.Sprintf("iqn.2024.test:%s", name), + ISCSIAddr: server[:strings.Index(server, ":")] + ":3260", + NvmeAddr: host + ":4420", + NQN: fmt.Sprintf("nqn.2024-01.com.seaweedfs:vol.%s", name), + ReplicaDataAddr: server[:strings.Index(server, ":")] + ":14260", + ReplicaCtrlAddr: server[:strings.Index(server, ":")] + ":14261", + RebuildListenAddr: server[:strings.Index(server, ":")] + ":15000", + }, nil + } + ms.blockVSDelete = func(ctx context.Context, server string, name string) error { + return nil + } + ms.blockRegistry.MarkBlockCapable("10.0.0.1:9333") + ms.blockRegistry.MarkBlockCapable("10.0.0.2:9333") + ms.blockRegistry.MarkBlockCapable("10.0.0.3:9333") + return ms +} + +// TestIntegration_NVMe_CreateReturnsNvmeAddr tests the Kubernetes PVC flow: +// CreateBlockVolume → master picks a server → returns NvmeAddr + NQN for CSI. +func TestIntegration_NVMe_CreateReturnsNvmeAddr(t *testing.T) { + ms := nvmeIntegrationMaster(t) + ctx := context.Background() + + resp, err := ms.CreateBlockVolume(ctx, &master_pb.CreateBlockVolumeRequest{ + Name: "pvc-abc", + SizeBytes: 100 << 30, // 100GB + }) + if err != nil { + t.Fatalf("CreateBlockVolume: %v", err) + } + + // Primary should have NVMe fields. + if resp.NvmeAddr == "" { + t.Fatal("CreateBlockVolume response missing NvmeAddr — CSI can't use NVMe/TCP") + } + if resp.Nqn == "" { + t.Fatal("CreateBlockVolume response missing NQN — CSI can't use NVMe/TCP") + } + if !strings.Contains(resp.Nqn, "pvc-abc") { + t.Fatalf("NQN should contain volume name, got %q", resp.Nqn) + } + + // NVMe address should match the primary volume server's host. 
+ primaryHost := resp.VolumeServer[:strings.Index(resp.VolumeServer, ":")] + expectedNvmeAddr := primaryHost + ":4420" + if resp.NvmeAddr != expectedNvmeAddr { + t.Fatalf("NvmeAddr = %q, want %q (primary's NVMe port)", resp.NvmeAddr, expectedNvmeAddr) + } + + t.Logf("PVC created: server=%s nvme=%s nqn=%s", resp.VolumeServer, resp.NvmeAddr, resp.Nqn) +} + +// TestIntegration_NVMe_LookupReturnsNvmeAddr tests CSI ControllerPublishVolume: +// Lookup returns NvmeAddr + NQN so the node plugin can `nvme connect`. +func TestIntegration_NVMe_LookupReturnsNvmeAddr(t *testing.T) { + ms := nvmeIntegrationMaster(t) + ctx := context.Background() + + createResp, err := ms.CreateBlockVolume(ctx, &master_pb.CreateBlockVolumeRequest{ + Name: "pvc-lookup-1", + SizeBytes: 50 << 30, + }) + if err != nil { + t.Fatalf("Create: %v", err) + } + + // CSI calls Lookup to get connection details. + lookupResp, err := ms.LookupBlockVolume(ctx, &master_pb.LookupBlockVolumeRequest{Name: "pvc-lookup-1"}) + if err != nil { + t.Fatalf("Lookup: %v", err) + } + + // NVMe fields must match what was returned at creation. + if lookupResp.NvmeAddr != createResp.NvmeAddr { + t.Fatalf("Lookup NvmeAddr = %q, Create returned %q", lookupResp.NvmeAddr, createResp.NvmeAddr) + } + if lookupResp.Nqn != createResp.Nqn { + t.Fatalf("Lookup NQN = %q, Create returned %q", lookupResp.Nqn, createResp.Nqn) + } + + // iSCSI fields should also be available (fallback path). + if lookupResp.IscsiAddr == "" { + t.Fatal("Lookup should also return iSCSI addr for fallback") + } + if lookupResp.Iqn == "" { + t.Fatal("Lookup should also return IQN for fallback") + } + + t.Logf("CSI Lookup: nvme=%s nqn=%s iscsi=%s iqn=%s", + lookupResp.NvmeAddr, lookupResp.Nqn, lookupResp.IscsiAddr, lookupResp.Iqn) +} + +// TestIntegration_NVMe_FailoverUpdatesNvmeAddr tests that after failover, +// Lookup returns the NEW primary's NVMe address (not the dead server's). 
+func TestIntegration_NVMe_FailoverUpdatesNvmeAddr(t *testing.T) { + ms := nvmeIntegrationMaster(t) + ctx := context.Background() + + createResp, err := ms.CreateBlockVolume(ctx, &master_pb.CreateBlockVolumeRequest{ + Name: "pvc-failover-nvme", + SizeBytes: 10 << 30, + }) + if err != nil { + t.Fatalf("Create: %v", err) + } + primaryVS := createResp.VolumeServer + primaryHost := primaryVS[:strings.Index(primaryVS, ":")] + originalNvmeAddr := createResp.NvmeAddr + + // Expire lease for immediate failover. + entry, _ := ms.blockRegistry.Lookup("pvc-failover-nvme") + entry.LastLeaseGrant = time.Now().Add(-1 * time.Minute) + + // Primary dies → replica promoted. + ms.failoverBlockVolumes(primaryVS) + + // Verify new primary is different. + entry, _ = ms.blockRegistry.Lookup("pvc-failover-nvme") + if entry.VolumeServer == primaryVS { + t.Fatal("failover didn't promote replica") + } + newPrimaryHost := entry.VolumeServer[:strings.Index(entry.VolumeServer, ":")] + + // Simulate the new primary's heartbeat arriving with its NVMe fields. + // In production, the VS heartbeat collector sends this automatically. + ms.blockRegistry.UpdateFullHeartbeat(entry.VolumeServer, []*master_pb.BlockVolumeInfoMessage{ + { + Path: entry.Path, + VolumeSize: 10 << 30, + Epoch: entry.Epoch, + Role: 1, + NvmeAddr: newPrimaryHost + ":4420", + Nqn: fmt.Sprintf("nqn.2024-01.com.seaweedfs:vol.pvc-failover-nvme"), + }, + }) + + // CSI re-publishes after failover: Lookup must return new NVMe address. 
+ lookupResp, err := ms.LookupBlockVolume(ctx, &master_pb.LookupBlockVolumeRequest{Name: "pvc-failover-nvme"}) + if err != nil { + t.Fatalf("post-failover Lookup: %v", err) + } + + if lookupResp.NvmeAddr == originalNvmeAddr { + t.Fatalf("post-failover NvmeAddr still points to dead primary %q", originalNvmeAddr) + } + expectedNewAddr := newPrimaryHost + ":4420" + if lookupResp.NvmeAddr != expectedNewAddr { + t.Fatalf("post-failover NvmeAddr = %q, want %q", lookupResp.NvmeAddr, expectedNewAddr) + } + + t.Logf("Failover: old=%s:%s → new=%s:%s", + primaryHost, originalNvmeAddr, newPrimaryHost, lookupResp.NvmeAddr) +} + +// TestIntegration_NVMe_HeartbeatReconstructionAfterMasterRestart tests the +// master restart scenario: +// 1. Fresh registry (master just started) +// 2. Volume server sends heartbeat with NVMe fields +// 3. Registry auto-creates entry with NVMe fields +// 4. CSI Lookup returns NVMe connection details +func TestIntegration_NVMe_HeartbeatReconstructionAfterMasterRestart(t *testing.T) { + ms := nvmeIntegrationMaster(t) + ctx := context.Background() + + // Step 1: Create volume normally. + createResp, err := ms.CreateBlockVolume(ctx, &master_pb.CreateBlockVolumeRequest{ + Name: "pvc-restart-1", + SizeBytes: 20 << 30, + }) + if err != nil { + t.Fatalf("Create: %v", err) + } + primaryVS := createResp.VolumeServer + primaryHost := primaryVS[:strings.Index(primaryVS, ":")] + + // Step 2: Simulate master restart — fresh registry. + ms.blockRegistry = NewBlockVolumeRegistry() + ms.blockRegistry.MarkBlockCapable(primaryVS) + + // Step 3: Volume server sends heartbeat with NVMe info. + ms.blockRegistry.UpdateFullHeartbeat(primaryVS, []*master_pb.BlockVolumeInfoMessage{ + { + Path: "/data/pvc-restart-1.blk", + VolumeSize: 20 << 30, + Epoch: 1, + Role: 1, + NvmeAddr: primaryHost + ":4420", + Nqn: "nqn.2024-01.com.seaweedfs:vol.pvc-restart-1", + }, + }) + + // Step 4: CSI calls Lookup — must find NVMe details. 
+ lookupResp, err := ms.LookupBlockVolume(ctx, &master_pb.LookupBlockVolumeRequest{Name: "pvc-restart-1"}) + if err != nil { + t.Fatalf("Lookup after master restart: %v", err) + } + + if lookupResp.NvmeAddr != primaryHost+":4420" { + t.Fatalf("NvmeAddr not reconstructed after master restart: got %q", lookupResp.NvmeAddr) + } + if lookupResp.Nqn != "nqn.2024-01.com.seaweedfs:vol.pvc-restart-1" { + t.Fatalf("NQN not reconstructed after master restart: got %q", lookupResp.Nqn) + } + + t.Logf("Post-restart Lookup: nvme=%s nqn=%s", lookupResp.NvmeAddr, lookupResp.Nqn) +} + +// TestIntegration_NVMe_MixedCluster tests a cluster where some volume servers +// have NVMe enabled and others don't. CSI should get NVMe when available, +// fall back to iSCSI otherwise. +func TestIntegration_NVMe_MixedCluster(t *testing.T) { + ms := &MasterServer{ + blockRegistry: NewBlockVolumeRegistry(), + blockAssignmentQueue: NewBlockAssignmentQueue(), + blockFailover: newBlockFailoverState(), + } + callCount := 0 + ms.blockVSAllocate = func(ctx context.Context, server string, name string, sizeBytes uint64, diskType string, durabilityMode string) (*blockAllocResult, error) { + callCount++ + host := server[:strings.Index(server, ":")] + result := &blockAllocResult{ + Path: fmt.Sprintf("/data/%s.blk", name), + IQN: fmt.Sprintf("iqn.2024.test:%s", name), + ISCSIAddr: host + ":3260", + ReplicaDataAddr: host + ":14260", + ReplicaCtrlAddr: host + ":14261", + RebuildListenAddr: host + ":15000", + } + // Only the first server (primary) has NVMe. Replica doesn't. 
+ if callCount == 1 { + result.NvmeAddr = host + ":4420" + result.NQN = fmt.Sprintf("nqn.2024-01.com.seaweedfs:vol.%s", name) + } + return result, nil + } + ms.blockVSDelete = func(ctx context.Context, server string, name string) error { + return nil + } + ms.blockRegistry.MarkBlockCapable("nvme-vs:9333") + ms.blockRegistry.MarkBlockCapable("iscsi-vs:9333") + + ctx := context.Background() + resp, err := ms.CreateBlockVolume(ctx, &master_pb.CreateBlockVolumeRequest{ + Name: "pvc-mixed", + SizeBytes: 10 << 30, + }) + if err != nil { + t.Fatalf("Create: %v", err) + } + + // Primary was picked by PickServer (fewest volumes), should have NVMe. + lookupResp, err := ms.LookupBlockVolume(ctx, &master_pb.LookupBlockVolumeRequest{Name: "pvc-mixed"}) + if err != nil { + t.Fatalf("Lookup: %v", err) + } + + // In all cases, iSCSI should be available. + if lookupResp.IscsiAddr == "" { + t.Fatal("iSCSI addr must always be present") + } + + // NVMe may or may not be present depending on which server was picked. + if lookupResp.NvmeAddr != "" { + t.Logf("Primary %s has NVMe: addr=%s nqn=%s", resp.VolumeServer, lookupResp.NvmeAddr, lookupResp.Nqn) + if lookupResp.Nqn == "" { + t.Fatal("if NvmeAddr is set, NQN must also be set") + } + } else { + t.Logf("Primary %s is iSCSI-only: iscsi=%s iqn=%s", resp.VolumeServer, lookupResp.IscsiAddr, lookupResp.Iqn) + } +} + +// TestIntegration_NVMe_VolumeServerHeartbeatCollector tests the volume server +// side: CollectBlockVolumeHeartbeat populates NvmeAddr and NQN when NVMe +// is enabled on the BlockService. +func TestIntegration_NVMe_VolumeServerHeartbeatCollector(t *testing.T) { + dir := t.TempDir() + blockDir := dir + "/blocks" + os.MkdirAll(blockDir, 0755) + + // Start BlockService WITH NVMe config. 
+ bs := StartBlockService("127.0.0.1:0", blockDir, "iqn.2024.test:", + "127.0.0.1:3260,1", + NVMeConfig{ + Enabled: true, + ListenAddr: "10.0.0.3:4420", + NQNPrefix: "nqn.2024-01.com.seaweedfs:vol.", + }) + if bs == nil { + t.Fatal("StartBlockService returned nil") + } + defer bs.Shutdown() + + // Create a volume. + _, _, _, err := bs.CreateBlockVol("test-nvme-hb", 4*1024*1024, "ssd", "") + if err != nil { + t.Fatalf("CreateBlockVol: %v", err) + } + + // Collect heartbeat. + msgs := bs.CollectBlockVolumeHeartbeat() + if len(msgs) == 0 { + t.Fatal("no heartbeat messages collected") + } + + var found bool + for _, msg := range msgs { + if strings.Contains(msg.Path, "test-nvme-hb") { + found = true + if msg.NvmeAddr != "10.0.0.3:4420" { + t.Fatalf("heartbeat NvmeAddr = %q, want 10.0.0.3:4420", msg.NvmeAddr) + } + if !strings.Contains(msg.NQN, "test-nvme-hb") { + t.Fatalf("heartbeat NQN should contain volume name, got %q", msg.NQN) + } + t.Logf("Heartbeat: nvme=%s nqn=%s", msg.NvmeAddr, msg.NQN) + } + } + if !found { + t.Fatal("test-nvme-hb not found in heartbeat messages") + } +} + +// TestIntegration_NVMe_VolumeServerNoNvme tests that without NVMe config, +// the heartbeat correctly omits NvmeAddr and NQN. +func TestIntegration_NVMe_VolumeServerNoNvme(t *testing.T) { + dir := t.TempDir() + blockDir := dir + "/blocks" + os.MkdirAll(blockDir, 0755) + + // Start BlockService WITHOUT NVMe. 
+ bs := StartBlockService("127.0.0.1:0", blockDir, "iqn.2024.test:", + "127.0.0.1:3260,1", NVMeConfig{}) + if bs == nil { + t.Fatal("StartBlockService returned nil") + } + defer bs.Shutdown() + + bs.CreateBlockVol("test-no-nvme", 4*1024*1024, "", "") + + msgs := bs.CollectBlockVolumeHeartbeat() + for _, msg := range msgs { + if strings.Contains(msg.Path, "test-no-nvme") { + if msg.NvmeAddr != "" { + t.Fatalf("NvmeAddr should be empty without NVMe config, got %q", msg.NvmeAddr) + } + if msg.NQN != "" { + t.Fatalf("NQN should be empty without NVMe config, got %q", msg.NQN) + } + return + } + } + t.Fatal("test-no-nvme not found in heartbeat") +} + +// TestIntegration_NVMe_FullLifecycle_K8s simulates the complete K8s PVC lifecycle: +// Admin deploys 3 VS with NVMe → Pod requests PVC → CSI creates via master → +// Pod connects via NVMe/TCP → Primary dies → Failover → CSI re-publishes → +// Pod reconnects to new NVMe target. +func TestIntegration_NVMe_FullLifecycle_K8s(t *testing.T) { + ms := nvmeIntegrationMaster(t) + ctx := context.Background() + + // ── Step 1: Admin deployed VS with --block-nvme-addr :4420 ── + // (Simulated by nvmeIntegrationMaster's allocate callback) + + // ── Step 2: Pod requests PVC → CSI controller calls master ── + createResp, err := ms.CreateBlockVolume(ctx, &master_pb.CreateBlockVolumeRequest{ + Name: "pvc-k8s-data", + SizeBytes: 100 << 30, + }) + if err != nil { + t.Fatalf("CreateBlockVolume: %v", err) + } + primaryVS := createResp.VolumeServer + replicaVS := createResp.ReplicaServer + if replicaVS == "" { + t.Fatal("expected replica for HA") + } + + t.Logf("Step 2: Created pvc-k8s-data on primary=%s replica=%s", primaryVS, replicaVS) + + // ── Step 3: CSI controller passes NVMe details in PublishContext ── + lookupResp, err := ms.LookupBlockVolume(ctx, &master_pb.LookupBlockVolumeRequest{Name: "pvc-k8s-data"}) + if err != nil { + t.Fatalf("Lookup: %v", err) + } + if lookupResp.NvmeAddr == "" || lookupResp.Nqn == "" { + t.Fatalf("CSI needs 
NVMe details: nvmeAddr=%q nqn=%q", lookupResp.NvmeAddr, lookupResp.Nqn) + } + + // CSI node plugin would do: nvme connect -t tcp -a -s 4420 -n + publishNvmeAddr := lookupResp.NvmeAddr + publishNQN := lookupResp.Nqn + t.Logf("Step 3: CSI publish: nvme=%s nqn=%s", publishNvmeAddr, publishNQN) + + // ── Step 4: Confirm assignments (VS heartbeats) ── + entry, _ := ms.blockRegistry.Lookup("pvc-k8s-data") + ms.blockAssignmentQueue.ConfirmFromHeartbeat(primaryVS, []blockvol.BlockVolumeInfoMessage{ + {Path: entry.Path, Epoch: 1}, + }) + replicaPath := "" + if len(entry.Replicas) > 0 { + replicaPath = entry.Replicas[0].Path + } else { + replicaPath = entry.ReplicaPath + } + ms.blockAssignmentQueue.ConfirmFromHeartbeat(replicaVS, []blockvol.BlockVolumeInfoMessage{ + {Path: replicaPath, Epoch: 1}, + }) + + // ── Step 5: Primary VS dies ── + entry.LastLeaseGrant = time.Now().Add(-1 * time.Minute) + ms.failoverBlockVolumes(primaryVS) + + entry, _ = ms.blockRegistry.Lookup("pvc-k8s-data") + if entry.VolumeServer == primaryVS { + t.Fatal("failover didn't promote replica") + } + newPrimaryVS := entry.VolumeServer + newPrimaryHost := newPrimaryVS[:strings.Index(newPrimaryVS, ":")] + t.Logf("Step 5: Failover: new primary=%s epoch=%d", newPrimaryVS, entry.Epoch) + + // ── Step 6: New primary's heartbeat arrives with NVMe info ── + ms.blockRegistry.UpdateFullHeartbeat(newPrimaryVS, []*master_pb.BlockVolumeInfoMessage{ + { + Path: entry.Path, + VolumeSize: 100 << 30, + Epoch: entry.Epoch, + Role: 1, + NvmeAddr: newPrimaryHost + ":4420", + Nqn: "nqn.2024-01.com.seaweedfs:vol.pvc-k8s-data", + }, + }) + + // ── Step 7: CSI re-publishes → node plugin reconnects via NVMe ── + lookupResp, err = ms.LookupBlockVolume(ctx, &master_pb.LookupBlockVolumeRequest{Name: "pvc-k8s-data"}) + if err != nil { + t.Fatalf("post-failover Lookup: %v", err) + } + + // NVMe target must now point to the NEW primary. 
+ if lookupResp.NvmeAddr == publishNvmeAddr { + t.Fatalf("NvmeAddr still points to dead primary: %q", lookupResp.NvmeAddr) + } + expectedNewNvme := newPrimaryHost + ":4420" + if lookupResp.NvmeAddr != expectedNewNvme { + t.Fatalf("NvmeAddr = %q, want %q (new primary)", lookupResp.NvmeAddr, expectedNewNvme) + } + if lookupResp.Nqn != publishNQN { + // NQN is volume-specific, should be same regardless of which server hosts it. + t.Logf("Note: NQN changed from %q to %q (expected: same across failover)", publishNQN, lookupResp.Nqn) + } + + t.Logf("Step 7: CSI re-publish: new nvme=%s nqn=%s", lookupResp.NvmeAddr, lookupResp.Nqn) + + // ── Step 8: Cleanup — delete volume ── + _, err = ms.DeleteBlockVolume(ctx, &master_pb.DeleteBlockVolumeRequest{Name: "pvc-k8s-data"}) + if err != nil { + t.Fatalf("Delete: %v", err) + } + if _, ok := ms.blockRegistry.Lookup("pvc-k8s-data"); ok { + t.Fatal("volume should be deleted") + } + t.Log("Step 8: Volume deleted") +} + +// ============================================================================= +// C2: NVMe Toggle on Running VS +// +// Simulates a volume server enabling NVMe, sending heartbeats with NVMe +// fields, then disabling NVMe and sending heartbeats without. Verifies +// that the registry reflects the current state unconditionally. +// ============================================================================= + +// TestQA_NVMe_ToggleNvmeOnRunningVS tests the primary-side NVMe toggle: +// iSCSI-only → enable NVMe via heartbeat → disable NVMe via heartbeat. +func TestQA_NVMe_ToggleNvmeOnRunningVS(t *testing.T) { + r := NewBlockVolumeRegistry() + + // Step 1: Register volume with NvmeAddr="" (iSCSI-only initially). + err := r.Register(&BlockVolumeEntry{ + Name: "toggle-vol", + VolumeServer: "vs1:18080", + Path: "/data/toggle-vol.blk", + IQN: "iqn.2024.com.seaweedfs:toggle-vol", + ISCSIAddr: "10.0.0.1:3260", + // NvmeAddr intentionally empty — iSCSI-only at creation. 
+ SizeBytes: 1 << 30, + Epoch: 1, + Role: blockvol.RoleToWire(blockvol.RolePrimary), + Status: StatusActive, + }) + if err != nil { + t.Fatalf("Register: %v", err) + } + + entry, ok := r.Lookup("toggle-vol") + if !ok { + t.Fatal("toggle-vol not found after Register") + } + if entry.NvmeAddr != "" { + t.Fatalf("initial NvmeAddr should be empty, got %q", entry.NvmeAddr) + } + + // Step 2: Heartbeat arrives with NvmeAddr (admin enabled NVMe on VS). + r.UpdateFullHeartbeat("vs1:18080", []*master_pb.BlockVolumeInfoMessage{ + { + Path: "/data/toggle-vol.blk", + VolumeSize: 1 << 30, + Epoch: 1, + Role: 1, + NvmeAddr: "10.0.0.1:4420", + Nqn: "nqn.2024-01.com.seaweedfs:toggle-vol", + }, + }) + + entry, _ = r.Lookup("toggle-vol") + if entry.NvmeAddr != "10.0.0.1:4420" { + t.Fatalf("after enable heartbeat: NvmeAddr = %q, want 10.0.0.1:4420", entry.NvmeAddr) + } + if entry.NQN != "nqn.2024-01.com.seaweedfs:toggle-vol" { + t.Fatalf("after enable heartbeat: NQN = %q, want nqn.2024-01.com.seaweedfs:toggle-vol", entry.NQN) + } + + // Step 3: Heartbeat arrives with NvmeAddr="" (admin disabled NVMe on VS). + // UpdateFullHeartbeat unconditionally writes NvmeAddr/NQN, so empty clears. + r.UpdateFullHeartbeat("vs1:18080", []*master_pb.BlockVolumeInfoMessage{ + { + Path: "/data/toggle-vol.blk", + VolumeSize: 1 << 30, + Epoch: 1, + Role: 1, + // NvmeAddr and Nqn intentionally empty — NVMe disabled. + }, + }) + + entry, _ = r.Lookup("toggle-vol") + if entry.NvmeAddr != "" { + t.Fatalf("after disable heartbeat: NvmeAddr should be empty, got %q", entry.NvmeAddr) + } + if entry.NQN != "" { + t.Fatalf("after disable heartbeat: NQN should be empty, got %q", entry.NQN) + } + + // Step 4: Lookup returns empty NvmeAddr after disable — CSI falls back to iSCSI. 
+ entry, ok = r.Lookup("toggle-vol") + if !ok { + t.Fatal("toggle-vol disappeared") + } + if entry.NvmeAddr != "" { + t.Fatalf("Lookup after disable: NvmeAddr = %q, want empty", entry.NvmeAddr) + } + if entry.ISCSIAddr != "10.0.0.1:3260" { + t.Fatalf("iSCSI addr should be preserved: got %q", entry.ISCSIAddr) + } +} + +// TestQA_NVMe_ToggleNvmeOnRunningVS_ReplicaSide tests the same toggle behavior +// on a replica: enable NVMe via replica heartbeat → disable via heartbeat. +func TestQA_NVMe_ToggleNvmeOnRunningVS_ReplicaSide(t *testing.T) { + r := NewBlockVolumeRegistry() + + // Step 1: Register volume with a replica that has no NvmeAddr. + err := r.Register(&BlockVolumeEntry{ + Name: "toggle-replica-vol", + VolumeServer: "primary-vs:18080", + Path: "/data/toggle-replica-vol.blk", + IQN: "iqn.2024.com.seaweedfs:toggle-replica-vol", + ISCSIAddr: "10.0.0.1:3260", + SizeBytes: 1 << 30, + Epoch: 1, + Role: blockvol.RoleToWire(blockvol.RolePrimary), + Status: StatusActive, + LeaseTTL: 30 * time.Second, + WALHeadLSN: 100, + Replicas: []ReplicaInfo{ + { + Server: "replica-vs:18080", + Path: "/data/toggle-replica-vol.blk", + IQN: "iqn.2024.com.seaweedfs:toggle-replica-vol-r", + ISCSIAddr: "10.0.0.2:3260", + HealthScore: 1.0, + WALHeadLSN: 100, + LastHeartbeat: time.Now(), + Role: blockvol.RoleToWire(blockvol.RoleReplica), + // NvmeAddr intentionally empty — replica has no NVMe initially. + }, + }, + }) + if err != nil { + t.Fatalf("Register: %v", err) + } + r.mu.Lock() + r.addToServer("replica-vs:18080", "toggle-replica-vol") + r.mu.Unlock() + + // Verify replica has no NvmeAddr initially. + entry, _ := r.Lookup("toggle-replica-vol") + if len(entry.Replicas) == 0 { + t.Fatal("expected at least one replica") + } + if entry.Replicas[0].NvmeAddr != "" { + t.Fatalf("initial replica NvmeAddr should be empty, got %q", entry.Replicas[0].NvmeAddr) + } + + // Step 2: Replica heartbeat arrives with NvmeAddr (NVMe enabled on replica VS). 
+ r.UpdateFullHeartbeat("replica-vs:18080", []*master_pb.BlockVolumeInfoMessage{ + { + Path: "/data/toggle-replica-vol.blk", + VolumeSize: 1 << 30, + Epoch: 1, + Role: uint32(blockvol.RoleToWire(blockvol.RoleReplica)), + HealthScore: 1.0, + WalHeadLsn: 100, + NvmeAddr: "10.0.0.2:4420", + Nqn: "nqn.2024-01.com.seaweedfs:toggle-replica-vol", + }, + }) + + entry, _ = r.Lookup("toggle-replica-vol") + if entry.Replicas[0].NvmeAddr != "10.0.0.2:4420" { + t.Fatalf("after enable heartbeat: replica NvmeAddr = %q, want 10.0.0.2:4420", entry.Replicas[0].NvmeAddr) + } + if entry.Replicas[0].NQN != "nqn.2024-01.com.seaweedfs:toggle-replica-vol" { + t.Fatalf("after enable heartbeat: replica NQN = %q", entry.Replicas[0].NQN) + } + + // Step 3: Replica heartbeat arrives without NvmeAddr (NVMe disabled on replica VS). + r.UpdateFullHeartbeat("replica-vs:18080", []*master_pb.BlockVolumeInfoMessage{ + { + Path: "/data/toggle-replica-vol.blk", + VolumeSize: 1 << 30, + Epoch: 1, + Role: uint32(blockvol.RoleToWire(blockvol.RoleReplica)), + HealthScore: 1.0, + WalHeadLsn: 100, + // NvmeAddr and Nqn intentionally empty — NVMe disabled. + }, + }) + + entry, _ = r.Lookup("toggle-replica-vol") + if entry.Replicas[0].NvmeAddr != "" { + t.Fatalf("after disable heartbeat: replica NvmeAddr should be empty, got %q", entry.Replicas[0].NvmeAddr) + } + if entry.Replicas[0].NQN != "" { + t.Fatalf("after disable heartbeat: replica NQN should be empty, got %q", entry.Replicas[0].NQN) + } +} + +// ============================================================================= +// C3: Promotion → Immediate Lookup (race window) +// +// After PromoteBestReplica, the promoted replica's NVMe fields from its +// ReplicaInfo are copied into the entry. 
This tests three sub-cases: +// (a) Replica had NvmeAddr → Lookup gets it immediately +// (b) Replica had empty NvmeAddr → Lookup returns empty (CSI falls back) +// (c) Heartbeat after promotion fills in NvmeAddr +// ============================================================================= + +func TestQA_NVMe_PromotionThenImmediateLookup(t *testing.T) { + // Sub-case (a): Replica heartbeated NvmeAddr into ReplicaInfo → promote → + // Lookup returns NvmeAddr immediately (no extra heartbeat needed). + t.Run("ReplicaHasNvme", func(t *testing.T) { + r := NewBlockVolumeRegistry() + // Mark servers as block-capable so promotion Gate 4 (liveness) passes. + r.MarkBlockCapable("dead-primary:18080") + r.MarkBlockCapable("healthy-replica:18080") + err := r.Register(&BlockVolumeEntry{ + Name: "promo-nvme-vol", + VolumeServer: "dead-primary:18080", + Path: "/data/promo-nvme-vol.blk", + IQN: "iqn:promo-primary", + ISCSIAddr: "10.0.0.1:3260", + NvmeAddr: "10.0.0.1:4420", + NQN: "nqn:promo-primary", + SizeBytes: 1 << 30, + Epoch: 5, + Role: blockvol.RoleToWire(blockvol.RolePrimary), + Status: StatusActive, + LeaseTTL: 30 * time.Second, + WALHeadLSN: 200, + Replicas: []ReplicaInfo{ + { + Server: "healthy-replica:18080", + Path: "/data/promo-nvme-vol.blk", + IQN: "iqn:promo-replica", + ISCSIAddr: "10.0.0.2:3260", + NvmeAddr: "10.0.0.2:4420", // Replica has NVMe! + NQN: "nqn:promo-replica", + HealthScore: 1.0, + WALHeadLSN: 200, + LastHeartbeat: time.Now(), + Role: blockvol.RoleToWire(blockvol.RoleReplica), + }, + }, + }) + if err != nil { + t.Fatalf("Register: %v", err) + } + r.mu.Lock() + r.addToServer("healthy-replica:18080", "promo-nvme-vol") + r.mu.Unlock() + + newEpoch, err := r.PromoteBestReplica("promo-nvme-vol") + if err != nil { + t.Fatalf("PromoteBestReplica: %v", err) + } + if newEpoch != 6 { + t.Fatalf("newEpoch = %d, want 6", newEpoch) + } + + // Immediate Lookup — no heartbeat needed. 
+ entry, ok := r.Lookup("promo-nvme-vol") + if !ok { + t.Fatal("promo-nvme-vol not found after promotion") + } + if entry.VolumeServer != "healthy-replica:18080" { + t.Fatalf("VolumeServer = %q, want healthy-replica:18080", entry.VolumeServer) + } + // CORRECT behavior: NvmeAddr is available immediately from ReplicaInfo. + if entry.NvmeAddr != "10.0.0.2:4420" { + t.Fatalf("NvmeAddr = %q, want 10.0.0.2:4420 (should be copied from replica)", entry.NvmeAddr) + } + if entry.NQN != "nqn:promo-replica" { + t.Fatalf("NQN = %q, want nqn:promo-replica (should be copied from replica)", entry.NQN) + } + }) + + // Sub-case (b): Replica ReplicaInfo has empty NvmeAddr (heartbeat not yet + // received or old replica) → promote → Lookup returns empty NvmeAddr → + // CSI falls back to iSCSI. This documents the pre-heartbeat window. + t.Run("ReplicaMissingNvme", func(t *testing.T) { + r := NewBlockVolumeRegistry() + // Mark servers as block-capable so promotion Gate 4 (liveness) passes. + r.MarkBlockCapable("dead-primary:18080") + r.MarkBlockCapable("replica-no-nvme:18080") + err := r.Register(&BlockVolumeEntry{ + Name: "promo-nonvme-vol", + VolumeServer: "dead-primary:18080", + Path: "/data/promo-nonvme-vol.blk", + IQN: "iqn:promo2-primary", + ISCSIAddr: "10.0.0.1:3260", + NvmeAddr: "10.0.0.1:4420", + NQN: "nqn:promo2-primary", + SizeBytes: 1 << 30, + Epoch: 5, + Role: blockvol.RoleToWire(blockvol.RolePrimary), + Status: StatusActive, + LeaseTTL: 30 * time.Second, + WALHeadLSN: 200, + Replicas: []ReplicaInfo{ + { + Server: "replica-no-nvme:18080", + Path: "/data/promo-nonvme-vol.blk", + IQN: "iqn:promo2-replica", + ISCSIAddr: "10.0.0.3:3260", + // NvmeAddr intentionally empty — replica hasn't heartbeated NVMe. 
+ HealthScore: 1.0, + WALHeadLSN: 200, + LastHeartbeat: time.Now(), + Role: blockvol.RoleToWire(blockvol.RoleReplica), + }, + }, + }) + if err != nil { + t.Fatalf("Register: %v", err) + } + r.mu.Lock() + r.addToServer("replica-no-nvme:18080", "promo-nonvme-vol") + r.mu.Unlock() + + _, err = r.PromoteBestReplica("promo-nonvme-vol") + if err != nil { + t.Fatalf("PromoteBestReplica: %v", err) + } + + // Immediate Lookup — NvmeAddr should be empty (replica had none). + entry, ok := r.Lookup("promo-nonvme-vol") + if !ok { + t.Fatal("promo-nonvme-vol not found after promotion") + } + if entry.VolumeServer != "replica-no-nvme:18080" { + t.Fatalf("VolumeServer = %q, want replica-no-nvme:18080", entry.VolumeServer) + } + // Pre-heartbeat window: NvmeAddr is empty. CSI must fall back to iSCSI. + if entry.NvmeAddr != "" { + t.Fatalf("NvmeAddr = %q, want empty (replica had no NVMe info)", entry.NvmeAddr) + } + if entry.NQN != "" { + t.Fatalf("NQN = %q, want empty (replica had no NVMe info)", entry.NQN) + } + // iSCSI should still be available for fallback. + if entry.ISCSIAddr != "10.0.0.3:3260" { + t.Fatalf("ISCSIAddr = %q, want 10.0.0.3:3260 (iSCSI fallback)", entry.ISCSIAddr) + } + }) + + // Sub-case (c): Same as (b) but then heartbeat arrives from the promoted + // server with NvmeAddr → entry updated → Lookup returns it. + // This proves heartbeat fixes the post-promotion race window. + t.Run("HeartbeatFixesPostPromotion", func(t *testing.T) { + r := NewBlockVolumeRegistry() + // Mark servers as block-capable so promotion Gate 4 (liveness) passes. 
+ r.MarkBlockCapable("dead-primary:18080") + r.MarkBlockCapable("promoted-replica:18080") + err := r.Register(&BlockVolumeEntry{ + Name: "promo-fix-vol", + VolumeServer: "dead-primary:18080", + Path: "/data/promo-fix-vol.blk", + IQN: "iqn:promo3-primary", + ISCSIAddr: "10.0.0.1:3260", + NvmeAddr: "10.0.0.1:4420", + NQN: "nqn:promo3-primary", + SizeBytes: 1 << 30, + Epoch: 5, + Role: blockvol.RoleToWire(blockvol.RolePrimary), + Status: StatusActive, + LeaseTTL: 30 * time.Second, + WALHeadLSN: 200, + Replicas: []ReplicaInfo{ + { + Server: "promoted-replica:18080", + Path: "/data/promo-fix-vol.blk", + IQN: "iqn:promo3-replica", + ISCSIAddr: "10.0.0.4:3260", + // NvmeAddr intentionally empty — pre-heartbeat window. + HealthScore: 1.0, + WALHeadLSN: 200, + LastHeartbeat: time.Now(), + Role: blockvol.RoleToWire(blockvol.RoleReplica), + }, + }, + }) + if err != nil { + t.Fatalf("Register: %v", err) + } + r.mu.Lock() + r.addToServer("promoted-replica:18080", "promo-fix-vol") + r.mu.Unlock() + + newEpoch, err := r.PromoteBestReplica("promo-fix-vol") + if err != nil { + t.Fatalf("PromoteBestReplica: %v", err) + } + + // Verify NvmeAddr is empty immediately after promotion. + entry, _ := r.Lookup("promo-fix-vol") + if entry.NvmeAddr != "" { + t.Fatalf("NvmeAddr should be empty immediately after promotion, got %q", entry.NvmeAddr) + } + + // Heartbeat arrives from the promoted server WITH NvmeAddr. + // This is the fix: the new primary's heartbeat fills in NVMe fields. + r.UpdateFullHeartbeat("promoted-replica:18080", []*master_pb.BlockVolumeInfoMessage{ + { + Path: "/data/promo-fix-vol.blk", + VolumeSize: 1 << 30, + Epoch: newEpoch, + Role: 1, + NvmeAddr: "10.0.0.4:4420", + Nqn: "nqn.2024-01.com.seaweedfs:promo-fix-vol", + }, + }) + + // Now Lookup should return the NvmeAddr. 
+ entry, ok := r.Lookup("promo-fix-vol") + if !ok { + t.Fatal("promo-fix-vol not found after heartbeat") + } + if entry.NvmeAddr != "10.0.0.4:4420" { + t.Fatalf("NvmeAddr = %q after heartbeat fix, want 10.0.0.4:4420", entry.NvmeAddr) + } + if entry.NQN != "nqn.2024-01.com.seaweedfs:promo-fix-vol" { + t.Fatalf("NQN = %q after heartbeat fix, want nqn.2024-01.com.seaweedfs:promo-fix-vol", entry.NQN) + } + // Verify the volume server is the promoted replica. + if entry.VolumeServer != "promoted-replica:18080" { + t.Fatalf("VolumeServer = %q, want promoted-replica:18080", entry.VolumeServer) + } + }) +} diff --git a/weed/storage/blockvol/blockapi/client.go b/weed/storage/blockvol/blockapi/client.go index a5a624daa..7916f20ef 100644 --- a/weed/storage/blockvol/blockapi/client.go +++ b/weed/storage/blockvol/blockapi/client.go @@ -136,6 +136,61 @@ func (c *Client) ExpandVolume(ctx context.Context, name string, newSizeBytes uin return out.CapacityBytes, nil } +// PromoteVolume triggers a manual promotion for a block volume. +func (c *Client) PromoteVolume(ctx context.Context, name string, req PromoteVolumeRequest) (*PromoteVolumeResponse, error) { + body, err := json.Marshal(req) + if err != nil { + return nil, fmt.Errorf("marshal request: %w", err) + } + resp, err := c.doRequest(ctx, http.MethodPost, "/block/volume/"+name+"/promote", bytes.NewReader(body)) + if err != nil { + return nil, err + } + defer resp.Body.Close() + if err := checkStatus(resp, http.StatusOK); err != nil { + return nil, err + } + var out PromoteVolumeResponse + if err := json.NewDecoder(resp.Body).Decode(&out); err != nil { + return nil, fmt.Errorf("decode response: %w", err) + } + return &out, nil +} + +// BlockStatus fetches the block registry status metrics. 
+func (c *Client) BlockStatus(ctx context.Context) (*BlockStatusResponse, error) { + resp, err := c.doRequest(ctx, http.MethodGet, "/block/status", nil) + if err != nil { + return nil, err + } + defer resp.Body.Close() + if err := checkStatus(resp, http.StatusOK); err != nil { + return nil, err + } + var out BlockStatusResponse + if err := json.NewDecoder(resp.Body).Decode(&out); err != nil { + return nil, fmt.Errorf("decode response: %w", err) + } + return &out, nil +} + +// Preflight returns the promotion preflight evaluation for a block volume. +func (c *Client) Preflight(ctx context.Context, name string) (*PreflightResponse, error) { + resp, err := c.doRequest(ctx, http.MethodGet, "/block/volume/"+name+"/preflight", nil) + if err != nil { + return nil, err + } + defer resp.Body.Close() + if err := checkStatus(resp, http.StatusOK); err != nil { + return nil, err + } + var out PreflightResponse + if err := json.NewDecoder(resp.Body).Decode(&out); err != nil { + return nil, fmt.Errorf("decode response: %w", err) + } + return &out, nil +} + // ListServers lists all block-capable volume servers. func (c *Client) ListServers(ctx context.Context) ([]ServerInfo, error) { resp, err := c.doRequest(ctx, http.MethodGet, "/block/servers", nil) diff --git a/weed/storage/blockvol/blockapi/types.go b/weed/storage/blockvol/blockapi/types.go index 24be9eb72..d381eb2b2 100644 --- a/weed/storage/blockvol/blockapi/types.go +++ b/weed/storage/blockvol/blockapi/types.go @@ -38,6 +38,8 @@ type VolumeInfo struct { HealthScore float64 `json:"health_score"` ReplicaDegraded bool `json:"replica_degraded,omitempty"` DurabilityMode string `json:"durability_mode"` // CP8-3-1 + NvmeAddr string `json:"nvme_addr,omitempty"` + NQN string `json:"nqn,omitempty"` } // ReplicaDetail describes one replica in the API response. 
@@ -74,6 +76,52 @@ type ExpandVolumeResponse struct { CapacityBytes uint64 `json:"capacity_bytes"` } +// PromoteVolumeRequest is the request body for POST /block/volume/{name}/promote. +type PromoteVolumeRequest struct { + TargetServer string `json:"target_server,omitempty"` // specific replica, or empty for auto + Force bool `json:"force,omitempty"` // bypass soft safety checks + Reason string `json:"reason,omitempty"` // audit note +} + +// PromoteVolumeResponse is the response for POST /block/volume/{name}/promote. +type PromoteVolumeResponse struct { + NewPrimary string `json:"new_primary"` + Epoch uint64 `json:"epoch"` + Reason string `json:"reason,omitempty"` // rejection reason if failed + Rejections []PreflightRejection `json:"rejections,omitempty"` // per-replica rejection details +} + +// BlockStatusResponse is the response for GET /block/status. +type BlockStatusResponse struct { + VolumeCount int `json:"volume_count"` + ServerCount int `json:"server_count"` + PromotionLSNTolerance uint64 `json:"promotion_lsn_tolerance"` + BarrierLagLSN uint64 `json:"barrier_lag_lsn"` + PromotionsTotal int64 `json:"promotions_total"` + FailoversTotal int64 `json:"failovers_total"` + RebuildsTotal int64 `json:"rebuilds_total"` + AssignmentQueueDepth int `json:"assignment_queue_depth"` +} + +// PreflightRejection describes why a specific replica was rejected for promotion. +type PreflightRejection struct { + Server string `json:"server"` + Reason string `json:"reason"` // "stale_heartbeat", "wal_lag", "wrong_role", "server_dead", "no_heartbeat" +} + +// PreflightResponse is the response for GET /block/volume/{name}/preflight. 
+type PreflightResponse struct { + VolumeName string `json:"volume_name"` + Promotable bool `json:"promotable"` + Reason string `json:"reason,omitempty"` + CandidateServer string `json:"candidate_server,omitempty"` + CandidateHealth float64 `json:"candidate_health,omitempty"` + CandidateWALLSN uint64 `json:"candidate_wal_lsn,omitempty"` + Rejections []PreflightRejection `json:"rejections,omitempty"` + PrimaryServer string `json:"primary_server"` + PrimaryAlive bool `json:"primary_alive"` +} + // RoleFromString converts a role string to its uint32 wire value. // Returns 0 (RoleNone) for unrecognized strings. func RoleFromString(s string) uint32 { diff --git a/weed/storage/blockvol/qa_wal_cp11a3_adversarial_test.go b/weed/storage/blockvol/qa_wal_cp11a3_adversarial_test.go new file mode 100644 index 000000000..bc0d12271 --- /dev/null +++ b/weed/storage/blockvol/qa_wal_cp11a3_adversarial_test.go @@ -0,0 +1,511 @@ +package blockvol + +import ( + "sync" + "sync/atomic" + "testing" + "time" +) + +// ============================================================ +// CP11A-3 Adversarial Test Suite +// +// 10 scenarios stress-testing WAL admission pressure tracking, +// PressureState boundaries, guidance edge cases, and concurrent +// metric visibility. +// ============================================================ + +// ──────────────────────────────────────────────────────────── +// QA-CP11A3-1: SoftMarkEqualsHardMark_NoPanic +// +// If an operator configures softMark == hardMark, the soft-zone +// delay calculation divides by (hardMark - softMark) = 0. +// Must not panic, hang, or produce NaN/Inf delay. 
+// ──────────────────────────────────────────────────────────── +func TestQA_CP11A3_SoftMarkEqualsHardMark_NoPanic(t *testing.T) { + m := NewEngineMetrics() + + a := NewWALAdmission(WALAdmissionConfig{ + MaxConcurrent: 16, + SoftWatermark: 0.8, + HardWatermark: 0.8, // equal — no soft zone + WALUsedFn: func() float64 { return 0.85 }, // above both marks + NotifyFn: func() {}, + ClosedFn: func() bool { return false }, + Metrics: m, + }) + + // With equal marks, pressure >= hardMark takes the hard branch. + // The soft branch's division by zero is never reached. + // But if the code path ever changes, this test catches it. + done := make(chan error, 1) + go func() { + done <- a.Acquire(50 * time.Millisecond) + }() + + select { + case err := <-done: + // ErrWALFull is expected (pressure stays above hard, times out). + if err != ErrWALFull { + t.Fatalf("expected ErrWALFull, got %v", err) + } + case <-time.After(2 * time.Second): + t.Fatal("Acquire hung — possible Inf delay from division by zero") + } +} + +// ──────────────────────────────────────────────────────────── +// QA-CP11A3-2: SoftZoneExactBoundary_DelayIsZero +// +// When pressure == softMark exactly, scale = 0, delay = 0. +// softPressureWaitNs should NOT increase (delay <= 0 skips sleep). +// But hitSoft should still be true → SoftAdmitTotal increments. 
+// ──────────────────────────────────────────────────────────── +func TestQA_CP11A3_SoftZoneExactBoundary_DelayIsZero(t *testing.T) { + m := NewEngineMetrics() + + a := NewWALAdmission(WALAdmissionConfig{ + MaxConcurrent: 16, + SoftWatermark: 0.7, + HardWatermark: 0.9, + WALUsedFn: func() float64 { return 0.7 }, // exactly at soft mark + NotifyFn: func() {}, + ClosedFn: func() bool { return false }, + Metrics: m, + }) + a.sleepFn = func(d time.Duration) { + t.Fatalf("sleep should not be called when delay=0, but called with %v", d) + } + + if err := a.Acquire(100 * time.Millisecond); err != nil { + t.Fatalf("Acquire: %v", err) + } + a.Release() + + // SoftAdmitTotal should increment (we entered the soft branch). + if m.WALAdmitSoftTotal.Load() != 1 { + t.Fatalf("WALAdmitSoftTotal = %d, want 1", m.WALAdmitSoftTotal.Load()) + } + // But no sleep → softPressureWaitNs stays 0. + if a.SoftPressureWaitNs() != 0 { + t.Fatalf("SoftPressureWaitNs = %d, want 0 (no delay at exact boundary)", a.SoftPressureWaitNs()) + } +} + +// ──────────────────────────────────────────────────────────── +// QA-CP11A3-3: ConcurrentHardWaiters_TimeAccumulates +// +// 8 goroutines enter hard zone simultaneously. Each waits ~5ms. +// Total hardPressureWaitNs should be roughly 8 × 5ms, proving +// atomic accumulation doesn't lose contributions. 
+// ──────────────────────────────────────────────────────────── +func TestQA_CP11A3_ConcurrentHardWaiters_TimeAccumulates(t *testing.T) { + m := NewEngineMetrics() + var pressure atomic.Int64 + pressure.Store(95) // above hard mark + + a := NewWALAdmission(WALAdmissionConfig{ + MaxConcurrent: 16, + SoftWatermark: 0.7, + HardWatermark: 0.9, + WALUsedFn: func() float64 { return float64(pressure.Load()) / 100.0 }, + NotifyFn: func() {}, + ClosedFn: func() bool { return false }, + Metrics: m, + }) + + var sleepCalls atomic.Int64 + a.sleepFn = func(d time.Duration) { + time.Sleep(1 * time.Millisecond) + // After enough total sleeps across all goroutines, drop pressure. + if sleepCalls.Add(1) >= 20 { + pressure.Store(50) + } + } + + const workers = 8 + var wg sync.WaitGroup + for i := 0; i < workers; i++ { + wg.Add(1) + go func() { + defer wg.Done() + if err := a.Acquire(5 * time.Second); err != nil { + t.Errorf("Acquire: %v", err) + } + a.Release() + }() + } + wg.Wait() + + // All 8 must have entered hard zone. + if m.WALAdmitHardTotal.Load() < uint64(workers) { + t.Fatalf("WALAdmitHardTotal = %d, want >= %d", m.WALAdmitHardTotal.Load(), workers) + } + // Accumulated hard wait should be > 0, reflecting contributions from all goroutines. + if a.HardPressureWaitNs() <= 0 { + t.Fatal("HardPressureWaitNs should be > 0 after concurrent hard-zone waits") + } +} + +// ──────────────────────────────────────────────────────────── +// QA-CP11A3-4: PressureStateAndAcquireRace +// +// One goroutine oscillates walUsed, another reads PressureState +// rapidly. Must not panic, must always return a valid state. 
+// ──────────────────────────────────────────────────────────── +func TestQA_CP11A3_PressureStateAndAcquireRace(t *testing.T) { + var pressure atomic.Int64 + pressure.Store(50) + + a := NewWALAdmission(WALAdmissionConfig{ + MaxConcurrent: 16, + SoftWatermark: 0.7, + HardWatermark: 0.9, + WALUsedFn: func() float64 { return float64(pressure.Load()) / 100.0 }, + NotifyFn: func() {}, + ClosedFn: func() bool { return false }, + Metrics: NewEngineMetrics(), + }) + a.sleepFn = func(d time.Duration) { time.Sleep(100 * time.Microsecond) } + + var wg sync.WaitGroup + const rounds = 200 + + // Goroutine 1: oscillate pressure. + wg.Add(1) + go func() { + defer wg.Done() + levels := []int64{30, 75, 95, 50, 80, 92, 10} + for i := 0; i < rounds; i++ { + pressure.Store(levels[i%len(levels)]) + } + }() + + // Goroutine 2: read PressureState. + wg.Add(1) + go func() { + defer wg.Done() + valid := map[string]bool{"normal": true, "soft": true, "hard": true} + for i := 0; i < rounds; i++ { + s := a.PressureState() + if !valid[s] { + t.Errorf("PressureState() = %q — not a valid state", s) + return + } + } + }() + + // Goroutine 3: Acquire/Release rapidly. + wg.Add(1) + go func() { + defer wg.Done() + for i := 0; i < rounds/2; i++ { + err := a.Acquire(20 * time.Millisecond) + if err == nil { + a.Release() + } + } + }() + + wg.Wait() +} + +// ──────────────────────────────────────────────────────────── +// QA-CP11A3-5: TimeInZoneMonotonicity +// +// softPressureWaitNs and hardPressureWaitNs must be monotonically +// non-decreasing across reads, even under concurrent writes. 
+// ──────────────────────────────────────────────────────────── +func TestQA_CP11A3_TimeInZoneMonotonicity(t *testing.T) { + m := NewEngineMetrics() + var pressure atomic.Int64 + pressure.Store(80) // soft zone + + a := NewWALAdmission(WALAdmissionConfig{ + MaxConcurrent: 16, + SoftWatermark: 0.7, + HardWatermark: 0.9, + WALUsedFn: func() float64 { return float64(pressure.Load()) / 100.0 }, + NotifyFn: func() {}, + ClosedFn: func() bool { return false }, + Metrics: m, + }) + a.sleepFn = func(d time.Duration) { time.Sleep(100 * time.Microsecond) } + + var wg sync.WaitGroup + const writers = 4 + const rounds = 30 + + // Writers produce soft-zone and hard-zone waits. + for i := 0; i < writers; i++ { + wg.Add(1) + go func(id int) { + defer wg.Done() + for j := 0; j < rounds; j++ { + if j%5 == 0 { + pressure.Store(95) // hard + } else { + pressure.Store(80) // soft + } + err := a.Acquire(50 * time.Millisecond) + if err == nil { + a.Release() + } + // Drop back so next Acquire can succeed. + pressure.Store(50) + } + }(i) + } + + // Reader checks monotonicity. + wg.Add(1) + go func() { + defer wg.Done() + var prevSoft, prevHard int64 + for i := 0; i < rounds*writers; i++ { + soft := a.SoftPressureWaitNs() + hard := a.HardPressureWaitNs() + if soft < prevSoft { + t.Errorf("SoftPressureWaitNs decreased: %d -> %d", prevSoft, soft) + } + if hard < prevHard { + t.Errorf("HardPressureWaitNs decreased: %d -> %d", prevHard, hard) + } + prevSoft = soft + prevHard = hard + } + }() + + wg.Wait() +} + +// ──────────────────────────────────────────────────────────── +// QA-CP11A3-6: WALGuidance_ZeroInputs +// +// Zero walSize, zero blockSize, zero maxConcurrent, empty hint. +// Must not panic or produce invalid results. +// ──────────────────────────────────────────────────────────── +func TestQA_CP11A3_WALGuidance_ZeroInputs(t *testing.T) { + // All zeros. 
+ r := WALSizingGuidance(0, 0, "") + if r.Level != "warn" { + t.Errorf("zero walSize: Level = %q, want warn", r.Level) + } + + // Zero blockSize: absMin = 0*64 = 0. Only workload minimum check fires. + r = WALSizingGuidance(0, 0, WorkloadGeneral) + if r.Level != "warn" { + t.Errorf("zero walSize+blockSize: Level = %q, want warn", r.Level) + } + + // Zero walSize but nonzero blockSize. + r = WALSizingGuidance(0, 4096, WorkloadDatabase) + if r.Level != "warn" { + t.Errorf("zero walSize: Level = %q, want warn", r.Level) + } + if len(r.Warnings) < 2 { + t.Errorf("expected both workload + absolute minimum warnings, got %d", len(r.Warnings)) + } + + // EvaluateWALConfig with zero maxConcurrent should not trigger concurrency warning. + r = EvaluateWALConfig(0, 4096, 0, WorkloadGeneral) + // walSize=0 still triggers sizing warning. + if r.Level != "warn" { + t.Errorf("Level = %q, want warn for zero walSize", r.Level) + } +} + +// ──────────────────────────────────────────────────────────── +// QA-CP11A3-7: WALGuidance_OverflowSafe +// +// Very large blockSize × minWALEntries might overflow uint64. +// (64 × 2^60 does NOT overflow, but let's test near-boundary.) +// ──────────────────────────────────────────────────────────── +func TestQA_CP11A3_WALGuidance_OverflowSafe(t *testing.T) { + // Large blockSize: 256MB blocks × 64 = 16GB minimum. + // walSize = 1GB → should warn (16GB > 1GB). + r := WALSizingGuidance(1<<30, 256<<20, WorkloadGeneral) + if r.Level != "warn" { + t.Errorf("Level = %q, want warn (1GB WAL < 16GB absMin)", r.Level) + } + + // Extreme: blockSize = 1<<40 (1TB). 64 × 1TB = 64TB. + // uint64 can hold 18 EB — no overflow. + r = WALSizingGuidance(1<<50, 1<<40, WorkloadThroughput) + // 1PB WAL with 1TB blocks: absMin = 64TB, 1PB > 64TB → ok for absolute. + // 1PB > 128MB (throughput min) → ok for workload. 
+ if r.Level != "ok" { + t.Errorf("Level = %q, want ok for huge WAL", r.Level) + } +} + +// ──────────────────────────────────────────────────────────── +// QA-CP11A3-8: WALStatusSnapshot_PartialInit +// +// BlockVol with Metrics but nil walAdmission, and vice versa. +// WALStatus must return coherent defaults for the nil side +// and real values for the non-nil side. +// ──────────────────────────────────────────────────────────── +func TestQA_CP11A3_WALStatusSnapshot_PartialInit(t *testing.T) { + // Case 1: Metrics set, walAdmission nil. + m := NewEngineMetrics() + m.WALAdmitSoftTotal.Add(42) + m.WALAdmitHardTotal.Add(7) + vol1 := &BlockVol{Metrics: m} + + ws := vol1.WALStatus() + if ws.PressureState != "normal" { + t.Errorf("nil admission: PressureState = %q, want normal", ws.PressureState) + } + if ws.SoftAdmitTotal != 42 { + t.Errorf("SoftAdmitTotal = %d, want 42", ws.SoftAdmitTotal) + } + if ws.HardAdmitTotal != 7 { + t.Errorf("HardAdmitTotal = %d, want 7", ws.HardAdmitTotal) + } + // Pressure wait should be 0 (no admission controller). + if ws.SoftPressureWaitSec != 0 || ws.HardPressureWaitSec != 0 { + t.Errorf("nil admission: pressure wait should be 0") + } + + // Case 2: walAdmission set, Metrics nil. + a := NewWALAdmission(WALAdmissionConfig{ + MaxConcurrent: 16, + SoftWatermark: 0.65, + HardWatermark: 0.85, + WALUsedFn: func() float64 { return 0.7 }, + NotifyFn: func() {}, + ClosedFn: func() bool { return false }, + }) + vol2 := &BlockVol{walAdmission: a} + + ws2 := vol2.WALStatus() + if ws2.PressureState != "soft" { + t.Errorf("PressureState = %q, want soft (0.7 >= 0.65)", ws2.PressureState) + } + if ws2.SoftWatermark != 0.65 { + t.Errorf("SoftWatermark = %f, want 0.65", ws2.SoftWatermark) + } + // Metrics fields should be zero (nil Metrics). 
+ if ws2.SoftAdmitTotal != 0 || ws2.HardAdmitTotal != 0 || ws2.TimeoutTotal != 0 { + t.Errorf("nil metrics: counters should be 0") + } +} + +// ──────────────────────────────────────────────────────────── +// QA-CP11A3-9: ObserverPanic_ContainedOrDocumented +// +// If WALAdmitWaitObserver panics, RecordWALAdmit is called from +// Acquire → recordAdmit. A panic in the observer would crash the +// writer goroutine. This test documents whether the panic is +// recovered or propagated. +// ──────────────────────────────────────────────────────────── +func TestQA_CP11A3_ObserverPanic_DocumentedBehavior(t *testing.T) { + m := NewEngineMetrics() + m.WALAdmitWaitObserver = func(s float64) { panic("boom") } + + // RecordWALAdmit calls the observer. If it panics, the caller panics. + // This is expected (same as prometheus.Histogram.Observe panicking). + // Document that the observer must not panic. + panicked := false + func() { + defer func() { + if r := recover(); r != nil { + panicked = true + } + }() + m.RecordWALAdmit(1*time.Millisecond, false, false, false) + }() + + if !panicked { + t.Fatal("expected panic from observer — if recovered, update this test") + } + + // Verify counters were NOT updated (panic happened before completion). + // Actually, the observer is called AFTER WALAdmitTotal.Add(1) and + // walAdmitWaitNs.record(). Let's verify the counter state. + if m.WALAdmitTotal.Load() != 1 { + t.Errorf("WALAdmitTotal = %d — should be 1 (incremented before observer)", m.WALAdmitTotal.Load()) + } + // soft/hard/timeout flags are processed AFTER observer — panic skips them. + // With soft=false, hard=false, timedOut=false there's nothing to skip, + // but the counters should reflect what happened before the panic. +} + +// ──────────────────────────────────────────────────────────── +// QA-CP11A3-10: ConcurrentWALStatusReads +// +// Multiple goroutines read WALStatus while Acquire/Release runs. +// Must not panic. 
Fields should be internally consistent +// (SoftAdmitTotal >= 0, HardPressureWaitSec >= 0, etc.) +// ──────────────────────────────────────────────────────────── +func TestQA_CP11A3_ConcurrentWALStatusReads(t *testing.T) { + m := NewEngineMetrics() + var pressure atomic.Int64 + pressure.Store(50) + + a := NewWALAdmission(WALAdmissionConfig{ + MaxConcurrent: 16, + SoftWatermark: 0.7, + HardWatermark: 0.9, + WALUsedFn: func() float64 { return float64(pressure.Load()) / 100.0 }, + NotifyFn: func() {}, + ClosedFn: func() bool { return false }, + Metrics: m, + }) + a.sleepFn = func(d time.Duration) { time.Sleep(50 * time.Microsecond) } + + vol := &BlockVol{ + Metrics: m, + walAdmission: a, + } + + var wg sync.WaitGroup + const rounds = 100 + + // Writers with varying pressure. + for i := 0; i < 4; i++ { + wg.Add(1) + go func() { + defer wg.Done() + levels := []int64{50, 75, 95, 60, 85} + for j := 0; j < rounds; j++ { + pressure.Store(levels[j%len(levels)]) + if err := a.Acquire(20 * time.Millisecond); err == nil { + a.Release() + } + pressure.Store(50) // reset for next round + } + }() + } + + // Concurrent WALStatus readers. 
+ for i := 0; i < 4; i++ { + wg.Add(1) + go func() { + defer wg.Done() + valid := map[string]bool{"normal": true, "soft": true, "hard": true} + for j := 0; j < rounds*2; j++ { + ws := vol.WALStatus() + if !valid[ws.PressureState] { + t.Errorf("invalid PressureState: %q", ws.PressureState) + return + } + if ws.UsedFraction < 0 || ws.UsedFraction > 1.01 { + t.Errorf("UsedFraction out of range: %f", ws.UsedFraction) + return + } + if ws.SoftPressureWaitSec < 0 { + t.Errorf("SoftPressureWaitSec negative: %f", ws.SoftPressureWaitSec) + return + } + if ws.HardPressureWaitSec < 0 { + t.Errorf("HardPressureWaitSec negative: %f", ws.HardPressureWaitSec) + return + } + } + }() + } + + wg.Wait() +} diff --git a/weed/storage/blockvol/testrunner/actions/devops.go b/weed/storage/blockvol/testrunner/actions/devops.go index d3d4724df..5a2485981 100644 --- a/weed/storage/blockvol/testrunner/actions/devops.go +++ b/weed/storage/blockvol/testrunner/actions/devops.go @@ -26,6 +26,10 @@ func RegisterDevOpsActions(r *tr.Registry) { r.RegisterFunc("delete_block_volume", tr.TierDevOps, deleteBlockVolume) r.RegisterFunc("wait_block_servers", tr.TierDevOps, waitBlockServers) r.RegisterFunc("cluster_status", tr.TierDevOps, clusterStatus) + r.RegisterFunc("wait_block_primary", tr.TierDevOps, waitBlockPrimary) + r.RegisterFunc("assert_block_field", tr.TierDevOps, assertBlockField) + r.RegisterFunc("block_status", tr.TierDevOps, blockStatus) + r.RegisterFunc("block_promote", tr.TierDevOps, blockPromote) } // setISCSIVars sets the save_as_iscsi_host/port/addr/iqn vars from a VolumeInfo. @@ -434,6 +438,222 @@ func waitBlockServers(ctx context.Context, actx *tr.ActionContext, act tr.Action } } +// waitBlockPrimary polls lookup until the volume's primary server matches (or differs from) expected. +// Params: name, expected (server addr to wait for) OR not (server addr to wait to change from), timeout (default 60s). +// Sets save_as vars from the final lookup. 
+func waitBlockPrimary(ctx context.Context, actx *tr.ActionContext, act tr.Action) (map[string]string, error) { + client, err := blockAPIClient(actx, act) + if err != nil { + return nil, fmt.Errorf("wait_block_primary: %w", err) + } + + name := act.Params["name"] + if name == "" { + return nil, fmt.Errorf("wait_block_primary: name param required") + } + expected := act.Params["expected"] + notServer := act.Params["not"] + if expected == "" && notServer == "" { + return nil, fmt.Errorf("wait_block_primary: expected or not param required") + } + + timeout := 60 * time.Second + if t, ok := act.Params["timeout"]; ok { + if d, err := parseDuration(t); err == nil { + timeout = d + } + } + + timeoutCtx, cancel := context.WithTimeout(ctx, timeout) + defer cancel() + + ticker := time.NewTicker(2 * time.Second) + defer ticker.Stop() + + pollCount := 0 + for { + select { + case <-timeoutCtx.Done(): + return nil, fmt.Errorf("wait_block_primary: timeout after %s waiting for primary change on %s", timeout, name) + case <-ticker.C: + pollCount++ + info, err := client.LookupVolume(timeoutCtx, name) + if err != nil { + if pollCount <= 3 { + actx.Log(" poll %d: lookup error: %v", pollCount, err) + } + continue + } + if pollCount <= 3 || pollCount%10 == 0 { + actx.Log(" poll %d: %s primary=%s role=%s", pollCount, name, info.VolumeServer, info.Role) + } + + match := false + if expected != "" && info.VolumeServer == expected { + match = true + } + if notServer != "" && info.VolumeServer != notServer && info.VolumeServer != "" { + match = true + } + if match { + actx.Log(" primary for %s is now %s (epoch=%d)", name, info.VolumeServer, info.Epoch) + if act.SaveAs != "" { + setISCSIVars(actx, act.SaveAs, info) + actx.Vars[act.SaveAs+"_server"] = info.VolumeServer + actx.Vars[act.SaveAs+"_epoch"] = strconv.FormatUint(info.Epoch, 10) + actx.Vars[act.SaveAs+"_role"] = info.Role + } + return map[string]string{"value": info.VolumeServer}, nil + } + } + } +} + +// assertBlockField looks up a 
block volume and asserts a specific field matches the expected value. +// Params: name, field (one of: volume_server, role, status, epoch, size_bytes, replica_server, +// replica_factor, health_score, replica_degraded, durability_mode, iscsi_addr, iqn), expected. +func assertBlockField(ctx context.Context, actx *tr.ActionContext, act tr.Action) (map[string]string, error) { + client, err := blockAPIClient(actx, act) + if err != nil { + return nil, fmt.Errorf("assert_block_field: %w", err) + } + + name := act.Params["name"] + if name == "" { + return nil, fmt.Errorf("assert_block_field: name param required") + } + field := act.Params["field"] + if field == "" { + return nil, fmt.Errorf("assert_block_field: field param required") + } + expected := act.Params["expected"] + if expected == "" { + return nil, fmt.Errorf("assert_block_field: expected param required") + } + + info, err := client.LookupVolume(ctx, name) + if err != nil { + return nil, fmt.Errorf("assert_block_field: lookup %s: %w", name, err) + } + + actual, err := extractVolumeField(info, field) + if err != nil { + return nil, fmt.Errorf("assert_block_field: %w", err) + } + + if actual != expected { + return nil, fmt.Errorf("assert_block_field: %s.%s = %q, expected %q", name, field, actual, expected) + } + actx.Log(" assert %s.%s == %q OK", name, field, expected) + return map[string]string{"value": actual}, nil +} + +// extractVolumeField extracts a named field from VolumeInfo as a string. 
+func extractVolumeField(info *blockapi.VolumeInfo, field string) (string, error) { + switch field { + case "volume_server": + return info.VolumeServer, nil + case "role": + return info.Role, nil + case "status": + return info.Status, nil + case "epoch": + return strconv.FormatUint(info.Epoch, 10), nil + case "size_bytes": + return strconv.FormatUint(info.SizeBytes, 10), nil + case "replica_server": + return info.ReplicaServer, nil + case "replica_factor": + return strconv.Itoa(info.ReplicaFactor), nil + case "health_score": + return fmt.Sprintf("%.2f", info.HealthScore), nil + case "replica_degraded": + return strconv.FormatBool(info.ReplicaDegraded), nil + case "durability_mode": + return info.DurabilityMode, nil + case "iscsi_addr": + return info.ISCSIAddr, nil + case "iqn": + return info.IQN, nil + case "name": + return info.Name, nil + case "replica_iscsi_addr": + return info.ReplicaISCSIAddr, nil + case "replica_iqn": + return info.ReplicaIQN, nil + case "replica_data_addr": + return info.ReplicaDataAddr, nil + case "replica_ctrl_addr": + return info.ReplicaCtrlAddr, nil + default: + return "", fmt.Errorf("unknown field %q", field) + } +} + +// blockStatus fetches block registry status metrics from master. +// Sets save_as_promotions_total, save_as_failovers_total, etc. 
+func blockStatus(ctx context.Context, actx *tr.ActionContext, act tr.Action) (map[string]string, error) { + client, err := blockAPIClient(actx, act) + if err != nil { + return nil, fmt.Errorf("block_status: %w", err) + } + + status, err := client.BlockStatus(ctx) + if err != nil { + return nil, fmt.Errorf("block_status: %w", err) + } + + actx.Log(" block status: volumes=%d servers=%d promotions=%d failovers=%d rebuilds=%d", + status.VolumeCount, status.ServerCount, status.PromotionsTotal, status.FailoversTotal, status.RebuildsTotal) + + if act.SaveAs != "" { + actx.Vars[act.SaveAs+"_volume_count"] = strconv.Itoa(status.VolumeCount) + actx.Vars[act.SaveAs+"_server_count"] = strconv.Itoa(status.ServerCount) + actx.Vars[act.SaveAs+"_promotions_total"] = strconv.FormatInt(status.PromotionsTotal, 10) + actx.Vars[act.SaveAs+"_failovers_total"] = strconv.FormatInt(status.FailoversTotal, 10) + actx.Vars[act.SaveAs+"_rebuilds_total"] = strconv.FormatInt(status.RebuildsTotal, 10) + actx.Vars[act.SaveAs+"_queue_depth"] = strconv.Itoa(status.AssignmentQueueDepth) + } + + jsonBytes, _ := json.Marshal(status) + return map[string]string{"value": string(jsonBytes)}, nil +} + +// blockPromote triggers a manual promotion for a block volume. +// Params: name, target_server (optional, empty=auto), force (optional bool), reason (optional). 
+func blockPromote(ctx context.Context, actx *tr.ActionContext, act tr.Action) (map[string]string, error) { + client, err := blockAPIClient(actx, act) + if err != nil { + return nil, fmt.Errorf("block_promote: %w", err) + } + + name := act.Params["name"] + if name == "" { + return nil, fmt.Errorf("block_promote: name param required") + } + + force := false + if f := act.Params["force"]; f == "true" || f == "1" { + force = true + } + + resp, err := client.PromoteVolume(ctx, name, blockapi.PromoteVolumeRequest{ + TargetServer: act.Params["target_server"], + Force: force, + Reason: act.Params["reason"], + }) + if err != nil { + return nil, fmt.Errorf("block_promote: %w", err) + } + + actx.Log(" promoted %s -> primary=%s epoch=%d", name, resp.NewPrimary, resp.Epoch) + if act.SaveAs != "" { + actx.Vars[act.SaveAs+"_server"] = resp.NewPrimary + actx.Vars[act.SaveAs+"_epoch"] = strconv.FormatUint(resp.Epoch, 10) + } + return map[string]string{"value": resp.NewPrimary}, nil +} + // clusterStatus fetches the full cluster status JSON. 
func clusterStatus(ctx context.Context, actx *tr.ActionContext, act tr.Action) (map[string]string, error) { node, err := getNode(actx, act.Node) diff --git a/weed/storage/blockvol/testrunner/actions/devops_test.go b/weed/storage/blockvol/testrunner/actions/devops_test.go index 1e0335762..e524c0df8 100644 --- a/weed/storage/blockvol/testrunner/actions/devops_test.go +++ b/weed/storage/blockvol/testrunner/actions/devops_test.go @@ -23,6 +23,10 @@ func TestDevOpsActions_Registration(t *testing.T) { "delete_block_volume", "wait_block_servers", "cluster_status", + "wait_block_primary", + "assert_block_field", + "block_status", + "block_promote", } for _, name := range expected { @@ -39,8 +43,8 @@ func TestDevOpsActions_Tier(t *testing.T) { byTier := registry.ListByTier() devopsActions := byTier[tr.TierDevOps] - if len(devopsActions) != 11 { - t.Errorf("devops tier has %d actions, want 11", len(devopsActions)) + if len(devopsActions) != 15 { + t.Errorf("devops tier has %d actions, want 15", len(devopsActions)) } // Verify all are in devops tier. @@ -84,11 +88,11 @@ func TestAllActions_Registration(t *testing.T) { if n := len(byTier[tr.TierCore]); n != 11 { t.Errorf("core: %d, want 11", n) } - if n := len(byTier[tr.TierBlock]); n != 56 { - t.Errorf("block: %d, want 56", n) + if n := len(byTier[tr.TierBlock]); n != 58 { + t.Errorf("block: %d, want 58", n) } - if n := len(byTier[tr.TierDevOps]); n != 11 { - t.Errorf("devops: %d, want 11", n) + if n := len(byTier[tr.TierDevOps]); n != 15 { + t.Errorf("devops: %d, want 15", n) } if n := len(byTier[tr.TierChaos]); n != 5 { t.Errorf("chaos: %d, want 5", n) @@ -97,13 +101,13 @@ func TestAllActions_Registration(t *testing.T) { t.Errorf("k8s: %d, want 14", n) } - // Total should be 97 (92 prev + 4 devops: expand/lookup/delete/wait_block_servers + 1 block: iscsi_login_direct). + // Total should be 103 (99 prev + 4 devops: wait_block_primary, assert_block_field, block_status, block_promote). 
total := 0 for _, actions := range byTier { total += len(actions) } - if total != 97 { - t.Errorf("total actions: %d, want 97", total) + if total != 103 { + t.Errorf("total actions: %d, want 103", total) } } diff --git a/weed/storage/blockvol/testrunner/actions/snapshot.go b/weed/storage/blockvol/testrunner/actions/snapshot.go index 977b97567..35b699068 100644 --- a/weed/storage/blockvol/testrunner/actions/snapshot.go +++ b/weed/storage/blockvol/testrunner/actions/snapshot.go @@ -8,6 +8,7 @@ import ( "time" tr "github.com/seaweedfs/seaweedfs/weed/storage/blockvol/testrunner" + "github.com/seaweedfs/seaweedfs/weed/storage/blockvol/testrunner/infra" ) // RegisterSnapshotActions registers snapshot and resize actions. @@ -18,6 +19,8 @@ func RegisterSnapshotActions(r *tr.Registry) { r.RegisterFunc("resize", tr.TierBlock, resizeAction) r.RegisterFunc("iscsi_rescan", tr.TierBlock, iscsiRescan) r.RegisterFunc("get_block_size", tr.TierBlock, getBlockSize) + r.RegisterFunc("snapshot_export_s3", tr.TierBlock, snapshotExportS3) + r.RegisterFunc("snapshot_import_s3", tr.TierBlock, snapshotImportS3) } func snapshotCreate(ctx context.Context, actx *tr.ActionContext, act tr.Action) (map[string]string, error) { @@ -181,3 +184,89 @@ func parseHumanSize(s string) (uint64, error) { } return val * multiplier, nil } + +// snapshotExportS3 exports a snapshot from a target to an S3 bucket. +// Params: bucket, key_prefix, s3_endpoint, s3_access_key, s3_secret_key, s3_region, snapshot_id (optional). +// Returns: manifest_key, data_key, size_bytes, sha256. 
+func snapshotExportS3(ctx context.Context, actx *tr.ActionContext, act tr.Action) (map[string]string, error) { + tgt, err := getHATarget(actx, act.Target) + if err != nil { + return nil, err + } + + opts := infra.ExportS3Opts{ + Bucket: act.Params["bucket"], + KeyPrefix: act.Params["key_prefix"], + S3Endpoint: act.Params["s3_endpoint"], + S3AccessKey: act.Params["s3_access_key"], + S3SecretKey: act.Params["s3_secret_key"], + S3Region: act.Params["s3_region"], + } + if opts.Bucket == "" || opts.S3Endpoint == "" { + return nil, fmt.Errorf("snapshot_export_s3: bucket and s3_endpoint required") + } + if idStr := act.Params["snapshot_id"]; idStr != "" { + id, err := strconv.ParseUint(idStr, 10, 32) + if err != nil { + return nil, fmt.Errorf("snapshot_export_s3: invalid snapshot_id %q: %w", idStr, err) + } + opts.SnapshotID = uint32(id) + } + + result, err := tgt.ExportSnapshotS3(ctx, opts) + if err != nil { + return nil, fmt.Errorf("snapshot_export_s3: %w", err) + } + + actx.Log(" exported to s3://%s/%s (%d bytes, sha256=%s)", opts.Bucket, result.DataKey, result.SizeBytes, result.SHA256) + out := map[string]string{ + "value": result.SHA256, + } + if act.SaveAs != "" { + actx.Vars[act.SaveAs+"_manifest_key"] = result.ManifestKey + actx.Vars[act.SaveAs+"_data_key"] = result.DataKey + actx.Vars[act.SaveAs+"_size_bytes"] = strconv.FormatUint(result.SizeBytes, 10) + actx.Vars[act.SaveAs+"_sha256"] = result.SHA256 + } + return out, nil +} + +// snapshotImportS3 imports a snapshot from an S3 bucket into a target. +// Params: bucket, manifest_key, s3_endpoint, s3_access_key, s3_secret_key, s3_region, allow_overwrite. +// Returns: size_bytes, sha256. 
+func snapshotImportS3(ctx context.Context, actx *tr.ActionContext, act tr.Action) (map[string]string, error) { + tgt, err := getHATarget(actx, act.Target) + if err != nil { + return nil, err + } + + opts := infra.ImportS3Opts{ + Bucket: act.Params["bucket"], + ManifestKey: act.Params["manifest_key"], + S3Endpoint: act.Params["s3_endpoint"], + S3AccessKey: act.Params["s3_access_key"], + S3SecretKey: act.Params["s3_secret_key"], + S3Region: act.Params["s3_region"], + } + if opts.Bucket == "" || opts.ManifestKey == "" || opts.S3Endpoint == "" { + return nil, fmt.Errorf("snapshot_import_s3: bucket, manifest_key, and s3_endpoint required") + } + if act.Params["allow_overwrite"] == "true" { + opts.AllowOverwrite = true + } + + result, err := tgt.ImportSnapshotS3(ctx, opts) + if err != nil { + return nil, fmt.Errorf("snapshot_import_s3: %w", err) + } + + actx.Log(" imported %d bytes (sha256=%s)", result.SizeBytes, result.SHA256) + out := map[string]string{ + "value": result.SHA256, + } + if act.SaveAs != "" { + actx.Vars[act.SaveAs+"_size_bytes"] = strconv.FormatUint(result.SizeBytes, 10) + actx.Vars[act.SaveAs+"_sha256"] = result.SHA256 + } + return out, nil +} diff --git a/weed/storage/blockvol/testrunner/infra/ha_target.go b/weed/storage/blockvol/testrunner/infra/ha_target.go index 9b1436eaa..72d150040 100644 --- a/weed/storage/blockvol/testrunner/infra/ha_target.go +++ b/weed/storage/blockvol/testrunner/infra/ha_target.go @@ -478,6 +478,107 @@ func (h *HATarget) Resize(ctx context.Context, newSizeBytes uint64) error { return nil } +// ExportSnapshotS3 sends POST /export with S3 credentials. +// Returns the manifest key and data SHA-256 on success. 
+func (h *HATarget) ExportSnapshotS3(ctx context.Context, opts ExportS3Opts) (*ExportS3Result, error) { + reqBody := map[string]interface{}{ + "bucket": opts.Bucket, + "key_prefix": opts.KeyPrefix, + "s3_endpoint": opts.S3Endpoint, + "s3_region": opts.S3Region, + } + if opts.S3AccessKey != "" { + reqBody["s3_access_key"] = opts.S3AccessKey + reqBody["s3_secret_key"] = opts.S3SecretKey + } + if opts.SnapshotID > 0 { + reqBody["snapshot_id"] = opts.SnapshotID + } + + code, body, err := h.curlPost(ctx, "/export", reqBody) + if err != nil { + return nil, fmt.Errorf("export snapshot s3: %w", err) + } + if code != http.StatusOK { + return nil, fmt.Errorf("export snapshot s3 failed (HTTP %d): %s", code, body) + } + + var resp ExportS3Result + if err := json.NewDecoder(strings.NewReader(body)).Decode(&resp); err != nil { + return nil, fmt.Errorf("decode export response: %w", err) + } + return &resp, nil +} + +// ImportSnapshotS3 sends POST /import with S3 credentials and manifest key. +func (h *HATarget) ImportSnapshotS3(ctx context.Context, opts ImportS3Opts) (*ImportS3Result, error) { + reqBody := map[string]interface{}{ + "bucket": opts.Bucket, + "manifest_key": opts.ManifestKey, + "s3_endpoint": opts.S3Endpoint, + "s3_region": opts.S3Region, + } + if opts.S3AccessKey != "" { + reqBody["s3_access_key"] = opts.S3AccessKey + reqBody["s3_secret_key"] = opts.S3SecretKey + } + if opts.AllowOverwrite { + reqBody["allow_overwrite"] = true + } + + code, body, err := h.curlPost(ctx, "/import", reqBody) + if err != nil { + return nil, fmt.Errorf("import snapshot s3: %w", err) + } + if code != http.StatusOK { + return nil, fmt.Errorf("import snapshot s3 failed (HTTP %d): %s", code, body) + } + + var resp ImportS3Result + if err := json.NewDecoder(strings.NewReader(body)).Decode(&resp); err != nil { + return nil, fmt.Errorf("decode import response: %w", err) + } + return &resp, nil +} + +// ExportS3Opts configures a snapshot export to S3. 
+type ExportS3Opts struct { + Bucket string + KeyPrefix string + S3Endpoint string + S3AccessKey string + S3SecretKey string + S3Region string + SnapshotID uint32 +} + +// ExportS3Result is the response from POST /export. +type ExportS3Result struct { + OK bool `json:"ok"` + ManifestKey string `json:"manifest_key"` + DataKey string `json:"data_key"` + SizeBytes uint64 `json:"size_bytes"` + SHA256 string `json:"sha256"` +} + +// ImportS3Opts configures a snapshot import from S3. +type ImportS3Opts struct { + Bucket string + ManifestKey string + S3Endpoint string + S3AccessKey string + S3SecretKey string + S3Region string + AllowOverwrite bool +} + +// ImportS3Result is the response from POST /import. +type ImportS3Result struct { + OK bool `json:"ok"` + SizeBytes uint64 `json:"size_bytes"` + SHA256 string `json:"sha256"` +} + // WaitForRole polls GET /status until the target reports the expected role. func (h *HATarget) WaitForRole(ctx context.Context, expectedRole string) error { for { diff --git a/weed/storage/blockvol/testrunner/scenarios/cp11b3-auto-failover.yaml b/weed/storage/blockvol/testrunner/scenarios/cp11b3-auto-failover.yaml new file mode 100644 index 000000000..d93ae1af5 --- /dev/null +++ b/weed/storage/blockvol/testrunner/scenarios/cp11b3-auto-failover.yaml @@ -0,0 +1,246 @@ +name: cp11b3-auto-failover +timeout: 10m +env: + repo_dir: "/opt/work/seaweedfs" + master_url: "http://192.168.1.184:9434" + +# Tests: T1 (candidate evaluation), T2 (orphan re-evaluation), T6 (preflight/status) +# Flow: Create RF=2 → write data → kill primary → master auto-promotes → verify data + metrics + +topology: + nodes: + target_node: + host: "192.168.1.184" + user: testdev + key: "/opt/work/testdev_key" + client_node: + host: "192.168.1.181" + user: testdev + key: "/opt/work/testdev_key" + +phases: + # Phase 1: Clean slate + - name: setup + actions: + - action: kill_stale + node: target_node + - action: kill_stale + node: client_node + iscsi_cleanup: "true" + - action: 
exec + node: target_node + cmd: "rm -rf /tmp/sw-b3-master /tmp/sw-b3-vs1 /tmp/sw-b3-vs2" + root: "true" + + # Phase 2: Start cluster + - name: start_cluster + actions: + - action: exec + node: target_node + cmd: "mkdir -p /tmp/sw-b3-master /tmp/sw-b3-vs1/blocks /tmp/sw-b3-vs2/blocks" + - action: start_weed_master + node: target_node + port: "9434" + dir: "/tmp/sw-b3-master" + save_as: master_pid + - action: wait_cluster_ready + node: target_node + master_url: "http://localhost:9434" + timeout: 30s + - action: start_weed_volume + node: target_node + port: "18190" + master: "localhost:9434" + dir: "/tmp/sw-b3-vs1" + extra_args: "-block.dir=/tmp/sw-b3-vs1/blocks -block.listen=:3277 -ip=192.168.1.184" + save_as: vs1_pid + - action: start_weed_volume + node: target_node + port: "18191" + master: "localhost:9434" + dir: "/tmp/sw-b3-vs2" + extra_args: "-block.dir=/tmp/sw-b3-vs2/blocks -block.listen=:3278 -ip=192.168.1.184" + save_as: vs2_pid + - action: wait_block_servers + count: "2" + timeout: 60s + + # Phase 3: Create RF=2 volume, record initial state + - name: create_volume + actions: + - action: create_block_volume + name: "failover-test" + size: "50M" + replica_factor: "2" + save_as: vol_info + # Wait for replica to confirm role via heartbeat. + # Without this, PromoteBestReplica rejects replica as "no_heartbeat". + - action: sleep + duration: 10s + - action: lookup_block_volume + name: "failover-test" + save_as: initial + - action: print + msg: "initial primary={{ initial_iscsi_host }}:{{ initial_iscsi_port }} capacity={{ initial_capacity }}" + # Record the initial primary server for later comparison. + - action: assert_block_field + name: "failover-test" + field: "replica_factor" + expected: "2" + - action: assert_block_field + name: "failover-test" + field: "epoch" + expected: "1" + # Capture initial block status metrics. 
+ - action: block_status + save_as: pre_stats + + # Phase 4: Write data via iSCSI + - name: write_data + actions: + - action: iscsi_login_direct + node: client_node + host: "{{ initial_iscsi_host }}" + port: "{{ initial_iscsi_port }}" + iqn: "{{ initial_iqn }}" + save_as: device + - action: dd_write + node: client_node + device: "{{ device }}" + bs: 1M + count: "1" + seek: "5" + save_as: md5_5M + - action: dd_read_md5 + node: client_node + device: "{{ device }}" + bs: 1M + count: "1" + skip: "5" + save_as: verify_5M + - action: assert_equal + actual: "{{ verify_5M }}" + expected: "{{ md5_5M }}" + + # Phase 5: Kill primary VS, wait for master auto-failover + - name: failover + actions: + - action: iscsi_cleanup + node: client_node + ignore_error: true + - action: lookup_block_volume + name: "failover-test" + save_as: pre_kill + - action: print + msg: "killing primary VS (server={{ pre_kill_iscsi_host }}:{{ pre_kill_iscsi_port }})" + # Crash-kill VS1 with SIGKILL (not SIGTERM) to simulate a real crash. + # SIGTERM triggers graceful shutdown which deregisters volumes from + # the master registry — preventing the failover path we want to test. + - action: exec + node: target_node + cmd: "kill -9 {{ vs1_pid }}" + root: "true" + # Wait for master to detect VS1 disconnection and promote. + # Lease TTL is 30s; if never granted (zero), promotion is immediate. + # Allow extra time for heartbeat confirmation + deferred timer. + - action: sleep + duration: 35s + - action: wait_block_primary + name: "failover-test" + not: "192.168.1.184:18190" + timeout: 60s + save_as: promoted + + # Phase 6: Verify failover state + - name: verify_failover + actions: + - action: print + msg: "new primary={{ promoted_server }} epoch={{ promoted_epoch }}" + # Epoch must have incremented (real promotion, not just heartbeat update). 
+ - action: assert_block_field + name: "failover-test" + field: "epoch" + expected: "2" + - action: block_status + save_as: post_stats + # Verify promotion counter incremented. + - action: assert_greater + actual: "{{ post_stats_promotions_total }}" + expected: "{{ pre_stats_promotions_total }}" + + # Phase 7: Reconnect iSCSI to new primary, verify data + - name: verify_data + actions: + - action: iscsi_login_direct + node: client_node + host: "{{ promoted_iscsi_host }}" + port: "{{ promoted_iscsi_port }}" + iqn: "{{ promoted_iqn }}" + save_as: device2 + - action: dd_read_md5 + node: client_node + device: "{{ device2 }}" + bs: 1M + count: "1" + skip: "5" + save_as: post_failover_md5 + - action: assert_equal + actual: "{{ post_failover_md5 }}" + expected: "{{ md5_5M }}" + + # Phase 8: Restart killed VS, verify rebuild queued + - name: restart_verify + actions: + - action: iscsi_cleanup + node: client_node + ignore_error: true + - action: start_weed_volume + node: target_node + port: "18190" + master: "localhost:9434" + dir: "/tmp/sw-b3-vs1" + extra_args: "-block.dir=/tmp/sw-b3-vs1/blocks -block.listen=:3277 -ip=192.168.1.184" + save_as: vs1_pid2 + - action: wait_block_servers + count: "2" + timeout: 60s + - action: sleep + duration: 5s + # After restart, the old primary should be queued for rebuild. 
+ - action: block_status + save_as: final_stats + - action: assert_greater + actual: "{{ final_stats_rebuilds_total }}" + expected: "{{ post_stats_rebuilds_total }}" + + # Cleanup (always runs) + - name: cleanup + always: true + actions: + - action: iscsi_cleanup + node: client_node + ignore_error: true + - action: delete_block_volume + name: "failover-test" + ignore_error: true + - action: stop_weed + node: target_node + pid: "{{ vs1_pid2 }}" + ignore_error: true + - action: stop_weed + node: target_node + pid: "{{ vs2_pid }}" + ignore_error: true + - action: stop_weed + node: target_node + pid: "{{ vs1_pid }}" + ignore_error: true + - action: stop_weed + node: target_node + pid: "{{ master_pid }}" + ignore_error: true + - action: exec + node: target_node + cmd: "rm -rf /tmp/sw-b3-master /tmp/sw-b3-vs1 /tmp/sw-b3-vs2" + root: "true" + ignore_error: true diff --git a/weed/storage/blockvol/testrunner/scenarios/cp11b3-fast-reconnect.yaml b/weed/storage/blockvol/testrunner/scenarios/cp11b3-fast-reconnect.yaml new file mode 100644 index 000000000..da8def912 --- /dev/null +++ b/weed/storage/blockvol/testrunner/scenarios/cp11b3-fast-reconnect.yaml @@ -0,0 +1,214 @@ +name: cp11b3-fast-reconnect +timeout: 10m +env: + repo_dir: "/opt/work/seaweedfs" + master_url: "http://192.168.1.184:9436" + +# Tests: T3 (deferred timer safety), T2 (fast reconnect skips failover) +# Flow: Create RF=2 → write → kill primary briefly → restart before lease expires +# → verify no promotion happened → verify data intact + +topology: + nodes: + target_node: + host: "192.168.1.184" + user: testdev + key: "/opt/work/testdev_key" + client_node: + host: "192.168.1.181" + user: testdev + key: "/opt/work/testdev_key" + +phases: + # Phase 1: Clean slate + - name: setup + actions: + - action: kill_stale + node: target_node + - action: kill_stale + node: client_node + iscsi_cleanup: "true" + - action: exec + node: target_node + cmd: "rm -rf /tmp/sw-b3r-master /tmp/sw-b3r-vs1 /tmp/sw-b3r-vs2" + root: 
"true" + + # Phase 2: Start cluster + - name: start_cluster + actions: + - action: exec + node: target_node + cmd: "mkdir -p /tmp/sw-b3r-master /tmp/sw-b3r-vs1/blocks /tmp/sw-b3r-vs2/blocks" + - action: start_weed_master + node: target_node + port: "9436" + dir: "/tmp/sw-b3r-master" + save_as: master_pid + - action: wait_cluster_ready + node: target_node + master_url: "http://localhost:9436" + timeout: 30s + - action: start_weed_volume + node: target_node + port: "18194" + master: "localhost:9436" + dir: "/tmp/sw-b3r-vs1" + extra_args: "-block.dir=/tmp/sw-b3r-vs1/blocks -block.listen=:3281 -ip=192.168.1.184" + save_as: vs1_pid + - action: start_weed_volume + node: target_node + port: "18195" + master: "localhost:9436" + dir: "/tmp/sw-b3r-vs2" + extra_args: "-block.dir=/tmp/sw-b3r-vs2/blocks -block.listen=:3282 -ip=192.168.1.184" + save_as: vs2_pid + - action: wait_block_servers + count: "2" + timeout: 60s + + # Phase 3: Create RF=2 volume, write data + - name: create_and_write + actions: + - action: create_block_volume + name: "reconnect-test" + size: "50M" + replica_factor: "2" + save_as: vol_info + # Wait for replica to confirm role via heartbeat. + - action: sleep + duration: 10s + - action: lookup_block_volume + name: "reconnect-test" + save_as: initial + - action: iscsi_login_direct + node: client_node + host: "{{ initial_iscsi_host }}" + port: "{{ initial_iscsi_port }}" + iqn: "{{ initial_iqn }}" + save_as: device + - action: dd_write + node: client_node + device: "{{ device }}" + bs: 1M + count: "1" + seek: "8" + save_as: md5_8M + - action: dd_read_md5 + node: client_node + device: "{{ device }}" + bs: 1M + count: "1" + skip: "8" + save_as: verify_8M + - action: assert_equal + actual: "{{ verify_8M }}" + expected: "{{ md5_8M }}" + - action: iscsi_cleanup + node: client_node + ignore_error: true + # Record initial epoch. + - action: assert_block_field + name: "reconnect-test" + field: "epoch" + expected: "1" + # Record pre-kill promotion counter. 
+ - action: block_status + save_as: pre_stats + + # Phase 4: Kill and quickly restart primary VS (before lease expires) + - name: fast_reconnect + actions: + # Crash-kill primary VS with SIGKILL. + - action: exec + node: target_node + cmd: "kill -9 {{ vs1_pid }}" + root: "true" + # Restart it quickly — within a few seconds, well before the + # default 30s lease TTL expires on the master. + - action: sleep + duration: 3s + - action: start_weed_volume + node: target_node + port: "18194" + master: "localhost:9436" + dir: "/tmp/sw-b3r-vs1" + extra_args: "-block.dir=/tmp/sw-b3r-vs1/blocks -block.listen=:3281 -ip=192.168.1.184" + save_as: vs1_pid2 + # Wait for VS to re-register with master. + - action: wait_block_servers + count: "2" + timeout: 60s + - action: sleep + duration: 5s + + # Phase 5: Verify NO promotion happened + - name: verify_no_promotion + actions: + # Epoch should still be 1 (no promotion). + - action: assert_block_field + name: "reconnect-test" + field: "epoch" + expected: "1" + # Promotion counter should not have increased. 
+ - action: block_status + save_as: post_stats + - action: assert_equal + actual: "{{ post_stats_promotions_total }}" + expected: "{{ pre_stats_promotions_total }}" + - action: print + msg: "fast reconnect: epoch unchanged, no promotion — deferred timer cancelled" + + # Phase 6: Verify data still accessible on original primary + - name: verify_data + actions: + - action: lookup_block_volume + name: "reconnect-test" + save_as: after + - action: iscsi_login_direct + node: client_node + host: "{{ after_iscsi_host }}" + port: "{{ after_iscsi_port }}" + iqn: "{{ after_iqn }}" + save_as: device2 + - action: dd_read_md5 + node: client_node + device: "{{ device2 }}" + bs: 1M + count: "1" + skip: "8" + save_as: post_reconnect_md5 + - action: assert_equal + actual: "{{ post_reconnect_md5 }}" + expected: "{{ md5_8M }}" + + # Cleanup (always runs) + - name: cleanup + always: true + actions: + - action: iscsi_cleanup + node: client_node + ignore_error: true + - action: delete_block_volume + name: "reconnect-test" + ignore_error: true + - action: stop_weed + node: target_node + pid: "{{ vs1_pid2 }}" + ignore_error: true + - action: stop_weed + node: target_node + pid: "{{ vs2_pid }}" + ignore_error: true + - action: stop_weed + node: target_node + pid: "{{ vs1_pid }}" + ignore_error: true + - action: stop_weed + node: target_node + pid: "{{ master_pid }}" + ignore_error: true + - action: exec + node: target_node + cmd: "rm -rf /tmp/sw-b3r-master /tmp/sw-b3r-vs1 /tmp/sw-b3r-vs2" + root: "true" + ignore_error: true diff --git a/weed/storage/blockvol/testrunner/scenarios/cp11b3-manual-promote.yaml b/weed/storage/blockvol/testrunner/scenarios/cp11b3-manual-promote.yaml new file mode 100644 index 000000000..4d9dadf30 --- /dev/null +++ b/weed/storage/blockvol/testrunner/scenarios/cp11b3-manual-promote.yaml @@ -0,0 +1,190 @@ +name: cp11b3-manual-promote +timeout: 10m +env: + repo_dir: "/opt/work/seaweedfs" + master_url: "http://192.168.1.184:9435" + +# Tests: T5 (manual promote API), 
T6 (preflight), structured rejection +# Flow: Create RF=2 → write → preflight check → kill primary → manual promote → verify data + +topology: + nodes: + target_node: + host: "192.168.1.184" + user: testdev + key: "/opt/work/testdev_key" + client_node: + host: "192.168.1.181" + user: testdev + key: "/opt/work/testdev_key" + +phases: + # Phase 1: Clean slate + - name: setup + actions: + - action: kill_stale + node: target_node + - action: kill_stale + node: client_node + iscsi_cleanup: "true" + - action: exec + node: target_node + cmd: "rm -rf /tmp/sw-b3m-master /tmp/sw-b3m-vs1 /tmp/sw-b3m-vs2" + root: "true" + + # Phase 2: Start cluster + - name: start_cluster + actions: + - action: exec + node: target_node + cmd: "mkdir -p /tmp/sw-b3m-master /tmp/sw-b3m-vs1/blocks /tmp/sw-b3m-vs2/blocks" + - action: start_weed_master + node: target_node + port: "9435" + dir: "/tmp/sw-b3m-master" + save_as: master_pid + - action: wait_cluster_ready + node: target_node + master_url: "http://localhost:9435" + timeout: 30s + - action: start_weed_volume + node: target_node + port: "18192" + master: "localhost:9435" + dir: "/tmp/sw-b3m-vs1" + extra_args: "-block.dir=/tmp/sw-b3m-vs1/blocks -block.listen=:3279 -ip=192.168.1.184" + save_as: vs1_pid + - action: start_weed_volume + node: target_node + port: "18193" + master: "localhost:9435" + dir: "/tmp/sw-b3m-vs2" + extra_args: "-block.dir=/tmp/sw-b3m-vs2/blocks -block.listen=:3280 -ip=192.168.1.184" + save_as: vs2_pid + - action: wait_block_servers + count: "2" + timeout: 60s + + # Phase 3: Create RF=2 volume, write data + - name: create_and_write + actions: + - action: create_block_volume + name: "promote-test" + size: "50M" + replica_factor: "2" + save_as: vol_info + # Wait for replica to confirm role via heartbeat. 
+ - action: sleep + duration: 10s + - action: lookup_block_volume + name: "promote-test" + save_as: initial + - action: iscsi_login_direct + node: client_node + host: "{{ initial_iscsi_host }}" + port: "{{ initial_iscsi_port }}" + iqn: "{{ initial_iqn }}" + save_as: device + - action: dd_write + node: client_node + device: "{{ device }}" + bs: 1M + count: "2" + seek: "3" + save_as: md5_3M + - action: dd_read_md5 + node: client_node + device: "{{ device }}" + bs: 1M + count: "2" + skip: "3" + save_as: verify_3M + - action: assert_equal + actual: "{{ verify_3M }}" + expected: "{{ md5_3M }}" + + # Phase 4: Kill primary VS, then promote via API + - name: kill_and_promote + actions: + - action: iscsi_cleanup + node: client_node + ignore_error: true + # Crash-kill VS1 with SIGKILL to simulate a real crash. + - action: exec + node: target_node + cmd: "kill -9 {{ vs1_pid }}" + root: "true" + # Wait for master to detect the disconnection. + - action: sleep + duration: 15s + # Manual promote via the API. + - action: block_promote + name: "promote-test" + reason: "T7 integration test: manual failover" + save_as: promote_result + - action: print + msg: "promoted to {{ promote_result_server }} epoch={{ promote_result_epoch }}" + + # Phase 5: Verify promoted state + - name: verify_promoted + actions: + - action: lookup_block_volume + name: "promote-test" + save_as: after + # New primary should be different from old. 
+ - action: assert_block_field + name: "promote-test" + field: "epoch" + expected: "2" + - action: block_status + save_as: stats + - action: print + msg: "promotions_total={{ stats_promotions_total }}" + + # Phase 6: Reconnect iSCSI to new primary, verify data + - name: verify_data + actions: + - action: iscsi_login_direct + node: client_node + host: "{{ after_iscsi_host }}" + port: "{{ after_iscsi_port }}" + iqn: "{{ after_iqn }}" + save_as: device2 + - action: dd_read_md5 + node: client_node + device: "{{ device2 }}" + bs: 1M + count: "2" + skip: "3" + save_as: post_promote_md5 + - action: assert_equal + actual: "{{ post_promote_md5 }}" + expected: "{{ md5_3M }}" + + # Cleanup (always runs) + - name: cleanup + always: true + actions: + - action: iscsi_cleanup + node: client_node + ignore_error: true + - action: delete_block_volume + name: "promote-test" + ignore_error: true + - action: stop_weed + node: target_node + pid: "{{ vs2_pid }}" + ignore_error: true + - action: stop_weed + node: target_node + pid: "{{ vs1_pid }}" + ignore_error: true + - action: stop_weed + node: target_node + pid: "{{ master_pid }}" + ignore_error: true + - action: exec + node: target_node + cmd: "rm -rf /tmp/sw-b3m-master /tmp/sw-b3m-vs1 /tmp/sw-b3m-vs2" + root: "true" + ignore_error: true