You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

120 lines
3.3 KiB

package weed_server
import (
"github.com/seaweedfs/seaweedfs/weed/storage/blockvol"
)
// Health-state values reported for a block volume.
//
// When more than one condition applies, the most severe state wins:
// unsafe > rebuilding > degraded > healthy.
const (
	// HealthStateHealthy is reported when none of the conditions for the
	// other states apply.
	HealthStateHealthy = "healthy"
	// HealthStateDegraded is reported when fewer replicas are registered
	// than desired, or when the entry's replica-degraded flag is set.
	HealthStateDegraded = "degraded"
	// HealthStateRebuilding is reported when at least one replica has the
	// rebuilding role.
	HealthStateRebuilding = "rebuilding"
	// HealthStateUnsafe is reported when the entry's role is not primary,
	// when a strict durability mode has fewer replicas than it requires, or
	// (in the liveness-aware variant) when the primary is not alive.
	HealthStateUnsafe = "unsafe"
)
// deriveHealthState maps the facts recorded on a registry entry to one of
// the HealthState* strings shown to operators.
//
// Checks run from most to least severe and the first match wins:
//
//   - unsafe: the entry's role is not primary, or the durability mode is
//     "sync_all" or "sync_quorum" and fewer replicas are registered than
//     that mode requires for the replica factor;
//   - rebuilding: at least one replica has the rebuilding role;
//   - degraded: fewer than ReplicaFactor-1 replicas are registered, or
//     ReplicaDegraded is set;
//   - healthy: none of the above.
//
// A ReplicaFactor of 0 is treated as 2. Liveness of the primary's server is
// not visible on the entry and is therefore not considered here; use
// deriveHealthStateWithLiveness when that information is available.
//
// entry must be non-nil; it is dereferenced without a check.
func deriveHealthState(entry *BlockVolumeEntry) string {
	// An entry whose own role is anything but primary is reported as unsafe.
	if blockvol.RoleFromWire(entry.Role) != blockvol.RolePrimary {
		return HealthStateUnsafe
	}

	// Resolve the effective replica factor once; 0 falls back to 2.
	replicaFactor := entry.ReplicaFactor
	if replicaFactor == 0 {
		replicaFactor = 2
	}
	replicaCount := len(entry.Replicas)

	// Strict durability modes are unsafe when they have fewer replicas than
	// the mode requires.
	switch entry.DurabilityMode {
	case "sync_all", "sync_quorum":
		// The parse error is deliberately dropped: the string has just been
		// matched against the two strict-mode literals.
		// NOTE(review): presumably ParseDurabilityMode cannot fail for these
		// inputs — confirm against blockvol.
		mode, _ := blockvol.ParseDurabilityMode(entry.DurabilityMode)
		if replicaCount < mode.RequiredReplicas(replicaFactor) {
			return HealthStateUnsafe
		}
	}

	// Any replica currently in the rebuilding role marks the volume as
	// rebuilding.
	for _, replica := range entry.Replicas {
		if blockvol.RoleFromWire(replica.Role) == blockvol.RoleRebuilding {
			return HealthStateRebuilding
		}
	}

	// The replica factor counts the primary, so one fewer replica is wanted.
	wantReplicas := replicaFactor - 1
	shortOnReplicas := wantReplicas > 0 && replicaCount < wantReplicas
	if shortOnReplicas || entry.ReplicaDegraded {
		return HealthStateDegraded
	}

	return HealthStateHealthy
}
// deriveHealthStateWithLiveness is deriveHealthState with one extra input:
// whether the primary's server is currently alive.
//
// When primaryAlive is false the result is HealthStateUnsafe and entry is not
// inspected; otherwise the result is whatever deriveHealthState returns for
// entry.
func deriveHealthStateWithLiveness(entry *BlockVolumeEntry, primaryAlive bool) string {
	if primaryAlive {
		return deriveHealthState(entry)
	}
	// A dead primary overrides every entry-level fact.
	return HealthStateUnsafe
}
// clusterHealthSummary holds the number of volumes found in each health
// state, as tallied by ComputeClusterHealthSummary.
type clusterHealthSummary struct {
	// Healthy is the number of volumes in the "healthy" state.
	Healthy int
	// Degraded is the number of volumes in the "degraded" state.
	Degraded int
	// Rebuilding is the number of volumes in the "rebuilding" state.
	Rebuilding int
	// Unsafe is the number of volumes in the "unsafe" state.
	Unsafe int
}
// ComputeClusterHealthSummary walks every volume in the registry and counts
// how many fall into each health state.
//
// A volume's primary is considered alive when the registry holds a non-nil
// blockServers entry for the volume's VolumeServer; that flag is fed to
// deriveHealthStateWithLiveness. The registry's read lock is held for the
// whole walk.
func (r *BlockVolumeRegistry) ComputeClusterHealthSummary() clusterHealthSummary {
	r.mu.RLock()
	defer r.mu.RUnlock()

	counts := clusterHealthSummary{}
	for _, vol := range r.volumes {
		alive := r.blockServers[vol.VolumeServer] != nil
		switch deriveHealthStateWithLiveness(vol, alive) {
		case HealthStateUnsafe:
			counts.Unsafe++
		case HealthStateRebuilding:
			counts.Rebuilding++
		case HealthStateDegraded:
			counts.Degraded++
		case HealthStateHealthy:
			counts.Healthy++
		}
	}
	return counts
}
// NvmeCapableServerCount reports how many registered block servers have a
// non-empty NvmeAddr. Nil entries in the server table are skipped. The
// registry's read lock is held while counting.
func (r *BlockVolumeRegistry) NvmeCapableServerCount() int {
	r.mu.RLock()
	defer r.mu.RUnlock()

	var n int
	for _, srv := range r.blockServers {
		if srv == nil || srv.NvmeAddr == "" {
			continue
		}
		n++
	}
	return n
}