You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
415 lines
14 KiB
415 lines
14 KiB
package weed_server
|
|
|
|
import (
|
|
"encoding/json"
|
|
"fmt"
|
|
"net/http"
|
|
|
|
"github.com/gorilla/mux"
|
|
|
|
"github.com/seaweedfs/seaweedfs/weed/glog"
|
|
"github.com/seaweedfs/seaweedfs/weed/pb/master_pb"
|
|
"github.com/seaweedfs/seaweedfs/weed/storage/blockvol"
|
|
"github.com/seaweedfs/seaweedfs/weed/storage/blockvol/blockapi"
|
|
)
|
|
|
|
// buildEnvironmentInfo constructs a blockvol.EnvironmentInfo from registry state.
|
|
func (ms *MasterServer) buildEnvironmentInfo() blockvol.EnvironmentInfo {
|
|
return blockvol.EnvironmentInfo{
|
|
NVMeAvailable: ms.blockRegistry.HasNVMeCapableServer(),
|
|
ServerCount: len(ms.blockRegistry.BlockCapableServers()),
|
|
WALSizeDefault: 64 << 20, // engine default
|
|
BlockSizeDefault: 4096, // engine default
|
|
}
|
|
}
|
|
|
|
// blockVolumeCreateHandler handles POST /block/volume.
|
|
func (ms *MasterServer) blockVolumeCreateHandler(w http.ResponseWriter, r *http.Request) {
|
|
var req blockapi.CreateVolumeRequest
|
|
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
|
writeJsonError(w, r, http.StatusBadRequest, fmt.Errorf("invalid request body: %w", err))
|
|
return
|
|
}
|
|
|
|
// Store replica_placement in registry after creation.
|
|
replicaPlacement := req.ReplicaPlacement
|
|
if replicaPlacement == "" {
|
|
replicaPlacement = "000"
|
|
}
|
|
|
|
// Resolve preset + overrides.
|
|
env := ms.buildEnvironmentInfo()
|
|
resolved := blockvol.ResolvePolicy(blockvol.PresetName(req.Preset),
|
|
req.DurabilityMode, req.ReplicaFactor, req.DiskType, env)
|
|
if len(resolved.Errors) > 0 {
|
|
writeJsonError(w, r, http.StatusBadRequest, fmt.Errorf("%s", resolved.Errors[0]))
|
|
return
|
|
}
|
|
|
|
// Use resolved values for the gRPC call.
|
|
resp, err := ms.CreateBlockVolume(r.Context(), &master_pb.CreateBlockVolumeRequest{
|
|
Name: req.Name,
|
|
SizeBytes: req.SizeBytes,
|
|
DiskType: resolved.Policy.DiskType,
|
|
DurabilityMode: resolved.Policy.DurabilityMode,
|
|
ReplicaFactor: uint32(resolved.Policy.ReplicaFactor),
|
|
})
|
|
if err != nil {
|
|
writeJsonError(w, r, http.StatusInternalServerError, err)
|
|
return
|
|
}
|
|
|
|
// Store replica_placement and preset on the registry entry (locked mutation).
|
|
ms.blockRegistry.UpdateEntry(resp.VolumeId, func(e *BlockVolumeEntry) {
|
|
e.ReplicaPlacement = replicaPlacement
|
|
e.Preset = req.Preset
|
|
})
|
|
|
|
// Look up the full entry to populate all fields.
|
|
info := blockapi.VolumeInfo{
|
|
Name: resp.VolumeId,
|
|
VolumeServer: resp.VolumeServer,
|
|
SizeBytes: resp.CapacityBytes,
|
|
ReplicaPlacement: replicaPlacement,
|
|
ISCSIAddr: resp.IscsiAddr,
|
|
IQN: resp.Iqn,
|
|
}
|
|
if entry, ok := ms.blockRegistry.Lookup(resp.VolumeId); ok {
|
|
info = entryToVolumeInfo(&entry, ms.blockRegistry.IsBlockCapable(entry.VolumeServer))
|
|
}
|
|
writeJsonQuiet(w, r, http.StatusOK, info)
|
|
}
|
|
|
|
// blockVolumeResolveHandler handles POST /block/volume/resolve.
|
|
// Diagnostic endpoint: always returns 200, even with errors[].
|
|
func (ms *MasterServer) blockVolumeResolveHandler(w http.ResponseWriter, r *http.Request) {
|
|
var req blockapi.CreateVolumeRequest
|
|
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
|
writeJsonError(w, r, http.StatusBadRequest, fmt.Errorf("invalid request body: %w", err))
|
|
return
|
|
}
|
|
|
|
env := ms.buildEnvironmentInfo()
|
|
resolved := blockvol.ResolvePolicy(blockvol.PresetName(req.Preset),
|
|
req.DurabilityMode, req.ReplicaFactor, req.DiskType, env)
|
|
|
|
resp := blockapi.ResolvedPolicyResponse{
|
|
Policy: blockapi.ResolvedPolicyView{
|
|
Preset: string(resolved.Policy.Preset),
|
|
DurabilityMode: resolved.Policy.DurabilityMode,
|
|
ReplicaFactor: resolved.Policy.ReplicaFactor,
|
|
DiskType: resolved.Policy.DiskType,
|
|
TransportPreference: resolved.Policy.TransportPref,
|
|
WorkloadHint: resolved.Policy.WorkloadHint,
|
|
WALSizeRecommended: resolved.Policy.WALSizeRecommended,
|
|
StorageProfile: resolved.Policy.StorageProfile,
|
|
},
|
|
Overrides: resolved.Overrides,
|
|
Warnings: resolved.Warnings,
|
|
Errors: resolved.Errors,
|
|
}
|
|
writeJsonQuiet(w, r, http.StatusOK, resp)
|
|
}
|
|
|
|
// blockVolumePlanHandler handles POST /block/volume/plan.
|
|
// Read-only: no cluster mutation. Proxied to leader for consistent placement state.
|
|
// Always returns 200 with errors[] in body for error conditions.
|
|
func (ms *MasterServer) blockVolumePlanHandler(w http.ResponseWriter, r *http.Request) {
|
|
var req blockapi.CreateVolumeRequest
|
|
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
|
writeJsonError(w, r, http.StatusBadRequest, fmt.Errorf("invalid request body: %w", err))
|
|
return
|
|
}
|
|
resp := ms.PlanBlockVolume(&req)
|
|
writeJsonQuiet(w, r, http.StatusOK, resp)
|
|
}
|
|
|
|
// blockVolumeDeleteHandler handles DELETE /block/volume/{name}.
|
|
func (ms *MasterServer) blockVolumeDeleteHandler(w http.ResponseWriter, r *http.Request) {
|
|
name := mux.Vars(r)["name"]
|
|
if name == "" {
|
|
writeJsonError(w, r, http.StatusBadRequest, fmt.Errorf("name is required"))
|
|
return
|
|
}
|
|
|
|
_, err := ms.DeleteBlockVolume(r.Context(), &master_pb.DeleteBlockVolumeRequest{
|
|
Name: name,
|
|
})
|
|
if err != nil {
|
|
writeJsonError(w, r, http.StatusInternalServerError, err)
|
|
return
|
|
}
|
|
writeJsonQuiet(w, r, http.StatusOK, map[string]string{"status": "deleted"})
|
|
}
|
|
|
|
// blockVolumeLookupHandler handles GET /block/volume/{name}.
|
|
func (ms *MasterServer) blockVolumeLookupHandler(w http.ResponseWriter, r *http.Request) {
|
|
name := mux.Vars(r)["name"]
|
|
if name == "" {
|
|
writeJsonError(w, r, http.StatusBadRequest, fmt.Errorf("name is required"))
|
|
return
|
|
}
|
|
|
|
entry, ok := ms.blockRegistry.Lookup(name)
|
|
if !ok {
|
|
writeJsonError(w, r, http.StatusNotFound, fmt.Errorf("block volume %q not found", name))
|
|
return
|
|
}
|
|
writeJsonQuiet(w, r, http.StatusOK, entryToVolumeInfo(&entry, ms.blockRegistry.IsBlockCapable(entry.VolumeServer)))
|
|
}
|
|
|
|
// blockVolumeListHandler handles GET /block/volumes.
|
|
func (ms *MasterServer) blockVolumeListHandler(w http.ResponseWriter, r *http.Request) {
|
|
entries := ms.blockRegistry.ListAll()
|
|
infos := make([]blockapi.VolumeInfo, len(entries))
|
|
for i := range entries {
|
|
infos[i] = entryToVolumeInfo(&entries[i], ms.blockRegistry.IsBlockCapable(entries[i].VolumeServer))
|
|
}
|
|
writeJsonQuiet(w, r, http.StatusOK, infos)
|
|
}
|
|
|
|
// blockAssignHandler handles POST /block/assign.
|
|
func (ms *MasterServer) blockAssignHandler(w http.ResponseWriter, r *http.Request) {
|
|
var req blockapi.AssignRequest
|
|
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
|
writeJsonError(w, r, http.StatusBadRequest, fmt.Errorf("invalid request body: %w", err))
|
|
return
|
|
}
|
|
if req.Name == "" {
|
|
writeJsonError(w, r, http.StatusBadRequest, fmt.Errorf("name is required"))
|
|
return
|
|
}
|
|
|
|
// Resolve name → registry entry.
|
|
entry, ok := ms.blockRegistry.Lookup(req.Name)
|
|
if !ok {
|
|
writeJsonError(w, r, http.StatusNotFound, fmt.Errorf("block volume %q not found", req.Name))
|
|
return
|
|
}
|
|
|
|
// Determine target server + path based on role.
|
|
server, path := entry.VolumeServer, entry.Path
|
|
if req.Role == "replica" {
|
|
if entry.ReplicaServer == "" {
|
|
writeJsonError(w, r, http.StatusBadRequest, fmt.Errorf("block volume %q has no replica", req.Name))
|
|
return
|
|
}
|
|
server, path = entry.ReplicaServer, entry.ReplicaPath
|
|
}
|
|
|
|
ms.blockAssignmentQueue.Enqueue(server, blockvol.BlockVolumeAssignment{
|
|
Path: path,
|
|
Epoch: req.Epoch,
|
|
Role: blockapi.RoleFromString(req.Role),
|
|
LeaseTtlMs: uint32(req.LeaseTTLMs),
|
|
})
|
|
writeJsonQuiet(w, r, http.StatusOK, map[string]string{"status": "queued"})
|
|
}
|
|
|
|
// blockServersHandler handles GET /block/servers.
|
|
func (ms *MasterServer) blockServersHandler(w http.ResponseWriter, r *http.Request) {
|
|
summaries := ms.blockRegistry.ServerSummaries()
|
|
infos := make([]blockapi.ServerInfo, len(summaries))
|
|
for i, s := range summaries {
|
|
infos[i] = blockapi.ServerInfo{
|
|
Address: s.Address,
|
|
VolumeCount: s.VolumeCount,
|
|
BlockCapable: s.BlockCapable,
|
|
}
|
|
}
|
|
writeJsonQuiet(w, r, http.StatusOK, infos)
|
|
}
|
|
|
|
// blockVolumeExpandHandler handles POST /block/volume/{name}/expand.
|
|
func (ms *MasterServer) blockVolumeExpandHandler(w http.ResponseWriter, r *http.Request) {
|
|
name := mux.Vars(r)["name"]
|
|
if name == "" {
|
|
writeJsonError(w, r, http.StatusBadRequest, fmt.Errorf("name is required"))
|
|
return
|
|
}
|
|
|
|
var req blockapi.ExpandVolumeRequest
|
|
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
|
writeJsonError(w, r, http.StatusBadRequest, fmt.Errorf("invalid request body: %w", err))
|
|
return
|
|
}
|
|
if req.NewSizeBytes == 0 {
|
|
writeJsonError(w, r, http.StatusBadRequest, fmt.Errorf("new_size_bytes must be > 0"))
|
|
return
|
|
}
|
|
|
|
resp, err := ms.ExpandBlockVolume(r.Context(), &master_pb.ExpandBlockVolumeRequest{
|
|
Name: name,
|
|
NewSizeBytes: req.NewSizeBytes,
|
|
})
|
|
if err != nil {
|
|
writeJsonError(w, r, http.StatusInternalServerError, err)
|
|
return
|
|
}
|
|
writeJsonQuiet(w, r, http.StatusOK, blockapi.ExpandVolumeResponse{CapacityBytes: resp.CapacityBytes})
|
|
}
|
|
|
|
// blockStatusHandler handles GET /block/status — cluster summary with health counts.
|
|
func (ms *MasterServer) blockStatusHandler(w http.ResponseWriter, r *http.Request) {
|
|
healthSummary := ms.blockRegistry.ComputeClusterHealthSummary()
|
|
status := blockapi.BlockStatusResponse{
|
|
VolumeCount: len(ms.blockRegistry.ListAll()),
|
|
ServerCount: len(ms.blockRegistry.BlockCapableServers()),
|
|
PromotionLSNTolerance: ms.blockRegistry.PromotionLSNTolerance(),
|
|
BarrierLagLSN: ms.blockRegistry.MaxBarrierLagLSN(),
|
|
PromotionsTotal: int64(ms.blockRegistry.PromotionsTotal.Load()),
|
|
FailoversTotal: int64(ms.blockRegistry.FailoversTotal.Load()),
|
|
RebuildsTotal: int64(ms.blockRegistry.RebuildsTotal.Load()),
|
|
AssignmentQueueDepth: ms.blockAssignmentQueue.TotalPending(),
|
|
HealthyCount: healthSummary.Healthy,
|
|
DegradedCount: healthSummary.Degraded,
|
|
RebuildingCount: healthSummary.Rebuilding,
|
|
UnsafeCount: healthSummary.Unsafe,
|
|
NvmeCapableServers: ms.blockRegistry.NvmeCapableServerCount(),
|
|
}
|
|
writeJsonQuiet(w, r, http.StatusOK, status)
|
|
}
|
|
|
|
// blockVolumePreflightHandler handles GET /block/volume/{name}/preflight.
|
|
// Returns a read-only promotion preflight evaluation for the named volume.
|
|
func (ms *MasterServer) blockVolumePreflightHandler(w http.ResponseWriter, r *http.Request) {
|
|
name := mux.Vars(r)["name"]
|
|
if name == "" {
|
|
writeJsonError(w, r, http.StatusBadRequest, fmt.Errorf("name is required"))
|
|
return
|
|
}
|
|
|
|
pf, err := ms.blockRegistry.EvaluatePromotion(name)
|
|
if err != nil {
|
|
writeJsonError(w, r, http.StatusNotFound, err)
|
|
return
|
|
}
|
|
|
|
resp := blockapi.PreflightResponse{
|
|
VolumeName: pf.VolumeName,
|
|
Promotable: pf.Promotable,
|
|
Reason: pf.Reason,
|
|
}
|
|
if pf.Candidate != nil {
|
|
resp.CandidateServer = pf.Candidate.Server
|
|
resp.CandidateHealth = pf.Candidate.HealthScore
|
|
resp.CandidateWALLSN = pf.Candidate.WALHeadLSN
|
|
}
|
|
for _, rej := range pf.Rejections {
|
|
resp.Rejections = append(resp.Rejections, blockapi.PreflightRejection{
|
|
Server: rej.Server,
|
|
Reason: rej.Reason,
|
|
})
|
|
}
|
|
// Add primary liveness info.
|
|
entry, ok := ms.blockRegistry.Lookup(name)
|
|
if ok {
|
|
resp.PrimaryServer = entry.VolumeServer
|
|
resp.PrimaryAlive = ms.blockRegistry.IsBlockCapable(entry.VolumeServer)
|
|
}
|
|
writeJsonQuiet(w, r, http.StatusOK, resp)
|
|
}
|
|
|
|
// blockVolumePromoteHandler handles POST /block/volume/{name}/promote.
|
|
// Triggers a manual promotion for the named block volume.
|
|
func (ms *MasterServer) blockVolumePromoteHandler(w http.ResponseWriter, r *http.Request) {
|
|
name := mux.Vars(r)["name"]
|
|
if name == "" {
|
|
writeJsonError(w, r, http.StatusBadRequest, fmt.Errorf("name is required"))
|
|
return
|
|
}
|
|
|
|
var req blockapi.PromoteVolumeRequest
|
|
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
|
writeJsonError(w, r, http.StatusBadRequest, fmt.Errorf("decode request: %w", err))
|
|
return
|
|
}
|
|
|
|
// ManualPromote captures oldPrimary/oldPath under lock to avoid TOCTOU (BUG-T5-2).
|
|
newEpoch, oldPrimary, oldPath, pf, err := ms.blockRegistry.ManualPromote(name, req.TargetServer, req.Force)
|
|
if err != nil {
|
|
// Distinguish not-found from rejection.
|
|
status := http.StatusConflict
|
|
if pf.Reason == "volume not found" {
|
|
status = http.StatusNotFound
|
|
}
|
|
// Build structured rejection response.
|
|
resp := blockapi.PromoteVolumeResponse{
|
|
Reason: pf.Reason,
|
|
}
|
|
for _, rej := range pf.Rejections {
|
|
resp.Rejections = append(resp.Rejections, blockapi.PreflightRejection{
|
|
Server: rej.Server,
|
|
Reason: rej.Reason,
|
|
})
|
|
}
|
|
glog.V(0).Infof("manual promote %q rejected: %s", name, pf.Reason)
|
|
writeJsonQuiet(w, r, status, resp)
|
|
return
|
|
}
|
|
|
|
// Post-promotion orchestration (same as auto path).
|
|
ms.finalizePromotion(name, oldPrimary, oldPath, newEpoch)
|
|
|
|
if req.Reason != "" {
|
|
glog.V(0).Infof("manual promote %q: reason=%q", name, req.Reason)
|
|
}
|
|
|
|
// Re-read to get the new primary server name.
|
|
entry, _ := ms.blockRegistry.Lookup(name)
|
|
writeJsonQuiet(w, r, http.StatusOK, blockapi.PromoteVolumeResponse{
|
|
NewPrimary: entry.VolumeServer,
|
|
Epoch: newEpoch,
|
|
})
|
|
}
|
|
|
|
// entryToVolumeInfo converts a BlockVolumeEntry to a blockapi.VolumeInfo.
|
|
// primaryAlive indicates whether the primary server is alive (in blockServers set).
|
|
func entryToVolumeInfo(e *BlockVolumeEntry, primaryAlive bool) blockapi.VolumeInfo {
|
|
status := "pending"
|
|
if e.Status == StatusActive {
|
|
status = "active"
|
|
}
|
|
rf := e.ReplicaFactor
|
|
if rf == 0 {
|
|
rf = 2 // default
|
|
}
|
|
durMode := e.DurabilityMode
|
|
if durMode == "" {
|
|
durMode = "best_effort"
|
|
}
|
|
info := blockapi.VolumeInfo{
|
|
Name: e.Name,
|
|
VolumeServer: e.VolumeServer,
|
|
SizeBytes: e.SizeBytes,
|
|
ReplicaPlacement: e.ReplicaPlacement,
|
|
Epoch: e.Epoch,
|
|
Role: blockvol.RoleFromWire(e.Role).String(),
|
|
Status: status,
|
|
ISCSIAddr: e.ISCSIAddr,
|
|
IQN: e.IQN,
|
|
ReplicaServer: e.ReplicaServer,
|
|
ReplicaISCSIAddr: e.ReplicaISCSIAddr,
|
|
ReplicaIQN: e.ReplicaIQN,
|
|
ReplicaDataAddr: e.ReplicaDataAddr,
|
|
ReplicaCtrlAddr: e.ReplicaCtrlAddr,
|
|
ReplicaFactor: rf,
|
|
HealthScore: e.HealthScore,
|
|
ReplicaDegraded: e.ReplicaDegraded,
|
|
DurabilityMode: durMode,
|
|
Preset: e.Preset,
|
|
NvmeAddr: e.NvmeAddr,
|
|
NQN: e.NQN,
|
|
HealthState: deriveHealthStateWithLiveness(e, primaryAlive),
|
|
}
|
|
for _, ri := range e.Replicas {
|
|
info.Replicas = append(info.Replicas, blockapi.ReplicaDetail{
|
|
Server: ri.Server,
|
|
ISCSIAddr: ri.ISCSIAddr,
|
|
IQN: ri.IQN,
|
|
HealthScore: ri.HealthScore,
|
|
WALLag: ri.WALLag,
|
|
})
|
|
}
|
|
return info
|
|
}
|