Browse Source

feat: Phase 10 CP10-2 -- CSI NVMe/TCP node plugin, 210 tests

Adds NVMe/TCP transport support to the CSI driver so Kubernetes pods can
mount block volumes via NVMe alongside (or instead of) iSCSI.

Transport selection: NVMe preferred when nvme_tcp module loaded +
metadata present + nvmeUtil available. Fail-fast on NVMe errors (no
silent iSCSI fallback). .transport file persists across CSI restarts.

Key changes:
- BuildNQN() single source of truth for NQN construction (naming.go)
- NVMeUtil interface + realNVMeUtil wrapping nvme-cli (nvme_util.go)
- NodeStageVolume/Unstage/Expand dual-transport paths (node.go)
- NvmeAddr/NQN fields in VolumeInfo, Controller contexts
- VolumeManager NvmeAddr()/VolumeNQN() getters
- BlockService NvmeListenAddr()/NQN() accessors
- 27 unit tests + 26 QA adversarial tests (nvme_node_test.go, qa_cp102)
- Fix: flaky TestQA_Node_ConcurrentStageUnstage (pre-alloc temp dirs)

Review fixes applied: F1 (NQN format mismatch), F2 (CreateVolume drops
NVMe context), F3 (IsConnected error classification), F4 (findSubsys
path validation), F5 (MasterVolumeClient NVMe gap documented).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
feature/sw-block
Ping Qiu 2 days ago
parent
commit
bbadeeb89b
  1. 47
      weed/server/volume_server_block.go
  2. 30
      weed/storage/blockvol/csi/controller.go
  3. 258
      weed/storage/blockvol/csi/node.go
  4. 1222
      weed/storage/blockvol/csi/nvme_node_test.go
  5. 247
      weed/storage/blockvol/csi/nvme_util.go
  6. 1088
      weed/storage/blockvol/csi/qa_cp102_nvme_node_test.go
  7. 13
      weed/storage/blockvol/csi/qa_cp62_test.go
  8. 7
      weed/storage/blockvol/csi/server.go
  9. 11
      weed/storage/blockvol/csi/volume_backend.go
  10. 38
      weed/storage/blockvol/csi/volume_manager.go
  11. 8
      weed/storage/blockvol/naming.go

47
weed/server/volume_server_block.go

@ -33,13 +33,14 @@ type NVMeConfig struct {
// BlockService manages block volumes and the iSCSI/NVMe target servers.
type BlockService struct {
blockStore *storage.BlockVolumeStore
targetServer *iscsi.TargetServer
nvmeServer *nvme.Server
iqnPrefix string
nqnPrefix string
blockDir string
listenAddr string
blockStore *storage.BlockVolumeStore
targetServer *iscsi.TargetServer
nvmeServer *nvme.Server
iqnPrefix string
nqnPrefix string
blockDir string
listenAddr string
nvmeListenAddr string
// Replication state (CP6-3).
replMu sync.RWMutex
@ -63,11 +64,12 @@ func StartBlockService(listenAddr, blockDir, iqnPrefix, portalAddr string, nvmeC
}
bs := &BlockService{
blockStore: storage.NewBlockVolumeStore(),
iqnPrefix: iqnPrefix,
nqnPrefix: nqnPrefix,
blockDir: blockDir,
listenAddr: listenAddr,
blockStore: storage.NewBlockVolumeStore(),
iqnPrefix: iqnPrefix,
nqnPrefix: nqnPrefix,
blockDir: blockDir,
listenAddr: listenAddr,
nvmeListenAddr: nvmeCfg.ListenAddr,
}
// iSCSI target setup.
@ -165,7 +167,7 @@ func (bs *BlockService) registerVolume(vol *blockvol.BlockVol, name string) {
bs.targetServer.AddVolume(iqn, adapter)
if bs.nvmeServer != nil {
nqn := bs.nqnPrefix + blockvol.SanitizeIQN(name)
nqn := blockvol.BuildNQN(bs.nqnPrefix, name)
nvmeAdapter := nvme.NewNVMeAdapter(vol)
bs.nvmeServer.AddVolume(nqn, nvmeAdapter, nvmeAdapter.DeviceNGUID())
}
@ -188,6 +190,19 @@ func (bs *BlockService) ListenAddr() string {
return bs.listenAddr
}
// NvmeListenAddr reports the NVMe/TCP target listen address configured for
// this service. It returns "" when no NVMe server is running, which callers
// treat as "NVMe disabled".
func (bs *BlockService) NvmeListenAddr() string {
	if bs.nvmeServer == nil {
		return ""
	}
	return bs.nvmeListenAddr
}
// NQN returns the NVMe subsystem NQN for a volume name.
// It delegates to blockvol.BuildNQN so the volume server and the CSI driver
// derive identical NQNs for the same volume (single source of truth).
func (bs *BlockService) NQN(name string) string {
return blockvol.BuildNQN(bs.nqnPrefix, name)
}
// CreateBlockVol creates a new .blk file, registers it with BlockVolumeStore
// and iSCSI TargetServer. Returns path, IQN, iSCSI addr.
// Idempotent: if volume already exists with same or larger size, returns existing info.
@ -209,7 +224,7 @@ func (bs *BlockService) CreateBlockVol(name string, sizeBytes uint64, diskType s
adapter := blockvol.NewBlockVolAdapter(vol)
bs.targetServer.AddVolume(iqn, adapter)
if bs.nvmeServer != nil {
nqn := bs.nqnPrefix + blockvol.SanitizeIQN(name)
nqn := blockvol.BuildNQN(bs.nqnPrefix, name)
nvmeAdapter := nvme.NewNVMeAdapter(vol)
bs.nvmeServer.AddVolume(nqn, nvmeAdapter, nvmeAdapter.DeviceNGUID())
}
@ -250,7 +265,7 @@ func (bs *BlockService) CreateBlockVol(name string, sizeBytes uint64, diskType s
bs.targetServer.AddVolume(iqn, adapter)
if bs.nvmeServer != nil {
nqn := bs.nqnPrefix + blockvol.SanitizeIQN(name)
nqn := blockvol.BuildNQN(bs.nqnPrefix, name)
nvmeAdapter := nvme.NewNVMeAdapter(vol)
bs.nvmeServer.AddVolume(nqn, nvmeAdapter, nvmeAdapter.DeviceNGUID())
}
@ -273,7 +288,7 @@ func (bs *BlockService) DeleteBlockVol(name string) error {
// Remove from NVMe target.
if bs.nvmeServer != nil {
nqn := bs.nqnPrefix + blockvol.SanitizeIQN(name)
nqn := blockvol.BuildNQN(bs.nqnPrefix, name)
bs.nvmeServer.RemoveVolume(nqn)
}

30
weed/storage/blockvol/csi/controller.go

@ -77,11 +77,18 @@ func (s *controllerServer) CreateVolume(_ context.Context, req *csi.CreateVolume
},
}
// Attach volume_context with iSCSI target info for NodeStageVolume.
if info.ISCSIAddr != "" || info.IQN != "" {
resp.Volume.VolumeContext = map[string]string{
"iscsiAddr": info.ISCSIAddr,
"iqn": info.IQN,
// Attach volume_context with target info for NodeStageVolume.
hasISCSI := info.ISCSIAddr != "" || info.IQN != ""
hasNVMe := info.NvmeAddr != ""
if hasISCSI || hasNVMe {
resp.Volume.VolumeContext = make(map[string]string)
if hasISCSI {
resp.Volume.VolumeContext["iscsiAddr"] = info.ISCSIAddr
resp.Volume.VolumeContext["iqn"] = info.IQN
}
if hasNVMe {
resp.Volume.VolumeContext["nvmeAddr"] = info.NvmeAddr
resp.Volume.VolumeContext["nqn"] = info.NQN
}
}
@ -114,11 +121,16 @@ func (s *controllerServer) ControllerPublishVolume(_ context.Context, req *csi.C
return nil, status.Errorf(codes.NotFound, "volume %q not found: %v", req.VolumeId, err)
}
pubCtx := map[string]string{
"iscsiAddr": info.ISCSIAddr,
"iqn": info.IQN,
}
if info.NvmeAddr != "" {
pubCtx["nvmeAddr"] = info.NvmeAddr
pubCtx["nqn"] = info.NQN
}
return &csi.ControllerPublishVolumeResponse{
PublishContext: map[string]string{
"iscsiAddr": info.ISCSIAddr,
"iqn": info.IQN,
},
PublishContext: pubCtx,
}, nil
}

258
weed/storage/blockvol/csi/node.go

@ -5,6 +5,7 @@ import (
"fmt"
"log"
"os"
"path/filepath"
"sync"
"github.com/container-storage-interface/spec/lib/go/csi"
@ -13,10 +14,18 @@ import (
"google.golang.org/grpc/status"
)
// Transport identifiers stored in stagedVolumeInfo.transport and persisted in
// the .transport marker file inside the staging directory.
const (
transportISCSI = "iscsi"
transportNVMe = "nvme"
)
// stagedVolumeInfo tracks info needed for NodeUnstageVolume and NodeExpandVolume.
type stagedVolumeInfo struct {
iqn string
iscsiAddr string
nqn string // NVMe subsystem NQN
nvmeAddr string // NVMe/TCP target address
transport string // "iscsi" or "nvme"
isLocal bool // true if volume is served by local VolumeManager
fsType string // filesystem type (ext4, xfs, etc.)
stagingPath string // staging mount path
@ -27,7 +36,9 @@ type nodeServer struct {
mgr *VolumeManager // may be nil in controller-only mode
nodeID string
iqnPrefix string // for IQN derivation fallback on restart
nqnPrefix string // for NQN derivation fallback on restart
iscsiUtil ISCSIUtil
nvmeUtil NVMeUtil // may be nil if NVMe not available
mountUtil MountUtil
logger *log.Logger
@ -35,6 +46,10 @@ type nodeServer struct {
staged map[string]*stagedVolumeInfo // volumeID -> staged info
}
// transportFile is the filename written inside the staging directory to persist
// the transport type across CSI plugin restarts.
const transportFile = ".transport"
func (s *nodeServer) NodeStageVolume(ctx context.Context, req *csi.NodeStageVolumeRequest) (*csi.NodeStageVolumeResponse, error) {
volumeID := req.VolumeId
stagingPath := req.StagingTargetPath
@ -59,30 +74,42 @@ func (s *nodeServer) NodeStageVolume(ctx context.Context, req *csi.NodeStageVolu
return &csi.NodeStageVolumeResponse{}, nil
}
// Determine iSCSI target info.
// Priority: publish_context (fresh from ControllerPublish, reflects failover)
// > volume_context (from CreateVolume, may be stale after failover)
// > local volume manager fallback.
// Resolve iSCSI target info.
// Priority: publish_context > volume_context > local volume manager fallback.
var iqn, portal string
isLocal := false
if req.PublishContext != nil && req.PublishContext["iscsiAddr"] != "" && req.PublishContext["iqn"] != "" {
// Fresh address from ControllerPublishVolume (reflects current primary).
portal = req.PublishContext["iscsiAddr"]
iqn = req.PublishContext["iqn"]
} else if req.VolumeContext != nil && req.VolumeContext["iscsiAddr"] != "" && req.VolumeContext["iqn"] != "" {
// Fallback: volume_context from CreateVolume (may be stale after failover).
portal = req.VolumeContext["iscsiAddr"]
iqn = req.VolumeContext["iqn"]
} else if s.mgr != nil {
// Local fallback: open volume via local VolumeManager.
isLocal = true
if err := s.mgr.OpenVolume(volumeID); err != nil {
return nil, status.Errorf(codes.Internal, "open volume: %v", err)
}
iqn = s.mgr.VolumeIQN(volumeID)
portal = s.mgr.ListenAddr()
} else {
}
// Resolve NVMe target info (same priority chain).
// PublishContext > VolumeContext > local VolumeManager.
var nqn, nvmeAddr string
if req.PublishContext != nil && req.PublishContext["nvmeAddr"] != "" && req.PublishContext["nqn"] != "" {
nvmeAddr = req.PublishContext["nvmeAddr"]
nqn = req.PublishContext["nqn"]
} else if req.VolumeContext != nil && req.VolumeContext["nvmeAddr"] != "" && req.VolumeContext["nqn"] != "" {
nvmeAddr = req.VolumeContext["nvmeAddr"]
nqn = req.VolumeContext["nqn"]
} else if s.mgr != nil && s.mgr.NvmeAddr() != "" {
nvmeAddr = s.mgr.NvmeAddr()
nqn = s.mgr.VolumeNQN(volumeID)
}
// No transport info at all (neither iSCSI nor NVMe resolved, no local mgr).
if iqn == "" && nqn == "" {
return nil, status.Error(codes.FailedPrecondition, "no volume_context and no local volume manager")
}
@ -97,26 +124,58 @@ func (s *nodeServer) NodeStageVolume(ctx context.Context, req *csi.NodeStageVolu
}
}()
// Check if already logged in, skip login if so.
loggedIn, err := s.iscsiUtil.IsLoggedIn(ctx, iqn)
if err != nil {
return nil, status.Errorf(codes.Internal, "check iscsi login: %v", err)
}
// Transport selection: prefer NVMe if supported, fall back to iSCSI.
var device, transport string
if !loggedIn {
// Discovery + login.
if err := s.iscsiUtil.Discovery(ctx, portal); err != nil {
return nil, status.Errorf(codes.Internal, "iscsi discovery: %v", err)
nvmeAvailable := nvmeAddr != "" && nqn != "" && s.nvmeUtil != nil && s.nvmeUtil.IsNVMeTCPAvailable()
if nvmeAvailable {
// NVMe path — fail fast on error, no fallback to iSCSI.
transport = transportNVMe
connected, cerr := s.nvmeUtil.IsConnected(ctx, nqn)
if cerr != nil {
return nil, status.Errorf(codes.Internal, "check nvme connection: %v", cerr)
}
if err := s.iscsiUtil.Login(ctx, iqn, portal); err != nil {
return nil, status.Errorf(codes.Internal, "iscsi login: %v", err)
if !connected {
if cerr := s.nvmeUtil.Connect(ctx, nqn, nvmeAddr); cerr != nil {
return nil, status.Errorf(codes.Internal, "nvme connect: %v", cerr)
}
}
}
// Wait for device to appear.
device, err := s.iscsiUtil.GetDeviceByIQN(ctx, iqn)
if err != nil {
return nil, status.Errorf(codes.Internal, "get device: %v", err)
// Cleanup NVMe on subsequent failures.
defer func() {
if !success {
s.nvmeUtil.Disconnect(ctx, nqn)
}
}()
device, err = s.nvmeUtil.GetDeviceByNQN(ctx, nqn)
if err != nil {
return nil, status.Errorf(codes.Internal, "nvme get device: %v", err)
}
} else if iqn != "" && portal != "" {
// iSCSI path (existing code).
transport = transportISCSI
loggedIn, lerr := s.iscsiUtil.IsLoggedIn(ctx, iqn)
if lerr != nil {
return nil, status.Errorf(codes.Internal, "check iscsi login: %v", lerr)
}
if !loggedIn {
if err := s.iscsiUtil.Discovery(ctx, portal); err != nil {
return nil, status.Errorf(codes.Internal, "iscsi discovery: %v", err)
}
if err := s.iscsiUtil.Login(ctx, iqn, portal); err != nil {
return nil, status.Errorf(codes.Internal, "iscsi login: %v", err)
}
}
device, err = s.iscsiUtil.GetDeviceByIQN(ctx, iqn)
if err != nil {
return nil, status.Errorf(codes.Internal, "get device: %v", err)
}
} else {
return nil, status.Error(codes.FailedPrecondition, "no transport available")
}
// Ensure staging directory exists.
@ -136,6 +195,11 @@ func (s *nodeServer) NodeStageVolume(ctx context.Context, req *csi.NodeStageVolu
return nil, status.Errorf(codes.Internal, "format and mount: %v", err)
}
// Write transport marker for restart recovery.
if werr := writeTransportFile(stagingPath, transport); werr != nil {
s.logger.Printf("NodeStageVolume: %s: %v (non-fatal)", volumeID, werr)
}
// Track staged volume for unstage.
s.stagedMu.Lock()
if s.staged == nil {
@ -144,6 +208,9 @@ func (s *nodeServer) NodeStageVolume(ctx context.Context, req *csi.NodeStageVolu
s.staged[volumeID] = &stagedVolumeInfo{
iqn: iqn,
iscsiAddr: portal,
nqn: nqn,
nvmeAddr: nvmeAddr,
transport: transport,
isLocal: isLocal,
fsType: fsType,
stagingPath: stagingPath,
@ -151,7 +218,7 @@ func (s *nodeServer) NodeStageVolume(ctx context.Context, req *csi.NodeStageVolu
s.stagedMu.Unlock()
success = true
s.logger.Printf("NodeStageVolume: %s staged at %s (device=%s, iqn=%s, local=%v)", volumeID, stagingPath, device, iqn, isLocal)
s.logger.Printf("NodeStageVolume: %s staged at %s (device=%s, transport=%s, local=%v)", volumeID, stagingPath, device, transport, isLocal)
return &csi.NodeStageVolumeResponse{}, nil
}
@ -166,26 +233,49 @@ func (s *nodeServer) NodeUnstageVolume(ctx context.Context, req *csi.NodeUnstage
return nil, status.Error(codes.InvalidArgument, "staging target path is required")
}
// Look up staged info. If not found (e.g. driver restarted), derive IQN.
// Look up staged info.
s.stagedMu.Lock()
info := s.staged[volumeID]
s.stagedMu.Unlock()
var iqn string
// Determine transport and identifiers.
var iqn, nqn, transport string
isLocal := false
if info != nil {
iqn = info.iqn
nqn = info.nqn
transport = info.transport
isLocal = info.isLocal
} else {
// Restart fallback: derive IQN from volumeID.
// iscsiadm -m node -T <iqn> --logout works without knowing the portal.
// Restart fallback: read .transport file from staging path.
transport = readTransportFile(stagingPath)
// Derive identifiers.
if s.mgr != nil {
iqn = s.mgr.VolumeIQN(volumeID)
nqn = s.mgr.VolumeNQN(volumeID)
isLocal = true
} else if s.iqnPrefix != "" {
iqn = s.iqnPrefix + ":" + blockvol.SanitizeIQN(volumeID)
} else {
if s.iqnPrefix != "" {
iqn = s.iqnPrefix + ":" + blockvol.SanitizeIQN(volumeID)
}
if s.nqnPrefix != "" {
nqn = blockvol.BuildNQN(s.nqnPrefix, volumeID)
}
}
s.logger.Printf("NodeUnstageVolume: %s not in staged map, derived iqn=%s", volumeID, iqn)
// If no .transport file, probe NVMe connection to determine transport.
if transport == "" && nqn != "" && s.nvmeUtil != nil {
if connected, _ := s.nvmeUtil.IsConnected(ctx, nqn); connected {
transport = transportNVMe
}
}
if transport == "" {
transport = transportISCSI // default fallback
}
s.logger.Printf("NodeUnstageVolume: %s not in staged map, derived transport=%s", volumeID, transport)
}
// Best-effort cleanup: always attempt all steps even if one fails.
@ -197,12 +287,24 @@ func (s *nodeServer) NodeUnstageVolume(ctx context.Context, req *csi.NodeUnstage
firstErr = err
}
// iSCSI logout.
if iqn != "" {
if err := s.iscsiUtil.Logout(ctx, iqn); err != nil {
s.logger.Printf("NodeUnstageVolume: logout error: %v", err)
if firstErr == nil {
firstErr = err
// Disconnect transport.
switch transport {
case transportNVMe:
if nqn != "" && s.nvmeUtil != nil {
if err := s.nvmeUtil.Disconnect(ctx, nqn); err != nil {
s.logger.Printf("NodeUnstageVolume: nvme disconnect error: %v", err)
if firstErr == nil {
firstErr = err
}
}
}
default: // iSCSI
if iqn != "" {
if err := s.iscsiUtil.Logout(ctx, iqn); err != nil {
s.logger.Printf("NodeUnstageVolume: logout error: %v", err)
if firstErr == nil {
firstErr = err
}
}
}
}
@ -218,10 +320,13 @@ func (s *nodeServer) NodeUnstageVolume(ctx context.Context, req *csi.NodeUnstage
}
if firstErr != nil {
// Keep staged entry so retry has correct isLocal/iqn info.
// Keep staged entry so retry has correct info.
return nil, status.Errorf(codes.Internal, "unstage: %v", firstErr)
}
// Clean up transport file.
os.Remove(filepath.Join(stagingPath, transportFile))
// Remove from staged map only after successful cleanup.
s.stagedMu.Lock()
delete(s.staged, volumeID)
@ -310,24 +415,38 @@ func (s *nodeServer) NodeExpandVolume(ctx context.Context, req *csi.NodeExpandVo
return nil, status.Errorf(codes.FailedPrecondition, "volume %q not staged", req.VolumeId)
}
// Check that iSCSI session is active.
loggedIn, err := s.iscsiUtil.IsLoggedIn(ctx, info.iqn)
if err != nil {
return nil, status.Errorf(codes.Internal, "check iSCSI session: %v", err)
}
if !loggedIn {
return nil, status.Errorf(codes.FailedPrecondition, "volume %q not staged: iSCSI session not active", req.VolumeId)
}
// Rescan device to pick up new size.
if err := s.iscsiUtil.RescanDevice(ctx, info.iqn); err != nil {
return nil, status.Errorf(codes.Internal, "rescan iSCSI device: %v", err)
}
var device string
var err error
// Find the device path.
device, err := s.iscsiUtil.GetDeviceByIQN(ctx, info.iqn)
if err != nil {
return nil, status.Errorf(codes.Internal, "find device: %v", err)
switch info.transport {
case transportNVMe:
// NVMe: rescan namespace, then find device.
if s.nvmeUtil == nil {
return nil, status.Errorf(codes.Internal, "nvme util not available")
}
if err := s.nvmeUtil.Rescan(ctx, info.nqn); err != nil {
return nil, status.Errorf(codes.Internal, "nvme rescan: %v", err)
}
device, err = s.nvmeUtil.GetDeviceByNQN(ctx, info.nqn)
if err != nil {
return nil, status.Errorf(codes.Internal, "nvme find device: %v", err)
}
default: // iSCSI
// Check that iSCSI session is active.
loggedIn, lerr := s.iscsiUtil.IsLoggedIn(ctx, info.iqn)
if lerr != nil {
return nil, status.Errorf(codes.Internal, "check iSCSI session: %v", lerr)
}
if !loggedIn {
return nil, status.Errorf(codes.FailedPrecondition, "volume %q not staged: iSCSI session not active", req.VolumeId)
}
if err := s.iscsiUtil.RescanDevice(ctx, info.iqn); err != nil {
return nil, status.Errorf(codes.Internal, "rescan iSCSI device: %v", err)
}
device, err = s.iscsiUtil.GetDeviceByIQN(ctx, info.iqn)
if err != nil {
return nil, status.Errorf(codes.Internal, "find device: %v", err)
}
}
// Determine mount path and fsType.
@ -345,7 +464,7 @@ func (s *nodeServer) NodeExpandVolume(ctx context.Context, req *csi.NodeExpandVo
return nil, status.Errorf(codes.Internal, "resize filesystem: %v", err)
}
s.logger.Printf("NodeExpandVolume: %s expanded (device=%s, fs=%s)", req.VolumeId, device, fsType)
s.logger.Printf("NodeExpandVolume: %s expanded (device=%s, transport=%s, fs=%s)", req.VolumeId, device, info.transport, fsType)
capacity := int64(0)
if req.CapacityRange != nil {
@ -384,3 +503,28 @@ func (s *nodeServer) NodeGetInfo(_ context.Context, _ *csi.NodeGetInfoRequest) (
},
}, nil
}
// writeTransportFile persists the transport type ("iscsi" or "nvme") as a
// marker file inside the staging directory so NodeUnstageVolume can recover
// the transport after a CSI plugin restart.
func writeTransportFile(stagingPath, transport string) error {
	marker := filepath.Join(stagingPath, transportFile)
	err := os.WriteFile(marker, []byte(transport), 0600)
	if err != nil {
		return fmt.Errorf("write transport file: %w", err)
	}
	return nil
}
// readTransportFile reads the transport marker written by writeTransportFile
// from the staging directory.
//
// It returns transportISCSI or transportNVMe on success, and "" when the file
// is missing, unreadable, or holds an unrecognized value; callers treat "" as
// "transport unknown" and fall back to probing / defaults.
//
// Trailing whitespace is stripped before validation so a marker edited by
// hand (or written with a trailing newline) is still recognized — the old
// exact-match comparison silently rejected such files.
func readTransportFile(stagingPath string) string {
	data, err := os.ReadFile(filepath.Join(stagingPath, transportFile))
	if err != nil {
		return ""
	}
	t := string(data)
	// Trim trailing newline / CR / spaces / tabs (no strings import needed).
	for len(t) > 0 && (t[len(t)-1] == '\n' || t[len(t)-1] == '\r' || t[len(t)-1] == ' ' || t[len(t)-1] == '\t') {
		t = t[:len(t)-1]
	}
	if t == transportISCSI || t == transportNVMe {
		return t
	}
	return ""
}

1222
weed/storage/blockvol/csi/nvme_node_test.go
File diff suppressed because it is too large
View File

247
weed/storage/blockvol/csi/nvme_util.go

@ -0,0 +1,247 @@
package csi
import (
"context"
"encoding/json"
"errors"
"fmt"
"net"
"os"
"os/exec"
"strings"
"time"
)
// NVMeUtil provides NVMe/TCP initiator operations.
// Implementations wrap nvme-cli (realNVMeUtil) or are in-memory test doubles
// (mockNVMeUtil). All nqn arguments are subsystem NQNs; addr is "host:port".
type NVMeUtil interface {
// Connect establishes an NVMe/TCP connection to the subsystem at addr (idempotent).
Connect(ctx context.Context, nqn, addr string) error
// Disconnect tears down the connection to the subsystem (idempotent).
Disconnect(ctx context.Context, nqn string) error
// IsConnected reports whether a connection to the subsystem currently exists.
IsConnected(ctx context.Context, nqn string) (bool, error)
// GetDeviceByNQN returns the namespace block device path (e.g. /dev/nvme0n1).
GetDeviceByNQN(ctx context.Context, nqn string) (string, error)
// GetControllerByNQN returns the controller device path (e.g. /dev/nvme0).
GetControllerByNQN(ctx context.Context, nqn string) (string, error)
// Rescan re-enumerates namespaces on the subsystem's controller (after expand).
Rescan(ctx context.Context, nqn string) error
// IsNVMeTCPAvailable reports whether this host can use NVMe/TCP at all.
IsNVMeTCPAvailable() bool
}
// realNVMeUtil uses nvme-cli commands.
type realNVMeUtil struct{}
func (r *realNVMeUtil) Connect(ctx context.Context, nqn, addr string) error {
host, port, err := net.SplitHostPort(addr)
if err != nil {
return fmt.Errorf("nvme connect: invalid addr %q: %w", addr, err)
}
cmd := exec.CommandContext(ctx, "nvme", "connect", "-t", "tcp", "-n", nqn, "-a", host, "-s", port)
out, err := cmd.CombinedOutput()
if err != nil {
// Treat "already connected" as success (idempotent).
if strings.Contains(string(out), "already connected") {
return nil
}
return fmt.Errorf("nvme connect: %s: %w", string(out), err)
}
return nil
}
func (r *realNVMeUtil) Disconnect(ctx context.Context, nqn string) error {
cmd := exec.CommandContext(ctx, "nvme", "disconnect", "-n", nqn)
out, err := cmd.CombinedOutput()
if err != nil {
// Treat "not connected" / "no subsystem" as success (idempotent).
outStr := string(out)
if strings.Contains(outStr, "not connected") || strings.Contains(outStr, "No subsystemtype") || strings.Contains(outStr, "Invalid argument") {
return nil
}
return fmt.Errorf("nvme disconnect: %s: %w", outStr, err)
}
return nil
}
func (r *realNVMeUtil) IsConnected(ctx context.Context, nqn string) (bool, error) {
_, _, err := r.findSubsys(ctx, nqn)
if err != nil {
if errors.Is(err, errNQNNotFound) {
return false, nil // NQN not present = not connected
}
return false, err // command/parse failure — propagate
}
return true, nil
}
// errNQNNotFound is returned by findSubsys when the NQN is not in the subsystem list.
// Callers use errors.Is to distinguish "not found" from command/parse errors.
var errNQNNotFound = errors.New("nvme: NQN not found")
func (r *realNVMeUtil) GetDeviceByNQN(ctx context.Context, nqn string) (string, error) {
// Poll for device to appear (NVMe connect + device enumeration is async).
deadline := time.After(10 * time.Second)
ticker := time.NewTicker(200 * time.Millisecond)
defer ticker.Stop()
for {
select {
case <-ctx.Done():
return "", ctx.Err()
case <-deadline:
return "", fmt.Errorf("timeout waiting for NVMe device for NQN %s", nqn)
case <-ticker.C:
_, dev, err := r.findSubsys(ctx, nqn)
if err != nil {
continue
}
if dev != "" {
return dev, nil
}
}
}
}
func (r *realNVMeUtil) GetControllerByNQN(ctx context.Context, nqn string) (string, error) {
ctrl, _, err := r.findSubsys(ctx, nqn)
if err != nil {
return "", err
}
if ctrl == "" {
return "", fmt.Errorf("no controller found for NQN %s", nqn)
}
return ctrl, nil
}
func (r *realNVMeUtil) Rescan(ctx context.Context, nqn string) error {
ctrl, err := r.GetControllerByNQN(ctx, nqn)
if err != nil {
return fmt.Errorf("nvme rescan: find controller: %w", err)
}
cmd := exec.CommandContext(ctx, "nvme", "ns-rescan", ctrl)
out, errCmd := cmd.CombinedOutput()
if errCmd != nil {
return fmt.Errorf("nvme ns-rescan %s: %s: %w", ctrl, string(out), errCmd)
}
return nil
}
// IsNVMeTCPAvailable checks if the nvme_tcp kernel module is loaded (read-only).
func (r *realNVMeUtil) IsNVMeTCPAvailable() bool {
_, err := os.Stat("/sys/module/nvme_tcp")
return err == nil
}
// nvmeListSubsysOutput represents the JSON output from `nvme list-subsys -o json`.
type nvmeListSubsysOutput struct {
Subsystems []nvmeSubsys `json:"Subsystems"`
}
type nvmeSubsys struct {
NQN string `json:"NQN"`
Paths []nvmePath `json:"Paths"`
// Some nvme-cli versions use "Namespaces" instead.
}
type nvmePath struct {
Name string `json:"Name"` // controller name, e.g. "nvme0"
Transport string `json:"Transport"`
State string `json:"State"`
}
// findSubsys parses `nvme list-subsys -o json` to find controller and namespace device
// for a given NQN. Returns (controller path, namespace device path, error).
// Returns errNQNNotFound (sentinel) when the NQN is absent from the subsystem list.
// Returns a non-sentinel error for command execution or JSON parse failures.
func (r *realNVMeUtil) findSubsys(ctx context.Context, nqn string) (string, string, error) {
cmd := exec.CommandContext(ctx, "nvme", "list-subsys", "-o", "json")
out, err := cmd.CombinedOutput()
if err != nil {
return "", "", fmt.Errorf("nvme list-subsys: %s: %w", string(out), err)
}
var parsed nvmeListSubsysOutput
if err := json.Unmarshal(out, &parsed); err != nil {
return "", "", fmt.Errorf("nvme list-subsys: parse json: %w", err)
}
for _, ss := range parsed.Subsystems {
if ss.NQN != nqn {
continue
}
// Prefer a live TCP path. Fall back to any path with a name.
var fallbackCtrl string
for _, p := range ss.Paths {
if p.Name == "" {
continue
}
ctrl := "/dev/" + p.Name
dev := ctrl + "n1"
// Prefer Transport=tcp + State=live.
if strings.EqualFold(p.Transport, "tcp") && strings.EqualFold(p.State, "live") {
return ctrl, dev, nil
}
if fallbackCtrl == "" {
fallbackCtrl = ctrl
}
}
if fallbackCtrl != "" {
return fallbackCtrl, fallbackCtrl + "n1", nil
}
return "", "", fmt.Errorf("NQN %s found but no controller paths", nqn)
}
return "", "", errNQNNotFound
}
// mockNVMeUtil is an in-memory test double for NVMeUtil. Error fields let
// tests inject a failure per operation, connected tracks per-NQN connection
// state, and calls records each operation as an "op:args" string so tests can
// assert on call order and arguments.
type mockNVMeUtil struct {
	connectErr          error
	disconnectErr       error
	getDeviceResult     string
	getDeviceErr        error
	getControllerResult string
	getControllerErr    error
	rescanErr           error
	nvmeTCPAvailable    bool
	connected           map[string]bool
	calls               []string
}

// newMockNVMeUtil returns a mock with an initialized (empty) connection map.
func newMockNVMeUtil() *mockNVMeUtil {
	return &mockNVMeUtil{connected: make(map[string]bool)}
}

// Connect records the call and marks nqn connected unless connectErr is set.
func (m *mockNVMeUtil) Connect(_ context.Context, nqn, addr string) error {
	m.calls = append(m.calls, "connect:"+nqn+":"+addr)
	if err := m.connectErr; err != nil {
		return err
	}
	m.connected[nqn] = true
	return nil
}

// Disconnect records the call and clears nqn unless disconnectErr is set.
func (m *mockNVMeUtil) Disconnect(_ context.Context, nqn string) error {
	m.calls = append(m.calls, "disconnect:"+nqn)
	if err := m.disconnectErr; err != nil {
		return err
	}
	delete(m.connected, nqn)
	return nil
}

// IsConnected reports the tracked connection state for nqn (never errors).
func (m *mockNVMeUtil) IsConnected(_ context.Context, nqn string) (bool, error) {
	return m.connected[nqn], nil
}

// GetDeviceByNQN records the call and returns the configured result/error.
func (m *mockNVMeUtil) GetDeviceByNQN(_ context.Context, nqn string) (string, error) {
	m.calls = append(m.calls, "getdevice:"+nqn)
	return m.getDeviceResult, m.getDeviceErr
}

// GetControllerByNQN records the call and returns the configured result/error.
func (m *mockNVMeUtil) GetControllerByNQN(_ context.Context, nqn string) (string, error) {
	m.calls = append(m.calls, "getcontroller:"+nqn)
	return m.getControllerResult, m.getControllerErr
}

// Rescan records the call and returns the configured rescanErr.
func (m *mockNVMeUtil) Rescan(_ context.Context, nqn string) error {
	m.calls = append(m.calls, "rescan:"+nqn)
	return m.rescanErr
}

// IsNVMeTCPAvailable returns the configured availability flag.
func (m *mockNVMeUtil) IsNVMeTCPAvailable() bool {
	return m.nvmeTCPAvailable
}

1088
weed/storage/blockvol/csi/qa_cp102_nvme_node_test.go
File diff suppressed because it is too large
View File

13
weed/storage/blockvol/csi/qa_cp62_test.go

@ -132,9 +132,16 @@ func TestQA_Node_ConcurrentStageUnstage(t *testing.T) {
var wg sync.WaitGroup
var panicked atomic.Bool
// Pre-allocate temp dirs to avoid calling t.TempDir() inside goroutines
// (t.TempDir() panics if called after test cleanup).
stagingDirs := make([]string, 20)
for i := range stagingDirs {
stagingDirs[i] = t.TempDir()
}
for i := 0; i < 20; i++ {
wg.Add(2)
go func() {
go func(dir string) {
defer wg.Done()
defer func() {
if r := recover(); r != nil {
@ -144,10 +151,10 @@ func TestQA_Node_ConcurrentStageUnstage(t *testing.T) {
}()
ns.NodeStageVolume(context.Background(), &csi.NodeStageVolumeRequest{
VolumeId: "test-vol",
StagingTargetPath: t.TempDir(),
StagingTargetPath: dir,
VolumeCapability: testVolCap(),
})
}()
}(stagingDirs[i])
go func() {
defer wg.Done()
defer func() {

7
weed/storage/blockvol/csi/server.go

@ -19,6 +19,8 @@ type DriverConfig struct {
DataDir string // volume data directory
ISCSIAddr string // local iSCSI target listen address
IQNPrefix string // IQN prefix for volumes
NVMeAddr string // local NVMe/TCP target listen address (empty = NVMe disabled)
NQNPrefix string // NQN prefix for NVMe subsystems
NodeID string // node identifier
Logger *log.Logger
@ -66,7 +68,8 @@ func NewCSIDriver(cfg DriverConfig) (*CSIDriver, error) {
var mgr *VolumeManager
needsLocalMgr := cfg.Mode == "all" && cfg.MasterAddr == "" || cfg.Mode == "node"
if needsLocalMgr {
mgr = NewVolumeManager(cfg.DataDir, cfg.ISCSIAddr, cfg.IQNPrefix, cfg.Logger)
mgr = NewVolumeManager(cfg.DataDir, cfg.ISCSIAddr, cfg.IQNPrefix, cfg.Logger,
VolumeManagerOpts{NvmeAddr: cfg.NVMeAddr, NQNPrefix: cfg.NQNPrefix})
d.mgr = mgr
}
@ -89,7 +92,9 @@ func NewCSIDriver(cfg DriverConfig) (*CSIDriver, error) {
mgr: mgr, // may be nil in controller-only mode
nodeID: cfg.NodeID,
iqnPrefix: cfg.IQNPrefix,
nqnPrefix: cfg.NQNPrefix,
iscsiUtil: &realISCSIUtil{},
nvmeUtil: &realNVMeUtil{},
mountUtil: &realMountUtil{},
logger: cfg.Logger,
staged: make(map[string]*stagedVolumeInfo),

11
weed/storage/blockvol/csi/volume_backend.go

@ -14,6 +14,8 @@ type VolumeInfo struct {
VolumeID string
ISCSIAddr string // iSCSI target address (ip:port)
IQN string // iSCSI target IQN
NvmeAddr string // NVMe/TCP target address (ip:port), empty if NVMe disabled
NQN string // NVMe subsystem NQN, empty if NVMe disabled
CapacityBytes uint64
}
@ -59,6 +61,8 @@ func (b *LocalVolumeBackend) CreateVolume(ctx context.Context, name string, size
VolumeID: name,
ISCSIAddr: b.mgr.ListenAddr(),
IQN: b.mgr.VolumeIQN(name),
NvmeAddr: b.mgr.NvmeAddr(),
NQN: b.mgr.VolumeNQN(name),
CapacityBytes: actualSize,
}, nil
}
@ -75,6 +79,8 @@ func (b *LocalVolumeBackend) LookupVolume(ctx context.Context, name string) (*Vo
VolumeID: name,
ISCSIAddr: b.mgr.ListenAddr(),
IQN: b.mgr.VolumeIQN(name),
NvmeAddr: b.mgr.NvmeAddr(),
NQN: b.mgr.VolumeNQN(name),
CapacityBytes: b.mgr.VolumeSizeBytes(name),
}, nil
}
@ -100,6 +106,11 @@ func (b *LocalVolumeBackend) ExpandVolume(ctx context.Context, volumeID string,
}
// MasterVolumeClient calls master gRPC for volume operations.
// NOTE: NvmeAddr/NQN fields in VolumeInfo are NOT populated by MasterVolumeClient
// because the master proto (CreateBlockVolumeResponse, LookupBlockVolumeResponse)
// does not yet have nvme_addr/nqn fields. This is deferred until proto is updated
// in a future CP. NVMe support via master-backend path is therefore iSCSI-only
// until that proto change lands.
type MasterVolumeClient struct {
masterAddr string
dialOpt grpc.DialOption

38
weed/storage/blockvol/csi/volume_manager.go

@ -25,6 +25,7 @@ type managedVolume struct {
vol *blockvol.BlockVol
path string // file path to .blk file
iqn string // target IQN for this volume
nqn string // NVMe subsystem NQN for this volume
sizeBytes uint64
}
@ -45,19 +46,27 @@ type VolumeManager struct {
volumes map[string]*managedVolume
target *iscsi.TargetServer
iqnPrefix string
nqnPrefix string
config iscsi.TargetConfig
logger *log.Logger
state managerState
iscsiAddr string
nvmeAddr string
}
// VolumeManagerOpts holds optional configuration for VolumeManager.
type VolumeManagerOpts struct {
NvmeAddr string // NVMe/TCP target address (ip:port), empty if NVMe disabled
NQNPrefix string // NQN prefix for NVMe subsystems
}
// NewVolumeManager creates a new VolumeManager.
func NewVolumeManager(dataDir, iscsiAddr, iqnPrefix string, logger *log.Logger) *VolumeManager {
func NewVolumeManager(dataDir, iscsiAddr, iqnPrefix string, logger *log.Logger, opts ...VolumeManagerOpts) *VolumeManager {
if logger == nil {
logger = log.Default()
}
config := iscsi.DefaultTargetConfig()
return &VolumeManager{
vm := &VolumeManager{
dataDir: dataDir,
volumes: make(map[string]*managedVolume),
iqnPrefix: iqnPrefix,
@ -65,6 +74,11 @@ func NewVolumeManager(dataDir, iscsiAddr, iqnPrefix string, logger *log.Logger)
logger: logger,
iscsiAddr: iscsiAddr,
}
if len(opts) > 0 {
vm.nvmeAddr = opts[0].NvmeAddr
vm.nqnPrefix = opts[0].NQNPrefix
}
return vm
}
// Start initializes and starts the shared TargetServer.
@ -175,6 +189,7 @@ func (m *VolumeManager) CreateVolume(name string, sizeBytes uint64) error {
vol: vol,
path: volPath,
iqn: iqn,
nqn: m.volumeNQN(name),
sizeBytes: info.VolumeSize,
}
m.logger.Printf("adopted existing volume %q: %s (%d bytes)", name, iqn, info.VolumeSize)
@ -198,6 +213,7 @@ func (m *VolumeManager) CreateVolume(name string, sizeBytes uint64) error {
vol: vol,
path: volPath,
iqn: iqn,
nqn: m.volumeNQN(name),
sizeBytes: sizeBytes,
}
@ -267,6 +283,7 @@ func (m *VolumeManager) OpenVolume(name string) error {
vol: vol,
path: volPath,
iqn: iqn,
nqn: m.volumeNQN(name),
sizeBytes: info.VolumeSize,
}
@ -325,6 +342,23 @@ func (m *VolumeManager) ListenAddr() string {
return ""
}
// NvmeAddr returns the NVMe/TCP target address, or empty if NVMe is disabled.
func (m *VolumeManager) NvmeAddr() string {
return m.nvmeAddr
}
// VolumeNQN returns the NVMe NQN for a volume name. Returns empty if nqnPrefix is not set.
// An empty result tells callers (e.g. the nodeServer restart fallback) that
// NVMe is not configured for this manager.
func (m *VolumeManager) VolumeNQN(name string) string {
if m.nqnPrefix == "" {
return ""
}
return m.volumeNQN(name)
}
// volumeNQN builds the NQN via blockvol.BuildNQN without the empty-prefix
// guard; internal callers record it alongside the IQN at volume registration.
func (m *VolumeManager) volumeNQN(name string) string {
return blockvol.BuildNQN(m.nqnPrefix, name)
}
// WithVolume runs fn while holding the manager lock with a reference to the volume.
func (m *VolumeManager) WithVolume(name string, fn func(*blockvol.BlockVol) error) error {
m.mu.RLock()

8
weed/storage/blockvol/naming.go

@ -16,6 +16,14 @@ func SanitizeFilename(name string) string {
return reInvalidFilename.ReplaceAllString(strings.ToLower(name), "-")
}
// BuildNQN constructs an NVMe NQN from a prefix and volume name.
// The prefix must already include the separator (e.g. "nqn.2024-01.com.seaweedfs:vol.").
// This is the single source of truth for NQN construction — used by both
// the volume server (BlockService) and the CSI driver (VolumeManager/nodeServer).
func BuildNQN(prefix, name string) string {
	suffix := SanitizeIQN(name)
	return prefix + suffix
}
// SanitizeIQN normalizes a CSI volume ID for use in an IQN.
// Lowercases, replaces invalid chars with '-', truncates to 64 chars.
// When truncation is needed, a hash suffix is appended to preserve uniqueness.

Loading…
Cancel
Save