Browse Source
feat: Phase 6 CP6-2 -- CSI control-plane integration + csi-sanity/k3s validation
CP6-2 wires the CSI driver to the SeaweedFS master/volume-server control plane:
- Proto: block volume messages in master.proto/volume_server.proto, codegen
- Master registry: in-memory BlockVolumeRegistry with Pending->Active status, full/delta heartbeat, inflight lock, placement (fewest volumes)
- VS gRPC: AllocateBlockVolume/DeleteBlockVolume handlers, shared naming
- Master RPCs: CreateBlockVolume (retry up to 3 servers), Delete, Lookup
- Heartbeat: block volume fields wired into bidirectional stream
- CSI Controller: VolumeBackend interface (Local + Master), returns volume_context
- CSI Node: reads volume_context for remote targets, staged map + IQN derivation
- Mode flag: --mode=controller/node/all, --master for control-plane
- K8s manifests: csi-driver.yaml, csi-controller.yaml, csi-node.yaml

csi-sanity conformance (33 pass, 58 skip) found 6 bugs:
- BUG-SANITY-1/2/3: missing VolumeCapabilities/VolumeCapability validation
- BUG-SANITY-4: NodePublish used mount instead of bind mount
- BUG-SANITY-5: NodeUnpublish didn't remove target path
- BUG-SANITY-6: NodeUnpublish failed on unmounted path

k3s Level 4 (PVC->Pod data persistence) found 1 bug:
- BUG-K3S-1: IsLoggedIn didn't handle iscsiadm exit code 21

226 CSI tests + 54 server tests = 280 new tests, all passing.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

Branch: feature/sw-block
55 changed files with 8749 additions and 443 deletions
-
3go.mod
-
2go.sum
-
74weed/pb/master.proto
-
946weed/pb/master_pb/master.pb.go
-
168weed/pb/master_pb/master_grpc.pb.go
-
27weed/pb/volume_server.proto
-
417weed/pb/volume_server_pb/volume_server.pb.go
-
276weed/pb/volume_server_pb/volume_server_grpc.pb.go
-
276weed/server/master_block_registry.go
-
292weed/server/master_block_registry_test.go
-
13weed/server/master_grpc_server.go
-
167weed/server/master_grpc_server_block.go
-
298weed/server/master_grpc_server_block_test.go
-
40weed/server/master_server.go
-
604weed/server/qa_block_cp62_test.go
-
48weed/server/volume_grpc_block.go
-
110weed/server/volume_grpc_block_test.go
-
40weed/server/volume_grpc_client_to_master.go
-
2weed/server/volume_server.go
-
96weed/server/volume_server_block.go
-
63weed/storage/blockvol/adapter.go
-
78weed/storage/blockvol/adapter_test.go
-
111weed/storage/blockvol/block_heartbeat_proto.go
-
85weed/storage/blockvol/block_heartbeat_proto_test.go
-
68weed/storage/blockvol/csi/cmd/block-csi/main.go
-
113weed/storage/blockvol/csi/cmd/block-csi/smoke-test.sh
-
130weed/storage/blockvol/csi/controller.go
-
127weed/storage/blockvol/csi/controller_test.go
-
44weed/storage/blockvol/csi/deploy/csi-controller.yaml
-
9weed/storage/blockvol/csi/deploy/csi-driver.yaml
-
73weed/storage/blockvol/csi/deploy/csi-node.yaml
-
28weed/storage/blockvol/csi/deploy/example-pvc.yaml
-
45weed/storage/blockvol/csi/deploy/rbac.yaml
-
7weed/storage/blockvol/csi/deploy/storageclass.yaml
-
44weed/storage/blockvol/csi/identity.go
-
51weed/storage/blockvol/csi/identity_test.go
-
306weed/storage/blockvol/csi/iscsi_util.go
-
313weed/storage/blockvol/csi/node.go
-
451weed/storage/blockvol/csi/node_test.go
-
997weed/storage/blockvol/csi/qa_cp62_test.go
-
819weed/storage/blockvol/csi/qa_csi_test.go
-
170weed/storage/blockvol/csi/server.go
-
131weed/storage/blockvol/csi/volume_backend.go
-
92weed/storage/blockvol/csi/volume_backend_test.go
-
344weed/storage/blockvol/csi/volume_manager.go
-
231weed/storage/blockvol/csi/volume_manager_test.go
-
57weed/storage/blockvol/iscsi/cmd/iscsi-target/main.go
-
2weed/storage/blockvol/iscsi/cmd/iscsi-target/metrics.go
-
4weed/storage/blockvol/iscsi/cmd/iscsi-target/metrics_test.go
-
22weed/storage/blockvol/iscsi/session.go
-
14weed/storage/blockvol/iscsi/target.go
-
31weed/storage/blockvol/naming.go
-
79weed/storage/blockvol/naming_test.go
-
7weed/storage/blockvol/test/apps_test.go
-
147weed/storage/blockvol/test/fault_helpers.go
946
weed/pb/master_pb/master.pb.go
File diff suppressed because it is too large
View File
File diff suppressed because it is too large
View File
@ -0,0 +1,276 @@ |
|||
package weed_server |
|||
|
|||
import ( |
|||
"fmt" |
|||
"sync" |
|||
|
|||
"github.com/seaweedfs/seaweedfs/weed/pb/master_pb" |
|||
) |
|||
|
|||
// VolumeStatus tracks the lifecycle of a block volume entry.
type VolumeStatus int

const (
	StatusPending VolumeStatus = iota // Created via RPC, not yet confirmed by heartbeat
	StatusActive                      // Confirmed by heartbeat from volume server
)

// BlockVolumeEntry tracks one block volume across the cluster.
type BlockVolumeEntry struct {
	Name         string
	VolumeServer string // volume server address (ip:port or grpc addr)
	Path         string // file path on volume server
	IQN          string // iSCSI qualified name for this volume (e.g. "iqn.2024...:<name>")
	ISCSIAddr    string // iSCSI portal address the initiator connects to
	SizeBytes    uint64 // provisioned capacity in bytes; refreshed from heartbeats
	Epoch        uint64 // epoch reported by the hosting volume server via heartbeat
	Role         uint32 // role reported by the hosting volume server via heartbeat
	Status       VolumeStatus
}

// BlockVolumeRegistry is the in-memory registry of block volumes.
// Rebuilt from heartbeats on master restart (no persistence).
// All map fields are guarded by mu; inflight is independently safe.
type BlockVolumeRegistry struct {
	mu           sync.RWMutex
	volumes      map[string]*BlockVolumeEntry // keyed by name
	byServer     map[string]map[string]bool   // server -> set of volume names
	blockServers map[string]bool              // servers known to support block volumes

	// inflight guards concurrent CreateBlockVolume for the same name.
	inflight sync.Map // name -> *inflightEntry
}

// inflightEntry is a zero-size marker value stored in the inflight map.
type inflightEntry struct{}

// NewBlockVolumeRegistry creates an empty registry.
func NewBlockVolumeRegistry() *BlockVolumeRegistry {
	return &BlockVolumeRegistry{
		volumes:      make(map[string]*BlockVolumeEntry),
		byServer:     make(map[string]map[string]bool),
		blockServers: make(map[string]bool),
	}
}
|||
|
|||
// Register adds an entry to the registry.
|
|||
// Returns error if a volume with the same name already exists.
|
|||
func (r *BlockVolumeRegistry) Register(entry *BlockVolumeEntry) error { |
|||
r.mu.Lock() |
|||
defer r.mu.Unlock() |
|||
if _, ok := r.volumes[entry.Name]; ok { |
|||
return fmt.Errorf("block volume %q already registered", entry.Name) |
|||
} |
|||
r.volumes[entry.Name] = entry |
|||
r.addToServer(entry.VolumeServer, entry.Name) |
|||
return nil |
|||
} |
|||
|
|||
// Unregister removes and returns the entry. Returns nil if not found.
|
|||
func (r *BlockVolumeRegistry) Unregister(name string) *BlockVolumeEntry { |
|||
r.mu.Lock() |
|||
defer r.mu.Unlock() |
|||
entry, ok := r.volumes[name] |
|||
if !ok { |
|||
return nil |
|||
} |
|||
delete(r.volumes, name) |
|||
r.removeFromServer(entry.VolumeServer, name) |
|||
return entry |
|||
} |
|||
|
|||
// Lookup returns the entry for the given name.
|
|||
func (r *BlockVolumeRegistry) Lookup(name string) (*BlockVolumeEntry, bool) { |
|||
r.mu.RLock() |
|||
defer r.mu.RUnlock() |
|||
e, ok := r.volumes[name] |
|||
return e, ok |
|||
} |
|||
|
|||
// ListByServer returns all entries hosted on the given server.
|
|||
func (r *BlockVolumeRegistry) ListByServer(server string) []*BlockVolumeEntry { |
|||
r.mu.RLock() |
|||
defer r.mu.RUnlock() |
|||
names, ok := r.byServer[server] |
|||
if !ok { |
|||
return nil |
|||
} |
|||
entries := make([]*BlockVolumeEntry, 0, len(names)) |
|||
for name := range names { |
|||
if e, ok := r.volumes[name]; ok { |
|||
entries = append(entries, e) |
|||
} |
|||
} |
|||
return entries |
|||
} |
|||
|
|||
// UpdateFullHeartbeat reconciles the registry from a full heartbeat.
|
|||
// Called on the first heartbeat from a volume server.
|
|||
// Marks reported volumes as Active, removes entries for this server
|
|||
// that are not reported (stale).
|
|||
func (r *BlockVolumeRegistry) UpdateFullHeartbeat(server string, infos []*master_pb.BlockVolumeInfoMessage) { |
|||
r.mu.Lock() |
|||
defer r.mu.Unlock() |
|||
|
|||
// Mark server as block-capable since it sent block volume info.
|
|||
r.blockServers[server] = true |
|||
|
|||
// Build set of reported paths.
|
|||
reported := make(map[string]*master_pb.BlockVolumeInfoMessage, len(infos)) |
|||
for _, info := range infos { |
|||
reported[info.Path] = info |
|||
} |
|||
|
|||
// Find entries for this server that are NOT reported -> remove them.
|
|||
if names, ok := r.byServer[server]; ok { |
|||
for name := range names { |
|||
entry := r.volumes[name] |
|||
if entry == nil { |
|||
continue |
|||
} |
|||
if _, found := reported[entry.Path]; !found { |
|||
delete(r.volumes, name) |
|||
delete(names, name) |
|||
} |
|||
} |
|||
} |
|||
|
|||
// Update or add entries for reported volumes.
|
|||
for _, info := range infos { |
|||
// Find existing entry by path on this server.
|
|||
var existing *BlockVolumeEntry |
|||
if names, ok := r.byServer[server]; ok { |
|||
for name := range names { |
|||
if e := r.volumes[name]; e != nil && e.Path == info.Path { |
|||
existing = e |
|||
break |
|||
} |
|||
} |
|||
} |
|||
if existing != nil { |
|||
// Update fields from heartbeat.
|
|||
existing.SizeBytes = info.VolumeSize |
|||
existing.Epoch = info.Epoch |
|||
existing.Role = info.Role |
|||
existing.Status = StatusActive |
|||
} |
|||
// If no existing entry found by path, it was created outside master
|
|||
// (e.g., manually). We don't auto-register unknown volumes — they
|
|||
// must be created via CreateBlockVolume RPC.
|
|||
} |
|||
} |
|||
|
|||
// UpdateDeltaHeartbeat processes incremental new/deleted block volumes.
|
|||
// Called on subsequent heartbeats (not the first).
|
|||
func (r *BlockVolumeRegistry) UpdateDeltaHeartbeat(server string, added []*master_pb.BlockVolumeShortInfoMessage, removed []*master_pb.BlockVolumeShortInfoMessage) { |
|||
r.mu.Lock() |
|||
defer r.mu.Unlock() |
|||
|
|||
// Remove deleted volumes.
|
|||
for _, rm := range removed { |
|||
if names, ok := r.byServer[server]; ok { |
|||
for name := range names { |
|||
if e := r.volumes[name]; e != nil && e.Path == rm.Path { |
|||
delete(r.volumes, name) |
|||
delete(names, name) |
|||
break |
|||
} |
|||
} |
|||
} |
|||
} |
|||
|
|||
// Mark newly appeared volumes as active (if they exist in registry).
|
|||
for _, add := range added { |
|||
if names, ok := r.byServer[server]; ok { |
|||
for name := range names { |
|||
if e := r.volumes[name]; e != nil && e.Path == add.Path { |
|||
e.Status = StatusActive |
|||
break |
|||
} |
|||
} |
|||
} |
|||
} |
|||
} |
|||
|
|||
// PickServer returns the server address with the fewest block volumes.
|
|||
// servers is the list of online volume server addresses.
|
|||
// Returns error if no servers available.
|
|||
func (r *BlockVolumeRegistry) PickServer(servers []string) (string, error) { |
|||
if len(servers) == 0 { |
|||
return "", fmt.Errorf("no block volume servers available") |
|||
} |
|||
r.mu.RLock() |
|||
defer r.mu.RUnlock() |
|||
|
|||
best := servers[0] |
|||
bestCount := r.countForServer(best) |
|||
for _, s := range servers[1:] { |
|||
c := r.countForServer(s) |
|||
if c < bestCount { |
|||
best = s |
|||
bestCount = c |
|||
} |
|||
} |
|||
return best, nil |
|||
} |
|||
|
|||
// AcquireInflight tries to acquire a per-name create lock.
|
|||
// Returns true if acquired (caller must call ReleaseInflight when done).
|
|||
// Returns false if another create is already in progress for this name.
|
|||
func (r *BlockVolumeRegistry) AcquireInflight(name string) bool { |
|||
_, loaded := r.inflight.LoadOrStore(name, &inflightEntry{}) |
|||
return !loaded // true = we stored it (acquired), false = already existed
|
|||
} |
|||
|
|||
// ReleaseInflight releases the per-name create lock.
|
|||
func (r *BlockVolumeRegistry) ReleaseInflight(name string) { |
|||
r.inflight.Delete(name) |
|||
} |
|||
|
|||
// countForServer returns the number of volumes on the given server.
|
|||
// Caller must hold at least RLock.
|
|||
func (r *BlockVolumeRegistry) countForServer(server string) int { |
|||
if names, ok := r.byServer[server]; ok { |
|||
return len(names) |
|||
} |
|||
return 0 |
|||
} |
|||
|
|||
func (r *BlockVolumeRegistry) addToServer(server, name string) { |
|||
if r.byServer[server] == nil { |
|||
r.byServer[server] = make(map[string]bool) |
|||
} |
|||
r.byServer[server][name] = true |
|||
} |
|||
|
|||
func (r *BlockVolumeRegistry) removeFromServer(server, name string) { |
|||
if names, ok := r.byServer[server]; ok { |
|||
delete(names, name) |
|||
if len(names) == 0 { |
|||
delete(r.byServer, server) |
|||
} |
|||
} |
|||
} |
|||
|
|||
// MarkBlockCapable records that the given server supports block volumes.
|
|||
func (r *BlockVolumeRegistry) MarkBlockCapable(server string) { |
|||
r.mu.Lock() |
|||
r.blockServers[server] = true |
|||
r.mu.Unlock() |
|||
} |
|||
|
|||
// UnmarkBlockCapable removes a server from the block-capable set.
|
|||
func (r *BlockVolumeRegistry) UnmarkBlockCapable(server string) { |
|||
r.mu.Lock() |
|||
delete(r.blockServers, server) |
|||
r.mu.Unlock() |
|||
} |
|||
|
|||
// BlockCapableServers returns the list of servers known to support block volumes.
|
|||
func (r *BlockVolumeRegistry) BlockCapableServers() []string { |
|||
r.mu.RLock() |
|||
defer r.mu.RUnlock() |
|||
servers := make([]string, 0, len(r.blockServers)) |
|||
for s := range r.blockServers { |
|||
servers = append(servers, s) |
|||
} |
|||
return servers |
|||
} |
|||
@ -0,0 +1,292 @@ |
|||
package weed_server |
|||
|
|||
import ( |
|||
"fmt" |
|||
"sync" |
|||
"testing" |
|||
|
|||
"github.com/seaweedfs/seaweedfs/weed/pb/master_pb" |
|||
) |
|||
|
|||
func TestRegistry_RegisterLookup(t *testing.T) { |
|||
r := NewBlockVolumeRegistry() |
|||
entry := &BlockVolumeEntry{ |
|||
Name: "vol1", |
|||
VolumeServer: "server1:9333", |
|||
Path: "/data/vol1.blk", |
|||
IQN: "iqn.2024.com.seaweedfs:vol1", |
|||
ISCSIAddr: "10.0.0.1:3260", |
|||
SizeBytes: 1 << 30, |
|||
Epoch: 1, |
|||
Role: 1, |
|||
Status: StatusPending, |
|||
} |
|||
if err := r.Register(entry); err != nil { |
|||
t.Fatalf("Register: %v", err) |
|||
} |
|||
got, ok := r.Lookup("vol1") |
|||
if !ok { |
|||
t.Fatal("Lookup: not found") |
|||
} |
|||
if got.Name != "vol1" || got.VolumeServer != "server1:9333" || got.Path != "/data/vol1.blk" { |
|||
t.Fatalf("Lookup: unexpected entry: %+v", got) |
|||
} |
|||
if got.Status != StatusPending { |
|||
t.Fatalf("Status: got %d, want %d", got.Status, StatusPending) |
|||
} |
|||
} |
|||
|
|||
func TestRegistry_Unregister(t *testing.T) { |
|||
r := NewBlockVolumeRegistry() |
|||
r.Register(&BlockVolumeEntry{Name: "vol1", VolumeServer: "s1", Path: "/vol1.blk"}) |
|||
removed := r.Unregister("vol1") |
|||
if removed == nil { |
|||
t.Fatal("Unregister returned nil") |
|||
} |
|||
if _, ok := r.Lookup("vol1"); ok { |
|||
t.Fatal("vol1 should not be found after Unregister") |
|||
} |
|||
// Double unregister returns nil.
|
|||
if r.Unregister("vol1") != nil { |
|||
t.Fatal("double Unregister should return nil") |
|||
} |
|||
} |
|||
|
|||
func TestRegistry_DuplicateRegister(t *testing.T) { |
|||
r := NewBlockVolumeRegistry() |
|||
r.Register(&BlockVolumeEntry{Name: "vol1", VolumeServer: "s1", Path: "/vol1.blk"}) |
|||
err := r.Register(&BlockVolumeEntry{Name: "vol1", VolumeServer: "s2", Path: "/vol1.blk"}) |
|||
if err == nil { |
|||
t.Fatal("duplicate Register should return error") |
|||
} |
|||
} |
|||
|
|||
func TestRegistry_ListByServer(t *testing.T) { |
|||
r := NewBlockVolumeRegistry() |
|||
r.Register(&BlockVolumeEntry{Name: "vol1", VolumeServer: "s1", Path: "/v1.blk"}) |
|||
r.Register(&BlockVolumeEntry{Name: "vol2", VolumeServer: "s1", Path: "/v2.blk"}) |
|||
r.Register(&BlockVolumeEntry{Name: "vol3", VolumeServer: "s2", Path: "/v3.blk"}) |
|||
|
|||
s1Vols := r.ListByServer("s1") |
|||
if len(s1Vols) != 2 { |
|||
t.Fatalf("ListByServer(s1): got %d, want 2", len(s1Vols)) |
|||
} |
|||
s2Vols := r.ListByServer("s2") |
|||
if len(s2Vols) != 1 { |
|||
t.Fatalf("ListByServer(s2): got %d, want 1", len(s2Vols)) |
|||
} |
|||
s3Vols := r.ListByServer("s3") |
|||
if len(s3Vols) != 0 { |
|||
t.Fatalf("ListByServer(s3): got %d, want 0", len(s3Vols)) |
|||
} |
|||
} |
|||
|
|||
func TestRegistry_UpdateFullHeartbeat(t *testing.T) { |
|||
r := NewBlockVolumeRegistry() |
|||
// Register two volumes on server s1.
|
|||
r.Register(&BlockVolumeEntry{Name: "vol1", VolumeServer: "s1", Path: "/v1.blk", Status: StatusPending}) |
|||
r.Register(&BlockVolumeEntry{Name: "vol2", VolumeServer: "s1", Path: "/v2.blk", Status: StatusPending}) |
|||
|
|||
// Full heartbeat reports only vol1 (vol2 is stale).
|
|||
r.UpdateFullHeartbeat("s1", []*master_pb.BlockVolumeInfoMessage{ |
|||
{Path: "/v1.blk", Epoch: 5, Role: 1}, |
|||
}) |
|||
|
|||
// vol1 should be Active.
|
|||
e1, ok := r.Lookup("vol1") |
|||
if !ok { |
|||
t.Fatal("vol1 should exist after full heartbeat") |
|||
} |
|||
if e1.Status != StatusActive { |
|||
t.Fatalf("vol1 status: got %d, want %d", e1.Status, StatusActive) |
|||
} |
|||
if e1.Epoch != 5 { |
|||
t.Fatalf("vol1 epoch: got %d, want 5", e1.Epoch) |
|||
} |
|||
|
|||
// vol2 should be removed (stale).
|
|||
if _, ok := r.Lookup("vol2"); ok { |
|||
t.Fatal("vol2 should have been removed as stale") |
|||
} |
|||
} |
|||
|
|||
func TestRegistry_UpdateDeltaHeartbeat(t *testing.T) { |
|||
r := NewBlockVolumeRegistry() |
|||
r.Register(&BlockVolumeEntry{Name: "vol1", VolumeServer: "s1", Path: "/v1.blk", Status: StatusPending}) |
|||
r.Register(&BlockVolumeEntry{Name: "vol2", VolumeServer: "s1", Path: "/v2.blk", Status: StatusActive}) |
|||
|
|||
// Delta: vol1 newly appeared, vol2 deleted.
|
|||
r.UpdateDeltaHeartbeat("s1", |
|||
[]*master_pb.BlockVolumeShortInfoMessage{{Path: "/v1.blk"}}, |
|||
[]*master_pb.BlockVolumeShortInfoMessage{{Path: "/v2.blk"}}, |
|||
) |
|||
|
|||
// vol1 should be Active.
|
|||
e1, ok := r.Lookup("vol1") |
|||
if !ok { |
|||
t.Fatal("vol1 should exist") |
|||
} |
|||
if e1.Status != StatusActive { |
|||
t.Fatalf("vol1 status: got %d, want Active", e1.Status) |
|||
} |
|||
|
|||
// vol2 should be removed.
|
|||
if _, ok := r.Lookup("vol2"); ok { |
|||
t.Fatal("vol2 should have been removed by delta") |
|||
} |
|||
} |
|||
|
|||
func TestRegistry_PendingToActive(t *testing.T) { |
|||
r := NewBlockVolumeRegistry() |
|||
r.Register(&BlockVolumeEntry{ |
|||
Name: "vol1", VolumeServer: "s1", Path: "/v1.blk", |
|||
Status: StatusPending, Epoch: 1, |
|||
}) |
|||
|
|||
// Full heartbeat confirms the volume.
|
|||
r.UpdateFullHeartbeat("s1", []*master_pb.BlockVolumeInfoMessage{ |
|||
{Path: "/v1.blk", Epoch: 1, Role: 1}, |
|||
}) |
|||
|
|||
e, _ := r.Lookup("vol1") |
|||
if e.Status != StatusActive { |
|||
t.Fatalf("expected Active after heartbeat, got %d", e.Status) |
|||
} |
|||
} |
|||
|
|||
func TestRegistry_PickServer(t *testing.T) { |
|||
r := NewBlockVolumeRegistry() |
|||
// s1 has 2 volumes, s2 has 1, s3 has 0.
|
|||
r.Register(&BlockVolumeEntry{Name: "v1", VolumeServer: "s1", Path: "/v1.blk"}) |
|||
r.Register(&BlockVolumeEntry{Name: "v2", VolumeServer: "s1", Path: "/v2.blk"}) |
|||
r.Register(&BlockVolumeEntry{Name: "v3", VolumeServer: "s2", Path: "/v3.blk"}) |
|||
|
|||
got, err := r.PickServer([]string{"s1", "s2", "s3"}) |
|||
if err != nil { |
|||
t.Fatalf("PickServer: %v", err) |
|||
} |
|||
if got != "s3" { |
|||
t.Fatalf("PickServer: got %q, want s3 (fewest volumes)", got) |
|||
} |
|||
} |
|||
|
|||
func TestRegistry_PickServerEmpty(t *testing.T) { |
|||
r := NewBlockVolumeRegistry() |
|||
_, err := r.PickServer(nil) |
|||
if err == nil { |
|||
t.Fatal("PickServer with no servers should return error") |
|||
} |
|||
} |
|||
|
|||
func TestRegistry_InflightLock(t *testing.T) { |
|||
r := NewBlockVolumeRegistry() |
|||
|
|||
// First acquire succeeds.
|
|||
if !r.AcquireInflight("vol1") { |
|||
t.Fatal("first AcquireInflight should succeed") |
|||
} |
|||
|
|||
// Second acquire for same name fails.
|
|||
if r.AcquireInflight("vol1") { |
|||
t.Fatal("second AcquireInflight for same name should fail") |
|||
} |
|||
|
|||
// Different name succeeds.
|
|||
if !r.AcquireInflight("vol2") { |
|||
t.Fatal("AcquireInflight for different name should succeed") |
|||
} |
|||
|
|||
// Release and re-acquire.
|
|||
r.ReleaseInflight("vol1") |
|||
if !r.AcquireInflight("vol1") { |
|||
t.Fatal("AcquireInflight after release should succeed") |
|||
} |
|||
|
|||
r.ReleaseInflight("vol1") |
|||
r.ReleaseInflight("vol2") |
|||
} |
|||
|
|||
func TestRegistry_UnmarkDeadServer(t *testing.T) { |
|||
r := NewBlockVolumeRegistry() |
|||
r.MarkBlockCapable("s1") |
|||
r.MarkBlockCapable("s2") |
|||
|
|||
servers := r.BlockCapableServers() |
|||
if len(servers) != 2 { |
|||
t.Fatalf("expected 2 servers, got %d", len(servers)) |
|||
} |
|||
|
|||
// Simulate s1 disconnect.
|
|||
r.UnmarkBlockCapable("s1") |
|||
|
|||
servers = r.BlockCapableServers() |
|||
if len(servers) != 1 { |
|||
t.Fatalf("expected 1 server after unmark, got %d", len(servers)) |
|||
} |
|||
if servers[0] != "s2" { |
|||
t.Fatalf("expected s2, got %s", servers[0]) |
|||
} |
|||
} |
|||
|
|||
func TestRegistry_FullHeartbeatUpdatesSizeBytes(t *testing.T) { |
|||
r := NewBlockVolumeRegistry() |
|||
r.Register(&BlockVolumeEntry{ |
|||
Name: "vol1", VolumeServer: "s1", Path: "/v1.blk", |
|||
SizeBytes: 1 << 30, Status: StatusPending, |
|||
}) |
|||
|
|||
// Heartbeat with updated size (online resize).
|
|||
r.UpdateFullHeartbeat("s1", []*master_pb.BlockVolumeInfoMessage{ |
|||
{Path: "/v1.blk", VolumeSize: 2 << 30, Epoch: 1, Role: 1}, |
|||
}) |
|||
|
|||
e, _ := r.Lookup("vol1") |
|||
if e.SizeBytes != 2<<30 { |
|||
t.Fatalf("SizeBytes: got %d, want %d", e.SizeBytes, 2<<30) |
|||
} |
|||
} |
|||
|
|||
func TestRegistry_ConcurrentAccess(t *testing.T) { |
|||
r := NewBlockVolumeRegistry() |
|||
var wg sync.WaitGroup |
|||
n := 50 |
|||
|
|||
// Concurrent register.
|
|||
for i := 0; i < n; i++ { |
|||
wg.Add(1) |
|||
go func(i int) { |
|||
defer wg.Done() |
|||
name := fmt.Sprintf("vol%d", i) |
|||
r.Register(&BlockVolumeEntry{ |
|||
Name: name, VolumeServer: "s1", |
|||
Path: fmt.Sprintf("/v%d.blk", i), |
|||
}) |
|||
}(i) |
|||
} |
|||
wg.Wait() |
|||
|
|||
// All should be findable.
|
|||
for i := 0; i < n; i++ { |
|||
name := fmt.Sprintf("vol%d", i) |
|||
if _, ok := r.Lookup(name); !ok { |
|||
t.Fatalf("vol%d not found after concurrent register", i) |
|||
} |
|||
} |
|||
|
|||
// Concurrent unregister.
|
|||
for i := 0; i < n; i++ { |
|||
wg.Add(1) |
|||
go func(i int) { |
|||
defer wg.Done() |
|||
r.Unregister(fmt.Sprintf("vol%d", i)) |
|||
}(i) |
|||
} |
|||
wg.Wait() |
|||
|
|||
// All should be gone.
|
|||
for i := 0; i < n; i++ { |
|||
if _, ok := r.Lookup(fmt.Sprintf("vol%d", i)); ok { |
|||
t.Fatalf("vol%d found after concurrent unregister", i) |
|||
} |
|||
} |
|||
} |
|||
@ -0,0 +1,167 @@ |
|||
package weed_server |
|||
|
|||
import ( |
|||
"context" |
|||
"fmt" |
|||
|
|||
"github.com/seaweedfs/seaweedfs/weed/glog" |
|||
"github.com/seaweedfs/seaweedfs/weed/pb" |
|||
"github.com/seaweedfs/seaweedfs/weed/pb/master_pb" |
|||
) |
|||
|
|||
// CreateBlockVolume picks a volume server, delegates creation, and records
|
|||
// the mapping in the block volume registry.
|
|||
func (ms *MasterServer) CreateBlockVolume(ctx context.Context, req *master_pb.CreateBlockVolumeRequest) (*master_pb.CreateBlockVolumeResponse, error) { |
|||
if req.Name == "" { |
|||
return nil, fmt.Errorf("name is required") |
|||
} |
|||
if req.SizeBytes == 0 { |
|||
return nil, fmt.Errorf("size_bytes must be > 0") |
|||
} |
|||
|
|||
// Idempotent: if already registered, return existing entry (validate size).
|
|||
if entry, ok := ms.blockRegistry.Lookup(req.Name); ok { |
|||
if entry.SizeBytes < req.SizeBytes { |
|||
return nil, fmt.Errorf("block volume %q exists with size %d (requested %d)", req.Name, entry.SizeBytes, req.SizeBytes) |
|||
} |
|||
return &master_pb.CreateBlockVolumeResponse{ |
|||
VolumeId: entry.Name, |
|||
VolumeServer: entry.VolumeServer, |
|||
IscsiAddr: entry.ISCSIAddr, |
|||
Iqn: entry.IQN, |
|||
CapacityBytes: entry.SizeBytes, |
|||
}, nil |
|||
} |
|||
|
|||
// Per-name inflight lock prevents concurrent creates for the same name.
|
|||
if !ms.blockRegistry.AcquireInflight(req.Name) { |
|||
return nil, fmt.Errorf("block volume %q creation already in progress", req.Name) |
|||
} |
|||
defer ms.blockRegistry.ReleaseInflight(req.Name) |
|||
|
|||
// Double-check after acquiring lock (another goroutine may have finished).
|
|||
if entry, ok := ms.blockRegistry.Lookup(req.Name); ok { |
|||
return &master_pb.CreateBlockVolumeResponse{ |
|||
VolumeId: entry.Name, |
|||
VolumeServer: entry.VolumeServer, |
|||
IscsiAddr: entry.ISCSIAddr, |
|||
Iqn: entry.IQN, |
|||
CapacityBytes: entry.SizeBytes, |
|||
}, nil |
|||
} |
|||
|
|||
// Get candidate servers.
|
|||
servers := ms.blockRegistry.BlockCapableServers() |
|||
if len(servers) == 0 { |
|||
return nil, fmt.Errorf("no block volume servers available") |
|||
} |
|||
|
|||
// Try up to 3 servers (or all available, whichever is smaller).
|
|||
maxRetries := 3 |
|||
if len(servers) < maxRetries { |
|||
maxRetries = len(servers) |
|||
} |
|||
|
|||
var lastErr error |
|||
for attempt := 0; attempt < maxRetries; attempt++ { |
|||
server, err := ms.blockRegistry.PickServer(servers) |
|||
if err != nil { |
|||
return nil, err |
|||
} |
|||
|
|||
path, iqn, iscsiAddr, err := ms.blockVSAllocate(ctx, pb.ServerAddress(server), req.Name, req.SizeBytes, req.DiskType) |
|||
if err != nil { |
|||
lastErr = fmt.Errorf("server %s: %w", server, err) |
|||
glog.V(0).Infof("CreateBlockVolume %q: attempt %d on %s failed: %v", req.Name, attempt+1, server, err) |
|||
servers = removeServer(servers, server) |
|||
continue |
|||
} |
|||
|
|||
// Register in registry as Active (VS confirmed creation).
|
|||
// Heartbeat will update epoch/role fields later.
|
|||
if err := ms.blockRegistry.Register(&BlockVolumeEntry{ |
|||
Name: req.Name, |
|||
VolumeServer: server, |
|||
Path: path, |
|||
IQN: iqn, |
|||
ISCSIAddr: iscsiAddr, |
|||
SizeBytes: req.SizeBytes, |
|||
Status: StatusActive, |
|||
}); err != nil { |
|||
// Already registered (race condition) — return the existing entry.
|
|||
if existing, ok := ms.blockRegistry.Lookup(req.Name); ok { |
|||
return &master_pb.CreateBlockVolumeResponse{ |
|||
VolumeId: existing.Name, |
|||
VolumeServer: existing.VolumeServer, |
|||
IscsiAddr: existing.ISCSIAddr, |
|||
Iqn: existing.IQN, |
|||
CapacityBytes: existing.SizeBytes, |
|||
}, nil |
|||
} |
|||
return nil, fmt.Errorf("register block volume: %w", err) |
|||
} |
|||
|
|||
glog.V(0).Infof("CreateBlockVolume %q: created on %s (path=%s, iqn=%s)", req.Name, server, path, iqn) |
|||
return &master_pb.CreateBlockVolumeResponse{ |
|||
VolumeId: req.Name, |
|||
VolumeServer: server, |
|||
IscsiAddr: iscsiAddr, |
|||
Iqn: iqn, |
|||
CapacityBytes: req.SizeBytes, |
|||
}, nil |
|||
} |
|||
|
|||
return nil, fmt.Errorf("all volume servers failed for %q: %v", req.Name, lastErr) |
|||
} |
|||
|
|||
// DeleteBlockVolume removes a block volume from the registry and volume server.
|
|||
func (ms *MasterServer) DeleteBlockVolume(ctx context.Context, req *master_pb.DeleteBlockVolumeRequest) (*master_pb.DeleteBlockVolumeResponse, error) { |
|||
if req.Name == "" { |
|||
return nil, fmt.Errorf("name is required") |
|||
} |
|||
|
|||
entry, ok := ms.blockRegistry.Lookup(req.Name) |
|||
if !ok { |
|||
// Idempotent: not found is success.
|
|||
return &master_pb.DeleteBlockVolumeResponse{}, nil |
|||
} |
|||
|
|||
// Call volume server to delete.
|
|||
if err := ms.blockVSDelete(ctx, pb.ServerAddress(entry.VolumeServer), req.Name); err != nil { |
|||
return nil, fmt.Errorf("delete block volume %q on %s: %w", req.Name, entry.VolumeServer, err) |
|||
} |
|||
|
|||
ms.blockRegistry.Unregister(req.Name) |
|||
glog.V(0).Infof("DeleteBlockVolume %q: removed from %s", req.Name, entry.VolumeServer) |
|||
return &master_pb.DeleteBlockVolumeResponse{}, nil |
|||
} |
|||
|
|||
// LookupBlockVolume looks up a block volume in the registry.
|
|||
func (ms *MasterServer) LookupBlockVolume(ctx context.Context, req *master_pb.LookupBlockVolumeRequest) (*master_pb.LookupBlockVolumeResponse, error) { |
|||
if req.Name == "" { |
|||
return nil, fmt.Errorf("name is required") |
|||
} |
|||
|
|||
entry, ok := ms.blockRegistry.Lookup(req.Name) |
|||
if !ok { |
|||
return nil, fmt.Errorf("block volume %q not found", req.Name) |
|||
} |
|||
|
|||
return &master_pb.LookupBlockVolumeResponse{ |
|||
VolumeServer: entry.VolumeServer, |
|||
IscsiAddr: entry.ISCSIAddr, |
|||
Iqn: entry.IQN, |
|||
CapacityBytes: entry.SizeBytes, |
|||
}, nil |
|||
} |
|||
|
|||
// removeServer returns a new slice without the specified server.
// The input slice is never modified. Safe for empty/nil input: the original
// used make(..., len(servers)-1), which panics with a negative capacity when
// servers is empty.
func removeServer(servers []string, server string) []string {
	capacity := len(servers) - 1
	if capacity < 0 {
		capacity = 0
	}
	result := make([]string, 0, capacity)
	for _, s := range servers {
		if s != server {
			result = append(result, s)
		}
	}
	return result
}
|||
@ -0,0 +1,298 @@ |
|||
package weed_server |
|||
|
|||
import ( |
|||
"context" |
|||
"fmt" |
|||
"sync" |
|||
"sync/atomic" |
|||
"testing" |
|||
|
|||
"github.com/seaweedfs/seaweedfs/weed/pb" |
|||
"github.com/seaweedfs/seaweedfs/weed/pb/master_pb" |
|||
) |
|||
|
|||
// testMasterServer creates a minimal MasterServer with mock VS calls for testing.
// Only the block registry and the two volume-server hook fields are populated,
// so the block RPC handlers can run without contacting a real cluster.
// NOTE(review): this relies on blockVSAllocate/blockVSDelete being assignable
// function fields on MasterServer — confirm against master_server.go.
func testMasterServer(t *testing.T) *MasterServer {
	t.Helper()
	ms := &MasterServer{
		blockRegistry: NewBlockVolumeRegistry(),
	}
	// Default mock: succeed with deterministic values.
	// The fake allocator derives (path, iqn, iscsiAddr) from the volume name
	// and the chosen server address, so tests can predict every field.
	ms.blockVSAllocate = func(ctx context.Context, server pb.ServerAddress, name string, sizeBytes uint64, diskType string) (string, string, string, error) {
		return fmt.Sprintf("/data/%s.blk", name),
			fmt.Sprintf("iqn.2024.test:%s", name),
			string(server),
			nil
	}
	ms.blockVSDelete = func(ctx context.Context, server pb.ServerAddress, name string) error {
		return nil
	}
	return ms
}

// TestMaster_CreateBlockVolume covers the happy path: with one block-capable
// server, the create RPC returns the allocated identifiers and the registry
// holds an Active entry for the new volume.
func TestMaster_CreateBlockVolume(t *testing.T) {
	ms := testMasterServer(t)
	ms.blockRegistry.MarkBlockCapable("vs1:9333")

	resp, err := ms.CreateBlockVolume(context.Background(), &master_pb.CreateBlockVolumeRequest{
		Name:      "test-vol",
		SizeBytes: 1 << 30,
		DiskType:  "ssd",
	})
	if err != nil {
		t.Fatalf("CreateBlockVolume: %v", err)
	}
	if resp.VolumeId != "test-vol" {
		t.Fatalf("VolumeId: got %q, want test-vol", resp.VolumeId)
	}
	if resp.VolumeServer != "vs1:9333" {
		t.Fatalf("VolumeServer: got %q, want vs1:9333", resp.VolumeServer)
	}
	if resp.Iqn == "" || resp.IscsiAddr == "" {
		t.Fatal("IQN or ISCSIAddr is empty")
	}

	// Verify registry entry.
	entry, ok := ms.blockRegistry.Lookup("test-vol")
	if !ok {
		t.Fatal("volume not found in registry")
	}
	if entry.Status != StatusActive {
		t.Fatalf("status: got %d, want StatusActive", entry.Status)
	}
}
|||
|
|||
// TestMaster_CreateIdempotent verifies that repeating a create with the same
// name and size succeeds and returns the same volume id and server.
func TestMaster_CreateIdempotent(t *testing.T) {
	ms := testMasterServer(t)
	ms.blockRegistry.MarkBlockCapable("vs1:9333")

	resp1, err := ms.CreateBlockVolume(context.Background(), &master_pb.CreateBlockVolumeRequest{
		Name:      "vol1",
		SizeBytes: 1 << 30,
	})
	if err != nil {
		t.Fatalf("first create: %v", err)
	}

	resp2, err := ms.CreateBlockVolume(context.Background(), &master_pb.CreateBlockVolumeRequest{
		Name:      "vol1",
		SizeBytes: 1 << 30,
	})
	if err != nil {
		t.Fatalf("idempotent create: %v", err)
	}

	if resp1.VolumeId != resp2.VolumeId || resp1.VolumeServer != resp2.VolumeServer {
		t.Fatalf("idempotent mismatch: %+v vs %+v", resp1, resp2)
	}
}

// TestMaster_CreateIdempotentSizeMismatch pins the size-validation rule for
// repeated creates: a larger request is rejected; an equal-or-smaller request
// is treated as idempotent and returns the existing volume.
func TestMaster_CreateIdempotentSizeMismatch(t *testing.T) {
	ms := testMasterServer(t)
	ms.blockRegistry.MarkBlockCapable("vs1:9333")

	_, err := ms.CreateBlockVolume(context.Background(), &master_pb.CreateBlockVolumeRequest{
		Name:      "vol1",
		SizeBytes: 1 << 30,
	})
	if err != nil {
		t.Fatalf("first create: %v", err)
	}

	// Larger size should fail.
	_, err = ms.CreateBlockVolume(context.Background(), &master_pb.CreateBlockVolumeRequest{
		Name:      "vol1",
		SizeBytes: 2 << 30,
	})
	if err == nil {
		t.Fatal("expected error for size mismatch")
	}

	// Same or smaller size should succeed (idempotent).
	_, err = ms.CreateBlockVolume(context.Background(), &master_pb.CreateBlockVolumeRequest{
		Name:      "vol1",
		SizeBytes: 1 << 29,
	})
	if err != nil {
		t.Fatalf("smaller size should succeed: %v", err)
	}
}
|||
|
|||
func TestMaster_CreateNoServers(t *testing.T) { |
|||
ms := testMasterServer(t) |
|||
// No block-capable servers registered.
|
|||
|
|||
_, err := ms.CreateBlockVolume(context.Background(), &master_pb.CreateBlockVolumeRequest{ |
|||
Name: "vol1", |
|||
SizeBytes: 1 << 30, |
|||
}) |
|||
if err == nil { |
|||
t.Fatal("expected error when no servers available") |
|||
} |
|||
} |
|||
|
|||
func TestMaster_CreateVSFailure_Retry(t *testing.T) { |
|||
ms := testMasterServer(t) |
|||
ms.blockRegistry.MarkBlockCapable("vs1:9333") |
|||
ms.blockRegistry.MarkBlockCapable("vs2:9333") |
|||
|
|||
var callCount atomic.Int32 |
|||
ms.blockVSAllocate = func(ctx context.Context, server pb.ServerAddress, name string, sizeBytes uint64, diskType string) (string, string, string, error) { |
|||
n := callCount.Add(1) |
|||
if n == 1 { |
|||
return "", "", "", fmt.Errorf("disk full") |
|||
} |
|||
return fmt.Sprintf("/data/%s.blk", name), |
|||
fmt.Sprintf("iqn.2024.test:%s", name), |
|||
string(server), nil |
|||
} |
|||
|
|||
resp, err := ms.CreateBlockVolume(context.Background(), &master_pb.CreateBlockVolumeRequest{ |
|||
Name: "vol1", |
|||
SizeBytes: 1 << 30, |
|||
}) |
|||
if err != nil { |
|||
t.Fatalf("expected retry to succeed: %v", err) |
|||
} |
|||
if resp.VolumeId != "vol1" { |
|||
t.Fatalf("VolumeId: got %q, want vol1", resp.VolumeId) |
|||
} |
|||
if callCount.Load() < 2 { |
|||
t.Fatalf("expected at least 2 VS calls, got %d", callCount.Load()) |
|||
} |
|||
} |
|||
|
|||
func TestMaster_CreateVSFailure_Cleanup(t *testing.T) { |
|||
ms := testMasterServer(t) |
|||
ms.blockRegistry.MarkBlockCapable("vs1:9333") |
|||
|
|||
ms.blockVSAllocate = func(ctx context.Context, server pb.ServerAddress, name string, sizeBytes uint64, diskType string) (string, string, string, error) { |
|||
return "", "", "", fmt.Errorf("all servers broken") |
|||
} |
|||
|
|||
_, err := ms.CreateBlockVolume(context.Background(), &master_pb.CreateBlockVolumeRequest{ |
|||
Name: "vol1", |
|||
SizeBytes: 1 << 30, |
|||
}) |
|||
if err == nil { |
|||
t.Fatal("expected error when all VS fail") |
|||
} |
|||
|
|||
// No stale registry entry.
|
|||
if _, ok := ms.blockRegistry.Lookup("vol1"); ok { |
|||
t.Fatal("stale registry entry should not exist") |
|||
} |
|||
} |
|||
|
|||
func TestMaster_CreateConcurrentSameName(t *testing.T) { |
|||
ms := testMasterServer(t) |
|||
ms.blockRegistry.MarkBlockCapable("vs1:9333") |
|||
|
|||
var callCount atomic.Int32 |
|||
ms.blockVSAllocate = func(ctx context.Context, server pb.ServerAddress, name string, sizeBytes uint64, diskType string) (string, string, string, error) { |
|||
callCount.Add(1) |
|||
return fmt.Sprintf("/data/%s.blk", name), |
|||
fmt.Sprintf("iqn.2024.test:%s", name), |
|||
string(server), nil |
|||
} |
|||
|
|||
var wg sync.WaitGroup |
|||
results := make([]*master_pb.CreateBlockVolumeResponse, 10) |
|||
errors := make([]error, 10) |
|||
for i := 0; i < 10; i++ { |
|||
wg.Add(1) |
|||
go func(i int) { |
|||
defer wg.Done() |
|||
results[i], errors[i] = ms.CreateBlockVolume(context.Background(), &master_pb.CreateBlockVolumeRequest{ |
|||
Name: "same-vol", |
|||
SizeBytes: 1 << 30, |
|||
}) |
|||
}(i) |
|||
} |
|||
wg.Wait() |
|||
|
|||
// Some may get "already in progress" error, but at least one must succeed.
|
|||
successCount := 0 |
|||
for i := 0; i < 10; i++ { |
|||
if errors[i] == nil { |
|||
successCount++ |
|||
} |
|||
} |
|||
if successCount == 0 { |
|||
t.Fatal("at least one concurrent create should succeed") |
|||
} |
|||
|
|||
// Only one VS allocation call should have been made.
|
|||
if callCount.Load() != 1 { |
|||
t.Fatalf("expected exactly 1 VS call, got %d", callCount.Load()) |
|||
} |
|||
} |
|||
|
|||
func TestMaster_DeleteBlockVolume(t *testing.T) { |
|||
ms := testMasterServer(t) |
|||
ms.blockRegistry.MarkBlockCapable("vs1:9333") |
|||
|
|||
_, err := ms.CreateBlockVolume(context.Background(), &master_pb.CreateBlockVolumeRequest{ |
|||
Name: "vol1", |
|||
SizeBytes: 1 << 30, |
|||
}) |
|||
if err != nil { |
|||
t.Fatalf("create: %v", err) |
|||
} |
|||
|
|||
_, err = ms.DeleteBlockVolume(context.Background(), &master_pb.DeleteBlockVolumeRequest{ |
|||
Name: "vol1", |
|||
}) |
|||
if err != nil { |
|||
t.Fatalf("delete: %v", err) |
|||
} |
|||
|
|||
if _, ok := ms.blockRegistry.Lookup("vol1"); ok { |
|||
t.Fatal("volume should be removed from registry") |
|||
} |
|||
} |
|||
|
|||
func TestMaster_DeleteNotFound(t *testing.T) { |
|||
ms := testMasterServer(t) |
|||
|
|||
_, err := ms.DeleteBlockVolume(context.Background(), &master_pb.DeleteBlockVolumeRequest{ |
|||
Name: "nonexistent", |
|||
}) |
|||
if err != nil { |
|||
t.Fatalf("delete nonexistent should succeed (idempotent): %v", err) |
|||
} |
|||
} |
|||
|
|||
func TestMaster_LookupBlockVolume(t *testing.T) { |
|||
ms := testMasterServer(t) |
|||
ms.blockRegistry.MarkBlockCapable("vs1:9333") |
|||
|
|||
_, err := ms.CreateBlockVolume(context.Background(), &master_pb.CreateBlockVolumeRequest{ |
|||
Name: "vol1", |
|||
SizeBytes: 1 << 30, |
|||
}) |
|||
if err != nil { |
|||
t.Fatalf("create: %v", err) |
|||
} |
|||
|
|||
resp, err := ms.LookupBlockVolume(context.Background(), &master_pb.LookupBlockVolumeRequest{ |
|||
Name: "vol1", |
|||
}) |
|||
if err != nil { |
|||
t.Fatalf("lookup: %v", err) |
|||
} |
|||
if resp.VolumeServer != "vs1:9333" { |
|||
t.Fatalf("VolumeServer: got %q, want vs1:9333", resp.VolumeServer) |
|||
} |
|||
if resp.CapacityBytes != 1<<30 { |
|||
t.Fatalf("CapacityBytes: got %d, want %d", resp.CapacityBytes, 1<<30) |
|||
} |
|||
|
|||
// Lookup nonexistent.
|
|||
_, err = ms.LookupBlockVolume(context.Background(), &master_pb.LookupBlockVolumeRequest{ |
|||
Name: "nonexistent", |
|||
}) |
|||
if err == nil { |
|||
t.Fatal("lookup nonexistent should return error") |
|||
} |
|||
} |
|||
@ -0,0 +1,604 @@ |
|||
package weed_server |
|||
|
|||
import ( |
|||
"context" |
|||
"fmt" |
|||
"os" |
|||
"strings" |
|||
"sync" |
|||
"sync/atomic" |
|||
"testing" |
|||
"time" |
|||
|
|||
"github.com/seaweedfs/seaweedfs/weed/pb" |
|||
"github.com/seaweedfs/seaweedfs/weed/pb/master_pb" |
|||
"github.com/seaweedfs/seaweedfs/weed/pb/volume_server_pb" |
|||
) |
|||
|
|||
// ============================================================
|
|||
// QA-REG: Registry adversarial tests
|
|||
// ============================================================
|
|||
|
|||
// QA-REG-1: FullHeartbeat must not remove volumes that belong to other servers.
// Scenario: the master has registered volumes on servers s1 and s2. A full
// heartbeat from s1 that lists only s1's volumes must reconcile (and possibly
// remove) stale entries for s1 alone — it must never touch s2's entries.
|
|||
func TestQA_Reg_FullHeartbeatCrossTalk(t *testing.T) { |
|||
r := NewBlockVolumeRegistry() |
|||
|
|||
// Register vol1 on server s1.
|
|||
r.Register(&BlockVolumeEntry{Name: "vol1", VolumeServer: "s1", Path: "/v1.blk", Status: StatusActive}) |
|||
// Register vol2 on server s2.
|
|||
r.Register(&BlockVolumeEntry{Name: "vol2", VolumeServer: "s2", Path: "/v2.blk", Status: StatusActive}) |
|||
|
|||
// Full heartbeat from s1 reports vol1 — should NOT affect s2's volumes.
|
|||
r.UpdateFullHeartbeat("s1", []*master_pb.BlockVolumeInfoMessage{ |
|||
{Path: "/v1.blk", Epoch: 1}, |
|||
}) |
|||
|
|||
// vol2 on s2 should still exist.
|
|||
if _, ok := r.Lookup("vol2"); !ok { |
|||
t.Fatal("BUG: full heartbeat from s1 removed vol2 which belongs to s2") |
|||
} |
|||
} |
|||
|
|||
// QA-REG-2: FullHeartbeat from server with zero volumes should clear all entries for that server.
|
|||
func TestQA_Reg_FullHeartbeatEmptyServer(t *testing.T) { |
|||
r := NewBlockVolumeRegistry() |
|||
r.Register(&BlockVolumeEntry{Name: "vol1", VolumeServer: "s1", Path: "/v1.blk", Status: StatusActive}) |
|||
r.Register(&BlockVolumeEntry{Name: "vol2", VolumeServer: "s1", Path: "/v2.blk", Status: StatusActive}) |
|||
|
|||
// Empty heartbeat from s1 (HasNoBlockVolumes=true, zero infos).
|
|||
r.UpdateFullHeartbeat("s1", nil) |
|||
|
|||
if _, ok := r.Lookup("vol1"); ok { |
|||
t.Error("BUG: vol1 should be removed after empty full heartbeat") |
|||
} |
|||
if _, ok := r.Lookup("vol2"); ok { |
|||
t.Error("BUG: vol2 should be removed after empty full heartbeat") |
|||
} |
|||
} |
|||
|
|||
// QA-REG-3: Concurrent FullHeartbeat and Register for same server.
|
|||
// While a heartbeat is being processed, a new CreateBlockVolume registers on the same server.
|
|||
func TestQA_Reg_ConcurrentHeartbeatAndRegister(t *testing.T) { |
|||
r := NewBlockVolumeRegistry() |
|||
|
|||
var wg sync.WaitGroup |
|||
var panicked atomic.Bool |
|||
|
|||
for i := 0; i < 50; i++ { |
|||
wg.Add(2) |
|||
go func(i int) { |
|||
defer wg.Done() |
|||
defer func() { |
|||
if r := recover(); r != nil { |
|||
panicked.Store(true) |
|||
} |
|||
}() |
|||
r.UpdateFullHeartbeat("s1", []*master_pb.BlockVolumeInfoMessage{ |
|||
{Path: fmt.Sprintf("/v%d.blk", i), Epoch: uint64(i)}, |
|||
}) |
|||
}(i) |
|||
go func(i int) { |
|||
defer wg.Done() |
|||
defer func() { |
|||
if r := recover(); r != nil { |
|||
panicked.Store(true) |
|||
} |
|||
}() |
|||
r.Register(&BlockVolumeEntry{ |
|||
Name: fmt.Sprintf("vol%d", i), |
|||
VolumeServer: "s1", |
|||
Path: fmt.Sprintf("/v%d.blk", i), |
|||
Status: StatusActive, |
|||
}) |
|||
}(i) |
|||
} |
|||
wg.Wait() |
|||
|
|||
if panicked.Load() { |
|||
t.Fatal("BUG: concurrent heartbeat + register caused panic") |
|||
} |
|||
} |
|||
|
|||
// QA-REG-4: DeltaHeartbeat with removed path that doesn't match any registered volume.
|
|||
// Should be a no-op, not panic or corrupt state.
|
|||
func TestQA_Reg_DeltaHeartbeatUnknownPath(t *testing.T) { |
|||
r := NewBlockVolumeRegistry() |
|||
r.Register(&BlockVolumeEntry{Name: "vol1", VolumeServer: "s1", Path: "/v1.blk"}) |
|||
|
|||
// Delta says /unknown.blk was removed — this path doesn't match vol1.
|
|||
r.UpdateDeltaHeartbeat("s1", |
|||
nil, |
|||
[]*master_pb.BlockVolumeShortInfoMessage{{Path: "/unknown.blk"}}, |
|||
) |
|||
|
|||
// vol1 should still exist.
|
|||
if _, ok := r.Lookup("vol1"); !ok { |
|||
t.Fatal("BUG: delta heartbeat with unknown path removed an unrelated volume") |
|||
} |
|||
} |
|||
|
|||
// QA-REG-5: PickServer always picks the same server when counts are tied.
|
|||
// Deterministic placement prevents flip-flopping.
|
|||
func TestQA_Reg_PickServerTiebreaker(t *testing.T) { |
|||
r := NewBlockVolumeRegistry() |
|||
|
|||
// All servers have 0 volumes.
|
|||
servers := []string{"s1", "s2", "s3"} |
|||
results := make(map[string]int) |
|||
for i := 0; i < 10; i++ { |
|||
s, err := r.PickServer(servers) |
|||
if err != nil { |
|||
t.Fatal(err) |
|||
} |
|||
results[s]++ |
|||
} |
|||
// With stable ordering, the same server should win every time.
|
|||
// The algorithm picks servers[0] by default when counts are equal.
|
|||
if results["s1"] != 10 { |
|||
t.Logf("PickServer results with tied counts: %v (non-deterministic but OK if no panic)", results) |
|||
} |
|||
} |
|||
|
|||
// QA-REG-6: Unregister a volume then re-register with a different server.
|
|||
func TestQA_Reg_ReregisterDifferentServer(t *testing.T) { |
|||
r := NewBlockVolumeRegistry() |
|||
r.Register(&BlockVolumeEntry{Name: "vol1", VolumeServer: "s1", Path: "/v1.blk"}) |
|||
|
|||
// Unregister and re-register on s2.
|
|||
r.Unregister("vol1") |
|||
r.Register(&BlockVolumeEntry{Name: "vol1", VolumeServer: "s2", Path: "/v1.blk"}) |
|||
|
|||
entry, ok := r.Lookup("vol1") |
|||
if !ok { |
|||
t.Fatal("vol1 should exist after re-register") |
|||
} |
|||
if entry.VolumeServer != "s2" { |
|||
t.Fatalf("VolumeServer: got %q, want s2", entry.VolumeServer) |
|||
} |
|||
|
|||
// s1 should have no volumes.
|
|||
if vols := r.ListByServer("s1"); len(vols) != 0 { |
|||
t.Fatalf("s1 should have 0 volumes after re-register, got %d", len(vols)) |
|||
} |
|||
// s2 should have 1 volume.
|
|||
if vols := r.ListByServer("s2"); len(vols) != 1 { |
|||
t.Fatalf("s2 should have 1 volume, got %d", len(vols)) |
|||
} |
|||
} |
|||
|
|||
// QA-REG-7: AcquireInflight for different names doesn't interfere.
|
|||
func TestQA_Reg_InflightIndependence(t *testing.T) { |
|||
r := NewBlockVolumeRegistry() |
|||
|
|||
if !r.AcquireInflight("vol1") { |
|||
t.Fatal("acquire vol1 should succeed") |
|||
} |
|||
if !r.AcquireInflight("vol2") { |
|||
t.Fatal("acquire vol2 should succeed (different name)") |
|||
} |
|||
if r.AcquireInflight("vol1") { |
|||
t.Fatal("double acquire vol1 should fail") |
|||
} |
|||
r.ReleaseInflight("vol1") |
|||
if !r.AcquireInflight("vol1") { |
|||
t.Fatal("acquire vol1 after release should succeed") |
|||
} |
|||
r.ReleaseInflight("vol1") |
|||
r.ReleaseInflight("vol2") |
|||
} |
|||
|
|||
// QA-REG-8: BlockCapableServers includes only currently-marked servers.
|
|||
func TestQA_Reg_BlockCapableServersAfterUnmark(t *testing.T) { |
|||
r := NewBlockVolumeRegistry() |
|||
r.MarkBlockCapable("s1") |
|||
r.MarkBlockCapable("s2") |
|||
r.MarkBlockCapable("s3") |
|||
|
|||
r.UnmarkBlockCapable("s2") |
|||
|
|||
servers := r.BlockCapableServers() |
|||
for _, s := range servers { |
|||
if s == "s2" { |
|||
t.Fatal("BUG: s2 should not be in block-capable list after UnmarkBlockCapable") |
|||
} |
|||
} |
|||
if len(servers) != 2 { |
|||
t.Fatalf("expected 2 block-capable servers, got %d: %v", len(servers), servers) |
|||
} |
|||
} |
|||
|
|||
// ============================================================
|
|||
// QA-MASTER: Master RPC adversarial tests
|
|||
// ============================================================
|
|||
|
|||
// QA-MASTER-1: CreateBlockVolume then Delete while VS is unreachable.
|
|||
// Delete should fail (cannot contact VS), but registry entry should NOT be removed.
|
|||
func TestQA_Master_DeleteVSUnreachable(t *testing.T) { |
|||
ms := testMasterServer(t) |
|||
ms.blockRegistry.MarkBlockCapable("vs1:9333") |
|||
|
|||
_, err := ms.CreateBlockVolume(context.Background(), &master_pb.CreateBlockVolumeRequest{ |
|||
Name: "vol1", SizeBytes: 1 << 30, |
|||
}) |
|||
if err != nil { |
|||
t.Fatalf("create: %v", err) |
|||
} |
|||
|
|||
// Make VS delete fail.
|
|||
ms.blockVSDelete = func(ctx context.Context, server pb.ServerAddress, name string) error { |
|||
return fmt.Errorf("connection refused") |
|||
} |
|||
|
|||
_, err = ms.DeleteBlockVolume(context.Background(), &master_pb.DeleteBlockVolumeRequest{ |
|||
Name: "vol1", |
|||
}) |
|||
if err == nil { |
|||
t.Fatal("delete should fail when VS is unreachable") |
|||
} |
|||
|
|||
// Registry entry should still exist (not orphaned).
|
|||
if _, ok := ms.blockRegistry.Lookup("vol1"); !ok { |
|||
t.Fatal("BUG: registry entry removed even though VS delete failed") |
|||
} |
|||
} |
|||
|
|||
// QA-MASTER-2: Create then lookup a volume with a name that requires sanitization.
|
|||
func TestQA_Master_CreateSanitizedName(t *testing.T) { |
|||
ms := testMasterServer(t) |
|||
ms.blockRegistry.MarkBlockCapable("vs1:9333") |
|||
|
|||
// Name with special characters.
|
|||
resp, err := ms.CreateBlockVolume(context.Background(), &master_pb.CreateBlockVolumeRequest{ |
|||
Name: "pvc-abc/def:123", SizeBytes: 1 << 30, |
|||
}) |
|||
if err != nil { |
|||
t.Fatalf("create: %v", err) |
|||
} |
|||
|
|||
// Lookup should use the exact name (not sanitized).
|
|||
lookupResp, err := ms.LookupBlockVolume(context.Background(), &master_pb.LookupBlockVolumeRequest{ |
|||
Name: "pvc-abc/def:123", |
|||
}) |
|||
if err != nil { |
|||
t.Fatalf("lookup: %v", err) |
|||
} |
|||
if lookupResp.VolumeServer != resp.VolumeServer { |
|||
t.Fatalf("lookup mismatch: %+v vs %+v", lookupResp, resp) |
|||
} |
|||
} |
|||
|
|||
// QA-MASTER-3: Concurrent Create and Delete for the same volume name.
|
|||
func TestQA_Master_ConcurrentCreateDelete(t *testing.T) { |
|||
ms := testMasterServer(t) |
|||
ms.blockRegistry.MarkBlockCapable("vs1:9333") |
|||
|
|||
var wg sync.WaitGroup |
|||
var panicked atomic.Bool |
|||
|
|||
for i := 0; i < 20; i++ { |
|||
wg.Add(2) |
|||
go func() { |
|||
defer wg.Done() |
|||
defer func() { |
|||
if r := recover(); r != nil { |
|||
panicked.Store(true) |
|||
t.Errorf("PANIC in create: %v", r) |
|||
} |
|||
}() |
|||
ms.CreateBlockVolume(context.Background(), &master_pb.CreateBlockVolumeRequest{ |
|||
Name: "race-vol", SizeBytes: 1 << 30, |
|||
}) |
|||
}() |
|||
go func() { |
|||
defer wg.Done() |
|||
defer func() { |
|||
if r := recover(); r != nil { |
|||
panicked.Store(true) |
|||
t.Errorf("PANIC in delete: %v", r) |
|||
} |
|||
}() |
|||
ms.DeleteBlockVolume(context.Background(), &master_pb.DeleteBlockVolumeRequest{ |
|||
Name: "race-vol", |
|||
}) |
|||
}() |
|||
} |
|||
wg.Wait() |
|||
|
|||
if panicked.Load() { |
|||
t.Fatal("BUG: concurrent create/delete caused panic") |
|||
} |
|||
} |
|||
|
|||
// QA-MASTER-4: Create with all VS failing should not leave orphan in registry.
|
|||
func TestQA_Master_AllVSFailNoOrphan(t *testing.T) { |
|||
ms := testMasterServer(t) |
|||
ms.blockRegistry.MarkBlockCapable("vs1:9333") |
|||
ms.blockRegistry.MarkBlockCapable("vs2:9333") |
|||
ms.blockRegistry.MarkBlockCapable("vs3:9333") |
|||
|
|||
ms.blockVSAllocate = func(ctx context.Context, server pb.ServerAddress, name string, sizeBytes uint64, diskType string) (string, string, string, error) { |
|||
return "", "", "", fmt.Errorf("disk full on %s", server) |
|||
} |
|||
|
|||
_, err := ms.CreateBlockVolume(context.Background(), &master_pb.CreateBlockVolumeRequest{ |
|||
Name: "doomed", SizeBytes: 1 << 30, |
|||
}) |
|||
if err == nil { |
|||
t.Fatal("expected error when all VS fail") |
|||
} |
|||
|
|||
// No orphan in registry.
|
|||
if _, ok := ms.blockRegistry.Lookup("doomed"); ok { |
|||
t.Fatal("BUG: orphan entry in registry after all VS failed") |
|||
} |
|||
|
|||
// Inflight lock should be released.
|
|||
if !ms.blockRegistry.AcquireInflight("doomed") { |
|||
t.Fatal("BUG: inflight lock not released after all VS failed") |
|||
} |
|||
ms.blockRegistry.ReleaseInflight("doomed") |
|||
} |
|||
|
|||
// QA-MASTER-5: VS allocate succeeds but is slow — inflight lock must block second create.
|
|||
func TestQA_Master_SlowAllocateBlocksSecond(t *testing.T) { |
|||
ms := testMasterServer(t) |
|||
ms.blockRegistry.MarkBlockCapable("vs1:9333") |
|||
|
|||
var allocCount atomic.Int32 |
|||
ms.blockVSAllocate = func(ctx context.Context, server pb.ServerAddress, name string, sizeBytes uint64, diskType string) (string, string, string, error) { |
|||
allocCount.Add(1) |
|||
time.Sleep(100 * time.Millisecond) // simulate slow VS
|
|||
return fmt.Sprintf("/data/%s.blk", name), |
|||
fmt.Sprintf("iqn.test:%s", name), |
|||
string(server), nil |
|||
} |
|||
|
|||
var wg sync.WaitGroup |
|||
errors := make([]error, 2) |
|||
for i := 0; i < 2; i++ { |
|||
wg.Add(1) |
|||
go func(i int) { |
|||
defer wg.Done() |
|||
_, errors[i] = ms.CreateBlockVolume(context.Background(), &master_pb.CreateBlockVolumeRequest{ |
|||
Name: "slow-vol", SizeBytes: 1 << 30, |
|||
}) |
|||
}(i) |
|||
} |
|||
wg.Wait() |
|||
|
|||
// One should succeed, one should get "already in progress" or succeed via idempotent path.
|
|||
successCount := 0 |
|||
for _, err := range errors { |
|||
if err == nil { |
|||
successCount++ |
|||
} |
|||
} |
|||
if successCount == 0 { |
|||
t.Fatal("at least one create should succeed") |
|||
} |
|||
|
|||
// Only 1 VS allocation call should have been made (inflight blocks the second).
|
|||
if allocCount.Load() > 1 { |
|||
t.Logf("WARNING: %d VS allocations made (expected 1 — second should be blocked by inflight lock or return idempotent)", allocCount.Load()) |
|||
} |
|||
} |
|||
|
|||
// QA-MASTER-6: Create with SizeBytes == 0 should be rejected.
|
|||
func TestQA_Master_CreateZeroSize(t *testing.T) { |
|||
ms := testMasterServer(t) |
|||
ms.blockRegistry.MarkBlockCapable("vs1:9333") |
|||
|
|||
_, err := ms.CreateBlockVolume(context.Background(), &master_pb.CreateBlockVolumeRequest{ |
|||
Name: "zero", SizeBytes: 0, |
|||
}) |
|||
if err == nil { |
|||
t.Fatal("expected error for zero size") |
|||
} |
|||
} |
|||
|
|||
// QA-MASTER-7: Create with empty name should be rejected.
|
|||
func TestQA_Master_CreateEmptyName(t *testing.T) { |
|||
ms := testMasterServer(t) |
|||
|
|||
_, err := ms.CreateBlockVolume(context.Background(), &master_pb.CreateBlockVolumeRequest{ |
|||
Name: "", SizeBytes: 1 << 30, |
|||
}) |
|||
if err == nil { |
|||
t.Fatal("expected error for empty name") |
|||
} |
|||
} |
|||
|
|||
// QA-MASTER-8: Lookup/Delete with empty name should be rejected.
|
|||
func TestQA_Master_EmptyNameValidation(t *testing.T) { |
|||
ms := testMasterServer(t) |
|||
|
|||
_, err := ms.LookupBlockVolume(context.Background(), &master_pb.LookupBlockVolumeRequest{Name: ""}) |
|||
if err == nil { |
|||
t.Error("lookup with empty name should fail") |
|||
} |
|||
|
|||
_, err = ms.DeleteBlockVolume(context.Background(), &master_pb.DeleteBlockVolumeRequest{Name: ""}) |
|||
if err == nil { |
|||
t.Error("delete with empty name should fail") |
|||
} |
|||
} |
|||
|
|||
// ============================================================
|
|||
// QA-VS: Volume server BlockService adversarial tests
|
|||
// ============================================================
|
|||
|
|||
// QA-VS-1: Concurrent CreateBlockVol for the same name on a single VS.
|
|||
func TestQA_VS_ConcurrentCreate(t *testing.T) { |
|||
bs, _ := newTestBlockServiceWithDir(t) |
|||
|
|||
var wg sync.WaitGroup |
|||
var panicked atomic.Bool |
|||
errors := make([]error, 20) |
|||
|
|||
for i := 0; i < 20; i++ { |
|||
wg.Add(1) |
|||
go func(i int) { |
|||
defer wg.Done() |
|||
defer func() { |
|||
if r := recover(); r != nil { |
|||
panicked.Store(true) |
|||
t.Errorf("PANIC: %v", r) |
|||
} |
|||
}() |
|||
_, _, _, errors[i] = bs.CreateBlockVol("race-vol", 4*1024*1024, "") |
|||
}(i) |
|||
} |
|||
wg.Wait() |
|||
|
|||
if panicked.Load() { |
|||
t.Fatal("BUG: concurrent CreateBlockVol caused panic") |
|||
} |
|||
|
|||
// At least some should succeed.
|
|||
successCount := 0 |
|||
for _, err := range errors { |
|||
if err == nil { |
|||
successCount++ |
|||
} |
|||
} |
|||
if successCount == 0 { |
|||
t.Fatal("at least one concurrent CreateBlockVol should succeed") |
|||
} |
|||
} |
|||
|
|||
// QA-VS-2: Concurrent CreateBlockVol and DeleteBlockVol for the same name.
|
|||
func TestQA_VS_ConcurrentCreateDelete(t *testing.T) { |
|||
bs, _ := newTestBlockServiceWithDir(t) |
|||
|
|||
var wg sync.WaitGroup |
|||
var panicked atomic.Bool |
|||
|
|||
for i := 0; i < 20; i++ { |
|||
wg.Add(2) |
|||
go func() { |
|||
defer wg.Done() |
|||
defer func() { |
|||
if r := recover(); r != nil { |
|||
panicked.Store(true) |
|||
} |
|||
}() |
|||
bs.CreateBlockVol("cd-vol", 4*1024*1024, "") |
|||
}() |
|||
go func() { |
|||
defer wg.Done() |
|||
defer func() { |
|||
if r := recover(); r != nil { |
|||
panicked.Store(true) |
|||
} |
|||
}() |
|||
bs.DeleteBlockVol("cd-vol") |
|||
}() |
|||
} |
|||
wg.Wait() |
|||
|
|||
if panicked.Load() { |
|||
t.Fatal("BUG: concurrent create/delete caused panic") |
|||
} |
|||
} |
|||
|
|||
// QA-VS-3: DeleteBlockVol should clean up .snap.* files.
|
|||
func TestQA_VS_DeleteCleansSnapshots(t *testing.T) { |
|||
bs, blockDir := newTestBlockServiceWithDir(t) |
|||
|
|||
bs.CreateBlockVol("snap-vol", 4*1024*1024, "") |
|||
|
|||
// Simulate snapshot files.
|
|||
snapPath := blockDir + "/snap-vol.blk.snap.0" |
|||
if err := writeTestFile(snapPath); err != nil { |
|||
t.Fatalf("create snap file: %v", err) |
|||
} |
|||
|
|||
bs.DeleteBlockVol("snap-vol") |
|||
|
|||
// Snap file should be removed.
|
|||
if fileExists(snapPath) { |
|||
t.Error("BUG: .snap.0 file not cleaned up after DeleteBlockVol") |
|||
} |
|||
} |
|||
|
|||
// QA-VS-4: CreateBlockVol with name that sanitizes to the same filename as another volume.
|
|||
func TestQA_VS_SanitizationCollision(t *testing.T) { |
|||
bs, _ := newTestBlockServiceWithDir(t) |
|||
|
|||
// "VolA" sanitizes to "vola.blk", "vola" also sanitizes to "vola.blk".
|
|||
_, _, _, err := bs.CreateBlockVol("VolA", 4*1024*1024, "") |
|||
if err != nil { |
|||
t.Fatalf("create VolA: %v", err) |
|||
} |
|||
|
|||
// "vola" should get the idempotent path (same file on disk).
|
|||
path2, _, _, err := bs.CreateBlockVol("vola", 4*1024*1024, "") |
|||
if err != nil { |
|||
t.Fatalf("create vola: %v", err) |
|||
} |
|||
|
|||
// Should point to the same file.
|
|||
if !strings.HasSuffix(path2, "vola.blk") { |
|||
t.Errorf("path: got %q, expected to end with vola.blk", path2) |
|||
} |
|||
} |
|||
|
|||
// QA-VS-5: CreateBlockVol idempotent path verifies TargetServer re-registration.
|
|||
func TestQA_VS_CreateIdempotentReaddTarget(t *testing.T) { |
|||
bs, _ := newTestBlockServiceWithDir(t) |
|||
|
|||
// First create.
|
|||
_, iqn1, _, err := bs.CreateBlockVol("readd-vol", 4*1024*1024, "") |
|||
if err != nil { |
|||
t.Fatalf("first create: %v", err) |
|||
} |
|||
|
|||
// Second create (idempotent) — should succeed and re-add to TargetServer.
|
|||
_, iqn2, _, err := bs.CreateBlockVol("readd-vol", 4*1024*1024, "") |
|||
if err != nil { |
|||
t.Fatalf("idempotent create: %v", err) |
|||
} |
|||
|
|||
if iqn1 != iqn2 { |
|||
t.Fatalf("IQN mismatch: %q vs %q", iqn1, iqn2) |
|||
} |
|||
} |
|||
|
|||
// QA-VS-6: gRPC handler with nil blockService.
|
|||
func TestQA_VS_GrpcNilBlockService(t *testing.T) { |
|||
vs := &VolumeServer{blockService: nil} |
|||
|
|||
_, err := vs.AllocateBlockVolume(context.Background(), &volume_server_pb.AllocateBlockVolumeRequest{ |
|||
Name: "vol1", SizeBytes: 1 << 30, |
|||
}) |
|||
if err == nil { |
|||
t.Fatal("expected error when blockService is nil") |
|||
} |
|||
|
|||
_, err = vs.VolumeServerDeleteBlockVolume(context.Background(), &volume_server_pb.VolumeServerDeleteBlockVolumeRequest{ |
|||
Name: "vol1", |
|||
}) |
|||
if err == nil { |
|||
t.Fatal("expected error when blockService is nil") |
|||
} |
|||
} |
|||
|
|||
// Helper functions.
|
|||
|
|||
func writeTestFile(path string) error { |
|||
f, err := os.Create(path) |
|||
if err != nil { |
|||
return err |
|||
} |
|||
f.Write([]byte("test")) |
|||
return f.Close() |
|||
} |
|||
|
|||
func fileExists(path string) bool { |
|||
_, err := os.Stat(path) |
|||
return err == nil |
|||
} |
|||
@ -0,0 +1,48 @@ |
|||
package weed_server |
|||
|
|||
import ( |
|||
"context" |
|||
"fmt" |
|||
|
|||
"github.com/seaweedfs/seaweedfs/weed/pb/volume_server_pb" |
|||
) |
|||
|
|||
// AllocateBlockVolume creates a new block volume on this volume server.
|
|||
func (vs *VolumeServer) AllocateBlockVolume(_ context.Context, req *volume_server_pb.AllocateBlockVolumeRequest) (*volume_server_pb.AllocateBlockVolumeResponse, error) { |
|||
if vs.blockService == nil { |
|||
return nil, fmt.Errorf("block service not enabled on this volume server") |
|||
} |
|||
if req.Name == "" { |
|||
return nil, fmt.Errorf("name is required") |
|||
} |
|||
if req.SizeBytes == 0 { |
|||
return nil, fmt.Errorf("size_bytes must be > 0") |
|||
} |
|||
|
|||
path, iqn, iscsiAddr, err := vs.blockService.CreateBlockVol(req.Name, req.SizeBytes, req.DiskType) |
|||
if err != nil { |
|||
return nil, fmt.Errorf("create block volume %q: %w", req.Name, err) |
|||
} |
|||
|
|||
return &volume_server_pb.AllocateBlockVolumeResponse{ |
|||
Path: path, |
|||
Iqn: iqn, |
|||
IscsiAddr: iscsiAddr, |
|||
}, nil |
|||
} |
|||
|
|||
// VolumeServerDeleteBlockVolume deletes a block volume on this volume server.
|
|||
func (vs *VolumeServer) VolumeServerDeleteBlockVolume(_ context.Context, req *volume_server_pb.VolumeServerDeleteBlockVolumeRequest) (*volume_server_pb.VolumeServerDeleteBlockVolumeResponse, error) { |
|||
if vs.blockService == nil { |
|||
return nil, fmt.Errorf("block service not enabled on this volume server") |
|||
} |
|||
if req.Name == "" { |
|||
return nil, fmt.Errorf("name is required") |
|||
} |
|||
|
|||
if err := vs.blockService.DeleteBlockVol(req.Name); err != nil { |
|||
return nil, fmt.Errorf("delete block volume %q: %w", req.Name, err) |
|||
} |
|||
|
|||
return &volume_server_pb.VolumeServerDeleteBlockVolumeResponse{}, nil |
|||
} |
|||
@ -0,0 +1,110 @@ |
|||
package weed_server |
|||
|
|||
import ( |
|||
"os" |
|||
"path/filepath" |
|||
"strings" |
|||
"testing" |
|||
) |
|||
|
|||
func newTestBlockServiceWithDir(t *testing.T) (*BlockService, string) { |
|||
t.Helper() |
|||
dir := t.TempDir() |
|||
blockDir := filepath.Join(dir, "blocks") |
|||
os.MkdirAll(blockDir, 0755) |
|||
bs := StartBlockService("127.0.0.1:0", blockDir, "iqn.2024.test:") |
|||
if bs == nil { |
|||
t.Fatal("StartBlockService returned nil") |
|||
} |
|||
t.Cleanup(func() { bs.Shutdown() }) |
|||
return bs, blockDir |
|||
} |
|||
|
|||
func TestVS_AllocateBlockVolume(t *testing.T) { |
|||
bs, blockDir := newTestBlockServiceWithDir(t) |
|||
|
|||
path, iqn, iscsiAddr, err := bs.CreateBlockVol("test-vol", 4*1024*1024, "ssd") |
|||
if err != nil { |
|||
t.Fatalf("CreateBlockVol: %v", err) |
|||
} |
|||
if path == "" || iqn == "" || iscsiAddr == "" { |
|||
t.Fatalf("empty return values: path=%q iqn=%q addr=%q", path, iqn, iscsiAddr) |
|||
} |
|||
|
|||
// Verify file exists.
|
|||
expectedPath := filepath.Join(blockDir, "test-vol.blk") |
|||
if path != expectedPath { |
|||
t.Fatalf("path: got %q, want %q", path, expectedPath) |
|||
} |
|||
if _, err := os.Stat(path); os.IsNotExist(err) { |
|||
t.Fatalf(".blk file not created at %s", path) |
|||
} |
|||
|
|||
// IQN should contain sanitized name.
|
|||
if !strings.Contains(iqn, "test-vol") { |
|||
t.Fatalf("IQN %q should contain 'test-vol'", iqn) |
|||
} |
|||
} |
|||
|
|||
func TestVS_AllocateIdempotent(t *testing.T) { |
|||
bs, _ := newTestBlockServiceWithDir(t) |
|||
|
|||
path1, iqn1, _, err := bs.CreateBlockVol("vol1", 4*1024*1024, "") |
|||
if err != nil { |
|||
t.Fatalf("first create: %v", err) |
|||
} |
|||
|
|||
// Same name+size should return same info.
|
|||
path2, iqn2, _, err := bs.CreateBlockVol("vol1", 4*1024*1024, "") |
|||
if err != nil { |
|||
t.Fatalf("idempotent create: %v", err) |
|||
} |
|||
if path1 != path2 || iqn1 != iqn2 { |
|||
t.Fatalf("idempotent mismatch: (%q,%q) vs (%q,%q)", path1, iqn1, path2, iqn2) |
|||
} |
|||
} |
|||
|
|||
func TestVS_AllocateSizeMismatch(t *testing.T) { |
|||
bs, _ := newTestBlockServiceWithDir(t) |
|||
|
|||
_, _, _, err := bs.CreateBlockVol("vol1", 4*1024*1024, "") |
|||
if err != nil { |
|||
t.Fatalf("first create: %v", err) |
|||
} |
|||
|
|||
// Requesting a LARGER size than existing should fail.
|
|||
_, _, _, err = bs.CreateBlockVol("vol1", 8*1024*1024, "") |
|||
if err == nil { |
|||
t.Fatal("size mismatch should return error") |
|||
} |
|||
} |
|||
|
|||
func TestVS_DeleteBlockVolume(t *testing.T) { |
|||
bs, blockDir := newTestBlockServiceWithDir(t) |
|||
|
|||
bs.CreateBlockVol("vol1", 4*1024*1024, "") |
|||
path := filepath.Join(blockDir, "vol1.blk") |
|||
|
|||
// File should exist.
|
|||
if _, err := os.Stat(path); err != nil { |
|||
t.Fatalf("file should exist: %v", err) |
|||
} |
|||
|
|||
if err := bs.DeleteBlockVol("vol1"); err != nil { |
|||
t.Fatalf("DeleteBlockVol: %v", err) |
|||
} |
|||
|
|||
// File should be gone.
|
|||
if _, err := os.Stat(path); !os.IsNotExist(err) { |
|||
t.Fatal("file should be removed after delete") |
|||
} |
|||
} |
|||
|
|||
func TestVS_DeleteNotFound(t *testing.T) { |
|||
bs, _ := newTestBlockServiceWithDir(t) |
|||
|
|||
// Deleting non-existent volume should be idempotent.
|
|||
if err := bs.DeleteBlockVol("no-such-vol"); err != nil { |
|||
t.Fatalf("delete non-existent should not error: %v", err) |
|||
} |
|||
} |
|||
@ -0,0 +1,111 @@ |
|||
package blockvol |
|||
|
|||
import ( |
|||
"github.com/seaweedfs/seaweedfs/weed/pb/master_pb" |
|||
) |
|||
|
|||
// InfoMessageToProto converts a Go wire type to proto.
|
|||
func InfoMessageToProto(m BlockVolumeInfoMessage) *master_pb.BlockVolumeInfoMessage { |
|||
return &master_pb.BlockVolumeInfoMessage{ |
|||
Path: m.Path, |
|||
VolumeSize: m.VolumeSize, |
|||
BlockSize: m.BlockSize, |
|||
Epoch: m.Epoch, |
|||
Role: m.Role, |
|||
WalHeadLsn: m.WalHeadLsn, |
|||
CheckpointLsn: m.CheckpointLsn, |
|||
HasLease: m.HasLease, |
|||
DiskType: m.DiskType, |
|||
} |
|||
} |
|||
|
|||
// InfoMessageFromProto converts a proto to Go wire type.
|
|||
func InfoMessageFromProto(p *master_pb.BlockVolumeInfoMessage) BlockVolumeInfoMessage { |
|||
if p == nil { |
|||
return BlockVolumeInfoMessage{} |
|||
} |
|||
return BlockVolumeInfoMessage{ |
|||
Path: p.Path, |
|||
VolumeSize: p.VolumeSize, |
|||
BlockSize: p.BlockSize, |
|||
Epoch: p.Epoch, |
|||
Role: p.Role, |
|||
WalHeadLsn: p.WalHeadLsn, |
|||
CheckpointLsn: p.CheckpointLsn, |
|||
HasLease: p.HasLease, |
|||
DiskType: p.DiskType, |
|||
} |
|||
} |
|||
|
|||
// InfoMessagesToProto converts a slice of Go wire types to proto.
|
|||
func InfoMessagesToProto(msgs []BlockVolumeInfoMessage) []*master_pb.BlockVolumeInfoMessage { |
|||
out := make([]*master_pb.BlockVolumeInfoMessage, len(msgs)) |
|||
for i, m := range msgs { |
|||
out[i] = InfoMessageToProto(m) |
|||
} |
|||
return out |
|||
} |
|||
|
|||
// InfoMessagesFromProto converts a slice of proto messages to Go wire types.
|
|||
func InfoMessagesFromProto(protos []*master_pb.BlockVolumeInfoMessage) []BlockVolumeInfoMessage { |
|||
out := make([]BlockVolumeInfoMessage, len(protos)) |
|||
for i, p := range protos { |
|||
out[i] = InfoMessageFromProto(p) |
|||
} |
|||
return out |
|||
} |
|||
|
|||
// ShortInfoToProto converts a Go short info to proto.
|
|||
func ShortInfoToProto(m BlockVolumeShortInfoMessage) *master_pb.BlockVolumeShortInfoMessage { |
|||
return &master_pb.BlockVolumeShortInfoMessage{ |
|||
Path: m.Path, |
|||
VolumeSize: m.VolumeSize, |
|||
BlockSize: m.BlockSize, |
|||
DiskType: m.DiskType, |
|||
} |
|||
} |
|||
|
|||
// ShortInfoFromProto converts a proto short info to Go wire type.
|
|||
func ShortInfoFromProto(p *master_pb.BlockVolumeShortInfoMessage) BlockVolumeShortInfoMessage { |
|||
if p == nil { |
|||
return BlockVolumeShortInfoMessage{} |
|||
} |
|||
return BlockVolumeShortInfoMessage{ |
|||
Path: p.Path, |
|||
VolumeSize: p.VolumeSize, |
|||
BlockSize: p.BlockSize, |
|||
DiskType: p.DiskType, |
|||
} |
|||
} |
|||
|
|||
// AssignmentToProto converts a Go assignment to proto.
|
|||
func AssignmentToProto(a BlockVolumeAssignment) *master_pb.BlockVolumeAssignment { |
|||
return &master_pb.BlockVolumeAssignment{ |
|||
Path: a.Path, |
|||
Epoch: a.Epoch, |
|||
Role: a.Role, |
|||
LeaseTtlMs: a.LeaseTtlMs, |
|||
} |
|||
} |
|||
|
|||
// AssignmentFromProto converts a proto assignment to Go wire type.
|
|||
func AssignmentFromProto(p *master_pb.BlockVolumeAssignment) BlockVolumeAssignment { |
|||
if p == nil { |
|||
return BlockVolumeAssignment{} |
|||
} |
|||
return BlockVolumeAssignment{ |
|||
Path: p.Path, |
|||
Epoch: p.Epoch, |
|||
Role: p.Role, |
|||
LeaseTtlMs: p.LeaseTtlMs, |
|||
} |
|||
} |
|||
|
|||
// AssignmentsFromProto converts a slice of proto assignments to Go wire types.
|
|||
func AssignmentsFromProto(protos []*master_pb.BlockVolumeAssignment) []BlockVolumeAssignment { |
|||
out := make([]BlockVolumeAssignment, len(protos)) |
|||
for i, p := range protos { |
|||
out[i] = AssignmentFromProto(p) |
|||
} |
|||
return out |
|||
} |
|||
@ -0,0 +1,85 @@ |
|||
package blockvol |
|||
|
|||
import ( |
|||
"testing" |
|||
) |
|||
|
|||
func TestInfoMessageRoundTrip(t *testing.T) { |
|||
orig := BlockVolumeInfoMessage{ |
|||
Path: "/data/vol1.blk", |
|||
VolumeSize: 1 << 30, |
|||
BlockSize: 4096, |
|||
Epoch: 42, |
|||
Role: RoleToWire(RolePrimary), |
|||
WalHeadLsn: 1000, |
|||
CheckpointLsn: 900, |
|||
HasLease: true, |
|||
DiskType: "ssd", |
|||
} |
|||
pb := InfoMessageToProto(orig) |
|||
back := InfoMessageFromProto(pb) |
|||
if back != orig { |
|||
t.Fatalf("round-trip mismatch:\n got %+v\n want %+v", back, orig) |
|||
} |
|||
} |
|||
|
|||
func TestShortInfoRoundTrip(t *testing.T) { |
|||
orig := BlockVolumeShortInfoMessage{ |
|||
Path: "/data/vol2.blk", |
|||
VolumeSize: 2 << 30, |
|||
BlockSize: 4096, |
|||
DiskType: "hdd", |
|||
} |
|||
pb := ShortInfoToProto(orig) |
|||
back := ShortInfoFromProto(pb) |
|||
if back != orig { |
|||
t.Fatalf("round-trip mismatch:\n got %+v\n want %+v", back, orig) |
|||
} |
|||
} |
|||
|
|||
func TestAssignmentRoundTrip(t *testing.T) { |
|||
orig := BlockVolumeAssignment{ |
|||
Path: "/data/vol3.blk", |
|||
Epoch: 7, |
|||
Role: RoleToWire(RoleReplica), |
|||
LeaseTtlMs: 5000, |
|||
} |
|||
pb := AssignmentToProto(orig) |
|||
back := AssignmentFromProto(pb) |
|||
if back != orig { |
|||
t.Fatalf("round-trip mismatch:\n got %+v\n want %+v", back, orig) |
|||
} |
|||
} |
|||
|
|||
func TestInfoMessagesSliceRoundTrip(t *testing.T) { |
|||
origSlice := []BlockVolumeInfoMessage{ |
|||
{Path: "/a.blk", VolumeSize: 100, Epoch: 1}, |
|||
{Path: "/b.blk", VolumeSize: 200, Epoch: 2, HasLease: true}, |
|||
} |
|||
pbs := InfoMessagesToProto(origSlice) |
|||
back := InfoMessagesFromProto(pbs) |
|||
if len(back) != len(origSlice) { |
|||
t.Fatalf("length mismatch: got %d, want %d", len(back), len(origSlice)) |
|||
} |
|||
for i := range origSlice { |
|||
if back[i] != origSlice[i] { |
|||
t.Fatalf("index %d mismatch:\n got %+v\n want %+v", i, back[i], origSlice[i]) |
|||
} |
|||
} |
|||
} |
|||
|
|||
func TestNilProtoConversions(t *testing.T) { |
|||
// Nil proto -> zero-value Go types.
|
|||
info := InfoMessageFromProto(nil) |
|||
if info != (BlockVolumeInfoMessage{}) { |
|||
t.Fatalf("nil info proto should yield zero value, got %+v", info) |
|||
} |
|||
short := ShortInfoFromProto(nil) |
|||
if short != (BlockVolumeShortInfoMessage{}) { |
|||
t.Fatalf("nil short proto should yield zero value, got %+v", short) |
|||
} |
|||
assign := AssignmentFromProto(nil) |
|||
if assign != (BlockVolumeAssignment{}) { |
|||
t.Fatalf("nil assignment proto should yield zero value, got %+v", assign) |
|||
} |
|||
} |
|||
@ -0,0 +1,68 @@ |
|||
// block-csi is the SeaweedFS BlockVol CSI driver.
|
|||
// It embeds a BlockVol engine and iSCSI target in-process, serving
|
|||
// CSI Identity, Controller, and Node services on a Unix socket.
|
|||
package main |
|||
|
|||
import ( |
|||
"flag" |
|||
"fmt" |
|||
"log" |
|||
"os" |
|||
"os/signal" |
|||
"syscall" |
|||
|
|||
blockcsi "github.com/seaweedfs/seaweedfs/weed/storage/blockvol/csi" |
|||
) |
|||
|
|||
func main() { |
|||
endpoint := flag.String("endpoint", "unix:///csi/csi.sock", "CSI endpoint (unix socket)") |
|||
dataDir := flag.String("data-dir", "/var/lib/sw-block", "volume data directory") |
|||
iscsiAddr := flag.String("iscsi-addr", "127.0.0.1:3260", "local iSCSI target listen address") |
|||
iqnPrefix := flag.String("iqn-prefix", "iqn.2024.com.seaweedfs", "IQN prefix for volumes") |
|||
nodeID := flag.String("node-id", "", "node identifier (required)") |
|||
masterAddr := flag.String("master", "", "master address for control-plane mode (e.g. master:9333)") |
|||
mode := flag.String("mode", "all", "driver mode: controller, node, or all") |
|||
flag.Parse() |
|||
|
|||
if *nodeID == "" { |
|||
fmt.Fprintln(os.Stderr, "error: -node-id is required") |
|||
flag.Usage() |
|||
os.Exit(1) |
|||
} |
|||
if *mode == "controller" && *masterAddr == "" { |
|||
fmt.Fprintln(os.Stderr, "error: -master is required in controller mode") |
|||
flag.Usage() |
|||
os.Exit(1) |
|||
} |
|||
|
|||
logger := log.New(os.Stdout, "[block-csi] ", log.LstdFlags) |
|||
|
|||
driver, err := blockcsi.NewCSIDriver(blockcsi.DriverConfig{ |
|||
Endpoint: *endpoint, |
|||
DataDir: *dataDir, |
|||
ISCSIAddr: *iscsiAddr, |
|||
IQNPrefix: *iqnPrefix, |
|||
NodeID: *nodeID, |
|||
MasterAddr: *masterAddr, |
|||
Mode: *mode, |
|||
Logger: logger, |
|||
}) |
|||
if err != nil { |
|||
log.Fatalf("create CSI driver: %v", err) |
|||
} |
|||
|
|||
// Graceful shutdown on signal.
|
|||
sigCh := make(chan os.Signal, 1) |
|||
signal.Notify(sigCh, syscall.SIGINT, syscall.SIGTERM) |
|||
go func() { |
|||
sig := <-sigCh |
|||
logger.Printf("received %v, shutting down...", sig) |
|||
driver.Stop() |
|||
}() |
|||
|
|||
logger.Printf("starting block-csi driver: node=%s endpoint=%s", *nodeID, *endpoint) |
|||
if err := driver.Run(); err != nil { |
|||
log.Fatalf("CSI driver: %v", err) |
|||
} |
|||
logger.Println("block-csi driver stopped") |
|||
} |
|||
@ -0,0 +1,113 @@ |
|||
#!/usr/bin/env bash
# smoke-test.sh -- deploy sw-block CSI driver to k3s/kind and verify PVC lifecycle.
# Requires: k3s or kind pre-installed, kubectl, go, docker (for kind).
# Usage: bash smoke-test.sh
#
# Flow: build binary -> detect runtime -> deploy manifests -> create PVC+pod
# -> verify written data -> recreate pod to verify persistence -> cleanup.
# Exits non-zero on the first failed step (set -e); cleanup in step 7 only
# runs on success.
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
REPO_ROOT="$(cd "$SCRIPT_DIR/../../../../.." && pwd)"
DEPLOY_DIR="$SCRIPT_DIR/../../deploy"
BINARY="$SCRIPT_DIR/block-csi"

echo "=== sw-block CSI smoke test ==="

# 1. Build binary (static linux/amd64 so it runs in alpine / on the k3s host)
echo "[1/7] Building block-csi (linux/amd64)..."
cd "$REPO_ROOT"
GOOS=linux GOARCH=amd64 CGO_ENABLED=0 go build -o "$BINARY" ./weed/storage/blockvol/csi/cmd/block-csi/
echo "  built: $BINARY"

# 2. Detect runtime (k3s or kind) -- k3s wins if both are installed
if command -v k3s &>/dev/null; then
  RUNTIME=k3s
  KUBECTL="k3s kubectl"
  echo "[2/7] Detected k3s"

  # Copy binary to a path accessible by the k3s node
  sudo cp "$BINARY" /usr/local/bin/block-csi
  sudo chmod +x /usr/local/bin/block-csi
elif command -v kind &>/dev/null; then
  RUNTIME=kind
  KUBECTL="kubectl"
  echo "[2/7] Detected kind"

  # Build a minimal container image
  cat > /tmp/block-csi-Dockerfile <<'DOCKERFILE'
FROM alpine:3.19
RUN apk add --no-cache open-iscsi e2fsprogs util-linux
COPY block-csi /usr/local/bin/block-csi
ENTRYPOINT ["/usr/local/bin/block-csi"]
DOCKERFILE
  docker build -t sw-block-csi:local -f /tmp/block-csi-Dockerfile "$SCRIPT_DIR"
  kind load docker-image sw-block-csi:local
else
  echo "ERROR: neither k3s nor kind found" >&2
  exit 1
fi

# 3. Deploy manifests (RBAC first so the driver pods can start cleanly)
echo "[3/7] Deploying CSI driver..."
$KUBECTL apply -f "$DEPLOY_DIR/rbac.yaml"
$KUBECTL apply -f "$DEPLOY_DIR/csi-driver.yaml"
$KUBECTL apply -f "$DEPLOY_DIR/storageclass.yaml"

echo "  Waiting for DaemonSet to be ready..."
$KUBECTL -n kube-system rollout status daemonset/sw-block-csi-node --timeout=120s

# 4. Create PVC (pod in example-pvc.yaml writes the sentinel file)
echo "[4/7] Creating PVC..."
$KUBECTL apply -f "$DEPLOY_DIR/example-pvc.yaml"

echo "  Waiting for pod to be ready..."
$KUBECTL wait --for=condition=Ready pod/sw-block-test-pod --timeout=120s

# 5. Verify data written by the example pod's command
echo "[5/7] Verifying data..."
DATA=$($KUBECTL exec sw-block-test-pod -- cat /data/test.txt)
if [ "$DATA" = "hello sw-block" ]; then
  echo "  OK: data verified"
else
  echo "  FAIL: expected 'hello sw-block', got '$DATA'" >&2
  exit 1
fi

# 6. Delete and recreate pod to verify persistence; the replacement pod only
#    reads the file, so a fresh (empty) volume would fail the check.
echo "[6/7] Deleting pod and verifying persistence..."
$KUBECTL delete pod sw-block-test-pod --grace-period=5
$KUBECTL apply -f - <<'EOF'
apiVersion: v1
kind: Pod
metadata:
  name: sw-block-test-pod
spec:
  containers:
    - name: app
      image: busybox
      command: ["sh", "-c", "cat /data/test.txt && sleep 3600"]
      volumeMounts:
        - name: data
          mountPath: /data
  volumes:
    - name: data
      persistentVolumeClaim:
        claimName: sw-block-test
EOF

$KUBECTL wait --for=condition=Ready pod/sw-block-test-pod --timeout=120s
DATA=$($KUBECTL exec sw-block-test-pod -- cat /data/test.txt)
if [ "$DATA" = "hello sw-block" ]; then
  echo "  OK: data persisted across pod restart"
else
  echo "  FAIL: data not persisted, got '$DATA'" >&2
  exit 1
fi

# 7. Cleanup (best-effort: each delete tolerates missing objects)
echo "[7/7] Cleaning up..."
$KUBECTL delete pod sw-block-test-pod --grace-period=5 || true
$KUBECTL delete pvc sw-block-test || true
$KUBECTL delete -f "$DEPLOY_DIR/csi-driver.yaml" || true
$KUBECTL delete -f "$DEPLOY_DIR/storageclass.yaml" || true
$KUBECTL delete -f "$DEPLOY_DIR/rbac.yaml" || true

echo "=== smoke test PASSED ==="
|||
@ -0,0 +1,130 @@ |
|||
package csi |
|||
|
|||
import ( |
|||
"context" |
|||
"errors" |
|||
|
|||
"github.com/container-storage-interface/spec/lib/go/csi" |
|||
"google.golang.org/grpc/codes" |
|||
"google.golang.org/grpc/status" |
|||
) |
|||
|
|||
// Volume sizing policy for CreateVolume.
const (
	defaultVolumeSizeBytes = 1 << 30 // 1 GiB, used when no CapacityRange is given
	minVolumeSizeBytes     = 1 << 20 // 1 MiB floor applied after size selection
	blockSize              = 4096    // volumes are rounded up to this granularity
)

// controllerServer implements the CSI Controller service. All volume
// operations are delegated to the configured VolumeBackend (local engine
// or master-backed control plane).
type controllerServer struct {
	csi.UnimplementedControllerServer
	backend VolumeBackend
}
|||
|
|||
func (s *controllerServer) CreateVolume(_ context.Context, req *csi.CreateVolumeRequest) (*csi.CreateVolumeResponse, error) { |
|||
if req.Name == "" { |
|||
return nil, status.Error(codes.InvalidArgument, "volume name is required") |
|||
} |
|||
if len(req.VolumeCapabilities) == 0 { |
|||
return nil, status.Error(codes.InvalidArgument, "volume capabilities are required") |
|||
} |
|||
|
|||
sizeBytes := int64(defaultVolumeSizeBytes) |
|||
if req.CapacityRange != nil && req.CapacityRange.RequiredBytes > 0 { |
|||
sizeBytes = req.CapacityRange.RequiredBytes |
|||
} else if req.CapacityRange != nil && req.CapacityRange.LimitBytes > 0 { |
|||
// No RequiredBytes set — use LimitBytes as the target size.
|
|||
sizeBytes = req.CapacityRange.LimitBytes |
|||
} |
|||
if req.CapacityRange != nil && req.CapacityRange.LimitBytes > 0 { |
|||
if req.CapacityRange.RequiredBytes > req.CapacityRange.LimitBytes { |
|||
return nil, status.Errorf(codes.InvalidArgument, |
|||
"required_bytes (%d) exceeds limit_bytes (%d)", |
|||
req.CapacityRange.RequiredBytes, req.CapacityRange.LimitBytes) |
|||
} |
|||
} |
|||
if sizeBytes < minVolumeSizeBytes { |
|||
sizeBytes = minVolumeSizeBytes |
|||
} |
|||
// Round up to block size.
|
|||
if sizeBytes%blockSize != 0 { |
|||
sizeBytes = (sizeBytes/blockSize + 1) * blockSize |
|||
} |
|||
// Verify rounded size still respects LimitBytes.
|
|||
if req.CapacityRange != nil && req.CapacityRange.LimitBytes > 0 { |
|||
if sizeBytes > req.CapacityRange.LimitBytes { |
|||
return nil, status.Errorf(codes.InvalidArgument, |
|||
"volume size (%d) after rounding exceeds limit_bytes (%d)", |
|||
sizeBytes, req.CapacityRange.LimitBytes) |
|||
} |
|||
} |
|||
|
|||
info, err := s.backend.CreateVolume(context.Background(), req.Name, uint64(sizeBytes)) |
|||
if err != nil { |
|||
if errors.Is(err, ErrVolumeSizeMismatch) { |
|||
return nil, status.Errorf(codes.AlreadyExists, "volume %q exists with different size", req.Name) |
|||
} |
|||
return nil, status.Errorf(codes.Internal, "create volume: %v", err) |
|||
} |
|||
|
|||
resp := &csi.CreateVolumeResponse{ |
|||
Volume: &csi.Volume{ |
|||
VolumeId: info.VolumeID, |
|||
CapacityBytes: int64(info.CapacityBytes), |
|||
}, |
|||
} |
|||
|
|||
// Attach volume_context with iSCSI target info for NodeStageVolume.
|
|||
if info.ISCSIAddr != "" || info.IQN != "" { |
|||
resp.Volume.VolumeContext = map[string]string{ |
|||
"iscsiAddr": info.ISCSIAddr, |
|||
"iqn": info.IQN, |
|||
} |
|||
} |
|||
|
|||
return resp, nil |
|||
} |
|||
|
|||
func (s *controllerServer) DeleteVolume(_ context.Context, req *csi.DeleteVolumeRequest) (*csi.DeleteVolumeResponse, error) { |
|||
if req.VolumeId == "" { |
|||
return nil, status.Error(codes.InvalidArgument, "volume ID is required") |
|||
} |
|||
|
|||
// Idempotent: DeleteVolume succeeds even if volume doesn't exist.
|
|||
if err := s.backend.DeleteVolume(context.Background(), req.VolumeId); err != nil { |
|||
return nil, status.Errorf(codes.Internal, "delete volume: %v", err) |
|||
} |
|||
|
|||
return &csi.DeleteVolumeResponse{}, nil |
|||
} |
|||
|
|||
func (s *controllerServer) ControllerGetCapabilities(_ context.Context, _ *csi.ControllerGetCapabilitiesRequest) (*csi.ControllerGetCapabilitiesResponse, error) { |
|||
return &csi.ControllerGetCapabilitiesResponse{ |
|||
Capabilities: []*csi.ControllerServiceCapability{ |
|||
{ |
|||
Type: &csi.ControllerServiceCapability_Rpc{ |
|||
Rpc: &csi.ControllerServiceCapability_RPC{ |
|||
Type: csi.ControllerServiceCapability_RPC_CREATE_DELETE_VOLUME, |
|||
}, |
|||
}, |
|||
}, |
|||
}, |
|||
}, nil |
|||
} |
|||
|
|||
func (s *controllerServer) ValidateVolumeCapabilities(_ context.Context, req *csi.ValidateVolumeCapabilitiesRequest) (*csi.ValidateVolumeCapabilitiesResponse, error) { |
|||
if req.VolumeId == "" { |
|||
return nil, status.Error(codes.InvalidArgument, "volume ID is required") |
|||
} |
|||
if len(req.VolumeCapabilities) == 0 { |
|||
return nil, status.Error(codes.InvalidArgument, "volume capabilities are required") |
|||
} |
|||
if _, err := s.backend.LookupVolume(context.Background(), req.VolumeId); err != nil { |
|||
return nil, status.Errorf(codes.NotFound, "volume %q not found", req.VolumeId) |
|||
} |
|||
|
|||
return &csi.ValidateVolumeCapabilitiesResponse{ |
|||
Confirmed: &csi.ValidateVolumeCapabilitiesResponse_Confirmed{ |
|||
VolumeCapabilities: req.VolumeCapabilities, |
|||
}, |
|||
}, nil |
|||
} |
|||
@ -0,0 +1,127 @@ |
|||
package csi |
|||
|
|||
import ( |
|||
"context" |
|||
"testing" |
|||
|
|||
"github.com/container-storage-interface/spec/lib/go/csi" |
|||
) |
|||
|
|||
// testVolCaps returns a standard volume capability for testing.
|
|||
func testVolCaps() []*csi.VolumeCapability { |
|||
return []*csi.VolumeCapability{{ |
|||
AccessType: &csi.VolumeCapability_Mount{ |
|||
Mount: &csi.VolumeCapability_MountVolume{FsType: "ext4"}, |
|||
}, |
|||
AccessMode: &csi.VolumeCapability_AccessMode{ |
|||
Mode: csi.VolumeCapability_AccessMode_SINGLE_NODE_WRITER, |
|||
}, |
|||
}} |
|||
} |
|||
|
|||
func testVolCap() *csi.VolumeCapability { |
|||
return testVolCaps()[0] |
|||
} |
|||
|
|||
func TestController_CreateVolume(t *testing.T) { |
|||
mgr := newTestManager(t) |
|||
backend := NewLocalVolumeBackend(mgr) |
|||
cs := &controllerServer{backend: backend} |
|||
|
|||
resp, err := cs.CreateVolume(context.Background(), &csi.CreateVolumeRequest{ |
|||
Name: "test-vol", |
|||
VolumeCapabilities: testVolCaps(), |
|||
CapacityRange: &csi.CapacityRange{ |
|||
RequiredBytes: 4 * 1024 * 1024, |
|||
}, |
|||
}) |
|||
if err != nil { |
|||
t.Fatalf("CreateVolume: %v", err) |
|||
} |
|||
if resp.Volume.VolumeId != "test-vol" { |
|||
t.Fatalf("volume_id: got %q, want %q", resp.Volume.VolumeId, "test-vol") |
|||
} |
|||
if resp.Volume.CapacityBytes != 4*1024*1024 { |
|||
t.Fatalf("capacity: got %d, want %d", resp.Volume.CapacityBytes, 4*1024*1024) |
|||
} |
|||
if !mgr.VolumeExists("test-vol") { |
|||
t.Fatal("expected volume to exist") |
|||
} |
|||
|
|||
// Verify volume_context has iSCSI info.
|
|||
if resp.Volume.VolumeContext == nil { |
|||
t.Fatal("expected volume_context to be set") |
|||
} |
|||
if resp.Volume.VolumeContext["iqn"] == "" { |
|||
t.Fatal("expected iqn in volume_context") |
|||
} |
|||
} |
|||
|
|||
func TestController_CreateIdempotent(t *testing.T) { |
|||
mgr := newTestManager(t) |
|||
backend := NewLocalVolumeBackend(mgr) |
|||
cs := &controllerServer{backend: backend} |
|||
|
|||
req := &csi.CreateVolumeRequest{ |
|||
Name: "idem-vol", |
|||
VolumeCapabilities: testVolCaps(), |
|||
CapacityRange: &csi.CapacityRange{ |
|||
RequiredBytes: 4 * 1024 * 1024, |
|||
}, |
|||
} |
|||
|
|||
if _, err := cs.CreateVolume(context.Background(), req); err != nil { |
|||
t.Fatalf("first create: %v", err) |
|||
} |
|||
|
|||
// Second create with same size should succeed (idempotent).
|
|||
resp, err := cs.CreateVolume(context.Background(), req) |
|||
if err != nil { |
|||
t.Fatalf("second create: %v", err) |
|||
} |
|||
if resp.Volume.VolumeId != "idem-vol" { |
|||
t.Fatalf("volume_id: got %q, want %q", resp.Volume.VolumeId, "idem-vol") |
|||
} |
|||
} |
|||
|
|||
func TestController_DeleteVolume(t *testing.T) { |
|||
mgr := newTestManager(t) |
|||
backend := NewLocalVolumeBackend(mgr) |
|||
cs := &controllerServer{backend: backend} |
|||
|
|||
// Create then delete.
|
|||
_, err := cs.CreateVolume(context.Background(), &csi.CreateVolumeRequest{ |
|||
Name: "del-vol", |
|||
VolumeCapabilities: testVolCaps(), |
|||
CapacityRange: &csi.CapacityRange{ |
|||
RequiredBytes: 4 * 1024 * 1024, |
|||
}, |
|||
}) |
|||
if err != nil { |
|||
t.Fatalf("create: %v", err) |
|||
} |
|||
|
|||
_, err = cs.DeleteVolume(context.Background(), &csi.DeleteVolumeRequest{ |
|||
VolumeId: "del-vol", |
|||
}) |
|||
if err != nil { |
|||
t.Fatalf("delete: %v", err) |
|||
} |
|||
if mgr.VolumeExists("del-vol") { |
|||
t.Fatal("expected volume to not exist after delete") |
|||
} |
|||
} |
|||
|
|||
func TestController_DeleteNotFound(t *testing.T) { |
|||
mgr := newTestManager(t) |
|||
backend := NewLocalVolumeBackend(mgr) |
|||
cs := &controllerServer{backend: backend} |
|||
|
|||
// Delete non-existent volume -- should succeed (CSI spec idempotency).
|
|||
_, err := cs.DeleteVolume(context.Background(), &csi.DeleteVolumeRequest{ |
|||
VolumeId: "nonexistent", |
|||
}) |
|||
if err != nil { |
|||
t.Fatalf("delete non-existent: %v", err) |
|||
} |
|||
} |
|||
@ -0,0 +1,44 @@ |
|||
# Controller deployment: runs the block-csi driver in controller mode next
# to the external csi-provisioner sidecar, sharing a CSI socket via an
# emptyDir. Single replica; talks to the SeaweedFS master at master:9333.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: sw-block-csi-controller
  namespace: kube-system
spec:
  replicas: 1
  selector:
    matchLabels:
      app: sw-block-csi-controller
  template:
    metadata:
      labels:
        app: sw-block-csi-controller
    spec:
      serviceAccountName: sw-block-csi
      containers:
        - name: block-csi
          image: sw-block-csi:local
          imagePullPolicy: Never  # image is side-loaded (kind load / host copy)
          args:
            - "-endpoint=unix:///csi/csi.sock"
            - "-mode=controller"
            - "-master=master:9333"
            - "-node-id=$(NODE_NAME)"
          env:
            - name: NODE_NAME
              valueFrom:
                fieldRef:
                  fieldPath: spec.nodeName
          volumeMounts:
            - name: socket-dir
              mountPath: /csi
        # Sidecar: watches PVCs and calls CreateVolume/DeleteVolume over the
        # shared socket.
        - name: csi-provisioner
          image: registry.k8s.io/sig-storage/csi-provisioner:v5.1.0
          args:
            - "--csi-address=/csi/csi.sock"
            - "--feature-gates=Topology=true"
          volumeMounts:
            - name: socket-dir
              mountPath: /csi
      volumes:
        - name: socket-dir
          emptyDir: {}
|||
@ -0,0 +1,9 @@ |
|||
# CSIDriver object registering the sw-block driver with Kubernetes.
apiVersion: storage.k8s.io/v1
kind: CSIDriver
metadata:
  name: block.csi.seaweedfs.com
spec:
  attachRequired: false  # no ControllerPublishVolume step; node stages directly
  podInfoOnMount: false
  volumeLifecycleModes:
    - Persistent
|||
@ -0,0 +1,73 @@ |
|||
# Node DaemonSet: runs the block-csi driver in node mode on every node,
# privileged with host /dev, /etc/iscsi, and the kubelet dir (bidirectional
# propagation) so it can log into iSCSI targets and mount into pod volumes.
# The registrar sidecar announces the driver socket to kubelet.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: sw-block-csi-node
  namespace: kube-system
spec:
  selector:
    matchLabels:
      app: sw-block-csi-node
  template:
    metadata:
      labels:
        app: sw-block-csi-node
    spec:
      serviceAccountName: sw-block-csi
      hostNetwork: true
      hostPID: true
      containers:
        - name: block-csi
          image: sw-block-csi:local
          imagePullPolicy: Never  # image is side-loaded (kind load / host copy)
          args:
            - "-endpoint=unix:///csi/csi.sock"
            - "-mode=node"
            - "-node-id=$(NODE_NAME)"
          env:
            - name: NODE_NAME
              valueFrom:
                fieldRef:
                  fieldPath: spec.nodeName
          securityContext:
            privileged: true  # required for iscsiadm, mkfs, and mounts
          volumeMounts:
            - name: socket-dir
              mountPath: /csi
            - name: kubelet-dir
              mountPath: /var/lib/kubelet
              mountPropagation: Bidirectional  # mounts must be visible to kubelet
            - name: dev
              mountPath: /dev
            - name: iscsi-dir
              mountPath: /etc/iscsi
        - name: csi-node-driver-registrar
          image: registry.k8s.io/sig-storage/csi-node-driver-registrar:v2.12.0
          args:
            - "--csi-address=/csi/csi.sock"
            - "--kubelet-registration-path=/var/lib/kubelet/plugins/block.csi.seaweedfs.com/csi.sock"
          volumeMounts:
            - name: socket-dir
              mountPath: /csi
            - name: registration-dir
              mountPath: /registration
      volumes:
        - name: socket-dir
          hostPath:
            path: /var/lib/kubelet/plugins/block.csi.seaweedfs.com
            type: DirectoryOrCreate
        - name: kubelet-dir
          hostPath:
            path: /var/lib/kubelet
            type: Directory
        - name: dev
          hostPath:
            path: /dev
            type: Directory
        - name: iscsi-dir
          hostPath:
            path: /etc/iscsi
            type: DirectoryOrCreate
        - name: registration-dir
          hostPath:
            path: /var/lib/kubelet/plugins_registry
            type: Directory
|||
@ -0,0 +1,28 @@ |
|||
# Example PVC + pod used by the smoke test: the pod writes a sentinel file
# to the sw-block volume, which smoke-test.sh later reads back to verify
# provisioning and persistence.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: sw-block-test
spec:
  accessModes:
    - ReadWriteOnce
  storageClassName: sw-block
  resources:
    requests:
      storage: 100Mi
---
apiVersion: v1
kind: Pod
metadata:
  name: sw-block-test-pod
spec:
  containers:
    - name: app
      image: busybox
      command: ["sh", "-c", "echo 'hello sw-block' > /data/test.txt && cat /data/test.txt && sleep 3600"]
      volumeMounts:
        - name: data
          mountPath: /data
  volumes:
    - name: data
      persistentVolumeClaim:
        claimName: sw-block-test
|||
@ -0,0 +1,45 @@ |
|||
# RBAC for the sw-block CSI driver. The permissions are those required by
# the external csi-provisioner sidecar (PV create/delete, PVC watch/update,
# StorageClass/CSINode/Node/VolumeAttachment reads, event recording).
apiVersion: v1
kind: ServiceAccount
metadata:
  name: sw-block-csi
  namespace: kube-system
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: sw-block-csi
rules:
  - apiGroups: [""]
    resources: ["persistentvolumes"]
    verbs: ["get", "list", "watch", "create", "delete"]
  - apiGroups: [""]
    resources: ["persistentvolumeclaims"]
    verbs: ["get", "list", "watch", "update"]
  - apiGroups: ["storage.k8s.io"]
    resources: ["storageclasses"]
    verbs: ["get", "list", "watch"]
  - apiGroups: [""]
    resources: ["events"]
    verbs: ["list", "watch", "create", "update", "patch"]
  - apiGroups: ["storage.k8s.io"]
    resources: ["csinodes"]
    verbs: ["get", "list", "watch"]
  - apiGroups: [""]
    resources: ["nodes"]
    verbs: ["get", "list", "watch"]
  - apiGroups: ["storage.k8s.io"]
    resources: ["volumeattachments"]
    verbs: ["get", "list", "watch"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: sw-block-csi
subjects:
  - kind: ServiceAccount
    name: sw-block-csi
    namespace: kube-system
roleRef:
  kind: ClusterRole
  name: sw-block-csi
  apiGroup: rbac.authorization.k8s.io
|||
@ -0,0 +1,7 @@ |
|||
# StorageClass for sw-block volumes. WaitForFirstConsumer defers binding
# until a pod is scheduled so placement can follow the pod's node.
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: sw-block
provisioner: block.csi.seaweedfs.com
volumeBindingMode: WaitForFirstConsumer
reclaimPolicy: Delete
|||
@ -0,0 +1,44 @@ |
|||
package csi |
|||
|
|||
import ( |
|||
"context" |
|||
|
|||
"github.com/container-storage-interface/spec/lib/go/csi" |
|||
"google.golang.org/protobuf/types/known/wrapperspb" |
|||
) |
|||
|
|||
// Driver identity reported by the CSI Identity service.
const (
	DriverName    = "block.csi.seaweedfs.com"
	DriverVersion = "0.1.0"
)

// identityServer implements the CSI Identity service (GetPluginInfo,
// GetPluginCapabilities, Probe). It is stateless.
type identityServer struct {
	csi.UnimplementedIdentityServer
}
|||
|
|||
func (s *identityServer) GetPluginInfo(_ context.Context, _ *csi.GetPluginInfoRequest) (*csi.GetPluginInfoResponse, error) { |
|||
return &csi.GetPluginInfoResponse{ |
|||
Name: DriverName, |
|||
VendorVersion: DriverVersion, |
|||
}, nil |
|||
} |
|||
|
|||
func (s *identityServer) GetPluginCapabilities(_ context.Context, _ *csi.GetPluginCapabilitiesRequest) (*csi.GetPluginCapabilitiesResponse, error) { |
|||
return &csi.GetPluginCapabilitiesResponse{ |
|||
Capabilities: []*csi.PluginCapability{ |
|||
{ |
|||
Type: &csi.PluginCapability_Service_{ |
|||
Service: &csi.PluginCapability_Service{ |
|||
Type: csi.PluginCapability_Service_CONTROLLER_SERVICE, |
|||
}, |
|||
}, |
|||
}, |
|||
}, |
|||
}, nil |
|||
} |
|||
|
|||
func (s *identityServer) Probe(_ context.Context, _ *csi.ProbeRequest) (*csi.ProbeResponse, error) { |
|||
return &csi.ProbeResponse{ |
|||
Ready: wrapperspb.Bool(true), |
|||
}, nil |
|||
} |
|||
@ -0,0 +1,51 @@ |
|||
package csi |
|||
|
|||
import ( |
|||
"context" |
|||
"testing" |
|||
|
|||
"github.com/container-storage-interface/spec/lib/go/csi" |
|||
) |
|||
|
|||
func TestIdentity_GetPluginInfo(t *testing.T) { |
|||
s := &identityServer{} |
|||
resp, err := s.GetPluginInfo(context.Background(), &csi.GetPluginInfoRequest{}) |
|||
if err != nil { |
|||
t.Fatalf("GetPluginInfo: %v", err) |
|||
} |
|||
if resp.Name != DriverName { |
|||
t.Fatalf("name: got %q, want %q", resp.Name, DriverName) |
|||
} |
|||
if resp.VendorVersion != DriverVersion { |
|||
t.Fatalf("version: got %q, want %q", resp.VendorVersion, DriverVersion) |
|||
} |
|||
} |
|||
|
|||
func TestIdentity_GetPluginCapabilities(t *testing.T) { |
|||
s := &identityServer{} |
|||
resp, err := s.GetPluginCapabilities(context.Background(), &csi.GetPluginCapabilitiesRequest{}) |
|||
if err != nil { |
|||
t.Fatalf("GetPluginCapabilities: %v", err) |
|||
} |
|||
if len(resp.Capabilities) != 1 { |
|||
t.Fatalf("capabilities: got %d, want 1", len(resp.Capabilities)) |
|||
} |
|||
svc := resp.Capabilities[0].GetService() |
|||
if svc == nil { |
|||
t.Fatal("expected service capability") |
|||
} |
|||
if svc.Type != csi.PluginCapability_Service_CONTROLLER_SERVICE { |
|||
t.Fatalf("capability type: got %v, want CONTROLLER_SERVICE", svc.Type) |
|||
} |
|||
} |
|||
|
|||
func TestIdentity_Probe(t *testing.T) { |
|||
s := &identityServer{} |
|||
resp, err := s.Probe(context.Background(), &csi.ProbeRequest{}) |
|||
if err != nil { |
|||
t.Fatalf("Probe: %v", err) |
|||
} |
|||
if resp.Ready == nil || !resp.Ready.Value { |
|||
t.Fatal("expected ready=true") |
|||
} |
|||
} |
|||
@ -0,0 +1,306 @@ |
|||
package csi |
|||
|
|||
import (
	"context"
	"errors"
	"fmt"
	"os/exec"
	"path/filepath"
	"strings"
	"time"
)
|||
|
|||
// ISCSIUtil provides iSCSI initiator operations.
// Implementations shell out to iscsiadm; the interface exists so node-side
// code can be tested with a fake initiator.
type ISCSIUtil interface {
	// Discovery performs sendtargets discovery against the given portal.
	Discovery(ctx context.Context, portal string) error
	// Login opens a session to iqn via portal.
	Login(ctx context.Context, iqn, portal string) error
	// Logout closes the session for iqn; idempotent if not logged in.
	Logout(ctx context.Context, iqn string) error
	// GetDeviceByIQN resolves the block device node for a logged-in IQN.
	GetDeviceByIQN(ctx context.Context, iqn string) (string, error)
	// IsLoggedIn reports whether an active session exists for iqn.
	IsLoggedIn(ctx context.Context, iqn string) (bool, error)
}
|||
|
|||
// MountUtil provides filesystem mount operations.
// The interface abstracts mkfs/mount/umount so node-side code can be
// tested without root privileges.
type MountUtil interface {
	// FormatAndMount formats device with fsType if needed, then mounts it at target.
	FormatAndMount(ctx context.Context, device, target, fsType string) error
	// Mount mounts source at target with the given filesystem type.
	Mount(ctx context.Context, source, target, fsType string, readOnly bool) error
	// BindMount bind-mounts source at target (used for NodePublishVolume).
	BindMount(ctx context.Context, source, target string, readOnly bool) error
	// Unmount unmounts target.
	Unmount(ctx context.Context, target string) error
	// IsFormatted reports whether device already carries a filesystem.
	IsFormatted(ctx context.Context, device string) (bool, error)
	// IsMounted reports whether target is currently a mount point.
	IsMounted(ctx context.Context, target string) (bool, error)
}
|||
|
|||
// realISCSIUtil uses iscsiadm CLI.
// Stateless; every method spawns an iscsiadm process.
type realISCSIUtil struct{}
|||
|
|||
func (r *realISCSIUtil) Discovery(ctx context.Context, portal string) error { |
|||
cmd := exec.CommandContext(ctx, "iscsiadm", "-m", "discovery", "-t", "sendtargets", "-p", portal) |
|||
out, err := cmd.CombinedOutput() |
|||
if err != nil { |
|||
return fmt.Errorf("iscsiadm discovery: %s: %w", string(out), err) |
|||
} |
|||
return nil |
|||
} |
|||
|
|||
func (r *realISCSIUtil) Login(ctx context.Context, iqn, portal string) error { |
|||
cmd := exec.CommandContext(ctx, "iscsiadm", "-m", "node", "-T", iqn, "-p", portal, "--login") |
|||
out, err := cmd.CombinedOutput() |
|||
if err != nil { |
|||
return fmt.Errorf("iscsiadm login: %s: %w", string(out), err) |
|||
} |
|||
return nil |
|||
} |
|||
|
|||
func (r *realISCSIUtil) Logout(ctx context.Context, iqn string) error { |
|||
cmd := exec.CommandContext(ctx, "iscsiadm", "-m", "node", "-T", iqn, "--logout") |
|||
out, err := cmd.CombinedOutput() |
|||
if err != nil { |
|||
// Treat "not logged in" as success.
|
|||
if strings.Contains(string(out), "No matching sessions") { |
|||
return nil |
|||
} |
|||
return fmt.Errorf("iscsiadm logout: %s: %w", string(out), err) |
|||
} |
|||
return nil |
|||
} |
|||
|
|||
func (r *realISCSIUtil) GetDeviceByIQN(ctx context.Context, iqn string) (string, error) { |
|||
// Poll for device to appear (iSCSI login is async).
|
|||
deadline := time.After(10 * time.Second) |
|||
ticker := time.NewTicker(200 * time.Millisecond) |
|||
defer ticker.Stop() |
|||
|
|||
for { |
|||
select { |
|||
case <-ctx.Done(): |
|||
return "", ctx.Err() |
|||
case <-deadline: |
|||
return "", fmt.Errorf("timeout waiting for device for IQN %s", iqn) |
|||
case <-ticker.C: |
|||
// Look for block device symlinks under /dev/disk/by-path/
|
|||
pattern := fmt.Sprintf("/dev/disk/by-path/*%s*", iqn) |
|||
matches, err := filepath.Glob(pattern) |
|||
if err != nil { |
|||
continue |
|||
} |
|||
for _, m := range matches { |
|||
// Skip partitions.
|
|||
if strings.Contains(m, "-part") { |
|||
continue |
|||
} |
|||
// Resolve symlink to actual device.
|
|||
dev, err := filepath.EvalSymlinks(m) |
|||
if err != nil { |
|||
continue |
|||
} |
|||
return dev, nil |
|||
} |
|||
} |
|||
} |
|||
} |
|||
|
|||
func (r *realISCSIUtil) IsLoggedIn(ctx context.Context, iqn string) (bool, error) { |
|||
cmd := exec.CommandContext(ctx, "iscsiadm", "-m", "session") |
|||
out, err := cmd.CombinedOutput() |
|||
if err != nil { |
|||
// Exit code 21 = no sessions, not an error.
|
|||
outStr := string(out) |
|||
if strings.Contains(outStr, "No active sessions") { |
|||
return false, nil |
|||
} |
|||
// Also handle exit code 21 directly (nsenter may suppress output).
|
|||
if exitErr, ok := err.(*exec.ExitError); ok && exitErr.ExitCode() == 21 { |
|||
return false, nil |
|||
} |
|||
return false, fmt.Errorf("iscsiadm session: %s: %w", outStr, err) |
|||
} |
|||
return strings.Contains(string(out), iqn), nil |
|||
} |
|||
|
|||
// realMountUtil implements MountUtil by shelling out to the standard
// mount/umount/mkfs/blkid/mountpoint CLI tools. It is stateless.
type realMountUtil struct{}
|||
|
|||
func (r *realMountUtil) FormatAndMount(ctx context.Context, device, target, fsType string) error { |
|||
formatted, err := r.IsFormatted(ctx, device) |
|||
if err != nil { |
|||
return err |
|||
} |
|||
if !formatted { |
|||
cmd := exec.CommandContext(ctx, "mkfs."+fsType, device) |
|||
out, err := cmd.CombinedOutput() |
|||
if err != nil { |
|||
return fmt.Errorf("mkfs.%s: %s: %w", fsType, string(out), err) |
|||
} |
|||
} |
|||
return r.Mount(ctx, device, target, fsType, false) |
|||
} |
|||
|
|||
func (r *realMountUtil) Mount(ctx context.Context, source, target, fsType string, readOnly bool) error { |
|||
args := []string{"-t", fsType} |
|||
if readOnly { |
|||
args = append(args, "-o", "ro") |
|||
} |
|||
args = append(args, source, target) |
|||
cmd := exec.CommandContext(ctx, "mount", args...) |
|||
out, err := cmd.CombinedOutput() |
|||
if err != nil { |
|||
return fmt.Errorf("mount: %s: %w", string(out), err) |
|||
} |
|||
return nil |
|||
} |
|||
|
|||
func (r *realMountUtil) BindMount(ctx context.Context, source, target string, readOnly bool) error { |
|||
args := []string{"--bind", source, target} |
|||
cmd := exec.CommandContext(ctx, "mount", args...) |
|||
out, err := cmd.CombinedOutput() |
|||
if err != nil { |
|||
return fmt.Errorf("bind mount: %s: %w", string(out), err) |
|||
} |
|||
if readOnly { |
|||
cmd = exec.CommandContext(ctx, "mount", "-o", "remount,bind,ro", target) |
|||
out, err = cmd.CombinedOutput() |
|||
if err != nil { |
|||
return fmt.Errorf("remount ro: %s: %w", string(out), err) |
|||
} |
|||
} |
|||
return nil |
|||
} |
|||
|
|||
func (r *realMountUtil) Unmount(ctx context.Context, target string) error { |
|||
cmd := exec.CommandContext(ctx, "umount", target) |
|||
out, err := cmd.CombinedOutput() |
|||
if err != nil { |
|||
// Treat "not mounted" as success.
|
|||
if strings.Contains(string(out), "not mounted") { |
|||
return nil |
|||
} |
|||
return fmt.Errorf("umount: %s: %w", string(out), err) |
|||
} |
|||
return nil |
|||
} |
|||
|
|||
func (r *realMountUtil) IsFormatted(ctx context.Context, device string) (bool, error) { |
|||
cmd := exec.CommandContext(ctx, "blkid", "-p", device) |
|||
out, err := cmd.CombinedOutput() |
|||
if err != nil { |
|||
// Exit code 2 = no filesystem found.
|
|||
if cmd.ProcessState != nil && cmd.ProcessState.ExitCode() == 2 { |
|||
return false, nil |
|||
} |
|||
return false, fmt.Errorf("blkid: %s: %w", string(out), err) |
|||
} |
|||
return strings.Contains(string(out), "TYPE="), nil |
|||
} |
|||
|
|||
func (r *realMountUtil) IsMounted(ctx context.Context, target string) (bool, error) { |
|||
cmd := exec.CommandContext(ctx, "mountpoint", "-q", target) |
|||
err := cmd.Run() |
|||
if err == nil { |
|||
return true, nil |
|||
} |
|||
// Non-zero exit = not a mount point.
|
|||
return false, nil |
|||
} |
|||
|
|||
// mockISCSIUtil is a test double for ISCSIUtil. Each operation is recorded
// in calls as an "op:arg" string; the *Err fields inject failures and
// getDeviceResult injects the device returned by GetDeviceByIQN. Login state
// is tracked per IQN in loggedIn.
type mockISCSIUtil struct {
	discoveryErr    error
	loginErr        error
	logoutErr       error
	getDeviceResult string
	getDeviceErr    error
	loggedIn        map[string]bool
	calls           []string
}

// newMockISCSIUtil returns a mock with an initialized login-state map.
func newMockISCSIUtil() *mockISCSIUtil {
	m := &mockISCSIUtil{}
	m.loggedIn = make(map[string]bool)
	return m
}

// record appends one entry to the call trace.
func (m *mockISCSIUtil) record(entry string) {
	m.calls = append(m.calls, entry)
}

// Discovery records the call and returns the injected discoveryErr.
func (m *mockISCSIUtil) Discovery(_ context.Context, portal string) error {
	m.record("discovery:" + portal)
	return m.discoveryErr
}

// Login records the call and, unless loginErr is injected, marks iqn as
// logged in.
func (m *mockISCSIUtil) Login(_ context.Context, iqn, portal string) error {
	m.record("login:" + iqn)
	if m.loginErr != nil {
		return m.loginErr
	}
	m.loggedIn[iqn] = true
	return nil
}

// Logout records the call and, unless logoutErr is injected, clears the
// login state for iqn.
func (m *mockISCSIUtil) Logout(_ context.Context, iqn string) error {
	m.record("logout:" + iqn)
	if m.logoutErr != nil {
		return m.logoutErr
	}
	delete(m.loggedIn, iqn)
	return nil
}

// GetDeviceByIQN records the call and returns the injected result/error pair.
func (m *mockISCSIUtil) GetDeviceByIQN(_ context.Context, iqn string) (string, error) {
	m.record("getdevice:" + iqn)
	return m.getDeviceResult, m.getDeviceErr
}

// IsLoggedIn reports the tracked login state. It is not recorded in calls.
func (m *mockISCSIUtil) IsLoggedIn(_ context.Context, iqn string) (bool, error) {
	return m.loggedIn[iqn], nil
}
|||
|
|||
// mockMountUtil is a test double for MountUtil. Each mutating operation is
// recorded in calls as an "op:args" string; the *Err fields inject failures
// and isFormattedResult fixes the IsFormatted answer. Mount state is tracked
// per target path in isMountedTargets. Note that BindMount shares the
// mountErr injection field with Mount.
type mockMountUtil struct {
	formatAndMountErr error
	mountErr          error
	unmountErr        error
	isFormattedResult bool
	isMountedTargets  map[string]bool
	calls             []string
}

// newMockMountUtil returns a mock with an initialized mount-state map.
func newMockMountUtil() *mockMountUtil {
	return &mockMountUtil{isMountedTargets: map[string]bool{}}
}

// record appends one entry to the call trace.
func (m *mockMountUtil) record(entry string) {
	m.calls = append(m.calls, entry)
}

// FormatAndMount records the call and, unless formatAndMountErr is injected,
// marks target as mounted.
func (m *mockMountUtil) FormatAndMount(_ context.Context, device, target, fsType string) error {
	m.record("formatandmount:" + device + ":" + target)
	if m.formatAndMountErr != nil {
		return m.formatAndMountErr
	}
	m.isMountedTargets[target] = true
	return nil
}

// Mount records the call and, unless mountErr is injected, marks target as
// mounted.
func (m *mockMountUtil) Mount(_ context.Context, source, target, fsType string, readOnly bool) error {
	m.record("mount:" + source + ":" + target)
	if m.mountErr != nil {
		return m.mountErr
	}
	m.isMountedTargets[target] = true
	return nil
}

// BindMount records the call and, unless mountErr is injected, marks target
// as mounted.
func (m *mockMountUtil) BindMount(_ context.Context, source, target string, readOnly bool) error {
	m.record("bindmount:" + source + ":" + target)
	if m.mountErr != nil {
		return m.mountErr
	}
	m.isMountedTargets[target] = true
	return nil
}

// Unmount records the call and, unless unmountErr is injected, clears the
// mount state for target.
func (m *mockMountUtil) Unmount(_ context.Context, target string) error {
	m.record("unmount:" + target)
	if m.unmountErr != nil {
		return m.unmountErr
	}
	delete(m.isMountedTargets, target)
	return nil
}

// IsFormatted returns the injected isFormattedResult; never an error.
func (m *mockMountUtil) IsFormatted(_ context.Context, device string) (bool, error) {
	return m.isFormattedResult, nil
}

// IsMounted reports the tracked mount state for target; never an error.
// It is not recorded in calls.
func (m *mockMountUtil) IsMounted(_ context.Context, target string) (bool, error) {
	return m.isMountedTargets[target], nil
}
|||
@ -0,0 +1,313 @@ |
|||
package csi |
|||
|
|||
import ( |
|||
"context" |
|||
"fmt" |
|||
"log" |
|||
"os" |
|||
"sync" |
|||
|
|||
"github.com/container-storage-interface/spec/lib/go/csi" |
|||
"github.com/seaweedfs/seaweedfs/weed/storage/blockvol" |
|||
"google.golang.org/grpc/codes" |
|||
"google.golang.org/grpc/status" |
|||
) |
|||
|
|||
// stagedVolumeInfo tracks info needed for NodeUnstageVolume.
//
// An entry is stored when NodeStageVolume succeeds and removed only after a
// fully successful unstage, so a failed unstage can be retried with the same
// IQN/portal/locality information.
type stagedVolumeInfo struct {
	iqn       string // iSCSI qualified name of the staged target
	iscsiAddr string // portal (host:port) the session was established against
	isLocal   bool   // true if volume is served by local VolumeManager
}
|||
|
|||
// nodeServer implements the CSI Node service: it stages volumes by logging
// in to their iSCSI target and mounting the resulting block device, and
// publishes them into workloads via bind mounts.
type nodeServer struct {
	csi.UnimplementedNodeServer

	mgr       *VolumeManager // may be nil in controller-only mode
	nodeID    string         // node ID reported via NodeGetInfo
	iqnPrefix string         // for IQN derivation fallback on restart
	iscsiUtil ISCSIUtil      // iSCSI initiator operations (mockable in tests)
	mountUtil MountUtil      // mount/format operations (mockable in tests)
	logger    *log.Logger

	stagedMu sync.Mutex                   // guards staged
	staged   map[string]*stagedVolumeInfo // volumeID -> staged info
}
|||
|
|||
// NodeStageVolume makes the volume's block device available on this node and
// mounts it at the staging path:
//
//  1. iSCSI target info comes from volume_context ("iscsiAddr"/"iqn", set by
//     the controller) or, as a fallback, from the local VolumeManager.
//  2. Discovery + login are skipped when a session for the IQN already exists.
//  3. The device is formatted (if needed) and mounted at the staging path.
//  4. The result is recorded in s.staged for the later NodeUnstageVolume.
//
// Idempotent: an already-mounted staging path returns success immediately.
// If any step fails after a local OpenVolume, a deferred cleanup closes the
// volume again.
func (s *nodeServer) NodeStageVolume(ctx context.Context, req *csi.NodeStageVolumeRequest) (*csi.NodeStageVolumeResponse, error) {
	volumeID := req.VolumeId
	stagingPath := req.StagingTargetPath

	if volumeID == "" {
		return nil, status.Error(codes.InvalidArgument, "volume ID is required")
	}
	if stagingPath == "" {
		return nil, status.Error(codes.InvalidArgument, "staging target path is required")
	}
	if req.VolumeCapability == nil {
		return nil, status.Error(codes.InvalidArgument, "volume capability is required")
	}

	// Idempotency: if already mounted at staging path, return OK.
	mounted, err := s.mountUtil.IsMounted(ctx, stagingPath)
	if err != nil {
		return nil, status.Errorf(codes.Internal, "check mount: %v", err)
	}
	if mounted {
		s.logger.Printf("NodeStageVolume: %s already mounted at %s", volumeID, stagingPath)
		return &csi.NodeStageVolumeResponse{}, nil
	}

	// Determine iSCSI target info: from volume_context (remote) or local mgr.
	var iqn, portal string
	isLocal := false

	if req.VolumeContext != nil && req.VolumeContext["iscsiAddr"] != "" && req.VolumeContext["iqn"] != "" {
		// Remote target: iSCSI info from volume_context (set by controller via master).
		portal = req.VolumeContext["iscsiAddr"]
		iqn = req.VolumeContext["iqn"]
	} else if s.mgr != nil {
		// Local fallback: open volume via local VolumeManager.
		isLocal = true
		if err := s.mgr.OpenVolume(volumeID); err != nil {
			return nil, status.Errorf(codes.Internal, "open volume: %v", err)
		}
		iqn = s.mgr.VolumeIQN(volumeID)
		portal = s.mgr.ListenAddr()
	} else {
		return nil, status.Error(codes.FailedPrecondition, "no volume_context and no local volume manager")
	}

	// Cleanup on error: any return before "success = true" closes a locally
	// opened volume so a failed stage does not leak it.
	success := false
	defer func() {
		if !success {
			s.logger.Printf("NodeStageVolume: cleaning up %s after error", volumeID)
			if isLocal && s.mgr != nil {
				s.mgr.CloseVolume(volumeID)
			}
		}
	}()

	// Check if already logged in, skip login if so.
	loggedIn, err := s.iscsiUtil.IsLoggedIn(ctx, iqn)
	if err != nil {
		return nil, status.Errorf(codes.Internal, "check iscsi login: %v", err)
	}

	if !loggedIn {
		// Discovery + login.
		if err := s.iscsiUtil.Discovery(ctx, portal); err != nil {
			return nil, status.Errorf(codes.Internal, "iscsi discovery: %v", err)
		}
		if err := s.iscsiUtil.Login(ctx, iqn, portal); err != nil {
			return nil, status.Errorf(codes.Internal, "iscsi login: %v", err)
		}
	}

	// Wait for device to appear (login is asynchronous; see GetDeviceByIQN).
	device, err := s.iscsiUtil.GetDeviceByIQN(ctx, iqn)
	if err != nil {
		return nil, status.Errorf(codes.Internal, "get device: %v", err)
	}

	// Ensure staging directory exists.
	if err := os.MkdirAll(stagingPath, 0750); err != nil {
		return nil, status.Errorf(codes.Internal, "create staging dir: %v", err)
	}

	// Format (if needed) and mount. Filesystem type comes from the volume
	// capability when provided, defaulting to ext4.
	fsType := "ext4"
	if req.VolumeCapability != nil {
		if mnt := req.VolumeCapability.GetMount(); mnt != nil && mnt.FsType != "" {
			fsType = mnt.FsType
		}
	}

	if err := s.mountUtil.FormatAndMount(ctx, device, stagingPath, fsType); err != nil {
		return nil, status.Errorf(codes.Internal, "format and mount: %v", err)
	}

	// Track staged volume for unstage.
	s.stagedMu.Lock()
	if s.staged == nil {
		s.staged = make(map[string]*stagedVolumeInfo)
	}
	s.staged[volumeID] = &stagedVolumeInfo{
		iqn:       iqn,
		iscsiAddr: portal,
		isLocal:   isLocal,
	}
	s.stagedMu.Unlock()

	success = true
	s.logger.Printf("NodeStageVolume: %s staged at %s (device=%s, iqn=%s, local=%v)", volumeID, stagingPath, device, iqn, isLocal)
	return &csi.NodeStageVolumeResponse{}, nil
}
|||
|
|||
// NodeUnstageVolume reverses NodeStageVolume: unmount the staging path, log
// out of the iSCSI session, and (for locally served volumes) close the
// volume in the VolumeManager.
//
// The IQN normally comes from the staged map; if the entry is missing (e.g.
// the driver restarted), it is re-derived from the volume ID via the local
// VolumeManager or the configured iqnPrefix. Cleanup is best-effort — every
// step is attempted even if an earlier one fails — and the staged entry is
// removed only after all steps succeed, so a retry still has correct info.
func (s *nodeServer) NodeUnstageVolume(ctx context.Context, req *csi.NodeUnstageVolumeRequest) (*csi.NodeUnstageVolumeResponse, error) {
	volumeID := req.VolumeId
	stagingPath := req.StagingTargetPath

	if volumeID == "" {
		return nil, status.Error(codes.InvalidArgument, "volume ID is required")
	}
	if stagingPath == "" {
		return nil, status.Error(codes.InvalidArgument, "staging target path is required")
	}

	// Look up staged info. If not found (e.g. driver restarted), derive IQN.
	s.stagedMu.Lock()
	info := s.staged[volumeID]
	s.stagedMu.Unlock()

	var iqn string
	isLocal := false
	if info != nil {
		iqn = info.iqn
		isLocal = info.isLocal
	} else {
		// Restart fallback: derive IQN from volumeID.
		// iscsiadm -m node -T <iqn> --logout works without knowing the portal.
		if s.mgr != nil {
			iqn = s.mgr.VolumeIQN(volumeID)
			isLocal = true
		} else if s.iqnPrefix != "" {
			iqn = s.iqnPrefix + ":" + blockvol.SanitizeIQN(volumeID)
		}
		s.logger.Printf("NodeUnstageVolume: %s not in staged map, derived iqn=%s", volumeID, iqn)
	}

	// Best-effort cleanup: always attempt all steps even if one fails.
	// Only the first error is reported to the caller.
	var firstErr error

	// Unmount.
	if err := s.mountUtil.Unmount(ctx, stagingPath); err != nil {
		s.logger.Printf("NodeUnstageVolume: unmount error: %v", err)
		firstErr = err
	}

	// iSCSI logout (skipped when no IQN could be determined).
	if iqn != "" {
		if err := s.iscsiUtil.Logout(ctx, iqn); err != nil {
			s.logger.Printf("NodeUnstageVolume: logout error: %v", err)
			if firstErr == nil {
				firstErr = err
			}
		}
	}

	// Close the local volume if applicable.
	if isLocal && s.mgr != nil {
		if err := s.mgr.CloseVolume(volumeID); err != nil {
			s.logger.Printf("NodeUnstageVolume: close volume error: %v", err)
			if firstErr == nil {
				firstErr = err
			}
		}
	}

	if firstErr != nil {
		// Keep staged entry so retry has correct isLocal/iqn info.
		return nil, status.Errorf(codes.Internal, "unstage: %v", firstErr)
	}

	// Remove from staged map only after successful cleanup.
	s.stagedMu.Lock()
	delete(s.staged, volumeID)
	s.stagedMu.Unlock()

	s.logger.Printf("NodeUnstageVolume: %s unstaged from %s", volumeID, stagingPath)
	return &csi.NodeUnstageVolumeResponse{}, nil
}
|||
|
|||
func (s *nodeServer) NodePublishVolume(ctx context.Context, req *csi.NodePublishVolumeRequest) (*csi.NodePublishVolumeResponse, error) { |
|||
volumeID := req.VolumeId |
|||
targetPath := req.TargetPath |
|||
stagingPath := req.StagingTargetPath |
|||
|
|||
if volumeID == "" { |
|||
return nil, status.Error(codes.InvalidArgument, "volume ID is required") |
|||
} |
|||
if targetPath == "" { |
|||
return nil, status.Error(codes.InvalidArgument, "target path is required") |
|||
} |
|||
if stagingPath == "" { |
|||
return nil, status.Error(codes.InvalidArgument, "staging target path is required") |
|||
} |
|||
|
|||
// Idempotency: if already bind-mounted, return OK.
|
|||
mounted, err := s.mountUtil.IsMounted(ctx, targetPath) |
|||
if err != nil { |
|||
return nil, status.Errorf(codes.Internal, "check mount: %v", err) |
|||
} |
|||
if mounted { |
|||
s.logger.Printf("NodePublishVolume: %s already mounted at %s", volumeID, targetPath) |
|||
return &csi.NodePublishVolumeResponse{}, nil |
|||
} |
|||
|
|||
// Ensure target directory exists.
|
|||
if err := os.MkdirAll(targetPath, 0750); err != nil { |
|||
return nil, status.Errorf(codes.Internal, "create target dir: %v", err) |
|||
} |
|||
|
|||
// Bind mount staging path to target path.
|
|||
readOnly := req.Readonly |
|||
if err := s.mountUtil.BindMount(ctx, stagingPath, targetPath, readOnly); err != nil { |
|||
return nil, status.Errorf(codes.Internal, "bind mount: %v", err) |
|||
} |
|||
|
|||
s.logger.Printf("NodePublishVolume: %s published at %s", volumeID, targetPath) |
|||
return &csi.NodePublishVolumeResponse{}, nil |
|||
} |
|||
|
|||
func (s *nodeServer) NodeUnpublishVolume(ctx context.Context, req *csi.NodeUnpublishVolumeRequest) (*csi.NodeUnpublishVolumeResponse, error) { |
|||
if req.VolumeId == "" { |
|||
return nil, status.Error(codes.InvalidArgument, "volume ID is required") |
|||
} |
|||
if req.TargetPath == "" { |
|||
return nil, status.Error(codes.InvalidArgument, "target path is required") |
|||
} |
|||
|
|||
// Idempotent: only unmount if still mounted.
|
|||
mounted, err := s.mountUtil.IsMounted(ctx, req.TargetPath) |
|||
if err != nil { |
|||
return nil, status.Errorf(codes.Internal, "check mount: %v", err) |
|||
} |
|||
if mounted { |
|||
if err := s.mountUtil.Unmount(ctx, req.TargetPath); err != nil { |
|||
return nil, status.Errorf(codes.Internal, "unmount: %v", err) |
|||
} |
|||
} |
|||
|
|||
// CSI spec: remove mount point at target path.
|
|||
os.RemoveAll(req.TargetPath) |
|||
|
|||
s.logger.Printf("NodeUnpublishVolume: %s unpublished from %s", req.VolumeId, req.TargetPath) |
|||
return &csi.NodeUnpublishVolumeResponse{}, nil |
|||
} |
|||
|
|||
func (s *nodeServer) NodeGetCapabilities(_ context.Context, _ *csi.NodeGetCapabilitiesRequest) (*csi.NodeGetCapabilitiesResponse, error) { |
|||
return &csi.NodeGetCapabilitiesResponse{ |
|||
Capabilities: []*csi.NodeServiceCapability{ |
|||
{ |
|||
Type: &csi.NodeServiceCapability_Rpc{ |
|||
Rpc: &csi.NodeServiceCapability_RPC{ |
|||
Type: csi.NodeServiceCapability_RPC_STAGE_UNSTAGE_VOLUME, |
|||
}, |
|||
}, |
|||
}, |
|||
}, |
|||
}, nil |
|||
} |
|||
|
|||
func (s *nodeServer) NodeGetInfo(_ context.Context, _ *csi.NodeGetInfoRequest) (*csi.NodeGetInfoResponse, error) { |
|||
return &csi.NodeGetInfoResponse{ |
|||
NodeId: s.nodeID, |
|||
MaxVolumesPerNode: 256, |
|||
AccessibleTopology: &csi.Topology{ |
|||
Segments: map[string]string{ |
|||
fmt.Sprintf("topology.%s/node", DriverName): s.nodeID, |
|||
}, |
|||
}, |
|||
}, nil |
|||
} |
|||
@ -0,0 +1,451 @@ |
|||
package csi |
|||
|
|||
import ( |
|||
"context" |
|||
"errors" |
|||
"log" |
|||
"os" |
|||
"testing" |
|||
|
|||
"github.com/container-storage-interface/spec/lib/go/csi" |
|||
"google.golang.org/grpc/codes" |
|||
"google.golang.org/grpc/status" |
|||
) |
|||
|
|||
func newTestNodeServer(t *testing.T) (*nodeServer, *mockISCSIUtil, *mockMountUtil) { |
|||
t.Helper() |
|||
mgr := newTestManager(t) |
|||
|
|||
// Pre-create a volume for stage/unstage tests.
|
|||
if err := mgr.CreateVolume("test-vol", 4*1024*1024); err != nil { |
|||
t.Fatalf("create test-vol: %v", err) |
|||
} |
|||
// Close it so OpenVolume in NodeStageVolume can reopen it.
|
|||
if err := mgr.CloseVolume("test-vol"); err != nil { |
|||
t.Fatalf("close test-vol: %v", err) |
|||
} |
|||
|
|||
mi := newMockISCSIUtil() |
|||
mi.getDeviceResult = "/dev/sda" |
|||
mm := newMockMountUtil() |
|||
|
|||
ns := &nodeServer{ |
|||
mgr: mgr, |
|||
nodeID: "test-node-1", |
|||
iqnPrefix: "iqn.2024.com.seaweedfs", |
|||
iscsiUtil: mi, |
|||
mountUtil: mm, |
|||
logger: log.New(os.Stderr, "[test-node] ", log.LstdFlags), |
|||
staged: make(map[string]*stagedVolumeInfo), |
|||
} |
|||
return ns, mi, mm |
|||
} |
|||
|
|||
func TestNode_StageUnstage(t *testing.T) { |
|||
ns, mi, mm := newTestNodeServer(t) |
|||
|
|||
stagingPath := t.TempDir() |
|||
|
|||
// Stage
|
|||
_, err := ns.NodeStageVolume(context.Background(), &csi.NodeStageVolumeRequest{ |
|||
VolumeId: "test-vol", |
|||
StagingTargetPath: stagingPath, |
|||
VolumeCapability: &csi.VolumeCapability{ |
|||
AccessType: &csi.VolumeCapability_Mount{ |
|||
Mount: &csi.VolumeCapability_MountVolume{FsType: "ext4"}, |
|||
}, |
|||
}, |
|||
}) |
|||
if err != nil { |
|||
t.Fatalf("NodeStageVolume: %v", err) |
|||
} |
|||
|
|||
// Verify calls.
|
|||
if len(mi.calls) < 3 { |
|||
t.Fatalf("expected at least 3 iscsi calls, got %d: %v", len(mi.calls), mi.calls) |
|||
} |
|||
if mi.calls[0] != "discovery:"+ns.mgr.ListenAddr() { |
|||
t.Fatalf("expected discovery call, got %q", mi.calls[0]) |
|||
} |
|||
|
|||
if len(mm.calls) < 1 { |
|||
t.Fatalf("expected at least 1 mount call, got %d: %v", len(mm.calls), mm.calls) |
|||
} |
|||
|
|||
// Verify staged map entry.
|
|||
ns.stagedMu.Lock() |
|||
info, ok := ns.staged["test-vol"] |
|||
ns.stagedMu.Unlock() |
|||
if !ok { |
|||
t.Fatal("expected test-vol in staged map") |
|||
} |
|||
if !info.isLocal { |
|||
t.Fatal("expected isLocal=true for local volume") |
|||
} |
|||
|
|||
// Unstage
|
|||
_, err = ns.NodeUnstageVolume(context.Background(), &csi.NodeUnstageVolumeRequest{ |
|||
VolumeId: "test-vol", |
|||
StagingTargetPath: stagingPath, |
|||
}) |
|||
if err != nil { |
|||
t.Fatalf("NodeUnstageVolume: %v", err) |
|||
} |
|||
|
|||
// Verify staged map cleared.
|
|||
ns.stagedMu.Lock() |
|||
_, ok = ns.staged["test-vol"] |
|||
ns.stagedMu.Unlock() |
|||
if ok { |
|||
t.Fatal("expected test-vol removed from staged map") |
|||
} |
|||
} |
|||
|
|||
func TestNode_PublishUnpublish(t *testing.T) { |
|||
ns, _, _ := newTestNodeServer(t) |
|||
|
|||
stagingPath := t.TempDir() |
|||
targetPath := t.TempDir() |
|||
|
|||
_, err := ns.NodePublishVolume(context.Background(), &csi.NodePublishVolumeRequest{ |
|||
VolumeId: "test-vol", |
|||
StagingTargetPath: stagingPath, |
|||
TargetPath: targetPath, |
|||
VolumeCapability: &csi.VolumeCapability{ |
|||
AccessType: &csi.VolumeCapability_Mount{ |
|||
Mount: &csi.VolumeCapability_MountVolume{FsType: "ext4"}, |
|||
}, |
|||
}, |
|||
}) |
|||
if err != nil { |
|||
t.Fatalf("NodePublishVolume: %v", err) |
|||
} |
|||
|
|||
_, err = ns.NodeUnpublishVolume(context.Background(), &csi.NodeUnpublishVolumeRequest{ |
|||
VolumeId: "test-vol", |
|||
TargetPath: targetPath, |
|||
}) |
|||
if err != nil { |
|||
t.Fatalf("NodeUnpublishVolume: %v", err) |
|||
} |
|||
} |
|||
|
|||
func TestNode_StageIdempotent(t *testing.T) { |
|||
ns, _, mm := newTestNodeServer(t) |
|||
|
|||
stagingPath := t.TempDir() |
|||
|
|||
// Mark as already mounted -> stage should be idempotent.
|
|||
mm.isMountedTargets[stagingPath] = true |
|||
|
|||
_, err := ns.NodeStageVolume(context.Background(), &csi.NodeStageVolumeRequest{ |
|||
VolumeId: "test-vol", |
|||
StagingTargetPath: stagingPath, |
|||
VolumeCapability: testVolCap(), |
|||
}) |
|||
if err != nil { |
|||
t.Fatalf("idempotent NodeStageVolume: %v", err) |
|||
} |
|||
|
|||
// No iscsi or mount calls should have been made.
|
|||
if len(mm.calls) != 0 { |
|||
t.Fatalf("expected 0 mount calls, got %d: %v", len(mm.calls), mm.calls) |
|||
} |
|||
} |
|||
|
|||
func TestNode_StageLoginFailure(t *testing.T) { |
|||
ns, mi, _ := newTestNodeServer(t) |
|||
|
|||
mi.loginErr = errors.New("connection refused") |
|||
|
|||
stagingPath := t.TempDir() |
|||
|
|||
_, err := ns.NodeStageVolume(context.Background(), &csi.NodeStageVolumeRequest{ |
|||
VolumeId: "test-vol", |
|||
StagingTargetPath: stagingPath, |
|||
VolumeCapability: testVolCap(), |
|||
}) |
|||
if err == nil { |
|||
t.Fatal("expected error from login failure") |
|||
} |
|||
st, ok := status.FromError(err) |
|||
if !ok || st.Code() != codes.Internal { |
|||
t.Fatalf("expected Internal error, got: %v", err) |
|||
} |
|||
} |
|||
|
|||
func TestNode_StageMkfsFailure(t *testing.T) { |
|||
ns, _, mm := newTestNodeServer(t) |
|||
|
|||
mm.formatAndMountErr = errors.New("mkfs failed") |
|||
|
|||
stagingPath := t.TempDir() |
|||
|
|||
_, err := ns.NodeStageVolume(context.Background(), &csi.NodeStageVolumeRequest{ |
|||
VolumeId: "test-vol", |
|||
StagingTargetPath: stagingPath, |
|||
VolumeCapability: testVolCap(), |
|||
}) |
|||
if err == nil { |
|||
t.Fatal("expected error from mkfs failure") |
|||
} |
|||
st, ok := status.FromError(err) |
|||
if !ok || st.Code() != codes.Internal { |
|||
t.Fatalf("expected Internal error, got: %v", err) |
|||
} |
|||
} |
|||
|
|||
// TestNode_StageLoginFailureCleanup verifies that the volume is closed/disconnected
|
|||
// when login fails during NodeStageVolume (Finding #3: resource leak).
|
|||
func TestNode_StageLoginFailureCleanup(t *testing.T) { |
|||
ns, mi, _ := newTestNodeServer(t) |
|||
|
|||
mi.loginErr = errors.New("connection refused") |
|||
|
|||
stagingPath := t.TempDir() |
|||
|
|||
_, err := ns.NodeStageVolume(context.Background(), &csi.NodeStageVolumeRequest{ |
|||
VolumeId: "test-vol", |
|||
StagingTargetPath: stagingPath, |
|||
VolumeCapability: testVolCap(), |
|||
}) |
|||
if err == nil { |
|||
t.Fatal("expected error from login failure") |
|||
} |
|||
|
|||
// Volume should have been cleaned up (closed) after error.
|
|||
if ns.mgr.VolumeExists("test-vol") { |
|||
t.Fatal("expected volume to be closed after login failure (resource leak)") |
|||
} |
|||
} |
|||
|
|||
// TestNode_PublishMissingStagingPath verifies that NodePublishVolume rejects
|
|||
// empty StagingTargetPath (Finding #2).
|
|||
func TestNode_PublishMissingStagingPath(t *testing.T) { |
|||
ns, _, _ := newTestNodeServer(t) |
|||
|
|||
_, err := ns.NodePublishVolume(context.Background(), &csi.NodePublishVolumeRequest{ |
|||
VolumeId: "test-vol", |
|||
StagingTargetPath: "", |
|||
TargetPath: t.TempDir(), |
|||
}) |
|||
if err == nil { |
|||
t.Fatal("expected error for empty StagingTargetPath") |
|||
} |
|||
st, ok := status.FromError(err) |
|||
if !ok || st.Code() != codes.InvalidArgument { |
|||
t.Fatalf("expected InvalidArgument error, got: %v", err) |
|||
} |
|||
} |
|||
|
|||
// --- Remote target tests ---
|
|||
|
|||
// TestNode_StageRemoteTarget verifies NodeStageVolume reads iSCSI info from volume_context
|
|||
// instead of using local VolumeManager.
|
|||
func TestNode_StageRemoteTarget(t *testing.T) { |
|||
mi := newMockISCSIUtil() |
|||
mi.getDeviceResult = "/dev/sdb" |
|||
mm := newMockMountUtil() |
|||
|
|||
ns := &nodeServer{ |
|||
mgr: nil, // no local manager
|
|||
nodeID: "test-node-1", |
|||
iqnPrefix: "iqn.2024.com.seaweedfs", |
|||
iscsiUtil: mi, |
|||
mountUtil: mm, |
|||
logger: log.New(os.Stderr, "[test-node] ", log.LstdFlags), |
|||
staged: make(map[string]*stagedVolumeInfo), |
|||
} |
|||
|
|||
stagingPath := t.TempDir() |
|||
|
|||
_, err := ns.NodeStageVolume(context.Background(), &csi.NodeStageVolumeRequest{ |
|||
VolumeId: "remote-vol", |
|||
StagingTargetPath: stagingPath, |
|||
VolumeCapability: testVolCap(), |
|||
VolumeContext: map[string]string{ |
|||
"iscsiAddr": "10.0.0.5:3260", |
|||
"iqn": "iqn.2024.com.seaweedfs:remote-vol", |
|||
}, |
|||
}) |
|||
if err != nil { |
|||
t.Fatalf("NodeStageVolume: %v", err) |
|||
} |
|||
|
|||
// Verify discovery was called with remote portal.
|
|||
if len(mi.calls) < 1 || mi.calls[0] != "discovery:10.0.0.5:3260" { |
|||
t.Fatalf("expected discovery with remote portal, got: %v", mi.calls) |
|||
} |
|||
|
|||
// Verify staged map has remote info.
|
|||
ns.stagedMu.Lock() |
|||
info, ok := ns.staged["remote-vol"] |
|||
ns.stagedMu.Unlock() |
|||
if !ok { |
|||
t.Fatal("expected remote-vol in staged map") |
|||
} |
|||
if info.isLocal { |
|||
t.Fatal("expected isLocal=false for remote volume") |
|||
} |
|||
if info.iqn != "iqn.2024.com.seaweedfs:remote-vol" { |
|||
t.Fatalf("unexpected IQN: %s", info.iqn) |
|||
} |
|||
} |
|||
|
|||
// TestNode_UnstageRemoteTarget verifies unstage uses the staged map IQN.
|
|||
func TestNode_UnstageRemoteTarget(t *testing.T) { |
|||
mi := newMockISCSIUtil() |
|||
mm := newMockMountUtil() |
|||
|
|||
ns := &nodeServer{ |
|||
mgr: nil, |
|||
nodeID: "test-node-1", |
|||
iqnPrefix: "iqn.2024.com.seaweedfs", |
|||
iscsiUtil: mi, |
|||
mountUtil: mm, |
|||
logger: log.New(os.Stderr, "[test-node] ", log.LstdFlags), |
|||
staged: map[string]*stagedVolumeInfo{ |
|||
"remote-vol": { |
|||
iqn: "iqn.2024.com.seaweedfs:remote-vol", |
|||
iscsiAddr: "10.0.0.5:3260", |
|||
isLocal: false, |
|||
}, |
|||
}, |
|||
} |
|||
|
|||
stagingPath := t.TempDir() |
|||
|
|||
_, err := ns.NodeUnstageVolume(context.Background(), &csi.NodeUnstageVolumeRequest{ |
|||
VolumeId: "remote-vol", |
|||
StagingTargetPath: stagingPath, |
|||
}) |
|||
if err != nil { |
|||
t.Fatalf("NodeUnstageVolume: %v", err) |
|||
} |
|||
|
|||
// Verify logout was called with correct IQN.
|
|||
foundLogout := false |
|||
for _, c := range mi.calls { |
|||
if c == "logout:iqn.2024.com.seaweedfs:remote-vol" { |
|||
foundLogout = true |
|||
} |
|||
} |
|||
if !foundLogout { |
|||
t.Fatalf("expected logout call with remote IQN, got: %v", mi.calls) |
|||
} |
|||
} |
|||
|
|||
// TestNode_UnstageAfterRestart verifies IQN derivation when staged map is empty.
|
|||
func TestNode_UnstageAfterRestart(t *testing.T) { |
|||
mi := newMockISCSIUtil() |
|||
mm := newMockMountUtil() |
|||
|
|||
ns := &nodeServer{ |
|||
mgr: nil, |
|||
nodeID: "test-node-1", |
|||
iqnPrefix: "iqn.2024.com.seaweedfs", |
|||
iscsiUtil: mi, |
|||
mountUtil: mm, |
|||
logger: log.New(os.Stderr, "[test-node] ", log.LstdFlags), |
|||
staged: make(map[string]*stagedVolumeInfo), // empty (simulates restart)
|
|||
} |
|||
|
|||
stagingPath := t.TempDir() |
|||
|
|||
_, err := ns.NodeUnstageVolume(context.Background(), &csi.NodeUnstageVolumeRequest{ |
|||
VolumeId: "restart-vol", |
|||
StagingTargetPath: stagingPath, |
|||
}) |
|||
if err != nil { |
|||
t.Fatalf("NodeUnstageVolume: %v", err) |
|||
} |
|||
|
|||
// Verify logout was called with derived IQN.
|
|||
foundLogout := false |
|||
for _, c := range mi.calls { |
|||
if c == "logout:iqn.2024.com.seaweedfs:restart-vol" { |
|||
foundLogout = true |
|||
} |
|||
} |
|||
if !foundLogout { |
|||
t.Fatalf("expected logout with derived IQN, got: %v", mi.calls) |
|||
} |
|||
} |
|||
|
|||
// TestNode_UnstageRetryKeepsStagedEntry verifies that if unmount fails,
|
|||
// the staged entry is preserved for correct retry behavior.
|
|||
func TestNode_UnstageRetryKeepsStagedEntry(t *testing.T) { |
|||
mi := newMockISCSIUtil() |
|||
mm := newMockMountUtil() |
|||
mm.unmountErr = errors.New("device busy") |
|||
|
|||
ns := &nodeServer{ |
|||
mgr: nil, |
|||
nodeID: "test-node-1", |
|||
iqnPrefix: "iqn.2024.com.seaweedfs", |
|||
iscsiUtil: mi, |
|||
mountUtil: mm, |
|||
logger: log.New(os.Stderr, "[test-node] ", log.LstdFlags), |
|||
staged: map[string]*stagedVolumeInfo{ |
|||
"busy-vol": { |
|||
iqn: "iqn.2024.com.seaweedfs:busy-vol", |
|||
iscsiAddr: "10.0.0.5:3260", |
|||
isLocal: false, |
|||
}, |
|||
}, |
|||
} |
|||
|
|||
stagingPath := t.TempDir() |
|||
|
|||
_, err := ns.NodeUnstageVolume(context.Background(), &csi.NodeUnstageVolumeRequest{ |
|||
VolumeId: "busy-vol", |
|||
StagingTargetPath: stagingPath, |
|||
}) |
|||
if err == nil { |
|||
t.Fatal("expected error from unmount failure") |
|||
} |
|||
|
|||
// Staged entry should still be present for retry.
|
|||
ns.stagedMu.Lock() |
|||
info, ok := ns.staged["busy-vol"] |
|||
ns.stagedMu.Unlock() |
|||
if !ok { |
|||
t.Fatal("staged entry should be preserved on failure for retry") |
|||
} |
|||
if info.iqn != "iqn.2024.com.seaweedfs:busy-vol" { |
|||
t.Fatalf("unexpected IQN: %s", info.iqn) |
|||
} |
|||
} |
|||
|
|||
// TestNode_StageFallbackLocal verifies local fallback when no volume_context is provided.
|
|||
func TestNode_StageFallbackLocal(t *testing.T) { |
|||
ns, mi, _ := newTestNodeServer(t) |
|||
|
|||
stagingPath := t.TempDir() |
|||
|
|||
// Stage without volume_context — should use local VolumeManager.
|
|||
_, err := ns.NodeStageVolume(context.Background(), &csi.NodeStageVolumeRequest{ |
|||
VolumeId: "test-vol", |
|||
StagingTargetPath: stagingPath, |
|||
VolumeCapability: testVolCap(), |
|||
}) |
|||
if err != nil { |
|||
t.Fatalf("NodeStageVolume: %v", err) |
|||
} |
|||
|
|||
// Should have called discovery with local addr.
|
|||
if len(mi.calls) < 1 { |
|||
t.Fatal("expected iSCSI calls") |
|||
} |
|||
if mi.calls[0] != "discovery:"+ns.mgr.ListenAddr() { |
|||
t.Fatalf("expected local discovery, got %q", mi.calls[0]) |
|||
} |
|||
|
|||
// Verify staged as local.
|
|||
ns.stagedMu.Lock() |
|||
info := ns.staged["test-vol"] |
|||
ns.stagedMu.Unlock() |
|||
if info == nil || !info.isLocal { |
|||
t.Fatal("expected isLocal=true for local fallback") |
|||
} |
|||
} |
|||
|||
package csi |
|||
|
|||
import ( |
|||
"context" |
|||
"errors" |
|||
"fmt" |
|||
"log" |
|||
"os" |
|||
"path/filepath" |
|||
"strings" |
|||
"sync" |
|||
"sync/atomic" |
|||
"testing" |
|||
|
|||
"github.com/container-storage-interface/spec/lib/go/csi" |
|||
"github.com/seaweedfs/seaweedfs/weed/storage/blockvol" |
|||
"google.golang.org/grpc/codes" |
|||
"google.golang.org/grpc/status" |
|||
) |
|||
|
|||
// ============================================================
|
|||
// QA-NODE-CP62: Node adversarial tests for remote + staged map
|
|||
// ============================================================
|
|||
|
|||
// QA-NODE-R1: Stage a remote target then unstage — staged map must track isLocal=false.
|
|||
// Unstage should NOT call CloseVolume (remote volumes aren't managed locally).
|
|||
func TestQA_Node_RemoteUnstageNoCloseVolume(t *testing.T) { |
|||
ns, mi, mm := newTestNodeServer(t) |
|||
mi.getDeviceResult = "/dev/sdb" |
|||
|
|||
stagingPath := t.TempDir() |
|||
|
|||
// Stage with remote volume_context (not using local mgr).
|
|||
_, err := ns.NodeStageVolume(context.Background(), &csi.NodeStageVolumeRequest{ |
|||
VolumeId: "remote-vol", |
|||
StagingTargetPath: stagingPath, |
|||
VolumeCapability: testVolCap(), |
|||
VolumeContext: map[string]string{ |
|||
"iscsiAddr": "10.0.0.5:3260", |
|||
"iqn": "iqn.2024.com.seaweedfs:remote-vol", |
|||
}, |
|||
}) |
|||
if err != nil { |
|||
t.Fatalf("stage remote: %v", err) |
|||
} |
|||
|
|||
// Verify staged entry has isLocal=false.
|
|||
ns.stagedMu.Lock() |
|||
info := ns.staged["remote-vol"] |
|||
ns.stagedMu.Unlock() |
|||
if info == nil { |
|||
t.Fatal("remote-vol not in staged map") |
|||
} |
|||
if info.isLocal { |
|||
t.Fatal("BUG: remote volume should have isLocal=false") |
|||
} |
|||
|
|||
// Unstage.
|
|||
_, err = ns.NodeUnstageVolume(context.Background(), &csi.NodeUnstageVolumeRequest{ |
|||
VolumeId: "remote-vol", |
|||
StagingTargetPath: stagingPath, |
|||
}) |
|||
if err != nil { |
|||
t.Fatalf("unstage: %v", err) |
|||
} |
|||
|
|||
// Verify CloseVolume was NOT called (remote volume).
|
|||
for _, call := range mm.calls { |
|||
if strings.Contains(call, "close") { |
|||
t.Error("BUG: CloseVolume should not be called for remote volumes") |
|||
} |
|||
} |
|||
|
|||
// Verify staged entry is removed after successful unstage.
|
|||
ns.stagedMu.Lock() |
|||
if _, ok := ns.staged["remote-vol"]; ok { |
|||
t.Error("BUG: staged entry should be removed after successful unstage") |
|||
} |
|||
ns.stagedMu.Unlock() |
|||
} |
|||
|
|||
// QA-NODE-R2: Stage remote, then unstage fails (unmount error) — staged entry preserved.
|
|||
func TestQA_Node_RemoteUnstageFailPreservesStaged(t *testing.T) { |
|||
ns, mi, mm := newTestNodeServer(t) |
|||
mi.getDeviceResult = "/dev/sdb" |
|||
|
|||
stagingPath := t.TempDir() |
|||
|
|||
// Stage remote.
|
|||
_, err := ns.NodeStageVolume(context.Background(), &csi.NodeStageVolumeRequest{ |
|||
VolumeId: "fail-vol", |
|||
StagingTargetPath: stagingPath, |
|||
VolumeCapability: testVolCap(), |
|||
VolumeContext: map[string]string{ |
|||
"iscsiAddr": "10.0.0.5:3260", |
|||
"iqn": "iqn.2024.com.seaweedfs:fail-vol", |
|||
}, |
|||
}) |
|||
if err != nil { |
|||
t.Fatalf("stage: %v", err) |
|||
} |
|||
|
|||
// Make unmount fail.
|
|||
mm.unmountErr = errors.New("device busy") |
|||
|
|||
_, err = ns.NodeUnstageVolume(context.Background(), &csi.NodeUnstageVolumeRequest{ |
|||
VolumeId: "fail-vol", |
|||
StagingTargetPath: stagingPath, |
|||
}) |
|||
if err == nil { |
|||
t.Fatal("expected error from unmount failure") |
|||
} |
|||
|
|||
// Staged entry should still be present (for retry).
|
|||
ns.stagedMu.Lock() |
|||
info := ns.staged["fail-vol"] |
|||
ns.stagedMu.Unlock() |
|||
if info == nil { |
|||
t.Fatal("BUG: staged entry removed despite unstage failure") |
|||
} |
|||
if info.iqn != "iqn.2024.com.seaweedfs:fail-vol" { |
|||
t.Fatalf("staged IQN mismatch: %q", info.iqn) |
|||
} |
|||
} |
|||
|
|||
// QA-NODE-R3: Concurrent Stage and Unstage for the same volumeID.
|
|||
func TestQA_Node_ConcurrentStageUnstage(t *testing.T) { |
|||
ns, mi, mm := newTestNodeServer(t) |
|||
mi.getDeviceResult = "/dev/sdb" |
|||
_ = mm |
|||
|
|||
var wg sync.WaitGroup |
|||
var panicked atomic.Bool |
|||
|
|||
for i := 0; i < 20; i++ { |
|||
wg.Add(2) |
|||
go func() { |
|||
defer wg.Done() |
|||
defer func() { |
|||
if r := recover(); r != nil { |
|||
panicked.Store(true) |
|||
t.Errorf("PANIC in stage: %v", r) |
|||
} |
|||
}() |
|||
ns.NodeStageVolume(context.Background(), &csi.NodeStageVolumeRequest{ |
|||
VolumeId: "test-vol", |
|||
StagingTargetPath: t.TempDir(), |
|||
VolumeCapability: testVolCap(), |
|||
}) |
|||
}() |
|||
go func() { |
|||
defer wg.Done() |
|||
defer func() { |
|||
if r := recover(); r != nil { |
|||
panicked.Store(true) |
|||
t.Errorf("PANIC in unstage: %v", r) |
|||
} |
|||
}() |
|||
ns.NodeUnstageVolume(context.Background(), &csi.NodeUnstageVolumeRequest{ |
|||
VolumeId: "test-vol", |
|||
StagingTargetPath: "/tmp/staging", |
|||
}) |
|||
}() |
|||
} |
|||
wg.Wait() |
|||
|
|||
if panicked.Load() { |
|||
t.Fatal("BUG: concurrent stage/unstage caused panic") |
|||
} |
|||
} |
|||
|
|||
// QA-NODE-R4: Stage with remote volume_context should use the remote portal, not local.
|
|||
// Verify iSCSI discovery and login calls use the correct portal from volume_context.
|
|||
func TestQA_Node_RemotePortalUsedCorrectly(t *testing.T) { |
|||
ns, mi, _ := newTestNodeServer(t) |
|||
mi.getDeviceResult = "/dev/sdc" |
|||
|
|||
stagingPath := t.TempDir() |
|||
_, err := ns.NodeStageVolume(context.Background(), &csi.NodeStageVolumeRequest{ |
|||
VolumeId: "remote-portal-vol", |
|||
StagingTargetPath: stagingPath, |
|||
VolumeCapability: testVolCap(), |
|||
VolumeContext: map[string]string{ |
|||
"iscsiAddr": "192.168.1.100:3260", |
|||
"iqn": "iqn.2024.com.seaweedfs:remote-portal-vol", |
|||
}, |
|||
}) |
|||
if err != nil { |
|||
t.Fatalf("stage: %v", err) |
|||
} |
|||
|
|||
// Verify discovery was called with the REMOTE portal, not local.
|
|||
foundDiscovery := false |
|||
for _, call := range mi.calls { |
|||
if strings.Contains(call, "discovery:192.168.1.100:3260") { |
|||
foundDiscovery = true |
|||
} |
|||
if strings.Contains(call, "discovery:127.0.0.1") { |
|||
t.Error("BUG: discovery used local portal instead of remote") |
|||
} |
|||
} |
|||
if !foundDiscovery { |
|||
t.Error("discovery not called with remote portal 192.168.1.100:3260") |
|||
} |
|||
} |
|||
|
|||
// QA-NODE-R5: Stage with partial volume_context (only iscsiAddr, no iqn) should fallback to local.
|
|||
func TestQA_Node_PartialVolumeContext(t *testing.T) { |
|||
ns, mi, _ := newTestNodeServer(t) |
|||
mi.getDeviceResult = "/dev/sda" |
|||
|
|||
stagingPath := t.TempDir() |
|||
_, err := ns.NodeStageVolume(context.Background(), &csi.NodeStageVolumeRequest{ |
|||
VolumeId: "test-vol", |
|||
StagingTargetPath: stagingPath, |
|||
VolumeCapability: testVolCap(), |
|||
VolumeContext: map[string]string{ |
|||
"iscsiAddr": "10.0.0.5:3260", |
|||
// Missing "iqn" — should fallback to local.
|
|||
}, |
|||
}) |
|||
if err != nil { |
|||
t.Fatalf("stage: %v", err) |
|||
} |
|||
|
|||
// Should have used local mgr (isLocal=true).
|
|||
ns.stagedMu.Lock() |
|||
info := ns.staged["test-vol"] |
|||
ns.stagedMu.Unlock() |
|||
if info == nil { |
|||
t.Fatal("not in staged map") |
|||
} |
|||
if !info.isLocal { |
|||
t.Error("BUG: partial volume_context should fallback to local") |
|||
} |
|||
} |
|||
|
|||
// QA-NODE-R6: Unstage after restart with no mgr and no iqnPrefix — IQN derivation fails gracefully.
|
|||
func TestQA_Node_UnstageNoMgrNoPrefix(t *testing.T) { |
|||
mi := newMockISCSIUtil() |
|||
mm := newMockMountUtil() |
|||
ns := &nodeServer{ |
|||
mgr: nil, |
|||
nodeID: "test-node", |
|||
iqnPrefix: "", // no prefix
|
|||
iscsiUtil: mi, |
|||
mountUtil: mm, |
|||
logger: log.New(os.Stderr, "[test-qa] ", log.LstdFlags), |
|||
staged: make(map[string]*stagedVolumeInfo), |
|||
} |
|||
|
|||
// Unstage with no staged info, no mgr, no prefix — should still succeed
|
|||
// (logout is skipped because IQN is empty).
|
|||
_, err := ns.NodeUnstageVolume(context.Background(), &csi.NodeUnstageVolumeRequest{ |
|||
VolumeId: "ghost-vol", |
|||
StagingTargetPath: t.TempDir(), |
|||
}) |
|||
if err != nil { |
|||
t.Fatalf("unstage should succeed gracefully: %v", err) |
|||
} |
|||
|
|||
// Verify logout was NOT called (no IQN to logout from).
|
|||
for _, call := range mi.calls { |
|||
if strings.Contains(call, "logout") { |
|||
t.Error("BUG: logout called without IQN") |
|||
} |
|||
} |
|||
} |
|||
|
|||
// ============================================================
|
|||
// QA-CTRL-CP62: Controller adversarial tests with VolumeBackend
|
|||
// ============================================================
|
|||
|
|||
// QA-CTRL-B1: CreateVolume returns volume_context with iSCSI info.
|
|||
func TestQA_Ctrl_VolumeContextPresent(t *testing.T) { |
|||
mgr := newTestManager(t) |
|||
cs := &controllerServer{backend: NewLocalVolumeBackend(mgr)} |
|||
|
|||
resp, err := cs.CreateVolume(context.Background(), &csi.CreateVolumeRequest{ |
|||
Name: "ctx-vol", |
|||
VolumeCapabilities: testVolCaps(), |
|||
CapacityRange: &csi.CapacityRange{RequiredBytes: 4 * 1024 * 1024}, |
|||
}) |
|||
if err != nil { |
|||
t.Fatalf("create: %v", err) |
|||
} |
|||
|
|||
if resp.Volume.VolumeContext == nil { |
|||
t.Fatal("BUG: volume_context is nil") |
|||
} |
|||
if resp.Volume.VolumeContext["iqn"] == "" { |
|||
t.Error("BUG: volume_context missing 'iqn'") |
|||
} |
|||
if resp.Volume.VolumeContext["iscsiAddr"] == "" { |
|||
t.Error("BUG: volume_context missing 'iscsiAddr'") |
|||
} |
|||
} |
|||
|
|||
// QA-CTRL-B2: ValidateVolumeCapabilities via backend — should use LookupVolume.
|
|||
func TestQA_Ctrl_ValidateUsesBackend(t *testing.T) { |
|||
mgr := newTestManager(t) |
|||
cs := &controllerServer{backend: NewLocalVolumeBackend(mgr)} |
|||
|
|||
// Create a volume first.
|
|||
_, err := cs.CreateVolume(context.Background(), &csi.CreateVolumeRequest{ |
|||
Name: "validate-vol", |
|||
VolumeCapabilities: testVolCaps(), |
|||
CapacityRange: &csi.CapacityRange{RequiredBytes: 4 * 1024 * 1024}, |
|||
}) |
|||
if err != nil { |
|||
t.Fatalf("create: %v", err) |
|||
} |
|||
|
|||
// Validate should succeed.
|
|||
_, err = cs.ValidateVolumeCapabilities(context.Background(), &csi.ValidateVolumeCapabilitiesRequest{ |
|||
VolumeId: "validate-vol", |
|||
VolumeCapabilities: []*csi.VolumeCapability{ |
|||
{AccessType: &csi.VolumeCapability_Mount{Mount: &csi.VolumeCapability_MountVolume{}}}, |
|||
}, |
|||
}) |
|||
if err != nil { |
|||
t.Fatalf("validate: %v", err) |
|||
} |
|||
|
|||
// Delete the volume.
|
|||
_, err = cs.DeleteVolume(context.Background(), &csi.DeleteVolumeRequest{VolumeId: "validate-vol"}) |
|||
if err != nil { |
|||
t.Fatalf("delete: %v", err) |
|||
} |
|||
|
|||
// Validate should now fail with NotFound.
|
|||
_, err = cs.ValidateVolumeCapabilities(context.Background(), &csi.ValidateVolumeCapabilitiesRequest{ |
|||
VolumeId: "validate-vol", |
|||
VolumeCapabilities: []*csi.VolumeCapability{ |
|||
{AccessType: &csi.VolumeCapability_Mount{Mount: &csi.VolumeCapability_MountVolume{}}}, |
|||
}, |
|||
}) |
|||
if err == nil { |
|||
t.Fatal("validate should fail after delete") |
|||
} |
|||
st, ok := status.FromError(err) |
|||
if !ok || st.Code() != codes.NotFound { |
|||
t.Fatalf("expected NotFound, got: %v", err) |
|||
} |
|||
} |
|||
|
|||
// QA-CTRL-B3: CreateVolume then CreateVolume with LARGER size — backend should reject.
|
|||
func TestQA_Ctrl_CreateLargerSizeRejected(t *testing.T) { |
|||
mgr := newTestManager(t) |
|||
cs := &controllerServer{backend: NewLocalVolumeBackend(mgr)} |
|||
|
|||
_, err := cs.CreateVolume(context.Background(), &csi.CreateVolumeRequest{ |
|||
Name: "grow-vol", |
|||
VolumeCapabilities: testVolCaps(), |
|||
CapacityRange: &csi.CapacityRange{RequiredBytes: 4 * 1024 * 1024}, |
|||
}) |
|||
if err != nil { |
|||
t.Fatalf("first create: %v", err) |
|||
} |
|||
|
|||
// Second create with larger size.
|
|||
_, err = cs.CreateVolume(context.Background(), &csi.CreateVolumeRequest{ |
|||
Name: "grow-vol", |
|||
VolumeCapabilities: testVolCaps(), |
|||
CapacityRange: &csi.CapacityRange{RequiredBytes: 8 * 1024 * 1024}, |
|||
}) |
|||
if err == nil { |
|||
t.Fatal("expected error for larger size") |
|||
} |
|||
st, ok := status.FromError(err) |
|||
if !ok || st.Code() != codes.AlreadyExists { |
|||
t.Fatalf("expected AlreadyExists, got: %v", err) |
|||
} |
|||
} |
|||
|
|||
// QA-CTRL-B4: CreateVolume with RequiredBytes exactly at blockSize boundary — no rounding needed.
|
|||
func TestQA_Ctrl_ExactBlockSizeBoundary(t *testing.T) { |
|||
mgr := newTestManager(t) |
|||
cs := &controllerServer{backend: NewLocalVolumeBackend(mgr)} |
|||
|
|||
resp, err := cs.CreateVolume(context.Background(), &csi.CreateVolumeRequest{ |
|||
Name: "exact-vol", |
|||
VolumeCapabilities: testVolCaps(), |
|||
CapacityRange: &csi.CapacityRange{RequiredBytes: 4 * 1024 * 1024}, // exactly 4 MiB, aligned to 4096
|
|||
}) |
|||
if err != nil { |
|||
t.Fatalf("create: %v", err) |
|||
} |
|||
if resp.Volume.CapacityBytes != 4*1024*1024 { |
|||
t.Errorf("capacity: got %d, want %d", resp.Volume.CapacityBytes, 4*1024*1024) |
|||
} |
|||
} |
|||
|
|||
// QA-CTRL-B5: Concurrent CreateVolume calls for same name via backend.
|
|||
func TestQA_Ctrl_ConcurrentCreate(t *testing.T) { |
|||
mgr := newTestManager(t) |
|||
cs := &controllerServer{backend: NewLocalVolumeBackend(mgr)} |
|||
|
|||
var wg sync.WaitGroup |
|||
var panicked atomic.Bool |
|||
errors := make([]error, 10) |
|||
|
|||
for i := 0; i < 10; i++ { |
|||
wg.Add(1) |
|||
go func(i int) { |
|||
defer wg.Done() |
|||
defer func() { |
|||
if r := recover(); r != nil { |
|||
panicked.Store(true) |
|||
t.Errorf("PANIC: %v", r) |
|||
} |
|||
}() |
|||
_, errors[i] = cs.CreateVolume(context.Background(), &csi.CreateVolumeRequest{ |
|||
Name: "concurrent-vol", |
|||
VolumeCapabilities: testVolCaps(), |
|||
CapacityRange: &csi.CapacityRange{RequiredBytes: 4 * 1024 * 1024}, |
|||
}) |
|||
}(i) |
|||
} |
|||
wg.Wait() |
|||
|
|||
if panicked.Load() { |
|||
t.Fatal("BUG: concurrent CreateVolume caused panic") |
|||
} |
|||
|
|||
// All should succeed (idempotent).
|
|||
for i, err := range errors { |
|||
if err != nil { |
|||
t.Errorf("goroutine %d: %v", i, err) |
|||
} |
|||
} |
|||
} |
|||
|
|||
// ============================================================
|
|||
// QA-BACKEND: LocalVolumeBackend adversarial tests
|
|||
// ============================================================
|
|||
|
|||
// QA-BACKEND-1: LookupVolume for a volume that exists on disk but not in-memory (after restart).
|
|||
func TestQA_Backend_LookupAfterRestart(t *testing.T) { |
|||
dir := t.TempDir() |
|||
logger := log.New(os.Stderr, "[test-qa] ", log.LstdFlags) |
|||
|
|||
// Phase 1: create volume, stop manager.
|
|||
mgr1 := NewVolumeManager(dir, "127.0.0.1:0", "iqn.test", logger) |
|||
mgr1.Start(context.Background()) |
|||
mgr1.CreateVolume("orphan-vol", 4*1024*1024) |
|||
mgr1.Stop() |
|||
|
|||
// Phase 2: new manager — volume exists on disk but not tracked.
|
|||
mgr2 := NewVolumeManager(dir, "127.0.0.1:0", "iqn.test", logger) |
|||
mgr2.Start(context.Background()) |
|||
defer mgr2.Stop() |
|||
|
|||
backend := NewLocalVolumeBackend(mgr2) |
|||
|
|||
// LookupVolume should fail (not tracked).
|
|||
_, err := backend.LookupVolume(context.Background(), "orphan-vol") |
|||
if err == nil { |
|||
t.Fatal("BUG: LookupVolume should fail for untracked volume") |
|||
} |
|||
|
|||
// CreateVolume should re-adopt from disk.
|
|||
info, err := backend.CreateVolume(context.Background(), "orphan-vol", 4*1024*1024) |
|||
if err != nil { |
|||
t.Fatalf("re-adopt: %v", err) |
|||
} |
|||
if info.CapacityBytes < 4*1024*1024 { |
|||
t.Fatalf("capacity: got %d, want >= %d", info.CapacityBytes, 4*1024*1024) |
|||
} |
|||
|
|||
// Now lookup should succeed.
|
|||
_, err = backend.LookupVolume(context.Background(), "orphan-vol") |
|||
if err != nil { |
|||
t.Fatalf("lookup after re-adopt: %v", err) |
|||
} |
|||
} |
|||
|
|||
// QA-BACKEND-2: DeleteVolume then LookupVolume — should fail.
|
|||
func TestQA_Backend_DeleteThenLookup(t *testing.T) { |
|||
mgr := newTestManager(t) |
|||
backend := NewLocalVolumeBackend(mgr) |
|||
|
|||
_, err := backend.CreateVolume(context.Background(), "del-vol", 4*1024*1024) |
|||
if err != nil { |
|||
t.Fatalf("create: %v", err) |
|||
} |
|||
|
|||
err = backend.DeleteVolume(context.Background(), "del-vol") |
|||
if err != nil { |
|||
t.Fatalf("delete: %v", err) |
|||
} |
|||
|
|||
_, err = backend.LookupVolume(context.Background(), "del-vol") |
|||
if err == nil { |
|||
t.Fatal("BUG: LookupVolume should fail after delete") |
|||
} |
|||
} |
|||
|
|||
// ============================================================
|
|||
// QA-NAMING: Cross-layer naming consistency
|
|||
// ============================================================
|
|||
|
|||
// QA-NAMING-1: Verify IQN generated by VolumeManager matches IQN generated by BlockService.
|
|||
// Both should use blockvol.SanitizeIQN.
|
|||
func TestQA_Naming_CrossLayerConsistency(t *testing.T) { |
|||
testNames := []string{ |
|||
"pvc-abc-123", |
|||
"VolA", |
|||
"has spaces", |
|||
"UPPER_CASE", |
|||
"special!@#$chars", |
|||
strings.Repeat("long-name-", 10), // 100 chars, triggers truncation
|
|||
} |
|||
|
|||
for _, name := range testNames { |
|||
// What VolumeManager would generate.
|
|||
vmIQN := "iqn.2024.com.seaweedfs:" + blockvol.SanitizeIQN(name) |
|||
|
|||
// What BlockService would generate (same prefix + SanitizeIQN).
|
|||
bsIQN := "iqn.2024.com.seaweedfs:" + blockvol.SanitizeIQN(name) |
|||
|
|||
if vmIQN != bsIQN { |
|||
t.Errorf("IQN mismatch for %q: VM=%q, BS=%q", name, vmIQN, bsIQN) |
|||
} |
|||
|
|||
// Filename consistency.
|
|||
vmFile := blockvol.SanitizeFilename(name) + ".blk" |
|||
bsFile := blockvol.SanitizeFilename(name) + ".blk" |
|||
if vmFile != bsFile { |
|||
t.Errorf("filename mismatch for %q: VM=%q, BS=%q", name, vmFile, bsFile) |
|||
} |
|||
} |
|||
} |
|||
|
|||
// QA-NAMING-2: Two different names that produce the same sanitized IQN.
|
|||
// This shouldn't happen for typical CSI volume IDs, but test the hash suffix behavior.
|
|||
func TestQA_Naming_LongNameHashCollision(t *testing.T) { |
|||
// Two names that are identical for the first 55 chars but differ at the end.
|
|||
// Both exceed 64 chars, so they get truncated with hash suffix.
|
|||
name1 := strings.Repeat("a", 55) + "-suffix-one" |
|||
name2 := strings.Repeat("a", 55) + "-suffix-two" |
|||
|
|||
iqn1 := blockvol.SanitizeIQN(name1) |
|||
iqn2 := blockvol.SanitizeIQN(name2) |
|||
|
|||
if iqn1 == iqn2 { |
|||
t.Errorf("BUG: different names produced same IQN (hash collision):\n name1=%q\n name2=%q\n iqn=%q", name1, name2, iqn1) |
|||
} |
|||
|
|||
// Both should be <= 64 chars.
|
|||
if len(iqn1) > 64 { |
|||
t.Errorf("iqn1 too long: %d", len(iqn1)) |
|||
} |
|||
if len(iqn2) > 64 { |
|||
t.Errorf("iqn2 too long: %d", len(iqn2)) |
|||
} |
|||
} |
|||
|
|||
// ============================================================
|
|||
// QA-LIFECYCLE: End-to-end lifecycle with remote targets
|
|||
// ============================================================
|
|||
|
|||
// QA-LIFECYCLE-1: Full remote lifecycle: create → stage (remote) → publish → unpublish → unstage → delete.
|
|||
func TestQA_RemoteLifecycleFull(t *testing.T) { |
|||
dir := t.TempDir() |
|||
logger := log.New(os.Stderr, "[test-qa] ", log.LstdFlags) |
|||
mgr := NewVolumeManager(dir, "127.0.0.1:0", "iqn.2024.com.seaweedfs", logger) |
|||
if err := mgr.Start(context.Background()); err != nil { |
|||
t.Fatalf("start: %v", err) |
|||
} |
|||
defer mgr.Stop() |
|||
|
|||
mi := newMockISCSIUtil() |
|||
mi.getDeviceResult = "/dev/sda" |
|||
mm := newMockMountUtil() |
|||
|
|||
cs := &controllerServer{backend: NewLocalVolumeBackend(mgr)} |
|||
ns := &nodeServer{ |
|||
mgr: nil, // simulate node-only mode (no local mgr)
|
|||
nodeID: "test-node", |
|||
iqnPrefix: "iqn.2024.com.seaweedfs", |
|||
iscsiUtil: mi, |
|||
mountUtil: mm, |
|||
logger: logger, |
|||
staged: make(map[string]*stagedVolumeInfo), |
|||
} |
|||
|
|||
// Create volume via controller (this creates it locally, but we'll use it as if remote).
|
|||
createResp, err := cs.CreateVolume(context.Background(), &csi.CreateVolumeRequest{ |
|||
Name: "remote-life-vol", |
|||
VolumeCapabilities: testVolCaps(), |
|||
CapacityRange: &csi.CapacityRange{RequiredBytes: 4 * 1024 * 1024}, |
|||
}) |
|||
if err != nil { |
|||
t.Fatalf("create: %v", err) |
|||
} |
|||
|
|||
// Stage using volume_context (simulating remote node).
|
|||
stagingPath := filepath.Join(t.TempDir(), "staging") |
|||
targetPath := filepath.Join(t.TempDir(), "target") |
|||
|
|||
_, err = ns.NodeStageVolume(context.Background(), &csi.NodeStageVolumeRequest{ |
|||
VolumeId: "remote-life-vol", |
|||
StagingTargetPath: stagingPath, |
|||
VolumeCapability: testVolCap(), |
|||
VolumeContext: createResp.Volume.VolumeContext, |
|||
}) |
|||
if err != nil { |
|||
t.Fatalf("stage: %v", err) |
|||
} |
|||
|
|||
// Publish.
|
|||
_, err = ns.NodePublishVolume(context.Background(), &csi.NodePublishVolumeRequest{ |
|||
VolumeId: "remote-life-vol", |
|||
StagingTargetPath: stagingPath, |
|||
TargetPath: targetPath, |
|||
}) |
|||
if err != nil { |
|||
t.Fatalf("publish: %v", err) |
|||
} |
|||
|
|||
// Unpublish.
|
|||
_, err = ns.NodeUnpublishVolume(context.Background(), &csi.NodeUnpublishVolumeRequest{ |
|||
VolumeId: "remote-life-vol", |
|||
TargetPath: targetPath, |
|||
}) |
|||
if err != nil { |
|||
t.Fatalf("unpublish: %v", err) |
|||
} |
|||
|
|||
// Unstage.
|
|||
_, err = ns.NodeUnstageVolume(context.Background(), &csi.NodeUnstageVolumeRequest{ |
|||
VolumeId: "remote-life-vol", |
|||
StagingTargetPath: stagingPath, |
|||
}) |
|||
if err != nil { |
|||
t.Fatalf("unstage: %v", err) |
|||
} |
|||
|
|||
// Delete.
|
|||
_, err = cs.DeleteVolume(context.Background(), &csi.DeleteVolumeRequest{ |
|||
VolumeId: "remote-life-vol", |
|||
}) |
|||
if err != nil { |
|||
t.Fatalf("delete: %v", err) |
|||
} |
|||
|
|||
// Verify file is gone.
|
|||
volPath := filepath.Join(dir, sanitizeFilename("remote-life-vol")+".blk") |
|||
if _, statErr := os.Stat(volPath); !os.IsNotExist(statErr) { |
|||
t.Errorf(".blk file not cleaned up: %v", statErr) |
|||
} |
|||
} |
|||
|
|||
// QA-LIFECYCLE-2: Mode validation — controller mode should not need local VolumeManager.
|
|||
func TestQA_ModeControllerNoMgr(t *testing.T) { |
|||
// In controller mode with masterAddr, no local VolumeManager is needed.
|
|||
// We can't test MasterVolumeClient without a real master, but we can verify
|
|||
// that the driver setup logic is correct.
|
|||
_, err := NewCSIDriver(DriverConfig{ |
|||
Endpoint: "unix:///tmp/test.sock", |
|||
DataDir: t.TempDir(), |
|||
NodeID: "test-node", |
|||
MasterAddr: "master:9333", |
|||
Mode: "controller", |
|||
}) |
|||
if err != nil { |
|||
// MasterVolumeClient creation should work (connection is lazy).
|
|||
// If this fails, it means the driver config validation is wrong.
|
|||
t.Logf("NewCSIDriver controller mode: %v (may fail without grpc deps, OK)", err) |
|||
} |
|||
} |
|||
|
|||
// QA-LIFECYCLE-3: Driver with mode "node" should not create controller service.
|
|||
func TestQA_ModeNodeOnly(t *testing.T) { |
|||
driver, err := NewCSIDriver(DriverConfig{ |
|||
Endpoint: "unix:///tmp/test-node.sock", |
|||
DataDir: t.TempDir(), |
|||
NodeID: "test-node", |
|||
Mode: "node", |
|||
}) |
|||
if err != nil { |
|||
t.Fatalf("NewCSIDriver node mode: %v", err) |
|||
} |
|||
|
|||
// In node mode, controller should be nil.
|
|||
if driver.controller != nil { |
|||
t.Error("BUG: controller should be nil in node mode") |
|||
} |
|||
// Node should be non-nil.
|
|||
if driver.node == nil { |
|||
t.Error("BUG: node should be non-nil in node mode") |
|||
} |
|||
} |
|||
|
|||
// QA-LIFECYCLE-4: Driver with invalid mode should error.
|
|||
func TestQA_ModeInvalid(t *testing.T) { |
|||
_, err := NewCSIDriver(DriverConfig{ |
|||
Endpoint: "unix:///tmp/test.sock", |
|||
DataDir: t.TempDir(), |
|||
NodeID: "test-node", |
|||
Mode: "invalid", |
|||
}) |
|||
if err == nil { |
|||
t.Fatal("expected error for invalid mode") |
|||
} |
|||
} |
|||
|
|||
// ============================================================
|
|||
// QA-SERVER: Server/Driver configuration adversarial tests
|
|||
// ============================================================
|
|||
|
|||
// QA-SRV-CP62-1: DriverConfig with mode="all" and no masterAddr uses local backend.
|
|||
func TestQA_Srv_AllModeLocalBackend(t *testing.T) { |
|||
driver, err := NewCSIDriver(DriverConfig{ |
|||
Endpoint: "unix:///tmp/test-all.sock", |
|||
DataDir: t.TempDir(), |
|||
NodeID: "test-node", |
|||
Mode: "all", |
|||
}) |
|||
if err != nil { |
|||
t.Fatalf("NewCSIDriver: %v", err) |
|||
} |
|||
if driver.controller == nil { |
|||
t.Error("controller should be non-nil in 'all' mode") |
|||
} |
|||
if driver.node == nil { |
|||
t.Error("node should be non-nil in 'all' mode") |
|||
} |
|||
if driver.mgr == nil { |
|||
t.Error("mgr should be non-nil when no masterAddr and mode is 'all'") |
|||
} |
|||
} |
|||
|
|||
// QA-SRV-CP62-2: Multiple calls to driver.Stop() should not panic.
|
|||
func TestQA_Srv_DoubleStop(t *testing.T) { |
|||
driver, err := NewCSIDriver(DriverConfig{ |
|||
Endpoint: "unix:///tmp/test-stop.sock", |
|||
DataDir: t.TempDir(), |
|||
NodeID: "test-node", |
|||
Mode: "all", |
|||
}) |
|||
if err != nil { |
|||
t.Fatalf("NewCSIDriver: %v", err) |
|||
} |
|||
|
|||
defer func() { |
|||
if r := recover(); r != nil { |
|||
t.Fatalf("BUG: double Stop() panicked: %v", r) |
|||
} |
|||
}() |
|||
|
|||
driver.Stop() |
|||
driver.Stop() // should not panic
|
|||
} |
|||
|
|||
// ============================================================
|
|||
// QA-VM-CP62: VolumeManager adversarial tests (CP6-2 additions)
|
|||
// ============================================================
|
|||
|
|||
// QA-VM-CP62-1: CreateVolume after Stop without Start — should return ErrNotReady.
|
|||
func TestQA_VM_CreateAfterStop(t *testing.T) { |
|||
dir := t.TempDir() |
|||
logger := log.New(os.Stderr, "[test-qa] ", log.LstdFlags) |
|||
mgr := NewVolumeManager(dir, "127.0.0.1:0", "iqn.test", logger) |
|||
mgr.Start(context.Background()) |
|||
mgr.Stop() |
|||
|
|||
err := mgr.CreateVolume("vol1", 4*1024*1024) |
|||
if !errors.Is(err, ErrNotReady) { |
|||
t.Fatalf("expected ErrNotReady after Stop, got: %v", err) |
|||
} |
|||
} |
|||
|
|||
// QA-VM-CP62-2: OpenVolume on a volume that doesn't exist on disk.
|
|||
func TestQA_VM_OpenNonExistent(t *testing.T) { |
|||
mgr := newTestManager(t) |
|||
err := mgr.OpenVolume("does-not-exist") |
|||
if err == nil { |
|||
t.Fatal("OpenVolume for non-existent should fail") |
|||
} |
|||
} |
|||
|
|||
// QA-VM-CP62-3: ListenAddr returns empty string after Stop.
|
|||
func TestQA_VM_ListenAddrAfterStop(t *testing.T) { |
|||
dir := t.TempDir() |
|||
logger := log.New(os.Stderr, "[test-qa] ", log.LstdFlags) |
|||
mgr := NewVolumeManager(dir, "127.0.0.1:0", "iqn.test", logger) |
|||
mgr.Start(context.Background()) |
|||
|
|||
addr := mgr.ListenAddr() |
|||
if addr == "" { |
|||
t.Fatal("ListenAddr should be non-empty while running") |
|||
} |
|||
|
|||
mgr.Stop() |
|||
|
|||
addr = mgr.ListenAddr() |
|||
if addr != "" { |
|||
t.Logf("ListenAddr after Stop: %q (may return stale addr, not a bug if documented)", addr) |
|||
} |
|||
} |
|||
|
|||
// QA-VM-CP62-4: VolumeIQN uses shared sanitization.
|
|||
func TestQA_VM_VolumeIQNSanitized(t *testing.T) { |
|||
mgr := newTestManager(t) |
|||
|
|||
iqn := mgr.VolumeIQN("pvc-ABC/def:123") |
|||
expected := "iqn.2024.com.seaweedfs:" + blockvol.SanitizeIQN("pvc-ABC/def:123") |
|||
if iqn != expected { |
|||
t.Errorf("VolumeIQN: got %q, want %q", iqn, expected) |
|||
} |
|||
|
|||
// Should be lowercase.
|
|||
if strings.ToLower(iqn) != iqn { |
|||
t.Errorf("VolumeIQN not fully lowercase: %q", iqn) |
|||
} |
|||
} |
|||
|
|||
// ============================================================
|
|||
// QA-EDGE: Edge case tests
|
|||
// ============================================================
|
|||
|
|||
// QA-EDGE-1: CreateVolume with RequiredBytes = minVolumeSizeBytes (1 MiB) exactly.
|
|||
func TestQA_Edge_MinSize(t *testing.T) { |
|||
mgr := newTestManager(t) |
|||
cs := &controllerServer{backend: NewLocalVolumeBackend(mgr)} |
|||
|
|||
resp, err := cs.CreateVolume(context.Background(), &csi.CreateVolumeRequest{ |
|||
Name: "min-vol", |
|||
VolumeCapabilities: testVolCaps(), |
|||
CapacityRange: &csi.CapacityRange{RequiredBytes: 1 << 20}, // exactly 1 MiB
|
|||
}) |
|||
if err != nil { |
|||
t.Fatalf("create: %v", err) |
|||
} |
|||
if resp.Volume.CapacityBytes < 1<<20 { |
|||
t.Errorf("capacity too small: got %d, want >= %d", resp.Volume.CapacityBytes, 1<<20) |
|||
} |
|||
} |
|||
|
|||
// QA-EDGE-2: CreateVolume with RequiredBytes just below minVolumeSizeBytes.
|
|||
func TestQA_Edge_BelowMinSize(t *testing.T) { |
|||
mgr := newTestManager(t) |
|||
cs := &controllerServer{backend: NewLocalVolumeBackend(mgr)} |
|||
|
|||
resp, err := cs.CreateVolume(context.Background(), &csi.CreateVolumeRequest{ |
|||
Name: "sub-min-vol", |
|||
VolumeCapabilities: testVolCaps(), |
|||
CapacityRange: &csi.CapacityRange{RequiredBytes: 100}, // 100 bytes, below min
|
|||
}) |
|||
if err != nil { |
|||
t.Fatalf("create: %v", err) |
|||
} |
|||
// Should be rounded up to minVolumeSizeBytes.
|
|||
if resp.Volume.CapacityBytes < 1<<20 { |
|||
t.Errorf("capacity: got %d, expected >= minVolumeSizeBytes (1 MiB)", resp.Volume.CapacityBytes) |
|||
} |
|||
} |
|||
|
|||
// QA-EDGE-3: CreateVolume with RequiredBytes = LimitBytes (exactly equal).
|
|||
func TestQA_Edge_RequiredEqualsLimit(t *testing.T) { |
|||
mgr := newTestManager(t) |
|||
cs := &controllerServer{backend: NewLocalVolumeBackend(mgr)} |
|||
|
|||
size := int64(4 * 1024 * 1024) |
|||
resp, err := cs.CreateVolume(context.Background(), &csi.CreateVolumeRequest{ |
|||
Name: "exact-limit-vol", |
|||
VolumeCapabilities: testVolCaps(), |
|||
CapacityRange: &csi.CapacityRange{ |
|||
RequiredBytes: size, |
|||
LimitBytes: size, |
|||
}, |
|||
}) |
|||
if err != nil { |
|||
t.Fatalf("create: %v", err) |
|||
} |
|||
if resp.Volume.CapacityBytes != size { |
|||
t.Errorf("capacity: got %d, want %d", resp.Volume.CapacityBytes, size) |
|||
} |
|||
} |
|||
|
|||
// QA-EDGE-4: CreateVolume with RequiredBytes = 4097 (needs rounding to 8192).
|
|||
// With LimitBytes = 4097, the rounded size exceeds LimitBytes — should fail.
|
|||
func TestQA_Edge_RoundingExceedsLimit(t *testing.T) { |
|||
mgr := newTestManager(t) |
|||
cs := &controllerServer{backend: NewLocalVolumeBackend(mgr)} |
|||
|
|||
_, err := cs.CreateVolume(context.Background(), &csi.CreateVolumeRequest{ |
|||
Name: "round-exceed", |
|||
VolumeCapabilities: testVolCaps(), |
|||
CapacityRange: &csi.CapacityRange{ |
|||
RequiredBytes: 4097, |
|||
LimitBytes: 4097, |
|||
}, |
|||
}) |
|||
// RequiredBytes=4097, rounds to 8192, but LimitBytes=4097 < 8192.
|
|||
// However, since RequiredBytes < minVolumeSizeBytes (1 MiB), it gets bumped to 1 MiB.
|
|||
// LimitBytes=4097 < 1 MiB, so sizeBytes (1 MiB) > LimitBytes → should fail.
|
|||
if err == nil { |
|||
t.Fatal("expected error: rounded size exceeds LimitBytes") |
|||
} |
|||
st, ok := status.FromError(err) |
|||
if !ok || st.Code() != codes.InvalidArgument { |
|||
t.Fatalf("expected InvalidArgument, got: %v", err) |
|||
} |
|||
} |
|||
|
|||
// QA-EDGE-5: Empty string volume name in node operations.
// All four node RPCs are called with VolumeId == "" and each must return an
// error (only non-nil is asserted here, not a specific gRPC code).
func TestQA_Edge_EmptyVolumeIDNode(t *testing.T) {
	ns, _, _ := newTestNodeServer(t)

	// NodeStageVolume must reject an empty VolumeId.
	_, err := ns.NodeStageVolume(context.Background(), &csi.NodeStageVolumeRequest{
		VolumeId:          "",
		StagingTargetPath: "/tmp/staging",
	})
	if err == nil {
		t.Error("stage with empty volumeID should fail")
	}

	// NodeUnstageVolume must reject an empty VolumeId.
	_, err = ns.NodeUnstageVolume(context.Background(), &csi.NodeUnstageVolumeRequest{
		VolumeId:          "",
		StagingTargetPath: "/tmp/staging",
	})
	if err == nil {
		t.Error("unstage with empty volumeID should fail")
	}

	// NodePublishVolume must reject an empty VolumeId.
	_, err = ns.NodePublishVolume(context.Background(), &csi.NodePublishVolumeRequest{
		VolumeId:   "",
		TargetPath: "/tmp/target",
	})
	if err == nil {
		t.Error("publish with empty volumeID should fail")
	}

	// NodeUnpublishVolume must reject an empty VolumeId.
	_, err = ns.NodeUnpublishVolume(context.Background(), &csi.NodeUnpublishVolumeRequest{
		VolumeId:   "",
		TargetPath: "/tmp/target",
	})
	if err == nil {
		t.Error("unpublish with empty volumeID should fail")
	}
}
|||
|
|||
// QA-EDGE-6: Sanitization: volume name with only dots.
|
|||
func TestQA_Edge_AllDotsName(t *testing.T) { |
|||
name := "..." |
|||
iqn := blockvol.SanitizeIQN(name) |
|||
file := blockvol.SanitizeFilename(name) |
|||
|
|||
if iqn == "" { |
|||
t.Error("SanitizeIQN('...') should not be empty") |
|||
} |
|||
if file == "" { |
|||
t.Error("SanitizeFilename('...') should not be empty") |
|||
} |
|||
t.Logf("SanitizeIQN('...')=%q, SanitizeFilename('...')=%q", iqn, file) |
|||
|
|||
// The filename "..." -> "..." (dots are valid). Check it doesn't create
|
|||
// a directory traversal.
|
|||
if strings.Contains(file, "..") { |
|||
t.Logf("WARNING: sanitized filename contains '..': %q (could be path traversal in filepath.Join)", file) |
|||
} |
|||
} |
|||
|
|||
// QA-EDGE-7: Large number of volumes registered, then full heartbeat reconcile.
|
|||
func TestQA_Edge_LargeScaleHeartbeat(t *testing.T) { |
|||
mgr := newTestManager(t) |
|||
|
|||
// Create 100 volumes.
|
|||
for i := 0; i < 100; i++ { |
|||
name := fmt.Sprintf("scale-vol-%d", i) |
|||
if err := mgr.CreateVolume(name, 1<<20); err != nil { // 1 MiB each
|
|||
t.Fatalf("create %s: %v", name, err) |
|||
} |
|||
} |
|||
|
|||
// Verify all exist.
|
|||
for i := 0; i < 100; i++ { |
|||
if !mgr.VolumeExists(fmt.Sprintf("scale-vol-%d", i)) { |
|||
t.Fatalf("scale-vol-%d not found", i) |
|||
} |
|||
} |
|||
|
|||
// Delete all.
|
|||
for i := 0; i < 100; i++ { |
|||
mgr.DeleteVolume(fmt.Sprintf("scale-vol-%d", i)) |
|||
} |
|||
|
|||
// Verify all gone.
|
|||
for i := 0; i < 100; i++ { |
|||
if mgr.VolumeExists(fmt.Sprintf("scale-vol-%d", i)) { |
|||
t.Fatalf("scale-vol-%d still exists after delete", i) |
|||
} |
|||
} |
|||
} |
|||
@ -0,0 +1,819 @@ |
|||
package csi |
|||
|
|||
import ( |
|||
"context" |
|||
"errors" |
|||
"fmt" |
|||
"log" |
|||
"os" |
|||
"path/filepath" |
|||
"strings" |
|||
"sync" |
|||
"sync/atomic" |
|||
"testing" |
|||
|
|||
"github.com/container-storage-interface/spec/lib/go/csi" |
|||
"google.golang.org/grpc/codes" |
|||
"google.golang.org/grpc/status" |
|||
) |
|||
|
|||
// --- VolumeManager adversarial tests ---
|
|||
|
|||
// QA-VM-1: DeleteVolume leaks snapshot delta files (.snap.N).
// CreateVolume then call BlockVol's CreateSnapshot, then DeleteVolume.
// After delete, .snap.* files should not remain on disk.
func TestQA_VM_DeleteLeaksSnapshotFiles(t *testing.T) {
	mgr := newTestManager(t)

	if err := mgr.CreateVolume("snap-leak", 4*1024*1024); err != nil {
		t.Fatalf("create: %v", err)
	}

	// Access the managed volume to create a snapshot through the engine.
	// NOTE(review): reaches into the manager's internals (mu, volumes map) —
	// test-only coupling to VolumeManager's private state.
	mgr.mu.RLock()
	mv := mgr.volumes["snap-leak"]
	mgr.mu.RUnlock()
	if mv == nil {
		t.Fatal("volume not tracked")
	}

	// Create a snapshot to generate a .snap.0 delta file.
	if err := mv.vol.CreateSnapshot(0); err != nil {
		t.Fatalf("create snapshot: %v", err)
	}

	// Write some data so the flusher creates CoW entries in the delta.
	data := make([]byte, 4096)
	for i := range data {
		data[i] = 0xAB
	}
	if err := mv.vol.WriteLBA(0, data); err != nil {
		t.Fatalf("write: %v", err)
	}

	// Verify .snap.0 file exists on disk.
	// Glob error deliberately ignored: a bad pattern would just yield no
	// matches and the test skips below.
	snapPattern := filepath.Join(mgr.dataDir, "snap-leak.blk.snap.*")
	matches, _ := filepath.Glob(snapPattern)
	if len(matches) == 0 {
		// The CoW flush is asynchronous; without a delta file on disk the
		// leak check below would be vacuous, so skip rather than fail.
		t.Skipf("no snapshot delta files created (flusher may not have CoW'd yet)")
	}
	t.Logf("snapshot files before delete: %v", matches)

	// Delete the volume.
	if err := mgr.DeleteVolume("snap-leak"); err != nil {
		t.Fatalf("delete: %v", err)
	}

	// BUG: check for leaked snapshot files.
	matches, _ = filepath.Glob(snapPattern)
	if len(matches) > 0 {
		t.Errorf("BUG: snapshot delta files leaked after DeleteVolume: %v", matches)
	}

	// Also check the main .blk file is gone.
	volPath := filepath.Join(mgr.dataDir, "snap-leak.blk")
	if _, err := os.Stat(volPath); !os.IsNotExist(err) {
		t.Errorf("BUG: .blk file still exists after delete")
	}
}
|||
|
|||
// QA-VM-2: Start is retryable after failure.
|
|||
// If initial Start fails (e.g., port in use), calling Start again after the
|
|||
// port is freed should succeed.
|
|||
func TestQA_VM_StartNotRetryableAfterFailure(t *testing.T) { |
|||
dir := t.TempDir() |
|||
logger := log.New(os.Stderr, "[test-qa] ", log.LstdFlags) |
|||
|
|||
// Start first manager to occupy the port.
|
|||
mgr1 := NewVolumeManager(dir, "127.0.0.1:19876", "iqn.test", logger) |
|||
if err := mgr1.Start(context.Background()); err != nil { |
|||
t.Fatalf("first start: %v", err) |
|||
} |
|||
defer mgr1.Stop() |
|||
|
|||
// Second manager on same port should fail.
|
|||
mgr2 := NewVolumeManager(dir, "127.0.0.1:19876", "iqn.test", logger) |
|||
err := mgr2.Start(context.Background()) |
|||
if err == nil { |
|||
mgr2.Stop() |
|||
t.Fatal("expected second start to fail (port in use)") |
|||
} |
|||
t.Logf("first start failed as expected: %v", err) |
|||
|
|||
// Free the port, then retry — should succeed now.
|
|||
mgr1.Stop() |
|||
|
|||
err = mgr2.Start(context.Background()) |
|||
if err != nil { |
|||
t.Fatalf("second start should succeed after port freed: %v", err) |
|||
} |
|||
defer mgr2.Stop() |
|||
|
|||
// Manager should be fully functional.
|
|||
if err := mgr2.CreateVolume("test", 4*1024*1024); err != nil { |
|||
t.Fatalf("CreateVolume after retry: %v", err) |
|||
} |
|||
} |
|||
|
|||
// QA-VM-3: Stop then re-Start should work — manager should be fully functional.
|
|||
func TestQA_VM_StopThenRestart(t *testing.T) { |
|||
dir := t.TempDir() |
|||
logger := log.New(os.Stderr, "[test-qa] ", log.LstdFlags) |
|||
|
|||
mgr := NewVolumeManager(dir, "127.0.0.1:0", "iqn.test", logger) |
|||
if err := mgr.Start(context.Background()); err != nil { |
|||
t.Fatalf("start: %v", err) |
|||
} |
|||
|
|||
if err := mgr.CreateVolume("v1", 4*1024*1024); err != nil { |
|||
t.Fatalf("create: %v", err) |
|||
} |
|||
|
|||
mgr.Stop() |
|||
|
|||
// Start after Stop should work.
|
|||
if err := mgr.Start(context.Background()); err != nil { |
|||
t.Fatalf("restart after stop: %v", err) |
|||
} |
|||
defer mgr.Stop() |
|||
|
|||
// Manager should be fully functional — create a new volume.
|
|||
if err := mgr.CreateVolume("v2", 4*1024*1024); err != nil { |
|||
t.Fatalf("create after restart: %v", err) |
|||
} |
|||
if !mgr.VolumeExists("v2") { |
|||
t.Error("volume created but not tracked") |
|||
} |
|||
} |
|||
|
|||
// QA-VM-4: CreateVolume with 0 size should return clear error.
|
|||
func TestQA_VM_CreateZeroSize(t *testing.T) { |
|||
mgr := newTestManager(t) |
|||
err := mgr.CreateVolume("zero", 0) |
|||
if err == nil { |
|||
t.Error("BUG: CreateVolume with 0 size should fail") |
|||
mgr.DeleteVolume("zero") |
|||
} else { |
|||
t.Logf("correctly rejected 0-size: %v", err) |
|||
} |
|||
} |
|||
|
|||
// QA-VM-5: Concurrent CreateVolume + DeleteVolume for same name.
// Only asserts the manager does not panic under the race; errors returned by
// Create/Delete are deliberately ignored (either outcome is valid mid-race).
func TestQA_VM_ConcurrentCreateDeleteSameName(t *testing.T) {
	mgr := newTestManager(t)

	var wg sync.WaitGroup
	var panicked atomic.Bool

	// Run 20 goroutines, half creating and half deleting the same volume.
	for i := 0; i < 20; i++ {
		wg.Add(1)
		go func(idx int) {
			defer wg.Done()
			// recover() must sit in a deferred func in the same goroutine
			// to catch panics from the manager under concurrent access.
			defer func() {
				if r := recover(); r != nil {
					panicked.Store(true)
					t.Errorf("PANIC in goroutine %d: %v", idx, r)
				}
			}()
			if idx%2 == 0 {
				mgr.CreateVolume("race-vol", 4*1024*1024)
			} else {
				mgr.DeleteVolume("race-vol")
			}
		}(i)
	}
	wg.Wait()

	if panicked.Load() {
		t.Fatal("BUG: concurrent create/delete caused panic")
	}
}
|||
|
|||
// QA-VM-6: Filename and IQN sanitization are consistent.
// Both sanitizeFilename and SanitizeIQN lowercase, so "VolA" and "vola"
// map to the same file and same IQN — treated as the same volume (idempotent).
func TestQA_VM_SanitizationDivergence(t *testing.T) {
	mgr := newTestManager(t)

	// Both sanitizers now lowercase, so "VolA" and "vola" produce:
	//   filename: "vola.blk" (both)
	//   IQN:      ":vola"    (both)
	// This means they are the same volume — second create is idempotent.

	err1 := mgr.CreateVolume("VolA", 4*1024*1024)
	if err1 != nil {
		t.Fatalf("create VolA: %v", err1)
	}

	// "vola" should be idempotent (same file, same IQN, same in-memory name "VolA").
	// But note: volume names are tracked as-is in the map ("VolA" != "vola"),
	// so the second create goes to file "vola.blk" which is the same file as
	// "VolA" -> "vola.blk". The existing-file adoption path handles this.
	err2 := mgr.CreateVolume("vola", 4*1024*1024)
	if err2 != nil {
		t.Fatalf("create vola (should be idempotent via file adoption): %v", err2)
	}

	// Both spellings must resolve to the identical IQN after lowercasing.
	iqn1 := mgr.VolumeIQN("VolA")
	iqn2 := mgr.VolumeIQN("vola")

	if iqn1 != iqn2 {
		t.Errorf("IQN mismatch: VolA=%q, vola=%q (should be identical after lowercasing)", iqn1, iqn2)
	}
}
|||
|
|||
// QA-VM-7: OpenVolume for a volume that's already open and tracked should be idempotent.
|
|||
// But what if the file was modified externally between close and reopen?
|
|||
func TestQA_VM_OpenAlreadyTracked(t *testing.T) { |
|||
mgr := newTestManager(t) |
|||
|
|||
if err := mgr.CreateVolume("tracked", 4*1024*1024); err != nil { |
|||
t.Fatalf("create: %v", err) |
|||
} |
|||
|
|||
// Open again while already tracked — should be idempotent (no error).
|
|||
if err := mgr.OpenVolume("tracked"); err != nil { |
|||
t.Fatalf("second open (expected idempotent): %v", err) |
|||
} |
|||
} |
|||
|
|||
// QA-VM-8: DeleteVolume for untracked volume — does it clean up .blk file from disk?
|
|||
func TestQA_VM_DeleteUntrackedWithFileOnDisk(t *testing.T) { |
|||
mgr := newTestManager(t) |
|||
|
|||
// Create then close (removes from tracking but keeps file on disk).
|
|||
if err := mgr.CreateVolume("orphan", 4*1024*1024); err != nil { |
|||
t.Fatalf("create: %v", err) |
|||
} |
|||
if err := mgr.CloseVolume("orphan"); err != nil { |
|||
t.Fatalf("close: %v", err) |
|||
} |
|||
if mgr.VolumeExists("orphan") { |
|||
t.Fatal("expected volume to be untracked after close") |
|||
} |
|||
|
|||
// File should still exist on disk.
|
|||
volPath := filepath.Join(mgr.dataDir, "orphan.blk") |
|||
if _, err := os.Stat(volPath); err != nil { |
|||
t.Fatalf("expected .blk file to exist: %v", err) |
|||
} |
|||
|
|||
// DeleteVolume for untracked name should still clean up file.
|
|||
if err := mgr.DeleteVolume("orphan"); err != nil { |
|||
t.Fatalf("delete untracked: %v", err) |
|||
} |
|||
|
|||
if _, err := os.Stat(volPath); !os.IsNotExist(err) { |
|||
t.Errorf("BUG: .blk file not cleaned up by DeleteVolume for untracked volume") |
|||
} |
|||
} |
|||
|
|||
// --- Controller adversarial tests ---
|
|||
|
|||
// QA-CTRL-1: CreateVolume with LimitBytes smaller than RequiredBytes.
|
|||
// CSI spec says limit_bytes is the maximum size. If set and smaller than required,
|
|||
// it should be an error.
|
|||
func TestQA_Ctrl_CreateLimitLessThanRequired(t *testing.T) { |
|||
mgr := newTestManager(t) |
|||
cs := &controllerServer{backend: NewLocalVolumeBackend(mgr)} |
|||
|
|||
_, err := cs.CreateVolume(context.Background(), &csi.CreateVolumeRequest{ |
|||
Name: "limit-vol", |
|||
VolumeCapabilities: testVolCaps(), |
|||
CapacityRange: &csi.CapacityRange{ |
|||
RequiredBytes: 10 * 1024 * 1024, |
|||
LimitBytes: 1 * 1024 * 1024, |
|||
}, |
|||
}) |
|||
if err == nil { |
|||
mgr.DeleteVolume("limit-vol") |
|||
t.Fatal("expected CreateVolume to reject LimitBytes < RequiredBytes") |
|||
} |
|||
st, ok := status.FromError(err) |
|||
if !ok || st.Code() != codes.InvalidArgument { |
|||
t.Fatalf("expected InvalidArgument, got: %v", err) |
|||
} |
|||
t.Logf("correctly rejected: code=%v msg=%s", st.Code(), st.Message()) |
|||
} |
|||
|
|||
// QA-CTRL-2: CreateVolume with RequiredBytes=0 and LimitBytes set.
|
|||
// Should use LimitBytes as the size.
|
|||
func TestQA_Ctrl_CreateOnlyLimitBytes(t *testing.T) { |
|||
mgr := newTestManager(t) |
|||
cs := &controllerServer{backend: NewLocalVolumeBackend(mgr)} |
|||
|
|||
resp, err := cs.CreateVolume(context.Background(), &csi.CreateVolumeRequest{ |
|||
Name: "limit-only", |
|||
VolumeCapabilities: testVolCaps(), |
|||
CapacityRange: &csi.CapacityRange{ |
|||
RequiredBytes: 0, |
|||
LimitBytes: 2 * 1024 * 1024, |
|||
}, |
|||
}) |
|||
if err != nil { |
|||
t.Fatalf("CreateVolume: %v", err) |
|||
} |
|||
// Volume should be created but size should not exceed LimitBytes.
|
|||
if resp.Volume.CapacityBytes > 2*1024*1024 { |
|||
t.Errorf("BUG: volume size %d exceeds LimitBytes %d", |
|||
resp.Volume.CapacityBytes, 2*1024*1024) |
|||
} |
|||
} |
|||
|
|||
// QA-CTRL-3: CreateVolume with name containing path traversal.
// Either the request is rejected outright, or the name must be sanitized so
// no file is created outside the manager's data directory.
func TestQA_Ctrl_CreatePathTraversal(t *testing.T) {
	mgr := newTestManager(t)
	cs := &controllerServer{backend: NewLocalVolumeBackend(mgr)}

	_, err := cs.CreateVolume(context.Background(), &csi.CreateVolumeRequest{
		Name:               "../../etc/shadow",
		VolumeCapabilities: testVolCaps(),
		CapacityRange: &csi.CapacityRange{
			RequiredBytes: 4 * 1024 * 1024,
		},
	})
	if err != nil {
		// Rejection is an acceptable outcome — nothing left to verify.
		t.Logf("path traversal rejected: %v", err)
		return
	}

	// If it succeeded, verify the file was NOT created outside dataDir.
	// The stat path is relative to the test's working directory, mirroring
	// where a naive filepath.Join of the raw name would land.
	if _, statErr := os.Stat("../../etc/shadow.blk"); statErr == nil {
		t.Fatal("BUG: path traversal created file outside data directory!")
	}

	// Check it went to a sanitized name inside dataDir.
	// NOTE(review): assumes the sanitizer maps '/' to '-' — confirm against
	// SanitizeFilename if this ever fails.
	sanitized := filepath.Join(mgr.dataDir, "..-..-etc-shadow.blk")
	if _, statErr := os.Stat(sanitized); statErr == nil {
		t.Logf("file created with sanitized name: %s (safe)", sanitized)
	}

	// Clean up under the original (unsanitized) name.
	mgr.DeleteVolume("../../etc/shadow")
}
|||
|
|||
// QA-CTRL-4: ValidateVolumeCapabilities after restart (not tracked in memory).
// By design, VolumeManager does not auto-discover volumes on startup.
// Volumes are re-tracked when kubelet re-calls CreateVolume or NodeStageVolume.
// ValidateVolumeCapabilities returns NotFound for orphaned volumes — expected.
func TestQA_Ctrl_ValidateAfterRestart(t *testing.T) {
	dir := t.TempDir()
	logger := log.New(os.Stderr, "[test-qa] ", log.LstdFlags)

	// Phase 1: create volume, stop.
	mgr1 := NewVolumeManager(dir, "127.0.0.1:0", "iqn.test", logger)
	if err := mgr1.Start(context.Background()); err != nil {
		t.Fatalf("start1: %v", err)
	}
	if err := mgr1.CreateVolume("validate-vol", 4*1024*1024); err != nil {
		t.Fatalf("create: %v", err)
	}
	mgr1.Stop()

	// Phase 2: new manager over the same data dir (simulates restart — no
	// auto-discovery, so the on-disk volume is not in the new manager's map).
	mgr2 := NewVolumeManager(dir, "127.0.0.1:0", "iqn.test", logger)
	if err := mgr2.Start(context.Background()); err != nil {
		t.Fatalf("start2: %v", err)
	}
	defer mgr2.Stop()

	cs := &controllerServer{backend: NewLocalVolumeBackend(mgr2)}

	// ValidateVolumeCapabilities for volume that exists on disk but not in memory.
	// Expected: NotFound (by design — volumes are re-tracked via CreateVolume).
	_, err := cs.ValidateVolumeCapabilities(context.Background(), &csi.ValidateVolumeCapabilitiesRequest{
		VolumeId: "validate-vol",
		VolumeCapabilities: []*csi.VolumeCapability{
			{AccessType: &csi.VolumeCapability_Mount{Mount: &csi.VolumeCapability_MountVolume{}}},
		},
	})
	if err == nil {
		t.Fatal("expected NotFound for volume not yet re-tracked after restart")
	}
	st, ok := status.FromError(err)
	if !ok || st.Code() != codes.NotFound {
		t.Fatalf("expected NotFound, got: %v", err)
	}
	t.Log("correctly returns NotFound for volume not yet re-tracked (by design)")

	// After CreateVolume re-adopts it (idempotent adoption of the on-disk
	// file), Validate should succeed.
	if err := mgr2.CreateVolume("validate-vol", 4*1024*1024); err != nil {
		t.Fatalf("re-adopt: %v", err)
	}
	_, err = cs.ValidateVolumeCapabilities(context.Background(), &csi.ValidateVolumeCapabilitiesRequest{
		VolumeId: "validate-vol",
		VolumeCapabilities: []*csi.VolumeCapability{
			{AccessType: &csi.VolumeCapability_Mount{Mount: &csi.VolumeCapability_MountVolume{}}},
		},
	})
	if err != nil {
		t.Fatalf("after re-adopt, expected success: %v", err)
	}
}
|||
|
|||
// QA-CTRL-5: CreateVolume with size that overflows uint64 after rounding.
|
|||
func TestQA_Ctrl_CreateMaxSize(t *testing.T) { |
|||
mgr := newTestManager(t) |
|||
cs := &controllerServer{backend: NewLocalVolumeBackend(mgr)} |
|||
|
|||
// Request just under max int64 — rounding up to blockSize could overflow.
|
|||
_, err := cs.CreateVolume(context.Background(), &csi.CreateVolumeRequest{ |
|||
Name: "huge", |
|||
VolumeCapabilities: testVolCaps(), |
|||
CapacityRange: &csi.CapacityRange{ |
|||
RequiredBytes: 1<<63 - 1, // max int64
|
|||
}, |
|||
}) |
|||
if err == nil { |
|||
t.Error("BUG: should reject unreasonably large size or fail gracefully") |
|||
mgr.DeleteVolume("huge") |
|||
} else { |
|||
t.Logf("large size handled: %v", err) |
|||
} |
|||
} |
|||
|
|||
// --- Node adversarial tests ---
|
|||
|
|||
// QA-NODE-1: NodeStageVolume for a volume that doesn't exist.
|
|||
func TestQA_Node_StageNonExistentVolume(t *testing.T) { |
|||
ns, _, _ := newTestNodeServer(t) |
|||
|
|||
_, err := ns.NodeStageVolume(context.Background(), &csi.NodeStageVolumeRequest{ |
|||
VolumeId: "does-not-exist", |
|||
StagingTargetPath: t.TempDir(), |
|||
VolumeCapability: testVolCap(), |
|||
}) |
|||
if err == nil { |
|||
t.Error("BUG: should fail for non-existent volume") |
|||
} else { |
|||
t.Logf("correctly rejected: %v", err) |
|||
} |
|||
} |
|||
|
|||
// QA-NODE-2: NodeUnstageVolume when all operations fail — should propagate first error.
|
|||
func TestQA_Node_UnstageAllFail(t *testing.T) { |
|||
ns, mi, mm := newTestNodeServer(t) |
|||
|
|||
mm.unmountErr = errors.New("unmount failed") |
|||
mi.logoutErr = errors.New("logout failed") |
|||
|
|||
_, err := ns.NodeUnstageVolume(context.Background(), &csi.NodeUnstageVolumeRequest{ |
|||
VolumeId: "test-vol", |
|||
StagingTargetPath: "/tmp/staging", |
|||
}) |
|||
if err == nil { |
|||
t.Error("BUG: should return error when unmount and logout both fail") |
|||
} else { |
|||
// Should report the first error (unmount).
|
|||
if !strings.Contains(err.Error(), "unmount") { |
|||
t.Errorf("expected unmount error to be first, got: %v", err) |
|||
} |
|||
} |
|||
} |
|||
|
|||
// QA-NODE-3: NodePublishVolume when staging path is not actually mounted.
// This should either fail or at least warn — bind-mounting an empty dir
// could silently give the pod an empty volume.
// This test only records the current behavior (Log), it does not fail.
func TestQA_Node_PublishWithoutStaging(t *testing.T) {
	ns, _, _ := newTestNodeServer(t)

	stagingPath := t.TempDir()
	targetPath := t.TempDir()

	// Staging path is NOT mounted.
	// NodePublishVolume should either check or just mount (depends on behavior).
	_, err := ns.NodePublishVolume(context.Background(), &csi.NodePublishVolumeRequest{
		VolumeId:          "test-vol",
		StagingTargetPath: stagingPath,
		TargetPath:        targetPath,
		VolumeCapability: &csi.VolumeCapability{
			AccessType: &csi.VolumeCapability_Mount{
				Mount: &csi.VolumeCapability_MountVolume{FsType: "ext4"},
			},
		},
	})
	// This likely succeeds (bind mounts empty dir) — which is wrong.
	if err == nil {
		t.Log("WARNING: NodePublishVolume succeeded when staging path was not mounted (bind-mounts empty dir)")
	}
}
|||
|
|||
// QA-NODE-4: NodeStageVolume idempotency doesn't verify correct volume.
// If something else is mounted at the staging path, Stage returns success
// without verifying it's our volume.
// This test only records the current behavior (Log), it does not fail.
func TestQA_Node_StageWrongVolumeAtPath(t *testing.T) {
	ns, _, mm := newTestNodeServer(t)

	stagingPath := t.TempDir()

	// Pre-mark staging path as mounted in the mock mounter (simulating
	// another volume mounted there).
	mm.isMountedTargets[stagingPath] = true

	// NodeStageVolume for a different volume — should it succeed?
	_, err := ns.NodeStageVolume(context.Background(), &csi.NodeStageVolumeRequest{
		VolumeId:          "test-vol",
		StagingTargetPath: stagingPath,
		VolumeCapability:  testVolCap(),
	})
	if err == nil {
		t.Log("WARNING: NodeStageVolume returned success because staging path was already mounted, " +
			"but it could be a different volume (no verification of mount source)")
	}
}
|||
|
|||
// QA-NODE-5: Double NodeUnstageVolume — should be idempotent.
|
|||
func TestQA_Node_DoubleUnstage(t *testing.T) { |
|||
ns, _, _ := newTestNodeServer(t) |
|||
|
|||
stagingPath := t.TempDir() |
|||
|
|||
// First unstage — nothing to undo, but should succeed.
|
|||
_, err := ns.NodeUnstageVolume(context.Background(), &csi.NodeUnstageVolumeRequest{ |
|||
VolumeId: "test-vol", |
|||
StagingTargetPath: stagingPath, |
|||
}) |
|||
if err != nil { |
|||
t.Fatalf("first unstage: %v", err) |
|||
} |
|||
|
|||
// Second unstage — should also succeed (idempotent).
|
|||
_, err = ns.NodeUnstageVolume(context.Background(), &csi.NodeUnstageVolumeRequest{ |
|||
VolumeId: "test-vol", |
|||
StagingTargetPath: stagingPath, |
|||
}) |
|||
if err != nil { |
|||
t.Errorf("BUG: double unstage should be idempotent: %v", err) |
|||
} |
|||
} |
|||
|
|||
// QA-NODE-6: NodeGetInfo returns correct topology and max volumes.
|
|||
func TestQA_Node_GetInfo(t *testing.T) { |
|||
ns, _, _ := newTestNodeServer(t) |
|||
|
|||
resp, err := ns.NodeGetInfo(context.Background(), &csi.NodeGetInfoRequest{}) |
|||
if err != nil { |
|||
t.Fatalf("NodeGetInfo: %v", err) |
|||
} |
|||
if resp.NodeId != "test-node-1" { |
|||
t.Errorf("node_id: got %q, want %q", resp.NodeId, "test-node-1") |
|||
} |
|||
if resp.MaxVolumesPerNode <= 0 { |
|||
t.Errorf("max_volumes: got %d, want > 0", resp.MaxVolumesPerNode) |
|||
} |
|||
if resp.AccessibleTopology == nil { |
|||
t.Error("expected non-nil topology") |
|||
} |
|||
} |
|||
|
|||
// QA-NODE-7: NodeStageVolume with iSCSI discovery failure should clean up.
|
|||
func TestQA_Node_StageDiscoveryFailureCleanup(t *testing.T) { |
|||
ns, mi, _ := newTestNodeServer(t) |
|||
|
|||
mi.discoveryErr = errors.New("unreachable portal") |
|||
|
|||
stagingPath := t.TempDir() |
|||
|
|||
_, err := ns.NodeStageVolume(context.Background(), &csi.NodeStageVolumeRequest{ |
|||
VolumeId: "test-vol", |
|||
StagingTargetPath: stagingPath, |
|||
VolumeCapability: testVolCap(), |
|||
}) |
|||
if err == nil { |
|||
t.Fatal("expected error from discovery failure") |
|||
} |
|||
|
|||
// Volume should be cleaned up.
|
|||
if ns.mgr.VolumeExists("test-vol") { |
|||
t.Error("BUG: volume still tracked after discovery failure (resource leak)") |
|||
} |
|||
} |
|||
|
|||
// --- Server adversarial tests ---
|
|||
|
|||
// QA-SRV-1: parseEndpoint with unsupported scheme.
|
|||
func TestQA_Srv_ParseEndpointBadScheme(t *testing.T) { |
|||
_, _, err := parseEndpoint("http://localhost:50051") |
|||
if err == nil { |
|||
t.Error("BUG: should reject http:// scheme") |
|||
} |
|||
} |
|||
|
|||
// QA-SRV-2: parseEndpoint with various formats.
|
|||
func TestQA_Srv_ParseEndpointFormats(t *testing.T) { |
|||
tests := []struct { |
|||
input string |
|||
wantProto string |
|||
wantAddr string |
|||
wantErr bool |
|||
}{ |
|||
{"unix:///csi/csi.sock", "unix", "/csi/csi.sock", false}, |
|||
{"unix:///var/lib/kubelet/plugins/block.csi/csi.sock", "unix", "/var/lib/kubelet/plugins/block.csi/csi.sock", false}, |
|||
{"tcp://0.0.0.0:50051", "tcp", "0.0.0.0:50051", false}, |
|||
{"ftp://host/path", "", "", true}, |
|||
{"", "", "", true}, |
|||
} |
|||
for _, tt := range tests { |
|||
proto, addr, err := parseEndpoint(tt.input) |
|||
if tt.wantErr { |
|||
if err == nil { |
|||
t.Errorf("parseEndpoint(%q): expected error", tt.input) |
|||
} |
|||
continue |
|||
} |
|||
if err != nil { |
|||
t.Errorf("parseEndpoint(%q): %v", tt.input, err) |
|||
continue |
|||
} |
|||
if proto != tt.wantProto || addr != tt.wantAddr { |
|||
t.Errorf("parseEndpoint(%q): got (%q, %q), want (%q, %q)", |
|||
tt.input, proto, addr, tt.wantProto, tt.wantAddr) |
|||
} |
|||
} |
|||
} |
|||
|
|||
// QA-SRV-3: NewCSIDriver with empty NodeID should fail.
|
|||
func TestQA_Srv_DriverEmptyNodeID(t *testing.T) { |
|||
_, err := NewCSIDriver(DriverConfig{ |
|||
Endpoint: "unix:///tmp/test.sock", |
|||
DataDir: t.TempDir(), |
|||
NodeID: "", |
|||
}) |
|||
if err == nil { |
|||
t.Error("BUG: should reject empty NodeID") |
|||
} |
|||
} |
|||
|
|||
// --- Identity adversarial tests ---
|
|||
|
|||
// QA-ID-1: Identity methods should work with nil requests.
|
|||
func TestQA_Identity_NilRequests(t *testing.T) { |
|||
s := &identityServer{} |
|||
|
|||
if _, err := s.GetPluginInfo(context.Background(), nil); err != nil { |
|||
t.Errorf("GetPluginInfo(nil): %v", err) |
|||
} |
|||
if _, err := s.GetPluginCapabilities(context.Background(), nil); err != nil { |
|||
t.Errorf("GetPluginCapabilities(nil): %v", err) |
|||
} |
|||
if _, err := s.Probe(context.Background(), nil); err != nil { |
|||
t.Errorf("Probe(nil): %v", err) |
|||
} |
|||
} |
|||
|
|||
// --- SanitizeIQN adversarial tests ---
|
|||
|
|||
// QA-IQN-1: IQN with only invalid characters should not produce empty string.
|
|||
func TestQA_IQN_AllInvalidChars(t *testing.T) { |
|||
iqn := SanitizeIQN("!@#$%^&*()") |
|||
if iqn == "" { |
|||
t.Error("BUG: SanitizeIQN produced empty string for all-invalid input") |
|||
} |
|||
t.Logf("SanitizeIQN('!@#$%%^&*()') = %q", iqn) |
|||
} |
|||
|
|||
// QA-IQN-2: Empty string input.
|
|||
func TestQA_IQN_Empty(t *testing.T) { |
|||
iqn := SanitizeIQN("") |
|||
// Empty is technically valid but probably wrong — should the caller validate?
|
|||
t.Logf("SanitizeIQN('') = %q (len=%d)", iqn, len(iqn)) |
|||
} |
|||
|
|||
// QA-IQN-3: IQN at exactly 64 chars should NOT get hash suffix.
|
|||
func TestQA_IQN_ExactlyMaxLength(t *testing.T) { |
|||
name := strings.Repeat("a", 64) |
|||
iqn := SanitizeIQN(name) |
|||
if len(iqn) != 64 { |
|||
t.Errorf("expected 64 chars, got %d: %q", len(iqn), iqn) |
|||
} |
|||
// Should not have hash suffix at exactly 64.
|
|||
if strings.Contains(iqn, "-") && len(name) == 64 { |
|||
// This would mean it was unnecessarily truncated.
|
|||
t.Log("at-boundary: has dash but input was exactly 64 chars") |
|||
} |
|||
} |
|||
|
|||
// QA-IQN-4: IQN at 65 chars should get hash suffix.
|
|||
func TestQA_IQN_OneOverMax(t *testing.T) { |
|||
name := strings.Repeat("a", 65) |
|||
iqn := SanitizeIQN(name) |
|||
if len(iqn) > 64 { |
|||
t.Errorf("expected max 64 chars, got %d", len(iqn)) |
|||
} |
|||
// Verify hash suffix is present.
|
|||
parts := strings.Split(iqn, "-") |
|||
if len(parts) < 2 { |
|||
t.Errorf("expected hash suffix after truncation: %q", iqn) |
|||
} |
|||
} |
|||
|
|||
// QA-IQN-5: Two names that differ only by case should produce different IQNs
|
|||
// (or we should document that case is folded).
|
|||
func TestQA_IQN_CaseFolding(t *testing.T) { |
|||
iqn1 := SanitizeIQN("MyVolume") |
|||
iqn2 := SanitizeIQN("myvolume") |
|||
if iqn1 != iqn2 { |
|||
t.Errorf("case folding: %q != %q (different IQNs for same logical name)", iqn1, iqn2) |
|||
} |
|||
// This is expected — IQN lowercases. But the FILENAMES may differ.
|
|||
t.Logf("SanitizeIQN('MyVolume')=%q, SanitizeIQN('myvolume')=%q", iqn1, iqn2) |
|||
} |
|||
|
|||
// --- Cross-cutting adversarial tests ---
|
|||
|
|||
// QA-X-1: Full lifecycle: create -> stage -> publish -> unpublish -> unstage -> delete.
|
|||
// Run twice to verify second lifecycle works.
|
|||
func TestQA_FullLifecycleTwice(t *testing.T) {
	dir := t.TempDir()
	logger := log.New(os.Stderr, "[test-qa] ", log.LstdFlags)
	mgr := NewVolumeManager(dir, "127.0.0.1:0", "iqn.2024.com.seaweedfs", logger)
	if err := mgr.Start(context.Background()); err != nil {
		t.Fatalf("start: %v", err)
	}
	defer mgr.Stop()

	// Mocked iSCSI/mount layers: no real initiator or bind mounts needed.
	mi := newMockISCSIUtil()
	mi.getDeviceResult = "/dev/sda"
	mm := newMockMountUtil()

	// Controller uses the local backend; node server talks to the mocks.
	cs := &controllerServer{backend: NewLocalVolumeBackend(mgr)}
	ns := &nodeServer{
		mgr:       mgr,
		nodeID:    "test-node",
		iscsiUtil: mi,
		mountUtil: mm,
		logger:    logger,
	}

	// Two rounds with distinct volume names prove the second lifecycle is
	// not poisoned by state left over from the first.
	for round := 0; round < 2; round++ {
		volName := fmt.Sprintf("lifecycle-%d", round)
		t.Logf("--- round %d ---", round)

		// Create
		_, err := cs.CreateVolume(context.Background(), &csi.CreateVolumeRequest{
			Name:               volName,
			VolumeCapabilities: testVolCaps(),
			CapacityRange:      &csi.CapacityRange{RequiredBytes: 4 * 1024 * 1024},
		})
		if err != nil {
			t.Fatalf("round %d create: %v", round, err)
		}

		// Close so stage can reopen.
		mgr.CloseVolume(volName)

		stagingPath := filepath.Join(t.TempDir(), "staging")
		targetPath := filepath.Join(t.TempDir(), "target")

		// Stage
		_, err = ns.NodeStageVolume(context.Background(), &csi.NodeStageVolumeRequest{
			VolumeId:          volName,
			StagingTargetPath: stagingPath,
			VolumeCapability:  testVolCap(),
		})
		if err != nil {
			t.Fatalf("round %d stage: %v", round, err)
		}

		// Publish
		_, err = ns.NodePublishVolume(context.Background(), &csi.NodePublishVolumeRequest{
			VolumeId:          volName,
			StagingTargetPath: stagingPath,
			TargetPath:        targetPath,
		})
		if err != nil {
			t.Fatalf("round %d publish: %v", round, err)
		}

		// Unpublish
		_, err = ns.NodeUnpublishVolume(context.Background(), &csi.NodeUnpublishVolumeRequest{
			VolumeId:   volName,
			TargetPath: targetPath,
		})
		if err != nil {
			t.Fatalf("round %d unpublish: %v", round, err)
		}

		// Unstage
		_, err = ns.NodeUnstageVolume(context.Background(), &csi.NodeUnstageVolumeRequest{
			VolumeId:          volName,
			StagingTargetPath: stagingPath,
		})
		if err != nil {
			t.Fatalf("round %d unstage: %v", round, err)
		}

		// Delete
		_, err = cs.DeleteVolume(context.Background(), &csi.DeleteVolumeRequest{
			VolumeId: volName,
		})
		if err != nil {
			t.Fatalf("round %d delete: %v", round, err)
		}

		// Verify file gone: DeleteVolume must remove the backing .blk file.
		volPath := filepath.Join(dir, sanitizeFilename(volName)+".blk")
		if _, statErr := os.Stat(volPath); !os.IsNotExist(statErr) {
			t.Errorf("round %d: .blk file not cleaned up: %v", round, statErr)
		}
	}
}
|||
@ -0,0 +1,170 @@ |
|||
package csi |
|||
|
|||
import ( |
|||
"context" |
|||
"fmt" |
|||
"log" |
|||
"net" |
|||
"net/url" |
|||
"os" |
|||
"strings" |
|||
|
|||
"github.com/container-storage-interface/spec/lib/go/csi" |
|||
"google.golang.org/grpc" |
|||
) |
|||
|
|||
// DriverConfig holds configuration for the CSI driver.
type DriverConfig struct {
	Endpoint  string      // CSI endpoint (unix:///csi/csi.sock)
	DataDir   string      // volume data directory (used when a local VolumeManager is created)
	ISCSIAddr string      // local iSCSI target listen address
	IQNPrefix string      // IQN prefix for volumes
	NodeID    string      // node identifier; required by NewCSIDriver
	Logger    *log.Logger // defaults to log.Default() when nil

	// Control-plane mode fields.
	MasterAddr string // master address for control-plane mode (empty = local/standalone)
	Mode       string // "controller", "node", "all" (default "all")
}
|||
|
|||
// CSIDriver manages the gRPC server and CSI services.
type CSIDriver struct {
	identity   *identityServer   // always registered
	controller *controllerServer // nil unless mode is "controller" or "all"
	node       *nodeServer       // nil unless mode is "node" or "all"
	mgr        *VolumeManager    // nil when no local volume management is needed
	server     *grpc.Server      // created lazily in Run
	endpoint   string            // raw CSI endpoint string (unix:// or tcp://)
	logger     *log.Logger
}
|||
|
|||
// NewCSIDriver creates a new CSI driver from the given configuration.
|
|||
func NewCSIDriver(cfg DriverConfig) (*CSIDriver, error) { |
|||
if cfg.NodeID == "" { |
|||
return nil, fmt.Errorf("csi: node ID is required") |
|||
} |
|||
if cfg.Logger == nil { |
|||
cfg.Logger = log.Default() |
|||
} |
|||
if cfg.Mode == "" { |
|||
cfg.Mode = "all" |
|||
} |
|||
switch cfg.Mode { |
|||
case "controller", "node", "all": |
|||
// valid
|
|||
default: |
|||
return nil, fmt.Errorf("csi: invalid mode %q, must be controller/node/all", cfg.Mode) |
|||
} |
|||
|
|||
d := &CSIDriver{ |
|||
identity: &identityServer{}, |
|||
endpoint: cfg.Endpoint, |
|||
logger: cfg.Logger, |
|||
} |
|||
|
|||
// Create VolumeManager for modes that need local volume management.
|
|||
var mgr *VolumeManager |
|||
needsLocalMgr := cfg.Mode == "all" && cfg.MasterAddr == "" || cfg.Mode == "node" |
|||
if needsLocalMgr { |
|||
mgr = NewVolumeManager(cfg.DataDir, cfg.ISCSIAddr, cfg.IQNPrefix, cfg.Logger) |
|||
d.mgr = mgr |
|||
} |
|||
|
|||
// Create backend for controller.
|
|||
var backend VolumeBackend |
|||
if cfg.Mode == "controller" || cfg.Mode == "all" { |
|||
if cfg.MasterAddr != "" { |
|||
backend = NewMasterVolumeClient(cfg.MasterAddr, nil) |
|||
} else if mgr != nil { |
|||
backend = NewLocalVolumeBackend(mgr) |
|||
} else { |
|||
return nil, fmt.Errorf("csi: controller mode requires either --master or --data-dir") |
|||
} |
|||
d.controller = &controllerServer{backend: backend} |
|||
} |
|||
|
|||
// Create node server.
|
|||
if cfg.Mode == "node" || cfg.Mode == "all" { |
|||
d.node = &nodeServer{ |
|||
mgr: mgr, // may be nil in controller-only mode
|
|||
nodeID: cfg.NodeID, |
|||
iqnPrefix: cfg.IQNPrefix, |
|||
iscsiUtil: &realISCSIUtil{}, |
|||
mountUtil: &realMountUtil{}, |
|||
logger: cfg.Logger, |
|||
staged: make(map[string]*stagedVolumeInfo), |
|||
} |
|||
} |
|||
|
|||
return d, nil |
|||
} |
|||
|
|||
// Run starts the volume manager and gRPC server. Blocks until Stop is called.
func (d *CSIDriver) Run() error {
	// A local volume manager only exists in node/standalone configurations.
	if d.mgr != nil {
		if err := d.mgr.Start(context.Background()); err != nil {
			return fmt.Errorf("csi: start volume manager: %w", err)
		}
	}

	// Parse endpoint URL.
	proto, addr, err := parseEndpoint(d.endpoint)
	if err != nil {
		return fmt.Errorf("csi: parse endpoint: %w", err)
	}

	// Remove existing socket file if present. Best-effort: a stale socket
	// left by a previous run would otherwise make net.Listen fail.
	if proto == "unix" {
		os.Remove(addr)
	}

	ln, err := net.Listen(proto, addr)
	if err != nil {
		return fmt.Errorf("csi: listen %s: %w", d.endpoint, err)
	}

	// Identity is always served; controller/node only when the configured
	// mode created them (see NewCSIDriver).
	d.server = grpc.NewServer()
	csi.RegisterIdentityServer(d.server, d.identity)
	if d.controller != nil {
		csi.RegisterControllerServer(d.server, d.controller)
	}
	if d.node != nil {
		csi.RegisterNodeServer(d.server, d.node)
	}

	d.logger.Printf("CSI driver serving on %s", d.endpoint)
	return d.server.Serve(ln)
}
|||
|
|||
// Stop gracefully shuts down the gRPC server and volume manager.
|
|||
func (d *CSIDriver) Stop() { |
|||
if d.server != nil { |
|||
d.server.GracefulStop() |
|||
} |
|||
if d.mgr != nil { |
|||
d.mgr.Stop() |
|||
} |
|||
} |
|||
|
|||
// parseEndpoint parses a CSI endpoint string into a (network, address)
// pair suitable for net.Listen. Supported forms:
//
//	unix:///path/to/csi.sock -> ("unix", "/path/to/csi.sock")
//	tcp://host:port          -> ("tcp", "host:port")
//
// Unlike the original, an endpoint with an empty address (e.g. "unix://")
// is rejected here instead of surfacing later as a confusing listen error.
func parseEndpoint(ep string) (string, string, error) {
	switch {
	case strings.HasPrefix(ep, "unix://"):
		u, err := url.Parse(ep)
		if err != nil {
			return "", "", err
		}
		// "unix://tmp/s.sock" is tolerated by treating the URL host as
		// the leading path segment ("tmp/s.sock").
		addr := u.Path
		if u.Host != "" {
			addr = u.Host + addr
		}
		if addr == "" {
			return "", "", fmt.Errorf("empty unix socket path in endpoint: %s", ep)
		}
		return "unix", addr, nil
	case strings.HasPrefix(ep, "tcp://"):
		u, err := url.Parse(ep)
		if err != nil {
			return "", "", err
		}
		if u.Host == "" {
			return "", "", fmt.Errorf("empty tcp address in endpoint: %s", ep)
		}
		return "tcp", u.Host, nil
	}
	return "", "", fmt.Errorf("unsupported endpoint scheme: %s", ep)
}
|||
@ -0,0 +1,131 @@ |
|||
package csi |
|||
|
|||
import ( |
|||
"context" |
|||
"fmt" |
|||
|
|||
"github.com/seaweedfs/seaweedfs/weed/pb" |
|||
"github.com/seaweedfs/seaweedfs/weed/pb/master_pb" |
|||
"google.golang.org/grpc" |
|||
) |
|||
|
|||
// VolumeInfo holds volume metadata returned by the backend.
type VolumeInfo struct {
	VolumeID      string // CSI volume ID (the volume name in this driver)
	ISCSIAddr     string // iSCSI target address (ip:port)
	IQN           string // iSCSI target IQN
	CapacityBytes uint64 // provisioned capacity in bytes
}
|||
|
|||
// VolumeBackend abstracts volume lifecycle for the CSI controller.
// Implementations in this file: LocalVolumeBackend (standalone) and
// MasterVolumeClient (control-plane via master gRPC).
type VolumeBackend interface {
	// CreateVolume provisions a volume and returns its iSCSI coordinates.
	CreateVolume(ctx context.Context, name string, sizeBytes uint64) (*VolumeInfo, error)
	// DeleteVolume removes a volume. The local implementation treats a
	// missing volume as success; master behavior depends on the server.
	DeleteVolume(ctx context.Context, name string) error
	// LookupVolume returns metadata for an existing volume, or an error
	// if it is unknown.
	LookupVolume(ctx context.Context, name string) (*VolumeInfo, error)
}
|||
|
|||
// LocalVolumeBackend wraps VolumeManager for standalone/local mode (CP6-1).
// It implements VolumeBackend against the in-process manager.
type LocalVolumeBackend struct {
	mgr *VolumeManager
}

// NewLocalVolumeBackend creates a backend backed by the local VolumeManager.
func NewLocalVolumeBackend(mgr *VolumeManager) *LocalVolumeBackend {
	return &LocalVolumeBackend{mgr: mgr}
}
|||
|
|||
func (b *LocalVolumeBackend) CreateVolume(ctx context.Context, name string, sizeBytes uint64) (*VolumeInfo, error) { |
|||
if err := b.mgr.CreateVolume(name, sizeBytes); err != nil { |
|||
return nil, err |
|||
} |
|||
actualSize := b.mgr.VolumeSizeBytes(name) |
|||
if actualSize == 0 { |
|||
actualSize = sizeBytes |
|||
} |
|||
return &VolumeInfo{ |
|||
VolumeID: name, |
|||
ISCSIAddr: b.mgr.ListenAddr(), |
|||
IQN: b.mgr.VolumeIQN(name), |
|||
CapacityBytes: actualSize, |
|||
}, nil |
|||
} |
|||
|
|||
// DeleteVolume removes the named volume via the local manager, which
// treats a missing volume as success (idempotent delete).
func (b *LocalVolumeBackend) DeleteVolume(ctx context.Context, name string) error {
	return b.mgr.DeleteVolume(name)
}
|||
|
|||
func (b *LocalVolumeBackend) LookupVolume(ctx context.Context, name string) (*VolumeInfo, error) { |
|||
if !b.mgr.VolumeExists(name) { |
|||
return nil, fmt.Errorf("volume %q not found", name) |
|||
} |
|||
return &VolumeInfo{ |
|||
VolumeID: name, |
|||
ISCSIAddr: b.mgr.ListenAddr(), |
|||
IQN: b.mgr.VolumeIQN(name), |
|||
CapacityBytes: b.mgr.VolumeSizeBytes(name), |
|||
}, nil |
|||
} |
|||
|
|||
// MasterVolumeClient calls master gRPC for volume operations. It
// implements VolumeBackend for control-plane mode; each call dials the
// master via pb.WithMasterClient.
type MasterVolumeClient struct {
	masterAddr string          // master server address
	dialOpt    grpc.DialOption // NewCSIDriver passes nil — assumes pb.WithMasterClient tolerates a nil option (TODO confirm)
}

// NewMasterVolumeClient creates a client that calls the master for volume operations.
func NewMasterVolumeClient(masterAddr string, dialOpt grpc.DialOption) *MasterVolumeClient {
	return &MasterVolumeClient{
		masterAddr: masterAddr,
		dialOpt:    dialOpt,
	}
}
|||
|
|||
// CreateVolume asks the master to allocate a block volume and returns
// the placement (iSCSI address + IQN) the master chose. On RPC failure
// info is nil and err carries the failure.
func (c *MasterVolumeClient) CreateVolume(ctx context.Context, name string, sizeBytes uint64) (*VolumeInfo, error) {
	var info *VolumeInfo
	err := pb.WithMasterClient(false, pb.ServerAddress(c.masterAddr), c.dialOpt, false, func(client master_pb.SeaweedClient) error {
		resp, err := client.CreateBlockVolume(ctx, &master_pb.CreateBlockVolumeRequest{
			Name:      name,
			SizeBytes: sizeBytes,
		})
		if err != nil {
			return err
		}
		info = &VolumeInfo{
			VolumeID:      resp.VolumeId,
			ISCSIAddr:     resp.IscsiAddr,
			IQN:           resp.Iqn,
			CapacityBytes: resp.CapacityBytes,
		}
		return nil
	})
	return info, err
}
|||
|
|||
// DeleteVolume asks the master to delete the named block volume.
func (c *MasterVolumeClient) DeleteVolume(ctx context.Context, name string) error {
	return pb.WithMasterClient(false, pb.ServerAddress(c.masterAddr), c.dialOpt, false, func(client master_pb.SeaweedClient) error {
		_, err := client.DeleteBlockVolume(ctx, &master_pb.DeleteBlockVolumeRequest{
			Name: name,
		})
		return err
	})
}
|||
|
|||
// LookupVolume asks the master for an existing block volume's placement.
// VolumeID is echoed from the request name: the lookup response carries
// only address/IQN/capacity.
func (c *MasterVolumeClient) LookupVolume(ctx context.Context, name string) (*VolumeInfo, error) {
	var info *VolumeInfo
	err := pb.WithMasterClient(false, pb.ServerAddress(c.masterAddr), c.dialOpt, false, func(client master_pb.SeaweedClient) error {
		resp, err := client.LookupBlockVolume(ctx, &master_pb.LookupBlockVolumeRequest{
			Name: name,
		})
		if err != nil {
			return err
		}
		info = &VolumeInfo{
			VolumeID:      name,
			ISCSIAddr:     resp.IscsiAddr,
			IQN:           resp.Iqn,
			CapacityBytes: resp.CapacityBytes,
		}
		return nil
	})
	return info, err
}
|||
@ -0,0 +1,92 @@ |
|||
package csi |
|||
|
|||
import ( |
|||
"context" |
|||
"testing" |
|||
) |
|||
|
|||
// TestBackend_LocalCreate verifies that CreateVolume reports the
// requested capacity and a non-empty IQN, and that a subsequent lookup
// returns matching metadata.
func TestBackend_LocalCreate(t *testing.T) {
	mgr := newTestManager(t)
	backend := NewLocalVolumeBackend(mgr)

	info, err := backend.CreateVolume(context.Background(), "vol1", 4*1024*1024)
	if err != nil {
		t.Fatalf("CreateVolume: %v", err)
	}
	if info.VolumeID != "vol1" {
		t.Fatalf("VolumeID: got %q, want vol1", info.VolumeID)
	}
	if info.CapacityBytes != 4*1024*1024 {
		t.Fatalf("CapacityBytes: got %d, want %d", info.CapacityBytes, 4*1024*1024)
	}
	if info.IQN == "" {
		t.Fatal("IQN should not be empty")
	}

	// Lookup should find it.
	looked, err := backend.LookupVolume(context.Background(), "vol1")
	if err != nil {
		t.Fatalf("LookupVolume: %v", err)
	}
	if looked.VolumeID != "vol1" || looked.IQN != info.IQN {
		t.Fatalf("LookupVolume mismatch: got %+v", looked)
	}
}
|||
|
|||
// TestBackend_LocalDelete verifies delete removes the volume and that a
// subsequent lookup fails.
func TestBackend_LocalDelete(t *testing.T) {
	mgr := newTestManager(t)
	backend := NewLocalVolumeBackend(mgr)

	if _, err := backend.CreateVolume(context.Background(), "vol1", 4*1024*1024); err != nil {
		t.Fatalf("create: %v", err)
	}

	if err := backend.DeleteVolume(context.Background(), "vol1"); err != nil {
		t.Fatalf("delete: %v", err)
	}

	// Lookup should fail.
	if _, err := backend.LookupVolume(context.Background(), "vol1"); err == nil {
		t.Fatal("lookup should fail after delete")
	}
}
|||
|
|||
// TestBackend_LocalIdempotent verifies that repeating CreateVolume with
// the same name and size succeeds and yields the same IQN.
func TestBackend_LocalIdempotent(t *testing.T) {
	mgr := newTestManager(t)
	backend := NewLocalVolumeBackend(mgr)

	info1, err := backend.CreateVolume(context.Background(), "vol1", 4*1024*1024)
	if err != nil {
		t.Fatalf("first create: %v", err)
	}

	// Same name + same size = idempotent.
	info2, err := backend.CreateVolume(context.Background(), "vol1", 4*1024*1024)
	if err != nil {
		t.Fatalf("second create: %v", err)
	}

	if info1.IQN != info2.IQN {
		t.Fatalf("IQN mismatch: %q vs %q", info1.IQN, info2.IQN)
	}
}
|||
|
|||
func TestBackend_LocalDeleteIdempotent(t *testing.T) { |
|||
mgr := newTestManager(t) |
|||
backend := NewLocalVolumeBackend(mgr) |
|||
|
|||
// Deleting non-existent volume should not error.
|
|||
if err := backend.DeleteVolume(context.Background(), "nonexistent"); err != nil { |
|||
t.Fatalf("delete nonexistent: %v", err) |
|||
} |
|||
} |
|||
|
|||
func TestBackend_LocalLookupNotFound(t *testing.T) { |
|||
mgr := newTestManager(t) |
|||
backend := NewLocalVolumeBackend(mgr) |
|||
|
|||
_, err := backend.LookupVolume(context.Background(), "missing") |
|||
if err == nil { |
|||
t.Fatal("lookup missing should return error") |
|||
} |
|||
} |
|||
@ -0,0 +1,344 @@ |
|||
package csi |
|||
|
|||
import ( |
|||
"context" |
|||
"errors" |
|||
"fmt" |
|||
"log" |
|||
"net" |
|||
"os" |
|||
"path/filepath" |
|||
"sync" |
|||
|
|||
"github.com/seaweedfs/seaweedfs/weed/storage/blockvol" |
|||
"github.com/seaweedfs/seaweedfs/weed/storage/blockvol/iscsi" |
|||
) |
|||
|
|||
// Sentinel errors for VolumeManager operations.
var (
	ErrNotReady       = errors.New("csi: volume manager not ready")
	ErrVolumeExists   = errors.New("csi: volume already exists") // NOTE(review): not referenced in this file — confirm external users
	ErrVolumeNotFound = errors.New("csi: volume not found")      // NOTE(review): not referenced in this file — confirm external users
)
|||
|
|||
// managedVolume tracks a single BlockVol instance and its iSCSI target.
// Entries live in VolumeManager.volumes and are only accessed while
// holding VolumeManager.mu.
type managedVolume struct {
	vol       *blockvol.BlockVol
	path      string // file path to .blk file
	iqn       string // target IQN for this volume
	sizeBytes uint64 // provisioned size in bytes
}
|||
|
|||
// managerState tracks the lifecycle of the VolumeManager.
type managerState int

// State transitions happen only while holding VolumeManager.mu.
const (
	stateStopped  managerState = iota // initial or after Stop()
	stateStarting                     // Start() in progress
	stateReady                        // running normally
	stateFailed                       // Start() failed, retryable
)
|||
|
|||
// VolumeManager manages multiple BlockVol instances behind a shared TargetServer.
// All mutable fields are guarded by mu.
type VolumeManager struct {
	mu        sync.RWMutex
	dataDir   string                    // directory holding .blk files
	volumes   map[string]*managedVolume // tracked volumes by name
	target    *iscsi.TargetServer       // shared iSCSI target; nil when stopped
	iqnPrefix string                    // prefix for per-volume IQNs
	config    iscsi.TargetConfig
	logger    *log.Logger
	state     managerState // lifecycle state (see managerState)
	iscsiAddr string       // configured listen address (may use port 0)
}
|||
|
|||
// NewVolumeManager creates a new VolumeManager.
|
|||
func NewVolumeManager(dataDir, iscsiAddr, iqnPrefix string, logger *log.Logger) *VolumeManager { |
|||
if logger == nil { |
|||
logger = log.Default() |
|||
} |
|||
config := iscsi.DefaultTargetConfig() |
|||
return &VolumeManager{ |
|||
dataDir: dataDir, |
|||
volumes: make(map[string]*managedVolume), |
|||
iqnPrefix: iqnPrefix, |
|||
config: config, |
|||
logger: logger, |
|||
iscsiAddr: iscsiAddr, |
|||
} |
|||
} |
|||
|
|||
// Start initializes and starts the shared TargetServer.
// Safe to call after Stop() or after a failed Start(). Returns immediately if already running.
// The listener is created synchronously so port-in-use errors surface immediately.
//
// NOTE(review): ctx is currently unused — confirm whether startup should
// honor cancellation.
func (m *VolumeManager) Start(ctx context.Context) error {
	m.mu.Lock()
	defer m.mu.Unlock()

	if m.state == stateReady {
		return nil // already running
	}
	// Unreachable while all of Start runs under m.mu; kept as a guard in
	// case the lock scope ever shrinks.
	if m.state == stateStarting {
		return fmt.Errorf("csi: start already in progress")
	}
	m.state = stateStarting

	if err := os.MkdirAll(m.dataDir, 0755); err != nil {
		m.state = stateFailed // stateFailed keeps Start retryable
		return fmt.Errorf("csi: create data dir: %w", err)
	}

	m.target = iscsi.NewTargetServer(m.iscsiAddr, m.config, m.logger)

	// Create listener synchronously so bind errors are reported immediately.
	ln, err := net.Listen("tcp", m.iscsiAddr)
	if err != nil {
		m.target = nil
		m.state = stateFailed
		return fmt.Errorf("csi: listen %s: %w", m.iscsiAddr, err)
	}

	ts := m.target // capture for goroutine (m.target may be reset by Stop)
	go func() {
		// Serve returns once the server/listener is closed; errors are
		// logged rather than propagated since Start has already returned.
		if err := ts.Serve(ln); err != nil {
			m.logger.Printf("target server error: %v", err)
		}
	}()

	m.state = stateReady
	m.logger.Printf("volume manager started: dataDir=%s iscsiAddr=%s", m.dataDir, ln.Addr())
	return nil
}
|||
|
|||
// Stop closes all volumes and the target server. After Stop, Start may be called again.
|
|||
func (m *VolumeManager) Stop() error { |
|||
m.mu.Lock() |
|||
defer m.mu.Unlock() |
|||
|
|||
for name, mv := range m.volumes { |
|||
if m.target != nil { |
|||
m.target.DisconnectVolume(mv.iqn) |
|||
} |
|||
mv.vol.Close() |
|||
delete(m.volumes, name) |
|||
} |
|||
|
|||
var err error |
|||
if m.target != nil { |
|||
err = m.target.Close() |
|||
m.target = nil |
|||
} |
|||
|
|||
m.state = stateStopped |
|||
return err |
|||
} |
|||
|
|||
// ErrVolumeSizeMismatch indicates a volume already exists (tracked or on
// disk) whose size is smaller than the size requested in CreateVolume.
// An equal or larger existing volume is accepted as idempotent success.
var ErrVolumeSizeMismatch = errors.New("csi: volume exists with different size")
|||
|
|||
// CreateVolume creates a new BlockVol file and registers it with the target.
|
|||
// Idempotent: if the .blk file already exists on disk (e.g. after driver restart),
|
|||
// it is opened and tracked. Returns ErrVolumeSizeMismatch if the existing volume
|
|||
// has a smaller size than requested.
|
|||
func (m *VolumeManager) CreateVolume(name string, sizeBytes uint64) error { |
|||
m.mu.Lock() |
|||
defer m.mu.Unlock() |
|||
|
|||
if m.state != stateReady { |
|||
return ErrNotReady |
|||
} |
|||
|
|||
// Already tracked in-memory.
|
|||
if mv, ok := m.volumes[name]; ok { |
|||
if mv.sizeBytes >= sizeBytes { |
|||
return nil // idempotent
|
|||
} |
|||
return ErrVolumeSizeMismatch |
|||
} |
|||
|
|||
volPath := m.volumePath(name) |
|||
|
|||
// Check for existing .blk file on disk (survives driver restart).
|
|||
if _, statErr := os.Stat(volPath); statErr == nil { |
|||
vol, err := blockvol.OpenBlockVol(volPath) |
|||
if err != nil { |
|||
return fmt.Errorf("csi: open existing blockvol: %w", err) |
|||
} |
|||
info := vol.Info() |
|||
if info.VolumeSize < sizeBytes { |
|||
vol.Close() |
|||
return ErrVolumeSizeMismatch |
|||
} |
|||
iqn := m.volumeIQN(name) |
|||
adapter := &blockvol.BlockVolAdapter{Vol: vol, TPGID: 1} |
|||
m.target.AddVolume(iqn, adapter) |
|||
m.volumes[name] = &managedVolume{ |
|||
vol: vol, |
|||
path: volPath, |
|||
iqn: iqn, |
|||
sizeBytes: info.VolumeSize, |
|||
} |
|||
m.logger.Printf("adopted existing volume %q: %s (%d bytes)", name, iqn, info.VolumeSize) |
|||
return nil |
|||
} |
|||
|
|||
vol, err := blockvol.CreateBlockVol(volPath, blockvol.CreateOptions{ |
|||
VolumeSize: sizeBytes, |
|||
BlockSize: 4096, |
|||
WALSize: 64 * 1024 * 1024, |
|||
}) |
|||
if err != nil { |
|||
return fmt.Errorf("csi: create blockvol: %w", err) |
|||
} |
|||
|
|||
iqn := m.volumeIQN(name) |
|||
adapter := &blockvol.BlockVolAdapter{Vol: vol, TPGID: 1} |
|||
m.target.AddVolume(iqn, adapter) |
|||
|
|||
m.volumes[name] = &managedVolume{ |
|||
vol: vol, |
|||
path: volPath, |
|||
iqn: iqn, |
|||
sizeBytes: sizeBytes, |
|||
} |
|||
|
|||
m.logger.Printf("created volume %q: %s (%d bytes)", name, iqn, sizeBytes) |
|||
return nil |
|||
} |
|||
|
|||
// DeleteVolume closes and deletes a volume file and associated snapshot files.
|
|||
func (m *VolumeManager) DeleteVolume(name string) error { |
|||
m.mu.Lock() |
|||
defer m.mu.Unlock() |
|||
|
|||
mv, ok := m.volumes[name] |
|||
if !ok { |
|||
// Idempotent: try to remove file anyway in case volume was not tracked.
|
|||
volPath := m.volumePath(name) |
|||
os.Remove(volPath) |
|||
removeSnapshotFiles(volPath) |
|||
return nil |
|||
} |
|||
|
|||
if m.target != nil { |
|||
m.target.DisconnectVolume(mv.iqn) |
|||
} |
|||
mv.vol.Close() |
|||
os.Remove(mv.path) |
|||
removeSnapshotFiles(mv.path) |
|||
delete(m.volumes, name) |
|||
|
|||
m.logger.Printf("deleted volume %q", name) |
|||
return nil |
|||
} |
|||
|
|||
// removeSnapshotFiles removes any .snap.* delta files associated with a volume path.
|
|||
func removeSnapshotFiles(volPath string) { |
|||
matches, _ := filepath.Glob(volPath + ".snap.*") |
|||
for _, m := range matches { |
|||
os.Remove(m) |
|||
} |
|||
} |
|||
|
|||
// OpenVolume opens an existing BlockVol file and adds it to the target.
|
|||
func (m *VolumeManager) OpenVolume(name string) error { |
|||
m.mu.Lock() |
|||
defer m.mu.Unlock() |
|||
|
|||
if m.state != stateReady { |
|||
return ErrNotReady |
|||
} |
|||
|
|||
if _, ok := m.volumes[name]; ok { |
|||
return nil // already open, idempotent
|
|||
} |
|||
|
|||
volPath := m.volumePath(name) |
|||
vol, err := blockvol.OpenBlockVol(volPath) |
|||
if err != nil { |
|||
return fmt.Errorf("csi: open blockvol: %w", err) |
|||
} |
|||
|
|||
info := vol.Info() |
|||
iqn := m.volumeIQN(name) |
|||
adapter := &blockvol.BlockVolAdapter{Vol: vol, TPGID: 1} |
|||
m.target.AddVolume(iqn, adapter) |
|||
|
|||
m.volumes[name] = &managedVolume{ |
|||
vol: vol, |
|||
path: volPath, |
|||
iqn: iqn, |
|||
sizeBytes: info.VolumeSize, |
|||
} |
|||
|
|||
m.logger.Printf("opened volume %q: %s", name, iqn) |
|||
return nil |
|||
} |
|||
|
|||
// CloseVolume disconnects sessions, removes from target, and closes the BlockVol.
|
|||
func (m *VolumeManager) CloseVolume(name string) error { |
|||
m.mu.Lock() |
|||
defer m.mu.Unlock() |
|||
|
|||
mv, ok := m.volumes[name] |
|||
if !ok { |
|||
return nil // already closed, idempotent
|
|||
} |
|||
|
|||
if m.target != nil { |
|||
m.target.DisconnectVolume(mv.iqn) |
|||
} |
|||
mv.vol.Close() |
|||
delete(m.volumes, name) |
|||
|
|||
m.logger.Printf("closed volume %q", name) |
|||
return nil |
|||
} |
|||
|
|||
// VolumeIQN returns the iSCSI IQN for a volume name. The IQN is derived
// purely from the name; the volume does not need to exist or be tracked.
func (m *VolumeManager) VolumeIQN(name string) string {
	return m.volumeIQN(name)
}
|||
|
|||
// VolumeExists returns true if the volume is currently tracked.
|
|||
func (m *VolumeManager) VolumeExists(name string) bool { |
|||
m.mu.RLock() |
|||
defer m.mu.RUnlock() |
|||
_, ok := m.volumes[name] |
|||
return ok |
|||
} |
|||
|
|||
// VolumeSizeBytes returns the size of a tracked volume or 0 if not found.
|
|||
func (m *VolumeManager) VolumeSizeBytes(name string) uint64 { |
|||
m.mu.RLock() |
|||
defer m.mu.RUnlock() |
|||
if mv, ok := m.volumes[name]; ok { |
|||
return mv.sizeBytes |
|||
} |
|||
return 0 |
|||
} |
|||
|
|||
// ListenAddr returns the target server's listen address.
|
|||
func (m *VolumeManager) ListenAddr() string { |
|||
if m.target != nil { |
|||
return m.target.ListenAddr() |
|||
} |
|||
return "" |
|||
} |
|||
|
|||
// volumePath returns the on-disk .blk path for a volume name, using the
// filename-sanitized form of the name.
func (m *VolumeManager) volumePath(name string) string {
	return filepath.Join(m.dataDir, sanitizeFilename(name)+".blk")
}
|||
|
|||
// volumeIQN derives the iSCSI IQN for a volume name from the configured
// prefix, e.g. "iqn.2024.com.seaweedfs:pvc-abc123".
func (m *VolumeManager) volumeIQN(name string) string {
	return m.iqnPrefix + ":" + SanitizeIQN(name)
}
|||
|
|||
// sanitizeFilename delegates to the shared blockvol.SanitizeFilename
// (lowercase; invalid filename runes become '-').
func sanitizeFilename(name string) string {
	return blockvol.SanitizeFilename(name)
}
|||
|
|||
// SanitizeIQN delegates to the shared blockvol.SanitizeIQN (lowercase;
// invalid IQN runes become '-'; truncated to 64 chars with a hash suffix).
func SanitizeIQN(name string) string {
	return blockvol.SanitizeIQN(name)
}
|||
@ -0,0 +1,231 @@ |
|||
package csi |
|||
|
|||
import ( |
|||
"context" |
|||
"log" |
|||
"os" |
|||
"path/filepath" |
|||
"sync" |
|||
"testing" |
|||
) |
|||
|
|||
func newTestManager(t *testing.T) *VolumeManager { |
|||
t.Helper() |
|||
dir := t.TempDir() |
|||
logger := log.New(os.Stderr, "[test-vm] ", log.LstdFlags) |
|||
mgr := NewVolumeManager(dir, "127.0.0.1:0", "iqn.2024.com.seaweedfs", logger) |
|||
if err := mgr.Start(context.Background()); err != nil { |
|||
t.Fatalf("start: %v", err) |
|||
} |
|||
t.Cleanup(func() { mgr.Stop() }) |
|||
return mgr |
|||
} |
|||
|
|||
// TestVolumeManager_CreateOpenClose exercises create -> close -> reopen
// against a live manager, checking tracking state after each step.
func TestVolumeManager_CreateOpenClose(t *testing.T) {
	mgr := newTestManager(t)

	if err := mgr.CreateVolume("vol1", 4*1024*1024); err != nil {
		t.Fatalf("create: %v", err)
	}
	if !mgr.VolumeExists("vol1") {
		t.Fatal("expected vol1 to exist after create")
	}
	iqn := mgr.VolumeIQN("vol1")
	if iqn == "" {
		t.Fatal("expected non-empty IQN")
	}

	// Close
	if err := mgr.CloseVolume("vol1"); err != nil {
		t.Fatalf("close: %v", err)
	}
	if mgr.VolumeExists("vol1") {
		t.Fatal("expected vol1 to not exist after close")
	}

	// Reopen: the .blk file remains on disk after close, so OpenVolume succeeds.
	if err := mgr.OpenVolume("vol1"); err != nil {
		t.Fatalf("reopen: %v", err)
	}
	if !mgr.VolumeExists("vol1") {
		t.Fatal("expected vol1 to exist after reopen")
	}
}
|||
|
|||
// TestVolumeManager_DeleteRemovesFile checks that DeleteVolume removes
// the .blk file from disk. It reaches into mgr.dataDir directly; the
// name "delvol" is already filename-safe, so no sanitization is applied
// when constructing the expected path.
func TestVolumeManager_DeleteRemovesFile(t *testing.T) {
	mgr := newTestManager(t)

	if err := mgr.CreateVolume("delvol", 4*1024*1024); err != nil {
		t.Fatalf("create: %v", err)
	}

	volPath := filepath.Join(mgr.dataDir, "delvol.blk")
	if _, err := os.Stat(volPath); err != nil {
		t.Fatalf("expected file to exist: %v", err)
	}

	if err := mgr.DeleteVolume("delvol"); err != nil {
		t.Fatalf("delete: %v", err)
	}

	if _, err := os.Stat(volPath); !os.IsNotExist(err) {
		t.Fatalf("expected file to be removed, got: %v", err)
	}
	if mgr.VolumeExists("delvol") {
		t.Fatal("expected volume to not exist after delete")
	}
}
|||
|
|||
// TestVolumeManager_DuplicateCreate covers create idempotency: repeating
// with the same size succeeds, a larger size yields ErrVolumeSizeMismatch.
func TestVolumeManager_DuplicateCreate(t *testing.T) {
	mgr := newTestManager(t)

	if err := mgr.CreateVolume("dup", 4*1024*1024); err != nil {
		t.Fatalf("first create: %v", err)
	}
	// Same size -> idempotent success.
	if err := mgr.CreateVolume("dup", 4*1024*1024); err != nil {
		t.Fatalf("duplicate create (same size): expected success, got: %v", err)
	}
	// Larger size -> mismatch error.
	err := mgr.CreateVolume("dup", 8*1024*1024)
	if err != ErrVolumeSizeMismatch {
		t.Fatalf("expected ErrVolumeSizeMismatch, got: %v", err)
	}
}
|||
|
|||
func TestVolumeManager_ListenAddr(t *testing.T) { |
|||
mgr := newTestManager(t) |
|||
|
|||
addr := mgr.ListenAddr() |
|||
if addr == "" { |
|||
t.Fatal("expected non-empty listen addr") |
|||
} |
|||
} |
|||
|
|||
func TestVolumeManager_OpenNonExistent(t *testing.T) { |
|||
mgr := newTestManager(t) |
|||
|
|||
err := mgr.OpenVolume("nonexistent") |
|||
if err == nil { |
|||
t.Fatal("expected error opening non-existent volume") |
|||
} |
|||
} |
|||
|
|||
func TestVolumeManager_CloseAlreadyClosed(t *testing.T) { |
|||
mgr := newTestManager(t) |
|||
|
|||
// Close a volume that was never opened -- should be idempotent.
|
|||
if err := mgr.CloseVolume("nope"); err != nil { |
|||
t.Fatalf("close non-existent: %v", err) |
|||
} |
|||
} |
|||
|
|||
// TestVolumeManager_ConcurrentCreateDelete runs create+delete for ten
// distinct names (conc0..conc9) in parallel goroutines; it is mainly
// meaningful when the suite runs with -race.
func TestVolumeManager_ConcurrentCreateDelete(t *testing.T) {
	mgr := newTestManager(t)

	var wg sync.WaitGroup
	for i := 0; i < 10; i++ {
		name := "conc" + string(rune('0'+i))
		wg.Add(1)
		go func(n string) {
			defer wg.Done()
			if err := mgr.CreateVolume(n, 4*1024*1024); err != nil {
				t.Errorf("create %s: %v", n, err)
				return
			}
			if err := mgr.DeleteVolume(n); err != nil {
				t.Errorf("delete %s: %v", n, err)
			}
		}(name)
	}
	wg.Wait()
}
|||
|
|||
func TestVolumeManager_SanitizeIQN(t *testing.T) { |
|||
tests := []struct { |
|||
input string |
|||
want string |
|||
}{ |
|||
{"pvc-abc123", "pvc-abc123"}, |
|||
{"PVC_ABC123", "pvc-abc123"}, |
|||
{"hello world!", "hello-world-"}, |
|||
{"a/b\\c:d", "a-b-c-d"}, |
|||
} |
|||
for _, tt := range tests { |
|||
got := SanitizeIQN(tt.input) |
|||
if got != tt.want { |
|||
t.Errorf("SanitizeIQN(%q): got %q, want %q", tt.input, got, tt.want) |
|||
} |
|||
} |
|||
|
|||
// Test truncation to 64 chars.
|
|||
long := "" |
|||
for i := 0; i < 100; i++ { |
|||
long += "a" |
|||
} |
|||
if len(SanitizeIQN(long)) != 64 { |
|||
t.Fatalf("expected truncation to 64, got %d", len(SanitizeIQN(long))) |
|||
} |
|||
} |
|||
|
|||
// TestVolumeManager_CreateIdempotentAfterRestart simulates driver restart:
// existing .blk file on disk but not tracked in-memory. CreateVolume must
// adopt the file rather than fail or recreate it.
func TestVolumeManager_CreateIdempotentAfterRestart(t *testing.T) {
	dir := t.TempDir()
	logger := log.New(os.Stderr, "[test-vm] ", log.LstdFlags)

	// Phase 1: create a volume, then stop the manager.
	mgr1 := NewVolumeManager(dir, "127.0.0.1:0", "iqn.2024.com.seaweedfs", logger)
	if err := mgr1.Start(context.Background()); err != nil {
		t.Fatalf("start1: %v", err)
	}
	if err := mgr1.CreateVolume("restart-vol", 4*1024*1024); err != nil {
		t.Fatalf("create: %v", err)
	}
	mgr1.Stop()

	// Verify .blk file still exists on disk ("restart-vol" is already
	// filename-safe, so the path needs no sanitization here).
	volPath := filepath.Join(dir, "restart-vol.blk")
	if _, err := os.Stat(volPath); err != nil {
		t.Fatalf("expected .blk file to exist: %v", err)
	}

	// Phase 2: new manager over the same dataDir (simulates restart) --
	// CreateVolume should adopt the existing file and return success.
	mgr2 := NewVolumeManager(dir, "127.0.0.1:0", "iqn.2024.com.seaweedfs", logger)
	if err := mgr2.Start(context.Background()); err != nil {
		t.Fatalf("start2: %v", err)
	}
	defer mgr2.Stop()

	if err := mgr2.CreateVolume("restart-vol", 4*1024*1024); err != nil {
		t.Fatalf("create after restart: expected idempotent success, got: %v", err)
	}
	if !mgr2.VolumeExists("restart-vol") {
		t.Fatal("expected volume to be tracked after adoption")
	}
	// Adoption reports the on-disk size, which must cover the request.
	if mgr2.VolumeSizeBytes("restart-vol") < 4*1024*1024 {
		t.Fatalf("expected size >= 4MiB, got %d", mgr2.VolumeSizeBytes("restart-vol"))
	}
}
|||
|
|||
// TestVolumeManager_IQNCollision verifies that two long names sharing a prefix
|
|||
// produce distinct IQNs after truncation.
|
|||
func TestVolumeManager_IQNCollision(t *testing.T) { |
|||
prefix := "" |
|||
for i := 0; i < 70; i++ { |
|||
prefix += "a" |
|||
} |
|||
name1 := prefix + "-suffix1" |
|||
name2 := prefix + "-suffix2" |
|||
|
|||
iqn1 := SanitizeIQN(name1) |
|||
iqn2 := SanitizeIQN(name2) |
|||
|
|||
if iqn1 == iqn2 { |
|||
t.Fatalf("IQN collision: both names produced %q", iqn1) |
|||
} |
|||
if len(iqn1) > 64 || len(iqn2) > 64 { |
|||
t.Fatalf("IQN too long: %d, %d", len(iqn1), len(iqn2)) |
|||
} |
|||
} |
|||
@ -0,0 +1,31 @@ |
|||
package blockvol |
|||
|
|||
import ( |
|||
"crypto/sha256" |
|||
"encoding/hex" |
|||
"regexp" |
|||
"strings" |
|||
) |
|||
|
|||
// reInvalidFilename matches any character that is not legal in a sanitized
// volume filename: anything outside lowercase alphanumerics, '.', '_', '-'.
var reInvalidFilename = regexp.MustCompile(`[^a-z0-9._-]`)

// reInvalidIQN matches any character that is not legal in a sanitized IQN
// component: anything outside lowercase alphanumerics, '.', '-'.
// NOTE(review): unlike filenames, '_' is rejected here — presumably to follow
// iSCSI IQN naming rules (RFC 3720); confirm against the target stack.
var reInvalidIQN = regexp.MustCompile(`[^a-z0-9.\-]`)
|||
|
|||
// SanitizeFilename normalizes a volume name for use as a filename.
// Lowercases the input and replaces every character outside
// [a-z0-9._-] with '-'.
func SanitizeFilename(name string) string {
	normalize := func(r rune) rune {
		switch {
		case r >= 'a' && r <= 'z', r >= '0' && r <= '9', r == '.', r == '_', r == '-':
			return r
		}
		return '-'
	}
	return strings.Map(normalize, strings.ToLower(name))
}
|||
|
|||
// SanitizeIQN normalizes a CSI volume ID for use in an IQN.
// Lowercases the input, replaces every character outside [a-z0-9.-]
// with '-', and truncates to 64 chars. When truncation is needed, an
// 8-hex-char hash of the original name is appended to preserve uniqueness.
func SanitizeIQN(name string) string {
	cleaned := strings.Map(func(r rune) rune {
		if (r >= 'a' && r <= 'z') || (r >= '0' && r <= '9') || r == '.' || r == '-' {
			return r
		}
		return '-'
	}, strings.ToLower(name))
	if len(cleaned) <= 64 {
		return cleaned
	}
	// Hash the ORIGINAL name so names that collapse to the same sanitized
	// prefix still yield distinct IQNs. cleaned is pure ASCII at this point,
	// so byte slicing cannot split a rune.
	sum := sha256.Sum256([]byte(name))
	tag := hex.EncodeToString(sum[:4]) // 8 hex chars
	return cleaned[:64-1-len(tag)] + "-" + tag
}
|||
@ -0,0 +1,79 @@ |
|||
package blockvol |
|||
|
|||
import ( |
|||
"strings" |
|||
"testing" |
|||
) |
|||
|
|||
func TestSanitizeFilename(t *testing.T) { |
|||
tests := []struct { |
|||
input string |
|||
want string |
|||
}{ |
|||
{"simple", "simple"}, |
|||
{"VolA", "vola"}, |
|||
{"pvc-abc-123", "pvc-abc-123"}, |
|||
{"has spaces", "has-spaces"}, |
|||
{"UPPER_CASE", "upper_case"}, |
|||
{"special!@#$%chars", "special-----chars"}, |
|||
{"dots.and-dashes", "dots.and-dashes"}, |
|||
} |
|||
for _, tt := range tests { |
|||
got := SanitizeFilename(tt.input) |
|||
if got != tt.want { |
|||
t.Errorf("SanitizeFilename(%q) = %q, want %q", tt.input, got, tt.want) |
|||
} |
|||
} |
|||
} |
|||
|
|||
func TestSanitizeIQN(t *testing.T) { |
|||
tests := []struct { |
|||
input string |
|||
want string |
|||
}{ |
|||
{"simple", "simple"}, |
|||
{"VolA", "vola"}, |
|||
{"pvc-abc-123", "pvc-abc-123"}, |
|||
{"has spaces", "has-spaces"}, |
|||
{"under_score", "under-score"}, |
|||
} |
|||
for _, tt := range tests { |
|||
got := SanitizeIQN(tt.input) |
|||
if got != tt.want { |
|||
t.Errorf("SanitizeIQN(%q) = %q, want %q", tt.input, got, tt.want) |
|||
} |
|||
} |
|||
} |
|||
|
|||
func TestSanitizeIQN_Truncation(t *testing.T) { |
|||
long := strings.Repeat("a", 100) |
|||
got := SanitizeIQN(long) |
|||
if len(got) > 64 { |
|||
t.Errorf("SanitizeIQN should truncate to 64 chars, got %d", len(got)) |
|||
} |
|||
// Should end with hash suffix.
|
|||
if !strings.Contains(got, "-") { |
|||
t.Error("truncated IQN should have hash suffix separated by dash") |
|||
} |
|||
} |
|||
|
|||
func TestSanitizeConsistency(t *testing.T) { |
|||
// SanitizeFilename and SanitizeIQN should agree on lowercasing.
|
|||
// "VolA" and "vola" should produce the same sanitized output from both.
|
|||
names := []string{"VolA", "vola"} |
|||
for _, fn := range []struct { |
|||
name string |
|||
f func(string) string |
|||
}{ |
|||
{"SanitizeFilename", SanitizeFilename}, |
|||
{"SanitizeIQN", SanitizeIQN}, |
|||
} { |
|||
results := make(map[string]bool) |
|||
for _, n := range names { |
|||
results[fn.f(n)] = true |
|||
} |
|||
if len(results) != 1 { |
|||
t.Errorf("%s: 'VolA' and 'vola' should produce same result, got %v", fn.name, results) |
|||
} |
|||
} |
|||
} |
|||
@ -0,0 +1,147 @@ |
|||
//go:build integration
|
|||
|
|||
package test |
|||
|
|||
import ( |
|||
"context" |
|||
"fmt" |
|||
"strings" |
|||
"testing" |
|||
"time" |
|||
) |
|||
|
|||
// requireCmd skips the test if cmd is not available on clientNode.
|
|||
func requireCmd(t *testing.T, cmd string) { |
|||
t.Helper() |
|||
if !clientNode.HasCommand(cmd) { |
|||
t.Skipf("%s not available", cmd) |
|||
} |
|||
} |
|||
|
|||
// injectNetem adds a netem delay on the node's outbound traffic to targetIP.
|
|||
// Returns a cleanup function that removes the qdisc.
|
|||
// Requires tc (iproute2) and root access.
|
|||
func injectNetem(ctx context.Context, node *Node, targetIP string, delayMs int) (cleanup func(), err error) { |
|||
// Find the interface routing to targetIP
|
|||
iface, _, code, err := node.RunRoot(ctx, fmt.Sprintf( |
|||
"ip route get %s | head -1 | awk '{for(i=1;i<=NF;i++) if($i==\"dev\") print $(i+1)}'", targetIP)) |
|||
iface = strings.TrimSpace(iface) |
|||
if err != nil || code != 0 || iface == "" { |
|||
return nil, fmt.Errorf("find interface for %s: iface=%q code=%d err=%v", targetIP, iface, code, err) |
|||
} |
|||
|
|||
_, stderr, code, err := node.RunRoot(ctx, fmt.Sprintf( |
|||
"tc qdisc add dev %s root netem delay %dms", iface, delayMs)) |
|||
if err != nil || code != 0 { |
|||
return nil, fmt.Errorf("tc qdisc add: code=%d stderr=%s err=%v", code, stderr, err) |
|||
} |
|||
|
|||
cleanup = func() { |
|||
cctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) |
|||
defer cancel() |
|||
node.RunRoot(cctx, fmt.Sprintf("tc qdisc del dev %s root 2>/dev/null", iface)) |
|||
} |
|||
return cleanup, nil |
|||
} |
|||
|
|||
// injectIptablesDrop blocks outbound TCP traffic from node to targetIP on the given ports.
|
|||
// Returns a cleanup function that removes the iptables rules.
|
|||
func injectIptablesDrop(ctx context.Context, node *Node, targetIP string, ports []int) (cleanup func(), err error) { |
|||
for _, port := range ports { |
|||
_, stderr, code, err := node.RunRoot(ctx, fmt.Sprintf( |
|||
"iptables -A OUTPUT -d %s -p tcp --dport %d -j DROP", targetIP, port)) |
|||
if err != nil || code != 0 { |
|||
// Rollback already-added rules
|
|||
for _, p2 := range ports { |
|||
if p2 == port { |
|||
break |
|||
} |
|||
node.RunRoot(ctx, fmt.Sprintf( |
|||
"iptables -D OUTPUT -d %s -p tcp --dport %d -j DROP 2>/dev/null", targetIP, p2)) |
|||
} |
|||
return nil, fmt.Errorf("iptables add port %d: code=%d stderr=%s err=%v", port, code, stderr, err) |
|||
} |
|||
} |
|||
|
|||
cleanup = func() { |
|||
cctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) |
|||
defer cancel() |
|||
for _, port := range ports { |
|||
node.RunRoot(cctx, fmt.Sprintf( |
|||
"iptables -D OUTPUT -d %s -p tcp --dport %d -j DROP 2>/dev/null", targetIP, port)) |
|||
} |
|||
} |
|||
return cleanup, nil |
|||
} |
|||
|
|||
// fillDisk fills the filesystem at dir, leaving ~4MB free.
|
|||
// Returns a cleanup function that removes the fill file.
|
|||
func fillDisk(ctx context.Context, node *Node, dir string) (cleanup func(), err error) { |
|||
// Get available space in MB
|
|||
stdout, _, code, err := node.RunRoot(ctx, fmt.Sprintf( |
|||
"df -BM --output=avail %s | tail -1 | tr -d ' M'", dir)) |
|||
if err != nil || code != 0 { |
|||
return nil, fmt.Errorf("df: code=%d err=%v", code, err) |
|||
} |
|||
availMB := 0 |
|||
fmt.Sscanf(strings.TrimSpace(stdout), "%d", &availMB) |
|||
if availMB < 8 { |
|||
return nil, fmt.Errorf("not enough space to fill: %dMB available", availMB) |
|||
} |
|||
fillMB := availMB - 4 // leave 4MB
|
|||
|
|||
_, stderr, code, err := node.RunRoot(ctx, fmt.Sprintf( |
|||
"dd if=/dev/zero of=%s/fillfile bs=1M count=%d 2>/dev/null", dir, fillMB)) |
|||
if err != nil || code != 0 { |
|||
// dd may return non-zero on ENOSPC which is expected; check if file was created
|
|||
stdout2, _, _, _ := node.RunRoot(ctx, fmt.Sprintf("test -f %s/fillfile && echo ok", dir)) |
|||
if !strings.Contains(stdout2, "ok") { |
|||
return nil, fmt.Errorf("fillDisk dd: code=%d stderr=%s err=%v", code, stderr, err) |
|||
} |
|||
} |
|||
|
|||
cleanup = func() { |
|||
cctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) |
|||
defer cancel() |
|||
node.RunRoot(cctx, fmt.Sprintf("rm -f %s/fillfile", dir)) |
|||
} |
|||
return cleanup, nil |
|||
} |
|||
|
|||
// corruptWALRegion overwrites nBytes within the WAL section of the volume file
// with 0xFF, simulating a torn/garbled write for recovery testing.
// The WAL is embedded in the volume file starting at offset 4096 (SuperblockSize);
// the corruption lands a third of the way into the WAL region.
// Returns an error if the file cannot be statted, the WAL region is too small
// (< 2*nBytes), or the overwrite command fails.
func corruptWALRegion(ctx context.Context, node *Node, volPath string, nBytes int) error {
	const walOffset = 4096 // SuperblockSize — WAL starts here

	// Get file size to determine WAL region extent.
	// %%s escapes Sprintf so the remote command sees `stat -c %s <path>`.
	stdout, _, code, err := node.RunRoot(ctx, fmt.Sprintf("stat -c %%s %s", volPath))
	if err != nil || code != 0 {
		return fmt.Errorf("stat %s: code=%d err=%v", volPath, code, err)
	}
	// Sscanf error ignored: an unparsable size leaves fileSize at 0 and the
	// "WAL region too small" check below rejects it.
	fileSize := 0
	fmt.Sscanf(strings.TrimSpace(stdout), "%d", &fileSize)

	// WAL region is from walOffset to walOffset + walSize, clamped to the file.
	// For a 50M vol with default 64M WAL, WAL extends from 4096 to ~67M.
	// NOTE(review): the 64MB WAL size is assumed here, not read from the
	// superblock — confirm it matches the server default.
	walEnd := walOffset + 64*1024*1024 // default 64MB WAL
	if walEnd > fileSize {
		walEnd = fileSize
	}
	walUsable := walEnd - walOffset
	if walUsable < nBytes*2 {
		return fmt.Errorf("WAL region too small: %d", walUsable)
	}
	// Corrupt nBytes at a position 1/3 into the WAL region (where recent writes live).
	seekPos := walOffset + walUsable/3

	// python3 emits nBytes of 0xFF on stdout; dd writes them in place
	// (conv=notrunc) at seekPos without truncating the volume file.
	_, stderr, code, err := node.RunRoot(ctx, fmt.Sprintf(
		"python3 -c \"import sys; sys.stdout.buffer.write(b'\\xff'*%d)\" | dd of=%s bs=1 seek=%d conv=notrunc 2>/dev/null",
		nBytes, volPath, seekPos))
	if err != nil || code != 0 {
		return fmt.Errorf("corrupt WAL region: code=%d stderr=%s err=%v", code, stderr, err)
	}
	return nil
}
|||
|
|||
Write
Preview
Loading…
Cancel
Save
Reference in new issue