Browse Source
test: add integration test infrastructure for blockvol iSCSI
test: add integration test infrastructure for blockvol iSCSI
Test harness for running blockvol iSCSI tests on WSL2 and remote nodes (m01/M02). Includes Node (SSH/local exec), ISCSIClient (discover/login/ logout), WeedTarget (weed volume server lifecycle), and test suites for smoke, stress, crash recovery, chaos, perf benchmarks, and apps (fio/dd). Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>feature/sw-block
11 changed files with 3034 additions and 0 deletions
-
311weed/storage/blockvol/test/apps_test.go
-
181weed/storage/blockvol/test/chaos_test.go
-
228weed/storage/blockvol/test/crash_test.go
-
281weed/storage/blockvol/test/integration_test.go
-
229weed/storage/blockvol/test/iscsi.go
-
316weed/storage/blockvol/test/node.go
-
168weed/storage/blockvol/test/perf_test.go
-
190weed/storage/blockvol/test/smoke_test.go
-
182weed/storage/blockvol/test/stress_test.go
-
212weed/storage/blockvol/test/weed_target.go
-
736weed/storage/blockvol/test/weedvol_test.go
@ -0,0 +1,311 @@ |
|||
//go:build integration && apps
|
|||
|
|||
package test |
|||
|
|||
import ( |
|||
"context" |
|||
"fmt" |
|||
"strings" |
|||
"testing" |
|||
"time" |
|||
) |
|||
|
|||
func TestApps(t *testing.T) { |
|||
t.Run("Postgres", testAppsPostgres) |
|||
t.Run("MySQL", testAppsMySQL) |
|||
t.Run("SQLiteWAL", testAppsSQLiteWAL) |
|||
t.Run("QemuBoot", testAppsQemuBoot) |
|||
t.Run("QemuFio", testAppsQemuFio) |
|||
t.Run("DockerOverlay", testAppsDockerOverlay) |
|||
t.Run("LVMStripe", testAppsLVMStripe) |
|||
t.Run("MdRaid1", testAppsMdRaid1) |
|||
} |
|||
|
|||
func testAppsPostgres(t *testing.T) { |
|||
requireCmd(t, "pg_isready") |
|||
requireCmd(t, "pgbench") |
|||
|
|||
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute) |
|||
defer cancel() |
|||
|
|||
tgt, iscsi, host := newTestTarget(t, "500M", "") |
|||
dev := startAndLogin(t, ctx, tgt, iscsi, host) |
|||
mnt := "/tmp/blockvol-pg" |
|||
pgdata := mnt + "/pgdata" |
|||
|
|||
t.Cleanup(func() { |
|||
cleanCtx, c := context.WithTimeout(context.Background(), 15*time.Second) |
|||
defer c() |
|||
clientNode.RunRoot(cleanCtx, fmt.Sprintf("sudo -u postgres pg_ctl -D %s stop -m fast 2>/dev/null || true", pgdata)) |
|||
clientNode.RunRoot(cleanCtx, fmt.Sprintf("umount -f %s 2>/dev/null", mnt)) |
|||
clientNode.RunRoot(cleanCtx, fmt.Sprintf("rm -rf %s", mnt)) |
|||
}) |
|||
|
|||
// mkfs + mount
|
|||
clientNode.RunRoot(ctx, fmt.Sprintf("mkfs.ext4 -F %s", dev)) |
|||
clientNode.RunRoot(ctx, fmt.Sprintf("mkdir -p %s", mnt)) |
|||
clientNode.RunRoot(ctx, fmt.Sprintf("mount %s %s", dev, mnt)) |
|||
|
|||
// initdb -- use full path since sudo doesn't inherit PG bin dir
|
|||
// chown the entire mount point so postgres can write pg.log there
|
|||
clientNode.RunRoot(ctx, fmt.Sprintf("chown postgres:postgres %s", mnt)) |
|||
clientNode.RunRoot(ctx, fmt.Sprintf("mkdir -p %s", pgdata)) |
|||
clientNode.RunRoot(ctx, fmt.Sprintf("chown postgres:postgres %s", pgdata)) |
|||
clientNode.RunRoot(ctx, fmt.Sprintf("chmod 700 %s", pgdata)) |
|||
_, stderr, code, _ := clientNode.RunRoot(ctx, |
|||
fmt.Sprintf("sudo -u postgres /usr/lib/postgresql/*/bin/initdb -D %s", pgdata)) |
|||
if code != 0 { |
|||
t.Fatalf("initdb: code=%d stderr=%s", code, stderr) |
|||
} |
|||
|
|||
// Start postgres with custom port to avoid conflict with system instance
|
|||
_, stderr, code, _ = clientNode.RunRoot(ctx, |
|||
fmt.Sprintf("sudo -u postgres /usr/lib/postgresql/*/bin/pg_ctl -D %s -l %s/pg.log -o '-p 15432' start", pgdata, mnt)) |
|||
if code != 0 { |
|||
t.Fatalf("pg_ctl start: code=%d stderr=%s", code, stderr) |
|||
} |
|||
|
|||
// pgbench init + run
|
|||
clientNode.RunRoot(ctx, "sudo -u postgres /usr/lib/postgresql/*/bin/createdb -p 15432 pgbench 2>/dev/null") |
|||
_, stderr, code, _ = clientNode.RunRoot(ctx, "sudo -u postgres pgbench -p 15432 -i pgbench") |
|||
if code != 0 { |
|||
t.Fatalf("pgbench init: code=%d stderr=%s", code, stderr) |
|||
} |
|||
stdout, stderr, code, _ := clientNode.RunRoot(ctx, "sudo -u postgres pgbench -p 15432 -T 30 pgbench") |
|||
if code != 0 { |
|||
t.Fatalf("pgbench run: code=%d stderr=%s", code, stderr) |
|||
} |
|||
// Extract TPS from pgbench output
|
|||
for _, line := range strings.Split(stdout, "\n") { |
|||
if strings.Contains(line, "tps") { |
|||
t.Logf("pgbench: %s", strings.TrimSpace(line)) |
|||
} |
|||
} |
|||
|
|||
// Kill9 target
|
|||
clientNode.RunRoot(ctx, fmt.Sprintf("sudo -u postgres /usr/lib/postgresql/*/bin/pg_ctl -D %s stop -m fast 2>/dev/null || true", pgdata)) |
|||
clientNode.RunRoot(ctx, fmt.Sprintf("umount -f %s 2>/dev/null", mnt)) |
|||
iscsi.Logout(ctx, tgt.config.IQN) |
|||
iscsi.CleanupAll(ctx, tgt.config.IQN) |
|||
tgt.Kill9() |
|||
|
|||
// Restart and verify recovery
|
|||
if err := tgt.Start(ctx, false); err != nil { |
|||
t.Fatalf("restart: %v", err) |
|||
} |
|||
dev, err := iscsi.Login(ctx, tgt.config.IQN) |
|||
if err != nil { |
|||
t.Fatalf("re-login: %v", err) |
|||
} |
|||
clientNode.RunRoot(ctx, fmt.Sprintf("mount %s %s", dev, mnt)) |
|||
|
|||
_, stderr, code, _ = clientNode.RunRoot(ctx, |
|||
fmt.Sprintf("sudo -u postgres /usr/lib/postgresql/*/bin/pg_ctl -D %s -l %s/pg.log -o '-p 15432' start", pgdata, mnt)) |
|||
if code != 0 { |
|||
t.Fatalf("pg recovery start: code=%d stderr=%s", code, stderr) |
|||
} |
|||
// Verify recovery -- pg_isready should succeed
|
|||
_, _, code, _ = clientNode.RunRoot(ctx, "pg_isready -p 15432") |
|||
if code != 0 { |
|||
t.Fatalf("pg_isready failed after recovery") |
|||
} |
|||
t.Log("postgres recovery after Kill9 succeeded") |
|||
} |
|||
|
|||
func testAppsMySQL(t *testing.T) { |
|||
requireCmd(t, "mysqld") |
|||
requireCmd(t, "sysbench") |
|||
|
|||
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute) |
|||
defer cancel() |
|||
|
|||
tgt, iscsi, host := newTestTarget(t, "500M", "") |
|||
dev := startAndLogin(t, ctx, tgt, iscsi, host) |
|||
mnt := "/tmp/blockvol-mysql" |
|||
mysqlData := mnt + "/mysql" |
|||
sock := "/tmp/mysql-blockvol-test.sock" |
|||
|
|||
t.Cleanup(func() { |
|||
cleanCtx, c := context.WithTimeout(context.Background(), 15*time.Second) |
|||
defer c() |
|||
clientNode.RunRoot(cleanCtx, fmt.Sprintf("mysqladmin -u root -S %s shutdown 2>/dev/null || true", sock)) |
|||
time.Sleep(2 * time.Second) |
|||
clientNode.RunRoot(cleanCtx, fmt.Sprintf("umount -f %s 2>/dev/null", mnt)) |
|||
clientNode.RunRoot(cleanCtx, fmt.Sprintf("rm -rf %s %s", mnt, sock)) |
|||
}) |
|||
|
|||
clientNode.RunRoot(ctx, fmt.Sprintf("mkfs.ext4 -F %s", dev)) |
|||
clientNode.RunRoot(ctx, fmt.Sprintf("mkdir -p %s", mnt)) |
|||
clientNode.RunRoot(ctx, fmt.Sprintf("mount %s %s", dev, mnt)) |
|||
|
|||
// Stop any system mysqld to avoid port/socket conflicts
|
|||
clientNode.RunRoot(ctx, "systemctl stop mysql 2>/dev/null || true") |
|||
clientNode.RunRoot(ctx, fmt.Sprintf("rm -f %s", sock)) |
|||
|
|||
// Initialize MySQL with custom datadir
|
|||
// Run as root to avoid AppArmor ownership issues on iSCSI-backed ext4
|
|||
clientNode.RunRoot(ctx, fmt.Sprintf("chown -R mysql:mysql %s", mnt)) |
|||
_, stderr, code, _ := clientNode.RunRoot(ctx, |
|||
fmt.Sprintf("mysqld --initialize-insecure --datadir=%s --user=root 2>&1", mysqlData)) |
|||
if code != 0 { |
|||
t.Fatalf("mysqld init: code=%d stderr=%s", code, stderr) |
|||
} |
|||
|
|||
// Start mysqld with custom socket and port
|
|||
clientNode.RunRoot(ctx, fmt.Sprintf( |
|||
"bash -c 'mysqld --datadir=%s --socket=%s --port=13306 --user=root --skip-grant-tables &'", |
|||
mysqlData, sock)) |
|||
// Wait for mysqld to be ready
|
|||
for i := 0; i < 30; i++ { |
|||
_, _, code, _ = clientNode.RunRoot(ctx, fmt.Sprintf("mysqladmin -u root -S %s ping 2>/dev/null", sock)) |
|||
if code == 0 { |
|||
break |
|||
} |
|||
time.Sleep(time.Second) |
|||
} |
|||
if code != 0 { |
|||
t.Fatalf("mysqld did not start") |
|||
} |
|||
|
|||
// Sysbench
|
|||
clientNode.RunRoot(ctx, fmt.Sprintf("mysql -u root -S %s -e 'CREATE DATABASE IF NOT EXISTS sbtest'", sock)) |
|||
_, stderr, code, _ = clientNode.RunRoot(ctx, fmt.Sprintf( |
|||
"sysbench oltp_read_write --mysql-socket=%s --mysql-user=root --db-driver=mysql --tables=4 --table-size=1000 prepare", sock)) |
|||
if code != 0 { |
|||
t.Fatalf("sysbench prepare: code=%d stderr=%s", code, stderr) |
|||
} |
|||
stdout, stderr, code, _ := clientNode.RunRoot(ctx, fmt.Sprintf( |
|||
"sysbench oltp_read_write --mysql-socket=%s --mysql-user=root --db-driver=mysql --tables=4 --table-size=1000 --time=30 run", sock)) |
|||
if code != 0 { |
|||
t.Fatalf("sysbench run: code=%d stderr=%s", code, stderr) |
|||
} |
|||
for _, line := range strings.Split(stdout, "\n") { |
|||
if strings.Contains(line, "transactions:") || strings.Contains(line, "queries:") { |
|||
t.Logf("sysbench: %s", strings.TrimSpace(line)) |
|||
} |
|||
} |
|||
|
|||
// Clean shutdown
|
|||
clientNode.RunRoot(ctx, fmt.Sprintf("mysqladmin -u root -S %s shutdown", sock)) |
|||
t.Log("MySQL + sysbench test passed") |
|||
} |
|||
|
|||
func testAppsSQLiteWAL(t *testing.T) { |
|||
requireCmd(t, "sqlite3") |
|||
|
|||
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute) |
|||
defer cancel() |
|||
|
|||
tgt, iscsi, host := newTestTarget(t, "100M", "") |
|||
dev := startAndLogin(t, ctx, tgt, iscsi, host) |
|||
mnt := "/tmp/blockvol-sqlite" |
|||
|
|||
t.Cleanup(func() { |
|||
cleanCtx, c := context.WithTimeout(context.Background(), 10*time.Second) |
|||
defer c() |
|||
clientNode.RunRoot(cleanCtx, fmt.Sprintf("umount -f %s 2>/dev/null", mnt)) |
|||
clientNode.RunRoot(cleanCtx, fmt.Sprintf("rm -rf %s", mnt)) |
|||
}) |
|||
|
|||
clientNode.RunRoot(ctx, fmt.Sprintf("mkfs.ext4 -F %s", dev)) |
|||
clientNode.RunRoot(ctx, fmt.Sprintf("mkdir -p %s && mount %s %s", mnt, dev, mnt)) |
|||
|
|||
// Create DB in WAL mode, insert 10K rows via batched inserts
|
|||
// Use a script file to avoid shell quoting issues over SSH
|
|||
script := fmt.Sprintf(`bash -c ' |
|||
set -e |
|||
DB="%s/test.db" |
|||
rm -f "$DB" "$DB-wal" "$DB-shm" |
|||
sqlite3 "$DB" "PRAGMA journal_mode=WAL; CREATE TABLE t(id INTEGER PRIMARY KEY, val TEXT);" |
|||
for i in $(seq 1 100); do |
|||
SQL="BEGIN;" |
|||
for j in $(seq 1 100); do |
|||
n=$(( (i-1)*100 + j )) |
|||
SQL="${SQL} INSERT INTO t(val) VALUES('"'"'row_${n}'"'"');" |
|||
done |
|||
SQL="${SQL} COMMIT;" |
|||
sqlite3 "$DB" "$SQL" |
|||
done |
|||
sqlite3 "$DB" "SELECT count(*) FROM t;" |
|||
'`, mnt) |
|||
|
|||
stdout, stderr, code, _ := clientNode.RunRoot(ctx, script) |
|||
if code != 0 { |
|||
t.Fatalf("sqlite3 failed: code=%d stderr=%s", code, stderr) |
|||
} |
|||
// Last line of stdout should be the count
|
|||
lines := strings.Split(strings.TrimSpace(stdout), "\n") |
|||
lastLine := lines[len(lines)-1] |
|||
if lastLine != "10000" { |
|||
t.Fatalf("expected 10000 rows, got last line: %q (full output: %s)", lastLine, stdout) |
|||
} |
|||
t.Log("SQLite WAL: 10K rows inserted and verified") |
|||
} |
|||
|
|||
func testAppsQemuBoot(t *testing.T) { |
|||
requireCmd(t, "qemu-system-x86_64") |
|||
t.Skip("QEMU boot test requires Alpine ISO setup") |
|||
} |
|||
|
|||
func testAppsQemuFio(t *testing.T) { |
|||
requireCmd(t, "qemu-system-x86_64") |
|||
t.Skip("QEMU fio test requires VM image setup") |
|||
} |
|||
|
|||
func testAppsDockerOverlay(t *testing.T) { |
|||
requireCmd(t, "docker") |
|||
|
|||
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute) |
|||
defer cancel() |
|||
|
|||
tgt, iscsi, host := newTestTarget(t, "500M", "") |
|||
dev := startAndLogin(t, ctx, tgt, iscsi, host) |
|||
mnt := "/tmp/blockvol-docker" |
|||
|
|||
t.Cleanup(func() { |
|||
cleanCtx, c := context.WithTimeout(context.Background(), 15*time.Second) |
|||
defer c() |
|||
clientNode.RunRoot(cleanCtx, fmt.Sprintf("umount -f %s 2>/dev/null", mnt)) |
|||
clientNode.RunRoot(cleanCtx, fmt.Sprintf("rm -rf %s", mnt)) |
|||
}) |
|||
|
|||
clientNode.RunRoot(ctx, fmt.Sprintf("mkfs.ext4 -F %s", dev)) |
|||
clientNode.RunRoot(ctx, fmt.Sprintf("mkdir -p %s && mount %s %s", mnt, dev, mnt)) |
|||
|
|||
// Write a file via Docker bind-mount to the iSCSI-backed filesystem
|
|||
clientNode.RunRoot(ctx, "docker pull alpine:latest 2>/dev/null") |
|||
testContent := "blockvol-docker-integration-test" |
|||
stdout, stderr, code, _ := clientNode.RunRoot(ctx, |
|||
fmt.Sprintf("docker run --rm -v %s:/data alpine:latest sh -c 'echo %s > /data/docker-test.txt && cat /data/docker-test.txt'", |
|||
mnt, testContent)) |
|||
if code != 0 { |
|||
t.Fatalf("docker run failed: code=%d stderr=%s stdout=%s", code, stderr, stdout) |
|||
} |
|||
if !strings.Contains(stdout, testContent) { |
|||
t.Fatalf("expected %q in output, got: %s", testContent, stdout) |
|||
} |
|||
|
|||
// Verify file persists on host
|
|||
stdout2, _, _, _ := clientNode.RunRoot(ctx, fmt.Sprintf("cat %s/docker-test.txt", mnt)) |
|||
if !strings.Contains(stdout2, testContent) { |
|||
t.Fatalf("file not persisted: %s", stdout2) |
|||
} |
|||
t.Log("Docker on iSCSI-backed ext4 passed") |
|||
} |
|||
|
|||
func testAppsLVMStripe(t *testing.T) { |
|||
requireCmd(t, "pvcreate") |
|||
t.Skip("LVM stripe test requires 2 iSCSI volumes") |
|||
} |
|||
|
|||
func testAppsMdRaid1(t *testing.T) { |
|||
requireCmd(t, "mdadm") |
|||
t.Skip("MD RAID-1 test requires 2 iSCSI volumes") |
|||
} |
|||
|
|||
func requireCmd(t *testing.T, cmd string) { |
|||
t.Helper() |
|||
if !clientNode.HasCommand(cmd) { |
|||
t.Skipf("%s not available", cmd) |
|||
} |
|||
} |
|||
@ -0,0 +1,181 @@ |
|||
//go:build integration
|
|||
|
|||
package test |
|||
|
|||
import ( |
|||
"context" |
|||
"fmt" |
|||
"strings" |
|||
"testing" |
|||
"time" |
|||
) |
|||
|
|||
func TestChaos(t *testing.T) { |
|||
t.Run("Reconnect20", testChaosReconnect20) |
|||
t.Run("MultiSession4", testChaosMultiSession4) |
|||
t.Run("WALFull", testChaosWALFull) |
|||
t.Run("AttachDetach10", testChaosAttachDetach10) |
|||
t.Run("ConfigRestart", testChaosConfigRestart) |
|||
} |
|||
|
|||
func testChaosReconnect20(t *testing.T) { |
|||
ctx, cancel := context.WithTimeout(context.Background(), 25*time.Minute) |
|||
defer cancel() |
|||
|
|||
tgt, iscsi, host := newTestTarget(t, "100M", "") |
|||
if err := tgt.Start(ctx, true); err != nil { |
|||
t.Fatalf("start: %v", err) |
|||
} |
|||
|
|||
n := 20 |
|||
if testing.Short() { |
|||
n = 5 |
|||
} |
|||
for i := 0; i < n; i++ { |
|||
t.Logf("reconnect %d/%d", i+1, n) |
|||
|
|||
iscsi.Discover(ctx, host, tgt.config.Port) |
|||
dev, err := iscsi.Login(ctx, tgt.config.IQN) |
|||
if err != nil { |
|||
t.Fatalf("iter %d login: %v", i, err) |
|||
} |
|||
|
|||
// Write 1MB + verify
|
|||
_, _, code, _ := clientNode.RunRoot(ctx, |
|||
fmt.Sprintf("dd if=/dev/urandom of=%s bs=1M count=1 oflag=direct 2>/dev/null", dev)) |
|||
if code != 0 { |
|||
t.Fatalf("iter %d dd write failed", i) |
|||
} |
|||
|
|||
sum, _, _, _ := clientNode.RunRoot(ctx, |
|||
fmt.Sprintf("dd if=%s bs=1M count=1 iflag=direct 2>/dev/null | md5sum", dev)) |
|||
if firstLine(sum) == "" { |
|||
t.Fatalf("iter %d empty checksum", i) |
|||
} |
|||
|
|||
if err := iscsi.Logout(ctx, tgt.config.IQN); err != nil { |
|||
t.Fatalf("iter %d logout: %v", i, err) |
|||
} |
|||
|
|||
// Brief pause for session teardown
|
|||
time.Sleep(200 * time.Millisecond) |
|||
} |
|||
t.Logf("%dx reconnect completed", n) |
|||
} |
|||
|
|||
func testChaosMultiSession4(t *testing.T) { |
|||
t.Skip("multi-session test requires multiple target IQN support") |
|||
} |
|||
|
|||
func testChaosWALFull(t *testing.T) { |
|||
if !clientNode.HasCommand("fio") { |
|||
t.Skip("fio required") |
|||
} |
|||
|
|||
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute) |
|||
defer cancel() |
|||
|
|||
tgt, iscsi, host := newTestTarget(t, "100M", "4M") // tiny WAL
|
|||
dev := startAndLogin(t, ctx, tgt, iscsi, host) |
|||
|
|||
// Sustained write much larger than WAL
|
|||
stdout, stderr, code, _ := clientNode.RunRoot(ctx, |
|||
fmt.Sprintf("fio --name=walfull --filename=%s --rw=write --bs=64k "+ |
|||
"--size=80M --direct=1 --ioengine=libaio", dev)) |
|||
if code != 0 { |
|||
t.Fatalf("fio: code=%d stderr=%s stdout=%s", code, stderr, stdout) |
|||
} |
|||
t.Log("WAL full test passed (4MB WAL, 80MB write)") |
|||
} |
|||
|
|||
func testChaosAttachDetach10(t *testing.T) { |
|||
if !clientNode.HasCommand("fio") { |
|||
t.Skip("fio required") |
|||
} |
|||
|
|||
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute) |
|||
defer cancel() |
|||
|
|||
tgt, iscsi, host := newTestTarget(t, "100M", "") |
|||
if err := tgt.Start(ctx, true); err != nil { |
|||
t.Fatalf("start: %v", err) |
|||
} |
|||
|
|||
n2 := 10 |
|||
if testing.Short() { |
|||
n2 = 3 |
|||
} |
|||
for i := 0; i < n2; i++ { |
|||
t.Logf("attach/detach %d/%d", i+1, n2) |
|||
|
|||
iscsi.Discover(ctx, host, tgt.config.Port) |
|||
dev, err := iscsi.Login(ctx, tgt.config.IQN) |
|||
if err != nil { |
|||
t.Fatalf("iter %d login: %v", i, err) |
|||
} |
|||
|
|||
// Quick fio
|
|||
_, _, code, _ := clientNode.RunRoot(ctx, |
|||
fmt.Sprintf("fio --name=ad%d --filename=%s --rw=randrw --verify=crc32 "+ |
|||
"--bs=4k --size=10M --direct=1 --ioengine=libaio --randrepeat=1", i, dev)) |
|||
if code != 0 { |
|||
t.Fatalf("iter %d fio failed", i) |
|||
} |
|||
|
|||
if err := iscsi.Logout(ctx, tgt.config.IQN); err != nil { |
|||
t.Fatalf("iter %d logout: %v", i, err) |
|||
} |
|||
time.Sleep(200 * time.Millisecond) |
|||
} |
|||
|
|||
// Verify no stale devices
|
|||
stdout, _, _, _ := clientNode.RunRoot(ctx, "iscsiadm -m session 2>&1") |
|||
if strings.Contains(stdout, tgt.config.IQN) { |
|||
t.Fatalf("stale session after 10 cycles: %s", stdout) |
|||
} |
|||
t.Logf("%dx attach/detach completed, no stale devices", n2) |
|||
} |
|||
|
|||
func testChaosConfigRestart(t *testing.T) { |
|||
if !clientNode.HasCommand("fio") { |
|||
t.Skip("fio required") |
|||
} |
|||
|
|||
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute) |
|||
defer cancel() |
|||
|
|||
tgt, iscsi, host := newTestTarget(t, "100M", "") |
|||
dev := startAndLogin(t, ctx, tgt, iscsi, host) |
|||
|
|||
// fio with default config
|
|||
_, _, code, _ := clientNode.RunRoot(ctx, |
|||
fmt.Sprintf("fio --name=cfg1 --filename=%s --rw=randrw --bs=4k "+ |
|||
"--size=10M --direct=1 --ioengine=libaio --randrepeat=1", dev)) |
|||
if code != 0 { |
|||
t.Fatalf("fio phase 1 failed") |
|||
} |
|||
|
|||
// Logout + stop
|
|||
iscsi.Logout(ctx, tgt.config.IQN) |
|||
tgt.Stop(ctx) |
|||
|
|||
// Restart (open existing vol)
|
|||
if err := tgt.Start(ctx, false); err != nil { |
|||
t.Fatalf("restart: %v", err) |
|||
} |
|||
|
|||
iscsi.Discover(ctx, host, tgt.config.Port) |
|||
dev, err := iscsi.Login(ctx, tgt.config.IQN) |
|||
if err != nil { |
|||
t.Fatalf("re-login: %v", err) |
|||
} |
|||
|
|||
// fio again
|
|||
_, _, code, _ = clientNode.RunRoot(ctx, |
|||
fmt.Sprintf("fio --name=cfg2 --filename=%s --rw=randrw --bs=4k "+ |
|||
"--size=10M --direct=1 --ioengine=libaio --randrepeat=1", dev)) |
|||
if code != 0 { |
|||
t.Fatalf("fio phase 2 failed") |
|||
} |
|||
t.Log("config restart test passed") |
|||
} |
|||
@ -0,0 +1,228 @@ |
|||
//go:build integration
|
|||
|
|||
package test |
|||
|
|||
import ( |
|||
"context" |
|||
"fmt" |
|||
"testing" |
|||
"time" |
|||
) |
|||
|
|||
func TestCrash(t *testing.T) { |
|||
if !clientNode.HasCommand("fio") { |
|||
t.Skip("fio required for crash tests") |
|||
} |
|||
t.Run("Kill9Fsync", testCrashKill9Fsync) |
|||
t.Run("Kill9NoSync", testCrashKill9NoSync) |
|||
t.Run("WALReplay", testCrashWALReplay) |
|||
t.Run("RapidKill10x", testCrashRapidKill10x) |
|||
t.Run("FsckAfterCrash", testCrashFsckAfterCrash) |
|||
} |
|||
|
|||
func testCrashKill9Fsync(t *testing.T) { |
|||
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute) |
|||
defer cancel() |
|||
|
|||
tgt, iscsi, host := newTestTarget(t, "100M", "") |
|||
dev := startAndLogin(t, ctx, tgt, iscsi, host) |
|||
|
|||
// Write with fdatasync
|
|||
_, _, code, _ := clientNode.RunRoot(ctx, |
|||
fmt.Sprintf("fio --name=sync --filename=%s --rw=write --bs=4k --size=10M "+ |
|||
"--fdatasync=1 --direct=1 --ioengine=libaio", dev)) |
|||
if code != 0 { |
|||
t.Fatalf("fio write failed: code=%d", code) |
|||
} |
|||
|
|||
// Record checksum of synced data
|
|||
sum1, _, _, _ := clientNode.RunRoot(ctx, |
|||
fmt.Sprintf("dd if=%s bs=4k count=2560 iflag=direct 2>/dev/null | md5sum", dev)) |
|||
|
|||
// Kill9
|
|||
t.Log("killing target...") |
|||
tgt.Kill9() |
|||
|
|||
// Clean up stale iSCSI state before restart
|
|||
iscsi.Logout(ctx, tgt.config.IQN) |
|||
iscsi.CleanupAll(ctx, tgt.config.IQN) |
|||
|
|||
// Restart
|
|||
t.Log("restarting target...") |
|||
if err := tgt.Start(ctx, false); err != nil { |
|||
t.Fatalf("restart: %v", err) |
|||
} |
|||
|
|||
// Re-login
|
|||
dev, err := iscsi.Login(ctx, tgt.config.IQN) |
|||
if err != nil { |
|||
t.Fatalf("re-login: %v", err) |
|||
} |
|||
|
|||
// Verify synced data intact
|
|||
sum2, _, _, _ := clientNode.RunRoot(ctx, |
|||
fmt.Sprintf("dd if=%s bs=4k count=2560 iflag=direct 2>/dev/null | md5sum", dev)) |
|||
|
|||
if firstLine(sum1) != firstLine(sum2) { |
|||
t.Fatalf("synced data corrupted: %s vs %s", firstLine(sum1), firstLine(sum2)) |
|||
} |
|||
t.Log("synced data intact after Kill9") |
|||
} |
|||
|
|||
func testCrashKill9NoSync(t *testing.T) { |
|||
ctx, cancel := context.WithTimeout(context.Background(), 3*time.Minute) |
|||
defer cancel() |
|||
|
|||
tgt, iscsi, host := newTestTarget(t, "100M", "") |
|||
_ = startAndLogin(t, ctx, tgt, iscsi, host) |
|||
|
|||
// Kill9 without sync
|
|||
tgt.Kill9() |
|||
iscsi.Logout(ctx, tgt.config.IQN) |
|||
|
|||
// Restart -- volume must open without corruption
|
|||
if err := tgt.Start(ctx, false); err != nil { |
|||
t.Fatalf("restart after unclean kill: %v", err) |
|||
} |
|||
|
|||
// Login to verify volume is usable
|
|||
_, err := iscsi.Login(ctx, tgt.config.IQN) |
|||
if err != nil { |
|||
t.Fatalf("login after restart: %v", err) |
|||
} |
|||
t.Log("volume opened successfully after Kill9 (no sync)") |
|||
} |
|||
|
|||
func testCrashWALReplay(t *testing.T) { |
|||
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute) |
|||
defer cancel() |
|||
|
|||
tgt, iscsi, host := newTestTarget(t, "100M", "4M") // small WAL
|
|||
dev := startAndLogin(t, ctx, tgt, iscsi, host) |
|||
|
|||
// Write a 4k block of known data (O_DIRECT requires sector-aligned writes)
|
|||
_, _, code, _ := clientNode.RunRoot(ctx, |
|||
fmt.Sprintf("dd if=/dev/urandom of=%s bs=4k count=1 oflag=direct 2>/dev/null", dev)) |
|||
if code != 0 { |
|||
t.Fatalf("pattern write failed") |
|||
} |
|||
// Read back the checksum before kill
|
|||
sumBefore, _, _, _ := clientNode.RunRoot(ctx, |
|||
fmt.Sprintf("dd if=%s bs=4k count=1 iflag=direct 2>/dev/null | md5sum", dev)) |
|||
|
|||
// Kill9 before flush can happen
|
|||
tgt.Kill9() |
|||
iscsi.Logout(ctx, tgt.config.IQN) |
|||
|
|||
// Restart (WAL replay)
|
|||
if err := tgt.Start(ctx, false); err != nil { |
|||
t.Fatalf("restart: %v", err) |
|||
} |
|||
|
|||
// Re-login and verify
|
|||
dev, err := iscsi.Login(ctx, tgt.config.IQN) |
|||
if err != nil { |
|||
t.Fatalf("re-login: %v", err) |
|||
} |
|||
|
|||
sumAfter, _, _, _ := clientNode.RunRoot(ctx, |
|||
fmt.Sprintf("dd if=%s bs=4k count=1 iflag=direct 2>/dev/null | md5sum", dev)) |
|||
// Non-fdatasync writes have no durability guarantee, but volume must be readable
|
|||
if firstLine(sumAfter) == "" { |
|||
t.Fatalf("could not read data after WAL replay") |
|||
} |
|||
t.Logf("WAL replay: before=%s after=%s (match=%v)", |
|||
firstLine(sumBefore), firstLine(sumAfter), firstLine(sumBefore) == firstLine(sumAfter)) |
|||
t.Log("WAL replay completed, volume intact") |
|||
} |
|||
|
|||
func testCrashRapidKill10x(t *testing.T) { |
|||
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute) |
|||
defer cancel() |
|||
|
|||
tgt, iscsi, host := newTestTarget(t, "50M", "") |
|||
|
|||
n := 10 |
|||
if testing.Short() { |
|||
n = 3 |
|||
} |
|||
for i := 0; i < n; i++ { |
|||
t.Logf("iteration %d/%d", i+1, n) |
|||
|
|||
create := (i == 0) |
|||
if err := tgt.Start(ctx, create); err != nil { |
|||
t.Fatalf("iter %d start: %v", i, err) |
|||
} |
|||
|
|||
_, err := iscsi.Discover(ctx, host, tgt.config.Port) |
|||
if err != nil { |
|||
t.Fatalf("iter %d discover: %v", i, err) |
|||
} |
|||
|
|||
dev, err := iscsi.Login(ctx, tgt.config.IQN) |
|||
if err != nil { |
|||
t.Fatalf("iter %d login: %v", i, err) |
|||
} |
|||
|
|||
// Write 1MB
|
|||
_, _, code, _ := clientNode.RunRoot(ctx, |
|||
fmt.Sprintf("dd if=/dev/urandom of=%s bs=1M count=1 oflag=direct 2>/dev/null", dev)) |
|||
if code != 0 { |
|||
t.Fatalf("iter %d dd write failed", i) |
|||
} |
|||
|
|||
iscsi.Logout(ctx, tgt.config.IQN) |
|||
tgt.Kill9() |
|||
} |
|||
t.Logf("%dx rapid kill completed", n) |
|||
} |
|||
|
|||
func testCrashFsckAfterCrash(t *testing.T) { |
|||
t.Skip("P3-BUG-11: WRITE SAME(16) not implemented, XFS sends it for inode zeroing") |
|||
if !clientNode.HasCommand("mkfs.xfs") { |
|||
t.Skip("mkfs.xfs required") |
|||
} |
|||
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute) |
|||
defer cancel() |
|||
|
|||
tgt, iscsi, host := newTestTarget(t, "500M", "") // XFS needs >= 300MB
|
|||
dev := startAndLogin(t, ctx, tgt, iscsi, host) |
|||
mnt := "/tmp/blockvol-mnt" |
|||
|
|||
// mkfs.xfs + mount
|
|||
clientNode.RunRoot(ctx, fmt.Sprintf("mkfs.xfs -f %s", dev)) |
|||
clientNode.RunRoot(ctx, fmt.Sprintf("mkdir -p %s", mnt)) |
|||
_, _, code, _ := clientNode.RunRoot(ctx, fmt.Sprintf("mount %s %s", dev, mnt)) |
|||
if code != 0 { |
|||
t.Fatalf("mount failed") |
|||
} |
|||
|
|||
// Workload: create some files
|
|||
for i := 0; i < 50; i++ { |
|||
clientNode.RunRoot(ctx, fmt.Sprintf("dd if=/dev/urandom of=%s/file%d bs=4k count=10 2>/dev/null", mnt, i)) |
|||
} |
|||
|
|||
// Sync filesystem metadata to device, then unmount + Kill9
|
|||
clientNode.RunRoot(ctx, "sync") |
|||
clientNode.RunRoot(ctx, fmt.Sprintf("umount %s 2>/dev/null", mnt)) |
|||
iscsi.Logout(ctx, tgt.config.IQN) |
|||
iscsi.CleanupAll(ctx, tgt.config.IQN) |
|||
tgt.Kill9() |
|||
|
|||
// Restart
|
|||
if err := tgt.Start(ctx, false); err != nil { |
|||
t.Fatalf("restart: %v", err) |
|||
} |
|||
|
|||
dev, err := iscsi.Login(ctx, tgt.config.IQN) |
|||
if err != nil { |
|||
t.Fatalf("re-login: %v", err) |
|||
} |
|||
|
|||
// xfs_repair -n (read-only check)
|
|||
stdout, stderr, code, _ := clientNode.RunRoot(ctx, fmt.Sprintf("xfs_repair -n %s", dev)) |
|||
if code != 0 { |
|||
t.Fatalf("xfs_repair failed: stdout=%s stderr=%s", stdout, stderr) |
|||
} |
|||
t.Log("xfs_repair -n passed (filesystem clean)") |
|||
} |
|||
@ -0,0 +1,281 @@ |
|||
//go:build integration
|
|||
|
|||
package test |
|||
|
|||
import ( |
|||
"context" |
|||
"flag" |
|||
"fmt" |
|||
"os" |
|||
"path/filepath" |
|||
"strings" |
|||
"testing" |
|||
"time" |
|||
) |
|||
|
|||
var ( |
|||
flagEnv = flag.String("env", "wsl2", "wsl2 or remote") |
|||
flagTargetHost = flag.String("target-host", "127.0.0.1", "target node IP (SSH)") |
|||
flagClientHost = flag.String("client-host", "127.0.0.1", "initiator node IP (SSH)") |
|||
flagISCSIHost = flag.String("iscsi-host", "", "iSCSI target IP for discovery/login (defaults to target-host)") |
|||
flagSSHKey = flag.String("ssh-key", "", "SSH private key path") |
|||
flagSSHUser = flag.String("ssh-user", "testdev", "SSH user") |
|||
flagRepoDir = flag.String("repo-dir", "C:/work/seaweedfs", "seaweedfs repo path") |
|||
) |
|||
|
|||
// Global state shared across tests.
|
|||
var ( |
|||
targetNode *Node |
|||
clientNode *Node |
|||
artifacts *ArtifactCollector |
|||
) |
|||
|
|||
const iqnPrefix = "iqn.2024.com.seaweedfs:test" |
|||
|
|||
func TestMain(m *testing.M) { |
|||
flag.Parse() |
|||
|
|||
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute) |
|||
defer cancel() |
|||
|
|||
// Setup nodes
|
|||
if *flagEnv == "wsl2" { |
|||
targetNode = &Node{IsLocal: true} |
|||
clientNode = targetNode // same node for WSL2
|
|||
} else { |
|||
targetNode = &Node{Host: *flagTargetHost, User: *flagSSHUser, KeyFile: *flagSSHKey} |
|||
clientNode = &Node{Host: *flagClientHost, User: *flagSSHUser, KeyFile: *flagSSHKey} |
|||
} |
|||
|
|||
if err := targetNode.Connect(); err != nil { |
|||
fmt.Fprintf(os.Stderr, "FATAL: target connect: %v\n", err) |
|||
os.Exit(1) |
|||
} |
|||
if clientNode != targetNode { |
|||
if err := clientNode.Connect(); err != nil { |
|||
fmt.Fprintf(os.Stderr, "FATAL: client connect: %v\n", err) |
|||
os.Exit(1) |
|||
} |
|||
} |
|||
|
|||
// Preflight: print versions
|
|||
preflight(ctx) |
|||
|
|||
// Build target binary
|
|||
fmt.Println("=== Building iscsi-target binary ===") |
|||
tgt := NewTarget(targetNode, DefaultTargetConfig()) |
|||
if err := tgt.Build(ctx, *flagRepoDir); err != nil { |
|||
fmt.Fprintf(os.Stderr, "FATAL: build target: %v\n", err) |
|||
os.Exit(1) |
|||
} |
|||
if err := tgt.Deploy(*flagRepoDir + "/iscsi-target-linux"); err != nil { |
|||
fmt.Fprintf(os.Stderr, "FATAL: deploy target: %v\n", err) |
|||
os.Exit(1) |
|||
} |
|||
fmt.Println("=== Build + deploy complete ===") |
|||
|
|||
// Setup artifact collector (no Target -- each test provides its own)
|
|||
artDir, _ := filepath.Abs("artifacts") |
|||
artifacts = NewArtifactCollector(artDir, clientNode) |
|||
|
|||
// Run tests
|
|||
code := m.Run() |
|||
|
|||
// Global cleanup (unconditional)
|
|||
cleanup() |
|||
|
|||
os.Exit(code) |
|||
} |
|||
|
|||
func preflight(ctx context.Context) { |
|||
fmt.Println("=== Preflight ===") |
|||
checks := []struct { |
|||
name string |
|||
cmd string |
|||
node *Node |
|||
}{ |
|||
{"fio", "fio --version", clientNode}, |
|||
{"iscsiadm", "iscsiadm --version 2>&1", clientNode}, |
|||
{"go", "go version", targetNode}, |
|||
{"kernel", "uname -r", targetNode}, |
|||
} |
|||
for _, c := range checks { |
|||
stdout, _, code, err := c.node.Run(ctx, c.cmd) |
|||
if err != nil || code != 0 { |
|||
fmt.Printf(" %-10s MISSING\n", c.name) |
|||
} else { |
|||
fmt.Printf(" %-10s %s\n", c.name, firstLine(stdout)) |
|||
} |
|||
} |
|||
fmt.Println("=== End Preflight ===") |
|||
} |
|||
|
|||
func cleanup() { |
|||
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) |
|||
defer cancel() |
|||
|
|||
fmt.Println("=== Global Cleanup ===") |
|||
|
|||
iscsi := NewISCSIClient(clientNode) |
|||
iscsi.CleanupAll(ctx, iqnPrefix) |
|||
|
|||
// Unmount any test mount points
|
|||
clientNode.RunRoot(ctx, "umount -f /tmp/blockvol-mnt 2>/dev/null") |
|||
|
|||
// Kill any leftover target process
|
|||
targetNode.Run(ctx, "pkill -f iscsi-target-test 2>/dev/null") |
|||
|
|||
// Remove temp files
|
|||
targetNode.Run(ctx, "rm -f /tmp/blockvol-test.blk /tmp/blockvol-test.blk.wal /tmp/iscsi-target-test /tmp/iscsi-target-test.log") |
|||
|
|||
if clientNode != targetNode { |
|||
clientNode.Close() |
|||
} |
|||
targetNode.Close() |
|||
|
|||
fmt.Println("=== Cleanup Done ===") |
|||
} |
|||
|
|||
// TestHarnessSelfCheck validates the test framework itself.
|
|||
// Run first: go test -tags integration -run TestHarnessSelfCheck -v
|
|||
func TestHarnessSelfCheck(t *testing.T) { |
|||
ctx, cancel := context.WithTimeout(context.Background(), 3*time.Minute) |
|||
defer cancel() |
|||
|
|||
cfg := DefaultTargetConfig() |
|||
cfg.IQN = iqnPrefix + "-harness" |
|||
cfg.VolSize = "50M" |
|||
tgt := NewTarget(targetNode, cfg) |
|||
iscsi := NewISCSIClient(clientNode) |
|||
host := targetHost() |
|||
|
|||
t.Cleanup(func() { |
|||
iscsi.Logout(ctx, cfg.IQN) |
|||
tgt.Stop(ctx) |
|||
tgt.Cleanup(ctx) |
|||
}) |
|||
t.Cleanup(func() { artifacts.Collect(t, tgt) }) |
|||
|
|||
// Start target
|
|||
t.Log("starting target...") |
|||
if err := tgt.Start(ctx, true); err != nil { |
|||
t.Fatalf("start target: %v", err) |
|||
} |
|||
|
|||
// Discovery
|
|||
t.Log("discovering...") |
|||
iqns, err := iscsi.Discover(ctx, host, cfg.Port) |
|||
if err != nil { |
|||
t.Fatalf("discover: %v", err) |
|||
} |
|||
found := false |
|||
for _, iqn := range iqns { |
|||
if iqn == cfg.IQN { |
|||
found = true |
|||
} |
|||
} |
|||
if !found { |
|||
t.Fatalf("IQN %s not in discovery: %v", cfg.IQN, iqns) |
|||
} |
|||
|
|||
// Login
|
|||
t.Log("logging in...") |
|||
dev, err := iscsi.Login(ctx, cfg.IQN) |
|||
if err != nil { |
|||
t.Fatalf("login: %v", err) |
|||
} |
|||
t.Logf("device: %s", dev) |
|||
|
|||
// DD 1MB write + read + verify
|
|||
t.Log("dd write/read verify...") |
|||
_, _, code, err := clientNode.RunRoot(ctx, |
|||
fmt.Sprintf("dd if=/dev/urandom of=%s bs=1M count=1 oflag=direct 2>/dev/null", dev)) |
|||
if err != nil || code != 0 { |
|||
t.Fatalf("dd write failed: code=%d err=%v", code, err) |
|||
} |
|||
|
|||
wSum, _, _, _ := clientNode.RunRoot(ctx, |
|||
fmt.Sprintf("dd if=%s bs=1M count=1 iflag=direct 2>/dev/null | md5sum", dev)) |
|||
t.Logf("md5: %s", firstLine(wSum)) |
|||
|
|||
// Logout
|
|||
t.Log("logging out...") |
|||
if err := iscsi.Logout(ctx, cfg.IQN); err != nil { |
|||
t.Fatalf("logout: %v", err) |
|||
} |
|||
|
|||
// Stop target
|
|||
t.Log("stopping target...") |
|||
if err := tgt.Stop(ctx); err != nil { |
|||
t.Fatalf("stop: %v", err) |
|||
} |
|||
|
|||
t.Log("harness self-check passed") |
|||
} |
|||
|
|||
// targetHost returns the iSCSI target address for discovery/login from the initiator.
|
|||
// Uses -iscsi-host if set, otherwise falls back to -target-host.
|
|||
func targetHost() string { |
|||
if *flagEnv == "wsl2" { |
|||
return "127.0.0.1" |
|||
} |
|||
if *flagISCSIHost != "" { |
|||
return *flagISCSIHost |
|||
} |
|||
return *flagTargetHost |
|||
} |
|||
|
|||
func firstLine(s string) string { |
|||
for i, c := range s { |
|||
if c == '\n' || c == '\r' { |
|||
return s[:i] |
|||
} |
|||
} |
|||
return s |
|||
} |
|||
|
|||
// newTestTarget creates a target with test-specific IQN, unique vol file, and cleanup.
|
|||
// Tests must not run in parallel -- they share the same target node and port.
|
|||
func newTestTarget(t *testing.T, volSize, walSize string) (*Target, *ISCSIClient, string) { |
|||
cfg := DefaultTargetConfig() |
|||
// Sanitize test name for IQN -- replace / with - (subtests use /)
|
|||
name := strings.ReplaceAll(t.Name(), "/", "-") |
|||
cfg.IQN = iqnPrefix + "-" + strings.ToLower(name) |
|||
if volSize != "" { |
|||
cfg.VolSize = volSize |
|||
} |
|||
if walSize != "" { |
|||
cfg.WALSize = walSize |
|||
} |
|||
|
|||
tgt := NewTarget(targetNode, cfg) |
|||
iscsi := NewISCSIClient(clientNode) |
|||
host := targetHost() |
|||
|
|||
t.Cleanup(func() { |
|||
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) |
|||
defer cancel() |
|||
iscsi.Logout(ctx, cfg.IQN) |
|||
tgt.Stop(ctx) |
|||
tgt.Cleanup(ctx) |
|||
}) |
|||
t.Cleanup(func() { artifacts.Collect(t, tgt) }) |
|||
|
|||
return tgt, iscsi, host |
|||
} |
|||
|
|||
// startAndLogin creates volume, starts target, discovers, logs in, returns device.
|
|||
func startAndLogin(t *testing.T, ctx context.Context, tgt *Target, iscsi *ISCSIClient, host string) string { |
|||
t.Helper() |
|||
if err := tgt.Start(ctx, true); err != nil { |
|||
t.Fatalf("start target: %v", err) |
|||
} |
|||
if _, err := iscsi.Discover(ctx, host, tgt.config.Port); err != nil { |
|||
t.Fatalf("discover: %v", err) |
|||
} |
|||
dev, err := iscsi.Login(ctx, tgt.config.IQN) |
|||
if err != nil { |
|||
t.Fatalf("login: %v", err) |
|||
} |
|||
return dev |
|||
} |
|||
@ -0,0 +1,229 @@ |
|||
//go:build integration
|
|||
|
|||
package test |
|||
|
|||
import ( |
|||
"context" |
|||
"fmt" |
|||
"strings" |
|||
"time" |
|||
) |
|||
|
|||
// ISCSIClient wraps iscsiadm commands on a node.
|
|||
type ISCSIClient struct { |
|||
node *Node |
|||
targetHost string // set after Discover, used to fix wildcard portals
|
|||
targetPort int |
|||
} |
|||
|
|||
// NewISCSIClient creates an iSCSI client bound to a node.
|
|||
func NewISCSIClient(node *Node) *ISCSIClient { |
|||
return &ISCSIClient{node: node} |
|||
} |
|||
|
|||
// Discover runs iSCSI SendTargets discovery and returns discovered IQNs.
|
|||
// Remembers the target host for subsequent Login calls.
|
|||
func (c *ISCSIClient) Discover(ctx context.Context, host string, port int) ([]string, error) { |
|||
c.targetHost = host |
|||
c.targetPort = port |
|||
|
|||
cmd := fmt.Sprintf("iscsiadm -m discovery -t sendtargets -p %s:%d", host, port) |
|||
stdout, stderr, code, err := c.node.RunRoot(ctx, cmd) |
|||
if err != nil { |
|||
return nil, fmt.Errorf("discovery error: %w", err) |
|||
} |
|||
if code != 0 { |
|||
return nil, fmt.Errorf("discovery failed (code %d): %s", code, stderr) |
|||
} |
|||
|
|||
var iqns []string |
|||
for _, line := range strings.Split(stdout, "\n") { |
|||
line = strings.TrimSpace(line) |
|||
if line == "" { |
|||
continue |
|||
} |
|||
// Format: "10.0.0.1:3260,1 iqn.2024.com.seaweedfs:vol1"
|
|||
// or: "[::]:3260,-1 iqn.2024.com.seaweedfs:vol1"
|
|||
parts := strings.Fields(line) |
|||
if len(parts) >= 2 { |
|||
iqns = append(iqns, parts[1]) |
|||
} |
|||
} |
|||
|
|||
// Fix wildcard portals: target may advertise [::]:3260 but remote
|
|||
// initiators need the real IP. Delete wildcard records and re-create
|
|||
// with the correct portal address.
|
|||
for _, iqn := range iqns { |
|||
c.fixNodePortal(ctx, iqn, host, port) |
|||
} |
|||
|
|||
return iqns, nil |
|||
} |
|||
|
|||
// fixNodePortal ensures the node record for iqn uses the actual target
|
|||
// host address, not a wildcard like [::] or 0.0.0.0.
|
|||
func (c *ISCSIClient) fixNodePortal(ctx context.Context, iqn, host string, port int) { |
|||
// List node records for this IQN
|
|||
stdout, _, _, _ := c.node.RunRoot(ctx, |
|||
fmt.Sprintf("iscsiadm -m node -T %s 2>/dev/null", iqn)) |
|||
|
|||
// Check if any record has a wildcard address
|
|||
hasWildcard := false |
|||
for _, line := range strings.Split(stdout, "\n") { |
|||
if strings.Contains(line, "node.conn[0].address") { |
|||
if strings.Contains(line, "::") || strings.Contains(line, "0.0.0.0") { |
|||
hasWildcard = true |
|||
} |
|||
} |
|||
} |
|||
if !hasWildcard { |
|||
return |
|||
} |
|||
|
|||
// Delete ALL node records for this IQN (wildcard ones)
|
|||
c.node.RunRoot(ctx, fmt.Sprintf("iscsiadm -m node -T %s -o delete 2>/dev/null", iqn)) |
|||
|
|||
// Create a new node record with the correct portal
|
|||
portal := fmt.Sprintf("%s:%d", host, port) |
|||
c.node.RunRoot(ctx, fmt.Sprintf("iscsiadm -m node -T %s -p %s -o new 2>/dev/null", iqn, portal)) |
|||
} |
|||
|
|||
// Login connects to the target and returns the device path (e.g. /dev/sda).
|
|||
// Uses explicit portal from Discover when available to avoid wildcard issues.
|
|||
func (c *ISCSIClient) Login(ctx context.Context, iqn string) (string, error) { |
|||
var cmd string |
|||
if c.targetHost != "" && c.targetHost != "127.0.0.1" && c.targetHost != "localhost" { |
|||
// Remote mode: use explicit portal to avoid wildcard [::] issue
|
|||
portal := fmt.Sprintf("%s:%d", c.targetHost, c.targetPort) |
|||
cmd = fmt.Sprintf("iscsiadm -m node -T %s -p %s --login", iqn, portal) |
|||
} else { |
|||
// Local/WSL2 mode: IQN-only works fine
|
|||
cmd = fmt.Sprintf("iscsiadm -m node -T %s --login", iqn) |
|||
} |
|||
_, stderr, code, err := c.node.RunRoot(ctx, cmd) |
|||
if err != nil { |
|||
return "", fmt.Errorf("login error: %w", err) |
|||
} |
|||
if code != 0 { |
|||
return "", fmt.Errorf("login failed (code %d): %s", code, stderr) |
|||
} |
|||
|
|||
// Poll for device to appear (kernel creates /dev/sdX asynchronously)
|
|||
return c.waitForDevice(ctx, iqn) |
|||
} |
|||
|
|||
// Logout disconnects from the target.
|
|||
func (c *ISCSIClient) Logout(ctx context.Context, iqn string) error { |
|||
cmd := fmt.Sprintf("iscsiadm -m node -T %s --logout", iqn) |
|||
_, stderr, code, err := c.node.RunRoot(ctx, cmd) |
|||
if err != nil { |
|||
return fmt.Errorf("logout error: %w", err) |
|||
} |
|||
if code != 0 { |
|||
return fmt.Errorf("logout failed (code %d): %s", code, stderr) |
|||
} |
|||
return nil |
|||
} |
|||
|
|||
// GetDevice returns the device path for an active session.
|
|||
func (c *ISCSIClient) GetDevice(ctx context.Context, iqn string) (string, error) { |
|||
return c.waitForDevice(ctx, iqn) |
|||
} |
|||
|
|||
func (c *ISCSIClient) waitForDevice(ctx context.Context, iqn string) (string, error) { |
|||
deadline := time.Now().Add(30 * time.Second) |
|||
rescanned := false |
|||
for time.Now().Before(deadline) { |
|||
select { |
|||
case <-ctx.Done(): |
|||
return "", ctx.Err() |
|||
default: |
|||
} |
|||
|
|||
// Parse session details to find the attached disk
|
|||
stdout, _, code, _ := c.node.RunRoot(ctx, "iscsiadm -m session -P3") |
|||
if code == 0 { |
|||
dev := parseDeviceFromSession(stdout, iqn) |
|||
if dev != "" { |
|||
return dev, nil |
|||
} |
|||
} |
|||
|
|||
// After 5s without device, force a LUN rescan (WSL2 needs this)
|
|||
if !rescanned && time.Until(deadline) < 25*time.Second { |
|||
c.node.RunRoot(ctx, "iscsiadm -m session -R") |
|||
rescanned = true |
|||
} |
|||
time.Sleep(500 * time.Millisecond) |
|||
} |
|||
return "", fmt.Errorf("device for %s did not appear within 30s", iqn) |
|||
} |
|||
|
|||
// parseDeviceFromSession extracts /dev/sdX from iscsiadm -m session -P3 output.
|
|||
func parseDeviceFromSession(output, iqn string) string { |
|||
lines := strings.Split(output, "\n") |
|||
inTarget := false |
|||
for _, line := range lines { |
|||
if strings.Contains(line, "Target: "+iqn) { |
|||
inTarget = true |
|||
continue |
|||
} |
|||
if inTarget && strings.Contains(line, "Target: ") { |
|||
break // next target
|
|||
} |
|||
if inTarget && strings.Contains(line, "Attached scsi disk") { |
|||
// "Attached scsi disk sda State: running"
|
|||
fields := strings.Fields(line) |
|||
for i, f := range fields { |
|||
if f == "disk" && i+1 < len(fields) { |
|||
return "/dev/" + fields[i+1] |
|||
} |
|||
} |
|||
} |
|||
} |
|||
return "" |
|||
} |
|||
|
|||
// WaitForSession polls until a session for the given IQN is in LOGGED_IN state.
|
|||
// Used after Kill9+Restart to wait for iSCSI session recovery.
|
|||
func (c *ISCSIClient) WaitForSession(ctx context.Context, iqn string) error { |
|||
for { |
|||
select { |
|||
case <-ctx.Done(): |
|||
return fmt.Errorf("session %s did not recover: %w", iqn, ctx.Err()) |
|||
default: |
|||
} |
|||
|
|||
stdout, _, code, _ := c.node.RunRoot(ctx, "iscsiadm -m session") |
|||
if code == 0 && strings.Contains(stdout, iqn) { |
|||
return nil |
|||
} |
|||
time.Sleep(500 * time.Millisecond) |
|||
} |
|||
} |
|||
|
|||
// CleanupAll force-logouts sessions matching the IQN prefix only.
|
|||
// Does not touch other iSCSI sessions on the node.
|
|||
func (c *ISCSIClient) CleanupAll(ctx context.Context, iqnPrefix string) error { |
|||
stdout, _, _, _ := c.node.RunRoot(ctx, "iscsiadm -m session 2>&1") |
|||
if stdout == "" || strings.Contains(stdout, "No active sessions") { |
|||
return nil |
|||
} |
|||
|
|||
// Parse session lines: "tcp: [N] 10.0.0.1:3260,1 iqn.2024.com.seaweedfs:test-..."
|
|||
for _, line := range strings.Split(stdout, "\n") { |
|||
line = strings.TrimSpace(line) |
|||
if !strings.Contains(line, iqnPrefix) { |
|||
continue |
|||
} |
|||
// Extract IQN from the line
|
|||
fields := strings.Fields(line) |
|||
for _, f := range fields { |
|||
if strings.HasPrefix(f, iqnPrefix) { |
|||
c.node.RunRoot(ctx, fmt.Sprintf("iscsiadm -m node -T %s --logout 2>/dev/null", f)) |
|||
c.node.RunRoot(ctx, fmt.Sprintf("iscsiadm -m node -T %s -o delete 2>/dev/null", f)) |
|||
} |
|||
} |
|||
} |
|||
return nil |
|||
} |
|||
@ -0,0 +1,316 @@ |
|||
//go:build integration
|
|||
|
|||
package test |
|||
|
|||
import ( |
|||
"bytes" |
|||
"context" |
|||
"fmt" |
|||
"io" |
|||
"net" |
|||
"os" |
|||
"os/exec" |
|||
"strings" |
|||
"sync" |
|||
"time" |
|||
|
|||
"golang.org/x/crypto/ssh" |
|||
) |
|||
|
|||
// Node represents an SSH-accessible (or local WSL2) machine.
|
|||
type Node struct { |
|||
Host string |
|||
User string |
|||
KeyFile string |
|||
IsLocal bool // WSL2 mode: use exec.CommandContext instead of SSH
|
|||
|
|||
mu sync.Mutex |
|||
client *ssh.Client |
|||
} |
|||
|
|||
// Connect establishes the SSH connection (no-op for local mode).
|
|||
func (n *Node) Connect() error { |
|||
if n.IsLocal { |
|||
return nil |
|||
} |
|||
|
|||
key, err := os.ReadFile(n.KeyFile) |
|||
if err != nil { |
|||
return fmt.Errorf("read SSH key %s: %w", n.KeyFile, err) |
|||
} |
|||
signer, err := ssh.ParsePrivateKey(key) |
|||
if err != nil { |
|||
return fmt.Errorf("parse SSH key: %w", err) |
|||
} |
|||
|
|||
config := &ssh.ClientConfig{ |
|||
User: n.User, |
|||
Auth: []ssh.AuthMethod{ssh.PublicKeys(signer)}, |
|||
HostKeyCallback: ssh.InsecureIgnoreHostKey(), |
|||
Timeout: 10 * time.Second, |
|||
} |
|||
|
|||
addr := n.Host |
|||
if !strings.Contains(addr, ":") { |
|||
addr += ":22" |
|||
} |
|||
|
|||
n.mu.Lock() |
|||
defer n.mu.Unlock() |
|||
n.client, err = ssh.Dial("tcp", addr, config) |
|||
if err != nil { |
|||
return fmt.Errorf("SSH dial %s: %w", addr, err) |
|||
} |
|||
return nil |
|||
} |
|||
|
|||
// Run executes a command and returns stdout, stderr, exit code.
|
|||
// The context controls timeout -- cancelled context kills the command.
|
|||
func (n *Node) Run(ctx context.Context, cmd string) (stdout, stderr string, exitCode int, err error) { |
|||
if n.IsLocal { |
|||
return n.runLocal(ctx, cmd) |
|||
} |
|||
return n.runSSH(ctx, cmd) |
|||
} |
|||
|
|||
func (n *Node) runLocal(ctx context.Context, cmd string) (string, string, int, error) { |
|||
c := exec.CommandContext(ctx, "wsl", "-e", "bash", "-c", cmd) |
|||
var outBuf, errBuf bytes.Buffer |
|||
c.Stdout = &outBuf |
|||
c.Stderr = &errBuf |
|||
|
|||
err := c.Run() |
|||
if ctx.Err() != nil { |
|||
return outBuf.String(), errBuf.String(), -1, fmt.Errorf("command timed out: %w", ctx.Err()) |
|||
} |
|||
if err != nil { |
|||
if exitErr, ok := err.(*exec.ExitError); ok { |
|||
return outBuf.String(), errBuf.String(), exitErr.ExitCode(), nil |
|||
} |
|||
return outBuf.String(), errBuf.String(), -1, err |
|||
} |
|||
return outBuf.String(), errBuf.String(), 0, nil |
|||
} |
|||
|
|||
func (n *Node) runSSH(ctx context.Context, cmd string) (string, string, int, error) { |
|||
n.mu.Lock() |
|||
if n.client == nil { |
|||
n.mu.Unlock() |
|||
return "", "", -1, fmt.Errorf("SSH not connected") |
|||
} |
|||
session, err := n.client.NewSession() |
|||
n.mu.Unlock() |
|||
if err != nil { |
|||
return "", "", -1, fmt.Errorf("new SSH session: %w", err) |
|||
} |
|||
defer session.Close() |
|||
|
|||
var outBuf, errBuf bytes.Buffer |
|||
session.Stdout = &outBuf |
|||
session.Stderr = &errBuf |
|||
|
|||
done := make(chan error, 1) |
|||
go func() { done <- session.Run(cmd) }() |
|||
|
|||
select { |
|||
case <-ctx.Done(): |
|||
_ = session.Signal(ssh.SIGKILL) |
|||
return outBuf.String(), errBuf.String(), -1, fmt.Errorf("command timed out: %w", ctx.Err()) |
|||
case err := <-done: |
|||
if err != nil { |
|||
if exitErr, ok := err.(*ssh.ExitError); ok { |
|||
return outBuf.String(), errBuf.String(), exitErr.ExitStatus(), nil |
|||
} |
|||
return outBuf.String(), errBuf.String(), -1, err |
|||
} |
|||
return outBuf.String(), errBuf.String(), 0, nil |
|||
} |
|||
} |
|||
|
|||
// RunRoot executes a command with sudo -n (non-interactive).
|
|||
// Fails immediately if sudo requires a password instead of hanging.
|
|||
func (n *Node) RunRoot(ctx context.Context, cmd string) (string, string, int, error) { |
|||
return n.Run(ctx, "sudo -n "+cmd) |
|||
} |
|||
|
|||
// Upload copies a local file to the remote node via SCP.
|
|||
func (n *Node) Upload(local, remote string) error { |
|||
if n.IsLocal { |
|||
// Convert Windows path to WSL path for cp
|
|||
ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second) |
|||
defer cancel() |
|||
wslLocal := toWSLPath(local) |
|||
_, stderr, code, err := n.Run(ctx, fmt.Sprintf("cp %s %s && chmod +x %s", wslLocal, remote, remote)) |
|||
if err != nil || code != 0 { |
|||
return fmt.Errorf("local upload: code=%d stderr=%s err=%v", code, stderr, err) |
|||
} |
|||
return nil |
|||
} |
|||
return n.scpUpload(local, remote) |
|||
} |
|||
|
|||
func (n *Node) scpUpload(local, remote string) error { |
|||
data, err := os.ReadFile(local) |
|||
if err != nil { |
|||
return fmt.Errorf("read local file %s: %w", local, err) |
|||
} |
|||
|
|||
n.mu.Lock() |
|||
if n.client == nil { |
|||
n.mu.Unlock() |
|||
return fmt.Errorf("SSH not connected") |
|||
} |
|||
session, err := n.client.NewSession() |
|||
n.mu.Unlock() |
|||
if err != nil { |
|||
return fmt.Errorf("new SSH session: %w", err) |
|||
} |
|||
defer session.Close() |
|||
|
|||
go func() { |
|||
w, _ := session.StdinPipe() |
|||
fmt.Fprintf(w, "C0755 %d %s\n", len(data), remoteName(remote)) |
|||
w.Write(data) |
|||
fmt.Fprint(w, "\x00") |
|||
w.Close() |
|||
}() |
|||
|
|||
dir := remoteDir(remote) |
|||
return session.Run(fmt.Sprintf("scp -t %s", dir)) |
|||
} |
|||
|
|||
// Download copies a remote file to local via SCP.
|
|||
func (n *Node) Download(remote, local string) error { |
|||
if n.IsLocal { |
|||
ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second) |
|||
defer cancel() |
|||
wslLocal := toWSLPath(local) |
|||
_, stderr, code, err := n.Run(ctx, fmt.Sprintf("cp %s %s", remote, wslLocal)) |
|||
if err != nil || code != 0 { |
|||
return fmt.Errorf("local download: code=%d stderr=%s err=%v", code, stderr, err) |
|||
} |
|||
return nil |
|||
} |
|||
return n.scpDownload(remote, local) |
|||
} |
|||
|
|||
func (n *Node) scpDownload(remote, local string) error { |
|||
n.mu.Lock() |
|||
if n.client == nil { |
|||
n.mu.Unlock() |
|||
return fmt.Errorf("SSH not connected") |
|||
} |
|||
session, err := n.client.NewSession() |
|||
n.mu.Unlock() |
|||
if err != nil { |
|||
return fmt.Errorf("new SSH session: %w", err) |
|||
} |
|||
defer session.Close() |
|||
|
|||
var buf bytes.Buffer |
|||
session.Stdout = &buf |
|||
if err := session.Run(fmt.Sprintf("cat %s", remote)); err != nil { |
|||
return fmt.Errorf("read remote %s: %w", remote, err) |
|||
} |
|||
return os.WriteFile(local, buf.Bytes(), 0644) |
|||
} |
|||
|
|||
// Kill sends SIGKILL to a process by PID.
|
|||
func (n *Node) Kill(pid int) error { |
|||
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) |
|||
defer cancel() |
|||
_, _, _, err := n.RunRoot(ctx, fmt.Sprintf("kill -9 %d", pid)) |
|||
return err |
|||
} |
|||
|
|||
// HasCommand checks if a command is available on the node.
|
|||
func (n *Node) HasCommand(cmd string) bool { |
|||
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) |
|||
defer cancel() |
|||
_, _, code, err := n.Run(ctx, fmt.Sprintf("which %s", cmd)) |
|||
return err == nil && code == 0 |
|||
} |
|||
|
|||
// Close closes the SSH connection.
|
|||
func (n *Node) Close() { |
|||
n.mu.Lock() |
|||
defer n.mu.Unlock() |
|||
if n.client != nil { |
|||
n.client.Close() |
|||
n.client = nil |
|||
} |
|||
} |
|||
|
|||
// DialTCP opens a direct TCP connection through the SSH tunnel.
|
|||
func (n *Node) DialTCP(addr string) (net.Conn, error) { |
|||
if n.IsLocal { |
|||
return net.DialTimeout("tcp", addr, 5*time.Second) |
|||
} |
|||
n.mu.Lock() |
|||
defer n.mu.Unlock() |
|||
if n.client == nil { |
|||
return nil, fmt.Errorf("SSH not connected") |
|||
} |
|||
return n.client.Dial("tcp", addr) |
|||
} |
|||
|
|||
// StreamRun executes a command and streams stdout to the writer.
|
|||
func (n *Node) StreamRun(ctx context.Context, cmd string, w io.Writer) error { |
|||
if n.IsLocal { |
|||
c := exec.CommandContext(ctx, "wsl", "-e", "bash", "-c", cmd) |
|||
c.Stdout = w |
|||
c.Stderr = w |
|||
return c.Run() |
|||
} |
|||
|
|||
n.mu.Lock() |
|||
if n.client == nil { |
|||
n.mu.Unlock() |
|||
return fmt.Errorf("SSH not connected") |
|||
} |
|||
session, err := n.client.NewSession() |
|||
n.mu.Unlock() |
|||
if err != nil { |
|||
return err |
|||
} |
|||
defer session.Close() |
|||
|
|||
session.Stdout = w |
|||
session.Stderr = w |
|||
|
|||
done := make(chan error, 1) |
|||
go func() { done <- session.Run(cmd) }() |
|||
|
|||
select { |
|||
case <-ctx.Done(): |
|||
_ = session.Signal(ssh.SIGKILL) |
|||
return ctx.Err() |
|||
case err := <-done: |
|||
return err |
|||
} |
|||
} |
|||
|
|||
// Helper functions
|
|||
|
|||
func toWSLPath(winPath string) string { |
|||
// Convert C:\foo\bar to /mnt/c/foo/bar
|
|||
p := strings.ReplaceAll(winPath, "\\", "/") |
|||
if len(p) >= 2 && p[1] == ':' { |
|||
drive := strings.ToLower(string(p[0])) |
|||
p = "/mnt/" + drive + p[2:] |
|||
} |
|||
return p |
|||
} |
|||
|
|||
func remoteName(path string) string { |
|||
parts := strings.Split(path, "/") |
|||
return parts[len(parts)-1] |
|||
} |
|||
|
|||
func remoteDir(path string) string { |
|||
idx := strings.LastIndex(path, "/") |
|||
if idx < 0 { |
|||
return "." |
|||
} |
|||
return path[:idx] |
|||
} |
|||
@ -0,0 +1,168 @@ |
|||
//go:build integration
|
|||
|
|||
package test |
|||
|
|||
import ( |
|||
"context" |
|||
"encoding/json" |
|||
"fmt" |
|||
"strings" |
|||
"testing" |
|||
"time" |
|||
) |
|||
|
|||
func TestPerf(t *testing.T) { |
|||
if *flagEnv != "remote" { |
|||
t.Skip("perf tests require remote mode (-env remote)") |
|||
} |
|||
if !clientNode.HasCommand("fio") { |
|||
t.Skip("fio required") |
|||
} |
|||
|
|||
t.Run("GoBench", testPerfGoBench) |
|||
t.Run("FioRandWrite", testPerfFioRandWrite) |
|||
t.Run("FioRandRead", testPerfFioRandRead) |
|||
t.Run("LatencyP99", testPerfLatencyP99) |
|||
} |
|||
|
|||
func testPerfGoBench(t *testing.T) { |
|||
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute) |
|||
defer cancel() |
|||
|
|||
benchDir := "/opt/work/seaweedfs/weed/storage/blockvol" |
|||
stdout, stderr, code, err := targetNode.Run(ctx, |
|||
fmt.Sprintf("cd %s && go test -run=^$ -bench=. -benchmem -count=1 -timeout=5m ./...", benchDir)) |
|||
if err != nil || code != 0 { |
|||
t.Fatalf("go bench: code=%d stderr=%s err=%v", code, stderr, err) |
|||
} |
|||
|
|||
t.Log(stdout) |
|||
artifacts.CollectPerf(t, "gobench", stdout) |
|||
} |
|||
|
|||
func testPerfFioRandWrite(t *testing.T) { |
|||
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute) |
|||
defer cancel() |
|||
|
|||
tgt, iscsi, host := newTestTarget(t, "1G", "") |
|||
dev := startAndLogin(t, ctx, tgt, iscsi, host) |
|||
|
|||
stdout, stderr, code, _ := clientNode.RunRoot(ctx, |
|||
fmt.Sprintf("fio --name=randwrite --filename=%s --rw=randwrite "+ |
|||
"--bs=4k --size=500M --direct=1 --ioengine=libaio --iodepth=32 "+ |
|||
"--numjobs=4 --runtime=120 --time_based --group_reporting "+ |
|||
"--output-format=json", dev)) |
|||
if code != 0 { |
|||
t.Fatalf("fio: code=%d stderr=%s", code, stderr) |
|||
} |
|||
|
|||
iops := extractIOPS(stdout, "write") |
|||
t.Logf("random write IOPS: %.0f", iops) |
|||
if iops < 10000 { |
|||
t.Fatalf("IOPS %.0f below threshold 10000", iops) |
|||
} |
|||
|
|||
artifacts.CollectPerf(t, "fio-randwrite", stdout) |
|||
} |
|||
|
|||
func testPerfFioRandRead(t *testing.T) { |
|||
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute) |
|||
defer cancel() |
|||
|
|||
tgt, iscsi, host := newTestTarget(t, "1G", "") |
|||
dev := startAndLogin(t, ctx, tgt, iscsi, host) |
|||
|
|||
// Pre-fill
|
|||
clientNode.RunRoot(ctx, |
|||
fmt.Sprintf("fio --name=prefill --filename=%s --rw=write --bs=1M "+ |
|||
"--size=500M --direct=1 --ioengine=libaio", dev)) |
|||
|
|||
stdout, stderr, code, _ := clientNode.RunRoot(ctx, |
|||
fmt.Sprintf("fio --name=randread --filename=%s --rw=randread "+ |
|||
"--bs=4k --size=500M --direct=1 --ioengine=libaio --iodepth=32 "+ |
|||
"--numjobs=4 --runtime=120 --time_based --group_reporting "+ |
|||
"--output-format=json", dev)) |
|||
if code != 0 { |
|||
t.Fatalf("fio: code=%d stderr=%s", code, stderr) |
|||
} |
|||
|
|||
iops := extractIOPS(stdout, "read") |
|||
t.Logf("random read IOPS: %.0f", iops) |
|||
if iops < 10000 { |
|||
t.Fatalf("IOPS %.0f below threshold 10000", iops) |
|||
} |
|||
|
|||
artifacts.CollectPerf(t, "fio-randread", stdout) |
|||
} |
|||
|
|||
func testPerfLatencyP99(t *testing.T) { |
|||
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute) |
|||
defer cancel() |
|||
|
|||
tgt, iscsi, host := newTestTarget(t, "1G", "") |
|||
dev := startAndLogin(t, ctx, tgt, iscsi, host) |
|||
|
|||
stdout, stderr, code, _ := clientNode.RunRoot(ctx, |
|||
fmt.Sprintf("fio --name=latency --filename=%s --rw=randwrite "+ |
|||
"--bs=4k --size=500M --direct=1 --ioengine=libaio --iodepth=1 "+ |
|||
"--numjobs=1 --runtime=60 --time_based "+ |
|||
"--lat_percentiles=1 --output-format=json", dev)) |
|||
if code != 0 { |
|||
t.Fatalf("fio: code=%d stderr=%s", code, stderr) |
|||
} |
|||
|
|||
p99 := extractP99Latency(stdout) // nanoseconds (fio clat_ns)
|
|||
p99ms := p99 / 1_000_000 // ns -> ms
|
|||
t.Logf("P99 latency: %.2f ms", p99ms) |
|||
if p99ms > 10 { |
|||
t.Fatalf("P99 %.2fms exceeds 10ms threshold", p99ms) |
|||
} |
|||
|
|||
artifacts.CollectPerf(t, "fio-latency", stdout) |
|||
} |
|||
|
|||
// extractIOPS parses fio JSON output for IOPS.
|
|||
func extractIOPS(fioJSON string, rw string) float64 { |
|||
var result struct { |
|||
Jobs []struct { |
|||
Read struct{ IOPS float64 `json:"iops"` } `json:"read"` |
|||
Write struct{ IOPS float64 `json:"iops"` } `json:"write"` |
|||
} `json:"jobs"` |
|||
} |
|||
if err := json.Unmarshal([]byte(fioJSON), &result); err != nil { |
|||
return 0 |
|||
} |
|||
if len(result.Jobs) == 0 { |
|||
return 0 |
|||
} |
|||
if rw == "read" { |
|||
return result.Jobs[0].Read.IOPS |
|||
} |
|||
return result.Jobs[0].Write.IOPS |
|||
} |
|||
|
|||
// extractP99Latency parses fio JSON output for P99 latency in microseconds.
|
|||
func extractP99Latency(fioJSON string) float64 { |
|||
// Look for clat_ns percentile 99.000000
|
|||
idx := strings.Index(fioJSON, "99.000000") |
|||
if idx < 0 { |
|||
return 0 |
|||
} |
|||
// Find the value after the colon
|
|||
sub := fioJSON[idx:] |
|||
colonIdx := strings.Index(sub, ":") |
|||
if colonIdx < 0 { |
|||
return 0 |
|||
} |
|||
valStr := strings.TrimSpace(sub[colonIdx+1:]) |
|||
// Take until comma or closing bracket
|
|||
for i, c := range valStr { |
|||
if c == ',' || c == '}' || c == ']' { |
|||
valStr = valStr[:i] |
|||
break |
|||
} |
|||
} |
|||
var val float64 |
|||
fmt.Sscanf(strings.TrimSpace(valStr), "%f", &val) |
|||
return val |
|||
} |
|||
@ -0,0 +1,190 @@ |
|||
//go:build integration
|
|||
|
|||
package test |
|||
|
|||
import ( |
|||
"context" |
|||
"fmt" |
|||
"strings" |
|||
"testing" |
|||
"time" |
|||
) |
|||
|
|||
func TestSmoke(t *testing.T) { |
|||
t.Run("Discovery", testSmokeDiscovery) |
|||
t.Run("DDIntegrity", testSmokeDDIntegrity) |
|||
t.Run("MkfsExt4", testSmokeMkfsExt4) |
|||
t.Run("MkfsXfs", testSmokeMkfsXfs) |
|||
t.Run("FioVerify", testSmokeFioVerify) |
|||
t.Run("LogoutClean", testSmokeLogoutClean) |
|||
} |
|||
|
|||
func testSmokeDiscovery(t *testing.T) { |
|||
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute) |
|||
defer cancel() |
|||
|
|||
tgt, iscsi, host := newTestTarget(t, "50M", "") |
|||
|
|||
if err := tgt.Start(ctx, true); err != nil { |
|||
t.Fatalf("start: %v", err) |
|||
} |
|||
|
|||
iqns, err := iscsi.Discover(ctx, host, tgt.config.Port) |
|||
if err != nil { |
|||
t.Fatalf("discover: %v", err) |
|||
} |
|||
|
|||
found := false |
|||
for _, iqn := range iqns { |
|||
if iqn == tgt.config.IQN { |
|||
found = true |
|||
} |
|||
} |
|||
if !found { |
|||
t.Fatalf("IQN %s not found in discovery response: %v", tgt.config.IQN, iqns) |
|||
} |
|||
} |
|||
|
|||
func testSmokeDDIntegrity(t *testing.T) { |
|||
ctx, cancel := context.WithTimeout(context.Background(), 3*time.Minute) |
|||
defer cancel() |
|||
|
|||
tgt, iscsi, host := newTestTarget(t, "50M", "") |
|||
dev := startAndLogin(t, ctx, tgt, iscsi, host) |
|||
|
|||
// Write 1MB of random data
|
|||
_, _, code, err := clientNode.RunRoot(ctx, |
|||
fmt.Sprintf("dd if=/dev/urandom of=%s bs=1M count=1 oflag=direct 2>/dev/null", dev)) |
|||
if err != nil || code != 0 { |
|||
t.Fatalf("dd write: code=%d err=%v", code, err) |
|||
} |
|||
|
|||
// Read back and checksum
|
|||
sum1, _, _, _ := clientNode.RunRoot(ctx, |
|||
fmt.Sprintf("dd if=%s bs=1M count=1 iflag=direct 2>/dev/null | md5sum", dev)) |
|||
sum2, _, _, _ := clientNode.RunRoot(ctx, |
|||
fmt.Sprintf("dd if=%s bs=1M count=1 iflag=direct 2>/dev/null | md5sum", dev)) |
|||
|
|||
s1 := firstLine(sum1) |
|||
s2 := firstLine(sum2) |
|||
if s1 != s2 { |
|||
t.Fatalf("checksum mismatch: %s vs %s", s1, s2) |
|||
} |
|||
t.Logf("checksums match: %s", s1) |
|||
} |
|||
|
|||
func testSmokeMkfsExt4(t *testing.T) { |
|||
testSmokeMkfs(t, "ext4", "mkfs.ext4", "100M") |
|||
} |
|||
|
|||
func testSmokeMkfsXfs(t *testing.T) { |
|||
t.Skip("P3-BUG-11: WRITE SAME(16) not implemented, XFS sends it for inode zeroing") |
|||
if !clientNode.HasCommand("mkfs.xfs") { |
|||
t.Skip("mkfs.xfs not available") |
|||
} |
|||
testSmokeMkfs(t, "xfs", "mkfs.xfs", "500M") // XFS needs >= 300MB
|
|||
} |
|||
|
|||
func testSmokeMkfs(t *testing.T, fstype, mkfsCmd, volSize string) { |
|||
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute) |
|||
defer cancel() |
|||
|
|||
tgt, iscsi, host := newTestTarget(t, volSize, "") |
|||
dev := startAndLogin(t, ctx, tgt, iscsi, host) |
|||
mnt := "/tmp/blockvol-mnt" |
|||
|
|||
t.Cleanup(func() { |
|||
cleanCtx, cleanCancel := context.WithTimeout(context.Background(), 10*time.Second) |
|||
defer cleanCancel() |
|||
clientNode.RunRoot(cleanCtx, fmt.Sprintf("umount -f %s 2>/dev/null", mnt)) |
|||
clientNode.RunRoot(cleanCtx, fmt.Sprintf("rm -rf %s", mnt)) |
|||
}) |
|||
|
|||
// mkfs
|
|||
mkfsArgs := " -F" // ext4: force, xfs: force overwrite
|
|||
if fstype == "xfs" { |
|||
mkfsArgs = " -f" |
|||
} |
|||
_, stderr, code, err := clientNode.RunRoot(ctx, |
|||
fmt.Sprintf("%s%s %s", mkfsCmd, mkfsArgs, dev)) |
|||
if err != nil || code != 0 { |
|||
t.Fatalf("mkfs.%s: code=%d stderr=%s err=%v", fstype, code, stderr, err) |
|||
} |
|||
|
|||
// Mount, write, unmount
|
|||
clientNode.RunRoot(ctx, fmt.Sprintf("mkdir -p %s", mnt)) |
|||
_, _, code, _ = clientNode.RunRoot(ctx, fmt.Sprintf("mount %s %s", dev, mnt)) |
|||
if code != 0 { |
|||
t.Fatalf("mount failed") |
|||
} |
|||
|
|||
testContent := "blockvol-integration-test-data" |
|||
// Use bash -c with tee to ensure redirect works under sudo
|
|||
clientNode.RunRoot(ctx, fmt.Sprintf("bash -c 'echo %s | tee %s/testfile.txt'", testContent, mnt)) |
|||
clientNode.RunRoot(ctx, "sync") |
|||
clientNode.RunRoot(ctx, fmt.Sprintf("umount %s", mnt)) |
|||
|
|||
// Brief pause to let device settle after unmount
|
|||
time.Sleep(1 * time.Second) |
|||
|
|||
// Remount and verify
|
|||
mountOpts := "" |
|||
if fstype == "xfs" { |
|||
mountOpts = "-o nouuid" // avoid UUID conflict with stale kernel state
|
|||
} |
|||
_, stderr2, code, _ := clientNode.RunRoot(ctx, fmt.Sprintf("mount %s %s %s", mountOpts, dev, mnt)) |
|||
if code != 0 { |
|||
t.Fatalf("remount failed: %s", stderr2) |
|||
} |
|||
|
|||
stdout, _, _, _ := clientNode.RunRoot(ctx, fmt.Sprintf("cat %s/testfile.txt", mnt)) |
|||
if !strings.Contains(stdout, testContent) { |
|||
t.Fatalf("file content mismatch: got %q, want %q", stdout, testContent) |
|||
} |
|||
t.Logf("%s: file persists across mount cycles", fstype) |
|||
} |
|||
|
|||
func testSmokeFioVerify(t *testing.T) { |
|||
if !clientNode.HasCommand("fio") { |
|||
t.Skip("fio not available") |
|||
} |
|||
|
|||
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute) |
|||
defer cancel() |
|||
|
|||
tgt, iscsi, host := newTestTarget(t, "100M", "") |
|||
dev := startAndLogin(t, ctx, tgt, iscsi, host) |
|||
|
|||
cmd := fmt.Sprintf("fio --name=verify --filename=%s --rw=randrw --verify=crc32 "+ |
|||
"--bs=4k --size=50M --randrepeat=1 --direct=1 --ioengine=libaio "+ |
|||
"--runtime=60 --time_based=0 --output-format=json", dev) |
|||
stdout, stderr, code, err := clientNode.RunRoot(ctx, cmd) |
|||
if err != nil || code != 0 { |
|||
t.Fatalf("fio: code=%d stderr=%s err=%v", code, stderr, err) |
|||
} |
|||
|
|||
if strings.Contains(stdout, "\"verify_errors\"") && !strings.Contains(stdout, "\"verify_errors\" : 0") { |
|||
t.Fatalf("fio verify errors detected") |
|||
} |
|||
t.Log("fio verify passed with 0 errors") |
|||
} |
|||
|
|||
func testSmokeLogoutClean(t *testing.T) { |
|||
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute) |
|||
defer cancel() |
|||
|
|||
tgt, iscsi, host := newTestTarget(t, "50M", "") |
|||
_ = startAndLogin(t, ctx, tgt, iscsi, host) |
|||
|
|||
// Logout
|
|||
if err := iscsi.Logout(ctx, tgt.config.IQN); err != nil { |
|||
t.Fatalf("logout: %v", err) |
|||
} |
|||
|
|||
// Verify no stale sessions
|
|||
stdout, _, _, _ := clientNode.RunRoot(ctx, "iscsiadm -m session 2>&1") |
|||
if strings.Contains(stdout, tgt.config.IQN) { |
|||
t.Fatalf("stale session found after logout: %s", stdout) |
|||
} |
|||
t.Log("no stale sessions after logout") |
|||
} |
|||
@ -0,0 +1,182 @@ |
|||
//go:build integration
|
|||
|
|||
package test |
|||
|
|||
import ( |
|||
"context" |
|||
"fmt" |
|||
"strings" |
|||
"testing" |
|||
"time" |
|||
) |
|||
|
|||
func TestStress(t *testing.T) { |
|||
if !clientNode.HasCommand("fio") { |
|||
t.Skip("fio required for stress tests") |
|||
} |
|||
t.Run("Fio5Min", testStressFio5Min) |
|||
t.Run("WALPressure", testStressWALPressure) |
|||
t.Run("SyncBatch", testStressSyncBatch) |
|||
t.Run("TarExtract", testStressTarExtract) |
|||
t.Run("Soak30Min", testStressSoak30Min) |
|||
t.Run("MixedBlockSize", testStressMixedBlockSize) |
|||
} |
|||
|
|||
func testStressFio5Min(t *testing.T) { |
|||
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute) |
|||
defer cancel() |
|||
|
|||
tgt, iscsi, host := newTestTarget(t, "200M", "") |
|||
dev := startAndLogin(t, ctx, tgt, iscsi, host) |
|||
|
|||
runtime := 300 |
|||
if testing.Short() { |
|||
runtime = 30 |
|||
} |
|||
stdout, stderr, code, _ := clientNode.RunRoot(ctx, |
|||
fmt.Sprintf("fio --name=stress5m --filename=%s --rw=randrw --verify=crc32 "+ |
|||
"--bs=4k --size=100M --randrepeat=1 --direct=1 --ioengine=libaio "+ |
|||
"--runtime=%d --time_based --output-format=json", dev, runtime)) |
|||
if code != 0 { |
|||
t.Fatalf("fio: code=%d stderr=%s", code, stderr) |
|||
} |
|||
if strings.Contains(stdout, "\"verify_errors\"") && !strings.Contains(stdout, "\"verify_errors\" : 0") { |
|||
t.Fatal("fio verify errors") |
|||
} |
|||
t.Log("5-minute fio randrw+verify passed") |
|||
} |
|||
|
|||
func testStressWALPressure(t *testing.T) { |
|||
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute) |
|||
defer cancel() |
|||
|
|||
tgt, iscsi, host := newTestTarget(t, "100M", "4M") // small WAL
|
|||
dev := startAndLogin(t, ctx, tgt, iscsi, host) |
|||
|
|||
// Write more than WAL size to force WAL wrap
|
|||
stdout, stderr, code, _ := clientNode.RunRoot(ctx, |
|||
fmt.Sprintf("fio --name=walpressure --filename=%s --rw=write --bs=64k "+ |
|||
"--size=50M --direct=1 --ioengine=libaio", dev)) |
|||
if code != 0 { |
|||
t.Fatalf("fio: code=%d stderr=%s stdout=%s", code, stderr, stdout) |
|||
} |
|||
t.Log("WAL pressure test passed (4MB WAL, 50MB write)") |
|||
} |
|||
|
|||
func testStressSyncBatch(t *testing.T) { |
|||
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute) |
|||
defer cancel() |
|||
|
|||
tgt, iscsi, host := newTestTarget(t, "100M", "") |
|||
dev := startAndLogin(t, ctx, tgt, iscsi, host) |
|||
|
|||
stdout, stderr, code, _ := clientNode.RunRoot(ctx, |
|||
fmt.Sprintf("fio --name=syncbatch --filename=%s --rw=randwrite --bs=4k "+ |
|||
"--size=50M --fdatasync=1 --numjobs=16 --direct=1 --ioengine=libaio "+ |
|||
"--runtime=60 --time_based --group_reporting --output-format=json", dev)) |
|||
if code != 0 { |
|||
t.Fatalf("fio: code=%d stderr=%s", code, stderr) |
|||
} |
|||
|
|||
// Extract IOPS from output
|
|||
if idx := strings.Index(stdout, "\"iops\""); idx >= 0 { |
|||
end := idx + 30 |
|||
if end > len(stdout) { |
|||
end = len(stdout) |
|||
} |
|||
t.Logf("sync batch IOPS: %s...", stdout[idx:end]) |
|||
} |
|||
t.Log("sync batch test passed (16 jobs, fdatasync)") |
|||
} |
|||
|
|||
func testStressTarExtract(t *testing.T) { |
|||
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute) |
|||
defer cancel() |
|||
|
|||
tgt, iscsi, host := newTestTarget(t, "200M", "") |
|||
dev := startAndLogin(t, ctx, tgt, iscsi, host) |
|||
mnt := "/tmp/blockvol-mnt" |
|||
|
|||
t.Cleanup(func() { |
|||
cleanCtx, cleanCancel := context.WithTimeout(context.Background(), 10*time.Second) |
|||
defer cleanCancel() |
|||
clientNode.RunRoot(cleanCtx, fmt.Sprintf("umount -f %s 2>/dev/null", mnt)) |
|||
}) |
|||
|
|||
// mkfs + mount
|
|||
clientNode.RunRoot(ctx, fmt.Sprintf("mkfs.ext4 -F %s", dev)) |
|||
clientNode.RunRoot(ctx, fmt.Sprintf("mkdir -p %s", mnt)) |
|||
_, _, code, _ := clientNode.RunRoot(ctx, fmt.Sprintf("mount %s %s", dev, mnt)) |
|||
if code != 0 { |
|||
t.Fatalf("mount failed") |
|||
} |
|||
|
|||
// Create a tarball with known content, extract, verify
|
|||
clientNode.RunRoot(ctx, fmt.Sprintf("mkdir -p %s/src", mnt)) |
|||
for i := 0; i < 100; i++ { |
|||
clientNode.RunRoot(ctx, fmt.Sprintf("dd if=/dev/urandom of=%s/src/file%d bs=1k count=10 2>/dev/null", mnt, i)) |
|||
} |
|||
|
|||
// Tar and extract
|
|||
clientNode.RunRoot(ctx, fmt.Sprintf("cd %s && tar cf archive.tar src/", mnt)) |
|||
clientNode.RunRoot(ctx, fmt.Sprintf("mkdir -p %s/dst && cd %s/dst && tar xf %s/archive.tar", mnt, mnt, mnt)) |
|||
|
|||
// Verify
|
|||
sum1, _, _, _ := clientNode.RunRoot(ctx, fmt.Sprintf("cd %s/src && find . -type f -exec md5sum {} \\; | sort", mnt)) |
|||
sum2, _, _, _ := clientNode.RunRoot(ctx, fmt.Sprintf("cd %s/dst/src && find . -type f -exec md5sum {} \\; | sort", mnt)) |
|||
if sum1 != sum2 { |
|||
t.Fatalf("tar extract checksums differ") |
|||
} |
|||
t.Log("tar extract + verify passed (100 files)") |
|||
} |
|||
|
|||
func testStressSoak30Min(t *testing.T) { |
|||
if testing.Short() { |
|||
t.Skip("skipping 30-minute soak in short mode") |
|||
} |
|||
|
|||
ctx, cancel := context.WithTimeout(context.Background(), 35*time.Minute) |
|||
defer cancel() |
|||
|
|||
tgt, iscsi, host := newTestTarget(t, "200M", "") |
|||
dev := startAndLogin(t, ctx, tgt, iscsi, host) |
|||
|
|||
soakTime := 1800 |
|||
if testing.Short() { |
|||
soakTime = 60 |
|||
} |
|||
stdout, stderr, code, _ := clientNode.RunRoot(ctx, |
|||
fmt.Sprintf("fio --name=soak --filename=%s --rw=randrw --verify=crc32 "+ |
|||
"--bs=4k --size=100M --randrepeat=1 --direct=1 --ioengine=libaio "+ |
|||
"--runtime=%d --time_based --output-format=json", dev, soakTime)) |
|||
if code != 0 { |
|||
t.Fatalf("fio: code=%d stderr=%s", code, stderr) |
|||
} |
|||
if strings.Contains(stdout, "\"verify_errors\"") && !strings.Contains(stdout, "\"verify_errors\" : 0") { |
|||
t.Fatal("fio verify errors during 30-min soak") |
|||
} |
|||
t.Log("30-minute soak passed") |
|||
} |
|||
|
|||
func testStressMixedBlockSize(t *testing.T) { |
|||
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute) |
|||
defer cancel() |
|||
|
|||
tgt, iscsi, host := newTestTarget(t, "200M", "") |
|||
dev := startAndLogin(t, ctx, tgt, iscsi, host) |
|||
|
|||
sizes := []string{"4k", "64k", "1M"} // 512 below logical block size (4096)
|
|||
for _, bs := range sizes { |
|||
t.Logf("testing bs=%s", bs) |
|||
stdout, stderr, code, _ := clientNode.RunRoot(ctx, |
|||
fmt.Sprintf("fio --name=mixed_%s --filename=%s --rw=randrw --verify=crc32 "+ |
|||
"--bs=%s --size=20M --randrepeat=1 --direct=1 --ioengine=libaio", bs, dev, bs)) |
|||
if code != 0 { |
|||
t.Fatalf("fio bs=%s: code=%d stderr=%s", bs, code, stderr) |
|||
} |
|||
if strings.Contains(stdout, "\"verify_errors\"") && !strings.Contains(stdout, "\"verify_errors\" : 0") { |
|||
t.Fatalf("fio verify errors at bs=%s", bs) |
|||
} |
|||
} |
|||
t.Log("mixed block size test passed (512, 4k, 64k, 1M)") |
|||
} |
|||
@ -0,0 +1,212 @@ |
|||
//go:build integration
|
|||
|
|||
package test |
|||
|
|||
import ( |
|||
"context" |
|||
"fmt" |
|||
"os" |
|||
"os/exec" |
|||
"strconv" |
|||
"strings" |
|||
"time" |
|||
) |
|||
|
|||
// WeedTarget manages the lifecycle of a `weed volume --block.listen` process.
|
|||
// Unlike Target (standalone iscsi-target binary), this builds and runs the
|
|||
// full weed binary with block volume support.
|
|||
type WeedTarget struct { |
|||
node *Node |
|||
config TargetConfig |
|||
binPath string // remote path to weed binary
|
|||
pid int |
|||
logFile string |
|||
blockDir string // remote dir containing .blk files
|
|||
volFile string // remote path to the .blk file
|
|||
iqnPrefix string |
|||
} |
|||
|
|||
// NewWeedTarget creates a WeedTarget bound to a node.
|
|||
func NewWeedTarget(node *Node, config TargetConfig) *WeedTarget { |
|||
return &WeedTarget{ |
|||
node: node, |
|||
config: config, |
|||
binPath: "/tmp/weed-test", |
|||
logFile: "/tmp/weed-test.log", |
|||
blockDir: "/tmp/blockvol-weedtest", |
|||
iqnPrefix: "iqn.2024-01.com.seaweedfs:vol.", |
|||
} |
|||
} |
|||
|
|||
// Build cross-compiles the weed binary for linux/amd64.
|
|||
func (t *WeedTarget) Build(ctx context.Context, repoDir string) error { |
|||
binDir := repoDir + "/weed" |
|||
outPath := repoDir + "/weed-linux" |
|||
|
|||
cmd := exec.CommandContext(ctx, "go", "build", "-o", outPath, ".") |
|||
cmd.Dir = binDir |
|||
cmd.Env = append(os.Environ(), "GOOS=linux", "GOARCH=amd64", "CGO_ENABLED=0") |
|||
out, err := cmd.CombinedOutput() |
|||
if err != nil { |
|||
return fmt.Errorf("build weed failed: %s\n%w", out, err) |
|||
} |
|||
return nil |
|||
} |
|||
|
|||
// Deploy uploads the pre-built weed binary to the target node.
|
|||
func (t *WeedTarget) Deploy(localBin string) error { |
|||
return t.node.Upload(localBin, t.binPath) |
|||
} |
|||
|
|||
// Start launches `weed volume --block.listen`. If create is true, creates
|
|||
// the block directory and volume file first.
|
|||
func (t *WeedTarget) Start(ctx context.Context, create bool) error { |
|||
// Remove old log
|
|||
t.node.Run(ctx, fmt.Sprintf("rm -f %s", t.logFile)) |
|||
|
|||
if create { |
|||
// Create block directory and volume file
|
|||
t.node.Run(ctx, fmt.Sprintf("rm -rf %s", t.blockDir)) |
|||
t.node.Run(ctx, fmt.Sprintf("mkdir -p %s", t.blockDir)) |
|||
|
|||
// Derive volume name from IQN suffix
|
|||
volName := t.volName() |
|||
t.volFile = fmt.Sprintf("%s/%s.blk", t.blockDir, volName) |
|||
|
|||
// Create the .blk file (truncate to size)
|
|||
_, _, code, err := t.node.Run(ctx, |
|||
fmt.Sprintf("truncate -s %s %s", t.config.VolSize, t.volFile)) |
|||
if err != nil || code != 0 { |
|||
return fmt.Errorf("create volume file: code=%d err=%v", code, err) |
|||
} |
|||
} |
|||
|
|||
// Start weed volume with block support
|
|||
args := fmt.Sprintf("volume -port=19333 -block.listen=:%d -block.dir=%s", |
|||
t.config.Port, t.blockDir) |
|||
|
|||
cmd := fmt.Sprintf("setsid -f %s %s >%s 2>&1", t.binPath, args, t.logFile) |
|||
_, stderr, code, err := t.node.Run(ctx, cmd) |
|||
if err != nil || code != 0 { |
|||
return fmt.Errorf("start weed volume: code=%d stderr=%s err=%v", code, stderr, err) |
|||
} |
|||
|
|||
// Wait for iSCSI port
|
|||
if err := t.WaitForPort(ctx); err != nil { |
|||
return err |
|||
} |
|||
|
|||
// Discover PID
|
|||
stdout, _, _, _ := t.node.Run(ctx, |
|||
fmt.Sprintf("ps -eo pid,args | grep '%s' | grep -v grep | awk '{print $1}'", t.binPath)) |
|||
pidStr := strings.TrimSpace(stdout) |
|||
if idx := strings.IndexByte(pidStr, '\n'); idx > 0 { |
|||
pidStr = pidStr[:idx] |
|||
} |
|||
pid, err := strconv.Atoi(pidStr) |
|||
if err != nil { |
|||
return fmt.Errorf("find weed PID: %q: %w", pidStr, err) |
|||
} |
|||
t.pid = pid |
|||
return nil |
|||
} |
|||
|
|||
// Stop sends SIGTERM, waits up to 10s, then Kill9.
|
|||
func (t *WeedTarget) Stop(ctx context.Context) error { |
|||
if t.pid == 0 { |
|||
return nil |
|||
} |
|||
|
|||
t.node.Run(ctx, fmt.Sprintf("kill %d", t.pid)) |
|||
|
|||
deadline := time.Now().Add(10 * time.Second) |
|||
for time.Now().Before(deadline) { |
|||
_, _, code, _ := t.node.Run(ctx, fmt.Sprintf("kill -0 %d 2>/dev/null", t.pid)) |
|||
if code != 0 { |
|||
t.pid = 0 |
|||
return nil |
|||
} |
|||
time.Sleep(500 * time.Millisecond) |
|||
} |
|||
|
|||
return t.Kill9() |
|||
} |
|||
|
|||
// Kill9 sends SIGKILL immediately.
|
|||
func (t *WeedTarget) Kill9() error { |
|||
if t.pid == 0 { |
|||
return nil |
|||
} |
|||
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) |
|||
defer cancel() |
|||
t.node.Run(ctx, fmt.Sprintf("kill -9 %d", t.pid)) |
|||
t.pid = 0 |
|||
return nil |
|||
} |
|||
|
|||
// Restart stops and starts weed volume (preserving the volume file).
|
|||
func (t *WeedTarget) Restart(ctx context.Context) error { |
|||
if err := t.Stop(ctx); err != nil { |
|||
return fmt.Errorf("restart stop: %w", err) |
|||
} |
|||
return t.Start(ctx, false) |
|||
} |
|||
|
|||
// WaitForPort polls until the iSCSI port is listening.
|
|||
func (t *WeedTarget) WaitForPort(ctx context.Context) error { |
|||
for { |
|||
select { |
|||
case <-ctx.Done(): |
|||
return fmt.Errorf("wait for port %d: %w", t.config.Port, ctx.Err()) |
|||
default: |
|||
} |
|||
|
|||
stdout, _, code, _ := t.node.Run(ctx, fmt.Sprintf("ss -tln | grep :%d", t.config.Port)) |
|||
if code == 0 && strings.Contains(stdout, fmt.Sprintf(":%d", t.config.Port)) { |
|||
return nil |
|||
} |
|||
time.Sleep(200 * time.Millisecond) |
|||
} |
|||
} |
|||
|
|||
// CollectLog downloads the log file contents.
|
|||
func (t *WeedTarget) CollectLog() (string, error) { |
|||
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) |
|||
defer cancel() |
|||
stdout, _, _, err := t.node.Run(ctx, fmt.Sprintf("cat %s 2>/dev/null", t.logFile)) |
|||
if err != nil { |
|||
return "", err |
|||
} |
|||
return stdout, nil |
|||
} |
|||
|
|||
// Cleanup removes the block directory, volume files, and log.
|
|||
func (t *WeedTarget) Cleanup(ctx context.Context) { |
|||
t.node.Run(ctx, fmt.Sprintf("rm -rf %s %s", t.blockDir, t.logFile)) |
|||
} |
|||
|
|||
// IQN returns the expected IQN for the volume.
|
|||
func (t *WeedTarget) IQN() string { |
|||
return t.iqnPrefix + t.volName() |
|||
} |
|||
|
|||
// volName derives the volume name from the config IQN or a default.
|
|||
func (t *WeedTarget) volName() string { |
|||
// Use IQN suffix if set, otherwise "test"
|
|||
if t.config.IQN != "" { |
|||
parts := strings.Split(t.config.IQN, ":") |
|||
if len(parts) > 1 { |
|||
return parts[len(parts)-1] |
|||
} |
|||
} |
|||
return "test" |
|||
} |
|||
|
|||
// PID returns the current process ID.
|
|||
func (t *WeedTarget) PID() int { return t.pid } |
|||
|
|||
// VolFilePath returns the remote volume file path.
|
|||
func (t *WeedTarget) VolFilePath() string { return t.volFile } |
|||
|
|||
// LogFile returns the remote log file path.
|
|||
func (t *WeedTarget) LogFile() string { return t.logFile } |
|||
@ -0,0 +1,736 @@ |
|||
//go:build integration
|
|||
|
|||
package test |
|||
|
|||
import ( |
|||
"context" |
|||
"fmt" |
|||
"os" |
|||
"path/filepath" |
|||
"strings" |
|||
"testing" |
|||
"time" |
|||
) |
|||
|
|||
// weedBinary is built once in TestWeedVol and reused across subtests.
|
|||
var weedBinary string |
|||
|
|||
func TestWeedVol(t *testing.T) { |
|||
ctx, cancel := context.WithTimeout(context.Background(), 3*time.Minute) |
|||
defer cancel() |
|||
|
|||
// Build weed binary once
|
|||
repoDir := *flagRepoDir |
|||
t.Log("building weed binary...") |
|||
wt := NewWeedTarget(targetNode, DefaultTargetConfig()) |
|||
if err := wt.Build(ctx, repoDir); err != nil { |
|||
t.Fatalf("build weed: %v", err) |
|||
} |
|||
weedBinary = repoDir + "/weed-linux" |
|||
if err := wt.Deploy(weedBinary); err != nil { |
|||
t.Fatalf("deploy weed: %v", err) |
|||
} |
|||
t.Log("weed binary built and deployed") |
|||
|
|||
// 3B-1: Smoke
|
|||
t.Run("Discovery", testWeedVolDiscovery) |
|||
t.Run("LoginIO", testWeedVolLoginIO) |
|||
t.Run("MkfsExt4", testWeedVolMkfsExt4) |
|||
t.Run("FioVerify", testWeedVolFioVerify) |
|||
t.Run("Heartbeat", testWeedVolHeartbeat) |
|||
t.Run("AttachScript", testWeedVolAttachScript) |
|||
|
|||
// 3B-2: WAL Pressure
|
|||
t.Run("PressureSustained", testWeedVolPressureSustained) |
|||
t.Run("PressureSync", testWeedVolPressureSync) |
|||
t.Run("PressureCrash", testWeedVolPressureCrash) |
|||
t.Run("PressureBatch", testWeedVolPressureBatch) |
|||
|
|||
// 3B-3: Chaos
|
|||
t.Run("MonkeyReconnect", testWeedVolMonkeyReconnect) |
|||
t.Run("MonkeyMultiVol", testWeedVolMonkeyMultiVol) |
|||
t.Run("MonkeyConfigRestart", testWeedVolMonkeyConfigRestart) |
|||
t.Run("MonkeyAttachDetach", testWeedVolMonkeyAttachDetach) |
|||
t.Run("MonkeyWALFull", testWeedVolMonkeyWALFull) |
|||
|
|||
// 3B-4: Filesystem Stress
|
|||
t.Run("FsMkfsExt4Stress", testWeedVolFsMkfsStress) |
|||
t.Run("FsTarExtract", testWeedVolFsTarExtract) |
|||
t.Run("FsLongSoak", testWeedVolFsLongSoak) |
|||
t.Run("FsPostgres", testWeedVolFsPostgres) |
|||
t.Run("FsFsstress", testWeedVolFsFsstress) |
|||
} |
|||
|
|||
// newWeedTestTarget creates a WeedTarget with test-specific config and cleanup.
|
|||
func newWeedTestTarget(t *testing.T, volSize string) (*WeedTarget, *ISCSIClient, string) { |
|||
cfg := DefaultTargetConfig() |
|||
name := strings.ReplaceAll(t.Name(), "/", "-") |
|||
cfg.IQN = "weedvol:" + strings.ToLower(name) |
|||
if volSize != "" { |
|||
cfg.VolSize = volSize |
|||
} |
|||
|
|||
wt := NewWeedTarget(targetNode, cfg) |
|||
iscsiC := NewISCSIClient(clientNode) |
|||
host := targetHost() |
|||
|
|||
t.Cleanup(func() { |
|||
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) |
|||
defer cancel() |
|||
iscsiC.Logout(ctx, wt.IQN()) |
|||
iscsiC.CleanupAll(ctx, wt.iqnPrefix) |
|||
wt.Stop(ctx) |
|||
wt.Cleanup(ctx) |
|||
}) |
|||
t.Cleanup(func() { artifacts.Collect(t, wt) }) |
|||
|
|||
return wt, iscsiC, host |
|||
} |
|||
|
|||
// startAndLoginWeed creates vol, starts weed volume, discovers, logs in.
|
|||
func startAndLoginWeed(t *testing.T, ctx context.Context, wt *WeedTarget, iscsiC *ISCSIClient, host string) string { |
|||
t.Helper() |
|||
if err := wt.Start(ctx, true); err != nil { |
|||
t.Fatalf("start weed: %v", err) |
|||
} |
|||
if _, err := iscsiC.Discover(ctx, host, wt.config.Port); err != nil { |
|||
t.Fatalf("discover: %v", err) |
|||
} |
|||
dev, err := iscsiC.Login(ctx, wt.IQN()) |
|||
if err != nil { |
|||
t.Fatalf("login: %v", err) |
|||
} |
|||
return dev |
|||
} |
|||
|
|||
// ============================================================
|
|||
// 3B-1: Smoke Tests
|
|||
// ============================================================
|
|||
|
|||
func testWeedVolDiscovery(t *testing.T) { |
|||
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute) |
|||
defer cancel() |
|||
|
|||
wt, iscsiC, host := newWeedTestTarget(t, "50M") |
|||
if err := wt.Start(ctx, true); err != nil { |
|||
t.Fatalf("start: %v", err) |
|||
} |
|||
|
|||
iqns, err := iscsiC.Discover(ctx, host, wt.config.Port) |
|||
if err != nil { |
|||
t.Fatalf("discover: %v", err) |
|||
} |
|||
|
|||
found := false |
|||
for _, iqn := range iqns { |
|||
if iqn == wt.IQN() { |
|||
found = true |
|||
} |
|||
} |
|||
if !found { |
|||
t.Fatalf("IQN %s not found in discovery: %v", wt.IQN(), iqns) |
|||
} |
|||
t.Logf("discovered IQN: %s", wt.IQN()) |
|||
} |
|||
|
|||
func testWeedVolLoginIO(t *testing.T) { |
|||
ctx, cancel := context.WithTimeout(context.Background(), 3*time.Minute) |
|||
defer cancel() |
|||
|
|||
wt, iscsiC, host := newWeedTestTarget(t, "50M") |
|||
dev := startAndLoginWeed(t, ctx, wt, iscsiC, host) |
|||
|
|||
// Write 1MB + read back + verify
|
|||
_, _, code, _ := clientNode.RunRoot(ctx, |
|||
fmt.Sprintf("dd if=/dev/urandom of=%s bs=4k count=1000 oflag=direct 2>/dev/null", dev)) |
|||
if code != 0 { |
|||
t.Fatalf("dd write failed") |
|||
} |
|||
|
|||
sum1, _, _, _ := clientNode.RunRoot(ctx, |
|||
fmt.Sprintf("dd if=%s bs=4k count=1000 iflag=direct 2>/dev/null | md5sum", dev)) |
|||
sum2, _, _, _ := clientNode.RunRoot(ctx, |
|||
fmt.Sprintf("dd if=%s bs=4k count=1000 iflag=direct 2>/dev/null | md5sum", dev)) |
|||
|
|||
if firstLine(sum1) != firstLine(sum2) { |
|||
t.Fatalf("checksum mismatch: %s vs %s", firstLine(sum1), firstLine(sum2)) |
|||
} |
|||
t.Logf("checksums match: %s", firstLine(sum1)) |
|||
} |
|||
|
|||
func testWeedVolMkfsExt4(t *testing.T) { |
|||
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute) |
|||
defer cancel() |
|||
|
|||
wt, iscsiC, host := newWeedTestTarget(t, "100M") |
|||
dev := startAndLoginWeed(t, ctx, wt, iscsiC, host) |
|||
mnt := "/tmp/blockvol-mnt" |
|||
|
|||
t.Cleanup(func() { |
|||
cctx, cc := context.WithTimeout(context.Background(), 10*time.Second) |
|||
defer cc() |
|||
clientNode.RunRoot(cctx, fmt.Sprintf("umount -f %s 2>/dev/null", mnt)) |
|||
}) |
|||
|
|||
// mkfs + mount + write + unmount + remount + verify
|
|||
_, stderr, code, _ := clientNode.RunRoot(ctx, fmt.Sprintf("mkfs.ext4 -F %s", dev)) |
|||
if code != 0 { |
|||
t.Fatalf("mkfs.ext4 failed: %s", stderr) |
|||
} |
|||
|
|||
clientNode.RunRoot(ctx, fmt.Sprintf("mkdir -p %s", mnt)) |
|||
_, _, code, _ = clientNode.RunRoot(ctx, fmt.Sprintf("mount %s %s", dev, mnt)) |
|||
if code != 0 { |
|||
t.Fatalf("mount failed") |
|||
} |
|||
|
|||
clientNode.RunRoot(ctx, fmt.Sprintf("bash -c 'echo weedvol-test-data | tee %s/testfile.txt'", mnt)) |
|||
clientNode.RunRoot(ctx, "sync") |
|||
clientNode.RunRoot(ctx, fmt.Sprintf("umount %s", mnt)) |
|||
|
|||
time.Sleep(1 * time.Second) |
|||
_, _, code, _ = clientNode.RunRoot(ctx, fmt.Sprintf("mount %s %s", dev, mnt)) |
|||
if code != 0 { |
|||
t.Fatalf("remount failed") |
|||
} |
|||
|
|||
stdout, _, _, _ := clientNode.RunRoot(ctx, fmt.Sprintf("cat %s/testfile.txt", mnt)) |
|||
if !strings.Contains(stdout, "weedvol-test-data") { |
|||
t.Fatalf("file content mismatch: %q", stdout) |
|||
} |
|||
t.Log("ext4: file persists across mount cycles via weed volume") |
|||
} |
|||
|
|||
func testWeedVolFioVerify(t *testing.T) { |
|||
if !clientNode.HasCommand("fio") { |
|||
t.Skip("fio required") |
|||
} |
|||
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute) |
|||
defer cancel() |
|||
|
|||
wt, iscsiC, host := newWeedTestTarget(t, "100M") |
|||
dev := startAndLoginWeed(t, ctx, wt, iscsiC, host) |
|||
|
|||
stdout, stderr, code, _ := clientNode.RunRoot(ctx, |
|||
fmt.Sprintf("fio --name=wv-verify --filename=%s --rw=randrw --verify=crc32 "+ |
|||
"--bs=4k --size=50M --randrepeat=1 --direct=1 --ioengine=libaio "+ |
|||
"--output-format=json", dev)) |
|||
if code != 0 { |
|||
t.Fatalf("fio: code=%d stderr=%s", code, stderr) |
|||
} |
|||
if strings.Contains(stdout, "\"verify_errors\"") && !strings.Contains(stdout, "\"verify_errors\" : 0") { |
|||
t.Fatal("fio verify errors") |
|||
} |
|||
t.Log("fio verify passed via weed volume") |
|||
} |
|||
|
|||
func testWeedVolHeartbeat(t *testing.T) { |
|||
// Heartbeat requires weed master running. Skip for now -- would need
|
|||
// a full master+volume setup. Test that the volume starts and serves.
|
|||
t.Skip("requires weed master for heartbeat verification") |
|||
} |
|||
|
|||
func testWeedVolAttachScript(t *testing.T) { |
|||
// The attach script requires weed master to look up volumes.
|
|||
// Skip for now -- script works via master API.
|
|||
t.Skip("requires weed master for attach script") |
|||
} |
|||
|
|||
// ============================================================
|
|||
// 3B-2: WAL Pressure + Group Commit
|
|||
// ============================================================
|
|||
|
|||
func testWeedVolPressureSustained(t *testing.T) { |
|||
if !clientNode.HasCommand("fio") { |
|||
t.Skip("fio required") |
|||
} |
|||
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute) |
|||
defer cancel() |
|||
|
|||
wt, iscsiC, host := newWeedTestTarget(t, "100M") |
|||
dev := startAndLoginWeed(t, ctx, wt, iscsiC, host) |
|||
|
|||
// Sustained write larger than default WAL
|
|||
_, stderr, code, _ := clientNode.RunRoot(ctx, |
|||
fmt.Sprintf("fio --name=wv-sustained --filename=%s --rw=write --bs=64k "+ |
|||
"--size=80M --direct=1 --ioengine=libaio", dev)) |
|||
if code != 0 { |
|||
t.Fatalf("fio: code=%d stderr=%s", code, stderr) |
|||
} |
|||
t.Log("sustained write pressure passed via weed volume") |
|||
} |
|||
|
|||
func testWeedVolPressureSync(t *testing.T) { |
|||
if !clientNode.HasCommand("fio") { |
|||
t.Skip("fio required") |
|||
} |
|||
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute) |
|||
defer cancel() |
|||
|
|||
wt, iscsiC, host := newWeedTestTarget(t, "100M") |
|||
dev := startAndLoginWeed(t, ctx, wt, iscsiC, host) |
|||
|
|||
runtime := 60 |
|||
if testing.Short() { |
|||
runtime = 15 |
|||
} |
|||
stdout, stderr, code, _ := clientNode.RunRoot(ctx, |
|||
fmt.Sprintf("fio --name=wv-sync --filename=%s --rw=randwrite --bs=4k "+ |
|||
"--size=50M --fdatasync=1 --numjobs=16 --direct=1 --ioengine=libaio "+ |
|||
"--runtime=%d --time_based --group_reporting --output-format=json", dev, runtime)) |
|||
if code != 0 { |
|||
t.Fatalf("fio: code=%d stderr=%s", code, stderr) |
|||
} |
|||
|
|||
if idx := strings.Index(stdout, "\"iops\""); idx >= 0 { |
|||
end := idx + 30 |
|||
if end > len(stdout) { |
|||
end = len(stdout) |
|||
} |
|||
t.Logf("sync batch IOPS: %s...", stdout[idx:end]) |
|||
} |
|||
t.Log("fdatasync pressure passed via weed volume") |
|||
} |
|||
|
|||
func testWeedVolPressureCrash(t *testing.T) { |
|||
if !clientNode.HasCommand("fio") { |
|||
t.Skip("fio required") |
|||
} |
|||
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute) |
|||
defer cancel() |
|||
|
|||
wt, iscsiC, host := newWeedTestTarget(t, "100M") |
|||
dev := startAndLoginWeed(t, ctx, wt, iscsiC, host) |
|||
|
|||
// Write with fdatasync
|
|||
_, _, code, _ := clientNode.RunRoot(ctx, |
|||
fmt.Sprintf("fio --name=wv-crash --filename=%s --rw=write --bs=4k --size=10M "+ |
|||
"--fdatasync=1 --direct=1 --ioengine=libaio", dev)) |
|||
if code != 0 { |
|||
t.Fatalf("fio write failed") |
|||
} |
|||
|
|||
// Record checksum
|
|||
sum1, _, _, _ := clientNode.RunRoot(ctx, |
|||
fmt.Sprintf("dd if=%s bs=4k count=2560 iflag=direct 2>/dev/null | md5sum", dev)) |
|||
|
|||
// Kill
|
|||
t.Log("killing weed volume...") |
|||
iscsiC.Logout(ctx, wt.IQN()) |
|||
iscsiC.CleanupAll(ctx, wt.iqnPrefix) |
|||
wt.Kill9() |
|||
|
|||
// Restart
|
|||
t.Log("restarting weed volume...") |
|||
if err := wt.Start(ctx, false); err != nil { |
|||
t.Fatalf("restart: %v", err) |
|||
} |
|||
|
|||
iscsiC.Discover(ctx, host, wt.config.Port) |
|||
dev, err := iscsiC.Login(ctx, wt.IQN()) |
|||
if err != nil { |
|||
t.Fatalf("re-login: %v", err) |
|||
} |
|||
|
|||
sum2, _, _, _ := clientNode.RunRoot(ctx, |
|||
fmt.Sprintf("dd if=%s bs=4k count=2560 iflag=direct 2>/dev/null | md5sum", dev)) |
|||
|
|||
if firstLine(sum1) != firstLine(sum2) { |
|||
t.Fatalf("synced data corrupted: %s vs %s", firstLine(sum1), firstLine(sum2)) |
|||
} |
|||
t.Log("crash recovery: synced data intact via weed volume") |
|||
} |
|||
|
|||
func testWeedVolPressureBatch(t *testing.T) { |
|||
if !clientNode.HasCommand("fio") { |
|||
t.Skip("fio required") |
|||
} |
|||
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute) |
|||
defer cancel() |
|||
|
|||
wt, iscsiC, host := newWeedTestTarget(t, "100M") |
|||
dev := startAndLoginWeed(t, ctx, wt, iscsiC, host) |
|||
|
|||
runtime := 30 |
|||
if testing.Short() { |
|||
runtime = 10 |
|||
} |
|||
// Heavy concurrent fdatasync -- should trigger group commit batching
|
|||
_, stderr, code, _ := clientNode.RunRoot(ctx, |
|||
fmt.Sprintf("fio --name=wv-batch --filename=%s --rw=randwrite --bs=4k "+ |
|||
"--size=50M --fdatasync=1 --numjobs=32 --direct=1 --ioengine=libaio "+ |
|||
"--runtime=%d --time_based --group_reporting", dev, runtime)) |
|||
if code != 0 { |
|||
t.Fatalf("fio: code=%d stderr=%s", code, stderr) |
|||
} |
|||
t.Log("group commit batch pressure passed via weed volume") |
|||
} |
|||
|
|||
// ============================================================
|
|||
// 3B-3: Chaos Monkey
|
|||
// ============================================================
|
|||
|
|||
func testWeedVolMonkeyReconnect(t *testing.T) { |
|||
ctx, cancel := context.WithTimeout(context.Background(), 15*time.Minute) |
|||
defer cancel() |
|||
|
|||
wt, iscsiC, host := newWeedTestTarget(t, "100M") |
|||
if err := wt.Start(ctx, true); err != nil { |
|||
t.Fatalf("start: %v", err) |
|||
} |
|||
|
|||
n := 10 |
|||
if testing.Short() { |
|||
n = 3 |
|||
} |
|||
for i := 0; i < n; i++ { |
|||
t.Logf("reconnect %d/%d", i+1, n) |
|||
|
|||
iscsiC.Discover(ctx, host, wt.config.Port) |
|||
dev, err := iscsiC.Login(ctx, wt.IQN()) |
|||
if err != nil { |
|||
t.Fatalf("iter %d login: %v", i, err) |
|||
} |
|||
|
|||
_, _, code, _ := clientNode.RunRoot(ctx, |
|||
fmt.Sprintf("dd if=/dev/urandom of=%s bs=1M count=1 oflag=direct 2>/dev/null", dev)) |
|||
if code != 0 { |
|||
t.Fatalf("iter %d dd write failed", i) |
|||
} |
|||
|
|||
if err := iscsiC.Logout(ctx, wt.IQN()); err != nil { |
|||
t.Fatalf("iter %d logout: %v", i, err) |
|||
} |
|||
time.Sleep(200 * time.Millisecond) |
|||
} |
|||
t.Logf("%dx reconnect completed via weed volume", n) |
|||
} |
|||
|
|||
func testWeedVolMonkeyMultiVol(t *testing.T) { |
|||
// Multi-volume: create 2 .blk files in block dir, verify both discoverable
|
|||
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute) |
|||
defer cancel() |
|||
|
|||
wt := NewWeedTarget(targetNode, DefaultTargetConfig()) |
|||
iscsiC := NewISCSIClient(clientNode) |
|||
host := targetHost() |
|||
|
|||
t.Cleanup(func() { |
|||
cctx, cc := context.WithTimeout(context.Background(), 30*time.Second) |
|||
defer cc() |
|||
iscsiC.CleanupAll(cctx, wt.iqnPrefix) |
|||
wt.Stop(cctx) |
|||
wt.Cleanup(cctx) |
|||
}) |
|||
t.Cleanup(func() { artifacts.Collect(t, wt) }) |
|||
|
|||
// Create block dir with 2 volume files
|
|||
wt.node.Run(ctx, fmt.Sprintf("rm -rf %s && mkdir -p %s", wt.blockDir, wt.blockDir)) |
|||
wt.node.Run(ctx, fmt.Sprintf("truncate -s 50M %s/vol1.blk", wt.blockDir)) |
|||
wt.node.Run(ctx, fmt.Sprintf("truncate -s 50M %s/vol2.blk", wt.blockDir)) |
|||
|
|||
if err := wt.Start(ctx, false); err != nil { |
|||
t.Fatalf("start: %v", err) |
|||
} |
|||
|
|||
iqns, err := iscsiC.Discover(ctx, host, wt.config.Port) |
|||
if err != nil { |
|||
t.Fatalf("discover: %v", err) |
|||
} |
|||
|
|||
iqn1 := wt.iqnPrefix + "vol1" |
|||
iqn2 := wt.iqnPrefix + "vol2" |
|||
found1, found2 := false, false |
|||
for _, iqn := range iqns { |
|||
if iqn == iqn1 { |
|||
found1 = true |
|||
} |
|||
if iqn == iqn2 { |
|||
found2 = true |
|||
} |
|||
} |
|||
if !found1 || !found2 { |
|||
t.Fatalf("expected both %s and %s in discovery, got: %v", iqn1, iqn2, iqns) |
|||
} |
|||
|
|||
// Login to both and do I/O
|
|||
dev1, err := iscsiC.Login(ctx, iqn1) |
|||
if err != nil { |
|||
t.Fatalf("login vol1: %v", err) |
|||
} |
|||
dev2, err := iscsiC.Login(ctx, iqn2) |
|||
if err != nil { |
|||
t.Fatalf("login vol2: %v", err) |
|||
} |
|||
|
|||
// Write different data to each
|
|||
clientNode.RunRoot(ctx, fmt.Sprintf("dd if=/dev/urandom of=%s bs=4k count=100 oflag=direct 2>/dev/null", dev1)) |
|||
clientNode.RunRoot(ctx, fmt.Sprintf("dd if=/dev/urandom of=%s bs=4k count=100 oflag=direct 2>/dev/null", dev2)) |
|||
|
|||
sum1, _, _, _ := clientNode.RunRoot(ctx, |
|||
fmt.Sprintf("dd if=%s bs=4k count=100 iflag=direct 2>/dev/null | md5sum", dev1)) |
|||
sum2, _, _, _ := clientNode.RunRoot(ctx, |
|||
fmt.Sprintf("dd if=%s bs=4k count=100 iflag=direct 2>/dev/null | md5sum", dev2)) |
|||
|
|||
if firstLine(sum1) == firstLine(sum2) { |
|||
t.Fatalf("volumes should have different data") |
|||
} |
|||
|
|||
iscsiC.Logout(ctx, iqn1) |
|||
iscsiC.Logout(ctx, iqn2) |
|||
t.Logf("2 volumes served independently: %s %s", dev1, dev2) |
|||
} |
|||
|
|||
func testWeedVolMonkeyConfigRestart(t *testing.T) { |
|||
if !clientNode.HasCommand("fio") { |
|||
t.Skip("fio required") |
|||
} |
|||
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute) |
|||
defer cancel() |
|||
|
|||
wt, iscsiC, host := newWeedTestTarget(t, "100M") |
|||
dev := startAndLoginWeed(t, ctx, wt, iscsiC, host) |
|||
|
|||
// fio phase 1
|
|||
_, _, code, _ := clientNode.RunRoot(ctx, |
|||
fmt.Sprintf("fio --name=wv-cfg1 --filename=%s --rw=randrw --bs=4k "+ |
|||
"--size=10M --direct=1 --ioengine=libaio --randrepeat=1", dev)) |
|||
if code != 0 { |
|||
t.Fatalf("fio phase 1 failed") |
|||
} |
|||
|
|||
// Logout + stop + restart
|
|||
iscsiC.Logout(ctx, wt.IQN()) |
|||
iscsiC.CleanupAll(ctx, wt.iqnPrefix) |
|||
wt.Stop(ctx) |
|||
|
|||
if err := wt.Start(ctx, false); err != nil { |
|||
t.Fatalf("restart: %v", err) |
|||
} |
|||
|
|||
iscsiC.Discover(ctx, host, wt.config.Port) |
|||
dev, err := iscsiC.Login(ctx, wt.IQN()) |
|||
if err != nil { |
|||
t.Fatalf("re-login: %v", err) |
|||
} |
|||
|
|||
// fio phase 2
|
|||
_, _, code, _ = clientNode.RunRoot(ctx, |
|||
fmt.Sprintf("fio --name=wv-cfg2 --filename=%s --rw=randrw --bs=4k "+ |
|||
"--size=10M --direct=1 --ioengine=libaio --randrepeat=1", dev)) |
|||
if code != 0 { |
|||
t.Fatalf("fio phase 2 failed") |
|||
} |
|||
t.Log("config restart passed via weed volume") |
|||
} |
|||
|
|||
func testWeedVolMonkeyAttachDetach(t *testing.T) { |
|||
if !clientNode.HasCommand("fio") { |
|||
t.Skip("fio required") |
|||
} |
|||
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute) |
|||
defer cancel() |
|||
|
|||
wt, iscsiC, host := newWeedTestTarget(t, "100M") |
|||
if err := wt.Start(ctx, true); err != nil { |
|||
t.Fatalf("start: %v", err) |
|||
} |
|||
|
|||
n := 5 |
|||
if testing.Short() { |
|||
n = 3 |
|||
} |
|||
for i := 0; i < n; i++ { |
|||
t.Logf("attach/detach %d/%d", i+1, n) |
|||
|
|||
iscsiC.Discover(ctx, host, wt.config.Port) |
|||
dev, err := iscsiC.Login(ctx, wt.IQN()) |
|||
if err != nil { |
|||
t.Fatalf("iter %d login: %v", i, err) |
|||
} |
|||
|
|||
_, _, code, _ := clientNode.RunRoot(ctx, |
|||
fmt.Sprintf("fio --name=wv-ad%d --filename=%s --rw=randrw --verify=crc32 "+ |
|||
"--bs=4k --size=10M --direct=1 --ioengine=libaio --randrepeat=1", i, dev)) |
|||
if code != 0 { |
|||
t.Fatalf("iter %d fio failed", i) |
|||
} |
|||
|
|||
if err := iscsiC.Logout(ctx, wt.IQN()); err != nil { |
|||
t.Fatalf("iter %d logout: %v", i, err) |
|||
} |
|||
time.Sleep(200 * time.Millisecond) |
|||
} |
|||
t.Logf("%dx attach/detach completed via weed volume", n) |
|||
} |
|||
|
|||
func testWeedVolMonkeyWALFull(t *testing.T) { |
|||
if !clientNode.HasCommand("fio") { |
|||
t.Skip("fio required") |
|||
} |
|||
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute) |
|||
defer cancel() |
|||
|
|||
// Use small volume to pressure WAL
|
|||
wt, iscsiC, host := newWeedTestTarget(t, "50M") |
|||
dev := startAndLoginWeed(t, ctx, wt, iscsiC, host) |
|||
|
|||
_, stderr, code, _ := clientNode.RunRoot(ctx, |
|||
fmt.Sprintf("fio --name=wv-walfull --filename=%s --rw=write --bs=64k "+ |
|||
"--size=40M --direct=1 --ioengine=libaio", dev)) |
|||
if code != 0 { |
|||
t.Fatalf("fio: code=%d stderr=%s", code, stderr) |
|||
} |
|||
t.Log("WAL full pressure passed via weed volume") |
|||
} |
|||
|
|||
// ============================================================
|
|||
// 3B-4: Filesystem Stress
|
|||
// ============================================================
|
|||
|
|||
func testWeedVolFsMkfsStress(t *testing.T) { |
|||
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute) |
|||
defer cancel() |
|||
|
|||
wt, iscsiC, host := newWeedTestTarget(t, "100M") |
|||
dev := startAndLoginWeed(t, ctx, wt, iscsiC, host) |
|||
mnt := "/tmp/blockvol-mnt" |
|||
|
|||
t.Cleanup(func() { |
|||
cctx, cc := context.WithTimeout(context.Background(), 10*time.Second) |
|||
defer cc() |
|||
clientNode.RunRoot(cctx, fmt.Sprintf("umount -f %s 2>/dev/null", mnt)) |
|||
}) |
|||
|
|||
// mkfs + mount + create many files + verify
|
|||
clientNode.RunRoot(ctx, fmt.Sprintf("mkfs.ext4 -F %s", dev)) |
|||
clientNode.RunRoot(ctx, fmt.Sprintf("mkdir -p %s", mnt)) |
|||
_, _, code, _ := clientNode.RunRoot(ctx, fmt.Sprintf("mount %s %s", dev, mnt)) |
|||
if code != 0 { |
|||
t.Fatalf("mount failed") |
|||
} |
|||
|
|||
// Create 200 files
|
|||
for i := 0; i < 200; i++ { |
|||
clientNode.RunRoot(ctx, |
|||
fmt.Sprintf("dd if=/dev/urandom of=%s/file%d bs=1k count=5 2>/dev/null", mnt, i)) |
|||
} |
|||
|
|||
clientNode.RunRoot(ctx, "sync") |
|||
|
|||
// Count files
|
|||
stdout, _, _, _ := clientNode.RunRoot(ctx, fmt.Sprintf("ls %s | wc -l", mnt)) |
|||
count := strings.TrimSpace(stdout) |
|||
t.Logf("created %s files on ext4 via weed volume", count) |
|||
|
|||
// Unmount + remount + verify count
|
|||
clientNode.RunRoot(ctx, fmt.Sprintf("umount %s", mnt)) |
|||
time.Sleep(1 * time.Second) |
|||
_, _, code, _ = clientNode.RunRoot(ctx, fmt.Sprintf("mount %s %s", dev, mnt)) |
|||
if code != 0 { |
|||
t.Fatalf("remount failed") |
|||
} |
|||
|
|||
stdout2, _, _, _ := clientNode.RunRoot(ctx, fmt.Sprintf("ls %s | wc -l", mnt)) |
|||
if strings.TrimSpace(stdout2) != count { |
|||
t.Fatalf("file count mismatch after remount: %s vs %s", count, strings.TrimSpace(stdout2)) |
|||
} |
|||
t.Log("ext4 stress: 200 files persist via weed volume") |
|||
} |
|||
|
|||
func testWeedVolFsTarExtract(t *testing.T) { |
|||
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute) |
|||
defer cancel() |
|||
|
|||
wt, iscsiC, host := newWeedTestTarget(t, "200M") |
|||
dev := startAndLoginWeed(t, ctx, wt, iscsiC, host) |
|||
mnt := "/tmp/blockvol-mnt" |
|||
|
|||
t.Cleanup(func() { |
|||
cctx, cc := context.WithTimeout(context.Background(), 10*time.Second) |
|||
defer cc() |
|||
clientNode.RunRoot(cctx, fmt.Sprintf("umount -f %s 2>/dev/null", mnt)) |
|||
}) |
|||
|
|||
clientNode.RunRoot(ctx, fmt.Sprintf("mkfs.ext4 -F %s", dev)) |
|||
clientNode.RunRoot(ctx, fmt.Sprintf("mkdir -p %s", mnt)) |
|||
_, _, code, _ := clientNode.RunRoot(ctx, fmt.Sprintf("mount %s %s", dev, mnt)) |
|||
if code != 0 { |
|||
t.Fatalf("mount failed") |
|||
} |
|||
|
|||
// Create source files, tar, extract, verify checksums
|
|||
clientNode.RunRoot(ctx, fmt.Sprintf("mkdir -p %s/src", mnt)) |
|||
for i := 0; i < 100; i++ { |
|||
clientNode.RunRoot(ctx, |
|||
fmt.Sprintf("dd if=/dev/urandom of=%s/src/file%d bs=1k count=10 2>/dev/null", mnt, i)) |
|||
} |
|||
|
|||
clientNode.RunRoot(ctx, fmt.Sprintf("cd %s && tar cf archive.tar src/", mnt)) |
|||
clientNode.RunRoot(ctx, fmt.Sprintf("mkdir -p %s/dst && cd %s/dst && tar xf %s/archive.tar", mnt, mnt, mnt)) |
|||
|
|||
sum1, _, _, _ := clientNode.RunRoot(ctx, |
|||
fmt.Sprintf("cd %s/src && find . -type f -exec md5sum {} \\; | sort", mnt)) |
|||
sum2, _, _, _ := clientNode.RunRoot(ctx, |
|||
fmt.Sprintf("cd %s/dst/src && find . -type f -exec md5sum {} \\; | sort", mnt)) |
|||
if sum1 != sum2 { |
|||
t.Fatalf("tar extract checksums differ") |
|||
} |
|||
t.Log("tar extract + verify passed via weed volume") |
|||
} |
|||
|
|||
func testWeedVolFsLongSoak(t *testing.T) { |
|||
if !clientNode.HasCommand("fio") { |
|||
t.Skip("fio required") |
|||
} |
|||
if testing.Short() { |
|||
t.Skip("skipping long soak in short mode") |
|||
} |
|||
|
|||
ctx, cancel := context.WithTimeout(context.Background(), 35*time.Minute) |
|||
defer cancel() |
|||
|
|||
wt, iscsiC, host := newWeedTestTarget(t, "200M") |
|||
dev := startAndLoginWeed(t, ctx, wt, iscsiC, host) |
|||
|
|||
stdout, stderr, code, _ := clientNode.RunRoot(ctx, |
|||
fmt.Sprintf("fio --name=wv-soak --filename=%s --rw=randrw --verify=crc32 "+ |
|||
"--bs=4k --size=100M --randrepeat=1 --direct=1 --ioengine=libaio "+ |
|||
"--runtime=1800 --time_based --output-format=json", dev)) |
|||
if code != 0 { |
|||
t.Fatalf("fio: code=%d stderr=%s", code, stderr) |
|||
} |
|||
if strings.Contains(stdout, "\"verify_errors\"") && !strings.Contains(stdout, "\"verify_errors\" : 0") { |
|||
t.Fatal("fio verify errors during soak") |
|||
} |
|||
t.Log("30-minute soak passed via weed volume") |
|||
} |
|||
|
|||
func testWeedVolFsPostgres(t *testing.T) { |
|||
if !clientNode.HasCommand("pg_isready") { |
|||
t.Skip("postgresql not available") |
|||
} |
|||
t.Skip("postgres integration requires dedicated setup") |
|||
} |
|||
|
|||
func testWeedVolFsFsstress(t *testing.T) { |
|||
if !clientNode.HasCommand("fsstress") { |
|||
t.Skip("fsstress not available (xfstests)") |
|||
} |
|||
t.Skip("fsstress requires XFS support (P3-BUG-11)") |
|||
} |
|||
|
|||
// ensureWeedBinaryDeployed verifies the weed binary was built in TestWeedVol.
|
|||
// Individual subtests should not be run standalone since they depend on TestWeedVol
|
|||
// building and deploying the binary first.
|
|||
func ensureWeedBinaryDeployed(t *testing.T) { |
|||
t.Helper() |
|||
if weedBinary == "" { |
|||
t.Skip("weed binary not built -- run TestWeedVol parent test") |
|||
} |
|||
// Verify it exists
|
|||
if _, err := os.Stat(weedBinary); err != nil { |
|||
absPath, _ := filepath.Abs(weedBinary) |
|||
t.Skipf("weed binary not found at %s", absPath) |
|||
} |
|||
} |
|||
Write
Preview
Loading…
Cancel
Save
Reference in new issue