You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

628 lines
17 KiB

package loadtest
import (
"bytes"
"crypto/rand"
"fmt"
"io"
"net/http"
"os"
"sort"
"sync"
"sync/atomic"
"testing"
"time"
"github.com/seaweedfs/seaweedfs/test/volume_server/framework"
"github.com/seaweedfs/seaweedfs/test/volume_server/matrix"
)
// Run with:
// go test -v -count=1 -timeout 300s -run BenchmarkVolumeServer ./test/volume_server/loadtest/...
// VOLUME_SERVER_IMPL=rust go test -v -count=1 -timeout 300s -run BenchmarkVolumeServer ./test/volume_server/loadtest/...
//
// Compare results:
// go test -count=1 -timeout 300s -run BenchmarkVolumeServer -bench . ./test/volume_server/loadtest/... | tee go.txt
// VOLUME_SERVER_IMPL=rust go test -count=1 -timeout 300s -run BenchmarkVolumeServer -bench . ./test/volume_server/loadtest/... | tee rust.txt
// payloadSteps lists the payload sizes exercised by the load tests, smallest
// first: 1KB → 4KB → 16KB → 64KB → 256KB → 1MB → 4MB → 8MB.
// name is the label embedded in subtest names; size is the payload length in bytes.
var payloadSteps = []struct {
	name string
	size int
}{
	{name: "1KB", size: 1 << 10},
	{name: "4KB", size: 4 << 10},
	{name: "16KB", size: 16 << 10},
	{name: "64KB", size: 64 << 10},
	{name: "256KB", size: 256 << 10},
	{name: "1MB", size: 1 << 20},
	{name: "4MB", size: 4 << 20},
	{name: "8MB", size: 8 << 20},
}
// implName reports which volume server implementation is under test.
// It returns "rust" only when VOLUME_SERVER_IMPL is exactly "rust";
// any other value (including unset) yields "go".
func implName() string {
	switch os.Getenv("VOLUME_SERVER_IMPL") {
	case "rust":
		return "rust"
	default:
		return "go"
	}
}
// setupCluster starts a volume cluster via framework.StartVolumeCluster,
// configured with matrix.P1(), and returns three values: the cluster's volume
// admin URL, its volume gRPC address, and the cluster's Stop method as cleanup.
func setupCluster(tb testing.TB) (adminURL string, grpcAddr string, cleanup func()) {
	tb.Helper()
	vc := framework.StartVolumeCluster(tb, matrix.P1())
	adminURL = vc.VolumeAdminURL()
	grpcAddr = vc.VolumeGRPCAddress()
	cleanup = vc.Stop
	return adminURL, grpcAddr, cleanup
}
// allocateVolume dials the volume server at grpcAddr and asks it, through
// framework.AllocateVolume, to allocate volumeID with the "loadtest" argument.
// It returns nothing; the connection is closed before returning.
func allocateVolume(tb testing.TB, grpcAddr string, volumeID uint32) {
	tb.Helper()
	grpcConn, volumeClient := framework.DialVolumeServer(tb, grpcAddr)
	defer grpcConn.Close()
	framework.AllocateVolume(tb, volumeClient, volumeID, "loadtest")
}
// makePayload returns a newly allocated slice of size bytes filled by crypto/rand.
func makePayload(size int) []byte {
	buf := make([]byte, size)
	// The results of rand.Read are discarded here.
	_, _ = rand.Read(buf)
	return buf
}
// uploadFile POSTs data as application/octet-stream to adminURL under the file
// ID built from volumeID, key and cookie.
//
// It returns an error when the request cannot be built or sent, or when the
// server answers with a status code of 400 or above; otherwise nil. The
// response body is drained and closed in all cases where a response arrived.
func uploadFile(client *http.Client, adminURL string, volumeID uint32, key uint64, cookie uint32, data []byte) error {
	fid := framework.NewFileID(volumeID, key, cookie)
	target := fmt.Sprintf("%s/%s", adminURL, fid)

	req, err := http.NewRequest(http.MethodPost, target, bytes.NewReader(data))
	if err != nil {
		return err
	}
	req.Header.Set("Content-Type", "application/octet-stream")

	resp, err := client.Do(req)
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	// Drain the body before closing; the copy result is not inspected.
	_, _ = io.Copy(io.Discard, resp.Body)

	if resp.StatusCode < 400 {
		return nil
	}
	return fmt.Errorf("upload %s: status %d", fid, resp.StatusCode)
}
// downloadFile GETs the file identified by volumeID, key and cookie from
// adminURL, reads the whole body and discards it.
//
// It returns an error when the request fails, when the server answers with a
// status code of 400 or above, or when reading the body fails part-way (so a
// truncated transfer is not counted as a successful download). The status
// error takes precedence over a body-read error.
func downloadFile(client *http.Client, adminURL string, volumeID uint32, key uint64, cookie uint32) error {
	fid := framework.NewFileID(volumeID, key, cookie)
	url := fmt.Sprintf("%s/%s", adminURL, fid)
	resp, err := client.Get(url)
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	// Always drain, even on error statuses, before the body is closed.
	_, copyErr := io.Copy(io.Discard, resp.Body)
	if resp.StatusCode >= 400 {
		return fmt.Errorf("download %s: status %d", fid, resp.StatusCode)
	}
	if copyErr != nil {
		return fmt.Errorf("download %s: reading body: %w", fid, copyErr)
	}
	return nil
}
// deleteFile sends an HTTP DELETE for the file identified by volumeID, key and
// cookie to adminURL.
//
// It returns an error when the request cannot be built or sent, or when the
// server answers with a status code of 400 or above, matching the status
// handling of uploadFile and downloadFile. The response body is drained and
// closed whenever a response arrived.
func deleteFile(client *http.Client, adminURL string, volumeID uint32, key uint64, cookie uint32) error {
	fid := framework.NewFileID(volumeID, key, cookie)
	url := fmt.Sprintf("%s/%s", adminURL, fid)
	req, err := http.NewRequest(http.MethodDelete, url, nil)
	if err != nil {
		return err
	}
	resp, err := client.Do(req)
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	// Drain the body before closing; the copy result is not inspected.
	_, _ = io.Copy(io.Discard, resp.Body)

	if resp.StatusCode >= 400 {
		return fmt.Errorf("delete %s: status %d", fid, resp.StatusCode)
	}
	return nil
}
// --- Throughput load tests (not Go benchmarks, manual timing for comparison) ---
// TestBenchmarkVolumeServer runs a suite of load tests against one volume and
// logs a RESULT line per scenario with ops/sec and average latency.
//
// Scenarios run in this order: uploads at every payloadSteps size, downloads
// at every size (each both sequentially and with 16 concurrent workers), mixed
// read/write at every size with 16 workers, and finally a 16-worker delete of
// 1KB files. The test is skipped under -short.
func TestBenchmarkVolumeServer(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping load test in short mode")
	}
	impl := implName()
	adminURL, grpcAddr, cleanup := setupCluster(t)
	defer cleanup()

	const volumeID = uint32(10)
	allocateVolume(t, grpcAddr, volumeID)

	httpClient := &http.Client{
		Timeout: 30 * time.Second,
		Transport: &http.Transport{
			MaxIdleConnsPerHost: 128,
			MaxConnsPerHost:     128,
		},
	}

	// opsForSize returns fewer ops for larger payloads to keep test time reasonable.
	opsForSize := func(size, concurrency int) int {
		switch {
		case size >= 4<<20:
			if concurrency > 1 {
				return 64
			}
			return 30
		case size >= 1<<20:
			if concurrency > 1 {
				return 200
			}
			return 100
		case size >= 64<<10:
			if concurrency > 1 {
				return 500
			}
			return 300
		default:
			if concurrency > 1 {
				return 1000
			}
			return 500
		}
	}

	// modes are the concurrency levels applied to the upload and download steps.
	modes := []struct {
		label       string
		concurrency int
	}{
		{"seq", 1},
		{"c16", 16},
	}

	// Step-by-step upload: 1KB → 4KB → 16KB → 64KB → 256KB → 1MB → 4MB → 8MB
	for _, ps := range payloadSteps {
		for _, mode := range modes {
			name := fmt.Sprintf("Upload/%s/%s", ps.name, mode.label)
			numOps := opsForSize(ps.size, mode.concurrency)
			t.Run(fmt.Sprintf("%s/%s", impl, name), func(t *testing.T) {
				payload := makePayload(ps.size)
				runThroughputTest(t, impl, name, httpClient, adminURL, volumeID,
					payload, numOps, mode.concurrency, false, false)
			})
		}
	}

	// Step-by-step download: 1KB → 4KB → 16KB → 64KB → 256KB → 1MB → 4MB → 8MB
	for _, ps := range payloadSteps {
		for _, mode := range modes {
			name := fmt.Sprintf("Download/%s/%s", ps.name, mode.label)
			numOps := opsForSize(ps.size, mode.concurrency)
			t.Run(fmt.Sprintf("%s/%s", impl, name), func(t *testing.T) {
				payload := makePayload(ps.size)
				runThroughputTest(t, impl, name, httpClient, adminURL, volumeID,
					payload, numOps, mode.concurrency, true, false)
			})
		}
	}

	// Mixed read/write at each size
	for _, ps := range payloadSteps {
		name := fmt.Sprintf("Mixed/%s/c16", ps.name)
		numOps := opsForSize(ps.size, 16)
		t.Run(fmt.Sprintf("%s/%s", impl, name), func(t *testing.T) {
			payload := makePayload(ps.size)
			runThroughputTest(t, impl, name, httpClient, adminURL, volumeID,
				payload, numOps, 16, false, true)
		})
	}

	// Delete test
	t.Run(fmt.Sprintf("%s/Delete/1KB/c16", impl), func(t *testing.T) {
		const (
			numOps      = 1000
			concurrency = 16
			baseKey     = uint64(900000)
		)
		payload := makePayload(1 << 10)

		// Populate the files to delete; this phase is not timed.
		for i := 0; i < numOps; i++ {
			if err := uploadFile(httpClient, adminURL, volumeID, baseKey+uint64(i), 1, payload); err != nil {
				t.Fatalf("pre-upload for delete %d: %v", i, err)
			}
		}

		var ops, errCount, totalLatency atomic.Int64
		var wg sync.WaitGroup
		start := time.Now()

		// Hand each worker a contiguous key range. The first numOps%concurrency
		// workers take one extra key so that every pre-uploaded file is deleted.
		next := baseKey
		for w := 0; w < concurrency; w++ {
			count := numOps / concurrency
			if w < numOps%concurrency {
				count++
			}
			wg.Add(1)
			go func(first uint64, count int) {
				defer wg.Done()
				for i := 0; i < count; i++ {
					opStart := time.Now()
					err := deleteFile(httpClient, adminURL, volumeID, first+uint64(i), 1)
					totalLatency.Add(time.Since(opStart).Nanoseconds())
					ops.Add(1)
					if err != nil {
						errCount.Add(1)
					}
				}
			}(next, count)
			next += uint64(count)
		}
		wg.Wait()
		elapsed := time.Since(start)

		totalOps := ops.Load()
		totalErrs := errCount.Load()
		avgLatencyUs := float64(totalLatency.Load()) / float64(totalOps) / 1000.0
		opsPerSec := float64(totalOps) / elapsed.Seconds()
		t.Logf("RESULT impl=%-4s test=%-22s ops=%-6d errors=%-4d elapsed=%-10s ops/s=%-10.1f avg_lat=%-10.0fus",
			impl, "Delete/1KB/c16", totalOps, totalErrs, elapsed.Round(time.Millisecond), opsPerSec, avgLatencyUs)
	})
}
// runThroughputTest (defined below) is the shared core for the throughput tests.

// keyCounter hands out globally unique key ranges so that tests writing to the
// same volume don't collide. Callers reserve a range by calling Add with the
// number of keys they need. It starts at 1 because key=0 is invalid.
var keyCounter atomic.Uint64

func init() {
	// Skip the invalid key 0 before any range is reserved.
	const firstKey = 1
	keyCounter.Store(firstKey)
}
// runThroughputTest performs numOps operations against volumeID, split across
// concurrency goroutines, and logs one RESULT line with the op count, error
// count, elapsed time, ops/s, average latency and MB/s throughput.
//
// The operation is selected by the flags:
//   - isMixed: each worker alternates upload (even iterations) and download
//     (odd iterations) over pre-uploaded keys;
//   - isDownload (and not isMixed): every operation is a download of a
//     pre-uploaded key;
//   - neither: every operation is an upload to a key not written before.
//
// Each call reserves 2*numOps keys from keyCounter. The first numOps are used
// for the pre-upload (download / mixed); plain uploads use the second numOps.
// Pre-upload failures abort the test; failures in the timed phase are only counted.
func runThroughputTest(
	t *testing.T, impl, name string,
	httpClient *http.Client, adminURL string, volumeID uint32,
	payload []byte, numOps, concurrency int,
	isDownload, isMixed bool,
) {
	t.Helper()

	// Reserve this call's private key range.
	span := uint64(numOps * 2)
	baseKey := keyCounter.Add(span) - span

	// Downloads and mixed runs need the files to exist first; this is not timed.
	needsSeed := isDownload || isMixed
	if needsSeed {
		for i := 0; i < numOps; i++ {
			err := uploadFile(httpClient, adminURL, volumeID, baseKey+uint64(i), 1, payload)
			if err != nil {
				t.Fatalf("pre-upload %d: %v", i, err)
			}
		}
	}

	firstKey := baseKey
	if !needsSeed {
		firstKey += uint64(numOps) // fresh range for uploads
	}

	// doOp runs the i-th operation of a worker against key.
	doOp := func(i int, key uint64) error {
		download := isDownload
		if isMixed {
			download = i%2 != 0
		}
		if download {
			return downloadFile(httpClient, adminURL, volumeID, key, 1)
		}
		return uploadFile(httpClient, adminURL, volumeID, key, 1, payload)
	}

	var ops, errors, totalLatency atomic.Int64
	start := time.Now()

	// Workers get contiguous, non-overlapping key ranges; the first `extra`
	// workers each take one additional operation.
	var wg sync.WaitGroup
	perWorker, extra := numOps/concurrency, numOps%concurrency
	nextKey := firstKey
	for w := 0; w < concurrency; w++ {
		share := perWorker
		if w < extra {
			share++
		}
		wg.Add(1)
		go func(first uint64, count int) {
			defer wg.Done()
			for i := 0; i < count; i++ {
				opStart := time.Now()
				err := doOp(i, first+uint64(i))
				totalLatency.Add(time.Since(opStart).Nanoseconds())
				ops.Add(1)
				if err != nil {
					errors.Add(1)
				}
			}
		}(nextKey, share)
		nextKey += uint64(share)
	}
	wg.Wait()
	elapsed := time.Since(start)

	totalOps, totalErrs := ops.Load(), errors.Load()
	avgLatencyUs := float64(totalLatency.Load()) / float64(totalOps) / 1000.0
	opsPerSec := float64(totalOps) / elapsed.Seconds()
	throughputMBs := opsPerSec * float64(len(payload)) / (1024 * 1024)
	t.Logf("RESULT impl=%-4s test=%-22s ops=%-6d errors=%-4d elapsed=%-10s ops/s=%-10.1f avg_lat=%-10.0fus throughput=%.2f MB/s",
		impl, name, totalOps, totalErrs, elapsed.Round(time.Millisecond), opsPerSec, avgLatencyUs, throughputMBs)
}
// TestLatencyPercentiles measures min/p50/p95/p99/max latencies for sequential
// upload and download at each payloadSteps size and logs one RESULT line per
// combination. The test is skipped under -short.
//
// Each subtest reserves 2*numOps keys from keyCounter: downloads pre-upload and
// then read the first numOps keys, uploads write to the second numOps keys.
// A subtest is marked failed if any measured operation returns an error,
// because the logged percentiles then include failed requests.
func TestLatencyPercentiles(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping load test in short mode")
	}
	impl := implName()
	adminURL, grpcAddr, cleanup := setupCluster(t)
	defer cleanup()

	const volumeID = uint32(20)
	allocateVolume(t, grpcAddr, volumeID)

	httpClient := &http.Client{
		Timeout: 30 * time.Second,
		Transport: &http.Transport{
			MaxIdleConnsPerHost: 64,
			MaxConnsPerHost:     64,
		},
	}

	// latOpsForSize returns fewer samples for larger payloads.
	latOpsForSize := func(size int) int {
		switch {
		case size >= 4<<20:
			return 30
		case size >= 1<<20:
			return 100
		default:
			return 300
		}
	}

	for _, ps := range payloadSteps {
		for _, dl := range []struct {
			prefix     string
			isDownload bool
		}{
			{"Upload", false},
			{"Download", true},
		} {
			name := fmt.Sprintf("%s/%s", dl.prefix, ps.name)
			numOps := latOpsForSize(ps.size)
			t.Run(fmt.Sprintf("%s/%s", impl, name), func(t *testing.T) {
				payload := makePayload(ps.size)

				// Add returns the counter's new value, so subtract the span to
				// get the start of the range this subtest just reserved.
				span := uint64(numOps * 2)
				baseKey := keyCounter.Add(span) - span

				if dl.isDownload {
					for i := 0; i < numOps; i++ {
						if err := uploadFile(httpClient, adminURL, volumeID, baseKey+uint64(i), 2, payload); err != nil {
							t.Fatalf("pre-upload: %v", err)
						}
					}
				}
				uploadBase := baseKey
				if !dl.isDownload {
					uploadBase = baseKey + uint64(numOps)
				}

				latencies := make([]time.Duration, numOps)
				var (
					failed   int
					firstErr error
				)
				for i := 0; i < numOps; i++ {
					key := uploadBase + uint64(i)
					start := time.Now()
					var err error
					if dl.isDownload {
						err = downloadFile(httpClient, adminURL, volumeID, key, 2)
					} else {
						err = uploadFile(httpClient, adminURL, volumeID, key, 2, payload)
					}
					latencies[i] = time.Since(start)
					if err != nil {
						if firstErr == nil {
							firstErr = err
						}
						failed++
					}
				}
				if failed > 0 {
					t.Errorf("%d of %d operations failed (first error: %v); reported latencies include failed requests",
						failed, numOps, firstErr)
				}

				sortDurations(latencies)
				p50 := latencies[len(latencies)*50/100]
				p95 := latencies[len(latencies)*95/100]
				p99 := latencies[len(latencies)*99/100]
				lowest := latencies[0]
				highest := latencies[len(latencies)-1]
				t.Logf("RESULT impl=%-4s test=%-20s n=%-4d min=%-10s p50=%-10s p95=%-10s p99=%-10s max=%-10s",
					impl, name, numOps, lowest.Round(time.Microsecond), p50.Round(time.Microsecond), p95.Round(time.Microsecond), p99.Round(time.Microsecond), highest.Round(time.Microsecond))
			})
		}
	}
}
// sortDurations sorts d in place in ascending order.
func sortDurations(d []time.Duration) {
	ascending := func(a, b int) bool {
		return d[a] < d[b]
	}
	sort.Slice(d, ascending)
}
// TestSustainedP99 runs high-concurrency load for a sustained period (default 60s,
// override with LOADTEST_DURATION=120s) and reports p50/p90/p95/p99/p999 latencies.
// This reveals tail latency differences that short tests miss (GC pauses, lock contention, etc).
//
// Every scenario gets its own volume and a pool of 500 pre-written files that
// the workers cycle through, so uploads are overwrites and the volume does not
// fill up. The pool is written before the measurement window opens, so seeding
// time is counted neither against the duration nor in ops/s.
// An unparsable or non-positive LOADTEST_DURATION is logged and ignored.
// The test is skipped under -short.
//
// Run:
// go test -v -count=1 -timeout 600s -run TestSustainedP99 ./test/volume_server/loadtest/...
// VOLUME_SERVER_IMPL=rust go test -v -count=1 -timeout 600s -run TestSustainedP99 ./test/volume_server/loadtest/...
// LOADTEST_DURATION=120s VOLUME_SERVER_IMPL=rust go test -v -count=1 -timeout 600s -run TestSustainedP99 ./test/volume_server/loadtest/...
func TestSustainedP99(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping sustained load test in short mode")
	}

	duration := 60 * time.Second
	if d := os.Getenv("LOADTEST_DURATION"); d != "" {
		parsed, err := time.ParseDuration(d)
		if err == nil && parsed > 0 {
			duration = parsed
		} else {
			t.Logf("ignoring invalid LOADTEST_DURATION=%q, using %s", d, duration)
		}
	}

	impl := implName()
	adminURL, grpcAddr, cleanup := setupCluster(t)
	defer cleanup()

	httpClient := &http.Client{
		Timeout: 30 * time.Second,
		Transport: &http.Transport{
			MaxIdleConnsPerHost: 128,
			MaxConnsPerHost:     128,
		},
	}

	type scenario struct {
		name        string
		size        int
		concurrency int
		isDownload  bool
	}
	scenarios := []scenario{
		{"Upload/1KB/c16", 1 << 10, 16, false},
		{"Upload/64KB/c16", 64 << 10, 16, false},
		{"Download/1KB/c16", 1 << 10, 16, true},
		{"Download/64KB/c16", 64 << 10, 16, true},
	}

	var nextVolID atomic.Uint32
	nextVolID.Store(30)

	for _, sc := range scenarios {
		t.Run(fmt.Sprintf("%s/%s", impl, sc.name), func(t *testing.T) {
			// Each scenario gets its own volume to avoid filling up
			volumeID := nextVolID.Add(1) - 1
			allocateVolume(t, grpcAddr, volumeID)
			payload := makePayload(sc.size)

			const poolSize = 500
			baseKey := keyCounter.Add(uint64(poolSize*2)) - uint64(poolSize*2)

			// Write the pool before starting the clock. Downloads need the
			// files to exist; uploads are seeded so that the timed writes are
			// overwrites (no volume fill).
			seedLabel := "pre-seed"
			if sc.isDownload {
				seedLabel = "pre-upload"
				t.Logf("Pre-uploading %d files for download test...", poolSize)
			} else {
				t.Logf("Pre-seeding %d files for upload overwrite test...", poolSize)
			}
			for i := 0; i < poolSize; i++ {
				if err := uploadFile(httpClient, adminURL, volumeID, baseKey+uint64(i), 3, payload); err != nil {
					t.Fatalf("%s %d: %v", seedLabel, i, err)
				}
			}

			// Latencies from all workers, merged under mu.
			var (
				mu   sync.Mutex
				lats = make([]time.Duration, 0, 100000)
			)
			flush := func(local []time.Duration) {
				mu.Lock()
				lats = append(lats, local...)
				mu.Unlock()
			}

			var totalOps atomic.Int64
			var totalErrors atomic.Int64

			// The measurement window opens only now, after seeding.
			start := time.Now()
			deadline := start.Add(duration)

			var wg sync.WaitGroup
			for w := 0; w < sc.concurrency; w++ {
				wg.Add(1)
				go func() {
					defer wg.Done()
					localLats := make([]time.Duration, 0, 8192)
					var i uint64
					for time.Now().Before(deadline) {
						// Cycle through the pool to avoid filling up the volume
						key := baseKey + i%poolSize
						opStart := time.Now()
						var err error
						if sc.isDownload {
							err = downloadFile(httpClient, adminURL, volumeID, key, 3)
						} else {
							err = uploadFile(httpClient, adminURL, volumeID, key, 3, payload)
						}
						localLats = append(localLats, time.Since(opStart))
						totalOps.Add(1)
						if err != nil {
							totalErrors.Add(1)
						}
						i++

						// Flush local buffer periodically
						if len(localLats) >= 8192 {
							flush(localLats)
							localLats = localLats[:0]
						}
					}
					// Final flush
					if len(localLats) > 0 {
						flush(localLats)
					}
				}()
			}
			wg.Wait()
			elapsed := time.Since(start)

			n := len(lats)
			if n == 0 {
				// Without samples the percentile lookups below would index out of range.
				t.Fatalf("no operations completed within %s", duration)
			}
			ops := totalOps.Load()
			errs := totalErrors.Load()
			opsPerSec := float64(ops) / elapsed.Seconds()

			sortDurations(lats)
			pct := func(p float64) time.Duration {
				idx := int(float64(n) * p / 100.0)
				if idx >= n {
					idx = n - 1
				}
				return lats[idx]
			}

			t.Logf("RESULT impl=%-4s test=%-22s duration=%-6s ops=%-8d errors=%-4d ops/s=%-10.1f",
				impl, sc.name, elapsed.Round(time.Second), ops, errs, opsPerSec)
			t.Logf(" p50=%-10s p90=%-10s p95=%-10s p99=%-10s p999=%-10s max=%-10s",
				pct(50).Round(time.Microsecond),
				pct(90).Round(time.Microsecond),
				pct(95).Round(time.Microsecond),
				pct(99).Round(time.Microsecond),
				pct(99.9).Round(time.Microsecond),
				lats[n-1].Round(time.Microsecond))
		})
	}
}