You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
307 lines
8.5 KiB
307 lines
8.5 KiB
package weed_server
|
|
|
|
import (
|
|
"sync/atomic"
|
|
"testing"
|
|
"time"
|
|
|
|
"github.com/seaweedfs/seaweedfs/weed/storage/blockvol"
|
|
)
|
|
|
|
func TestQABlockHeartbeatCollector(t *testing.T) {
|
|
tests := []struct {
|
|
name string
|
|
run func(t *testing.T)
|
|
}{
|
|
// --- Stop lifecycle ---
|
|
{name: "stop_before_run_deadlocks", run: testStopBeforeRunDeadlocks},
|
|
{name: "double_stop_no_panic", run: testDoubleStopNoPanic},
|
|
{name: "stop_during_callback", run: testStopDuringCallback},
|
|
|
|
// --- Interval edge cases ---
|
|
{name: "zero_interval_panics", run: testZeroIntervalPanics},
|
|
{name: "very_short_interval", run: testVeryShortInterval},
|
|
|
|
// --- Callback edge cases ---
|
|
{name: "callback_panic_crashes_goroutine", run: testCallbackPanicCrashesGoroutine},
|
|
{name: "callback_slow_blocks_next_tick", run: testCallbackSlowBlocksNextTick},
|
|
{name: "callback_set_after_run", run: testCallbackSetAfterRun},
|
|
|
|
// --- Concurrency ---
|
|
{name: "concurrent_stop_calls", run: testConcurrentStopCalls},
|
|
}
|
|
for _, tt := range tests {
|
|
t.Run(tt.name, func(t *testing.T) {
|
|
tt.run(t)
|
|
})
|
|
}
|
|
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Stop lifecycle
|
|
// ---------------------------------------------------------------------------
|
|
|
|
func testStopBeforeRunDeadlocks(t *testing.T) {
|
|
// BUG-CP4B2-1: Stop() before Run() blocks forever on <-c.done.
|
|
// done channel is unbuffered and only closed by Run()'s defer.
|
|
// If Run() never starts, Stop() hangs.
|
|
bs := newTestBlockService(t)
|
|
collector := NewBlockVolumeHeartbeatCollector(bs, 100*time.Millisecond)
|
|
|
|
stopped := make(chan struct{})
|
|
go func() {
|
|
collector.Stop()
|
|
close(stopped)
|
|
}()
|
|
|
|
select {
|
|
case <-stopped:
|
|
// Good: Stop() returned. Bug is fixed.
|
|
case <-time.After(2 * time.Second):
|
|
t.Error("BUG-CP4B2-1: Stop() before Run() deadlocked (blocked >2s on <-c.done)")
|
|
// We can't recover from this -- the goroutine is leaked.
|
|
// Start Run() to unblock.
|
|
go collector.Run()
|
|
<-stopped
|
|
}
|
|
}
|
|
|
|
func testDoubleStopNoPanic(t *testing.T) {
|
|
bs := newTestBlockService(t)
|
|
collector := NewBlockVolumeHeartbeatCollector(bs, 10*time.Millisecond)
|
|
collector.SetStatusCallback(func(msgs []blockvol.BlockVolumeInfoMessage) {})
|
|
|
|
go collector.Run()
|
|
time.Sleep(30 * time.Millisecond)
|
|
|
|
// First stop.
|
|
collector.Stop()
|
|
|
|
// Second stop should not panic (sync.Once + closed done channel).
|
|
done := make(chan struct{})
|
|
go func() {
|
|
defer func() {
|
|
if r := recover(); r != nil {
|
|
t.Errorf("double Stop() panicked: %v", r)
|
|
}
|
|
close(done)
|
|
}()
|
|
collector.Stop()
|
|
}()
|
|
|
|
select {
|
|
case <-done:
|
|
case <-time.After(2 * time.Second):
|
|
t.Fatal("double Stop() blocked >2s")
|
|
}
|
|
}
|
|
|
|
func testStopDuringCallback(t *testing.T) {
|
|
bs := newTestBlockService(t)
|
|
collector := NewBlockVolumeHeartbeatCollector(bs, 10*time.Millisecond)
|
|
|
|
callbackStarted := make(chan struct{})
|
|
callbackRelease := make(chan struct{})
|
|
|
|
collector.SetStatusCallback(func(msgs []blockvol.BlockVolumeInfoMessage) {
|
|
select {
|
|
case callbackStarted <- struct{}{}:
|
|
default:
|
|
}
|
|
<-callbackRelease
|
|
})
|
|
|
|
go collector.Run()
|
|
|
|
// Wait for callback to start.
|
|
select {
|
|
case <-callbackStarted:
|
|
case <-time.After(2 * time.Second):
|
|
close(callbackRelease)
|
|
t.Fatal("callback never started")
|
|
return
|
|
}
|
|
|
|
// Stop while callback is blocked. Stop should block until Run() exits.
|
|
stopDone := make(chan struct{})
|
|
go func() {
|
|
collector.Stop()
|
|
close(stopDone)
|
|
}()
|
|
|
|
// Stop should be blocked because callback is still running.
|
|
select {
|
|
case <-stopDone:
|
|
close(callbackRelease)
|
|
// Stop returned while callback was blocked -- this means Run()
|
|
// exited mid-callback? Let's see...
|
|
t.Log("Stop() returned while callback was blocked (Run exited between ticks)")
|
|
case <-time.After(100 * time.Millisecond):
|
|
// Expected: Stop is blocked because Run() is in the callback.
|
|
// Actually, Run()'s select will pick stopCh on the NEXT iteration,
|
|
// not mid-callback. So callback must complete first.
|
|
close(callbackRelease)
|
|
<-stopDone
|
|
t.Log("Stop() waited for callback to finish before returning (correct)")
|
|
}
|
|
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Interval edge cases
|
|
// ---------------------------------------------------------------------------
|
|
|
|
func testZeroIntervalPanics(t *testing.T) {
|
|
// BUG-CP4B2-2 (fixed): zero interval is clamped to minHeartbeatInterval.
|
|
// Verify: no panic, collector runs normally, callbacks fire.
|
|
bs := newTestBlockService(t)
|
|
collector := NewBlockVolumeHeartbeatCollector(bs, 0)
|
|
|
|
var count atomic.Int64
|
|
collector.SetStatusCallback(func(msgs []blockvol.BlockVolumeInfoMessage) {
|
|
count.Add(1)
|
|
})
|
|
|
|
go collector.Run()
|
|
time.Sleep(30 * time.Millisecond)
|
|
collector.Stop()
|
|
|
|
n := count.Load()
|
|
if n < 1 {
|
|
t.Errorf("expected at least 1 callback with clamped interval, got %d", n)
|
|
}
|
|
t.Logf("zero interval (clamped): %d callbacks in 30ms", n)
|
|
}
|
|
|
|
func testVeryShortInterval(t *testing.T) {
|
|
bs := newTestBlockService(t)
|
|
var count atomic.Int64
|
|
|
|
collector := NewBlockVolumeHeartbeatCollector(bs, 1*time.Millisecond)
|
|
collector.SetStatusCallback(func(msgs []blockvol.BlockVolumeInfoMessage) {
|
|
count.Add(1)
|
|
})
|
|
|
|
go collector.Run()
|
|
time.Sleep(50 * time.Millisecond)
|
|
collector.Stop()
|
|
|
|
n := count.Load()
|
|
if n < 5 {
|
|
t.Errorf("expected >= 5 callbacks at 1ms interval over 50ms, got %d", n)
|
|
}
|
|
t.Logf("1ms interval: %d callbacks in 50ms", n)
|
|
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Callback edge cases
|
|
// ---------------------------------------------------------------------------
|
|
|
|
func testCallbackPanicCrashesGoroutine(t *testing.T) {
|
|
// BUG-CP4B2-3 (fixed): safeCallback recovers panics. Run() continues.
|
|
// Verify: panic is logged, collector keeps running, subsequent callbacks fire.
|
|
bs := newTestBlockService(t)
|
|
collector := NewBlockVolumeHeartbeatCollector(bs, 10*time.Millisecond)
|
|
|
|
var callCount atomic.Int64
|
|
collector.SetStatusCallback(func(msgs []blockvol.BlockVolumeInfoMessage) {
|
|
n := callCount.Add(1)
|
|
if n == 1 {
|
|
panic("deliberate test panic in callback")
|
|
}
|
|
})
|
|
|
|
go collector.Run()
|
|
|
|
// Wait for multiple callbacks (first panics, subsequent should still fire).
|
|
time.Sleep(100 * time.Millisecond)
|
|
collector.Stop()
|
|
|
|
n := callCount.Load()
|
|
if n < 2 {
|
|
t.Errorf("expected >= 2 callbacks (first panics, rest recover), got %d", n)
|
|
}
|
|
t.Logf("callback panic recovery: %d callbacks total (first panicked, rest recovered)", n)
|
|
}
|
|
|
|
func testCallbackSlowBlocksNextTick(t *testing.T) {
|
|
bs := newTestBlockService(t)
|
|
var count atomic.Int64
|
|
|
|
collector := NewBlockVolumeHeartbeatCollector(bs, 10*time.Millisecond)
|
|
collector.SetStatusCallback(func(msgs []blockvol.BlockVolumeInfoMessage) {
|
|
count.Add(1)
|
|
time.Sleep(50 * time.Millisecond) // 5x the interval
|
|
})
|
|
|
|
go collector.Run()
|
|
time.Sleep(200 * time.Millisecond)
|
|
collector.Stop()
|
|
|
|
n := count.Load()
|
|
// With 50ms callback sleep and 10ms interval, we should get ~4 callbacks
|
|
// (200ms / 50ms), not 20 (200ms / 10ms). Slow callback blocks the loop.
|
|
if n > 8 {
|
|
t.Errorf("expected slow callback to throttle ticks, got %d callbacks", n)
|
|
}
|
|
t.Logf("slow callback: %d callbacks in 200ms (10ms interval, 50ms callback)", n)
|
|
}
|
|
|
|
func testCallbackSetAfterRun(t *testing.T) {
|
|
// Setting SetStatusCallback after Run() starts -- now safe with cbMu
|
|
// (BUG-CP4B3-2 fix).
|
|
bs := newTestBlockService(t)
|
|
collector := NewBlockVolumeHeartbeatCollector(bs, 10*time.Millisecond)
|
|
// Start with nil callback.
|
|
|
|
go collector.Run()
|
|
|
|
// Set callback after Run started. With cbMu, this is race-free.
|
|
time.Sleep(5 * time.Millisecond)
|
|
var called atomic.Bool
|
|
collector.SetStatusCallback(func(msgs []blockvol.BlockVolumeInfoMessage) {
|
|
called.Store(true)
|
|
})
|
|
|
|
time.Sleep(50 * time.Millisecond)
|
|
collector.Stop()
|
|
|
|
t.Logf("callback set after Run: called=%v", called.Load())
|
|
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Concurrency
|
|
// ---------------------------------------------------------------------------
|
|
|
|
func testConcurrentStopCalls(t *testing.T) {
|
|
bs := newTestBlockService(t)
|
|
collector := NewBlockVolumeHeartbeatCollector(bs, 10*time.Millisecond)
|
|
collector.SetStatusCallback(func(msgs []blockvol.BlockVolumeInfoMessage) {})
|
|
|
|
go collector.Run()
|
|
time.Sleep(30 * time.Millisecond)
|
|
|
|
// 10 goroutines all calling Stop concurrently.
|
|
var wg atomic.Int64
|
|
done := make(chan struct{})
|
|
for i := 0; i < 10; i++ {
|
|
wg.Add(1)
|
|
go func() {
|
|
defer wg.Add(-1)
|
|
collector.Stop()
|
|
}()
|
|
}
|
|
|
|
go func() {
|
|
for wg.Load() > 0 {
|
|
time.Sleep(1 * time.Millisecond)
|
|
}
|
|
close(done)
|
|
}()
|
|
|
|
select {
|
|
case <-done:
|
|
// All returned.
|
|
case <-time.After(5 * time.Second):
|
|
t.Fatal("concurrent Stop() calls blocked >5s")
|
|
}
|
|
}
|