You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
584 lines
13 KiB
584 lines
13 KiB
package blockvol
|
|
|
|
import (
|
|
"errors"
|
|
"sync"
|
|
"sync/atomic"
|
|
"testing"
|
|
"time"
|
|
)
|
|
|
|
func TestGroupCommitter(t *testing.T) {
|
|
tests := []struct {
|
|
name string
|
|
run func(t *testing.T)
|
|
}{
|
|
{name: "group_single_barrier", run: testGroupSingleBarrier},
|
|
{name: "group_batch_10", run: testGroupBatch10},
|
|
{name: "group_max_delay", run: testGroupMaxDelay},
|
|
{name: "group_max_batch", run: testGroupMaxBatch},
|
|
{name: "group_fsync_error", run: testGroupFsyncError},
|
|
{name: "group_sequential", run: testGroupSequential},
|
|
{name: "group_shutdown", run: testGroupShutdown},
|
|
{name: "group_submit_after_stop", run: testGroupSubmitAfterStop},
|
|
// Phase 3 Task 1.4: Adaptive group commit.
|
|
{name: "adaptive_single_immediate", run: testAdaptiveSingleImmediate},
|
|
{name: "adaptive_batch_above_watermark", run: testAdaptiveBatchAboveWatermark},
|
|
{name: "adaptive_max_batch_triggers_early", run: testAdaptiveMaxBatchTriggersEarly},
|
|
{name: "adaptive_watermark_zero_compat", run: testAdaptiveWatermarkZeroCompat},
|
|
{name: "adaptive_threshold_boundary", run: testAdaptiveThresholdBoundary},
|
|
{name: "adaptive_ramp_up", run: testAdaptiveRampUp},
|
|
{name: "adaptive_max_delay_honored", run: testAdaptiveMaxDelayHonored},
|
|
{name: "adaptive_fsync_error_all_waiters", run: testAdaptiveFsyncErrorAllWaiters},
|
|
}
|
|
for _, tt := range tests {
|
|
t.Run(tt.name, func(t *testing.T) {
|
|
tt.run(t)
|
|
})
|
|
}
|
|
}
|
|
|
|
func testGroupSingleBarrier(t *testing.T) {
|
|
var syncCalls atomic.Uint64
|
|
gc := NewGroupCommitter(GroupCommitterConfig{
|
|
SyncFunc: func() error {
|
|
syncCalls.Add(1)
|
|
return nil
|
|
},
|
|
MaxDelay: 10 * time.Millisecond,
|
|
})
|
|
go gc.Run()
|
|
defer gc.Stop()
|
|
|
|
if err := gc.Submit(); err != nil {
|
|
t.Fatalf("Submit: %v", err)
|
|
}
|
|
|
|
if c := syncCalls.Load(); c != 1 {
|
|
t.Errorf("syncCalls = %d, want 1", c)
|
|
}
|
|
}
|
|
|
|
func testGroupBatch10(t *testing.T) {
|
|
var syncCalls atomic.Uint64
|
|
gc := NewGroupCommitter(GroupCommitterConfig{
|
|
SyncFunc: func() error {
|
|
syncCalls.Add(1)
|
|
return nil
|
|
},
|
|
MaxDelay: 50 * time.Millisecond,
|
|
MaxBatch: 64,
|
|
})
|
|
go gc.Run()
|
|
defer gc.Stop()
|
|
|
|
const n = 10
|
|
var wg sync.WaitGroup
|
|
errs := make([]error, n)
|
|
|
|
// Launch all 10 concurrently.
|
|
wg.Add(n)
|
|
for i := 0; i < n; i++ {
|
|
go func(idx int) {
|
|
defer wg.Done()
|
|
errs[idx] = gc.Submit()
|
|
}(i)
|
|
}
|
|
wg.Wait()
|
|
|
|
for i, err := range errs {
|
|
if err != nil {
|
|
t.Errorf("Submit[%d]: %v", i, err)
|
|
}
|
|
}
|
|
|
|
// All 10 should be batched into 1 fsync (maybe 2 if timing is unlucky).
|
|
if c := syncCalls.Load(); c > 2 {
|
|
t.Errorf("syncCalls = %d, want 1-2 (batched)", c)
|
|
}
|
|
}
|
|
|
|
func testGroupMaxDelay(t *testing.T) {
|
|
gc := NewGroupCommitter(GroupCommitterConfig{
|
|
SyncFunc: func() error { return nil },
|
|
MaxDelay: 5 * time.Millisecond,
|
|
})
|
|
go gc.Run()
|
|
defer gc.Stop()
|
|
|
|
start := time.Now()
|
|
if err := gc.Submit(); err != nil {
|
|
t.Fatalf("Submit: %v", err)
|
|
}
|
|
elapsed := time.Since(start)
|
|
|
|
// Should complete within ~maxDelay + some margin, not take much longer.
|
|
if elapsed > 50*time.Millisecond {
|
|
t.Errorf("Submit took %v, expected ~5ms", elapsed)
|
|
}
|
|
}
|
|
|
|
func testGroupMaxBatch(t *testing.T) {
|
|
var syncCalls atomic.Uint64
|
|
const batch = 64
|
|
|
|
// Use a slow sync to ensure we can detect immediate trigger.
|
|
gc := NewGroupCommitter(GroupCommitterConfig{
|
|
SyncFunc: func() error {
|
|
syncCalls.Add(1)
|
|
return nil
|
|
},
|
|
MaxDelay: 5 * time.Second, // very long delay -- should NOT wait this long
|
|
MaxBatch: batch,
|
|
})
|
|
go gc.Run()
|
|
defer gc.Stop()
|
|
|
|
var wg sync.WaitGroup
|
|
wg.Add(batch)
|
|
for i := 0; i < batch; i++ {
|
|
go func() {
|
|
defer wg.Done()
|
|
gc.Submit()
|
|
}()
|
|
}
|
|
|
|
// Wait with timeout -- if maxBatch triggers, should complete fast.
|
|
done := make(chan struct{})
|
|
go func() {
|
|
wg.Wait()
|
|
close(done)
|
|
}()
|
|
|
|
select {
|
|
case <-done:
|
|
// Good -- completed without waiting 5 seconds.
|
|
case <-time.After(2 * time.Second):
|
|
t.Fatal("maxBatch=64 did not trigger immediate flush")
|
|
}
|
|
}
|
|
|
|
func testGroupFsyncError(t *testing.T) {
|
|
errIO := errors.New("simulated EIO")
|
|
var degraded atomic.Bool
|
|
|
|
gc := NewGroupCommitter(GroupCommitterConfig{
|
|
SyncFunc: func() error {
|
|
return errIO
|
|
},
|
|
MaxDelay: 10 * time.Millisecond,
|
|
OnDegraded: func() { degraded.Store(true) },
|
|
})
|
|
go gc.Run()
|
|
defer gc.Stop()
|
|
|
|
const n = 5
|
|
var wg sync.WaitGroup
|
|
errs := make([]error, n)
|
|
|
|
wg.Add(n)
|
|
for i := 0; i < n; i++ {
|
|
go func(idx int) {
|
|
defer wg.Done()
|
|
errs[idx] = gc.Submit()
|
|
}(i)
|
|
}
|
|
wg.Wait()
|
|
|
|
for i, err := range errs {
|
|
if !errors.Is(err, errIO) {
|
|
t.Errorf("Submit[%d]: got %v, want %v", i, err, errIO)
|
|
}
|
|
}
|
|
|
|
if !degraded.Load() {
|
|
t.Error("onDegraded was not called")
|
|
}
|
|
}
|
|
|
|
func testGroupSequential(t *testing.T) {
|
|
var syncCalls atomic.Uint64
|
|
gc := NewGroupCommitter(GroupCommitterConfig{
|
|
SyncFunc: func() error {
|
|
syncCalls.Add(1)
|
|
return nil
|
|
},
|
|
MaxDelay: 10 * time.Millisecond,
|
|
})
|
|
go gc.Run()
|
|
defer gc.Stop()
|
|
|
|
// First sync.
|
|
if err := gc.Submit(); err != nil {
|
|
t.Fatalf("Submit 1: %v", err)
|
|
}
|
|
|
|
// Second sync (after first completes).
|
|
if err := gc.Submit(); err != nil {
|
|
t.Fatalf("Submit 2: %v", err)
|
|
}
|
|
|
|
if c := syncCalls.Load(); c != 2 {
|
|
t.Errorf("syncCalls = %d, want 2 (two separate fsyncs)", c)
|
|
}
|
|
}
|
|
|
|
// testGroupShutdown exercises Stop while one waiter is mid-fsync and a second
// is queued behind it: the in-flight waiter must see the fsync's result (nil),
// while the queued waiter must be drained with ErrGroupCommitShutdown.
// NOTE(review): the ordering here is timing-sensitive (sleeps stand in for
// explicit synchronization on the second enqueue) — keep the sequence intact.
func testGroupShutdown(t *testing.T) {
	// Block fsync so we can shut down while waiters are pending.
	syncStarted := make(chan struct{})
	syncBlock := make(chan struct{})
	gc := NewGroupCommitter(GroupCommitterConfig{
		SyncFunc: func() error {
			close(syncStarted)
			<-syncBlock // block until test releases
			return nil
		},
		MaxDelay: 1 * time.Millisecond,
	})
	go gc.Run()
	// Deliberately no `defer gc.Stop()` here: Stop is invoked explicitly below
	// as part of the scenario under test.

	// Submit one request that will block on fsync.
	errCh1 := make(chan error, 1)
	go func() {
		errCh1 <- gc.Submit()
	}()

	// Wait for fsync to start.
	<-syncStarted

	// Submit another request while fsync is in progress.
	errCh2 := make(chan error, 1)
	go func() {
		errCh2 <- gc.Submit()
	}()
	time.Sleep(5 * time.Millisecond) // let it enqueue

	// Stop the group committer (will drain pending with ErrGroupCommitShutdown).
	// The release of syncBlock is delayed so Stop is already underway when the
	// in-flight fsync completes.
	go func() {
		time.Sleep(10 * time.Millisecond)
		close(syncBlock) // unblock the fsync first
	}()
	gc.Stop()

	// First waiter should get nil (fsync succeeded).
	if err := <-errCh1; err != nil {
		t.Errorf("first waiter: %v, want nil", err)
	}

	// Second waiter should get shutdown error.
	if err := <-errCh2; !errors.Is(err, ErrGroupCommitShutdown) {
		t.Errorf("second waiter: %v, want ErrGroupCommitShutdown", err)
	}
}
|
|
|
|
// --- Phase 3 Task 1.4: Adaptive group commit tests ---
|
|
|
|
func testAdaptiveSingleImmediate(t *testing.T) {
|
|
// With lowWatermark=4, a single Submit should flush immediately (no delay).
|
|
var syncCalls atomic.Uint64
|
|
gc := NewGroupCommitter(GroupCommitterConfig{
|
|
SyncFunc: func() error {
|
|
syncCalls.Add(1)
|
|
return nil
|
|
},
|
|
MaxDelay: 5 * time.Second, // very long delay
|
|
LowWatermark: 4,
|
|
})
|
|
go gc.Run()
|
|
defer gc.Stop()
|
|
|
|
start := time.Now()
|
|
if err := gc.Submit(); err != nil {
|
|
t.Fatalf("Submit: %v", err)
|
|
}
|
|
elapsed := time.Since(start)
|
|
|
|
if syncCalls.Load() != 1 {
|
|
t.Errorf("syncCalls = %d, want 1", syncCalls.Load())
|
|
}
|
|
// Should complete much faster than 5s maxDelay.
|
|
if elapsed > 500*time.Millisecond {
|
|
t.Errorf("Submit took %v, expected immediate (low watermark skip)", elapsed)
|
|
}
|
|
}
|
|
|
|
func testAdaptiveBatchAboveWatermark(t *testing.T) {
|
|
// With lowWatermark=4 and a slow sync, 10 concurrent submits should batch.
|
|
var syncCalls atomic.Uint64
|
|
gc := NewGroupCommitter(GroupCommitterConfig{
|
|
SyncFunc: func() error {
|
|
syncCalls.Add(1)
|
|
time.Sleep(5 * time.Millisecond) // slow sync so requests queue up
|
|
return nil
|
|
},
|
|
MaxDelay: 50 * time.Millisecond,
|
|
MaxBatch: 64,
|
|
LowWatermark: 4,
|
|
})
|
|
go gc.Run()
|
|
defer gc.Stop()
|
|
|
|
const n = 10
|
|
var wg sync.WaitGroup
|
|
// Pre-enqueue all requests, then start Run.
|
|
wg.Add(n)
|
|
for i := 0; i < n; i++ {
|
|
go func() {
|
|
defer wg.Done()
|
|
gc.Submit()
|
|
}()
|
|
}
|
|
wg.Wait()
|
|
|
|
// With slow sync, many should batch. At most n fsyncs (no batching), at least 1.
|
|
c := syncCalls.Load()
|
|
if c == 0 {
|
|
t.Error("syncCalls = 0, want > 0")
|
|
}
|
|
t.Logf("adaptive batch: %d submits, %d fsyncs", n, c)
|
|
}
|
|
|
|
func testAdaptiveMaxBatchTriggersEarly(t *testing.T) {
|
|
// maxBatch should still trigger immediate flush even with adaptive watermark.
|
|
gc := NewGroupCommitter(GroupCommitterConfig{
|
|
SyncFunc: func() error { return nil },
|
|
MaxDelay: 5 * time.Second,
|
|
MaxBatch: 8,
|
|
LowWatermark: 4,
|
|
})
|
|
go gc.Run()
|
|
defer gc.Stop()
|
|
|
|
var wg sync.WaitGroup
|
|
wg.Add(8)
|
|
for i := 0; i < 8; i++ {
|
|
go func() {
|
|
defer wg.Done()
|
|
gc.Submit()
|
|
}()
|
|
}
|
|
|
|
done := make(chan struct{})
|
|
go func() {
|
|
wg.Wait()
|
|
close(done)
|
|
}()
|
|
|
|
select {
|
|
case <-done:
|
|
case <-time.After(2 * time.Second):
|
|
t.Fatal("maxBatch=8 did not trigger flush with adaptive watermark")
|
|
}
|
|
}
|
|
|
|
func testAdaptiveWatermarkZeroCompat(t *testing.T) {
|
|
// LowWatermark=0 should preserve Phase 2 behavior (always wait for delay).
|
|
var syncCalls atomic.Uint64
|
|
gc := NewGroupCommitter(GroupCommitterConfig{
|
|
SyncFunc: func() error {
|
|
syncCalls.Add(1)
|
|
return nil
|
|
},
|
|
MaxDelay: 10 * time.Millisecond,
|
|
LowWatermark: 0,
|
|
})
|
|
go gc.Run()
|
|
defer gc.Stop()
|
|
|
|
// Two sequential submits should each trigger their own fsync.
|
|
if err := gc.Submit(); err != nil {
|
|
t.Fatalf("Submit 1: %v", err)
|
|
}
|
|
if err := gc.Submit(); err != nil {
|
|
t.Fatalf("Submit 2: %v", err)
|
|
}
|
|
|
|
if c := syncCalls.Load(); c != 2 {
|
|
t.Errorf("syncCalls = %d, want 2 (no watermark optimization)", c)
|
|
}
|
|
}
|
|
|
|
func testAdaptiveThresholdBoundary(t *testing.T) {
|
|
// Exactly lowWatermark concurrent submits: should batch and flush.
|
|
var syncCalls atomic.Uint64
|
|
gc := NewGroupCommitter(GroupCommitterConfig{
|
|
SyncFunc: func() error {
|
|
syncCalls.Add(1)
|
|
return nil
|
|
},
|
|
MaxDelay: 50 * time.Millisecond,
|
|
MaxBatch: 64,
|
|
LowWatermark: 4,
|
|
})
|
|
go gc.Run()
|
|
defer gc.Stop()
|
|
|
|
const n = 4 // exactly lowWatermark
|
|
var wg sync.WaitGroup
|
|
wg.Add(n)
|
|
for i := 0; i < n; i++ {
|
|
go func() {
|
|
defer wg.Done()
|
|
gc.Submit()
|
|
}()
|
|
}
|
|
|
|
done := make(chan struct{})
|
|
go func() {
|
|
wg.Wait()
|
|
close(done)
|
|
}()
|
|
|
|
select {
|
|
case <-done:
|
|
// All 4 should complete. SyncCount should be >= 1.
|
|
if c := syncCalls.Load(); c == 0 {
|
|
t.Error("syncCalls = 0, want >= 1")
|
|
}
|
|
case <-time.After(2 * time.Second):
|
|
t.Fatal("threshold boundary: timed out waiting for 4 submits")
|
|
}
|
|
}
|
|
|
|
func testAdaptiveRampUp(t *testing.T) {
|
|
// Start with single callers (below watermark -> immediate), then ramp up.
|
|
var syncCalls atomic.Uint64
|
|
gc := NewGroupCommitter(GroupCommitterConfig{
|
|
SyncFunc: func() error {
|
|
syncCalls.Add(1)
|
|
return nil
|
|
},
|
|
MaxDelay: 50 * time.Millisecond,
|
|
MaxBatch: 64,
|
|
LowWatermark: 4,
|
|
})
|
|
go gc.Run()
|
|
defer gc.Stop()
|
|
|
|
// Phase 1: single serial submits (below watermark, immediate).
|
|
for i := 0; i < 5; i++ {
|
|
if err := gc.Submit(); err != nil {
|
|
t.Fatalf("serial Submit %d: %v", i, err)
|
|
}
|
|
}
|
|
serialSyncs := syncCalls.Load()
|
|
if serialSyncs != 5 {
|
|
t.Errorf("serial phase: syncCalls = %d, want 5 (1 per call)", serialSyncs)
|
|
}
|
|
|
|
// Phase 2: burst 64 concurrent submits (above watermark, batched).
|
|
var wg sync.WaitGroup
|
|
wg.Add(64)
|
|
for i := 0; i < 64; i++ {
|
|
go func() {
|
|
defer wg.Done()
|
|
gc.Submit()
|
|
}()
|
|
}
|
|
wg.Wait()
|
|
|
|
totalSyncs := syncCalls.Load()
|
|
batchSyncs := totalSyncs - serialSyncs
|
|
// 64 submits should complete without deadlock. Some batching expected
|
|
// but fast syncFunc means each call may flush individually.
|
|
if batchSyncs > 64 {
|
|
t.Errorf("batch phase: %d fsyncs for 64 submits, want <= 64", batchSyncs)
|
|
}
|
|
t.Logf("ramp_up: serial=%d, batch=%d fsyncs for 64 submits", serialSyncs, batchSyncs)
|
|
}
|
|
|
|
func testAdaptiveMaxDelayHonored(t *testing.T) {
|
|
// 2 concurrent submits (below watermark) should still complete promptly.
|
|
gc := NewGroupCommitter(GroupCommitterConfig{
|
|
SyncFunc: func() error { return nil },
|
|
MaxDelay: 5 * time.Second,
|
|
LowWatermark: 4,
|
|
})
|
|
go gc.Run()
|
|
defer gc.Stop()
|
|
|
|
var wg sync.WaitGroup
|
|
wg.Add(2)
|
|
for i := 0; i < 2; i++ {
|
|
go func() {
|
|
defer wg.Done()
|
|
gc.Submit()
|
|
}()
|
|
}
|
|
|
|
done := make(chan struct{})
|
|
go func() {
|
|
wg.Wait()
|
|
close(done)
|
|
}()
|
|
|
|
select {
|
|
case <-done:
|
|
// Good: completed without waiting 5s maxDelay (adaptive skip).
|
|
case <-time.After(2 * time.Second):
|
|
t.Fatal("2 submits should complete quickly (below watermark), took >2s")
|
|
}
|
|
}
|
|
|
|
func testAdaptiveFsyncErrorAllWaiters(t *testing.T) {
|
|
// Inject fsync error during batched flush: all waiters must receive the error.
|
|
errIO := errors.New("simulated disk error")
|
|
var degradedCalled atomic.Bool
|
|
|
|
gc := NewGroupCommitter(GroupCommitterConfig{
|
|
SyncFunc: func() error {
|
|
return errIO
|
|
},
|
|
MaxDelay: 50 * time.Millisecond,
|
|
MaxBatch: 64,
|
|
LowWatermark: 4,
|
|
OnDegraded: func() { degradedCalled.Store(true) },
|
|
})
|
|
go gc.Run()
|
|
defer gc.Stop()
|
|
|
|
const n = 8
|
|
var wg sync.WaitGroup
|
|
errs := make([]error, n)
|
|
wg.Add(n)
|
|
for i := 0; i < n; i++ {
|
|
go func(idx int) {
|
|
defer wg.Done()
|
|
errs[idx] = gc.Submit()
|
|
}(i)
|
|
}
|
|
wg.Wait()
|
|
|
|
for i, err := range errs {
|
|
if !errors.Is(err, errIO) {
|
|
t.Errorf("Submit[%d]: got %v, want %v", i, err, errIO)
|
|
}
|
|
}
|
|
if !degradedCalled.Load() {
|
|
t.Error("OnDegraded was not called")
|
|
}
|
|
}
|
|
|
|
func testGroupSubmitAfterStop(t *testing.T) {
|
|
gc := NewGroupCommitter(GroupCommitterConfig{
|
|
SyncFunc: func() error { return nil },
|
|
MaxDelay: 10 * time.Millisecond,
|
|
})
|
|
go gc.Run()
|
|
gc.Stop()
|
|
|
|
// Submit after Stop must return ErrGroupCommitShutdown, not deadlock.
|
|
done := make(chan error, 1)
|
|
go func() {
|
|
done <- gc.Submit()
|
|
}()
|
|
|
|
select {
|
|
case err := <-done:
|
|
if !errors.Is(err, ErrGroupCommitShutdown) {
|
|
t.Errorf("Submit after Stop: %v, want ErrGroupCommitShutdown", err)
|
|
}
|
|
case <-time.After(2 * time.Second):
|
|
t.Fatal("Submit after Stop deadlocked")
|
|
}
|
|
}
|