package blockvol import ( "errors" "fmt" "sync" "sync/atomic" "time" ) var ( ErrGroupCommitShutdown = errors.New("blockvol: group committer shut down") ErrGroupCommitPanic = errors.New("blockvol: group committer panic in syncFunc") ) // GroupCommitter batches SyncCache requests and performs a single fsync // for the entire batch. This amortizes the cost of fsync across many callers. type GroupCommitter struct { syncFunc func() error // called to fsync (injectable for testing) maxDelay time.Duration // max wait before flushing a partial batch maxBatch int // flush immediately when this many waiters accumulate lowWatermark int // skip delay if fewer pending (0 = always wait) onDegraded func() // called when fsync fails postSyncCheck func() error // called after sync; error fails waiters metrics *EngineMetrics mu sync.Mutex pending []chan error stopped bool // set under mu by Run() before draining notifyCh chan struct{} stopCh chan struct{} done chan struct{} stopOnce sync.Once syncCount atomic.Uint64 // number of fsyncs performed (for testing) } // GroupCommitterConfig configures the group committer. type GroupCommitterConfig struct { SyncFunc func() error // required: the fsync function MaxDelay time.Duration // default 1ms MaxBatch int // default 64 LowWatermark int // skip delay if fewer pending (0 = always wait) OnDegraded func() // optional: called on fsync error PostSyncCheck func() error // optional: called after successful sync; error fails all waiters Metrics *EngineMetrics // optional; if nil, no metrics recorded } // NewGroupCommitter creates a new group committer. Call Run() to start it. func NewGroupCommitter(cfg GroupCommitterConfig) *GroupCommitter { if cfg.MaxDelay == 0 { cfg.MaxDelay = 1 * time.Millisecond } if cfg.MaxBatch == 0 { cfg.MaxBatch = 64 } if cfg.OnDegraded == nil { cfg.OnDegraded = func() {} } return &GroupCommitter{ syncFunc: cfg.SyncFunc, maxDelay: cfg.MaxDelay, maxBatch: cfg.MaxBatch, lowWatermark: cfg.LowWatermark, onDegraded: cfg.OnDegraded, postSyncCheck: cfg.PostSyncCheck, metrics: cfg.Metrics, notifyCh: make(chan struct{}, 1), stopCh: make(chan struct{}), done: make(chan struct{}), } } // Run is the main loop. Call this in a goroutine. func (gc *GroupCommitter) Run() { defer close(gc.done) for { // Wait for first waiter or shutdown. select { case <-gc.stopCh: gc.markStoppedAndDrain() return case <-gc.notifyCh: } // Adaptive: if pending count is below low watermark, flush immediately // without waiting for more waiters. skipDelay := false if gc.lowWatermark > 0 { gc.mu.Lock() n := len(gc.pending) gc.mu.Unlock() if n < gc.lowWatermark { skipDelay = true } } if !skipDelay { // Collect batch: wait up to maxDelay for more waiters, or until maxBatch reached. deadline := time.NewTimer(gc.maxDelay) collectDone := false for !collectDone { gc.mu.Lock() n := len(gc.pending) gc.mu.Unlock() if n >= gc.maxBatch { deadline.Stop() collectDone = true break } select { case <-gc.stopCh: deadline.Stop() gc.markStoppedAndDrain() return case <-deadline.C: collectDone = true case <-gc.notifyCh: continue } } } // Take all pending waiters. gc.mu.Lock() batch := gc.pending gc.pending = nil gc.mu.Unlock() if len(batch) == 0 { continue } // Perform fsync with panic recovery. A panic in syncFunc must // not leave waiters hung --notify them and drain stragglers. syncStart := time.Now() err := gc.callSyncFunc() gc.syncCount.Add(1) if err == nil && gc.postSyncCheck != nil { err = gc.postSyncCheck() } if err != nil { gc.onDegraded() } // Record metrics. if gc.metrics != nil { gc.metrics.RecordGroupCommitBatch(len(batch), time.Since(syncStart)) } // Wake all waiters. for _, ch := range batch { ch <- err } // If syncFunc panicked, stop accepting new work. if errors.Is(err, ErrGroupCommitPanic) { gc.markStoppedAndDrain() return } } } // Submit submits a sync request and blocks until the batch fsync completes. // Returns nil on success, the fsync error, or ErrGroupCommitShutdown if stopped. func (gc *GroupCommitter) Submit() error { ch := make(chan error, 1) gc.mu.Lock() if gc.stopped { gc.mu.Unlock() return ErrGroupCommitShutdown } gc.pending = append(gc.pending, ch) gc.mu.Unlock() // Non-blocking notify to wake Run(). select { case gc.notifyCh <- struct{}{}: default: } return <-ch } // Stop shuts down the group committer. Pending waiters receive ErrGroupCommitShutdown. // Safe to call multiple times. func (gc *GroupCommitter) Stop() { gc.stopOnce.Do(func() { close(gc.stopCh) }) <-gc.done } // SyncCount returns the number of fsyncs performed (for testing). func (gc *GroupCommitter) SyncCount() uint64 { return gc.syncCount.Load() } // callSyncFunc calls syncFunc with panic recovery. Returns ErrGroupCommitPanic // wrapping the panic value if syncFunc panics. func (gc *GroupCommitter) callSyncFunc() (err error) { defer func() { if r := recover(); r != nil { err = fmt.Errorf("%w: %v", ErrGroupCommitPanic, r) } }() return gc.syncFunc() } // markStoppedAndDrain sets the stopped flag under mu and drains all pending // waiters with ErrGroupCommitShutdown. This ensures no new Submit() can // enqueue after we drain, closing the race window from QA-002. func (gc *GroupCommitter) markStoppedAndDrain() { gc.mu.Lock() gc.stopped = true batch := gc.pending gc.pending = nil gc.mu.Unlock() for _, ch := range batch { ch <- ErrGroupCommitShutdown } }