From 00f871178997ae39d1d851146bfdd80e974f2e7f Mon Sep 17 00:00:00 2001 From: chrislu Date: Wed, 29 Oct 2025 13:35:12 -0700 Subject: [PATCH] refactoring retrying --- weed/filer/filer_deletion.go | 54 +++++++++++++++---------------- weed/filer/filer_deletion_test.go | 25 ++++++++++++++ 2 files changed, 51 insertions(+), 28 deletions(-) diff --git a/weed/filer/filer_deletion.go b/weed/filer/filer_deletion.go index 07f73d302..739fcaa00 100644 --- a/weed/filer/filer_deletion.go +++ b/weed/filer/filer_deletion.go @@ -104,6 +104,28 @@ func NewDeletionRetryQueue() *DeletionRetryQueue { return q } +// calculateBackoff calculates the exponential backoff delay for a given retry count. +// Uses exponential backoff formula: InitialRetryDelay * 2^(retryCount-1) +// The first retry (retryCount=1) uses InitialRetryDelay, second uses 2x, third uses 4x, etc. +// Includes overflow protection and caps at MaxRetryDelay. +func calculateBackoff(retryCount int) time.Duration { + // The first retry is attempt 1, but shift should start at 0 + shiftAmount := uint(retryCount - 1) + if shiftAmount > 63 { + // Prevent overflow: use max delay directly + return MaxRetryDelay + } + + delay := InitialRetryDelay * time.Duration(1< MaxRetryDelay { + delay = MaxRetryDelay + } + + return delay +} + // AddOrUpdate adds a new failed deletion or updates an existing one // Time complexity: O(log N) for insertion/update func (q *DeletionRetryQueue) AddOrUpdate(fileId string, errorMsg string) { @@ -114,19 +136,7 @@ func (q *DeletionRetryQueue) AddOrUpdate(fileId string, errorMsg string) { if item, exists := q.itemIndex[fileId]; exists { item.RetryCount++ item.LastError = errorMsg - // Calculate next retry time with exponential backoff with overflow protection - shiftAmount := uint(item.RetryCount - 1) - var delay time.Duration - if shiftAmount > 63 { - // Prevent overflow: use max delay directly - delay = MaxRetryDelay - } else { - delay = InitialRetryDelay * time.Duration(1< MaxRetryDelay { - delay = MaxRetryDelay - } - } + delay := calculateBackoff(item.RetryCount) item.NextRetryAt = time.Now().Add(delay) // Re-heapify since NextRetryAt changed heap.Fix(&q.heap, item.heapIndex) @@ -159,21 +169,9 @@ func (q *DeletionRetryQueue) RequeueForRetry(item *DeletionRetryItem, errorMsg s item.LastError = errorMsg // Calculate next retry time with exponential backoff - // Check for potential overflow in the shift operation - shiftAmount := uint(item.RetryCount - 1) - if shiftAmount > 63 { - // Prevent overflow: use max delay directly - item.NextRetryAt = time.Now().Add(MaxRetryDelay) - glog.V(2).Infof("requeued retry for %s: attempt %d (capped at max delay)", item.FileId, item.RetryCount) - } else { - delay := InitialRetryDelay * time.Duration(1< MaxRetryDelay { - delay = MaxRetryDelay - } - item.NextRetryAt = time.Now().Add(delay) - glog.V(2).Infof("requeued retry for %s: attempt %d, next retry in %v", item.FileId, item.RetryCount, delay) - } + delay := calculateBackoff(item.RetryCount) + item.NextRetryAt = time.Now().Add(delay) + glog.V(2).Infof("requeued retry for %s: attempt %d, next retry in %v", item.FileId, item.RetryCount, delay) // Re-add to heap and index heap.Push(&q.heap, item) diff --git a/weed/filer/filer_deletion_test.go b/weed/filer/filer_deletion_test.go index 306232872..f31bae84f 100644 --- a/weed/filer/filer_deletion_test.go +++ b/weed/filer/filer_deletion_test.go @@ -130,6 +130,31 @@ func TestDeletionRetryQueue_MaxAttemptsReached(t *testing.T) { } } +func TestCalculateBackoff(t *testing.T) { + testCases := []struct { + retryCount int + expectedDelay time.Duration + description string + }{ + {1, InitialRetryDelay, "first retry"}, + {2, InitialRetryDelay * 2, "second retry"}, + {3, InitialRetryDelay * 4, "third retry"}, + {4, InitialRetryDelay * 8, "fourth retry"}, + {5, InitialRetryDelay * 16, "fifth retry"}, + {10, MaxRetryDelay, "capped at max delay"}, + {65, MaxRetryDelay, "overflow protection (shift > 63)"}, + {100, MaxRetryDelay, "very high retry count"}, + } + + for _, tc := range testCases { + result := calculateBackoff(tc.retryCount) + if result != tc.expectedDelay { + t.Errorf("%s (retry %d): expected %v, got %v", + tc.description, tc.retryCount, tc.expectedDelay, result) + } + } +} + func TestIsRetryableError(t *testing.T) { testCases := []struct { error string