package filer

import (
	"container/heap"
	"context"
	"fmt"
	"strings"
	"sync"
	"time"

	"google.golang.org/grpc"

	"github.com/seaweedfs/seaweedfs/weed/glog"
	"github.com/seaweedfs/seaweedfs/weed/operation"
	"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
	"github.com/seaweedfs/seaweedfs/weed/pb/volume_server_pb"
	"github.com/seaweedfs/seaweedfs/weed/storage"
	"github.com/seaweedfs/seaweedfs/weed/util"
	"github.com/seaweedfs/seaweedfs/weed/wdclient"
)
const (
// Maximum number of retry attempts for failed deletions
MaxRetryAttempts = 10
// Initial retry delay (will be doubled with each attempt)
InitialRetryDelay = 5 * time.Minute
// Maximum retry delay
MaxRetryDelay = 6 * time.Hour
// Interval for checking retry queue for ready items
DeletionRetryPollInterval = 1 * time.Minute
// Maximum number of items to process per retry iteration
DeletionRetryBatchSize = 1000
// Maximum number of error details to include in log messages
MaxLoggedErrorDetails = 10
// Interval for polling the deletion queue for new items
// Using a prime number to de-synchronize with other periodic tasks
DeletionPollInterval = 1123 * time.Millisecond
// Maximum number of file IDs to delete per batch (roughly 20 bytes per file ID)
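// At that estimate, a full batch occupies roughly 100000 * 20 bytes ≈ 2 MB of memory.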
DeletionBatchSize = 100000
)
// retryablePatterns contains error message patterns that indicate temporary/transient conditions
// that should be retried. These patterns are based on actual error messages from the deletion pipeline.
var retryablePatterns = []string{
"is read only", // Volume temporarily read-only (tiering, maintenance)
"error reading from server", // Network I/O errors
"connection reset by peer", // Network connection issues
"closed network connection", // Network connection closed unexpectedly
"connection refused", // Server temporarily unavailable
"timeout", // Operation timeout (network or server)
"deadline exceeded", // Context deadline exceeded
"context canceled", // Context cancellation (may be transient)
"lookup error", // Volume lookup failures
"lookup failed", // Volume server discovery issues
"too many requests", // Rate limiting / backpressure
"service unavailable", // HTTP 503 errors
"temporarily unavailable", // Temporary service issues
"try again", // Explicit retry suggestion
"i/o timeout", // Network I/O timeout
"broken pipe", // Connection broken during operation
}
// DeletionRetryItem represents a file deletion that failed and needs to be retried
type DeletionRetryItem struct {
FileId string
RetryCount int
NextRetryAt time.Time
LastError string
heapIndex int // index in the heap (for heap.Interface)
inFlight bool // true when item is being processed, prevents duplicate additions
}
// retryHeap implements heap.Interface for DeletionRetryItem
// Items are ordered by NextRetryAt (earliest first)
type retryHeap []*DeletionRetryItem
// Compile-time assertion that retryHeap implements heap.Interface
var _ heap.Interface = (*retryHeap)(nil)
func (h retryHeap) Len() int { return len(h) }
func (h retryHeap) Less(i, j int) bool {
return h[i].NextRetryAt.Before(h[j].NextRetryAt)
}
func (h retryHeap) Swap(i, j int) {
h[i], h[j] = h[j], h[i]
h[i].heapIndex = i
h[j].heapIndex = j
}
func (h *retryHeap) Push(x any) {
item := x.(*DeletionRetryItem)
item.heapIndex = len(*h)
*h = append(*h, item)
}
func (h *retryHeap) Pop() any {
old := *h
n := len(old)
item := old[n-1]
old[n-1] = nil // avoid memory leak
item.heapIndex = -1 // mark as removed
*h = old[0 : n-1]
return item
}
// DeletionRetryQueue manages the queue of failed deletions that need to be retried.
// Uses a min-heap ordered by NextRetryAt for efficient retrieval of ready items.
//
// LIMITATION: Current implementation stores retry queue in memory only.
// On filer restart, all pending retries are lost. With MaxRetryDelay up to 6 hours,
// process restarts during this window will cause retry state loss.
//
// TODO: Consider persisting retry queue to durable storage for production resilience:
// - Option 1: Leverage existing Filer store (KV operations)
// - Option 2: Periodic snapshots to disk with recovery on startup
// - Option 3: Write-ahead log for retry queue mutations
// - Trade-offs: Performance vs durability, complexity vs reliability
//
// For now, we accept in-memory storage as a pragmatic initial implementation.
// Lost retries are eventually consistent, as the affected files remain in the deletion queue.
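//
// A minimal usage sketch (the file id and error message are illustrative):
//
//	q := NewDeletionRetryQueue()
//	q.AddOrUpdate("3,01637037d6", "connection refused")
//	for _, item := range q.GetReadyItems(100) { // non-empty once NextRetryAt has passed
//		q.RequeueForRetry(item, "still failing") // or q.Remove(item) on success
//	}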
type DeletionRetryQueue struct {
heap retryHeap
itemIndex map[string]*DeletionRetryItem // for O(1) lookup by FileId
lock sync.Mutex
}
// NewDeletionRetryQueue creates a new retry queue
func NewDeletionRetryQueue() *DeletionRetryQueue {
q := &DeletionRetryQueue{
heap: make(retryHeap, 0),
itemIndex: make(map[string]*DeletionRetryItem),
}
heap.Init(&q.heap)
return q
}
// calculateBackoff calculates the exponential backoff delay for a given retry count.
// Uses exponential backoff formula: InitialRetryDelay * 2^(retryCount-1)
// The first retry (retryCount=1) uses InitialRetryDelay, second uses 2x, third uses 4x, etc.
// Includes overflow protection and caps at MaxRetryDelay.
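// Example progression (5m initial, 6h cap): attempt 1 -> 5m, 2 -> 10m, 3 -> 20m,
// 4 -> 40m, 5 -> 1h20m, 6 -> 2h40m, 7 -> 5h20m, 8+ -> 6h (capped).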
func calculateBackoff(retryCount int) time.Duration {
// The first retry is attempt 1, but shift should start at 0
if retryCount <= 1 {
return InitialRetryDelay
}
shiftAmount := uint(retryCount - 1)
// time.Duration is an int64. A left shift of 63 or more will result in a
// negative number or zero. The multiplication can also overflow much earlier
// (around a shift of 25 for a 5-minute initial delay).
// The `delay <= 0` check below correctly catches all these overflow cases.
delay := InitialRetryDelay << shiftAmount
if delay <= 0 || delay > MaxRetryDelay {
return MaxRetryDelay
}
return delay
}
// AddOrUpdate adds a new failed deletion or updates an existing one
// Time complexity: O(log N) for insertion/update
func (q *DeletionRetryQueue) AddOrUpdate(fileId string, errorMsg string) {
q.lock.Lock()
defer q.lock.Unlock()
// Check if item already exists (including in-flight items)
if item, exists := q.itemIndex[fileId]; exists {
// Item is already in the queue or being processed. Just update the error.
// The existing retry schedule should proceed.
// RetryCount is only incremented in RequeueForRetry when an actual retry is performed.
item.LastError = errorMsg
if item.inFlight {
glog.V(2).Infof("retry for %s in-flight: attempt %d, will preserve retry state", fileId, item.RetryCount)
} else {
glog.V(2).Infof("retry for %s already scheduled: attempt %d, next retry in %v", fileId, item.RetryCount, time.Until(item.NextRetryAt))
}
return
}
// Add new item
delay := InitialRetryDelay
item := &DeletionRetryItem{
FileId: fileId,
RetryCount: 1,
NextRetryAt: time.Now().Add(delay),
LastError: errorMsg,
inFlight: false,
}
heap.Push(&q.heap, item)
q.itemIndex[fileId] = item
glog.V(2).Infof("added retry for %s: next retry in %v", fileId, delay)
}
// RequeueForRetry re-adds a previously failed item back to the queue with incremented retry count.
// This method MUST be used when re-queuing items from processRetryBatch to preserve retry state.
// Time complexity: O(log N) for insertion
func (q *DeletionRetryQueue) RequeueForRetry(item *DeletionRetryItem, errorMsg string) {
q.lock.Lock()
defer q.lock.Unlock()
// Increment retry count
item.RetryCount++
item.LastError = errorMsg
// Calculate next retry time with exponential backoff
delay := calculateBackoff(item.RetryCount)
item.NextRetryAt = time.Now().Add(delay)
item.inFlight = false // Clear in-flight flag
glog.V(2).Infof("requeued retry for %s: attempt %d, next retry in %v", item.FileId, item.RetryCount, delay)
// Re-add to heap (item still in itemIndex)
heap.Push(&q.heap, item)
}
// GetReadyItems returns items that are ready to be retried and marks them as in-flight
// Time complexity: O(K log N) where K is the number of ready items
// Items are processed in order of NextRetryAt (earliest first)
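// Returned items stay in itemIndex with inFlight set; the caller must hand each
// one back via RequeueForRetry (on failure) or Remove (on success or permanent
// error), otherwise the index entry is never released.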
func (q *DeletionRetryQueue) GetReadyItems(maxItems int) []*DeletionRetryItem {
q.lock.Lock()
defer q.lock.Unlock()
now := time.Now()
var readyItems []*DeletionRetryItem
// Peek at items from the top of the heap (earliest NextRetryAt)
for len(q.heap) > 0 && len(readyItems) < maxItems {
item := q.heap[0]
// If the earliest item is not ready yet, no other items are ready either
if item.NextRetryAt.After(now) {
break
}
// Remove from heap but keep in itemIndex with inFlight flag
heap.Pop(&q.heap)
if item.RetryCount <= MaxRetryAttempts {
item.inFlight = true // Mark as being processed
readyItems = append(readyItems, item)
} else {
// Max attempts reached, log and discard completely
delete(q.itemIndex, item.FileId)
glog.Warningf("max retry attempts (%d) reached for %s, last error: %s", MaxRetryAttempts, item.FileId, item.LastError)
}
}
return readyItems
}
// Remove removes an item from the queue (called when deletion succeeds or fails permanently)
// Time complexity: O(1)
func (q *DeletionRetryQueue) Remove(item *DeletionRetryItem) {
q.lock.Lock()
defer q.lock.Unlock()
// Item was already removed from heap by GetReadyItems, just remove from index
delete(q.itemIndex, item.FileId)
}
// Size returns the current size of the retry queue
func (q *DeletionRetryQueue) Size() int {
q.lock.Lock()
defer q.lock.Unlock()
return len(q.heap)
}
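// LookupByMasterClientFn builds a volume-location lookup function backed by the
// volume id locations known to the master client.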
func LookupByMasterClientFn(masterClient *wdclient.MasterClient) func(vids []string) (map[string]*operation.LookupResult, error) {
return func(vids []string) (map[string]*operation.LookupResult, error) {
m := make(map[string]*operation.LookupResult)
for _, vid := range vids {
locs, _ := masterClient.GetVidLocations(vid)
var locations []operation.Location
for _, loc := range locs {
locations = append(locations, operation.Location{
Url: loc.Url,
PublicUrl: loc.PublicUrl,
GrpcPort: loc.GrpcPort,
})
}
m[vid] = &operation.LookupResult{
VolumeOrFileId: vid,
Locations: locations,
}
}
return m, nil
}
}
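// loopProcessingDeletion periodically drains the file id deletion queue and
// deletes the collected file ids in batches of up to DeletionBatchSize.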
func (f *Filer) loopProcessingDeletion() {
lookupFunc := LookupByMasterClientFn(f.MasterClient)
// Start retry processor in a separate goroutine
go f.loopProcessingDeletionRetry(lookupFunc)
ticker := time.NewTicker(DeletionPollInterval)
defer ticker.Stop()
for {
select {
case <-f.deletionQuit:
glog.V(0).Infof("deletion processor shutting down")
return
case <-ticker.C:
f.fileIdDeletionQueue.Consume(func(fileIds []string) {
for i := 0; i < len(fileIds); i += DeletionBatchSize {
end := i + DeletionBatchSize
if end > len(fileIds) {
end = len(fileIds)
}
toDeleteFileIds := fileIds[i:end]
f.processDeletionBatch(toDeleteFileIds, lookupFunc)
}
})
}
}
}
// processDeletionBatch handles deletion of a batch of file IDs and processes results.
// It classifies errors into retryable and permanent categories, adds retryable failures
// to the retry queue, and logs appropriate messages.
func (f *Filer) processDeletionBatch(toDeleteFileIds []string, lookupFunc func([]string) (map[string]*operation.LookupResult, error)) {
// Deduplicate file IDs to prevent incorrect retry count increments for the same file ID within a single batch.
uniqueFileIdsSlice := make([]string, 0, len(toDeleteFileIds))
processed := make(map[string]struct{}, len(toDeleteFileIds))
for _, fileId := range toDeleteFileIds {
if _, found := processed[fileId]; !found {
processed[fileId] = struct{}{}
uniqueFileIdsSlice = append(uniqueFileIdsSlice, fileId)
}
}
if len(uniqueFileIdsSlice) == 0 {
return
}
// Delete files and classify outcomes
outcomes := deleteFilesAndClassify(f.GrpcDialOption, uniqueFileIdsSlice, lookupFunc)
// Process outcomes
var successCount, notFoundCount, retryableErrorCount, permanentErrorCount int
var errorDetails []string
for _, fileId := range uniqueFileIdsSlice {
outcome := outcomes[fileId]
switch outcome.status {
case deletionOutcomeSuccess:
successCount++
case deletionOutcomeNotFound:
notFoundCount++
case deletionOutcomeRetryable, deletionOutcomeNoResult:
retryableErrorCount++
f.DeletionRetryQueue.AddOrUpdate(fileId, outcome.errorMsg)
if len(errorDetails) < MaxLoggedErrorDetails {
errorDetails = append(errorDetails, fileId+": "+outcome.errorMsg+" (will retry)")
}
case deletionOutcomePermanent:
permanentErrorCount++
if len(errorDetails) < MaxLoggedErrorDetails {
errorDetails = append(errorDetails, fileId+": "+outcome.errorMsg+" (permanent)")
}
}
}
if successCount > 0 || notFoundCount > 0 {
glog.V(2).Infof("deleted %d files successfully, %d already deleted (not found)", successCount, notFoundCount)
}
totalErrors := retryableErrorCount + permanentErrorCount
if totalErrors > 0 {
logMessage := fmt.Sprintf("failed to delete %d/%d files (%d retryable, %d permanent)",
totalErrors, len(uniqueFileIdsSlice), retryableErrorCount, permanentErrorCount)
if len(errorDetails) > 0 {
if totalErrors > MaxLoggedErrorDetails {
logMessage += fmt.Sprintf(" (showing first %d)", len(errorDetails))
}
glog.V(0).Infof("%s: %v", logMessage, strings.Join(errorDetails, "; "))
} else {
glog.V(0).Info(logMessage)
}
}
if f.DeletionRetryQueue.Size() > 0 {
glog.V(2).Infof("retry queue size: %d", f.DeletionRetryQueue.Size())
}
}
const (
deletionOutcomeSuccess = "success"
deletionOutcomeNotFound = "not_found"
deletionOutcomeRetryable = "retryable"
deletionOutcomePermanent = "permanent"
deletionOutcomeNoResult = "no_result"
)
// deletionOutcome represents the result of classifying deletion results for a file
type deletionOutcome struct {
status string // One of the deletionOutcome* constants
errorMsg string
}
// deleteFilesAndClassify performs deletion and classifies outcomes for a list of file IDs
func deleteFilesAndClassify(grpcDialOption grpc.DialOption, fileIds []string, lookupFunc func([]string) (map[string]*operation.LookupResult, error)) map[string]deletionOutcome {
// Perform deletion
results := operation.DeleteFileIdsWithLookupVolumeId(grpcDialOption, fileIds, lookupFunc)
// Group results by file ID to handle multiple results for replicated volumes
resultsByFileId := make(map[string][]*volume_server_pb.DeleteResult)
for _, result := range results {
resultsByFileId[result.FileId] = append(resultsByFileId[result.FileId], result)
}
// Classify outcome for each file
outcomes := make(map[string]deletionOutcome, len(fileIds))
for _, fileId := range fileIds {
outcomes[fileId] = classifyDeletionOutcome(fileId, resultsByFileId)
}
return outcomes
}
// classifyDeletionOutcome examines all deletion results for a file ID and determines the overall outcome
// Uses a single pass through results with early return for permanent errors (highest priority)
// Priority: Permanent > Retryable > Success > Not Found
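// For example, on a replicated volume where one replica succeeds and another
// returns "connection refused", the outcome is retryable, so the file id is
// re-queued and the remaining replica can be cleaned up on a later attempt.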
func classifyDeletionOutcome(fileId string, resultsByFileId map[string][]*volume_server_pb.DeleteResult) deletionOutcome {
fileIdResults, found := resultsByFileId[fileId]
if !found || len(fileIdResults) == 0 {
return deletionOutcome{
status: deletionOutcomeNoResult,
errorMsg: "no deletion result from volume server",
}
}
var firstRetryableError string
hasSuccess := false
for _, res := range fileIdResults {
if res.Error == "" {
hasSuccess = true
continue
}
if strings.Contains(res.Error, storage.ErrorDeleted.Error()) || res.Error == "not found" {
continue
}
if isRetryableError(res.Error) {
if firstRetryableError == "" {
firstRetryableError = res.Error
}
} else {
// Permanent error takes highest precedence - return immediately
return deletionOutcome{status: deletionOutcomePermanent, errorMsg: res.Error}
}
}
if firstRetryableError != "" {
return deletionOutcome{status: deletionOutcomeRetryable, errorMsg: firstRetryableError}
}
if hasSuccess {
return deletionOutcome{status: deletionOutcomeSuccess, errorMsg: ""}
}
// If we are here, all results were "not found"
return deletionOutcome{status: deletionOutcomeNotFound, errorMsg: ""}
}
// isRetryableError determines if an error is retryable based on its message.
//
// Current implementation uses string matching which is brittle and may break
// if error messages change in dependencies. This is acceptable for the initial
// implementation but should be improved in the future.
//
// TODO: Consider these improvements for more robust error handling:
// - Pass DeleteResult instead of just error string to access Status codes
// - Use HTTP status codes (503 Service Unavailable, 429 Too Many Requests, etc.)
// - Implement structured error types that can be checked with errors.Is/errors.As
// - Extract and check gRPC status codes for better classification
// - Add error wrapping in the deletion pipeline to preserve error context
//
// For now, we use conservative string matching for known transient error patterns.
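// Illustrative classification: an error like "volume 123 is read only" contains
// the "is read only" pattern and is retried; an error matching none of the
// patterns is treated as permanent and is logged but not re-queued.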
func isRetryableError(errorMsg string) bool {
// Empty errors are not retryable
if errorMsg == "" {
return false
}
errorLower := strings.ToLower(errorMsg)
for _, pattern := range retryablePatterns {
if strings.Contains(errorLower, pattern) {
return true
}
}
return false
}
// loopProcessingDeletionRetry processes the retry queue for failed deletions
func (f *Filer) loopProcessingDeletionRetry(lookupFunc func([]string) (map[string]*operation.LookupResult, error)) {
ticker := time.NewTicker(DeletionRetryPollInterval)
defer ticker.Stop()
for {
select {
case <-f.deletionQuit:
glog.V(0).Infof("retry processor shutting down, %d items remaining in queue", f.DeletionRetryQueue.Size())
return
case <-ticker.C:
// Process all ready items in batches until queue is empty
totalProcessed := 0
for {
readyItems := f.DeletionRetryQueue.GetReadyItems(DeletionRetryBatchSize)
if len(readyItems) == 0 {
break
}
f.processRetryBatch(readyItems, lookupFunc)
totalProcessed += len(readyItems)
}
if totalProcessed > 0 {
glog.V(1).Infof("retried deletion of %d files", totalProcessed)
}
}
}
}
// processRetryBatch attempts to retry deletion of files and processes results.
// Successfully deleted items are removed from tracking, retryable failures are
// re-queued with updated retry counts, and permanent errors are logged and discarded.
func (f *Filer) processRetryBatch(readyItems []*DeletionRetryItem, lookupFunc func([]string) (map[string]*operation.LookupResult, error)) {
// Extract file IDs from retry items
fileIds := make([]string, 0, len(readyItems))
for _, item := range readyItems {
fileIds = append(fileIds, item.FileId)
}
// Delete files and classify outcomes
outcomes := deleteFilesAndClassify(f.GrpcDialOption, fileIds, lookupFunc)
// Process outcomes - iterate over readyItems to ensure all items are accounted for
var successCount, notFoundCount, retryCount, permanentErrorCount int
for _, item := range readyItems {
outcome := outcomes[item.FileId]
switch outcome.status {
case deletionOutcomeSuccess:
successCount++
f.DeletionRetryQueue.Remove(item) // Remove from queue (success)
glog.V(2).Infof("retry successful for %s after %d attempts", item.FileId, item.RetryCount)
case deletionOutcomeNotFound:
notFoundCount++
f.DeletionRetryQueue.Remove(item) // Remove from queue (already deleted)
case deletionOutcomeRetryable, deletionOutcomeNoResult:
retryCount++
if outcome.status == deletionOutcomeNoResult {
glog.Warningf("no deletion result for retried file %s, re-queuing to avoid loss", item.FileId)
}
f.DeletionRetryQueue.RequeueForRetry(item, outcome.errorMsg)
case deletionOutcomePermanent:
permanentErrorCount++
f.DeletionRetryQueue.Remove(item) // Remove from queue (permanent failure)
glog.Warningf("permanent error on retry for %s after %d attempts: %s", item.FileId, item.RetryCount, outcome.errorMsg)
}
}
if successCount > 0 || notFoundCount > 0 {
glog.V(1).Infof("retry: deleted %d files successfully, %d already deleted", successCount, notFoundCount)
}
if retryCount > 0 {
glog.V(1).Infof("retry: %d files still failing, will retry again later", retryCount)
}
if permanentErrorCount > 0 {
glog.Warningf("retry: %d files failed with permanent errors", permanentErrorCount)
}
}
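// DeleteUncommittedChunks queues chunk deletion without consulting per-path
// storage rules.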
func (f *Filer) DeleteUncommittedChunks(ctx context.Context, chunks []*filer_pb.FileChunk) {
f.doDeleteChunks(ctx, chunks)
}
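// DeleteChunks queues chunk deletion for the given path unless the matching
// storage rule disables chunk deletion.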
func (f *Filer) DeleteChunks(ctx context.Context, fullpath util.FullPath, chunks []*filer_pb.FileChunk) {
rule := f.FilerConf.MatchStorageRule(string(fullpath))
if rule.DisableChunkDeletion {
return
}
f.doDeleteChunks(ctx, chunks)
}
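// doDeleteChunks enqueues every referenced file id for deletion. Manifest
// chunks are resolved into their underlying data chunks first; both the data
// chunks and the manifest chunk itself are enqueued.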
func (f *Filer) doDeleteChunks(ctx context.Context, chunks []*filer_pb.FileChunk) {
for _, chunk := range chunks {
if !chunk.IsChunkManifest {
f.fileIdDeletionQueue.EnQueue(chunk.GetFileIdString())
continue
}
dataChunks, manifestResolveErr := ResolveOneChunkManifest(ctx, f.MasterClient.LookupFileId, chunk)
if manifestResolveErr != nil {
glog.V(0).InfofCtx(ctx, "failed to resolve manifest %s: %v", chunk.FileId, manifestResolveErr)
}
for _, dChunk := range dataChunks {
f.fileIdDeletionQueue.EnQueue(dChunk.GetFileIdString())
}
f.fileIdDeletionQueue.EnQueue(chunk.GetFileIdString())
}
}
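// DeleteChunksNotRecursive enqueues the given chunks without resolving chunk
// manifests, so only the listed file ids themselves are deleted.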
func (f *Filer) DeleteChunksNotRecursive(chunks []*filer_pb.FileChunk) {
for _, chunk := range chunks {
f.fileIdDeletionQueue.EnQueue(chunk.GetFileIdString())
}
}
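// deleteChunksIfNotNew deletes the chunks referenced by oldEntry that are no
// longer referenced by newEntry.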
func (f *Filer) deleteChunksIfNotNew(ctx context.Context, oldEntry, newEntry *Entry) {
var oldChunks, newChunks []*filer_pb.FileChunk
if oldEntry != nil {
oldChunks = oldEntry.GetChunks()
}
if newEntry != nil {
newChunks = newEntry.GetChunks()
}
toDelete, err := MinusChunks(ctx, f.MasterClient.GetLookupFileIdFunction(), oldChunks, newChunks)
if err != nil {
glog.ErrorfCtx(ctx, "failed to resolve old entry chunks when deleting old entry chunks. new: %s, old: %s", newChunks, oldChunks)
return
}
f.DeleteChunksNotRecursive(toDelete)
}