package lifecycle import ( "context" "fmt" "math" "path" "strings" "time" "github.com/seaweedfs/seaweedfs/weed/glog" "github.com/seaweedfs/seaweedfs/weed/pb/filer_pb" "github.com/seaweedfs/seaweedfs/weed/pb/plugin_pb" pluginworker "github.com/seaweedfs/seaweedfs/weed/plugin/worker" "github.com/seaweedfs/seaweedfs/weed/s3api/s3_constants" ) type executionResult struct { objectsExpired int64 objectsScanned int64 deleteMarkersClean int64 mpuAborted int64 errors int64 } // executeLifecycleForBucket processes lifecycle rules for a single bucket: // 1. Reads filer.conf to get TTL rules for the bucket's collection // 2. Walks the bucket directory tree to find expired objects // 3. Deletes expired objects (unless dry run) func (h *Handler) executeLifecycleForBucket( ctx context.Context, filerClient filer_pb.SeaweedFilerClient, config Config, bucket, bucketsPath string, sender pluginworker.ExecutionSender, jobID string, ) (*executionResult, error) { result := &executionResult{} // Load filer.conf to verify TTL rules still exist. fc, err := loadFilerConf(ctx, filerClient) if err != nil { return result, fmt.Errorf("load filer conf: %w", err) } collection := bucket ttlRules := fc.GetCollectionTtls(collection) if len(ttlRules) == 0 { glog.V(1).Infof("s3_lifecycle: bucket %s has no lifecycle rules, skipping", bucket) return result, nil } _ = sender.SendProgress(&plugin_pb.JobProgressUpdate{ JobId: jobID, JobType: jobType, State: plugin_pb.JobState_JOB_STATE_RUNNING, ProgressPercent: 10, Stage: "scanning", Message: fmt.Sprintf("scanning bucket %s for expired objects (%d rules)", bucket, len(ttlRules)), }) // Shared budget across all phases so we don't exceed MaxDeletesPerBucket. remaining := config.MaxDeletesPerBucket // Find expired objects. expired, scanned, err := listExpiredObjects(ctx, filerClient, bucketsPath, bucket, remaining) result.objectsScanned = scanned if err != nil { return result, fmt.Errorf("list expired objects: %w", err) } if len(expired) > 0 { glog.V(1).Infof("s3_lifecycle: bucket %s: found %d expired objects out of %d scanned", bucket, len(expired), scanned) } else { glog.V(1).Infof("s3_lifecycle: bucket %s: scanned %d objects, none expired", bucket, scanned) } if config.DryRun && len(expired) > 0 { result.objectsExpired = int64(len(expired)) _ = sender.SendProgress(&plugin_pb.JobProgressUpdate{ JobId: jobID, JobType: jobType, State: plugin_pb.JobState_JOB_STATE_RUNNING, ProgressPercent: 100, Stage: "dry_run", Message: fmt.Sprintf("dry run: would delete %d expired objects", len(expired)), }) return result, nil } // Delete expired objects in batches. if len(expired) > 0 { _ = sender.SendProgress(&plugin_pb.JobProgressUpdate{ JobId: jobID, JobType: jobType, State: plugin_pb.JobState_JOB_STATE_RUNNING, ProgressPercent: 50, Stage: "deleting", Message: fmt.Sprintf("deleting %d expired objects", len(expired)), }) var batchSize int if config.BatchSize <= 0 { batchSize = defaultBatchSize } else if config.BatchSize > math.MaxInt { batchSize = math.MaxInt } else { batchSize = int(config.BatchSize) } for i := 0; i < len(expired); i += batchSize { select { case <-ctx.Done(): return result, ctx.Err() default: } end := i + batchSize if end > len(expired) { end = len(expired) } batch := expired[i:end] deleted, errs, batchErr := deleteExpiredObjects(ctx, filerClient, batch) result.objectsExpired += int64(deleted) result.errors += int64(errs) if batchErr != nil { return result, batchErr } progress := float64(end)/float64(len(expired))*50 + 50 // 50-100% _ = sender.SendProgress(&plugin_pb.JobProgressUpdate{ JobId: jobID, JobType: jobType, State: plugin_pb.JobState_JOB_STATE_RUNNING, ProgressPercent: progress, Stage: "deleting", Message: fmt.Sprintf("deleted %d/%d expired objects", result.objectsExpired, len(expired)), }) } remaining -= result.objectsExpired + result.errors if remaining < 0 { remaining = 0 } } // Delete marker cleanup. if config.DeleteMarkerCleanup && remaining > 0 { _ = sender.SendProgress(&plugin_pb.JobProgressUpdate{ JobId: jobID, JobType: jobType, State: plugin_pb.JobState_JOB_STATE_RUNNING, Stage: "cleaning_delete_markers", Message: "cleaning expired delete markers", }) cleaned, cleanErrs, cleanCtxErr := cleanupDeleteMarkers(ctx, filerClient, bucketsPath, bucket, remaining) result.deleteMarkersClean = int64(cleaned) result.errors += int64(cleanErrs) if cleanCtxErr != nil { return result, cleanCtxErr } remaining -= int64(cleaned + cleanErrs) if remaining < 0 { remaining = 0 } } // Abort incomplete multipart uploads. if config.AbortMPUDays > 0 && remaining > 0 { _ = sender.SendProgress(&plugin_pb.JobProgressUpdate{ JobId: jobID, JobType: jobType, State: plugin_pb.JobState_JOB_STATE_RUNNING, Stage: "aborting_mpus", Message: fmt.Sprintf("aborting multipart uploads older than %d days", config.AbortMPUDays), }) aborted, abortErrs, abortCtxErr := abortIncompleteMPUs(ctx, filerClient, bucketsPath, bucket, config.AbortMPUDays, remaining) result.mpuAborted = int64(aborted) result.errors += int64(abortErrs) if abortCtxErr != nil { return result, abortCtxErr } } return result, nil } // cleanupDeleteMarkers scans the bucket for entries marked as delete markers // (via the S3 versioning extended attribute) and removes them. // // NOTE: This currently removes delete markers unconditionally without checking // whether prior non-expired versions exist. In versioned buckets, removing a // delete marker can resurface an older version. A future enhancement should // query version metadata before removal to match AWS ExpiredObjectDeleteMarker // semantics (only remove when no non-current versions remain). func cleanupDeleteMarkers( ctx context.Context, client filer_pb.SeaweedFilerClient, bucketsPath, bucket string, limit int64, ) (cleaned, errors int, ctxErr error) { bucketPath := path.Join(bucketsPath, bucket) dirsToProcess := []string{bucketPath} for len(dirsToProcess) > 0 { if ctx.Err() != nil { return cleaned, errors, ctx.Err() } dir := dirsToProcess[0] dirsToProcess = dirsToProcess[1:] listErr := filer_pb.SeaweedList(ctx, client, dir, "", func(entry *filer_pb.Entry, isLast bool) error { if entry.IsDirectory { // Skip .uploads directories. if entry.Name != ".uploads" { dirsToProcess = append(dirsToProcess, path.Join(dir, entry.Name)) } return nil } if isDeleteMarker(entry) { if err := filer_pb.DoRemove(ctx, client, dir, entry.Name, true, false, false, false, nil); err != nil { glog.V(1).Infof("s3_lifecycle: failed to remove delete marker %s/%s: %v", dir, entry.Name, err) errors++ } else { cleaned++ } } if limit > 0 && int64(cleaned+errors) >= limit { return fmt.Errorf("limit reached") } return nil }, "", false, 10000) if listErr != nil && !strings.Contains(listErr.Error(), "limit reached") { return cleaned, errors, fmt.Errorf("list %s: %w", dir, listErr) } if limit > 0 && int64(cleaned+errors) >= limit { break } } return cleaned, errors, nil } // isDeleteMarker checks if an entry is an S3 delete marker. func isDeleteMarker(entry *filer_pb.Entry) bool { if entry == nil || entry.Extended == nil { return false } return string(entry.Extended[s3_constants.ExtDeleteMarkerKey]) == "true" } // abortIncompleteMPUs scans the .uploads directory under a bucket and // removes multipart upload entries older than the specified number of days. func abortIncompleteMPUs( ctx context.Context, client filer_pb.SeaweedFilerClient, bucketsPath, bucket string, olderThanDays, limit int64, ) (aborted, errors int, ctxErr error) { uploadsDir := path.Join(bucketsPath, bucket, ".uploads") cutoff := time.Now().Add(-time.Duration(olderThanDays) * 24 * time.Hour) listErr := filer_pb.SeaweedList(ctx, client, uploadsDir, "", func(entry *filer_pb.Entry, isLast bool) error { if ctx.Err() != nil { return ctx.Err() } if !entry.IsDirectory { return nil } // Each subdirectory under .uploads is one multipart upload. // Check the directory creation time. if entry.Attributes != nil && entry.Attributes.Crtime > 0 { created := time.Unix(entry.Attributes.Crtime, 0) if created.Before(cutoff) { uploadPath := path.Join(uploadsDir, entry.Name) if err := filer_pb.DoRemove(ctx, client, uploadsDir, entry.Name, true, true, true, false, nil); err != nil { glog.V(1).Infof("s3_lifecycle: failed to abort MPU %s: %v", uploadPath, err) errors++ } else { aborted++ } } } if limit > 0 && int64(aborted+errors) >= limit { return fmt.Errorf("limit reached") } return nil }, "", false, 10000) if listErr != nil && !strings.Contains(listErr.Error(), "limit reached") { return aborted, errors, fmt.Errorf("list uploads in %s: %w", uploadsDir, listErr) } return aborted, errors, nil } // deleteExpiredObjects deletes a batch of expired objects from the filer. // Returns a non-nil error when the context is canceled mid-batch. func deleteExpiredObjects( ctx context.Context, client filer_pb.SeaweedFilerClient, objects []expiredObject, ) (deleted, errors int, ctxErr error) { for _, obj := range objects { if ctx.Err() != nil { return deleted, errors, ctx.Err() } err := filer_pb.DoRemove(ctx, client, obj.dir, obj.name, true, false, false, false, nil) if err != nil { glog.V(1).Infof("s3_lifecycle: failed to delete %s/%s: %v", obj.dir, obj.name, err) errors++ continue } deleted++ } return deleted, errors, nil } // nowUnix returns the current time as a Unix timestamp. func nowUnix() int64 { return time.Now().Unix() }