You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
328 lines
9.9 KiB
328 lines
9.9 KiB
package lifecycle
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"math"
|
|
"path"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/seaweedfs/seaweedfs/weed/glog"
|
|
"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
|
|
"github.com/seaweedfs/seaweedfs/weed/pb/plugin_pb"
|
|
pluginworker "github.com/seaweedfs/seaweedfs/weed/plugin/worker"
|
|
"github.com/seaweedfs/seaweedfs/weed/s3api/s3_constants"
|
|
)
|
|
|
|
// executionResult aggregates the per-bucket counters produced by one
// lifecycle execution pass.
type executionResult struct {
	objectsExpired     int64 // expired objects deleted (or, in dry run, that would be deleted)
	objectsScanned     int64 // objects examined while walking the bucket tree
	deleteMarkersClean int64 // S3 delete markers removed during cleanup
	mpuAborted         int64 // incomplete multipart uploads aborted
	errors             int64 // individual delete/cleanup/abort operations that failed
}
|
|
|
|
// executeLifecycleForBucket processes lifecycle rules for a single bucket:
|
|
// 1. Reads filer.conf to get TTL rules for the bucket's collection
|
|
// 2. Walks the bucket directory tree to find expired objects
|
|
// 3. Deletes expired objects (unless dry run)
|
|
func (h *Handler) executeLifecycleForBucket(
|
|
ctx context.Context,
|
|
filerClient filer_pb.SeaweedFilerClient,
|
|
config Config,
|
|
bucket, bucketsPath string,
|
|
sender pluginworker.ExecutionSender,
|
|
jobID string,
|
|
) (*executionResult, error) {
|
|
result := &executionResult{}
|
|
|
|
// Load filer.conf to verify TTL rules still exist.
|
|
fc, err := loadFilerConf(ctx, filerClient)
|
|
if err != nil {
|
|
return result, fmt.Errorf("load filer conf: %w", err)
|
|
}
|
|
|
|
collection := bucket
|
|
ttlRules := fc.GetCollectionTtls(collection)
|
|
if len(ttlRules) == 0 {
|
|
glog.V(1).Infof("s3_lifecycle: bucket %s has no lifecycle rules, skipping", bucket)
|
|
return result, nil
|
|
}
|
|
|
|
_ = sender.SendProgress(&plugin_pb.JobProgressUpdate{
|
|
JobId: jobID,
|
|
JobType: jobType,
|
|
State: plugin_pb.JobState_JOB_STATE_RUNNING,
|
|
ProgressPercent: 10,
|
|
Stage: "scanning",
|
|
Message: fmt.Sprintf("scanning bucket %s for expired objects (%d rules)", bucket, len(ttlRules)),
|
|
})
|
|
|
|
// Shared budget across all phases so we don't exceed MaxDeletesPerBucket.
|
|
remaining := config.MaxDeletesPerBucket
|
|
|
|
// Find expired objects.
|
|
expired, scanned, err := listExpiredObjects(ctx, filerClient, bucketsPath, bucket, remaining)
|
|
result.objectsScanned = scanned
|
|
if err != nil {
|
|
return result, fmt.Errorf("list expired objects: %w", err)
|
|
}
|
|
|
|
if len(expired) > 0 {
|
|
glog.V(1).Infof("s3_lifecycle: bucket %s: found %d expired objects out of %d scanned", bucket, len(expired), scanned)
|
|
} else {
|
|
glog.V(1).Infof("s3_lifecycle: bucket %s: scanned %d objects, none expired", bucket, scanned)
|
|
}
|
|
|
|
if config.DryRun && len(expired) > 0 {
|
|
result.objectsExpired = int64(len(expired))
|
|
_ = sender.SendProgress(&plugin_pb.JobProgressUpdate{
|
|
JobId: jobID,
|
|
JobType: jobType,
|
|
State: plugin_pb.JobState_JOB_STATE_RUNNING,
|
|
ProgressPercent: 100,
|
|
Stage: "dry_run",
|
|
Message: fmt.Sprintf("dry run: would delete %d expired objects", len(expired)),
|
|
})
|
|
return result, nil
|
|
}
|
|
|
|
// Delete expired objects in batches.
|
|
if len(expired) > 0 {
|
|
_ = sender.SendProgress(&plugin_pb.JobProgressUpdate{
|
|
JobId: jobID,
|
|
JobType: jobType,
|
|
State: plugin_pb.JobState_JOB_STATE_RUNNING,
|
|
ProgressPercent: 50,
|
|
Stage: "deleting",
|
|
Message: fmt.Sprintf("deleting %d expired objects", len(expired)),
|
|
})
|
|
|
|
var batchSize int
|
|
if config.BatchSize <= 0 {
|
|
batchSize = defaultBatchSize
|
|
} else if config.BatchSize > math.MaxInt {
|
|
batchSize = math.MaxInt
|
|
} else {
|
|
batchSize = int(config.BatchSize)
|
|
}
|
|
|
|
for i := 0; i < len(expired); i += batchSize {
|
|
select {
|
|
case <-ctx.Done():
|
|
return result, ctx.Err()
|
|
default:
|
|
}
|
|
|
|
end := i + batchSize
|
|
if end > len(expired) {
|
|
end = len(expired)
|
|
}
|
|
batch := expired[i:end]
|
|
|
|
deleted, errs, batchErr := deleteExpiredObjects(ctx, filerClient, batch)
|
|
result.objectsExpired += int64(deleted)
|
|
result.errors += int64(errs)
|
|
|
|
if batchErr != nil {
|
|
return result, batchErr
|
|
}
|
|
|
|
progress := float64(end)/float64(len(expired))*50 + 50 // 50-100%
|
|
_ = sender.SendProgress(&plugin_pb.JobProgressUpdate{
|
|
JobId: jobID,
|
|
JobType: jobType,
|
|
State: plugin_pb.JobState_JOB_STATE_RUNNING,
|
|
ProgressPercent: progress,
|
|
Stage: "deleting",
|
|
Message: fmt.Sprintf("deleted %d/%d expired objects", result.objectsExpired, len(expired)),
|
|
})
|
|
}
|
|
|
|
remaining -= result.objectsExpired + result.errors
|
|
if remaining < 0 {
|
|
remaining = 0
|
|
}
|
|
}
|
|
|
|
// Delete marker cleanup.
|
|
if config.DeleteMarkerCleanup && remaining > 0 {
|
|
_ = sender.SendProgress(&plugin_pb.JobProgressUpdate{
|
|
JobId: jobID, JobType: jobType,
|
|
State: plugin_pb.JobState_JOB_STATE_RUNNING,
|
|
Stage: "cleaning_delete_markers", Message: "cleaning expired delete markers",
|
|
})
|
|
cleaned, cleanErrs, cleanCtxErr := cleanupDeleteMarkers(ctx, filerClient, bucketsPath, bucket, remaining)
|
|
result.deleteMarkersClean = int64(cleaned)
|
|
result.errors += int64(cleanErrs)
|
|
if cleanCtxErr != nil {
|
|
return result, cleanCtxErr
|
|
}
|
|
remaining -= int64(cleaned + cleanErrs)
|
|
if remaining < 0 {
|
|
remaining = 0
|
|
}
|
|
}
|
|
|
|
// Abort incomplete multipart uploads.
|
|
if config.AbortMPUDays > 0 && remaining > 0 {
|
|
_ = sender.SendProgress(&plugin_pb.JobProgressUpdate{
|
|
JobId: jobID, JobType: jobType,
|
|
State: plugin_pb.JobState_JOB_STATE_RUNNING,
|
|
Stage: "aborting_mpus", Message: fmt.Sprintf("aborting multipart uploads older than %d days", config.AbortMPUDays),
|
|
})
|
|
aborted, abortErrs, abortCtxErr := abortIncompleteMPUs(ctx, filerClient, bucketsPath, bucket, config.AbortMPUDays, remaining)
|
|
result.mpuAborted = int64(aborted)
|
|
result.errors += int64(abortErrs)
|
|
if abortCtxErr != nil {
|
|
return result, abortCtxErr
|
|
}
|
|
}
|
|
|
|
return result, nil
|
|
}
|
|
|
|
// cleanupDeleteMarkers scans the bucket for entries marked as delete markers
|
|
// (via the S3 versioning extended attribute) and removes them.
|
|
//
|
|
// NOTE: This currently removes delete markers unconditionally without checking
|
|
// whether prior non-expired versions exist. In versioned buckets, removing a
|
|
// delete marker can resurface an older version. A future enhancement should
|
|
// query version metadata before removal to match AWS ExpiredObjectDeleteMarker
|
|
// semantics (only remove when no non-current versions remain).
|
|
func cleanupDeleteMarkers(
|
|
ctx context.Context,
|
|
client filer_pb.SeaweedFilerClient,
|
|
bucketsPath, bucket string,
|
|
limit int64,
|
|
) (cleaned, errors int, ctxErr error) {
|
|
bucketPath := path.Join(bucketsPath, bucket)
|
|
|
|
dirsToProcess := []string{bucketPath}
|
|
for len(dirsToProcess) > 0 {
|
|
if ctx.Err() != nil {
|
|
return cleaned, errors, ctx.Err()
|
|
}
|
|
|
|
dir := dirsToProcess[0]
|
|
dirsToProcess = dirsToProcess[1:]
|
|
|
|
listErr := filer_pb.SeaweedList(ctx, client, dir, "", func(entry *filer_pb.Entry, isLast bool) error {
|
|
if entry.IsDirectory {
|
|
// Skip .uploads directories.
|
|
if entry.Name != ".uploads" {
|
|
dirsToProcess = append(dirsToProcess, path.Join(dir, entry.Name))
|
|
}
|
|
return nil
|
|
}
|
|
|
|
if isDeleteMarker(entry) {
|
|
if err := filer_pb.DoRemove(ctx, client, dir, entry.Name, true, false, false, false, nil); err != nil {
|
|
glog.V(1).Infof("s3_lifecycle: failed to remove delete marker %s/%s: %v", dir, entry.Name, err)
|
|
errors++
|
|
} else {
|
|
cleaned++
|
|
}
|
|
}
|
|
|
|
if limit > 0 && int64(cleaned+errors) >= limit {
|
|
return fmt.Errorf("limit reached")
|
|
}
|
|
return nil
|
|
}, "", false, 10000)
|
|
|
|
if listErr != nil && !strings.Contains(listErr.Error(), "limit reached") {
|
|
return cleaned, errors, fmt.Errorf("list %s: %w", dir, listErr)
|
|
}
|
|
|
|
if limit > 0 && int64(cleaned+errors) >= limit {
|
|
break
|
|
}
|
|
}
|
|
return cleaned, errors, nil
|
|
}
|
|
|
|
// isDeleteMarker checks if an entry is an S3 delete marker.
|
|
func isDeleteMarker(entry *filer_pb.Entry) bool {
|
|
if entry == nil || entry.Extended == nil {
|
|
return false
|
|
}
|
|
return string(entry.Extended[s3_constants.ExtDeleteMarkerKey]) == "true"
|
|
}
|
|
|
|
// abortIncompleteMPUs scans the .uploads directory under a bucket and
|
|
// removes multipart upload entries older than the specified number of days.
|
|
func abortIncompleteMPUs(
|
|
ctx context.Context,
|
|
client filer_pb.SeaweedFilerClient,
|
|
bucketsPath, bucket string,
|
|
olderThanDays, limit int64,
|
|
) (aborted, errors int, ctxErr error) {
|
|
uploadsDir := path.Join(bucketsPath, bucket, ".uploads")
|
|
cutoff := time.Now().Add(-time.Duration(olderThanDays) * 24 * time.Hour)
|
|
|
|
listErr := filer_pb.SeaweedList(ctx, client, uploadsDir, "", func(entry *filer_pb.Entry, isLast bool) error {
|
|
if ctx.Err() != nil {
|
|
return ctx.Err()
|
|
}
|
|
|
|
if !entry.IsDirectory {
|
|
return nil
|
|
}
|
|
|
|
// Each subdirectory under .uploads is one multipart upload.
|
|
// Check the directory creation time.
|
|
if entry.Attributes != nil && entry.Attributes.Crtime > 0 {
|
|
created := time.Unix(entry.Attributes.Crtime, 0)
|
|
if created.Before(cutoff) {
|
|
uploadPath := path.Join(uploadsDir, entry.Name)
|
|
if err := filer_pb.DoRemove(ctx, client, uploadsDir, entry.Name, true, true, true, false, nil); err != nil {
|
|
glog.V(1).Infof("s3_lifecycle: failed to abort MPU %s: %v", uploadPath, err)
|
|
errors++
|
|
} else {
|
|
aborted++
|
|
}
|
|
}
|
|
}
|
|
|
|
if limit > 0 && int64(aborted+errors) >= limit {
|
|
return fmt.Errorf("limit reached")
|
|
}
|
|
return nil
|
|
}, "", false, 10000)
|
|
|
|
if listErr != nil && !strings.Contains(listErr.Error(), "limit reached") {
|
|
return aborted, errors, fmt.Errorf("list uploads in %s: %w", uploadsDir, listErr)
|
|
}
|
|
|
|
return aborted, errors, nil
|
|
}
|
|
|
|
// deleteExpiredObjects deletes a batch of expired objects from the filer.
|
|
// Returns a non-nil error when the context is canceled mid-batch.
|
|
func deleteExpiredObjects(
|
|
ctx context.Context,
|
|
client filer_pb.SeaweedFilerClient,
|
|
objects []expiredObject,
|
|
) (deleted, errors int, ctxErr error) {
|
|
for _, obj := range objects {
|
|
if ctx.Err() != nil {
|
|
return deleted, errors, ctx.Err()
|
|
}
|
|
|
|
err := filer_pb.DoRemove(ctx, client, obj.dir, obj.name, true, false, false, false, nil)
|
|
if err != nil {
|
|
glog.V(1).Infof("s3_lifecycle: failed to delete %s/%s: %v", obj.dir, obj.name, err)
|
|
errors++
|
|
continue
|
|
}
|
|
deleted++
|
|
}
|
|
return deleted, errors, nil
|
|
}
|
|
|
|
// nowUnix reports the current wall-clock time as seconds since the Unix epoch.
func nowUnix() int64 {
	now := time.Now()
	return now.Unix()
}
|