From 98f545c7fae6d87dcb9b0253955d19b0ca886591 Mon Sep 17 00:00:00 2001 From: Chris Lu Date: Sat, 28 Mar 2026 11:16:58 -0700 Subject: [PATCH] lifecycle worker: detect buckets via lifecycle XML metadata (#8808) * s3api: extend lifecycle XML types with NoncurrentVersionExpiration, AbortIncompleteMultipartUpload Add missing S3 lifecycle rule types to the XML data model: - NoncurrentVersionExpiration with NoncurrentDays and NewerNoncurrentVersions - NoncurrentVersionTransition with NoncurrentDays and StorageClass - AbortIncompleteMultipartUpload with DaysAfterInitiation - Filter.ObjectSizeGreaterThan and ObjectSizeLessThan - And.ObjectSizeGreaterThan and ObjectSizeLessThan - Filter.UnmarshalXML to properly parse Tag, And, and size filter elements Each new type follows the existing set-field pattern for conditional XML marshaling. No behavior changes - these types are not yet wired into handlers or the lifecycle worker. * s3lifecycle: add lifecycle rule evaluator package New package weed/s3api/s3lifecycle/ provides a pure-function lifecycle rule evaluation engine. The evaluator accepts flattened Rule structs and ObjectInfo metadata, and returns the appropriate Action. Components: - evaluator.go: Evaluate() for per-object actions with S3 priority ordering (delete marker > noncurrent version > current expiration), ShouldExpireNoncurrentVersion() with NewerNoncurrentVersions support, EvaluateMPUAbort() for multipart upload rules - filter.go: prefix, tag, and size-based filter matching - tags.go: ExtractTags() extracts S3 tags from filer Extended metadata, HasTagRules() for scan-time optimization - version_time.go: GetVersionTimestamp() extracts timestamps from SeaweedFS version IDs (both old and new format) Comprehensive test coverage: 54 tests covering all action types, filter combinations, edge cases, and version ID formats. 
* s3api: add UnmarshalXML for Expiration, Transition, ExpireDeleteMarker Add UnmarshalXML methods that set the internal 'set' flag during XML parsing. Previously these flags were only set programmatically, causing XML round-trip to drop elements. This ensures lifecycle configurations stored as XML survive unmarshal/marshal cycles correctly. Add comprehensive XML round-trip tests for all lifecycle rule types including NoncurrentVersionExpiration, AbortIncompleteMultipartUpload, Filter with Tag/And/size constraints, and a complete Terraform-style lifecycle configuration. * s3lifecycle: address review feedback - Fix version_time.go overflow: guard timestampPart > MaxInt64 before the inversion subtraction to prevent uint64 wrap - Make all expiry checks inclusive (!now.Before instead of now.After) so actions trigger at the exact scheduled instant - Add NoncurrentIndex to ObjectInfo so Evaluate() can properly handle NewerNoncurrentVersions via ShouldExpireNoncurrentVersion() - Add test for high-bit overflow version ID * s3lifecycle: guard ShouldExpireNoncurrentVersion against zero SuccessorModTime Add early return when obj.IsLatest or obj.SuccessorModTime.IsZero() to prevent premature expiration of versions with uninitialized successor timestamps (zero value would compute to epoch, always expired). * lifecycle worker: detect buckets with lifecycle XML, not just filer.conf TTLs Update the detection phase to check for stored lifecycle XML in bucket metadata (key: s3-bucket-lifecycle-configuration-xml) in addition to filer.conf TTL entries. A bucket is proposed for lifecycle processing if it has lifecycle XML OR filer.conf TTLs (backward compatible). New proposal parameters: - has_lifecycle_xml: whether the bucket has stored lifecycle XML - versioning_status: the bucket's versioning state (Enabled/Suspended/"") These parameters will be used by the execution phase (subsequent PR) to determine which evaluation path to use. 
* lifecycle worker: update detection function comment to reflect XML support --------- Co-authored-by: Copilot --- weed/plugin/worker/lifecycle/detection.go | 35 +++-- .../plugin/worker/lifecycle/detection_test.go | 132 ++++++++++++++++++ 2 files changed, 158 insertions(+), 9 deletions(-) create mode 100644 weed/plugin/worker/lifecycle/detection_test.go diff --git a/weed/plugin/worker/lifecycle/detection.go b/weed/plugin/worker/lifecycle/detection.go index e88e680ca..d8267b2f0 100644 --- a/weed/plugin/worker/lifecycle/detection.go +++ b/weed/plugin/worker/lifecycle/detection.go @@ -10,11 +10,15 @@ import ( "github.com/seaweedfs/seaweedfs/weed/glog" "github.com/seaweedfs/seaweedfs/weed/pb/filer_pb" "github.com/seaweedfs/seaweedfs/weed/pb/plugin_pb" + "github.com/seaweedfs/seaweedfs/weed/s3api/s3_constants" "github.com/seaweedfs/seaweedfs/weed/util/wildcard" ) +const lifecycleXMLKey = "s3-bucket-lifecycle-configuration-xml" + // detectBucketsWithLifecycleRules scans all S3 buckets to find those -// with lifecycle (TTL) rules configured in filer.conf. +// with lifecycle rules, either TTL entries in filer.conf or lifecycle +// XML stored in bucket metadata. func (h *Handler) detectBucketsWithLifecycleRules( ctx context.Context, filerClient filer_pb.SeaweedFilerClient, @@ -53,25 +57,38 @@ func (h *Handler) detectBucketsWithLifecycleRules( continue } - // Derive the collection name for this bucket. + // Check for lifecycle rules from two sources: + // 1. filer.conf TTLs (legacy Expiration.Days fast path) + // 2. 
Stored lifecycle XML in bucket metadata (full rule support) collection := bucketName ttls := fc.GetCollectionTtls(collection) - if len(ttls) == 0 { + + hasLifecycleXML := entry.Extended != nil && len(entry.Extended[lifecycleXMLKey]) > 0 + versioningStatus := "" + if entry.Extended != nil { + versioningStatus = string(entry.Extended[s3_constants.ExtVersioningKey]) + } + + ruleCount := int64(len(ttls)) + if !hasLifecycleXML && ruleCount == 0 { continue } - glog.V(2).Infof("s3_lifecycle: bucket %s has %d lifecycle rule(s)", bucketName, len(ttls)) + glog.V(2).Infof("s3_lifecycle: bucket %s has %d TTL rule(s), lifecycle_xml=%v, versioning=%s", + bucketName, ruleCount, hasLifecycleXML, versioningStatus) proposal := &plugin_pb.JobProposal{ ProposalId: fmt.Sprintf("s3_lifecycle:%s", bucketName), JobType: jobType, - Summary: fmt.Sprintf("Lifecycle management for bucket %s (%d rules)", bucketName, len(ttls)), + Summary: fmt.Sprintf("Lifecycle management for bucket %s", bucketName), DedupeKey: fmt.Sprintf("s3_lifecycle:%s", bucketName), Parameters: map[string]*plugin_pb.ConfigValue{ - "bucket": {Kind: &plugin_pb.ConfigValue_StringValue{StringValue: bucketName}}, - "buckets_path": {Kind: &plugin_pb.ConfigValue_StringValue{StringValue: bucketsPath}}, - "collection": {Kind: &plugin_pb.ConfigValue_StringValue{StringValue: collection}}, - "rule_count": {Kind: &plugin_pb.ConfigValue_Int64Value{Int64Value: int64(len(ttls))}}, + "bucket": {Kind: &plugin_pb.ConfigValue_StringValue{StringValue: bucketName}}, + "buckets_path": {Kind: &plugin_pb.ConfigValue_StringValue{StringValue: bucketsPath}}, + "collection": {Kind: &plugin_pb.ConfigValue_StringValue{StringValue: collection}}, + "rule_count": {Kind: &plugin_pb.ConfigValue_Int64Value{Int64Value: ruleCount}}, + "has_lifecycle_xml": {Kind: &plugin_pb.ConfigValue_BoolValue{BoolValue: hasLifecycleXML}}, + "versioning_status": {Kind: &plugin_pb.ConfigValue_StringValue{StringValue: versioningStatus}}, }, Labels: map[string]string{ "bucket": 
bucketName, diff --git a/weed/plugin/worker/lifecycle/detection_test.go b/weed/plugin/worker/lifecycle/detection_test.go new file mode 100644 index 000000000..d9ff86688 --- /dev/null +++ b/weed/plugin/worker/lifecycle/detection_test.go @@ -0,0 +1,132 @@ +package lifecycle + +import ( + "testing" + + "github.com/seaweedfs/seaweedfs/weed/pb/filer_pb" + "github.com/seaweedfs/seaweedfs/weed/s3api/s3_constants" +) + +func TestBucketHasLifecycleXML(t *testing.T) { + tests := []struct { + name string + extended map[string][]byte + want bool + }{ + { + name: "has_lifecycle_xml", + extended: map[string][]byte{lifecycleXMLKey: []byte("<LifecycleConfiguration/>")}, + want: true, + }, + { + name: "empty_lifecycle_xml", + extended: map[string][]byte{lifecycleXMLKey: {}}, + want: false, + }, + { + name: "no_lifecycle_xml", + extended: map[string][]byte{"other-key": []byte("value")}, + want: false, + }, + { + name: "nil_extended", + extended: nil, + want: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := tt.extended != nil && len(tt.extended[lifecycleXMLKey]) > 0 + if got != tt.want { + t.Errorf("hasLifecycleXML = %v, want %v", got, tt.want) + } + }) + } +} + +func TestBucketVersioningStatus(t *testing.T) { + tests := []struct { + name string + extended map[string][]byte + want string + }{ + { + name: "versioning_enabled", + extended: map[string][]byte{ + s3_constants.ExtVersioningKey: []byte("Enabled"), + }, + want: "Enabled", + }, + { + name: "versioning_suspended", + extended: map[string][]byte{ + s3_constants.ExtVersioningKey: []byte("Suspended"), + }, + want: "Suspended", + }, + { + name: "no_versioning", + extended: map[string][]byte{}, + want: "", + }, + { + name: "nil_extended", + extended: nil, + want: "", + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + var got string + if tt.extended != nil { + got = string(tt.extended[s3_constants.ExtVersioningKey]) + } + if got != tt.want { + t.Errorf("versioningStatus = %q, want %q", 
got, tt.want) + } + }) + } +} + +func TestDetectionProposalParameters(t *testing.T) { + // Verify that bucket entries with lifecycle XML or TTL rules produce + // proposals with the expected parameters. + t.Run("bucket_with_lifecycle_xml_and_versioning", func(t *testing.T) { + entry := &filer_pb.Entry{ + Name: "my-bucket", + IsDirectory: true, + Extended: map[string][]byte{ + lifecycleXMLKey: []byte(`<LifecycleConfiguration><Rule><Status>Enabled</Status></Rule></LifecycleConfiguration>`), + s3_constants.ExtVersioningKey: []byte("Enabled"), + }, + } + + hasXML := entry.Extended != nil && len(entry.Extended[lifecycleXMLKey]) > 0 + versioning := "" + if entry.Extended != nil { + versioning = string(entry.Extended[s3_constants.ExtVersioningKey]) + } + + if !hasXML { + t.Error("expected hasLifecycleXML=true") + } + if versioning != "Enabled" { + t.Errorf("expected versioning=Enabled, got %q", versioning) + } + }) + + t.Run("bucket_without_lifecycle_or_ttl_is_skipped", func(t *testing.T) { + entry := &filer_pb.Entry{ + Name: "empty-bucket", + IsDirectory: true, + Extended: map[string][]byte{}, + } + + hasXML := entry.Extended != nil && len(entry.Extended[lifecycleXMLKey]) > 0 + ttlCount := 0 // simulated: no TTL rules in filer.conf + + if hasXML || ttlCount > 0 { + t.Error("expected bucket to be skipped (no lifecycle XML, no TTLs)") + } + }) +}