Browse Source

Persist S3 bucket counter metrics across idle periods (#8595)

* Stop deleting counter metrics during bucket TTL cleanup

Counter metrics (traffic bytes, request counts, object counts) are
monotonically increasing by design. Deleting them after 10 minutes of
bucket inactivity causes them to vanish from /metrics output and reset
to zero when traffic resumes, breaking Prometheus rate()/increase()
queries and making historical traffic reporting impossible.

Only delete gauges and histograms in the TTL cleanup loop, as these
represent current state and are safely re-populated on next activity.

Fixes https://github.com/seaweedfs/seaweedfs/issues/8521

* Clean up all bucket metrics on bucket deletion

Add DeleteBucketMetrics() to delete all metrics (including counters)
for a bucket when it is explicitly deleted. This prevents unbounded
label cardinality from accumulating for buckets that no longer exist.

Called from DeleteBucketHandler after successful bucket deletion.

* Reduce mutex scope in bucket metrics TTL sweep

Collect expired bucket names under the lock, then release before
calling DeletePartialMatch on Prometheus metrics. This prevents
RecordBucketActiveTime from blocking during the expensive cleanup.
pull/8596/head
Chris Lu 6 days ago
committed by GitHub
parent
commit
0a5c5ed4ce
No known key found for this signature in database GPG Key ID: B5690EEEBB952194
  1. 4
      weed/s3api/s3api_bucket_handlers.go
  2. 30
      weed/stats/metrics.go

4
weed/s3api/s3api_bucket_handlers.go

@ -20,6 +20,7 @@ import (
"github.com/seaweedfs/seaweedfs/weed/filer"
"github.com/seaweedfs/seaweedfs/weed/s3api/s3_constants"
stats_collect "github.com/seaweedfs/seaweedfs/weed/stats"
"github.com/seaweedfs/seaweedfs/weed/storage/needle"
"github.com/seaweedfs/seaweedfs/weed/storage/super_block"
@ -396,8 +397,9 @@ func (s3a *S3ApiServer) DeleteBucketHandler(w http.ResponseWriter, r *http.Reque
return
}
// Clean up bucket-related caches and locks after successful deletion
// Clean up bucket-related caches, locks, and metrics after successful deletion
s3a.invalidateBucketConfigCache(bucket)
stats_collect.DeleteBucketMetrics(bucket)
s3err.WriteEmptyResponse(w, r, http.StatusNoContent)
}

30
weed/stats/metrics.go

@ -573,6 +573,26 @@ func RecordBucketActiveTime(bucket string) {
bucketLastActiveLock.Unlock()
}
// DeleteBucketMetrics drops the bucket's entry from bucketLastActiveTsNs and
// then deletes every S3 metric series labelled with that bucket, counters
// included. The total number of deleted series is logged.
func DeleteBucketMetrics(bucket string) {
	// Hold the lock only for the map update; the metric deletions below run
	// after it has been released.
	bucketLastActiveLock.Lock()
	delete(bucketLastActiveTsNs, bucket)
	bucketLastActiveLock.Unlock()

	bucketLabels := prometheus.Labels{"bucket": bucket}

	// Every vector that carries a "bucket" label, visited in a fixed order.
	vectors := []interface {
		DeletePartialMatch(prometheus.Labels) int
	}{
		S3RequestCounter,
		S3RequestHistogram,
		S3TimeToFirstByteHistogram,
		S3BucketTrafficReceivedBytesCounter,
		S3BucketTrafficSentBytesCounter,
		S3DeletedObjectsCounter,
		S3UploadedObjectsCounter,
		S3BucketSizeBytesGauge,
		S3BucketPhysicalSizeBytesGauge,
		S3BucketObjectCountGauge,
	}

	deleted := 0
	for _, vec := range vectors {
		deleted += vec.DeletePartialMatch(bucketLabels)
	}

	glog.V(0).Infof("delete bucket metrics, %s: %d", bucket, deleted)
}
func DeleteCollectionMetrics(collection string) {
labels := prometheus.Labels{"collection": collection}
c := MasterReplicaPlacementMismatch.DeletePartialMatch(labels)
@ -605,13 +625,11 @@ func bucketMetricTTLControl() {
for _, bucket := range expiredBuckets {
labels := prometheus.Labels{"bucket": bucket}
c := S3RequestCounter.DeletePartialMatch(labels)
c += S3RequestHistogram.DeletePartialMatch(labels)
// Only delete gauges and histograms, which represent current state.
// Counters (traffic, requests, objects) must persist for the process
// lifetime so that Prometheus rate()/increase() queries work correctly.
c := S3RequestHistogram.DeletePartialMatch(labels)
c += S3TimeToFirstByteHistogram.DeletePartialMatch(labels)
c += S3BucketTrafficReceivedBytesCounter.DeletePartialMatch(labels)
c += S3BucketTrafficSentBytesCounter.DeletePartialMatch(labels)
c += S3DeletedObjectsCounter.DeletePartialMatch(labels)
c += S3UploadedObjectsCounter.DeletePartialMatch(labels)
c += S3BucketSizeBytesGauge.DeletePartialMatch(labels)
c += S3BucketPhysicalSizeBytesGauge.DeletePartialMatch(labels)
c += S3BucketObjectCountGauge.DeletePartialMatch(labels)

Loading…
Cancel
Save