From 9987a65e8aa11a54781a6e5bae774e13748e0452 Mon Sep 17 00:00:00 2001
From: zouyixiong
Date: Fri, 13 Dec 2024 12:34:02 +0800
Subject: [PATCH] fix: record and delete bucket metrics after inactive (#6349)

---
 weed/s3api/stats.go   |  9 ++++--
 weed/stats/metrics.go | 71 +++++++++++++++++++++++++++++++++++++++----
 2 files changed, 72 insertions(+), 8 deletions(-)

diff --git a/weed/s3api/stats.go b/weed/s3api/stats.go
index 415ec55dd..616862958 100644
--- a/weed/s3api/stats.go
+++ b/weed/s3api/stats.go
@@ -1,12 +1,13 @@
 package s3api
 
 import (
-	"github.com/seaweedfs/seaweedfs/weed/s3api/s3_constants"
-	stats_collect "github.com/seaweedfs/seaweedfs/weed/stats"
-	"github.com/seaweedfs/seaweedfs/weed/util"
 	"net/http"
 	"strconv"
 	"time"
+
+	"github.com/seaweedfs/seaweedfs/weed/s3api/s3_constants"
+	stats_collect "github.com/seaweedfs/seaweedfs/weed/stats"
+	"github.com/seaweedfs/seaweedfs/weed/util"
 )
 
 func track(f http.HandlerFunc, action string) http.HandlerFunc {
@@ -25,10 +26,12 @@ func track(f http.HandlerFunc, action string) http.HandlerFunc {
 		}
 		stats_collect.S3RequestHistogram.WithLabelValues(action, bucket).Observe(time.Since(start).Seconds())
 		stats_collect.S3RequestCounter.WithLabelValues(action, strconv.Itoa(recorder.Status), bucket).Inc()
+		stats_collect.RecordBucketActiveTime(bucket)
 	}
 }
 
 func TimeToFirstByte(action string, start time.Time, r *http.Request) {
 	bucket, _ := s3_constants.GetBucketAndObject(r)
 	stats_collect.S3TimeToFirstByteHistogram.WithLabelValues(action, bucket).Observe(float64(time.Since(start).Milliseconds()))
+	stats_collect.RecordBucketActiveTime(bucket)
 }
diff --git a/weed/stats/metrics.go b/weed/stats/metrics.go
index c482f19b5..b9a596a6a 100644
--- a/weed/stats/metrics.go
+++ b/weed/stats/metrics.go
@@ -23,9 +23,18 @@ const (
 	NoWriteOrDelete  = "noWriteOrDelete"
 	NoWriteCanDelete = "noWriteCanDelete"
 	IsDiskSpaceLow   = "isDiskSpaceLow"
+	bucketActiveTTL  = 10 * time.Minute // idle time after which a bucket's S3 series are deleted
 )
 
 var readOnlyVolumeTypes = [4]string{IsReadOnly, NoWriteOrDelete, NoWriteCanDelete, IsDiskSpaceLow}
+
+// bucketLastActiveTsNs maps a bucket name to the UnixNano time of its last
+// recorded S3 request. Only the bucketMetricTTLControl goroutine may touch it.
+var bucketLastActiveTsNs = map[string]int64{}
+
+// bucketActiveEvents carries bucket names from request handlers to the
+// bucketMetricTTLControl goroutine, so bucketLastActiveTsNs needs no lock.
+var bucketActiveEvents = make(chan string, 1024)
 
 var (
 	Gather = prometheus.NewRegistry()
@@ -281,6 +290,7 @@ var (
 			Name:      "request_total",
 			Help:      "Counter of s3 requests.",
 		}, []string{"type", "code", "bucket"})
+
 	S3HandlerCounter = prometheus.NewCounterVec(
 		prometheus.CounterOpts{
 			Namespace: Namespace,
@@ -288,6 +298,7 @@ var (
 			Name:      "handler_total",
 			Help:      "Counter of s3 server handlers.",
 		}, []string{"type"})
+
 	S3RequestHistogram = prometheus.NewHistogramVec(
 		prometheus.HistogramOpts{
 			Namespace: Namespace,
@@ -296,6 +307,7 @@ var (
 			Help:      "Bucketed histogram of s3 request processing time.",
 			Buckets:   prometheus.ExponentialBuckets(0.0001, 2, 24),
 		}, []string{"type", "bucket"})
+
 	S3TimeToFirstByteHistogram = prometheus.NewHistogramVec(
 		prometheus.HistogramOpts{
 			Namespace: Namespace,
@@ -354,6 +366,8 @@ func init() {
 	Gather.MustRegister(S3RequestHistogram)
 	Gather.MustRegister(S3InFlightRequestsGauge)
 	Gather.MustRegister(S3TimeToFirstByteHistogram)
+
+	go bucketMetricTTLControl()
 }
 
 func LoopPushingMetric(name, instance, addr string, intervalSeconds int) {
@@ -401,11 +415,58 @@ func SourceName(port uint32) string {
 	return net.JoinHostPort(hostname, strconv.Itoa(int(port)))
 }
 
-// todo - can be changed to DeletePartialMatch when https://github.com/prometheus/client_golang/pull/1013 gets released
+// RecordBucketActiveTime notes that bucket has just served an S3 request, which
+// keeps its per-bucket metrics alive for another bucketActiveTTL.
+//
+// Request handlers call this concurrently, so it must not write the map itself:
+// an unsynchronized map write is a fatal runtime error in Go. The name is queued
+// for bucketMetricTTLControl, the only goroutine that touches the map.
+func RecordBucketActiveTime(bucket string) {
+	bucketActiveEvents <- bucket
+}
+
+// DeleteCollectionMetrics drops every master and volume-server series carrying
+// the given collection label and logs how many series were removed.
 func DeleteCollectionMetrics(collection string) {
-	VolumeServerDiskSizeGauge.DeleteLabelValues(collection, "normal")
-	for _, volume_type := range readOnlyVolumeTypes {
-		VolumeServerReadOnlyVolumeGauge.DeleteLabelValues(collection, volume_type)
+	labels := prometheus.Labels{"collection": collection}
+	c := MasterReplicaPlacementMismatch.DeletePartialMatch(labels)
+	c += MasterVolumeLayoutWritable.DeletePartialMatch(labels)
+	c += MasterVolumeLayoutCrowded.DeletePartialMatch(labels)
+	c += VolumeServerDiskSizeGauge.DeletePartialMatch(labels)
+	c += VolumeServerVolumeGauge.DeletePartialMatch(labels)
+	c += VolumeServerReadOnlyVolumeGauge.DeletePartialMatch(labels)
+
+	glog.V(0).Infof("delete collection metrics, %s: %d", collection, c)
+}
+
+// bucketMetricTTLControl owns bucketLastActiveTsNs. It records activity events
+// as they arrive and, once per bucketActiveTTL, deletes the S3 series of every
+// bucket that has been idle for longer than the TTL. It never returns; init
+// starts it once for the lifetime of the process.
+func bucketMetricTTLControl() {
+	ttlNs := bucketActiveTTL.Nanoseconds()
+	ticker := time.NewTicker(bucketActiveTTL)
+
+	for {
+		select {
+		case bucket := <-bucketActiveEvents:
+			bucketLastActiveTsNs[bucket] = time.Now().UnixNano()
+
+		case <-ticker.C:
+			now := time.Now().UnixNano()
+			for bucket, ts := range bucketLastActiveTsNs {
+				if now-ts <= ttlNs {
+					continue
+				}
+				// Deleting the current key while ranging over a map is safe.
+				delete(bucketLastActiveTsNs, bucket)
+
+				labels := prometheus.Labels{"bucket": bucket}
+				c := S3RequestCounter.DeletePartialMatch(labels)
+				c += S3RequestHistogram.DeletePartialMatch(labels)
+				c += S3TimeToFirstByteHistogram.DeletePartialMatch(labels)
+				glog.V(0).Infof("delete inactive bucket metrics, %s: %d", bucket, c)
+			}
+		}
 	}
-	VolumeServerVolumeGauge.DeleteLabelValues(collection, "volume")
 }