Browse Source

Metrics: Add Prometheus metrics for concurrent upload tracking (#7555)

* metrics: add Prometheus metrics for concurrent upload tracking

Add Prometheus metrics to monitor concurrent upload activity for both
filer and S3 servers. This provides visibility into the upload limiting
feature added in the previous PR.

New Metrics:
- SeaweedFS_filer_in_flight_upload_bytes: Current bytes being uploaded to filer
- SeaweedFS_filer_in_flight_upload_count: Current number of uploads to filer
- SeaweedFS_s3_in_flight_upload_bytes: Current bytes being uploaded through the S3 gateway
- SeaweedFS_s3_in_flight_upload_count: Current number of uploads through the S3 gateway

Each gauge is set from the value returned by the atomic in-flight counter
update whenever an upload starts or completes, providing real-time
visibility into upload concurrency levels.

This helps operators:
- Monitor upload concurrency in real-time
- Set appropriate limits based on actual usage patterns
- Detect potential bottlenecks or capacity issues
- Track the effectiveness of upload limiting configuration

* grafana: add dashboard panels for concurrent upload metrics

Add 4 new panels to the Grafana dashboard to visualize the concurrent
upload metrics added in this PR:

Filer Section:
- Filer Concurrent Uploads: Shows current number of concurrent uploads
- Filer Concurrent Upload Bytes: Shows current bytes being uploaded

S3 Gateway Section:
- S3 Concurrent Uploads: Shows current number of concurrent uploads
- S3 Concurrent Upload Bytes: Shows current bytes being uploaded

These panels help operators monitor upload concurrency in real-time and
tune the upload limiting configuration based on actual usage patterns.

* more efficient
pull/7559/head
Chris Lu 3 weeks ago
committed by GitHub
parent
commit
848bec6d24
No known key found for this signature in database GPG Key ID: B5690EEEBB952194
  1. 333
      other/metrics/grafana_seaweedfs.json
  2. 22
      weed/s3api/s3api_circuit_breaker.go
  3. 14
      weed/server/filer_server_handlers.go
  4. 36
      weed/stats/metrics.go

333
other/metrics/grafana_seaweedfs.json

@ -459,6 +459,170 @@
"show": true
}
]
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "Prometheus",
"fill": 1,
"gridPos": {
"h": 6,
"w": 12,
"x": 0,
"y": 25
},
"id": 86,
"legend": {
"avg": false,
"current": true,
"max": false,
"min": false,
"show": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "SeaweedFS_filer_in_flight_upload_count",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "uploads",
"refId": "A",
"step": 30
}
],
"thresholds": [],
"timeFrom": null,
"timeShift": null,
"title": "Filer Concurrent Uploads",
"tooltip": {
"msResolution": true,
"shared": true,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": "0",
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "Prometheus",
"fill": 1,
"gridPos": {
"h": 6,
"w": 12,
"x": 12,
"y": 25
},
"id": 87,
"legend": {
"avg": false,
"current": true,
"max": false,
"min": false,
"show": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "SeaweedFS_filer_in_flight_upload_bytes",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "bytes",
"refId": "A",
"step": 30
}
],
"thresholds": [],
"timeFrom": null,
"timeShift": null,
"title": "Filer Concurrent Upload Bytes",
"tooltip": {
"msResolution": true,
"shared": true,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "bytes",
"label": null,
"logBase": 1,
"max": null,
"min": "0",
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
}
],
"repeat": null,
@ -479,7 +643,172 @@
"y": 1
},
"id": 70,
"panels": [],
"panels": [
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "Prometheus",
"fill": 1,
"gridPos": {
"h": 6,
"w": 12,
"x": 0,
"y": 31
},
"id": 88,
"legend": {
"avg": false,
"current": true,
"max": false,
"min": false,
"show": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "SeaweedFS_s3_in_flight_upload_count",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "uploads",
"refId": "A",
"step": 30
}
],
"thresholds": [],
"timeFrom": null,
"timeShift": null,
"title": "S3 Concurrent Uploads",
"tooltip": {
"msResolution": true,
"shared": true,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": "0",
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "Prometheus",
"fill": 1,
"gridPos": {
"h": 6,
"w": 12,
"x": 12,
"y": 31
},
"id": 89,
"legend": {
"avg": false,
"current": true,
"max": false,
"min": false,
"show": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "SeaweedFS_s3_in_flight_upload_bytes",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "bytes",
"refId": "A",
"step": 30
}
],
"thresholds": [],
"timeFrom": null,
"timeShift": null,
"title": "S3 Concurrent Upload Bytes",
"tooltip": {
"msResolution": true,
"shared": true,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "bytes",
"label": null,
"logBase": 1,
"max": null,
"min": "0",
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
}
],
"repeat": null,
"title": "S3 Gateway",
"type": "row"
@ -2693,4 +3022,4 @@
"title": "SeaweedFS",
"uid": "nh02dOVnz",
"version": 2
}
}

22
weed/s3api/s3api_circuit_breaker.go

@ -3,6 +3,10 @@ package s3api
import (
"errors"
"fmt"
"net/http"
"sync"
"sync/atomic"
"github.com/gorilla/mux"
"github.com/seaweedfs/seaweedfs/weed/filer"
"github.com/seaweedfs/seaweedfs/weed/glog"
@ -11,9 +15,7 @@ import (
"github.com/seaweedfs/seaweedfs/weed/pb/s3_pb"
"github.com/seaweedfs/seaweedfs/weed/s3api/s3_constants"
"github.com/seaweedfs/seaweedfs/weed/s3api/s3err"
"net/http"
"sync"
"sync/atomic"
"github.com/seaweedfs/seaweedfs/weed/stats"
)
type CircuitBreaker struct {
@ -121,12 +123,18 @@ func (cb *CircuitBreaker) Limit(f func(w http.ResponseWriter, r *http.Request),
cb.s3a.inFlightDataLimitCond.L.Unlock()
// Increment counters
atomic.AddInt64(&cb.s3a.inFlightUploads, 1)
atomic.AddInt64(&cb.s3a.inFlightDataSize, contentLength)
newUploads := atomic.AddInt64(&cb.s3a.inFlightUploads, 1)
newSize := atomic.AddInt64(&cb.s3a.inFlightDataSize, contentLength)
// Update metrics
stats.S3InFlightUploadCountGauge.Set(float64(newUploads))
stats.S3InFlightUploadBytesGauge.Set(float64(newSize))
defer func() {
// Decrement counters
atomic.AddInt64(&cb.s3a.inFlightUploads, -1)
atomic.AddInt64(&cb.s3a.inFlightDataSize, -contentLength)
newUploads := atomic.AddInt64(&cb.s3a.inFlightUploads, -1)
newSize := atomic.AddInt64(&cb.s3a.inFlightDataSize, -contentLength)
// Update metrics
stats.S3InFlightUploadCountGauge.Set(float64(newUploads))
stats.S3InFlightUploadBytesGauge.Set(float64(newSize))
cb.s3a.inFlightDataLimitCond.Signal()
}()
}

14
weed/server/filer_server_handlers.go

@ -112,12 +112,18 @@ func (fs *FilerServer) filerHandler(w http.ResponseWriter, r *http.Request) {
fs.inFlightDataLimitCond.L.Unlock()
// Increment counters
atomic.AddInt64(&fs.inFlightUploads, 1)
atomic.AddInt64(&fs.inFlightDataSize, contentLength)
newUploads := atomic.AddInt64(&fs.inFlightUploads, 1)
newSize := atomic.AddInt64(&fs.inFlightDataSize, contentLength)
// Update metrics
stats.FilerInFlightUploadCountGauge.Set(float64(newUploads))
stats.FilerInFlightUploadBytesGauge.Set(float64(newSize))
defer func() {
// Decrement counters
atomic.AddInt64(&fs.inFlightUploads, -1)
atomic.AddInt64(&fs.inFlightDataSize, -contentLength)
newUploads := atomic.AddInt64(&fs.inFlightUploads, -1)
newSize := atomic.AddInt64(&fs.inFlightDataSize, -contentLength)
// Update metrics
stats.FilerInFlightUploadCountGauge.Set(float64(newUploads))
stats.FilerInFlightUploadBytesGauge.Set(float64(newSize))
fs.inFlightDataLimitCond.Signal()
}()

36
weed/stats/metrics.go

@ -167,6 +167,22 @@ var (
Help: "Current number of in-flight requests being handled by filer.",
}, []string{"type"})
FilerInFlightUploadBytesGauge = prometheus.NewGauge(
prometheus.GaugeOpts{
Namespace: Namespace,
Subsystem: "filer",
Name: "in_flight_upload_bytes",
Help: "Current number of bytes being uploaded to filer.",
})
FilerInFlightUploadCountGauge = prometheus.NewGauge(
prometheus.GaugeOpts{
Namespace: Namespace,
Subsystem: "filer",
Name: "in_flight_upload_count",
Help: "Current number of uploads in progress to filer.",
})
FilerServerLastSendTsOfSubscribeGauge = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: Namespace,
@ -371,6 +387,22 @@ var (
Help: "Current number of in-flight requests being handled by s3.",
}, []string{"type"})
S3InFlightUploadBytesGauge = prometheus.NewGauge(
prometheus.GaugeOpts{
Namespace: Namespace,
Subsystem: "s3",
Name: "in_flight_upload_bytes",
Help: "Current number of bytes being uploaded to S3.",
})
S3InFlightUploadCountGauge = prometheus.NewGauge(
prometheus.GaugeOpts{
Namespace: Namespace,
Subsystem: "s3",
Name: "in_flight_upload_count",
Help: "Current number of uploads in progress to S3.",
})
S3BucketTrafficReceivedBytesCounter = prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: Namespace,
@ -422,6 +454,8 @@ func init() {
Gather.MustRegister(FilerHandlerCounter)
Gather.MustRegister(FilerRequestHistogram)
Gather.MustRegister(FilerInFlightRequestsGauge)
Gather.MustRegister(FilerInFlightUploadBytesGauge)
Gather.MustRegister(FilerInFlightUploadCountGauge)
Gather.MustRegister(FilerStoreCounter)
Gather.MustRegister(FilerStoreHistogram)
Gather.MustRegister(FilerSyncOffsetGauge)
@ -450,6 +484,8 @@ func init() {
Gather.MustRegister(S3HandlerCounter)
Gather.MustRegister(S3RequestHistogram)
Gather.MustRegister(S3InFlightRequestsGauge)
Gather.MustRegister(S3InFlightUploadBytesGauge)
Gather.MustRegister(S3InFlightUploadCountGauge)
Gather.MustRegister(S3TimeToFirstByteHistogram)
Gather.MustRegister(S3BucketTrafficReceivedBytesCounter)
Gather.MustRegister(S3BucketTrafficSentBytesCounter)

Loading…
Cancel
Save