You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

271 lines
8.3 KiB

6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
4 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
  1. package stats
  2. import (
  3. "fmt"
  4. "log"
  5. "net"
  6. "net/http"
  7. "os"
  8. "strconv"
  9. "strings"
  10. "time"
  11. "github.com/prometheus/client_golang/prometheus"
  12. "github.com/prometheus/client_golang/prometheus/collectors"
  13. "github.com/prometheus/client_golang/prometheus/promhttp"
  14. "github.com/prometheus/client_golang/prometheus/push"
  15. "github.com/seaweedfs/seaweedfs/weed/glog"
  16. )
  // Namespace is the Prometheus namespace assigned to every metric declared in
  // this package.
  const Namespace = "SeaweedFS"

  // Values of the "type" label that distinguish the kinds of read-only volume
  // tracked by VolumeServerReadOnlyVolumeGauge.
  const (
  	IsReadOnly       = "IsReadOnly"
  	NoWriteOrDelete  = "noWriteOrDelete"
  	NoWriteCanDelete = "noWriteCanDelete"
  	IsDiskSpaceLow   = "isDiskSpaceLow"
  )

  // readOnlyVolumeTypes enumerates every read-only volume type so that callers
  // such as DeleteCollectionMetrics can iterate over all of them.
  var readOnlyVolumeTypes = [...]string{
  	IsReadOnly,
  	NoWriteOrDelete,
  	NoWriteCanDelete,
  	IsDiskSpaceLow,
  }
  26. var (
  27. Gather = prometheus.NewRegistry()
  28. MasterClientConnectCounter = prometheus.NewCounterVec(
  29. prometheus.CounterOpts{
  30. Namespace: Namespace,
  31. Subsystem: "wdclient",
  32. Name: "connect_updates",
  33. Help: "Counter of master client leader updates.",
  34. }, []string{"type"})
  35. MasterRaftIsleader = prometheus.NewGauge(
  36. prometheus.GaugeOpts{
  37. Namespace: Namespace,
  38. Subsystem: "master",
  39. Name: "is_leader",
  40. Help: "is leader",
  41. })
  42. MasterReceivedHeartbeatCounter = prometheus.NewCounterVec(
  43. prometheus.CounterOpts{
  44. Namespace: Namespace,
  45. Subsystem: "master",
  46. Name: "received_heartbeats",
  47. Help: "Counter of master received heartbeat.",
  48. }, []string{"type"})
  49. MasterReplicaPlacementMismatch = prometheus.NewGaugeVec(
  50. prometheus.GaugeOpts{
  51. Namespace: Namespace,
  52. Subsystem: "master",
  53. Name: "replica_placement_mismatch",
  54. Help: "replica placement mismatch",
  55. }, []string{"collection", "id"})
  56. MasterLeaderChangeCounter = prometheus.NewCounterVec(
  57. prometheus.CounterOpts{
  58. Namespace: Namespace,
  59. Subsystem: "master",
  60. Name: "leader_changes",
  61. Help: "Counter of master leader changes.",
  62. }, []string{"type"})
  63. FilerRequestCounter = prometheus.NewCounterVec(
  64. prometheus.CounterOpts{
  65. Namespace: Namespace,
  66. Subsystem: "filer",
  67. Name: "request_total",
  68. Help: "Counter of filer requests.",
  69. }, []string{"type"})
  70. FilerRequestHistogram = prometheus.NewHistogramVec(
  71. prometheus.HistogramOpts{
  72. Namespace: Namespace,
  73. Subsystem: "filer",
  74. Name: "request_seconds",
  75. Help: "Bucketed histogram of filer request processing time.",
  76. Buckets: prometheus.ExponentialBuckets(0.0001, 2, 24),
  77. }, []string{"type"})
  78. FilerServerLastSendTsOfSubscribeGauge = prometheus.NewGaugeVec(
  79. prometheus.GaugeOpts{
  80. Namespace: Namespace,
  81. Subsystem: "filer",
  82. Name: "last_send_timestamp_of_subscribe",
  83. Help: "The last send timestamp of the filer subscription.",
  84. }, []string{"sourceFiler", "clientName", "path"})
  85. FilerStoreCounter = prometheus.NewCounterVec(
  86. prometheus.CounterOpts{
  87. Namespace: Namespace,
  88. Subsystem: "filerStore",
  89. Name: "request_total",
  90. Help: "Counter of filer store requests.",
  91. }, []string{"store", "type"})
  92. FilerStoreHistogram = prometheus.NewHistogramVec(
  93. prometheus.HistogramOpts{
  94. Namespace: Namespace,
  95. Subsystem: "filerStore",
  96. Name: "request_seconds",
  97. Help: "Bucketed histogram of filer store request processing time.",
  98. Buckets: prometheus.ExponentialBuckets(0.0001, 2, 24),
  99. }, []string{"store", "type"})
  100. FilerSyncOffsetGauge = prometheus.NewGaugeVec(
  101. prometheus.GaugeOpts{
  102. Namespace: Namespace,
  103. Subsystem: "filerSync",
  104. Name: "sync_offset",
  105. Help: "The offset of the filer synchronization service.",
  106. }, []string{"sourceFiler", "targetFiler", "clientName", "path"})
  107. VolumeServerRequestCounter = prometheus.NewCounterVec(
  108. prometheus.CounterOpts{
  109. Namespace: Namespace,
  110. Subsystem: "volumeServer",
  111. Name: "request_total",
  112. Help: "Counter of volume server requests.",
  113. }, []string{"type"})
  114. VolumeServerRequestHistogram = prometheus.NewHistogramVec(
  115. prometheus.HistogramOpts{
  116. Namespace: Namespace,
  117. Subsystem: "volumeServer",
  118. Name: "request_seconds",
  119. Help: "Bucketed histogram of volume server request processing time.",
  120. Buckets: prometheus.ExponentialBuckets(0.0001, 2, 24),
  121. }, []string{"type"})
  122. VolumeServerVolumeCounter = prometheus.NewGaugeVec(
  123. prometheus.GaugeOpts{
  124. Namespace: Namespace,
  125. Subsystem: "volumeServer",
  126. Name: "volumes",
  127. Help: "Number of volumes or shards.",
  128. }, []string{"collection", "type"})
  129. VolumeServerReadOnlyVolumeGauge = prometheus.NewGaugeVec(
  130. prometheus.GaugeOpts{
  131. Namespace: Namespace,
  132. Subsystem: "volumeServer",
  133. Name: "read_only_volumes",
  134. Help: "Number of read only volumes.",
  135. }, []string{"collection", "type"})
  136. VolumeServerMaxVolumeCounter = prometheus.NewGauge(
  137. prometheus.GaugeOpts{
  138. Namespace: Namespace,
  139. Subsystem: "volumeServer",
  140. Name: "max_volumes",
  141. Help: "Maximum number of volumes.",
  142. })
  143. VolumeServerDiskSizeGauge = prometheus.NewGaugeVec(
  144. prometheus.GaugeOpts{
  145. Namespace: Namespace,
  146. Subsystem: "volumeServer",
  147. Name: "total_disk_size",
  148. Help: "Actual disk size used by volumes.",
  149. }, []string{"collection", "type"})
  150. VolumeServerResourceGauge = prometheus.NewGaugeVec(
  151. prometheus.GaugeOpts{
  152. Namespace: Namespace,
  153. Subsystem: "volumeServer",
  154. Name: "resource",
  155. Help: "Resource usage",
  156. }, []string{"name", "type"})
  157. S3RequestCounter = prometheus.NewCounterVec(
  158. prometheus.CounterOpts{
  159. Namespace: Namespace,
  160. Subsystem: "s3",
  161. Name: "request_total",
  162. Help: "Counter of s3 requests.",
  163. }, []string{"type", "code", "bucket"})
  164. S3RequestHistogram = prometheus.NewHistogramVec(
  165. prometheus.HistogramOpts{
  166. Namespace: Namespace,
  167. Subsystem: "s3",
  168. Name: "request_seconds",
  169. Help: "Bucketed histogram of s3 request processing time.",
  170. Buckets: prometheus.ExponentialBuckets(0.0001, 2, 24),
  171. }, []string{"type", "bucket"})
  172. )
  173. func init() {
  174. Gather.MustRegister(MasterClientConnectCounter)
  175. Gather.MustRegister(MasterRaftIsleader)
  176. Gather.MustRegister(MasterReceivedHeartbeatCounter)
  177. Gather.MustRegister(MasterLeaderChangeCounter)
  178. Gather.MustRegister(MasterReplicaPlacementMismatch)
  179. Gather.MustRegister(FilerRequestCounter)
  180. Gather.MustRegister(FilerRequestHistogram)
  181. Gather.MustRegister(FilerStoreCounter)
  182. Gather.MustRegister(FilerStoreHistogram)
  183. Gather.MustRegister(FilerSyncOffsetGauge)
  184. Gather.MustRegister(FilerServerLastSendTsOfSubscribeGauge)
  185. Gather.MustRegister(collectors.NewGoCollector())
  186. Gather.MustRegister(collectors.NewProcessCollector(collectors.ProcessCollectorOpts{}))
  187. Gather.MustRegister(VolumeServerRequestCounter)
  188. Gather.MustRegister(VolumeServerRequestHistogram)
  189. Gather.MustRegister(VolumeServerVolumeCounter)
  190. Gather.MustRegister(VolumeServerMaxVolumeCounter)
  191. Gather.MustRegister(VolumeServerReadOnlyVolumeGauge)
  192. Gather.MustRegister(VolumeServerDiskSizeGauge)
  193. Gather.MustRegister(VolumeServerResourceGauge)
  194. Gather.MustRegister(S3RequestCounter)
  195. Gather.MustRegister(S3RequestHistogram)
  196. }
  197. func LoopPushingMetric(name, instance, addr string, intervalSeconds int) {
  198. if addr == "" || intervalSeconds == 0 {
  199. return
  200. }
  201. glog.V(0).Infof("%s server sends metrics to %s every %d seconds", name, addr, intervalSeconds)
  202. pusher := push.New(addr, name).Gatherer(Gather).Grouping("instance", instance)
  203. for {
  204. err := pusher.Push()
  205. if err != nil && !strings.HasPrefix(err.Error(), "unexpected status code 200") {
  206. glog.V(0).Infof("could not push metrics to prometheus push gateway %s: %v", addr, err)
  207. }
  208. if intervalSeconds <= 0 {
  209. intervalSeconds = 15
  210. }
  211. time.Sleep(time.Duration(intervalSeconds) * time.Second)
  212. }
  213. }
  214. func StartMetricsServer(port int) {
  215. if port == 0 {
  216. return
  217. }
  218. http.Handle("/metrics", promhttp.HandlerFor(Gather, promhttp.HandlerOpts{}))
  219. log.Fatal(http.ListenAndServe(fmt.Sprintf(":%d", port), nil))
  220. }
  // SourceName returns "hostname:port" for the local machine, built with
  // net.JoinHostPort. If the hostname cannot be determined it returns the
  // literal "unknown", without the port.
  func SourceName(port uint32) string {
  	hostname, err := os.Hostname()
  	if err != nil {
  		return "unknown"
  	}
  	// Format the port as unsigned: int(port) would wrap negative on 32-bit
  	// platforms for values above math.MaxInt32.
  	return net.JoinHostPort(hostname, strconv.FormatUint(uint64(port), 10))
  }
  228. // todo - can be changed to DeletePartialMatch when https://github.com/prometheus/client_golang/pull/1013 gets released
  229. func DeleteCollectionMetrics(collection string) {
  230. VolumeServerDiskSizeGauge.DeleteLabelValues(collection, "normal")
  231. for _, volume_type := range readOnlyVolumeTypes {
  232. VolumeServerReadOnlyVolumeGauge.DeleteLabelValues(collection, volume_type)
  233. }
  234. VolumeServerVolumeCounter.DeleteLabelValues(collection, "volume")
  235. }