You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

342 lines
11 KiB

6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
4 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
  1. package stats
  2. import (
  3. "log"
  4. "net"
  5. "net/http"
  6. "os"
  7. "strconv"
  8. "strings"
  9. "time"
  10. "github.com/prometheus/client_golang/prometheus"
  11. "github.com/prometheus/client_golang/prometheus/collectors"
  12. "github.com/prometheus/client_golang/prometheus/promhttp"
  13. "github.com/prometheus/client_golang/prometheus/push"
  14. "github.com/seaweedfs/seaweedfs/weed/glog"
  15. )
  // Namespace is the Prometheus namespace set on every metric declared in this
  // file.
  const Namespace = "SeaweedFS"

  // Read-only volume types. These four names are collected in
  // readOnlyVolumeTypes and passed as the second label value of
  // VolumeServerReadOnlyVolumeGauge by DeleteCollectionMetrics.
  const (
  	IsReadOnly       = "IsReadOnly"
  	NoWriteOrDelete  = "noWriteOrDelete"
  	NoWriteCanDelete = "noWriteCanDelete"
  	IsDiskSpaceLow   = "isDiskSpaceLow"
  )
  24. var readOnlyVolumeTypes = [4]string{IsReadOnly, NoWriteOrDelete, NoWriteCanDelete, IsDiskSpaceLow}
  25. var (
  26. Gather = prometheus.NewRegistry()
  27. MasterClientConnectCounter = prometheus.NewCounterVec(
  28. prometheus.CounterOpts{
  29. Namespace: Namespace,
  30. Subsystem: "wdclient",
  31. Name: "connect_updates",
  32. Help: "Counter of master client leader updates.",
  33. }, []string{"type"})
  34. MasterRaftIsleader = prometheus.NewGauge(
  35. prometheus.GaugeOpts{
  36. Namespace: Namespace,
  37. Subsystem: "master",
  38. Name: "is_leader",
  39. Help: "is leader",
  40. })
  41. MasterAdminLock = prometheus.NewGaugeVec(
  42. prometheus.GaugeOpts{
  43. Namespace: Namespace,
  44. Subsystem: "master",
  45. Name: "admin_lock",
  46. Help: "admin lock",
  47. }, []string{"client"})
  48. MasterReceivedHeartbeatCounter = prometheus.NewCounterVec(
  49. prometheus.CounterOpts{
  50. Namespace: Namespace,
  51. Subsystem: "master",
  52. Name: "received_heartbeats",
  53. Help: "Counter of master received heartbeat.",
  54. }, []string{"type"})
  55. MasterReplicaPlacementMismatch = prometheus.NewGaugeVec(
  56. prometheus.GaugeOpts{
  57. Namespace: Namespace,
  58. Subsystem: "master",
  59. Name: "replica_placement_mismatch",
  60. Help: "replica placement mismatch",
  61. }, []string{"collection", "id"})
  62. MasterLeaderChangeCounter = prometheus.NewCounterVec(
  63. prometheus.CounterOpts{
  64. Namespace: Namespace,
  65. Subsystem: "master",
  66. Name: "leader_changes",
  67. Help: "Counter of master leader changes.",
  68. }, []string{"type"})
  69. FilerRequestCounter = prometheus.NewCounterVec(
  70. prometheus.CounterOpts{
  71. Namespace: Namespace,
  72. Subsystem: "filer",
  73. Name: "request_total",
  74. Help: "Counter of filer requests.",
  75. }, []string{"type", "code"})
  76. FilerHandlerCounter = prometheus.NewCounterVec(
  77. prometheus.CounterOpts{
  78. Namespace: Namespace,
  79. Subsystem: "filer",
  80. Name: "handler_total",
  81. Help: "Counter of filer handlers.",
  82. }, []string{"type"})
  83. FilerRequestHistogram = prometheus.NewHistogramVec(
  84. prometheus.HistogramOpts{
  85. Namespace: Namespace,
  86. Subsystem: "filer",
  87. Name: "request_seconds",
  88. Help: "Bucketed histogram of filer request processing time.",
  89. Buckets: prometheus.ExponentialBuckets(0.0001, 2, 24),
  90. }, []string{"type"})
  91. FilerServerLastSendTsOfSubscribeGauge = prometheus.NewGaugeVec(
  92. prometheus.GaugeOpts{
  93. Namespace: Namespace,
  94. Subsystem: "filer",
  95. Name: "last_send_timestamp_of_subscribe",
  96. Help: "The last send timestamp of the filer subscription.",
  97. }, []string{"sourceFiler", "clientName", "path"})
  98. FilerStoreCounter = prometheus.NewCounterVec(
  99. prometheus.CounterOpts{
  100. Namespace: Namespace,
  101. Subsystem: "filerStore",
  102. Name: "request_total",
  103. Help: "Counter of filer store requests.",
  104. }, []string{"store", "type"})
  105. FilerStoreHistogram = prometheus.NewHistogramVec(
  106. prometheus.HistogramOpts{
  107. Namespace: Namespace,
  108. Subsystem: "filerStore",
  109. Name: "request_seconds",
  110. Help: "Bucketed histogram of filer store request processing time.",
  111. Buckets: prometheus.ExponentialBuckets(0.0001, 2, 24),
  112. }, []string{"store", "type"})
  113. FilerSyncOffsetGauge = prometheus.NewGaugeVec(
  114. prometheus.GaugeOpts{
  115. Namespace: Namespace,
  116. Subsystem: "filerSync",
  117. Name: "sync_offset",
  118. Help: "The offset of the filer synchronization service.",
  119. }, []string{"sourceFiler", "targetFiler", "clientName", "path"})
  120. VolumeServerRequestCounter = prometheus.NewCounterVec(
  121. prometheus.CounterOpts{
  122. Namespace: Namespace,
  123. Subsystem: "volumeServer",
  124. Name: "request_total",
  125. Help: "Counter of volume server requests.",
  126. }, []string{"type", "code"})
  127. VolumeServerHandlerCounter = prometheus.NewCounterVec(
  128. prometheus.CounterOpts{
  129. Namespace: Namespace,
  130. Subsystem: "volumeServer",
  131. Name: "handler_total",
  132. Help: "Counter of volume server handlers.",
  133. }, []string{"type"})
  134. VolumeServerVacuumingCompactCounter = prometheus.NewCounterVec(
  135. prometheus.CounterOpts{
  136. Namespace: Namespace,
  137. Subsystem: "volumeServer",
  138. Name: "vacuuming_compact_count",
  139. Help: "Counter of volume vacuuming Compact counter",
  140. }, []string{"success"})
  141. VolumeServerVacuumingCommitCounter = prometheus.NewCounterVec(
  142. prometheus.CounterOpts{
  143. Namespace: Namespace,
  144. Subsystem: "volumeServer",
  145. Name: "vacuuming_commit_count",
  146. Help: "Counter of volume vacuuming commit counter",
  147. }, []string{"success"})
  148. VolumeServerVacuumingHistogram = prometheus.NewHistogramVec(
  149. prometheus.HistogramOpts{
  150. Namespace: Namespace,
  151. Subsystem: "volumeServer",
  152. Name: "vacuuming_seconds",
  153. Help: "Bucketed histogram of volume server vacuuming processing time.",
  154. Buckets: prometheus.ExponentialBuckets(0.0001, 2, 24),
  155. }, []string{"type"})
  156. VolumeServerRequestHistogram = prometheus.NewHistogramVec(
  157. prometheus.HistogramOpts{
  158. Namespace: Namespace,
  159. Subsystem: "volumeServer",
  160. Name: "request_seconds",
  161. Help: "Bucketed histogram of volume server request processing time.",
  162. Buckets: prometheus.ExponentialBuckets(0.0001, 2, 24),
  163. }, []string{"type"})
  164. VolumeServerVolumeCounter = prometheus.NewGaugeVec(
  165. prometheus.GaugeOpts{
  166. Namespace: Namespace,
  167. Subsystem: "volumeServer",
  168. Name: "volumes",
  169. Help: "Number of volumes or shards.",
  170. }, []string{"collection", "type"})
  171. VolumeServerReadOnlyVolumeGauge = prometheus.NewGaugeVec(
  172. prometheus.GaugeOpts{
  173. Namespace: Namespace,
  174. Subsystem: "volumeServer",
  175. Name: "read_only_volumes",
  176. Help: "Number of read only volumes.",
  177. }, []string{"collection", "type"})
  178. VolumeServerMaxVolumeCounter = prometheus.NewGauge(
  179. prometheus.GaugeOpts{
  180. Namespace: Namespace,
  181. Subsystem: "volumeServer",
  182. Name: "max_volumes",
  183. Help: "Maximum number of volumes.",
  184. })
  185. VolumeServerDiskSizeGauge = prometheus.NewGaugeVec(
  186. prometheus.GaugeOpts{
  187. Namespace: Namespace,
  188. Subsystem: "volumeServer",
  189. Name: "total_disk_size",
  190. Help: "Actual disk size used by volumes.",
  191. }, []string{"collection", "type"})
  192. VolumeServerResourceGauge = prometheus.NewGaugeVec(
  193. prometheus.GaugeOpts{
  194. Namespace: Namespace,
  195. Subsystem: "volumeServer",
  196. Name: "resource",
  197. Help: "Resource usage",
  198. }, []string{"name", "type"})
  199. S3RequestCounter = prometheus.NewCounterVec(
  200. prometheus.CounterOpts{
  201. Namespace: Namespace,
  202. Subsystem: "s3",
  203. Name: "request_total",
  204. Help: "Counter of s3 requests.",
  205. }, []string{"type", "code", "bucket"})
  206. S3RequestHistogram = prometheus.NewHistogramVec(
  207. prometheus.HistogramOpts{
  208. Namespace: Namespace,
  209. Subsystem: "s3",
  210. Name: "request_seconds",
  211. Help: "Bucketed histogram of s3 request processing time.",
  212. Buckets: prometheus.ExponentialBuckets(0.0001, 2, 24),
  213. }, []string{"type", "bucket"})
  214. S3TimeToFirstByteHistogram = prometheus.NewHistogramVec(
  215. prometheus.HistogramOpts{
  216. Namespace: Namespace,
  217. Subsystem: "s3",
  218. Name: "time_to_first_byte_millisecond",
  219. Help: "Bucketed histogram of s3 time to first byte request processing time.",
  220. Buckets: prometheus.ExponentialBuckets(0.001, 2, 27),
  221. }, []string{"type", "bucket"})
  222. )
  223. func init() {
  224. Gather.MustRegister(MasterClientConnectCounter)
  225. Gather.MustRegister(MasterRaftIsleader)
  226. Gather.MustRegister(MasterAdminLock)
  227. Gather.MustRegister(MasterReceivedHeartbeatCounter)
  228. Gather.MustRegister(MasterLeaderChangeCounter)
  229. Gather.MustRegister(MasterReplicaPlacementMismatch)
  230. Gather.MustRegister(FilerRequestCounter)
  231. Gather.MustRegister(FilerHandlerCounter)
  232. Gather.MustRegister(FilerRequestHistogram)
  233. Gather.MustRegister(FilerStoreCounter)
  234. Gather.MustRegister(FilerStoreHistogram)
  235. Gather.MustRegister(FilerSyncOffsetGauge)
  236. Gather.MustRegister(FilerServerLastSendTsOfSubscribeGauge)
  237. Gather.MustRegister(collectors.NewGoCollector())
  238. Gather.MustRegister(collectors.NewProcessCollector(collectors.ProcessCollectorOpts{}))
  239. Gather.MustRegister(VolumeServerRequestCounter)
  240. Gather.MustRegister(VolumeServerHandlerCounter)
  241. Gather.MustRegister(VolumeServerRequestHistogram)
  242. Gather.MustRegister(VolumeServerVacuumingCompactCounter)
  243. Gather.MustRegister(VolumeServerVacuumingCommitCounter)
  244. Gather.MustRegister(VolumeServerVacuumingHistogram)
  245. Gather.MustRegister(VolumeServerVolumeCounter)
  246. Gather.MustRegister(VolumeServerMaxVolumeCounter)
  247. Gather.MustRegister(VolumeServerReadOnlyVolumeGauge)
  248. Gather.MustRegister(VolumeServerDiskSizeGauge)
  249. Gather.MustRegister(VolumeServerResourceGauge)
  250. Gather.MustRegister(S3RequestCounter)
  251. Gather.MustRegister(S3RequestHistogram)
  252. Gather.MustRegister(S3TimeToFirstByteHistogram)
  253. }
  254. func LoopPushingMetric(name, instance, addr string, intervalSeconds int) {
  255. if addr == "" || intervalSeconds == 0 {
  256. return
  257. }
  258. glog.V(0).Infof("%s server sends metrics to %s every %d seconds", name, addr, intervalSeconds)
  259. pusher := push.New(addr, name).Gatherer(Gather).Grouping("instance", instance)
  260. for {
  261. err := pusher.Push()
  262. if err != nil && !strings.HasPrefix(err.Error(), "unexpected status code 200") {
  263. glog.V(0).Infof("could not push metrics to prometheus push gateway %s: %v", addr, err)
  264. }
  265. if intervalSeconds <= 0 {
  266. intervalSeconds = 15
  267. }
  268. time.Sleep(time.Duration(intervalSeconds) * time.Second)
  269. }
  270. }
  // JoinHostPort combines host and port into a "host:port" network address.
  //
  // A host that is already wrapped in square brackets (for example "[::1]") is
  // joined by plain concatenation so the brackets are not doubled. Every other
  // host is passed to net.JoinHostPort.
  func JoinHostPort(host string, port int) string {
  	portStr := strconv.Itoa(port)

  	alreadyBracketed := strings.HasPrefix(host, "[") && strings.HasSuffix(host, "]")
  	if !alreadyBracketed {
  		return net.JoinHostPort(host, portStr)
  	}
  	return host + ":" + portStr
  }
  278. func StartMetricsServer(ip string, port int) {
  279. if port == 0 {
  280. return
  281. }
  282. http.Handle("/metrics", promhttp.HandlerFor(Gather, promhttp.HandlerOpts{}))
  283. log.Fatal(http.ListenAndServe(JoinHostPort(ip, port), nil))
  284. }
  // SourceName returns "<hostname>:<port>" using the local host name reported
  // by os.Hostname, or "unknown" when the host name cannot be determined.
  func SourceName(port uint32) string {
  	if hostname, err := os.Hostname(); err == nil {
  		return net.JoinHostPort(hostname, strconv.Itoa(int(port)))
  	}
  	return "unknown"
  }
  292. // todo - can be changed to DeletePartialMatch when https://github.com/prometheus/client_golang/pull/1013 gets released
  293. func DeleteCollectionMetrics(collection string) {
  294. VolumeServerDiskSizeGauge.DeleteLabelValues(collection, "normal")
  295. for _, volume_type := range readOnlyVolumeTypes {
  296. VolumeServerReadOnlyVolumeGauge.DeleteLabelValues(collection, volume_type)
  297. }
  298. VolumeServerVolumeCounter.DeleteLabelValues(collection, "volume")
  299. }