From 28efe3152459b9be946ca1b05e940d7392e33126 Mon Sep 17 00:00:00 2001 From: Konstantin Lebedev <9497591+kmlebedev@users.noreply.github.com> Date: Mon, 24 Jan 2022 19:09:43 +0500 Subject: [PATCH 1/3] new master metrics --- weed/server/master_grpc_server.go | 17 +++++++++++++++-- weed/server/master_server.go | 2 ++ weed/stats/metrics.go | 24 ++++++++++++++++++++++++ weed/wdclient/masterclient.go | 9 ++++++++- 4 files changed, 49 insertions(+), 3 deletions(-) diff --git a/weed/server/master_grpc_server.go b/weed/server/master_grpc_server.go index 7411bbc99..f1d495c66 100644 --- a/weed/server/master_grpc_server.go +++ b/weed/server/master_grpc_server.go @@ -3,6 +3,7 @@ package weed_server import ( "context" "github.com/chrislusf/seaweedfs/weed/pb" + "github.com/chrislusf/seaweedfs/weed/stats" "github.com/chrislusf/seaweedfs/weed/storage/backend" "github.com/chrislusf/seaweedfs/weed/util" "net" @@ -57,6 +58,7 @@ func (ms *MasterServer) SendHeartbeat(stream master_pb.Seaweed_SendHeartbeatServ } else { glog.Warningf("SendHeartbeat.Recv: %v", err) } + stats.MasterReceivedHeartbeatCounter.WithLabelValues("error").Inc() return err } @@ -74,12 +76,15 @@ func (ms *MasterServer) SendHeartbeat(stream master_pb.Seaweed_SendHeartbeatServ glog.Warningf("SendHeartbeat.Send volume size to %s:%d %v", dn.Ip, dn.Port, err) return err } + stats.MasterReceivedHeartbeatCounter.WithLabelValues("dataNode").Inc() dn.Counter++ } dn.AdjustMaxVolumeCounts(heartbeat.MaxVolumeCounts) glog.V(4).Infof("master received heartbeat %s", heartbeat.String()) + stats.MasterReceivedHeartbeatCounter.WithLabelValues("total").Inc() + var dataCenter string if dc := dn.GetDataCenter(); dc != nil { dataCenter = string(dc.Id()) @@ -89,6 +94,12 @@ func (ms *MasterServer) SendHeartbeat(stream master_pb.Seaweed_SendHeartbeatServ PublicUrl: dn.PublicUrl, DataCenter: dataCenter, } + if len(heartbeat.NewVolumes) > 0 { + stats.MasterReceivedHeartbeatCounter.WithLabelValues("newVolumes").Inc() + } + if len(heartbeat.DeletedVolumes) 
> 0 { + stats.MasterReceivedHeartbeatCounter.WithLabelValues("deletedVolumes").Inc() + } if len(heartbeat.NewVolumes) > 0 || len(heartbeat.DeletedVolumes) > 0 { // process delta volume ids if exists for fast volume id updates for _, volInfo := range heartbeat.NewVolumes { @@ -103,6 +114,7 @@ func (ms *MasterServer) SendHeartbeat(stream master_pb.Seaweed_SendHeartbeatServ if len(heartbeat.Volumes) > 0 || heartbeat.HasNoVolumes { // process heartbeat.Volumes + stats.MasterReceivedHeartbeatCounter.WithLabelValues("Volumes").Inc() newVolumes, deletedVolumes := ms.Topo.SyncDataNodeRegistration(heartbeat.Volumes, dn) for _, v := range newVolumes { @@ -116,7 +128,7 @@ func (ms *MasterServer) SendHeartbeat(stream master_pb.Seaweed_SendHeartbeatServ } if len(heartbeat.NewEcShards) > 0 || len(heartbeat.DeletedEcShards) > 0 { - + stats.MasterReceivedHeartbeatCounter.WithLabelValues("newEcShards").Inc() // update master internal volume layouts ms.Topo.IncrementalSyncDataNodeEcShards(heartbeat.NewEcShards, heartbeat.DeletedEcShards, dn) @@ -133,7 +145,8 @@ func (ms *MasterServer) SendHeartbeat(stream master_pb.Seaweed_SendHeartbeatServ } if len(heartbeat.EcShards) > 0 || heartbeat.HasNoEcShards { - glog.V(1).Infof("master received ec shards from %s: %+v", dn.Url(), heartbeat.EcShards) + stats.MasterReceivedHeartbeatCounter.WithLabelValues("ecShards").Inc() + glog.V(4).Infof("master received ec shards from %s: %+v", dn.Url(), heartbeat.EcShards) newShards, deletedShards := ms.Topo.SyncDataNodeEcShards(heartbeat.EcShards, dn) // broadcast the ec vid changes to master clients diff --git a/weed/server/master_server.go b/weed/server/master_server.go index 3851c4d2a..671432d5c 100644 --- a/weed/server/master_server.go +++ b/weed/server/master_server.go @@ -2,6 +2,7 @@ package weed_server import ( "fmt" + "github.com/chrislusf/seaweedfs/weed/stats" "net/http" "net/http/httputil" "net/url" @@ -162,6 +163,7 @@ func (ms *MasterServer) SetRaftServer(raftServer *RaftServer) { ms.Topo.RaftServer = 
raftServer.raftServer ms.Topo.RaftServer.AddEventListener(raft.LeaderChangeEventType, func(e raft.Event) { glog.V(0).Infof("leader change event: %+v => %+v", e.PrevValue(), e.Value()) + stats.MasterLeaderChangeCounter.WithLabelValues(fmt.Sprintf("%+v", e.Value())).Inc() if ms.Topo.RaftServer.Leader() != "" { glog.V(0).Infoln("[", ms.Topo.RaftServer.Name(), "]", ms.Topo.RaftServer.Leader(), "becomes leader.") } diff --git a/weed/stats/metrics.go b/weed/stats/metrics.go index 1e1681f9a..b43da609f 100644 --- a/weed/stats/metrics.go +++ b/weed/stats/metrics.go @@ -20,6 +20,30 @@ import ( var ( Gather = prometheus.NewRegistry() + MasterClientConnectCounter = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: "SeaweedFS", + Subsystem: "wdclient", + Name: "connect updates", + Help: "Counter of master client leader updates.", + }, []string{"type"}) + + MasterReceivedHeartbeatCounter = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: "SeaweedFS", + Subsystem: "master", + Name: "received heartbeats", + Help: "Counter of master received heartbeat.", + }, []string{"type"}) + + MasterLeaderChangeCounter = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: "SeaweedFS", + Subsystem: "master", + Name: "leader changes", + Help: "Counter of master leader changes.", + }, []string{"type"}) + FilerRequestCounter = prometheus.NewCounterVec( prometheus.CounterOpts{ Namespace: "SeaweedFS", diff --git a/weed/wdclient/masterclient.go b/weed/wdclient/masterclient.go index 672b3ac49..0d62422dd 100644 --- a/weed/wdclient/masterclient.go +++ b/weed/wdclient/masterclient.go @@ -2,6 +2,7 @@ package wdclient import ( "context" + "github.com/chrislusf/seaweedfs/weed/stats" "math/rand" "time" @@ -96,14 +97,15 @@ func (mc *MasterClient) tryAllMasters() { func (mc *MasterClient) tryConnectToMaster(master pb.ServerAddress) (nextHintedLeader pb.ServerAddress) { glog.V(1).Infof("%s masterClient Connecting to master %v", mc.clientType, master) + 
stats.MasterClientConnectCounter.WithLabelValues("total").Inc() gprcErr := pb.WithMasterClient(true, master, mc.grpcDialOption, func(client master_pb.SeaweedClient) error { - ctx, cancel := context.WithCancel(context.Background()) defer cancel() stream, err := client.KeepConnected(ctx) if err != nil { glog.V(1).Infof("%s masterClient failed to keep connected to %s: %v", mc.clientType, master, err) + stats.MasterClientConnectCounter.WithLabelValues("failedToKeepConnected").Inc() return err } @@ -113,6 +115,7 @@ func (mc *MasterClient) tryConnectToMaster(master pb.ServerAddress) (nextHintedL Version: util.Version(), }); err != nil { glog.V(0).Infof("%s masterClient failed to send to %s: %v", mc.clientType, master, err) + stats.MasterClientConnectCounter.WithLabelValues("failedToSend").Inc() return err } @@ -123,6 +126,7 @@ func (mc *MasterClient) tryConnectToMaster(master pb.ServerAddress) (nextHintedL resp, err := stream.Recv() if err != nil { glog.V(0).Infof("%s masterClient failed to receive from %s: %v", mc.clientType, master, err) + stats.MasterClientConnectCounter.WithLabelValues("failedToReceive").Inc() return err } @@ -131,6 +135,7 @@ func (mc *MasterClient) tryConnectToMaster(master pb.ServerAddress) (nextHintedL if resp.VolumeLocation.Leader != "" { glog.V(0).Infof("redirected to leader %v", resp.VolumeLocation.Leader) nextHintedLeader = pb.ServerAddress(resp.VolumeLocation.Leader) + stats.MasterClientConnectCounter.WithLabelValues("redirectedToleader").Inc() return nil } @@ -159,6 +164,7 @@ func (mc *MasterClient) tryConnectToMaster(master pb.ServerAddress) (nextHintedL } else { glog.V(0).Infof("- %s %s leader:%v\n", update.NodeType, update.Address, update.IsLeader) } + stats.MasterClientConnectCounter.WithLabelValues("onPeerUpdate").Inc() mc.OnPeerUpdate(update) } } @@ -167,6 +173,7 @@ func (mc *MasterClient) tryConnectToMaster(master pb.ServerAddress) (nextHintedL }) if gprcErr != nil { + stats.MasterClientConnectCounter.WithLabelValues("failed").Inc() 
glog.V(1).Infof("%s masterClient failed to connect with master %v: %v", mc.clientType, master, gprcErr) } return From c9952759c428a0b07929e5aa92c8853afaecb800 Mon Sep 17 00:00:00 2001 From: Konstantin Lebedev <9497591+kmlebedev@users.noreply.github.com> Date: Mon, 24 Jan 2022 20:13:07 +0500 Subject: [PATCH 2/3] metrics master is leader --- weed/server/master_grpc_server.go | 3 +++ weed/stats/metrics.go | 13 +++++++++++++ 2 files changed, 16 insertions(+) diff --git a/weed/server/master_grpc_server.go b/weed/server/master_grpc_server.go index f1d495c66..50fcc0d62 100644 --- a/weed/server/master_grpc_server.go +++ b/weed/server/master_grpc_server.go @@ -237,7 +237,10 @@ func (ms *MasterServer) KeepConnected(stream master_pb.Seaweed_KeepConnectedServ } case <-ticker.C: if !ms.Topo.IsLeader() { + stats.MasterRaftIsleader.Set(0) return ms.informNewLeader(stream) + } else { + stats.MasterRaftIsleader.Set(1) } case <-stopChan: return nil diff --git a/weed/stats/metrics.go b/weed/stats/metrics.go index b43da609f..c764ba63d 100644 --- a/weed/stats/metrics.go +++ b/weed/stats/metrics.go @@ -28,6 +28,14 @@ var ( Help: "Counter of master client leader updates.", }, []string{"type"}) + MasterRaftIsleader = prometheus.NewGauge( + prometheus.GaugeOpts{ + Namespace: "SeaweedFS", + Subsystem: "master", + Name: "is leader", + Help: "is leader", + }) + MasterReceivedHeartbeatCounter = prometheus.NewCounterVec( prometheus.CounterOpts{ Namespace: "SeaweedFS", @@ -153,6 +161,11 @@ var ( ) func init() { + Gather.MustRegister(MasterClientConnectCounter) + Gather.MustRegister(MasterRaftIsleader) + Gather.MustRegister(MasterReceivedHeartbeatCounter) + Gather.MustRegister(MasterLeaderChangeCounter) + Gather.MustRegister(FilerRequestCounter) Gather.MustRegister(FilerRequestHistogram) Gather.MustRegister(FilerStoreCounter) From 5c9259fa3c724102539a908cfec3ea2363db5eb3 Mon Sep 17 00:00:00 2001 From: Konstantin Lebedev <9497591+kmlebedev@users.noreply.github.com> Date: Tue, 25 Jan 2022 14:42:47 
+0500 Subject: [PATCH 3/3] fix metrics master name --- weed/stats/metrics.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/weed/stats/metrics.go b/weed/stats/metrics.go index c764ba63d..8381999c2 100644 --- a/weed/stats/metrics.go +++ b/weed/stats/metrics.go @@ -24,7 +24,7 @@ var ( prometheus.CounterOpts{ Namespace: "SeaweedFS", Subsystem: "wdclient", - Name: "connect updates", + Name: "connect_updates", Help: "Counter of master client leader updates.", }, []string{"type"}) @@ -32,7 +32,7 @@ var ( prometheus.GaugeOpts{ Namespace: "SeaweedFS", Subsystem: "master", - Name: "is leader", + Name: "is_leader", Help: "is leader", }) @@ -40,7 +40,7 @@ var ( prometheus.CounterOpts{ Namespace: "SeaweedFS", Subsystem: "master", - Name: "received heartbeats", + Name: "received_heartbeats", Help: "Counter of master received heartbeat.", }, []string{"type"}) @@ -48,7 +48,7 @@ var ( prometheus.CounterOpts{ Namespace: "SeaweedFS", Subsystem: "master", - Name: "leader changes", + Name: "leader_changes", Help: "Counter of master leader changes.", }, []string{"type"})