From 4b5c0e3fa9a6e61e1cfa000567773b23fba39918 Mon Sep 17 00:00:00 2001
From: chrislu
Date: Fri, 1 Apr 2022 17:27:49 -0700
Subject: [PATCH] check cluster connectivities

---
 weed/shell/command_cluster_check.go | 169 ++++++++++++++++++++++++++++
 weed/wdclient/masterclient.go       |   8 +
 2 files changed, 177 insertions(+)
 create mode 100644 weed/shell/command_cluster_check.go

diff --git a/weed/shell/command_cluster_check.go b/weed/shell/command_cluster_check.go
new file mode 100644
index 000000000..2100dff91
--- /dev/null
+++ b/weed/shell/command_cluster_check.go
@@ -0,0 +1,169 @@
+package shell
+
+import (
+	"context"
+	"flag"
+	"fmt"
+	"io"
+
+	"github.com/chrislusf/seaweedfs/weed/cluster"
+	"github.com/chrislusf/seaweedfs/weed/pb"
+	"github.com/chrislusf/seaweedfs/weed/pb/filer_pb"
+	"github.com/chrislusf/seaweedfs/weed/pb/master_pb"
+	"github.com/chrislusf/seaweedfs/weed/pb/volume_server_pb"
+)
+
+func init() {
+	Commands = append(Commands, &commandClusterCheck{})
+}
+
+// commandClusterCheck implements the "cluster.check" shell command.
+type commandClusterCheck struct {
+}
+
+// Name returns the command name, "cluster.check".
+func (c *commandClusterCheck) Name() string {
+	return "cluster.check"
+}
+
+// Help returns the usage text of the command.
+func (c *commandClusterCheck) Help() string {
+	return `check current cluster network connectivity
+
+	cluster.check
+
+`
+}
+
+// Do lists the filers, volume servers and masters of the cluster and asks
+// each component to ping the others, writing one "ok" or error line per pair
+// to writer. A failed ping is only reported; it does not abort the command.
+// An error is returned only when the filers or the topology cannot be listed.
+func (c *commandClusterCheck) Do(args []string, commandEnv *CommandEnv, writer io.Writer) (err error) {
+
+	clusterCheckCommand := flag.NewFlagSet(c.Name(), flag.ContinueOnError)
+	if err = clusterCheckCommand.Parse(args); err != nil {
+		// the parse error is deliberately not propagated to the caller
+		return nil
+	}
+
+	// collect filers
+	var filers []pb.ServerAddress
+	err = commandEnv.MasterClient.WithClient(false, func(client master_pb.SeaweedClient) error {
+		resp, err := client.ListClusterNodes(context.Background(), &master_pb.ListClusterNodesRequest{
+			ClientType: cluster.FilerType,
+		})
+		if err != nil {
+			// resp must not be dereferenced when the call failed
+			return err
+		}
+
+		for _, node := range resp.ClusterNodes {
+			filers = append(filers, pb.ServerAddress(node.Address))
+		}
+		return nil
+	})
+	if err != nil {
+		return err
+	}
+	fmt.Fprintf(writer, "the cluster has %d filers: %+v\n", len(filers), filers)
+
+	// collect volume servers
+	var volumeServers []pb.ServerAddress
+	t, _, err := collectTopologyInfo(commandEnv, 0)
+	if err != nil {
+		return err
+	}
+	for _, dc := range t.DataCenterInfos {
+		for _, r := range dc.RackInfos {
+			for _, dn := range r.DataNodeInfos {
+				volumeServers = append(volumeServers, pb.NewServerAddressFromDataNode(dn))
+			}
+		}
+	}
+	fmt.Fprintf(writer, "the cluster has %d volume servers: %+v\n", len(volumeServers), volumeServers)
+
+	// collect all masters
+	var masters []pb.ServerAddress
+	for _, master := range commandEnv.MasterClient.GetMasters() {
+		masters = append(masters, master)
+	}
+
+	// check from master to volume servers
+	for _, master := range masters {
+		for _, volumeServer := range volumeServers {
+			fmt.Fprintf(writer, "checking master %s to volume server %s ... ", string(master), string(volumeServer))
+			pingErr := pb.WithMasterClient(false, master, commandEnv.option.GrpcDialOption, func(client master_pb.SeaweedClient) error {
+				_, err := client.Ping(context.Background(), &master_pb.PingRequest{
+					Target:     string(volumeServer),
+					TargetType: cluster.VolumeServerType,
+				})
+				return err
+			})
+			if pingErr == nil {
+				fmt.Fprintf(writer, "ok\n")
+			} else {
+				fmt.Fprintf(writer, "%v\n", pingErr)
+			}
+		}
+	}
+
+	// check from volume servers to masters
+	for _, volumeServer := range volumeServers {
+		for _, master := range masters {
+			fmt.Fprintf(writer, "checking volume server %s to master %s ... ", string(volumeServer), string(master))
+			pingErr := pb.WithVolumeServerClient(false, volumeServer, commandEnv.option.GrpcDialOption, func(client volume_server_pb.VolumeServerClient) error {
+				_, err := client.Ping(context.Background(), &volume_server_pb.PingRequest{
+					Target:     string(master),
+					TargetType: cluster.MasterType,
+				})
+				return err
+			})
+			if pingErr == nil {
+				fmt.Fprintf(writer, "ok\n")
+			} else {
+				fmt.Fprintf(writer, "%v\n", pingErr)
+			}
+		}
+	}
+
+	// check from filers to masters
+	for _, filer := range filers {
+		for _, master := range masters {
+			fmt.Fprintf(writer, "checking filer %s to master %s ... ", string(filer), string(master))
+			pingErr := pb.WithFilerClient(false, filer, commandEnv.option.GrpcDialOption, func(client filer_pb.SeaweedFilerClient) error {
+				_, err := client.Ping(context.Background(), &filer_pb.PingRequest{
+					Target:     string(master),
+					TargetType: cluster.MasterType,
+				})
+				return err
+			})
+			if pingErr == nil {
+				fmt.Fprintf(writer, "ok\n")
+			} else {
+				fmt.Fprintf(writer, "%v\n", pingErr)
+			}
+		}
+	}
+
+	// check from filers to volume servers
+	for _, filer := range filers {
+		for _, volumeServer := range volumeServers {
+			fmt.Fprintf(writer, "checking filer %s to volume server %s ... ", string(filer), string(volumeServer))
+			pingErr := pb.WithFilerClient(false, filer, commandEnv.option.GrpcDialOption, func(client filer_pb.SeaweedFilerClient) error {
+				_, err := client.Ping(context.Background(), &filer_pb.PingRequest{
+					Target:     string(volumeServer),
+					TargetType: cluster.VolumeServerType,
+				})
+				return err
+			})
+			if pingErr == nil {
+				fmt.Fprintf(writer, "ok\n")
+			} else {
+				fmt.Fprintf(writer, "%v\n", pingErr)
+			}
+		}
+	}
+
+	return nil
+}
diff --git a/weed/wdclient/masterclient.go b/weed/wdclient/masterclient.go
index daf74c1be..53236fc6d 100644
--- a/weed/wdclient/masterclient.go
+++ b/weed/wdclient/masterclient.go
@@ -41,6 +41,14 @@ func (mc *MasterClient) GetMaster() pb.ServerAddress {
 	return mc.currentMaster
 }
 
+// GetMasters blocks in WaitUntilConnected until a current master is set and
+// then returns the client's masters map. The returned map is the client's own
+// map, not a copy.
+func (mc *MasterClient) GetMasters() map[string]pb.ServerAddress {
+	mc.WaitUntilConnected()
+	return mc.masters
+}
+
 func (mc *MasterClient) WaitUntilConnected() {
 	for mc.currentMaster == "" {
 		time.Sleep(time.Duration(rand.Int31n(200)) * time.Millisecond)