You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

233 lines
7.1 KiB

  1. package shell
  2. import (
  3. "context"
  4. "flag"
  5. "fmt"
  6. "github.com/chrislusf/seaweedfs/weed/cluster"
  7. "github.com/chrislusf/seaweedfs/weed/pb"
  8. "github.com/chrislusf/seaweedfs/weed/pb/filer_pb"
  9. "github.com/chrislusf/seaweedfs/weed/pb/volume_server_pb"
  10. "io"
  11. "github.com/chrislusf/seaweedfs/weed/pb/master_pb"
  12. )
  13. func init() {
  14. Commands = append(Commands, &commandClusterCheck{})
  15. }
  16. type commandClusterCheck struct {
  17. }
  18. func (c *commandClusterCheck) Name() string {
  19. return "cluster.check"
  20. }
  21. func (c *commandClusterCheck) Help() string {
  22. return `check current cluster network connectivity
  23. cluster.check
  24. `
  25. }
  26. func (c *commandClusterCheck) Do(args []string, commandEnv *CommandEnv, writer io.Writer) (err error) {
  27. clusterPsCommand := flag.NewFlagSet(c.Name(), flag.ContinueOnError)
  28. if err = clusterPsCommand.Parse(args); err != nil {
  29. return nil
  30. }
  31. // collect topology information
  32. topologyInfo, volumeSizeLimitMb, err := collectTopologyInfo(commandEnv, 0)
  33. if err != nil {
  34. return err
  35. }
  36. fmt.Fprintf(writer, "Topology volumeSizeLimit:%d MB%s\n", volumeSizeLimitMb, diskInfosToString(topologyInfo.DiskInfos))
  37. emptyDiskTypeDiskInfo, emptyDiskTypeFound := topologyInfo.DiskInfos[""]
  38. hddDiskTypeDiskInfo, hddDiskTypeFound := topologyInfo.DiskInfos["hdd"]
  39. if !emptyDiskTypeFound && !hddDiskTypeFound || emptyDiskTypeDiskInfo.VolumeCount == 0 && hddDiskTypeDiskInfo.VolumeCount == 0 {
  40. return fmt.Errorf("Need to a hdd disk type!")
  41. }
  42. // collect filers
  43. var filers []pb.ServerAddress
  44. err = commandEnv.MasterClient.WithClient(false, func(client master_pb.SeaweedClient) error {
  45. resp, err := client.ListClusterNodes(context.Background(), &master_pb.ListClusterNodesRequest{
  46. ClientType: cluster.FilerType,
  47. })
  48. for _, node := range resp.ClusterNodes {
  49. filers = append(filers, pb.ServerAddress(node.Address))
  50. }
  51. return err
  52. })
  53. if err != nil {
  54. return
  55. }
  56. fmt.Fprintf(writer, "the cluster has %d filers: %+v\n", len(filers), filers)
  57. // collect volume servers
  58. var volumeServers []pb.ServerAddress
  59. t, _, err := collectTopologyInfo(commandEnv, 0)
  60. if err != nil {
  61. return err
  62. }
  63. for _, dc := range t.DataCenterInfos {
  64. for _, r := range dc.RackInfos {
  65. for _, dn := range r.DataNodeInfos {
  66. volumeServers = append(volumeServers, pb.NewServerAddressFromDataNode(dn))
  67. }
  68. }
  69. }
  70. fmt.Fprintf(writer, "the cluster has %d volume servers: %+v\n", len(volumeServers), volumeServers)
  71. // collect all masters
  72. var masters []pb.ServerAddress
  73. for _, master := range commandEnv.MasterClient.GetMasters() {
  74. masters = append(masters, master)
  75. }
  76. // check from master to volume servers
  77. for _, master := range masters {
  78. for _, volumeServer := range volumeServers {
  79. fmt.Fprintf(writer, "checking master %s to volume server %s ... ", string(master), string(volumeServer))
  80. err := pb.WithMasterClient(false, master, commandEnv.option.GrpcDialOption, func(client master_pb.SeaweedClient) error {
  81. _, err := client.Ping(context.Background(), &master_pb.PingRequest{
  82. Target: string(volumeServer),
  83. TargetType: cluster.VolumeServerType,
  84. })
  85. return err
  86. })
  87. if err == nil {
  88. fmt.Fprintf(writer, "ok\n")
  89. } else {
  90. fmt.Fprintf(writer, "%v\n", err)
  91. }
  92. }
  93. }
  94. // check between masters
  95. for _, sourceMaster := range masters {
  96. for _, targetMaster := range masters {
  97. if sourceMaster == targetMaster {
  98. continue
  99. }
  100. fmt.Fprintf(writer, "checking master %s to %s ... ", string(sourceMaster), string(targetMaster))
  101. err := pb.WithMasterClient(false, sourceMaster, commandEnv.option.GrpcDialOption, func(client master_pb.SeaweedClient) error {
  102. _, err := client.Ping(context.Background(), &master_pb.PingRequest{
  103. Target: string(targetMaster),
  104. TargetType: cluster.MasterType,
  105. })
  106. return err
  107. })
  108. if err == nil {
  109. fmt.Fprintf(writer, "ok\n")
  110. } else {
  111. fmt.Fprintf(writer, "%v\n", err)
  112. }
  113. }
  114. }
  115. // check from volume servers to masters
  116. for _, volumeServer := range volumeServers {
  117. for _, master := range masters {
  118. fmt.Fprintf(writer, "checking volume server %s to master %s ... ", string(volumeServer), string(master))
  119. err := pb.WithVolumeServerClient(false, volumeServer, commandEnv.option.GrpcDialOption, func(client volume_server_pb.VolumeServerClient) error {
  120. _, err := client.Ping(context.Background(), &volume_server_pb.PingRequest{
  121. Target: string(master),
  122. TargetType: cluster.MasterType,
  123. })
  124. return err
  125. })
  126. if err == nil {
  127. fmt.Fprintf(writer, "ok\n")
  128. } else {
  129. fmt.Fprintf(writer, "%v\n", err)
  130. }
  131. }
  132. }
  133. // check from filers to masters
  134. for _, filer := range filers {
  135. for _, master := range masters {
  136. fmt.Fprintf(writer, "checking filer %s to master %s ... ", string(filer), string(master))
  137. err := pb.WithFilerClient(false, filer, commandEnv.option.GrpcDialOption, func(client filer_pb.SeaweedFilerClient) error {
  138. _, err := client.Ping(context.Background(), &filer_pb.PingRequest{
  139. Target: string(master),
  140. TargetType: cluster.MasterType,
  141. })
  142. return err
  143. })
  144. if err == nil {
  145. fmt.Fprintf(writer, "ok\n")
  146. } else {
  147. fmt.Fprintf(writer, "%v\n", err)
  148. }
  149. }
  150. }
  151. // check from filers to volume servers
  152. for _, filer := range filers {
  153. for _, volumeServer := range volumeServers {
  154. fmt.Fprintf(writer, "checking filer %s to volume server %s ... ", string(filer), string(volumeServer))
  155. err := pb.WithFilerClient(false, filer, commandEnv.option.GrpcDialOption, func(client filer_pb.SeaweedFilerClient) error {
  156. _, err := client.Ping(context.Background(), &filer_pb.PingRequest{
  157. Target: string(volumeServer),
  158. TargetType: cluster.VolumeServerType,
  159. })
  160. return err
  161. })
  162. if err == nil {
  163. fmt.Fprintf(writer, "ok\n")
  164. } else {
  165. fmt.Fprintf(writer, "%v\n", err)
  166. }
  167. }
  168. }
  169. // check between volume servers
  170. for _, sourceVolumeServer := range volumeServers {
  171. for _, targetVolumeServer := range volumeServers {
  172. if sourceVolumeServer == targetVolumeServer {
  173. continue
  174. }
  175. fmt.Fprintf(writer, "checking volume server %s to %s ... ", string(sourceVolumeServer), string(targetVolumeServer))
  176. err := pb.WithVolumeServerClient(false, sourceVolumeServer, commandEnv.option.GrpcDialOption, func(client volume_server_pb.VolumeServerClient) error {
  177. _, err := client.Ping(context.Background(), &volume_server_pb.PingRequest{
  178. Target: string(targetVolumeServer),
  179. TargetType: cluster.VolumeServerType,
  180. })
  181. return err
  182. })
  183. if err == nil {
  184. fmt.Fprintf(writer, "ok\n")
  185. } else {
  186. fmt.Fprintf(writer, "%v\n", err)
  187. }
  188. }
  189. }
  190. // check between filers, and need to connect to itself
  191. for _, sourceFiler := range filers {
  192. for _, targetFiler := range filers {
  193. fmt.Fprintf(writer, "checking filer %s to %s ... ", string(sourceFiler), string(targetFiler))
  194. err := pb.WithFilerClient(false, sourceFiler, commandEnv.option.GrpcDialOption, func(client filer_pb.SeaweedFilerClient) error {
  195. _, err := client.Ping(context.Background(), &filer_pb.PingRequest{
  196. Target: string(targetFiler),
  197. TargetType: cluster.FilerType,
  198. })
  199. return err
  200. })
  201. if err == nil {
  202. fmt.Fprintf(writer, "ok\n")
  203. } else {
  204. fmt.Fprintf(writer, "%v\n", err)
  205. }
  206. }
  207. }
  208. return nil
  209. }