You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

248 lines
7.9 KiB

  1. package shell
  2. import (
  3. "context"
  4. "flag"
  5. "fmt"
  6. "github.com/chrislusf/seaweedfs/weed/cluster"
  7. "github.com/chrislusf/seaweedfs/weed/pb"
  8. "github.com/chrislusf/seaweedfs/weed/pb/filer_pb"
  9. "github.com/chrislusf/seaweedfs/weed/pb/master_pb"
  10. "github.com/chrislusf/seaweedfs/weed/pb/volume_server_pb"
  11. "io"
  12. )
  13. func init() {
  14. Commands = append(Commands, &commandClusterCheck{})
  15. }
  // commandClusterCheck implements the "cluster.check" shell command. It holds
  // no state; all inputs arrive through the arguments of Do.
  type commandClusterCheck struct{}
  18. func (c *commandClusterCheck) Name() string {
  19. return "cluster.check"
  20. }
  21. func (c *commandClusterCheck) Help() string {
  22. return `check current cluster network connectivity
  23. cluster.check
  24. `
  25. }
  26. func (c *commandClusterCheck) Do(args []string, commandEnv *CommandEnv, writer io.Writer) (err error) {
  27. clusterPsCommand := flag.NewFlagSet(c.Name(), flag.ContinueOnError)
  28. if err = clusterPsCommand.Parse(args); err != nil {
  29. return nil
  30. }
  31. // collect topology information
  32. topologyInfo, volumeSizeLimitMb, err := collectTopologyInfo(commandEnv, 0)
  33. if err != nil {
  34. return err
  35. }
  36. fmt.Fprintf(writer, "Topology volumeSizeLimit:%d MB%s\n", volumeSizeLimitMb, diskInfosToString(topologyInfo.DiskInfos))
  37. emptyDiskTypeDiskInfo, emptyDiskTypeFound := topologyInfo.DiskInfos[""]
  38. hddDiskTypeDiskInfo, hddDiskTypeFound := topologyInfo.DiskInfos["hdd"]
  39. if !emptyDiskTypeFound && !hddDiskTypeFound {
  40. return fmt.Errorf("Need to a hdd disk type!")
  41. }
  42. if emptyDiskTypeFound && emptyDiskTypeDiskInfo.VolumeCount == 0 || hddDiskTypeFound && hddDiskTypeDiskInfo.VolumeCount == 0 {
  43. return fmt.Errorf("Need to a hdd disk type!")
  44. }
  45. // collect filers
  46. var filers []pb.ServerAddress
  47. err = commandEnv.MasterClient.WithClient(false, func(client master_pb.SeaweedClient) error {
  48. resp, err := client.ListClusterNodes(context.Background(), &master_pb.ListClusterNodesRequest{
  49. ClientType: cluster.FilerType,
  50. })
  51. for _, node := range resp.ClusterNodes {
  52. filers = append(filers, pb.ServerAddress(node.Address))
  53. }
  54. return err
  55. })
  56. if err != nil {
  57. return
  58. }
  59. fmt.Fprintf(writer, "the cluster has %d filers: %+v\n", len(filers), filers)
  60. // collect volume servers
  61. var volumeServers []pb.ServerAddress
  62. t, _, err := collectTopologyInfo(commandEnv, 0)
  63. if err != nil {
  64. return err
  65. }
  66. for _, dc := range t.DataCenterInfos {
  67. for _, r := range dc.RackInfos {
  68. for _, dn := range r.DataNodeInfos {
  69. volumeServers = append(volumeServers, pb.NewServerAddressFromDataNode(dn))
  70. }
  71. }
  72. }
  73. fmt.Fprintf(writer, "the cluster has %d volume servers: %+v\n", len(volumeServers), volumeServers)
  74. // collect all masters
  75. var masters []pb.ServerAddress
  76. for _, master := range commandEnv.MasterClient.GetMasters() {
  77. masters = append(masters, master)
  78. }
  79. // check from master to volume servers
  80. for _, master := range masters {
  81. for _, volumeServer := range volumeServers {
  82. fmt.Fprintf(writer, "checking master %s to volume server %s ... ", string(master), string(volumeServer))
  83. err := pb.WithMasterClient(false, master, commandEnv.option.GrpcDialOption, func(client master_pb.SeaweedClient) error {
  84. pong, err := client.Ping(context.Background(), &master_pb.PingRequest{
  85. Target: string(volumeServer),
  86. TargetType: cluster.VolumeServerType,
  87. })
  88. if err == nil {
  89. printTiming(writer, pong.StartTimeNs, pong.RemoteTimeNs, pong.StopTimeNs)
  90. }
  91. return err
  92. })
  93. if err != nil {
  94. fmt.Fprintf(writer, "%v\n", err)
  95. }
  96. }
  97. }
  98. // check between masters
  99. for _, sourceMaster := range masters {
  100. for _, targetMaster := range masters {
  101. if sourceMaster == targetMaster {
  102. continue
  103. }
  104. fmt.Fprintf(writer, "checking master %s to %s ... ", string(sourceMaster), string(targetMaster))
  105. err := pb.WithMasterClient(false, sourceMaster, commandEnv.option.GrpcDialOption, func(client master_pb.SeaweedClient) error {
  106. pong, err := client.Ping(context.Background(), &master_pb.PingRequest{
  107. Target: string(targetMaster),
  108. TargetType: cluster.MasterType,
  109. })
  110. if err == nil {
  111. printTiming(writer, pong.StartTimeNs, pong.RemoteTimeNs, pong.StopTimeNs)
  112. }
  113. return err
  114. })
  115. if err != nil {
  116. fmt.Fprintf(writer, "%v\n", err)
  117. }
  118. }
  119. }
  120. // check from volume servers to masters
  121. for _, volumeServer := range volumeServers {
  122. for _, master := range masters {
  123. fmt.Fprintf(writer, "checking volume server %s to master %s ... ", string(volumeServer), string(master))
  124. err := pb.WithVolumeServerClient(false, volumeServer, commandEnv.option.GrpcDialOption, func(client volume_server_pb.VolumeServerClient) error {
  125. pong, err := client.Ping(context.Background(), &volume_server_pb.PingRequest{
  126. Target: string(master),
  127. TargetType: cluster.MasterType,
  128. })
  129. if err == nil {
  130. printTiming(writer, pong.StartTimeNs, pong.RemoteTimeNs, pong.StopTimeNs)
  131. }
  132. return err
  133. })
  134. if err != nil {
  135. fmt.Fprintf(writer, "%v\n", err)
  136. }
  137. }
  138. }
  139. // check from filers to masters
  140. for _, filer := range filers {
  141. for _, master := range masters {
  142. fmt.Fprintf(writer, "checking filer %s to master %s ... ", string(filer), string(master))
  143. err := pb.WithFilerClient(false, filer, commandEnv.option.GrpcDialOption, func(client filer_pb.SeaweedFilerClient) error {
  144. pong, err := client.Ping(context.Background(), &filer_pb.PingRequest{
  145. Target: string(master),
  146. TargetType: cluster.MasterType,
  147. })
  148. if err == nil {
  149. printTiming(writer, pong.StartTimeNs, pong.RemoteTimeNs, pong.StopTimeNs)
  150. }
  151. return err
  152. })
  153. if err != nil {
  154. fmt.Fprintf(writer, "%v\n", err)
  155. }
  156. }
  157. }
  158. // check from filers to volume servers
  159. for _, filer := range filers {
  160. for _, volumeServer := range volumeServers {
  161. fmt.Fprintf(writer, "checking filer %s to volume server %s ... ", string(filer), string(volumeServer))
  162. err := pb.WithFilerClient(false, filer, commandEnv.option.GrpcDialOption, func(client filer_pb.SeaweedFilerClient) error {
  163. pong, err := client.Ping(context.Background(), &filer_pb.PingRequest{
  164. Target: string(volumeServer),
  165. TargetType: cluster.VolumeServerType,
  166. })
  167. if err == nil {
  168. printTiming(writer, pong.StartTimeNs, pong.RemoteTimeNs, pong.StopTimeNs)
  169. }
  170. return err
  171. })
  172. if err != nil {
  173. fmt.Fprintf(writer, "%v\n", err)
  174. }
  175. }
  176. }
  177. // check between volume servers
  178. for _, sourceVolumeServer := range volumeServers {
  179. for _, targetVolumeServer := range volumeServers {
  180. if sourceVolumeServer == targetVolumeServer {
  181. continue
  182. }
  183. fmt.Fprintf(writer, "checking volume server %s to %s ... ", string(sourceVolumeServer), string(targetVolumeServer))
  184. err := pb.WithVolumeServerClient(false, sourceVolumeServer, commandEnv.option.GrpcDialOption, func(client volume_server_pb.VolumeServerClient) error {
  185. pong, err := client.Ping(context.Background(), &volume_server_pb.PingRequest{
  186. Target: string(targetVolumeServer),
  187. TargetType: cluster.VolumeServerType,
  188. })
  189. if err == nil {
  190. printTiming(writer, pong.StartTimeNs, pong.RemoteTimeNs, pong.StopTimeNs)
  191. }
  192. return err
  193. })
  194. if err != nil {
  195. fmt.Fprintf(writer, "%v\n", err)
  196. }
  197. }
  198. }
  199. // check between filers, and need to connect to itself
  200. for _, sourceFiler := range filers {
  201. for _, targetFiler := range filers {
  202. fmt.Fprintf(writer, "checking filer %s to %s ... ", string(sourceFiler), string(targetFiler))
  203. err := pb.WithFilerClient(false, sourceFiler, commandEnv.option.GrpcDialOption, func(client filer_pb.SeaweedFilerClient) error {
  204. pong, err := client.Ping(context.Background(), &filer_pb.PingRequest{
  205. Target: string(targetFiler),
  206. TargetType: cluster.FilerType,
  207. })
  208. if err == nil {
  209. printTiming(writer, pong.StartTimeNs, pong.RemoteTimeNs, pong.StopTimeNs)
  210. }
  211. return err
  212. })
  213. if err != nil {
  214. fmt.Fprintf(writer, "%v\n", err)
  215. }
  216. }
  217. }
  218. return nil
  219. }
  220. func printTiming(writer io.Writer, startNs, remoteNs, stopNs int64) {
  221. roundTripTimeMs := float32(stopNs-startNs) / 1000000
  222. deltaTimeMs := float32(remoteNs-(startNs+stopNs)/2) / 1000000
  223. fmt.Fprintf(writer, "ok round trip %.3fms clock delta %.3fms\n", roundTripTimeMs, deltaTimeMs)
  224. }