You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

200 lines
6.5 KiB

5 years ago
5 years ago
5 years ago
  1. package weed_server
  2. import (
  3. "fmt"
  4. "net"
  5. "time"
  6. "google.golang.org/grpc"
  7. "github.com/chrislusf/seaweedfs/weed/pb"
  8. "github.com/chrislusf/seaweedfs/weed/security"
  9. "github.com/chrislusf/seaweedfs/weed/storage/backend"
  10. "github.com/chrislusf/seaweedfs/weed/storage/erasure_coding"
  11. "golang.org/x/net/context"
  12. "github.com/chrislusf/seaweedfs/weed/glog"
  13. "github.com/chrislusf/seaweedfs/weed/pb/master_pb"
  14. "github.com/chrislusf/seaweedfs/weed/util"
  15. )
  16. func (vs *VolumeServer) GetMaster() string {
  17. return vs.currentMaster
  18. }
  19. func (vs *VolumeServer) heartbeat() {
  20. glog.V(0).Infof("Volume server start with seed master nodes: %v", vs.SeedMasterNodes)
  21. vs.store.SetDataCenter(vs.dataCenter)
  22. vs.store.SetRack(vs.rack)
  23. grpcDialOption := security.LoadClientTLS(util.GetViper(), "grpc.volume")
  24. var err error
  25. var newLeader string
  26. for {
  27. for _, master := range vs.SeedMasterNodes {
  28. if newLeader != "" {
  29. // the new leader may actually is the same master
  30. // need to wait a bit before adding itself
  31. time.Sleep(3 * time.Second)
  32. master = newLeader
  33. }
  34. masterGrpcAddress, parseErr := pb.ParseServerToGrpcAddress(master)
  35. if parseErr != nil {
  36. glog.V(0).Infof("failed to parse master grpc %v: %v", masterGrpcAddress, parseErr)
  37. continue
  38. }
  39. vs.store.MasterAddress = master
  40. newLeader, err = vs.doHeartbeat(master, masterGrpcAddress, grpcDialOption, time.Duration(vs.pulseSeconds)*time.Second)
  41. if err != nil {
  42. glog.V(0).Infof("heartbeat error: %v", err)
  43. time.Sleep(time.Duration(vs.pulseSeconds) * time.Second)
  44. newLeader = ""
  45. vs.store.MasterAddress = ""
  46. }
  47. }
  48. }
  49. }
  50. func (vs *VolumeServer) doHeartbeat(masterNode, masterGrpcAddress string, grpcDialOption grpc.DialOption, sleepInterval time.Duration) (newLeader string, err error) {
  51. grpcConection, err := pb.GrpcDial(context.Background(), masterGrpcAddress, grpcDialOption)
  52. if err != nil {
  53. return "", fmt.Errorf("fail to dial %s : %v", masterNode, err)
  54. }
  55. defer grpcConection.Close()
  56. client := master_pb.NewSeaweedClient(grpcConection)
  57. stream, err := client.SendHeartbeat(context.Background())
  58. if err != nil {
  59. glog.V(0).Infof("SendHeartbeat to %s: %v", masterNode, err)
  60. return "", err
  61. }
  62. glog.V(0).Infof("Heartbeat to: %v", masterNode)
  63. vs.currentMaster = masterNode
  64. doneChan := make(chan error, 1)
  65. go func() {
  66. for {
  67. in, err := stream.Recv()
  68. if err != nil {
  69. doneChan <- err
  70. return
  71. }
  72. if in.GetVolumeSizeLimit() != 0 && vs.store.GetVolumeSizeLimit() != in.GetVolumeSizeLimit() {
  73. vs.store.SetVolumeSizeLimit(in.GetVolumeSizeLimit())
  74. if vs.store.MaybeAdjustVolumeMax() {
  75. if err = stream.Send(vs.store.CollectHeartbeat()); err != nil {
  76. glog.V(0).Infof("Volume Server Failed to talk with master %s: %v", masterNode, err)
  77. }
  78. }
  79. }
  80. if in.GetLeader() != "" && masterNode != in.GetLeader() && !isSameIP(in.GetLeader(), masterNode) {
  81. glog.V(0).Infof("Volume Server found a new master newLeader: %v instead of %v", in.GetLeader(), masterNode)
  82. newLeader = in.GetLeader()
  83. doneChan <- nil
  84. return
  85. }
  86. if in.GetMetricsAddress() != "" && vs.MetricsAddress != in.GetMetricsAddress() {
  87. vs.MetricsAddress = in.GetMetricsAddress()
  88. vs.MetricsIntervalSec = int(in.GetMetricsIntervalSeconds())
  89. }
  90. if len(in.StorageBackends) > 0 {
  91. backend.LoadFromPbStorageBackends(in.StorageBackends)
  92. }
  93. }
  94. }()
  95. if err = stream.Send(vs.store.CollectHeartbeat()); err != nil {
  96. glog.V(0).Infof("Volume Server Failed to talk with master %s: %v", masterNode, err)
  97. return "", err
  98. }
  99. if err = stream.Send(vs.store.CollectErasureCodingHeartbeat()); err != nil {
  100. glog.V(0).Infof("Volume Server Failed to talk with master %s: %v", masterNode, err)
  101. return "", err
  102. }
  103. volumeTickChan := time.Tick(sleepInterval)
  104. ecShardTickChan := time.Tick(17 * sleepInterval)
  105. for {
  106. select {
  107. case volumeMessage := <-vs.store.NewVolumesChan:
  108. deltaBeat := &master_pb.Heartbeat{
  109. NewVolumes: []*master_pb.VolumeShortInformationMessage{
  110. &volumeMessage,
  111. },
  112. }
  113. glog.V(1).Infof("volume server %s:%d adds volume %d", vs.store.Ip, vs.store.Port, volumeMessage.Id)
  114. if err = stream.Send(deltaBeat); err != nil {
  115. glog.V(0).Infof("Volume Server Failed to update to master %s: %v", masterNode, err)
  116. return "", err
  117. }
  118. case ecShardMessage := <-vs.store.NewEcShardsChan:
  119. deltaBeat := &master_pb.Heartbeat{
  120. NewEcShards: []*master_pb.VolumeEcShardInformationMessage{
  121. &ecShardMessage,
  122. },
  123. }
  124. glog.V(1).Infof("volume server %s:%d adds ec shard %d:%d", vs.store.Ip, vs.store.Port, ecShardMessage.Id,
  125. erasure_coding.ShardBits(ecShardMessage.EcIndexBits).ShardIds())
  126. if err = stream.Send(deltaBeat); err != nil {
  127. glog.V(0).Infof("Volume Server Failed to update to master %s: %v", masterNode, err)
  128. return "", err
  129. }
  130. case volumeMessage := <-vs.store.DeletedVolumesChan:
  131. deltaBeat := &master_pb.Heartbeat{
  132. DeletedVolumes: []*master_pb.VolumeShortInformationMessage{
  133. &volumeMessage,
  134. },
  135. }
  136. glog.V(1).Infof("volume server %s:%d deletes volume %d", vs.store.Ip, vs.store.Port, volumeMessage.Id)
  137. if err = stream.Send(deltaBeat); err != nil {
  138. glog.V(0).Infof("Volume Server Failed to update to master %s: %v", masterNode, err)
  139. return "", err
  140. }
  141. case ecShardMessage := <-vs.store.DeletedEcShardsChan:
  142. deltaBeat := &master_pb.Heartbeat{
  143. DeletedEcShards: []*master_pb.VolumeEcShardInformationMessage{
  144. &ecShardMessage,
  145. },
  146. }
  147. glog.V(1).Infof("volume server %s:%d deletes ec shard %d:%d", vs.store.Ip, vs.store.Port, ecShardMessage.Id,
  148. erasure_coding.ShardBits(ecShardMessage.EcIndexBits).ShardIds())
  149. if err = stream.Send(deltaBeat); err != nil {
  150. glog.V(0).Infof("Volume Server Failed to update to master %s: %v", masterNode, err)
  151. return "", err
  152. }
  153. case <-volumeTickChan:
  154. glog.V(4).Infof("volume server %s:%d heartbeat", vs.store.Ip, vs.store.Port)
  155. if err = stream.Send(vs.store.CollectHeartbeat()); err != nil {
  156. glog.V(0).Infof("Volume Server Failed to talk with master %s: %v", masterNode, err)
  157. return "", err
  158. }
  159. case <-ecShardTickChan:
  160. glog.V(4).Infof("volume server %s:%d ec heartbeat", vs.store.Ip, vs.store.Port)
  161. if err = stream.Send(vs.store.CollectErasureCodingHeartbeat()); err != nil {
  162. glog.V(0).Infof("Volume Server Failed to talk with master %s: %v", masterNode, err)
  163. return "", err
  164. }
  165. case err = <-doneChan:
  166. return
  167. }
  168. }
  169. }
  170. func isSameIP(ip string, host string) bool {
  171. ips, err := net.LookupIP(host)
  172. if err != nil {
  173. return false
  174. }
  175. for _, t := range ips {
  176. if ip == t.String() {
  177. return true
  178. }
  179. }
  180. return false
  181. }