You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

203 lines
6.6 KiB

5 years ago
5 years ago
  1. package weed_server
  2. import (
  3. "fmt"
  4. "time"
  5. "google.golang.org/grpc"
  6. "github.com/chrislusf/seaweedfs/weed/pb"
  7. "github.com/chrislusf/seaweedfs/weed/security"
  8. "github.com/chrislusf/seaweedfs/weed/storage/backend"
  9. "github.com/chrislusf/seaweedfs/weed/storage/erasure_coding"
  10. "golang.org/x/net/context"
  11. "github.com/chrislusf/seaweedfs/weed/glog"
  12. "github.com/chrislusf/seaweedfs/weed/pb/master_pb"
  13. "github.com/chrislusf/seaweedfs/weed/util"
  14. )
  15. func (vs *VolumeServer) GetMaster() string {
  16. return vs.currentMaster
  17. }
  18. func (vs *VolumeServer) heartbeat() {
  19. glog.V(0).Infof("Volume server start with seed master nodes: %v", vs.SeedMasterNodes)
  20. vs.store.SetDataCenter(vs.dataCenter)
  21. vs.store.SetRack(vs.rack)
  22. grpcDialOption := security.LoadClientTLS(util.GetViper(), "grpc.volume")
  23. var err error
  24. var newLeader string
  25. for vs.isHeartbeating {
  26. for _, master := range vs.SeedMasterNodes {
  27. if newLeader != "" {
  28. // the new leader may actually is the same master
  29. // need to wait a bit before adding itself
  30. time.Sleep(3 * time.Second)
  31. master = newLeader
  32. }
  33. masterGrpcAddress, parseErr := pb.ParseServerToGrpcAddress(master)
  34. if parseErr != nil {
  35. glog.V(0).Infof("failed to parse master grpc %v: %v", masterGrpcAddress, parseErr)
  36. continue
  37. }
  38. vs.store.MasterAddress = master
  39. newLeader, err = vs.doHeartbeat(master, masterGrpcAddress, grpcDialOption, time.Duration(vs.pulseSeconds)*time.Second)
  40. if err != nil {
  41. glog.V(0).Infof("heartbeat error: %v", err)
  42. time.Sleep(time.Duration(vs.pulseSeconds) * time.Second)
  43. newLeader = ""
  44. vs.store.MasterAddress = ""
  45. }
  46. if !vs.isHeartbeating {
  47. break
  48. }
  49. }
  50. }
  51. }
  52. func (vs *VolumeServer) StopHeartbeat() (isAlreadyStopping bool) {
  53. if !vs.isHeartbeating {
  54. return true
  55. }
  56. vs.isHeartbeating = false
  57. vs.stopChan <- true
  58. return false
  59. }
  60. func (vs *VolumeServer) doHeartbeat(masterNode, masterGrpcAddress string, grpcDialOption grpc.DialOption, sleepInterval time.Duration) (newLeader string, err error) {
  61. ctx, cancel := context.WithCancel(context.Background())
  62. defer cancel()
  63. grpcConection, err := pb.GrpcDial(ctx, masterGrpcAddress, grpcDialOption)
  64. if err != nil {
  65. return "", fmt.Errorf("fail to dial %s : %v", masterNode, err)
  66. }
  67. defer grpcConection.Close()
  68. client := master_pb.NewSeaweedClient(grpcConection)
  69. stream, err := client.SendHeartbeat(ctx)
  70. if err != nil {
  71. glog.V(0).Infof("SendHeartbeat to %s: %v", masterNode, err)
  72. return "", err
  73. }
  74. glog.V(0).Infof("Heartbeat to: %v", masterNode)
  75. vs.currentMaster = masterNode
  76. doneChan := make(chan error, 1)
  77. go func() {
  78. for {
  79. in, err := stream.Recv()
  80. if err != nil {
  81. doneChan <- err
  82. return
  83. }
  84. if in.GetVolumeSizeLimit() != 0 && vs.store.GetVolumeSizeLimit() != in.GetVolumeSizeLimit() {
  85. vs.store.SetVolumeSizeLimit(in.GetVolumeSizeLimit())
  86. if vs.store.MaybeAdjustVolumeMax() {
  87. if err = stream.Send(vs.store.CollectHeartbeat()); err != nil {
  88. glog.V(0).Infof("Volume Server Failed to talk with master %s: %v", vs.currentMaster, err)
  89. }
  90. }
  91. }
  92. if in.GetLeader() != "" && vs.currentMaster != in.GetLeader() {
  93. glog.V(0).Infof("Volume Server found a new master newLeader: %v instead of %v", in.GetLeader(), vs.currentMaster)
  94. newLeader = in.GetLeader()
  95. doneChan <- nil
  96. return
  97. }
  98. if in.GetMetricsAddress() != "" && vs.MetricsAddress != in.GetMetricsAddress() {
  99. vs.MetricsAddress = in.GetMetricsAddress()
  100. vs.MetricsIntervalSec = int(in.GetMetricsIntervalSeconds())
  101. }
  102. if len(in.StorageBackends) > 0 {
  103. backend.LoadFromPbStorageBackends(in.StorageBackends)
  104. }
  105. }
  106. }()
  107. if err = stream.Send(vs.store.CollectHeartbeat()); err != nil {
  108. glog.V(0).Infof("Volume Server Failed to talk with master %s: %v", masterNode, err)
  109. return "", err
  110. }
  111. if err = stream.Send(vs.store.CollectErasureCodingHeartbeat()); err != nil {
  112. glog.V(0).Infof("Volume Server Failed to talk with master %s: %v", masterNode, err)
  113. return "", err
  114. }
  115. volumeTickChan := time.Tick(sleepInterval)
  116. ecShardTickChan := time.Tick(17 * sleepInterval)
  117. for {
  118. select {
  119. case volumeMessage := <-vs.store.NewVolumesChan:
  120. deltaBeat := &master_pb.Heartbeat{
  121. NewVolumes: []*master_pb.VolumeShortInformationMessage{
  122. &volumeMessage,
  123. },
  124. }
  125. glog.V(1).Infof("volume server %s:%d adds volume %d", vs.store.Ip, vs.store.Port, volumeMessage.Id)
  126. if err = stream.Send(deltaBeat); err != nil {
  127. glog.V(0).Infof("Volume Server Failed to update to master %s: %v", masterNode, err)
  128. return "", err
  129. }
  130. case ecShardMessage := <-vs.store.NewEcShardsChan:
  131. deltaBeat := &master_pb.Heartbeat{
  132. NewEcShards: []*master_pb.VolumeEcShardInformationMessage{
  133. &ecShardMessage,
  134. },
  135. }
  136. glog.V(1).Infof("volume server %s:%d adds ec shard %d:%d", vs.store.Ip, vs.store.Port, ecShardMessage.Id,
  137. erasure_coding.ShardBits(ecShardMessage.EcIndexBits).ShardIds())
  138. if err = stream.Send(deltaBeat); err != nil {
  139. glog.V(0).Infof("Volume Server Failed to update to master %s: %v", masterNode, err)
  140. return "", err
  141. }
  142. case volumeMessage := <-vs.store.DeletedVolumesChan:
  143. deltaBeat := &master_pb.Heartbeat{
  144. DeletedVolumes: []*master_pb.VolumeShortInformationMessage{
  145. &volumeMessage,
  146. },
  147. }
  148. glog.V(1).Infof("volume server %s:%d deletes volume %d", vs.store.Ip, vs.store.Port, volumeMessage.Id)
  149. if err = stream.Send(deltaBeat); err != nil {
  150. glog.V(0).Infof("Volume Server Failed to update to master %s: %v", masterNode, err)
  151. return "", err
  152. }
  153. case ecShardMessage := <-vs.store.DeletedEcShardsChan:
  154. deltaBeat := &master_pb.Heartbeat{
  155. DeletedEcShards: []*master_pb.VolumeEcShardInformationMessage{
  156. &ecShardMessage,
  157. },
  158. }
  159. glog.V(1).Infof("volume server %s:%d deletes ec shard %d:%d", vs.store.Ip, vs.store.Port, ecShardMessage.Id,
  160. erasure_coding.ShardBits(ecShardMessage.EcIndexBits).ShardIds())
  161. if err = stream.Send(deltaBeat); err != nil {
  162. glog.V(0).Infof("Volume Server Failed to update to master %s: %v", masterNode, err)
  163. return "", err
  164. }
  165. case <-volumeTickChan:
  166. glog.V(4).Infof("volume server %s:%d heartbeat", vs.store.Ip, vs.store.Port)
  167. if err = stream.Send(vs.store.CollectHeartbeat()); err != nil {
  168. glog.V(0).Infof("Volume Server Failed to talk with master %s: %v", masterNode, err)
  169. return "", err
  170. }
  171. case <-ecShardTickChan:
  172. glog.V(4).Infof("volume server %s:%d ec heartbeat", vs.store.Ip, vs.store.Port)
  173. if err = stream.Send(vs.store.CollectErasureCodingHeartbeat()); err != nil {
  174. glog.V(0).Infof("Volume Server Failed to talk with master %s: %v", masterNode, err)
  175. return "", err
  176. }
  177. case err = <-doneChan:
  178. return
  179. case <-vs.stopChan:
  180. return
  181. }
  182. }
  183. }