You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

218 lines
7.0 KiB

5 years ago
5 years ago
  1. package weed_server
  2. import (
  3. "fmt"
  4. "github.com/chrislusf/seaweedfs/weed/operation"
  5. "time"
  6. "google.golang.org/grpc"
  7. "github.com/chrislusf/seaweedfs/weed/pb"
  8. "github.com/chrislusf/seaweedfs/weed/security"
  9. "github.com/chrislusf/seaweedfs/weed/storage/backend"
  10. "github.com/chrislusf/seaweedfs/weed/storage/erasure_coding"
  11. "golang.org/x/net/context"
  12. "github.com/chrislusf/seaweedfs/weed/glog"
  13. "github.com/chrislusf/seaweedfs/weed/pb/master_pb"
  14. "github.com/chrislusf/seaweedfs/weed/util"
  15. )
  16. func (vs *VolumeServer) GetMaster() string {
  17. return vs.currentMaster
  18. }
  19. func (vs *VolumeServer) checkWithMaster() (err error) {
  20. for _, master := range vs.SeedMasterNodes {
  21. err = operation.WithMasterServerClient(master, vs.grpcDialOption, func(masterClient master_pb.SeaweedClient) error {
  22. resp, err := masterClient.GetMasterConfiguration(context.Background(), &master_pb.GetMasterConfigurationRequest{})
  23. if err != nil {
  24. return fmt.Errorf("get master %s configuration: %v", master, err)
  25. }
  26. vs.MetricsAddress, vs.MetricsIntervalSec = resp.MetricsAddress, int(resp.MetricsIntervalSeconds)
  27. backend.LoadFromPbStorageBackends(resp.StorageBackends)
  28. return nil
  29. })
  30. if err == nil {
  31. return
  32. } else {
  33. glog.V(0).Infof("checkWithMaster %s: %v", master, err)
  34. }
  35. }
  36. return
  37. }
  38. func (vs *VolumeServer) heartbeat() {
  39. glog.V(0).Infof("Volume server start with seed master nodes: %v", vs.SeedMasterNodes)
  40. vs.store.SetDataCenter(vs.dataCenter)
  41. vs.store.SetRack(vs.rack)
  42. grpcDialOption := security.LoadClientTLS(util.GetViper(), "grpc.volume")
  43. var err error
  44. var newLeader string
  45. for vs.isHeartbeating {
  46. for _, master := range vs.SeedMasterNodes {
  47. if newLeader != "" {
  48. // the new leader may actually is the same master
  49. // need to wait a bit before adding itself
  50. time.Sleep(3 * time.Second)
  51. master = newLeader
  52. }
  53. masterGrpcAddress, parseErr := pb.ParseServerToGrpcAddress(master)
  54. if parseErr != nil {
  55. glog.V(0).Infof("failed to parse master grpc %v: %v", masterGrpcAddress, parseErr)
  56. continue
  57. }
  58. vs.store.MasterAddress = master
  59. newLeader, err = vs.doHeartbeat(master, masterGrpcAddress, grpcDialOption, time.Duration(vs.pulseSeconds)*time.Second)
  60. if err != nil {
  61. glog.V(0).Infof("heartbeat error: %v", err)
  62. time.Sleep(time.Duration(vs.pulseSeconds) * time.Second)
  63. newLeader = ""
  64. vs.store.MasterAddress = ""
  65. }
  66. if !vs.isHeartbeating {
  67. break
  68. }
  69. }
  70. }
  71. }
  72. func (vs *VolumeServer) StopHeartbeat() (isAlreadyStopping bool) {
  73. if !vs.isHeartbeating {
  74. return true
  75. }
  76. vs.isHeartbeating = false
  77. vs.stopChan <- true
  78. return false
  79. }
  80. func (vs *VolumeServer) doHeartbeat(masterNode, masterGrpcAddress string, grpcDialOption grpc.DialOption, sleepInterval time.Duration) (newLeader string, err error) {
  81. ctx, cancel := context.WithCancel(context.Background())
  82. defer cancel()
  83. grpcConection, err := pb.GrpcDial(ctx, masterGrpcAddress, grpcDialOption)
  84. if err != nil {
  85. return "", fmt.Errorf("fail to dial %s : %v", masterNode, err)
  86. }
  87. defer grpcConection.Close()
  88. client := master_pb.NewSeaweedClient(grpcConection)
  89. stream, err := client.SendHeartbeat(ctx)
  90. if err != nil {
  91. glog.V(0).Infof("SendHeartbeat to %s: %v", masterNode, err)
  92. return "", err
  93. }
  94. glog.V(0).Infof("Heartbeat to: %v", masterNode)
  95. vs.currentMaster = masterNode
  96. doneChan := make(chan error, 1)
  97. go func() {
  98. for {
  99. in, err := stream.Recv()
  100. if err != nil {
  101. doneChan <- err
  102. return
  103. }
  104. if in.GetVolumeSizeLimit() != 0 && vs.store.GetVolumeSizeLimit() != in.GetVolumeSizeLimit() {
  105. vs.store.SetVolumeSizeLimit(in.GetVolumeSizeLimit())
  106. if vs.store.MaybeAdjustVolumeMax() {
  107. if err = stream.Send(vs.store.CollectHeartbeat()); err != nil {
  108. glog.V(0).Infof("Volume Server Failed to talk with master %s: %v", vs.currentMaster, err)
  109. }
  110. }
  111. }
  112. if in.GetLeader() != "" && vs.currentMaster != in.GetLeader() {
  113. glog.V(0).Infof("Volume Server found a new master newLeader: %v instead of %v", in.GetLeader(), vs.currentMaster)
  114. newLeader = in.GetLeader()
  115. doneChan <- nil
  116. return
  117. }
  118. }
  119. }()
  120. if err = stream.Send(vs.store.CollectHeartbeat()); err != nil {
  121. glog.V(0).Infof("Volume Server Failed to talk with master %s: %v", masterNode, err)
  122. return "", err
  123. }
  124. if err = stream.Send(vs.store.CollectErasureCodingHeartbeat()); err != nil {
  125. glog.V(0).Infof("Volume Server Failed to talk with master %s: %v", masterNode, err)
  126. return "", err
  127. }
  128. volumeTickChan := time.Tick(sleepInterval)
  129. ecShardTickChan := time.Tick(17 * sleepInterval)
  130. for {
  131. select {
  132. case volumeMessage := <-vs.store.NewVolumesChan:
  133. deltaBeat := &master_pb.Heartbeat{
  134. NewVolumes: []*master_pb.VolumeShortInformationMessage{
  135. &volumeMessage,
  136. },
  137. }
  138. glog.V(1).Infof("volume server %s:%d adds volume %d", vs.store.Ip, vs.store.Port, volumeMessage.Id)
  139. if err = stream.Send(deltaBeat); err != nil {
  140. glog.V(0).Infof("Volume Server Failed to update to master %s: %v", masterNode, err)
  141. return "", err
  142. }
  143. case ecShardMessage := <-vs.store.NewEcShardsChan:
  144. deltaBeat := &master_pb.Heartbeat{
  145. NewEcShards: []*master_pb.VolumeEcShardInformationMessage{
  146. &ecShardMessage,
  147. },
  148. }
  149. glog.V(1).Infof("volume server %s:%d adds ec shard %d:%d", vs.store.Ip, vs.store.Port, ecShardMessage.Id,
  150. erasure_coding.ShardBits(ecShardMessage.EcIndexBits).ShardIds())
  151. if err = stream.Send(deltaBeat); err != nil {
  152. glog.V(0).Infof("Volume Server Failed to update to master %s: %v", masterNode, err)
  153. return "", err
  154. }
  155. case volumeMessage := <-vs.store.DeletedVolumesChan:
  156. deltaBeat := &master_pb.Heartbeat{
  157. DeletedVolumes: []*master_pb.VolumeShortInformationMessage{
  158. &volumeMessage,
  159. },
  160. }
  161. glog.V(1).Infof("volume server %s:%d deletes volume %d", vs.store.Ip, vs.store.Port, volumeMessage.Id)
  162. if err = stream.Send(deltaBeat); err != nil {
  163. glog.V(0).Infof("Volume Server Failed to update to master %s: %v", masterNode, err)
  164. return "", err
  165. }
  166. case ecShardMessage := <-vs.store.DeletedEcShardsChan:
  167. deltaBeat := &master_pb.Heartbeat{
  168. DeletedEcShards: []*master_pb.VolumeEcShardInformationMessage{
  169. &ecShardMessage,
  170. },
  171. }
  172. glog.V(1).Infof("volume server %s:%d deletes ec shard %d:%d", vs.store.Ip, vs.store.Port, ecShardMessage.Id,
  173. erasure_coding.ShardBits(ecShardMessage.EcIndexBits).ShardIds())
  174. if err = stream.Send(deltaBeat); err != nil {
  175. glog.V(0).Infof("Volume Server Failed to update to master %s: %v", masterNode, err)
  176. return "", err
  177. }
  178. case <-volumeTickChan:
  179. glog.V(4).Infof("volume server %s:%d heartbeat", vs.store.Ip, vs.store.Port)
  180. if err = stream.Send(vs.store.CollectHeartbeat()); err != nil {
  181. glog.V(0).Infof("Volume Server Failed to talk with master %s: %v", masterNode, err)
  182. return "", err
  183. }
  184. case <-ecShardTickChan:
  185. glog.V(4).Infof("volume server %s:%d ec heartbeat", vs.store.Ip, vs.store.Port)
  186. if err = stream.Send(vs.store.CollectErasureCodingHeartbeat()); err != nil {
  187. glog.V(0).Infof("Volume Server Failed to talk with master %s: %v", masterNode, err)
  188. return "", err
  189. }
  190. case err = <-doneChan:
  191. return
  192. case <-vs.stopChan:
  193. return
  194. }
  195. }
  196. }