You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

233 lines
7.7 KiB

5 years ago
3 years ago
3 years ago
4 years ago
4 years ago
  1. package weed_server
  2. import (
  3. "fmt"
  4. "github.com/chrislusf/seaweedfs/weed/operation"
  5. "time"
  6. "google.golang.org/grpc"
  7. "github.com/chrislusf/seaweedfs/weed/pb"
  8. "github.com/chrislusf/seaweedfs/weed/security"
  9. "github.com/chrislusf/seaweedfs/weed/storage/backend"
  10. "github.com/chrislusf/seaweedfs/weed/storage/erasure_coding"
  11. "golang.org/x/net/context"
  12. "github.com/chrislusf/seaweedfs/weed/glog"
  13. "github.com/chrislusf/seaweedfs/weed/pb/master_pb"
  14. "github.com/chrislusf/seaweedfs/weed/util"
  15. )
  16. func (vs *VolumeServer) GetMaster() pb.ServerAddress {
  17. return vs.currentMaster
  18. }
  19. func (vs *VolumeServer) checkWithMaster() (err error) {
  20. for {
  21. for _, master := range vs.SeedMasterNodes {
  22. err = operation.WithMasterServerClient(master, vs.grpcDialOption, func(masterClient master_pb.SeaweedClient) error {
  23. resp, err := masterClient.GetMasterConfiguration(context.Background(), &master_pb.GetMasterConfigurationRequest{})
  24. if err != nil {
  25. return fmt.Errorf("get master %s configuration: %v", master, err)
  26. }
  27. vs.metricsAddress, vs.metricsIntervalSec = resp.MetricsAddress, int(resp.MetricsIntervalSeconds)
  28. backend.LoadFromPbStorageBackends(resp.StorageBackends)
  29. return nil
  30. })
  31. if err == nil {
  32. return
  33. } else {
  34. glog.V(0).Infof("checkWithMaster %s: %v", master, err)
  35. }
  36. }
  37. time.Sleep(1790 * time.Millisecond)
  38. }
  39. }
  40. func (vs *VolumeServer) heartbeat() {
  41. glog.V(0).Infof("Volume server start with seed master nodes: %v", vs.SeedMasterNodes)
  42. vs.store.SetDataCenter(vs.dataCenter)
  43. vs.store.SetRack(vs.rack)
  44. grpcDialOption := security.LoadClientTLS(util.GetViper(), "grpc.volume")
  45. var err error
  46. var newLeader pb.ServerAddress
  47. for vs.isHeartbeating {
  48. for _, master := range vs.SeedMasterNodes {
  49. if newLeader != "" {
  50. // the new leader may actually is the same master
  51. // need to wait a bit before adding itself
  52. time.Sleep(3 * time.Second)
  53. master = newLeader
  54. }
  55. vs.store.MasterAddress = master
  56. newLeader, err = vs.doHeartbeat(master, grpcDialOption, time.Duration(vs.pulseSeconds)*time.Second)
  57. if err != nil {
  58. glog.V(0).Infof("heartbeat error: %v", err)
  59. time.Sleep(time.Duration(vs.pulseSeconds) * time.Second)
  60. newLeader = ""
  61. vs.store.MasterAddress = ""
  62. }
  63. if !vs.isHeartbeating {
  64. break
  65. }
  66. }
  67. }
  68. }
  69. func (vs *VolumeServer) StopHeartbeat() (isAlreadyStopping bool) {
  70. if !vs.isHeartbeating {
  71. return true
  72. }
  73. vs.isHeartbeating = false
  74. close(vs.stopChan)
  75. return false
  76. }
  77. func (vs *VolumeServer) doHeartbeat(masterAddress pb.ServerAddress, grpcDialOption grpc.DialOption, sleepInterval time.Duration) (newLeader pb.ServerAddress, err error) {
  78. ctx, cancel := context.WithCancel(context.Background())
  79. defer cancel()
  80. grpcConection, err := pb.GrpcDial(ctx, masterAddress.ToGrpcAddress(), grpcDialOption)
  81. if err != nil {
  82. return "", fmt.Errorf("fail to dial %s : %v", masterAddress, err)
  83. }
  84. defer grpcConection.Close()
  85. client := master_pb.NewSeaweedClient(grpcConection)
  86. stream, err := client.SendHeartbeat(ctx)
  87. if err != nil {
  88. glog.V(0).Infof("SendHeartbeat to %s: %v", masterAddress, err)
  89. return "", err
  90. }
  91. glog.V(0).Infof("Heartbeat to: %v", masterAddress)
  92. vs.currentMaster = masterAddress
  93. doneChan := make(chan error, 1)
  94. go func() {
  95. for {
  96. in, err := stream.Recv()
  97. if err != nil {
  98. doneChan <- err
  99. return
  100. }
  101. if in.GetVolumeSizeLimit() != 0 && vs.store.GetVolumeSizeLimit() != in.GetVolumeSizeLimit() {
  102. vs.store.SetVolumeSizeLimit(in.GetVolumeSizeLimit())
  103. if vs.store.MaybeAdjustVolumeMax() {
  104. if err = stream.Send(vs.store.CollectHeartbeat()); err != nil {
  105. glog.V(0).Infof("Volume Server Failed to talk with master %s: %v", vs.currentMaster, err)
  106. return
  107. }
  108. }
  109. }
  110. if in.GetLeader() != "" && string(vs.currentMaster) != in.GetLeader() {
  111. glog.V(0).Infof("Volume Server found a new master newLeader: %v instead of %v", in.GetLeader(), vs.currentMaster)
  112. newLeader = pb.ServerAddress(in.GetLeader())
  113. doneChan <- nil
  114. return
  115. }
  116. }
  117. }()
  118. if err = stream.Send(vs.store.CollectHeartbeat()); err != nil {
  119. glog.V(0).Infof("Volume Server Failed to talk with master %s: %v", masterAddress, err)
  120. return "", err
  121. }
  122. if err = stream.Send(vs.store.CollectErasureCodingHeartbeat()); err != nil {
  123. glog.V(0).Infof("Volume Server Failed to talk with master %s: %v", masterAddress, err)
  124. return "", err
  125. }
  126. volumeTickChan := time.Tick(sleepInterval)
  127. ecShardTickChan := time.Tick(17 * sleepInterval)
  128. for {
  129. select {
  130. case volumeMessage := <-vs.store.NewVolumesChan:
  131. deltaBeat := &master_pb.Heartbeat{
  132. NewVolumes: []*master_pb.VolumeShortInformationMessage{
  133. &volumeMessage,
  134. },
  135. }
  136. glog.V(0).Infof("volume server %s:%d adds volume %d", vs.store.Ip, vs.store.Port, volumeMessage.Id)
  137. if err = stream.Send(deltaBeat); err != nil {
  138. glog.V(0).Infof("Volume Server Failed to update to master %s: %v", masterAddress, err)
  139. return "", err
  140. }
  141. case ecShardMessage := <-vs.store.NewEcShardsChan:
  142. deltaBeat := &master_pb.Heartbeat{
  143. NewEcShards: []*master_pb.VolumeEcShardInformationMessage{
  144. &ecShardMessage,
  145. },
  146. }
  147. glog.V(0).Infof("volume server %s:%d adds ec shard %d:%d", vs.store.Ip, vs.store.Port, ecShardMessage.Id,
  148. erasure_coding.ShardBits(ecShardMessage.EcIndexBits).ShardIds())
  149. if err = stream.Send(deltaBeat); err != nil {
  150. glog.V(0).Infof("Volume Server Failed to update to master %s: %v", masterAddress, err)
  151. return "", err
  152. }
  153. case volumeMessage := <-vs.store.DeletedVolumesChan:
  154. deltaBeat := &master_pb.Heartbeat{
  155. DeletedVolumes: []*master_pb.VolumeShortInformationMessage{
  156. &volumeMessage,
  157. },
  158. }
  159. glog.V(0).Infof("volume server %s:%d deletes volume %d", vs.store.Ip, vs.store.Port, volumeMessage.Id)
  160. if err = stream.Send(deltaBeat); err != nil {
  161. glog.V(0).Infof("Volume Server Failed to update to master %s: %v", masterAddress, err)
  162. return "", err
  163. }
  164. case ecShardMessage := <-vs.store.DeletedEcShardsChan:
  165. deltaBeat := &master_pb.Heartbeat{
  166. DeletedEcShards: []*master_pb.VolumeEcShardInformationMessage{
  167. &ecShardMessage,
  168. },
  169. }
  170. glog.V(0).Infof("volume server %s:%d deletes ec shard %d:%d", vs.store.Ip, vs.store.Port, ecShardMessage.Id,
  171. erasure_coding.ShardBits(ecShardMessage.EcIndexBits).ShardIds())
  172. if err = stream.Send(deltaBeat); err != nil {
  173. glog.V(0).Infof("Volume Server Failed to update to master %s: %v", masterAddress, err)
  174. return "", err
  175. }
  176. case <-volumeTickChan:
  177. glog.V(4).Infof("volume server %s:%d heartbeat", vs.store.Ip, vs.store.Port)
  178. vs.store.MaybeAdjustVolumeMax()
  179. if err = stream.Send(vs.store.CollectHeartbeat()); err != nil {
  180. glog.V(0).Infof("Volume Server Failed to talk with master %s: %v", masterAddress, err)
  181. return "", err
  182. }
  183. case <-ecShardTickChan:
  184. glog.V(4).Infof("volume server %s:%d ec heartbeat", vs.store.Ip, vs.store.Port)
  185. if err = stream.Send(vs.store.CollectErasureCodingHeartbeat()); err != nil {
  186. glog.V(0).Infof("Volume Server Failed to talk with master %s: %v", masterAddress, err)
  187. return "", err
  188. }
  189. case err = <-doneChan:
  190. return
  191. case <-vs.stopChan:
  192. var volumeMessages []*master_pb.VolumeInformationMessage
  193. emptyBeat := &master_pb.Heartbeat{
  194. Ip: vs.store.Ip,
  195. Port: uint32(vs.store.Port),
  196. PublicUrl: vs.store.PublicUrl,
  197. MaxFileKey: uint64(0),
  198. DataCenter: vs.store.GetDataCenter(),
  199. Rack: vs.store.GetRack(),
  200. Volumes: volumeMessages,
  201. HasNoVolumes: len(volumeMessages) == 0,
  202. }
  203. glog.V(1).Infof("volume server %s:%d stops and deletes all volumes", vs.store.Ip, vs.store.Port)
  204. if err = stream.Send(emptyBeat); err != nil {
  205. glog.V(0).Infof("Volume Server Failed to update to master %s: %v", masterAddress, err)
  206. return "", err
  207. }
  208. return
  209. }
  210. }
  211. }