You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

239 lines
7.8 KiB

5 years ago
5 years ago
4 years ago
  1. package weed_server
  2. import (
  3. "fmt"
  4. "github.com/chrislusf/seaweedfs/weed/operation"
  5. "time"
  6. "google.golang.org/grpc"
  7. "github.com/chrislusf/seaweedfs/weed/pb"
  8. "github.com/chrislusf/seaweedfs/weed/security"
  9. "github.com/chrislusf/seaweedfs/weed/storage/backend"
  10. "github.com/chrislusf/seaweedfs/weed/storage/erasure_coding"
  11. "golang.org/x/net/context"
  12. "github.com/chrislusf/seaweedfs/weed/glog"
  13. "github.com/chrislusf/seaweedfs/weed/pb/master_pb"
  14. "github.com/chrislusf/seaweedfs/weed/util"
  15. )
  16. func (vs *VolumeServer) GetMaster() string {
  17. return vs.currentMaster
  18. }
  19. func (vs *VolumeServer) checkWithMaster() (err error) {
  20. isConnected := false
  21. for !isConnected {
  22. for _, master := range vs.SeedMasterNodes {
  23. err = operation.WithMasterServerClient(master, vs.grpcDialOption, func(masterClient master_pb.SeaweedClient) error {
  24. resp, err := masterClient.GetMasterConfiguration(context.Background(), &master_pb.GetMasterConfigurationRequest{})
  25. if err != nil {
  26. return fmt.Errorf("get master %s configuration: %v", master, err)
  27. }
  28. vs.metricsAddress, vs.metricsIntervalSec = resp.MetricsAddress, int(resp.MetricsIntervalSeconds)
  29. backend.LoadFromPbStorageBackends(resp.StorageBackends)
  30. return nil
  31. })
  32. if err == nil {
  33. return
  34. } else {
  35. glog.V(0).Infof("checkWithMaster %s: %v", master, err)
  36. }
  37. }
  38. time.Sleep(1790 * time.Millisecond)
  39. }
  40. return
  41. }
  42. func (vs *VolumeServer) heartbeat() {
  43. glog.V(0).Infof("Volume server start with seed master nodes: %v", vs.SeedMasterNodes)
  44. vs.store.SetDataCenter(vs.dataCenter)
  45. vs.store.SetRack(vs.rack)
  46. grpcDialOption := security.LoadClientTLS(util.GetViper(), "grpc.volume")
  47. var err error
  48. var newLeader string
  49. for vs.isHeartbeating {
  50. for _, master := range vs.SeedMasterNodes {
  51. if newLeader != "" {
  52. // the new leader may actually is the same master
  53. // need to wait a bit before adding itself
  54. time.Sleep(3 * time.Second)
  55. master = newLeader
  56. }
  57. masterGrpcAddress, parseErr := pb.ParseServerToGrpcAddress(master)
  58. if parseErr != nil {
  59. glog.V(0).Infof("failed to parse master grpc %v: %v", masterGrpcAddress, parseErr)
  60. continue
  61. }
  62. vs.store.MasterAddress = master
  63. newLeader, err = vs.doHeartbeat(master, masterGrpcAddress, grpcDialOption, time.Duration(vs.pulseSeconds)*time.Second)
  64. if err != nil {
  65. glog.V(0).Infof("heartbeat error: %v", err)
  66. time.Sleep(time.Duration(vs.pulseSeconds) * time.Second)
  67. newLeader = ""
  68. vs.store.MasterAddress = ""
  69. }
  70. if !vs.isHeartbeating {
  71. break
  72. }
  73. }
  74. }
  75. }
  76. func (vs *VolumeServer) StopHeartbeat() (isAlreadyStopping bool) {
  77. if !vs.isHeartbeating {
  78. return true
  79. }
  80. vs.isHeartbeating = false
  81. close(vs.stopChan)
  82. return false
  83. }
  84. func (vs *VolumeServer) doHeartbeat(masterNode, masterGrpcAddress string, grpcDialOption grpc.DialOption, sleepInterval time.Duration) (newLeader string, err error) {
  85. ctx, cancel := context.WithCancel(context.Background())
  86. defer cancel()
  87. grpcConection, err := pb.GrpcDial(ctx, masterGrpcAddress, grpcDialOption)
  88. if err != nil {
  89. return "", fmt.Errorf("fail to dial %s : %v", masterNode, err)
  90. }
  91. defer grpcConection.Close()
  92. client := master_pb.NewSeaweedClient(grpcConection)
  93. stream, err := client.SendHeartbeat(ctx)
  94. if err != nil {
  95. glog.V(0).Infof("SendHeartbeat to %s: %v", masterNode, err)
  96. return "", err
  97. }
  98. glog.V(0).Infof("Heartbeat to: %v", masterNode)
  99. vs.currentMaster = masterNode
  100. doneChan := make(chan error, 1)
  101. go func() {
  102. for {
  103. in, err := stream.Recv()
  104. if err != nil {
  105. doneChan <- err
  106. return
  107. }
  108. if in.GetVolumeSizeLimit() != 0 && vs.store.GetVolumeSizeLimit() != in.GetVolumeSizeLimit() {
  109. vs.store.SetVolumeSizeLimit(in.GetVolumeSizeLimit())
  110. if vs.store.MaybeAdjustVolumeMax() {
  111. if err = stream.Send(vs.store.CollectHeartbeat()); err != nil {
  112. glog.V(0).Infof("Volume Server Failed to talk with master %s: %v", vs.currentMaster, err)
  113. }
  114. }
  115. }
  116. if in.GetLeader() != "" && vs.currentMaster != in.GetLeader() {
  117. glog.V(0).Infof("Volume Server found a new master newLeader: %v instead of %v", in.GetLeader(), vs.currentMaster)
  118. newLeader = in.GetLeader()
  119. doneChan <- nil
  120. return
  121. }
  122. }
  123. }()
  124. if err = stream.Send(vs.store.CollectHeartbeat()); err != nil {
  125. glog.V(0).Infof("Volume Server Failed to talk with master %s: %v", masterNode, err)
  126. return "", err
  127. }
  128. if err = stream.Send(vs.store.CollectErasureCodingHeartbeat()); err != nil {
  129. glog.V(0).Infof("Volume Server Failed to talk with master %s: %v", masterNode, err)
  130. return "", err
  131. }
  132. volumeTickChan := time.Tick(sleepInterval)
  133. ecShardTickChan := time.Tick(17 * sleepInterval)
  134. for {
  135. select {
  136. case volumeMessage := <-vs.store.NewVolumesChan:
  137. deltaBeat := &master_pb.Heartbeat{
  138. NewVolumes: []*master_pb.VolumeShortInformationMessage{
  139. &volumeMessage,
  140. },
  141. }
  142. glog.V(1).Infof("volume server %s:%d adds volume %d", vs.store.Ip, vs.store.Port, volumeMessage.Id)
  143. if err = stream.Send(deltaBeat); err != nil {
  144. glog.V(0).Infof("Volume Server Failed to update to master %s: %v", masterNode, err)
  145. return "", err
  146. }
  147. case ecShardMessage := <-vs.store.NewEcShardsChan:
  148. deltaBeat := &master_pb.Heartbeat{
  149. NewEcShards: []*master_pb.VolumeEcShardInformationMessage{
  150. &ecShardMessage,
  151. },
  152. }
  153. glog.V(1).Infof("volume server %s:%d adds ec shard %d:%d", vs.store.Ip, vs.store.Port, ecShardMessage.Id,
  154. erasure_coding.ShardBits(ecShardMessage.EcIndexBits).ShardIds())
  155. if err = stream.Send(deltaBeat); err != nil {
  156. glog.V(0).Infof("Volume Server Failed to update to master %s: %v", masterNode, err)
  157. return "", err
  158. }
  159. case volumeMessage := <-vs.store.DeletedVolumesChan:
  160. deltaBeat := &master_pb.Heartbeat{
  161. DeletedVolumes: []*master_pb.VolumeShortInformationMessage{
  162. &volumeMessage,
  163. },
  164. }
  165. glog.V(1).Infof("volume server %s:%d deletes volume %d", vs.store.Ip, vs.store.Port, volumeMessage.Id)
  166. if err = stream.Send(deltaBeat); err != nil {
  167. glog.V(0).Infof("Volume Server Failed to update to master %s: %v", masterNode, err)
  168. return "", err
  169. }
  170. case ecShardMessage := <-vs.store.DeletedEcShardsChan:
  171. deltaBeat := &master_pb.Heartbeat{
  172. DeletedEcShards: []*master_pb.VolumeEcShardInformationMessage{
  173. &ecShardMessage,
  174. },
  175. }
  176. glog.V(1).Infof("volume server %s:%d deletes ec shard %d:%d", vs.store.Ip, vs.store.Port, ecShardMessage.Id,
  177. erasure_coding.ShardBits(ecShardMessage.EcIndexBits).ShardIds())
  178. if err = stream.Send(deltaBeat); err != nil {
  179. glog.V(0).Infof("Volume Server Failed to update to master %s: %v", masterNode, err)
  180. return "", err
  181. }
  182. case <-volumeTickChan:
  183. glog.V(4).Infof("volume server %s:%d heartbeat", vs.store.Ip, vs.store.Port)
  184. if err = stream.Send(vs.store.CollectHeartbeat()); err != nil {
  185. glog.V(0).Infof("Volume Server Failed to talk with master %s: %v", masterNode, err)
  186. return "", err
  187. }
  188. case <-ecShardTickChan:
  189. glog.V(4).Infof("volume server %s:%d ec heartbeat", vs.store.Ip, vs.store.Port)
  190. if err = stream.Send(vs.store.CollectErasureCodingHeartbeat()); err != nil {
  191. glog.V(0).Infof("Volume Server Failed to talk with master %s: %v", masterNode, err)
  192. return "", err
  193. }
  194. case err = <-doneChan:
  195. return
  196. case <-vs.stopChan:
  197. var volumeMessages []*master_pb.VolumeInformationMessage
  198. emptyBeat := &master_pb.Heartbeat{
  199. Ip: vs.store.Ip,
  200. Port: uint32(vs.store.Port),
  201. PublicUrl: vs.store.PublicUrl,
  202. MaxVolumeCount: uint32(0),
  203. MaxFileKey: uint64(0),
  204. DataCenter: vs.store.GetDataCenter(),
  205. Rack: vs.store.GetRack(),
  206. Volumes: volumeMessages,
  207. HasNoVolumes: len(volumeMessages) == 0,
  208. }
  209. glog.V(1).Infof("volume server %s:%d stops and deletes all volumes", vs.store.Ip, vs.store.Port)
  210. if err = stream.Send(emptyBeat); err != nil {
  211. glog.V(0).Infof("Volume Server Failed to update to master %s: %v", masterNode, err)
  212. return "", err
  213. }
  214. return
  215. }
  216. }
  217. }