You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

276 lines
8.8 KiB

3 years ago
2 years ago
3 years ago
3 years ago
3 years ago
3 years ago
8 months ago
8 months ago
4 years ago
4 years ago
4 years ago
  1. package weed_server
  2. import (
  3. "fmt"
  4. "os"
  5. "time"
  6. "github.com/seaweedfs/seaweedfs/weed/operation"
  7. "google.golang.org/grpc"
  8. "github.com/seaweedfs/seaweedfs/weed/pb"
  9. "github.com/seaweedfs/seaweedfs/weed/security"
  10. "github.com/seaweedfs/seaweedfs/weed/storage/backend"
  11. "github.com/seaweedfs/seaweedfs/weed/storage/erasure_coding"
  12. "golang.org/x/net/context"
  13. "github.com/seaweedfs/seaweedfs/weed/glog"
  14. "github.com/seaweedfs/seaweedfs/weed/pb/master_pb"
  15. "github.com/seaweedfs/seaweedfs/weed/util"
  16. )
  17. func (vs *VolumeServer) GetMaster(ctx context.Context) pb.ServerAddress {
  18. return vs.currentMaster
  19. }
  20. func (vs *VolumeServer) checkWithMaster() (err error) {
  21. for {
  22. for _, master := range vs.SeedMasterNodes {
  23. err = operation.WithMasterServerClient(false, master, vs.grpcDialOption, func(masterClient master_pb.SeaweedClient) error {
  24. resp, err := masterClient.GetMasterConfiguration(context.Background(), &master_pb.GetMasterConfigurationRequest{})
  25. if err != nil {
  26. return fmt.Errorf("get master %s configuration: %v", master, err)
  27. }
  28. vs.metricsAddress, vs.metricsIntervalSec = resp.MetricsAddress, int(resp.MetricsIntervalSeconds)
  29. backend.LoadFromPbStorageBackends(resp.StorageBackends)
  30. return nil
  31. })
  32. if err == nil {
  33. return
  34. } else {
  35. glog.V(0).Infof("checkWithMaster %s: %v", master, err)
  36. }
  37. }
  38. time.Sleep(1790 * time.Millisecond)
  39. }
  40. }
  41. func (vs *VolumeServer) heartbeat() {
  42. glog.V(0).Infof("Volume server start with seed master nodes: %v", vs.SeedMasterNodes)
  43. vs.store.SetDataCenter(vs.dataCenter)
  44. vs.store.SetRack(vs.rack)
  45. grpcDialOption := security.LoadClientTLS(util.GetViper(), "grpc.volume")
  46. var err error
  47. var newLeader pb.ServerAddress
  48. for vs.isHeartbeating {
  49. for _, master := range vs.SeedMasterNodes {
  50. if newLeader != "" {
  51. // the new leader may actually is the same master
  52. // need to wait a bit before adding itself
  53. time.Sleep(3 * time.Second)
  54. master = newLeader
  55. }
  56. vs.store.MasterAddress = master
  57. newLeader, err = vs.doHeartbeat(master, grpcDialOption, time.Duration(vs.pulseSeconds)*time.Second)
  58. if err != nil {
  59. glog.V(0).Infof("heartbeat to %s error: %v", master, err)
  60. time.Sleep(time.Duration(vs.pulseSeconds) * time.Second)
  61. newLeader = ""
  62. vs.store.MasterAddress = ""
  63. }
  64. if !vs.isHeartbeating {
  65. break
  66. }
  67. }
  68. }
  69. }
  70. func (vs *VolumeServer) StopHeartbeat() (isAlreadyStopping bool) {
  71. if !vs.isHeartbeating {
  72. return true
  73. }
  74. vs.isHeartbeating = false
  75. close(vs.stopChan)
  76. return false
  77. }
  78. func (vs *VolumeServer) doHeartbeat(masterAddress pb.ServerAddress, grpcDialOption grpc.DialOption, sleepInterval time.Duration) (newLeader pb.ServerAddress, err error) {
  79. ctx, cancel := context.WithCancel(context.Background())
  80. defer cancel()
  81. grpcConnection, err := pb.GrpcDial(ctx, masterAddress.ToGrpcAddress(), false, grpcDialOption)
  82. if err != nil {
  83. return "", fmt.Errorf("fail to dial %s : %v", masterAddress, err)
  84. }
  85. defer grpcConnection.Close()
  86. client := master_pb.NewSeaweedClient(grpcConnection)
  87. stream, err := client.SendHeartbeat(ctx)
  88. if err != nil {
  89. glog.V(0).Infof("SendHeartbeat to %s: %v", masterAddress, err)
  90. return "", err
  91. }
  92. glog.V(0).Infof("Heartbeat to: %v", masterAddress)
  93. vs.currentMaster = masterAddress
  94. doneChan := make(chan error, 1)
  95. go func() {
  96. for {
  97. in, err := stream.Recv()
  98. if err != nil {
  99. doneChan <- err
  100. return
  101. }
  102. if len(in.DuplicatedUuids) > 0 {
  103. var duplicateDir []string
  104. for _, loc := range vs.store.Locations {
  105. for _, uuid := range in.DuplicatedUuids {
  106. if uuid == loc.DirectoryUuid {
  107. duplicateDir = append(duplicateDir, loc.Directory)
  108. }
  109. }
  110. }
  111. glog.Errorf("Shut down Volume Server due to duplicate volume directories: %v", duplicateDir)
  112. os.Exit(1)
  113. }
  114. volumeOptsChanged := false
  115. if vs.store.GetPreallocate() != in.GetPreallocate() {
  116. vs.store.SetPreallocate(in.GetPreallocate())
  117. volumeOptsChanged = true
  118. }
  119. if in.GetVolumeSizeLimit() != 0 && vs.store.GetVolumeSizeLimit() != in.GetVolumeSizeLimit() {
  120. vs.store.SetVolumeSizeLimit(in.GetVolumeSizeLimit())
  121. volumeOptsChanged = true
  122. }
  123. if volumeOptsChanged {
  124. if vs.store.MaybeAdjustVolumeMax() {
  125. if err = stream.Send(vs.store.CollectHeartbeat()); err != nil {
  126. glog.V(0).Infof("Volume Server Failed to talk with master %s: %v", vs.currentMaster, err)
  127. return
  128. }
  129. }
  130. }
  131. if in.GetLeader() != "" && string(vs.currentMaster) != in.GetLeader() {
  132. glog.V(0).Infof("Volume Server found a new master newLeader: %v instead of %v", in.GetLeader(), vs.currentMaster)
  133. newLeader = pb.ServerAddress(in.GetLeader())
  134. doneChan <- nil
  135. return
  136. }
  137. }
  138. }()
  139. if err = stream.Send(vs.store.CollectHeartbeat()); err != nil {
  140. glog.V(0).Infof("Volume Server Failed to talk with master %s: %v", masterAddress, err)
  141. return "", err
  142. }
  143. if err = stream.Send(vs.store.CollectErasureCodingHeartbeat()); err != nil {
  144. glog.V(0).Infof("Volume Server Failed to talk with master %s: %v", masterAddress, err)
  145. return "", err
  146. }
  147. volumeTickChan := time.NewTicker(sleepInterval)
  148. defer volumeTickChan.Stop()
  149. ecShardTickChan := time.NewTicker(17 * sleepInterval)
  150. defer ecShardTickChan.Stop()
  151. dataCenter := vs.store.GetDataCenter()
  152. rack := vs.store.GetRack()
  153. ip := vs.store.Ip
  154. port := uint32(vs.store.Port)
  155. for {
  156. select {
  157. case volumeMessage := <-vs.store.NewVolumesChan:
  158. deltaBeat := &master_pb.Heartbeat{
  159. Ip: ip,
  160. Port: port,
  161. DataCenter: dataCenter,
  162. Rack: rack,
  163. NewVolumes: []*master_pb.VolumeShortInformationMessage{
  164. &volumeMessage,
  165. },
  166. }
  167. glog.V(0).Infof("volume server %s:%d adds volume %d", vs.store.Ip, vs.store.Port, volumeMessage.Id)
  168. if err = stream.Send(deltaBeat); err != nil {
  169. glog.V(0).Infof("Volume Server Failed to update to master %s: %v", masterAddress, err)
  170. return "", err
  171. }
  172. case ecShardMessage := <-vs.store.NewEcShardsChan:
  173. deltaBeat := &master_pb.Heartbeat{
  174. Ip: ip,
  175. Port: port,
  176. DataCenter: dataCenter,
  177. Rack: rack,
  178. NewEcShards: []*master_pb.VolumeEcShardInformationMessage{
  179. &ecShardMessage,
  180. },
  181. }
  182. glog.V(0).Infof("volume server %s:%d adds ec shard %d:%d", vs.store.Ip, vs.store.Port, ecShardMessage.Id,
  183. erasure_coding.ShardBits(ecShardMessage.EcIndexBits).ShardIds())
  184. if err = stream.Send(deltaBeat); err != nil {
  185. glog.V(0).Infof("Volume Server Failed to update to master %s: %v", masterAddress, err)
  186. return "", err
  187. }
  188. case volumeMessage := <-vs.store.DeletedVolumesChan:
  189. deltaBeat := &master_pb.Heartbeat{
  190. Ip: ip,
  191. Port: port,
  192. DataCenter: dataCenter,
  193. Rack: rack,
  194. DeletedVolumes: []*master_pb.VolumeShortInformationMessage{
  195. &volumeMessage,
  196. },
  197. }
  198. glog.V(0).Infof("volume server %s:%d deletes volume %d", vs.store.Ip, vs.store.Port, volumeMessage.Id)
  199. if err = stream.Send(deltaBeat); err != nil {
  200. glog.V(0).Infof("Volume Server Failed to update to master %s: %v", masterAddress, err)
  201. return "", err
  202. }
  203. case ecShardMessage := <-vs.store.DeletedEcShardsChan:
  204. deltaBeat := &master_pb.Heartbeat{
  205. Ip: ip,
  206. Port: port,
  207. DataCenter: dataCenter,
  208. Rack: rack,
  209. DeletedEcShards: []*master_pb.VolumeEcShardInformationMessage{
  210. &ecShardMessage,
  211. },
  212. }
  213. glog.V(0).Infof("volume server %s:%d deletes ec shard %d:%d", vs.store.Ip, vs.store.Port, ecShardMessage.Id,
  214. erasure_coding.ShardBits(ecShardMessage.EcIndexBits).ShardIds())
  215. if err = stream.Send(deltaBeat); err != nil {
  216. glog.V(0).Infof("Volume Server Failed to update to master %s: %v", masterAddress, err)
  217. return "", err
  218. }
  219. case <-volumeTickChan.C:
  220. glog.V(4).Infof("volume server %s:%d heartbeat", vs.store.Ip, vs.store.Port)
  221. vs.store.MaybeAdjustVolumeMax()
  222. if err = stream.Send(vs.store.CollectHeartbeat()); err != nil {
  223. glog.V(0).Infof("Volume Server Failed to talk with master %s: %v", masterAddress, err)
  224. return "", err
  225. }
  226. case <-ecShardTickChan.C:
  227. glog.V(4).Infof("volume server %s:%d ec heartbeat", vs.store.Ip, vs.store.Port)
  228. if err = stream.Send(vs.store.CollectErasureCodingHeartbeat()); err != nil {
  229. glog.V(0).Infof("Volume Server Failed to talk with master %s: %v", masterAddress, err)
  230. return "", err
  231. }
  232. case err = <-doneChan:
  233. return
  234. case <-vs.stopChan:
  235. var volumeMessages []*master_pb.VolumeInformationMessage
  236. emptyBeat := &master_pb.Heartbeat{
  237. Ip: ip,
  238. Port: port,
  239. PublicUrl: vs.store.PublicUrl,
  240. MaxFileKey: uint64(0),
  241. DataCenter: dataCenter,
  242. Rack: rack,
  243. Volumes: volumeMessages,
  244. HasNoVolumes: len(volumeMessages) == 0,
  245. }
  246. glog.V(1).Infof("volume server %s:%d stops and deletes all volumes", vs.store.Ip, vs.store.Port)
  247. if err = stream.Send(emptyBeat); err != nil {
  248. glog.V(0).Infof("Volume Server Failed to update to master %s: %v", masterAddress, err)
  249. return "", err
  250. }
  251. return
  252. }
  253. }
  254. }