You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

303 lines
8.5 KiB

6 years ago
6 years ago
6 years ago
5 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
5 years ago
5 years ago
  1. package weed_server
  2. import (
  3. "context"
  4. "fmt"
  5. "net"
  6. "strings"
  7. "time"
  8. "github.com/chrislusf/raft"
  9. "google.golang.org/grpc/peer"
  10. "github.com/chrislusf/seaweedfs/weed/glog"
  11. "github.com/chrislusf/seaweedfs/weed/pb/master_pb"
  12. "github.com/chrislusf/seaweedfs/weed/storage/backend"
  13. "github.com/chrislusf/seaweedfs/weed/storage/needle"
  14. "github.com/chrislusf/seaweedfs/weed/topology"
  15. )
  16. func (ms *MasterServer) SendHeartbeat(stream master_pb.Seaweed_SendHeartbeatServer) error {
  17. var dn *topology.DataNode
  18. t := ms.Topo
  19. defer func() {
  20. if dn != nil {
  21. // if the volume server disconnects and reconnects quickly
  22. // the unregister and register can race with each other
  23. t.UnRegisterDataNode(dn)
  24. glog.V(0).Infof("unregister disconnected volume server %s:%d", dn.Ip, dn.Port)
  25. message := &master_pb.VolumeLocation{
  26. Url: dn.Url(),
  27. PublicUrl: dn.PublicUrl,
  28. }
  29. for _, v := range dn.GetVolumes() {
  30. message.DeletedVids = append(message.DeletedVids, uint32(v.Id))
  31. }
  32. for _, s := range dn.GetEcShards() {
  33. message.DeletedVids = append(message.DeletedVids, uint32(s.VolumeId))
  34. }
  35. if len(message.DeletedVids) > 0 {
  36. ms.clientChansLock.RLock()
  37. for _, ch := range ms.clientChans {
  38. ch <- message
  39. }
  40. ms.clientChansLock.RUnlock()
  41. }
  42. }
  43. }()
  44. for {
  45. heartbeat, err := stream.Recv()
  46. if err != nil {
  47. if dn != nil {
  48. glog.Warningf("SendHeartbeat.Recv server %s:%d : %v", dn.Ip, dn.Port, err)
  49. } else {
  50. glog.Warningf("SendHeartbeat.Recv: %v", err)
  51. }
  52. return err
  53. }
  54. t.Sequence.SetMax(heartbeat.MaxFileKey)
  55. if dn == nil {
  56. dcName, rackName := t.Configuration.Locate(heartbeat.Ip, heartbeat.DataCenter, heartbeat.Rack)
  57. dc := t.GetOrCreateDataCenter(dcName)
  58. rack := dc.GetOrCreateRack(rackName)
  59. dn = rack.GetOrCreateDataNode(heartbeat.Ip,
  60. int(heartbeat.Port), heartbeat.PublicUrl,
  61. int64(heartbeat.MaxVolumeCount))
  62. glog.V(0).Infof("added volume server %v:%d", heartbeat.GetIp(), heartbeat.GetPort())
  63. if err := stream.Send(&master_pb.HeartbeatResponse{
  64. VolumeSizeLimit: uint64(ms.option.VolumeSizeLimitMB) * 1024 * 1024,
  65. MetricsAddress: ms.option.MetricsAddress,
  66. MetricsIntervalSeconds: uint32(ms.option.MetricsIntervalSec),
  67. StorageBackends: backend.ToPbStorageBackends(),
  68. }); err != nil {
  69. glog.Warningf("SendHeartbeat.Send volume size to %s:%d %v", dn.Ip, dn.Port, err)
  70. return err
  71. }
  72. }
  73. if heartbeat.MaxVolumeCount != 0 && dn.GetMaxVolumeCount() != int64(heartbeat.MaxVolumeCount) {
  74. delta := int64(heartbeat.MaxVolumeCount) - dn.GetMaxVolumeCount()
  75. dn.UpAdjustMaxVolumeCountDelta(delta)
  76. }
  77. glog.V(4).Infof("master received heartbeat %s", heartbeat.String())
  78. message := &master_pb.VolumeLocation{
  79. Url: dn.Url(),
  80. PublicUrl: dn.PublicUrl,
  81. }
  82. if len(heartbeat.NewVolumes) > 0 || len(heartbeat.DeletedVolumes) > 0 {
  83. // process delta volume ids if exists for fast volume id updates
  84. for _, volInfo := range heartbeat.NewVolumes {
  85. message.NewVids = append(message.NewVids, volInfo.Id)
  86. }
  87. for _, volInfo := range heartbeat.DeletedVolumes {
  88. message.DeletedVids = append(message.DeletedVids, volInfo.Id)
  89. }
  90. // update master internal volume layouts
  91. t.IncrementalSyncDataNodeRegistration(heartbeat.NewVolumes, heartbeat.DeletedVolumes, dn)
  92. }
  93. if len(heartbeat.Volumes) > 0 || heartbeat.HasNoVolumes {
  94. // process heartbeat.Volumes
  95. newVolumes, deletedVolumes := t.SyncDataNodeRegistration(heartbeat.Volumes, dn)
  96. for _, v := range newVolumes {
  97. glog.V(0).Infof("master see new volume %d from %s", uint32(v.Id), dn.Url())
  98. message.NewVids = append(message.NewVids, uint32(v.Id))
  99. }
  100. for _, v := range deletedVolumes {
  101. glog.V(0).Infof("master see deleted volume %d from %s", uint32(v.Id), dn.Url())
  102. message.DeletedVids = append(message.DeletedVids, uint32(v.Id))
  103. }
  104. }
  105. if len(heartbeat.NewEcShards) > 0 || len(heartbeat.DeletedEcShards) > 0 {
  106. // update master internal volume layouts
  107. t.IncrementalSyncDataNodeEcShards(heartbeat.NewEcShards, heartbeat.DeletedEcShards, dn)
  108. for _, s := range heartbeat.NewEcShards {
  109. message.NewVids = append(message.NewVids, s.Id)
  110. }
  111. for _, s := range heartbeat.DeletedEcShards {
  112. if dn.HasVolumesById(needle.VolumeId(s.Id)) {
  113. continue
  114. }
  115. message.DeletedVids = append(message.DeletedVids, s.Id)
  116. }
  117. }
  118. if len(heartbeat.EcShards) > 0 || heartbeat.HasNoEcShards {
  119. glog.V(1).Infof("master recieved ec shards from %s: %+v", dn.Url(), heartbeat.EcShards)
  120. newShards, deletedShards := t.SyncDataNodeEcShards(heartbeat.EcShards, dn)
  121. // broadcast the ec vid changes to master clients
  122. for _, s := range newShards {
  123. message.NewVids = append(message.NewVids, uint32(s.VolumeId))
  124. }
  125. for _, s := range deletedShards {
  126. if dn.HasVolumesById(s.VolumeId) {
  127. continue
  128. }
  129. message.DeletedVids = append(message.DeletedVids, uint32(s.VolumeId))
  130. }
  131. }
  132. if len(message.NewVids) > 0 || len(message.DeletedVids) > 0 {
  133. ms.clientChansLock.RLock()
  134. for host, ch := range ms.clientChans {
  135. glog.V(0).Infof("master send to %s: %s", host, message.String())
  136. ch <- message
  137. }
  138. ms.clientChansLock.RUnlock()
  139. }
  140. // tell the volume servers about the leader
  141. newLeader, err := t.Leader()
  142. if err != nil {
  143. glog.Warningf("SendHeartbeat find leader: %v", err)
  144. return err
  145. }
  146. if err := stream.Send(&master_pb.HeartbeatResponse{
  147. Leader: newLeader,
  148. }); err != nil {
  149. glog.Warningf("SendHeartbeat.Send response to to %s:%d %v", dn.Ip, dn.Port, err)
  150. return err
  151. }
  152. }
  153. }
  154. // KeepConnected keep a stream gRPC call to the master. Used by clients to know the master is up.
  155. // And clients gets the up-to-date list of volume locations
  156. func (ms *MasterServer) KeepConnected(stream master_pb.Seaweed_KeepConnectedServer) error {
  157. req, err := stream.Recv()
  158. if err != nil {
  159. return err
  160. }
  161. if !ms.Topo.IsLeader() {
  162. return ms.informNewLeader(stream)
  163. }
  164. peerAddress := findClientAddress(stream.Context(), req.GrpcPort)
  165. stopChan := make(chan bool)
  166. clientName, messageChan := ms.addClient(req.Name, peerAddress)
  167. defer ms.deleteClient(clientName)
  168. for _, message := range ms.Topo.ToVolumeLocations() {
  169. if err := stream.Send(message); err != nil {
  170. return err
  171. }
  172. }
  173. go func() {
  174. for {
  175. _, err := stream.Recv()
  176. if err != nil {
  177. glog.V(2).Infof("- client %v: %v", clientName, err)
  178. stopChan <- true
  179. break
  180. }
  181. }
  182. }()
  183. ticker := time.NewTicker(5 * time.Second)
  184. for {
  185. select {
  186. case message := <-messageChan:
  187. if err := stream.Send(message); err != nil {
  188. glog.V(0).Infof("=> client %v: %+v", clientName, message)
  189. return err
  190. }
  191. case <-ticker.C:
  192. if !ms.Topo.IsLeader() {
  193. return ms.informNewLeader(stream)
  194. }
  195. case <-stopChan:
  196. return nil
  197. }
  198. }
  199. }
  200. func (ms *MasterServer) informNewLeader(stream master_pb.Seaweed_KeepConnectedServer) error {
  201. leader, err := ms.Topo.Leader()
  202. if err != nil {
  203. glog.Errorf("topo leader: %v", err)
  204. return raft.NotLeaderError
  205. }
  206. if err := stream.Send(&master_pb.VolumeLocation{
  207. Leader: leader,
  208. }); err != nil {
  209. return err
  210. }
  211. return nil
  212. }
  213. func (ms *MasterServer) addClient(clientType string, clientAddress string) (clientName string, messageChan chan *master_pb.VolumeLocation) {
  214. clientName = clientType + "@" + clientAddress
  215. glog.V(0).Infof("+ client %v", clientName)
  216. messageChan = make(chan *master_pb.VolumeLocation)
  217. ms.clientChansLock.Lock()
  218. ms.clientChans[clientName] = messageChan
  219. ms.clientChansLock.Unlock()
  220. return
  221. }
  222. func (ms *MasterServer) deleteClient(clientName string) {
  223. glog.V(0).Infof("- client %v", clientName)
  224. ms.clientChansLock.Lock()
  225. delete(ms.clientChans, clientName)
  226. ms.clientChansLock.Unlock()
  227. }
  228. func findClientAddress(ctx context.Context, grpcPort uint32) string {
  229. // fmt.Printf("FromContext %+v\n", ctx)
  230. pr, ok := peer.FromContext(ctx)
  231. if !ok {
  232. glog.Error("failed to get peer from ctx")
  233. return ""
  234. }
  235. if pr.Addr == net.Addr(nil) {
  236. glog.Error("failed to get peer address")
  237. return ""
  238. }
  239. if grpcPort == 0 {
  240. return pr.Addr.String()
  241. }
  242. if tcpAddr, ok := pr.Addr.(*net.TCPAddr); ok {
  243. externalIP := tcpAddr.IP
  244. return fmt.Sprintf("%s:%d", externalIP, grpcPort)
  245. }
  246. return pr.Addr.String()
  247. }
  248. func (ms *MasterServer) ListMasterClients(ctx context.Context, req *master_pb.ListMasterClientsRequest) (*master_pb.ListMasterClientsResponse, error) {
  249. resp := &master_pb.ListMasterClientsResponse{}
  250. ms.clientChansLock.RLock()
  251. defer ms.clientChansLock.RUnlock()
  252. for k := range ms.clientChans {
  253. if strings.HasPrefix(k, req.ClientType+"@") {
  254. resp.GrpcAddresses = append(resp.GrpcAddresses, k[len(req.ClientType)+1:])
  255. }
  256. }
  257. return resp, nil
  258. }