Browse Source

Fix deadlock with KeepConnected and SendHeartbeat

There's the potential where we're writing to a clientConn and it goes away
and we're stuck keeping a read lock on clientChansLock. This causes
KeepConnected to not be able to remove the client since it requires a write
lock on clientChansLock. This ends up backing up SendHeartbeat because it
can't get a read lock.
pull/1475/head
James Hartig 4 years ago
parent
commit
91e4eca1e9
  1. 10
      weed/server/master_grpc_server.go

10
weed/server/master_grpc_server.go

@ -187,7 +187,8 @@ func (ms *MasterServer) KeepConnected(stream master_pb.Seaweed_KeepConnectedServ
peerAddress := findClientAddress(stream.Context(), req.GrpcPort) peerAddress := findClientAddress(stream.Context(), req.GrpcPort)
stopChan := make(chan bool)
// buffer by 1 so we don't end up getting stuck writing to stopChan forever
stopChan := make(chan bool, 1)
clientName, messageChan := ms.addClient(req.Name, peerAddress) clientName, messageChan := ms.addClient(req.Name, peerAddress)
@ -247,7 +248,12 @@ func (ms *MasterServer) addClient(clientType string, clientAddress string) (clie
clientName = clientType + "@" + clientAddress clientName = clientType + "@" + clientAddress
glog.V(0).Infof("+ client %v", clientName) glog.V(0).Infof("+ client %v", clientName)
messageChan = make(chan *master_pb.VolumeLocation)
// we buffer this because otherwise we end up in a potential deadlock where
// the KeepConnected loop is no longer listening on this channel but we're
// trying to send to it in SendHeartbeat and so we can't lock the
// clientChansLock to remove the channel and we're stuck writing to it
// 100 is probably overkill
messageChan = make(chan *master_pb.VolumeLocation, 100)
ms.clientChansLock.Lock() ms.clientChansLock.Lock()
ms.clientChans[clientName] = messageChan ms.clientChans[clientName] = messageChan

Loading…
Cancel
Save