@@ -309,7 +309,7 @@ def _reset_find_coordinator_future(self, result):
309
309
self ._find_coordinator_future = None
310
310
311
311
def lookup_coordinator (self ):
312
- with self ._lock :
312
+ with self ._client . _lock , self . _lock :
313
313
if self ._find_coordinator_future is not None :
314
314
return self ._find_coordinator_future
315
315
@@ -883,6 +883,7 @@ def _handle_leave_group_response(self, response):
883
883
884
884
def _send_heartbeat_request (self ):
885
885
"""Send a heartbeat request"""
886
+ # Note: acquire both client + coordinator lock before calling
886
887
if self .coordinator_unknown ():
887
888
e = Errors .CoordinatorNotAvailableError (self .coordinator_id )
888
889
return Future ().failure (e )
@@ -1054,7 +1055,9 @@ def run(self):
1054
1055
heartbeat_log .debug ('Heartbeat thread closed' )
1055
1056
1056
1057
def _run_once (self ):
1057
- with self .coordinator ._client ._lock , self .coordinator ._lock :
1058
+ self .coordinator ._client ._lock .acquire ()
1059
+ self .coordinator ._lock .acquire ()
1060
+ try :
1058
1061
if self .enabled and self .coordinator .state is MemberState .STABLE :
1059
1062
# TODO: When consumer.wakeup() is implemented, we need to
1060
1063
# disable here to prevent propagating an exception to this
@@ -1063,27 +1066,26 @@ def _run_once(self):
1063
1066
# failure callback in consumer poll
1064
1067
self .coordinator ._client .poll (timeout_ms = 0 )
1065
1068
1066
- with self .coordinator ._lock :
1067
1069
if not self .enabled :
1068
1070
heartbeat_log .debug ('Heartbeat disabled. Waiting' )
1071
+ self .coordinator ._client ._lock .release ()
1069
1072
self .coordinator ._lock .wait ()
1070
1073
heartbeat_log .debug ('Heartbeat re-enabled.' )
1071
- return
1072
1074
1073
- if self .coordinator .state is not MemberState .STABLE :
1075
+ elif self .coordinator .state is not MemberState .STABLE :
1074
1076
# the group is not stable (perhaps because we left the
1075
1077
# group or because the coordinator kicked us out), so
1076
1078
# disable heartbeats and wait for the main thread to rejoin.
1077
1079
heartbeat_log .debug ('Group state is not stable, disabling heartbeats' )
1078
1080
self .disable ()
1079
- return
1080
1081
1081
- if self .coordinator .coordinator_unknown ():
1082
+ elif self .coordinator .coordinator_unknown ():
1082
1083
future = self .coordinator .lookup_coordinator ()
1083
1084
if not future .is_done or future .failed ():
1084
1085
# the immediate future check ensures that we backoff
1085
1086
# properly in the case that no brokers are available
1086
1087
# to connect to (and the future is automatically failed).
1088
+ self .coordinator ._client ._lock .release ()
1087
1089
self .coordinator ._lock .wait (self .coordinator .config ['retry_backoff_ms' ] / 1000 )
1088
1090
1089
1091
elif self .coordinator .heartbeat .session_timeout_expired ():
@@ -1098,24 +1100,27 @@ def _run_once(self):
1098
1100
# foreground thread has stalled in between calls to
1099
1101
# poll(), so we explicitly leave the group.
1100
1102
heartbeat_log .warning ('Heartbeat poll expired, leaving group' )
1101
- ### XXX
1102
- # maybe_leave_group acquires client + coordinator lock;
1103
- # if we hold coordinator lock before calling, we risk deadlock
1104
- # release() is safe here because this is the last code in the current context
1105
- self .coordinator ._lock .release ()
1106
1103
self .coordinator .maybe_leave_group ()
1107
1104
1108
1105
elif not self .coordinator .heartbeat .should_heartbeat ():
1109
1106
# poll again after waiting for the retry backoff in case
1110
1107
# the heartbeat failed or the coordinator disconnected
1111
1108
heartbeat_log .log (0 , 'Not ready to heartbeat, waiting' )
1109
+ self .coordinator ._client ._lock .release ()
1112
1110
self .coordinator ._lock .wait (self .coordinator .config ['retry_backoff_ms' ] / 1000 )
1113
1111
1114
1112
else :
1115
1113
self .coordinator .heartbeat .sent_heartbeat ()
1116
1114
future = self .coordinator ._send_heartbeat_request ()
1117
1115
future .add_callback (self ._handle_heartbeat_success )
1118
1116
future .add_errback (self ._handle_heartbeat_failure )
1117
+ finally :
1118
+ self .coordinator ._lock .release ()
1119
+ try :
1120
+ # Possibly released in block above to allow coordinator lock wait()
1121
+ self .coordinator ._client ._lock .release ()
1122
+ except RuntimeError :
1123
+ pass
1119
1124
1120
1125
def _handle_heartbeat_success (self , result ):
1121
1126
with self .coordinator ._lock :
0 commit comments