@@ -250,6 +250,11 @@ def coordinator(self):
         else:
             return self.coordinator_id
 
+    def connected(self):
+        """Return True iff the coordinator node is connected"""
+        with self._lock:
+            return self.coordinator_id is not None and self._client.connected(self.coordinator_id)
+
     def ensure_coordinator_ready(self, timeout_ms=None):
         """Block until the coordinator for this group is known.
 
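For context, a minimal usage sketch of the new helper. The wait_for_coordinator_connection() wrapper and its timeout_s parameter are hypothetical illustrations, not part of the library; connected() and the retry_backoff_ms config come from the diff above.

    import time

    # Hypothetical helper (illustrative only): block until the coordinator
    # connection is established, mirroring the retry_backoff_ms wait the
    # heartbeat thread performs when connected() returns False.
    def wait_for_coordinator_connection(coordinator, timeout_s=5.0):
        backoff_s = coordinator.config['retry_backoff_ms'] / 1000
        deadline = time.time() + timeout_s
        while time.time() < deadline:
            if coordinator.connected():  # new method added in this diff
                return True
            time.sleep(backoff_s)
        return False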
@@ -1058,28 +1063,28 @@ def _run_once(self):
         self.coordinator._client._lock.acquire()
         self.coordinator._lock.acquire()
         try:
-            if self.enabled and self.coordinator.state is MemberState.STABLE:
-                # TODO: When consumer.wakeup() is implemented, we need to
-                # disable here to prevent propagating an exception to this
-                # heartbeat thread
-                # must get client._lock, or maybe deadlock at heartbeat
-                # failure callback in consumer poll
-                self.coordinator._client.poll(timeout_ms=0)
-
             if not self.enabled:
                 heartbeat_log.debug('Heartbeat disabled. Waiting')
                 self.coordinator._client._lock.release()
                 self.coordinator._lock.wait()
-                heartbeat_log.debug('Heartbeat re-enabled.')
+                if self.enabled:
+                    heartbeat_log.debug('Heartbeat re-enabled.')
+                return
 
-            elif self.coordinator.state is not MemberState.STABLE:
+            if self.coordinator.state is not MemberState.STABLE:
                 # the group is not stable (perhaps because we left the
                 # group or because the coordinator kicked us out), so
                 # disable heartbeats and wait for the main thread to rejoin.
                 heartbeat_log.debug('Group state is not stable, disabling heartbeats')
                 self.disable()
+                return
+
+            # TODO: When consumer.wakeup() is implemented, we need to
+            # disable here to prevent propagating an exception to this
+            # heartbeat thread
+            self.coordinator._client.poll(timeout_ms=0)
 
-            elif self.coordinator.coordinator_unknown():
+            if self.coordinator.coordinator_unknown():
                 future = self.coordinator.lookup_coordinator()
                 if not future.is_done or future.failed():
                     # the immediate future check ensures that we backoff
@@ -1088,6 +1093,10 @@ def _run_once(self):
                     self.coordinator._client._lock.release()
                     self.coordinator._lock.wait(self.coordinator.config['retry_backoff_ms'] / 1000)
 
+            elif not self.coordinator.connected():
+                self.coordinator._client._lock.release()
+                self.coordinator._lock.wait(self.coordinator.config['retry_backoff_ms'] / 1000)
+
             elif self.coordinator.heartbeat.session_timeout_expired():
                 # the session timeout has expired without seeing a
                 # successful heartbeat, so we should probably make sure
@@ -1103,11 +1112,10 @@ def _run_once(self):
                 self.coordinator.maybe_leave_group()
 
             elif not self.coordinator.heartbeat.should_heartbeat():
-                # poll again after waiting for the retry backoff in case
-                # the heartbeat failed or the coordinator disconnected
-                heartbeat_log.log(0, 'Not ready to heartbeat, waiting')
+                next_hb = self.coordinator.heartbeat.time_to_next_heartbeat()
+                heartbeat_log.debug('Waiting %0.1f secs to send next heartbeat', next_hb)
                 self.coordinator._client._lock.release()
-                self.coordinator._lock.wait(self.coordinator.config['retry_backoff_ms'] / 1000)
+                self.coordinator._lock.wait(next_hb)
 
             else:
                 self.coordinator.heartbeat.sent_heartbeat()
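The last hunk assumes heartbeat.time_to_next_heartbeat() returns the number of seconds until the next heartbeat is due, so the thread sleeps exactly that long instead of a fixed retry_backoff_ms. A rough, illustrative model of that timing bookkeeping (not the library's Heartbeat class) might look like:

    import time

    class HeartbeatTimerSketch(object):
        """Illustrative stand-in for the interval bookkeeping assumed above."""

        def __init__(self, heartbeat_interval_ms=3000):
            self.interval_s = heartbeat_interval_ms / 1000.0
            self.last_send = time.time()

        def sent_heartbeat(self):
            # Record that a heartbeat request was just sent
            self.last_send = time.time()

        def should_heartbeat(self):
            return self.time_to_next_heartbeat() == 0

        def time_to_next_heartbeat(self):
            # Seconds remaining before another heartbeat should be sent (never negative)
            return max(0.0, self.last_send + self.interval_s - time.time())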