Skip to content

Commit bec2da2

Browse files
committed
Do not handle modes if job is to be cancelled.
The db-maintenance and pause modes have special handling at the top of the handle_job method that eagerly handle those options. This introduced a bug that job that are to be be cancelled do not get cancelled if those modes affect them. This means that user's db jobs are not cancelled when in db-maintenance mode, for example. This quick fix simply protects the eager mode handling to apply only to jobs that are not about to be cancelled, which will let the cancellation logic proceed as normal. This is probably not the ideal solution, but a) this is affecting users right now with the extended db-maintenance mode and b) we are about to completely refactor all the things, so we can live with a less than ideal solution
1 parent 6e22169 commit bec2da2

File tree

2 files changed

+73
-22
lines changed

2 files changed

+73
-22
lines changed

jobrunner/run.py

+23-21
Original file line numberDiff line numberDiff line change
@@ -215,32 +215,34 @@ def handle_job(job, api, mode=None, paused=None):
215215
synchronous_transitions = getattr(api, "synchronous_transitions", [])
216216
is_synchronous = False
217217

218-
# handle special modes before considering executor state, as they ignore it
219-
if paused:
220-
if job.state == State.PENDING:
221-
# do not start the job, keep it pending
218+
# only consider these modes if we are not about to cancel the job
219+
if not job_definition.cancelled:
220+
# handle special modes before considering executor state, as they ignore it
221+
if paused:
222+
if job.state == State.PENDING:
223+
# do not start the job, keep it pending
224+
set_code(
225+
job,
226+
StatusCode.WAITING_PAUSED,
227+
"Backend is currently paused for maintenance, job will start once this is completed",
228+
)
229+
return
230+
231+
if mode == "db-maintenance" and job_definition.allow_database_access:
232+
if job.state == State.RUNNING:
233+
log.warning(f"DB maintenance mode active, killing db job {job.id}")
234+
# we ignore the JobStatus returned from these API calls, as this is not a hard error
235+
api.terminate(job_definition)
236+
api.cleanup(job_definition)
237+
238+
# reset state to pending and exit
222239
set_code(
223240
job,
224-
StatusCode.WAITING_PAUSED,
225-
"Backend is currently paused for maintenance, job will start once this is completed",
241+
StatusCode.WAITING_DB_MAINTENANCE,
242+
"Waiting for database to finish maintenance",
226243
)
227244
return
228245

229-
if mode == "db-maintenance" and job_definition.allow_database_access:
230-
if job.state == State.RUNNING:
231-
log.warning(f"DB maintenance mode active, killing db job {job.id}")
232-
# we ignore the JobStatus returned from these API calls, as this is not a hard error
233-
api.terminate(job_definition)
234-
api.cleanup(job_definition)
235-
236-
# reset state to pending and exit
237-
set_code(
238-
job,
239-
StatusCode.WAITING_DB_MAINTENANCE,
240-
"Waiting for database to finish maintenance",
241-
)
242-
return
243-
244246
try:
245247
initial_status = api.get_status(job_definition)
246248
except ExecutorRetry as retry:

tests/test_run.py

+50-1
Original file line numberDiff line numberDiff line change
@@ -731,15 +731,36 @@ def test_handle_pending_db_maintenance_mode(db, backend_db_config):
731731

732732
# executor state
733733
assert api.get_status(job).state == ExecutorState.UNKNOWN
734-
# our state
735734
assert job.state == State.PENDING
735+
assert job.status_code == StatusCode.WAITING_DB_MAINTENANCE
736736
assert job.status_message == "Waiting for database to finish maintenance"
737737
assert job.started_at is None
738738

739739
spans = get_trace("jobs")
740740
assert spans[-1].name == "CREATED"
741741

742742

743+
def test_handle_pending_cancelled_db_maintenance_mode(db, backend_db_config):
744+
api = StubExecutorAPI()
745+
job = api.add_test_job(
746+
ExecutorState.UNKNOWN,
747+
State.PENDING,
748+
run_command="cohortextractor:latest generate_cohort",
749+
requires_db=True,
750+
cancelled=True,
751+
)
752+
753+
run.handle_job(job, api, mode="db-maintenance")
754+
755+
# executor state
756+
assert api.get_status(job).state == ExecutorState.UNKNOWN
757+
# our state
758+
assert job.state == State.FAILED
759+
assert job.status_code == StatusCode.CANCELLED_BY_USER
760+
assert job.status_message == "Cancelled by user"
761+
assert job.started_at is None
762+
763+
743764
def test_handle_running_db_maintenance_mode(db, backend_db_config):
744765
api = StubExecutorAPI()
745766
job = api.add_test_job(
@@ -759,13 +780,41 @@ def test_handle_running_db_maintenance_mode(db, backend_db_config):
759780

760781
# our state
761782
assert job.state == State.PENDING
783+
assert job.status_code == StatusCode.WAITING_DB_MAINTENANCE
762784
assert job.status_message == "Waiting for database to finish maintenance"
763785
assert job.started_at is None
764786

765787
spans = get_trace("jobs")
766788
assert spans[-1].name == "EXECUTING"
767789

768790

791+
def test_handle_running_cancelled_db_maintenance_mode(db, backend_db_config):
792+
api = StubExecutorAPI()
793+
job = api.add_test_job(
794+
ExecutorState.EXECUTING,
795+
State.RUNNING,
796+
StatusCode.EXECUTING,
797+
run_command="cohortextractor:latest generate_cohort",
798+
requires_db=True,
799+
cancelled=True,
800+
)
801+
802+
run.handle_job(job, api, mode="db-maintenance")
803+
804+
# cancellation of running jobs puts it into EXECUTED for later finalization
805+
# executor state
806+
assert job.id in api.tracker["terminate"]
807+
assert api.get_status(job).state == ExecutorState.EXECUTED
808+
809+
# our state
810+
assert job.state == State.RUNNING
811+
assert job.status_code == StatusCode.EXECUTED
812+
assert job.status_message == "Cancelled whilst executing"
813+
814+
spans = get_trace("jobs")
815+
assert spans[-1].name == "EXECUTING"
816+
817+
769818
def test_handle_pending_pause_mode(db, backend_db_config):
770819
api = StubExecutorAPI()
771820
job = api.add_test_job(

0 commit comments

Comments
 (0)