Skip to content

Commit be92eb2

Browse files
committed
use lookup table for status
Signed-off-by: Kevin <[email protected]>
1 parent 47381fb commit be92eb2

File tree

1 file changed

+57
-42
lines changed

1 file changed

+57
-42
lines changed

Diff for: src/codeflare_sdk/cluster/cluster.py

+57 -42
Original file line number | Diff line number | Diff line change
@@ -53,6 +53,50 @@
5353
from kubernetes import config
5454

5555

56+
AW_STATUS_TO_READINESS = {
57+
AppWrapperStatus.RUNNING: (False, CodeFlareClusterStatus.STARTING),
58+
AppWrapperStatus.COMPLETED: (False, CodeFlareClusterStatus.STARTING),
59+
AppWrapperStatus.RUNNING_HOLD_COMPLETION: (False, CodeFlareClusterStatus.STARTING),
60+
AppWrapperStatus.FAILED: (False, CodeFlareClusterStatus.FAILED),
61+
AppWrapperStatus.DELETED: (False, CodeFlareClusterStatus.FAILED),
62+
AppWrapperStatus.PENDING: (False, CodeFlareClusterStatus.QUEUED),
63+
AppWrapperStatus.QUEUEING: (False, CodeFlareClusterStatus.QUEUEING),
64+
}
65+
66+
CLUSTER_STATUS_TO_READINESS = {
67+
RayClusterStatus.UNKNOWN: (False, CodeFlareClusterStatus.STARTING),
68+
RayClusterStatus.READY: (True, CodeFlareClusterStatus.READY),
69+
RayClusterStatus.UNHEALTHY: (False, CodeFlareClusterStatus.FAILED),
70+
RayClusterStatus.FAILED: (False, CodeFlareClusterStatus.FAILED),
71+
}
72+
73+
74+
# cluster = _ray_cluster_status(self.config.name, self.config.namespace)
75+
# if cluster:
76+
# if cluster.status == RayClusterStatus.UNKNOWN:
77+
# ready = False
78+
# status = CodeFlareClusterStatus.STARTING
79+
# if cluster.status == RayClusterStatus.READY:
80+
# ready = True
81+
# status = CodeFlareClusterStatus.READY
82+
# elif cluster.status in [
83+
# RayClusterStatus.UNHEALTHY,
84+
# RayClusterStatus.FAILED,
85+
# ]:
86+
# ready = False
87+
# status = CodeFlareClusterStatus.FAILED
88+
89+
# if print_to_console:
90+
# # overriding the number of gpus with requested
91+
# cluster.worker_gpu = self.config.num_gpus
92+
# pretty_print.print_cluster_status(cluster)
93+
# elif print_to_console:
94+
# if status == CodeFlareClusterStatus.UNKNOWN:
95+
# pretty_print.print_no_resources_found()
96+
# else:
97+
# pretty_print.print_app_wrappers_status([appwrapper], starting=True)
98+
99+
56100
class Cluster:
57101
"""
58102
An object for requesting, bringing up, and taking down resources.
@@ -289,52 +333,23 @@ def status(
289333
# check the app wrapper status
290334
appwrapper = _app_wrapper_status(self.config.name, self.config.namespace)
291335
if appwrapper:
292-
if appwrapper.status in [
293-
AppWrapperStatus.RUNNING,
294-
AppWrapperStatus.COMPLETED,
295-
AppWrapperStatus.RUNNING_HOLD_COMPLETION,
296-
]:
297-
ready = False
298-
status = CodeFlareClusterStatus.STARTING
299-
elif appwrapper.status in [
300-
AppWrapperStatus.FAILED,
301-
AppWrapperStatus.DELETED,
302-
]:
303-
ready = False
304-
status = CodeFlareClusterStatus.FAILED # should deleted be separate
305-
return status, ready # exit early, no need to check ray status
306-
elif appwrapper.status in [
307-
AppWrapperStatus.PENDING,
308-
AppWrapperStatus.QUEUEING,
309-
]:
310-
ready = False
311-
if appwrapper.status == AppWrapperStatus.PENDING:
312-
status = CodeFlareClusterStatus.QUEUED
313-
else:
314-
status = CodeFlareClusterStatus.QUEUEING
315-
if print_to_console:
316-
pretty_print.print_app_wrappers_status([appwrapper])
317-
return (
318-
status,
319-
ready,
320-
) # no need to check the ray status since still in queue
336+
if print_to_console:
337+
pretty_print.print_app_wrappers_status([appwrapper])
338+
ready, status = AW_STATUS_TO_READINESS.get(
339+
appwrapper.status, (ready, status)
340+
)
341+
if (
342+
status != CodeFlareClusterStatus.UNKNOWN
343+
and status != CodeFlareClusterStatus.STARTING
344+
):
345+
return ready, status
321346

322347
# check the ray cluster status
323348
cluster = _ray_cluster_status(self.config.name, self.config.namespace)
324349
if cluster:
325-
if cluster.status == RayClusterStatus.UNKNOWN:
326-
ready = False
327-
status = CodeFlareClusterStatus.STARTING
328-
if cluster.status == RayClusterStatus.READY:
329-
ready = True
330-
status = CodeFlareClusterStatus.READY
331-
elif cluster.status in [
332-
RayClusterStatus.UNHEALTHY,
333-
RayClusterStatus.FAILED,
334-
]:
335-
ready = False
336-
status = CodeFlareClusterStatus.FAILED
337-
350+
ready, status = CLUSTER_STATUS_TO_READINESS.get(
351+
cluster.status, (ready, status)
352+
)
338353
if print_to_console:
339354
# overriding the number of gpus with requested
340355
cluster.worker_gpu = self.config.num_gpus

0 commit comments

Comments
 (0)