|
53 | 53 | from kubernetes import config
|
54 | 54 |
|
55 | 55 |
|
# Lookup table: AppWrapper state -> (ready, CodeFlare cluster status).
# States that share an outcome are grouped together; every AppWrapper
# state listed here maps to ready=False.
AW_STATUS_TO_READINESS = {
    **dict.fromkeys(
        (
            AppWrapperStatus.RUNNING,
            AppWrapperStatus.COMPLETED,
            AppWrapperStatus.RUNNING_HOLD_COMPLETION,
        ),
        (False, CodeFlareClusterStatus.STARTING),
    ),
    **dict.fromkeys(
        (
            AppWrapperStatus.FAILED,
            AppWrapperStatus.DELETED,
        ),
        (False, CodeFlareClusterStatus.FAILED),
    ),
    # The two queue-related states map to distinct CodeFlare statuses.
    AppWrapperStatus.PENDING: (False, CodeFlareClusterStatus.QUEUED),
    AppWrapperStatus.QUEUEING: (False, CodeFlareClusterStatus.QUEUEING),
}
| 65 | + |
# Lookup table: RayCluster state -> (ready, CodeFlare cluster status).
# READY is the only state that maps to ready=True.
CLUSTER_STATUS_TO_READINESS = {
    RayClusterStatus.UNKNOWN: (False, CodeFlareClusterStatus.STARTING),
    RayClusterStatus.READY: (True, CodeFlareClusterStatus.READY),
    # Unhealthy and failed clusters are both reported as FAILED.
    **dict.fromkeys(
        (
            RayClusterStatus.UNHEALTHY,
            RayClusterStatus.FAILED,
        ),
        (False, CodeFlareClusterStatus.FAILED),
    ),
}
| 72 | + |
| 73 | + |
| 74 | +# cluster = _ray_cluster_status(self.config.name, self.config.namespace) |
| 75 | +# if cluster: |
| 76 | +# if cluster.status == RayClusterStatus.UNKNOWN: |
| 77 | +# ready = False |
| 78 | +# status = CodeFlareClusterStatus.STARTING |
| 79 | +# if cluster.status == RayClusterStatus.READY: |
| 80 | +# ready = True |
| 81 | +# status = CodeFlareClusterStatus.READY |
| 82 | +# elif cluster.status in [ |
| 83 | +# RayClusterStatus.UNHEALTHY, |
| 84 | +# RayClusterStatus.FAILED, |
| 85 | +# ]: |
| 86 | +# ready = False |
| 87 | +# status = CodeFlareClusterStatus.FAILED |
| 88 | + |
| 89 | +# if print_to_console: |
| 90 | +# # overriding the number of gpus with requested |
| 91 | +# cluster.worker_gpu = self.config.num_gpus |
| 92 | +# pretty_print.print_cluster_status(cluster) |
| 93 | +# elif print_to_console: |
| 94 | +# if status == CodeFlareClusterStatus.UNKNOWN: |
| 95 | +# pretty_print.print_no_resources_found() |
| 96 | +# else: |
| 97 | +# pretty_print.print_app_wrappers_status([appwrapper], starting=True) |
| 98 | + |
| 99 | + |
56 | 100 | class Cluster:
|
57 | 101 | """
|
58 | 102 | An object for requesting, bringing up, and taking down resources.
|
@@ -289,52 +333,23 @@ def status(
|
289 | 333 | # check the app wrapper status
|
290 | 334 | appwrapper = _app_wrapper_status(self.config.name, self.config.namespace)
|
291 | 335 | if appwrapper:
|
292 |
| - if appwrapper.status in [ |
293 |
| - AppWrapperStatus.RUNNING, |
294 |
| - AppWrapperStatus.COMPLETED, |
295 |
| - AppWrapperStatus.RUNNING_HOLD_COMPLETION, |
296 |
| - ]: |
297 |
| - ready = False |
298 |
| - status = CodeFlareClusterStatus.STARTING |
299 |
| - elif appwrapper.status in [ |
300 |
| - AppWrapperStatus.FAILED, |
301 |
| - AppWrapperStatus.DELETED, |
302 |
| - ]: |
303 |
| - ready = False |
304 |
| - status = CodeFlareClusterStatus.FAILED # should deleted be separate |
305 |
| - return status, ready # exit early, no need to check ray status |
306 |
| - elif appwrapper.status in [ |
307 |
| - AppWrapperStatus.PENDING, |
308 |
| - AppWrapperStatus.QUEUEING, |
309 |
| - ]: |
310 |
| - ready = False |
311 |
| - if appwrapper.status == AppWrapperStatus.PENDING: |
312 |
| - status = CodeFlareClusterStatus.QUEUED |
313 |
| - else: |
314 |
| - status = CodeFlareClusterStatus.QUEUEING |
315 |
| - if print_to_console: |
316 |
| - pretty_print.print_app_wrappers_status([appwrapper]) |
317 |
| - return ( |
318 |
| - status, |
319 |
| - ready, |
320 |
| - ) # no need to check the ray status since still in queue |
| 336 | + if print_to_console: |
| 337 | + pretty_print.print_app_wrappers_status([appwrapper]) |
| 338 | + ready, status = AW_STATUS_TO_READINESS.get( |
| 339 | + appwrapper.status, (ready, status) |
| 340 | + ) |
| 341 | + if ( |
| 342 | + status != CodeFlareClusterStatus.UNKNOWN |
| 343 | + and status != CodeFlareClusterStatus.STARTING |
| 344 | + ): |
| 345 | + return ready, status |
321 | 346 |
|
322 | 347 | # check the ray cluster status
|
323 | 348 | cluster = _ray_cluster_status(self.config.name, self.config.namespace)
|
324 | 349 | if cluster:
|
325 |
| - if cluster.status == RayClusterStatus.UNKNOWN: |
326 |
| - ready = False |
327 |
| - status = CodeFlareClusterStatus.STARTING |
328 |
| - if cluster.status == RayClusterStatus.READY: |
329 |
| - ready = True |
330 |
| - status = CodeFlareClusterStatus.READY |
331 |
| - elif cluster.status in [ |
332 |
| - RayClusterStatus.UNHEALTHY, |
333 |
| - RayClusterStatus.FAILED, |
334 |
| - ]: |
335 |
| - ready = False |
336 |
| - status = CodeFlareClusterStatus.FAILED |
337 |
| - |
| 350 | + ready, status = CLUSTER_STATUS_TO_READINESS.get( |
| 351 | + cluster.status, (ready, status) |
| 352 | + ) |
338 | 353 | if print_to_console:
|
339 | 354 | # overriding the number of gpus with requested
|
340 | 355 | cluster.worker_gpu = self.config.num_gpus
|
|
0 commit comments