1
+ """
2
+ Defines the RayJobConfiguration dataclass for specifying KubeRay RayJob custom resources.
3
+ """
4
+
5
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional, Union

from codeflare_sdk.ray.cluster.config import ClusterConfiguration
import corev1_client  # Placeholder for kubernetes.client.models.V1PodTemplateSpec
10
+
11
# Placeholder for V1PodTemplateSpec until the real import is resolved:
#     from kubernetes.client.models import V1PodTemplateSpec
# Until then, a generic string-keyed mapping stands in for the pod template.
# NOTE: this alias needs `Any` (and `Dict`) imported from `typing`.
V1PodTemplateSpec = Dict[str, Any]
15
+
16
+
17
@dataclass
class RayJobConfiguration:
    """
    Configuration for a KubeRay RayJob.

    Args:
        name: Name of the RayJob.
        namespace: Namespace for the RayJob.
        entrypoint: Command to execute for the job. Required: it carries an
            empty-string default only so it can keep its position after
            ``namespace`` (a dataclass field without a default may not follow
            one with a default); an empty value is rejected in
            ``__post_init__``.
        runtime_env_yaml: Runtime environment configuration as a YAML string.
        job_id: Optional ID for the job. Auto-generated if not set.
        active_deadline_seconds: Duration in seconds the job may be active.
        backoff_limit: Number of retries before marking job as failed.
        deletion_policy: Policy for resource deletion on job completion.
            Valid values: "DeleteCluster", "DeleteWorkers", "DeleteSelf", "DeleteNone".
        submission_mode: How the Ray job is submitted to the RayCluster.
            Valid values: "K8sJobMode", "HTTPMode", "InteractiveMode".
        managed_by: Controller managing the RayJob (e.g., "kueue.x-k8s.io/multikueue").
        ray_cluster_spec: Specification for the RayCluster if created by this RayJob.
        cluster_selector: Labels to select an existing RayCluster.
        submitter_pod_template: Pod template for the job submitter (if K8sJobMode).
        shutdown_after_job_finishes: Whether to delete the RayCluster after job completion.
        ttl_seconds_after_finished: TTL for RayCluster cleanup after job completion.
        suspend: Whether to suspend the RayJob (prevents RayCluster creation).
        metadata: Metadata for the RayJob.
        submitter_config_backoff_limit: BackoffLimit for the submitter Kubernetes Job.

    Raises:
        ValueError: If ``entrypoint`` is empty; if ``deletion_policy``,
            ``submission_mode`` or ``managed_by`` holds an unsupported value;
            if both ``ray_cluster_spec`` and ``cluster_selector`` are given;
            or if neither is given outside of "InteractiveMode".
    """

    # Allowed values for the enumerated string fields. These are unannotated
    # class attributes, so the dataclass machinery does not treat them as fields.
    _VALID_DELETION_POLICIES = (
        "DeleteCluster",
        "DeleteWorkers",
        "DeleteSelf",
        "DeleteNone",
    )
    _VALID_SUBMISSION_MODES = ("K8sJobMode", "HTTPMode", "InteractiveMode")
    _VALID_MANAGED_BY = (
        "ray.io/kuberay-operator",
        "kueue.x-k8s.io/multikueue",
    )

    name: str
    namespace: Optional[str] = None
    # Required in practice; the default exists only to satisfy dataclass field
    # ordering and is rejected by __post_init__.
    entrypoint: str = ""
    runtime_env_yaml: Optional[str] = None
    job_id: Optional[str] = None
    active_deadline_seconds: Optional[int] = None
    backoff_limit: int = 0  # KubeRay default is 0
    deletion_policy: Optional[str] = None  # Validated in __post_init__
    submission_mode: str = "K8sJobMode"  # KubeRay default
    managed_by: Optional[str] = None
    ray_cluster_spec: Optional[ClusterConfiguration] = None
    cluster_selector: Dict[str, str] = field(default_factory=dict)
    submitter_pod_template: Optional[V1PodTemplateSpec] = None  # Kubernetes V1PodTemplateSpec
    shutdown_after_job_finishes: bool = True  # Common default, KubeRay itself doesn't default this in RayJobSpec directly
    ttl_seconds_after_finished: int = 0  # KubeRay default
    suspend: bool = False
    metadata: Dict[str, str] = field(default_factory=dict)
    submitter_config_backoff_limit: Optional[int] = None

    def __post_init__(self) -> None:
        """
        Validate individual field values and their combinations.

        Raises:
            ValueError: If any of the checks below fails.
        """
        # entrypoint has a placeholder default (see the field declaration), so
        # a missing value has to be caught here rather than by __init__.
        if not self.entrypoint:
            raise ValueError("entrypoint must be specified.")

        # An unset (None/empty) deletion_policy is allowed and skips the check.
        if (
            self.deletion_policy
            and self.deletion_policy not in self._VALID_DELETION_POLICIES
        ):
            raise ValueError(
                "deletion_policy must be one of 'DeleteCluster', 'DeleteWorkers', 'DeleteSelf', or 'DeleteNone'"
            )

        if self.submission_mode not in self._VALID_SUBMISSION_MODES:
            raise ValueError(
                "submission_mode must be one of 'K8sJobMode', 'HTTPMode', or 'InteractiveMode'"
            )

        # An unset (None/empty) managed_by is allowed and skips the check.
        if self.managed_by and self.managed_by not in self._VALID_MANAGED_BY:
            raise ValueError(
                "managed_by field value must be either 'ray.io/kuberay-operator' or 'kueue.x-k8s.io/multikueue'"
            )

        # A RayJob either creates its own cluster or selects an existing one,
        # never both.
        if self.ray_cluster_spec and self.cluster_selector:
            raise ValueError("Only one of ray_cluster_spec or cluster_selector can be provided.")

        # In interactive mode, a cluster might already exist and the user connects to it.
        # Otherwise, a RayJob needs either a spec to create a cluster or a selector to find one.
        if (
            not self.ray_cluster_spec
            and not self.cluster_selector
            and self.submission_mode != "InteractiveMode"
        ):
            raise ValueError(
                "Either ray_cluster_spec (to create a new cluster) or cluster_selector (to use an existing one) must be specified unless in InteractiveMode."
            )

        # TODO: Add validation for submitter_pod_template if submission_mode is K8sJobMode
        # TODO: Add type validation for all fields
0 commit comments