@@ -19,22 +19,10 @@ package v1alpha1
19
19
import (
20
20
autoscalingv2 "k8s.io/api/autoscaling/v2"
21
21
corev1 "k8s.io/api/core/v1"
22
+ "k8s.io/apimachinery/pkg/api/resource"
22
23
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
23
24
)
24
25
25
- // BackendRuntimeArg is the preset arguments for easy to use.
26
- // Three preset names are provided: default, speculative-decoding, model-parallelism,
27
- // do not change the name.
28
- type BackendRuntimeArg struct {
29
- // Name represents the identifier of the backendRuntime argument.
30
- // +kubebuilder:default=default
31
- // +optional
32
- Name * string `json:"name,omitempty"`
33
- // Flags represents all the preset configurations.
34
- // Flag around with {{ .CONFIG }} is a configuration waiting for render.
35
- Flags []string `json:"flags,omitempty"`
36
- }
37
-
38
26
// HPATrigger represents the configuration of the HorizontalPodAutoscaler.
39
27
// Inspired by kubernetes.io/pkg/apis/autoscaling/types.go#HorizontalPodAutoscalerSpec.
40
28
// Note: the HPA component should be installed beforehand.
@@ -55,17 +43,6 @@ type HPATrigger struct {
55
43
Behavior * autoscalingv2.HorizontalPodAutoscalerBehavior `json:"behavior,omitempty"`
56
44
}
57
45
58
- // NamedScaleTrigger defines the rules to scale the workloads.
59
- // Only one trigger cloud work at a time. The name is used to identify
60
- // the trigger in backendRuntime.
61
- type NamedScaleTrigger struct {
62
- // Name represents the identifier of the scale trigger, e.g. some triggers defined for
63
- // latency sensitive workloads, some are defined for throughput sensitive workloads.
64
- Name string `json:"name,omitempty"`
65
- // HPA represents the trigger configuration of the HorizontalPodAutoscaler.
66
- HPA * HPATrigger `json:"hpa,omitempty"`
67
- }
68
-
69
46
// ScaleTrigger defines the rules to scale the workloads.
70
47
// Only one trigger could work at a time, mostly used in Playground.
71
48
type ScaleTrigger struct {
@@ -83,6 +60,30 @@ type MultiHostCommands struct {
83
60
Worker []string `json:"worker,omitempty"`
84
61
}
85
62
63
+ // RecommendedConfig represents one recommended configuration for the backendRuntime;
64
+ // users can choose one of them to apply.
65
+ type RecommendedConfig struct {
66
+ // Name represents the identifier of the config.
67
+ Name string `json:"name"`
68
+ // Args represents all the arguments for the command.
69
+ // An argument wrapped in {{ .CONFIG }} is a configuration waiting to be rendered.
70
+ // +optional
71
+ Args []string `json:"args,omitempty"`
72
+ // Resources represents the resource requirements for the backend, like cpu/mem.
73
+ // Accelerators like GPU should not be defined here, but in the model flavors,
74
+ // or the values here will be overwritten.
75
+ // +optional
76
+ Resources * ResourceRequirements `json:"resources,omitempty"`
77
+ // SharedMemorySize represents the size of /dev/shm required at runtime by the
78
+ // inference workload.
79
+ // +optional
80
+ SharedMemorySize * resource.Quantity `json:"sharedMemorySize,omitempty"`
81
+ // ScaleTrigger defines the rules to scale the workloads.
82
+ // Only one trigger could work at a time.
83
+ // +optional
84
+ ScaleTrigger * ScaleTrigger `json:"scaleTrigger,omitempty"`
85
+ }
86
+
86
87
// BackendRuntimeSpec defines the desired state of BackendRuntime
87
88
type BackendRuntimeSpec struct {
88
89
// Commands represents the default commands for the backendRuntime.
@@ -98,16 +99,9 @@ type BackendRuntimeSpec struct {
98
99
// Version represents the default version of the backendRuntime.
99
100
// It will be appended to the image as a tag.
100
101
Version string `json:"version"`
101
- // Args represents the preset arguments of the backendRuntime.
102
- // They can be appended or overwritten by the Playground backendRuntimeConfig.
103
- Args []BackendRuntimeArg `json:"args,omitempty"`
104
102
// Envs represents the environments set to the container.
105
103
// +optional
106
104
Envs []corev1.EnvVar `json:"envs,omitempty"`
107
- // Resources represents the resource requirements for backendRuntime, like cpu/mem,
108
- // accelerators like GPU should not be defined here, but at the model flavors,
109
- // or the values here will be overwritten.
110
- Resources ResourceRequirements `json:"resources"`
111
105
// Periodic probe of backend liveness.
112
106
// Backend will be restarted if the probe fails.
113
107
// Cannot be updated.
@@ -124,10 +118,9 @@ type BackendRuntimeSpec struct {
124
118
// when it might take a long time to load data or warm a cache, than during steady-state operation.
125
119
// +optional
126
120
StartupProbe * corev1.Probe `json:"startupProbe,omitempty"`
127
- // ScaleTriggers represents a set of triggers preset to be used by Playground.
128
- // If Playground not specify the scale trigger, the 0-index trigger will be used.
121
+ // RecommendedConfigs represents the recommended configurations for the backendRuntime.
129
122
// +optional
130
- ScaleTriggers []NamedScaleTrigger `json:"scaleTriggers ,omitempty"`
123
+ RecommendedConfigs []RecommendedConfig `json:"recommendedConfigs ,omitempty"`
131
124
}
132
125
133
126
// BackendRuntimeStatus defines the observed state of BackendRuntime
0 commit comments