Skip to content

Commit cb07bc6

Browse files
committed
Add support for setting gdrcopyEnabled
Signed-off-by: Evan Lezar <[email protected]>
1 parent 506098f commit cb07bc6

File tree

9 files changed

+40
-6
lines changed

9 files changed

+40
-6
lines changed

api/config/v1/flags.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ type CommandLineFlags struct {
6060
MpsRoot *string `json:"mpsRoot,omitempty" yaml:"mpsRoot,omitempty"`
6161
NvidiaDriverRoot *string `json:"nvidiaDriverRoot,omitempty" yaml:"nvidiaDriverRoot,omitempty"`
6262
NvidiaDevRoot *string `json:"nvidiaDevRoot,omitempty" yaml:"nvidiaDevRoot,omitempty"`
63+
GDRCopyEnabled *bool `json:"gdrcopyEnabled" yaml:"gdrcopyEnabled"`
6364
GDSEnabled *bool `json:"gdsEnabled" yaml:"gdsEnabled"`
6465
MOFEDEnabled *bool `json:"mofedEnabled" yaml:"mofedEnabled"`
6566
UseNodeFeatureAPI *bool `json:"useNodeFeatureAPI" yaml:"useNodeFeatureAPI"`
@@ -126,6 +127,8 @@ func (f *Flags) UpdateFromCLIFlags(c *cli.Context, flags []cli.Flag) {
126127
updateFromCLIFlag(&f.NvidiaDriverRoot, c, n)
127128
case "dev-root", "nvidia-dev-root":
128129
updateFromCLIFlag(&f.NvidiaDevRoot, c, n)
130+
case "gdrcopy-enabled":
131+
updateFromCLIFlag(&f.GDRCopyEnabled, c, n)
129132
case "gds-enabled":
130133
updateFromCLIFlag(&f.GDSEnabled, c, n)
131134
case "mofed-enabled":

api/config/v1/flags_test.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,7 @@ func TestMarshalFlags(t *testing.T) {
160160
output: `{
161161
"migStrategy": null,
162162
"failOnInitError": null,
163+
"gdrcopyEnabled": null,
163164
"gdsEnabled": null,
164165
"mofedEnabled": null,
165166
"useNodeFeatureAPI": null,
@@ -177,6 +178,7 @@ func TestMarshalFlags(t *testing.T) {
177178
output: `{
178179
"migStrategy": null,
179180
"failOnInitError": null,
181+
"gdrcopyEnabled": null,
180182
"gdsEnabled": null,
181183
"mofedEnabled": null,
182184
"useNodeFeatureAPI": null,
@@ -201,6 +203,7 @@ func TestMarshalFlags(t *testing.T) {
201203
output: `{
202204
"migStrategy": null,
203205
"failOnInitError": null,
206+
"gdrcopyEnabled": null,
204207
"gdsEnabled": null,
205208
"mofedEnabled": null,
206209
"useNodeFeatureAPI": null,

cmd/nvidia-device-plugin/main.go

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -100,14 +100,19 @@ func main() {
100100
Usage: "the desired strategy for passing device IDs to the underlying runtime:\n\t\t[uuid | index]",
101101
EnvVars: []string{"DEVICE_ID_STRATEGY"},
102102
},
103+
&cli.BoolFlag{
104+
Name: "gdrcopy-enabled",
105+
Usage: "ensure that containers that request NVIDIA GPU resources are started with GDRCopy support",
106+
EnvVars: []string{"GDRCOPY_ENABLED"},
107+
},
103108
&cli.BoolFlag{
104109
Name: "gds-enabled",
105-
Usage: "ensure that containers are started with NVIDIA_GDS=enabled",
110+
Usage: "ensure that containers that request NVIDIA GPU resources are started with GPUDirect Storage support",
106111
EnvVars: []string{"GDS_ENABLED"},
107112
},
108113
&cli.BoolFlag{
109114
Name: "mofed-enabled",
110-
Usage: "ensure that containers are started with NVIDIA_MOFED=enabled",
115+
Usage: "ensure that containers that request NVIDIA GPU resources are started with MOFED support",
111116
EnvVars: []string{"MOFED_ENABLED"},
112117
},
113118
&cli.StringFlag{

cmd/nvidia-device-plugin/plugin-manager.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ func GetPlugins(ctx context.Context, infolib info.Interface, nvmllib nvml.Interf
5454
cdi.WithNvidiaCTKPath(*config.Flags.Plugin.NvidiaCTKPath),
5555
cdi.WithDeviceIDStrategy(*config.Flags.Plugin.DeviceIDStrategy),
5656
cdi.WithVendor("k8s.device-plugin.nvidia.com"),
57+
cdi.WithGdrcopyEnabled(*config.Flags.GDRCopyEnabled),
5758
cdi.WithGdsEnabled(*config.Flags.GDSEnabled),
5859
cdi.WithMofedEnabled(*config.Flags.MOFEDEnabled),
5960
cdi.WithImexChannels(imexChannels),

deployments/helm/nvidia-device-plugin/templates/daemonset-device-plugin.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,10 @@ spec:
169169
- name: NVIDIA_CDI_HOOK_PATH
170170
value: {{ .Values.cdi.nvidiaHookPath }}
171171
{{- end }}
172+
{{- if typeIs "bool" .Values.gdrcopyEnabled }}
173+
- name: GDRCOPY_ENABLED
174+
value: {{ .Values.gdrcopyEnabled | quote }}
175+
{{- end }}
172176
{{- if typeIs "bool" .Values.gdsEnabled }}
173177
- name: GDS_ENABLED
174178
value: {{ .Values.gdsEnabled | quote }}

deployments/helm/nvidia-device-plugin/values.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ failOnInitError: null
3333
deviceListStrategy: null
3434
deviceIDStrategy: null
3535
nvidiaDriverRoot: null
36+
gdrcopyEnabled: null
3637
gdsEnabled: null
3738
mofedEnabled: null
3839
deviceDiscoveryStrategy: null

internal/cdi/cdi.go

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -57,8 +57,9 @@ type cdiHandler struct {
5757

5858
deviceListStrategies spec.DeviceListStrategies
5959

60-
gdsEnabled bool
61-
mofedEnabled bool
60+
gdsEnabled bool
61+
mofedEnabled bool
62+
gdrcopyEnabled bool
6263

6364
imexChannels imex.Channels
6465

@@ -134,6 +135,9 @@ func New(infolib info.Interface, nvmllib nvml.Interface, devicelib device.Interf
134135
}
135136

136137
var additionalModes []string
138+
if c.gdrcopyEnabled {
139+
additionalModes = append(additionalModes, "gdrcopy")
140+
}
137141
if c.gdsEnabled {
138142
additionalModes = append(additionalModes, "gds")
139143
}

internal/cdi/options.go

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -80,14 +80,21 @@ func WithVendor(vendor string) Option {
8080
}
8181
}
8282

83-
// WithGdsEnabled provides and option to set whether a GDS CDI spec should be generated
83+
// WithGdrcopyEnabled provides an option to set whether a GDRCopy CDI spec should be generated
84+
func WithGdrcopyEnabled(enabled bool) Option {
85+
return func(c *cdiHandler) {
86+
c.gdrcopyEnabled = enabled
87+
}
88+
}
89+
90+
// WithGdsEnabled provides an option to set whether a GDS CDI spec should be generated
8491
func WithGdsEnabled(enabled bool) Option {
8592
return func(c *cdiHandler) {
8693
c.gdsEnabled = enabled
8794
}
8895
}
8996

90-
// WithMofedEnabled provides and option to set whether a MOFED CDI spec should be generated
97+
// WithMofedEnabled provides an option to set whether a MOFED CDI spec should be generated
9198
func WithMofedEnabled(enabled bool) Option {
9299
return func(c *cdiHandler) {
93100
c.mofedEnabled = enabled

internal/plugin/server.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -351,6 +351,9 @@ func (plugin *nvidiaDevicePlugin) getAllocateResponse(requestIds []string) (*plu
351351
if *plugin.config.Flags.Plugin.PassDeviceSpecs {
352352
response.Devices = append(response.Devices, plugin.apiDeviceSpecs(*plugin.config.Flags.NvidiaDevRoot, requestIds)...)
353353
}
354+
if *plugin.config.Flags.GDRCopyEnabled {
355+
response.Envs["NVIDIA_GDRCOPY"] = "enabled"
356+
}
354357
if *plugin.config.Flags.GDSEnabled {
355358
response.Envs["NVIDIA_GDS"] = "enabled"
356359
}
@@ -383,6 +386,9 @@ func (plugin *nvidiaDevicePlugin) updateResponseForCDI(response *pluginapi.Conta
383386
if *plugin.config.Flags.MOFEDEnabled {
384387
devices = append(devices, plugin.cdiHandler.QualifiedName("mofed", "all"))
385388
}
389+
if *plugin.config.Flags.GDRCopyEnabled {
390+
devices = append(devices, plugin.cdiHandler.QualifiedName("gdrcopy", "all"))
391+
}
386392

387393
if len(devices) == 0 {
388394
return nil

0 commit comments

Comments
 (0)