Skip to content

Commit 7bb7845

Browse files
committed
Make gated modes (GDS, MOFED, GDRCOPY) optional in CDI
Signed-off-by: Evan Lezar <[email protected]>
1 parent cb07bc6 commit 7bb7845

File tree

6 files changed

+91
-22
lines changed

6 files changed

+91
-22
lines changed

internal/cdi/api.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,9 @@ package cdi
1818

1919
// Interface provides the API to the 'cdi' package
2020
//
21-
//go:generate moq -stub -out api_mock.go . Interface
21+
//go:generate moq -rm -fmt=goimports -stub -out api_mock.go . Interface
2222
type Interface interface {
2323
CreateSpecFile() error
2424
QualifiedName(string, string) string
25+
AdditionalDevices() []string
2526
}

internal/cdi/api_mock.go

Lines changed: 42 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

internal/cdi/cdi.go

Lines changed: 34 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,8 @@ type cdiHandler struct {
6363

6464
imexChannels imex.Channels
6565

66-
cdilibs map[string]nvcdi.SpecGenerator
66+
cdilibs map[string]nvcdi.SpecGenerator
67+
additionalModes []string
6768
}
6869

6970
var _ Interface = &cdiHandler{}
@@ -134,18 +135,17 @@ func New(infolib info.Interface, nvmllib nvml.Interface, devicelib device.Interf
134135
c.cdilibs["imex-channel"] = c.newImexChannelSpecGenerator()
135136
}
136137

137-
var additionalModes []string
138138
if c.gdrcopyEnabled {
139-
additionalModes = append(additionalModes, "gdrcopy")
139+
c.additionalModes = append(c.additionalModes, "gdrcopy")
140140
}
141141
if c.gdsEnabled {
142-
additionalModes = append(additionalModes, "gds")
142+
c.additionalModes = append(c.additionalModes, "gds")
143143
}
144144
if c.mofedEnabled {
145-
additionalModes = append(additionalModes, "mofed")
145+
c.additionalModes = append(c.additionalModes, "mofed")
146146
}
147147

148-
for _, mode := range additionalModes {
148+
for _, mode := range c.additionalModes {
149149
lib, err := nvcdi.New(
150150
nvcdi.WithInfoLib(c.infolib),
151151
nvcdi.WithLogger(c.logger),
@@ -166,6 +166,7 @@ func New(infolib info.Interface, nvmllib nvml.Interface, devicelib device.Interf
166166

167167
// CreateSpecFile creates a CDI spec file for the specified devices.
168168
func (cdi *cdiHandler) CreateSpecFile() error {
169+
var emptySpecs []string
169170
for class, cdilib := range cdi.cdilibs {
170171
cdi.logger.Infof("Generating CDI spec for resource: %s/%s", cdi.vendor, class)
171172

@@ -197,10 +198,22 @@ func (cdi *cdiHandler) CreateSpecFile() error {
197198

198199
err = spec.Save(filepath.Join(cdiRoot, specName+".json"))
199200
if err != nil {
201+
// TODO: This is a brittle check since it relies on exact string matches.
202+
// We should pull this functionality into the CDI tooling instead.
203+
if strings.Contains(err.Error(), "invalid device, empty device edits") {
204+
klog.ErrorS(err, "Ignoring empty CDI specs", "vendor", cdi.vendor, "class", class)
205+
emptySpecs = append(emptySpecs, class)
206+
continue
207+
}
200208
return fmt.Errorf("failed to save CDI spec: %v", err)
201209
}
202210
}
203211

212+
// Remove the classes with empty specs from the supported types.
213+
for _, emptySpec := range emptySpecs {
214+
delete(cdi.cdilibs, emptySpec)
215+
}
216+
204217
return nil
205218
}
206219

@@ -233,3 +246,18 @@ func (cdi *cdiHandler) getRootTransformer() transform.Transformer {
233246
func (cdi *cdiHandler) QualifiedName(class string, id string) string {
234247
return cdiparser.QualifiedName(cdi.vendor, class, id)
235248
}
249+
250+
// AdditionalDevices returns the optional CDI devices based on the device plugin
251+
// configuration.
252+
// Here we check for requested modes as well as whether the modes have a valid
253+
// CDI spec associated with them.
254+
func (cdi *cdiHandler) AdditionalDevices() []string {
255+
var devices []string
256+
for _, mode := range cdi.additionalModes {
257+
if cdi.cdilibs[mode] == nil {
258+
continue
259+
}
260+
devices = append(devices, cdi.QualifiedName(mode, "all"))
261+
}
262+
return devices
263+
}

internal/cdi/null.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,10 @@ func NewNullHandler() Interface {
3030
return &null{}
3131
}
3232

33+
func (n *null) AdditionalDevices() []string {
34+
return nil
35+
}
36+
3337
// CreateSpecFile is a no-op for the null handler.
3438
func (n *null) CreateSpecFile() error {
3539
return nil

internal/plugin/server.go

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -380,15 +380,8 @@ func (plugin *nvidiaDevicePlugin) updateResponseForCDI(response *pluginapi.Conta
380380
for _, channel := range plugin.imexChannels {
381381
devices = append(devices, plugin.cdiHandler.QualifiedName("imex-channel", channel.ID))
382382
}
383-
if *plugin.config.Flags.GDSEnabled {
384-
devices = append(devices, plugin.cdiHandler.QualifiedName("gds", "all"))
385-
}
386-
if *plugin.config.Flags.MOFEDEnabled {
387-
devices = append(devices, plugin.cdiHandler.QualifiedName("mofed", "all"))
388-
}
389-
if *plugin.config.Flags.GDRCopyEnabled {
390-
devices = append(devices, plugin.cdiHandler.QualifiedName("gdrcopy", "all"))
391-
}
383+
384+
devices = append(devices, plugin.cdiHandler.AdditionalDevices()...)
392385

393386
if len(devices) == 0 {
394387
return nil

internal/plugin/server_test.go

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ func TestCDIAllocateResponse(t *testing.T) {
3333
deviceIds []string
3434
deviceListStrategies []string
3535
CDIPrefix string
36+
AdditionalCDIDevices []string
3637
GDSEnabled bool
3738
MOFEDEnabled bool
3839
imexChannels []*imex.Channel
@@ -91,7 +92,7 @@ func TestCDIAllocateResponse(t *testing.T) {
9192
description: "mofed devices are selected if configured",
9293
deviceListStrategies: []string{"cdi-annotations"},
9394
CDIPrefix: "cdi.k8s.io/",
94-
MOFEDEnabled: true,
95+
AdditionalCDIDevices: []string{"nvidia.com/mofed=all"},
9596
expectedResponse: pluginapi.ContainerAllocateResponse{
9697
Annotations: map[string]string{
9798
"cdi.k8s.io/nvidia-device-plugin_uuid": "nvidia.com/mofed=all",
@@ -102,7 +103,7 @@ func TestCDIAllocateResponse(t *testing.T) {
102103
description: "gds devices are selected if configured",
103104
deviceListStrategies: []string{"cdi-annotations"},
104105
CDIPrefix: "cdi.k8s.io/",
105-
GDSEnabled: true,
106+
AdditionalCDIDevices: []string{"nvidia.com/gds=all"},
106107
expectedResponse: pluginapi.ContainerAllocateResponse{
107108
Annotations: map[string]string{
108109
"cdi.k8s.io/nvidia-device-plugin_uuid": "nvidia.com/gds=all",
@@ -114,8 +115,7 @@ func TestCDIAllocateResponse(t *testing.T) {
114115
deviceIds: []string{"gpu0"},
115116
deviceListStrategies: []string{"cdi-annotations"},
116117
CDIPrefix: "cdi.k8s.io/",
117-
GDSEnabled: true,
118-
MOFEDEnabled: true,
118+
AdditionalCDIDevices: []string{"nvidia.com/gds=all", "nvidia.com/mofed=all"},
119119
expectedResponse: pluginapi.ContainerAllocateResponse{
120120
Annotations: map[string]string{
121121
"cdi.k8s.io/nvidia-device-plugin_uuid": "nvidia.com/gpu=gpu0,nvidia.com/gds=all,nvidia.com/mofed=all",
@@ -152,6 +152,9 @@ func TestCDIAllocateResponse(t *testing.T) {
152152
QualifiedNameFunc: func(c string, s string) string {
153153
return "nvidia.com/" + c + "=" + s
154154
},
155+
AdditionalDevicesFunc: func() []string {
156+
return tc.AdditionalCDIDevices
157+
},
155158
},
156159
deviceListStrategies: deviceListStrategies,
157160
cdiAnnotationPrefix: tc.CDIPrefix,

0 commit comments

Comments
 (0)