Skip to content

Commit 87f1fde

Browse files
pohlybart0shoxxenix
committed
WIP: adapt implementation to new API
The structured parameter allocation logic was written from scratch in staging/src/k8s.io/dynamic-resource-allocation/structured. Besides the new features (amount, admin access) and API it now supports backtracking when the initial device selection doesn't lead to a complete allocation of all claims. "make" works. A local cluster comes up and can run the DRA E2E tests. pkg/scheduler/framework/plugins/dynamicresources unit tests pass. test/integration/scheduler_perf unit tests pass for DRA. Many other unit tests haven't been adapted yet and coverage of new code is poor. DRA: use VAP to control "admin access" permissions The advantages of using a validation admission policy (VAP) are that no changes are needed in Kubernetes and that admins have full flexibility if and how they want to control which users are allowed to use "admin access" in their requests. The downside is that without admins taking actions, the feature is enabled out-of-the-box in a cluster. Documentation for DRA will have to make it very clear that something needs to be done in multi-tenant clusters. The test/e2e/testing-manifests/dra/admin-access-policy.yaml shows how to do this. The corresponding E2E tests ensures that it actually works as intended. For some reason, adding the namespace to the message expression leads to a type check errors, so it's currently commented out. DRA kubelet: always call NodePrepareResources, even if not used This could be useful for drivers where that call has some effect other than injecting CDI device IDs into containers. DRA: remove obsolete support for deterministic claim name Now only the claim name as recorded in the pod status matters, which can be looked up without listing claims. DRA kubelet: extend and clean up logging Because of a faulty E2E test, kubelet was told to contact the wrong driver for a claim. This was not visible in the kubelet log output. Now changes to the claim info cache are getting logged. While at it, naming of variables and some existing log output gets harmonized. Co-authored-by: Ed Bartosh <[email protected]> Co-authored-by: Oksana Baranova <[email protected]>
1 parent c10ca9f commit 87f1fde

File tree

73 files changed

+3996
-5911
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

73 files changed

+3996
-5911
lines changed

pkg/controller/resourceclaim/controller_test.go

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -528,7 +528,12 @@ func makeClaim(name, namespace, classname string, owner *metav1.OwnerReference)
528528
claim := &resourceapi.ResourceClaim{
529529
ObjectMeta: metav1.ObjectMeta{Name: name, Namespace: namespace},
530530
Spec: resourceapi.ResourceClaimSpec{
531-
ResourceClassName: classname,
531+
Devices: resourceapi.DeviceClaim{
532+
Requests: []resourceapi.DeviceRequest{{
533+
Name: "req1",
534+
DeviceClassName: classname,
535+
}},
536+
},
532537
},
533538
}
534539
if owner != nil {
@@ -547,7 +552,12 @@ func makeGeneratedClaim(podClaimName, generateName, namespace, classname string,
547552
Annotations: map[string]string{"resource.kubernetes.io/pod-claim-name": podClaimName},
548553
},
549554
Spec: resourceapi.ResourceClaimSpec{
550-
ResourceClassName: classname,
555+
Devices: resourceapi.DeviceClaim{
556+
Requests: []resourceapi.DeviceRequest{{
557+
Name: "req1",
558+
DeviceClassName: classname,
559+
}},
560+
},
551561
},
552562
}
553563
if owner != nil {
@@ -606,7 +616,12 @@ func makeTemplate(name, namespace, classname string) *resourceapi.ResourceClaimT
606616
ObjectMeta: metav1.ObjectMeta{Name: name, Namespace: namespace},
607617
Spec: resourceapi.ResourceClaimTemplateSpec{
608618
Spec: resourceapi.ResourceClaimSpec{
609-
ResourceClassName: classname,
619+
Devices: resourceapi.DeviceClaim{
620+
Requests: []resourceapi.DeviceRequest{{
621+
Name: "req1",
622+
DeviceClassName: classname,
623+
}},
624+
},
610625
},
611626
},
612627
}

pkg/kubelet/cm/container_manager_linux.go

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -660,10 +660,6 @@ func (cm *containerManagerImpl) GetResources(pod *v1.Pod, container *v1.Containe
660660
if err != nil {
661661
return nil, err
662662
}
663-
// NOTE: Passing CDI device names as annotations is a temporary solution
664-
// It will be removed after all runtimes are updated
665-
// to get CDI device names from the ContainerConfig.CDIDevices field
666-
opts.Annotations = append(opts.Annotations, resOpts.Annotations...)
667663
opts.CDIDevices = append(opts.CDIDevices, resOpts.CDIDevices...)
668664
}
669665
// Allocate should already be called during predicateAdmitHandler.Admit(),
@@ -968,15 +964,18 @@ func (cm *containerManagerImpl) GetDynamicResources(pod *v1.Pod, container *v1.C
968964
// a set of CDIDevices from a different kubelet plugin. In the future we may want to
969965
// include the name of the kubelet plugin and/or other types of resources that are
970966
// not CDIDevices (assuming the DRAmanager supports this).
971-
for _, klPluginCdiDevices := range containerClaimInfo.CDIDevices {
967+
for /* driverName */ _, driverState := range containerClaimInfo.Drivers {
972968
var cdiDevices []*podresourcesapi.CDIDevice
973-
for _, cdiDevice := range klPluginCdiDevices {
974-
cdiDevices = append(cdiDevices, &podresourcesapi.CDIDevice{Name: cdiDevice})
969+
for /*requestName */ _, requestDevices := range driverState.CDIDevices {
970+
for _, cdiDevice := range requestDevices {
971+
cdiDevices = append(cdiDevices, &podresourcesapi.CDIDevice{Name: cdiDevice})
972+
}
975973
}
976974
claimResources = append(claimResources, &podresourcesapi.ClaimResource{CDIDevices: cdiDevices})
977975
}
978976
containerDynamicResource := podresourcesapi.DynamicResource{
979-
ClassName: containerClaimInfo.ClassName,
977+
// As of 1.31, ResourceClaims are not associated with a single class anymore.
978+
// ClassName: ???
980979
ClaimName: containerClaimInfo.ClaimName,
981980
ClaimNamespace: containerClaimInfo.Namespace,
982981
ClaimResources: claimResources,

pkg/kubelet/cm/devicemanager/topology_hints_test.go

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -440,7 +440,6 @@ func TestTopologyAlignedAllocation(t *testing.T) {
440440
opts: &pluginapi.DevicePluginOptions{GetPreferredAllocationAvailable: true},
441441
}
442442
}
443-
444443
allocated, err := m.devicesToAllocate("podUID", "containerName", tc.resource, tc.request, sets.New[string]())
445444
if err != nil {
446445
t.Errorf("Unexpected error: %v", err)

pkg/kubelet/cm/dra/claiminfo.go

Lines changed: 26 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -17,26 +17,22 @@ limitations under the License.
1717
package dra
1818

1919
import (
20+
"errors"
2021
"fmt"
2122
"sync"
2223

2324
resourceapi "k8s.io/api/resource/v1alpha3"
2425
"k8s.io/apimachinery/pkg/types"
2526
"k8s.io/apimachinery/pkg/util/sets"
2627
"k8s.io/kubernetes/pkg/kubelet/cm/dra/state"
27-
"k8s.io/kubernetes/pkg/kubelet/cm/util/cdi"
28-
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
2928
)
3029

3130
// ClaimInfo holds information required
3231
// to prepare and unprepare a resource claim.
3332
// +k8s:deepcopy-gen=true
3433
type ClaimInfo struct {
3534
state.ClaimInfoState
36-
// annotations is a mapping of container annotations per DRA plugin associated with
37-
// a prepared resource
38-
annotations map[string][]kubecontainer.Annotation
39-
prepared bool
35+
prepared bool
4036
}
4137

4238
// claimInfoCache is a cache of processed resource claims keyed by namespace/claimname.
@@ -47,89 +43,50 @@ type claimInfoCache struct {
4743
}
4844

4945
// newClaimInfoFromClaim creates a new claim info from a resource claim.
50-
func newClaimInfoFromClaim(claim *resourceapi.ResourceClaim) *ClaimInfo {
51-
// Grab the allocation.resourceHandles. If there are no
52-
// allocation.resourceHandles, create a single resourceHandle with no
53-
// content. This will trigger processing of this claim by a single
54-
// kubelet plugin whose name matches resourceClaim.Status.DriverName.
55-
resourceHandles := claim.Status.Allocation.ResourceHandles
56-
if len(resourceHandles) == 0 {
57-
resourceHandles = make([]resourceapi.ResourceHandle, 1)
58-
}
46+
// It verifies that the kubelet can handle the claim.
47+
func newClaimInfoFromClaim(claim *resourceapi.ResourceClaim) (*ClaimInfo, error) {
5948
claimInfoState := state.ClaimInfoState{
60-
DriverName: claim.Status.DriverName,
61-
ClassName: claim.Spec.ResourceClassName,
62-
ClaimUID: claim.UID,
63-
ClaimName: claim.Name,
64-
Namespace: claim.Namespace,
65-
PodUIDs: sets.New[string](),
66-
ResourceHandles: resourceHandles,
67-
CDIDevices: make(map[string][]string),
49+
ClaimUID: claim.UID,
50+
ClaimName: claim.Name,
51+
Namespace: claim.Namespace,
52+
PodUIDs: sets.New[string](),
53+
Drivers: make(map[string]state.DriverState),
54+
}
55+
if claim.Status.Allocation == nil {
56+
return nil, errors.New("not allocated")
57+
}
58+
for _, result := range claim.Status.Allocation.Devices.Results {
59+
claimInfoState.Drivers[result.Driver] = state.DriverState{
60+
CDIDevices: make(map[string][]string),
61+
}
6862
}
6963
info := &ClaimInfo{
7064
ClaimInfoState: claimInfoState,
71-
annotations: make(map[string][]kubecontainer.Annotation),
7265
prepared: false,
7366
}
74-
return info
67+
return info, nil
7568
}
7669

7770
// newClaimInfoFromClaim creates a new claim info from a checkpointed claim info state object.
7871
func newClaimInfoFromState(state *state.ClaimInfoState) *ClaimInfo {
7972
info := &ClaimInfo{
8073
ClaimInfoState: *state.DeepCopy(),
81-
annotations: make(map[string][]kubecontainer.Annotation),
8274
prepared: false,
8375
}
84-
for pluginName, devices := range info.CDIDevices {
85-
annotations, _ := cdi.GenerateAnnotations(info.ClaimUID, info.DriverName, devices)
86-
info.annotations[pluginName] = append(info.annotations[pluginName], annotations...)
87-
}
8876
return info
8977
}
9078

9179
// setCDIDevices adds a set of CDI devices to the claim info.
92-
func (info *ClaimInfo) setCDIDevices(pluginName string, cdiDevices []string) error {
93-
// NOTE: Passing CDI device names as annotations is a temporary solution
94-
// It will be removed after all runtimes are updated
95-
// to get CDI device names from the ContainerConfig.CDIDevices field
96-
annotations, err := cdi.GenerateAnnotations(info.ClaimUID, info.DriverName, cdiDevices)
97-
if err != nil {
98-
return fmt.Errorf("failed to generate container annotations, err: %+v", err)
99-
}
100-
101-
if info.CDIDevices == nil {
102-
info.CDIDevices = make(map[string][]string)
103-
}
104-
105-
if info.annotations == nil {
106-
info.annotations = make(map[string][]kubecontainer.Annotation)
80+
func (info *ClaimInfo) setCDIDevices(driverName string, requestName string, cdiDevices []string) {
81+
if info.Drivers == nil {
82+
info.Drivers = make(map[string]state.DriverState)
10783
}
108-
109-
info.CDIDevices[pluginName] = cdiDevices
110-
info.annotations[pluginName] = annotations
111-
112-
return nil
113-
}
114-
115-
// annotationsAsList returns container annotations as a single list.
116-
func (info *ClaimInfo) annotationsAsList() []kubecontainer.Annotation {
117-
var lst []kubecontainer.Annotation
118-
for _, v := range info.annotations {
119-
lst = append(lst, v...)
120-
}
121-
return lst
122-
}
123-
124-
// cdiDevicesAsList returns a list of CDIDevices from the provided claim info.
125-
func (info *ClaimInfo) cdiDevicesAsList() []kubecontainer.CDIDevice {
126-
var cdiDevices []kubecontainer.CDIDevice
127-
for _, devices := range info.CDIDevices {
128-
for _, device := range devices {
129-
cdiDevices = append(cdiDevices, kubecontainer.CDIDevice{Name: device})
130-
}
84+
driverState := info.Drivers[driverName]
85+
if driverState.CDIDevices == nil {
86+
driverState.CDIDevices = make(map[string][]string)
13187
}
132-
return cdiDevices
88+
driverState.CDIDevices[requestName] = cdiDevices
89+
info.Drivers[driverName] = driverState
13390
}
13491

13592
// addPodReference adds a pod reference to the claim info.

0 commit comments

Comments
 (0)