Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 59 additions & 25 deletions cmd/containerd-shim-runhcs-v1/pod.go
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,6 @@ func createPod(ctx context.Context, events publisher, req *task.CreateTaskReques
parent.Close()
return nil, err
}

} else if oci.IsJobContainer(s) {
// If we're making a job container fake a task (i.e reuse the wcowPodSandbox logic)
p.sandboxTask = newWcowPodSandboxTask(ctx, events, req.ID, req.Bundle, parent, "")
Expand All @@ -224,7 +223,7 @@ func createPod(ctx context.Context, events publisher, req *task.CreateTaskReques
}); err != nil {
return nil, err
}
p.jobContainer = true

return &p, nil
} else if !isWCOW {
return nil, errors.Wrap(errdefs.ErrFailedPrecondition, "oci spec does not contain WCOW or LCOW spec")
Expand Down Expand Up @@ -339,11 +338,6 @@ type pod struct {
// It MUST be treated as read only in the lifetime of the pod.
host *uvm.UtilityVM

// jobContainer specifies whether this pod is for WCOW job containers only.
//
// It MUST be treated as read only in the lifetime of the pod.
jobContainer bool

// spec is the OCI runtime specification for the pod sandbox container.
spec *specs.Spec

Expand All @@ -368,24 +362,9 @@ func (p *pod) CreateTask(ctx context.Context, req *task.CreateTaskRequest, s *sp
return nil, errors.Wrapf(errdefs.ErrAlreadyExists, "task with id: '%s' already exists id pod: '%s'", req.ID, p.id)
}

if p.jobContainer {
// This is a short circuit to make sure that all containers in a pod will have
// the same IP address/be added to the same compartment.
//
// There will need to be OS work needed to support this scenario, so for now we need to block on
// this.
if !oci.IsJobContainer(s) {
return nil, errors.New("cannot create a normal process isolated container if the pod sandbox is a job container")
}
// Pass through some annotations from the pod spec that if specified will need to be made available
// to every container as well. Kubernetes only passes annotations to RunPodSandbox so there needs to be
// a way for individual containers to get access to these.
oci.SandboxAnnotationsPassThrough(
p.spec.Annotations,
s.Annotations,
annotations.HostProcessInheritUser,
annotations.HostProcessRootfsLocation,
)
err = p.updateConfigForHostProcessContainer(s)
if err != nil {
return nil, err
}

ct, sid, err := oci.GetSandboxTypeAndID(s.Annotations)
Expand Down Expand Up @@ -502,3 +481,58 @@ func (p *pod) DeleteTask(ctx context.Context, tid string) error {

return nil
}

// updateConfigForHostProcessContainer validates and adjusts a container spec
// against the pod sandbox spec for HostProcess (HPC) scenarios:
// - Rejects a hypervisor-isolated HPC when the parent UVM does not
// advertise HostProcess container support.
// - Rejects a non-HPC container inside a process-isolated HPC sandbox
// (all containers in such a pod must share the host job/network).
// - Propagates HPC pod-level annotations (e.g. inherit-user, rootfs
// location) onto each container, since CRI only delivers them at pod
// create time.
// - Clears HostProcessInheritUser on any non-HPC container so a stray or
// propagated annotation can't grant it SYSTEM.
func (p *pod) updateConfigForHostProcessContainer(s *specs.Spec) error {
isProcessIsolatedPrivilegedSandbox := oci.IsJobContainer(p.spec) && !oci.IsIsolated(p.spec)
isProcessIsolatedPrivilegedContainer := oci.IsJobContainer(s) && !oci.IsIsolated(s)
isHypervisorIsolatedPrivilegedContainer := oci.IsJobContainer(s) && oci.IsIsolated(s)

// Reject hypervisor-isolated HPCs when the UVM doesn't support them.
if isHypervisorIsolatedPrivilegedContainer &&
p.host != nil && !p.host.HostProcessContainerSupported() {
return fmt.Errorf("UVM does not support HostProcess containers")
}

if isProcessIsolatedPrivilegedSandbox || isHypervisorIsolatedPrivilegedContainer {
if isProcessIsolatedPrivilegedSandbox && !isProcessIsolatedPrivilegedContainer {
// This is a short circuit to make sure that all containers in a pod will have
// the same IP address/be added to the same compartment.
//
// There will need to be OS work needed to support this scenario, so for now we need to block on
// this.
//
// If the pod sandbox is a process-isolated HPC then the container must also be a
// process-isolated HPC.
return errors.New("cannot create a normal process isolated container if the pod sandbox is a job container running on host")
}

// Pass through some annotations from the pod spec that if specified will need to be made available
// to every container as well. Kubernetes only passes annotations to RunPodSandbox so there needs to be
// a way for individual containers to get access to these.
oci.SandboxAnnotationsPassThrough(
p.spec.Annotations,
s.Annotations,
annotations.HostProcessInheritUser,
annotations.HostProcessRootfsLocation,
)
}

// Non-HPC containers must never carry HostProcessInheritUser - it would
// otherwise grant SYSTEM to a non-privileged exec.
if !oci.IsJobContainer(s) && s.Annotations[annotations.HostProcessInheritUser] != "" {
s.Annotations[annotations.HostProcessInheritUser] = "false"
}

return nil
}
158 changes: 158 additions & 0 deletions cmd/containerd-shim-runhcs-v1/pod_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,12 @@ import (
"context"
"fmt"
"math/rand"
"reflect"
"strconv"
"sync"
"testing"

"github.com/Microsoft/hcsshim/pkg/annotations"
task "github.com/containerd/containerd/api/runtime/task/v2"
"github.com/containerd/errdefs"
specs "github.com/opencontainers/runtime-spec/specs-go"
Expand Down Expand Up @@ -388,3 +390,159 @@ func Test_pod_DeleteTask_TaskID_Not_Created(t *testing.T) {
err := p.KillTask(context.Background(), strconv.Itoa(rand.Int()), "", 0xf, true)
verifyExpectedError(t, nil, err, errdefs.ErrNotFound)
}

func getWCOWSpecsWithAnnotations(annotation map[string]string, isIsolated bool, processUser string) *specs.Spec {
spec := &specs.Spec{
Windows: &specs.Windows{},
Annotations: annotation,
Process: &specs.Process{
User: specs.User{Username: processUser},
},
}

if isIsolated {
spec.Windows.HyperV = &specs.WindowsHyperV{}
}
return spec
}

func Test_pod_UpdateConfigForHostProcessContainer(t *testing.T) {
testCases := []struct {
testName string
podSpec *specs.Spec
containerSpec *specs.Spec
expectedContainerSpec *specs.Spec
expectedError string
}{
{
testName: "privileged container in unprivileged pod (isolated hpc)",
podSpec: getWCOWSpecsWithAnnotations(map[string]string{}, true, ""),
containerSpec: getWCOWSpecsWithAnnotations(map[string]string{
annotations.HostProcessContainer: "true",
}, true, ""),
expectedContainerSpec: getWCOWSpecsWithAnnotations(map[string]string{
annotations.HostProcessContainer: "true",
}, true, ""),
expectedError: "",
},
{
testName: "privileged container in privileged pod (isolated hpc)",
podSpec: getWCOWSpecsWithAnnotations(map[string]string{
annotations.HostProcessContainer: "true",
}, true, ""),
containerSpec: getWCOWSpecsWithAnnotations(map[string]string{
annotations.HostProcessContainer: "true",
}, true, ""),
expectedContainerSpec: getWCOWSpecsWithAnnotations(map[string]string{
annotations.HostProcessContainer: "true",
}, true, ""),
expectedError: "",
},
{
testName: "normal container in privileged pod (process hpc)",
podSpec: getWCOWSpecsWithAnnotations(map[string]string{
annotations.HostProcessContainer: "true",
}, false, ""),
containerSpec: getWCOWSpecsWithAnnotations(map[string]string{}, false, ""),
expectedContainerSpec: nil,
expectedError: "cannot create a normal process isolated container if the pod sandbox is a job container running on host",
},
{
testName: "normal container in privileged pod (isolated hpc)",
podSpec: getWCOWSpecsWithAnnotations(map[string]string{
annotations.HostProcessContainer: "true",
}, true, ""),
containerSpec: getWCOWSpecsWithAnnotations(map[string]string{}, false, ""),
expectedContainerSpec: getWCOWSpecsWithAnnotations(map[string]string{}, false, ""),
expectedError: "",
},
{
testName: "annotations passthrough (isolated hpc)",
podSpec: getWCOWSpecsWithAnnotations(map[string]string{
annotations.HostProcessContainer: "true",
annotations.HostProcessInheritUser: "true",
annotations.HostProcessRootfsLocation: "C:\\test",
}, true, ""),
containerSpec: getWCOWSpecsWithAnnotations(map[string]string{
annotations.HostProcessContainer: "true",
}, true, ""),
expectedContainerSpec: getWCOWSpecsWithAnnotations(map[string]string{
annotations.HostProcessContainer: "true",
annotations.HostProcessInheritUser: "true",
annotations.HostProcessRootfsLocation: "C:\\test",
}, true, ""),
expectedError: "",
},
{
testName: "annotations passthrough (process hpc)",
podSpec: getWCOWSpecsWithAnnotations(map[string]string{
annotations.HostProcessContainer: "true",
annotations.HostProcessInheritUser: "true",
annotations.HostProcessRootfsLocation: "C:\\test",
}, false, ""),
containerSpec: getWCOWSpecsWithAnnotations(map[string]string{
annotations.HostProcessContainer: "true",
}, false, ""),
expectedContainerSpec: getWCOWSpecsWithAnnotations(map[string]string{
annotations.HostProcessContainer: "true",
annotations.HostProcessInheritUser: "true",
annotations.HostProcessRootfsLocation: "C:\\test",
}, false, ""),
expectedError: "",
},
{
testName: "no annotation passthrough for normal containers (isolated hpc)",
podSpec: getWCOWSpecsWithAnnotations(map[string]string{
annotations.HostProcessInheritUser: "true",
annotations.HostProcessRootfsLocation: "C:\\test",
}, true, ""),
containerSpec: getWCOWSpecsWithAnnotations(map[string]string{}, true, ""),
expectedContainerSpec: getWCOWSpecsWithAnnotations(map[string]string{}, true, ""),
expectedError: "",
},
{
testName: "no changes in user process for normal containers (isolated hpc)",
podSpec: getWCOWSpecsWithAnnotations(map[string]string{
annotations.HostProcessContainer: "true",
annotations.HostProcessInheritUser: "true",
}, true, ""),
containerSpec: getWCOWSpecsWithAnnotations(map[string]string{}, true, ""),
expectedContainerSpec: getWCOWSpecsWithAnnotations(map[string]string{}, true, ""),
expectedError: "",
},
{
testName: "set inherit user annotation to false for normal containers (isolated hpc)",
podSpec: getWCOWSpecsWithAnnotations(map[string]string{
annotations.HostProcessContainer: "true",
}, true, ""),
containerSpec: getWCOWSpecsWithAnnotations(map[string]string{
annotations.HostProcessInheritUser: "true",
}, true, ""),
expectedContainerSpec: getWCOWSpecsWithAnnotations(map[string]string{
annotations.HostProcessInheritUser: "false",
}, true, ""),
expectedError: "",
},
}

for _, tc := range testCases {
t.Run(tc.testName, func(t *testing.T) {
p := &pod{spec: tc.podSpec}

err := p.updateConfigForHostProcessContainer(tc.containerSpec)
if err == nil && tc.expectedError != "" {
t.Fatalf("should have failed with '%s'", tc.expectedError)
}
if err != nil && err.Error() != tc.expectedError {
t.Fatalf("should have failed with '%s', actual: '%s'", tc.expectedError, err.Error())
}

if tc.expectedContainerSpec != nil {
equal := reflect.DeepEqual(tc.containerSpec, tc.expectedContainerSpec)
if !equal {
t.Fatalf("containerSpec expected: %+v, got: %+v", tc.expectedContainerSpec, tc.containerSpec)
}
}
})
}
}
64 changes: 63 additions & 1 deletion cmd/containerd-shim-runhcs-v1/task_hcs.go
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,9 @@ func createContainer(
return nil, nil, err
}

if oci.IsJobContainer(s) {
// For Job Container which runs on host, we create the same using shim logic.
// If the request was for job container within UVM, then the request is passed along to GCS.
if oci.IsJobContainer(s) && !oci.IsIsolated(s) {
opts := jobcontainers.CreateOptions{WCOWLayers: wcowLayers}
container, resources, err = jobcontainers.Create(ctx, id, s, opts)
if err != nil {
Expand Down Expand Up @@ -212,6 +214,12 @@ func newHcsTask(
shimOpts = v.(*runhcsopts.Options)
}

// For Isolated Job containers, we need special handling wherein if the command line
// is not specified, then we add 'cmd /C' by default.
if oci.IsJobContainer(s) && oci.IsIsolated(s) {
handleProcessArgsForIsolatedJobContainer(s, s.Process)
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is this part of it special for HPC? I dont get it, this is true for all Windows containers right? At this point its just another container

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's specific to HostProcess containers because of when the process is joined to the silo.

For a normal WCOW container (process- or Hyper-V-isolated), the guest creates the workload process from inside the container silo, so CreateProcess resolves the image name and PATH using the silo's filesystem view — the bind-mounted rootfs, the container's System32, etc. A bare echo / dir or a binary that lives only in the rootfs works fine because the silo is already in scope at creation time.

For a HostProcess container the process is created outside the silo and only assigned to the silo's job object after the fact. That means image-name resolution happens against the launcher's view (host/UVM System32, Windows, host PATH), not the container's rootfs. Shell builtins (echo, set, redirection, …) and any rootfs-only executable would fail with ERROR_FILE_NOT_FOUND.

Wrapping with cmd /c sidesteps this: cmd.exe is guaranteed to exist under the launcher's System32, and once it's joined to the silo it runs inside the container's filesystem view, where it can resolve builtins and rootfs-relative paths normally. Regular WCOW containers don't need this because they never have that "created outside, joined after" gap.

}

// Default to an infinite timeout (zero value)
var ioRetryTimeout time.Duration
if shimOpts != nil {
Expand Down Expand Up @@ -364,6 +372,10 @@ func (ht *hcsTask) CreateExec(ctx context.Context, req *task.ExecProcessRequest,
return errors.Wrapf(errdefs.ErrFailedPrecondition, "exec: '' in task: '%s' must be running to create additional execs", ht.id)
}

if oci.IsJobContainer(ht.taskSpec) && oci.IsIsolated(ht.taskSpec) {
handleProcessArgsForIsolatedJobContainer(ht.taskSpec, spec)
}

io, err := cmd.NewUpstreamIO(ctx, req.ID, req.Stdout, req.Stderr, req.Stdin, req.Terminal, ht.ioRetryTimeout)
if err != nil {
return err
Expand Down Expand Up @@ -1015,6 +1027,56 @@ func (ht *hcsTask) requestAddContainerMount(ctx context.Context, resourcePath st
return ht.c.Modify(ctx, modification)
}

// handleProcessArgsForIsolatedJobContainer adjusts the process spec for a
// HostProcess container running inside a hypervisor-isolated UVM.
//
// Why the "cmd /c" wrap:
// - For a regular WCOW container, the guest creates the workload process
// from within the container silo, so the silo's filesystem view (including
// the bind-mounted container rootfs) is in scope for path resolution.
// - For a HostProcess container the guest creates the process from outside
// the silo and only attaches it to the silo's job object after creation.
// Executable-name resolution therefore uses the launcher's view (System32,
// Windows, PATH) and cannot see binaries that live only inside the
// container rootfs, nor resolve shell builtins like `echo`, `dir`, `set`,
// redirection, etc. Anything not present as a real .exe on the standard
// search path would fail with ERROR_FILE_NOT_FOUND.
// - cmd.exe always exists under System32 (so the launcher can find it) and,
// once joined to the silo's job, runs inside the container's filesystem
// view where it can resolve builtins and any rootfs-relative binary.
func handleProcessArgsForIsolatedJobContainer(taskSpec *specs.Spec, p *specs.Process) {
if p == nil {
return
}

// Wrap the process invocation with "cmd /c" so it runs via cmd.exe. Only mutate
// whichever of CommandLine/Args will actually be used (CommandLine wins if set),
// and skip if it already invokes cmd.
switch {
case p.CommandLine != "":
fields := strings.Fields(p.CommandLine)
base := ""
if len(fields) > 0 {
base = strings.ToLower(filepath.Base(fields[0]))
}
if base != "cmd" && base != "cmd.exe" {
p.CommandLine = fmt.Sprintf("cmd /c %s", p.CommandLine)
}
case len(p.Args) > 0:
base := strings.ToLower(filepath.Base(strings.TrimSpace(p.Args[0])))
if base != "cmd" && base != "cmd.exe" {
p.Args = append([]string{"cmd", "/c"}, p.Args...)
}
}
Comment thread
rawahars marked this conversation as resolved.

// HostProcessInheritUser is set to explicit true or false during container create.
if taskSpec != nil && taskSpec.Annotations[annotations.HostProcessInheritUser] == "true" {
// For privileged containers within the sandbox, if the annotation to inherit user is set
// we will set the user to NT AUTHORITY\SYSTEM.
p.User.Username = `NT AUTHORITY\SYSTEM`
}
}

func isMountTypeSupported(hostPath, mountType string) bool {
// currently we only support mounting of host volumes/directories
switch mountType {
Expand Down
Loading
Loading