diff --git a/config-linux.md b/config-linux.md
index 504f6c203..a8cc65cc8 100644
--- a/config-linux.md
+++ b/config-linux.md
@@ -791,6 +791,45 @@ and may use a maximum memory bandwidth of 20% on socket 0 and 70% on socket 1.
}
```
+## Memory policy
+
+**`memoryPolicy`** (object, OPTIONAL) sets the NUMA memory policy for the container.
+For more information see the [set_mempolicy(2)][set_mempolicy.2] man page.
+
+* **`mode`** *(string, REQUIRED)* - the memory policy mode passed to set_mempolicy(2).
+
+ A valid list of constants is shown below.
+
+ * `MPOL_DEFAULT`
+ * `MPOL_BIND`
+ * `MPOL_INTERLEAVE`
+ * `MPOL_WEIGHTED_INTERLEAVE`
+ * `MPOL_PREFERRED`
+ * `MPOL_PREFERRED_MANY`
+ * `MPOL_LOCAL`
+
+* **`nodes`** *(string, REQUIRED)* - list of memory nodes from which the nodemask passed to set_mempolicy(2) is constructed. This is a comma-separated list, with dashes to represent ranges. For example, `0-3,7` represents memory nodes 0, 1, 2, 3, and 7.
+
+* **`flags`** *(array of strings, OPTIONAL)* - list of flags to use with set_mempolicy(2).
+
+ A valid list of constants is shown below.
+
+ * `MPOL_F_NUMA_BALANCING`
+ * `MPOL_F_RELATIVE_NODES`
+ * `MPOL_F_STATIC_NODES`
+
+### Example
+
+```json
+"linux": {
+ "memoryPolicy": {
+ "mode": "MPOL_INTERLEAVE",
+ "nodes": "2-3",
+ "flags": ["MPOL_F_STATIC_NODES"]
+ }
+}
+```
+
## Sysctl
**`sysctl`** (object, OPTIONAL) allows kernel parameters to be modified at runtime for the container.
@@ -1073,6 +1112,7 @@ subset of the available options.
[tmpfs]: https://www.kernel.org/doc/Documentation/filesystems/tmpfs.txt
[full.4]: https://man7.org/linux/man-pages/man4/full.4.html
+[set_mempolicy.2]: https://man7.org/linux/man-pages/man2/set_mempolicy.2.html
[mknod.1]: https://man7.org/linux/man-pages/man1/mknod.1.html
[mknod.2]: https://man7.org/linux/man-pages/man2/mknod.2.html
[namespaces.7_2]: https://man7.org/linux/man-pages/man7/namespaces.7.html
diff --git a/features-linux.md b/features-linux.md
index a3488e5a7..96b369281 100644
--- a/features-linux.md
+++ b/features-linux.md
@@ -195,6 +195,37 @@ Irrelevant to the availability of SELinux on the host operating system.
}
```
+## MemoryPolicy
+
+**`memoryPolicy`** (object, OPTIONAL) represents the runtime's implementation status of memoryPolicy.
+
+* **`modes`** (array of strings, OPTIONAL). Recognized memory policies. Includes policies that may not be supported by the host operating system.
+ The runtime MUST recognize the elements in this array as the [`mode` of `linux.memoryPolicy` objects in `config.json`](config-linux.md#memory-policy).
+
+* **`flags`** (array of strings, OPTIONAL). Recognized flags for memory policies. Includes flags that may not be supported by the host operating system.
+ The runtime MUST recognize the elements in this array as the [`flags` property of the `linux.memoryPolicy` object in `config.json`](config-linux.md#memory-policy).
+
+### Example
+
+```json
+"memoryPolicy": {
+ "modes": [
+ "MPOL_DEFAULT",
+ "MPOL_BIND",
+ "MPOL_INTERLEAVE",
+ "MPOL_WEIGHTED_INTERLEAVE",
+ "MPOL_PREFERRED",
+ "MPOL_PREFERRED_MANY",
+ "MPOL_LOCAL"
+ ],
+ "flags": [
+ "MPOL_F_NUMA_BALANCING",
+ "MPOL_F_RELATIVE_NODES",
+ "MPOL_F_STATIC_NODES"
+ ]
+}
+```
+
## Intel RDT
**`intelRdt`** (object, OPTIONAL) represents the runtime's implementation status of Intel RDT.
diff --git a/features.md b/features.md
index 24cd65c3b..2c65456a8 100644
--- a/features.md
+++ b/features.md
@@ -354,6 +354,22 @@ Here is a full example for reference.
"selinux": {
"enabled": true
},
+ "memoryPolicy": {
+ "modes": [
+ "MPOL_DEFAULT",
+ "MPOL_BIND",
+ "MPOL_INTERLEAVE",
+ "MPOL_WEIGHTED_INTERLEAVE",
+ "MPOL_PREFERRED",
+ "MPOL_PREFERRED_MANY",
+ "MPOL_LOCAL"
+ ],
+ "flags": [
+ "MPOL_F_NUMA_BALANCING",
+ "MPOL_F_RELATIVE_NODES",
+ "MPOL_F_STATIC_NODES"
+ ]
+ },
"intelRdt": {
"enabled": true
}
diff --git a/schema/config-linux.json b/schema/config-linux.json
index add4cf0e4..ba567f6e7 100644
--- a/schema/config-linux.json
+++ b/schema/config-linux.json
@@ -283,6 +283,23 @@
}
}
},
+ "memoryPolicy": {
+ "type": "object",
+ "properties": {
+ "mode": {
+ "$ref": "defs-linux.json#/definitions/MemoryPolicyMode"
+ },
+ "nodes": {
+ "type": "string"
+ },
+ "flags": {
+ "type": "array",
+ "items": {
+ "$ref": "defs-linux.json#/definitions/MemoryPolicyFlag"
+ }
+ }
+ }
+ },
"personality": {
"type": "object",
"$ref": "defs-linux.json#/definitions/Personality"
diff --git a/schema/defs-linux.json b/schema/defs-linux.json
index 4bf73d0fb..ec34445e0 100644
--- a/schema/defs-linux.json
+++ b/schema/defs-linux.json
@@ -272,6 +272,26 @@
"allow"
]
},
+ "MemoryPolicyMode": {
+ "type": "string",
+ "enum": [
+ "MPOL_DEFAULT",
+ "MPOL_BIND",
+ "MPOL_INTERLEAVE",
+ "MPOL_WEIGHTED_INTERLEAVE",
+ "MPOL_PREFERRED",
+ "MPOL_PREFERRED_MANY",
+ "MPOL_LOCAL"
+ ]
+ },
+ "MemoryPolicyFlag": {
+ "type": "string",
+ "enum": [
+ "MPOL_F_NUMA_BALANCING",
+ "MPOL_F_RELATIVE_NODES",
+ "MPOL_F_STATIC_NODES"
+ ]
+ },
"NetworkInterfacePriority": {
"type": "object",
"properties": {
diff --git a/specs-go/config.go b/specs-go/config.go
index 854290da2..01e003310 100644
--- a/specs-go/config.go
+++ b/specs-go/config.go
@@ -251,6 +251,8 @@ type Linux struct {
// IntelRdt contains Intel Resource Director Technology (RDT) information for
// handling resource constraints and monitoring metrics (e.g., L3 cache, memory bandwidth) for the container
IntelRdt *LinuxIntelRdt `json:"intelRdt,omitempty"`
+ // MemoryPolicy contains NUMA memory policy for the container.
+ MemoryPolicy *LinuxMemoryPolicy `json:"memoryPolicy,omitempty"`
// Personality contains configuration for the Linux personality syscall
Personality *LinuxPersonality `json:"personality,omitempty"`
// TimeOffsets specifies the offset for supporting time namespaces.
@@ -855,6 +857,19 @@ type LinuxIntelRdt struct {
EnableMBM bool `json:"enableMBM,omitempty"`
}
+// LinuxMemoryPolicy represents input for the set_mempolicy syscall.
+type LinuxMemoryPolicy struct {
+ // Mode for the set_mempolicy syscall.
+ Mode MemoryPolicyModeType `json:"mode"`
+
+ // Nodes representing the nodemask for the set_mempolicy syscall in comma separated ranges format.
+ // Format: "<node0>-<node1>,<node2>,<node3>-<node4>,..."
+ Nodes string `json:"nodes"`
+
+ // Flags for the set_mempolicy syscall.
+ Flags []MemoryPolicyFlagType `json:"flags,omitempty"`
+}
+
// ZOS contains platform-specific configuration for z/OS based containers.
type ZOS struct {
// Namespaces contains the namespaces that are created and/or joined by the container
@@ -884,6 +899,26 @@ const (
ZOSUTSNamespace ZOSNamespaceType = "uts"
)
+type MemoryPolicyModeType string
+
+const (
+ MpolDefault MemoryPolicyModeType = "MPOL_DEFAULT"
+ MpolBind MemoryPolicyModeType = "MPOL_BIND"
+ MpolInterleave MemoryPolicyModeType = "MPOL_INTERLEAVE"
+ MpolWeightedInterleave MemoryPolicyModeType = "MPOL_WEIGHTED_INTERLEAVE"
+ MpolPreferred MemoryPolicyModeType = "MPOL_PREFERRED"
+ MpolPreferredMany MemoryPolicyModeType = "MPOL_PREFERRED_MANY"
+ MpolLocal MemoryPolicyModeType = "MPOL_LOCAL"
+)
+
+type MemoryPolicyFlagType string
+
+const (
+ MpolFNumaBalancing MemoryPolicyFlagType = "MPOL_F_NUMA_BALANCING"
+ MpolFRelativeNodes MemoryPolicyFlagType = "MPOL_F_RELATIVE_NODES"
+ MpolFStaticNodes MemoryPolicyFlagType = "MPOL_F_STATIC_NODES"
+)
+
// LinuxSchedulerPolicy represents different scheduling policies used with the Linux Scheduler
type LinuxSchedulerPolicy string
diff --git a/specs-go/features/features.go b/specs-go/features/features.go
index d8eb169dc..8271ded8a 100644
--- a/specs-go/features/features.go
+++ b/specs-go/features/features.go
@@ -47,6 +47,7 @@ type Linux struct {
Apparmor *Apparmor `json:"apparmor,omitempty"`
Selinux *Selinux `json:"selinux,omitempty"`
IntelRdt *IntelRdt `json:"intelRdt,omitempty"`
+ MemoryPolicy *MemoryPolicy `json:"memoryPolicy,omitempty"`
MountExtensions *MountExtensions `json:"mountExtensions,omitempty"`
NetDevices *NetDevices `json:"netDevices,omitempty"`
}
@@ -132,6 +133,14 @@ type IntelRdt struct {
Enabled *bool `json:"enabled,omitempty"`
}
+// MemoryPolicy represents the "memoryPolicy" field.
+type MemoryPolicy struct {
+ // Modes is the list of known memory policy modes, e.g., "MPOL_INTERLEAVE".
+ Modes []string `json:"modes,omitempty"`
+ // Flags is the list of known memory policy mode flags, e.g., "MPOL_F_STATIC_NODES".
+ Flags []string `json:"flags,omitempty"`
+}
+
// MountExtensions represents the "mountExtensions" field.
type MountExtensions struct {
// IDMap represents the status of idmap mounts support.