|
11 | 11 | # It is not intended to be set in the configuration file. |
12 | 12 | ## plugins = "..." |
13 | 13 |
|
14 | | -# Agent configuration |
| 14 | +# Agent configuration. |
| 15 | +# If the agents field is populated, this section provides the default values |
| 16 | +# for all agents. |
15 | 17 | [agent] |
| 18 | + # Agent ID |
| 19 | + ## id = "agent-001" |
| 20 | + # Agent socket port |
| 21 | + agent-sock-port = 6007 |
| 22 | + # Base path for IPC |
| 23 | + ipc-base-path = "/tmp/backend.ai/ipc" |
| 24 | + # Base path for variable data |
| 25 | + var-base-path = "var/lib/backend.ai" |
| 26 | + # Scaling group name |
| 27 | + scaling-group = "default" |
| 28 | + # Scaling group type |
| 29 | + scaling-group-type = "compute" |
| 30 | + # Allowed compute plugins |
| 31 | + ## allow-compute-plugins = [ "ai.backend.accelerator.cuda_open", "ai.backend.activator.agent",] |
| 32 | + # Blocked compute plugins |
| 33 | + ## block-compute-plugins = [ "ai.backend.accelerator.mock",] |
| 34 | + # Allowed network plugins |
| 35 | + ## allow-network-plugins = [ "ai.backend.manager.network.overlay",] |
| 36 | + # Blocked network plugins |
| 37 | + ## block-network-plugins = [ "ai.backend.manager.network.overlay",] |
| 38 | + # Path for image commit |
| 39 | + image-commit-path = "tmp/backend.ai/commit" |
| 40 | + # Path for abuse reports |
| 41 | + ## abuse-report-path = "/var/log/backend.ai/abuse" |
| 42 | + # Whether to force terminate abusing containers |
| 43 | + force-terminate-abusing-containers = false |
| 44 | + # Kernel creation concurrency |
| 45 | + kernel-creation-concurrency = 4 |
16 | 46 | # Backend type for the agent. |
17 | 47 | # This determines how the agent interacts with the underlying infrastructure. |
18 | 48 | # Available options are: |
|
38 | 68 | ## rpc-auth-manager-public-key = "/path/to/public.key" |
39 | 69 | # Path to RPC auth agent keypair |
40 | 70 | ## rpc-auth-agent-keypair = "/path/to/keypair.key" |
41 | | - # Agent socket port |
42 | | - agent-sock-port = 6007 |
43 | | - # Agent ID |
44 | | - ## id = "agent-001" |
45 | | - # Base path for IPC |
46 | | - ipc-base-path = "/tmp/backend.ai/ipc" |
47 | | - # Base path for variable data |
48 | | - var-base-path = "var/lib/backend.ai" |
49 | 71 | # Mount path for containers |
50 | 72 | ## mount-path = "/mnt/backend.ai" |
51 | 73 | # Whether to enable cohabiting storage proxy |
|
56 | 78 | ## region = "us-east-1" |
57 | 79 | # Instance type |
58 | 80 | ## instance-type = "m5.large" |
59 | | - # Scaling group name |
60 | | - scaling-group = "default" |
61 | | - # Scaling group type |
62 | | - scaling-group-type = "compute" |
63 | 81 | # Path to PID file |
64 | 82 | pid-file = "/dev/null" |
65 | 83 | # Event loop type |
|
74 | 92 | metadata-server-bind-host = "0.0.0.0" |
75 | 93 | # Metadata server port |
76 | 94 | metadata-server-port = 40128 |
77 | | - # Allowed compute plugins |
78 | | - ## allow-compute-plugins = [ "ai.backend.accelerator.cuda_open", "ai.backend.activator.agent",] |
79 | | - # Blocked compute plugins |
80 | | - ## block-compute-plugins = [ "ai.backend.accelerator.mock",] |
81 | | - # Allowed network plugins |
82 | | - ## allow-network-plugins = [ "ai.backend.manager.network.overlay",] |
83 | | - # Blocked network plugins |
84 | | - ## block-network-plugins = [ "ai.backend.manager.network.overlay",] |
85 | | - # Path for image commit |
86 | | - image-commit-path = "tmp/backend.ai/commit" |
87 | | - # Path for abuse reports |
88 | | - ## abuse-report-path = "/var/log/backend.ai/abuse" |
89 | | - # Whether to force terminate abusing containers |
90 | | - force-terminate-abusing-containers = false |
91 | | - # Kernel creation concurrency |
92 | | - kernel-creation-concurrency = 4 |
93 | 95 | # Whether to use experimental Redis event dispatcher |
94 | 96 | use-experimental-redis-event-dispatcher = false |
95 | 97 | # Docker mode detected based on kernel version (linuxkit/native) |
|
104 | 106 | # Synchronization interval in seconds |
105 | 107 | interval = 10.0 |
106 | 108 |
|
107 | | -# Container configuration |
| 109 | +# Container configuration. |
| 110 | +# If the agents field is populated, this section provides the default values |
| 111 | +# for all agents. |
108 | 112 | [container] |
109 | 113 | # Kernel user ID |
110 | 114 | kernel-uid = -1 |
111 | 115 | # Kernel group ID |
112 | 116 | kernel-gid = -1 |
113 | | - # Bind host for containers |
114 | | - bind-host = "" |
115 | | - # Advertised host for containers |
116 | | - ## advertised-host = "192.168.1.100" |
117 | | - # Port range for containers |
| 117 | + # Port range for containers. |
| 118 | + # If multiple agents are used, the user must ensure that the port ranges |
| 119 | + # do not overlap between agents; otherwise it may cause subtle issues |
| 120 | + # late in the agents' runtime. |
118 | 121 | port-range = [ 30000, 31000,] |
119 | 122 | # Statistics type |
120 | 123 | ## stats-type = "docker" |
|
140 | 143 | # networks, and services. |
141 | 144 | # This field is only used when backend is set to 'docker'. |
142 | 145 | swarm-enabled = false |
| 146 | + # Bind host for containers |
| 147 | + bind-host = "" |
| 148 | + # Advertised host for containers |
| 149 | + ## advertised-host = "192.168.1.100" |
143 | 150 |
|
144 | 151 | # KRunner volumes configuration, mapping container names to host paths. |
145 | 152 | # This is used to specify volumes that should be mounted into containers |
|
149 | 156 | # It is not intended to be set in the configuration file. |
150 | 157 | ## [container.krunner-volumes] |
151 | 158 |
|
| 159 | +# Resource configuration. |
| 160 | +# If the agents field is populated, this section provides the default values |
| 161 | +# for all agents. |
| 162 | +[resource] |
| 163 | + # The number of CPU cores reserved for the operating system and the agent |
| 164 | + # service. |
| 165 | + reserved-cpu = 1 |
| 166 | + # The memory space reserved for the operating system and the agent service. It |
| 167 | + # is subtracted from the reported main memory size and not available for user |
| 168 | + # workload allocation. Depending on the memory-align-size option and system |
| 169 | + # configuration, the effective value may not be exact: it can be slightly |
| 170 | + # less or more, within the memory-align-size. |
| 171 | + reserved-mem = "1G" |
| 172 | + # The disk space reserved for the operating system and the agent service. |
| 173 | + # Currently this value is unused. In future releases, it may be used to preserve |
| 174 | + # the minimum disk space from the scratch disk allocation via loopback files. |
| 175 | + reserved-disk = "8G" |
| 176 | + # The alignment of the reported main memory size to absorb tiny deviations from |
| 177 | + # per-node firmware/hardware settings. Recommended to be multiple of the |
| 178 | + # page/hugepage size (e.g., 2 MiB). |
| 179 | + memory-align-size = "16M" |
| 180 | + # Resource allocation order |
| 181 | + allocation-order = [ "cuda", "rocm", "tpu", "cpu", "mem",] |
| 182 | + # Affinity policy |
| 183 | + affinity-policy = "INTERLEAVED" |
| 184 | + |
152 | 185 | # Pyroscope configuration |
153 | 186 | [pyroscope] |
154 | 187 | # Whether to enable Pyroscope profiling |
|
228 | 261 | # Override default log level for specific scope of package |
229 | 262 | [logging.pkg_ns] |
230 | 263 |
|
231 | | -# Resource configuration |
232 | | -[resource] |
233 | | - # The number of CPU cores reserved for the operating system and the agent |
234 | | - # service. |
235 | | - reserved-cpu = 1 |
236 | | - # The memory space reserved for the operating system and the agent service. It |
237 | | - # is subtracted from the reported main memory size and not available for user |
238 | | - # workload allocation. Depending on the memory-align-size option and system |
239 | | - # configuration, this may not be the exact value but have slightly less or more |
240 | | - # values within the memory-align-size. |
241 | | - reserved-mem = "1G" |
242 | | - # The disk space reserved for the operating system and the agent service. |
243 | | - # Currently this value is unused. In future releases, it may be used to preserve |
244 | | - # the minimum disk space from the scratch disk allocation via loopback files. |
245 | | - reserved-disk = "8G" |
246 | | - # The alignment of the reported main memory size to absorb tiny deviations from |
247 | | - # per-node firwmare/hardware settings. Recommended to be multiple of the |
248 | | - # page/hugepage size (e.g., 2 MiB). |
249 | | - memory-align-size = "16M" |
250 | | - # Resource allocation order |
251 | | - allocation-order = [ "cuda", "rocm", "tpu", "cpu", "mem",] |
252 | | - # Affinity policy |
253 | | - affinity-policy = "INTERLEAVED" |
254 | | - |
255 | 264 | # OpenTelemetry configuration |
256 | 265 | [otel] |
257 | 266 | # Whether to enable OpenTelemetry |
|
344 | 353 | # This field is injected at runtime based on etcd configuration. |
345 | 354 | # It is not intended to be set in the other way. |
346 | 355 | ## [redis] |
| 356 | + |
| 357 | +# Configuration overrides for multiple agents. |
| 358 | +# Use this field only to define 2 or more agents, as defining only one |
| 359 | +# agent using this field is redundant. Use the fields agent, container, |
| 360 | +# and resource to define the configuration at a global level. |
| 361 | +# Any field populated in the agents config will be treated as an |
| 362 | +# override to the global default values. Thus the global fields must still |
| 363 | +# be provided when defining multiple agents. |
| 364 | +[[agents]] |
| 365 | +# Add multiple [[agents]] sections as needed |
| 366 | + # Agent config overrides for the individual agent. |
| 367 | + # All fields except Agent ID are by default optional. |
| 368 | + # Only override fields if necessary. |
| 369 | + [agents.agent] |
| 370 | + # Agent ID |
| 371 | + ## id = "agent-001" |
| 372 | + # Agent socket port |
| 373 | + agent-sock-port = 6007 |
| 374 | + # Base path for IPC |
| 375 | + ipc-base-path = "/tmp/backend.ai/ipc" |
| 376 | + # Base path for variable data |
| 377 | + var-base-path = "var/lib/backend.ai" |
| 378 | + # Scaling group name |
| 379 | + scaling-group = "default" |
| 380 | + # Scaling group type |
| 381 | + scaling-group-type = "compute" |
| 382 | + # Allowed compute plugins |
| 383 | + ## allow-compute-plugins = [ "ai.backend.accelerator.cuda_open", "ai.backend.activator.agent",] |
| 384 | + # Blocked compute plugins |
| 385 | + ## block-compute-plugins = [ "ai.backend.accelerator.mock",] |
| 386 | + # Allowed network plugins |
| 387 | + ## allow-network-plugins = [ "ai.backend.manager.network.overlay",] |
| 388 | + # Blocked network plugins |
| 389 | + ## block-network-plugins = [ "ai.backend.manager.network.overlay",] |
| 390 | + # Path for image commit |
| 391 | + image-commit-path = "tmp/backend.ai/commit" |
| 392 | + # Path for abuse reports |
| 393 | + ## abuse-report-path = "/var/log/backend.ai/abuse" |
| 394 | + # Whether to force terminate abusing containers |
| 395 | + force-terminate-abusing-containers = false |
| 396 | + # Kernel creation concurrency |
| 397 | + kernel-creation-concurrency = 4 |
| 398 | + |
| 399 | + # Container lifecycle synchronization config |
| 400 | + [agents.agent.sync-container-lifecycles] |
| 401 | + # Whether to enable container lifecycle synchronization |
| 402 | + enabled = true |
| 403 | + # Synchronization interval in seconds |
| 404 | + interval = 10.0 |
| 405 | + |
| 406 | + # Container config overrides for the individual agent |
| 407 | + [agents.container] |
| 408 | + # Kernel user ID |
| 409 | + kernel-uid = -1 |
| 410 | + # Kernel group ID |
| 411 | + kernel-gid = -1 |
| 412 | + # Port range for containers. |
| 413 | + # If multiple agents are used, the user must ensure that the port ranges |
| 414 | + # do not overlap between agents; otherwise it may cause subtle issues |
| 415 | + # late in the agents' runtime. |
| 416 | + port-range = [ 30000, 31000,] |
| 417 | + # Statistics type |
| 418 | + ## stats-type = "cgroup" |
| 419 | + # Sandbox type |
| 420 | + sandbox-type = "docker" |
| 421 | + # Jail arguments |
| 422 | + jail-args = [ "--mount", "/tmp",] |
| 423 | + # Scratch type |
| 424 | + scratch-type = "hostdir" |
| 425 | + # Scratch root directory |
| 426 | + scratch-root = "scratches" |
| 427 | + # Scratch size |
| 428 | + scratch-size = 0 |
| 429 | + # Scratch NFS address |
| 430 | + ## scratch-nfs-address = "192.168.1.100:/export" |
| 431 | + # Scratch NFS options |
| 432 | + ## scratch-nfs-options = "rw,sync" |
| 433 | + # Alternative bridge network |
| 434 | + ## alternative-bridge = "br-backend" |
| 435 | + # Whether to enable Docker Swarm mode. |
| 436 | + # This allows the agent to manage containers in a Docker Swarm cluster. |
| 437 | + # When enabled, the agent will use Docker Swarm APIs to manage containers, |
| 438 | + # networks, and services. |
| 439 | + # This field is only used when backend is set to 'docker'. |
| 440 | + swarm-enabled = false |
| 441 | + |
| 442 | + # Resource config overrides for the individual agent |
| 443 | + [agents.resource] |
| 444 | + # The number of CPU cores reserved for the operating system and the agent |
| 445 | + # service. |
| 446 | + reserved-cpu = 1 |
| 447 | + # The memory space reserved for the operating system and the agent service. It |
| 448 | + # is subtracted from the reported main memory size and not available for user |
| 449 | + # workload allocation. Depending on the memory-align-size option and system |
| 450 | + # configuration, the effective value may not be exact: it can be slightly |
| 451 | + # less or more, within the memory-align-size. |
| 452 | + reserved-mem = 1073741824 |
| 453 | + # The disk space reserved for the operating system and the agent service. |
| 454 | + # Currently this value is unused. In future releases, it may be used to preserve |
| 455 | + # the minimum disk space from the scratch disk allocation via loopback files. |
| 456 | + reserved-disk = 8589934592 |
| 457 | + # The alignment of the reported main memory size to absorb tiny deviations from |
| 458 | + # per-node firmware/hardware settings. Recommended to be multiple of the |
| 459 | + # page/hugepage size (e.g., 2 MiB). |
| 460 | + memory-align-size = 16777216 |
| 461 | + # Resource allocation order |
| 462 | + allocation-order = [ "cuda", "rocm", "tpu", "cpu", "mem",] |
| 463 | + # Affinity policy |
| 464 | + affinity-policy = 1 |
0 commit comments