Commit 86bc18c

feat(BA-2750): Add support for array in config generator
This change adds support for TOML's array-of-tables syntax ([[table]]) to the sample config generator. A downstream change that adds multiple agents to the agent runtime server will need the config format to express arrays of tables, so it is beneficial for the sample generator to handle them.
1 parent 1fc4ac1 · commit 86bc18c
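For context, TOML's array-of-tables syntax repeats a [[table]] header once per element of the array. Below is a minimal sketch using Python 3.11+'s standard tomllib; the [[agent.sub-agents]] name is a hypothetical illustration, not a section defined by this commit:

import tomllib

# Hypothetical document using TOML's array-of-tables syntax: each repeated
# [[agent.sub-agents]] header appends one table to the same list.
doc = """
[[agent.sub-agents]]
name = "agent-a"
port = 6011

[[agent.sub-agents]]
name = "agent-b"
port = 6012
"""

parsed = tomllib.loads(doc)
# The array of tables parses into a list of dicts.
assert parsed["agent"]["sub-agents"] == [
    {"name": "agent-a", "port": 6011},
    {"name": "agent-b", "port": 6012},
]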

File tree

3 files changed: +174 -19 lines

changes/6311.feature.md

Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
+Add support for array of tables syntax in config sample generator

configs/agent/sample.toml

Lines changed: 56 additions & 10 deletions
@@ -6,6 +6,7 @@
 #
 # Generated automatically from the AgentUnifiedConfig schema.
 
+# Agent configuration
 [agent]
 # Backend type for the agent.
 # This determines how the agent interacts with the underlying infrastructure.
@@ -91,12 +92,14 @@
 # Owner uid:gid of the mount directory
 ## mount-path-uid-gid = "root:root"
 
+# Container lifecycle synchronization config
 [agent.sync-container-lifecycles]
 # Whether to enable container lifecycle synchronization
 enabled = true
 # Synchronization interval in seconds
 interval = 10.0
 
+# Container configuration
 [container]
 # Kernel user ID
 kernel-uid = -1
@@ -126,20 +129,14 @@
 ## scratch-nfs-options = "rw,sync"
 # Alternative bridge network
 ## alternative-bridge = "br-backend"
-# KRunner volumes configuration, mapping container names to host paths.
-# This is used to specify volumes that should be mounted into containers
-# when using the KRunner backend.
-# This fields is filled by the agent at runtime based on the
-# `krunner_volumes` configuration in the agent's environment.
-# It is not intended to be set in the configuration file.
-## krunner-volumes = { }
 # Whether to enable Docker Swarm mode.
 # This allows the agent to manage containers in a Docker Swarm cluster.
 # When enabled, the agent will use Docker Swarm APIs to manage containers,
 # networks, and services.
 # This field is only used when backend is set to 'docker'.
 swarm-enabled = false
 
+# Pyroscope configuration
 [pyroscope]
 # Whether to enable Pyroscope profiling
 enabled = false
@@ -150,6 +147,7 @@
 # Sampling rate for Pyroscope profiling
 ## sample-rate = 10
 
+# Logging configuration
 [logging]
 # The version used by logging.dictConfig().
 version = 1
@@ -159,9 +157,6 @@
 disable-existing-loggers = false
 # The list of log drivers to activate.
 drivers = [ "console",]
-## file = "{ FileConfig }"
-## logstash = "{ LogstashConfig }"
-## graylog = "{ GraylogConfig }"
 
 # The mapping of log handler configurations.
 [logging.handlers]
@@ -175,9 +170,52 @@
 # Determine verbosity of log.
 format = "verbose"
 
+[logging.file]
+# Path to store log.
+path = "/var/log/backend.ai"
+# Log file name.
+filename = "wsproxy.log"
+# Number of outdated log files to retain.
+backup-count = 5
+# Maximum size for a single log file.
+## rotation-size = "..." # | # min=0
+# Determine verbosity of log.
+format = "verbose"
+
+[logging.logstash]
+# Connection information of logstash node.
+endpoint = { host = "127.0.0.1", port = 8001 }
+# Protocol to communicate with logstash server.
+protocol = "tcp"
+# Use TLS to communicate with logstash server.
+ssl-enabled = true
+# Verify validity of TLS certificate when communicating with logstash.
+ssl-verify = true
+
+[logging.graylog]
+# Graylog hostname.
+host = "127.0.0.1"
+# Graylog server port number.
+port = 8000
+# Log level.
+level = "INFO"
+# The custom source identifier. If not specified, fqdn will be used instead.
+## localname = "..."
+# The fully qualified domain name of the source.
+## fqdn = "..."
+# Verify validity of TLS certificate when communicating with logstash.
+ssl-verify = true
+# Path to Root CA certificate file.
+## ca-certs = "/etc/ssl/ca.pem"
+# Path to TLS private key file.
+## keyfile = "/etc/backend.ai/graylog/privkey.pem"
+# Path to TLS certificate file.
+## certfile = "/etc/backend.ai/graylog/cert.pem"
+
 # Override default log level for specific scope of package
 [logging.pkg_ns]
 
+# Resource configuration
 [resource]
 # The number of CPU cores reserved for the operating system and the agent
 # service.
@@ -201,6 +239,7 @@
 # Affinity policy
 affinity-policy = "INTERLEAVED"
 
+# OpenTelemetry configuration
 [otel]
 # Whether to enable OpenTelemetry
 enabled = false
@@ -209,10 +248,12 @@
 # OTLP endpoint for sending traces
 endpoint = "http://127.0.0.1:4317"
 
+# Service discovery configuration
 [service-discovery]
 # Type of service discovery to use
 type = "redis"
 
+# Debug configuration
 [debug]
 # Master switch for debug mode
 enabled = false
@@ -239,6 +280,7 @@
 # Whether to log Docker events
 log-docker-events = false
 
+# Core dump configuration
 [debug.coredump]
 # Whether to enable core dump collection
 enabled = false
@@ -249,6 +291,7 @@
 # Maximum size limit for core dumps
 size-limit = "64M"
 
+# Etcd configuration
 [etcd]
 # Etcd namespace
 namespace = "local"
@@ -259,12 +302,14 @@
 # Etcd password
 ## password = "PASSWORD"
 
+# Container logs configuration
 [container-logs]
 # Maximum length of container logs
 max-length = "10M"
 # Chunk size for container logs
 chunk-size = "64K"
 
+# API configuration
 [api]
 # Image pull timeout in seconds
 ## pull-timeout = 7200.0 # min=0
@@ -273,6 +318,7 @@
 # Image push timeout in seconds
 ## push-timeout = 7200.0 # min=0
 
+# Kernel lifecycles configuration
 [kernel-lifecycles]
 # Number of init polling attempts
 init-polling-attempt = 10
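Note: the expanded [logging.file], [logging.logstash], and [logging.graylog] sections above replace the former one-line placeholders (## file = "{ FileConfig }", etc.) because the generator now unwraps optional model fields (Type | None) before deciding how to render them. A minimal sketch of that unwrapping step, using a hand-written stand-in schema rather than the real AgentUnifiedConfig schema:

# Stand-in JSON schema for an Optional[FileConfig] field, i.e. FileConfig | None.
prop_schema = {
    "description": "File log driver settings",
    "anyOf": [
        {"$ref": "#/$defs/FileConfig"},
        {"type": "null"},
    ],
}

any_of_items = prop_schema["anyOf"]
non_null_items = [item for item in any_of_items if item != {"type": "null"}]
if len(non_null_items) == 1 and len(any_of_items) == 2:
    # The Optional[T] pattern: exactly one non-null member plus "null".
    # Unwrapping exposes the inner $ref/object so the generator can emit a
    # full [section] instead of a commented-out placeholder line.
    inner_schema = non_null_items[0]
    assert inner_schema == {"$ref": "#/$defs/FileConfig"}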

src/ai/backend/common/configs/sample_generator.py

Lines changed: 117 additions & 9 deletions
@@ -49,6 +49,10 @@ class _InlineTable(dict, InlineTableDict):
     pass
 
 
+def _is_runtime_field(description: str) -> bool:
+    return "at runtime" in description
+
+
 def _wrap_comment(text: str, prefix: str = "", width: int = 80) -> str:
     """Wrap text into multiline comment format."""
     lines = text.strip().split("\n")
@@ -122,7 +126,7 @@ def _dump_toml_scalar(
         case "BinarySize":
             value = f"{BinarySize(value):s}".upper()
         case "HostPortPair":
-            value = {"host": value.host, "port": value.port}
+            value = {"host": value["host"], "port": value["port"]}
         case "EnumByValue":
             assert ctx.annotation is not None
             value = ctx.annotation(value).value
@@ -193,7 +197,7 @@ def _get_field_info(model_cls: Type[BaseModel], field_name: str, indent: int) ->
     try:
         factory_instance = field.default_factory()  # type: ignore
         if isinstance(factory_instance, BaseModel):
-            field_info["default"] = factory_instance.model_dump()
+            field_info["default"] = factory_instance.model_dump(mode="python")
         else:
             field_info["default"] = factory_instance
     except Exception:
@@ -224,7 +228,7 @@ def _process_property(
     # Add description as comment if available
     description = field_info.get("description") or prop_schema.get("description")
     if description:
-        if "This field is injected at runtime" in description:
+        if _is_runtime_field(description):
             # Skip runtime-generated fields.
             return []
         comment_lines = _wrap_comment(description)
@@ -332,8 +336,20 @@ def _process_schema(
     # Group properties by type
     simple_props = {}
    object_props = {}
+    array_of_tables_props = {}
 
     for prop_name, prop_schema in properties.items():
+        original_prop_schema = prop_schema
+
+        # Handle anyOf (optional types) - unwrap to check the inner type
+        if "anyOf" in prop_schema:
+            # Check if this is an optional type (Type | None)
+            any_of_items = prop_schema["anyOf"]
+            non_null_items = [item for item in any_of_items if item != {"type": "null"}]
+            if len(non_null_items) == 1 and len(any_of_items) == 2:
+                # This is an optional type - unwrap it to check if it's an object
+                prop_schema = non_null_items[0]
+
         if "$ref" in prop_schema:
             # Resolve reference
             ref_path = prop_schema["$ref"].split("/")
@@ -345,12 +361,36 @@ def _process_schema(
 
         prop_type = prop_schema.get("type", "")
 
-        if (prop_type == "object" or "properties" in prop_schema) and prop_schema[
+        # Check if this is an array of objects (array of tables in TOML)
+        if prop_type == "array" and "items" in prop_schema:
+            items_schema = prop_schema["items"]
+            # Resolve $ref in items if present
+            if "$ref" in items_schema:
+                ref_path = items_schema["$ref"].split("/")
+                if ref_path[0] == "#" and len(ref_path) > 1:
+                    resolved = schema
+                    for part in ref_path[1:]:
+                        resolved = resolved.get(part, {})
+                    items_schema = resolved
+
+            # Check if the items are objects (complex types)
+            if items_schema.get("type") == "object" or "properties" in items_schema:
+                array_of_tables_props[prop_name] = (original_prop_schema, items_schema)
+                continue
+
+        # Check if this is a complex object that should be expanded
+        if (prop_type == "object" or "properties" in prop_schema) and prop_schema.get(
             "title"
-        ] != "HostPortPair":
+        ) != "HostPortPair":
+            # Preserve description from original schema if it was unwrapped
+            if "description" in original_prop_schema and "description" not in prop_schema:
+                prop_schema = {
+                    **prop_schema,
+                    "description": original_prop_schema["description"],
+                }
             object_props[prop_name] = prop_schema
         else:
-            simple_props[prop_name] = prop_schema
+            simple_props[prop_name] = original_prop_schema
 
     # Add simple properties first
     processed_simple_props = []
@@ -360,6 +400,7 @@ def _process_schema(
         )
         if prop_lines:
             lines.extend(prop_lines)
+            # Exclude runtime-injected fields from the warning
             processed_simple_props.append(prop_name)
 
     if path == [] and processed_simple_props:
@@ -368,19 +409,25 @@ def _process_schema(
             "The configuration schema CANNOT have simple fields in the root "
             "without any section header according to the TOML specification. "
             "Also, optional sections should be defined non-optional with explicit default factory. "
-            f"Please move or fix these fields/sections: {', '.join(simple_props.keys())}. "
+            f"Please move or fix these fields/sections: {', '.join(processed_simple_props)}. "
         )
 
     # Add object properties as sections
     for prop_name, prop_schema in object_props.items():
         indent_str = " " * len(path)
 
+        # Skip if this is a runtime-injected field
+        description = prop_schema.get("description", "")
+        if _is_runtime_field(description):
+            continue
+
         if lines and lines[-1].strip():  # Add blank line before section
             lines.append("")
 
         # Add section comment
-        if "description" in prop_schema:
-            comment_lines = _wrap_comment(prop_schema["description"], prefix=indent_str)
+        description = prop_schema.get("description", "")
+        if description:
+            comment_lines = _wrap_comment(description, prefix=indent_str)
             lines.extend(comment_lines.split("\n"))
 
         # Add section header
@@ -422,6 +469,67 @@ def _process_schema(
         )
         lines.extend(nested_lines)
 
+    # Add array of tables properties using [[array.name]] syntax
+    for prop_name, (prop_schema, items_schema) in array_of_tables_props.items():
+        indent_str = " " * len(path)
+
+        # Skip if this is a runtime-injected field
+        description = prop_schema.get("description", "")
+        if _is_runtime_field(description):
+            continue
+
+        if lines and lines[-1].strip():  # Add blank line before section
+            lines.append("")
+
+        # Add array of tables comment
+        if description:
+            comment_lines = _wrap_comment(description, prefix=indent_str)
+            lines.extend(comment_lines.split("\n"))
+
+        # Add array of tables header with double brackets [[array.name]]
+        section_path = path + [prop_name]
+        array_header = f"{indent_str}[[{'.'.join(section_path)}]]"
+        lines.append(array_header)
+
+        # Add a comment about adding multiple entries
+        lines.append(
+            f"{indent_str}# Add multiple [[{'.'.join(section_path)}]] sections as needed"
+        )
+
+        # Process nested properties for the item schema
+        nested_model_cls = None
+        if model_cls and hasattr(model_cls, "model_fields"):
+            field_info = _get_field_info(model_cls, prop_name, indent=len(path))
+            if field_info:
+                # Try to find the field and extract the item type from list annotation
+                field = None
+                if prop_name in model_cls.model_fields:
+                    field = model_cls.model_fields[prop_name]
+                else:
+                    # Search by alias
+                    for finfo in model_cls.model_fields.values():
+                        if (
+                            hasattr(finfo, "serialization_alias")
+                            and finfo.serialization_alias == prop_name
+                        ):
+                            field = finfo
+                            break
+
+                if field:
+                    if hasattr(field, "annotation") and hasattr(field.annotation, "__origin__"):
+                        # Handle generic types like list[SubAgentConfig]
+                        args = getattr(field.annotation, "__args__", ())
+                        if args and hasattr(args[0], "model_fields"):
+                            nested_model_cls = args[0]
+                    elif hasattr(field.annotation, "model_fields"):
+                        nested_model_cls = field.annotation
+
+        nested_lines = _process_schema(
+            items_schema, path=section_path, parent_required=[], model_cls=nested_model_cls
+        )
+        lines.extend(nested_lines)
+
     return lines
 
 # Process the root schema
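To illustrate what the new array-of-tables branch consumes, here is a minimal, self-contained sketch assuming Pydantic v2; SubAgentConfig and sub_agents are hypothetical names echoing the list[SubAgentConfig] example in the code comments above, not part of this commit:

from pydantic import BaseModel, Field

class SubAgentConfig(BaseModel):
    name: str = Field(description="Sub-agent name")
    port: int = Field(default=6011, description="Sub-agent port")

class AgentConfig(BaseModel):
    sub_agents: list[SubAgentConfig] = Field(
        default_factory=list,
        description="Sub-agent definitions",
    )

schema = AgentConfig.model_json_schema()
prop = schema["properties"]["sub_agents"]

# A list-of-models field surfaces as an array whose items are a $ref:
# exactly the shape the new branch in _process_schema looks for.
assert prop["type"] == "array"
assert prop["items"] == {"$ref": "#/$defs/SubAgentConfig"}

# After resolving the $ref against schema["$defs"], the generator emits a
# [[sub_agents]] header plus the item model's fields, e.g.:
#   [[sub_agents]]
#   # Add multiple [[sub_agents]] sections as needed
#   # Sub-agent name
#   name = "..."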
