Initial (hacky) performance tests

JamesC1305 · JamesC1305 · commit 3e5187c66e8b · 2024-08-05T15:07:00.000Z
Create initial performance tests measuring the time between hotplug API
request and vCPUs being available to the guest.

Note: These tests are NOT designed to be merged or used in CI; they are
merely investigative tests for the latency of vCPU hotplugging.

Signed-off-by: James Curtis <jxcurtis@amazon.co.uk>
diff --git a/src/vmm/src/builder.rs b/src/vmm/src/builder.rs
@@ -201,7 +201,7 @@ fn create_vmm_and_vcpus(
         // This has to instantiated here, before the CpuContainer, to ensure that it gets the
         // correct address, the first page of MMIO memory.
         if boot_timer_enabled {
-            let boot_timer = crate::devices::pseudo::BootTimer::new(TimestampUs::default());
+            let mut boot_timer = crate::devices::pseudo::BootTimer::new(TimestampUs::default());
 
             mmio_device_manager
                 .register_mmio_boot_timer(&mut resource_allocator, boot_timer)
diff --git a/src/vmm/src/devices/pseudo/boot_timer.rs b/src/vmm/src/devices/pseudo/boot_timer.rs
@@ -10,7 +10,7 @@ const MAGIC_VALUE_SIGNAL_GUEST_BOOT_COMPLETE: u8 = 123;
 /// Pseudo device to record the kernel boot time.
 #[derive(Debug)]
 pub struct BootTimer {
-    start_ts: TimestampUs,
+    pub start_ts: TimestampUs,
 }
 
 impl BootTimer {
diff --git a/src/vmm/src/lib.rs b/src/vmm/src/lib.rs
@@ -620,6 +620,8 @@ impl Vmm {
         &mut self,
         config: HotplugVcpuConfig,
     ) -> Result<MachineConfigUpdate, HotplugVcpuError> {
+        use utils::time::TimestampUs;
+
         use crate::logger::IncMetric;
         if config.add < 1 {
             return Err(HotplugVcpuError::VcpuCountTooLow);
@@ -688,6 +690,12 @@ impl Vmm {
         self.resume_vcpu_threads(start_idx.into())?;
 
         self.acpi_device_manager.notify_cpu_container()?;
+        if let Some(devices::BusDevice::BootTimer(dev)) = self
+            .mmio_device_manager
+            .get_device(DeviceType::BootTimer, "BootTimer")
+        {
+            dev.lock().unwrap().start_ts = TimestampUs::default()
+        }
 
         Ok(new_machine_config)
     }
diff --git a/tests/conftest.py b/tests/conftest.py
@@ -361,6 +361,9 @@ def rootfs_fxt(request, record_property):
 guest_kernel_linux_5_10 = pytest.fixture(
     guest_kernel_fxt, params=kernel_params("vmlinux-5.10*")
 )
+guest_kernel_linux_acpi_only = pytest.fixture(
+    guest_kernel_fxt, params=kernel_params("vmlinux-5.10.221")
+)
 # Use the unfiltered selector, since we don't officially support 6.1 yet.
 # TODO: switch to default selector once we add full 6.1 support.
 guest_kernel_linux_6_1 = pytest.fixture(
@@ -394,6 +397,11 @@ def uvm_plain_rw(microvm_factory, guest_kernel_linux_5_10, rootfs_rw):
     return microvm_factory.build(guest_kernel_linux_5_10, rootfs_rw)
 
 
+@pytest.fixture
+def uvm_hotplug(microvm_factory, guest_kernel_linux_acpi_only, rootfs_rw):
+    return microvm_factory.build(guest_kernel_linux_acpi_only, rootfs_rw)
+
+
 @pytest.fixture
 def uvm_nano(uvm_plain):
     """A preconfigured uvm with 2vCPUs and 256MiB of memory
diff --git a/tests/host_tools/1-cpu-hotplug.rules b/tests/host_tools/1-cpu-hotplug.rules
@@ -0,0 +1 @@
+# When a CPU device is added and its sysfs "online" attribute is not already "1", write "1" to it.
+SUBSYSTEM=="cpu", ACTION=="add", ATTR{online}!="1", ATTR{online}="1"
diff --git a/tests/host_tools/hotplug.py b/tests/host_tools/hotplug.py
@@ -0,0 +1,70 @@
+# import pandas
+# import re
+# from framework.microvm import MicroVMFactory
+#
+# KERNEL = "vmlinux-5.10.221"
+# ROOTFS = "ubuntu-22.04.ext4"
+#
+#
+# def run_tests():
+#     factory = MicrovmFactory(fc_binary_path, jailer_binary_path)
+#     manual_data = test_manual_latency(factory)
+#     manual_data.to_csv("~/dev/results/manual_hotplug_data.csv")
+#
+# def test_manual_latency(microvm_factory):
+#     """Test the latency for hotplugging and booting CPUs in the guest"""
+#     fc_binary_path, jailer_binary_path = build_tools.get_firecracker_binaries()
+#     df = pandas.DataFrame(columns=["vcpus", "api", "onlining"])
+#     gcc_compile(Path("./hotplug_time.c"), Path("./hotplug_time.o"))
+#     data = []
+#     for vcpu_count in range(2, 30, 2):
+#         for i in range(50):
+#             uvm_hotplug = microvm_factory.build(KERNEL, ROOTFS)
+#             uvm_hotplug.jailer.extra_args.update({"boot-timer": None, "no-seccomp": None})
+#             uvm_hotplug.help.enable_console()
+#             uvm_hotplug.spawn()
+#             uvm_hotplug.basic_config(vcpu_count=1, mem_size_mib=128)
+#             uvm_hotplug.add_net_iface()
+#             uvm_hotplug.start()
+#             uvm_hotplug.ssh.scp_put(Path("./host_tools/hotplug.sh"), Path("/home/hotplug.sh"))
+#             uvm_hotplug.ssh.scp_put(Path("./host_tools//hotplug_time.o"), Path("/home/hotplug_time.o"))
+#             uvm_hotplug.ssh.run("tmux new-session -d /bin/bash /home/hotplug.sh > /home/test 2>&1")
+#
+#
+#             uvm_hotplug.api.hotplug.put(Vcpu={"add": vcpu_count})
+#
+#             time.sleep(0.25)
+#             # Extract API call duration
+#             api_duration = float(re.findall(r"Total previous API call duration: (\d+) us\.", uvm_hotplug.log_data)[-1]) / 1000
+#             timestamp = float(re.findall(r"Guest-boot-time\s+\=\s+(\d+)\s+us", uvm_hotplug.log_data)[0]) / 1000
+#             data.append({"vcpus" : vcpu_count, "api": api_duration, "onlining": timestamp})
+#     return pandas.DataFrame.from_dict(data)
+
+# def test_custom_udev_latency():
+#     """Test the latency for hotplugging and booting CPUs in the guest"""
+#     fc_binary_path, jailer_binary_path = build_tools.get_firecracker_binaries()
+#     df = pandas.DataFrame(columns=["vcpus", "api", "onlining"])
+#     gcc_compile(Path("./hotplug_time.c"), Path("./hotplug_time.o"))
+#     data = []
+#     for vcpu_count in range(2, 30, 2):
+#         for i in range(50):
+#             uvm_hotplug = microvm_factory.build(KERNEL, ROOTFS)
+#             uvm_hotplug.jailer.extra_args.update({"boot-timer": None, "no-seccomp": None})
+#             uvm_hotplug.help.enable_console()
+#             uvm_hotplug.spawn()
+#             uvm_hotplug.basic_config(vcpu_count=1, mem_size_mib=128)
+#             uvm_hotplug.add_net_iface()
+#             uvm_hotplug.start()
+#             uvm_hotplug.ssh.scp_put(Path("./host_tools/hotplug.sh"), Path("/home/hotplug.sh"))
+#             uvm_hotplug.ssh.scp_put(Path("./host_tools//hotplug_time.o"), Path("/home/hotplug_time.o"))
+#             uvm_hotplug.ssh.run("tmux new-session -d /bin/bash /home/hotplug.sh > /home/test 2>&1")
+#
+#
+#             uvm_hotplug.api.hotplug.put(Vcpu={"add": vcpu_count})
+#
+#             time.sleep(0.25)
+#             # Extract API call duration
+#             api_duration = float(re.findall(r"Total previous API call duration: (\d+) us\.", uvm_hotplug.log_data)[-1]) / 1000
+#             timestamp = float(re.findall(r"Guest-boot-time\s+\=\s+(\d+)\s+us", uvm_hotplug.log_data)[0]) / 1000
+#             data.append({"vcpus" : vcpu_count, "api": api_duration, "onlining": timestamp})
+#
diff --git a/tests/host_tools/hotplug.sh b/tests/host_tools/hotplug.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+
+while :; do
+  [[ -d /sys/devices/system/cpu/cpu1 ]] && break
+done
+
+readarray -t offline_cpus < <(lscpu -p=cpu --offline | sed '/^#/d')
+
+for cpu_idx in ${offline_cpus[@]}; do
+  echo 1 >/sys/devices/system/cpu/cpu$cpu_idx/online
+done
+
+/home/hotplug_time.o
diff --git a/tests/host_tools/hotplug_time.c b/tests/host_tools/hotplug_time.c
@@ -0,0 +1,33 @@
+// Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+// Hotplug timing helper: writes the boot-complete magic value to the boot
+// timer MMIO address. hotplug.sh runs it once the offline CPUs are onlined.
+
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+// Base address values are defined in arch/src/lib.rs as arch::MMIO_MEM_START.
+// Values are computed in arch/src/<arch>/mod.rs from the architecture layouts.
+// Position on the bus is defined by MMIO_LEN increments, where MMIO_LEN is
+// defined as 0x1000 in vmm/src/device_manager/mmio.rs.
+#ifdef __x86_64__
+#define MAGIC_MMIO_SIGNAL_GUEST_BOOT_COMPLETE 0xd0000000
+#endif
+#ifdef __aarch64__
+#define MAGIC_MMIO_SIGNAL_GUEST_BOOT_COMPLETE 0x40000000
+#endif
+
+#define MAGIC_VALUE_SIGNAL_GUEST_BOOT_COMPLETE 123
+
+// Map one page of /dev/mem at MAGIC_MMIO_SIGNAL_GUEST_BOOT_COMPLETE and store
+// MAGIC_VALUE_SIGNAL_GUEST_BOOT_COMPLETE in its first byte.
+//
+// Returns 0 on success, 1 if /dev/mem cannot be opened or mapped.
+int main() {
+  int fd = open("/dev/mem", (O_RDWR | O_SYNC | O_CLOEXEC));
+  if (fd < 0) {
+    return 1;
+  }
+
+  int mapped_size = getpagesize();
+
+  char *map_base = mmap(NULL, mapped_size, PROT_WRITE, MAP_SHARED, fd,
+                        MAGIC_MMIO_SIGNAL_GUEST_BOOT_COMPLETE);
+  if (map_base == MAP_FAILED) {
+    // Writing through MAP_FAILED would crash; bail out instead.
+    close(fd);
+    return 1;
+  }
+
+  // NOTE(review): presumably this single-byte store is what the device
+  // reacts to -- confirm against the boot timer implementation.
+  *map_base = MAGIC_VALUE_SIGNAL_GUEST_BOOT_COMPLETE;
+  msync(map_base, mapped_size, MS_ASYNC);
+
+  // Release the mapping and the descriptor before exiting.
+  munmap(map_base, mapped_size);
+  close(fd);
+  return 0;
+}
diff --git a/tests/host_tools/hotplug_time.o b/tests/host_tools/hotplug_time.o
diff --git a/tests/integration_tests/performance/test_vcpu_hotplug.py b/tests/integration_tests/performance/test_vcpu_hotplug.py
@@ -0,0 +1,198 @@
+# Copyright 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+"""Testing hotplug performance"""
+
+import os
+import re
+import time
+from pathlib import Path
+
+import pandas
+import pytest
+
+from framework.utils_cpuid import check_guest_cpuid_output
+from host_tools.cargo_build import gcc_compile
+
+
+@pytest.mark.parametrize(
+    "vcpu_count", [2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]
+)
+def test_custom_udev_rule_latency(
+    microvm_factory, guest_kernel_linux_acpi_only, rootfs_rw, vcpu_count
+):
+    """Test the latency for hotplugging and booting CPUs in the guest"""
+    api_durations = []
+    onlining_durations = []
+    print(f"Vcpu count: {vcpu_count}")
+    for i in range(5):
+        uvm_hotplug = microvm_factory.build(guest_kernel_linux_acpi_only, rootfs_rw)
+        uvm_hotplug.jailer.extra_args.update({"no-seccomp": None})
+        uvm_hotplug.help.enable_console()
+        uvm_hotplug.spawn()
+        uvm_hotplug.basic_config(vcpu_count=1, mem_size_mib=128)
+        uvm_hotplug.add_net_iface()
+        uvm_hotplug.start()
+        uvm_hotplug.ssh.run("rm /usr/lib/udev/rules.d/40-vm-hotadd.rules")
+        uvm_hotplug.ssh.scp_put(
+            Path("./host_tools/1-cpu-hotplug.rules"),
+            Path("/usr/lib/udev/rules.d/1-cpu-hotplug.rules"),
+        )
+
+        time.sleep(0.25)
+
+        uvm_hotplug.api.hotplug.put(Vcpu={"add": vcpu_count})
+        time.sleep(0.25)
+        _, stdout, _ = uvm_hotplug.ssh.run("dmesg")
+
+        # Extract API call duration
+        api_duration = (
+            float(
+                re.findall(
+                    r"Total previous API call duration: (\d+) us\.",
+                    uvm_hotplug.log_data,
+                )[-1]
+            )
+            / 1000
+        )
+
+        # Extract onlining timings
+        start = float(
+            re.findall(r"\[\s+(\d+\.\d+)\] CPU1 has been hot-added\n", stdout)[0]
+        )
+        end = float(re.findall(r"\[\s+(\d+\.\d+)\] \w+", stdout)[-1])
+        elapsed_time = (end - start) * 1000
+        print(f"Api call duration: {api_duration} ms")
+        print(f"Onlining duration: {elapsed_time} ms")
+        api_durations.append(api_duration)
+        onlining_durations.append(elapsed_time)
+        uvm_hotplug.kill()
+        time.sleep(1)
+
+    avg_api_duration = sum(api_durations) / 5
+    avg_onlining_duration = sum(onlining_durations) / 5
+    print(f"Averages for {vcpu_count} hotplugged vcpus:")
+    print(f"\tAverage API call duration: {avg_api_duration} ms")
+    print(f"\tAverage onliing duration: {avg_onlining_duration} ms")
+
+
+@pytest.mark.parametrize(
+    "vcpu_count", [2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]
+)
+def test_default_udev_rule_latency(
+    microvm_factory, guest_kernel_linux_acpi_only, rootfs_rw, vcpu_count
+):
+    """Test the latency for hotplugging and booting CPUs in the guest"""
+    api_durations = []
+    onlining_durations = []
+    print(f"Vcpu count: {vcpu_count}")
+    for i in range(5):
+        uvm_hotplug = microvm_factory.build(guest_kernel_linux_acpi_only, rootfs_rw)
+        uvm_hotplug.jailer.extra_args.update({"no-seccomp": None})
+        uvm_hotplug.help.enable_console()
+        uvm_hotplug.spawn()
+        uvm_hotplug.basic_config(vcpu_count=1, mem_size_mib=128)
+        uvm_hotplug.add_net_iface()
+        uvm_hotplug.start()
+
+        time.sleep(0.25)
+
+        _, stdout, _ = uvm_hotplug.ssh.run("ls /usr/lib/udev/rules.d")
+        default_rule = re.search(r"40-vm-hotadd\.rules", stdout)
+        assert default_rule is not None
+
+        uvm_hotplug.api.hotplug.put(Vcpu={"add": vcpu_count})
+        time.sleep(0.25)
+        _, stdout, _ = uvm_hotplug.ssh.run("dmesg")
+
+        # Extract API call duration
+        api_duration = (
+            float(
+                re.findall(
+                    r"Total previous API call duration: (\d+) us\.",
+                    uvm_hotplug.log_data,
+                )[-1]
+            )
+            / 1000
+        )
+
+        # Extract onlining timings
+        start = float(
+            re.findall(r"\[\s+(\d+\.\d+)\] CPU1 has been hot-added\n", stdout)[0]
+        )
+        end = float(re.findall(r"\[\s+(\d+\.\d+)\] \w+", stdout)[-1])
+        elapsed_time = (end - start) * 1000
+        print(f"Api call duration: {api_duration} ms")
+        print(f"Onlining duration: {elapsed_time} ms")
+        api_durations.append(api_duration)
+        onlining_durations.append(elapsed_time)
+        uvm_hotplug.kill()
+        time.sleep(1)
+
+    avg_api_duration = sum(api_durations) / 5
+    avg_onlining_duration = sum(onlining_durations) / 5
+    print(f"Averages for {vcpu_count} hotplugged vcpus:")
+    print(f"\tAverage API call duration: {avg_api_duration} ms")
+    print(f"\tAverage onliing duration: {avg_onlining_duration} ms")
+
+
+@pytest.mark.parametrize(
+    "vcpu_count", [2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]
+)
+def test_manual_latency(
+    microvm_factory, guest_kernel_linux_acpi_only, rootfs_rw, vcpu_count
+):
+    """Test the latency for hotplugging and booting CPUs in the guest"""
+    gcc_compile(Path("./host_tools/hotplug_time.c"), Path("host_tools/hotplug_time.o"))
+    data = []
+    for i in range(50):
+        uvm_hotplug = microvm_factory.build(guest_kernel_linux_acpi_only, rootfs_rw)
+        uvm_hotplug.jailer.extra_args.update({"boot-timer": None, "no-seccomp": None})
+        uvm_hotplug.help.enable_console()
+        uvm_hotplug.spawn()
+        uvm_hotplug.basic_config(vcpu_count=1, mem_size_mib=128)
+        uvm_hotplug.add_net_iface()
+        uvm_hotplug.start()
+        uvm_hotplug.ssh.scp_put(
+            Path("./host_tools/hotplug.sh"), Path("/home/hotplug.sh")
+        )
+        uvm_hotplug.ssh.scp_put(
+            Path("./host_tools//hotplug_time.o"), Path("/home/hotplug_time.o")
+        )
+        uvm_hotplug.ssh.run(
+            "tmux new-session -d /bin/bash /home/hotplug.sh > /home/test 2>&1"
+        )
+
+        uvm_hotplug.api.hotplug.put(Vcpu={"add": vcpu_count})
+
+        time.sleep(1.5)
+        # Extract API call duration
+        api_duration = (
+            float(
+                re.findall(
+                    r"Total previous API call duration: (\d+) us\.",
+                    uvm_hotplug.log_data,
+                )[-1]
+            )
+            / 1000
+        )
+        try:
+            timestamp = (
+                float(
+                    re.findall(
+                        r"Guest-boot-time\s+\=\s+(\d+)\s+us", uvm_hotplug.log_data
+                    )[0]
+                )
+                / 1000
+            )
+        except:
+            data.append({"vcpus": vcpu_count, "api": api_duration, "onlining": None})
+            continue
+        # Extract onlining timings
+        data.append({"vcpus": vcpu_count, "api": api_duration, "onlining": timestamp})
+
+    df = pandas.DataFrame.from_dict(data).to_csv(
+        f"../test_results/manual-hotplug_{vcpu_count}.csv",
+        index=False,
+        float_format="%.3f",
+    )

Original file line number	Diff line number	Diff line change
`@@ -10,7 +10,7 @@ const MAGIC_VALUE_SIGNAL_GUEST_BOOT_COMPLETE: u8 = 123;`
`10`	`10`	`/// Pseudo device to record the kernel boot time.`
`11`	`11`	`#[derive(Debug)]`
`12`	`12`	`pub struct BootTimer {`
`13`		`- start_ts: TimestampUs,`
	`13`	`+ pub start_ts: TimestampUs,`
`14`	`14`	`}`
`15`	`15`
`16`	`16`	`impl BootTimer {`
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	`+SUBSYSTEM=="cpu", ACTION=="add", ATTR{online}!="1", ATTR{online}="1"`