diff --git a/helm/bundles/cortex-nova/templates/datasources.yaml b/helm/bundles/cortex-nova/templates/datasources.yaml index 8da1906a..1d50aea1 100644 --- a/helm/bundles/cortex-nova/templates/datasources.yaml +++ b/helm/bundles/cortex-nova/templates/datasources.yaml @@ -73,35 +73,6 @@ spec: --- apiVersion: cortex.cloud/v1alpha1 kind: Datasource -metadata: - name: kvm-libvirt-domain-steal-pct -spec: - schedulingDomain: nova - databaseSecretRef: - name: cortex-nova-postgres - namespace: {{ .Release.Namespace }} - {{- if .Values.prometheus.sso.enabled }} - ssoSecretRef: - name: cortex-nova-prometheus-sso - namespace: {{ .Release.Namespace }} - {{- end }} - type: prometheus - prometheus: - secretRef: - name: cortex-nova-prometheus - namespace: {{ .Release.Namespace }} - alias: kvm_libvirt_domain_steal_pct - # This metric is exported by https://github.com/cobaltcore-dev/kvm-monitoring - query: | - max by (domain) (rate(kvm_domain_libvirt_vcpu_delay_nanoseconds[5m])) / 1e9 * 100 - type: kvm_libvirt_domain_metric - # It's ok to only look at a short time period here. 
- timeRange: "1200s" # 20 minutes - interval: "300s" # 5 minutes - resolution: "60s" # 1 minute ---- -apiVersion: cortex.cloud/v1alpha1 -kind: Datasource metadata: name: nova-servers spec: diff --git a/helm/bundles/cortex-nova/templates/datasources_kvm.yaml b/helm/bundles/cortex-nova/templates/datasources_kvm.yaml new file mode 100644 index 00000000..3614bda2 --- /dev/null +++ b/helm/bundles/cortex-nova/templates/datasources_kvm.yaml @@ -0,0 +1,31 @@ +{{- if .Values.kvm.enabled }} +--- +apiVersion: cortex.cloud/v1alpha1 +kind: Datasource +metadata: + name: kvm-libvirt-domain-steal-pct +spec: + schedulingDomain: nova + databaseSecretRef: + name: cortex-nova-postgres + namespace: {{ .Release.Namespace }} + {{- if .Values.prometheus.sso.enabled }} + ssoSecretRef: + name: cortex-nova-prometheus-sso + namespace: {{ .Release.Namespace }} + {{- end }} + type: prometheus + prometheus: + secretRef: + name: cortex-nova-prometheus + namespace: {{ .Release.Namespace }} + alias: kvm_libvirt_domain_steal_pct + # This metric is exported by https://github.com/cobaltcore-dev/kvm-monitoring + query: | + max by (domain) (rate(kvm_domain_libvirt_vcpu_delay_nanoseconds[5m])) / 1e9 * 100 + type: kvm_libvirt_domain_metric + # It's ok to only look at a short time period here. 
+ timeRange: "1200s" # 20 minutes + interval: "300s" # 5 minutes + resolution: "60s" # 1 minute +{{- end }} \ No newline at end of file diff --git a/helm/bundles/cortex-nova/templates/knowledges.yaml b/helm/bundles/cortex-nova/templates/knowledges.yaml index 28952fd1..223bc615 100644 --- a/helm/bundles/cortex-nova/templates/knowledges.yaml +++ b/helm/bundles/cortex-nova/templates/knowledges.yaml @@ -70,22 +70,8 @@ spec: - name: vmware-resolved-hostsystems datasources: - name: vrops-hostsystem-cpu-contention-short-term-percentage ---- -apiVersion: cortex.cloud/v1alpha1 -kind: Knowledge -metadata: - name: kvm-libvirt-domain-cpu-steal-pct -spec: - schedulingDomain: nova - extractor: - name: kvm_libvirt_domain_cpu_steal_pct_extractor - description: | - This knowledge identifies KVM libvirt domains that are experiencing CPU - steal, i.e. cpu contention or cpu wait. - dependencies: - datasources: - - name: kvm-libvirt-domain-steal-pct - - name: nova-servers + + --- apiVersion: cortex.cloud/v1alpha1 kind: Knowledge diff --git a/helm/bundles/cortex-nova/templates/knowledges_kvm.yaml b/helm/bundles/cortex-nova/templates/knowledges_kvm.yaml new file mode 100644 index 00000000..f2181fe9 --- /dev/null +++ b/helm/bundles/cortex-nova/templates/knowledges_kvm.yaml @@ -0,0 +1,18 @@ +{{- if .Values.kvm.enabled }} +--- +apiVersion: cortex.cloud/v1alpha1 +kind: Knowledge +metadata: + name: kvm-libvirt-domain-cpu-steal-pct +spec: + schedulingDomain: nova + extractor: + name: kvm_libvirt_domain_cpu_steal_pct_extractor + description: | + This knowledge identifies KVM libvirt domains that are experiencing CPU + steal, i.e. cpu contention or cpu wait. 
+ dependencies: + datasources: + - name: kvm-libvirt-domain-steal-pct + - name: nova-servers +{{- end }} \ No newline at end of file diff --git a/helm/bundles/cortex-nova/templates/kpis.yaml b/helm/bundles/cortex-nova/templates/kpis.yaml index b8994782..af01c10c 100644 --- a/helm/bundles/cortex-nova/templates/kpis.yaml +++ b/helm/bundles/cortex-nova/templates/kpis.yaml @@ -110,20 +110,6 @@ spec: --- apiVersion: cortex.cloud/v1alpha1 kind: KPI -metadata: - name: kvm-host-capacity -spec: - schedulingDomain: nova - impl: kvm_host_capacity_kpi - dependencies: - knowledges: - - name: host-details - - name: host-utilization - description: | - This KPI tracks the total, utilized, reserved and failover capacity of KVM hosts. ---- -apiVersion: cortex.cloud/v1alpha1 -kind: KPI metadata: name: cortex-nova-datasource-state spec: diff --git a/helm/bundles/cortex-nova/templates/kpis_kvm.yaml b/helm/bundles/cortex-nova/templates/kpis_kvm.yaml new file mode 100644 index 00000000..e98c0a44 --- /dev/null +++ b/helm/bundles/cortex-nova/templates/kpis_kvm.yaml @@ -0,0 +1,16 @@ +{{- if .Values.kvm.enabled }} +--- +apiVersion: cortex.cloud/v1alpha1 +kind: KPI +metadata: + name: kvm-host-capacity +spec: + schedulingDomain: nova + impl: kvm_host_capacity_kpi + dependencies: + knowledges: + - name: host-details + - name: host-utilization + description: | + This KPI tracks the total, utilized, reserved and failover capacity of KVM hosts. 
+{{- end }} \ No newline at end of file diff --git a/helm/bundles/cortex-nova/templates/pipelines.yaml b/helm/bundles/cortex-nova/templates/pipelines.yaml index c258e836..c3b13acc 100644 --- a/helm/bundles/cortex-nova/templates/pipelines.yaml +++ b/helm/bundles/cortex-nova/templates/pipelines.yaml @@ -83,167 +83,3 @@ spec: maxCPUContentionActivationLowerBound: 0.0 maxCPUContentionActivationUpperBound: -0.25 mandatory: false ---- -apiVersion: cortex.cloud/v1alpha1 -kind: Pipeline -metadata: - name: nova-external-scheduler-kvm -spec: - schedulingDomain: nova - description: | - Nova provides virtual machine placement on compute hosts for OpenStack. - After applying its own filtering and weighing logic, it delegates to cortex - for additional filtering and weighing via this external scheduler pipeline. - Cortex returns a ranked list of hosts back to nova for final selection. - This is the pipeline used for KVM hypervisors (qemu and cloud-hypervisor). - type: filter-weigher - {{- if $createDecisions }} - createDecisions: true - {{- end }} - steps: [] ---- -apiVersion: cortex.cloud/v1alpha1 -kind: Pipeline -metadata: - name: nova-external-scheduler-kvm-all-filters-enabled -spec: - schedulingDomain: nova - description: | - This pipeline can be used to place reservations the same way nova would place - its virtual machines. It uses the same filtering steps as implemented in the - nova service, ensuring a valid placement of the reservation. It then leverages - cortex's weighing steps to provide an optimized host selection for the reservation. - This is the pipeline used for KVM hypervisors (qemu and cloud-hypervisor). - type: filter-weigher - {{- if $createDecisions }} - createDecisions: true - {{- end }} - steps: - - type: filter - impl: filter_host_instructions - description: | - This step will consider the `ignore_hosts` and `force_hosts` instructions - from the nova scheduler request spec to filter out or exclusively allow - certain hosts. 
- knowledges: [] - - type: filter - impl: filter_has_enough_capacity - description: | - This step will filter out hosts that do not have enough available capacity - to host the requested flavor. If enabled, this step will subtract the - current reservations residing on this host from the available capacity. - opts: - # If reserved space should be locked even for matching requests. - # For the reservations pipeline, we don't want to unlock - # reserved space, to avoid reservations for the same project - # and flavor to overlap. - lockReserved: true - - type: filter - impl: filter_has_requested_traits - description: | - This step filters hosts that do not have the requested traits given by the - nova flavor extra spec: "trait:<trait>": "forbidden" means the host must - not have the specified trait. "trait:<trait>": "required" means the host - must have the specified trait. - - type: filter - impl: filter_has_accelerators - description: | - This step will filter out hosts without the trait `COMPUTE_ACCELERATORS` if - the nova flavor extra specs request accelerators via "accel:device_profile". - - type: filter - impl: filter_correct_az - description: | - This step will filter out hosts whose aggregate information indicates they - are not placed in the requested availability zone. - - type: filter - impl: filter_status_conditions - description: | - This step will filter out hosts for which the hypervisor status conditions - do not meet the expected values, for example, that the hypervisor is ready - and not disabled. - - type: filter - impl: filter_maintenance - description: | - This step will filter out hosts that are currently in maintenance mode that - prevents scheduling, for example, manual maintenance or termination. - - type: filter - impl: filter_external_customer - description: | - This step prefix-matches the domain name for external customer domains and - filters out hosts that are not intended for external customers. 
It considers - the `CUSTOM_EXTERNAL_CUSTOMER_SUPPORTED` trait on hosts as well as the - `domain_name` scheduler hint from the nova request spec. - opts: - domainNamePrefixes: ["iaas-"] - - type: filter - impl: filter_packed_virtqueue - description: | - If the flavor extra specs contain the `hw:virtio_packed_ring` key, or the - image properties contain the `hw_virtio_packed_ring` key, this step will - filter out hosts that do not have the `COMPUTE_NET_VIRTIO_PACKED` trait. - - type: filter - impl: filter_allowed_projects - description: | - This step filters hosts based on allowed projects defined in the - hypervisor resource. Note that hosts allowing all projects are still - accessible and will not be filtered out. In this way some hypervisors - are made accessible to some projects only. - - type: filter - impl: filter_capabilities - description: | - This step will filter out hosts that do not meet the compute capabilities - requested by the nova flavor extra specs, like `{"arch": "x86_64", - "maxphysaddr:bits": 46, ...}`. - - Note: currently, advanced boolean/numeric operators for the capabilities - like `>`, `!`, ... are not supported because they are not used by any of our - flavors in production. - - type: filter - impl: filter_instance_group_affinity - description: | - This step selects hosts in the instance group specified in the nova - scheduler request spec. - - type: filter - impl: filter_instance_group_anti_affinity - description: | - This step selects hosts not in the instance group specified in the nova - scheduler request spec, but only until the max_server_per_host limit is - reached (default = 1). - - type: filter - impl: filter_live_migratable - description: | - This step ensures that the target host of a live migration can accept - the migrating VM, by checking cpu architecture, cpu features, emulated - devices, and cpu modes. 
- - type: filter - impl: filter_requested_destination - description: | - This step filters hosts based on the `requested_destination` instruction - from the nova scheduler request spec. It supports filtering by host and - by aggregates. ---- -apiVersion: cortex.cloud/v1alpha1 -kind: Pipeline -metadata: - name: nova-descheduler-kvm -spec: - schedulingDomain: nova - description: - This pipeline runs steps that select virtual machines to deschedule from - compute hosts in order to optimize resource usage and performance. - This is the pipeline used for KVM hypervisors (qemu and cloud-hypervisor). - type: descheduler - {{- if $createDecisions }} - createDecisions: true - {{- end }} - steps: - - type: descheduler - impl: avoid_high_steal_pct - description: | - This step will deschedule VMs once they reach this CPU steal percentage over - the observed time span. - knowledges: - - name: kvm-libvirt-domain-cpu-steal-pct - opts: - maxStealPctOverObservedTimeSpan: 20.0 - mandatory: false diff --git a/helm/bundles/cortex-nova/templates/pipelines_kvm.yaml b/helm/bundles/cortex-nova/templates/pipelines_kvm.yaml new file mode 100644 index 00000000..0ba7926d --- /dev/null +++ b/helm/bundles/cortex-nova/templates/pipelines_kvm.yaml @@ -0,0 +1,167 @@ +{{- $createDecisions := .Values.pipelines.createDecisions | default false }} +{{- if .Values.kvm.enabled }} +--- +apiVersion: cortex.cloud/v1alpha1 +kind: Pipeline +metadata: + name: nova-external-scheduler-kvm +spec: + schedulingDomain: nova + description: | + Nova provides virtual machine placement on compute hosts for OpenStack. + After applying its own filtering and weighing logic, it delegates to cortex + for additional filtering and weighing via this external scheduler pipeline. + Cortex returns a ranked list of hosts back to nova for final selection. + This is the pipeline used for KVM hypervisors (qemu and cloud-hypervisor). 
+ type: filter-weigher + {{- if $createDecisions }} + createDecisions: true + {{- end }} + steps: [] +--- +apiVersion: cortex.cloud/v1alpha1 +kind: Pipeline +metadata: + name: nova-external-scheduler-kvm-all-filters-enabled +spec: + schedulingDomain: nova + description: | + This pipeline can be used to place reservations the same way nova would place + its virtual machines. It uses the same filtering steps as implemented in the + nova service, ensuring a valid placement of the reservation. It then leverages + cortex's weighing steps to provide an optimized host selection for the reservation. + This is the pipeline used for KVM hypervisors (qemu and cloud-hypervisor). + type: filter-weigher + {{- if $createDecisions }} + createDecisions: true + {{- end }} + steps: + - type: filter + impl: filter_host_instructions + description: | + This step will consider the `ignore_hosts` and `force_hosts` instructions + from the nova scheduler request spec to filter out or exclusively allow + certain hosts. + knowledges: [] + - type: filter + impl: filter_has_enough_capacity + description: | + This step will filter out hosts that do not have enough available capacity + to host the requested flavor. If enabled, this step will subtract the + current reservations residing on this host from the available capacity. + opts: + # If reserved space should be locked even for matching requests. + # For the reservations pipeline, we don't want to unlock + # reserved space, to avoid reservations for the same project + # and flavor to overlap. + lockReserved: true + - type: filter + impl: filter_has_requested_traits + description: | + This step filters hosts that do not have the requested traits given by the + nova flavor extra spec: "trait:<trait>": "forbidden" means the host must + not have the specified trait. "trait:<trait>": "required" means the host + must have the specified trait. 
+ - type: filter + impl: filter_has_accelerators + description: | + This step will filter out hosts without the trait `COMPUTE_ACCELERATORS` if + the nova flavor extra specs request accelerators via "accel:device_profile". + - type: filter + impl: filter_correct_az + description: | + This step will filter out hosts whose aggregate information indicates they + are not placed in the requested availability zone. + - type: filter + impl: filter_status_conditions + description: | + This step will filter out hosts for which the hypervisor status conditions + do not meet the expected values, for example, that the hypervisor is ready + and not disabled. + - type: filter + impl: filter_maintenance + description: | + This step will filter out hosts that are currently in maintenance mode that + prevents scheduling, for example, manual maintenance or termination. + - type: filter + impl: filter_external_customer + description: | + This step prefix-matches the domain name for external customer domains and + filters out hosts that are not intended for external customers. It considers + the `CUSTOM_EXTERNAL_CUSTOMER_SUPPORTED` trait on hosts as well as the + `domain_name` scheduler hint from the nova request spec. + opts: + domainNamePrefixes: ["iaas-"] + - type: filter + impl: filter_packed_virtqueue + description: | + If the flavor extra specs contain the `hw:virtio_packed_ring` key, or the + image properties contain the `hw_virtio_packed_ring` key, this step will + filter out hosts that do not have the `COMPUTE_NET_VIRTIO_PACKED` trait. + - type: filter + impl: filter_allowed_projects + description: | + This step filters hosts based on allowed projects defined in the + hypervisor resource. Note that hosts allowing all projects are still + accessible and will not be filtered out. In this way some hypervisors + are made accessible to some projects only. 
+ - type: filter + impl: filter_capabilities + description: | + This step will filter out hosts that do not meet the compute capabilities + requested by the nova flavor extra specs, like `{"arch": "x86_64", + "maxphysaddr:bits": 46, ...}`. + + Note: currently, advanced boolean/numeric operators for the capabilities + like `>`, `!`, ... are not supported because they are not used by any of our + flavors in production. + - type: filter + impl: filter_instance_group_affinity + description: | + This step selects hosts in the instance group specified in the nova + scheduler request spec. + - type: filter + impl: filter_instance_group_anti_affinity + description: | + This step selects hosts not in the instance group specified in the nova + scheduler request spec, but only until the max_server_per_host limit is + reached (default = 1). + - type: filter + impl: filter_live_migratable + description: | + This step ensures that the target host of a live migration can accept + the migrating VM, by checking cpu architecture, cpu features, emulated + devices, and cpu modes. + - type: filter + impl: filter_requested_destination + description: | + This step filters hosts based on the `requested_destination` instruction + from the nova scheduler request spec. It supports filtering by host and + by aggregates. +--- +apiVersion: cortex.cloud/v1alpha1 +kind: Pipeline +metadata: + name: nova-descheduler-kvm +spec: + schedulingDomain: nova + description: + This pipeline runs steps that select virtual machines to deschedule from + compute hosts in order to optimize resource usage and performance. + This is the pipeline used for KVM hypervisors (qemu and cloud-hypervisor). + type: descheduler + {{- if $createDecisions }} + createDecisions: true + {{- end }} + steps: + - type: descheduler + impl: avoid_high_steal_pct + description: | + This step will deschedule VMs once they reach this CPU steal percentage over + the observed time span. 
+ knowledges: + - name: kvm-libvirt-domain-cpu-steal-pct + opts: + maxStealPctOverObservedTimeSpan: 20.0 + mandatory: false +{{- end }} \ No newline at end of file diff --git a/helm/bundles/cortex-nova/values.yaml b/helm/bundles/cortex-nova/values.yaml index 346a33d6..a2aece78 100644 --- a/helm/bundles/cortex-nova/values.yaml +++ b/helm/bundles/cortex-nova/values.yaml @@ -66,6 +66,10 @@ pipelines: # Use this flag to disable the creation of decisions across all pipelines. createDecisions: false +kvm: + # Use this flag to enable/disable KVM host related features. + enabled: false + cortex: &cortex crd: {enable: false} # Disable the default ServiceMonitor and metrics service from the kubebuilder stack.