From cd4c499768871c25cf77706b1673bc53d57bc252 Mon Sep 17 00:00:00 2001 From: Alexander Rovner Date: Fri, 11 Jul 2025 16:33:28 +0200 Subject: [PATCH 1/4] add end-to-end kubernetes example --- example/README.md | 41 ++ example/kcc/values.yaml | 155 +++++ example/manifests/ecommerce.yaml | 91 +++ example/manifests/grafana-dashboard.yaml | 708 +++++++++++++++++++++++ example/manifests/grafana.yaml | 33 ++ example/manifests/kafka.yaml | 212 +++++++ example/manifests/prometheus.yaml | 93 +++ example/manifests/schema-registry.yaml | 149 +++++ example/manifests/web-analytics.yaml | 91 +++ 9 files changed, 1573 insertions(+) create mode 100644 example/README.md create mode 100644 example/kcc/values.yaml create mode 100644 example/manifests/ecommerce.yaml create mode 100644 example/manifests/grafana-dashboard.yaml create mode 100644 example/manifests/grafana.yaml create mode 100644 example/manifests/kafka.yaml create mode 100644 example/manifests/prometheus.yaml create mode 100644 example/manifests/schema-registry.yaml create mode 100644 example/manifests/web-analytics.yaml diff --git a/example/README.md b/example/README.md new file mode 100644 index 00000000..9dff790c --- /dev/null +++ b/example/README.md @@ -0,0 +1,41 @@ +# Kafka Cost Control CTF Setup + +This repo sets up an example Kubernetes environment for Kafka Cost Control (https://github.com/spoud/kafka-cost-control). + +## Requirements + +Have a Kubernetes cluster with Strimzi installed (we assume that Strimzi operator is running in the `kafka` namespace). + +## Setup + +1. Create a Kafka cluster with a metrics config: + ```bash + kubectl apply -f manifests/kafka.yaml -n kafka + ``` +1. Install a schema registry: + ```bash + kubectl apply -f manifests/schema-registry.yaml -n kafka + ``` +1. Install cost control from the helm chart: + ```bash + helm install ctf-kcc ../helm/kcc-strimzi --namespace kafka -f kcc/values.yaml + ``` +1. 
Install prometheus + ```bash + kubectl apply -f manifests/prometheus.yaml -n kafka + ``` +1. Install grafana operator + ```bash + helm upgrade -i grafana-operator oci://ghcr.io/grafana/helm-charts/grafana-operator --version v5.18.0 --namespace kafka + ``` +1. Install grafana instance + ```bash + kubectl apply -f manifests/grafana.yaml -n kafka + kubectl apply -f manifests/grafana-dashboard.yaml -n kafka + ``` +1. Now install some synth-clients to add some usage to the Kafka cluster: + ```bash + kubectl apply -f manifests/web-analytics.yaml -n kafka + kubectl apply -f manifests/ecommerce.yaml -n kafka + ``` + diff --git a/example/kcc/values.yaml b/example/kcc/values.yaml new file mode 100644 index 00000000..326ca1ac --- /dev/null +++ b/example/kcc/values.yaml @@ -0,0 +1,155 @@ +# Default values for Kafka Cost Control. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +# Used as a tag in all scraped metrics. Helps you to identify the environment where the metrics are coming from. +env: dev +# Set this to true to enable debug logging in telegraf and the context operator +debug: true + +topics: + context: {} # when unset, the below values are generated from the release name + #topicName: "context-data" + rawMetrics: + #topicName: "raw-metrics" # if unset, the topic name is generated from the release name + partitions: 3 + # default: 90 days retention + retentionMs: "7776000000" + pricingRules: + #topicName: "pricing-rules" # if unset, the topic name is generated from the release name + partitions: 1 + aggregated: + #topicName: "aggregated" # if unset, the topic name is generated from the release name + partitions: 1 + config: + retentionMs: "7776000000" + aggregatedTableFriendly: + #topicName: "aggregated-table-friendly" # if unset, the topic name is generated from the release name + partitions: 1 + config: + retentionMs: "7776000000" + # list of raw metrics topics that should be consumed by the aggregator. 
+ # If empty, the aggregator will consume the topic defined in the rawMetrics section + toAggregate: [] + +telegraf: + image: telegraf:1.31.3-alpine + enabled: true + labels: {} + # will be applied to the container + securityContext: + allowPrivilegeEscalation: false + runAsNonRoot: true + runAsUser: 12023 + runAsGroup: 12023 + readOnlyRootFilesystem: true + capabilities: + drop: ["ALL"] + seccompProfile: + type: RuntimeDefault + resources: + limits: + cpu: 1 + memory: 500Mi + requests: + cpu: 100m + memory: 100Mi + scrapeIntervalSeconds: 10 + aggregationWindowSizeSeconds: 120 + +strimzi: + clusterName: my-cluster + bootstrapServer: my-cluster-kafka-bootstrap:9093 + # If you have a custom CA certificate stored in a secret, you can specify it here. + # If this value is not provided, no truststore will be configured and all containers will use their built-in truststores (these will usually trust known CAs like Let's Encrypt). + # The custom secret must have the same structure as the strimzi-generated one. (i.e. 
it should contain ca.crt, ca.p12, ca.password keys) + clusterCaCertSecret: my-cluster-cluster-ca-cert + + # TODO: actually add support for tls and none + # "scram-sha-512" or "tls" or "none" + auth: scram-sha-512 + scramOverTls: true + contextOperator: + labels: {} + enabled: true + image: spoud/kafka-cost-control-strimzi-operator:latest + # will be applied to the container + securityContext: + runAsUser: 185 + runAsGroup: 185 + allowPrivilegeEscalation: false + runAsNonRoot: true + readOnlyRootFilesystem: true + capabilities: + drop: ["ALL"] + seccompProfile: + type: RuntimeDefault + livenessProbe: + initialDelaySeconds: 30 + periodSeconds: 10 + failureThreshold: 5 + readinessProbe: + initialDelaySeconds: 15 + periodSeconds: 10 + failureThreshold: 5 + startupProbe: + initialDelaySeconds: 15 + periodSeconds: 10 + failureThreshold: 10 + resources: + limits: + cpu: 1 + memory: 1Gi + requests: + cpu: 100m + memory: 128Mi + +# Don't have a schema registry and would like to try out this chart? 
Run the following two commands: +# kubectl create deploy schema-registry --image=apicurio/apicurio-registry:3.0.5 -n <namespace> --port 8080 +# kubectl expose deploy schema-registry +schemaRegistry: + url: http://schema-registry:8080/apis/ccompat/v7 + +aggregator: + enabled: true + aggregationWindowSize: PT2M + # Enable DuckDB integration + olapEnabled: false + # How much of the total pod memory is DuckDB allowed to use (30% is the default if not set) + olapDatabaseMemoryLimitPercent: 30 + appId: kcc-aggregator + image: spoud/kafka-cost-control:0.4.1 + # Labels that will be added to the pod + labels: {} + # will be applied to the container + securityContext: + allowPrivilegeEscalation: false + runAsNonRoot: true + runAsUser: 185 + runAsGroup: 185 + readOnlyRootFilesystem: true + capabilities: + drop: ["ALL"] + seccompProfile: + type: RuntimeDefault + storage: + # Override the default storage class if you don't want to use the default one + resources: + limits: + cpu: 4000m + memory: 2Gi + requests: + cpu: 200m + memory: 1Gi + volumeClaimTemplate: + #storageClassName: standard + accessModes: [ "ReadWriteOnce" ] + resources: + requests: + storage: 1Gi + +connect: + enabled: false + +timescaledb: + enabled: false \ No newline at end of file diff --git a/example/manifests/ecommerce.yaml b/example/manifests/ecommerce.yaml new file mode 100644 index 00000000..eb99a91f --- /dev/null +++ b/example/manifests/ecommerce.yaml @@ -0,0 +1,91 @@ +# This file illustrates how to use the synth client with a Strimzi cluster. +# It defines a kafka user for the synth client, a topic to write to, and a deployment of the client itself. +# The metrics are exposed on container port 8081 (no Service for this port is defined in this file). 
+apiVersion: kafka.strimzi.io/v1beta2 +kind: KafkaUser +metadata: + annotations: + spoud.io/kcc-context.application: ecommerce + labels: + strimzi.io/cluster: my-cluster + name: ecommerce +spec: + authentication: + type: scram-sha-512 + authorization: + acls: + - host: '*' + operations: + - Describe + - Write + - Read + - Alter + resource: + name: ecommerce-purchases + patternType: literal + type: topic + - host: '*' + operations: + - Read + resource: + name: ecommerce- + patternType: prefix + type: group + type: simple +--- +apiVersion: kafka.strimzi.io/v1beta2 +kind: KafkaTopic +metadata: + annotations: + spoud.io/kcc-context.application: ecommerce + labels: + strimzi.io/cluster: my-cluster + name: ecommerce-purchases +spec: + config: + retention.ms: 86400000 + partitions: 1 +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + labels: + app: ecommerce + name: ecommerce +spec: + replicas: 1 + selector: + matchLabels: + app: ecommerce + strategy: {} + template: + metadata: + labels: + app: ecommerce + spec: + containers: + - image: ghcr.io/spoud/kafka-synth-client:v1.0.1 + name: synth + resources: {} + ports: + - containerPort: 8081 + name: prometheus + env: + - name: SYNTH_CLIENT_TOPIC + value: ecommerce-purchases + - name: KAFKA_BOOTSTRAP_SERVERS + value: my-cluster-kafka-bootstrap:9094 + - name: KAFKA_GROUP_ID + value: ecommerce-group + - name: KAFKA_SASL_MECHANISM + value: SCRAM-SHA-512 + - name: SYNTH_CLIENT_MESSAGES_MESSAGES_PER_SECOND + value: "75" + - name: KAFKA_SASL_JAAS_CONFIG + valueFrom: + secretKeyRef: + name: ecommerce + key: sasl.jaas.config + # don't forget to adjust this to SASL_SSL if you are using a TLS listener (also remember to mount the truststore in this case) + - name: KAFKA_SECURITY_PROTOCOL + value: SASL_PLAINTEXT diff --git a/example/manifests/grafana-dashboard.yaml b/example/manifests/grafana-dashboard.yaml new file mode 100644 index 00000000..800916a3 --- /dev/null +++ b/example/manifests/grafana-dashboard.yaml @@ -0,0 +1,708 @@ 
+--- +apiVersion: grafana.integreatly.org/v1beta1 +kind: GrafanaDashboard +metadata: + name: kcc-dashboard +spec: + resyncPeriod: 30s + instanceSelector: + matchLabels: + dashboards: "grafana" + json: > + { + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 4, + "links": [], + "panels": [ + { + "datasource": { + "type": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + } + }, + "mappings": [] + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 9, + "x": 0, + "y": 0 + }, + "id": 4, + "options": { + "legend": { + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "pieType": "pie", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.3.0", + "targets": [ + { + "datasource": { + "type": "prometheus" + }, + "editorMode": "code", + "expr": "sum(max_over_time (kcc_kafka_log_log_size{principal!=\"unknown\"}[2m])) by (principal)", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Storage Usage by Principal", + "type": "piechart" + }, + { + "datasource": { + "type": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "fillOpacity": 95, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineWidth": 1, + "scaleDistribution": { + 
"type": "linear" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "fieldMinMax": false, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 15, + "x": 9, + "y": 0 + }, + "id": 3, + "options": { + "barRadius": 0, + "barWidth": 0.97, + "fullHighlight": false, + "groupWidth": 0.7, + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "orientation": "auto", + "showValue": "auto", + "stacking": "percent", + "tooltip": { + "mode": "single", + "sort": "none" + }, + "xTickLabelRotation": 0, + "xTickLabelSpacing": 100 + }, + "pluginVersion": "11.3.0", + "targets": [ + { + "editorMode": "code", + "expr": "sum(max_over_time(kcc_kafka_server_brokertopicmetrics_bytesout_total{principal!=\"unknown\"}[2m])) by (principal)", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Bytes Consumed by Principal", + "type": "barchart" + }, + { + "datasource": { + "type": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + } + }, + "mappings": [] + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 9, + "x": 0, + "y": 9 + }, + "id": 1, + "options": { + "legend": { + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "pieType": "pie", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.3.0", + "targets": [ + { + "datasource": { + "type": "prometheus" + }, + "editorMode": "code", + "expr": "max(max_over_time (kcc_kafka_log_log_size{principal!=\"unknown\"}[2m])) by (topic)", + "legendFormat": "__auto", + "range": true, + "refId": "A" 
+ } + ], + "title": "Storage Usage by Topic", + "type": "piechart" + }, + { + "datasource": { + "type": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "fillOpacity": 95, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineWidth": 1, + "scaleDistribution": { + "type": "linear" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "fieldMinMax": false, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 15, + "x": 9, + "y": 9 + }, + "id": 2, + "options": { + "barRadius": 0, + "barWidth": 0.97, + "fullHighlight": false, + "groupWidth": 0.7, + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "orientation": "auto", + "showValue": "auto", + "stacking": "percent", + "tooltip": { + "mode": "single", + "sort": "none" + }, + "xTickLabelRotation": 0, + "xTickLabelSpacing": 100 + }, + "pluginVersion": "11.3.0", + "targets": [ + { + "editorMode": "code", + "expr": "sum(max_over_time(kcc_kafka_server_brokertopicmetrics_bytesin_total{principal!=\"unknown\"}[2m])) by (principal)", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Bytes Produced by Principal", + "type": "barchart" + } + ], + "preload": false, + "refresh": "5s", + "schemaVersion": 40, + "tags": [], + "templating": { + "list": [] + }, + "time": { + "from": "now-30m", + "to": "now" + }, + "timepicker": {}, + "timezone": "browser", + "title": "Kafka Cost Control Dashboard", + "uid": "aerf8nzire7eod", + "version": 23, + "weekStart": ""} +--- +apiVersion: grafana.integreatly.org/v1beta1 +kind: 
GrafanaDashboard +metadata: + name: app-dashboard +spec: + resyncPeriod: 30s + instanceSelector: + matchLabels: + dashboards: "grafana" + json: > + { + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 5, + "links": [], + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "08c55290-991c-4165-95bf-874d2d5eef5b" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + } + }, + "mappings": [], + "unit": "decbytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 15, + "w": 10, + "x": 0, + "y": 0 + }, + "id": 1, + "options": { + "legend": { + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "pieType": "pie", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.3.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "08c55290-991c-4165-95bf-874d2d5eef5b" + }, + "editorMode": "code", + "expr": "max(max_over_time (kcc_kafka_log_log_size{principal=\"$application\"}[2m])) by (topic)", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Application Topic Sizes", + "type": "piechart" + }, + { + "datasource": { + "type": "prometheus", + "uid": "08c55290-991c-4165-95bf-874d2d5eef5b" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + 
}, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 14, + "x": 10, + "y": 0 + }, + "id": 2, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.3.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "08c55290-991c-4165-95bf-874d2d5eef5b" + }, + "editorMode": "code", + "expr": "sum(max_over_time(kcc_kafka_server_brokertopicmetrics_bytesout_total{principal=\"$application\"}[2m])) / sum(max_over_time(kcc_kafka_server_brokertopicmetrics_bytesout_total{principal!=\"unknown\"}[2m]))", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Percentage of total consume traffic", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "08c55290-991c-4165-95bf-874d2d5eef5b" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 14, + "x": 10, + "y": 5 + }, + "id": 4, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.3.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "08c55290-991c-4165-95bf-874d2d5eef5b" + }, + "editorMode": "code", + "expr": 
"sum(max_over_time(kcc_kafka_log_log_size{principal=\"$application\"}[2m])) / sum(max_over_time(kcc_kafka_log_log_size{principal!=\"unknown\"}[2m]))", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Percentage of total storage usage", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "08c55290-991c-4165-95bf-874d2d5eef5b" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 14, + "x": 10, + "y": 10 + }, + "id": 3, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.3.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "08c55290-991c-4165-95bf-874d2d5eef5b" + }, + "editorMode": "code", + "expr": "sum(max_over_time(kcc_kafka_server_brokertopicmetrics_bytesin_total{principal=\"$application\"}[2m])) / sum(max_over_time(kcc_kafka_server_brokertopicmetrics_bytesin_total{principal!=\"unknown\"}[2m]))", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Percentage of total produce traffic", + "type": "stat" + } + ], + "preload": false, + "refresh": "1m", + "schemaVersion": 40, + "tags": [], + "templating": { + "list": [ + { + "current": { + "text": "web-analytics", + "value": "web-analytics" + }, + "datasource": { + "type": "prometheus", + "uid": "08c55290-991c-4165-95bf-874d2d5eef5b" + }, + "definition": "label_values(principal)", + "description": "Application 
to monitor", + "label": "Application", + "name": "application", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(principal)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "type": "query" + } + ] + }, + "time": { + "from": "now-30m", + "to": "now" + }, + "timepicker": {}, + "timezone": "browser", + "title": "Kafka Costs by Application", + "uid": "cerfdqh0vpzpca", + "version": 10, + "weekStart": ""} \ No newline at end of file diff --git a/example/manifests/grafana.yaml b/example/manifests/grafana.yaml new file mode 100644 index 00000000..dc44a508 --- /dev/null +++ b/example/manifests/grafana.yaml @@ -0,0 +1,33 @@ +apiVersion: grafana.integreatly.org/v1beta1 +kind: Grafana +metadata: + name: grafana + labels: + dashboards: "grafana" +spec: + config: + log: + mode: "console" + security: + admin_user: admin + admin_password: admin +--- +apiVersion: grafana.integreatly.org/v1beta1 +kind: GrafanaDatasource +metadata: + name: prometheus +spec: + instanceSelector: + matchLabels: + dashboards: grafana + allowCrossNamespaceImport: true + uid: 08c55290-991c-4165-95bf-874d2d5eef5b + datasource: + access: proxy + database: prometheus + jsonData: + timeInterval: 5s + tlsSkipVerify: true + name: prometheus + type: prometheus + url: http://prometheus:9090 \ No newline at end of file diff --git a/example/manifests/kafka.yaml b/example/manifests/kafka.yaml new file mode 100644 index 00000000..1c4d946c --- /dev/null +++ b/example/manifests/kafka.yaml @@ -0,0 +1,212 @@ +apiVersion: kafka.strimzi.io/v1beta2 +kind: Kafka +metadata: + annotations: + strimzi.io/kraft: enabled + strimzi.io/node-pools: enabled + name: my-cluster +spec: + entityOperator: + topicOperator: {} + userOperator: {} + kafka: + authorization: + type: simple + config: + auto.create.topics.enable: "false" + default.replication.factor: 1 + min.insync.replicas: 1 + offsets.topic.replication.factor: 1 + transaction.state.log.min.isr: 1 + 
transaction.state.log.replication.factor: 1 + listeners: + - authentication: + type: scram-sha-512 + name: plain + port: 9094 + tls: false + type: internal + - authentication: + type: scram-sha-512 + name: tls + port: 9093 + tls: true + type: internal + metadataVersion: 3.9-IV0 + metricsConfig: + type: jmxPrometheusExporter + valueFrom: + configMapKeyRef: + key: kafka-metrics-config.yml + name: kafka-metrics + version: 3.9.0 +--- +apiVersion: v1 +data: + kafka-metrics-config.yml: | + # See https://github.com/prometheus/jmx_exporter for more info about JMX Prometheus Exporter metrics + lowercaseOutputName: true + rules: + # Special cases and very specific rules + - pattern: kafka.server<type=(.+), name=(.+), clientId=(.+), topic=(.+), partition=(.*)><>Value + name: kafka_server_$1_$2 + type: GAUGE + labels: + clientId: "$3" + topic: "$4" + partition: "$5" + - pattern: kafka.server<type=(.+), name=(.+), clientId=(.+), brokerHost=(.+), brokerPort=(.+)><>Value + name: kafka_server_$1_$2 + type: GAUGE + labels: + clientId: "$3" + broker: "$4:$5" + - pattern: kafka.server<type=(.+), cipher=(.+), protocol=(.+), listener=(.+), networkProcessor=(.+)><>connections + name: kafka_server_$1_connections_tls_info + type: GAUGE + labels: + cipher: "$2" + protocol: "$3" + listener: "$4" + networkProcessor: "$5" + - pattern: kafka.server<type=(.+), clientSoftwareName=(.+), clientSoftwareVersion=(.+), listener=(.+), networkProcessor=(.+)><>connections + name: kafka_server_$1_connections_software + type: GAUGE + labels: + clientSoftwareName: "$2" + clientSoftwareVersion: "$3" + listener: "$4" + networkProcessor: "$5" + - pattern: "kafka.server<type=(.+), listener=(.+), networkProcessor=(.+)><>(.+-total):" + name: kafka_server_$1_$4 + type: COUNTER + labels: + listener: "$2" + networkProcessor: "$3" + - pattern: "kafka.server<type=(.+), listener=(.+), networkProcessor=(.+)><>(.+):" + name: kafka_server_$1_$4 + type: GAUGE + labels: + listener: "$2" + networkProcessor: "$3" + - pattern: kafka.server<type=(.+), listener=(.+), networkProcessor=(.+)><>(.+-total) + name: kafka_server_$1_$4 + type: COUNTER + labels: + listener: "$2" + networkProcessor: "$3" + - pattern: kafka.server<type=(.+), listener=(.+), networkProcessor=(.+)><>(.+) + name: kafka_server_$1_$4 + type: GAUGE + labels: + listener: "$2" + networkProcessor: "$3" + # Some percent metrics use MeanRate attribute + # Ex) kafka.server<type=(KafkaRequestHandlerPool), name=(RequestHandlerAvgIdlePercent)><>MeanRate + - pattern: kafka.(\w+)<type=(.+), name=(.+)Percent\w*><>MeanRate + name: kafka_$1_$2_$3_percent + type: GAUGE + # 
Generic gauges for percents + - pattern: kafka.(\w+)<type=(.+), name=(.+)Percent\w*><>Value + name: kafka_$1_$2_$3_percent + type: GAUGE + - pattern: kafka.(\w+)<type=(.+), name=(.+)Percent\w*, (.+)=(.+)><>Value + name: kafka_$1_$2_$3_percent + type: GAUGE + labels: + "$4": "$5" + # Generic per-second counters with 0-2 key/value pairs + - pattern: kafka.(\w+)<type=(.+), name=(.+)PerSec\w*, (.+)=(.+), (.+)=(.+)><>Count + name: kafka_$1_$2_$3_total + type: COUNTER + labels: + "$4": "$5" + "$6": "$7" + - pattern: kafka.(\w+)<type=(.+), name=(.+)PerSec\w*, (.+)=(.+)><>Count + name: kafka_$1_$2_$3_total + type: COUNTER + labels: + "$4": "$5" + - pattern: kafka.(\w+)<type=(.+), name=(.+)PerSec\w*><>Count + name: kafka_$1_$2_$3_total + type: COUNTER + # Generic gauges with 0-2 key/value pairs + - pattern: kafka.(\w+)<type=(.+), name=(.+), (.+)=(.+), (.+)=(.+)><>Value + name: kafka_$1_$2_$3 + type: GAUGE + labels: + "$4": "$5" + "$6": "$7" + - pattern: kafka.(\w+)<type=(.+), name=(.+), (.+)=(.+)><>Value + name: kafka_$1_$2_$3 + type: GAUGE + labels: + "$4": "$5" + - pattern: kafka.(\w+)<type=(.+), name=(.+)><>Value + name: kafka_$1_$2_$3 + type: GAUGE + # Emulate Prometheus 'Summary' metrics for the exported 'Histogram's. + # Note that these are missing the '_sum' metric! + - pattern: kafka.(\w+)<type=(.+), name=(.+), (.+)=(.+), (.+)=(.+)><>Count + name: kafka_$1_$2_$3_count + type: COUNTER + labels: + "$4": "$5" + "$6": "$7" + - pattern: kafka.(\w+)<type=(.+), name=(.+), (.+)=(.*), (.+)=(.+)><>(\d+)thPercentile + name: kafka_$1_$2_$3 + type: GAUGE + labels: + "$4": "$5" + "$6": "$7" + quantile: "0.$8" + - pattern: kafka.(\w+)<type=(.+), name=(.+), (.+)=(.+)><>Count + name: kafka_$1_$2_$3_count + type: COUNTER + labels: + "$4": "$5" + - pattern: kafka.(\w+)<type=(.+), name=(.+), (.+)=(.*)><>(\d+)thPercentile + name: kafka_$1_$2_$3 + type: GAUGE + labels: + "$4": "$5" + quantile: "0.$6" + - pattern: kafka.(\w+)<type=(.+), name=(.+)><>Count + name: kafka_$1_$2_$3_count + type: COUNTER + - pattern: kafka.(\w+)<type=(.+), name=(.+)><>(\d+)thPercentile + name: kafka_$1_$2_$3 + type: GAUGE + labels: + quantile: "0.$4" + # KRaft overall related metrics + # distinguish between always increasing COUNTER (total and max) and variable GAUGE (all others) metrics + - pattern: "kafka.server<type=raft-metrics><>(.+-total|.+-max):" + name: kafka_server_raftmetrics_$1 + type: COUNTER + - pattern: "kafka.server<type=raft-metrics><>(current-state): (.+)" + name: kafka_server_raftmetrics_$1 + value: 1 + type: UNTYPED + 
labels: + $1: "$2" + - pattern: "kafka.server<type=raft-metrics><>(.+):" + name: kafka_server_raftmetrics_$1 + type: GAUGE + # KRaft "low level" channels related metrics + # distinguish between always increasing COUNTER (total and max) and variable GAUGE (all others) metrics + - pattern: "kafka.server<type=raft-channel-metrics><>(.+-total|.+-max):" + name: kafka_server_raftchannelmetrics_$1 + type: COUNTER + - pattern: "kafka.server<type=raft-channel-metrics><>(.+):" + name: kafka_server_raftchannelmetrics_$1 + type: GAUGE + # Broker metrics related to fetching metadata topic records in KRaft mode + - pattern: "kafka.server<type=broker-metadata-metrics><>(.+):" + name: kafka_server_brokermetadatametrics_$1 + type: GAUGE +kind: ConfigMap +metadata: + labels: + app: strimzi + name: kafka-metrics diff --git a/example/manifests/prometheus.yaml b/example/manifests/prometheus.yaml new file mode 100644 index 00000000..0a8d77e1 --- /dev/null +++ b/example/manifests/prometheus.yaml @@ -0,0 +1,93 @@ +apiVersion: v1 +kind: Service +metadata: + creationTimestamp: null + labels: + app: kcc + name: kcc +spec: + ports: + - name: 8080-8080 + port: 8080 + protocol: TCP + targetPort: 8080 + selector: + k8s-app: ctf-kcc-kafka-cost-control + type: ClusterIP +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + creationTimestamp: null + labels: + app: prometheus + name: prometheus +spec: + volumeClaimTemplates: + - spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 4Gi + metadata: + name: data + replicas: 1 + selector: + matchLabels: + app: prometheus + template: + metadata: + creationTimestamp: null + labels: + app: prometheus + spec: + volumes: + - name: config-volume + configMap: + name: prometheus-config + containers: + - image: prom/prometheus:latest + name: prometheus + volumeMounts: + - mountPath: /etc/prometheus + name: config-volume + readOnly: true + - mountPath: /prometheus + name: data + ports: + - containerPort: 9090 + resources: {} +--- +apiVersion: v1 +data: + prometheus.yml: |- + global: + + scrape_configs: + - job_name: 'kcc' + 
scrape_interval: 30s + metrics_path: /q/metrics + static_configs: + - targets: [ 'kcc:8080' ] +kind: ConfigMap +metadata: + creationTimestamp: null + name: prometheus-config +--- +apiVersion: v1 +kind: Service +metadata: + creationTimestamp: null + labels: + app: prometheus + name: prometheus +spec: + ports: + - name: 9090-9090 + port: 9090 + protocol: TCP + targetPort: 9090 + selector: + app: prometheus + type: ClusterIP diff --git a/example/manifests/schema-registry.yaml b/example/manifests/schema-registry.yaml new file mode 100644 index 00000000..5c2bf6e9 --- /dev/null +++ b/example/manifests/schema-registry.yaml @@ -0,0 +1,149 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + labels: + app: schema-registry + name: schema-registry +spec: + progressDeadlineSeconds: 600 + replicas: 1 + revisionHistoryLimit: 10 + selector: + matchLabels: + app: schema-registry + strategy: + rollingUpdate: + maxSurge: 25% + maxUnavailable: 25% + type: RollingUpdate + template: + metadata: + creationTimestamp: null + labels: + app: schema-registry + spec: + containers: + - env: + - name: REGISTRY_RULES_GLOBAL_COMPATIBILITY + value: BACKWARD + - name: REGISTRY_CCOMPAT_GLOBAL_COMPATIBILITY_LEVEL + value: BACKWARD + - name: COMPATIBILITY_LEVEL + value: BACKWARD + - name: KAFKA_BOOTSTRAP_SERVERS + value: "my-cluster-kafka-bootstrap:9094" + - name: REGISTRY_KAFKASQL_SCRAM_USER + value: schema-registry + - name: REGISTRY_KAFKASQL_SCRAM_PASSWORD + valueFrom: + secretKeyRef: + key: password + name: schema-registry + - name: REGISTRY_KAFKA_COMMON_SASL_JAAS_CONFIG + value: >- + org.apache.kafka.common.security.scram.ScramLoginModule required + username='$(REGISTRY_KAFKASQL_SCRAM_USER)' + password='$(REGISTRY_KAFKASQL_SCRAM_PASSWORD)'; + - name: QUARKUS_PROFILE + value: prod + - name: REGISTRY_PROPERTIES_PREFIX + value: REGISTRY_ + - name: REGISTRY_UI_FEATURES_READONLY + value: 'true' + - name: REGISTRY_KAFKA_COMMON_SASL_MECHANISM + value: SCRAM-SHA-512 + - name: 
REGISTRY_KAFKA_COMMON_SECURITY_PROTOCOL + value: SASL_PLAINTEXT + image: apicurio/apicurio-registry-kafkasql:2.6.8.Final + imagePullPolicy: IfNotPresent + livenessProbe: + failureThreshold: 3 + httpGet: + path: /health/live + port: 8080 + scheme: HTTP + initialDelaySeconds: 15 + periodSeconds: 10 + successThreshold: 1 + timeoutSeconds: 5 + name: registry + ports: + - containerPort: 8443 + protocol: TCP + - containerPort: 8080 + protocol: TCP + readinessProbe: + failureThreshold: 3 + httpGet: + path: /health/ready + port: 8080 + scheme: HTTP + initialDelaySeconds: 15 + periodSeconds: 10 + successThreshold: 1 + timeoutSeconds: 5 + dnsPolicy: ClusterFirst + restartPolicy: Always + schedulerName: default-scheduler + securityContext: {} + terminationGracePeriodSeconds: 30 +--- +apiVersion: kafka.strimzi.io/v1beta2 +kind: KafkaUser +metadata: + annotations: + spoud.io/kcc-context.application: schema-registry + labels: + strimzi.io/cluster: my-cluster + name: schema-registry +spec: + authentication: + type: scram-sha-512 + authorization: + acls: + - host: '*' + operations: + - All + resource: + name: "kafkasql-journal" + patternType: literal + type: topic + - host: '*' + operations: + - Read + resource: + name: "*" + patternType: literal + type: group + type: simple +--- +apiVersion: v1 +kind: Service +metadata: + labels: + app: schema-registry + name: schema-registry +spec: + ports: + - port: 8080 + protocol: TCP + targetPort: 8080 + selector: + app: schema-registry + sessionAffinity: None + type: ClusterIP +--- +apiVersion: kafka.strimzi.io/v1beta2 +kind: KafkaTopic +metadata: + annotations: + spoud.io/kcc-context.application: schema-registry + labels: + strimzi.io/cluster: my-cluster + name: kafkasql-journal +spec: + config: + min.insync.replicas: 1 + cleanup.policy: compact + message.timestamp.type: LogAppendTime + partitions: 1 diff --git a/example/manifests/web-analytics.yaml b/example/manifests/web-analytics.yaml new file mode 100644 index 00000000..385feffc --- 
/dev/null +++ b/example/manifests/web-analytics.yaml @@ -0,0 +1,91 @@ +# This file illustrates how to use the synth client with a Strimzi cluster. +# It defines a kafka user for the synth client, a topic to write to, and a deployment of the client itself. +# The metrics are exposed on container port 8081 (named prometheus); no Service for this port is defined in this file. +apiVersion: kafka.strimzi.io/v1beta2 +kind: KafkaUser +metadata: + annotations: + spoud.io/kcc-context.application: web-analytics + labels: + strimzi.io/cluster: my-cluster + name: web-analytics +spec: + authentication: + type: scram-sha-512 + authorization: + acls: + - host: '*' + operations: + - Describe + - Write + - Read + - Alter + resource: + name: web-analytics-clickstream + patternType: literal + type: topic + - host: '*' + operations: + - Read + resource: + name: web-analytics- + patternType: prefix + type: group + type: simple +--- +apiVersion: kafka.strimzi.io/v1beta2 +kind: KafkaTopic +metadata: + annotations: + spoud.io/kcc-context.application: web-analytics + labels: + strimzi.io/cluster: my-cluster + name: web-analytics-clickstream +spec: + config: + retention.ms: 86400000 + partitions: 1 +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + labels: + app: web-analytics + name: web-analytics +spec: + replicas: 1 + selector: + matchLabels: + app: web-analytics + strategy: {} + template: + metadata: + labels: + app: web-analytics + spec: + containers: + - image: ghcr.io/spoud/kafka-synth-client:v1.0.1 + name: synth + resources: {} + ports: + - containerPort: 8081 + name: prometheus + env: + - name: SYNTH_CLIENT_TOPIC + value: web-analytics-clickstream + - name: KAFKA_BOOTSTRAP_SERVERS + value: my-cluster-kafka-bootstrap:9094 + - name: KAFKA_GROUP_ID + value: web-analytics-group + - name: KAFKA_SASL_MECHANISM + value: SCRAM-SHA-512 + - name: SYNTH_CLIENT_MESSAGES_MESSAGES_PER_SECOND + value: "150" + - name: KAFKA_SASL_JAAS_CONFIG + valueFrom: + secretKeyRef: + name: web-analytics + key: sasl.jaas.config + # don't
forget to adjust this to SASL_SSL if you are using a TLS listener (also remember to mount the truststore in this case) + - name: KAFKA_SECURITY_PROTOCOL + value: SASL_PLAINTEXT From c02986d89d5edcf0c83096879f245b4d84e0846c Mon Sep 17 00:00:00 2001 From: Alexander Rovner Date: Fri, 11 Jul 2025 16:35:29 +0200 Subject: [PATCH 2/4] docs fix --- example/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/example/README.md b/example/README.md index 9dff790c..ac236ae1 100644 --- a/example/README.md +++ b/example/README.md @@ -1,10 +1,10 @@ -# Kafka Cost Control CTF Setup +# Kafka Cost Control Demo Setup This repo sets up an example Kubernetes environment for Kafka Cost Control (https://github.com/spoud/kafka-cost-control). ## Requirements -Have a Kubernetes cluster with Strimzi installed (we assume that Strimzi operator is running in the `kafka` namespace). +Have a Kubernetes cluster with Strimzi installed (we assume that the Strimzi operator is running in the `kafka` namespace). ## Setup From eacb78d5f15a0244ba24570c91e9198366df990a Mon Sep 17 00:00:00 2001 From: Alexander Rovner Date: Wed, 16 Jul 2025 12:06:06 +0200 Subject: [PATCH 3/4] add non-trivial producer/consumer example --- example/manifests/bookings.yaml | 129 ++++++++++++++++++++++++++++++++ 1 file changed, 129 insertions(+) create mode 100644 example/manifests/bookings.yaml diff --git a/example/manifests/bookings.yaml b/example/manifests/bookings.yaml new file mode 100644 index 00000000..23f6f9cb --- /dev/null +++ b/example/manifests/bookings.yaml @@ -0,0 +1,129 @@ +# This file illustrates a producer/consumer pair (kcc-ctf-challenges image) running against a Strimzi cluster. +# It defines a kafka user (hotels), a topic (hotels-bookings), and separate producer and consumer deployments that share that user. +# Each container declares port 8000 (named prometheus); no Service is defined in this file.
+apiVersion: kafka.strimzi.io/v1beta2 +kind: KafkaUser +metadata: + annotations: + spoud.io/kcc-context.application: hotels + labels: + strimzi.io/cluster: my-cluster + name: hotels +spec: + authentication: + type: scram-sha-512 + authorization: + acls: + - host: '*' + operations: + - Describe + - Write + - Read + - Alter + resource: + name: hotels-bookings + patternType: literal + type: topic + - host: '*' + operations: + - Read + resource: + name: hotels- + patternType: prefix + type: group + type: simple +--- +apiVersion: kafka.strimzi.io/v1beta2 +kind: KafkaTopic +metadata: + annotations: + spoud.io/kcc-context.application: hotels + labels: + strimzi.io/cluster: my-cluster + name: hotels-bookings +spec: + config: + retention.ms: 86400000 + partitions: 1 +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + labels: + app: hotels-producer + name: hotels-producer +spec: + replicas: 1 + selector: + matchLabels: + app: hotels-producer + strategy: {} + template: + metadata: + labels: + app: hotels-producer + spec: + containers: + - image: ghcr.io/spoud/kcc-ctf-challenges:main + name: producer + args: ["--topic", "hotels-bookings", "producer"] + resources: {} + ports: + - containerPort: 8000 + name: prometheus + env: + - name: KAFKA_BOOTSTRAP_SERVERS + value: my-cluster-kafka-bootstrap:9094 + - name: KAFKA_SASL_MECHANISM + value: SCRAM-SHA-512 + - name: KAFKA_SASL_PLAIN_USERNAME + value: hotels + - name: KAFKA_SASL_PLAIN_PASSWORD + valueFrom: + secretKeyRef: + name: hotels + key: password + - name: KAFKA_SECURITY_PROTOCOL + value: SASL_PLAINTEXT +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + labels: + app: hotels-consumer + name: hotels-consumer +spec: + replicas: 1 + selector: + matchLabels: + app: hotels-consumer + strategy: {} + template: + metadata: + labels: + app: hotels-consumer + spec: + containers: + - image: ghcr.io/spoud/kcc-ctf-challenges:main + name: consumer + args: ["--topic", "hotels-bookings", "consumer"] + resources: {} + ports: + - 
containerPort: 8000 + name: prometheus + env: + - name: KAFKA_BOOTSTRAP_SERVERS + value: my-cluster-kafka-bootstrap:9094 + - name: KAFKA_GROUP_ID + value: hotels-consumer-group + - name: KAFKA_SASL_MECHANISM + value: SCRAM-SHA-512 + - name: KAFKA_SASL_PLAIN_USERNAME + value: hotels + - name: KAFKA_SASL_PLAIN_PASSWORD + valueFrom: + secretKeyRef: + name: hotels + key: password + - name: KAFKA_SECURITY_PROTOCOL + value: SASL_PLAINTEXT \ No newline at end of file From 256a237ba6a007a6a6f8f7892c5258a7435185c9 Mon Sep 17 00:00:00 2001 From: Lukas Gisi Date: Mon, 18 Aug 2025 16:58:20 +0200 Subject: [PATCH 4/4] Bump Kafka to 4.0 --- example/manifests/kafka.yaml | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/example/manifests/kafka.yaml b/example/manifests/kafka.yaml index 1c4d946c..00b4aa93 100644 --- a/example/manifests/kafka.yaml +++ b/example/manifests/kafka.yaml @@ -1,3 +1,24 @@ +apiVersion: kafka.strimzi.io/v1beta2 +kind: KafkaNodePool +metadata: + name: dual-role + labels: + strimzi.io/cluster: my-cluster +spec: + replicas: 1 + roles: + - controller + - broker + storage: + type: jbod + volumes: + - id: 0 + type: persistent-claim + size: 20Gi + deleteClaim: false + kraftMetadata: shared +--- + apiVersion: kafka.strimzi.io/v1beta2 kind: Kafka metadata: @@ -32,14 +53,14 @@ spec: port: 9093 tls: true type: internal - metadataVersion: 3.9-IV0 + metadataVersion: 4.0-IV3 metricsConfig: type: jmxPrometheusExporter valueFrom: configMapKeyRef: key: kafka-metrics-config.yml name: kafka-metrics - version: 3.9.0 + version: 4.0.0 --- apiVersion: v1 data: