Skip to content

Commit e7b7ebd

Browse files
committed
Support OpenMetrics
- add cluster_health_status enum - add units to cluster metrics
1 parent b144a0e commit e7b7ebd

File tree

4 files changed

+88
-21
lines changed

4 files changed

+88
-21
lines changed

src/main/java/org/compuscene/metrics/prometheus/PrometheusMetricsCatalog.java

+36-8
Original file line numberDiff line numberDiff line change
@@ -19,26 +19,27 @@
1919

2020
import org.apache.logging.log4j.LogManager;
2121
import org.apache.logging.log4j.Logger;
22+
import org.elasticsearch.SpecialPermission;
2223
import org.elasticsearch.rest.prometheus.RestPrometheusMetricsAction;
2324

2425
import java.io.IOException;
2526
import java.io.StringWriter;
2627
import java.io.Writer;
28+
import java.security.AccessController;
29+
import java.security.PrivilegedActionException;
30+
import java.security.PrivilegedExceptionAction;
2731
import java.util.HashMap;
2832
import java.util.Locale;
2933

3034
import io.prometheus.client.CollectorRegistry;
35+
//import io.prometheus.client.Counter;
36+
import io.prometheus.client.Enumeration;
3137
import io.prometheus.client.Gauge;
3238
import io.prometheus.client.Info;
3339
import io.prometheus.client.Summary;
3440
import io.prometheus.client.exporter.common.TextFormat;
3541
import io.prometheus.client.hotspot.DefaultExports;
3642

37-
import org.elasticsearch.SpecialPermission;
38-
import java.security.AccessController;
39-
import java.security.PrivilegedActionException;
40-
import java.security.PrivilegedExceptionAction;
41-
4243
/**
4344
* A class that describes a Prometheus metrics catalog.
4445
*/
@@ -106,9 +107,28 @@ private String[] getExtendedNodeLabelValues(String... labelValues) {
106107
return extended;
107108
}
108109

109-
public void registerClusterGauge(String metric, String help, String... labels) {
110+
public void registerClusterEnum(String metric, String help, Class e, String... labels) {
111+
Enumeration enumeration = Enumeration.build().
112+
name(metricPrefix + metric).
113+
help(help).
114+
states(e).
115+
labelNames(getExtendedClusterLabelNames(labels)).
116+
register(registry);
117+
118+
metrics.put(metric, enumeration);
119+
120+
logger.debug(String.format(Locale.ENGLISH, "Registered new enumeration cluster %s", metric));
121+
}
122+
123+
public void setClusterEnum(String metric, String state, String... labelValues) {
124+
Enumeration enumeration = (Enumeration) metrics.get(metric);
125+
enumeration.labels(getExtendedClusterLabelValues(labelValues)).state(state);
126+
}
127+
128+
public void registerClusterGaugeUnit(String metric, String unit, String help, String... labels) {
110129
Gauge gauge = Gauge.build().
111130
name(metricPrefix + metric).
131+
unit(unit).
112132
help(help).
113133
labelNames(getExtendedClusterLabelNames(labels)).
114134
register(registry);
@@ -118,6 +138,10 @@ public void registerClusterGauge(String metric, String help, String... labels) {
118138
logger.debug(String.format(Locale.ENGLISH, "Registered new cluster gauge %s", metric));
119139
}
120140

141+
public void registerClusterGauge(String metric, String help, String... labels) {
142+
registerClusterGaugeUnit(metric, "", help, labels);
143+
}
144+
121145
public void setClusterGauge(String metric, double value, String... labelValues) {
122146
Gauge gauge = (Gauge) metrics.get(metric);
123147
gauge.labels(getExtendedClusterLabelValues(labelValues)).set(value);
@@ -174,12 +198,16 @@ public Summary.Timer startSummaryTimer(String metric, String... labelValues) {
174198
return summary.labels(getExtendedNodeLabelValues(labelValues)).startTimer();
175199
}
176200

177-
public String toTextFormat() throws IOException {
201+
public String getContentType(String acceptHeader) {
202+
return TextFormat.chooseContentType(acceptHeader);
203+
}
204+
205+
public String toTextFormat(String contentType) throws IOException {
178206
Writer writer = new StringWriter();
179207
SpecialPermission.check();
180208
try {
181209
AccessController.doPrivileged((PrivilegedExceptionAction<Void>) () -> {
182-
TextFormat.write004(writer, registry.metricFamilySamples());
210+
TextFormat.writeFormat(contentType, writer, registry.metricFamilySamples());
183211
return null;
184212
});
185213
} catch (PrivilegedActionException e) {

src/main/java/org/compuscene/metrics/prometheus/PrometheusMetricsCollector.java

+30-12
Original file line numberDiff line numberDiff line change
@@ -17,14 +17,14 @@
1717

1818
package org.compuscene.metrics.prometheus;
1919

20-
import io.prometheus.client.Summary;
2120
import org.elasticsearch.Build;
2221
import org.elasticsearch.action.ClusterStatsData;
2322
import org.elasticsearch.action.admin.cluster.health.ClusterHealthResponse;
2423
import org.elasticsearch.action.admin.cluster.node.stats.NodeStats;
2524
import org.elasticsearch.action.admin.indices.stats.CommonStats;
2625
import org.elasticsearch.action.admin.indices.stats.IndexStats;
2726
import org.elasticsearch.action.admin.indices.stats.IndicesStatsResponse;
27+
import org.elasticsearch.cluster.health.ClusterHealthStatus;
2828
import org.elasticsearch.cluster.health.ClusterIndexHealth;
2929
import org.elasticsearch.cluster.node.DiscoveryNodeRole;
3030
import org.elasticsearch.http.HttpStats;
@@ -44,6 +44,8 @@
4444
import java.util.List;
4545
import java.util.Map;
4646

47+
import io.prometheus.client.Summary;
48+
4749
/**
4850
* A class that describes a Prometheus metrics collector.
4951
*/
@@ -82,30 +84,46 @@ public void registerMetrics() {
8284
}
8385

8486
private void registerClusterMetrics() {
85-
catalog.registerClusterGauge("cluster_status", "Cluster status");
86-
87-
catalog.registerClusterGauge("cluster_nodes_number", "Number of nodes in the cluster");
88-
catalog.registerClusterGauge("cluster_datanodes_number", "Number of data nodes in the cluster");
89-
90-
catalog.registerClusterGauge("cluster_shards_active_percent", "Percent of active shards");
91-
catalog.registerClusterGauge("cluster_shards_number", "Number of shards", "type");
87+
catalog.registerClusterGauge(
88+
"cluster_status", "Health status of the cluster, based on the state of its primary and replica shards");
89+
catalog.registerClusterEnum(
90+
"cluster_health_status",
91+
"Health status of the cluster, based on the state of its primary and replica shards as enumeration",
92+
ClusterHealthStatus.class
93+
);
94+
95+
catalog.registerClusterGauge("cluster_nodes_number", "The number of nodes within the cluster");
96+
catalog.registerClusterGauge("cluster_datanodes_number", "The number of nodes that are dedicated data nodes");
97+
98+
catalog.registerClusterGauge(
99+
"cluster_shards_active_percent", "The ratio of active shards in the cluster expressed as a percentage");
100+
catalog.registerClusterGaugeUnit(
101+
"cluster_shards_active", "ratio", "The ratio of active shards in the cluster");
102+
catalog.registerClusterGauge("cluster_shards_number", "The number of shards by type", "type");
92103

93104
catalog.registerClusterGauge("cluster_pending_tasks_number", "Number of pending tasks");
94-
catalog.registerClusterGauge("cluster_task_max_waiting_time_seconds", "Max waiting time for tasks");
105+
catalog.registerClusterGaugeUnit(
106+
"cluster_task_max_waiting_time",
107+
"seconds",
108+
"The time expressed in seconds since the earliest initiated task is waiting for being performed");
95109

96-
catalog.registerClusterGauge("cluster_is_timedout_bool", "Is the cluster timed out ?");
110+
catalog.registerClusterGauge(
111+
"cluster_is_timedout_bool",
112+
"If false the response returned within the period of time that is specified by the timeout parameter (30s by default)");
97113

98-
catalog.registerClusterGauge("cluster_inflight_fetch_number", "Number of in flight fetches");
114+
catalog.registerClusterGauge("cluster_inflight_fetch_number", "The number of unfinished fetches");
99115
}
100116

101117
private void updateClusterMetrics(ClusterHealthResponse chr) {
102118
if (chr != null) {
103119
catalog.setClusterGauge("cluster_status", chr.getStatus().value());
120+
catalog.setClusterEnum("cluster_health_status", chr.getStatus().name());
104121

105122
catalog.setClusterGauge("cluster_nodes_number", chr.getNumberOfNodes());
106123
catalog.setClusterGauge("cluster_datanodes_number", chr.getNumberOfDataNodes());
107124

108125
catalog.setClusterGauge("cluster_shards_active_percent", chr.getActiveShardsPercent());
126+
catalog.setClusterGauge("cluster_shards_active", chr.getActiveShardsPercent() / 100.0);
109127

110128
catalog.setClusterGauge("cluster_shards_number", chr.getActiveShards(), "active");
111129
catalog.setClusterGauge("cluster_shards_number", chr.getActivePrimaryShards(), "active_primary");
@@ -115,7 +133,7 @@ private void updateClusterMetrics(ClusterHealthResponse chr) {
115133
catalog.setClusterGauge("cluster_shards_number", chr.getUnassignedShards(), "unassigned");
116134

117135
catalog.setClusterGauge("cluster_pending_tasks_number", chr.getNumberOfPendingTasks());
118-
catalog.setClusterGauge("cluster_task_max_waiting_time_seconds", chr.getTaskMaxWaitingTime().millis() / 1000.0);
136+
catalog.setClusterGauge("cluster_task_max_waiting_time", chr.getTaskMaxWaitingTime().millis() / 1000.0);
119137

120138
catalog.setClusterGauge("cluster_is_timedout_bool", chr.isTimedOut() ? 1 : 0);
121139

src/main/java/org/elasticsearch/rest/prometheus/RestPrometheusMetricsAction.java

+8-1
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,12 @@ protected RestChannelConsumer prepareRequest(RestRequest request, NodeClient cli
7474
remoteAddress));
7575
}
7676

77+
String acceptHeader = request.header("Accept");
78+
if (logger.isTraceEnabled()) {
79+
logger.trace(String.format(Locale.ENGLISH, "Request accept header %s",
80+
acceptHeader != null ? acceptHeader : "NONE"
81+
));
82+
}
7783
NodePrometheusMetricsRequest metricsRequest = new NodePrometheusMetricsRequest();
7884

7985
return channel -> client.execute(INSTANCE, metricsRequest,
@@ -96,7 +102,8 @@ public RestResponse buildResponse(NodePrometheusMetricsResponse response) throws
96102
collector.registerMetrics();
97103
collector.updateMetrics(response.getClusterHealth(), response.getNodeStats(), response.getIndicesStats(),
98104
response.getClusterStatsData());
99-
return new BytesRestResponse(RestStatus.OK, collector.getCatalog().toTextFormat());
105+
String contentType = catalog.getContentType(acceptHeader);
106+
return new BytesRestResponse(RestStatus.OK, contentType, catalog.toTextFormat(contentType));
100107
}
101108
});
102109
}

src/yamlRestTest/resources/rest-api-spec/test/resthandler/20_00_metrics.yml

+14
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,20 @@
2525
- match:
2626
$body: /.* es_node_version_info .*/
2727

28+
---
29+
"Prometheus metrics can be pulled in OpenMetrics format":
30+
- skip:
31+
features: headers
32+
33+
- do:
34+
prometheus.metrics: {}
35+
headers:
36+
Accept: application/openmetrics-text
37+
# The output is plain text, so we have to rely solely on regular expressions.
38+
# See: https://github.com/elastic/elasticsearch/tree/master/rest-api-spec/src/main/resources/rest-api-spec/test#test-file-structure
39+
40+
- match:
41+
$body: /# UNIT es_cluster_shards_active_ratio ratio\n.*/
2842
#---
2943
#"Pull Prometheus metrics from individual nodes":
3044
# - skip:

0 commit comments

Comments
 (0)