From 2dd5fc5ef6f7f9ef5a2eb4315d9af0393d4d02b5 Mon Sep 17 00:00:00 2001 From: robsunday Date: Tue, 12 Nov 2024 16:45:01 +0100 Subject: [PATCH 1/7] Update jmx-metrics units to satisfy semconv rules --- .../target_systems/HbaseIntegrationTest.java | 28 ++++++++--------- .../resources/target-systems/hbase.groovy | 30 +++++++++---------- 2 files changed, 29 insertions(+), 29 deletions(-) diff --git a/jmx-metrics/src/integrationTest/java/io/opentelemetry/contrib/jmxmetrics/target_systems/HbaseIntegrationTest.java b/jmx-metrics/src/integrationTest/java/io/opentelemetry/contrib/jmxmetrics/target_systems/HbaseIntegrationTest.java index 412805385..ba52e1158 100644 --- a/jmx-metrics/src/integrationTest/java/io/opentelemetry/contrib/jmxmetrics/target_systems/HbaseIntegrationTest.java +++ b/jmx-metrics/src/integrationTest/java/io/opentelemetry/contrib/jmxmetrics/target_systems/HbaseIntegrationTest.java @@ -43,7 +43,7 @@ void endToEnd() { metric, "hbase.master.region_server.count", "The number of region servers.", - "{servers}", + "{server}", attrs -> attrs.contains(entry("state", "dead")), attrs -> attrs.contains(entry("state", "live"))), metric -> @@ -51,14 +51,14 @@ void endToEnd() { metric, "hbase.master.regions_in_transition.count", "The number of regions that are in transition.", - "{regions}", + "{region}", /* isMonotonic= */ false), metric -> assertSum( metric, "hbase.master.regions_in_transition.over_threshold", "The number of regions that have been in transition longer than a threshold time.", - "{regions}", + "{region}", /* isMonotonic= */ false), metric -> assertGauge( @@ -71,14 +71,14 @@ void endToEnd() { metric, "hbase.region_server.region.count", "The number of regions hosted by the region server.", - "{regions}", + "{region}", attrs -> attrs.containsKey("region_server")), metric -> assertSumWithAttributes( metric, "hbase.region_server.disk.store_file.count", "The number of store files on disk currently managed by the region server.", - "{files}", + "{file}", attrs -> attrs.containsKey("region_server")), metric -> assertSumWithAttributes( @@ -92,14 +92,14 @@ void endToEnd() { metric, "hbase.region_server.write_ahead_log.count", "The number of write ahead logs not yet archived.", - "{logs}", + "{log}", attrs -> attrs.containsKey("region_server")), metric -> assertSumWithAttributes( metric, "hbase.region_server.request.count", "The number of requests received.", - "{requests}", + "{request}", attrs -> attrs.contains(entry("state", "write")), attrs -> attrs.contains(entry("state", "read"))), metric -> @@ -107,7 +107,7 @@ void endToEnd() { metric, "hbase.region_server.queue.length", "The number of RPC handlers actively servicing requests.", - "{handlers}", + "{handler}", attrs -> attrs.contains(entry("state", "flush")), attrs -> attrs.contains(entry("state", "compaction"))), metric -> @@ -122,7 +122,7 @@ void endToEnd() { metric, "hbase.region_server.request.count", "The number of requests received.", - "{requests}", + "{request}", attrs -> attrs.contains(entry("state", "write")), attrs -> attrs.contains(entry("state", "read"))), metric -> @@ -347,7 +347,7 @@ void endToEnd() { metric, "hbase.region_server.operations.slow", "Number of operations that took over 1000ms to complete.", - "{operations}", + "{operation}", attrs -> attrs.contains(entry("operation", "delete")), attrs -> attrs.contains(entry("operation", "append")), attrs -> attrs.contains(entry("operation", "get")), @@ -358,21 +358,21 @@ void endToEnd() { metric, "hbase.region_server.open_connection.count", "The number of open connections at the RPC layer.", - "{connections}", + "{connection}", attrs -> attrs.containsKey("region_server")), metric -> assertSumWithAttributes( metric, "hbase.region_server.active_handler.count", "The number of RPC handlers actively servicing requests.", - "{handlers}", + "{handler}", attrs -> attrs.containsKey("region_server")), metric -> assertSumWithAttributes( metric, "hbase.region_server.queue.request.count", "The number of currently enqueued requests.", - "{requests}", + "{request}", attrs -> attrs.contains(entry("state", "replication")), attrs -> attrs.contains(entry("state", "user")), attrs -> attrs.contains(entry("state", "priority"))), @@ -381,7 +381,7 @@ void endToEnd() { metric, "hbase.region_server.authentication.count", "Number of client connection authentication failures/successes.", - "{authentication requests}", + "{authentication request}", attrs -> attrs.contains(entry("state", "successes")), attrs -> attrs.contains(entry("state", "failures"))), metric -> diff --git a/jmx-metrics/src/main/resources/target-systems/hbase.groovy b/jmx-metrics/src/main/resources/target-systems/hbase.groovy index 7c03f75ab..4f9c3b02f 100644 --- a/jmx-metrics/src/main/resources/target-systems/hbase.groovy +++ b/jmx-metrics/src/main/resources/target-systems/hbase.groovy @@ -16,16 +16,16 @@ def beanMasterServer = otel.mbeans("Hadoop:service=HBase,name=Master,sub=Server") otel.instrument(beanMasterServer, "hbase.master.region_server.count", - "The number of region servers.", "{servers}", + "The number of region servers.", "{server}", ["numDeadRegionServers":["state" : {"dead"}], "numRegionServers": ["state" : {"live"}]], otel.&longUpDownCounterCallback) def beanMasterAssignmentManager = otel.mbean("Hadoop:service=HBase,name=Master,sub=AssignmentManager") otel.instrument(beanMasterAssignmentManager, "hbase.master.regions_in_transition.count", - "The number of regions that are in transition.", "{regions}", + "The number of regions that are in transition.", "{region}", "ritCount", otel.&longUpDownCounterCallback) otel.instrument(beanMasterAssignmentManager, "hbase.master.regions_in_transition.over_threshold", - "The number of regions that have been in transition longer than a threshold time.", "{regions}", + "The number of regions that have been in transition longer than a threshold time.", "{region}", "ritCountOverThreshold", otel.&longUpDownCounterCallback) otel.instrument(beanMasterAssignmentManager, "hbase.master.regions_in_transition.oldest_age", "The age of the longest region in transition.", "ms", @@ -33,11 +33,11 @@ otel.instrument(beanMasterAssignmentManager, "hbase.master.regions_in_transition def beanRegionServerServer = otel.mbean("Hadoop:service=HBase,name=RegionServer,sub=Server") otel.instrument(beanRegionServerServer, "hbase.region_server.region.count", - "The number of regions hosted by the region server.", "{regions}", + "The number of regions hosted by the region server.", "{region}", ["region_server" : { mbean -> mbean.getProperty("tag.Hostname") }], "regionCount", otel.&longUpDownCounterCallback) otel.instrument(beanRegionServerServer, "hbase.region_server.disk.store_file.count", - "The number of store files on disk currently managed by the region server.", "{files}", + "The number of store files on disk currently managed by the region server.", "{file}", ["region_server" : { mbean -> mbean.getProperty("tag.Hostname") }], "storeFileCount", otel.&longUpDownCounterCallback) otel.instrument(beanRegionServerServer, "hbase.region_server.disk.store_file.size", @@ -45,16 +45,16 @@ otel.instrument(beanRegionServerServer, "hbase.region_server.disk.store_file.siz ["region_server" : { mbean -> mbean.getProperty("tag.Hostname") }], "storeFileSize", otel.&longUpDownCounterCallback) otel.instrument(beanRegionServerServer, "hbase.region_server.write_ahead_log.count", - "The number of write ahead logs not yet archived.", "{logs}", + "The number of write ahead logs not yet archived.", "{log}", ["region_server" : { mbean -> mbean.getProperty("tag.Hostname") }], "hlogFileCount", otel.&longUpDownCounterCallback) otel.instrument(beanRegionServerServer, "hbase.region_server.request.count", - "The number of requests received.", "{requests}", + "The number of requests received.", "{request}", ["region_server" : { mbean -> mbean.getProperty("tag.Hostname") }], ["writeRequestCount":["state" : {"write"}], "readRequestCount": ["state" : {"read"}]], otel.&longUpDownCounterCallback) otel.instrument(beanRegionServerServer, "hbase.region_server.queue.length", - "The number of RPC handlers actively servicing requests.", "{handlers}", + "The number of RPC handlers actively servicing requests.", "{handler}", ["region_server" : { mbean -> mbean.getProperty("tag.Hostname") }], ["flushQueueLength":["state" : {"flush"}], "compactionQueueLength": ["state" : {"compaction"}]], otel.&longUpDownCounterCallback) @@ -63,7 +63,7 @@ otel.instrument(beanRegionServerServer, "hbase.region_server.blocked_update.time ["region_server" : { mbean -> mbean.getProperty("tag.Hostname") }], "updatesBlockedTime", otel.&longValueCallback) otel.instrument(beanRegionServerServer, "hbase.region_server.block_cache.operation.count", - "Number of block cache hits/misses.", "{operations}", + "Number of block cache hits/misses.", "{operation}", ["region_server" : { mbean -> mbean.getProperty("tag.Hostname") }], ["blockCacheMissCount":["state" : {"miss"}], "blockCacheHitCount": ["state" : {"hit"}]], otel.&longValueCallback) @@ -199,7 +199,7 @@ otel.instrument(beanRegionServerServer, "hbase.region_server.operation.increment "Increment_median", otel.&longValueCallback) otel.instrument(beanRegionServerServer, "hbase.region_server.operations.slow", - "Number of operations that took over 1000ms to complete.", "{operations}", + "Number of operations that took over 1000ms to complete.", "{operation}", ["region_server" : { mbean -> mbean.getProperty("tag.Hostname") }], [ "slowDeleteCount":["operation" : {"delete"}], @@ -212,15 +212,15 @@ otel.instrument(beanRegionServerServer, "hbase.region_server.operations.slow", def beanRegionServerIPC = otel.mbean("Hadoop:service=HBase,name=RegionServer,sub=IPC") otel.instrument(beanRegionServerIPC, "hbase.region_server.open_connection.count", - "The number of open connections at the RPC layer.", "{connections}", + "The number of open connections at the RPC layer.", "{connection}", ["region_server" : { mbean -> mbean.getProperty("tag.Hostname") }], "numOpenConnections", otel.&longUpDownCounterCallback) otel.instrument(beanRegionServerIPC, "hbase.region_server.active_handler.count", - "The number of RPC handlers actively servicing requests.", "{handlers}", + "The number of RPC handlers actively servicing requests.", "{handler}", ["region_server" : { mbean -> mbean.getProperty("tag.Hostname") }], "numActiveHandler", otel.&longUpDownCounterCallback) otel.instrument(beanRegionServerIPC, "hbase.region_server.queue.request.count", - "The number of currently enqueued requests.", "{requests}", + "The number of currently enqueued requests.", "{request}", ["region_server" : { mbean -> mbean.getProperty("tag.Hostname") }], [ "numCallsInReplicationQueue":["state" : {"replication"}], @@ -229,7 +229,7 @@ otel.instrument(beanRegionServerIPC, "hbase.region_server.queue.request.count", ], otel.&longUpDownCounterCallback) otel.instrument(beanRegionServerIPC, "hbase.region_server.authentication.count", - "Number of client connection authentication failures/successes.", "{authentication requests}", + "Number of client connection authentication failures/successes.", "{authentication request}", ["region_server" : { mbean -> mbean.getProperty("tag.Hostname") }], ["authenticationSuccesses":["state" : {"successes"}], "authenticationFailures": ["state" : {"failures"}]], otel.&longUpDownCounterCallback) @@ -246,4 +246,4 @@ otel.instrument(beanJVMMetrics, "hbase.region_server.gc.young_gen.time", otel.instrument(beanJVMMetrics, "hbase.region_server.gc.old_gen.time", "Time spent in garbage collection of the old generation.", "ms", ["region_server" : { mbean -> mbean.getProperty("tag.Hostname") }], - "GcTimeMillisConcurrentMarkSweep", otel.&longCounterCallback) \ No newline at end of file + "GcTimeMillisConcurrentMarkSweep", otel.&longCounterCallback) From 9e3a3401669b235aa987e9393c0b91d96ed6ffba Mon Sep 17 00:00:00 2001 From: robsunday Date: Tue, 12 Nov 2024 16:45:40 +0100 Subject: [PATCH 2/7] Hbase yaml and integration test added --- .../target_systems/HBaseIntegrationTest.java | 447 ++++++++++++++++++ .../target_systems/MetricAssertions.java | 21 +- jmx-scraper/src/main/resources/hbase.yaml | 329 +++++++++++++ 3 files changed, 789 insertions(+), 8 deletions(-) create mode 100644 jmx-scraper/src/integrationTest/java/io/opentelemetry/contrib/jmxscraper/target_systems/HBaseIntegrationTest.java create mode 100644 jmx-scraper/src/main/resources/hbase.yaml diff --git a/jmx-scraper/src/integrationTest/java/io/opentelemetry/contrib/jmxscraper/target_systems/HBaseIntegrationTest.java b/jmx-scraper/src/integrationTest/java/io/opentelemetry/contrib/jmxscraper/target_systems/HBaseIntegrationTest.java new file mode 100644 index 000000000..11df7f6a3 --- /dev/null +++ b/jmx-scraper/src/integrationTest/java/io/opentelemetry/contrib/jmxscraper/target_systems/HBaseIntegrationTest.java @@ -0,0 +1,447 @@ +/* + * Copyright The OpenTelemetry Authors + * SPDX-License-Identifier: Apache-2.0 + */ + +package io.opentelemetry.contrib.jmxscraper.target_systems; + +import io.opentelemetry.contrib.jmxscraper.JmxScraperContainer; +import org.testcontainers.containers.GenericContainer; +import org.testcontainers.containers.wait.strategy.Wait; +import java.time.Duration; + +import static io.opentelemetry.contrib.jmxscraper.target_systems.MetricAssertions.assertGauge; +import static io.opentelemetry.contrib.jmxscraper.target_systems.MetricAssertions.assertGaugeWithAttributes; +import static io.opentelemetry.contrib.jmxscraper.target_systems.MetricAssertions.assertSum; +import static io.opentelemetry.contrib.jmxscraper.target_systems.MetricAssertions.assertSumWithAttributes; +import static org.assertj.core.data.MapEntry.entry; + +public class HBaseIntegrationTest extends TargetSystemIntegrationTest { + + @Override + protected GenericContainer createTargetContainer(int jmxPort) { + return new GenericContainer<>("dajobe/hbase") + .withEnv("JAVA_HOME", "/usr/lib/jvm/java-8-openjdk-amd64") + .withEnv("HBASE_OPTS", "-XX:+UseConcMarkSweepGC") + .withEnv("HBASE_MASTER_OPTS", genericJmxJvmArguments(jmxPort)) + .withStartupTimeout(Duration.ofMinutes(2)) + .withExposedPorts(jmxPort) + .waitingFor(Wait.forListeningPort()); + } + + @Override + protected JmxScraperContainer customizeScraperContainer(JmxScraperContainer scraper) { + return scraper.withTargetSystem("hbase"); + } + + @Override + protected void verifyMetrics() { + waitAndAssertMetrics( + metric -> + assertSumWithAttributes( + metric, + "hbase.master.region_server.count", + "The number of region servers.", + "{server}", + /* isMonotonic= */ false, + attrs -> attrs.contains(entry("state", "dead")), + attrs -> attrs.contains(entry("state", "live"))), + metric -> + assertSum( + metric, + "hbase.master.regions_in_transition.count", + "The number of regions that are in transition.", + "{region}", + /* isMonotonic= */ false), + metric -> + assertSum( + metric, + "hbase.master.regions_in_transition.over_threshold", + "The number of regions that have been in transition longer than a threshold time.", + "{region}", + /* isMonotonic= */ false), + metric -> + assertGauge( + metric, + "hbase.master.regions_in_transition.oldest_age", + "The age of the longest region in transition.", + "ms"), + metric -> + assertSumWithAttributes( + metric, + "hbase.region_server.region.count", + "The number of regions hosted by the region server.", + "{region}", + /* isMonotonic= */ false, + attrs -> attrs.containsKey("region_server")), + metric -> + assertSumWithAttributes( + metric, + "hbase.region_server.disk.store_file.count", + "The number of store files on disk currently managed by the region server.", + "{file}", + /* isMonotonic= */ false, + attrs -> attrs.containsKey("region_server")), + metric -> + assertSumWithAttributes( + metric, + "hbase.region_server.disk.store_file.size", + "Aggregate size of the store files on disk.", + "By", + /* isMonotonic= */ false, + attrs -> attrs.containsKey("region_server")), + metric -> + assertSumWithAttributes( + metric, + "hbase.region_server.write_ahead_log.count", + "The number of write ahead logs not yet archived.", + "{log}", + /* isMonotonic= */ false, + attrs -> attrs.containsKey("region_server")), + metric -> + assertSumWithAttributes( + metric, + "hbase.region_server.request.count", + "The number of requests received.", + "{request}", + /* isMonotonic= */ false, + attrs -> { + attrs.contains(entry("state", "write")); + attrs.containsKey("region_server"); + }, + attrs -> { + attrs.contains(entry("state", "read")); + attrs.containsKey("region_server"); + }), + metric -> + assertSumWithAttributes( + metric, + "hbase.region_server.queue.length", + "The number of RPC handlers actively servicing requests.", + "{handler}", + /* isMonotonic= */ false, + attrs -> { + attrs.contains(entry("state", "flush")); + attrs.containsKey("region_server"); + }, + attrs -> { + attrs.contains(entry("state", "compaction")); + attrs.containsKey("region_server"); + }), + metric -> + assertGaugeWithAttributes( + metric, + "hbase.region_server.blocked_update.time", + "Amount of time updates have been blocked so the memstore can be flushed.", + "ms", + attrs -> attrs.containsKey("region_server")), + metric -> + assertGaugeWithAttributes( + metric, + "hbase.region_server.block_cache.operation.count", + "Number of block cache hits/misses.", + "{operation}", + attrs -> { + attrs.contains(entry("state", "miss")); + attrs.containsKey("region_server"); + }, + attrs -> { + attrs.contains(entry("state", "hit")); + attrs.containsKey("region_server"); + }), + metric -> + assertGaugeWithAttributes( + metric, + "hbase.region_server.files.local", + "Percent of store file data that can be read from the local.", + "%", + attrs -> attrs.containsKey("region_server")), + + metric -> + assertGaugeWithAttributes( + metric, + "hbase.region_server.operation.append.latency.p99", + "Append operation 99th Percentile latency.", + "ms", + attrs -> attrs.containsKey("region_server")), + metric -> + assertGaugeWithAttributes( + metric, + "hbase.region_server.operation.append.latency.max", + "Append operation max latency.", + "ms", + attrs -> attrs.containsKey("region_server")), + metric -> + assertGaugeWithAttributes( + metric, + "hbase.region_server.operation.append.latency.min", + "Append operation minimum latency.", + "ms", + attrs -> attrs.containsKey("region_server")), + metric -> + assertGaugeWithAttributes( + metric, + "hbase.region_server.operation.append.latency.mean", + "Append operation mean latency.", + "ms", + attrs -> attrs.containsKey("region_server")), + metric -> + assertGaugeWithAttributes( + metric, + "hbase.region_server.operation.append.latency.median", + "Append operation median latency.", + "ms", + attrs -> attrs.containsKey("region_server")), + + metric -> + assertGaugeWithAttributes( + metric, + "hbase.region_server.operation.delete.latency.p99", + "Delete operation 99th Percentile latency.", + "ms", + attrs -> attrs.containsKey("region_server")), + metric -> + assertGaugeWithAttributes( + metric, + "hbase.region_server.operation.delete.latency.max", + "Delete operation max latency.", + "ms", + attrs -> attrs.containsKey("region_server")), + metric -> + assertGaugeWithAttributes( + metric, + "hbase.region_server.operation.delete.latency.min", + "Delete operation minimum latency.", + "ms", + attrs -> attrs.containsKey("region_server")), + metric -> + assertGaugeWithAttributes( + metric, + "hbase.region_server.operation.delete.latency.mean", + "Delete operation mean latency.", + "ms", + attrs -> attrs.containsKey("region_server")), + metric -> + assertGaugeWithAttributes( + metric, + "hbase.region_server.operation.delete.latency.median", + "Delete operation median latency.", + "ms", + attrs -> attrs.containsKey("region_server")), + + metric -> + assertGaugeWithAttributes( + metric, + "hbase.region_server.operation.put.latency.p99", + "Put operation 99th Percentile latency.", + "ms", + attrs -> attrs.containsKey("region_server")), + metric -> + assertGaugeWithAttributes( + metric, + "hbase.region_server.operation.put.latency.max", + "Put operation max latency.", + "ms", + attrs -> attrs.containsKey("region_server")), + metric -> + assertGaugeWithAttributes( + metric, + "hbase.region_server.operation.put.latency.min", + "Put operation minimum latency.", + "ms", + attrs -> attrs.containsKey("region_server")), + metric -> + assertGaugeWithAttributes( + metric, + "hbase.region_server.operation.put.latency.mean", + "Put operation mean latency.", + "ms", + attrs -> attrs.containsKey("region_server")), + metric -> + assertGaugeWithAttributes( + metric, + "hbase.region_server.operation.put.latency.median", + "Put operation median latency.", + "ms", + attrs -> attrs.containsKey("region_server")), + + metric -> + assertGaugeWithAttributes( + metric, + "hbase.region_server.operation.get.latency.p99", + "Get operation 99th Percentile latency.", + "ms", + attrs -> attrs.containsKey("region_server")), + metric -> + assertGaugeWithAttributes( + metric, + "hbase.region_server.operation.get.latency.max", + "Get operation max latency.", + "ms", + attrs -> attrs.containsKey("region_server")), + metric -> + assertGaugeWithAttributes( + metric, + "hbase.region_server.operation.get.latency.min", + "Get operation minimum latency.", + "ms", + attrs -> attrs.containsKey("region_server")), + metric -> + assertGaugeWithAttributes( + metric, + "hbase.region_server.operation.get.latency.mean", + "Get operation mean latency.", + "ms", + attrs -> attrs.containsKey("region_server")), + metric -> + assertGaugeWithAttributes( + metric, + "hbase.region_server.operation.get.latency.median", + "Get operation median latency.", + "ms", + attrs -> attrs.containsKey("region_server")), + + metric -> + assertGaugeWithAttributes( + metric, + "hbase.region_server.operation.replay.latency.p99", + "Replay operation 99th Percentile latency.", + "ms", + attrs -> attrs.containsKey("region_server")), + metric -> + assertGaugeWithAttributes( + metric, + "hbase.region_server.operation.replay.latency.max", + "Replay operation max latency.", + "ms", + attrs -> attrs.containsKey("region_server")), + metric -> + assertGaugeWithAttributes( + metric, + "hbase.region_server.operation.replay.latency.min", + "Replay operation minimum latency.", + "ms", + attrs -> attrs.containsKey("region_server")), + metric -> + assertGaugeWithAttributes( + metric, + "hbase.region_server.operation.replay.latency.mean", + "Replay operation mean latency.", + "ms", + attrs -> attrs.containsKey("region_server")), + metric -> + assertGaugeWithAttributes( + metric, + "hbase.region_server.operation.replay.latency.median", + "Replay operation median latency.", + "ms", + attrs -> attrs.containsKey("region_server")), + + metric -> + assertGaugeWithAttributes( + metric, + "hbase.region_server.operation.increment.latency.p99", + "Increment operation 99th Percentile latency.", + "ms", + attrs -> attrs.containsKey("region_server")), + metric -> + assertGaugeWithAttributes( + metric, + "hbase.region_server.operation.increment.latency.max", + "Increment operation max latency.", + "ms", + attrs -> attrs.containsKey("region_server")), + metric -> + assertGaugeWithAttributes( + metric, + "hbase.region_server.operation.increment.latency.min", + "Increment operation minimum latency.", + "ms", + attrs -> attrs.containsKey("region_server")), + metric -> + assertGaugeWithAttributes( + metric, + "hbase.region_server.operation.increment.latency.mean", + "Increment operation mean latency.", + "ms", + attrs -> attrs.containsKey("region_server")), + metric -> + assertGaugeWithAttributes( + metric, + "hbase.region_server.operation.increment.latency.median", + "Increment operation median latency.", + "ms", + attrs -> attrs.containsKey("region_server")), + metric -> + assertSumWithAttributes( + metric, + "hbase.region_server.operations.slow", + "Number of operations that took over 1000ms to complete.", + "{operation}", + /* isMonotonic= */ false, + attrs -> attrs.contains(entry("operation", "delete")), + attrs -> attrs.contains(entry("operation", "append")), + attrs -> attrs.contains(entry("operation", "get")), + attrs -> attrs.contains(entry("operation", "put")), + attrs -> attrs.contains(entry("operation", "increment"))), + + metric -> + assertSumWithAttributes( + metric, + "hbase.region_server.open_connection.count", + "The number of open connections at the RPC layer.", + "{connection}", + /* isMonotonic= */ false, + attrs -> attrs.containsKey("region_server")), + metric -> + assertSumWithAttributes( + metric, + "hbase.region_server.active_handler.count", + "The number of RPC handlers actively servicing requests.", + "{handler}", + /* isMonotonic= */ false, + attrs -> attrs.containsKey("region_server")), + metric -> + assertSumWithAttributes( + metric, + "hbase.region_server.queue.request.count", + "The number of currently enqueued requests.", + "{request}", + /* isMonotonic= */ false, + attrs -> attrs.contains(entry("state", "replication")), + attrs -> attrs.contains(entry("state", "user")), + attrs -> attrs.contains(entry("state", "priority"))), + metric -> + assertSumWithAttributes( + metric, + "hbase.region_server.authentication.count", + "Number of client connection authentication failures/successes.", + "{authentication request}", + /* isMonotonic= */ false, + attrs -> attrs.contains(entry("state", "successes")), + attrs -> attrs.contains(entry("state", "failures"))), + + metric -> + assertSumWithAttributes( + metric, + "hbase.region_server.gc.time", + "Time spent in garbage collection.", + "ms", + attrs -> attrs.containsKey("region_server")), + metric -> + assertSumWithAttributes( + metric, + "hbase.region_server.gc.young_gen.time", + "Time spent in garbage collection of the young generation.", + "ms", + attrs -> attrs.containsKey("region_server")), + metric -> + assertSumWithAttributes( + metric, + "hbase.region_server.gc.old_gen.time", + "Time spent in garbage collection of the old generation.", + "ms", + attrs -> attrs.containsKey("region_server")) + ); + } +} diff --git a/jmx-scraper/src/integrationTest/java/io/opentelemetry/contrib/jmxscraper/target_systems/MetricAssertions.java b/jmx-scraper/src/integrationTest/java/io/opentelemetry/contrib/jmxscraper/target_systems/MetricAssertions.java index 713d2f21b..9a3e507b0 100644 --- a/jmx-scraper/src/integrationTest/java/io/opentelemetry/contrib/jmxscraper/target_systems/MetricAssertions.java +++ b/jmx-scraper/src/integrationTest/java/io/opentelemetry/contrib/jmxscraper/target_systems/MetricAssertions.java @@ -27,7 +27,7 @@ static void assertGauge(Metric metric, String name, String description, String u assertThat(metric.getName()).isEqualTo(name); assertThat(metric.getDescription()).isEqualTo(description); assertThat(metric.getUnit()).isEqualTo(unit); - assertThat(metric.hasGauge()).isTrue(); + assertThat(metric.hasGauge()).withFailMessage("Invalid metric type").isTrue(); assertThat(metric.getGauge().getDataPointsList()) .satisfiesExactly(point -> assertThat(point.getAttributesList()).isEmpty()); } @@ -41,10 +41,12 @@ static void assertSum( assertThat(metric.getName()).isEqualTo(name); assertThat(metric.getDescription()).isEqualTo(description); assertThat(metric.getUnit()).isEqualTo(unit); - assertThat(metric.hasSum()).isTrue(); + assertThat(metric.hasSum()).withFailMessage("Invalid metric type").isTrue(); + assertThat(metric.getSum().getIsMonotonic()) + .withFailMessage("Metric should " + (isMonotonic ? "" : "not ") + "be monotonic") + .isEqualTo(isMonotonic); assertThat(metric.getSum().getDataPointsList()) .satisfiesExactly(point -> assertThat(point.getAttributesList()).isEmpty()); - assertThat(metric.getSum().getIsMonotonic()).isEqualTo(isMonotonic); } static void assertTypedGauge( @@ -52,7 +54,7 @@ static void assertTypedGauge( assertThat(metric.getName()).isEqualTo(name); assertThat(metric.getDescription()).isEqualTo(description); assertThat(metric.getUnit()).isEqualTo(unit); - assertThat(metric.hasGauge()).isTrue(); + assertThat(metric.hasGauge()).withFailMessage("Invalid metric type").isTrue(); assertTypedPoints(metric.getGauge().getDataPointsList(), types); } @@ -61,7 +63,7 @@ static void assertTypedSum( assertThat(metric.getName()).isEqualTo(name); assertThat(metric.getDescription()).isEqualTo(description); assertThat(metric.getUnit()).isEqualTo(unit); - assertThat(metric.hasSum()).isTrue(); + assertThat(metric.hasSum()).withFailMessage("Invalid metric type").isTrue(); assertTypedPoints(metric.getSum().getDataPointsList(), types); } @@ -87,8 +89,10 @@ static void assertSumWithAttributes( assertThat(metric.getName()).isEqualTo(name); assertThat(metric.getDescription()).isEqualTo(description); assertThat(metric.getUnit()).isEqualTo(unit); - assertThat(metric.hasSum()).isTrue(); - assertThat(metric.getSum().getIsMonotonic()).isEqualTo(isMonotonic); + assertThat(metric.hasSum()).withFailMessage("Invalid metric type").isTrue(); + assertThat(metric.getSum().getIsMonotonic()) + .withFailMessage("Metric should " + (isMonotonic ? "" : "not ") + "be monotonic") + .isEqualTo(isMonotonic); assertAttributedPoints(metric.getSum().getDataPointsList(), attributeGroupAssertions); } @@ -118,7 +122,7 @@ static void assertGaugeWithAttributes( assertThat(metric.getName()).isEqualTo(name); assertThat(metric.getDescription()).isEqualTo(description); assertThat(metric.getUnit()).isEqualTo(unit); - assertThat(metric.hasGauge()).isTrue(); + assertThat(metric.hasGauge()).withFailMessage("Invalid metric type").isTrue(); assertAttributedPoints(metric.getGauge().getDataPointsList(), attributeGroupAssertions); } @@ -145,6 +149,7 @@ private static void assertAttributedPoints( .toArray(Consumer[]::new); assertThat(points) + .withFailMessage("Invalid metric attributes. Actual: " + points) .extracting( numberDataPoint -> numberDataPoint.getAttributesList().stream() diff --git a/jmx-scraper/src/main/resources/hbase.yaml b/jmx-scraper/src/main/resources/hbase.yaml new file mode 100644 index 000000000..fb5ebb924 --- /dev/null +++ b/jmx-scraper/src/main/resources/hbase.yaml @@ -0,0 +1,329 @@ +--- + +rules: + + - bean: Hadoop:service=HBase,name=Master,sub=Server + prefix: hbase.master. + unit: "{server}" + type: updowncounter + mapping: + numDeadRegionServers: + metric: &metric region_server.count + desc: &desc The number of region servers. + metricAttribute: + state: const(dead) + numRegionServers: + metric: *metric + desc: *desc + metricAttribute: + state: const(live) + + - bean: Hadoop:service=HBase,name=Master,sub=AssignmentManager + prefix: hbase.master.regions_in_transition. + unit: "{region}" + type: updowncounter + mapping: + ritCount: + metric: count + desc: The number of regions that are in transition. + ritCountOverThreshold: + metric: over_threshold + desc: The number of regions that have been in transition longer than a threshold time. + ritOldestAge: + metric: oldest_age + unit: ms + type: gauge + desc: The age of the longest region in transition. + + - bean: Hadoop:service=HBase,name=RegionServer,sub=Server + prefix: hbase.region_server. + type: updowncounter + metricAttribute: + region_server: &hostname beanattr(tag\.Hostname) + mapping: + regionCount: + metric: region.count + unit: "{region}" + desc: The number of regions hosted by the region server. + + storeFileCount: + metric: disk.store_file.count + unit: "{file}" + desc: The number of store files on disk currently managed by the region server. + + storeFileSize: + metric: disk.store_file.size + unit: By + desc: Aggregate size of the store files on disk. + + hlogFileCount: + metric: write_ahead_log.count + unit: "{log}" + desc: The number of write ahead logs not yet archived. + + writeRequestCount: + metric: &metric request.count + unit: &unit "{request}" + desc: &desc The number of requests received. + metricAttribute: + state: const(write) + region_server: *hostname + readRequestCount: + metric: *metric + unit: *unit + desc: *desc + metricAttribute: + state: const(read) + region_server: *hostname + + flushQueueLength: + metric: &metric queue.length + unit: &unit "{handler}" + desc: &desc The number of RPC handlers actively servicing requests. + metricAttribute: + state: const(flush) + region_server: *hostname + compactionQueueLength: + metric: *metric + unit: *unit + desc: *desc + metricAttribute: + state: const(compaction) + region_server: *hostname + + updatesBlockedTime: + metric: blocked_update.time + type: gauge + unit: ms + desc: Amount of time updates have been blocked so the memstore can be flushed. + + blockCacheMissCount: + metric: &metric block_cache.operation.count + type: &type gauge + unit: &unit "{operation}" + desc: &desc Number of block cache hits/misses. + metricAttribute: + state: const(miss) + region_server: *hostname + blockCacheHitCount: + metric: *metric + type: *type + unit: *unit + desc: *desc + metricAttribute: + state: const(hit) + region_server: *hostname + + percentFilesLocal: + metric: files.local + type: gauge + unit: "%" + desc: Percent of store file data that can be read from the local. + + slowDeleteCount: + metric: &metric operations.slow + unit: &unit "{operation}" + desc: &desc Number of operations that took over 1000ms to complete. + metricAttribute: + operation: const(delete) + region_server: &hostname beanattr(tag\.Hostname) + slowAppendCount: + metric: *metric + unit: *unit + desc: *desc + metricAttribute: + operation: const(append) + region_server: *hostname + slowGetCount: + metric: *metric + unit: *unit + desc: *desc + metricAttribute: + operation: const(get) + region_server: *hostname + slowPutCount: + metric: *metric + unit: *unit + desc: *desc + metricAttribute: + operation: const(put) + region_server: *hostname + slowIncrementCount: + metric: *metric + unit: *unit + desc: *desc + metricAttribute: + operation: const(increment) + region_server: *hostname + + - bean: Hadoop:service=HBase,name=RegionServer,sub=Server + prefix: hbase.region_server. + type: gauge + unit: ms + metricAttribute: + region_server: *hostname + mapping: + Append_99th_percentile: + metric: operation.append.latency.p99 + desc: Append operation 99th Percentile latency. + Append_max: + metric: operation.append.latency.max + desc: Append operation max latency. + Append_min: + metric: operation.append.latency.min + desc: Append operation minimum latency. + Append_mean: + metric: operation.append.latency.mean + desc: Append operation mean latency. + Append_median: + metric: operation.append.latency.median + desc: Append operation median latency. + + Delete_99th_percentile: + metric: operation.delete.latency.p99 + desc: Delete operation 99th Percentile latency. + Delete_max: + metric: operation.delete.latency.max + desc: Delete operation max latency. + Delete_min: + metric: operation.delete.latency.min + desc: Delete operation minimum latency. + Delete_mean: + metric: operation.delete.latency.mean + desc: Delete operation mean latency. + Delete_median: + metric: operation.delete.latency.median + desc: Delete operation median latency. + + Put_99th_percentile: + metric: operation.put.latency.p99 + desc: Put operation 99th Percentile latency. + Put_max: + metric: operation.put.latency.max + desc: Put operation max latency. + Put_min: + metric: operation.put.latency.min + desc: Put operation minimum latency. + Put_mean: + metric: operation.put.latency.mean + desc: Put operation mean latency. + Put_median: + metric: operation.put.latency.median + desc: Put operation median latency. + + Get_99th_percentile: + metric: operation.get.latency.p99 + desc: Get operation 99th Percentile latency. + Get_max: + metric: operation.get.latency.max + desc: Get operation max latency. + Get_min: + metric: operation.get.latency.min + desc: Get operation minimum latency. + Get_mean: + metric: operation.get.latency.mean + desc: Get operation mean latency. + Get_median: + metric: operation.get.latency.median + desc: Get operation median latency. + + Replay_99th_percentile: + metric: operation.replay.latency.p99 + desc: Replay operation 99th Percentile latency. + Replay_max: + metric: operation.replay.latency.max + desc: Replay operation max latency. + Replay_min: + metric: operation.replay.latency.min + desc: Replay operation minimum latency. + Replay_mean: + metric: operation.replay.latency.mean + desc: Replay operation mean latency. + Replay_median: + metric: operation.replay.latency.median + desc: Replay operation median latency. + + Increment_99th_percentile: + metric: operation.increment.latency.p99 + desc: Increment operation 99th Percentile latency. + Increment_max: + metric: operation.increment.latency.max + desc: Increment operation max latency. + Increment_min: + metric: operation.increment.latency.min + desc: Increment operation minimum latency. + Increment_mean: + metric: operation.increment.latency.mean + desc: Increment operation mean latency. + Increment_median: + metric: operation.increment.latency.median + desc: Increment operation median latency. + + - bean: Hadoop:service=HBase,name=RegionServer,sub=IPC + prefix: hbase.region_server. + type: updowncounter + metricAttribute: + region_server: *hostname + mapping: + numOpenConnections: + metric: open_connection.count + unit: "{connection}" + desc: The number of open connections at the RPC layer. + numActiveHandler: + metric: active_handler.count + unit: "{handler}" + desc: The number of RPC handlers actively servicing requests. + + numCallsInReplicationQueue: + metric: &metric queue.request.count + unit: &unit "{request}" + desc: &desc The number of currently enqueued requests. + metricAttribute: + state: const(replication) + region_server: *hostname + numCallsInGeneralQueue: + metric: *metric + unit: *unit + desc: *desc + metricAttribute: + state: const(user) + region_server: *hostname + numCallsInPriorityQueue: + metric: *metric + unit: *unit + desc: *desc + metricAttribute: + state: const(priority) + region_server: *hostname + + authenticationSuccesses: + metric: &metric authentication.count + unit: &unit "{authentication request}" + desc: &desc Number of client connection authentication failures/successes. + metricAttribute: + state: const(successes) + region_server: *hostname + authenticationFailures: + metric: *metric + unit: *unit + desc: *desc + metricAttribute: + state: const(failures) + region_server: *hostname + + - bean: Hadoop:service=HBase,name=JvmMetrics + prefix: hbase.region_server.gc. + unit: ms + type: counter + metricAttribute: + region_server: *hostname + mapping: + GcTimeMillis: + metric: time + desc: Time spent in garbage collection. + GcTimeMillisParNew: + metric: young_gen.time + desc: Time spent in garbage collection of the young generation. + GcTimeMillisConcurrentMarkSweep: + metric: old_gen.time + desc: Time spent in garbage collection of the old generation. From bf36533c31706843593558ad017752391360f614 Mon Sep 17 00:00:00 2001 From: robsunday Date: Tue, 12 Nov 2024 17:07:58 +0100 Subject: [PATCH 3/7] Improve test stability by waiting for additional ports to be opened. --- .../ActiveMqIntegrationTest.java | 4 ++- .../target_systems/HBaseIntegrationTest.java | 26 +++++++------------ 2 files changed, 12 insertions(+), 18 deletions(-) diff --git a/jmx-scraper/src/integrationTest/java/io/opentelemetry/contrib/jmxscraper/target_systems/ActiveMqIntegrationTest.java b/jmx-scraper/src/integrationTest/java/io/opentelemetry/contrib/jmxscraper/target_systems/ActiveMqIntegrationTest.java index 675c5d39d..40adc2a31 100644 --- a/jmx-scraper/src/integrationTest/java/io/opentelemetry/contrib/jmxscraper/target_systems/ActiveMqIntegrationTest.java +++ b/jmx-scraper/src/integrationTest/java/io/opentelemetry/contrib/jmxscraper/target_systems/ActiveMqIntegrationTest.java @@ -16,6 +16,7 @@ import org.testcontainers.images.builder.ImageFromDockerfile; public class ActiveMqIntegrationTest extends TargetSystemIntegrationTest { + private static final int DEFAULT_TCP_CONNECTION_LISTENING_PORT = 61616; @Override protected GenericContainer createTargetContainer(int jmxPort) { @@ -25,7 +26,8 @@ protected GenericContainer createTargetContainer(int jmxPort) { builder -> builder.from("apache/activemq-classic:5.18.6").build())) .withEnv("JAVA_TOOL_OPTIONS", genericJmxJvmArguments(jmxPort)) .withStartupTimeout(Duration.ofMinutes(2)) - .waitingFor(Wait.forListeningPort()); + .withExposedPorts(jmxPort, DEFAULT_TCP_CONNECTION_LISTENING_PORT) + .waitingFor(Wait.forListeningPorts(jmxPort, DEFAULT_TCP_CONNECTION_LISTENING_PORT)); } @Override diff --git a/jmx-scraper/src/integrationTest/java/io/opentelemetry/contrib/jmxscraper/target_systems/HBaseIntegrationTest.java b/jmx-scraper/src/integrationTest/java/io/opentelemetry/contrib/jmxscraper/target_systems/HBaseIntegrationTest.java index 11df7f6a3..9b77c10f4 100644 --- a/jmx-scraper/src/integrationTest/java/io/opentelemetry/contrib/jmxscraper/target_systems/HBaseIntegrationTest.java +++ b/jmx-scraper/src/integrationTest/java/io/opentelemetry/contrib/jmxscraper/target_systems/HBaseIntegrationTest.java @@ -5,18 +5,19 @@ package io.opentelemetry.contrib.jmxscraper.target_systems; -import io.opentelemetry.contrib.jmxscraper.JmxScraperContainer; -import org.testcontainers.containers.GenericContainer; -import org.testcontainers.containers.wait.strategy.Wait; -import java.time.Duration; - import static io.opentelemetry.contrib.jmxscraper.target_systems.MetricAssertions.assertGauge; import static io.opentelemetry.contrib.jmxscraper.target_systems.MetricAssertions.assertGaugeWithAttributes; import static io.opentelemetry.contrib.jmxscraper.target_systems.MetricAssertions.assertSum; import static io.opentelemetry.contrib.jmxscraper.target_systems.MetricAssertions.assertSumWithAttributes; import static org.assertj.core.data.MapEntry.entry; +import io.opentelemetry.contrib.jmxscraper.JmxScraperContainer; +import java.time.Duration; +import org.testcontainers.containers.GenericContainer; +import org.testcontainers.containers.wait.strategy.Wait; + public class HBaseIntegrationTest extends TargetSystemIntegrationTest { + private static final int DEFAULT_MASTER_SERVICE_PORT = 16000; @Override protected GenericContainer createTargetContainer(int jmxPort) { @@ -25,8 +26,8 @@ protected GenericContainer createTargetContainer(int jmxPort) { .withEnv("HBASE_OPTS", "-XX:+UseConcMarkSweepGC") .withEnv("HBASE_MASTER_OPTS", genericJmxJvmArguments(jmxPort)) .withStartupTimeout(Duration.ofMinutes(2)) - .withExposedPorts(jmxPort) - .waitingFor(Wait.forListeningPort()); + .withExposedPorts(jmxPort, DEFAULT_MASTER_SERVICE_PORT) + .waitingFor(Wait.forListeningPorts(jmxPort, DEFAULT_MASTER_SERVICE_PORT)); } @Override @@ -156,7 +157,6 @@ protected void verifyMetrics() { "Percent of store file data that can be read from the local.", "%", attrs -> attrs.containsKey("region_server")), - metric -> assertGaugeWithAttributes( metric, @@ -192,7 +192,6 @@ protected void verifyMetrics() { "Append operation median latency.", "ms", attrs -> attrs.containsKey("region_server")), - metric -> assertGaugeWithAttributes( metric, @@ -228,7 +227,6 @@ protected void verifyMetrics() { "Delete operation median latency.", "ms", attrs -> attrs.containsKey("region_server")), - metric -> assertGaugeWithAttributes( metric, @@ -264,7 +262,6 @@ protected void verifyMetrics() { "Put operation median latency.", "ms", attrs -> attrs.containsKey("region_server")), - metric -> assertGaugeWithAttributes( metric, @@ -300,7 +297,6 @@ protected void verifyMetrics() { "Get operation median latency.", "ms", attrs -> attrs.containsKey("region_server")), - metric -> assertGaugeWithAttributes( metric, @@ -336,7 +332,6 @@ protected void verifyMetrics() { "Replay operation median latency.", "ms", attrs -> attrs.containsKey("region_server")), - metric -> assertGaugeWithAttributes( metric, @@ -384,7 +379,6 @@ protected void verifyMetrics() { attrs -> attrs.contains(entry("operation", "get")), attrs -> attrs.contains(entry("operation", "put")), attrs -> attrs.contains(entry("operation", "increment"))), - metric -> assertSumWithAttributes( metric, @@ -420,7 +414,6 @@ protected void verifyMetrics() { /* isMonotonic= */ false, attrs -> attrs.contains(entry("state", "successes")), attrs -> attrs.contains(entry("state", "failures"))), - metric -> assertSumWithAttributes( metric, @@ -441,7 +434,6 @@ protected void verifyMetrics() { "hbase.region_server.gc.old_gen.time", "Time spent in garbage collection of the old generation.", "ms", - attrs -> attrs.containsKey("region_server")) - ); + attrs -> attrs.containsKey("region_server"))); } } From ac3ff6d5092e2df6fd29a856a453e265b344dce2 Mon Sep 17 00:00:00 2001 From: robsunday Date: Fri, 15 Nov 2024 13:01:05 +0100 Subject: [PATCH 4/7] Fixed merge issue --- .../jmxscraper/target_systems/HBaseIntegrationTest.java | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/jmx-scraper/src/integrationTest/java/io/opentelemetry/contrib/jmxscraper/target_systems/HBaseIntegrationTest.java b/jmx-scraper/src/integrationTest/java/io/opentelemetry/contrib/jmxscraper/target_systems/HBaseIntegrationTest.java index 9b77c10f4..312df916b 100644 --- a/jmx-scraper/src/integrationTest/java/io/opentelemetry/contrib/jmxscraper/target_systems/HBaseIntegrationTest.java +++ b/jmx-scraper/src/integrationTest/java/io/opentelemetry/contrib/jmxscraper/target_systems/HBaseIntegrationTest.java @@ -12,6 +12,7 @@ import static org.assertj.core.data.MapEntry.entry; import io.opentelemetry.contrib.jmxscraper.JmxScraperContainer; +import java.nio.file.Path; import java.time.Duration; import org.testcontainers.containers.GenericContainer; import org.testcontainers.containers.wait.strategy.Wait; @@ -31,7 +32,9 @@ protected GenericContainer createTargetContainer(int jmxPort) { } @Override - protected JmxScraperContainer customizeScraperContainer(JmxScraperContainer scraper) { + protected JmxScraperContainer customizeScraperContainer( + JmxScraperContainer scraper, GenericContainer target, Path tempDir + ) { return scraper.withTargetSystem("hbase"); } From c3a0e23daf3830cbe768b98fa19b24413c144a16 Mon Sep 17 00:00:00 2001 From: robsunday Date: Fri, 15 Nov 2024 14:04:10 +0100 Subject: [PATCH 5/7] Spotless fix --- .../jmxscraper/target_systems/HBaseIntegrationTest.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/jmx-scraper/src/integrationTest/java/io/opentelemetry/contrib/jmxscraper/target_systems/HBaseIntegrationTest.java b/jmx-scraper/src/integrationTest/java/io/opentelemetry/contrib/jmxscraper/target_systems/HBaseIntegrationTest.java index 312df916b..f3aa98131 100644 --- a/jmx-scraper/src/integrationTest/java/io/opentelemetry/contrib/jmxscraper/target_systems/HBaseIntegrationTest.java +++ b/jmx-scraper/src/integrationTest/java/io/opentelemetry/contrib/jmxscraper/target_systems/HBaseIntegrationTest.java @@ -33,8 +33,7 @@ protected GenericContainer createTargetContainer(int jmxPort) { @Override protected JmxScraperContainer customizeScraperContainer( - JmxScraperContainer scraper, GenericContainer target, Path tempDir - ) { + JmxScraperContainer scraper, GenericContainer target, Path tempDir) { return scraper.withTargetSystem("hbase"); } From db42889c95e5a9fe6df274295f91327760ebed01 Mon Sep 17 00:00:00 2001 From: robsunday Date: Thu, 21 Nov 2024 09:47:59 +0100 Subject: [PATCH 6/7] Code review changes --- .../target_systems/HBaseIntegrationTest.java | 2 - .../target_systems/MetricAssertions.java | 36 +++++++++------ jmx-scraper/src/main/resources/hbase.yaml | 45 +++++++++++++------ 3 files changed, 54 insertions(+), 29 deletions(-) diff --git a/jmx-scraper/src/integrationTest/java/io/opentelemetry/contrib/jmxscraper/target_systems/HBaseIntegrationTest.java b/jmx-scraper/src/integrationTest/java/io/opentelemetry/contrib/jmxscraper/target_systems/HBaseIntegrationTest.java index f3aa98131..93877e279 100644 --- a/jmx-scraper/src/integrationTest/java/io/opentelemetry/contrib/jmxscraper/target_systems/HBaseIntegrationTest.java +++ b/jmx-scraper/src/integrationTest/java/io/opentelemetry/contrib/jmxscraper/target_systems/HBaseIntegrationTest.java @@ -23,8 +23,6 @@ public class HBaseIntegrationTest extends TargetSystemIntegrationTest { @Override protected GenericContainer createTargetContainer(int jmxPort) { return new GenericContainer<>("dajobe/hbase") - .withEnv("JAVA_HOME", "/usr/lib/jvm/java-8-openjdk-amd64") - .withEnv("HBASE_OPTS", "-XX:+UseConcMarkSweepGC") .withEnv("HBASE_MASTER_OPTS", genericJmxJvmArguments(jmxPort)) .withStartupTimeout(Duration.ofMinutes(2)) .withExposedPorts(jmxPort, DEFAULT_MASTER_SERVICE_PORT) diff --git a/jmx-scraper/src/integrationTest/java/io/opentelemetry/contrib/jmxscraper/target_systems/MetricAssertions.java b/jmx-scraper/src/integrationTest/java/io/opentelemetry/contrib/jmxscraper/target_systems/MetricAssertions.java index 9a3e507b0..79e35fe45 100644 --- a/jmx-scraper/src/integrationTest/java/io/opentelemetry/contrib/jmxscraper/target_systems/MetricAssertions.java +++ b/jmx-scraper/src/integrationTest/java/io/opentelemetry/contrib/jmxscraper/target_systems/MetricAssertions.java @@ -27,7 +27,7 @@ static void assertGauge(Metric metric, String name, String description, String u assertThat(metric.getName()).isEqualTo(name); assertThat(metric.getDescription()).isEqualTo(description); assertThat(metric.getUnit()).isEqualTo(unit); - assertThat(metric.hasGauge()).withFailMessage("Invalid metric type").isTrue(); + assertMetricWithGauge(metric); assertThat(metric.getGauge().getDataPointsList()) .satisfiesExactly(point -> assertThat(point.getAttributesList()).isEmpty()); } @@ -41,10 +41,7 @@ static void assertSum( assertThat(metric.getName()).isEqualTo(name); assertThat(metric.getDescription()).isEqualTo(description); assertThat(metric.getUnit()).isEqualTo(unit); - assertThat(metric.hasSum()).withFailMessage("Invalid metric type").isTrue(); - assertThat(metric.getSum().getIsMonotonic()) - .withFailMessage("Metric should " + (isMonotonic ? "" : "not ") + "be monotonic") - .isEqualTo(isMonotonic); + assertMetricWithSum(metric, isMonotonic); assertThat(metric.getSum().getDataPointsList()) .satisfiesExactly(point -> assertThat(point.getAttributesList()).isEmpty()); } @@ -54,7 +51,7 @@ static void assertTypedGauge( assertThat(metric.getName()).isEqualTo(name); assertThat(metric.getDescription()).isEqualTo(description); assertThat(metric.getUnit()).isEqualTo(unit); - assertThat(metric.hasGauge()).withFailMessage("Invalid metric type").isTrue(); + assertMetricWithGauge(metric); assertTypedPoints(metric.getGauge().getDataPointsList(), types); } @@ -63,7 +60,7 @@ static void assertTypedSum( assertThat(metric.getName()).isEqualTo(name); assertThat(metric.getDescription()).isEqualTo(description); assertThat(metric.getUnit()).isEqualTo(unit); - assertThat(metric.hasSum()).withFailMessage("Invalid metric type").isTrue(); + assertMetricWithSum(metric); assertTypedPoints(metric.getSum().getDataPointsList(), types); } @@ -89,10 +86,7 @@ static void assertSumWithAttributes( assertThat(metric.getName()).isEqualTo(name); assertThat(metric.getDescription()).isEqualTo(description); assertThat(metric.getUnit()).isEqualTo(unit); - assertThat(metric.hasSum()).withFailMessage("Invalid metric type").isTrue(); - assertThat(metric.getSum().getIsMonotonic()) - .withFailMessage("Metric should " + (isMonotonic ? "" : "not ") + "be monotonic") - .isEqualTo(isMonotonic); + assertMetricWithSum(metric, isMonotonic); assertAttributedPoints(metric.getSum().getDataPointsList(), attributeGroupAssertions); } @@ -107,8 +101,7 @@ static void assertSumWithAttributesMultiplePoints( assertThat(metric.getName()).isEqualTo(name); assertThat(metric.getDescription()).isEqualTo(description); assertThat(metric.getUnit()).isEqualTo(unit); - assertThat(metric.hasSum()).isTrue(); - assertThat(metric.getSum().getIsMonotonic()).isEqualTo(isMonotonic); + assertMetricWithSum(metric, isMonotonic); assertAttributedMultiplePoints(metric.getSum().getDataPointsList(), attributeGroupAssertions); } @@ -122,10 +115,25 @@ static void assertGaugeWithAttributes( assertThat(metric.getName()).isEqualTo(name); assertThat(metric.getDescription()).isEqualTo(description); assertThat(metric.getUnit()).isEqualTo(unit); - assertThat(metric.hasGauge()).withFailMessage("Invalid metric type").isTrue(); + assertMetricWithGauge(metric); assertAttributedPoints(metric.getGauge().getDataPointsList(), attributeGroupAssertions); } + private static void assertMetricWithGauge(Metric metric) { + assertThat(metric.hasGauge()).withFailMessage("Metric with gauge expected").isTrue(); + } + + private static void assertMetricWithSum(Metric metric) { + assertThat(metric.hasSum()).withFailMessage("Metric with sum expected").isTrue(); + } + + private static void assertMetricWithSum(Metric metric, boolean isMonotonic) { + assertMetricWithSum(metric); + assertThat(metric.getSum().getIsMonotonic()) + .withFailMessage("Metric should " + (isMonotonic ? "" : "not ") + "be monotonic") + .isEqualTo(isMonotonic); + } + @SuppressWarnings("unchecked") private static void assertTypedPoints(List points, List types) { Consumer>[] assertions = diff --git a/jmx-scraper/src/main/resources/hbase.yaml b/jmx-scraper/src/main/resources/hbase.yaml index fb5ebb924..f6b782140 100644 --- a/jmx-scraper/src/main/resources/hbase.yaml +++ b/jmx-scraper/src/main/resources/hbase.yaml @@ -7,6 +7,7 @@ rules: unit: "{server}" type: updowncounter mapping: + # Group of properties to build hbase.master.region_server.count metric numDeadRegionServers: metric: &metric region_server.count desc: &desc The number of region servers. @@ -26,9 +27,11 @@ rules: ritCount: metric: count desc: The number of regions that are in transition. + ritCountOverThreshold: metric: over_threshold desc: The number of regions that have been in transition longer than a threshold time. + ritOldestAge: metric: oldest_age unit: ms @@ -61,6 +64,19 @@ rules: unit: "{log}" desc: The number of write ahead logs not yet archived. + percentFilesLocal: + metric: files.local + type: gauge + unit: "%" + desc: Percent of store file data that can be read from the local. + + updatesBlockedTime: + metric: blocked_update.time + type: gauge + unit: ms + desc: Amount of time updates have been blocked so the memstore can be flushed. + + # Group of properties to build hbase.region_server.request.count metric writeRequestCount: metric: &metric request.count unit: &unit "{request}" @@ -76,6 +92,7 @@ rules: state: const(read) region_server: *hostname + # Group of properties to build hbase.region_server.queue.length metric flushQueueLength: metric: &metric queue.length unit: &unit "{handler}" @@ -91,12 +108,7 @@ rules: state: const(compaction) region_server: *hostname - updatesBlockedTime: - metric: blocked_update.time - type: gauge - unit: ms - desc: Amount of time updates have been blocked so the memstore can be flushed. - + # Group of properties to build hbase.region_server.block_cache.operation.count metric blockCacheMissCount: metric: &metric block_cache.operation.count type: &type gauge @@ -114,19 +126,14 @@ rules: state: const(hit) region_server: *hostname - percentFilesLocal: - metric: files.local - type: gauge - unit: "%" - desc: Percent of store file data that can be read from the local. - + # Group of properties to build hbase.region_server.operations.slow metric slowDeleteCount: metric: &metric operations.slow unit: &unit "{operation}" desc: &desc Number of operations that took over 1000ms to complete. metricAttribute: operation: const(delete) - region_server: &hostname beanattr(tag\.Hostname) + region_server: *hostname slowAppendCount: metric: *metric unit: *unit @@ -156,6 +163,7 @@ rules: operation: const(increment) region_server: *hostname + # RegionServer statistical metrics - bean: Hadoop:service=HBase,name=RegionServer,sub=Server prefix: hbase.region_server. type: gauge @@ -163,6 +171,7 @@ rules: metricAttribute: region_server: *hostname mapping: + # Statistics for 'append' operation Append_99th_percentile: metric: operation.append.latency.p99 desc: Append operation 99th Percentile latency. @@ -179,6 +188,7 @@ rules: metric: operation.append.latency.median desc: Append operation median latency. + # Statistics for 'delete' operation Delete_99th_percentile: metric: operation.delete.latency.p99 desc: Delete operation 99th Percentile latency. @@ -195,6 +205,7 @@ rules: metric: operation.delete.latency.median desc: Delete operation median latency. + # Statistics for 'put' operation Put_99th_percentile: metric: operation.put.latency.p99 desc: Put operation 99th Percentile latency. @@ -211,6 +222,7 @@ rules: metric: operation.put.latency.median desc: Put operation median latency. + # Statistics for 'get' operation Get_99th_percentile: metric: operation.get.latency.p99 desc: Get operation 99th Percentile latency. @@ -227,6 +239,7 @@ rules: metric: operation.get.latency.median desc: Get operation median latency. + # Statistics for 'replay' operation Replay_99th_percentile: metric: operation.replay.latency.p99 desc: Replay operation 99th Percentile latency. @@ -243,6 +256,7 @@ rules: metric: operation.replay.latency.median desc: Replay operation median latency. + # Statistics for 'increment' operation Increment_99th_percentile: metric: operation.increment.latency.p99 desc: Increment operation 99th Percentile latency. @@ -269,11 +283,13 @@ rules: metric: open_connection.count unit: "{connection}" desc: The number of open connections at the RPC layer. + numActiveHandler: metric: active_handler.count unit: "{handler}" desc: The number of RPC handlers actively servicing requests. + # Group of properties to build hbase.region_server.queue.request.count metric numCallsInReplicationQueue: metric: &metric queue.request.count unit: &unit "{request}" @@ -296,6 +312,7 @@ rules: state: const(priority) region_server: *hostname + # Group of properties to build hbase.region_server.authentication.count metric authenticationSuccesses: metric: &metric authentication.count unit: &unit "{authentication request}" @@ -321,9 +338,11 @@ rules: GcTimeMillis: metric: time desc: Time spent in garbage collection. + GcTimeMillisParNew: metric: young_gen.time desc: Time spent in garbage collection of the young generation. + GcTimeMillisConcurrentMarkSweep: metric: old_gen.time desc: Time spent in garbage collection of the old generation. From 8d46c9423279d6882bf0035f71db96e798786da0 Mon Sep 17 00:00:00 2001 From: robsunday Date: Fri, 22 Nov 2024 13:06:31 +0100 Subject: [PATCH 7/7] Code review suggestions regarding comments in yaml file --- jmx-scraper/src/main/resources/hbase.yaml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/jmx-scraper/src/main/resources/hbase.yaml b/jmx-scraper/src/main/resources/hbase.yaml index f6b782140..90f41d97b 100644 --- a/jmx-scraper/src/main/resources/hbase.yaml +++ b/jmx-scraper/src/main/resources/hbase.yaml @@ -7,7 +7,7 @@ rules: unit: "{server}" type: updowncounter mapping: - # Group of properties to build hbase.master.region_server.count metric + # hbase.master.region_server.count numDeadRegionServers: metric: &metric region_server.count desc: &desc The number of region servers. @@ -76,7 +76,7 @@ rules: unit: ms desc: Amount of time updates have been blocked so the memstore can be flushed. - # Group of properties to build hbase.region_server.request.count metric + # hbase.region_server.request.count writeRequestCount: metric: &metric request.count unit: &unit "{request}" @@ -92,7 +92,7 @@ rules: state: const(read) region_server: *hostname - # Group of properties to build hbase.region_server.queue.length metric + # hbase.region_server.queue.length flushQueueLength: metric: &metric queue.length unit: &unit "{handler}" @@ -108,7 +108,7 @@ rules: state: const(compaction) region_server: *hostname - # Group of properties to build hbase.region_server.block_cache.operation.count metric + # hbase.region_server.block_cache.operation.count blockCacheMissCount: metric: &metric block_cache.operation.count type: &type gauge @@ -126,7 +126,7 @@ rules: state: const(hit) region_server: *hostname - # Group of properties to build hbase.region_server.operations.slow metric + # hbase.region_server.operations.slow slowDeleteCount: metric: &metric operations.slow unit: &unit "{operation}" @@ -289,7 +289,7 @@ rules: unit: "{handler}" desc: The number of RPC handlers actively servicing requests. - # Group of properties to build hbase.region_server.queue.request.count metric + # hbase.region_server.queue.request.count numCallsInReplicationQueue: metric: &metric queue.request.count unit: &unit "{request}" @@ -312,7 +312,7 @@ rules: state: const(priority) region_server: *hostname - # Group of properties to build hbase.region_server.authentication.count metric + # hbase.region_server.authentication.count authenticationSuccesses: metric: &metric authentication.count unit: &unit "{authentication request}"