From 1bf130f9b03416023935a8705079334afd1ded12 Mon Sep 17 00:00:00 2001
From: Magda Wojtowicz
Date: Mon, 28 Oct 2024 14:24:47 +0100
Subject: [PATCH 01/15] copied hadoop.yaml from java-instrumentation

---
 jmx-scraper/src/main/resources/hadoop.yaml | 63 ++++++++++++++++++++++
 1 file changed, 63 insertions(+)
 create mode 100644 jmx-scraper/src/main/resources/hadoop.yaml

diff --git a/jmx-scraper/src/main/resources/hadoop.yaml b/jmx-scraper/src/main/resources/hadoop.yaml
new file mode 100644
index 000000000..82c32bd95
--- /dev/null
+++ b/jmx-scraper/src/main/resources/hadoop.yaml
@@ -0,0 +1,63 @@
+---
+rules:
+  - bean: Hadoop:service=NameNode,name=FSNamesystem
+    unit: "1"
+    prefix: hadoop.
+    metricAttribute:
+      node_name: param(tag.Hostname)
+    mapping:
+      CapacityUsed:
+        metric: capacity.CapacityUsed
+        type: updowncounter
+        unit: By
+        desc: Current used capacity across all data nodes
+      CapacityTotal:
+        metric: capacity.CapacityTotal
+        type: updowncounter
+        unit: By
+      BlocksTotal:
+        metric: block.BlocksTotal
+        type: updowncounter
+        unit: "{blocks}"
+        desc: Current number of allocated blocks in the system
+      MissingBlocks:
+        metric: block.MissingBlocks
+        type: updowncounter
+        unit: "{blocks}"
+        desc: Current number of missing blocks
+      CorruptBlocks:
+        metric: block.CorruptBlocks
+        type: updowncounter
+        unit: "{blocks}"
+        desc: Current number of blocks with corrupt replicas
+      VolumeFailuresTotal:
+        metric: volume.VolumeFailuresTotal
+        type: updowncounter
+        unit: "{volumes}"
+        desc: Total number of volume failures across all data nodes
+        metricAttribute:
+          direction: const(sent)
+      FilesTotal:
+        metric: file.FilesTotal
+        type: updowncounter
+        unit: "{files}"
+        desc: Current number of files and directories
+      TotalLoad:
+        metric: file.TotalLoad
+        type: updowncounter
+        unit: "{operations}"
+        desc: Current number of connections
+      NumLiveDataNodes:
+        metric: datenode.Count
+        type: updowncounter
+        unit: "{nodes}"
+        desc: The Number of data nodes
+        metricAttribute:
+          state: const(live)
+      NumDeadDataNodes:
+        metric: datenode.Count
+        type: updowncounter
+        unit: "{nodes}"
+        desc: The Number of data nodes
+        metricAttribute:
+          state: const(dead)

From eee7bc52387cca817493c5afd7e7363d488af6fe Mon Sep 17 00:00:00 2001
From: Magda Wojtowicz
Date: Mon, 28 Oct 2024 16:52:11 +0100
Subject: [PATCH 02/15] first metric changed

---
 .../target_systems/HadoopIntegrationTest.java | 149 ++++++++++++++++++
 jmx-scraper/src/main/resources/hadoop.yaml    | 110 ++++++-------
 2 files changed, 204 insertions(+), 55 deletions(-)
 create mode 100644 jmx-scraper/src/integrationTest/java/io/opentelemetry/contrib/jmxscraper/target_systems/HadoopIntegrationTest.java

diff --git a/jmx-scraper/src/integrationTest/java/io/opentelemetry/contrib/jmxscraper/target_systems/HadoopIntegrationTest.java b/jmx-scraper/src/integrationTest/java/io/opentelemetry/contrib/jmxscraper/target_systems/HadoopIntegrationTest.java
new file mode 100644
index 000000000..06b62383d
--- /dev/null
+++ b/jmx-scraper/src/integrationTest/java/io/opentelemetry/contrib/jmxscraper/target_systems/HadoopIntegrationTest.java
@@ -0,0 +1,149 @@
+/*
+ * Copyright The OpenTelemetry Authors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package io.opentelemetry.contrib.jmxscraper.target_systems;
+
+import io.opentelemetry.contrib.jmxscraper.JmxScraperContainer;
+import java.time.Duration;
+import org.testcontainers.containers.GenericContainer;
+import org.testcontainers.containers.wait.strategy.Wait;
+
+public class HadoopIntegrationTest extends TargetSystemIntegrationTest {
+
+  @Override
+ protected GenericContainer createTargetContainer(int jmxPort) { + return new GenericContainer<>("bmedora/hadoop:2.9-base") + .withEnv( + "HADOOP_NAMENODE_OPTS", + "-Dcom.sun.management.jmxremote.port=" + + jmxPort + + " -Dcom.sun.management.jmxremote.rmi.port=" + + jmxPort + + " -Dcom.sun.management.jmxremote.ssl=false" + + " -Dcom.sun.management.jmxremote.authenticate=false") + .withEnv( + "HADOOP_DATANODE_OPTS", + "-Dcom.sun.management.jmxremote.port=" + + jmxPort + + " -Dcom.sun.management.jmxremote.rmi.port=" + + jmxPort + + " -Dcom.sun.management.jmxremote.ssl=false" + + " -Dcom.sun.management.jmxremote.authenticate=false") + .withStartupTimeout(Duration.ofMinutes(2)) + .waitingFor(Wait.forListeningPort()); + } + + @Override + protected JmxScraperContainer customizeScraperContainer(JmxScraperContainer scraper) { + return scraper.withTargetSystem("hadoop"); + } + + @Override + protected void verifyMetrics() { +// waitAndAssertMetrics( +// metric -> +// assertGauge( +// metric, +// "cassandra.client.request.range_slice.latency.50p", +// "Token range read request latency - 50th percentile", +// "us"), +// metric -> +// assertGauge( +// metric, +// "cassandra.client.request.range_slice.latency.99p", +// "Token range read request latency - 99th percentile", +// "us"), +// metric -> +// assertGauge( +// metric, +// "cassandra.client.request.range_slice.latency.max", +// "Maximum token range read request latency", +// "us"), +// metric -> +// assertGauge( +// metric, +// "cassandra.client.request.read.latency.50p", +// "Standard read request latency - 50th percentile", +// "us"), +// metric -> +// assertGauge( +// metric, +// "cassandra.client.request.read.latency.99p", +// "Standard read request latency - 99th percentile", +// "us"), +// metric -> +// assertGauge( +// metric, +// "cassandra.client.request.read.latency.max", +// "Maximum standard read request latency", +// "us"), +// metric -> +// assertGauge( +// metric, +// "cassandra.client.request.write.latency.50p", +// "Regular write request latency - 50th percentile", +// "us"), +// metric -> +// assertGauge( +// metric, +// "cassandra.client.request.write.latency.99p", +// "Regular write request latency - 99th percentile", +// "us"), +// metric -> +// assertGauge( +// metric, +// "cassandra.client.request.write.latency.max", +// "Maximum regular write request latency", +// "us"), +// metric -> +// assertSum( +// metric, +// "cassandra.compaction.tasks.completed", +// "Number of completed compactions since server [re]start", +// "1"), +// metric -> +// assertGauge( +// metric, +// "cassandra.compaction.tasks.pending", +// "Estimated number of compactions remaining to perform", +// "1"), +// metric -> +// assertSum( +// metric, +// "cassandra.storage.load.count", +// "Size of the on disk data size this node manages", +// "by", +// /* isMonotonic= */ false), +// metric -> +// assertSum( +// metric, +// "cassandra.storage.total_hints.count", +// "Number of hint messages written to this node since [re]start", +// "1"), +// metric -> +// assertSum( +// metric, +// "cassandra.storage.total_hints.in_progress.count", +// "Number of hints attempting to be sent currently", +// "1", +// /* isMonotonic= */ false), +// metric -> +// assertSumWithAttributes( +// metric, +// "cassandra.client.request.count", +// "Number of requests by operation", +// "1", +// attrs -> attrs.containsOnly(entry("operation", "RangeSlice")), +// attrs -> attrs.containsOnly(entry("operation", "Read")), +// attrs -> attrs.containsOnly(entry("operation", "Write"))), +// 
metric -> +// assertSumWithAttributes( +// metric, +// "cassandra.client.request.error.count", +// "Number of request errors by operation", +// "1", +// getRequestErrorCountAttributes())); + } +} diff --git a/jmx-scraper/src/main/resources/hadoop.yaml b/jmx-scraper/src/main/resources/hadoop.yaml index 82c32bd95..901e72bb9 100644 --- a/jmx-scraper/src/main/resources/hadoop.yaml +++ b/jmx-scraper/src/main/resources/hadoop.yaml @@ -2,62 +2,62 @@ rules: - bean: Hadoop:service=NameNode,name=FSNamesystem unit: "1" - prefix: hadoop. + prefix: hadoop.name_node. metricAttribute: node_name: param(tag.Hostname) mapping: CapacityUsed: - metric: capacity.CapacityUsed - type: updowncounter - unit: By - desc: Current used capacity across all data nodes - CapacityTotal: - metric: capacity.CapacityTotal - type: updowncounter - unit: By - BlocksTotal: - metric: block.BlocksTotal - type: updowncounter - unit: "{blocks}" - desc: Current number of allocated blocks in the system - MissingBlocks: - metric: block.MissingBlocks - type: updowncounter - unit: "{blocks}" - desc: Current number of missing blocks - CorruptBlocks: - metric: block.CorruptBlocks - type: updowncounter - unit: "{blocks}" - desc: Current number of blocks with corrupt replicas - VolumeFailuresTotal: - metric: volume.VolumeFailuresTotal - type: updowncounter - unit: "{volumes}" - desc: Total number of volume failures across all data nodes - metricAttribute: - direction: const(sent) - FilesTotal: - metric: file.FilesTotal - type: updowncounter - unit: "{files}" - desc: Current number of files and directories - TotalLoad: - metric: file.TotalLoad - type: updowncounter - unit: "{operations}" - desc: Current number of connections - NumLiveDataNodes: - metric: datenode.Count - type: updowncounter - unit: "{nodes}" - desc: The Number of data nodes - metricAttribute: - state: const(live) - NumDeadDataNodes: - metric: datenode.Count - type: updowncounter - unit: "{nodes}" - desc: The Number of data nodes - metricAttribute: - state: const(dead) + metric: capacity.usage + type: updowncounter + unit: by + desc: The current used capacity across all data nodes reporting to the name node. 
+# CapacityTotal: +# metric: capacity.CapacityTotal +# type: updowncounter +# unit: By +# BlocksTotal: +# metric: block.BlocksTotal +# type: updowncounter +# unit: "{blocks}" +# desc: Current number of allocated blocks in the system +# MissingBlocks: +# metric: block.MissingBlocks +# type: updowncounter +# unit: "{blocks}" +# desc: Current number of missing blocks +# CorruptBlocks: +# metric: block.CorruptBlocks +# type: updowncounter +# unit: "{blocks}" +# desc: Current number of blocks with corrupt replicas +# VolumeFailuresTotal: +# metric: volume.VolumeFailuresTotal +# type: updowncounter +# unit: "{volumes}" +# desc: Total number of volume failures across all data nodes +# metricAttribute: +# direction: const(sent) +# FilesTotal: +# metric: file.FilesTotal +# type: updowncounter +# unit: "{files}" +# desc: Current number of files and directories +# TotalLoad: +# metric: file.TotalLoad +# type: updowncounter +# unit: "{operations}" +# desc: Current number of connections +# NumLiveDataNodes: +# metric: datenode.Count +# type: updowncounter +# unit: "{nodes}" +# desc: The Number of data nodes +# metricAttribute: +# state: const(live) +# NumDeadDataNodes: +# metric: datenode.Count +# type: updowncounter +# unit: "{nodes}" +# desc: The Number of data nodes +# metricAttribute: +# state: const(dead) From ad6d7be73b2d310cac53030cc8d385c1a75f0eb3 Mon Sep 17 00:00:00 2001 From: Magda Wojtowicz Date: Thu, 31 Oct 2024 17:43:42 +0100 Subject: [PATCH 03/15] applied HadoopIntegrationTest.patch --- .../target_systems/HadoopIntegrationTest.java | 218 +++++++++--------- 1 file changed, 106 insertions(+), 112 deletions(-) diff --git a/jmx-scraper/src/integrationTest/java/io/opentelemetry/contrib/jmxscraper/target_systems/HadoopIntegrationTest.java b/jmx-scraper/src/integrationTest/java/io/opentelemetry/contrib/jmxscraper/target_systems/HadoopIntegrationTest.java index 06b62383d..088e70267 100644 --- a/jmx-scraper/src/integrationTest/java/io/opentelemetry/contrib/jmxscraper/target_systems/HadoopIntegrationTest.java +++ b/jmx-scraper/src/integrationTest/java/io/opentelemetry/contrib/jmxscraper/target_systems/HadoopIntegrationTest.java @@ -22,16 +22,10 @@ protected GenericContainer createTargetContainer(int jmxPort) { + " -Dcom.sun.management.jmxremote.rmi.port=" + jmxPort + " -Dcom.sun.management.jmxremote.ssl=false" - + " -Dcom.sun.management.jmxremote.authenticate=false") - .withEnv( - "HADOOP_DATANODE_OPTS", - "-Dcom.sun.management.jmxremote.port=" - + jmxPort - + " -Dcom.sun.management.jmxremote.rmi.port=" - + jmxPort - + " -Dcom.sun.management.jmxremote.ssl=false" - + " -Dcom.sun.management.jmxremote.authenticate=false") + + " -Dcom.sun.management.jmxremote.authenticate=false" + + " -Dcom.sun.management.jmxremote.local.only=false") .withStartupTimeout(Duration.ofMinutes(2)) + .withCreateContainerCmdModifier(cmd -> cmd.withHostName("test-host")) .waitingFor(Wait.forListeningPort()); } @@ -42,108 +36,108 @@ protected JmxScraperContainer customizeScraperContainer(JmxScraperContainer scra @Override protected void verifyMetrics() { -// waitAndAssertMetrics( -// metric -> -// assertGauge( -// metric, -// "cassandra.client.request.range_slice.latency.50p", -// "Token range read request latency - 50th percentile", -// "us"), -// metric -> -// assertGauge( -// metric, -// "cassandra.client.request.range_slice.latency.99p", -// "Token range read request latency - 99th percentile", -// "us"), -// metric -> -// assertGauge( -// metric, -// "cassandra.client.request.range_slice.latency.max", -// "Maximum token 
range read request latency", -// "us"), -// metric -> -// assertGauge( -// metric, -// "cassandra.client.request.read.latency.50p", -// "Standard read request latency - 50th percentile", -// "us"), -// metric -> -// assertGauge( -// metric, -// "cassandra.client.request.read.latency.99p", -// "Standard read request latency - 99th percentile", -// "us"), -// metric -> -// assertGauge( -// metric, -// "cassandra.client.request.read.latency.max", -// "Maximum standard read request latency", -// "us"), -// metric -> -// assertGauge( -// metric, -// "cassandra.client.request.write.latency.50p", -// "Regular write request latency - 50th percentile", -// "us"), -// metric -> -// assertGauge( -// metric, -// "cassandra.client.request.write.latency.99p", -// "Regular write request latency - 99th percentile", -// "us"), -// metric -> -// assertGauge( -// metric, -// "cassandra.client.request.write.latency.max", -// "Maximum regular write request latency", -// "us"), -// metric -> -// assertSum( -// metric, -// "cassandra.compaction.tasks.completed", -// "Number of completed compactions since server [re]start", -// "1"), -// metric -> -// assertGauge( -// metric, -// "cassandra.compaction.tasks.pending", -// "Estimated number of compactions remaining to perform", -// "1"), -// metric -> -// assertSum( -// metric, -// "cassandra.storage.load.count", -// "Size of the on disk data size this node manages", -// "by", -// /* isMonotonic= */ false), -// metric -> -// assertSum( -// metric, -// "cassandra.storage.total_hints.count", -// "Number of hint messages written to this node since [re]start", -// "1"), -// metric -> -// assertSum( -// metric, -// "cassandra.storage.total_hints.in_progress.count", -// "Number of hints attempting to be sent currently", -// "1", -// /* isMonotonic= */ false), -// metric -> -// assertSumWithAttributes( -// metric, -// "cassandra.client.request.count", -// "Number of requests by operation", -// "1", -// attrs -> attrs.containsOnly(entry("operation", "RangeSlice")), -// attrs -> attrs.containsOnly(entry("operation", "Read")), -// attrs -> attrs.containsOnly(entry("operation", "Write"))), -// metric -> -// assertSumWithAttributes( -// metric, -// "cassandra.client.request.error.count", -// "Number of request errors by operation", -// "1", -// getRequestErrorCountAttributes())); + // waitAndAssertMetrics( + // metric -> + // assertGauge( + // metric, + // "cassandra.client.request.range_slice.latency.50p", + // "Token range read request latency - 50th percentile", + // "us"), + // metric -> + // assertGauge( + // metric, + // "cassandra.client.request.range_slice.latency.99p", + // "Token range read request latency - 99th percentile", + // "us"), + // metric -> + // assertGauge( + // metric, + // "cassandra.client.request.range_slice.latency.max", + // "Maximum token range read request latency", + // "us"), + // metric -> + // assertGauge( + // metric, + // "cassandra.client.request.read.latency.50p", + // "Standard read request latency - 50th percentile", + // "us"), + // metric -> + // assertGauge( + // metric, + // "cassandra.client.request.read.latency.99p", + // "Standard read request latency - 99th percentile", + // "us"), + // metric -> + // assertGauge( + // metric, + // "cassandra.client.request.read.latency.max", + // "Maximum standard read request latency", + // "us"), + // metric -> + // assertGauge( + // metric, + // "cassandra.client.request.write.latency.50p", + // "Regular write request latency - 50th percentile", + // "us"), + // metric -> + // assertGauge( + 
// metric, + // "cassandra.client.request.write.latency.99p", + // "Regular write request latency - 99th percentile", + // "us"), + // metric -> + // assertGauge( + // metric, + // "cassandra.client.request.write.latency.max", + // "Maximum regular write request latency", + // "us"), + // metric -> + // assertSum( + // metric, + // "cassandra.compaction.tasks.completed", + // "Number of completed compactions since server [re]start", + // "1"), + // metric -> + // assertGauge( + // metric, + // "cassandra.compaction.tasks.pending", + // "Estimated number of compactions remaining to perform", + // "1"), + // metric -> + // assertSum( + // metric, + // "cassandra.storage.load.count", + // "Size of the on disk data size this node manages", + // "by", + // /* isMonotonic= */ false), + // metric -> + // assertSum( + // metric, + // "cassandra.storage.total_hints.count", + // "Number of hint messages written to this node since [re]start", + // "1"), + // metric -> + // assertSum( + // metric, + // "cassandra.storage.total_hints.in_progress.count", + // "Number of hints attempting to be sent currently", + // "1", + // /* isMonotonic= */ false), + // metric -> + // assertSumWithAttributes( + // metric, + // "cassandra.client.request.count", + // "Number of requests by operation", + // "1", + // attrs -> attrs.containsOnly(entry("operation", "RangeSlice")), + // attrs -> attrs.containsOnly(entry("operation", "Read")), + // attrs -> attrs.containsOnly(entry("operation", "Write"))), + // metric -> + // assertSumWithAttributes( + // metric, + // "cassandra.client.request.error.count", + // "Number of request errors by operation", + // "1", + // getRequestErrorCountAttributes())); } } From 10393a208e9cc4781d3835ed6b1f4c86e323a0fa Mon Sep 17 00:00:00 2001 From: Magda Wojtowicz Date: Thu, 31 Oct 2024 18:59:49 +0100 Subject: [PATCH 04/15] troubleshooting --- .../target_systems/HadoopIntegrationTest.java | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/jmx-scraper/src/integrationTest/java/io/opentelemetry/contrib/jmxscraper/target_systems/HadoopIntegrationTest.java b/jmx-scraper/src/integrationTest/java/io/opentelemetry/contrib/jmxscraper/target_systems/HadoopIntegrationTest.java index 088e70267..5fe2c01fe 100644 --- a/jmx-scraper/src/integrationTest/java/io/opentelemetry/contrib/jmxscraper/target_systems/HadoopIntegrationTest.java +++ b/jmx-scraper/src/integrationTest/java/io/opentelemetry/contrib/jmxscraper/target_systems/HadoopIntegrationTest.java @@ -24,7 +24,9 @@ protected GenericContainer createTargetContainer(int jmxPort) { + " -Dcom.sun.management.jmxremote.ssl=false" + " -Dcom.sun.management.jmxremote.authenticate=false" + " -Dcom.sun.management.jmxremote.local.only=false") - .withStartupTimeout(Duration.ofMinutes(2)) + .withStartupTimeout(Duration.ofMinutes(20)) +// .withExposedPorts(jmxPort, 10020, 19888, 50010, 50020, 50070, 50075, 50090, 8020, 8042, 8088, 9000) + .withExposedPorts(jmxPort, 8004) .withCreateContainerCmdModifier(cmd -> cmd.withHostName("test-host")) .waitingFor(Wait.forListeningPort()); } @@ -36,6 +38,15 @@ protected JmxScraperContainer customizeScraperContainer(JmxScraperContainer scra @Override protected void verifyMetrics() { +// waitAndAssertMetrics( +// metric -> +// assertSumWithAttributes( +// metric, +// "hadoop.name_node.capacity.usage", +// "The current used capacity across all data nodes reporting to the name node.", +// "by", +// attrs -> attrs.contains(entry("node_name", "test-host"))) +// ); // waitAndAssertMetrics( // metric -> // 
assertGauge( From 410fc3abe0e2509959dd9898f841e832733ac2d2 Mon Sep 17 00:00:00 2001 From: Magda Wojtowicz Date: Tue, 19 Nov 2024 15:07:07 +0100 Subject: [PATCH 05/15] added hadoop-env.sh --- jmx-scraper/src/main/resources/hadoop-env.sh | 99 ++++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 jmx-scraper/src/main/resources/hadoop-env.sh diff --git a/jmx-scraper/src/main/resources/hadoop-env.sh b/jmx-scraper/src/main/resources/hadoop-env.sh new file mode 100644 index 000000000..7e842d341 --- /dev/null +++ b/jmx-scraper/src/main/resources/hadoop-env.sh @@ -0,0 +1,99 @@ +#!/bin/bash + +# Set Hadoop-specific environment variables here. + +# The only required environment variable is JAVA_HOME. All others are +# optional. When running a distributed configuration it is best to +# set JAVA_HOME in this file, so that it is correctly defined on +# remote nodes. + +# The java implementation to use. +export JAVA_HOME=${JAVA_HOME} + +# The jsvc implementation to use. Jsvc is required to run secure datanodes +# that bind to privileged ports to provide authentication of data transfer +# protocol. Jsvc is not required if SASL is configured for authentication of +# data transfer protocol using non-privileged ports. +#export JSVC_HOME=${JSVC_HOME} + +export HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-"/etc/hadoop"} + +# Extra Java CLASSPATH elements. Automatically insert capacity-scheduler. +for f in "$HADOOP_HOME"/contrib/capacity-scheduler/*.jar; do + if [ "$HADOOP_CLASSPATH" ]; then + export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:$f + else + export HADOOP_CLASSPATH=$f + fi +done + +# The maximum amount of heap to use, in MB. Default is 1000. +#export HADOOP_HEAPSIZE= +#export HADOOP_NAMENODE_INIT_HEAPSIZE="" + +# Enable extra debugging of Hadoop's JAAS binding, used to set up +# Kerberos security. +# export HADOOP_JAAS_DEBUG=true + +# Extra Java runtime options. Empty by default. 
+# For Kerberos debugging, an extended option set logs more information
+# export HADOOP_OPTS="-Djava.net.preferIPv4Stack=true -Dsun.security.krb5.debug=true -Dsun.security.spnego.debug"
+export HADOOP_OPTS="$HADOOP_OPTS -Djava.net.preferIPv4Stack=true"
+
+# Command specific options appended to HADOOP_OPTS when specified
+export HADOOP_NAMENODE_OPTS="-Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,RFAS} -Dhdfs.audit.logger=${HDFS_AUDIT_LOGGER:-INFO,NullAppender} $HADOOP_NAMENODE_OPTS"
+export HADOOP_NAMENODE_OPTS="-Dcom.sun.management.jmxremote $HADOOP_NAMENODE_OPTS"
+export HADOOP_NAMENODE_OPTS="$HADOOP_NAMENODE_OPTS -Dcom.sun.management.jmxremote.authenticate=false"
+export HADOOP_NAMENODE_OPTS="$HADOOP_NAMENODE_OPTS -Dcom.sun.management.jmxremote.ssl=false"
+export HADOOP_NAMENODE_OPTS="$HADOOP_NAMENODE_OPTS -Dcom.sun.management.jmxremote.port=9999 -Dcom.sun.management.jmxremote.rmi.port=9999"
+
+export HADOOP_DATANODE_OPTS="-Dhadoop.security.logger=ERROR,RFAS $HADOOP_DATANODE_OPTS"
+
+export HADOOP_SECONDARYNAMENODE_OPTS="-Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,RFAS} -Dhdfs.audit.logger=${HDFS_AUDIT_LOGGER:-INFO,NullAppender} $HADOOP_SECONDARYNAMENODE_OPTS"
+
+export HADOOP_NFS3_OPTS="$HADOOP_NFS3_OPTS"
+export HADOOP_PORTMAP_OPTS="-Xmx512m $HADOOP_PORTMAP_OPTS"
+
+# The following applies to multiple commands (fs, dfs, fsck, distcp etc)
+export HADOOP_CLIENT_OPTS="$HADOOP_CLIENT_OPTS"
+# set heap args when HADOOP_HEAPSIZE is empty
+if [ "$HADOOP_HEAPSIZE" = "" ]; then
+  export HADOOP_CLIENT_OPTS="-Xmx512m $HADOOP_CLIENT_OPTS"
+fi
+#HADOOP_JAVA_PLATFORM_OPTS="-XX:-UsePerfData $HADOOP_JAVA_PLATFORM_OPTS"
+
+# On secure datanodes, user to run the datanode as after dropping privileges.
+# This **MUST** be uncommented to enable secure HDFS if using privileged ports
+# to provide authentication of data transfer protocol. This **MUST NOT** be
+# defined if SASL is configured for authentication of data transfer protocol
+# using non-privileged ports.
+export HADOOP_SECURE_DN_USER=${HADOOP_SECURE_DN_USER}
+
+# Where log files are stored. $HADOOP_HOME/logs by default.
+#export HADOOP_LOG_DIR=${HADOOP_LOG_DIR}/$USER
+
+# Where log files are stored in the secure data environment.
+#export HADOOP_SECURE_DN_LOG_DIR=${HADOOP_LOG_DIR}/${HADOOP_HDFS_USER}
+
+###
+# HDFS Mover specific parameters
+###
+# Specify the JVM options to be used when starting the HDFS Mover.
+# These options will be appended to the options specified as HADOOP_OPTS
+# and therefore may override any similar flags set in HADOOP_OPTS
+#
+# export HADOOP_MOVER_OPTS=""
+
+###
+# Advanced Users Only!
+###
+
+# The directory where pid files are stored. /tmp by default.
+# NOTE: this should be set to a directory that can only be written to by
+# the user that will run the hadoop daemons. Otherwise there is the
+# potential for a symlink attack.
+export HADOOP_PID_DIR=${HADOOP_PID_DIR}
+export HADOOP_SECURE_DN_PID_DIR=${HADOOP_PID_DIR}
+
+# A string representing this instance of hadoop. $USER by default.
+export HADOOP_IDENT_STRING=$USER From ae661ae1cc683a9fbd0309259e900181727e60c2 Mon Sep 17 00:00:00 2001 From: Magda Wojtowicz Date: Tue, 19 Nov 2024 19:40:51 +0100 Subject: [PATCH 06/15] troubleshooting --- .../target_systems/HadoopIntegrationTest.java | 143 +++--------------- jmx-scraper/src/main/resources/hadoop.yaml | 39 +++-- 2 files changed, 38 insertions(+), 144 deletions(-) diff --git a/jmx-scraper/src/integrationTest/java/io/opentelemetry/contrib/jmxscraper/target_systems/HadoopIntegrationTest.java b/jmx-scraper/src/integrationTest/java/io/opentelemetry/contrib/jmxscraper/target_systems/HadoopIntegrationTest.java index 5fe2c01fe..b547c56cb 100644 --- a/jmx-scraper/src/integrationTest/java/io/opentelemetry/contrib/jmxscraper/target_systems/HadoopIntegrationTest.java +++ b/jmx-scraper/src/integrationTest/java/io/opentelemetry/contrib/jmxscraper/target_systems/HadoopIntegrationTest.java @@ -5,28 +5,25 @@ package io.opentelemetry.contrib.jmxscraper.target_systems; +import static io.opentelemetry.contrib.jmxscraper.target_systems.MetricAssertions.assertSumWithAttributes; +import static org.assertj.core.api.Assertions.entry; + import io.opentelemetry.contrib.jmxscraper.JmxScraperContainer; import java.time.Duration; import org.testcontainers.containers.GenericContainer; import org.testcontainers.containers.wait.strategy.Wait; +import org.testcontainers.utility.MountableFile; public class HadoopIntegrationTest extends TargetSystemIntegrationTest { @Override protected GenericContainer createTargetContainer(int jmxPort) { return new GenericContainer<>("bmedora/hadoop:2.9-base") - .withEnv( - "HADOOP_NAMENODE_OPTS", - "-Dcom.sun.management.jmxremote.port=" - + jmxPort - + " -Dcom.sun.management.jmxremote.rmi.port=" - + jmxPort - + " -Dcom.sun.management.jmxremote.ssl=false" - + " -Dcom.sun.management.jmxremote.authenticate=false" - + " -Dcom.sun.management.jmxremote.local.only=false") - .withStartupTimeout(Duration.ofMinutes(20)) -// .withExposedPorts(jmxPort, 10020, 19888, 50010, 50020, 50070, 50075, 50090, 8020, 8042, 8088, 9000) - .withExposedPorts(jmxPort, 8004) + .withCopyFileToContainer( + MountableFile.forClasspathResource("hadoop-env.sh", 0400), + "/hadoop/etc/hadoop/hadoop-env.sh") + .waitingFor(Wait.forListeningPort().withStartupTimeout(Duration.ofMinutes(200))) + .withExposedPorts(jmxPort) .withCreateContainerCmdModifier(cmd -> cmd.withHostName("test-host")) .waitingFor(Wait.forListeningPort()); } @@ -38,117 +35,15 @@ protected JmxScraperContainer customizeScraperContainer(JmxScraperContainer scra @Override protected void verifyMetrics() { -// waitAndAssertMetrics( -// metric -> -// assertSumWithAttributes( -// metric, -// "hadoop.name_node.capacity.usage", -// "The current used capacity across all data nodes reporting to the name node.", -// "by", -// attrs -> attrs.contains(entry("node_name", "test-host"))) -// ); - // waitAndAssertMetrics( - // metric -> - // assertGauge( - // metric, - // "cassandra.client.request.range_slice.latency.50p", - // "Token range read request latency - 50th percentile", - // "us"), - // metric -> - // assertGauge( - // metric, - // "cassandra.client.request.range_slice.latency.99p", - // "Token range read request latency - 99th percentile", - // "us"), - // metric -> - // assertGauge( - // metric, - // "cassandra.client.request.range_slice.latency.max", - // "Maximum token range read request latency", - // "us"), - // metric -> - // assertGauge( - // metric, - // "cassandra.client.request.read.latency.50p", - // "Standard read request latency - 
50th percentile", - // "us"), - // metric -> - // assertGauge( - // metric, - // "cassandra.client.request.read.latency.99p", - // "Standard read request latency - 99th percentile", - // "us"), - // metric -> - // assertGauge( - // metric, - // "cassandra.client.request.read.latency.max", - // "Maximum standard read request latency", - // "us"), - // metric -> - // assertGauge( - // metric, - // "cassandra.client.request.write.latency.50p", - // "Regular write request latency - 50th percentile", - // "us"), - // metric -> - // assertGauge( - // metric, - // "cassandra.client.request.write.latency.99p", - // "Regular write request latency - 99th percentile", - // "us"), - // metric -> - // assertGauge( - // metric, - // "cassandra.client.request.write.latency.max", - // "Maximum regular write request latency", - // "us"), - // metric -> - // assertSum( - // metric, - // "cassandra.compaction.tasks.completed", - // "Number of completed compactions since server [re]start", - // "1"), - // metric -> - // assertGauge( - // metric, - // "cassandra.compaction.tasks.pending", - // "Estimated number of compactions remaining to perform", - // "1"), - // metric -> - // assertSum( - // metric, - // "cassandra.storage.load.count", - // "Size of the on disk data size this node manages", - // "by", - // /* isMonotonic= */ false), - // metric -> - // assertSum( - // metric, - // "cassandra.storage.total_hints.count", - // "Number of hint messages written to this node since [re]start", - // "1"), - // metric -> - // assertSum( - // metric, - // "cassandra.storage.total_hints.in_progress.count", - // "Number of hints attempting to be sent currently", - // "1", - // /* isMonotonic= */ false), - // metric -> - // assertSumWithAttributes( - // metric, - // "cassandra.client.request.count", - // "Number of requests by operation", - // "1", - // attrs -> attrs.containsOnly(entry("operation", "RangeSlice")), - // attrs -> attrs.containsOnly(entry("operation", "Read")), - // attrs -> attrs.containsOnly(entry("operation", "Write"))), - // metric -> - // assertSumWithAttributes( - // metric, - // "cassandra.client.request.error.count", - // "Number of request errors by operation", - // "1", - // getRequestErrorCountAttributes())); + waitAndAssertMetrics( + metric -> + assertSumWithAttributes( + metric, + "hadoop.name_node.capacity.usage", + "The current used capacity across all data nodes reporting to the name node.", + "by", + attrs -> attrs.contains(entry("node_name", "test-host"))) + ); } } + diff --git a/jmx-scraper/src/main/resources/hadoop.yaml b/jmx-scraper/src/main/resources/hadoop.yaml index 901e72bb9..b27ae9353 100644 --- a/jmx-scraper/src/main/resources/hadoop.yaml +++ b/jmx-scraper/src/main/resources/hadoop.yaml @@ -12,52 +12,51 @@ rules: unit: by desc: The current used capacity across all data nodes reporting to the name node. # CapacityTotal: -# metric: capacity.CapacityTotal +# metric: capacity.limit # type: updowncounter -# unit: By +# unit: by +# desc: The total capacity allotted to data nodes reporting to the name node. # BlocksTotal: -# metric: block.BlocksTotal +# metric: block.count # type: updowncounter # unit: "{blocks}" -# desc: Current number of allocated blocks in the system +# desc: The total number of blocks on the name node. # MissingBlocks: -# metric: block.MissingBlocks +# metric: block.missing # type: updowncounter # unit: "{blocks}" -# desc: Current number of missing blocks +# desc: The number of blocks reported as missing to the name node. 
# CorruptBlocks: -# metric: block.CorruptBlocks +# metric: block.corrupt # type: updowncounter # unit: "{blocks}" -# desc: Current number of blocks with corrupt replicas +# desc: The number of blocks reported as corrupt to the name node. # VolumeFailuresTotal: -# metric: volume.VolumeFailuresTotal +# metric: volume.failed # type: updowncounter # unit: "{volumes}" -# desc: Total number of volume failures across all data nodes -# metricAttribute: -# direction: const(sent) +# desc: The number of failed volumes reported to the name node. # FilesTotal: -# metric: file.FilesTotal +# metric: file.count # type: updowncounter # unit: "{files}" -# desc: Current number of files and directories +# desc: The total number of files being tracked by the name node. # TotalLoad: -# metric: file.TotalLoad +# metric: file.load # type: updowncounter # unit: "{operations}" -# desc: Current number of connections +# desc: The current number of concurrent file accesses. # NumLiveDataNodes: -# metric: datenode.Count +# metric: data_node.count # type: updowncounter # unit: "{nodes}" -# desc: The Number of data nodes +# desc: The number of live data nodes reporting to the name node. # metricAttribute: # state: const(live) # NumDeadDataNodes: -# metric: datenode.Count +# metric: data_node.count # type: updowncounter # unit: "{nodes}" -# desc: The Number of data nodes +# desc: The number of data nodes reporting to the name node. # metricAttribute: # state: const(dead) From 23a0ebc34fa0c03a4d8748c6dda3f53f31a6396a Mon Sep 17 00:00:00 2001 From: Magda Wojtowicz Date: Wed, 20 Nov 2024 20:00:04 +0100 Subject: [PATCH 07/15] working integration test --- .../target_systems/HadoopIntegrationTest.java | 70 +++++++++++- jmx-scraper/src/main/resources/hadoop.yaml | 100 +++++++++--------- 2 files changed, 119 insertions(+), 51 deletions(-) diff --git a/jmx-scraper/src/integrationTest/java/io/opentelemetry/contrib/jmxscraper/target_systems/HadoopIntegrationTest.java b/jmx-scraper/src/integrationTest/java/io/opentelemetry/contrib/jmxscraper/target_systems/HadoopIntegrationTest.java index b547c56cb..b8bcabcdd 100644 --- a/jmx-scraper/src/integrationTest/java/io/opentelemetry/contrib/jmxscraper/target_systems/HadoopIntegrationTest.java +++ b/jmx-scraper/src/integrationTest/java/io/opentelemetry/contrib/jmxscraper/target_systems/HadoopIntegrationTest.java @@ -42,7 +42,75 @@ protected void verifyMetrics() { "hadoop.name_node.capacity.usage", "The current used capacity across all data nodes reporting to the name node.", "by", - attrs -> attrs.contains(entry("node_name", "test-host"))) + /* isMonotonic= */false, + attrs -> attrs.contains(entry("node_name", "test-host"))), + metric -> + assertSumWithAttributes( + metric, + "hadoop.name_node.capacity.limit", + "The total capacity allotted to data nodes reporting to the name node.", + "by", + /* isMonotonic= */false, + attrs -> attrs.containsOnly(entry("node_name", "test-host"))), + metric -> + assertSumWithAttributes( + metric, + "hadoop.name_node.block.count", + "The total number of blocks on the name node.", + "{blocks}", + /* isMonotonic= */false, + attrs -> attrs.containsOnly(entry("node_name", "test-host"))), + metric -> + assertSumWithAttributes( + metric, + "hadoop.name_node.block.missing", + "The number of blocks reported as missing to the name node.", + "{blocks}", + /* isMonotonic= */false, + attrs -> attrs.containsOnly(entry("node_name", "test-host"))), + metric -> + assertSumWithAttributes( + metric, + "hadoop.name_node.block.corrupt", + "The number of blocks reported as corrupt 
to the name node.", + "{blocks}", + /* isMonotonic= */false, + attrs -> attrs.containsOnly(entry("node_name", "test-host"))), + metric -> + assertSumWithAttributes( + metric, + "hadoop.name_node.volume.failed", + "The number of failed volumes reported to the name node.", + "{volumes}", + /* isMonotonic= */false, + attrs -> attrs.containsOnly(entry("node_name", "test-host"))), + metric -> + assertSumWithAttributes( + metric, + "hadoop.name_node.file.count", + "The total number of files being tracked by the name node.", + "{files}", + /* isMonotonic= */false, + attrs -> attrs.containsOnly(entry("node_name", "test-host"))), + metric -> + assertSumWithAttributes( + metric, + "hadoop.name_node.file.load", + "The current number of concurrent file accesses.", + "{operations}", + /* isMonotonic= */false, + attrs -> attrs.containsOnly(entry("node_name", "test-host"))), + metric -> + assertSumWithAttributes( + metric, + "hadoop.name_node.data_node.count", + "The number of data nodes reporting to the name node.", + "{nodes}", + /* isMonotonic= */false, + attrs -> + attrs.containsOnly(entry("node_name", "test-host"), entry("state", "live")), + attrs -> + attrs.containsOnly(entry("node_name", "test-host"), entry("state", "dead"))) ); } } diff --git a/jmx-scraper/src/main/resources/hadoop.yaml b/jmx-scraper/src/main/resources/hadoop.yaml index b27ae9353..7a3844c94 100644 --- a/jmx-scraper/src/main/resources/hadoop.yaml +++ b/jmx-scraper/src/main/resources/hadoop.yaml @@ -4,59 +4,59 @@ rules: unit: "1" prefix: hadoop.name_node. metricAttribute: - node_name: param(tag.Hostname) + node_name: beanattr(tag\.Hostname) mapping: CapacityUsed: metric: capacity.usage type: updowncounter unit: by desc: The current used capacity across all data nodes reporting to the name node. -# CapacityTotal: -# metric: capacity.limit -# type: updowncounter -# unit: by -# desc: The total capacity allotted to data nodes reporting to the name node. -# BlocksTotal: -# metric: block.count -# type: updowncounter -# unit: "{blocks}" -# desc: The total number of blocks on the name node. -# MissingBlocks: -# metric: block.missing -# type: updowncounter -# unit: "{blocks}" -# desc: The number of blocks reported as missing to the name node. -# CorruptBlocks: -# metric: block.corrupt -# type: updowncounter -# unit: "{blocks}" -# desc: The number of blocks reported as corrupt to the name node. -# VolumeFailuresTotal: -# metric: volume.failed -# type: updowncounter -# unit: "{volumes}" -# desc: The number of failed volumes reported to the name node. -# FilesTotal: -# metric: file.count -# type: updowncounter -# unit: "{files}" -# desc: The total number of files being tracked by the name node. -# TotalLoad: -# metric: file.load -# type: updowncounter -# unit: "{operations}" -# desc: The current number of concurrent file accesses. -# NumLiveDataNodes: -# metric: data_node.count -# type: updowncounter -# unit: "{nodes}" -# desc: The number of live data nodes reporting to the name node. -# metricAttribute: -# state: const(live) -# NumDeadDataNodes: -# metric: data_node.count -# type: updowncounter -# unit: "{nodes}" -# desc: The number of data nodes reporting to the name node. -# metricAttribute: -# state: const(dead) + CapacityTotal: + metric: capacity.limit + type: updowncounter + unit: by + desc: The total capacity allotted to data nodes reporting to the name node. + BlocksTotal: + metric: block.count + type: updowncounter + unit: "{blocks}" + desc: The total number of blocks on the name node. 
+ MissingBlocks: + metric: block.missing + type: updowncounter + unit: "{blocks}" + desc: The number of blocks reported as missing to the name node. + CorruptBlocks: + metric: block.corrupt + type: updowncounter + unit: "{blocks}" + desc: The number of blocks reported as corrupt to the name node. + VolumeFailuresTotal: + metric: volume.failed + type: updowncounter + unit: "{volumes}" + desc: The number of failed volumes reported to the name node. + FilesTotal: + metric: file.count + type: updowncounter + unit: "{files}" + desc: The total number of files being tracked by the name node. + TotalLoad: + metric: file.load + type: updowncounter + unit: "{operations}" + desc: The current number of concurrent file accesses. + NumLiveDataNodes: + metric: &metric data_node.count + type: updowncounter + unit: &unit "{nodes}" + desc: &desc The number of data nodes reporting to the name node. + metricAttribute: + state: const(live) + NumDeadDataNodes: + metric: *metric + type: updowncounter + unit: *unit + desc: *desc + metricAttribute: + state: const(dead) From 675cd7b5f83f510fa8211aed67e174d946299ca5 Mon Sep 17 00:00:00 2001 From: Magda Wojtowicz Date: Mon, 25 Nov 2024 16:48:41 +0100 Subject: [PATCH 08/15] changed unit names --- .../target_systems/HadoopIntegrationTest.java | 14 +++++++------- .../main/resources/target-systems/hadoop.groovy | 14 +++++++------- .../target_systems/HadoopIntegrationTest.java | 14 +++++++------- .../resources/hadoop-env.sh | 0 jmx-scraper/src/main/resources/hadoop.yaml | 14 +++++++------- 5 files changed, 28 insertions(+), 28 deletions(-) rename jmx-scraper/src/{main => integrationTest}/resources/hadoop-env.sh (100%) diff --git a/jmx-metrics/src/integrationTest/java/io/opentelemetry/contrib/jmxmetrics/target_systems/HadoopIntegrationTest.java b/jmx-metrics/src/integrationTest/java/io/opentelemetry/contrib/jmxmetrics/target_systems/HadoopIntegrationTest.java index 856870455..0f00eafd8 100644 --- a/jmx-metrics/src/integrationTest/java/io/opentelemetry/contrib/jmxmetrics/target_systems/HadoopIntegrationTest.java +++ b/jmx-metrics/src/integrationTest/java/io/opentelemetry/contrib/jmxmetrics/target_systems/HadoopIntegrationTest.java @@ -60,49 +60,49 @@ void endToEnd() { metric, "hadoop.name_node.block.count", "The total number of blocks on the name node.", - "{blocks}", + "{block}", attrs -> attrs.containsOnly(entry("node_name", "test-host"))), metric -> assertSumWithAttributes( metric, "hadoop.name_node.block.missing", "The number of blocks reported as missing to the name node.", - "{blocks}", + "{block}", attrs -> attrs.containsOnly(entry("node_name", "test-host"))), metric -> assertSumWithAttributes( metric, "hadoop.name_node.block.corrupt", "The number of blocks reported as corrupt to the name node.", - "{blocks}", + "{block}", attrs -> attrs.containsOnly(entry("node_name", "test-host"))), metric -> assertSumWithAttributes( metric, "hadoop.name_node.volume.failed", "The number of failed volumes reported to the name node.", - "{volumes}", + "{volume}", attrs -> attrs.containsOnly(entry("node_name", "test-host"))), metric -> assertSumWithAttributes( metric, "hadoop.name_node.file.count", "The total number of files being tracked by the name node.", - "{files}", + "{file}", attrs -> attrs.containsOnly(entry("node_name", "test-host"))), metric -> assertSumWithAttributes( metric, "hadoop.name_node.file.load", "The current number of concurrent file accesses.", - "{operations}", + "{operation}", attrs -> attrs.containsOnly(entry("node_name", "test-host"))), metric -> 
assertSumWithAttributes( metric, "hadoop.name_node.data_node.count", "The number of data nodes reporting to the name node.", - "{nodes}", + "{node}", attrs -> attrs.containsOnly(entry("node_name", "test-host"), entry("state", "live")), attrs -> diff --git a/jmx-metrics/src/main/resources/target-systems/hadoop.groovy b/jmx-metrics/src/main/resources/target-systems/hadoop.groovy index 17ce797a8..5c381c3ff 100644 --- a/jmx-metrics/src/main/resources/target-systems/hadoop.groovy +++ b/jmx-metrics/src/main/resources/target-systems/hadoop.groovy @@ -21,25 +21,25 @@ otel.instrument(beanHadoopNameNodeFS, "hadoop.name_node.capacity.usage", "The cu otel.instrument(beanHadoopNameNodeFS, "hadoop.name_node.capacity.limit", "The total capacity allotted to data nodes reporting to the name node.", "by", ["node_name" : { mbean -> mbean.getProperty("tag.Hostname") }], "CapacityTotal", otel.&longUpDownCounterCallback) -otel.instrument(beanHadoopNameNodeFS, "hadoop.name_node.block.count", "The total number of blocks on the name node.", "{blocks}", +otel.instrument(beanHadoopNameNodeFS, "hadoop.name_node.block.count", "The total number of blocks on the name node.", "{block}", ["node_name" : { mbean -> mbean.getProperty("tag.Hostname") }], "BlocksTotal", otel.&longUpDownCounterCallback) -otel.instrument(beanHadoopNameNodeFS, "hadoop.name_node.block.missing", "The number of blocks reported as missing to the name node.", "{blocks}", +otel.instrument(beanHadoopNameNodeFS, "hadoop.name_node.block.missing", "The number of blocks reported as missing to the name node.", "{block}", ["node_name" : { mbean -> mbean.getProperty("tag.Hostname") }], "MissingBlocks", otel.&longUpDownCounterCallback) -otel.instrument(beanHadoopNameNodeFS, "hadoop.name_node.block.corrupt", "The number of blocks reported as corrupt to the name node.", "{blocks}", +otel.instrument(beanHadoopNameNodeFS, "hadoop.name_node.block.corrupt", "The number of blocks reported as corrupt to the name node.", "{block}", ["node_name" : { mbean -> mbean.getProperty("tag.Hostname") }], "CorruptBlocks", otel.&longUpDownCounterCallback) -otel.instrument(beanHadoopNameNodeFS, "hadoop.name_node.volume.failed", "The number of failed volumes reported to the name node.", "{volumes}", +otel.instrument(beanHadoopNameNodeFS, "hadoop.name_node.volume.failed", "The number of failed volumes reported to the name node.", "{volume}", ["node_name" : { mbean -> mbean.getProperty("tag.Hostname") }], "VolumeFailuresTotal", otel.&longUpDownCounterCallback) -otel.instrument(beanHadoopNameNodeFS, "hadoop.name_node.file.count", "The total number of files being tracked by the name node.", "{files}", +otel.instrument(beanHadoopNameNodeFS, "hadoop.name_node.file.count", "The total number of files being tracked by the name node.", "{file}", ["node_name" : { mbean -> mbean.getProperty("tag.Hostname") }], "FilesTotal", otel.&longUpDownCounterCallback) -otel.instrument(beanHadoopNameNodeFS, "hadoop.name_node.file.load", "The current number of concurrent file accesses.", "{operations}", +otel.instrument(beanHadoopNameNodeFS, "hadoop.name_node.file.load", "The current number of concurrent file accesses.", "{operation}", ["node_name" : { mbean -> mbean.getProperty("tag.Hostname") }], "TotalLoad", otel.&longUpDownCounterCallback) -otel.instrument(beanHadoopNameNodeFS, "hadoop.name_node.data_node.count", "The number of data nodes reporting to the name node.", "{nodes}", +otel.instrument(beanHadoopNameNodeFS, "hadoop.name_node.data_node.count", "The number of data nodes reporting to the name 
node.", "{node}", ["node_name" : { mbean -> mbean.getProperty("tag.Hostname") }], ["NumLiveDataNodes":["state":{"live"}], "NumDeadDataNodes": ["state":{"dead"}]], otel.&longUpDownCounterCallback) diff --git a/jmx-scraper/src/integrationTest/java/io/opentelemetry/contrib/jmxscraper/target_systems/HadoopIntegrationTest.java b/jmx-scraper/src/integrationTest/java/io/opentelemetry/contrib/jmxscraper/target_systems/HadoopIntegrationTest.java index b8bcabcdd..60b0ed339 100644 --- a/jmx-scraper/src/integrationTest/java/io/opentelemetry/contrib/jmxscraper/target_systems/HadoopIntegrationTest.java +++ b/jmx-scraper/src/integrationTest/java/io/opentelemetry/contrib/jmxscraper/target_systems/HadoopIntegrationTest.java @@ -57,7 +57,7 @@ protected void verifyMetrics() { metric, "hadoop.name_node.block.count", "The total number of blocks on the name node.", - "{blocks}", + "{block}", /* isMonotonic= */false, attrs -> attrs.containsOnly(entry("node_name", "test-host"))), metric -> @@ -65,7 +65,7 @@ protected void verifyMetrics() { metric, "hadoop.name_node.block.missing", "The number of blocks reported as missing to the name node.", - "{blocks}", + "{block}", /* isMonotonic= */false, attrs -> attrs.containsOnly(entry("node_name", "test-host"))), metric -> @@ -73,7 +73,7 @@ protected void verifyMetrics() { metric, "hadoop.name_node.block.corrupt", "The number of blocks reported as corrupt to the name node.", - "{blocks}", + "{block}", /* isMonotonic= */false, attrs -> attrs.containsOnly(entry("node_name", "test-host"))), metric -> @@ -81,7 +81,7 @@ protected void verifyMetrics() { metric, "hadoop.name_node.volume.failed", "The number of failed volumes reported to the name node.", - "{volumes}", + "{volume}", /* isMonotonic= */false, attrs -> attrs.containsOnly(entry("node_name", "test-host"))), metric -> @@ -89,7 +89,7 @@ protected void verifyMetrics() { metric, "hadoop.name_node.file.count", "The total number of files being tracked by the name node.", - "{files}", + "{file}", /* isMonotonic= */false, attrs -> attrs.containsOnly(entry("node_name", "test-host"))), metric -> @@ -97,7 +97,7 @@ protected void verifyMetrics() { metric, "hadoop.name_node.file.load", "The current number of concurrent file accesses.", - "{operations}", + "{operation}", /* isMonotonic= */false, attrs -> attrs.containsOnly(entry("node_name", "test-host"))), metric -> @@ -105,7 +105,7 @@ protected void verifyMetrics() { metric, "hadoop.name_node.data_node.count", "The number of data nodes reporting to the name node.", - "{nodes}", + "{node}", /* isMonotonic= */false, attrs -> attrs.containsOnly(entry("node_name", "test-host"), entry("state", "live")), diff --git a/jmx-scraper/src/main/resources/hadoop-env.sh b/jmx-scraper/src/integrationTest/resources/hadoop-env.sh similarity index 100% rename from jmx-scraper/src/main/resources/hadoop-env.sh rename to jmx-scraper/src/integrationTest/resources/hadoop-env.sh diff --git a/jmx-scraper/src/main/resources/hadoop.yaml b/jmx-scraper/src/main/resources/hadoop.yaml index 7a3844c94..c604ad21b 100644 --- a/jmx-scraper/src/main/resources/hadoop.yaml +++ b/jmx-scraper/src/main/resources/hadoop.yaml @@ -19,37 +19,37 @@ rules: BlocksTotal: metric: block.count type: updowncounter - unit: "{blocks}" + unit: "{block}" desc: The total number of blocks on the name node. MissingBlocks: metric: block.missing type: updowncounter - unit: "{blocks}" + unit: "{block}" desc: The number of blocks reported as missing to the name node. 
CorruptBlocks: metric: block.corrupt type: updowncounter - unit: "{blocks}" + unit: "{block}" desc: The number of blocks reported as corrupt to the name node. VolumeFailuresTotal: metric: volume.failed type: updowncounter - unit: "{volumes}" + unit: "{volume}" desc: The number of failed volumes reported to the name node. FilesTotal: metric: file.count type: updowncounter - unit: "{files}" + unit: "{file}" desc: The total number of files being tracked by the name node. TotalLoad: metric: file.load type: updowncounter - unit: "{operations}" + unit: "{operation}" desc: The current number of concurrent file accesses. NumLiveDataNodes: metric: &metric data_node.count type: updowncounter - unit: &unit "{nodes}" + unit: &unit "{node}" desc: &desc The number of data nodes reporting to the name node. metricAttribute: state: const(live) From 761a2fabedac753f9c50ebb2da195da6443ba41f Mon Sep 17 00:00:00 2001 From: Magda Wojtowicz Date: Mon, 25 Nov 2024 16:49:38 +0100 Subject: [PATCH 09/15] spotless --- .../target_systems/HadoopIntegrationTest.java | 22 +++++++++---------- 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/jmx-scraper/src/integrationTest/java/io/opentelemetry/contrib/jmxscraper/target_systems/HadoopIntegrationTest.java b/jmx-scraper/src/integrationTest/java/io/opentelemetry/contrib/jmxscraper/target_systems/HadoopIntegrationTest.java index 60b0ed339..8e28cc7b1 100644 --- a/jmx-scraper/src/integrationTest/java/io/opentelemetry/contrib/jmxscraper/target_systems/HadoopIntegrationTest.java +++ b/jmx-scraper/src/integrationTest/java/io/opentelemetry/contrib/jmxscraper/target_systems/HadoopIntegrationTest.java @@ -42,7 +42,7 @@ protected void verifyMetrics() { "hadoop.name_node.capacity.usage", "The current used capacity across all data nodes reporting to the name node.", "by", - /* isMonotonic= */false, + /* isMonotonic= */ false, attrs -> attrs.contains(entry("node_name", "test-host"))), metric -> assertSumWithAttributes( @@ -50,7 +50,7 @@ protected void verifyMetrics() { "hadoop.name_node.capacity.limit", "The total capacity allotted to data nodes reporting to the name node.", "by", - /* isMonotonic= */false, + /* isMonotonic= */ false, attrs -> attrs.containsOnly(entry("node_name", "test-host"))), metric -> assertSumWithAttributes( @@ -58,7 +58,7 @@ protected void verifyMetrics() { "hadoop.name_node.block.count", "The total number of blocks on the name node.", "{block}", - /* isMonotonic= */false, + /* isMonotonic= */ false, attrs -> attrs.containsOnly(entry("node_name", "test-host"))), metric -> assertSumWithAttributes( @@ -66,7 +66,7 @@ protected void verifyMetrics() { "hadoop.name_node.block.missing", "The number of blocks reported as missing to the name node.", "{block}", - /* isMonotonic= */false, + /* isMonotonic= */ false, attrs -> attrs.containsOnly(entry("node_name", "test-host"))), metric -> assertSumWithAttributes( @@ -74,7 +74,7 @@ protected void verifyMetrics() { "hadoop.name_node.block.corrupt", "The number of blocks reported as corrupt to the name node.", "{block}", - /* isMonotonic= */false, + /* isMonotonic= */ false, attrs -> attrs.containsOnly(entry("node_name", "test-host"))), metric -> assertSumWithAttributes( @@ -82,7 +82,7 @@ protected void verifyMetrics() { "hadoop.name_node.volume.failed", "The number of failed volumes reported to the name node.", "{volume}", - /* isMonotonic= */false, + /* isMonotonic= */ false, attrs -> attrs.containsOnly(entry("node_name", "test-host"))), metric -> assertSumWithAttributes( @@ -90,7 +90,7 @@ protected void 
verifyMetrics() { "hadoop.name_node.file.count", "The total number of files being tracked by the name node.", "{file}", - /* isMonotonic= */false, + /* isMonotonic= */ false, attrs -> attrs.containsOnly(entry("node_name", "test-host"))), metric -> assertSumWithAttributes( @@ -98,7 +98,7 @@ protected void verifyMetrics() { "hadoop.name_node.file.load", "The current number of concurrent file accesses.", "{operation}", - /* isMonotonic= */false, + /* isMonotonic= */ false, attrs -> attrs.containsOnly(entry("node_name", "test-host"))), metric -> assertSumWithAttributes( @@ -106,12 +106,10 @@ protected void verifyMetrics() { "hadoop.name_node.data_node.count", "The number of data nodes reporting to the name node.", "{node}", - /* isMonotonic= */false, + /* isMonotonic= */ false, attrs -> attrs.containsOnly(entry("node_name", "test-host"), entry("state", "live")), attrs -> - attrs.containsOnly(entry("node_name", "test-host"), entry("state", "dead"))) - ); + attrs.containsOnly(entry("node_name", "test-host"), entry("state", "dead")))); } } - From f51f100a3ed784f9e54e59f0a7f2ca106c6d87f4 Mon Sep 17 00:00:00 2001 From: Magda Wojtowicz Date: Tue, 3 Dec 2024 15:04:47 +0100 Subject: [PATCH 10/15] minor fixes --- .../jmxscraper/target_systems/HadoopIntegrationTest.java | 9 +++++---- jmx-scraper/src/main/resources/hadoop.yaml | 1 - 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/jmx-scraper/src/integrationTest/java/io/opentelemetry/contrib/jmxscraper/target_systems/HadoopIntegrationTest.java b/jmx-scraper/src/integrationTest/java/io/opentelemetry/contrib/jmxscraper/target_systems/HadoopIntegrationTest.java index 8e28cc7b1..dd78a2c77 100644 --- a/jmx-scraper/src/integrationTest/java/io/opentelemetry/contrib/jmxscraper/target_systems/HadoopIntegrationTest.java +++ b/jmx-scraper/src/integrationTest/java/io/opentelemetry/contrib/jmxscraper/target_systems/HadoopIntegrationTest.java @@ -16,16 +16,17 @@ public class HadoopIntegrationTest extends TargetSystemIntegrationTest { + private static final int HADOOP_PORT = 50070; + @Override protected GenericContainer createTargetContainer(int jmxPort) { return new GenericContainer<>("bmedora/hadoop:2.9-base") .withCopyFileToContainer( MountableFile.forClasspathResource("hadoop-env.sh", 0400), "/hadoop/etc/hadoop/hadoop-env.sh") - .waitingFor(Wait.forListeningPort().withStartupTimeout(Duration.ofMinutes(200))) - .withExposedPorts(jmxPort) - .withCreateContainerCmdModifier(cmd -> cmd.withHostName("test-host")) - .waitingFor(Wait.forListeningPort()); + .waitingFor(Wait.forListeningPort().withStartupTimeout(Duration.ofMinutes(2))) + .withExposedPorts(HADOOP_PORT, jmxPort) + .waitingFor(Wait.forListeningPorts(HADOOP_PORT, jmxPort)); } @Override diff --git a/jmx-scraper/src/main/resources/hadoop.yaml b/jmx-scraper/src/main/resources/hadoop.yaml index c604ad21b..20e70a104 100644 --- a/jmx-scraper/src/main/resources/hadoop.yaml +++ b/jmx-scraper/src/main/resources/hadoop.yaml @@ -1,7 +1,6 @@ --- rules: - bean: Hadoop:service=NameNode,name=FSNamesystem - unit: "1" prefix: hadoop.name_node. 
metricAttribute: node_name: beanattr(tag\.Hostname) From 241209c15a6fc75bb26f285e52ecad0779a3bfc3 Mon Sep 17 00:00:00 2001 From: Magda Wojtowicz Date: Tue, 3 Dec 2024 16:08:07 +0100 Subject: [PATCH 11/15] cmd line back --- .../contrib/jmxscraper/target_systems/HadoopIntegrationTest.java | 1 + 1 file changed, 1 insertion(+) diff --git a/jmx-scraper/src/integrationTest/java/io/opentelemetry/contrib/jmxscraper/target_systems/HadoopIntegrationTest.java b/jmx-scraper/src/integrationTest/java/io/opentelemetry/contrib/jmxscraper/target_systems/HadoopIntegrationTest.java index dd78a2c77..59fdf20d9 100644 --- a/jmx-scraper/src/integrationTest/java/io/opentelemetry/contrib/jmxscraper/target_systems/HadoopIntegrationTest.java +++ b/jmx-scraper/src/integrationTest/java/io/opentelemetry/contrib/jmxscraper/target_systems/HadoopIntegrationTest.java @@ -26,6 +26,7 @@ protected GenericContainer createTargetContainer(int jmxPort) { "/hadoop/etc/hadoop/hadoop-env.sh") .waitingFor(Wait.forListeningPort().withStartupTimeout(Duration.ofMinutes(2))) .withExposedPorts(HADOOP_PORT, jmxPort) + .withCreateContainerCmdModifier(cmd -> cmd.withHostName("test-host")) .waitingFor(Wait.forListeningPorts(HADOOP_PORT, jmxPort)); } From 0442b3afde9666ee86b94ff4c5cf518131cb8779 Mon Sep 17 00:00:00 2001 From: Magda Wojtowicz Date: Tue, 17 Dec 2024 14:46:13 +0100 Subject: [PATCH 12/15] changed by to By to match semconv --- jmx-metrics/src/main/resources/target-systems/hadoop.groovy | 4 ++-- jmx-scraper/src/main/resources/hadoop.yaml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/jmx-metrics/src/main/resources/target-systems/hadoop.groovy b/jmx-metrics/src/main/resources/target-systems/hadoop.groovy index 5c381c3ff..0a2b51b5d 100644 --- a/jmx-metrics/src/main/resources/target-systems/hadoop.groovy +++ b/jmx-metrics/src/main/resources/target-systems/hadoop.groovy @@ -15,10 +15,10 @@ */ def beanHadoopNameNodeFS = otel.mbean("Hadoop:service=NameNode,name=FSNamesystem") -otel.instrument(beanHadoopNameNodeFS, "hadoop.name_node.capacity.usage", "The current used capacity across all data nodes reporting to the name node.", "by", +otel.instrument(beanHadoopNameNodeFS, "hadoop.name_node.capacity.usage", "The current used capacity across all data nodes reporting to the name node.", "By", ["node_name" : { mbean -> mbean.getProperty("tag.Hostname") }], "CapacityUsed", otel.&longUpDownCounterCallback) -otel.instrument(beanHadoopNameNodeFS, "hadoop.name_node.capacity.limit", "The total capacity allotted to data nodes reporting to the name node.", "by", +otel.instrument(beanHadoopNameNodeFS, "hadoop.name_node.capacity.limit", "The total capacity allotted to data nodes reporting to the name node.", "By", ["node_name" : { mbean -> mbean.getProperty("tag.Hostname") }], "CapacityTotal", otel.&longUpDownCounterCallback) otel.instrument(beanHadoopNameNodeFS, "hadoop.name_node.block.count", "The total number of blocks on the name node.", "{block}", diff --git a/jmx-scraper/src/main/resources/hadoop.yaml b/jmx-scraper/src/main/resources/hadoop.yaml index 20e70a104..a024d3bf4 100644 --- a/jmx-scraper/src/main/resources/hadoop.yaml +++ b/jmx-scraper/src/main/resources/hadoop.yaml @@ -8,12 +8,12 @@ rules: CapacityUsed: metric: capacity.usage type: updowncounter - unit: by + unit: By desc: The current used capacity across all data nodes reporting to the name node. CapacityTotal: metric: capacity.limit type: updowncounter - unit: by + unit: By desc: The total capacity allotted to data nodes reporting to the name node. 
BlocksTotal: metric: block.count From 5a133799d675a7af204af656f8a5e5d6950f7cee Mon Sep 17 00:00:00 2001 From: robsunday Date: Mon, 27 Jan 2025 10:28:09 +0100 Subject: [PATCH 13/15] Use of MetricsVerifier --- .../target_systems/HadoopIntegrationTest.java | 165 +++++++++--------- 1 file changed, 85 insertions(+), 80 deletions(-) diff --git a/jmx-scraper/src/integrationTest/java/io/opentelemetry/contrib/jmxscraper/target_systems/HadoopIntegrationTest.java b/jmx-scraper/src/integrationTest/java/io/opentelemetry/contrib/jmxscraper/target_systems/HadoopIntegrationTest.java index 59fdf20d9..b89225629 100644 --- a/jmx-scraper/src/integrationTest/java/io/opentelemetry/contrib/jmxscraper/target_systems/HadoopIntegrationTest.java +++ b/jmx-scraper/src/integrationTest/java/io/opentelemetry/contrib/jmxscraper/target_systems/HadoopIntegrationTest.java @@ -5,10 +5,12 @@ package io.opentelemetry.contrib.jmxscraper.target_systems; -import static io.opentelemetry.contrib.jmxscraper.target_systems.MetricAssertions.assertSumWithAttributes; -import static org.assertj.core.api.Assertions.entry; +import static io.opentelemetry.contrib.jmxscraper.assertions.DataPointAttributes.attribute; +import static io.opentelemetry.contrib.jmxscraper.assertions.DataPointAttributes.attributeGroup; import io.opentelemetry.contrib.jmxscraper.JmxScraperContainer; +import io.opentelemetry.contrib.jmxscraper.assertions.AttributeMatcher; +import java.nio.file.Path; import java.time.Duration; import org.testcontainers.containers.GenericContainer; import org.testcontainers.containers.wait.strategy.Wait; @@ -31,87 +33,90 @@ protected GenericContainer createTargetContainer(int jmxPort) { } @Override - protected JmxScraperContainer customizeScraperContainer(JmxScraperContainer scraper) { + protected JmxScraperContainer customizeScraperContainer( + JmxScraperContainer scraper, GenericContainer target, Path tempDir) { return scraper.withTargetSystem("hadoop"); } @Override - protected void verifyMetrics() { - waitAndAssertMetrics( - metric -> - assertSumWithAttributes( - metric, - "hadoop.name_node.capacity.usage", - "The current used capacity across all data nodes reporting to the name node.", - "by", - /* isMonotonic= */ false, - attrs -> attrs.contains(entry("node_name", "test-host"))), - metric -> - assertSumWithAttributes( - metric, - "hadoop.name_node.capacity.limit", - "The total capacity allotted to data nodes reporting to the name node.", - "by", - /* isMonotonic= */ false, - attrs -> attrs.containsOnly(entry("node_name", "test-host"))), - metric -> - assertSumWithAttributes( - metric, - "hadoop.name_node.block.count", - "The total number of blocks on the name node.", - "{block}", - /* isMonotonic= */ false, - attrs -> attrs.containsOnly(entry("node_name", "test-host"))), - metric -> - assertSumWithAttributes( - metric, - "hadoop.name_node.block.missing", - "The number of blocks reported as missing to the name node.", - "{block}", - /* isMonotonic= */ false, - attrs -> attrs.containsOnly(entry("node_name", "test-host"))), - metric -> - assertSumWithAttributes( - metric, - "hadoop.name_node.block.corrupt", - "The number of blocks reported as corrupt to the name node.", - "{block}", - /* isMonotonic= */ false, - attrs -> attrs.containsOnly(entry("node_name", "test-host"))), - metric -> - assertSumWithAttributes( - metric, - "hadoop.name_node.volume.failed", - "The number of failed volumes reported to the name node.", - "{volume}", - /* isMonotonic= */ false, - attrs -> attrs.containsOnly(entry("node_name", "test-host"))), - metric -> 
- assertSumWithAttributes( - metric, - "hadoop.name_node.file.count", - "The total number of files being tracked by the name node.", - "{file}", - /* isMonotonic= */ false, - attrs -> attrs.containsOnly(entry("node_name", "test-host"))), - metric -> - assertSumWithAttributes( - metric, - "hadoop.name_node.file.load", - "The current number of concurrent file accesses.", - "{operation}", - /* isMonotonic= */ false, - attrs -> attrs.containsOnly(entry("node_name", "test-host"))), - metric -> - assertSumWithAttributes( - metric, - "hadoop.name_node.data_node.count", - "The number of data nodes reporting to the name node.", - "{node}", - /* isMonotonic= */ false, - attrs -> - attrs.containsOnly(entry("node_name", "test-host"), entry("state", "live")), - attrs -> - attrs.containsOnly(entry("node_name", "test-host"), entry("state", "dead")))); + protected MetricsVerifier createMetricsVerifier() { + AttributeMatcher nodeNameAttribute = attribute("node_name", "test-host"); + return MetricsVerifier.create() + .add( + "hadoop.name_node.capacity.usage", + metric -> + metric + .hasDescription( + "The current used capacity across all data nodes reporting to the name node.") + .hasUnit("By") + .isUpDownCounter() + .hasDataPointsWithOneAttribute(nodeNameAttribute)) + .add( + "hadoop.name_node.capacity.limit", + metric -> + metric + .hasDescription( + "The total capacity allotted to data nodes reporting to the name node.") + .hasUnit("By") + .isUpDownCounter() + .hasDataPointsWithOneAttribute(nodeNameAttribute)) + .add( + "hadoop.name_node.block.count", + metric -> + metric + .hasDescription("The total number of blocks on the name node.") + .hasUnit("{block}") + .isUpDownCounter() + .hasDataPointsWithOneAttribute(nodeNameAttribute)) + .add( + "hadoop.name_node.block.missing", + metric -> + metric + .hasDescription("The number of blocks reported as missing to the name node.") + .hasUnit("{block}") + .isUpDownCounter() + .hasDataPointsWithOneAttribute(nodeNameAttribute)) + .add( + "hadoop.name_node.block.corrupt", + metric -> + metric + .hasDescription("The number of blocks reported as corrupt to the name node.") + .hasUnit("{block}") + .isUpDownCounter() + .hasDataPointsWithOneAttribute(nodeNameAttribute)) + .add( + "hadoop.name_node.volume.failed", + metric -> + metric + .hasDescription("The number of failed volumes reported to the name node.") + .hasUnit("{volume}") + .isUpDownCounter() + .hasDataPointsWithOneAttribute(nodeNameAttribute)) + .add( + "hadoop.name_node.file.count", + metric -> + metric + .hasDescription("The total number of files being tracked by the name node.") + .hasUnit("{file}") + .isUpDownCounter() + .hasDataPointsWithOneAttribute(nodeNameAttribute)) + .add( + "hadoop.name_node.file.load", + metric -> + metric + .hasDescription("The current number of concurrent file accesses.") + .hasUnit("{operation}") + .isUpDownCounter() + .hasDataPointsWithOneAttribute(nodeNameAttribute)) + .add( + "hadoop.name_node.data_node.count", + metric -> + metric + .hasDescription("The number of data nodes reporting to the name node.") + .hasUnit("{node}") + .isUpDownCounter() + .hasDataPointsWithAttributes( + attributeGroup(nodeNameAttribute, attribute("state", "live")), + attributeGroup(nodeNameAttribute, attribute("state", "dead")))); } } From 8b17ac5bed0ccb9f9b7aa09324123c614e52375e Mon Sep 17 00:00:00 2001 From: robsunday Date: Mon, 27 Jan 2025 14:55:07 +0100 Subject: [PATCH 14/15] Fixed units in JMX Metrics integration test. 
--- .../jmxmetrics/target_systems/HadoopIntegrationTest.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/jmx-metrics/src/integrationTest/java/io/opentelemetry/contrib/jmxmetrics/target_systems/HadoopIntegrationTest.java b/jmx-metrics/src/integrationTest/java/io/opentelemetry/contrib/jmxmetrics/target_systems/HadoopIntegrationTest.java index 0f00eafd8..c871e28ea 100644 --- a/jmx-metrics/src/integrationTest/java/io/opentelemetry/contrib/jmxmetrics/target_systems/HadoopIntegrationTest.java +++ b/jmx-metrics/src/integrationTest/java/io/opentelemetry/contrib/jmxmetrics/target_systems/HadoopIntegrationTest.java @@ -46,14 +46,14 @@ void endToEnd() { metric, "hadoop.name_node.capacity.usage", "The current used capacity across all data nodes reporting to the name node.", - "by", + "By", attrs -> attrs.contains(entry("node_name", "test-host"))), metric -> assertSumWithAttributes( metric, "hadoop.name_node.capacity.limit", "The total capacity allotted to data nodes reporting to the name node.", - "by", + "By", attrs -> attrs.containsOnly(entry("node_name", "test-host"))), metric -> assertSumWithAttributes( From 667bab0cf8efb3e6cc6d65f3ac7f8023624bbbb8 Mon Sep 17 00:00:00 2001 From: robsunday Date: Fri, 31 Jan 2025 11:03:59 +0100 Subject: [PATCH 15/15] Share common metric type --- jmx-scraper/src/main/resources/hadoop.yaml | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/jmx-scraper/src/main/resources/hadoop.yaml b/jmx-scraper/src/main/resources/hadoop.yaml index a024d3bf4..4336b16d9 100644 --- a/jmx-scraper/src/main/resources/hadoop.yaml +++ b/jmx-scraper/src/main/resources/hadoop.yaml @@ -2,59 +2,50 @@ rules: - bean: Hadoop:service=NameNode,name=FSNamesystem prefix: hadoop.name_node. + type: updowncounter metricAttribute: node_name: beanattr(tag\.Hostname) mapping: CapacityUsed: metric: capacity.usage - type: updowncounter unit: By desc: The current used capacity across all data nodes reporting to the name node. CapacityTotal: metric: capacity.limit - type: updowncounter unit: By desc: The total capacity allotted to data nodes reporting to the name node. BlocksTotal: metric: block.count - type: updowncounter unit: "{block}" desc: The total number of blocks on the name node. MissingBlocks: metric: block.missing - type: updowncounter unit: "{block}" desc: The number of blocks reported as missing to the name node. CorruptBlocks: metric: block.corrupt - type: updowncounter unit: "{block}" desc: The number of blocks reported as corrupt to the name node. VolumeFailuresTotal: metric: volume.failed - type: updowncounter unit: "{volume}" desc: The number of failed volumes reported to the name node. FilesTotal: metric: file.count - type: updowncounter unit: "{file}" desc: The total number of files being tracked by the name node. TotalLoad: metric: file.load - type: updowncounter unit: "{operation}" desc: The current number of concurrent file accesses. NumLiveDataNodes: metric: &metric data_node.count - type: updowncounter unit: &unit "{node}" desc: &desc The number of data nodes reporting to the name node. metricAttribute: state: const(live) NumDeadDataNodes: metric: *metric - type: updowncounter unit: *unit desc: *desc metricAttribute:
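
Note: assembled from the hunks above for reference only, the NameNode rule in hadoop.yaml after the full series should look roughly like the abbreviated sketch below (only a few representative mappings are shown, so the actual file may differ). The point of the last patch is that the rule-level "type: updowncounter" is inherited by every mapping, and the YAML anchors let NumLiveDataNodes and NumDeadDataNodes report into a single data_node.count metric distinguished only by the "state" attribute.

---
rules:
  - bean: Hadoop:service=NameNode,name=FSNamesystem
    prefix: hadoop.name_node.
    # declared once at rule level; applies to every mapping below
    type: updowncounter
    metricAttribute:
      node_name: beanattr(tag\.Hostname)
    mapping:
      CapacityUsed:
        metric: capacity.usage
        unit: By
        desc: The current used capacity across all data nodes reporting to the name node.
      BlocksTotal:
        metric: block.count
        unit: "{block}"
        desc: The total number of blocks on the name node.
      NumLiveDataNodes:
        # anchors capture the metric name, unit, and description for reuse
        metric: &metric data_node.count
        unit: &unit "{node}"
        desc: &desc The number of data nodes reporting to the name node.
        metricAttribute:
          state: const(live)
      NumDeadDataNodes:
        # aliases reuse the values anchored above, so both MBean attributes
        # feed the same metric, split by the "state" attribute
        metric: *metric
        unit: *unit
        desc: *desc
        metricAttribute:
          state: const(dead)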