
Commit 2085148

robsunday and magda-woj authored and committed
JMX Scraper: YAML file and integration test hadoop (open-telemetry#1675)
Co-authored-by: Magda Wojtowicz <[email protected]>
1 parent b2155b3 commit 2085148

5 files changed: +291 -18 lines


jmx-metrics/src/integrationTest/java/io/opentelemetry/contrib/jmxmetrics/target_systems/HadoopIntegrationTest.java

+9 -9
@@ -46,63 +46,63 @@ void endToEnd() {
                 metric,
                 "hadoop.name_node.capacity.usage",
                 "The current used capacity across all data nodes reporting to the name node.",
-                "by",
+                "By",
                 attrs -> attrs.contains(entry("node_name", "test-host"))),
         metric ->
             assertSumWithAttributes(
                 metric,
                 "hadoop.name_node.capacity.limit",
                 "The total capacity allotted to data nodes reporting to the name node.",
-                "by",
+                "By",
                 attrs -> attrs.containsOnly(entry("node_name", "test-host"))),
         metric ->
             assertSumWithAttributes(
                 metric,
                 "hadoop.name_node.block.count",
                 "The total number of blocks on the name node.",
-                "{blocks}",
+                "{block}",
                 attrs -> attrs.containsOnly(entry("node_name", "test-host"))),
         metric ->
             assertSumWithAttributes(
                 metric,
                 "hadoop.name_node.block.missing",
                 "The number of blocks reported as missing to the name node.",
-                "{blocks}",
+                "{block}",
                 attrs -> attrs.containsOnly(entry("node_name", "test-host"))),
         metric ->
             assertSumWithAttributes(
                 metric,
                 "hadoop.name_node.block.corrupt",
                 "The number of blocks reported as corrupt to the name node.",
-                "{blocks}",
+                "{block}",
                 attrs -> attrs.containsOnly(entry("node_name", "test-host"))),
         metric ->
             assertSumWithAttributes(
                 metric,
                 "hadoop.name_node.volume.failed",
                 "The number of failed volumes reported to the name node.",
-                "{volumes}",
+                "{volume}",
                 attrs -> attrs.containsOnly(entry("node_name", "test-host"))),
         metric ->
             assertSumWithAttributes(
                 metric,
                 "hadoop.name_node.file.count",
                 "The total number of files being tracked by the name node.",
-                "{files}",
+                "{file}",
                 attrs -> attrs.containsOnly(entry("node_name", "test-host"))),
         metric ->
             assertSumWithAttributes(
                 metric,
                 "hadoop.name_node.file.load",
                 "The current number of concurrent file accesses.",
-                "{operations}",
+                "{operation}",
                 attrs -> attrs.containsOnly(entry("node_name", "test-host"))),
         metric ->
             assertSumWithAttributes(
                 metric,
                 "hadoop.name_node.data_node.count",
                 "The number of data nodes reporting to the name node.",
-                "{nodes}",
+                "{node}",
                 attrs ->
                     attrs.containsOnly(entry("node_name", "test-host"), entry("state", "live")),
                 attrs ->

jmx-metrics/src/main/resources/target-systems/hadoop.groovy

+9 -9
@@ -15,31 +15,31 @@
  */

 def beanHadoopNameNodeFS = otel.mbean("Hadoop:service=NameNode,name=FSNamesystem")
-otel.instrument(beanHadoopNameNodeFS, "hadoop.name_node.capacity.usage", "The current used capacity across all data nodes reporting to the name node.", "by",
+otel.instrument(beanHadoopNameNodeFS, "hadoop.name_node.capacity.usage", "The current used capacity across all data nodes reporting to the name node.", "By",
   ["node_name" : { mbean -> mbean.getProperty("tag.Hostname") }],
   "CapacityUsed", otel.&longUpDownCounterCallback)
-otel.instrument(beanHadoopNameNodeFS, "hadoop.name_node.capacity.limit", "The total capacity allotted to data nodes reporting to the name node.", "by",
+otel.instrument(beanHadoopNameNodeFS, "hadoop.name_node.capacity.limit", "The total capacity allotted to data nodes reporting to the name node.", "By",
   ["node_name" : { mbean -> mbean.getProperty("tag.Hostname") }],
   "CapacityTotal", otel.&longUpDownCounterCallback)
-otel.instrument(beanHadoopNameNodeFS, "hadoop.name_node.block.count", "The total number of blocks on the name node.", "{blocks}",
+otel.instrument(beanHadoopNameNodeFS, "hadoop.name_node.block.count", "The total number of blocks on the name node.", "{block}",
   ["node_name" : { mbean -> mbean.getProperty("tag.Hostname") }],
   "BlocksTotal", otel.&longUpDownCounterCallback)
-otel.instrument(beanHadoopNameNodeFS, "hadoop.name_node.block.missing", "The number of blocks reported as missing to the name node.", "{blocks}",
+otel.instrument(beanHadoopNameNodeFS, "hadoop.name_node.block.missing", "The number of blocks reported as missing to the name node.", "{block}",
   ["node_name" : { mbean -> mbean.getProperty("tag.Hostname") }],
   "MissingBlocks", otel.&longUpDownCounterCallback)
-otel.instrument(beanHadoopNameNodeFS, "hadoop.name_node.block.corrupt", "The number of blocks reported as corrupt to the name node.", "{blocks}",
+otel.instrument(beanHadoopNameNodeFS, "hadoop.name_node.block.corrupt", "The number of blocks reported as corrupt to the name node.", "{block}",
   ["node_name" : { mbean -> mbean.getProperty("tag.Hostname") }],
   "CorruptBlocks", otel.&longUpDownCounterCallback)
-otel.instrument(beanHadoopNameNodeFS, "hadoop.name_node.volume.failed", "The number of failed volumes reported to the name node.", "{volumes}",
+otel.instrument(beanHadoopNameNodeFS, "hadoop.name_node.volume.failed", "The number of failed volumes reported to the name node.", "{volume}",
   ["node_name" : { mbean -> mbean.getProperty("tag.Hostname") }],
   "VolumeFailuresTotal", otel.&longUpDownCounterCallback)
-otel.instrument(beanHadoopNameNodeFS, "hadoop.name_node.file.count", "The total number of files being tracked by the name node.", "{files}",
+otel.instrument(beanHadoopNameNodeFS, "hadoop.name_node.file.count", "The total number of files being tracked by the name node.", "{file}",
   ["node_name" : { mbean -> mbean.getProperty("tag.Hostname") }],
   "FilesTotal", otel.&longUpDownCounterCallback)
-otel.instrument(beanHadoopNameNodeFS, "hadoop.name_node.file.load", "The current number of concurrent file accesses.", "{operations}",
+otel.instrument(beanHadoopNameNodeFS, "hadoop.name_node.file.load", "The current number of concurrent file accesses.", "{operation}",
   ["node_name" : { mbean -> mbean.getProperty("tag.Hostname") }],
   "TotalLoad", otel.&longUpDownCounterCallback)
-otel.instrument(beanHadoopNameNodeFS, "hadoop.name_node.data_node.count", "The number of data nodes reporting to the name node.", "{nodes}",
+otel.instrument(beanHadoopNameNodeFS, "hadoop.name_node.data_node.count", "The number of data nodes reporting to the name node.", "{node}",
   ["node_name" : { mbean -> mbean.getProperty("tag.Hostname") }],
   ["NumLiveDataNodes":["state":{"live"}], "NumDeadDataNodes": ["state":{"dead"}]],
   otel.&longUpDownCounterCallback)
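Conceptually, each otel.instrument(...) call above binds one attribute of the Hadoop:service=NameNode,name=FSNamesystem MBean to an asynchronous up-down counter. A rough standalone sketch of that idea with the OpenTelemetry Java API (this is not the Groovy helper's actual implementation; the meter name, the in-process platform MBeanServer, and the error handling are illustrative assumptions):

```java
import io.opentelemetry.api.GlobalOpenTelemetry;
import io.opentelemetry.api.metrics.Meter;
import java.lang.management.ManagementFactory;
import javax.management.MBeanServer;
import javax.management.ObjectName;

public class NameNodeBlockCountSketch {
  public static void main(String[] args) throws Exception {
    // Illustrative only: assumes this runs in a JVM that can see the NameNode MBean
    // and that an OpenTelemetry SDK has been configured (otherwise GlobalOpenTelemetry
    // returns a no-op meter).
    MBeanServer server = ManagementFactory.getPlatformMBeanServer();
    ObjectName fsNamesystem = new ObjectName("Hadoop:service=NameNode,name=FSNamesystem");

    Meter meter = GlobalOpenTelemetry.getMeter("hadoop-example");
    meter
        .upDownCounterBuilder("hadoop.name_node.block.count")
        .setDescription("The total number of blocks on the name node.")
        .setUnit("{block}")
        .buildWithCallback(
            measurement -> {
              try {
                // Same MBean attribute the Groovy rule maps: BlocksTotal.
                Number blocks = (Number) server.getAttribute(fsNamesystem, "BlocksTotal");
                measurement.record(blocks.longValue());
              } catch (Exception e) {
                // Skip this observation if the MBean is not available yet.
              }
            });
  }
}
```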

HadoopIntegrationTest.java (new file, package io.opentelemetry.contrib.jmxscraper.target_systems)

+122
/*
 * Copyright The OpenTelemetry Authors
 * SPDX-License-Identifier: Apache-2.0
 */

package io.opentelemetry.contrib.jmxscraper.target_systems;

import static io.opentelemetry.contrib.jmxscraper.assertions.DataPointAttributes.attribute;
import static io.opentelemetry.contrib.jmxscraper.assertions.DataPointAttributes.attributeGroup;

import io.opentelemetry.contrib.jmxscraper.JmxScraperContainer;
import io.opentelemetry.contrib.jmxscraper.assertions.AttributeMatcher;
import java.nio.file.Path;
import java.time.Duration;
import org.testcontainers.containers.GenericContainer;
import org.testcontainers.containers.wait.strategy.Wait;
import org.testcontainers.utility.MountableFile;

public class HadoopIntegrationTest extends TargetSystemIntegrationTest {

  private static final int HADOOP_PORT = 50070;

  @Override
  protected GenericContainer<?> createTargetContainer(int jmxPort) {
    return new GenericContainer<>("bmedora/hadoop:2.9-base")
        .withCopyFileToContainer(
            MountableFile.forClasspathResource("hadoop-env.sh", 0400),
            "/hadoop/etc/hadoop/hadoop-env.sh")
        .waitingFor(Wait.forListeningPort().withStartupTimeout(Duration.ofMinutes(2)))
        .withExposedPorts(HADOOP_PORT, jmxPort)
        .withCreateContainerCmdModifier(cmd -> cmd.withHostName("test-host"))
        .waitingFor(Wait.forListeningPorts(HADOOP_PORT, jmxPort));
  }

  @Override
  protected JmxScraperContainer customizeScraperContainer(
      JmxScraperContainer scraper, GenericContainer<?> target, Path tempDir) {
    return scraper.withTargetSystem("hadoop");
  }

  @Override
  protected MetricsVerifier createMetricsVerifier() {
    AttributeMatcher nodeNameAttribute = attribute("node_name", "test-host");
    return MetricsVerifier.create()
        .add(
            "hadoop.name_node.capacity.usage",
            metric ->
                metric
                    .hasDescription(
                        "The current used capacity across all data nodes reporting to the name node.")
                    .hasUnit("By")
                    .isUpDownCounter()
                    .hasDataPointsWithOneAttribute(nodeNameAttribute))
        .add(
            "hadoop.name_node.capacity.limit",
            metric ->
                metric
                    .hasDescription(
                        "The total capacity allotted to data nodes reporting to the name node.")
                    .hasUnit("By")
                    .isUpDownCounter()
                    .hasDataPointsWithOneAttribute(nodeNameAttribute))
        .add(
            "hadoop.name_node.block.count",
            metric ->
                metric
                    .hasDescription("The total number of blocks on the name node.")
                    .hasUnit("{block}")
                    .isUpDownCounter()
                    .hasDataPointsWithOneAttribute(nodeNameAttribute))
        .add(
            "hadoop.name_node.block.missing",
            metric ->
                metric
                    .hasDescription("The number of blocks reported as missing to the name node.")
                    .hasUnit("{block}")
                    .isUpDownCounter()
                    .hasDataPointsWithOneAttribute(nodeNameAttribute))
        .add(
            "hadoop.name_node.block.corrupt",
            metric ->
                metric
                    .hasDescription("The number of blocks reported as corrupt to the name node.")
                    .hasUnit("{block}")
                    .isUpDownCounter()
                    .hasDataPointsWithOneAttribute(nodeNameAttribute))
        .add(
            "hadoop.name_node.volume.failed",
            metric ->
                metric
                    .hasDescription("The number of failed volumes reported to the name node.")
                    .hasUnit("{volume}")
                    .isUpDownCounter()
                    .hasDataPointsWithOneAttribute(nodeNameAttribute))
        .add(
            "hadoop.name_node.file.count",
            metric ->
                metric
                    .hasDescription("The total number of files being tracked by the name node.")
                    .hasUnit("{file}")
                    .isUpDownCounter()
                    .hasDataPointsWithOneAttribute(nodeNameAttribute))
        .add(
            "hadoop.name_node.file.load",
            metric ->
                metric
                    .hasDescription("The current number of concurrent file accesses.")
                    .hasUnit("{operation}")
                    .isUpDownCounter()
                    .hasDataPointsWithOneAttribute(nodeNameAttribute))
        .add(
            "hadoop.name_node.data_node.count",
            metric ->
                metric
                    .hasDescription("The number of data nodes reporting to the name node.")
                    .hasUnit("{node}")
                    .isUpDownCounter()
                    .hasDataPointsWithAttributes(
                        attributeGroup(nodeNameAttribute, attribute("state", "live")),
                        attributeGroup(nodeNameAttribute, attribute("state", "dead"))));
  }
}

hadoop-env.sh (new integration-test resource)

+99
#!/bin/bash

# Set Hadoop-specific environment variables here.

# The only required environment variable is JAVA_HOME. All others are
# optional. When running a distributed configuration it is best to
# set JAVA_HOME in this file, so that it is correctly defined on
# remote nodes.

# The java implementation to use.
export JAVA_HOME=${JAVA_HOME}

# The jsvc implementation to use. Jsvc is required to run secure datanodes
# that bind to privileged ports to provide authentication of data transfer
# protocol. Jsvc is not required if SASL is configured for authentication of
# data transfer protocol using non-privileged ports.
#export JSVC_HOME=${JSVC_HOME}

export HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-"/etc/hadoop"}

# Extra Java CLASSPATH elements. Automatically insert capacity-scheduler.
for f in "$HADOOP_HOME"/contrib/capacity-scheduler/*.jar; do
  if [ "$HADOOP_CLASSPATH" ]; then
    export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:$f
  else
    export HADOOP_CLASSPATH=$f
  fi
done

# The maximum amount of heap to use, in MB. Default is 1000.
#export HADOOP_HEAPSIZE=
#export HADOOP_NAMENODE_INIT_HEAPSIZE=""

# Enable extra debugging of Hadoop's JAAS binding, used to set up
# Kerberos security.
# export HADOOP_JAAS_DEBUG=true

# Extra Java runtime options. Empty by default.
# For Kerberos debugging, an extended option set logs more invormation
# export HADOOP_OPTS="-Djava.net.preferIPv4Stack=true -Dsun.security.krb5.debug=true -Dsun.security.spnego.debug"
export HADOOP_OPTS="$HADOOP_OPTS -Djava.net.preferIPv4Stack=true"

# Command specific options appended to HADOOP_OPTS when specified
export HADOOP_NAMENODE_OPTS="-Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,RFAS} -Dhdfs.audit.logger=${HDFS_AUDIT_LOGGER:-INFO,NullAppender} $HADOOP_NAMENODE_OPTS"
export HADOOP_NAMENODE_OPTS="-Dcom.sun.management.jmxremote $HADOOP_NAMENODE_OPTS"
export HADOOP_NAMENODE_OPTS="$HADOOP_NAMENODE_OPTS -Dcom.sun.management.jmxremote.authenticate=false"
export HADOOP_NAMENODE_OPTS="$HADOOP_NAMENODE_OPTS -Dcom.sun.management.jmxremote.ssl=false"
export HADOOP_NAMENODE_OPTS="$HADOOP_NAMENODE_OPTS -Dcom.sun.management.jmxremote.port=9999 -Dcom.sun.management.jmxremote.rmi.port=9999"

export HADOOP_DATANODE_OPTS="-Dhadoop.security.logger=ERROR,RFAS $HADOOP_DATANODE_OPTS"

export HADOOP_SECONDARYNAMENODE_OPTS="-Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,RFAS} -Dhdfs.audit.logger=${HDFS_AUDIT_LOGGER:-INFO,NullAppender} $HADOOP_SECONDARYNAMENODE_OPTS"

export HADOOP_NFS3_OPTS="$HADOOP_NFS3_OPTS"
export HADOOP_PORTMAP_OPTS="-Xmx512m $HADOOP_PORTMAP_OPTS"

# The following applies to multiple commands (fs, dfs, fsck, distcp etc)
export HADOOP_CLIENT_OPTS="$HADOOP_CLIENT_OPTS"
# set heap args when HADOOP_HEAPSIZE is empty
if [ "$HADOOP_HEAPSIZE" = "" ]; then
  export HADOOP_CLIENT_OPTS="-Xmx512m $HADOOP_CLIENT_OPTS"
fi
#HADOOP_JAVA_PLATFORM_OPTS="-XX:-UsePerfData $HADOOP_JAVA_PLATFORM_OPTS"

# On secure datanodes, user to run the datanode as after dropping privileges.
# This **MUST** be uncommented to enable secure HDFS if using privileged ports
# to provide authentication of data transfer protocol. This **MUST NOT** be
# defined if SASL is configured for authentication of data transfer protocol
# using non-privileged ports.
export HADOOP_SECURE_DN_USER=${HADOOP_SECURE_DN_USER}

# Where log files are stored. $HADOOP_HOME/logs by default.
#export HADOOP_LOG_DIR=${HADOOP_LOG_DIR}/$USER

# Where log files are stored in the secure data environment.
#export HADOOP_SECURE_DN_LOG_DIR=${HADOOP_LOG_DIR}/${HADOOP_HDFS_USER}

###
# HDFS Mover specific parameters
###
# Specify the JVM options to be used when starting the HDFS Mover.
# These options will be appended to the options specified as HADOOP_OPTS
# and therefore may override any similar flags set in HADOOP_OPTS
#
# export HADOOP_MOVER_OPTS=""

###
# Advanced Users Only!
###

# The directory where pid files are stored. /tmp by default.
# NOTE: this should be set to a directory that can only be written to by
# the user that will run the hadoop daemons. Otherwise there is the
# potential for a symlink attack.
export HADOOP_PID_DIR=${HADOOP_PID_DIR}
export HADOOP_SECURE_DN_PID_DIR=${HADOOP_PID_DIR}

# A string representing this instance of hadoop. $USER by default.
export HADOOP_IDENT_STRING=$USER
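The -Dcom.sun.management.jmxremote.* options above are what expose the NameNode MBeans on port 9999 without authentication or SSL, which is the endpoint the scraper and the integration test read from. For poking at a running container by hand, a plain JMX client is enough; a minimal sketch, assuming port 9999 is published on localhost (the host and port mapping are assumptions, adjust to the actual container port mapping):

```java
import java.util.Set;
import javax.management.MBeanServerConnection;
import javax.management.ObjectName;
import javax.management.remote.JMXConnector;
import javax.management.remote.JMXConnectorFactory;
import javax.management.remote.JMXServiceURL;

public class HadoopJmxEndpointCheck {
  public static void main(String[] args) throws Exception {
    // Standard RMI-based JMX URL for the port opened in hadoop-env.sh.
    JMXServiceURL url =
        new JMXServiceURL("service:jmx:rmi:///jndi/rmi://localhost:9999/jmxrmi");
    try (JMXConnector connector = JMXConnectorFactory.connect(url)) {
      MBeanServerConnection connection = connector.getMBeanServerConnection();

      // The bean mapped by hadoop.groovy and by the new YAML rules.
      ObjectName fsNamesystem = new ObjectName("Hadoop:service=NameNode,name=FSNamesystem");
      Set<ObjectName> found = connection.queryNames(fsNamesystem, null);
      System.out.println("Matching MBeans: " + found);

      for (ObjectName name : found) {
        System.out.println("CapacityUsed = " + connection.getAttribute(name, "CapacityUsed"));
        System.out.println("BlocksTotal = " + connection.getAttribute(name, "BlocksTotal"));
        System.out.println("NumLiveDataNodes = " + connection.getAttribute(name, "NumLiveDataNodes"));
      }
    }
  }
}
```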

Hadoop target system metric rules (new YAML file)

+52
---
rules:
  - bean: Hadoop:service=NameNode,name=FSNamesystem
    prefix: hadoop.name_node.
    type: updowncounter
    metricAttribute:
      node_name: beanattr(tag\.Hostname)
    mapping:
      CapacityUsed:
        metric: capacity.usage
        unit: By
        desc: The current used capacity across all data nodes reporting to the name node.
      CapacityTotal:
        metric: capacity.limit
        unit: By
        desc: The total capacity allotted to data nodes reporting to the name node.
      BlocksTotal:
        metric: block.count
        unit: "{block}"
        desc: The total number of blocks on the name node.
      MissingBlocks:
        metric: block.missing
        unit: "{block}"
        desc: The number of blocks reported as missing to the name node.
      CorruptBlocks:
        metric: block.corrupt
        unit: "{block}"
        desc: The number of blocks reported as corrupt to the name node.
      VolumeFailuresTotal:
        metric: volume.failed
        unit: "{volume}"
        desc: The number of failed volumes reported to the name node.
      FilesTotal:
        metric: file.count
        unit: "{file}"
        desc: The total number of files being tracked by the name node.
      TotalLoad:
        metric: file.load
        unit: "{operation}"
        desc: The current number of concurrent file accesses.
      NumLiveDataNodes:
        metric: &metric data_node.count
        unit: &unit "{node}"
        desc: &desc The number of data nodes reporting to the name node.
        metricAttribute:
          state: const(live)
      NumDeadDataNodes:
        metric: *metric
        unit: *unit
        desc: *desc
        metricAttribute:
          state: const(dead)
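Note that NumLiveDataNodes and NumDeadDataNodes reuse the same metric name, unit, and description through YAML anchors (&metric/*metric, &unit/*unit, &desc/*desc), so both MBean attributes feed a single hadoop.name_node.data_node.count metric and differ only in the state metric attribute. If the anchor wiring ever needs a sanity check outside the scraper, any YAML parser will do; a small sketch using SnakeYAML (the SnakeYAML dependency and the /hadoop.yaml classpath resource name are assumptions for this illustration, not part of the change):

```java
import java.io.InputStream;
import java.util.List;
import java.util.Map;
import org.yaml.snakeyaml.Yaml;

public class HadoopYamlAnchorCheck {
  @SuppressWarnings("unchecked")
  public static void main(String[] args) {
    // Load the rules file; adjust the resource path to wherever the YAML lives.
    InputStream in = HadoopYamlAnchorCheck.class.getResourceAsStream("/hadoop.yaml");
    Map<String, Object> root = new Yaml().load(in);

    List<Map<String, Object>> rules = (List<Map<String, Object>>) root.get("rules");
    Map<String, Object> mapping = (Map<String, Object>) rules.get(0).get("mapping");

    Map<String, Object> live = (Map<String, Object>) mapping.get("NumLiveDataNodes");
    Map<String, Object> dead = (Map<String, Object>) mapping.get("NumDeadDataNodes");

    // Anchors resolve to identical values: both map to data_node.count / {node}.
    System.out.println(live.get("metric") + " == " + dead.get("metric"));
    System.out.println(live.get("unit") + " == " + dead.get("unit"));
  }
}
```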
