Skip to content

Commit be8cb32

Browse files
authored
Add more flint metrics (#255)
* Refactor and add metrics for streaming job Signed-off-by: Louis Chu <[email protected]> * Add metrics for request / result index Signed-off-by: Louis Chu <[email protected]> * Add java doc for OpenSearchUpdater class Signed-off-by: Louis Chu <[email protected]> * Address comment from Peng Signed-off-by: Louis Chu <[email protected]> --------- Signed-off-by: Louis Chu <[email protected]>
1 parent 9c34a1d commit be8cb32

File tree

18 files changed

+439
-182
lines changed

18 files changed

+439
-182
lines changed

flint-core/src/main/java/org/opensearch/flint/core/IRestHighLevelClient.java

+54-1
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
package org.opensearch.flint.core;
77

8+
import org.opensearch.OpenSearchException;
89
import org.opensearch.action.bulk.BulkRequest;
910
import org.opensearch.action.bulk.BulkResponse;
1011
import org.opensearch.action.delete.DeleteRequest;
@@ -26,6 +27,7 @@
2627
import org.opensearch.client.indices.GetIndexResponse;
2728
import org.opensearch.action.admin.indices.delete.DeleteIndexRequest;
2829
import org.opensearch.client.RequestOptions;
30+
import org.opensearch.flint.core.metrics.MetricsUtil;
2931

3032
import java.io.Closeable;
3133
import java.io.IOException;
@@ -52,11 +54,62 @@ public interface IRestHighLevelClient extends Closeable {
5254

5355
IndexResponse index(IndexRequest indexRequest, RequestOptions options) throws IOException;
5456

55-
Boolean isIndexExists(GetIndexRequest getIndexRequest, RequestOptions options) throws IOException;
57+
Boolean doesIndexExist(GetIndexRequest getIndexRequest, RequestOptions options) throws IOException;
5658

5759
SearchResponse search(SearchRequest searchRequest, RequestOptions options) throws IOException;
5860

5961
SearchResponse scroll(SearchScrollRequest searchScrollRequest, RequestOptions options) throws IOException;
6062

6163
DocWriteResponse update(UpdateRequest updateRequest, RequestOptions options) throws IOException;
64+
65+
66+
/**
67+
* Records the success of an OpenSearch operation by incrementing the corresponding metric counter.
68+
* This method constructs the metric name by appending ".2xx.count" to the provided metric name prefix.
69+
* The metric name is then used to increment the counter, indicating a successful operation.
70+
*
71+
* @param metricNamePrefix the prefix for the metric name which is used to construct the full metric name for success
72+
*/
73+
static void recordOperationSuccess(String metricNamePrefix) {
74+
String successMetricName = metricNamePrefix + ".2xx.count";
75+
MetricsUtil.incrementCounter(successMetricName);
76+
}
77+
78+
/**
79+
* Records the failure of an OpenSearch operation by incrementing the corresponding metric counter.
80+
* If the exception is an OpenSearchException with a specific status code (e.g., 403),
81+
* it increments a metric specifically for that status code.
82+
* In addition, it always increments a general failure metric counter based on the status code category (e.g., 4xx, 5xx); exceptions that carry no OpenSearch status are counted as 5xx.
83+
*
84+
* @param metricNamePrefix the prefix for the metric name which is used to construct the full metric name for failure
85+
* @param e the exception encountered during the operation, used to determine the type of failure
86+
*/
87+
static void recordOperationFailure(String metricNamePrefix, Exception e) {
88+
OpenSearchException openSearchException = extractOpenSearchException(e);
89+
int statusCode = openSearchException != null ? openSearchException.status().getStatus() : 500;
90+
91+
if (statusCode == 403) {
92+
String forbiddenErrorMetricName = metricNamePrefix + ".403.count";
93+
MetricsUtil.incrementCounter(forbiddenErrorMetricName);
94+
}
95+
96+
String failureMetricName = metricNamePrefix + "." + (statusCode / 100) + "xx.count";
97+
MetricsUtil.incrementCounter(failureMetricName);
98+
}
99+
100+
/**
101+
* Extracts an OpenSearchException from the given Throwable.
102+
* Checks if the Throwable is an instance of OpenSearchException or is directly caused by one (only the immediate cause is inspected).
103+
*
104+
* @param ex the exception to be checked
105+
* @return the extracted OpenSearchException, or null if not found
106+
*/
107+
private static OpenSearchException extractOpenSearchException(Throwable ex) {
108+
if (ex instanceof OpenSearchException) {
109+
return (OpenSearchException) ex;
110+
} else if (ex.getCause() instanceof OpenSearchException) {
111+
return (OpenSearchException) ex.getCause();
112+
}
113+
return null;
114+
}
62115
}

flint-core/src/main/java/org/opensearch/flint/core/RestHighLevelClientWrapper.java

+3-55
Original file line numberDiff line numberDiff line change
@@ -20,15 +20,13 @@
2020
import org.opensearch.action.search.SearchResponse;
2121
import org.opensearch.action.search.SearchScrollRequest;
2222
import org.opensearch.action.update.UpdateRequest;
23-
import org.opensearch.OpenSearchException;
2423
import org.opensearch.action.update.UpdateResponse;
2524
import org.opensearch.client.RequestOptions;
2625
import org.opensearch.client.RestHighLevelClient;
2726
import org.opensearch.client.indices.CreateIndexRequest;
2827
import org.opensearch.client.indices.CreateIndexResponse;
2928
import org.opensearch.client.indices.GetIndexRequest;
3029
import org.opensearch.client.indices.GetIndexResponse;
31-
import org.opensearch.flint.core.metrics.MetricsUtil;
3230

3331
import java.io.IOException;
3432

@@ -91,7 +89,7 @@ public IndexResponse index(IndexRequest indexRequest, RequestOptions options) th
9189
}
9290

9391
@Override
94-
public Boolean isIndexExists(GetIndexRequest getIndexRequest, RequestOptions options) throws IOException {
92+
public Boolean doesIndexExist(GetIndexRequest getIndexRequest, RequestOptions options) throws IOException {
9593
return execute(OS_READ_OP_METRIC_PREFIX, () -> client.indices().exists(getIndexRequest, options));
9694
}
9795

@@ -122,64 +120,14 @@ public UpdateResponse update(UpdateRequest updateRequest, RequestOptions options
122120
private <T> T execute(String metricNamePrefix, IOCallable<T> operation) throws IOException {
123121
try {
124122
T result = operation.call();
125-
recordOperationSuccess(metricNamePrefix);
123+
IRestHighLevelClient.recordOperationSuccess(metricNamePrefix);
126124
return result;
127125
} catch (Exception e) {
128-
recordOperationFailure(metricNamePrefix, e);
126+
IRestHighLevelClient.recordOperationFailure(metricNamePrefix, e);
129127
throw e;
130128
}
131129
}
132130

133-
/**
134-
* Records the success of an OpenSearch operation by incrementing the corresponding metric counter.
135-
* This method constructs the metric name by appending ".200.count" to the provided metric name prefix.
136-
* The metric name is then used to increment the counter, indicating a successful operation.
137-
*
138-
* @param metricNamePrefix the prefix for the metric name which is used to construct the full metric name for success
139-
*/
140-
private void recordOperationSuccess(String metricNamePrefix) {
141-
String successMetricName = metricNamePrefix + ".2xx.count";
142-
MetricsUtil.incrementCounter(successMetricName);
143-
}
144-
145-
/**
146-
* Records the failure of an OpenSearch operation by incrementing the corresponding metric counter.
147-
* If the exception is an OpenSearchException with a specific status code (e.g., 403),
148-
* it increments a metric specifically for that status code.
149-
* Otherwise, it increments a general failure metric counter based on the status code category (e.g., 4xx, 5xx).
150-
*
151-
* @param metricNamePrefix the prefix for the metric name which is used to construct the full metric name for failure
152-
* @param e the exception encountered during the operation, used to determine the type of failure
153-
*/
154-
private void recordOperationFailure(String metricNamePrefix, Exception e) {
155-
OpenSearchException openSearchException = extractOpenSearchException(e);
156-
int statusCode = openSearchException != null ? openSearchException.status().getStatus() : 500;
157-
158-
if (statusCode == 403) {
159-
String forbiddenErrorMetricName = metricNamePrefix + ".403.count";
160-
MetricsUtil.incrementCounter(forbiddenErrorMetricName);
161-
}
162-
163-
String failureMetricName = metricNamePrefix + "." + (statusCode / 100) + "xx.count";
164-
MetricsUtil.incrementCounter(failureMetricName);
165-
}
166-
167-
/**
168-
* Extracts an OpenSearchException from the given Throwable.
169-
* Checks if the Throwable is an instance of OpenSearchException or caused by one.
170-
*
171-
* @param ex the exception to be checked
172-
* @return the extracted OpenSearchException, or null if not found
173-
*/
174-
private OpenSearchException extractOpenSearchException(Throwable ex) {
175-
if (ex instanceof OpenSearchException) {
176-
return (OpenSearchException) ex;
177-
} else if (ex.getCause() instanceof OpenSearchException) {
178-
return (OpenSearchException) ex.getCause();
179-
}
180-
return null;
181-
}
182-
183131
/**
184132
* Functional interface for operations that can throw IOException.
185133
*

flint-core/src/main/java/org/opensearch/flint/core/metrics/MetricConstants.java

+46-2
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
/**
99
* This class defines custom metric constants used for monitoring flint operations.
1010
*/
11-
public class MetricConstants {
11+
public final class MetricConstants {
1212

1313
/**
1414
* The prefix for all read-related metrics in OpenSearch.
@@ -47,6 +47,26 @@ public class MetricConstants {
4747
*/
4848
public static final String REPL_PROCESSING_TIME_METRIC = "session.processingTime";
4949

50+
/**
51+
* Prefix for metrics related to the request metadata read operations.
52+
*/
53+
public static final String REQUEST_METADATA_READ_METRIC_PREFIX = "request.metadata.read";
54+
55+
/**
56+
* Prefix for metrics related to the request metadata write operations.
57+
*/
58+
public static final String REQUEST_METADATA_WRITE_METRIC_PREFIX = "request.metadata.write";
59+
60+
/**
61+
* Metric name for counting failed heartbeat operations on request metadata.
62+
*/
63+
public static final String REQUEST_METADATA_HEARTBEAT_FAILED_METRIC = "request.metadata.heartbeat.failed.count";
64+
65+
/**
66+
* Prefix for metrics related to the result metadata write operations.
67+
*/
68+
public static final String RESULT_METADATA_WRITE_METRIC_PREFIX = "result.metadata.write";
69+
5070
/**
5171
* Metric name for counting the number of statements currently running.
5272
*/
@@ -65,5 +85,29 @@ public class MetricConstants {
6585
/**
6686
* Metric name for tracking the processing time of statements.
6787
*/
68-
public static final String STATEMENT_PROCESSING_TIME_METRIC = "STATEMENT.processingTime";
88+
public static final String STATEMENT_PROCESSING_TIME_METRIC = "statement.processingTime";
89+
90+
/**
91+
* Metric for tracking the count of currently running streaming jobs.
92+
*/
93+
public static final String STREAMING_RUNNING_METRIC = "streaming.running.count";
94+
95+
/**
96+
* Metric for tracking the count of streaming jobs that have failed.
97+
*/
98+
public static final String STREAMING_FAILED_METRIC = "streaming.failed.count";
99+
100+
/**
101+
* Metric for tracking the count of streaming jobs that have completed successfully.
102+
*/
103+
public static final String STREAMING_SUCCESS_METRIC = "streaming.success.count";
104+
105+
/**
106+
* Metric for tracking the count of failed heartbeat signals in streaming jobs.
107+
*/
108+
public static final String STREAMING_HEARTBEAT_FAILED_METRIC = "streaming.heartbeat.failed.count";
109+
110+
private MetricConstants() {
111+
// Private constructor to prevent instantiation
112+
}
69113
}

flint-core/src/main/java/org/opensearch/flint/core/metrics/MetricsUtil.java

+31-26
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,15 @@
66
package org.opensearch.flint.core.metrics;
77

88
import com.codahale.metrics.Counter;
9+
import com.codahale.metrics.Gauge;
10+
import com.codahale.metrics.MetricRegistry;
911
import com.codahale.metrics.Timer;
1012
import org.apache.spark.SparkEnv;
1113
import org.apache.spark.metrics.source.FlintMetricSource;
1214
import org.apache.spark.metrics.source.Source;
1315
import scala.collection.Seq;
1416

17+
import java.util.concurrent.atomic.AtomicInteger;
1518
import java.util.logging.Logger;
1619

1720
/**
@@ -21,8 +24,8 @@ public final class MetricsUtil {
2124

2225
private static final Logger LOG = Logger.getLogger(MetricsUtil.class.getName());
2326

24-
// Private constructor to prevent instantiation
2527
private MetricsUtil() {
28+
// Private constructor to prevent instantiation
2629
}
2730

2831
/**
@@ -60,10 +63,7 @@ public static void decrementCounter(String metricName) {
6063
*/
6164
public static Timer.Context getTimerContext(String metricName) {
6265
Timer timer = getOrCreateTimer(metricName);
63-
if (timer != null) {
64-
return timer.time();
65-
}
66-
return null;
66+
return timer != null ? timer.time() : null;
6767
}
6868

6969
/**
@@ -74,42 +74,47 @@ public static Timer.Context getTimerContext(String metricName) {
7474
* @return The elapsed time in nanoseconds since the timer was started, or {@code null} if the context was {@code null}.
7575
*/
7676
public static Long stopTimer(Timer.Context context) {
77-
if (context != null) {
78-
return context.stop();
77+
return context != null ? context.stop() : null;
78+
}
79+
80+
/**
81+
* Registers a gauge metric with the provided name and value.
82+
* The gauge will reflect the current value of the AtomicInteger provided.
83+
*
84+
* @param metricName The name of the gauge metric to register.
85+
* @param value The AtomicInteger whose current value should be reflected by the gauge.
86+
*/
87+
public static void registerGauge(String metricName, final AtomicInteger value) {
88+
MetricRegistry metricRegistry = getMetricRegistry();
89+
if (metricRegistry == null) {
90+
LOG.warning("MetricRegistry not available, cannot register gauge: " + metricName);
91+
return;
7992
}
80-
return null;
93+
metricRegistry.register(metricName, (Gauge<Integer>) value::get);
8194
}
8295

8396
// Retrieves or creates a new counter for the given metric name
8497
private static Counter getOrCreateCounter(String metricName) {
85-
SparkEnv sparkEnv = SparkEnv.get();
86-
if (sparkEnv == null) {
87-
LOG.warning("Spark environment not available, cannot instrument metric: " + metricName);
88-
return null;
89-
}
90-
91-
FlintMetricSource flintMetricSource = getOrInitFlintMetricSource(sparkEnv);
92-
Counter counter = flintMetricSource.metricRegistry().getCounters().get(metricName);
93-
if (counter == null) {
94-
counter = flintMetricSource.metricRegistry().counter(metricName);
95-
}
96-
return counter;
98+
MetricRegistry metricRegistry = getMetricRegistry();
99+
return metricRegistry != null ? metricRegistry.counter(metricName) : null;
97100
}
98101

99102
// Retrieves or creates a new Timer for the given metric name
100103
private static Timer getOrCreateTimer(String metricName) {
104+
MetricRegistry metricRegistry = getMetricRegistry();
105+
return metricRegistry != null ? metricRegistry.timer(metricName) : null;
106+
}
107+
108+
// Retrieves the MetricRegistry from the current Spark environment.
109+
private static MetricRegistry getMetricRegistry() {
101110
SparkEnv sparkEnv = SparkEnv.get();
102111
if (sparkEnv == null) {
103-
LOG.warning("Spark environment not available, cannot instrument metric: " + metricName);
112+
LOG.warning("Spark environment not available, cannot access MetricRegistry.");
104113
return null;
105114
}
106115

107116
FlintMetricSource flintMetricSource = getOrInitFlintMetricSource(sparkEnv);
108-
Timer timer = flintMetricSource.metricRegistry().getTimers().get(metricName);
109-
if (timer == null) {
110-
timer = flintMetricSource.metricRegistry().timer(metricName);
111-
}
112-
return timer;
117+
return flintMetricSource.metricRegistry();
113118
}
114119

115120
// Gets or initializes the FlintMetricSource

0 commit comments

Comments
 (0)