Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for tracking hadoop and GCS API level metrics at a thread level #1304

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,9 @@ public class GhfsGlobalStorageStatistics extends StorageStatistics {
// Initial requests are expected to take time due to warmup.
private static final int WARMUP_THRESHOLD_SEC = 30;

private static final GhfsThreadLocalStatistics threadLocalStatistics =
new GhfsThreadLocalStatistics();

private final Map<String, AtomicLong> opsCount = new HashMap<>();
private final Map<String, AtomicLong> minimums = new HashMap<>();
private final Map<String, AtomicLong> maximums = new HashMap<>();
Expand Down Expand Up @@ -121,6 +124,10 @@ static <B> B trackDuration(
}
}

/**
 * Returns the thread-local statistics object that {@code incrementCounter} forwards its updates
 * to.
 *
 * <p>The object is held in a static field, so every instance of this class returns the same one.
 *
 * @return the shared {@link GhfsThreadLocalStatistics} instance
 */
public GhfsThreadLocalStatistics getThreadLocalStatistics() {
return threadLocalStatistics;
}

private String getNonZeroMetrics() {
// TreeMap to keep the result sorted.
TreeMap<String, Long> result = new TreeMap<>();
Expand Down Expand Up @@ -156,6 +163,7 @@ void increment(GoogleCloudStorageStatistics statistic) {
* @return the new value
*/
long incrementCounter(GhfsStatistic op, long count) {
  // Forward the update to the per-thread statistics first; the global counter is bumped last so
  // that its new total can be returned directly.
  threadLocalStatistics.increment(op, count);

  AtomicLong globalCounter = opsCount.get(op.getSymbol());
  return globalCounter.addAndGet(count);
}

Expand All @@ -167,6 +175,7 @@ long incrementCounter(GhfsStatistic op, long count) {
*/
void incrementCounter(GoogleCloudStorageStatistics op, long count) {
  // Global counter is updated first; the new total is not needed by callers of this overload.
  AtomicLong globalCounter = opsCount.get(op.getSymbol());
  globalCounter.addAndGet(count);

  // Forward the same delta to the per-thread statistics.
  threadLocalStatistics.increment(op, count);
}

@Override
Expand Down Expand Up @@ -242,7 +251,7 @@ void updateStats(
String symbol = statistic.getSymbol();
updateMinMaxStats(minLatency, maxLatency, context, symbol);
addMeanStatistic(statistic.getSymbol(), totalDuration, count);
opsCount.get(symbol).addAndGet(count);
incrementCounter(statistic, count);

updateConnectorHadoopApiTime(totalDuration);
}
Expand Down
56 changes: 41 additions & 15 deletions gcs/src/main/java/com/google/cloud/hadoop/fs/gcs/GhfsStatistic.java
Original file line number Diff line number Diff line change
Expand Up @@ -66,33 +66,48 @@ public enum GhfsStatistic {
"files_delete_rejected",
"Total number of files whose delete request was rejected",
TYPE_COUNTER),
INVOCATION_CREATE(StoreStatisticNames.OP_CREATE, "Calls of create()", TYPE_DURATION_TOTAL),
INVOCATION_DELETE(StoreStatisticNames.OP_DELETE, "Calls of delete()", TYPE_DURATION_TOTAL),
INVOCATION_EXISTS(StoreStatisticNames.OP_EXISTS, "Calls of exists()", TYPE_COUNTER),
INVOCATION_CREATE(StoreStatisticNames.OP_CREATE, "Calls of create()", TYPE_DURATION_TOTAL, true),
INVOCATION_DELETE(StoreStatisticNames.OP_DELETE, "Calls of delete()", TYPE_DURATION_TOTAL, true),
INVOCATION_EXISTS(StoreStatisticNames.OP_EXISTS, "Calls of exists()", TYPE_COUNTER, true),
INVOCATION_GET_FILE_STATUS(
StoreStatisticNames.OP_GET_FILE_STATUS, "Calls of getFileStatus()", TYPE_DURATION_TOTAL),
StoreStatisticNames.OP_GET_FILE_STATUS,
"Calls of getFileStatus()",
TYPE_DURATION_TOTAL,
true),
INVOCATION_GET_FILE_CHECKSUM(
StoreStatisticNames.OP_GET_FILE_CHECKSUM, "Calls of getFileChecksum()", TYPE_COUNTER),

INVOCATION_LIST_STATUS_RESULT_SIZE(
"op_get_list_status_result_size", "Number of files returned from list call", TYPE_COUNTER),
INVOCATION_GLOB_STATUS(
StoreStatisticNames.OP_GLOB_STATUS, "Calls of globStatus()", TYPE_DURATION_TOTAL),
INVOCATION_HFLUSH(StoreStatisticNames.OP_HFLUSH, "Calls of hflush()", TYPE_DURATION_TOTAL),
INVOCATION_HSYNC(StoreStatisticNames.OP_HSYNC, "Calls of hsync()", TYPE_DURATION_TOTAL),
StoreStatisticNames.OP_GLOB_STATUS, "Calls of globStatus()", TYPE_DURATION_TOTAL, true),
INVOCATION_HFLUSH(StoreStatisticNames.OP_HFLUSH, "Calls of hflush()", TYPE_DURATION_TOTAL, true),
INVOCATION_HSYNC(StoreStatisticNames.OP_HSYNC, "Calls of hsync()", TYPE_DURATION_TOTAL, true),
INVOCATION_LIST_STATUS(
StoreStatisticNames.OP_LIST_STATUS, "Calls of listStatus()", TYPE_DURATION_TOTAL),
INVOCATION_MKDIRS(StoreStatisticNames.OP_MKDIRS, "Calls of mkdirs()", TYPE_DURATION_TOTAL),
INVOCATION_OPEN(StoreStatisticNames.OP_OPEN, "Calls of open()", TYPE_DURATION_TOTAL),
INVOCATION_RENAME(StoreStatisticNames.OP_RENAME, "Calls of rename()", TYPE_DURATION_TOTAL),
StoreStatisticNames.OP_LIST_STATUS, "Calls of listStatus()", TYPE_DURATION_TOTAL, true),
INVOCATION_MKDIRS(StoreStatisticNames.OP_MKDIRS, "Calls of mkdirs()", TYPE_DURATION_TOTAL, true),
INVOCATION_OPEN(StoreStatisticNames.OP_OPEN, "Calls of open()", TYPE_DURATION_TOTAL, true),
INVOCATION_RENAME(StoreStatisticNames.OP_RENAME, "Calls of rename()", TYPE_DURATION_TOTAL, true),
INVOCATION_COPY_FROM_LOCAL_FILE(
StoreStatisticNames.OP_COPY_FROM_LOCAL_FILE, "Calls of copyFromLocalFile()", TYPE_COUNTER),
StoreStatisticNames.OP_COPY_FROM_LOCAL_FILE,
"Calls of copyFromLocalFile()",
TYPE_COUNTER,
true),
INVOCATION_CREATE_NON_RECURSIVE(
StoreStatisticNames.OP_CREATE_NON_RECURSIVE, "Calls of createNonRecursive()", TYPE_DURATION),
StoreStatisticNames.OP_CREATE_NON_RECURSIVE,
"Calls of createNonRecursive()",
TYPE_DURATION,
true),
INVOCATION_GET_DELEGATION_TOKEN(
StoreStatisticNames.OP_GET_DELEGATION_TOKEN, "Calls of getDelegationToken()", TYPE_COUNTER),
StoreStatisticNames.OP_GET_DELEGATION_TOKEN,
"Calls of getDelegationToken()",
TYPE_COUNTER,
true),
INVOCATION_LIST_LOCATED_STATUS(
StoreStatisticNames.OP_LIST_LOCATED_STATUS, "Calls of listLocatedStatus()", TYPE_COUNTER),
StoreStatisticNames.OP_LIST_LOCATED_STATUS,
"Calls of listLocatedStatus()",
TYPE_COUNTER,
true),

/** Stream reads */
STREAM_READ_BYTES(
Expand Down Expand Up @@ -188,6 +203,8 @@ public enum GhfsStatistic {
private static final ImmutableMap<String, GhfsStatistic> SYMBOL_MAP =
Maps.uniqueIndex(Iterators.forArray(values()), GhfsStatistic::getSymbol);

private final boolean isHadoopApi;

/**
* Statistic definition.
*
Expand All @@ -196,9 +213,14 @@ public enum GhfsStatistic {
* @param type type
*/
GhfsStatistic(String symbol, String description, StatisticTypeEnum type) {
// Statistics declared through this overload get isHadoopApi = false.
this(symbol, description, type, false);
}

/**
 * Statistic definition with an explicit Hadoop API flag.
 *
 * @param symbol symbol of the statistic
 * @param description description
 * @param type type
 * @param isHadoopApi value later returned by {@link #getIsHadoopApi()}; {@code true} marks the
 *     statistic as a Hadoop API invocation
 */
GhfsStatistic(String symbol, String description, StatisticTypeEnum type, boolean isHadoopApi) {
this.symbol = symbol;
this.description = description;
this.type = type;
this.isHadoopApi = isHadoopApi;
}

/** Statistic name. */
Expand Down Expand Up @@ -248,4 +270,8 @@ public String toString() {
public StatisticTypeEnum getType() {
return type;
}

/**
 * Reports whether this statistic was declared with the Hadoop API flag set.
 *
 * @return the {@code isHadoopApi} value given at construction; {@code false} for statistics
 *     declared without the flag
 */
boolean getIsHadoopApi() {
  return isHadoopApi;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
/*
* Copyright 2025 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.google.cloud.hadoop.fs.gcs;

import static com.google.cloud.hadoop.fs.gcs.GhfsStatistic.GCS_CONNECTOR_TIME;

import com.google.cloud.hadoop.gcsio.GoogleCloudStorageStatistics;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import org.apache.hadoop.fs.StorageStatistics;

/**
 * {@link StorageStatistics} view over a fixed set of counters whose values are kept per thread.
 *
 * <p>Every counter is backed by a {@link ThreadLocal}, so a thread only reads and modifies its own
 * totals. The counters are owned by the constants of the private {@code Metric} enum rather than
 * by an instance of this class, so all instances operate on the same per-thread values.
 */
class GhfsThreadLocalStatistics extends StorageStatistics {
  /** Name handed to the {@link StorageStatistics} constructor. */
  static final String NAME = "GhfsThreadLocalStatistics";

  /** Lookup from a metric's published name to the metric itself. */
  private final Map<String, Metric> metrics = new HashMap<>();

  GhfsThreadLocalStatistics() {
    super(NAME);
    Arrays.stream(Metric.values()).forEach(metric -> metrics.put(metric.metricName, metric));
  }

  /**
   * Returns the calling thread's current value of the named metric.
   *
   * <p>NOTE(review): unknown names yield {@code 0}; Hadoop's {@code StorageStatistics#getLong}
   * documents {@code null} for untracked statistics. Confirm that returning zero is intentional.
   *
   * @param s metric name
   * @return the calling thread's value, or {@code 0} when the name is not a known metric
   */
  @Override
  public Long getLong(String s) {
    Metric metric = metrics.get(s);
    if (metric == null) {
      return 0L;
    }
    return metric.metricValue.getValue();
  }

  /**
   * Tells whether the given name is one of the metrics exposed by this class.
   *
   * @param s metric name
   * @return {@code true} when the name maps to a known metric
   */
  @Override
  public boolean isTracked(String s) {
    return metrics.containsKey(s);
  }

  /** Sets every metric back to zero for the calling thread. */
  @Override
  public void reset() {
    metrics.values().forEach(Metric::reset);
  }

  /**
   * Records a connector-level statistic update for the calling thread.
   *
   * <p>{@code GCS_CONNECTOR_TIME} adds to the Hadoop API time; any other statistic flagged as a
   * Hadoop API adds to the Hadoop API count. Everything else is ignored.
   *
   * @param statistic statistic being updated
   * @param count amount to add
   */
  void increment(GhfsStatistic statistic, long count) {
    if (statistic == GCS_CONNECTOR_TIME) {
      Metric.HADOOP_API_TIME.increment(count);
      return;
    }
    if (statistic.getIsHadoopApi()) {
      Metric.HADOOP_API_COUNT.increment(count);
    }
  }

  /**
   * Records a GCS-level statistic update for the calling thread.
   *
   * <p>Only the API time, API request count, backoff count and backoff time statistics are
   * tracked; every other statistic is ignored.
   *
   * @param op statistic being updated
   * @param count amount to add
   */
  void increment(GoogleCloudStorageStatistics op, long count) {
    Metric target = metricFor(op);
    if (target != null) {
      target.increment(count);
    }
  }

  /** Maps a GCS statistic to its thread-local metric, or {@code null} when it is not tracked. */
  private static Metric metricFor(GoogleCloudStorageStatistics op) {
    if (op == GoogleCloudStorageStatistics.GCS_API_TIME) {
      return Metric.GCS_API_TIME;
    }
    if (op == GoogleCloudStorageStatistics.GCS_API_REQUEST_COUNT) {
      return Metric.GCS_API_COUNT;
    }
    if (op == GoogleCloudStorageStatistics.GCS_BACKOFF_COUNT) {
      return Metric.BACKOFF_COUNT;
    }
    if (op == GoogleCloudStorageStatistics.GCS_BACKOFF_TIME) {
      return Metric.BACKOFF_TIME;
    }
    return null;
  }

  /**
   * Exposes all metrics as name/value pairs.
   *
   * <p>The underlying stream is evaluated lazily, so each value is read when the returned iterator
   * reaches it.
   *
   * @return iterator over one {@link LongStatistic} per metric
   */
  @Override
  public Iterator<LongStatistic> getLongStatistics() {
    return metrics.values().stream()
        .map(metric -> new LongStatistic(metric.metricName, metric.metricValue.getValue()))
        .iterator();
  }

  /** A {@code long} accumulator with an independent value, starting at zero, for each thread. */
  private static class ThreadLocalValue {
    private final ThreadLocal<Long> perThread = ThreadLocal.withInitial(() -> 0L);

    /** Adds {@code count} to the calling thread's value. */
    void increment(long count) {
      long current = perThread.get();
      perThread.set(current + count);
    }

    /** Returns the calling thread's value. */
    Long getValue() {
      return perThread.get();
    }

    /** Sets the calling thread's value back to zero. */
    void reset() {
      perThread.set(0L);
    }
  }

  /** The tracked metrics; each constant carries its published name and its per-thread value. */
  private enum Metric {
    HADOOP_API_COUNT("hadoopApiCount"),
    HADOOP_API_TIME("hadoopApiTime"),
    GCS_API_COUNT("gcsApiCount"),
    GCS_API_TIME("gcsApiTime"),
    BACKOFF_COUNT("backoffCount"),
    BACKOFF_TIME("backoffTime");

    private final String metricName;
    private final ThreadLocalValue metricValue;

    Metric(String metricName) {
      this.metricName = metricName;
      this.metricValue = new ThreadLocalValue();
    }

    /** Zeroes this metric for the calling thread. */
    void reset() {
      metricValue.reset();
    }

    /** Adds {@code count} to this metric for the calling thread. */
    void increment(long count) {
      metricValue.increment(count);
    }
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,10 @@ public GoogleHadoopFileSystem() {
GlobalStorageStatistics.INSTANCE.put(
GhfsGlobalStorageStatistics.NAME, () -> new GhfsGlobalStorageStatistics());

GlobalStorageStatistics.INSTANCE.put(
GhfsThreadLocalStatistics.NAME,
() -> ((GhfsGlobalStorageStatistics) globalStats).getThreadLocalStatistics());

if (GhfsGlobalStorageStatistics.class.isAssignableFrom(globalStats.getClass())) {
globalStorageStatistics = (GhfsGlobalStorageStatistics) globalStats;
} else {
Expand Down
Loading