Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added changes to enable full file cache stats #17538

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -12,6 +12,9 @@

import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FilterDirectory;
import org.opensearch.action.admin.cluster.node.stats.NodeStats;
import org.opensearch.action.admin.cluster.node.stats.NodesStatsRequest;
import org.opensearch.action.admin.cluster.node.stats.NodesStatsResponse;
import org.opensearch.action.admin.indices.delete.DeleteIndexRequest;
import org.opensearch.action.admin.indices.get.GetIndexRequest;
import org.opensearch.action.admin.indices.get.GetIndexResponse;
@@ -28,6 +31,8 @@
import org.opensearch.index.store.CompositeDirectory;
import org.opensearch.index.store.remote.file.CleanerDaemonThreadLeakFilter;
import org.opensearch.index.store.remote.filecache.FileCache;
import org.opensearch.index.store.remote.filecache.FileCacheStats;
import org.opensearch.index.store.remote.filecache.FullFileCacheStats;
import org.opensearch.index.store.remote.utils.FileTypeUtils;
import org.opensearch.indices.IndicesService;
import org.opensearch.node.Node;
@@ -36,7 +41,9 @@

import java.util.Arrays;
import java.util.HashSet;
import java.util.Objects;
import java.util.Set;
import java.util.concurrent.ExecutionException;
import java.util.stream.Collectors;

import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked;
@@ -49,6 +56,7 @@
public class WritableWarmIT extends RemoteStoreBaseIntegTestCase {

protected static final String INDEX_NAME = "test-idx-1";
protected static final String INDEX_NAME_2 = "test-idx-2";
protected static final int NUM_DOCS_IN_BULK = 1000;

/*
@@ -168,4 +176,88 @@ public void testWritableWarmBasic() throws Exception {
assertAcked(client().admin().indices().delete(new DeleteIndexRequest(INDEX_NAME)).get());
fileCache.prune();
}

public void testFullFileAndFileCacheStats() throws ExecutionException, InterruptedException {

InternalTestCluster internalTestCluster = internalCluster();
internalTestCluster.startClusterManagerOnlyNode();
internalTestCluster.startDataAndSearchNodes(1);

Settings settings = Settings.builder()
.put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1)
.put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0)
.put(IndexModule.INDEX_STORE_LOCALITY_SETTING.getKey(), IndexModule.DataLocalityType.PARTIAL.name())
.build();

assertAcked(client().admin().indices().prepareCreate(INDEX_NAME_2).setSettings(settings).get());

// Verify from the cluster settings if the data locality is partial
GetIndexResponse getIndexResponse = client().admin()
.indices()
.getIndex(new GetIndexRequest().indices(INDEX_NAME_2).includeDefaults(true))
.get();

Settings indexSettings = getIndexResponse.settings().get(INDEX_NAME_2);
assertEquals(IndexModule.DataLocalityType.PARTIAL.name(), indexSettings.get(IndexModule.INDEX_STORE_LOCALITY_SETTING.getKey()));

// Ingesting docs again before force merge
indexBulk(INDEX_NAME_2, NUM_DOCS_IN_BULK);
flushAndRefresh(INDEX_NAME_2);

// ensuring cluster is green
ensureGreen();

SearchResponse searchResponse = client().prepareSearch(INDEX_NAME_2).setQuery(QueryBuilders.matchAllQuery()).get();
// Asserting that search returns same number of docs as ingested
assertHitCount(searchResponse, NUM_DOCS_IN_BULK);

// Ingesting docs again before force merge
indexBulk(INDEX_NAME_2, NUM_DOCS_IN_BULK);
flushAndRefresh(INDEX_NAME_2);

FileCache fileCache = internalTestCluster.getDataNodeInstance(Node.class).fileCache();

// TODO: Make these validation more robust, when SwitchableIndexInput is implemented.

NodesStatsResponse nodesStatsResponse = client().admin().cluster().nodesStats(new NodesStatsRequest().all()).actionGet();

FileCacheStats fileCacheStats = nodesStatsResponse.getNodes()
.stream()
.filter(n -> n.getNode().isDataNode())
.toList()
.getFirst()
.getFileCacheStats();

if (Objects.isNull(fileCacheStats)) {
fail("File Cache Stats should not be null");
}

FullFileCacheStats fullFileCacheStats = fileCacheStats.fullFileCacheStats();

if (Objects.isNull(fullFileCacheStats)) {
fail("Full File Cache Stats should not be null");
}

// Deleting the index (so that ref count drops to zero for all the files) and then pruning the cache to clear it to avoid any file
// leaks
assertAcked(client().admin().indices().delete(new DeleteIndexRequest(INDEX_NAME_2)).get());
fileCache.prune();

NodesStatsResponse response = client().admin().cluster().nodesStats(new NodesStatsRequest().all()).actionGet();
int nonEmptyFileCacheNodes = 0;
for (NodeStats stats : response.getNodes()) {
FileCacheStats fcStats = stats.getFileCacheStats();
if (Objects.isNull(fcStats) == false) {
if (isFileCacheEmpty(fcStats) == false) {
nonEmptyFileCacheNodes++;
}
}
}
assertEquals(0, nonEmptyFileCacheNodes);

}

private boolean isFileCacheEmpty(FileCacheStats stats) {
return stats.getUsed().getBytes() == 0L && stats.getActive().getBytes() == 0L;
}
}
Original file line number Diff line number Diff line change
@@ -14,7 +14,6 @@
import org.opensearch.common.annotation.PublicApi;
import org.opensearch.core.common.breaker.CircuitBreaker;
import org.opensearch.core.common.breaker.CircuitBreakingException;
import org.opensearch.index.store.remote.utils.cache.CacheUsage;
import org.opensearch.index.store.remote.utils.cache.RefCountedCache;
import org.opensearch.index.store.remote.utils.cache.SegmentedCache;
import org.opensearch.index.store.remote.utils.cache.stats.CacheStats;
@@ -133,10 +132,15 @@
}

@Override
public CacheUsage usage() {
public long usage() {
return theCache.usage();
}

@Override
public long activeUsage() {
return theCache.activeUsage();
}

@Override
public CacheStats stats() {
return theCache.stats();
@@ -145,8 +149,8 @@
// To be used only for debugging purposes
public void logCurrentState() {
logger.trace("CURRENT STATE OF FILE CACHE \n");
CacheUsage cacheUsage = theCache.usage();
logger.trace("Total Usage: " + cacheUsage.usage() + " , Active Usage: " + cacheUsage.activeUsage());
long cacheUsage = theCache.usage();
logger.trace("Total Usage: " + cacheUsage);

Check warning on line 153 in server/src/main/java/org/opensearch/index/store/remote/filecache/FileCache.java

Codecov / codecov/patch

server/src/main/java/org/opensearch/index/store/remote/filecache/FileCache.java#L152-L153

Added lines #L152 - L153 were not covered by tests
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we log the active usage here as well since it was logged previously as well.

theCache.logCurrentState();
}

@@ -206,16 +210,23 @@
* Returns the current {@link FileCacheStats}
*/
public FileCacheStats fileCacheStats() {
CacheStats stats = stats();
CacheUsage usage = usage();
final CacheStats stats = stats();
final CacheStats.FullFileStats fullFileStats = stats.fullFileStats();

return new FileCacheStats(
System.currentTimeMillis(),
usage.activeUsage(),
stats.activeUsage(),
capacity(),
usage.usage(),
stats.usage(),
stats.evictionWeight(),
stats.hitCount(),
stats.missCount()
stats.missCount(),
new FullFileCacheStats(
fullFileStats.getActiveUsage(),
fullFileStats.getUsage(),
fullFileStats.getEvictionWeight(),
fullFileStats.getHitCount()
)
);
}

Original file line number Diff line number Diff line change
@@ -19,7 +19,15 @@
import java.io.IOException;

/**
* Statistics on file cache
* Statistics for the file cache system that tracks memory usage and performance metrics.
* {@link FileCache} internally uses a {@link org.opensearch.index.store.remote.utils.cache.SegmentedCache}
* to manage cached file data in memory segments.
* This class aggregates statistics across all cache segments including:
* - Memory usage (total, active, used)
* - Cache performance (hits, misses, evictions)
* - Utilization percentages
* The statistics are exposed via {@link org.opensearch.action.admin.cluster.node.stats.NodeStats}
* to provide visibility into cache behavior and performance.
*
* @opensearch.api
*/
@@ -33,6 +41,7 @@
private final long evicted;
private final long hits;
private final long misses;
private final FullFileCacheStats fullFileCacheStats;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This approach is not extensible, in case we need to add another BlockedFileCacheStats or PinnedFileCacheStats, we would have to create multiple duplicate classes with the same fields and functionality. Let's create a base class similar to below

public class FileCacheStatsBase {
    private long used;
    private long total;
    private long evicted;
    private long hits;
    private long misses;
}

Then we can create overallCacheStats, FullFileCacheStats, BlockedFileCacheStats, PinnedFileCacheStats and have our FileCacheStats as below

public class FileCacheStats {
    private FileCacheStatsBase overallCacheStats;
    private FileCacheStatsBase FullFileCacheStats;
    private FileCacheStatsBase BlockedFileCacheStats;
    private FileCacheStatsBase PinnedFileCacheStats;

    // return the overall stats for currently existing methods such as getHits/getMisses etc
    public long getCacheHits/getCacheMisses/...() {
        overallCacheStats.hits/misses;
    }

    // similarly for all other stats
    public long getFullFileCacheStats() {
        return FullFileCacheStats;
    }

}


public FileCacheStats(
final long timestamp,
@@ -41,7 +50,8 @@
final long used,
final long evicted,
final long hits,
final long misses
final long misses,
final FullFileCacheStats fullFileCacheStats
) {
this.timestamp = timestamp;
this.active = active;
@@ -50,6 +60,7 @@
this.evicted = evicted;
this.hits = hits;
this.misses = misses;
this.fullFileCacheStats = fullFileCacheStats;
}

public FileCacheStats(final StreamInput in) throws IOException {
@@ -60,6 +71,7 @@
this.evicted = in.readLong();
this.hits = in.readLong();
this.misses = in.readLong();
this.fullFileCacheStats = new FullFileCacheStats(in);
}

public static short calculatePercentage(long used, long max) {
@@ -75,6 +87,7 @@
out.writeLong(evicted);
out.writeLong(hits);
out.writeLong(misses);
if (fullFileCacheStats != null) fullFileCacheStats.writeTo(out);
}

public long getTimestamp() {
@@ -113,6 +126,10 @@
return misses;
}

public FullFileCacheStats fullFileCacheStats() {
return fullFileCacheStats;
}

@Override
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
builder.startObject(Fields.FILE_CACHE);
@@ -125,6 +142,9 @@
builder.field(Fields.USED_PERCENT, getUsedPercent());
builder.field(Fields.HIT_COUNT, getCacheHits());
builder.field(Fields.MISS_COUNT, getCacheMisses());
if (fullFileCacheStats != null) {
fullFileCacheStats.toXContent(builder, params);

Check warning on line 146 in server/src/main/java/org/opensearch/index/store/remote/filecache/FileCacheStats.java

Codecov / codecov/patch

server/src/main/java/org/opensearch/index/store/remote/filecache/FileCacheStats.java#L146

Added line #L146 was not covered by tests
}
builder.endObject();
return builder;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.index.store.remote.filecache;

import org.opensearch.common.annotation.ExperimentalApi;
import org.opensearch.core.common.io.stream.StreamInput;
import org.opensearch.core.common.io.stream.StreamOutput;
import org.opensearch.core.common.io.stream.Writeable;
import org.opensearch.core.xcontent.ToXContentFragment;
import org.opensearch.core.xcontent.XContentBuilder;

import java.io.IOException;

import static org.opensearch.index.store.remote.filecache.FileCacheStats.calculatePercentage;

/**
* Statistics for the file cache system that tracks memory usage and performance metrics.
* Aggregates statistics across all cache segments including:
* - Memory usage: active and used bytes.
* - Cache performance: hit counts and eviction counts.
* - Utilization: active percentage of total used memory.
* The statistics are exposed as part of {@link FileCacheStats} and via {@link org.opensearch.action.admin.cluster.node.stats.NodeStats}
* to provide visibility into cache behavior and performance.
*
* @opensearch.api
*/
@ExperimentalApi
public class FullFileCacheStats implements Writeable, ToXContentFragment {

private final long active;
private final long used;
private final long evicted;
private final long hits;

public FullFileCacheStats(final long active, final long used, final long evicted, final long hits) {
this.active = active;
this.used = used;
this.evicted = evicted;
this.hits = hits;
}

public FullFileCacheStats(final StreamInput in) throws IOException {
this.active = in.readLong();
this.used = in.readLong();
this.evicted = in.readLong();
this.hits = in.readLong();
}

@Override
public void writeTo(final StreamOutput out) throws IOException {
out.writeLong(active);
out.writeLong(used);
out.writeLong(evicted);
out.writeLong(hits);
}

public long getActive() {
return active;
}

public long getUsed() {
return used;
}

public long getEvicted() {
return evicted;
}

public long getHits() {
return hits;
}

public short getActivePercent() {
return calculatePercentage(active, used);
}

static final class Fields {

Check warning on line 83 in server/src/main/java/org/opensearch/index/store/remote/filecache/FullFileCacheStats.java

Codecov / codecov/patch

server/src/main/java/org/opensearch/index/store/remote/filecache/FullFileCacheStats.java#L83

Added line #L83 was not covered by tests
static final String FULL_FILE_STATS = "full_file_stats";
static final String ACTIVE = "active";
static final String ACTIVE_IN_BYTES = "active_in_bytes";
static final String USED = "used";
static final String USED_IN_BYTES = "used_in_bytes";
static final String EVICTIONS = "evictions";
static final String EVICTIONS_IN_BYTES = "evictions_in_bytes";
static final String ACTIVE_PERCENT = "active_percent";
static final String HIT_COUNT = "hit_count";
}

@Override
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
builder.startObject(Fields.FULL_FILE_STATS);
builder.humanReadableField(FullFileCacheStats.Fields.ACTIVE_IN_BYTES, FullFileCacheStats.Fields.ACTIVE, getActive());
builder.humanReadableField(FullFileCacheStats.Fields.USED_IN_BYTES, FullFileCacheStats.Fields.USED, getUsed());
builder.humanReadableField(FullFileCacheStats.Fields.EVICTIONS_IN_BYTES, FullFileCacheStats.Fields.EVICTIONS, getEvicted());
builder.field(FullFileCacheStats.Fields.ACTIVE_PERCENT, getActivePercent());
builder.field(FullFileCacheStats.Fields.HIT_COUNT, getHits());
builder.endObject();
return builder;

Check warning on line 104 in server/src/main/java/org/opensearch/index/store/remote/filecache/FullFileCacheStats.java

Codecov / codecov/patch

server/src/main/java/org/opensearch/index/store/remote/filecache/FullFileCacheStats.java#L97-L104

Added lines #L97 - L104 were not covered by tests
}
}
Original file line number Diff line number Diff line change
@@ -111,13 +111,13 @@ private static FileCachedIndexInput createIndexInput(FileCache fileCache, Stream
try {
// This local file cache is ref counted and may not strictly enforce configured capacity.
// If we find available capacity is exceeded, deny further BlobFetchRequests.
if (fileCache.capacity() < fileCache.usage().usage()) {
if (fileCache.capacity() < fileCache.usage()) {
fileCache.prune();
throw new IOException(
"Local file cache capacity ("
+ fileCache.capacity()
+ ") exceeded ("
+ fileCache.usage().usage()
+ fileCache.usage()
+ ") - BlobFetchRequest failed: "
+ request.getFilePath()
);
Loading