Skip to content

Commit e5c0779

Browse files
SwethaGupthaSwetha Guptha
authored and
Swetha Guptha
committed
Optimise memory for rest cluster health calls with in-line shard aggregations for levels cluster and indices. (opensearch-project#15492)
Signed-off-by: Swetha Guptha <[email protected]>
1 parent 7712bea commit e5c0779

File tree

15 files changed

+673
-41
lines changed

15 files changed

+673
-41
lines changed

server/src/internalClusterTest/java/org/opensearch/cluster/ClusterHealthIT.java

+163
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,8 @@
3737
import org.opensearch.action.support.IndicesOptions;
3838
import org.opensearch.action.support.PlainActionFuture;
3939
import org.opensearch.cluster.health.ClusterHealthStatus;
40+
import org.opensearch.cluster.health.ClusterIndexHealth;
41+
import org.opensearch.cluster.health.ClusterShardHealth;
4042
import org.opensearch.cluster.metadata.IndexMetadata;
4143
import org.opensearch.cluster.routing.UnassignedInfo;
4244
import org.opensearch.cluster.service.ClusterService;
@@ -49,6 +51,7 @@
4951

5052
import java.util.ArrayList;
5153
import java.util.List;
54+
import java.util.Map;
5255
import java.util.concurrent.atomic.AtomicBoolean;
5356

5457
import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked;
@@ -439,4 +442,164 @@ public void clusterStateProcessed(String source, ClusterState oldState, ClusterS
439442
completionFuture.actionGet(TimeValue.timeValueSeconds(30));
440443
}
441444
}
445+
446+
public void testHealthWithClusterLevelAppliedAtTransportLayer() {
447+
createIndex(
448+
"test1",
449+
Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1).put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0).build()
450+
);
451+
ensureGreen();
452+
ClusterHealthResponse healthResponse = client().admin()
453+
.cluster()
454+
.prepareHealth()
455+
.setApplyLevelAtTransportLayer(true)
456+
.execute()
457+
.actionGet();
458+
assertEquals(ClusterHealthStatus.GREEN, healthResponse.getStatus());
459+
assertTrue(healthResponse.getIndices().isEmpty());
460+
assertEquals(1, healthResponse.getActiveShards());
461+
assertEquals(1, healthResponse.getActivePrimaryShards());
462+
assertEquals(0, healthResponse.getUnassignedShards());
463+
assertEquals(0, healthResponse.getInitializingShards());
464+
assertEquals(0, healthResponse.getRelocatingShards());
465+
assertEquals(0, healthResponse.getDelayedUnassignedShards());
466+
}
467+
468+
public void testHealthWithIndicesLevelAppliedAtTransportLayer() {
469+
createIndex(
470+
"test1",
471+
Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1).put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0).build()
472+
);
473+
ensureGreen();
474+
ClusterHealthResponse healthResponse = client().admin()
475+
.cluster()
476+
.prepareHealth()
477+
.setLevel("indices")
478+
.setApplyLevelAtTransportLayer(true)
479+
.execute()
480+
.actionGet();
481+
assertEquals(ClusterHealthStatus.GREEN, healthResponse.getStatus());
482+
483+
assertEquals(1, healthResponse.getActiveShards());
484+
assertEquals(1, healthResponse.getActivePrimaryShards());
485+
assertEquals(0, healthResponse.getUnassignedShards());
486+
assertEquals(0, healthResponse.getInitializingShards());
487+
assertEquals(0, healthResponse.getRelocatingShards());
488+
assertEquals(0, healthResponse.getDelayedUnassignedShards());
489+
490+
Map<String, ClusterIndexHealth> indices = healthResponse.getIndices();
491+
assertFalse(indices.isEmpty());
492+
assertEquals(1, indices.size());
493+
for (Map.Entry<String, ClusterIndexHealth> indicesHealth : indices.entrySet()) {
494+
String indexName = indicesHealth.getKey();
495+
assertEquals("test1", indexName);
496+
ClusterIndexHealth indicesHealthValue = indicesHealth.getValue();
497+
assertEquals(1, indicesHealthValue.getActiveShards());
498+
assertEquals(1, indicesHealthValue.getActivePrimaryShards());
499+
assertEquals(0, indicesHealthValue.getInitializingShards());
500+
assertEquals(0, indicesHealthValue.getUnassignedShards());
501+
assertEquals(0, indicesHealthValue.getDelayedUnassignedShards());
502+
assertEquals(0, indicesHealthValue.getRelocatingShards());
503+
assertEquals(ClusterHealthStatus.GREEN, indicesHealthValue.getStatus());
504+
assertTrue(indicesHealthValue.getShards().isEmpty());
505+
}
506+
}
507+
508+
public void testHealthWithShardLevelAppliedAtTransportLayer() {
509+
int dataNodes = internalCluster().getDataNodeNames().size();
510+
int greenClusterReplicaCount = dataNodes - 1;
511+
int yellowClusterReplicaCount = dataNodes;
512+
513+
createIndex(
514+
"test1",
515+
Settings.builder()
516+
.put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1)
517+
.put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, greenClusterReplicaCount)
518+
.build()
519+
);
520+
ensureGreen(TimeValue.timeValueSeconds(120), "test1");
521+
createIndex(
522+
"test2",
523+
Settings.builder()
524+
.put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1)
525+
.put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, greenClusterReplicaCount)
526+
.build()
527+
);
528+
ensureGreen(TimeValue.timeValueSeconds(120));
529+
client().admin()
530+
.indices()
531+
.prepareUpdateSettings()
532+
.setIndices("test2")
533+
.setSettings(Settings.builder().put("index.number_of_replicas", yellowClusterReplicaCount).build())
534+
.execute()
535+
.actionGet();
536+
ClusterHealthResponse healthResponse = client().admin()
537+
.cluster()
538+
.prepareHealth()
539+
.setLevel("shards")
540+
.setApplyLevelAtTransportLayer(true)
541+
.execute()
542+
.actionGet();
543+
assertEquals(ClusterHealthStatus.YELLOW, healthResponse.getStatus());
544+
545+
assertEquals(2 * dataNodes, healthResponse.getActiveShards());
546+
assertEquals(2, healthResponse.getActivePrimaryShards());
547+
assertEquals(1, healthResponse.getUnassignedShards());
548+
assertEquals(0, healthResponse.getInitializingShards());
549+
assertEquals(0, healthResponse.getRelocatingShards());
550+
assertEquals(0, healthResponse.getDelayedUnassignedShards());
551+
552+
Map<String, ClusterIndexHealth> indices = healthResponse.getIndices();
553+
assertFalse(indices.isEmpty());
554+
assertEquals(2, indices.size());
555+
for (Map.Entry<String, ClusterIndexHealth> indicesHealth : indices.entrySet()) {
556+
String indexName = indicesHealth.getKey();
557+
boolean indexHasMoreReplicas = indexName.equals("test2");
558+
ClusterIndexHealth indicesHealthValue = indicesHealth.getValue();
559+
assertEquals(dataNodes, indicesHealthValue.getActiveShards());
560+
assertEquals(1, indicesHealthValue.getActivePrimaryShards());
561+
assertEquals(0, indicesHealthValue.getInitializingShards());
562+
assertEquals(indexHasMoreReplicas ? 1 : 0, indicesHealthValue.getUnassignedShards());
563+
assertEquals(0, indicesHealthValue.getDelayedUnassignedShards());
564+
assertEquals(0, indicesHealthValue.getRelocatingShards());
565+
assertEquals(indexHasMoreReplicas ? ClusterHealthStatus.YELLOW : ClusterHealthStatus.GREEN, indicesHealthValue.getStatus());
566+
Map<Integer, ClusterShardHealth> shards = indicesHealthValue.getShards();
567+
assertFalse(shards.isEmpty());
568+
assertEquals(1, shards.size());
569+
for (Map.Entry<Integer, ClusterShardHealth> shardHealth : shards.entrySet()) {
570+
ClusterShardHealth clusterShardHealth = shardHealth.getValue();
571+
assertEquals(dataNodes, clusterShardHealth.getActiveShards());
572+
assertEquals(indexHasMoreReplicas ? 1 : 0, clusterShardHealth.getUnassignedShards());
573+
assertEquals(0, clusterShardHealth.getDelayedUnassignedShards());
574+
assertEquals(0, clusterShardHealth.getRelocatingShards());
575+
assertEquals(0, clusterShardHealth.getInitializingShards());
576+
assertTrue(clusterShardHealth.isPrimaryActive());
577+
assertEquals(indexHasMoreReplicas ? ClusterHealthStatus.YELLOW : ClusterHealthStatus.GREEN, clusterShardHealth.getStatus());
578+
}
579+
}
580+
}
581+
582+
public void testHealthWithAwarenessAttributesLevelAppliedAtTransportLayer() {
583+
createIndex(
584+
"test1",
585+
Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1).put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0).build()
586+
);
587+
ensureGreen();
588+
ClusterHealthResponse healthResponse = client().admin()
589+
.cluster()
590+
.prepareHealth()
591+
.setLevel("awareness_attributes")
592+
.setApplyLevelAtTransportLayer(true)
593+
.execute()
594+
.actionGet();
595+
assertEquals(ClusterHealthStatus.GREEN, healthResponse.getStatus());
596+
assertTrue(healthResponse.getIndices().isEmpty());
597+
assertNotNull(healthResponse.getClusterAwarenessHealth());
598+
assertEquals(1, healthResponse.getActiveShards());
599+
assertEquals(1, healthResponse.getActivePrimaryShards());
600+
assertEquals(0, healthResponse.getUnassignedShards());
601+
assertEquals(0, healthResponse.getInitializingShards());
602+
assertEquals(0, healthResponse.getRelocatingShards());
603+
assertEquals(0, healthResponse.getDelayedUnassignedShards());
604+
}
442605
}

server/src/internalClusterTest/java/org/opensearch/remotemigration/RemoteStoreMigrationSettingsUpdateIT.java

+2-1
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
import org.opensearch.client.Client;
1212
import org.opensearch.common.settings.Settings;
1313
import org.opensearch.common.settings.SettingsException;
14+
import org.opensearch.common.unit.TimeValue;
1415
import org.opensearch.test.InternalTestCluster;
1516
import org.opensearch.test.OpenSearchIntegTestCase;
1617

@@ -109,7 +110,7 @@ public void testNewRestoredIndexIsRemoteStoreBackedForRemoteStoreDirectionAndMix
109110
setDirection(REMOTE_STORE.direction);
110111
String restoredIndexName2 = TEST_INDEX + "-restored2";
111112
restoreSnapshot(snapshotRepoName, snapshotName, restoredIndexName2);
112-
ensureGreen(restoredIndexName2);
113+
ensureGreen(TimeValue.timeValueSeconds(90), restoredIndexName2);
113114

114115
logger.info("Verify that restored index is non remote-backed");
115116
assertRemoteStoreBackedIndex(restoredIndexName2);

server/src/main/java/org/opensearch/action/admin/cluster/health/ClusterHealthRequest.java

+25
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,17 @@ public class ClusterHealthRequest extends ClusterManagerNodeReadRequest<ClusterH
7777
*/
7878
private Level level = Level.CLUSTER;
7979

80+
/**
81+
* This flag will be used by the TransportClusterHealthAction to decide if indices/shards info is required in the ClusterHealthResponse or not.
82+
* When the flag is disabled - indices/shard info will be returned in ClusterHealthResponse regardless of the health level requested.
83+
* When the flag is enabled - indices/shards info will be set according to health level requested.
84+
* For Level.CLUSTER (or) Level.AWARENESS_ATTRIBUTES - information on indices/shards will NOT be returned to the transport client
85+
* For Level.INDICES - information on indices will be returned to the transport client.
86+
* For Level.SHARDS - information on indices and shards will be returned to the transport client
87+
* By default, the flag is disabled.
88+
*/
89+
private boolean applyLevelAtTransportLayer = false;
90+
8091
public ClusterHealthRequest() {}
8192

8293
public ClusterHealthRequest(String... indices) {
@@ -109,6 +120,9 @@ public ClusterHealthRequest(StreamInput in) throws IOException {
109120
if (in.getVersion().onOrAfter(Version.V_2_6_0)) {
110121
ensureNodeWeighedIn = in.readBoolean();
111122
}
123+
if (in.getVersion().onOrAfter(Version.V_2_17_0)) {
124+
applyLevelAtTransportLayer = in.readBoolean();
125+
}
112126
}
113127

114128
@Override
@@ -146,6 +160,9 @@ public void writeTo(StreamOutput out) throws IOException {
146160
if (out.getVersion().onOrAfter(Version.V_2_6_0)) {
147161
out.writeBoolean(ensureNodeWeighedIn);
148162
}
163+
if (out.getVersion().onOrAfter(Version.V_2_17_0)) {
164+
out.writeBoolean(applyLevelAtTransportLayer);
165+
}
149166
}
150167

151168
@Override
@@ -344,6 +361,14 @@ public final boolean ensureNodeWeighedIn() {
344361
return ensureNodeWeighedIn;
345362
}
346363

364+
public boolean isApplyLevelAtTransportLayer() {
365+
return applyLevelAtTransportLayer;
366+
}
367+
368+
public void setApplyLevelAtTransportLayer(boolean applyLevelAtTransportLayer) {
369+
this.applyLevelAtTransportLayer = applyLevelAtTransportLayer;
370+
}
371+
347372
@Override
348373
public ActionRequestValidationException validate() {
349374
if (level.equals(Level.AWARENESS_ATTRIBUTES) && indices.length > 0) {

server/src/main/java/org/opensearch/action/admin/cluster/health/ClusterHealthRequestBuilder.java

+5
Original file line numberDiff line numberDiff line change
@@ -171,4 +171,9 @@ public final ClusterHealthRequestBuilder setEnsureNodeWeighedIn(boolean ensureNo
171171
request.ensureNodeWeighedIn(ensureNodeCommissioned);
172172
return this;
173173
}
174+
175+
public ClusterHealthRequestBuilder setApplyLevelAtTransportLayer(boolean applyLevelAtTransportLayer) {
176+
request.setApplyLevelAtTransportLayer(applyLevelAtTransportLayer);
177+
return this;
178+
}
174179
}

server/src/main/java/org/opensearch/action/admin/cluster/health/ClusterHealthResponse.java

+43
Original file line numberDiff line numberDiff line change
@@ -237,6 +237,26 @@ public ClusterHealthResponse(
237237
this.clusterHealthStatus = clusterStateHealth.getStatus();
238238
}
239239

240+
public ClusterHealthResponse(
241+
String clusterName,
242+
String[] concreteIndices,
243+
ClusterHealthRequest clusterHealthRequest,
244+
ClusterState clusterState,
245+
int numberOfPendingTasks,
246+
int numberOfInFlightFetch,
247+
TimeValue taskMaxWaitingTime
248+
) {
249+
this.clusterName = clusterName;
250+
this.numberOfPendingTasks = numberOfPendingTasks;
251+
this.numberOfInFlightFetch = numberOfInFlightFetch;
252+
this.taskMaxWaitingTime = taskMaxWaitingTime;
253+
this.clusterStateHealth = clusterHealthRequest.isApplyLevelAtTransportLayer()
254+
? new ClusterStateHealth(clusterState, concreteIndices, clusterHealthRequest.level())
255+
: new ClusterStateHealth(clusterState, concreteIndices);
256+
this.clusterHealthStatus = clusterStateHealth.getStatus();
257+
this.delayedUnassignedShards = clusterStateHealth.getDelayedUnassignedShards();
258+
}
259+
240260
// Awareness Attribute health
241261
public ClusterHealthResponse(
242262
String clusterName,
@@ -261,6 +281,29 @@ public ClusterHealthResponse(
261281
this.clusterAwarenessHealth = new ClusterAwarenessHealth(clusterState, clusterSettings, awarenessAttributeName);
262282
}
263283

284+
public ClusterHealthResponse(
285+
String clusterName,
286+
ClusterHealthRequest clusterHealthRequest,
287+
ClusterState clusterState,
288+
ClusterSettings clusterSettings,
289+
String[] concreteIndices,
290+
String awarenessAttributeName,
291+
int numberOfPendingTasks,
292+
int numberOfInFlightFetch,
293+
TimeValue taskMaxWaitingTime
294+
) {
295+
this(
296+
clusterName,
297+
concreteIndices,
298+
clusterHealthRequest,
299+
clusterState,
300+
numberOfPendingTasks,
301+
numberOfInFlightFetch,
302+
taskMaxWaitingTime
303+
);
304+
this.clusterAwarenessHealth = new ClusterAwarenessHealth(clusterState, clusterSettings, awarenessAttributeName);
305+
}
306+
264307
/**
265308
* For XContent Parser and serialization tests
266309
*/

server/src/main/java/org/opensearch/action/admin/cluster/health/TransportClusterHealthAction.java

+9-5
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,6 @@
5252
import org.opensearch.cluster.metadata.ProcessClusterEventTimeoutException;
5353
import org.opensearch.cluster.node.DiscoveryNode;
5454
import org.opensearch.cluster.routing.NodeWeighedAwayException;
55-
import org.opensearch.cluster.routing.UnassignedInfo;
5655
import org.opensearch.cluster.routing.WeightedRoutingUtils;
5756
import org.opensearch.cluster.routing.allocation.AllocationService;
5857
import org.opensearch.cluster.service.ClusterService;
@@ -487,7 +486,12 @@ private ClusterHealthResponse clusterHealth(
487486
TimeValue pendingTaskTimeInQueue
488487
) {
489488
if (logger.isTraceEnabled()) {
490-
logger.trace("Calculating health based on state version [{}]", clusterState.version());
489+
logger.trace(
490+
"Calculating health based on state version [{}] for health level [{}] and applyLevelAtTransportLayer set to [{}]",
491+
clusterState.version(),
492+
request.level(),
493+
request.isApplyLevelAtTransportLayer()
494+
);
491495
}
492496

493497
String[] concreteIndices;
@@ -496,13 +500,13 @@ private ClusterHealthResponse clusterHealth(
496500
concreteIndices = clusterState.getMetadata().getConcreteAllIndices();
497501
return new ClusterHealthResponse(
498502
clusterState.getClusterName().value(),
503+
request,
499504
clusterState,
500505
clusterService.getClusterSettings(),
501506
concreteIndices,
502507
awarenessAttribute,
503508
numberOfPendingTasks,
504509
numberOfInFlightFetch,
505-
UnassignedInfo.getNumberOfDelayedUnassigned(clusterState),
506510
pendingTaskTimeInQueue
507511
);
508512
}
@@ -514,10 +518,10 @@ private ClusterHealthResponse clusterHealth(
514518
ClusterHealthResponse response = new ClusterHealthResponse(
515519
clusterState.getClusterName().value(),
516520
Strings.EMPTY_ARRAY,
521+
request,
517522
clusterState,
518523
numberOfPendingTasks,
519524
numberOfInFlightFetch,
520-
UnassignedInfo.getNumberOfDelayedUnassigned(clusterState),
521525
pendingTaskTimeInQueue
522526
);
523527
response.setStatus(ClusterHealthStatus.RED);
@@ -527,10 +531,10 @@ private ClusterHealthResponse clusterHealth(
527531
return new ClusterHealthResponse(
528532
clusterState.getClusterName().value(),
529533
concreteIndices,
534+
request,
530535
clusterState,
531536
numberOfPendingTasks,
532537
numberOfInFlightFetch,
533-
UnassignedInfo.getNumberOfDelayedUnassigned(clusterState),
534538
pendingTaskTimeInQueue
535539
);
536540
}

server/src/main/java/org/opensearch/action/admin/cluster/stats/TransportClusterStatsAction.java

+2-1
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434

3535
import org.apache.lucene.store.AlreadyClosedException;
3636
import org.opensearch.action.FailedNodeException;
37+
import org.opensearch.action.admin.cluster.health.ClusterHealthRequest;
3738
import org.opensearch.action.admin.cluster.node.info.NodeInfo;
3839
import org.opensearch.action.admin.cluster.node.stats.NodeStats;
3940
import org.opensearch.action.admin.indices.stats.CommonStats;
@@ -209,7 +210,7 @@ protected ClusterStatsNodeResponse nodeOperation(ClusterStatsNodeRequest nodeReq
209210

210211
ClusterHealthStatus clusterStatus = null;
211212
if (clusterService.state().nodes().isLocalNodeElectedClusterManager()) {
212-
clusterStatus = new ClusterStateHealth(clusterService.state()).getStatus();
213+
clusterStatus = new ClusterStateHealth(clusterService.state(), ClusterHealthRequest.Level.CLUSTER).getStatus();
213214
}
214215

215216
return new ClusterStatsNodeResponse(

0 commit comments

Comments
 (0)