Skip to content

Commit 36acc7b

Browse files
authored
fix(forecast): auto-expand replicas for default results index on 3AZ domains (#1615)
When running forecasting on a 3AZ domain with awareness attributes=3, the forecaster failed to initialize with:

IllegalArgumentException: Validation Failed: 1: expected total copies needs to be a multiple of total awareness attributes [3];

The default forecast results index was created with:
- number_of_shards: 1
- number_of_replicas: 1

This gives 2 total copies, which violates the 3AZ awareness requirement and causes index creation to fail, so all forecasting results errored on 3AZ domains.

This change updates IndexManagement.java to:
- set auto_expand_replicas to "0-2" for the default forecast results index

Single-AZ domains remain replica-free, while 3AZ domains automatically allocate 2 replicas (3 total copies), satisfying the awareness constraint and allowing forecasters to initialize successfully.

Test:
1. manual test

Signed-off-by: kaituo <[email protected]>
1 parent a6c1916 commit 36acc7b

File tree

5 files changed

+31
-28
lines changed

5 files changed

+31
-28
lines changed

.github/workflows/test_bwc.yml

Lines changed: 11 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -22,16 +22,15 @@ jobs:
2222

2323
name: Test Anomaly detection BWC
2424
runs-on: ubuntu-latest
25-
container:
26-
# using the same image which is used by opensearch-build team to build the OpenSearch Distribution
27-
# this image tag is subject to change as more dependencies and updates will arrive over time
28-
image: ${{ needs.Get-CI-Image-Tag.outputs.ci-image-version-linux }}
29-
# need to switch to root so that github actions can install runner binary on container without permission issues.
30-
options: ${{ needs.Get-CI-Image-Tag.outputs.ci-image-start-options }}
3125

3226
steps:
33-
- name: Run start commands
34-
run: ${{ needs.Get-CI-Image-Tag.outputs.ci-image-start-command }}
27+
# https://github.com/actions/runner-images/issues/2840#issuecomment-790492173
28+
- name: Remove unnecessary files Linux
29+
if: ${{ runner.os == 'Linux' }}
30+
run: |
31+
sudo rm -rf /usr/share/dotnet
32+
sudo rm -rf "$AGENT_TOOLSDIRECTORY"
33+
3534
- name: Setup Java ${{ matrix.java }}
3635
uses: actions/setup-java@v4
3736
with:
@@ -45,18 +44,16 @@ jobs:
4544
- name: Assemble anomaly-detection
4645
run: |
4746
plugin_version=`./gradlew properties -q | grep "opensearch_build:" | awk '{print $2}'`
48-
chown -R 1000:1000 `pwd`
4947
echo plugin_version $plugin_version
50-
su `id -un 1000` -c "./gradlew assemble"
48+
./gradlew assemble
5149
echo "Creating ./src/test/resources/org/opensearch/ad/bwc/anomaly-detection/$plugin_version ..."
52-
su `id -un 1000` -c "mkdir -p ./src/test/resources/org/opensearch/ad/bwc/anomaly-detection/$plugin_version"
50+
mkdir -p ./src/test/resources/org/opensearch/ad/bwc/anomaly-detection/$plugin_version
5351
echo "Copying ./build/distributions/*.zip to ./src/test/resources/org/opensearch/ad/bwc/anomaly-detection/$plugin_version ..."
5452
ls ./build/distributions/
55-
su `id -un 1000` -c "cp ./build/distributions/*.zip ./src/test/resources/org/opensearch/ad/bwc/anomaly-detection/$plugin_version"
53+
cp ./build/distributions/*.zip ./src/test/resources/org/opensearch/ad/bwc/anomaly-detection/$plugin_version
5654
echo "Copied ./build/distributions/*.zip to ./src/test/resources/org/opensearch/ad/bwc/anomaly-detection/$plugin_version ..."
5755
ls ./src/test/resources/org/opensearch/ad/bwc/anomaly-detection/$plugin_version
5856
- name: Run AD Backwards Compatibility Tests
5957
run: |
6058
echo "Running backwards compatibility tests ..."
61-
chown -R 1000:1000 `pwd`
62-
su `id -un 1000` -c "./gradlew bwcTestSuite -Dtests.security.manager=false"
59+
./gradlew bwcTestSuite -Dtests.security.manager=false

CHANGELOG.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.1.0/)
1111
- Adding auto create as an optional field on detectors ([#1602](https://github.com/opensearch-project/anomaly-detection/pull/1602))
1212

1313
### Bug Fixes
14-
14+
- fix(forecast): auto-expand replicas for default results index on 3AZ domains ([#1615](https://github.com/opensearch-project/anomaly-detection/pull/1615))
1515

1616
### Infrastructure
1717
- Test: Prevent oversized bulk requests in synthetic data test ([#1603](https://github.com/opensearch-project/anomaly-detection/pull/1603))

build.gradle

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ buildscript {
5454
js_resource_folder = "src/test/resources/job-scheduler"
5555
common_utils_version = System.getProperty("common_utils.version", opensearch_build)
5656
job_scheduler_version = System.getProperty("job_scheduler.version", opensearch_build)
57-
bwcVersionShort = "2.20.0"
57+
bwcVersionShort = "2.19.1"
5858
bwcVersion = bwcVersionShort + ".0"
5959
bwcOpenSearchADDownload = 'https://ci.opensearch.org/ci/dbc/distribution-build-opensearch/' + bwcVersionShort + '/latest/linux/x64/tar/builds/' +
6060
'opensearch/plugins/opensearch-anomaly-detection-' + bwcVersion + '.zip'

src/main/java/org/opensearch/timeseries/indices/IndexManagement.java

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -290,8 +290,8 @@ protected void choosePrimaryShards(CreateIndexRequest request, boolean hiddenInd
290290
.builder()
291291
// put 1 primary shards per hot node if possible
292292
.put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, getNumberOfPrimaryShards())
293-
// 1 replica for better search performance and fail-over
294-
.put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1)
293+
// Support up to 2 replicas at least
294+
.put(IndexMetadata.SETTING_AUTO_EXPAND_REPLICAS, customResultIndexAutoExpandReplica)
295295
.put(IndexMetadata.SETTING_INDEX_HIDDEN, hiddenIndex)
296296
);
297297
}
@@ -1403,13 +1403,6 @@ protected void initResultIndexDirectly(
14031403
if (defaultResultIndex) {
14041404
adminClient.indices().create(request, markMappingUpToDate(resultIndex, actionListener));
14051405
} else {
1406-
request
1407-
.settings(
1408-
Settings
1409-
.builder()
1410-
// Support up to 2 replicas at least
1411-
.put(IndexMetadata.SETTING_AUTO_EXPAND_REPLICAS, customResultIndexAutoExpandReplica)
1412-
);
14131406
adminClient.indices().create(request, actionListener);
14141407
}
14151408
}

src/test/java/org/opensearch/timeseries/ODFERestTestCase.java

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@
5656
import org.apache.hc.core5.http.HttpEntity;
5757
import org.apache.hc.core5.http.HttpHeaders;
5858
import org.apache.hc.core5.http.HttpHost;
59+
import org.apache.hc.core5.http.io.entity.EntityUtils;
5960
import org.apache.hc.core5.http.message.BasicHeader;
6061
import org.apache.hc.core5.http.nio.ssl.TlsStrategy;
6162
import org.apache.hc.core5.reactor.ssl.TlsDetails;
@@ -438,7 +439,7 @@ public Response createRoleMapping(String role, ArrayList<String> users) throws I
438439
for (int i = 0; i < users.size(); i++) {
439440
usersString.add(users.get(i));
440441
}
441-
return TestHelpers
442+
Response response = TestHelpers
442443
.makeRequest(
443444
client(),
444445
"PUT",
@@ -450,10 +451,14 @@ public Response createRoleMapping(String role, ArrayList<String> users) throws I
450451
),
451452
ImmutableList.of(new BasicHeader(HttpHeaders.USER_AGENT, "Kibana"))
452453
);
454+
// Consume the body so the underlying HTTP connection can be released back to the pool; otherwise the dispatcher
455+
// thread stays tied to the response and leaks when the test shuts down.
456+
EntityUtils.consume(response.getEntity());
457+
return response;
453458
}
454459

455460
public Response deleteUser(String user) throws IOException {
456-
return TestHelpers
461+
Response response = TestHelpers
457462
.makeRequest(
458463
client(),
459464
"DELETE",
@@ -462,6 +467,10 @@ public Response deleteUser(String user) throws IOException {
462467
"",
463468
ImmutableList.of(new BasicHeader(HttpHeaders.USER_AGENT, "Kibana"))
464469
);
470+
// Consume the body so the underlying HTTP connection can be released back to the pool; otherwise the dispatcher
471+
// thread stays tied to the response and leaks when the test shuts down.
472+
EntityUtils.consume(response.getEntity());
473+
return response;
465474
}
466475

467476
public Response deleteAllResourceSharingRecords() throws IOException {
@@ -473,7 +482,7 @@ public Response deleteAllResourceSharingRecords() throws IOException {
473482
params.put("conflicts", "proceed");
474483
params.put("refresh", "true");
475484

476-
return TestHelpers
485+
Response response = TestHelpers
477486
.makeRequest(
478487
adminClient(),
479488
"POST",
@@ -483,6 +492,10 @@ public Response deleteAllResourceSharingRecords() throws IOException {
483492
ImmutableList
484493
.of(new BasicHeader(HttpHeaders.USER_AGENT, "Kibana"), new BasicHeader(HttpHeaders.CONTENT_TYPE, "application/json"))
485494
);
495+
// Consume the body so the underlying HTTP connection can be released back to the pool; otherwise the dispatcher
496+
// thread stays tied to the response and leaks when the test shuts down.
497+
EntityUtils.consume(response.getEntity());
498+
return response;
486499
}
487500

488501
}

0 commit comments

Comments
 (0)