Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
2e577c4
Add semantic highlighting response processor with batch inference sup…
junqiu-lei Aug 13, 2025
97fdaa6
Add integration test with remote model by local hosted torch server
junqiu-lei Sep 11, 2025
f1d4947
Add batch inference support for semantic highlighting with backward c…
junqiu-lei Sep 12, 2025
3674f23
Use remote model for both multi-node and single-node integ test
junqiu-lei Sep 12, 2025
7b17ac1
Fix UnsupportedOperationException in multi-node semantic highlighting
junqiu-lei Sep 12, 2025
1169728
Enable system-generated semantic highlighting processor with automati…
junqiu-lei Sep 12, 2025
5e68aa5
Refactor semantic highlighting with cleaner architecture
junqiu-lei Sep 16, 2025
ecbc24b
Implement remoteModelIntegTest Gradle task with unified script and us…
junqiu-lei Sep 16, 2025
96910a5
Add more unit test
junqiu-lei Sep 17, 2025
647ef19
fix remote_model_integ_tests.yml
junqiu-lei Sep 17, 2025
17fb837
Remove test annotation
junqiu-lei Sep 17, 2025
490761b
Add more integ test and update model type check
junqiu-lei Sep 17, 2025
7c1880a
Move batch inference configuration from query parameters to connector
junqiu-lei Sep 18, 2025
ab975db
Externalize connector configurations to resource files
junqiu-lei Sep 18, 2025
49d6cd1
Add bwc tests
junqiu-lei Sep 18, 2025
a2b5843
Enable semantic highlighting processor by default in cluster settings
junqiu-lei Sep 19, 2025
0b98538
Use unified connector api for bwc
junqiu-lei Sep 24, 2025
f049f9c
optimize integTest and remove unused code
junqiu-lei Sep 24, 2025
c473c21
Fix BWC tests: Implement proper semantic highlighting search requests
junqiu-lei Sep 24, 2025
deae9c0
rebase main with conflicts
junqiu-lei Sep 25, 2025
ef11222
optimize bwc ci runner disk space
junqiu-lei Sep 25, 2025
2f422a0
Keep using highlighter for single inference
junqiu-lei Sep 26, 2025
eb1e59f
update torchserve script to extend other models
junqiu-lei Sep 29, 2025
c65adf0
Add bwc for semantic highlighting
junqiu-lei Sep 30, 2025
c581afa
reduce debug logs
junqiu-lei Sep 30, 2025
4514cd9
fix bwc failure
junqiu-lei Sep 30, 2025
1cd5b77
Fix bwc test failure in semantic highlighting
junqiu-lei Sep 30, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 57 additions & 0 deletions .github/workflows/remote_model_integ_tests.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# Runs the Gradle task `remoteModelIntegTest` on a nightly schedule and on
# pushes / pull requests, once against a single-node cluster and once with
# the `numNodes` project property set from the matrix.
name: Remote Model Integration Tests

on:
  schedule:
    - cron: "0 0 * * *" # every night
  push:
    branches:
      - '*'
      - 'feature/**'
  pull_request:
    branches:
      - '*'
      - 'feature/**'

jobs:
  # Single-node run, repeated for every JDK listed in the matrix.
  remote-model-single-node-integration-tests:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        java:
          - 21
          - 24
    steps:
      - name: Checkout neural-search
        uses: actions/checkout@v4

      - name: Setup Java ${{ matrix.java }}
        uses: actions/setup-java@v4
        with:
          distribution: temurin
          java-version: ${{ matrix.java }}

      - name: Run single-node remote model tests
        env:
          CI: true
        run: |
          ./gradlew remoteModelIntegTest --info

  # Same task, with the node count forwarded as -PnumNodes.
  remote-model-multi-node-integration-tests:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        nodes:
          - 3
        java:
          - 21
    steps:
      - name: Checkout neural-search
        uses: actions/checkout@v4

      - name: Setup Java ${{ matrix.java }}
        uses: actions/setup-java@v4
        with:
          distribution: temurin
          java-version: ${{ matrix.java }}

      - name: Run multi-node remote model tests
        env:
          CI: true
        run: |
          ./gradlew remoteModelIntegTest -PnumNodes=${{ matrix.nodes }} --info
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- [Performance Improvement] Introduce QueryCollectContextSpec in Hybrid Query to improve search performance.
- [Agentic Search] Add support for conversational agent
- [Agentic Search] Add support for agent summary and memory id for conversational agent
- [Semantic Highlighting] Add semantic highlighting response processor with batch inference support ([#1520](https://github.com/opensearch-project/neural-search/pull/1520))

### Bug Fixes
- Fix reversed order of values in nested list with embedding processor [#1570](https://github.com/opensearch-project/neural-search/pull/1570)
Expand Down
124 changes: 124 additions & 0 deletions DEVELOPER_GUIDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,16 @@
- [Run OpenSearch neural-search](#run-opensearch-neural-search)
- [Run Single-node Cluster Locally](#run-single-node-cluster-locally)
- [Run Multi-node Cluster Locally](#run-multi-node-cluster-locally)
- [Running Tests with Remote Models](#running-tests-with-remote-models)
- [Running Remote Model Integration Tests](#running-remote-model-integration-tests)
- [Prerequisites](#prerequisites-1)
- [Helper Tasks](#helper-tasks)
- [Adding New Models](#adding-new-models)
- [Adding New Tests](#adding-new-tests)
- [Remote Model Infrastructure](#remote-model-infrastructure)
- [Troubleshooting](#troubleshooting-1)
- [Debugging](#debugging)
- [Major Dependencies](#major-dependencies)
- [Backwards Compatibility Testing](#backwards-compatibility-testing)
Expand Down Expand Up @@ -218,6 +228,120 @@ In case remote cluster is secured it's possible to pass username and password wi
./gradlew :integTestRemote -Dtests.rest.cluster=localhost:9200 -Dtests.cluster=localhost:9200 -Dtests.clustername="integTest-0" -Dhttps=true -Duser=admin -Dpassword=<admin-password>
```

## Running Tests with Remote Models

Some tests require a remote model server (e.g., TorchServe for semantic highlighting). They are run by a dedicated Gradle task, separately from the regular integration tests.

### Running Remote Model Integration Tests

Remote model integration tests are kept apart from the regular integration tests to reduce resource usage and execution time. Tests that need a remote model (e.g., one served by TorchServe) belong in test classes whose names end with `RemoteModelIT` (i.e., files named `*RemoteModelIT.java`).

```bash
./gradlew remoteModelIntegTest
```

This command will automatically:
1. Start an OpenSearch cluster with required plugins (k-NN, ML Commons, Neural Search)
2. Auto-discover all models from handler files in `src/test/resources/remote-models/torchserve/handlers/`
3. Start TorchServe Docker container and deploy all discovered models
4. Run tests that require remote models (classes ending with `RemoteModelIT`)
5. Display memory usage statistics at key checkpoints
6. Clean up all resources after completion

**Note**: Local model tests remain in regular integration test classes (ending with `IT.java`) and are run with the standard `./gradlew integTest` command.

#### Prerequisites

- **Docker**: Must be installed and running
- **Ports**: 8080 (inference) and 8081 (management) must be available

#### Helper Tasks

```bash
# List all discovered models
./gradlew listRemoteModels

# Start TorchServe container
./gradlew startTorchServe

# Check TorchServe status (includes memory usage)
./gradlew torchServeStatus

# Stop TorchServe container
./gradlew stopTorchServe
```

#### Adding New Models

To add a new model for testing:

1. Create a handler file in `src/test/resources/remote-models/torchserve/handlers/`:
- Name format: `<model_name>_handler.py`
- Example: `text_embedding_handler.py`

2. The model will be automatically discovered and deployed when running `remoteModelIntegTest`

#### Adding New Tests

To create tests that use remote models:

```java
// For remote model tests - place in a file ending with RemoteModelIT.java
public class SemanticHighlightingRemoteModelIT extends BaseNeuralSearchIT {

    // ID of the connector that points at the remote endpoint; must be set before deploying the model
    private String connectorId;
    private String remoteModelId;
    private boolean isTorchServeAvailable = false;

    @Before
    public void setUp() throws Exception {
        // Always run the inherited setup first; overriding setUp() without calling
        // super.setUp() skips the base class initialisation.
        super.setUp();

        // Check for TorchServe endpoint availability (environment variable wins over system property)
        String endpoint = System.getenv("TORCHSERVE_ENDPOINT");
        if (endpoint == null) {
            endpoint = System.getProperty("tests.torchserve.endpoint");
        }

        if (endpoint != null && RemoteModelTestUtils.isRemoteEndpointAvailable(endpoint)) {
            isTorchServeAvailable = true;
            // Create a connector for the endpoint and store its ID in connectorId here
            // (connector creation is omitted from this sketch).
            // Deploy remote model
            remoteModelId = deployRemoteSemanticHighlightingModel(connectorId, "model-name");
        }
    }

    @Test
    public void testWithRemoteModel() {
        // Skip (rather than fail) when no TorchServe endpoint could be reached during setUp()
        Assume.assumeTrue("TorchServe is not available", isTorchServeAvailable);
        // Your test code using remote model
    }
}
```

#### Remote Model Infrastructure

The remote model infrastructure uses a unified script at:
```
src/test/resources/remote-models/torchserve/scripts/run.sh
```

You can use it directly for debugging:

```bash
# Check status
./src/test/resources/remote-models/torchserve/scripts/run.sh status

# Start and setup everything
./src/test/resources/remote-models/torchserve/scripts/run.sh lifecycle setup

# Stop everything
./src/test/resources/remote-models/torchserve/scripts/run.sh lifecycle teardown
```

#### Troubleshooting

If tests fail with connection errors:
- Verify Docker is running: `docker version`
- Check that ports 8080 and 8081 are free (no output means nothing is listening): `lsof -i :8080 -i :8081`
- Review container logs: `docker logs torchserve-integ-test`

### Debugging

Sometimes it is useful to attach a debugger to either the OpenSearch cluster or the integration test runner to see what's going on. For running unit tests, hit **Debug** from the IDE's gutter to debug the tests. For the OpenSearch cluster, first, make sure that the debugger is listening on port `5005`. Then, to debug the cluster code, run:
Expand Down
170 changes: 170 additions & 0 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -478,3 +478,173 @@ task updateVersion {
ant.replaceregexp(file:'build.gradle', match: '"opensearch.version", "\\d.*"', replace: '"opensearch.version", "' + newVersion.tokenize('-')[0] + '-SNAPSHOT"', flags:'g', byline:true)
}
}


// Remote Model Integration Tests
/**
 * Reports whether Docker can be used on this machine.
 *
 * Runs `docker version` and treats a zero exit code as "available".
 * Any exception (for example the `docker` binary not being found) is
 * treated as "not available".
 *
 * @return true when `docker version` exits with status 0, false otherwise
 */
def isDockerAvailable() {
    try {
        def process = "docker version".execute()
        // Drain stdout/stderr while waiting: a bare waitFor() can block forever
        // if the child fills its output pipe and nobody reads it.
        process.waitForProcessOutput()
        return process.exitValue() == 0
    } catch (Exception e) {
        return false
    }
}

// Runs the *RemoteModelIT* test classes against a TorchServe instance that is
// started before the tests (doFirst) and torn down afterwards by the
// teardownTorchServe finalizer task declared below.
task remoteModelIntegTest(type: RestIntegTestTask) {
    description = "Run all remote model integration tests with auto-discovered models"
    testClassesDirs = sourceSets.test.output.classesDirs
    classpath = sourceSets.test.runtimeClasspath

    // Filter to only run remote model tests
    filter {
        includeTestsMatching "*RemoteModelIT*"
    }

    // Path to handlers directory for auto-discovery
    def handlersDir = file("src/test/resources/remote-models/torchserve/handlers")
    def discoveredModels = []

    // Auto-discover all models from handler files: "<model>_handler.py" -> "<model>"
    if (handlersDir.exists()) {
        handlersDir.eachFile { file ->
            if (file.name.endsWith("_handler.py")) {
                def modelName = file.name.replace("_handler.py", "")
                discoveredModels.add(modelName)
            }
        }
    }

    // Set endpoint for each discovered model
    discoveredModels.each { model ->
        // Environment variable names cannot contain '-', so map it to '_'
        // (the previous replace('_', '_') was a no-op).
        def envVarName = "${model.toUpperCase().replace('-', '_')}_ENDPOINT"
        def endpoint = "http://localhost:8080/predictions/${model}"
        environment envVarName, endpoint
        systemProperty "tests.${model}.endpoint", endpoint
    }

    // Common endpoints for backward compatibility
    environment "TORCHSERVE_ENDPOINT", "http://localhost:8080"
    systemProperty "tests.torchserve.endpoint", "http://localhost:8080"

    // System properties from integTest
    systemProperty 'tests.security.manager', 'false'
    systemProperty 'java.io.tmpdir', opensearch_tmp_dir.absolutePath
    systemProperty('project.root', project.rootDir.absolutePath)

    // Path to the run.sh script
    def runScript = file("src/test/resources/remote-models/torchserve/scripts/run.sh")

    doFirst {
        // Check Docker availability
        if (!isDockerAvailable()) {
            throw new GradleException("Docker is not available. Please install Docker to run remote model tests.")
        }

        // Ensure script exists
        if (!runScript.exists()) {
            throw new GradleException("run.sh script not found at: ${runScript.absolutePath}")
        }

        // Make script executable
        runScript.setExecutable(true)

        // Display discovered models
        if (!discoveredModels.isEmpty()) {
            println "Auto-discovered models: ${discoveredModels.join(', ')}"
        } else {
            println "WARNING: No models discovered in ${handlersDir.absolutePath}"
        }

        // Start TorchServe and setup all discovered models
        println "Setting up TorchServe and deploying all models..."
        exec {
            commandLine runScript.absolutePath, "lifecycle", "setup"
            standardOutput = System.out
            errorOutput = System.err
        }
    }

    // Teardown is a finalizer task instead of a doLast action: doLast is skipped
    // when the tests (or the setup above) fail, which would leave the TorchServe
    // container running. A finalizer runs even when this task fails.
    finalizedBy "teardownTorchServe"

    // Test logging configuration
    testLogging {
        events "passed", "skipped", "failed"
        showExceptions true
        showCauses true
        showStackTraces true
        exceptionFormat = "full"
    }

    // Generate separate test report
    reports {
        html.destination = file("$buildDir/reports/remote-model-tests")
        junitXml.destination = file("$buildDir/test-results/remote-model-tests")
    }
}

// Finalizer for remoteModelIntegTest: invokes `run.sh lifecycle teardown`.
task teardownTorchServe(type: Exec) {
    description = "Tear down TorchServe after remote model integration tests"

    def runScript = file("src/test/resources/remote-models/torchserve/scripts/run.sh")

    // Nothing to tear down (and nothing to execute) if the script is missing
    onlyIf { runScript.exists() }

    commandLine runScript.absolutePath, "lifecycle", "teardown"
    ignoreExitValue = true // Do not fail if already stopped

    doFirst {
        println "Stopping TorchServe..."
    }
}

// Configure test cluster for remoteModelIntegTest
// Test cluster used by the remoteModelIntegTest task: archive distribution,
// every plugin from the zipArchive configuration plus this project's own
// plugin bundle, and an optional multi-node layout driven by -PnumNodes.
testClusters.remoteModelIntegTest {
    testDistribution = "ARCHIVE"

    // Install plugins
    configurations.zipArchive.asFileTree.each {
        plugin(provider(new Callable<RegularFile>(){
            @Override
            RegularFile call() throws Exception {
                return new RegularFile() {
                    @Override
                    File getAsFile() {
                        return it
                    }
                }
            }
        }))
    }

    // This installs our neural-search plugin into the testClusters
    plugin(project.tasks.bundlePlugin.archiveFile)

    // Honour -PnumNodes=<n> (passed by the multi-node CI job); without this the
    // property is ignored and the cluster always has the default node count.
    def requestedNodes = Integer.parseInt((project.findProperty("numNodes") ?: "1").toString())
    if (requestedNodes > 1) {
        numberOfNodes = requestedNodes
    }

    // Increase heap size to avoid memory issues
    jvmArgs("-Xms1g", "-Xmx2g")
}

// Helper tasks for manual control (optional)
// Manual helper: invokes `run.sh start`.
task startTorchServe(type: Exec) {
    group = "Remote Model Testing"
    description = "Start TorchServe container manually"

    def runScript = file("src/test/resources/remote-models/torchserve/scripts/run.sh")
    commandLine runScript.absolutePath, "start"

    // Same precaution remoteModelIntegTest takes: make sure the script has its
    // executable bit before it is launched.
    doFirst {
        runScript.setExecutable(true)
    }
}

// Manual helper: invokes `run.sh stop`.
task stopTorchServe(type: Exec) {
    group = "Remote Model Testing"
    description = "Stop TorchServe container manually"

    def runScript = file("src/test/resources/remote-models/torchserve/scripts/run.sh")
    commandLine runScript.absolutePath, "stop"

    // Same precaution remoteModelIntegTest takes: make sure the script has its
    // executable bit before it is launched.
    doFirst {
        runScript.setExecutable(true)
    }
}

// Manual helper: invokes `run.sh status`.
task torchServeStatus(type: Exec) {
    group = "Remote Model Testing"
    description = "Check TorchServe status"

    def runScript = file("src/test/resources/remote-models/torchserve/scripts/run.sh")
    commandLine runScript.absolutePath, "status"

    // Same precaution remoteModelIntegTest takes: make sure the script has its
    // executable bit before it is launched.
    doFirst {
        runScript.setExecutable(true)
    }
}

// Manual helper: invokes `run.sh list-models`.
task listRemoteModels(type: Exec) {
    group = "Remote Model Testing"
    description = "List all discovered remote models"

    def runScript = file("src/test/resources/remote-models/torchserve/scripts/run.sh")
    commandLine runScript.absolutePath, "list-models"

    // Same precaution remoteModelIntegTest takes: make sure the script has its
    // executable bit before it is launched.
    doFirst {
        runScript.setExecutable(true)
    }
}
Loading
Loading