From e7a46855f7010a9390fb878df0d8106c0a24df12 Mon Sep 17 00:00:00 2001 From: Dennis Toepker Date: Mon, 4 Aug 2025 14:26:30 -0700 Subject: [PATCH 01/40] CreateMonitorV2 RestHandler --- alerting/build.gradle | 1 + .../org/opensearch/alerting/AlertingPlugin.kt | 3 + .../resthandler/RestIndexMonitorV2Action.kt | 78 +++++++++++++++++++ 3 files changed, 82 insertions(+) create mode 100644 alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestIndexMonitorV2Action.kt diff --git a/alerting/build.gradle b/alerting/build.gradle index 6c77a1db3..53723ac80 100644 --- a/alerting/build.gradle +++ b/alerting/build.gradle @@ -81,6 +81,7 @@ publishing { } repositories { + mavenLocal() maven { name = "Snapshots" url = "https://aws.oss.sonatype.org/content/repositories/snapshots" diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/AlertingPlugin.kt b/alerting/src/main/kotlin/org/opensearch/alerting/AlertingPlugin.kt index 4ad7b6361..324f792b6 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/AlertingPlugin.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/AlertingPlugin.kt @@ -138,6 +138,7 @@ import org.opensearch.threadpool.ThreadPool import org.opensearch.transport.client.Client import org.opensearch.watcher.ResourceWatcherService import java.util.function.Supplier +import org.opensearch.alerting.resthandler.RestIndexMonitorV2Action /** * Entry point of the OpenDistro for Elasticsearch alerting plugin @@ -157,6 +158,7 @@ internal class AlertingPlugin : PainlessExtension, ActionPlugin, ScriptPlugin, R @JvmField val OPEN_SEARCH_DASHBOARDS_USER_AGENT = "OpenSearch-Dashboards" @JvmField val UI_METADATA_EXCLUDE = arrayOf("monitor.${Monitor.UI_METADATA_FIELD}") @JvmField val MONITOR_BASE_URI = "/_plugins/_alerting/monitors" + @JvmField val MONITOR_V2_BASE_URI = "/_plugins/_alerting/v2/monitors" @JvmField val WORKFLOW_BASE_URI = "/_plugins/_alerting/workflows" @JvmField val REMOTE_BASE_URI = "/_plugins/_alerting/remote" @JvmField val 
DESTINATION_BASE_URI = "/_plugins/_alerting/destinations" @@ -197,6 +199,7 @@ internal class AlertingPlugin : PainlessExtension, ActionPlugin, ScriptPlugin, R RestGetMonitorAction(), RestDeleteMonitorAction(), RestIndexMonitorAction(), + RestIndexMonitorV2Action(), RestIndexWorkflowAction(), RestSearchMonitorAction(settings, clusterService), RestExecuteMonitorAction(), diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestIndexMonitorV2Action.kt b/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestIndexMonitorV2Action.kt new file mode 100644 index 000000000..010a6c4d1 --- /dev/null +++ b/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestIndexMonitorV2Action.kt @@ -0,0 +1,78 @@ +package org.opensearch.alerting.resthandler + +import org.apache.logging.log4j.LogManager +import org.opensearch.alerting.AlertingPlugin +import org.opensearch.rest.BaseRestHandler +import org.opensearch.rest.RestHandler.Route +import org.opensearch.rest.RestRequest.Method.POST +import java.io.IOException +import java.time.Instant +import org.opensearch.action.support.WriteRequest +import org.opensearch.alerting.util.IF_PRIMARY_TERM +import org.opensearch.alerting.util.IF_SEQ_NO +import org.opensearch.alerting.util.REFRESH +import org.opensearch.commons.alerting.action.AlertingActions +import org.opensearch.commons.alerting.action.IndexMonitorV2Request +import org.opensearch.commons.alerting.model.MonitorV2 +import org.opensearch.commons.alerting.util.AlertingException +import org.opensearch.core.xcontent.XContentParser.Token +import org.opensearch.core.xcontent.XContentParserUtils.ensureExpectedToken +import org.opensearch.index.seqno.SequenceNumbers +import org.opensearch.rest.RestRequest +import org.opensearch.rest.action.RestToXContentListener +import org.opensearch.transport.client.node.NodeClient + +private val log = LogManager.getLogger(RestIndexMonitorV2Action::class.java) + +/** + * Rest handlers to create and update V2 Monitors 
like PPL Monitors + */ +class RestIndexMonitorV2Action : BaseRestHandler() { + override fun getName(): String { + return "index_monitor_v2_action" + } + + override fun routes(): List { + return mutableListOf( + Route( + POST, + AlertingPlugin.MONITOR_V2_BASE_URI + ), + // TODO: support UpdateMonitor +// Route( +// PUT, +// "${AlertingPlugin.PPL_MONITOR_BASE_URI}/{monitorID}" +// ) + ) + } + + @Throws(IOException::class) + override fun prepareRequest(request: RestRequest, client: NodeClient): RestChannelConsumer { + log.debug("${request.method()} ${request.path()}") + + val xcp = request.contentParser() + ensureExpectedToken(Token.START_OBJECT, xcp.nextToken(), xcp) + + val monitorV2: MonitorV2 + try { + monitorV2 = MonitorV2.parse(xcp) + } catch (e: Exception) { + throw AlertingException.wrap(e) + } + + val id = request.param("monitorID", MonitorV2.NO_ID) + val seqNo = request.paramAsLong(IF_SEQ_NO, SequenceNumbers.UNASSIGNED_SEQ_NO) + val primaryTerm = request.paramAsLong(IF_PRIMARY_TERM, SequenceNumbers.UNASSIGNED_PRIMARY_TERM) + val refreshPolicy = if (request.hasParam(REFRESH)) { + WriteRequest.RefreshPolicy.parse(request.param(REFRESH)) + } else { + WriteRequest.RefreshPolicy.IMMEDIATE + } + + val indexMonitorV2Request = IndexMonitorV2Request(seqNo, primaryTerm, refreshPolicy, monitorV2) + + return RestChannelConsumer { channel -> + client.execute(AlertingActions.INDEX_MONITOR_V2_ACTION_TYPE, indexMonitorV2Request, RestToXContentListener(channel)) + } + } +} From 0de50e6a0f3b5c657b7c591099909f28492d910d Mon Sep 17 00:00:00 2001 From: Dennis Toepker Date: Mon, 11 Aug 2025 12:08:54 -0700 Subject: [PATCH 02/40] CreatePPLMonitor Transport --- .../org/opensearch/alerting/AlertingPlugin.kt | 4 +- .../resthandler/RestIndexMonitorV2Action.kt | 11 +- .../TransportIndexMonitorV2Action.kt | 318 ++++++++++++++++++ 3 files changed, 326 insertions(+), 7 deletions(-) create mode 100644 
alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportIndexMonitorV2Action.kt diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/AlertingPlugin.kt b/alerting/src/main/kotlin/org/opensearch/alerting/AlertingPlugin.kt index 324f792b6..8222fe49d 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/AlertingPlugin.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/AlertingPlugin.kt @@ -46,6 +46,7 @@ import org.opensearch.alerting.resthandler.RestGetWorkflowAction import org.opensearch.alerting.resthandler.RestGetWorkflowAlertsAction import org.opensearch.alerting.resthandler.RestIndexAlertingCommentAction import org.opensearch.alerting.resthandler.RestIndexMonitorAction +import org.opensearch.alerting.resthandler.RestIndexMonitorV2Action import org.opensearch.alerting.resthandler.RestIndexWorkflowAction import org.opensearch.alerting.resthandler.RestSearchAlertingCommentAction import org.opensearch.alerting.resthandler.RestSearchEmailAccountAction @@ -78,6 +79,7 @@ import org.opensearch.alerting.transport.TransportGetWorkflowAction import org.opensearch.alerting.transport.TransportGetWorkflowAlertsAction import org.opensearch.alerting.transport.TransportIndexAlertingCommentAction import org.opensearch.alerting.transport.TransportIndexMonitorAction +import org.opensearch.alerting.transport.TransportIndexMonitorV2Action import org.opensearch.alerting.transport.TransportIndexWorkflowAction import org.opensearch.alerting.transport.TransportSearchAlertingCommentAction import org.opensearch.alerting.transport.TransportSearchEmailAccountAction @@ -138,7 +140,6 @@ import org.opensearch.threadpool.ThreadPool import org.opensearch.transport.client.Client import org.opensearch.watcher.ResourceWatcherService import java.util.function.Supplier -import org.opensearch.alerting.resthandler.RestIndexMonitorV2Action /** * Entry point of the OpenDistro for Elasticsearch alerting plugin @@ -228,6 +229,7 @@ internal class AlertingPlugin : 
PainlessExtension, ActionPlugin, ScriptPlugin, R return listOf( ActionPlugin.ActionHandler(ScheduledJobsStatsAction.INSTANCE, ScheduledJobsStatsTransportAction::class.java), ActionPlugin.ActionHandler(AlertingActions.INDEX_MONITOR_ACTION_TYPE, TransportIndexMonitorAction::class.java), + ActionPlugin.ActionHandler(AlertingActions.INDEX_MONITOR_V2_ACTION_TYPE, TransportIndexMonitorV2Action::class.java), ActionPlugin.ActionHandler(AlertingActions.GET_MONITOR_ACTION_TYPE, TransportGetMonitorAction::class.java), ActionPlugin.ActionHandler(ExecuteMonitorAction.INSTANCE, TransportExecuteMonitorAction::class.java), ActionPlugin.ActionHandler(AlertingActions.SEARCH_MONITORS_ACTION_TYPE, TransportSearchMonitorAction::class.java), diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestIndexMonitorV2Action.kt b/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestIndexMonitorV2Action.kt index 010a6c4d1..81f3b1d17 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestIndexMonitorV2Action.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestIndexMonitorV2Action.kt @@ -1,13 +1,8 @@ package org.opensearch.alerting.resthandler import org.apache.logging.log4j.LogManager -import org.opensearch.alerting.AlertingPlugin -import org.opensearch.rest.BaseRestHandler -import org.opensearch.rest.RestHandler.Route -import org.opensearch.rest.RestRequest.Method.POST -import java.io.IOException -import java.time.Instant import org.opensearch.action.support.WriteRequest +import org.opensearch.alerting.AlertingPlugin import org.opensearch.alerting.util.IF_PRIMARY_TERM import org.opensearch.alerting.util.IF_SEQ_NO import org.opensearch.alerting.util.REFRESH @@ -18,9 +13,13 @@ import org.opensearch.commons.alerting.util.AlertingException import org.opensearch.core.xcontent.XContentParser.Token import org.opensearch.core.xcontent.XContentParserUtils.ensureExpectedToken import org.opensearch.index.seqno.SequenceNumbers 
+import org.opensearch.rest.BaseRestHandler +import org.opensearch.rest.RestHandler.Route import org.opensearch.rest.RestRequest +import org.opensearch.rest.RestRequest.Method.POST import org.opensearch.rest.action.RestToXContentListener import org.opensearch.transport.client.node.NodeClient +import java.io.IOException private val log = LogManager.getLogger(RestIndexMonitorV2Action::class.java) diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportIndexMonitorV2Action.kt b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportIndexMonitorV2Action.kt new file mode 100644 index 000000000..4236791ea --- /dev/null +++ b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportIndexMonitorV2Action.kt @@ -0,0 +1,318 @@ +package org.opensearch.alerting.transport + +import kotlinx.coroutines.CoroutineScope +import kotlinx.coroutines.Dispatchers +import kotlinx.coroutines.launch +import org.apache.logging.log4j.LogManager +import org.opensearch.ExceptionsHelper +import org.opensearch.OpenSearchException +import org.opensearch.OpenSearchStatusException +import org.opensearch.ResourceAlreadyExistsException +import org.opensearch.action.admin.cluster.health.ClusterHealthAction +import org.opensearch.action.admin.cluster.health.ClusterHealthRequest +import org.opensearch.action.admin.cluster.health.ClusterHealthResponse +import org.opensearch.action.admin.indices.create.CreateIndexResponse +import org.opensearch.action.index.IndexRequest +import org.opensearch.action.index.IndexResponse +import org.opensearch.action.search.SearchRequest +import org.opensearch.action.search.SearchResponse +import org.opensearch.action.support.ActionFilters +import org.opensearch.action.support.HandledTransportAction +import org.opensearch.action.support.clustermanager.AcknowledgedResponse +import org.opensearch.alerting.core.ScheduledJobIndices +import org.opensearch.alerting.opensearchapi.suspendUntil +import 
org.opensearch.alerting.settings.AlertingSettings +import org.opensearch.alerting.settings.AlertingSettings.Companion.ALERTING_MAX_MONITORS +import org.opensearch.alerting.settings.AlertingSettings.Companion.INDEX_TIMEOUT +import org.opensearch.alerting.settings.AlertingSettings.Companion.REQUEST_TIMEOUT +import org.opensearch.alerting.util.IndexUtils +import org.opensearch.cluster.service.ClusterService +import org.opensearch.common.inject.Inject +import org.opensearch.common.settings.Settings +import org.opensearch.common.xcontent.XContentFactory.jsonBuilder +import org.opensearch.commons.alerting.action.AlertingActions +import org.opensearch.commons.alerting.action.IndexMonitorV2Request +import org.opensearch.commons.alerting.action.IndexMonitorV2Response +import org.opensearch.commons.alerting.model.Monitor +import org.opensearch.commons.alerting.model.PPLMonitor +import org.opensearch.commons.alerting.model.ScheduledJob.Companion.SCHEDULED_JOBS_INDEX +import org.opensearch.commons.alerting.util.AlertingException +import org.opensearch.core.action.ActionListener +import org.opensearch.core.common.io.stream.NamedWriteableRegistry +import org.opensearch.core.rest.RestStatus +import org.opensearch.core.xcontent.NamedXContentRegistry +import org.opensearch.core.xcontent.ToXContent +import org.opensearch.index.query.QueryBuilders +import org.opensearch.search.builder.SearchSourceBuilder +import org.opensearch.tasks.Task +import org.opensearch.transport.TransportService +import org.opensearch.transport.client.Client + +private val log = LogManager.getLogger(TransportIndexMonitorV2Action::class.java) +private val scope: CoroutineScope = CoroutineScope(Dispatchers.IO) + +class TransportIndexMonitorV2Action @Inject constructor( + transportService: TransportService, + val client: Client, + actionFilters: ActionFilters, + val scheduledJobIndices: ScheduledJobIndices, + val clusterService: ClusterService, + val settings: Settings, + val xContentRegistry: 
NamedXContentRegistry, + val namedWriteableRegistry: NamedWriteableRegistry, +) : HandledTransportAction( + AlertingActions.INDEX_MONITOR_V2_ACTION_NAME, transportService, actionFilters, ::IndexMonitorV2Request +), + SecureTransportAction { + + // TODO: add monitor v2 versions of these settings + @Volatile private var maxMonitors = ALERTING_MAX_MONITORS.get(settings) + @Volatile private var requestTimeout = REQUEST_TIMEOUT.get(settings) + @Volatile private var indexTimeout = INDEX_TIMEOUT.get(settings) +// @Volatile private var maxActionThrottle = MAX_ACTION_THROTTLE_VALUE.get(settings) + @Volatile override var filterByEnabled = AlertingSettings.FILTER_BY_BACKEND_ROLES.get(settings) + + override fun doExecute(task: Task, indexMonitorRequest: IndexMonitorV2Request, actionListener: ActionListener) { + /* check to see if alerting-config index (scheduled job index) is created and updated before indexing MonitorV2 into it */ + if (!scheduledJobIndices.scheduledJobIndexExists()) { // if alerting-config index doesn't exist, send request to create it + scheduledJobIndices.initScheduledJobIndex(object : ActionListener { + override fun onResponse(response: CreateIndexResponse) { + onCreateMappingsResponse(response.isAcknowledged, indexMonitorRequest, actionListener) + } + + override fun onFailure(e: Exception) { + if (ExceptionsHelper.unwrapCause(e) is ResourceAlreadyExistsException) { + scope.launch { + // Wait for the yellow status + val clusterHealthRequest = ClusterHealthRequest() + .indices(SCHEDULED_JOBS_INDEX) + .waitForYellowStatus() + val response: ClusterHealthResponse = client.suspendUntil { + execute(ClusterHealthAction.INSTANCE, clusterHealthRequest, it) + } + if (response.isTimedOut) { + actionListener.onFailure( + OpenSearchException("Cannot determine that the $SCHEDULED_JOBS_INDEX index is healthy") + ) + } + // Retry mapping of monitor + onCreateMappingsResponse(true, indexMonitorRequest, actionListener) + } + } else { + 
actionListener.onFailure(AlertingException.wrap(e)) + } + } + }) + } else if (!IndexUtils.scheduledJobIndexUpdated) { + IndexUtils.updateIndexMapping( + SCHEDULED_JOBS_INDEX, + ScheduledJobIndices.scheduledJobMappings(), clusterService.state(), client.admin().indices(), + object : ActionListener { + override fun onResponse(response: AcknowledgedResponse) { + onUpdateMappingsResponse(response, indexMonitorRequest, actionListener) + } + override fun onFailure(t: Exception) { + actionListener.onFailure(AlertingException.wrap(t)) + } + } + ) + } else { + prepareMonitorIndexing(indexMonitorRequest, actionListener) + } + } + + private fun onCreateMappingsResponse( + isAcknowledged: Boolean, + request: IndexMonitorV2Request, + actionListener: ActionListener + ) { + if (isAcknowledged) { + log.info("Created $SCHEDULED_JOBS_INDEX with mappings.") + prepareMonitorIndexing(request, actionListener) + IndexUtils.scheduledJobIndexUpdated() + } else { + log.info("Create $SCHEDULED_JOBS_INDEX mappings call not acknowledged.") + actionListener.onFailure( + AlertingException.wrap( + OpenSearchStatusException( + "Create $SCHEDULED_JOBS_INDEX mappings call not acknowledged", RestStatus.INTERNAL_SERVER_ERROR + ) + ) + ) + } + } + + private fun onUpdateMappingsResponse( + response: AcknowledgedResponse, + indexMonitorRequest: IndexMonitorV2Request, + actionListener: ActionListener + ) { + if (response.isAcknowledged) { + log.info("Updated $SCHEDULED_JOBS_INDEX with mappings.") + IndexUtils.scheduledJobIndexUpdated() + prepareMonitorIndexing(indexMonitorRequest, actionListener) + } else { + log.info("Update $SCHEDULED_JOBS_INDEX mappings call not acknowledged.") + actionListener.onFailure( + AlertingException.wrap( + OpenSearchStatusException( + "Updated $SCHEDULED_JOBS_INDEX mappings call not acknowledged.", + RestStatus.INTERNAL_SERVER_ERROR + ) + ) + ) + } + } + + /** + * This function prepares for indexing a new monitor. 
+ * If this is an update request we can simply update the monitor. Otherwise we first check to see how many monitors already exist, + * and compare this to [maxMonitors]. Requests that breach this threshold will be rejected. + */ + private fun prepareMonitorIndexing(indexMonitorRequest: IndexMonitorV2Request, actionListener: ActionListener) { + + // The check below needs to be an async operation and needs to be refactored (issue #269) + // checkForDisallowedDestinations(allowList) + + // TODO: checks for throttling/suppression +// try { +// validateActionThrottle(request.monitor, maxActionThrottle, TimeValue.timeValueMinutes(1)) +// } catch (e: RuntimeException) { +// actionListener.onFailure(AlertingException.wrap(e)) +// return +// } + + val query = QueryBuilders.boolQuery().filter(QueryBuilders.termQuery("${Monitor.MONITOR_TYPE}.type", Monitor.MONITOR_TYPE)) + val searchSource = SearchSourceBuilder().query(query).timeout(requestTimeout) + val searchRequest = SearchRequest(SCHEDULED_JOBS_INDEX).source(searchSource) + + client.search( + searchRequest, + object : ActionListener { + override fun onResponse(searchResponse: SearchResponse) { + onMonitorCountSearchResponse(searchResponse, indexMonitorRequest, actionListener) + } + + override fun onFailure(t: Exception) { + actionListener.onFailure(AlertingException.wrap(t)) + } + } + ) + + // TODO: this if else forks between update or create monitor, come back to this when supporting update monitor +// if (request.method == RestRequest.Method.PUT) { +// scope.launch { +// updateMonitor() +// } +// } else { +// val query = QueryBuilders.boolQuery().filter(QueryBuilders.termQuery("${Monitor.MONITOR_TYPE}.type", Monitor.MONITOR_TYPE)) +// val searchSource = SearchSourceBuilder().query(query).timeout(requestTimeout) +// val searchRequest = SearchRequest(SCHEDULED_JOBS_INDEX).source(searchSource) +// +// client.search( +// searchRequest, +// object : ActionListener { +// override fun onResponse(searchResponse: 
SearchResponse) { +// onSearchResponse(searchResponse) +// } +// +// override fun onFailure(t: Exception) { +// actionListener.onFailure(AlertingException.wrap(t)) +// } +// } +// ) +// } + } + + /** + * After searching for all existing monitors we validate the system can support another monitor to be created. + */ + private fun onMonitorCountSearchResponse( + monitorCountSearchResponse: SearchResponse, + indexMonitorRequest: IndexMonitorV2Request, + actionListener: ActionListener + ) { + val totalHits = monitorCountSearchResponse.hits.totalHits?.value + if (totalHits != null && totalHits >= maxMonitors) { + log.info("This request would create more than the allowed monitors [$maxMonitors].") + actionListener.onFailure( + AlertingException.wrap( + IllegalArgumentException( + "This request would create more than the allowed monitors [$maxMonitors]." + ) + ) + ) + } else { + scope.launch { + indexMonitor(indexMonitorRequest, actionListener) + } + } + } + + private suspend fun indexMonitor(indexMonitorRequest: IndexMonitorV2Request, actionListener: ActionListener) { + // TODO: user permissions for PPL alerting +// if (user != null) { +// // Use the backend roles which is an intersection of the requested backend roles and the user's backend roles. +// // Admins can pass in any backend role. Also if no backend role is passed in, all the user's backend roles are used. 
+// val rbacRoles = if (request.rbacRoles == null) user.backendRoles.toSet() +// else if (!isAdmin(user)) request.rbacRoles?.intersect(user.backendRoles)?.toSet() +// else request.rbacRoles +// +// request.monitor = request.monitor.copy( +// user = User(user.name, rbacRoles.orEmpty().toList(), user.roles, user.customAttNames) +// ) +// log.debug("Created monitor's backend roles: $rbacRoles") +// } + var monitorV2 = when (indexMonitorRequest.monitorV2) { + is PPLMonitor -> indexMonitorRequest.monitorV2 as PPLMonitor + else -> throw IllegalStateException("received unsupported monitor type to index: ${indexMonitorRequest.monitorV2.javaClass}") + } + + val indexRequest = IndexRequest(SCHEDULED_JOBS_INDEX) + .setRefreshPolicy(indexMonitorRequest.refreshPolicy) + .source(monitorV2.toXContent(jsonBuilder(), ToXContent.MapParams(mapOf("with_type" to "true")))) + .setIfSeqNo(indexMonitorRequest.seqNo) + .setIfPrimaryTerm(indexMonitorRequest.primaryTerm) + .timeout(indexTimeout) + + log.info( + "Creating new monitorV2: ${monitorV2.toXContent( + jsonBuilder(), + ToXContent.MapParams(mapOf("with_type" to "true")) + )}" + ) + + try { + val indexResponse: IndexResponse = client.suspendUntil { client.index(indexRequest, it) } + val failureReasons = checkShardsFailure(indexResponse) + if (failureReasons != null) { + log.info(failureReasons.toString()) + actionListener.onFailure( + AlertingException.wrap(OpenSearchStatusException(failureReasons.toString(), indexResponse.status())) + ) + return + } + + actionListener.onResponse( + IndexMonitorV2Response( + indexResponse.id, indexResponse.version, indexResponse.seqNo, + indexResponse.primaryTerm, monitorV2 + ) + ) + } catch (t: Exception) { + actionListener.onFailure(AlertingException.wrap(t)) + } + } + + private fun checkShardsFailure(response: IndexResponse): String? 
{ + val failureReasons = StringBuilder() + if (response.shardInfo.failed > 0) { + response.shardInfo.failures.forEach { + entry -> + failureReasons.append(entry.reason()) + } + return failureReasons.toString() + } + return null + } +} From c990c03a74ae3e5e8da340f5a82c4a64f2c769cd Mon Sep 17 00:00:00 2001 From: Dennis Toepker Date: Fri, 15 Aug 2025 17:27:53 -0700 Subject: [PATCH 03/40] ExecutePPLMonitor with NumResults Trigger Implementation --- .../org/opensearch/alerting/AlertingPlugin.kt | 24 ++- .../alerting/MonitorRunnerService.kt | 94 ++++++++ .../opensearch/alerting/MonitorV2Runner.kt | 18 ++ .../opensearch/alerting/PPLMonitorRunner.kt | 204 ++++++++++++++++++ .../alerting/action/ExecuteMonitorV2Action.kt | 11 + .../action/ExecuteMonitorV2Request.kt | 74 +++++++ .../action/ExecuteMonitorV2Response.kt | 33 +++ .../resthandler/RestExecuteMonitorV2Action.kt | 70 ++++++ .../resthandler/RestIndexMonitorV2Action.kt | 7 +- .../TransportExecuteMonitorV2Action.kt | 146 +++++++++++++ .../opensearchapi/OpenSearchExtensions.kt | 15 ++ 11 files changed, 689 insertions(+), 7 deletions(-) create mode 100644 alerting/src/main/kotlin/org/opensearch/alerting/MonitorV2Runner.kt create mode 100644 alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt create mode 100644 alerting/src/main/kotlin/org/opensearch/alerting/action/ExecuteMonitorV2Action.kt create mode 100644 alerting/src/main/kotlin/org/opensearch/alerting/action/ExecuteMonitorV2Request.kt create mode 100644 alerting/src/main/kotlin/org/opensearch/alerting/action/ExecuteMonitorV2Response.kt create mode 100644 alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestExecuteMonitorV2Action.kt create mode 100644 alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportExecuteMonitorV2Action.kt diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/AlertingPlugin.kt b/alerting/src/main/kotlin/org/opensearch/alerting/AlertingPlugin.kt index 8222fe49d..dec93eae7 100644 --- 
a/alerting/src/main/kotlin/org/opensearch/alerting/AlertingPlugin.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/AlertingPlugin.kt @@ -7,6 +7,7 @@ package org.opensearch.alerting import org.opensearch.action.ActionRequest import org.opensearch.alerting.action.ExecuteMonitorAction +import org.opensearch.alerting.action.ExecuteMonitorV2Action import org.opensearch.alerting.action.ExecuteWorkflowAction import org.opensearch.alerting.action.GetDestinationsAction import org.opensearch.alerting.action.GetEmailAccountAction @@ -34,6 +35,7 @@ import org.opensearch.alerting.resthandler.RestDeleteAlertingCommentAction import org.opensearch.alerting.resthandler.RestDeleteMonitorAction import org.opensearch.alerting.resthandler.RestDeleteWorkflowAction import org.opensearch.alerting.resthandler.RestExecuteMonitorAction +import org.opensearch.alerting.resthandler.RestExecuteMonitorV2Action import org.opensearch.alerting.resthandler.RestExecuteWorkflowAction import org.opensearch.alerting.resthandler.RestGetAlertsAction import org.opensearch.alerting.resthandler.RestGetDestinationsAction @@ -47,7 +49,6 @@ import org.opensearch.alerting.resthandler.RestGetWorkflowAlertsAction import org.opensearch.alerting.resthandler.RestIndexAlertingCommentAction import org.opensearch.alerting.resthandler.RestIndexMonitorAction import org.opensearch.alerting.resthandler.RestIndexMonitorV2Action -import org.opensearch.alerting.resthandler.RestIndexWorkflowAction import org.opensearch.alerting.resthandler.RestSearchAlertingCommentAction import org.opensearch.alerting.resthandler.RestSearchEmailAccountAction import org.opensearch.alerting.resthandler.RestSearchEmailGroupAction @@ -67,6 +68,7 @@ import org.opensearch.alerting.transport.TransportDeleteMonitorAction import org.opensearch.alerting.transport.TransportDeleteWorkflowAction import org.opensearch.alerting.transport.TransportDocLevelMonitorFanOutAction import org.opensearch.alerting.transport.TransportExecuteMonitorAction +import 
org.opensearch.alerting.transport.TransportExecuteMonitorV2Action import org.opensearch.alerting.transport.TransportExecuteWorkflowAction import org.opensearch.alerting.transport.TransportGetAlertsAction import org.opensearch.alerting.transport.TransportGetDestinationsAction @@ -105,6 +107,7 @@ import org.opensearch.commons.alerting.model.ClusterMetricsInput import org.opensearch.commons.alerting.model.DocLevelMonitorInput import org.opensearch.commons.alerting.model.DocumentLevelTrigger import org.opensearch.commons.alerting.model.Monitor +import org.opensearch.commons.alerting.model.MonitorV2 import org.opensearch.commons.alerting.model.QueryLevelTrigger import org.opensearch.commons.alerting.model.ScheduledJob import org.opensearch.commons.alerting.model.ScheduledJob.Companion.SCHEDULED_JOBS_INDEX @@ -197,11 +200,11 @@ internal class AlertingPlugin : PainlessExtension, ActionPlugin, ScriptPlugin, R nodesInCluster: Supplier ): List { return listOf( + // Alerting V1 RestGetMonitorAction(), RestDeleteMonitorAction(), - RestIndexMonitorAction(), - RestIndexMonitorV2Action(), - RestIndexWorkflowAction(), +// RestIndexMonitorAction(), +// RestIndexWorkflowAction(), RestSearchMonitorAction(settings, clusterService), RestExecuteMonitorAction(), RestExecuteWorkflowAction(), @@ -222,14 +225,18 @@ internal class AlertingPlugin : PainlessExtension, ActionPlugin, ScriptPlugin, R RestIndexAlertingCommentAction(), RestSearchAlertingCommentAction(), RestDeleteAlertingCommentAction(), + + // Alerting V2 + RestIndexMonitorV2Action(), + RestExecuteMonitorV2Action(), ) } override fun getActions(): List> { return listOf( + // Alerting V1 ActionPlugin.ActionHandler(ScheduledJobsStatsAction.INSTANCE, ScheduledJobsStatsTransportAction::class.java), ActionPlugin.ActionHandler(AlertingActions.INDEX_MONITOR_ACTION_TYPE, TransportIndexMonitorAction::class.java), - ActionPlugin.ActionHandler(AlertingActions.INDEX_MONITOR_V2_ACTION_TYPE, TransportIndexMonitorV2Action::class.java), 
ActionPlugin.ActionHandler(AlertingActions.GET_MONITOR_ACTION_TYPE, TransportGetMonitorAction::class.java), ActionPlugin.ActionHandler(ExecuteMonitorAction.INSTANCE, TransportExecuteMonitorAction::class.java), ActionPlugin.ActionHandler(AlertingActions.SEARCH_MONITORS_ACTION_TYPE, TransportSearchMonitorAction::class.java), @@ -254,13 +261,18 @@ internal class AlertingPlugin : PainlessExtension, ActionPlugin, ScriptPlugin, R ActionPlugin.ActionHandler(AlertingActions.DELETE_COMMENT_ACTION_TYPE, TransportDeleteAlertingCommentAction::class.java), ActionPlugin.ActionHandler(ExecuteWorkflowAction.INSTANCE, TransportExecuteWorkflowAction::class.java), ActionPlugin.ActionHandler(GetRemoteIndexesAction.INSTANCE, TransportGetRemoteIndexesAction::class.java), - ActionPlugin.ActionHandler(DocLevelMonitorFanOutAction.INSTANCE, TransportDocLevelMonitorFanOutAction::class.java) + ActionPlugin.ActionHandler(DocLevelMonitorFanOutAction.INSTANCE, TransportDocLevelMonitorFanOutAction::class.java), + + // Alerting V2 + ActionPlugin.ActionHandler(AlertingActions.INDEX_MONITOR_V2_ACTION_TYPE, TransportIndexMonitorV2Action::class.java), + ActionPlugin.ActionHandler(ExecuteMonitorV2Action.INSTANCE, TransportExecuteMonitorV2Action::class.java), ) } override fun getNamedXContent(): List { return listOf( Monitor.XCONTENT_REGISTRY, + MonitorV2.XCONTENT_REGISTRY, SearchInput.XCONTENT_REGISTRY, DocLevelMonitorInput.XCONTENT_REGISTRY, QueryLevelTrigger.XCONTENT_REGISTRY, diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/MonitorRunnerService.kt b/alerting/src/main/kotlin/org/opensearch/alerting/MonitorRunnerService.kt index f8703aec2..9a09bbb09 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/MonitorRunnerService.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/MonitorRunnerService.kt @@ -18,6 +18,9 @@ import org.opensearch.action.support.clustermanager.AcknowledgedResponse import org.opensearch.alerting.action.ExecuteMonitorAction import 
org.opensearch.alerting.action.ExecuteMonitorRequest import org.opensearch.alerting.action.ExecuteMonitorResponse +import org.opensearch.alerting.action.ExecuteMonitorV2Action +import org.opensearch.alerting.action.ExecuteMonitorV2Request +import org.opensearch.alerting.action.ExecuteMonitorV2Response import org.opensearch.alerting.action.ExecuteWorkflowAction import org.opensearch.alerting.action.ExecuteWorkflowRequest import org.opensearch.alerting.action.ExecuteWorkflowResponse @@ -63,6 +66,10 @@ import org.opensearch.commons.alerting.model.Alert import org.opensearch.commons.alerting.model.DocLevelMonitorInput import org.opensearch.commons.alerting.model.Monitor import org.opensearch.commons.alerting.model.MonitorRunResult +import org.opensearch.commons.alerting.model.MonitorV2 +import org.opensearch.commons.alerting.model.MonitorV2RunResult +import org.opensearch.commons.alerting.model.PPLMonitor +import org.opensearch.commons.alerting.model.PPLMonitor.Companion.PPL_MONITOR_TYPE import org.opensearch.commons.alerting.model.ScheduledJob import org.opensearch.commons.alerting.model.TriggerRunResult import org.opensearch.commons.alerting.model.Workflow @@ -408,6 +415,44 @@ object MonitorRunnerService : JobRunner, CoroutineScope, AbstractLifecycleCompon } } } + is MonitorV2 -> { + if (job !is PPLMonitor) { + throw IllegalStateException("Unexpected invalid MonitorV2 type: ${job.javaClass.name}") + } + + launch { + var monitorLock: LockModel? 
= null + try { + monitorLock = monitorCtx.client!!.suspendUntil { + monitorCtx.lockService!!.acquireLock(job, it) + } ?: return@launch + logger.debug("lock ${monitorLock!!.lockId} acquired") + logger.debug( + "PERF_DEBUG: executing $PPL_MONITOR_TYPE ${job.id} on node " + + monitorCtx.clusterService!!.state().nodes().localNode.id + ) + val executeMonitorV2Request = ExecuteMonitorV2Request( + false, + job.id, + job, + TimeValue(periodStart.toEpochMilli()), + TimeValue(periodEnd.toEpochMilli()) + ) + monitorCtx.client!!.suspendUntil { + monitorCtx.client!!.execute( + ExecuteMonitorV2Action.INSTANCE, + executeMonitorV2Request, + it + ) + } + } catch (e: Exception) { + logger.error("MonitorV2 run failed for monitor with id ${job.id}", e) + } finally { + monitorCtx.client!!.suspendUntil { monitorCtx.lockService!!.release(monitorLock, it) } + logger.debug("lock ${monitorLock?.lockId} released") + } + } + } else -> { throw IllegalArgumentException("Invalid job type") } @@ -539,6 +584,55 @@ object MonitorRunnerService : JobRunner, CoroutineScope, AbstractLifecycleCompon } } + suspend fun runJobV2( + monitorV2: MonitorV2, + periodStart: Instant, + periodEnd: Instant, + dryrun: Boolean, + transportService: TransportService, + ): MonitorV2RunResult<*> { + // Updating the scheduled job index at the start of monitor execution runs for when there is an upgrade and the schema mapping + // has not been updated.
+ if (!IndexUtils.scheduledJobIndexUpdated && monitorCtx.clusterService != null && monitorCtx.client != null) { + IndexUtils.updateIndexMapping( + ScheduledJob.SCHEDULED_JOBS_INDEX, + ScheduledJobIndices.scheduledJobMappings(), monitorCtx.clusterService!!.state(), monitorCtx.client!!.admin().indices(), + object : ActionListener { + override fun onResponse(response: AcknowledgedResponse) { + } + + override fun onFailure(t: Exception) { + logger.error("Failed to update config index schema", t) + } + } + ) + } + + val executionId = "${monitorV2.id}_${LocalDateTime.now(ZoneOffset.UTC)}_${UUID.randomUUID()}" + val monitorV2Type = when (monitorV2) { + is PPLMonitor -> PPL_MONITOR_TYPE + else -> throw IllegalStateException("Unexpected MonitorV2 type: ${monitorV2.javaClass.name}") + } + + logger.info( + "Executing scheduled monitor - id: ${monitorV2.id}, type: $monitorV2Type, periodStart: $periodStart, " + + "periodEnd: $periodEnd, dryrun: $dryrun, executionId: $executionId" + ) + + // to introduce new MonitorV2 type, create its MonitorRunner, and if/else branch + // to the corresponding MonitorRunners based on type. 
For now, default to PPLMonitorRunner + val runResult = PPLMonitorRunner.runMonitorV2( + monitorV2, + monitorCtx, + periodStart, + periodEnd, + dryrun, + executionId = executionId, + transportService = transportService, + ) + return runResult + } + // TODO: See if we can move below methods (or few of these) to a common utils internal fun getRolesForMonitor(monitor: Monitor): List { /* diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/MonitorV2Runner.kt b/alerting/src/main/kotlin/org/opensearch/alerting/MonitorV2Runner.kt new file mode 100644 index 000000000..57c33efbd --- /dev/null +++ b/alerting/src/main/kotlin/org/opensearch/alerting/MonitorV2Runner.kt @@ -0,0 +1,18 @@ +package org.opensearch.alerting + +import org.opensearch.commons.alerting.model.MonitorV2 +import org.opensearch.commons.alerting.model.MonitorV2RunResult +import org.opensearch.transport.TransportService +import java.time.Instant + +abstract class MonitorV2Runner { + abstract suspend fun runMonitorV2( + monitorV2: MonitorV2, + monitorCtx: MonitorRunnerExecutionContext, // MonitorV2 reads from same context as Monitor + periodStart: Instant, + periodEnd: Instant, + dryRun: Boolean, + executionId: String, + transportService: TransportService + ): MonitorV2RunResult<*> +} diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt b/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt new file mode 100644 index 000000000..7df37e903 --- /dev/null +++ b/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt @@ -0,0 +1,204 @@ +package org.opensearch.alerting + +import kotlinx.coroutines.CoroutineScope +import kotlinx.coroutines.Dispatchers +import org.apache.logging.log4j.LogManager +import org.json.JSONObject +import org.opensearch.ExceptionsHelper +import org.opensearch.action.DocWriteRequest +import org.opensearch.action.bulk.BackoffPolicy +import org.opensearch.action.bulk.BulkRequest +import org.opensearch.action.bulk.BulkResponse 
+import org.opensearch.action.index.IndexRequest +import org.opensearch.action.support.WriteRequest +import org.opensearch.alerting.alerts.AlertIndices +import org.opensearch.alerting.opensearchapi.retry +import org.opensearch.alerting.opensearchapi.suspendUntil +import org.opensearch.common.xcontent.XContentFactory +import org.opensearch.commons.alerting.model.Alert +import org.opensearch.commons.alerting.model.AlertV2 +import org.opensearch.commons.alerting.model.MonitorV2 +import org.opensearch.commons.alerting.model.MonitorV2RunResult +import org.opensearch.commons.alerting.model.PPLMonitor +import org.opensearch.commons.alerting.model.PPLMonitorRunResult +import org.opensearch.commons.alerting.model.PPLTrigger +import org.opensearch.commons.alerting.model.PPLTrigger.ConditionType +import org.opensearch.commons.alerting.model.PPLTrigger.NumResultsCondition +import org.opensearch.commons.alerting.model.PPLTrigger.TriggerMode +import org.opensearch.commons.alerting.model.PPLTriggerRunResult +import org.opensearch.commons.ppl.PPLPluginInterface +import org.opensearch.commons.ppl.action.TransportPPLQueryRequest +import org.opensearch.core.rest.RestStatus +import org.opensearch.core.xcontent.ToXContent +import org.opensearch.transport.TransportService +import org.opensearch.transport.client.node.NodeClient +import java.time.Instant + +object PPLMonitorRunner : MonitorV2Runner() { + private val logger = LogManager.getLogger(javaClass) + private val scope: CoroutineScope = CoroutineScope(Dispatchers.IO) + + const val PPL_SQL_QUERY_FIELD = "query" // name of PPL query field when passing into PPL/SQL Execute API call + + // TODO: this is a hacky implementation, needs serious revision and additions + override suspend fun runMonitorV2( + monitorV2: MonitorV2, + monitorCtx: MonitorRunnerExecutionContext, // MonitorV2 reads from same context as Monitor + periodStart: Instant, + periodEnd: Instant, + dryRun: Boolean, + executionId: String, + transportService: 
TransportService, + ): MonitorV2RunResult<*> { + logger.debug("Running monitor: ${monitorV2.name}. Thread: ${Thread.currentThread().name}") + + if (monitorV2 !is PPLMonitor) { + throw IllegalStateException("Unexpected monitor type: ${monitorV2.javaClass.name}") + } + + if (periodStart == periodEnd) { + logger.warn("Start and end time are the same: $periodStart. This monitor will probably only run once.") + } + + var monitorV2Result = PPLMonitorRunResult(monitorV2.name, null, periodStart, periodEnd, mapOf(), "") + + // TODO: should alerting v1 and v2 alerts index be separate? + // TODO: should alerting v1 and v2 alerting-config index be separate? + val currentAlerts = try { + // write generated V2 alerts to existing alerts v1 index for now + monitorCtx.alertIndices!!.createOrUpdateAlertIndex() + monitorCtx.alertIndices!!.createOrUpdateInitialAlertHistoryIndex() + } catch (e: Exception) { + // We can't save ERROR alerts to the index here as we don't know if there are existing ACTIVE alerts + val id = if (monitorV2.id.trim().isEmpty()) "_na_" else monitorV2.id + logger.error("Error loading alerts for monitorV2: $id", e) + return monitorV2Result.copy(error = e) + } + + // call PPL plugin to execute query + val transportPplQueryRequest = TransportPPLQueryRequest( + monitorV2.query, + JSONObject(mapOf(PPL_SQL_QUERY_FIELD to monitorV2.query)), + null // null path falls back to a default path internal to SQL/PPL Plugin + ) + + val transportPplQueryResponse = PPLPluginInterface.suspendUntil { + this.executeQuery( + monitorCtx.client as NodeClient, + transportPplQueryRequest, + it + ) + } + + val queryResponseJson = JSONObject(transportPplQueryResponse.result) + val numResults = queryResponseJson.getLong("total") + + val triggerResults = mutableMapOf() + val generatedAlerts = mutableListOf() + + for (trigger in monitorV2.triggers) { + val pplTrigger = trigger as PPLTrigger + if (pplTrigger.conditionType == ConditionType.CUSTOM || pplTrigger.mode == TriggerMode.PER_RESULT) { 
+ break // TODO: handle custom condition case and per result trigger mode + } + + val triggered = evaluateNumResultsTrigger(numResults, trigger.numResultsCondition!!, trigger.numResultsValue!!) + + val pplTriggerRunResult = PPLTriggerRunResult(trigger.name, triggered, null) + + // TODO: currently naively generates an alert and action every time + // TODO: maintain alert state, check for COMPLETED alert and suppression condition, like query level monitor + + val alertV2 = AlertV2( + monitorId = monitorV2.id, + monitorName = monitorV2.name, + monitorVersion = monitorV2.version, + triggerId = trigger.id, + triggerName = trigger.name, + state = Alert.State.ACTIVE, + startTime = Instant.now(), + errorHistory = listOf(), + severity = trigger.severity.value, + actionExecutionResults = listOf(), + ) + + triggerResults[pplTrigger.id] = pplTriggerRunResult + generatedAlerts.add(alertV2) + +// if (monitorCtx.triggerService!!.isQueryLevelTriggerActionable(triggerCtx, triggerResult, workflowRunContext)) { +// val actionCtx = triggerCtx.copy(error = monitorResult.error ?: triggerResult.error) +// for (action in trigger.actions) { +// triggerResult.actionResults[action.id] = this.runAction(action, actionCtx, monitorCtx, monitor, dryrun) +// } +// } + } + + // TODO: what if retry policy null? 
+ monitorCtx.retryPolicy?.let { + saveAlertsV2( + generatedAlerts, + monitorCtx, + it, + monitorV2.id + ) + } + + return monitorV2Result.copy(triggerResults = triggerResults, pplQueryResults = queryResponseJson.toString()) + } + + private fun evaluateNumResultsTrigger(numResults: Long, numResultsCondition: NumResultsCondition, numResultsValue: Long): Boolean { + return when (numResultsCondition) { + NumResultsCondition.GREATER_THAN -> numResults > numResultsValue + NumResultsCondition.GREATER_THAN_EQUAL -> numResults >= numResultsValue + NumResultsCondition.LESS_THAN -> numResults < numResultsValue + NumResultsCondition.LESS_THAN_EQUAL -> numResults <= numResultsValue + NumResultsCondition.EQUAL -> numResults == numResultsValue + NumResultsCondition.NOT_EQUAL -> numResults != numResultsValue + } + } + + private suspend fun saveAlertsV2( + alerts: List, + monitorCtx: MonitorRunnerExecutionContext, + retryPolicy: BackoffPolicy, + routingId: String // routing is mandatory and set as monitor id. for workflow chained alerts we pass workflow id as routing + ) { + val alertsIndex = AlertIndices.ALERT_INDEX + val alertsHistoryIndex = AlertIndices.ALERT_HISTORY_WRITE_INDEX + + var requestsToRetry = alerts.flatMap { alert -> + // We don't want to set the version when saving alerts because the MonitorRunner has first priority when writing alerts. + // In the rare event that a user acknowledges an alert between when it's read and when it's written + // back we're ok if that acknowledgement is lost. It's easier to get the user to retry than for the runner to + // spend time reloading the alert and writing it back. 
+ when (alert.state) { + Alert.State.ACTIVE, Alert.State.ERROR -> { + listOf>( + IndexRequest(alertsIndex) + .routing(routingId) + .source(alert.toXContent(XContentFactory.jsonBuilder(), ToXContent.EMPTY_PARAMS)) + .id(if (alert.id != Alert.NO_ID) alert.id else null) + ) + } + else -> throw IllegalStateException("trying to save non ACTIVE alert, unimplemented territory") + } + } + + val client = monitorCtx.client!! + + if (requestsToRetry.isEmpty()) return + // Retry Bulk requests if there was any 429 response + retryPolicy.retry(logger, listOf(RestStatus.TOO_MANY_REQUESTS)) { + val bulkRequest = BulkRequest().add(requestsToRetry).setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE) + val bulkResponse: BulkResponse = client.suspendUntil { client.bulk(bulkRequest, it) } + val failedResponses = (bulkResponse.items ?: arrayOf()).filter { it.isFailed } + requestsToRetry = failedResponses.filter { it.status() == RestStatus.TOO_MANY_REQUESTS } + .map { bulkRequest.requests()[it.itemId] as IndexRequest } + + if (requestsToRetry.isNotEmpty()) { + val retryCause = failedResponses.first { it.status() == RestStatus.TOO_MANY_REQUESTS }.failure.cause + throw ExceptionsHelper.convertToOpenSearchException(retryCause) + } + } + } +} diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/action/ExecuteMonitorV2Action.kt b/alerting/src/main/kotlin/org/opensearch/alerting/action/ExecuteMonitorV2Action.kt new file mode 100644 index 000000000..3d7320f7c --- /dev/null +++ b/alerting/src/main/kotlin/org/opensearch/alerting/action/ExecuteMonitorV2Action.kt @@ -0,0 +1,11 @@ +package org.opensearch.alerting.action + +import org.opensearch.action.ActionType + +// TODO: should this and the ExecuteMonitorV2Request/Response be moved to common utils? 
+class ExecuteMonitorV2Action private constructor() : ActionType(NAME, ::ExecuteMonitorV2Response) { + companion object { + val INSTANCE = ExecuteMonitorV2Action() + const val NAME = "cluster:admin/opensearch/alerting/v2/monitor/execute" + } +} diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/action/ExecuteMonitorV2Request.kt b/alerting/src/main/kotlin/org/opensearch/alerting/action/ExecuteMonitorV2Request.kt new file mode 100644 index 000000000..86bdeeb35 --- /dev/null +++ b/alerting/src/main/kotlin/org/opensearch/alerting/action/ExecuteMonitorV2Request.kt @@ -0,0 +1,74 @@ +package org.opensearch.alerting.action + +import org.opensearch.action.ActionRequest +import org.opensearch.action.ActionRequestValidationException +import org.opensearch.action.ValidateActions +import org.opensearch.common.unit.TimeValue +import org.opensearch.commons.alerting.model.MonitorV2 +import org.opensearch.core.common.io.stream.StreamInput +import org.opensearch.core.common.io.stream.StreamOutput +import java.io.IOException + +class ExecuteMonitorV2Request : ActionRequest { + val dryrun: Boolean + val monitorId: String? // exactly one of monitorId or monitor must be non-null + val monitorV2: MonitorV2? + val requestStart: TimeValue? + val requestEnd: TimeValue + + constructor( + dryrun: Boolean, + monitorId: String?, + monitorV2: MonitorV2?, + requestStart: TimeValue? = null, + requestEnd: TimeValue, + ) : super() { + this.dryrun = dryrun + this.monitorId = monitorId + this.monitorV2 = monitorV2 + this.requestStart = requestStart + this.requestEnd = requestEnd + } + + @Throws(IOException::class) + constructor(sin: StreamInput) : this( + sin.readBoolean(), // dryrun + sin.readOptionalString(), // monitorId + if (sin.readBoolean()) { + MonitorV2.readFrom(sin) // monitor + } else null, + sin.readOptionalTimeValue(), + sin.readTimeValue(), // requestEnd + ) + + override fun validate(): ActionRequestValidationException? 
{ + // ensure exactly one of monitor ID or monitorV2 is supplied + var exception: ActionRequestValidationException? = null + if (monitorV2 == null && monitorId == null) { + exception = ValidateActions.addValidationError( + "Neither a monitor ID or monitor object was supplied", + exception + ) + } else if (monitorV2 != null && monitorId != null) { + exception = ValidateActions.addValidationError( + "Both a monitor ID and a monitor object were supplied, only one of these is accepted", + exception + ) + } + return exception + } + + @Throws(IOException::class) + override fun writeTo(out: StreamOutput) { + out.writeBoolean(dryrun) + out.writeOptionalString(monitorId) + if (monitorV2 != null) { + out.writeBoolean(true) + monitorV2.writeTo(out) + } else { + out.writeBoolean(false) + } + out.writeOptionalTimeValue(requestStart) + out.writeTimeValue(requestEnd) + } +} diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/action/ExecuteMonitorV2Response.kt b/alerting/src/main/kotlin/org/opensearch/alerting/action/ExecuteMonitorV2Response.kt new file mode 100644 index 000000000..5c2cf7ead --- /dev/null +++ b/alerting/src/main/kotlin/org/opensearch/alerting/action/ExecuteMonitorV2Response.kt @@ -0,0 +1,33 @@ +package org.opensearch.alerting.action + +import org.opensearch.commons.alerting.model.MonitorV2RunResult +import org.opensearch.core.action.ActionResponse +import org.opensearch.core.common.io.stream.StreamInput +import org.opensearch.core.common.io.stream.StreamOutput +import org.opensearch.core.xcontent.ToXContent +import org.opensearch.core.xcontent.ToXContentObject +import org.opensearch.core.xcontent.XContentBuilder +import java.io.IOException + +class ExecuteMonitorV2Response : ActionResponse, ToXContentObject { + val monitorV2RunResult: MonitorV2RunResult<*> + + constructor(monitorV2RunResult: MonitorV2RunResult<*>) : super() { + this.monitorV2RunResult = monitorV2RunResult + } + + @Throws(IOException::class) + constructor(sin: StreamInput) : this( + 
MonitorV2RunResult.readFrom(sin) // monitorRunResult + ) + + @Throws(IOException::class) + override fun writeTo(out: StreamOutput) { + monitorV2RunResult.writeTo(out) + } + + @Throws(IOException::class) + override fun toXContent(builder: XContentBuilder, params: ToXContent.Params): XContentBuilder { + return monitorV2RunResult.toXContent(builder, ToXContent.EMPTY_PARAMS) + } +} diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestExecuteMonitorV2Action.kt b/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestExecuteMonitorV2Action.kt new file mode 100644 index 000000000..3dcd06d62 --- /dev/null +++ b/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestExecuteMonitorV2Action.kt @@ -0,0 +1,70 @@ +package org.opensearch.alerting.resthandler + +import org.apache.logging.log4j.LogManager +import org.opensearch.alerting.AlertingPlugin +import org.opensearch.alerting.action.ExecuteMonitorV2Action +import org.opensearch.alerting.action.ExecuteMonitorV2Request +import org.opensearch.common.unit.TimeValue +import org.opensearch.commons.alerting.model.MonitorV2 +import org.opensearch.commons.alerting.util.AlertingException +import org.opensearch.core.xcontent.XContentParser.Token.START_OBJECT +import org.opensearch.core.xcontent.XContentParserUtils.ensureExpectedToken +import org.opensearch.rest.BaseRestHandler +import org.opensearch.rest.RestHandler.Route +import org.opensearch.rest.RestRequest +import org.opensearch.rest.RestRequest.Method.POST +import org.opensearch.rest.action.RestToXContentListener +import org.opensearch.transport.client.node.NodeClient +import java.time.Instant + +private val log = LogManager.getLogger(RestExecuteMonitorV2Action::class.java) + +class RestExecuteMonitorV2Action : BaseRestHandler() { + + override fun getName(): String = "execute_monitor_v2_action" + + override fun routes(): List { + return listOf( + Route( + POST, + "${AlertingPlugin.MONITOR_V2_BASE_URI}/{monitorId}/_execute" + ), + 
Route( + POST, + "${AlertingPlugin.MONITOR_V2_BASE_URI}/_execute" + ) + ) + } + + override fun prepareRequest(request: RestRequest, client: NodeClient): RestChannelConsumer { + log.debug("${request.method()} ${AlertingPlugin.MONITOR_V2_BASE_URI}/_execute") + + return RestChannelConsumer { channel -> + val dryrun = request.paramAsBoolean("dryrun", false) + val requestEnd = request.paramAsTime("period_end", TimeValue(Instant.now().toEpochMilli())) + + if (request.hasParam("monitorId")) { + val monitorId = request.param("monitorId") + val execMonitorV2Request = ExecuteMonitorV2Request(dryrun, monitorId, null, null, requestEnd) + client.execute(ExecuteMonitorV2Action.INSTANCE, execMonitorV2Request, RestToXContentListener(channel)) + } else { + val xcp = request.contentParser() + ensureExpectedToken(START_OBJECT, xcp.nextToken(), xcp) + + val monitorV2: MonitorV2 + try { + monitorV2 = MonitorV2.parse(xcp) + } catch (e: Exception) { + throw AlertingException.wrap(e) + } + + val execMonitorV2Request = ExecuteMonitorV2Request(dryrun, null, monitorV2, null, requestEnd) + client.execute(ExecuteMonitorV2Action.INSTANCE, execMonitorV2Request, RestToXContentListener(channel)) + } + } + } + + override fun responseParams(): Set { + return setOf("dryrun", "period_end", "monitorId") + } +} diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestIndexMonitorV2Action.kt b/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestIndexMonitorV2Action.kt index 81f3b1d17..8f4647dc9 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestIndexMonitorV2Action.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestIndexMonitorV2Action.kt @@ -32,7 +32,7 @@ class RestIndexMonitorV2Action : BaseRestHandler() { } override fun routes(): List { - return mutableListOf( + return listOf( Route( POST, AlertingPlugin.MONITOR_V2_BASE_URI @@ -48,10 +48,13 @@ class RestIndexMonitorV2Action : BaseRestHandler() { 
@Throws(IOException::class) override fun prepareRequest(request: RestRequest, client: NodeClient): RestChannelConsumer { log.debug("${request.method()} ${request.path()}") + log.info("preparing rest request") val xcp = request.contentParser() ensureExpectedToken(Token.START_OBJECT, xcp.nextToken(), xcp) + log.info("initial ensure") + val monitorV2: MonitorV2 try { monitorV2 = MonitorV2.parse(xcp) @@ -59,6 +62,8 @@ class RestIndexMonitorV2Action : BaseRestHandler() { throw AlertingException.wrap(e) } + log.info("monitorv2 retrieved") + val id = request.param("monitorID", MonitorV2.NO_ID) val seqNo = request.paramAsLong(IF_SEQ_NO, SequenceNumbers.UNASSIGNED_SEQ_NO) val primaryTerm = request.paramAsLong(IF_PRIMARY_TERM, SequenceNumbers.UNASSIGNED_PRIMARY_TERM) diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportExecuteMonitorV2Action.kt b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportExecuteMonitorV2Action.kt new file mode 100644 index 000000000..544721d8e --- /dev/null +++ b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportExecuteMonitorV2Action.kt @@ -0,0 +1,146 @@ +package org.opensearch.alerting.transport + +import kotlinx.coroutines.Dispatchers +import kotlinx.coroutines.launch +import kotlinx.coroutines.withContext +import org.apache.logging.log4j.LogManager +import org.opensearch.OpenSearchStatusException +import org.opensearch.action.get.GetRequest +import org.opensearch.action.get.GetResponse +import org.opensearch.action.support.ActionFilters +import org.opensearch.action.support.HandledTransportAction +import org.opensearch.alerting.MonitorRunnerService +import org.opensearch.alerting.action.ExecuteMonitorV2Action +import org.opensearch.alerting.action.ExecuteMonitorV2Request +import org.opensearch.alerting.action.ExecuteMonitorV2Response +import org.opensearch.alerting.settings.AlertingSettings +import org.opensearch.cluster.service.ClusterService +import 
org.opensearch.common.inject.Inject +import org.opensearch.common.settings.Settings +import org.opensearch.common.xcontent.LoggingDeprecationHandler +import org.opensearch.common.xcontent.XContentHelper +import org.opensearch.common.xcontent.XContentType +import org.opensearch.commons.alerting.model.MonitorV2 +import org.opensearch.commons.alerting.model.PPLMonitor +import org.opensearch.commons.alerting.model.PPLMonitor.Companion.PPL_MONITOR_TYPE +import org.opensearch.commons.alerting.model.ScheduledJob +import org.opensearch.commons.alerting.util.AlertingException +import org.opensearch.core.action.ActionListener +import org.opensearch.core.rest.RestStatus +import org.opensearch.core.xcontent.NamedXContentRegistry +import org.opensearch.tasks.Task +import org.opensearch.transport.TransportService +import org.opensearch.transport.client.Client +import java.time.Instant + +private val log = LogManager.getLogger(TransportExecuteMonitorV2Action::class.java) + +class TransportExecuteMonitorV2Action @Inject constructor( + private val transportService: TransportService, + private val client: Client, + private val clusterService: ClusterService, + private val runner: MonitorRunnerService, + actionFilters: ActionFilters, + val xContentRegistry: NamedXContentRegistry, + private val settings: Settings +) : HandledTransportAction( + ExecuteMonitorV2Action.NAME, transportService, actionFilters, ::ExecuteMonitorV2Request +) { + @Volatile private var indexTimeout = AlertingSettings.INDEX_TIMEOUT.get(settings) + + override fun doExecute( + task: Task, + execMonitorV2Request: ExecuteMonitorV2Request, + actionListener: ActionListener + ) { +// client.threadPool().threadContext.stashContext().use { // TODO: include this when security plugin enabled + // first define a function that will be used later to run MonitorV2s + val executeMonitorV2 = fun (monitorV2: MonitorV2) { + runner.launch { + // get execution time interval + val (periodStart, periodEnd) = if 
(execMonitorV2Request.requestStart != null) { + Pair( + Instant.ofEpochMilli(execMonitorV2Request.requestStart.millis), + Instant.ofEpochMilli(execMonitorV2Request.requestEnd.millis) + ) + } else { + monitorV2.schedule.getPeriodEndingAt(Instant.ofEpochMilli(execMonitorV2Request.requestEnd.millis)) + } + + // call the MonitorRunnerService to execute the MonitorV2 + try { + val monitorV2Type = when (monitorV2) { + is PPLMonitor -> PPL_MONITOR_TYPE + else -> throw IllegalStateException("Unexpected MonitorV2 type: ${monitorV2.javaClass.name}") + } + log.info( + "Executing MonitorV2 from API - id: ${monitorV2.id}, type: $monitorV2Type, " + + "periodStart: $periodStart, periodEnd: $periodEnd, dryrun: ${execMonitorV2Request.dryrun}" + ) + val monitorV2RunResult = runner.runJobV2( + monitorV2, + periodStart, + periodEnd, + execMonitorV2Request.dryrun, + transportService + ) + withContext(Dispatchers.IO) { + actionListener.onResponse(ExecuteMonitorV2Response(monitorV2RunResult)) + } + } catch (e: Exception) { + log.error("Unexpected error running monitor", e) + withContext(Dispatchers.IO) { + actionListener.onFailure(AlertingException.wrap(e)) + } + } + } + } + + // now execute the MonitorV2 + if (execMonitorV2Request.monitorId != null) { // execute with monitor ID case + // search the alerting-config index for the MonitorV2 with this ID + val getMonitorV2Request = GetRequest(ScheduledJob.SCHEDULED_JOBS_INDEX).id(execMonitorV2Request.monitorId) + client.get( + getMonitorV2Request, + object : ActionListener { + override fun onResponse(getMonitorV2Response: GetResponse) { + if (!getMonitorV2Response.isExists) { + actionListener.onFailure( + AlertingException.wrap( + OpenSearchStatusException( + "Can't find monitorV2 with id: ${getMonitorV2Response.id}", + RestStatus.NOT_FOUND + ) + ) + ) + return + } + if (!getMonitorV2Response.isSourceEmpty) { + log.info("found monitor") + XContentHelper.createParser( + xContentRegistry, LoggingDeprecationHandler.INSTANCE, + 
getMonitorV2Response.sourceAsBytesRef, XContentType.JSON + ).use { xcp -> + val monitorV2 = ScheduledJob.parse( + xcp, + getMonitorV2Response.id, + getMonitorV2Response.version + ) as MonitorV2 + // TODO: validate that this is a MonitorV2 and not a Monitor + executeMonitorV2(monitorV2) + } + } + } + + override fun onFailure(t: Exception) { + actionListener.onFailure(AlertingException.wrap(t)) + } + } + ) + } else { // execute with monitor object case + val monitorV2 = execMonitorV2Request.monitorV2 as MonitorV2 + executeMonitorV2(monitorV2) + } +// } + } +} diff --git a/core/src/main/kotlin/org/opensearch/alerting/opensearchapi/OpenSearchExtensions.kt b/core/src/main/kotlin/org/opensearch/alerting/opensearchapi/OpenSearchExtensions.kt index 582d13fbe..077bfc6ef 100644 --- a/core/src/main/kotlin/org/opensearch/alerting/opensearchapi/OpenSearchExtensions.kt +++ b/core/src/main/kotlin/org/opensearch/alerting/opensearchapi/OpenSearchExtensions.kt @@ -21,6 +21,7 @@ import org.opensearch.common.xcontent.XContentType import org.opensearch.commons.InjectSecurity import org.opensearch.commons.authuser.User import org.opensearch.commons.notifications.NotificationsPluginInterface +import org.opensearch.commons.ppl.PPLPluginInterface import org.opensearch.core.action.ActionListener import org.opensearch.core.rest.RestStatus import org.opensearch.core.rest.RestStatus.BAD_GATEWAY @@ -170,6 +171,20 @@ suspend fun NotificationsPluginInterface.suspendUntil(block: NotificationsPl }) } +/** + * Converts [PPLPluginInterface] methods that take a callback into a kotlin suspending function. + * + * @param block - a block of code that is passed an [ActionListener] that should be passed to the PPLPluginInterface API. 
+ */ +suspend fun PPLPluginInterface.suspendUntil(block: PPLPluginInterface.(ActionListener) -> Unit): T = + suspendCoroutine { cont -> + block(object : ActionListener { + override fun onResponse(response: T) = cont.resume(response) + + override fun onFailure(e: Exception) = cont.resumeWithException(e) + }) + } + class InjectorContextElement( id: String, settings: Settings, From 807450192b13486c2ab6d6a11eac5ee768d497d3 Mon Sep 17 00:00:00 2001 From: Dennis Toepker Date: Mon, 18 Aug 2025 14:39:52 -0700 Subject: [PATCH 04/40] DeletePPLMonitor initial implementation --- .../org/opensearch/alerting/AlertingPlugin.kt | 4 ++ .../resthandler/RestDeleteMonitorV2Action.kt | 47 ++++++++++++++ .../alerting/service/DeleteMonitorService.kt | 13 ++-- .../TransportDeleteMonitorV2Action.kt | 63 +++++++++++++++++++ 4 files changed, 123 insertions(+), 4 deletions(-) create mode 100644 alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestDeleteMonitorV2Action.kt create mode 100644 alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportDeleteMonitorV2Action.kt diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/AlertingPlugin.kt b/alerting/src/main/kotlin/org/opensearch/alerting/AlertingPlugin.kt index dec93eae7..925bea7a9 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/AlertingPlugin.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/AlertingPlugin.kt @@ -33,6 +33,7 @@ import org.opensearch.alerting.resthandler.RestAcknowledgeAlertAction import org.opensearch.alerting.resthandler.RestAcknowledgeChainedAlertAction import org.opensearch.alerting.resthandler.RestDeleteAlertingCommentAction import org.opensearch.alerting.resthandler.RestDeleteMonitorAction +import org.opensearch.alerting.resthandler.RestDeleteMonitorV2Action import org.opensearch.alerting.resthandler.RestDeleteWorkflowAction import org.opensearch.alerting.resthandler.RestExecuteMonitorAction import org.opensearch.alerting.resthandler.RestExecuteMonitorV2Action @@ 
-65,6 +66,7 @@ import org.opensearch.alerting.transport.TransportAcknowledgeAlertAction import org.opensearch.alerting.transport.TransportAcknowledgeChainedAlertAction import org.opensearch.alerting.transport.TransportDeleteAlertingCommentAction import org.opensearch.alerting.transport.TransportDeleteMonitorAction +import org.opensearch.alerting.transport.TransportDeleteMonitorV2Action import org.opensearch.alerting.transport.TransportDeleteWorkflowAction import org.opensearch.alerting.transport.TransportDocLevelMonitorFanOutAction import org.opensearch.alerting.transport.TransportExecuteMonitorAction @@ -229,6 +231,7 @@ internal class AlertingPlugin : PainlessExtension, ActionPlugin, ScriptPlugin, R // Alerting V2 RestIndexMonitorV2Action(), RestExecuteMonitorV2Action(), + RestDeleteMonitorV2Action(), ) } @@ -265,6 +268,7 @@ internal class AlertingPlugin : PainlessExtension, ActionPlugin, ScriptPlugin, R // Alerting V2 ActionPlugin.ActionHandler(AlertingActions.INDEX_MONITOR_V2_ACTION_TYPE, TransportIndexMonitorV2Action::class.java), + ActionPlugin.ActionHandler(AlertingActions.DELETE_MONITOR_V2_ACTION_TYPE, TransportDeleteMonitorV2Action::class.java), ActionPlugin.ActionHandler(ExecuteMonitorV2Action.INSTANCE, TransportExecuteMonitorV2Action::class.java), ) } diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestDeleteMonitorV2Action.kt b/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestDeleteMonitorV2Action.kt new file mode 100644 index 000000000..46255f892 --- /dev/null +++ b/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestDeleteMonitorV2Action.kt @@ -0,0 +1,47 @@ +package org.opensearch.alerting.resthandler + +import org.apache.logging.log4j.LogManager +import org.apache.logging.log4j.Logger +import org.opensearch.action.support.WriteRequest.RefreshPolicy +import org.opensearch.alerting.AlertingPlugin +import org.opensearch.alerting.util.REFRESH +import 
org.opensearch.commons.alerting.action.AlertingActions +import org.opensearch.commons.alerting.action.DeleteMonitorV2Request +import org.opensearch.rest.BaseRestHandler +import org.opensearch.rest.RestHandler.Route +import org.opensearch.rest.RestRequest +import org.opensearch.rest.RestRequest.Method.DELETE +import org.opensearch.rest.action.RestToXContentListener +import org.opensearch.transport.client.node.NodeClient +import java.io.IOException + +private val log: Logger = LogManager.getLogger(RestDeleteMonitorAction::class.java) + +class RestDeleteMonitorV2Action : BaseRestHandler() { + + override fun getName(): String { + return "delete_monitor_v2_action" + } + + override fun routes(): List { + return mutableListOf( + Route( + DELETE, + "${AlertingPlugin.MONITOR_V2_BASE_URI}/{monitorId}" + ) + ) + } + + @Throws(IOException::class) + override fun prepareRequest(request: RestRequest, client: NodeClient): RestChannelConsumer { + val monitorId = request.param("monitorId") + log.info("${request.method()} ${AlertingPlugin.MONITOR_V2_BASE_URI}/$monitorId") + + val refreshPolicy = RefreshPolicy.parse(request.param(REFRESH, RefreshPolicy.IMMEDIATE.value)) + val deleteMonitorV2Request = DeleteMonitorV2Request(monitorId, refreshPolicy) + + return RestChannelConsumer { channel -> + client.execute(AlertingActions.DELETE_MONITOR_V2_ACTION_TYPE, deleteMonitorV2Request, RestToXContentListener(channel)) + } + } +} diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/service/DeleteMonitorService.kt b/alerting/src/main/kotlin/org/opensearch/alerting/service/DeleteMonitorService.kt index fbc655543..2711c1805 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/service/DeleteMonitorService.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/service/DeleteMonitorService.kt @@ -51,7 +51,7 @@ object DeleteMonitorService : private val log = LogManager.getLogger(this.javaClass) private lateinit var client: Client - private lateinit var lockService: LockService + 
lateinit var lockService: LockService fun initialize( client: Client, @@ -74,7 +74,8 @@ object DeleteMonitorService : return DeleteMonitorResponse(deleteResponse.id, deleteResponse.version) } - private suspend fun deleteMonitor(monitorId: String, refreshPolicy: RefreshPolicy): DeleteResponse { + // both Alerting v1 and v2 workflows use this function + suspend fun deleteMonitor(monitorId: String, refreshPolicy: RefreshPolicy): DeleteResponse { val deleteMonitorRequest = DeleteRequest(ScheduledJob.SCHEDULED_JOBS_INDEX, monitorId) .setRefreshPolicy(refreshPolicy) return client.suspendUntil { delete(deleteMonitorRequest, it) } @@ -166,8 +167,12 @@ object DeleteMonitorService : } } - private suspend fun deleteLock(monitor: Monitor) { - client.suspendUntil { lockService.deleteLock(LockModel.generateLockId(monitor.id), it) } + suspend fun deleteLock(monitor: Monitor) { + deleteLock(monitor.id) + } + + suspend fun deleteLock(monitorId: String) { + client.suspendUntil { lockService.deleteLock(LockModel.generateLockId(monitorId), it) } } /** diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportDeleteMonitorV2Action.kt b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportDeleteMonitorV2Action.kt new file mode 100644 index 000000000..17ad5893e --- /dev/null +++ b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportDeleteMonitorV2Action.kt @@ -0,0 +1,63 @@ +package org.opensearch.alerting.transport + +import kotlinx.coroutines.CoroutineScope +import kotlinx.coroutines.Dispatchers +import kotlinx.coroutines.launch +import org.apache.logging.log4j.LogManager +import org.opensearch.action.support.ActionFilters +import org.opensearch.action.support.HandledTransportAction +import org.opensearch.alerting.service.DeleteMonitorService +import org.opensearch.alerting.settings.AlertingSettings +import org.opensearch.cluster.service.ClusterService +import org.opensearch.common.inject.Inject +import 
org.opensearch.common.settings.Settings +import org.opensearch.commons.alerting.action.AlertingActions +import org.opensearch.commons.alerting.action.DeleteMonitorV2Request +import org.opensearch.commons.alerting.action.DeleteMonitorV2Response +import org.opensearch.core.action.ActionListener +import org.opensearch.core.xcontent.NamedXContentRegistry +import org.opensearch.tasks.Task +import org.opensearch.transport.TransportService +import org.opensearch.transport.client.Client + +private val scope: CoroutineScope = CoroutineScope(Dispatchers.IO) +private val log = LogManager.getLogger(TransportDeleteMonitorAction::class.java) + +class TransportDeleteMonitorV2Action @Inject constructor( + transportService: TransportService, + val client: Client, + actionFilters: ActionFilters, + val clusterService: ClusterService, + settings: Settings, + val xContentRegistry: NamedXContentRegistry +) : HandledTransportAction( + AlertingActions.DELETE_MONITOR_V2_ACTION_NAME, transportService, actionFilters, ::DeleteMonitorV2Request +), + SecureTransportAction { + + @Volatile override var filterByEnabled = AlertingSettings.FILTER_BY_BACKEND_ROLES.get(settings) + + init { + listenFilterBySettingChange(clusterService) + } + + override fun doExecute(task: Task, request: DeleteMonitorV2Request, actionListener: ActionListener) { +// val user = readUserFromThreadContext(client) +// +// if (!validateUserBackendRoles(user, actionListener)) { +// return +// } + scope.launch { + try { + // TODO: might be able to simply run DeleteMonitorService.deleteMonitor() directly, even if PPLMonitors have no metadata + val deleteResponse = DeleteMonitorService.deleteMonitor(request.monitorV2Id, request.refreshPolicy) + DeleteMonitorService.deleteLock(request.monitorV2Id) + actionListener.onResponse(DeleteMonitorV2Response(deleteResponse.id, deleteResponse.version)) + } catch (e: Exception) { + actionListener.onFailure(e) + } + + // TODO: logic for deleting alerts + } + } +} From 
55ad56feed60b96fe7bd87966bdb9c18086b6d0f Mon Sep 17 00:00:00 2001 From: Dennis Toepker Date: Wed, 20 Aug 2025 16:17:20 -0700 Subject: [PATCH 05/40] Adding time filtering to monitor execution queries --- .../org/opensearch/alerting/AlertingPlugin.kt | 6 +- .../alerting/MonitorRunnerService.kt | 6 +- .../opensearch/alerting/PPLMonitorRunner.kt | 45 ++++++- .../action/ExecuteMonitorV2Request.kt | 5 - .../resthandler/RestSearchMonitorV2Action.kt | 119 ++++++++++++++++++ .../TransportExecuteMonitorV2Action.kt | 14 ++- .../TransportSearchMonitorV2Action.kt | 78 ++++++++++++ 7 files changed, 258 insertions(+), 15 deletions(-) create mode 100644 alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestSearchMonitorV2Action.kt create mode 100644 alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportSearchMonitorV2Action.kt diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/AlertingPlugin.kt b/alerting/src/main/kotlin/org/opensearch/alerting/AlertingPlugin.kt index 925bea7a9..43b3ebe7b 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/AlertingPlugin.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/AlertingPlugin.kt @@ -54,6 +54,7 @@ import org.opensearch.alerting.resthandler.RestSearchAlertingCommentAction import org.opensearch.alerting.resthandler.RestSearchEmailAccountAction import org.opensearch.alerting.resthandler.RestSearchEmailGroupAction import org.opensearch.alerting.resthandler.RestSearchMonitorAction +import org.opensearch.alerting.resthandler.RestSearchMonitorV2Action import org.opensearch.alerting.script.TriggerScript import org.opensearch.alerting.service.DeleteMonitorService import org.opensearch.alerting.settings.AlertingSettings @@ -89,6 +90,7 @@ import org.opensearch.alerting.transport.TransportSearchAlertingCommentAction import org.opensearch.alerting.transport.TransportSearchEmailAccountAction import org.opensearch.alerting.transport.TransportSearchEmailGroupAction import 
org.opensearch.alerting.transport.TransportSearchMonitorAction +import org.opensearch.alerting.transport.TransportSearchMonitorV2Action import org.opensearch.alerting.util.DocLevelMonitorQueries import org.opensearch.alerting.util.destinationmigration.DestinationMigrationCoordinator import org.opensearch.cluster.metadata.IndexNameExpressionResolver @@ -177,7 +179,7 @@ internal class AlertingPlugin : PainlessExtension, ActionPlugin, ScriptPlugin, R @JvmField val FINDING_BASE_URI = "/_plugins/_alerting/findings" @JvmField val COMMENTS_BASE_URI = "/_plugins/_alerting/comments" - @JvmField val ALERTING_JOB_TYPES = listOf("monitor", "workflow") + @JvmField val ALERTING_JOB_TYPES = listOf("monitor", "workflow", "monitor_v2") } lateinit var runner: MonitorRunnerService @@ -232,6 +234,7 @@ internal class AlertingPlugin : PainlessExtension, ActionPlugin, ScriptPlugin, R RestIndexMonitorV2Action(), RestExecuteMonitorV2Action(), RestDeleteMonitorV2Action(), + RestSearchMonitorV2Action(settings, clusterService), ) } @@ -268,6 +271,7 @@ internal class AlertingPlugin : PainlessExtension, ActionPlugin, ScriptPlugin, R // Alerting V2 ActionPlugin.ActionHandler(AlertingActions.INDEX_MONITOR_V2_ACTION_TYPE, TransportIndexMonitorV2Action::class.java), + ActionPlugin.ActionHandler(AlertingActions.SEARCH_MONITORS_V2_ACTION_TYPE, TransportSearchMonitorV2Action::class.java), ActionPlugin.ActionHandler(AlertingActions.DELETE_MONITOR_V2_ACTION_TYPE, TransportDeleteMonitorV2Action::class.java), ActionPlugin.ActionHandler(ExecuteMonitorV2Action.INSTANCE, TransportExecuteMonitorV2Action::class.java), ) diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/MonitorRunnerService.kt b/alerting/src/main/kotlin/org/opensearch/alerting/MonitorRunnerService.kt index 9a09bbb09..0161421ce 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/MonitorRunnerService.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/MonitorRunnerService.kt @@ -323,6 +323,8 @@ object 
MonitorRunnerService : JobRunner, CoroutineScope, AbstractLifecycleCompon logger.error("Failed to move active alerts for monitor [${job.id}].", e) } } + } else if (job is MonitorV2) { + return } else { throw IllegalArgumentException("Invalid job type") } @@ -433,8 +435,8 @@ object MonitorRunnerService : JobRunner, CoroutineScope, AbstractLifecycleCompon ) val executeMonitorV2Request = ExecuteMonitorV2Request( false, - job.id, - job, + job.id, // only need to pass in MonitorV2 ID + null, // no need to pass in MonitorV2 object itself TimeValue(periodStart.toEpochMilli()), TimeValue(periodEnd.toEpochMilli()) ) diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt b/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt index 7df37e903..2b122e9f0 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt @@ -33,6 +33,8 @@ import org.opensearch.core.xcontent.ToXContent import org.opensearch.transport.TransportService import org.opensearch.transport.client.node.NodeClient import java.time.Instant +import java.time.ZoneOffset.UTC +import java.time.format.DateTimeFormatter object PPLMonitorRunner : MonitorV2Runner() { private val logger = LogManager.getLogger(javaClass) @@ -41,6 +43,7 @@ object PPLMonitorRunner : MonitorV2Runner() { const val PPL_SQL_QUERY_FIELD = "query" // name of PPL query field when passing into PPL/SQL Execute API call // TODO: this is a hacky implementation, needs serious revision and additions + // TODO: implement custom condition triggering override suspend fun runMonitorV2( monitorV2: MonitorV2, monitorCtx: MonitorRunnerExecutionContext, // MonitorV2 reads from same context as Monitor @@ -65,7 +68,7 @@ object PPLMonitorRunner : MonitorV2Runner() { // TODO: should alerting v1 and v2 alerts index be separate? // TODO: should alerting v1 and v2 alerting-config index be separate? 
val currentAlerts = try { - // write generated V2 alerts to existing alerts v1 index for now + // TODO: write generated V2 alerts to existing alerts v1 index for now, revisit this decision monitorCtx.alertIndices!!.createOrUpdateAlertIndex() monitorCtx.alertIndices!!.createOrUpdateInitialAlertHistoryIndex() } catch (e: Exception) { @@ -75,9 +78,36 @@ object PPLMonitorRunner : MonitorV2Runner() { return monitorV2Result.copy(error = e) } - // call PPL plugin to execute query + // inject time filter into PPL query to only query for data within the (periodStart, periodEnd) interval + // TODO: if query contains "_time", "span", "earliest", "latest", skip adding filter + // TODO: pending https://github.com/opensearch-project/sql/issues/3969 + // for now assume "_time" field is always present in customer data + + // PPL plugin only accepts timestamp strings in this format + val formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss").withZone(UTC) + + val periodStartPplTimestamp = formatter.format(periodStart) + val periodEndPplTimeStamp = formatter.format(periodEnd) + + val timeFilterReplace = "| where _time > TIMESTAMP('$periodStartPplTimestamp') and _time < TIMESTAMP('$periodEndPplTimeStamp') |" + val timeFilterAppend = "| where _time > TIMESTAMP('$periodStartPplTimestamp') and _time < TIMESTAMP('$periodEndPplTimeStamp')" + + val timeFilteredQuery: String = if (monitorV2.query.contains("|")) { + // if Monitor query contains piped statements, inject the time filter + // as the first piped statement (i.e. 
before more complex statements + // like aggregations can take effect later in the query) + monitorV2.query.replaceFirst("|", timeFilterReplace) + } else { + // otherwise the query contains no piped statements and is simply a + // `search source=` statement, simply append time filter at the end + monitorV2.query + timeFilterAppend + } + + logger.info("time filtered query: $timeFilteredQuery") + + // call PPL plugin to execute time filtered query val transportPplQueryRequest = TransportPPLQueryRequest( - monitorV2.query, + timeFilteredQuery, JSONObject(mapOf(PPL_SQL_QUERY_FIELD to monitorV2.query)), null // null path falls back to a default path internal to SQL/PPL Plugin ) @@ -93,6 +123,7 @@ object PPLMonitorRunner : MonitorV2Runner() { val queryResponseJson = JSONObject(transportPplQueryResponse.result) val numResults = queryResponseJson.getLong("total") + // TODO: create Map queryResults val triggerResults = mutableMapOf() val generatedAlerts = mutableListOf() @@ -106,6 +137,9 @@ object PPLMonitorRunner : MonitorV2Runner() { val pplTriggerRunResult = PPLTriggerRunResult(trigger.name, triggered, null) + logger.info("trigger ${trigger.name} triggered: $triggered") + logger.info("ppl query results: $queryResponseJson") + // TODO: currently naively generates an alert and action every time // TODO: maintain alert state, check for COMPLETED alert and suppression condition, like query level monitor @@ -123,7 +157,10 @@ object PPLMonitorRunner : MonitorV2Runner() { ) triggerResults[pplTrigger.id] = pplTriggerRunResult - generatedAlerts.add(alertV2) + + if (triggered) { + generatedAlerts.add(alertV2) + } // if (monitorCtx.triggerService!!.isQueryLevelTriggerActionable(triggerCtx, triggerResult, workflowRunContext)) { // val actionCtx = triggerCtx.copy(error = monitorResult.error ?: triggerResult.error) diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/action/ExecuteMonitorV2Request.kt 
b/alerting/src/main/kotlin/org/opensearch/alerting/action/ExecuteMonitorV2Request.kt index 86bdeeb35..f1dded881 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/action/ExecuteMonitorV2Request.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/action/ExecuteMonitorV2Request.kt @@ -49,11 +49,6 @@ class ExecuteMonitorV2Request : ActionRequest { "Neither a monitor ID or monitor object was supplied", exception ) - } else if (monitorV2 != null && monitorId != null) { - exception = ValidateActions.addValidationError( - "Both a monitor ID and a monitor object were supplied, only one of these is accepted", - exception - ) } return exception } diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestSearchMonitorV2Action.kt b/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestSearchMonitorV2Action.kt new file mode 100644 index 000000000..9622165e1 --- /dev/null +++ b/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestSearchMonitorV2Action.kt @@ -0,0 +1,119 @@ +package org.opensearch.alerting.resthandler + +import org.apache.logging.log4j.LogManager +import org.opensearch.action.search.SearchRequest +import org.opensearch.action.search.SearchResponse +import org.opensearch.alerting.AlertingPlugin +import org.opensearch.alerting.settings.AlertingSettings +import org.opensearch.alerting.util.context +import org.opensearch.cluster.service.ClusterService +import org.opensearch.common.settings.Settings +import org.opensearch.common.xcontent.LoggingDeprecationHandler +import org.opensearch.common.xcontent.XContentFactory.jsonBuilder +import org.opensearch.common.xcontent.XContentType +import org.opensearch.commons.alerting.action.AlertingActions +import org.opensearch.commons.alerting.action.SearchMonitorV2Request +import org.opensearch.commons.alerting.model.ScheduledJob +import org.opensearch.commons.alerting.model.ScheduledJob.Companion.SCHEDULED_JOBS_INDEX +import 
org.opensearch.core.common.bytes.BytesReference +import org.opensearch.core.rest.RestStatus +import org.opensearch.core.xcontent.ToXContent.EMPTY_PARAMS +import org.opensearch.rest.BaseRestHandler +import org.opensearch.rest.BytesRestResponse +import org.opensearch.rest.RestChannel +import org.opensearch.rest.RestHandler.Route +import org.opensearch.rest.RestRequest +import org.opensearch.rest.RestRequest.Method.GET +import org.opensearch.rest.RestRequest.Method.POST +import org.opensearch.rest.RestResponse +import org.opensearch.rest.action.RestResponseListener +import org.opensearch.search.builder.SearchSourceBuilder +import org.opensearch.transport.client.node.NodeClient +import java.io.IOException + +private val log = LogManager.getLogger(RestSearchMonitorV2Action::class.java) + +class RestSearchMonitorV2Action( + val settings: Settings, + clusterService: ClusterService, +) : BaseRestHandler() { + + @Volatile private var filterBy = AlertingSettings.FILTER_BY_BACKEND_ROLES.get(settings) + + init { + clusterService.clusterSettings.addSettingsUpdateConsumer(AlertingSettings.FILTER_BY_BACKEND_ROLES) { filterBy = it } + } + + override fun getName(): String { + return "search_monitor_v2_action" + } + + override fun routes(): List { + return listOf( + Route( + POST, + "${AlertingPlugin.MONITOR_V2_BASE_URI}/_search" + ), + Route( + GET, + "${AlertingPlugin.MONITOR_V2_BASE_URI}/_search" + ) + ) + } + + @Throws(IOException::class) + override fun prepareRequest(request: RestRequest, client: NodeClient): RestChannelConsumer { + log.debug("${request.method()} ${AlertingPlugin.MONITOR_V2_BASE_URI}/_search") + +// val index = request.param("index", SCHEDULED_JOBS_INDEX) +// if (index != SCHEDULED_JOBS_INDEX && index != ALL_ALERT_INDEX_PATTERN) { +// throw IllegalArgumentException("Invalid index name.") +// } + + val searchSourceBuilder = SearchSourceBuilder() + searchSourceBuilder.parseXContent(request.contentOrSourceParamParser()) + 
searchSourceBuilder.fetchSource(context(request)) + + val searchRequest = SearchRequest() + .source(searchSourceBuilder) + .indices(SCHEDULED_JOBS_INDEX) + + val searchMonitorV2Request = SearchMonitorV2Request(searchRequest) + return RestChannelConsumer { channel -> + client.execute(AlertingActions.SEARCH_MONITORS_V2_ACTION_TYPE, searchMonitorV2Request, searchMonitorResponse(channel)) + } + } + + // once ths search response is received, rewrite the search hits to remove the extra "monitor_v2" JSON object wrapper + // that is used as ScheduledJob metadata + private fun searchMonitorResponse(channel: RestChannel): RestResponseListener { + return object : RestResponseListener(channel) { + @Throws(Exception::class) + override fun buildResponse(response: SearchResponse): RestResponse { + if (response.isTimedOut) { + return BytesRestResponse(RestStatus.REQUEST_TIMEOUT, response.toString()) + } + + // Swallow exception and return response as is + try { + for (hit in response.hits) { + XContentType.JSON.xContent().createParser( + channel.request().xContentRegistry, + LoggingDeprecationHandler.INSTANCE, hit.sourceAsString + ).use { hitsParser -> + val monitorV2 = ScheduledJob.parse(hitsParser, hit.id, hit.version) + val xcb = monitorV2.toXContent(jsonBuilder(), EMPTY_PARAMS) + + // rewrite the search hit as just the MonitorV2 source, + // without the extra "monitor_v2" JSON object wrapper + hit.sourceRef(BytesReference.bytes(xcb)) + } + } + } catch (e: Exception) { + log.error("The monitor_v2 parsing failed. 
Will return response as is.") + } + return BytesRestResponse(RestStatus.OK, response.toXContent(channel.newBuilder(), EMPTY_PARAMS)) + } + } + } +} diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportExecuteMonitorV2Action.kt b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportExecuteMonitorV2Action.kt index 544721d8e..28587ca0f 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportExecuteMonitorV2Action.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportExecuteMonitorV2Action.kt @@ -54,7 +54,7 @@ class TransportExecuteMonitorV2Action @Inject constructor( actionListener: ActionListener ) { // client.threadPool().threadContext.stashContext().use { // TODO: include this when security plugin enabled - // first define a function that will be used later to run MonitorV2s + /* first define a function that will be used later to run MonitorV2s */ val executeMonitorV2 = fun (monitorV2: MonitorV2) { runner.launch { // get execution time interval @@ -96,7 +96,16 @@ class TransportExecuteMonitorV2Action @Inject constructor( } } - // now execute the MonitorV2 + /* now execute the MonitorV2 */ + + // if both monitor_v2 id and object were passed in, ignore object and proceed with id + if (execMonitorV2Request.monitorId != null && execMonitorV2Request.monitorV2 != null) { + log.info( + "Both a monitor_v2 id and monitor_v2 object were passed in to ExecuteMonitorV2" + + "request. Proceeding to execute by monitor_v2 ID and ignoring monitor_v2 object." 
+ ) + } + if (execMonitorV2Request.monitorId != null) { // execute with monitor ID case // search the alerting-config index for the MonitorV2 with this ID val getMonitorV2Request = GetRequest(ScheduledJob.SCHEDULED_JOBS_INDEX).id(execMonitorV2Request.monitorId) @@ -116,7 +125,6 @@ class TransportExecuteMonitorV2Action @Inject constructor( return } if (!getMonitorV2Response.isSourceEmpty) { - log.info("found monitor") XContentHelper.createParser( xContentRegistry, LoggingDeprecationHandler.INSTANCE, getMonitorV2Response.sourceAsBytesRef, XContentType.JSON diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportSearchMonitorV2Action.kt b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportSearchMonitorV2Action.kt new file mode 100644 index 000000000..1a262ac94 --- /dev/null +++ b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportSearchMonitorV2Action.kt @@ -0,0 +1,78 @@ +package org.opensearch.alerting.transport + +import org.apache.logging.log4j.LogManager +import org.opensearch.action.search.SearchResponse +import org.opensearch.action.support.ActionFilters +import org.opensearch.action.support.HandledTransportAction +import org.opensearch.alerting.settings.AlertingSettings +import org.opensearch.cluster.service.ClusterService +import org.opensearch.common.inject.Inject +import org.opensearch.common.settings.Settings +import org.opensearch.commons.alerting.action.AlertingActions +import org.opensearch.commons.alerting.action.SearchMonitorV2Request +import org.opensearch.commons.alerting.model.MonitorV2.Companion.MONITOR_V2_TYPE +import org.opensearch.commons.alerting.util.AlertingException +import org.opensearch.core.action.ActionListener +import org.opensearch.core.common.io.stream.NamedWriteableRegistry +import org.opensearch.index.query.BoolQueryBuilder +import org.opensearch.index.query.QueryBuilders +import org.opensearch.tasks.Task +import org.opensearch.transport.TransportService +import 
org.opensearch.transport.client.Client + +private val log = LogManager.getLogger(TransportSearchMonitorV2Action::class.java) + +class TransportSearchMonitorV2Action @Inject constructor( + transportService: TransportService, + val settings: Settings, + val client: Client, + clusterService: ClusterService, + actionFilters: ActionFilters, + val namedWriteableRegistry: NamedWriteableRegistry +) : HandledTransportAction( + AlertingActions.SEARCH_MONITORS_V2_ACTION_NAME, transportService, actionFilters, ::SearchMonitorV2Request +), + SecureTransportAction { + + @Volatile + override var filterByEnabled: Boolean = AlertingSettings.FILTER_BY_BACKEND_ROLES.get(settings) + + init { + listenFilterBySettingChange(clusterService) + } + + override fun doExecute(task: Task, request: SearchMonitorV2Request, actionListener: ActionListener) { + + val searchSourceBuilder = request.searchRequest.source() + + val queryBuilder = if (searchSourceBuilder.query() == null) BoolQueryBuilder() + else QueryBuilders.boolQuery().must(searchSourceBuilder.query()) + + // filter out MonitorV1s in the alerting config index + // only return MonitorV2s that match the user-given search query + queryBuilder.filter(QueryBuilders.existsQuery(MONITOR_V2_TYPE)) + + searchSourceBuilder.query(queryBuilder) + .seqNoAndPrimaryTerm(true) + .version(true) + +// addOwnerFieldIfNotExists(transformedRequest.searchRequest) +// val user = readUserFromThreadContext(client) +// client.threadPool().threadContext.stashContext().use { +// resolve(transformedRequest, actionListener, user) +// } + + client.search( + request.searchRequest, + object : ActionListener { + override fun onResponse(response: SearchResponse) { + actionListener.onResponse(response) + } + + override fun onFailure(t: Exception) { + actionListener.onFailure(AlertingException.wrap(t)) + } + } + ) + } +} From 89f8bdeac4188c307eab38918dddd069502595fd Mon Sep 17 00:00:00 2001 From: Dennis Toepker Date: Wed, 20 Aug 2025 18:37:49 -0700 Subject: [PATCH 06/40] 
initial implementation of custom trigger conditions --- .../opensearch/alerting/PPLMonitorRunner.kt | 239 +++++++++++++----- 1 file changed, 173 insertions(+), 66 deletions(-) diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt b/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt index 2b122e9f0..87d33a8c7 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt @@ -78,88 +78,136 @@ object PPLMonitorRunner : MonitorV2Runner() { return monitorV2Result.copy(error = e) } - // inject time filter into PPL query to only query for data within the (periodStart, periodEnd) interval - // TODO: if query contains "_time", "span", "earliest", "latest", skip adding filter - // TODO: pending https://github.com/opensearch-project/sql/issues/3969 - // for now assume "_time" field is always present in customer data + val timeFilteredQuery = addTimeFilter(monitorV2.query, periodStart, periodEnd) - // PPL plugin only accepts timestamp strings in this format - val formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss").withZone(UTC) + // TODO: create Map queryResults + val triggerResults = mutableMapOf() + val generatedAlerts = mutableListOf() - val periodStartPplTimestamp = formatter.format(periodStart) - val periodEndPplTimeStamp = formatter.format(periodEnd) + for (trigger in monitorV2.triggers) { + val pplTrigger = trigger as PPLTrigger + if (pplTrigger.mode == TriggerMode.PER_RESULT) { + break // TODO: handle custom condition case and per result trigger mode + } - val timeFilterReplace = "| where _time > TIMESTAMP('$periodStartPplTimestamp') and _time < TIMESTAMP('$periodEndPplTimeStamp') |" - val timeFilterAppend = "| where _time > TIMESTAMP('$periodStartPplTimestamp') and _time < TIMESTAMP('$periodEndPplTimeStamp')" + if (pplTrigger.conditionType == ConditionType.NUMBER_OF_RESULTS) { // number_of_results trigger + val 
queryResponseJson = executePplQuery(timeFilteredQuery, monitorCtx) - val timeFilteredQuery: String = if (monitorV2.query.contains("|")) { - // if Monitor query contains piped statements, inject the time filter - // as the first piped statement (i.e. before more complex statements - // like aggregations can take effect later in the query) - monitorV2.query.replaceFirst("|", timeFilterReplace) - } else { - // otherwise the query contains no piped statements and is simply a - // `search source=` statement, simply append time filter at the end - monitorV2.query + timeFilterAppend - } + val numResults = queryResponseJson.getLong("total") - logger.info("time filtered query: $timeFilteredQuery") + val triggered = evaluateNumResultsTrigger(numResults, trigger.numResultsCondition!!, trigger.numResultsValue!!) - // call PPL plugin to execute time filtered query - val transportPplQueryRequest = TransportPPLQueryRequest( - timeFilteredQuery, - JSONObject(mapOf(PPL_SQL_QUERY_FIELD to monitorV2.query)), - null // null path falls back to a default path internal to SQL/PPL Plugin - ) + val pplTriggerRunResult = PPLTriggerRunResult(trigger.name, triggered, null) - val transportPplQueryResponse = PPLPluginInterface.suspendUntil { - this.executeQuery( - monitorCtx.client as NodeClient, - transportPplQueryRequest, - it - ) - } + triggerResults[pplTrigger.id] = pplTriggerRunResult - val queryResponseJson = JSONObject(transportPplQueryResponse.result) - val numResults = queryResponseJson.getLong("total") + logger.info("trigger ${trigger.name} triggered: $triggered") + logger.info("ppl query results: $queryResponseJson") - // TODO: create Map queryResults - val triggerResults = mutableMapOf() - val generatedAlerts = mutableListOf() + if (triggered) { + // TODO: currently naively generates an alert and action every time + // TODO: maintain alert state, check for COMPLETED alert and suppression condition, like query level monitor + val alertV2 = AlertV2( + monitorId = monitorV2.id, + 
monitorName = monitorV2.name, + monitorVersion = monitorV2.version, + triggerId = trigger.id, + triggerName = trigger.name, + state = Alert.State.ACTIVE, + startTime = Instant.now(), + errorHistory = listOf(), + severity = trigger.severity.value, + actionExecutionResults = listOf(), + ) - for (trigger in monitorV2.triggers) { - val pplTrigger = trigger as PPLTrigger - if (pplTrigger.conditionType == ConditionType.CUSTOM || pplTrigger.mode == TriggerMode.PER_RESULT) { - break // TODO: handle custom condition case and per result trigger mode - } + generatedAlerts.add(alertV2) + } + } else { // custom trigger + val queryWithCustomCondition = addCustomCondition(timeFilteredQuery, trigger.customCondition!!) + + val queryResponseJson = executePplQuery(queryWithCustomCondition, monitorCtx) - val triggered = evaluateNumResultsTrigger(numResults, trigger.numResultsCondition!!, trigger.numResultsValue!!) + // a PPL query with custom condition returning 0 results should imply a valid but not useful query. + // do not trigger alert, but warn that query likely is not functioning as user intended + if (queryResponseJson.getLong("total") == 0L) { + logger.warn( + "During execution of monitor ${monitorV2.name}, PPL query with custom" + + "condition returned no results. Proceeding without triggering alert." 
+ ) - val pplTriggerRunResult = PPLTriggerRunResult(trigger.name, triggered, null) + val pplTriggerRunResult = PPLTriggerRunResult(trigger.name, false, null) + triggerResults[pplTrigger.id] = pplTriggerRunResult - logger.info("trigger ${trigger.name} triggered: $triggered") - logger.info("ppl query results: $queryResponseJson") + continue + } - // TODO: currently naively generates an alert and action every time - // TODO: maintain alert state, check for COMPLETED alert and suppression condition, like query level monitor + // find the name of the eval result variable defined in custom condition + val evalResultVarName = trigger.customCondition!!.split(" ")[1] // [0] is "eval", [1] is the var name + + // find the eval statement result variable in the PPL query response schema + val schemaList = queryResponseJson.getJSONArray("schema") + var evalResultVarIdx = -1 + for (i in 0 until schemaList.length()) { + val schemaObj = schemaList.getJSONObject(i) + val columnName = schemaObj.getString("name") + + if (columnName == evalResultVarName) { + if (schemaObj.getString("type") != "boolean") { + throw IllegalStateException( + "parsing results of PPL query with custom condition failed," + + "eval statement variable was not type boolean, but instead type: ${schemaObj.getString("type")}" + ) + } + + evalResultVarIdx = i + break + } + } - val alertV2 = AlertV2( - monitorId = monitorV2.id, - monitorName = monitorV2.name, - monitorVersion = monitorV2.version, - triggerId = trigger.id, - triggerName = trigger.name, - state = Alert.State.ACTIVE, - startTime = Instant.now(), - errorHistory = listOf(), - severity = trigger.severity.value, - actionExecutionResults = listOf(), - ) + // eval statement result variable should always be found + if (evalResultVarIdx == -1) { + throw IllegalStateException( + "expected to find eval statement results variable $evalResultVarName in results" + + "of PPL query with custom condition, but did not." 
+ ) + } - triggerResults[pplTrigger.id] = pplTriggerRunResult + val dataRowList = queryResponseJson.getJSONArray("datarows") + var triggered = false + for (i in 0 until dataRowList.length()) { + val dataRow = dataRowList.getJSONArray(i) + val evalResult = dataRow.getBoolean(evalResultVarIdx) + if (evalResult) { + triggered = true + break + } + } - if (triggered) { - generatedAlerts.add(alertV2) + val pplTriggerRunResult = PPLTriggerRunResult(trigger.name, triggered, null) + + triggerResults[pplTrigger.id] = pplTriggerRunResult + + logger.info("trigger ${trigger.name} triggered: $triggered") + logger.info("ppl query results: $queryResponseJson") + + if (triggered) { + // TODO: currently naively generates an alert and action every time + // TODO: maintain alert state, check for COMPLETED alert and suppression condition, like query level monitor + val alertV2 = AlertV2( + monitorId = monitorV2.id, + monitorName = monitorV2.name, + monitorVersion = monitorV2.version, + triggerId = trigger.id, + triggerName = trigger.name, + state = Alert.State.ACTIVE, + startTime = Instant.now(), + errorHistory = listOf(), + severity = trigger.severity.value, + actionExecutionResults = listOf(), + ) + + generatedAlerts.add(alertV2) + } } // if (monitorCtx.triggerService!!.isQueryLevelTriggerActionable(triggerCtx, triggerResult, workflowRunContext)) { @@ -180,7 +228,7 @@ object PPLMonitorRunner : MonitorV2Runner() { ) } - return monitorV2Result.copy(triggerResults = triggerResults, pplQueryResults = queryResponseJson.toString()) + return monitorV2Result.copy(triggerResults = triggerResults) } private fun evaluateNumResultsTrigger(numResults: Long, numResultsCondition: NumResultsCondition, numResultsValue: Long): Boolean { @@ -194,6 +242,65 @@ object PPLMonitorRunner : MonitorV2Runner() { } } + // adds monitor schedule-based time filter + private fun addTimeFilter(query: String, periodStart: Instant, periodEnd: Instant): String { + // inject time filter into PPL query to only query for 
data within the (periodStart, periodEnd) interval + // TODO: if query contains "_time", "span", "earliest", "latest", skip adding filter + // TODO: pending https://github.com/opensearch-project/sql/issues/3969 + // for now assume "_time" field is always present in customer data + + // PPL plugin only accepts timestamp strings in this format + val formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss").withZone(UTC) + + val periodStartPplTimestamp = formatter.format(periodStart) + val periodEndPplTimeStamp = formatter.format(periodEnd) + + val timeFilterReplace = "| where _time > TIMESTAMP('$periodStartPplTimestamp') and _time < TIMESTAMP('$periodEndPplTimeStamp') |" + val timeFilterAppend = "| where _time > TIMESTAMP('$periodStartPplTimestamp') and _time < TIMESTAMP('$periodEndPplTimeStamp')" + + val timeFilteredQuery: String = if (query.contains("|")) { + // if Monitor query contains piped statements, inject the time filter + // as the first piped statement (i.e. before more complex statements + // like aggregations can take effect later in the query) + query.replaceFirst("|", timeFilterReplace) + } else { + // otherwise the query contains no piped statements and is simply a + // `search source=` statement, simply append time filter at the end + query + timeFilterAppend + } + + logger.info("time filtered query: $timeFilteredQuery") + + return timeFilteredQuery + } + + // appendss user-defined custom trigger condition to PPL query, only for custom condition Triggers + private fun addCustomCondition(query: String, customCondition: String): String { + return "$query | $customCondition" + } + + // returns PPL query response as parsable JSONObject + private suspend fun executePplQuery(query: String, monitorCtx: MonitorRunnerExecutionContext): JSONObject { + // call PPL plugin to execute time filtered query + val transportPplQueryRequest = TransportPPLQueryRequest( + query, + JSONObject(mapOf(PPL_SQL_QUERY_FIELD to query)), // TODO: what is the purpose of this 
arg? + null // null path falls back to a default path internal to SQL/PPL Plugin + ) + + val transportPplQueryResponse = PPLPluginInterface.suspendUntil { + this.executeQuery( + monitorCtx.client as NodeClient, + transportPplQueryRequest, + it + ) + } + + val queryResponseJson = JSONObject(transportPplQueryResponse.result) + + return queryResponseJson + } + private suspend fun saveAlertsV2( alerts: List, monitorCtx: MonitorRunnerExecutionContext, From 23024ee99c3bb4b7c9a670c1fc96598fd16493d8 Mon Sep 17 00:00:00 2001 From: Dennis Toepker Date: Thu, 21 Aug 2025 14:39:42 -0700 Subject: [PATCH 07/40] initial implementation of per result triggers --- .../opensearch/alerting/PPLMonitorRunner.kt | 57 ++++++++++++------- 1 file changed, 36 insertions(+), 21 deletions(-) diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt b/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt index 87d33a8c7..a2b9b5b54 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt @@ -63,32 +63,29 @@ object PPLMonitorRunner : MonitorV2Runner() { logger.warn("Start and end time are the same: $periodStart. This monitor will probably only run once.") } - var monitorV2Result = PPLMonitorRunResult(monitorV2.name, null, periodStart, periodEnd, mapOf(), "") + var monitorV2Result = PPLMonitorRunResult(monitorV2.name, null, periodStart, periodEnd, mapOf(), mapOf()) // TODO: should alerting v1 and v2 alerts index be separate? // TODO: should alerting v1 and v2 alerting-config index be separate? 
- val currentAlerts = try { - // TODO: write generated V2 alerts to existing alerts v1 index for now, revisit this decision - monitorCtx.alertIndices!!.createOrUpdateAlertIndex() - monitorCtx.alertIndices!!.createOrUpdateInitialAlertHistoryIndex() - } catch (e: Exception) { - // We can't save ERROR alerts to the index here as we don't know if there are existing ACTIVE alerts - val id = if (monitorV2.id.trim().isEmpty()) "_na_" else monitorV2.id - logger.error("Error loading alerts for monitorV2: $id", e) - return monitorV2Result.copy(error = e) - } +// val currentAlerts = try { +// // TODO: write generated V2 alerts to existing alerts v1 index for now, revisit this decision +// monitorCtx.alertIndices!!.createOrUpdateAlertIndex() +// monitorCtx.alertIndices!!.createOrUpdateInitialAlertHistoryIndex() +// } catch (e: Exception) { +// // We can't save ERROR alerts to the index here as we don't know if there are existing ACTIVE alerts +// val id = if (monitorV2.id.trim().isEmpty()) "_na_" else monitorV2.id +// logger.error("Error loading alerts for monitorV2: $id", e) +// return monitorV2Result.copy(error = e) +// } val timeFilteredQuery = addTimeFilter(monitorV2.query, periodStart, periodEnd) - // TODO: create Map queryResults val triggerResults = mutableMapOf() + val pplQueryResults = mutableMapOf() val generatedAlerts = mutableListOf() for (trigger in monitorV2.triggers) { val pplTrigger = trigger as PPLTrigger - if (pplTrigger.mode == TriggerMode.PER_RESULT) { - break // TODO: handle custom condition case and per result trigger mode - } if (pplTrigger.conditionType == ConditionType.NUMBER_OF_RESULTS) { // number_of_results trigger val queryResponseJson = executePplQuery(timeFilteredQuery, monitorCtx) @@ -100,6 +97,7 @@ object PPLMonitorRunner : MonitorV2Runner() { val pplTriggerRunResult = PPLTriggerRunResult(trigger.name, triggered, null) triggerResults[pplTrigger.id] = pplTriggerRunResult + pplQueryResults[pplTrigger.id] = queryResponseJson logger.info("trigger 
${trigger.name} triggered: $triggered") logger.info("ppl query results: $queryResponseJson") @@ -107,6 +105,7 @@ object PPLMonitorRunner : MonitorV2Runner() { if (triggered) { // TODO: currently naively generates an alert and action every time // TODO: maintain alert state, check for COMPLETED alert and suppression condition, like query level monitor + // query results will not be stored in alerts, but are instead included in notification actions val alertV2 = AlertV2( monitorId = monitorV2.id, monitorName = monitorV2.name, @@ -120,7 +119,13 @@ object PPLMonitorRunner : MonitorV2Runner() { actionExecutionResults = listOf(), ) - generatedAlerts.add(alertV2) + if (pplTrigger.mode == TriggerMode.RESULT_SET) { + generatedAlerts.add(alertV2) + } else { // TriggerMode.PER_RESULT + for (i in 0 until numResults) { + generatedAlerts.add(alertV2) + } + } } } else { // custom trigger val queryWithCustomCondition = addCustomCondition(timeFilteredQuery, trigger.customCondition!!) @@ -173,19 +178,20 @@ object PPLMonitorRunner : MonitorV2Runner() { } val dataRowList = queryResponseJson.getJSONArray("datarows") - var triggered = false + var numTriggered = 0 // the number of query result rows that evaluated to true for (i in 0 until dataRowList.length()) { val dataRow = dataRowList.getJSONArray(i) val evalResult = dataRow.getBoolean(evalResultVarIdx) if (evalResult) { - triggered = true - break + numTriggered++ } } + val triggered = numTriggered > 0 val pplTriggerRunResult = PPLTriggerRunResult(trigger.name, triggered, null) triggerResults[pplTrigger.id] = pplTriggerRunResult + pplQueryResults[pplTrigger.id] = queryResponseJson logger.info("trigger ${trigger.name} triggered: $triggered") logger.info("ppl query results: $queryResponseJson") @@ -206,7 +212,13 @@ object PPLMonitorRunner : MonitorV2Runner() { actionExecutionResults = listOf(), ) - generatedAlerts.add(alertV2) + if (pplTrigger.mode == TriggerMode.RESULT_SET) { + generatedAlerts.add(alertV2) + } else { // 
TriggerMode.PER_RESULT + for (i in 0 until numTriggered) { + generatedAlerts.add(alertV2) + } + } } } @@ -228,7 +240,10 @@ object PPLMonitorRunner : MonitorV2Runner() { ) } - return monitorV2Result.copy(triggerResults = triggerResults) + logger.info("trigger results: $triggerResults") + logger.info("ppl query results: $pplQueryResults") + + return monitorV2Result.copy(triggerResults = triggerResults, pplQueryResults = pplQueryResults) } private fun evaluateNumResultsTrigger(numResults: Long, numResultsCondition: NumResultsCondition, numResultsValue: Long): Boolean { From 9594b0ac1a317d15800bbd7bde9c94b7a126cc1c Mon Sep 17 00:00:00 2001 From: Dennis Toepker Date: Tue, 26 Aug 2025 15:40:13 -0700 Subject: [PATCH 08/40] alert suppression, expiration, and some run monitor refactors --- .../org/opensearch/alerting/AlertV2Expirer.kt | 119 +++++ .../org/opensearch/alerting/AlertingPlugin.kt | 3 + .../opensearch/alerting/PPLMonitorRunner.kt | 438 +++++++++++------- .../resthandler/RestIndexMonitorV2Action.kt | 5 - .../alerting/alerts/alert_mapping.json | 3 + .../opensearch/alerting/core/JobSweeper.kt | 1 + 6 files changed, 385 insertions(+), 184 deletions(-) create mode 100644 alerting/src/main/kotlin/org/opensearch/alerting/AlertV2Expirer.kt diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/AlertV2Expirer.kt b/alerting/src/main/kotlin/org/opensearch/alerting/AlertV2Expirer.kt new file mode 100644 index 000000000..e657589f0 --- /dev/null +++ b/alerting/src/main/kotlin/org/opensearch/alerting/AlertV2Expirer.kt @@ -0,0 +1,119 @@ +package org.opensearch.alerting + +import org.apache.logging.log4j.LogManager +import org.opensearch.alerting.alerts.AlertIndices.Companion.ALERT_HISTORY_WRITE_INDEX +import org.opensearch.alerting.alerts.AlertIndices.Companion.ALERT_INDEX +import org.opensearch.alerting.alerts.AlertIndices.Companion.ALL_ALERT_INDEX_PATTERN +import org.opensearch.cluster.ClusterChangedEvent +import org.opensearch.cluster.ClusterStateListener +import 
org.opensearch.cluster.service.ClusterService +import org.opensearch.common.unit.TimeValue +import org.opensearch.commons.alerting.model.AlertV2.Companion.EXPIRATION_TIME_FIELD +import org.opensearch.core.action.ActionListener +import org.opensearch.index.query.QueryBuilders +import org.opensearch.index.reindex.BulkByScrollResponse +import org.opensearch.index.reindex.DeleteByQueryAction +import org.opensearch.index.reindex.DeleteByQueryRequestBuilder +import org.opensearch.threadpool.Scheduler +import org.opensearch.threadpool.ThreadPool +import org.opensearch.transport.client.Client +import java.time.Instant +import java.util.concurrent.TimeUnit + +private val logger = LogManager.getLogger(AlertV2Expirer::class.java) + +class AlertV2Expirer( + private val client: Client, + private val threadPool: ThreadPool, + private val clusterService: ClusterService, +) : ClusterStateListener { + + init { + clusterService.addListener(this) + } + + @Volatile private var isClusterManager = false + + private var alertIndexInitialized = false + + private var alertHistoryIndexInitialized = false + + private var scheduledAlertsV2CheckAndExpire: Scheduler.Cancellable? 
= null + + private val executorName = ThreadPool.Names.MANAGEMENT + + private val checkForExpirationInterval = TimeValue(1L, TimeUnit.MINUTES) + + override fun clusterChanged(event: ClusterChangedEvent) { + // Instead of using a LocalNodeClusterManagerListener to track clustermanager changes, this service will + // track them here to avoid conditions where clustermanager listener events run after other + // listeners that depend on what happened in the clustermanager listener + if (this.isClusterManager != event.localNodeClusterManager()) { + this.isClusterManager = event.localNodeClusterManager() + if (this.isClusterManager) { + onManager() + } else { + offManager() + } + } + + alertIndexInitialized = event.state().routingTable().hasIndex(ALERT_INDEX) + alertHistoryIndexInitialized = event.state().metadata().hasAlias(ALERT_HISTORY_WRITE_INDEX) + } + + fun onManager() { + try { + // try to sweep current AlertV2s immediately as we might be restarting the cluster + expireAlertV2s() + // schedule expiration checks and expirations to happen repeatedly at some interval + scheduledAlertsV2CheckAndExpire = threadPool + .scheduleWithFixedDelay({ expireAlertV2s() }, checkForExpirationInterval, executorName) + } catch (e: Exception) { + // This should be run on cluster startup + logger.error( + "Error creating comments indices. 
Comments can't be recorded until clustermanager node is restarted.", + e + ) + } + } + + fun offManager() { + scheduledAlertsV2CheckAndExpire?.cancel() + } + + private fun expireAlertV2s() { + if (!areAlertsIndicesInitialized()) { + // TODO: edge case: what if alert history indices are present but regular alerts index is absent + return + } + + try { + val deleteByQuery = QueryBuilders.rangeQuery(EXPIRATION_TIME_FIELD) + .lte(Instant.now().toEpochMilli()) + + DeleteByQueryRequestBuilder(client, DeleteByQueryAction.INSTANCE) + .source(ALL_ALERT_INDEX_PATTERN) + .filter(deleteByQuery) + .refresh(true) + .execute( + object : ActionListener { + override fun onResponse(response: BulkByScrollResponse) { + logger.info("noops: ${response.noops}") + logger.info("status: ${response.status}") + logger.info("searchFailures: ${response.searchFailures}") + logger.info("Deleted ${response.deleted} expired alerts") + } + override fun onFailure(e: Exception) { + logger.error("Failed to delete expired alerts", e) + } + } + ) + } catch (e: Exception) { + logger.error("Error during alert cleanup", e) + } + } + + private fun areAlertsIndicesInitialized(): Boolean { + return alertIndexInitialized && alertHistoryIndexInitialized + } +} diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/AlertingPlugin.kt b/alerting/src/main/kotlin/org/opensearch/alerting/AlertingPlugin.kt index 43b3ebe7b..e0497ab0d 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/AlertingPlugin.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/AlertingPlugin.kt @@ -192,6 +192,7 @@ internal class AlertingPlugin : PainlessExtension, ActionPlugin, ScriptPlugin, R lateinit var alertIndices: AlertIndices lateinit var clusterService: ClusterService lateinit var destinationMigrationCoordinator: DestinationMigrationCoordinator + lateinit var alertV2Expirer: AlertV2Expirer var monitorTypeToMonitorRunners: MutableMap = mutableMapOf() override fun getRestHandlers( @@ -347,6 +348,7 @@ internal class 
AlertingPlugin : PainlessExtension, ActionPlugin, ScriptPlugin, R scheduler = JobScheduler(threadPool, runner) sweeper = JobSweeper(environment.settings(), client, clusterService, threadPool, xContentRegistry, scheduler, ALERTING_JOB_TYPES) destinationMigrationCoordinator = DestinationMigrationCoordinator(client, clusterService, threadPool, scheduledJobIndices) + alertV2Expirer = AlertV2Expirer(client, threadPool, clusterService) this.threadPool = threadPool this.clusterService = clusterService @@ -374,6 +376,7 @@ internal class AlertingPlugin : PainlessExtension, ActionPlugin, ScriptPlugin, R commentsIndices, docLevelMonitorQueries, destinationMigrationCoordinator, + alertV2Expirer, lockService, alertService, triggerService diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt b/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt index a2b9b5b54..dcaef1bd2 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt @@ -1,7 +1,5 @@ package org.opensearch.alerting -import kotlinx.coroutines.CoroutineScope -import kotlinx.coroutines.Dispatchers import org.apache.logging.log4j.LogManager import org.json.JSONObject import org.opensearch.ExceptionsHelper @@ -26,6 +24,7 @@ import org.opensearch.commons.alerting.model.PPLTrigger.ConditionType import org.opensearch.commons.alerting.model.PPLTrigger.NumResultsCondition import org.opensearch.commons.alerting.model.PPLTrigger.TriggerMode import org.opensearch.commons.alerting.model.PPLTriggerRunResult +import org.opensearch.commons.alerting.model.ScheduledJob.Companion.SCHEDULED_JOBS_INDEX import org.opensearch.commons.ppl.PPLPluginInterface import org.opensearch.commons.ppl.action.TransportPPLQueryRequest import org.opensearch.core.rest.RestStatus @@ -35,15 +34,13 @@ import org.opensearch.transport.client.node.NodeClient import java.time.Instant import java.time.ZoneOffset.UTC import 
java.time.format.DateTimeFormatter +import java.time.temporal.ChronoUnit object PPLMonitorRunner : MonitorV2Runner() { private val logger = LogManager.getLogger(javaClass) - private val scope: CoroutineScope = CoroutineScope(Dispatchers.IO) - const val PPL_SQL_QUERY_FIELD = "query" // name of PPL query field when passing into PPL/SQL Execute API call + private const val PPL_SQL_QUERY_FIELD = "query" // name of PPL query field when passing into PPL/SQL Execute API call - // TODO: this is a hacky implementation, needs serious revision and additions - // TODO: implement custom condition triggering override suspend fun runMonitorV2( monitorV2: MonitorV2, monitorCtx: MonitorRunnerExecutionContext, // MonitorV2 reads from same context as Monitor @@ -53,175 +50,149 @@ object PPLMonitorRunner : MonitorV2Runner() { executionId: String, transportService: TransportService, ): MonitorV2RunResult<*> { - logger.debug("Running monitor: ${monitorV2.name}. Thread: ${Thread.currentThread().name}") - if (monitorV2 !is PPLMonitor) { throw IllegalStateException("Unexpected monitor type: ${monitorV2.javaClass.name}") } + if (monitorV2.id == MonitorV2.NO_ID) { + throw IllegalStateException("Received PPL Monitor to execute that unexpectedly has no ID") + } + if (periodStart == periodEnd) { - logger.warn("Start and end time are the same: $periodStart. This monitor will probably only run once.") + logger.warn("Start and end time are the same: $periodStart. This PPL Monitor will probably only run once.") } - var monitorV2Result = PPLMonitorRunResult(monitorV2.name, null, periodStart, periodEnd, mapOf(), mapOf()) + logger.debug("Running PPL Monitor: ${monitorV2.name}. Thread: ${Thread.currentThread().name}") - // TODO: should alerting v1 and v2 alerts index be separate? - // TODO: should alerting v1 and v2 alerting-config index be separate? 
-// val currentAlerts = try { -// // TODO: write generated V2 alerts to existing alerts v1 index for now, revisit this decision -// monitorCtx.alertIndices!!.createOrUpdateAlertIndex() -// monitorCtx.alertIndices!!.createOrUpdateInitialAlertHistoryIndex() -// } catch (e: Exception) { -// // We can't save ERROR alerts to the index here as we don't know if there are existing ACTIVE alerts -// val id = if (monitorV2.id.trim().isEmpty()) "_na_" else monitorV2.id -// logger.error("Error loading alerts for monitorV2: $id", e) -// return monitorV2Result.copy(error = e) -// } - - val timeFilteredQuery = addTimeFilter(monitorV2.query, periodStart, periodEnd) + val pplMonitor = monitorV2 + val nodeClient = monitorCtx.client as NodeClient + // create some objects that will be used later val triggerResults = mutableMapOf() val pplQueryResults = mutableMapOf() val generatedAlerts = mutableListOf() - for (trigger in monitorV2.triggers) { - val pplTrigger = trigger as PPLTrigger - - if (pplTrigger.conditionType == ConditionType.NUMBER_OF_RESULTS) { // number_of_results trigger - val queryResponseJson = executePplQuery(timeFilteredQuery, monitorCtx) - - val numResults = queryResponseJson.getLong("total") - - val triggered = evaluateNumResultsTrigger(numResults, trigger.numResultsCondition!!, trigger.numResultsValue!!) 
- - val pplTriggerRunResult = PPLTriggerRunResult(trigger.name, triggered, null) - - triggerResults[pplTrigger.id] = pplTriggerRunResult - pplQueryResults[pplTrigger.id] = queryResponseJson - - logger.info("trigger ${trigger.name} triggered: $triggered") - logger.info("ppl query results: $queryResponseJson") - - if (triggered) { - // TODO: currently naively generates an alert and action every time - // TODO: maintain alert state, check for COMPLETED alert and suppression condition, like query level monitor - // query results will not be stored in alerts, but are instead included in notification actions - val alertV2 = AlertV2( - monitorId = monitorV2.id, - monitorName = monitorV2.name, - monitorVersion = monitorV2.version, - triggerId = trigger.id, - triggerName = trigger.name, - state = Alert.State.ACTIVE, - startTime = Instant.now(), - errorHistory = listOf(), - severity = trigger.severity.value, - actionExecutionResults = listOf(), - ) + // TODO: should alerting v1 and v2 alerts index be separate? + // TODO: should alerting v1 and v2 alerting-config index be separate? + try { + // TODO: write generated V2 alerts to existing alerts v1 index for now, revisit this decision + monitorCtx.alertIndices!!.createOrUpdateAlertIndex() + monitorCtx.alertIndices!!.createOrUpdateInitialAlertHistoryIndex() + } catch (e: Exception) { + val id = if (pplMonitor.id.trim().isEmpty()) "_na_" else pplMonitor.id + logger.error("Error loading alerts for monitorV2: $id", e) + return PPLMonitorRunResult(pplMonitor.name, e, periodStart, periodEnd, mapOf(), mapOf()) + } - if (pplTrigger.mode == TriggerMode.RESULT_SET) { - generatedAlerts.add(alertV2) - } else { // TriggerMode.PER_RESULT - for (i in 0 until numResults) { - generatedAlerts.add(alertV2) - } - } - } - } else { // custom trigger - val queryWithCustomCondition = addCustomCondition(timeFilteredQuery, trigger.customCondition!!) 
+ // only query data between now and the last PPL Monitor execution + // do this by injecting a time filtering where statement into PPL Monitor query + val timeFilteredQuery = addTimeFilter(pplMonitor.query, periodStart, periodEnd) - val queryResponseJson = executePplQuery(queryWithCustomCondition, monitorCtx) + // run each trigger + for (trigger in pplMonitor.triggers) { + val pplTrigger = trigger as PPLTrigger - // a PPL query with custom condition returning 0 results should imply a valid but not useful query. - // do not trigger alert, but warn that query likely is not functioning as user intended - if (queryResponseJson.getLong("total") == 0L) { - logger.warn( - "During execution of monitor ${monitorV2.name}, PPL query with custom" + - "condition returned no results. Proceeding without triggering alert." - ) + // check for suppression and skip execution + // before even running the trigger itself + val suppressed = checkForSuppress(pplTrigger) + if (suppressed) { + logger.info("throttling trigger ${pplTrigger.name} from monitor ${pplMonitor.name}") + continue + } + logger.info("throttling check passed, executing trigger ${pplTrigger.name} from monitor ${pplMonitor.name}") - val pplTriggerRunResult = PPLTriggerRunResult(trigger.name, false, null) - triggerResults[pplTrigger.id] = pplTriggerRunResult +// internal fun isActionActionable(action: Action, alert: Alert?): Boolean { +// if (alert != null && alert.state == Alert.State.AUDIT) +// return false +// if (alert == null || action.throttle == null) { +// return true +// } +// if (action.throttleEnabled) { +// val result = alert.actionExecutionResults.firstOrNull { r -> r.actionId == action.id } +// val lastExecutionTime: Instant? 
= result?.lastExecutionTime +// val throttledTimeBound = currentTime().minus(action.throttle!!.value.toLong(), action.throttle!!.unit) +// return (lastExecutionTime == null || lastExecutionTime.isBefore(throttledTimeBound)) +// } +// return true +// } - continue - } + // if trigger uses custom condition, append the custom condition to query, otherwise simply proceed + val queryToExecute = if (pplTrigger.conditionType == ConditionType.NUMBER_OF_RESULTS) { // number of results trigger + timeFilteredQuery + } else { // custom condition trigger + appendCustomCondition(timeFilteredQuery, pplTrigger.customCondition!!) + } - // find the name of the eval result variable defined in custom condition - val evalResultVarName = trigger.customCondition!!.split(" ")[1] // [0] is "eval", [1] is the var name - - // find the eval statement result variable in the PPL query response schema - val schemaList = queryResponseJson.getJSONArray("schema") - var evalResultVarIdx = -1 - for (i in 0 until schemaList.length()) { - val schemaObj = schemaList.getJSONObject(i) - val columnName = schemaObj.getString("name") - - if (columnName == evalResultVarName) { - if (schemaObj.getString("type") != "boolean") { - throw IllegalStateException( - "parsing results of PPL query with custom condition failed," + - "eval statement variable was not type boolean, but instead type: ${schemaObj.getString("type")}" - ) - } - - evalResultVarIdx = i - break - } - } + // TODO: does this handle pagination? does it need to? 
+ // execute the PPL query + val queryResponseJson = executePplQuery(queryToExecute, nodeClient) + + // retrieve the number of results + // for number of results triggers, this is simply the number of PPL query results + // for custom triggers, this is the number of rows in the query response's eval result column that evaluated to true + val numResults = if (pplTrigger.conditionType == ConditionType.NUMBER_OF_RESULTS) { // number of results trigger + queryResponseJson.getLong("total") + } else { // custom condition trigger + evaluateCustomConditionTrigger(queryResponseJson, pplTrigger) + } - // eval statement result variable should always be found - if (evalResultVarIdx == -1) { - throw IllegalStateException( - "expected to find eval statement results variable $evalResultVarName in results" + - "of PPL query with custom condition, but did not." - ) - } + // determine if the trigger condition has been met + val triggered = if (pplTrigger.conditionType == ConditionType.NUMBER_OF_RESULTS) { // number of results trigger + evaluateNumResultsTrigger(numResults, pplTrigger.numResultsCondition!!, pplTrigger.numResultsValue!!) 
+ } else { // custom condition trigger + numResults > 0 // if any of the query results satisfied the custom condition, the trigger counts as triggered + } - val dataRowList = queryResponseJson.getJSONArray("datarows") - var numTriggered = 0 // the number of query result rows that evaluated to true - for (i in 0 until dataRowList.length()) { - val dataRow = dataRowList.getJSONArray(i) - val evalResult = dataRow.getBoolean(evalResultVarIdx) - if (evalResult) { - numTriggered++ - } - } + logger.info("PPLTrigger ${pplTrigger.name} triggered: $triggered") - val triggered = numTriggered > 0 - val pplTriggerRunResult = PPLTriggerRunResult(trigger.name, triggered, null) - - triggerResults[pplTrigger.id] = pplTriggerRunResult - pplQueryResults[pplTrigger.id] = queryResponseJson - - logger.info("trigger ${trigger.name} triggered: $triggered") - logger.info("ppl query results: $queryResponseJson") - - if (triggered) { - // TODO: currently naively generates an alert and action every time - // TODO: maintain alert state, check for COMPLETED alert and suppression condition, like query level monitor - val alertV2 = AlertV2( - monitorId = monitorV2.id, - monitorName = monitorV2.name, - monitorVersion = monitorV2.version, - triggerId = trigger.id, - triggerName = trigger.name, - state = Alert.State.ACTIVE, - startTime = Instant.now(), - errorHistory = listOf(), - severity = trigger.severity.value, - actionExecutionResults = listOf(), - ) + // store the trigger execution and ppl query results for + // trigger execution response and notification message context + triggerResults[pplTrigger.id] = PPLTriggerRunResult(pplTrigger.name, triggered, null) + pplQueryResults[pplTrigger.id] = queryResponseJson - if (pplTrigger.mode == TriggerMode.RESULT_SET) { - generatedAlerts.add(alertV2) - } else { // TriggerMode.PER_RESULT - for (i in 0 until numTriggered) { - generatedAlerts.add(alertV2) - } - } - } + // generate an alert if triggered + if (triggered) { + 
generatedAlerts.addAll(generateAlerts(pplTrigger, pplMonitor, numResults)) } +// // execute and evaluate trigger based on trigger type +// if (pplTrigger.conditionType == ConditionType.NUMBER_OF_RESULTS) { // number_of_results trigger +// // execute the PPL query +// val queryResponseJson = executePplQuery(timeFilteredQuery, monitorCtx) +// +// // read in the number of results +// val numResults = queryResponseJson.getLong("total") +// +// // check if the number of results satisfies the trigger condition +// val triggered = evaluateNumResultsTrigger(numResults, trigger.numResultsCondition!!, trigger.numResultsValue!!) +// +// // store the trigger execution and ppl query results for execution response +// // and notification message context +// triggerResults[pplTrigger.id] = PPLTriggerRunResult(trigger.name, triggered, null) +// pplQueryResults[pplTrigger.id] = queryResponseJson +// +// logger.info("number of results trigger ${trigger.name} triggered: $triggered") +// +// // generate an alert if triggered +// if (triggered) { +// generateAlerts(trigger, numResults) +// } +// } else { // custom trigger +// val queryWithCustomCondition = appendCustomCondition(timeFilteredQuery, trigger.customCondition!!) 
+// val queryResponseJson = executePplQuery(queryWithCustomCondition, monitorCtx) +// val numTriggered = evaluateCustomConditionTrigger(queryResponseJson, pplTrigger, pplMonitor) +// val triggered = numTriggered > 0 +// +// triggerResults[pplTrigger.id] = PPLTriggerRunResult(trigger.name, triggered, null) +// pplQueryResults[pplTrigger.id] = queryResponseJson +// +// logger.info("custom condition trigger ${trigger.name} triggered: $triggered") +// +// if (triggered) { +// generateAlerts(trigger, numTriggered) +// } +// } + // if (monitorCtx.triggerService!!.isQueryLevelTriggerActionable(triggerCtx, triggerResult, workflowRunContext)) { // val actionCtx = triggerCtx.copy(error = monitorResult.error ?: triggerResult.error) // for (action in trigger.actions) { @@ -231,30 +202,32 @@ object PPLMonitorRunner : MonitorV2Runner() { } // TODO: what if retry policy null? + // write the alerts to the alerts index monitorCtx.retryPolicy?.let { - saveAlertsV2( - generatedAlerts, - monitorCtx, - it, - monitorV2.id - ) + saveAlertsV2(generatedAlerts, pplMonitor, it, nodeClient) } - logger.info("trigger results: $triggerResults") - logger.info("ppl query results: $pplQueryResults") + // TODO: collect all triggers that were throttled, and if none were throttled, skip update monitor? 
saves on write requests + // for suppression checking purposes, update the PPL Monitor in the alerting-config index + // with updated last triggered times for each of its triggers + updateMonitorWithLastTriggeredTimes(pplMonitor, nodeClient) - return monitorV2Result.copy(triggerResults = triggerResults, pplQueryResults = pplQueryResults) + return PPLMonitorRunResult(pplMonitor.name, null, periodStart, periodEnd, triggerResults, pplQueryResults) } - private fun evaluateNumResultsTrigger(numResults: Long, numResultsCondition: NumResultsCondition, numResultsValue: Long): Boolean { - return when (numResultsCondition) { - NumResultsCondition.GREATER_THAN -> numResults > numResultsValue - NumResultsCondition.GREATER_THAN_EQUAL -> numResults >= numResultsValue - NumResultsCondition.LESS_THAN -> numResults < numResultsValue - NumResultsCondition.LESS_THAN_EQUAL -> numResults <= numResultsValue - NumResultsCondition.EQUAL -> numResults == numResultsValue - NumResultsCondition.NOT_EQUAL -> numResults != numResultsValue + private fun checkForSuppress(pplTrigger: PPLTrigger): Boolean { + val currentTime = Instant.now() // TODO: Instant.ofEpochMilli(monitorCtx.threadPool!!.absoluteTimeInMillis()) alternative? + + // the interval between throttledTimeBound and now is the suppression window + // i.e. any PPLTrigger whose last trigger time is in this window must be suppressed + val throttledTimeBound = pplTrigger.suppressDuration?.let { + currentTime.minus(pplTrigger.suppressDuration!!.millis, ChronoUnit.MILLIS) } + + // the trigger must be suppressed if... + return pplTrigger.suppressDuration != null && // suppression is enabled on the PPLTrigger + pplTrigger.lastTriggeredTime != null && // and it has triggered before at least once + pplTrigger.lastTriggeredTime!!.isAfter(throttledTimeBound!!) 
// and it's not yet out of the suppression window } // adds monitor schedule-based time filter @@ -289,13 +262,13 @@ object PPLMonitorRunner : MonitorV2Runner() { return timeFilteredQuery } - // appendss user-defined custom trigger condition to PPL query, only for custom condition Triggers - private fun addCustomCondition(query: String, customCondition: String): String { + // appends user-defined custom trigger condition to PPL query, only for custom condition Triggers + private fun appendCustomCondition(query: String, customCondition: String): String { return "$query | $customCondition" } // returns PPL query response as parsable JSONObject - private suspend fun executePplQuery(query: String, monitorCtx: MonitorRunnerExecutionContext): JSONObject { + private suspend fun executePplQuery(query: String, client: NodeClient): JSONObject { // call PPL plugin to execute time filtered query val transportPplQueryRequest = TransportPPLQueryRequest( query, @@ -305,7 +278,7 @@ object PPLMonitorRunner : MonitorV2Runner() { val transportPplQueryResponse = PPLPluginInterface.suspendUntil { this.executeQuery( - monitorCtx.client as NodeClient, + client, transportPplQueryRequest, it ) @@ -316,25 +289,123 @@ object PPLMonitorRunner : MonitorV2Runner() { return queryResponseJson } + private fun evaluateNumResultsTrigger(numResults: Long, numResultsCondition: NumResultsCondition, numResultsValue: Long): Boolean { + return when (numResultsCondition) { + NumResultsCondition.GREATER_THAN -> numResults > numResultsValue + NumResultsCondition.GREATER_THAN_EQUAL -> numResults >= numResultsValue + NumResultsCondition.LESS_THAN -> numResults < numResultsValue + NumResultsCondition.LESS_THAN_EQUAL -> numResults <= numResultsValue + NumResultsCondition.EQUAL -> numResults == numResultsValue + NumResultsCondition.NOT_EQUAL -> numResults != numResultsValue + } + } + + private fun evaluateCustomConditionTrigger(customConditionQueryResponse: JSONObject, pplTrigger: PPLTrigger): Long { + // a PPL 
query with custom condition returning 0 results should imply a valid but not useful query. + // do not trigger alert, but warn that query likely is not functioning as user intended + if (customConditionQueryResponse.getLong("total") == 0L) { + logger.warn( + "During execution of PPL Trigger ${pplTrigger.name}, PPL query with custom" + + "condition returned no results. Proceeding without generating alert." + ) + return 0L + } + + // find the name of the eval result variable defined in custom condition + val evalResultVarName = pplTrigger.customCondition!!.split(" ")[1] // [0] is "eval", [1] is the var name + + // find the eval statement result variable in the PPL query response schema + val schemaList = customConditionQueryResponse.getJSONArray("schema") + var evalResultVarIdx = -1 + for (i in 0 until schemaList.length()) { + val schemaObj = schemaList.getJSONObject(i) + val columnName = schemaObj.getString("name") + + if (columnName == evalResultVarName) { + if (schemaObj.getString("type") != "boolean") { + throw IllegalStateException( + "parsing results of PPL query with custom condition failed," + + "eval statement variable was not type boolean, but instead type: ${schemaObj.getString("type")}" + ) + } + + evalResultVarIdx = i + break + } + } + + // eval statement result variable should always be found + if (evalResultVarIdx == -1) { + throw IllegalStateException( + "expected to find eval statement results variable $evalResultVarName in results" + + "of PPL query with custom condition, but did not." 
+ ) + } + + val dataRowList = customConditionQueryResponse.getJSONArray("datarows") + var numTriggered = 0L // the number of query result rows that evaluated to true + for (i in 0 until dataRowList.length()) { + val dataRow = dataRowList.getJSONArray(i) + val evalResult = dataRow.getBoolean(evalResultVarIdx) + if (evalResult) { + numTriggered++ + } + } + + return numTriggered + } + + private fun generateAlerts(pplTrigger: PPLTrigger, pplMonitor: PPLMonitor, numAlertsToGenerate: Long): List { + // TODO: currently naively generates an alert and action every time + // TODO: maintain alert state, check for COMPLETED alert and suppression condition, like query level monitor + + val expirationTime = pplTrigger.expireDuration?.millis?.let { Instant.now().plus(it, ChronoUnit.MILLIS) } + + val alertV2 = AlertV2( + monitorId = pplMonitor.id, + monitorName = pplMonitor.name, + monitorVersion = pplMonitor.version, + triggerId = pplTrigger.id, + triggerName = pplTrigger.name, + state = Alert.State.ACTIVE, + startTime = Instant.now(), + expirationTime = expirationTime, + errorHistory = listOf(), + severity = pplTrigger.severity.value, + actionExecutionResults = listOf(), + ) + + val alertV2s = mutableListOf() + if (pplTrigger.mode == TriggerMode.RESULT_SET) { + alertV2s.add(alertV2) + } else { // TriggerMode.PER_RESULT + for (i in 0 until numAlertsToGenerate) { + alertV2s.add(alertV2) + } + } + + return alertV2s.toList() // return an immutable list + } + private suspend fun saveAlertsV2( alerts: List, - monitorCtx: MonitorRunnerExecutionContext, + pplMonitor: PPLMonitor, retryPolicy: BackoffPolicy, - routingId: String // routing is mandatory and set as monitor id. 
for workflow chained alerts we pass workflow id as routing + client: NodeClient ) { - val alertsIndex = AlertIndices.ALERT_INDEX - val alertsHistoryIndex = AlertIndices.ALERT_HISTORY_WRITE_INDEX + logger.info("received alerts: $alerts") var requestsToRetry = alerts.flatMap { alert -> // We don't want to set the version when saving alerts because the MonitorRunner has first priority when writing alerts. // In the rare event that a user acknowledges an alert between when it's read and when it's written // back we're ok if that acknowledgement is lost. It's easier to get the user to retry than for the runner to // spend time reloading the alert and writing it back. + when (alert.state) { Alert.State.ACTIVE, Alert.State.ERROR -> { listOf>( - IndexRequest(alertsIndex) - .routing(routingId) + IndexRequest(AlertIndices.ALERT_INDEX) + .routing(pplMonitor.id) // set routing ID to PPL Monitor ID .source(alert.toXContent(XContentFactory.jsonBuilder(), ToXContent.EMPTY_PARAMS)) .id(if (alert.id != Alert.NO_ID) alert.id else null) ) @@ -343,8 +414,6 @@ object PPLMonitorRunner : MonitorV2Runner() { } } - val client = monitorCtx.client!! 
- if (requestsToRetry.isEmpty()) return // Retry Bulk requests if there was any 429 response retryPolicy.retry(logger, listOf(RestStatus.TOO_MANY_REQUESTS)) { @@ -360,4 +429,15 @@ object PPLMonitorRunner : MonitorV2Runner() { } } } + + private suspend fun updateMonitorWithLastTriggeredTimes(pplMonitor: PPLMonitor, client: NodeClient) { + val indexRequest = IndexRequest(SCHEDULED_JOBS_INDEX) + .id(pplMonitor.id) + .source(pplMonitor.toXContentWithType(XContentFactory.jsonBuilder())) + .routing(pplMonitor.id) + + val indexResponse = client.suspendUntil { index(indexRequest, it) } + + logger.info("PPLMonitor update with last execution times index response: ${indexResponse.result}") + } } diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestIndexMonitorV2Action.kt b/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestIndexMonitorV2Action.kt index 8f4647dc9..c8ebd3c54 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestIndexMonitorV2Action.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestIndexMonitorV2Action.kt @@ -48,13 +48,10 @@ class RestIndexMonitorV2Action : BaseRestHandler() { @Throws(IOException::class) override fun prepareRequest(request: RestRequest, client: NodeClient): RestChannelConsumer { log.debug("${request.method()} ${request.path()}") - log.info("preparing rest request") val xcp = request.contentParser() ensureExpectedToken(Token.START_OBJECT, xcp.nextToken(), xcp) - log.info("initial ensure") - val monitorV2: MonitorV2 try { monitorV2 = MonitorV2.parse(xcp) @@ -62,8 +59,6 @@ class RestIndexMonitorV2Action : BaseRestHandler() { throw AlertingException.wrap(e) } - log.info("monitorv2 retrieved") - val id = request.param("monitorID", MonitorV2.NO_ID) val seqNo = request.paramAsLong(IF_SEQ_NO, SequenceNumbers.UNASSIGNED_SEQ_NO) val primaryTerm = request.paramAsLong(IF_PRIMARY_TERM, SequenceNumbers.UNASSIGNED_PRIMARY_TERM) diff --git 
a/alerting/src/main/resources/org/opensearch/alerting/alerts/alert_mapping.json b/alerting/src/main/resources/org/opensearch/alerting/alerts/alert_mapping.json index 76e5104cc..a1cbb45c8 100644 --- a/alerting/src/main/resources/org/opensearch/alerting/alerts/alert_mapping.json +++ b/alerting/src/main/resources/org/opensearch/alerting/alerts/alert_mapping.json @@ -131,6 +131,9 @@ "end_time": { "type": "date" }, + "expiration_time": { + "type": "date" + }, "error_message": { "type": "text" }, diff --git a/core/src/main/kotlin/org/opensearch/alerting/core/JobSweeper.kt b/core/src/main/kotlin/org/opensearch/alerting/core/JobSweeper.kt index e08b1360f..d3ecf1fb6 100644 --- a/core/src/main/kotlin/org/opensearch/alerting/core/JobSweeper.kt +++ b/core/src/main/kotlin/org/opensearch/alerting/core/JobSweeper.kt @@ -428,6 +428,7 @@ class JobSweeper( "Unable to parse ScheduledJob source: {}", Strings.cleanTruncate(jobSource.utf8ToString(), 1000) ) + logger.warn("exception thrown: $e") sweep(shardId, jobId, jobVersion, null, true) null } From 2fe4279580d066d9e934a79458839226e29cb551 Mon Sep 17 00:00:00 2001 From: Dennis Toepker Date: Wed, 27 Aug 2025 15:36:00 -0700 Subject: [PATCH 09/40] refactors and initial result set notifications implementation --- .../org/opensearch/alerting/AlertV2Expirer.kt | 3 - .../org/opensearch/alerting/AlertingPlugin.kt | 66 ++-- .../org/opensearch/alerting/MonitorRunner.kt | 2 +- .../alerting/MonitorRunnerService.kt | 7 + .../opensearch/alerting/PPLMonitorRunner.kt | 301 +++++++++++------- .../resthandler/RestGetAlertsV2Action.kt | 76 +++++ .../script/PPLTriggerExecutionContext.kt | 39 +++ .../script/TriggerV2ExecutionContext.kt | 31 ++ 8 files changed, 364 insertions(+), 161 deletions(-) create mode 100644 alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestGetAlertsV2Action.kt create mode 100644 alerting/src/main/kotlin/org/opensearch/alerting/script/PPLTriggerExecutionContext.kt create mode 100644 
alerting/src/main/kotlin/org/opensearch/alerting/script/TriggerV2ExecutionContext.kt diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/AlertV2Expirer.kt b/alerting/src/main/kotlin/org/opensearch/alerting/AlertV2Expirer.kt index e657589f0..bbf3c5676 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/AlertV2Expirer.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/AlertV2Expirer.kt @@ -98,9 +98,6 @@ class AlertV2Expirer( .execute( object : ActionListener { override fun onResponse(response: BulkByScrollResponse) { - logger.info("noops: ${response.noops}") - logger.info("status: ${response.status}") - logger.info("searchFailures: ${response.searchFailures}") logger.info("Deleted ${response.deleted} expired alerts") } override fun onFailure(e: Exception) { diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/AlertingPlugin.kt b/alerting/src/main/kotlin/org/opensearch/alerting/AlertingPlugin.kt index e0497ab0d..e224b4b9f 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/AlertingPlugin.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/AlertingPlugin.kt @@ -24,36 +24,17 @@ import org.opensearch.alerting.core.ScheduledJobIndices import org.opensearch.alerting.core.action.node.ScheduledJobsStatsAction import org.opensearch.alerting.core.action.node.ScheduledJobsStatsTransportAction import org.opensearch.alerting.core.lock.LockService -import org.opensearch.alerting.core.resthandler.RestScheduledJobStatsHandler import org.opensearch.alerting.core.schedule.JobScheduler import org.opensearch.alerting.core.settings.LegacyOpenDistroScheduledJobSettings import org.opensearch.alerting.core.settings.ScheduledJobSettings import org.opensearch.alerting.remote.monitors.RemoteMonitorRegistry -import org.opensearch.alerting.resthandler.RestAcknowledgeAlertAction -import org.opensearch.alerting.resthandler.RestAcknowledgeChainedAlertAction -import org.opensearch.alerting.resthandler.RestDeleteAlertingCommentAction import 
org.opensearch.alerting.resthandler.RestDeleteMonitorAction import org.opensearch.alerting.resthandler.RestDeleteMonitorV2Action -import org.opensearch.alerting.resthandler.RestDeleteWorkflowAction -import org.opensearch.alerting.resthandler.RestExecuteMonitorAction import org.opensearch.alerting.resthandler.RestExecuteMonitorV2Action -import org.opensearch.alerting.resthandler.RestExecuteWorkflowAction -import org.opensearch.alerting.resthandler.RestGetAlertsAction -import org.opensearch.alerting.resthandler.RestGetDestinationsAction -import org.opensearch.alerting.resthandler.RestGetEmailAccountAction -import org.opensearch.alerting.resthandler.RestGetEmailGroupAction -import org.opensearch.alerting.resthandler.RestGetFindingsAction +import org.opensearch.alerting.resthandler.RestGetAlertsV2Action import org.opensearch.alerting.resthandler.RestGetMonitorAction -import org.opensearch.alerting.resthandler.RestGetRemoteIndexesAction -import org.opensearch.alerting.resthandler.RestGetWorkflowAction -import org.opensearch.alerting.resthandler.RestGetWorkflowAlertsAction -import org.opensearch.alerting.resthandler.RestIndexAlertingCommentAction import org.opensearch.alerting.resthandler.RestIndexMonitorAction import org.opensearch.alerting.resthandler.RestIndexMonitorV2Action -import org.opensearch.alerting.resthandler.RestSearchAlertingCommentAction -import org.opensearch.alerting.resthandler.RestSearchEmailAccountAction -import org.opensearch.alerting.resthandler.RestSearchEmailGroupAction -import org.opensearch.alerting.resthandler.RestSearchMonitorAction import org.opensearch.alerting.resthandler.RestSearchMonitorV2Action import org.opensearch.alerting.script.TriggerScript import org.opensearch.alerting.service.DeleteMonitorService @@ -206,36 +187,37 @@ internal class AlertingPlugin : PainlessExtension, ActionPlugin, ScriptPlugin, R ): List { return listOf( // Alerting V1 - RestGetMonitorAction(), - RestDeleteMonitorAction(), +// RestGetMonitorAction(), +// 
RestDeleteMonitorAction(), // RestIndexMonitorAction(), // RestIndexWorkflowAction(), - RestSearchMonitorAction(settings, clusterService), - RestExecuteMonitorAction(), - RestExecuteWorkflowAction(), - RestAcknowledgeAlertAction(), - RestAcknowledgeChainedAlertAction(), - RestScheduledJobStatsHandler("_alerting"), - RestSearchEmailAccountAction(), - RestGetEmailAccountAction(), - RestSearchEmailGroupAction(), - RestGetEmailGroupAction(), - RestGetDestinationsAction(), - RestGetAlertsAction(), - RestGetWorkflowAlertsAction(), - RestGetFindingsAction(), - RestGetWorkflowAction(), - RestDeleteWorkflowAction(), - RestGetRemoteIndexesAction(), - RestIndexAlertingCommentAction(), - RestSearchAlertingCommentAction(), - RestDeleteAlertingCommentAction(), +// RestSearchMonitorAction(settings, clusterService), +// RestExecuteMonitorAction(), +// RestExecuteWorkflowAction(), +// RestAcknowledgeAlertAction(), +// RestAcknowledgeChainedAlertAction(), +// RestScheduledJobStatsHandler("_alerting"), +// RestSearchEmailAccountAction(), +// RestGetEmailAccountAction(), +// RestSearchEmailGroupAction(), +// RestGetEmailGroupAction(), +// RestGetDestinationsAction(), +// RestGetAlertsAction(), +// RestGetWorkflowAlertsAction(), +// RestGetFindingsAction(), +// RestGetWorkflowAction(), +// RestDeleteWorkflowAction(), +// RestGetRemoteIndexesAction(), +// RestIndexAlertingCommentAction(), +// RestSearchAlertingCommentAction(), +// RestDeleteAlertingCommentAction(), // Alerting V2 RestIndexMonitorV2Action(), RestExecuteMonitorV2Action(), RestDeleteMonitorV2Action(), RestSearchMonitorV2Action(settings, clusterService), + RestGetAlertsV2Action(), ) } diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/MonitorRunner.kt b/alerting/src/main/kotlin/org/opensearch/alerting/MonitorRunner.kt index 4e6cdbc02..7acfdff8b 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/MonitorRunner.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/MonitorRunner.kt @@ -94,7 +94,7 @@ 
abstract class MonitorRunner { } } - protected suspend fun getConfigAndSendNotification( + suspend fun getConfigAndSendNotification( action: Action, monitorCtx: MonitorRunnerExecutionContext, subject: String?, diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/MonitorRunnerService.kt b/alerting/src/main/kotlin/org/opensearch/alerting/MonitorRunnerService.kt index 0161421ce..e8b3011c9 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/MonitorRunnerService.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/MonitorRunnerService.kt @@ -93,6 +93,7 @@ import java.time.Instant import java.time.LocalDateTime import java.time.ZoneOffset import java.util.UUID +import org.opensearch.alerting.script.TriggerV2ExecutionContext import kotlin.coroutines.CoroutineContext object MonitorRunnerService : JobRunner, CoroutineScope, AbstractLifecycleComponent() { @@ -678,4 +679,10 @@ object MonitorRunnerService : JobRunner, CoroutineScope, AbstractLifecycleCompon .newInstance(template.params + mapOf("ctx" to ctx.asTemplateArg())) .execute() } + + internal fun compileTemplateV2(template: Script, ctx: TriggerV2ExecutionContext): String { + return monitorCtx.scriptService!!.compile(template, TemplateScript.CONTEXT) + .newInstance(template.params + mapOf("ctx" to ctx.asTemplateArg())) + .execute() + } } diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt b/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt index dcaef1bd2..c6cd64416 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt @@ -35,6 +35,11 @@ import java.time.Instant import java.time.ZoneOffset.UTC import java.time.format.DateTimeFormatter import java.time.temporal.ChronoUnit +import org.opensearch.alerting.QueryLevelMonitorRunner.getConfigAndSendNotification +import org.opensearch.alerting.script.PPLTriggerExecutionContext +import 
org.opensearch.commons.alerting.model.ActionRunResult +import org.opensearch.commons.alerting.model.action.Action +import org.opensearch.core.common.Strings object PPLMonitorRunner : MonitorV2Runner() { private val logger = LogManager.getLogger(javaClass) @@ -72,6 +77,10 @@ object PPLMonitorRunner : MonitorV2Runner() { val pplQueryResults = mutableMapOf() val generatedAlerts = mutableListOf() + // TODO: Instant.ofEpochMilli(monitorCtx.threadPool!!.absoluteTimeInMillis()) alternative? + // set the current execution time + val timeOfCurrentExecution = Instant.now() + // TODO: should alerting v1 and v2 alerts index be separate? // TODO: should alerting v1 and v2 alerting-config index be separate? try { @@ -90,115 +99,101 @@ object PPLMonitorRunner : MonitorV2Runner() { // run each trigger for (trigger in pplMonitor.triggers) { - val pplTrigger = trigger as PPLTrigger - - // check for suppression and skip execution - // before even running the trigger itself - val suppressed = checkForSuppress(pplTrigger) - if (suppressed) { - logger.info("throttling trigger ${pplTrigger.name} from monitor ${pplMonitor.name}") - continue - } - logger.info("throttling check passed, executing trigger ${pplTrigger.name} from monitor ${pplMonitor.name}") - -// internal fun isActionActionable(action: Action, alert: Alert?): Boolean { -// if (alert != null && alert.state == Alert.State.AUDIT) -// return false -// if (alert == null || action.throttle == null) { + try { + val pplTrigger = trigger as PPLTrigger + + // check for suppression and skip execution + // before even running the trigger itself + val suppressed = checkForSuppress(pplTrigger, timeOfCurrentExecution) + if (suppressed) { + logger.info("suppressing trigger ${pplTrigger.name} from monitor ${pplMonitor.name}") + continue + } + logger.info("suppression check passed, executing trigger ${pplTrigger.name} from monitor ${pplMonitor.name}") + +// internal fun isActionActionable(action: Action, alert: Alert?): Boolean { +// if (alert 
!= null && alert.state == Alert.State.AUDIT) +// return false +// if (alert == null || action.throttle == null) { +// return true +// } +// if (action.throttleEnabled) { +// val result = alert.actionExecutionResults.firstOrNull { r -> r.actionId == action.id } +// val lastExecutionTime: Instant? = result?.lastExecutionTime +// val throttledTimeBound = currentTime().minus(action.throttle!!.value.toLong(), action.throttle!!.unit) +// return (lastExecutionTime == null || lastExecutionTime.isBefore(throttledTimeBound)) +// } // return true // } -// if (action.throttleEnabled) { -// val result = alert.actionExecutionResults.firstOrNull { r -> r.actionId == action.id } -// val lastExecutionTime: Instant? = result?.lastExecutionTime -// val throttledTimeBound = currentTime().minus(action.throttle!!.value.toLong(), action.throttle!!.unit) -// return (lastExecutionTime == null || lastExecutionTime.isBefore(throttledTimeBound)) -// } -// return true -// } - - // if trigger uses custom condition, append the custom condition to query, otherwise simply proceed - val queryToExecute = if (pplTrigger.conditionType == ConditionType.NUMBER_OF_RESULTS) { // number of results trigger - timeFilteredQuery - } else { // custom condition trigger - appendCustomCondition(timeFilteredQuery, pplTrigger.customCondition!!) - } - - // TODO: does this handle pagination? does it need to? 
- // execute the PPL query - val queryResponseJson = executePplQuery(queryToExecute, nodeClient) - - // retrieve the number of results - // for number of results triggers, this is simply the number of PPL query results - // for custom triggers, this is the number of rows in the query response's eval result column that evaluated to true - val numResults = if (pplTrigger.conditionType == ConditionType.NUMBER_OF_RESULTS) { // number of results trigger - queryResponseJson.getLong("total") - } else { // custom condition trigger - evaluateCustomConditionTrigger(queryResponseJson, pplTrigger) - } - // determine if the trigger condition has been met - val triggered = if (pplTrigger.conditionType == ConditionType.NUMBER_OF_RESULTS) { // number of results trigger - evaluateNumResultsTrigger(numResults, pplTrigger.numResultsCondition!!, pplTrigger.numResultsValue!!) - } else { // custom condition trigger - numResults > 0 // if any of the query results satisfied the custom condition, the trigger counts as triggered - } + // if trigger uses custom condition, append the custom condition to query, otherwise simply proceed + val queryToExecute = if (pplTrigger.conditionType == ConditionType.NUMBER_OF_RESULTS) { // number of results trigger + timeFilteredQuery + } else { // custom condition trigger + appendCustomCondition(timeFilteredQuery, pplTrigger.customCondition!!) + } - logger.info("PPLTrigger ${pplTrigger.name} triggered: $triggered") + // TODO: does this handle pagination? does it need to? 
+ // execute the PPL query + val queryResponseJson = executePplQuery(queryToExecute, nodeClient) + logger.info("query execution results for trigger ${pplTrigger.name}: $queryResponseJson") + + // retrieve the number of results + // for number of results triggers, this is simply the number of PPL query results + // for custom triggers, this is the number of rows in the query response's eval result column that evaluated to true + val numResults = if (pplTrigger.conditionType == ConditionType.NUMBER_OF_RESULTS) { // number of results trigger + queryResponseJson.getLong("total") + } else { // custom condition trigger + evaluateCustomConditionTrigger(queryResponseJson, pplTrigger) + } - // store the trigger execution and ppl query results for - // trigger execution response and notification message context - triggerResults[pplTrigger.id] = PPLTriggerRunResult(pplTrigger.name, triggered, null) - pplQueryResults[pplTrigger.id] = queryResponseJson + // determine if the trigger condition has been met + val triggered = if (pplTrigger.conditionType == ConditionType.NUMBER_OF_RESULTS) { // number of results trigger + evaluateNumResultsTrigger(numResults, pplTrigger.numResultsCondition!!, pplTrigger.numResultsValue!!) 
+ } else { // custom condition trigger + numResults > 0 // if any of the query results satisfied the custom condition, the trigger counts as triggered + } - // generate an alert if triggered - if (triggered) { - generatedAlerts.addAll(generateAlerts(pplTrigger, pplMonitor, numResults)) + logger.info("PPLTrigger ${pplTrigger.name} triggered: $triggered") + + // store the trigger execution and ppl query results for + // trigger execution response and notification message context + triggerResults[pplTrigger.id] = PPLTriggerRunResult(pplTrigger.name, triggered, null) + pplQueryResults[pplTrigger.id] = queryResponseJson + + if (triggered) { + // collect the generated alerts to be written to alerts index + generatedAlerts.addAll(generateAlerts(pplTrigger, pplMonitor, numResults, timeOfCurrentExecution)) + + // update the trigger's last execution time for future suppression checks + pplTrigger.lastTriggeredTime = timeOfCurrentExecution + + // TODO: this is purely a result set implementation + // TODO: when trigger is per result, need to send for every individual result, not just the whole thing + // send alert notifications + val pplTriggerExecutionContext = PPLTriggerExecutionContext( + monitorV2, + periodStart, + periodEnd, + null, + pplTrigger, + pplQueryResults[pplTrigger.id]!!.toMap() + ) + for (action in pplTrigger.actions) { + runAction( + action, + pplTriggerExecutionContext, + monitorCtx, + pplMonitor, + dryRun, + timeOfCurrentExecution + ) + } + } + } catch (e: Exception) { + logger.error("failed to run PPL Trigger for PPL Monitor ${pplMonitor.name}", e) + continue } - -// // execute and evaluate trigger based on trigger type -// if (pplTrigger.conditionType == ConditionType.NUMBER_OF_RESULTS) { // number_of_results trigger -// // execute the PPL query -// val queryResponseJson = executePplQuery(timeFilteredQuery, monitorCtx) -// -// // read in the number of results -// val numResults = queryResponseJson.getLong("total") -// -// // check if the number of results 
satisfies the trigger condition -// val triggered = evaluateNumResultsTrigger(numResults, trigger.numResultsCondition!!, trigger.numResultsValue!!) -// -// // store the trigger execution and ppl query results for execution response -// // and notification message context -// triggerResults[pplTrigger.id] = PPLTriggerRunResult(trigger.name, triggered, null) -// pplQueryResults[pplTrigger.id] = queryResponseJson -// -// logger.info("number of results trigger ${trigger.name} triggered: $triggered") -// -// // generate an alert if triggered -// if (triggered) { -// generateAlerts(trigger, numResults) -// } -// } else { // custom trigger -// val queryWithCustomCondition = appendCustomCondition(timeFilteredQuery, trigger.customCondition!!) -// val queryResponseJson = executePplQuery(queryWithCustomCondition, monitorCtx) -// val numTriggered = evaluateCustomConditionTrigger(queryResponseJson, pplTrigger, pplMonitor) -// val triggered = numTriggered > 0 -// -// triggerResults[pplTrigger.id] = PPLTriggerRunResult(trigger.name, triggered, null) -// pplQueryResults[pplTrigger.id] = queryResponseJson -// -// logger.info("custom condition trigger ${trigger.name} triggered: $triggered") -// -// if (triggered) { -// generateAlerts(trigger, numTriggered) -// } -// } - -// if (monitorCtx.triggerService!!.isQueryLevelTriggerActionable(triggerCtx, triggerResult, workflowRunContext)) { -// val actionCtx = triggerCtx.copy(error = monitorResult.error ?: triggerResult.error) -// for (action in trigger.actions) { -// triggerResult.actionResults[action.id] = this.runAction(action, actionCtx, monitorCtx, monitor, dryrun) -// } -// } } // TODO: what if retry policy null? 
@@ -212,22 +207,27 @@ object PPLMonitorRunner : MonitorV2Runner() { // with updated last triggered times for each of its triggers updateMonitorWithLastTriggeredTimes(pplMonitor, nodeClient) - return PPLMonitorRunResult(pplMonitor.name, null, periodStart, periodEnd, triggerResults, pplQueryResults) + return PPLMonitorRunResult( + pplMonitor.name, + null, + periodStart, + periodEnd, + triggerResults, + pplQueryResults.map { it.key to it.value.toMap() }.toMap() + ) } - private fun checkForSuppress(pplTrigger: PPLTrigger): Boolean { - val currentTime = Instant.now() // TODO: Instant.ofEpochMilli(monitorCtx.threadPool!!.absoluteTimeInMillis()) alternative? - + private fun checkForSuppress(pplTrigger: PPLTrigger, timeOfCurrentExecution: Instant): Boolean { // the interval between throttledTimeBound and now is the suppression window // i.e. any PPLTrigger whose last trigger time is in this window must be suppressed - val throttledTimeBound = pplTrigger.suppressDuration?.let { - currentTime.minus(pplTrigger.suppressDuration!!.millis, ChronoUnit.MILLIS) + val suppressTimeBound = pplTrigger.suppressDuration?.let { + timeOfCurrentExecution.minus(pplTrigger.suppressDuration!!.millis, ChronoUnit.MILLIS) } // the trigger must be suppressed if... return pplTrigger.suppressDuration != null && // suppression is enabled on the PPLTrigger pplTrigger.lastTriggeredTime != null && // and it has triggered before at least once - pplTrigger.lastTriggeredTime!!.isAfter(throttledTimeBound!!) // and it's not yet out of the suppression window + pplTrigger.lastTriggeredTime!!.isAfter(suppressTimeBound!!) 
// and it's not yet out of the suppression window } // adds monitor schedule-based time filter @@ -305,14 +305,14 @@ object PPLMonitorRunner : MonitorV2Runner() { // do not trigger alert, but warn that query likely is not functioning as user intended if (customConditionQueryResponse.getLong("total") == 0L) { logger.warn( - "During execution of PPL Trigger ${pplTrigger.name}, PPL query with custom" + + "During execution of PPL Trigger ${pplTrigger.name}, PPL query with custom " + "condition returned no results. Proceeding without generating alert." ) return 0L } // find the name of the eval result variable defined in custom condition - val evalResultVarName = pplTrigger.customCondition!!.split(" ")[1] // [0] is "eval", [1] is the var name + val evalResultVarName = findEvalResultVar(pplTrigger.customCondition!!) // find the eval statement result variable in the PPL query response schema val schemaList = customConditionQueryResponse.getJSONArray("schema") @@ -337,7 +337,7 @@ object PPLMonitorRunner : MonitorV2Runner() { // eval statement result variable should always be found if (evalResultVarIdx == -1) { throw IllegalStateException( - "expected to find eval statement results variable $evalResultVarName in results" + + "expected to find eval statement results variable \"$evalResultVarName\" in results " + "of PPL query with custom condition, but did not." ) } @@ -355,11 +355,30 @@ object PPLMonitorRunner : MonitorV2Runner() { return numTriggered } - private fun generateAlerts(pplTrigger: PPLTrigger, pplMonitor: PPLMonitor, numAlertsToGenerate: Long): List { + // TODO: is there maybe some PPL plugin util function we can use to replace this? 
+ // searches a given custom condition eval statement for the name of the result + // variable and returns it + private fun findEvalResultVar(customCondition: String): String { + // the PPL keyword "eval", followed by a whitespace must be present, otherwise a syntax error from PPL plugin would've + // been thrown when executing the query (without the whitespace, the query would've had something like "evalresult", + // which is invalid PPL + val startOfEvalStatement = "eval " + + val startIdx = customCondition.indexOf(startOfEvalStatement) + startOfEvalStatement.length + val endIdx = startIdx + customCondition.substring(startIdx).indexOfFirst { it == ' ' || it == '=' } + return customCondition.substring(startIdx, endIdx) + } + + private fun generateAlerts( + pplTrigger: PPLTrigger, + pplMonitor: PPLMonitor, + numAlertsToGenerate: Long, + timeOfCurrentExecution: Instant + ): List { // TODO: currently naively generates an alert and action every time // TODO: maintain alert state, check for COMPLETED alert and suppression condition, like query level monitor - val expirationTime = pplTrigger.expireDuration?.millis?.let { Instant.now().plus(it, ChronoUnit.MILLIS) } + val expirationTime = pplTrigger.expireDuration?.millis?.let { timeOfCurrentExecution.plus(it, ChronoUnit.MILLIS) } val alertV2 = AlertV2( monitorId = pplMonitor.id, @@ -368,7 +387,7 @@ object PPLMonitorRunner : MonitorV2Runner() { triggerId = pplTrigger.id, triggerName = pplTrigger.name, state = Alert.State.ACTIVE, - startTime = Instant.now(), + startTime = timeOfCurrentExecution, expirationTime = expirationTime, errorHistory = listOf(), severity = pplTrigger.severity.value, @@ -440,4 +459,56 @@ object PPLMonitorRunner : MonitorV2Runner() { logger.info("PPLMonitor update with last execution times index response: ${indexResponse.result}") } + + suspend fun runAction( + action: Action, + triggerCtx: PPLTriggerExecutionContext, + monitorCtx: MonitorRunnerExecutionContext, + pplMonitor: PPLMonitor, + dryrun: 
Boolean, + timeOfCurrentExecution: Instant + ): ActionRunResult { + return try { + val actionOutput = mutableMapOf() + actionOutput[Action.SUBJECT] = if (action.subjectTemplate != null) + MonitorRunnerService.compileTemplateV2(action.subjectTemplate!!, triggerCtx) + else "" + actionOutput[Action.MESSAGE] = MonitorRunnerService.compileTemplateV2(action.messageTemplate, triggerCtx) + if (Strings.isNullOrEmpty(actionOutput[Action.MESSAGE])) { + throw IllegalStateException("Message content missing in the Destination with id: ${action.destinationId}") + } + + if (!dryrun) { +// val client = monitorCtx.client + actionOutput[Action.MESSAGE_ID] = getConfigAndSendNotification( + action, + monitorCtx, + actionOutput[Action.SUBJECT], + actionOutput[Action.MESSAGE]!! + ) + // TODO: use this block when security plugin is enabled +// client!!.threadPool().threadContext.stashContext().use { +// withClosableContext( +// InjectorContextElement( +// pplMonitor.id, +// monitorCtx.settings!!, +// monitorCtx.threadPool!!.threadContext, +// pplMonitor.user?.roles, +// pplMonitor.user +// ) +// ) { +// actionOutput[Action.MESSAGE_ID] = getConfigAndSendNotification( +// action, +// monitorCtx, +// actionOutput[Action.SUBJECT], +// actionOutput[Action.MESSAGE]!! 
+// ) +// } +// } + } + ActionRunResult(action.id, action.name, actionOutput, false, timeOfCurrentExecution, null) + } catch (e: Exception) { + ActionRunResult(action.id, action.name, mapOf(), false, timeOfCurrentExecution, e) + } + } } diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestGetAlertsV2Action.kt b/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestGetAlertsV2Action.kt new file mode 100644 index 000000000..2962e5447 --- /dev/null +++ b/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestGetAlertsV2Action.kt @@ -0,0 +1,76 @@ +package org.opensearch.alerting.resthandler + +import org.apache.logging.log4j.LogManager +import org.opensearch.alerting.AlertingPlugin +import org.opensearch.commons.alerting.action.AlertingActions +import org.opensearch.commons.alerting.action.GetAlertsRequest +import org.opensearch.commons.alerting.model.Table +import org.opensearch.rest.BaseRestHandler +import org.opensearch.rest.RestHandler.ReplacedRoute +import org.opensearch.rest.RestHandler.Route +import org.opensearch.rest.RestRequest +import org.opensearch.rest.RestRequest.Method.GET +import org.opensearch.rest.action.RestToXContentListener +import org.opensearch.transport.client.node.NodeClient + +/** + * This class consists of the REST handler to retrieve alerts . 
+ */ +class RestGetAlertsV2Action : BaseRestHandler() { + + private val log = LogManager.getLogger(RestGetAlertsV2Action::class.java) + + override fun getName(): String { + return "get_alerts_v2_action" + } + + override fun routes(): List { + return listOf( + Route( + GET, + "${AlertingPlugin.MONITOR_V2_BASE_URI}/alerts" + ) + ) + } + + override fun replacedRoutes(): MutableList { + return mutableListOf() + } + + // TODO: this is an Get Alerts V2 rest handler that points to the Get Alerts V1 Transport action + // TODO: for now for playground, separate the 2 for GA + override fun prepareRequest(request: RestRequest, client: NodeClient): RestChannelConsumer { + log.debug("${request.method()} ${AlertingPlugin.MONITOR_V2_BASE_URI}/alerts") + + val sortString = request.param("sortString", "monitor_name.keyword") + val sortOrder = request.param("sortOrder", "asc") + val missing: String? = request.param("missing") + val size = request.paramAsInt("size", 20) + val startIndex = request.paramAsInt("startIndex", 0) + val searchString = request.param("searchString", "") + val severityLevel = request.param("severityLevel", "ALL") + val alertState = request.param("alertState", "ALL") + val monitorId: String? = request.param("monitorId") + val workflowId: String? 
= request.param("workflowIds") + val workflowIds = mutableListOf() + if (workflowId.isNullOrEmpty() == false) { + workflowIds.add(workflowId) + } else { + workflowIds.add("") + } + val table = Table( + sortOrder, + sortString, + missing, + size, + startIndex, + searchString + ) + + val getAlertsRequest = GetAlertsRequest(table, severityLevel, alertState, monitorId, null, workflowIds = workflowIds) + return RestChannelConsumer { + channel -> + client.execute(AlertingActions.GET_ALERTS_ACTION_TYPE, getAlertsRequest, RestToXContentListener(channel)) + } + } +} diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/script/PPLTriggerExecutionContext.kt b/alerting/src/main/kotlin/org/opensearch/alerting/script/PPLTriggerExecutionContext.kt new file mode 100644 index 000000000..01dbd1883 --- /dev/null +++ b/alerting/src/main/kotlin/org/opensearch/alerting/script/PPLTriggerExecutionContext.kt @@ -0,0 +1,39 @@ +package org.opensearch.alerting.script + +import java.time.Instant +import org.opensearch.commons.alerting.model.MonitorV2 +import org.opensearch.commons.alerting.model.PPLMonitor +import org.opensearch.commons.alerting.model.PPLMonitorRunResult +import org.opensearch.commons.alerting.model.PPLMonitorRunResult.Companion.PPL_QUERY_RESULTS_FIELD +import org.opensearch.commons.alerting.model.PPLTrigger +import org.opensearch.commons.alerting.model.PPLTrigger.Companion.PPL_TRIGGER_FIELD + +data class PPLTriggerExecutionContext( + override val monitorV2: MonitorV2, + override val periodStart: Instant, + override val periodEnd: Instant, + override val error: Exception? 
= null, + val pplTrigger: PPLTrigger, + val pplQueryResults: Map // keys are PPL query result fields, not trigger ID + ) : TriggerV2ExecutionContext(monitorV2, periodStart, periodEnd, error) { + +// constructor( +// pplMonitor: PPLMonitor, +// pplTrigger: PPLTrigger, +// pplMonitorRunResult: PPLMonitorRunResult +// ) : this( +// pplMonitor, +// pplMonitorRunResult.periodStart, +// pplMonitorRunResult.periodEnd, +// pplMonitorRunResult.error, +// pplTrigger, +// pplMonitorRunResult.pplQueryResults[pplTrigger.id]!! +// ) + + override fun asTemplateArg(): Map { + val templateArg = super.asTemplateArg().toMutableMap() + templateArg[PPL_TRIGGER_FIELD] = pplTrigger.asTemplateArg() + templateArg[PPL_QUERY_RESULTS_FIELD] = pplQueryResults + return templateArg.toMap() + } +} diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/script/TriggerV2ExecutionContext.kt b/alerting/src/main/kotlin/org/opensearch/alerting/script/TriggerV2ExecutionContext.kt new file mode 100644 index 000000000..46993be91 --- /dev/null +++ b/alerting/src/main/kotlin/org/opensearch/alerting/script/TriggerV2ExecutionContext.kt @@ -0,0 +1,31 @@ +package org.opensearch.alerting.script + +import java.time.Instant +import org.opensearch.commons.alerting.model.MonitorV2 +import org.opensearch.commons.alerting.model.MonitorV2RunResult +import org.opensearch.commons.alerting.model.TriggerV2 + +abstract class TriggerV2ExecutionContext( + open val monitorV2: MonitorV2, + open val periodStart: Instant, + open val periodEnd: Instant, + open val error: Exception? 
= null +) { + + constructor(monitorV2: MonitorV2, triggerV2: TriggerV2, monitorV2RunResult: MonitorV2RunResult<*>) : + this( + monitorV2, + monitorV2RunResult.periodStart, + monitorV2RunResult.periodEnd, + monitorV2RunResult.triggerResults[triggerV2.id]?.error + ) + + open fun asTemplateArg(): Map { + return mapOf( + "monitorV2" to monitorV2.asTemplateArg(), + "periodStart" to periodStart, + "periodEnd" to periodEnd, + "error" to error + ) + } +} From 7a655d44d81f4d53358c7224df6a881abdb805aa Mon Sep 17 00:00:00 2001 From: Dennis Toepker Date: Fri, 29 Aug 2025 11:42:32 -0700 Subject: [PATCH 10/40] initial implementation of notifications integration --- .../org/opensearch/alerting/MonitorRunnerService.kt | 2 +- .../kotlin/org/opensearch/alerting/PPLMonitorRunner.kt | 10 +++++----- .../alerting/script/PPLTriggerExecutionContext.kt | 6 ++---- .../alerting/script/TriggerV2ExecutionContext.kt | 2 +- .../transport/TransportDeleteMonitorV2Action.kt | 1 + 5 files changed, 10 insertions(+), 11 deletions(-) diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/MonitorRunnerService.kt b/alerting/src/main/kotlin/org/opensearch/alerting/MonitorRunnerService.kt index e8b3011c9..493891977 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/MonitorRunnerService.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/MonitorRunnerService.kt @@ -36,6 +36,7 @@ import org.opensearch.alerting.opensearchapi.suspendUntil import org.opensearch.alerting.remote.monitors.RemoteDocumentLevelMonitorRunner import org.opensearch.alerting.remote.monitors.RemoteMonitorRegistry import org.opensearch.alerting.script.TriggerExecutionContext +import org.opensearch.alerting.script.TriggerV2ExecutionContext import org.opensearch.alerting.settings.AlertingSettings import org.opensearch.alerting.settings.AlertingSettings.Companion.ALERT_BACKOFF_COUNT import org.opensearch.alerting.settings.AlertingSettings.Companion.ALERT_BACKOFF_MILLIS @@ -93,7 +94,6 @@ import java.time.Instant import 
java.time.LocalDateTime import java.time.ZoneOffset import java.util.UUID -import org.opensearch.alerting.script.TriggerV2ExecutionContext import kotlin.coroutines.CoroutineContext object MonitorRunnerService : JobRunner, CoroutineScope, AbstractLifecycleComponent() { diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt b/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt index c6cd64416..1c5ab2dc9 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt @@ -9,10 +9,13 @@ import org.opensearch.action.bulk.BulkRequest import org.opensearch.action.bulk.BulkResponse import org.opensearch.action.index.IndexRequest import org.opensearch.action.support.WriteRequest +import org.opensearch.alerting.QueryLevelMonitorRunner.getConfigAndSendNotification import org.opensearch.alerting.alerts.AlertIndices import org.opensearch.alerting.opensearchapi.retry import org.opensearch.alerting.opensearchapi.suspendUntil +import org.opensearch.alerting.script.PPLTriggerExecutionContext import org.opensearch.common.xcontent.XContentFactory +import org.opensearch.commons.alerting.model.ActionRunResult import org.opensearch.commons.alerting.model.Alert import org.opensearch.commons.alerting.model.AlertV2 import org.opensearch.commons.alerting.model.MonitorV2 @@ -25,8 +28,10 @@ import org.opensearch.commons.alerting.model.PPLTrigger.NumResultsCondition import org.opensearch.commons.alerting.model.PPLTrigger.TriggerMode import org.opensearch.commons.alerting.model.PPLTriggerRunResult import org.opensearch.commons.alerting.model.ScheduledJob.Companion.SCHEDULED_JOBS_INDEX +import org.opensearch.commons.alerting.model.action.Action import org.opensearch.commons.ppl.PPLPluginInterface import org.opensearch.commons.ppl.action.TransportPPLQueryRequest +import org.opensearch.core.common.Strings import org.opensearch.core.rest.RestStatus import 
org.opensearch.core.xcontent.ToXContent import org.opensearch.transport.TransportService @@ -35,11 +40,6 @@ import java.time.Instant import java.time.ZoneOffset.UTC import java.time.format.DateTimeFormatter import java.time.temporal.ChronoUnit -import org.opensearch.alerting.QueryLevelMonitorRunner.getConfigAndSendNotification -import org.opensearch.alerting.script.PPLTriggerExecutionContext -import org.opensearch.commons.alerting.model.ActionRunResult -import org.opensearch.commons.alerting.model.action.Action -import org.opensearch.core.common.Strings object PPLMonitorRunner : MonitorV2Runner() { private val logger = LogManager.getLogger(javaClass) diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/script/PPLTriggerExecutionContext.kt b/alerting/src/main/kotlin/org/opensearch/alerting/script/PPLTriggerExecutionContext.kt index 01dbd1883..1a6e139fe 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/script/PPLTriggerExecutionContext.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/script/PPLTriggerExecutionContext.kt @@ -1,12 +1,10 @@ package org.opensearch.alerting.script -import java.time.Instant import org.opensearch.commons.alerting.model.MonitorV2 -import org.opensearch.commons.alerting.model.PPLMonitor -import org.opensearch.commons.alerting.model.PPLMonitorRunResult import org.opensearch.commons.alerting.model.PPLMonitorRunResult.Companion.PPL_QUERY_RESULTS_FIELD import org.opensearch.commons.alerting.model.PPLTrigger import org.opensearch.commons.alerting.model.PPLTrigger.Companion.PPL_TRIGGER_FIELD +import java.time.Instant data class PPLTriggerExecutionContext( override val monitorV2: MonitorV2, @@ -15,7 +13,7 @@ data class PPLTriggerExecutionContext( override val error: Exception? 
= null, val pplTrigger: PPLTrigger, val pplQueryResults: Map // keys are PPL query result fields, not trigger ID - ) : TriggerV2ExecutionContext(monitorV2, periodStart, periodEnd, error) { +) : TriggerV2ExecutionContext(monitorV2, periodStart, periodEnd, error) { // constructor( // pplMonitor: PPLMonitor, diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/script/TriggerV2ExecutionContext.kt b/alerting/src/main/kotlin/org/opensearch/alerting/script/TriggerV2ExecutionContext.kt index 46993be91..8e1cd8b9f 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/script/TriggerV2ExecutionContext.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/script/TriggerV2ExecutionContext.kt @@ -1,9 +1,9 @@ package org.opensearch.alerting.script -import java.time.Instant import org.opensearch.commons.alerting.model.MonitorV2 import org.opensearch.commons.alerting.model.MonitorV2RunResult import org.opensearch.commons.alerting.model.TriggerV2 +import java.time.Instant abstract class TriggerV2ExecutionContext( open val monitorV2: MonitorV2, diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportDeleteMonitorV2Action.kt b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportDeleteMonitorV2Action.kt index 17ad5893e..4af9ac095 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportDeleteMonitorV2Action.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportDeleteMonitorV2Action.kt @@ -47,6 +47,7 @@ class TransportDeleteMonitorV2Action @Inject constructor( // if (!validateUserBackendRoles(user, actionListener)) { // return // } + // TOOD: when monitor is deleted, immediately expire all alerts it generated scope.launch { try { // TODO: might be able to simply run DeleteMonitorService.deleteMonitor() directly, even if PPLMonitors have no metadata From 264c51a990a1129f3416096399bd34cadf6a5492 Mon Sep 17 00:00:00 2001 From: Dennis Toepker Date: Wed, 3 Sep 2025 12:19:37 -0700 
Subject: [PATCH 11/40] initial implementations of GetMonitor, UpdateMonitor, MonitorStats --- .../org/opensearch/alerting/AlertingPlugin.kt | 11 +- .../resthandler/RestDeleteMonitorV2Action.kt | 8 +- .../resthandler/RestExecuteMonitorV2Action.kt | 10 +- .../resthandler/RestGetMonitorV2Action.kt | 54 +++++++ .../resthandler/RestIndexMonitorV2Action.kt | 2 +- .../transport/TransportGetMonitorV2Action.kt | 119 ++++++++++++++ .../TransportIndexMonitorV2Action.kt | 149 +++++++++++++----- .../opensearch/alerting/core/JobSweeper.kt | 1 - .../action/node/ScheduledJobsStatsRequest.kt | 8 +- .../node/ScheduledJobsStatsTransportAction.kt | 7 +- .../RestScheduledJobStatsHandler.kt | 4 +- .../RestScheduledJobStatsV2Handler.kt | 95 +++++++++++ .../alerting/core/schedule/JobScheduler.kt | 15 ++ 13 files changed, 430 insertions(+), 53 deletions(-) create mode 100644 alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestGetMonitorV2Action.kt create mode 100644 alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportGetMonitorV2Action.kt create mode 100644 core/src/main/kotlin/org/opensearch/alerting/core/resthandler/RestScheduledJobStatsV2Handler.kt diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/AlertingPlugin.kt b/alerting/src/main/kotlin/org/opensearch/alerting/AlertingPlugin.kt index e224b4b9f..d5e9b59d5 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/AlertingPlugin.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/AlertingPlugin.kt @@ -24,6 +24,8 @@ import org.opensearch.alerting.core.ScheduledJobIndices import org.opensearch.alerting.core.action.node.ScheduledJobsStatsAction import org.opensearch.alerting.core.action.node.ScheduledJobsStatsTransportAction import org.opensearch.alerting.core.lock.LockService +import org.opensearch.alerting.core.resthandler.RestScheduledJobStatsHandler +import org.opensearch.alerting.core.resthandler.RestScheduledJobStatsV2Handler import 
org.opensearch.alerting.core.schedule.JobScheduler import org.opensearch.alerting.core.settings.LegacyOpenDistroScheduledJobSettings import org.opensearch.alerting.core.settings.ScheduledJobSettings @@ -33,6 +35,7 @@ import org.opensearch.alerting.resthandler.RestDeleteMonitorV2Action import org.opensearch.alerting.resthandler.RestExecuteMonitorV2Action import org.opensearch.alerting.resthandler.RestGetAlertsV2Action import org.opensearch.alerting.resthandler.RestGetMonitorAction +import org.opensearch.alerting.resthandler.RestGetMonitorV2Action import org.opensearch.alerting.resthandler.RestIndexMonitorAction import org.opensearch.alerting.resthandler.RestIndexMonitorV2Action import org.opensearch.alerting.resthandler.RestSearchMonitorV2Action @@ -60,6 +63,7 @@ import org.opensearch.alerting.transport.TransportGetEmailAccountAction import org.opensearch.alerting.transport.TransportGetEmailGroupAction import org.opensearch.alerting.transport.TransportGetFindingsSearchAction import org.opensearch.alerting.transport.TransportGetMonitorAction +import org.opensearch.alerting.transport.TransportGetMonitorV2Action import org.opensearch.alerting.transport.TransportGetRemoteIndexesAction import org.opensearch.alerting.transport.TransportGetWorkflowAction import org.opensearch.alerting.transport.TransportGetWorkflowAlertsAction @@ -189,14 +193,14 @@ internal class AlertingPlugin : PainlessExtension, ActionPlugin, ScriptPlugin, R // Alerting V1 // RestGetMonitorAction(), // RestDeleteMonitorAction(), -// RestIndexMonitorAction(), + RestIndexMonitorAction(), // RestIndexWorkflowAction(), // RestSearchMonitorAction(settings, clusterService), // RestExecuteMonitorAction(), // RestExecuteWorkflowAction(), // RestAcknowledgeAlertAction(), // RestAcknowledgeChainedAlertAction(), -// RestScheduledJobStatsHandler("_alerting"), + RestScheduledJobStatsHandler("_alerting"), // RestSearchEmailAccountAction(), // RestGetEmailAccountAction(), // RestSearchEmailGroupAction(), @@ -216,8 
+220,10 @@ internal class AlertingPlugin : PainlessExtension, ActionPlugin, ScriptPlugin, R RestIndexMonitorV2Action(), RestExecuteMonitorV2Action(), RestDeleteMonitorV2Action(), + RestGetMonitorV2Action(), RestSearchMonitorV2Action(settings, clusterService), RestGetAlertsV2Action(), + RestScheduledJobStatsV2Handler() ) } @@ -254,6 +260,7 @@ internal class AlertingPlugin : PainlessExtension, ActionPlugin, ScriptPlugin, R // Alerting V2 ActionPlugin.ActionHandler(AlertingActions.INDEX_MONITOR_V2_ACTION_TYPE, TransportIndexMonitorV2Action::class.java), + ActionPlugin.ActionHandler(AlertingActions.GET_MONITOR_V2_ACTION_TYPE, TransportGetMonitorV2Action::class.java), ActionPlugin.ActionHandler(AlertingActions.SEARCH_MONITORS_V2_ACTION_TYPE, TransportSearchMonitorV2Action::class.java), ActionPlugin.ActionHandler(AlertingActions.DELETE_MONITOR_V2_ACTION_TYPE, TransportDeleteMonitorV2Action::class.java), ActionPlugin.ActionHandler(ExecuteMonitorV2Action.INSTANCE, TransportExecuteMonitorV2Action::class.java), diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestDeleteMonitorV2Action.kt b/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestDeleteMonitorV2Action.kt index 46255f892..c27d4795b 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestDeleteMonitorV2Action.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestDeleteMonitorV2Action.kt @@ -27,18 +27,18 @@ class RestDeleteMonitorV2Action : BaseRestHandler() { return mutableListOf( Route( DELETE, - "${AlertingPlugin.MONITOR_V2_BASE_URI}/{monitorId}" + "${AlertingPlugin.MONITOR_V2_BASE_URI}/{monitorV2Id}" ) ) } @Throws(IOException::class) override fun prepareRequest(request: RestRequest, client: NodeClient): RestChannelConsumer { - val monitorId = request.param("monitorId") - log.info("${request.method()} ${AlertingPlugin.MONITOR_V2_BASE_URI}/$monitorId") + val monitorV2Id = request.param("monitorV2Id") + log.info("${request.method()} 
${AlertingPlugin.MONITOR_V2_BASE_URI}/$monitorV2Id") val refreshPolicy = RefreshPolicy.parse(request.param(REFRESH, RefreshPolicy.IMMEDIATE.value)) - val deleteMonitorV2Request = DeleteMonitorV2Request(monitorId, refreshPolicy) + val deleteMonitorV2Request = DeleteMonitorV2Request(monitorV2Id, refreshPolicy) return RestChannelConsumer { channel -> client.execute(AlertingActions.DELETE_MONITOR_V2_ACTION_TYPE, deleteMonitorV2Request, RestToXContentListener(channel)) diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestExecuteMonitorV2Action.kt b/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestExecuteMonitorV2Action.kt index 3dcd06d62..da2a59a38 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestExecuteMonitorV2Action.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestExecuteMonitorV2Action.kt @@ -27,7 +27,7 @@ class RestExecuteMonitorV2Action : BaseRestHandler() { return listOf( Route( POST, - "${AlertingPlugin.MONITOR_V2_BASE_URI}/{monitorId}/_execute" + "${AlertingPlugin.MONITOR_V2_BASE_URI}/{monitorV2Id}/_execute" ), Route( POST, @@ -43,9 +43,9 @@ class RestExecuteMonitorV2Action : BaseRestHandler() { val dryrun = request.paramAsBoolean("dryrun", false) val requestEnd = request.paramAsTime("period_end", TimeValue(Instant.now().toEpochMilli())) - if (request.hasParam("monitorId")) { - val monitorId = request.param("monitorId") - val execMonitorV2Request = ExecuteMonitorV2Request(dryrun, monitorId, null, null, requestEnd) + if (request.hasParam("monitorV2Id")) { + val monitorV2Id = request.param("monitorV2Id") + val execMonitorV2Request = ExecuteMonitorV2Request(dryrun, monitorV2Id, null, null, requestEnd) client.execute(ExecuteMonitorV2Action.INSTANCE, execMonitorV2Request, RestToXContentListener(channel)) } else { val xcp = request.contentParser() @@ -65,6 +65,6 @@ class RestExecuteMonitorV2Action : BaseRestHandler() { } override fun responseParams(): Set { - return 
setOf("dryrun", "period_end", "monitorId") + return setOf("dryrun", "period_end", "monitorV2Id") } } diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestGetMonitorV2Action.kt b/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestGetMonitorV2Action.kt new file mode 100644 index 000000000..efbb3f044 --- /dev/null +++ b/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestGetMonitorV2Action.kt @@ -0,0 +1,54 @@ +package org.opensearch.alerting.resthandler + +import org.apache.logging.log4j.LogManager +import org.opensearch.alerting.AlertingPlugin +import org.opensearch.alerting.util.context +import org.opensearch.commons.alerting.action.AlertingActions +import org.opensearch.commons.alerting.action.GetMonitorV2Request +import org.opensearch.rest.BaseRestHandler +import org.opensearch.rest.RestHandler.Route +import org.opensearch.rest.RestRequest +import org.opensearch.rest.RestRequest.Method.GET +import org.opensearch.rest.RestRequest.Method.HEAD +import org.opensearch.rest.action.RestActions +import org.opensearch.rest.action.RestToXContentListener +import org.opensearch.search.fetch.subphase.FetchSourceContext +import org.opensearch.transport.client.node.NodeClient + +private val log = LogManager.getLogger(RestGetMonitorV2Action::class.java) + +class RestGetMonitorV2Action : BaseRestHandler() { + + override fun getName(): String { + return "get_monitor_v2_action" + } + + override fun routes(): List { + return mutableListOf( + Route( + GET, + "${AlertingPlugin.MONITOR_V2_BASE_URI}/{monitorV2Id}" + ) + ) + } + + override fun prepareRequest(request: RestRequest, client: NodeClient): RestChannelConsumer { + log.debug("${request.method()} ${AlertingPlugin.MONITOR_V2_BASE_URI}/{monitorV2Id}") + + val monitorV2Id = request.param("monitorV2Id") + if (monitorV2Id == null || monitorV2Id.isEmpty()) { + throw IllegalArgumentException("No MonitorV2 ID provided") + } + + var srcContext = context(request) + if (request.method() == 
HEAD) { + srcContext = FetchSourceContext.DO_NOT_FETCH_SOURCE + } + + val getMonitorV2Request = GetMonitorV2Request(monitorV2Id, RestActions.parseVersion(request), srcContext) + return RestChannelConsumer { + channel -> + client.execute(AlertingActions.GET_MONITOR_V2_ACTION_TYPE, getMonitorV2Request, RestToXContentListener(channel)) + } + } +} diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestIndexMonitorV2Action.kt b/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestIndexMonitorV2Action.kt index c8ebd3c54..8f109b017 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestIndexMonitorV2Action.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestIndexMonitorV2Action.kt @@ -68,7 +68,7 @@ class RestIndexMonitorV2Action : BaseRestHandler() { WriteRequest.RefreshPolicy.IMMEDIATE } - val indexMonitorV2Request = IndexMonitorV2Request(seqNo, primaryTerm, refreshPolicy, monitorV2) + val indexMonitorV2Request = IndexMonitorV2Request(id, seqNo, primaryTerm, refreshPolicy, request.method(), monitorV2) return RestChannelConsumer { channel -> client.execute(AlertingActions.INDEX_MONITOR_V2_ACTION_TYPE, indexMonitorV2Request, RestToXContentListener(channel)) diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportGetMonitorV2Action.kt b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportGetMonitorV2Action.kt new file mode 100644 index 000000000..87e19db2b --- /dev/null +++ b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportGetMonitorV2Action.kt @@ -0,0 +1,119 @@ +package org.opensearch.alerting.transport + +import kotlinx.coroutines.CoroutineScope +import kotlinx.coroutines.Dispatchers +import org.apache.logging.log4j.LogManager +import org.opensearch.OpenSearchStatusException +import org.opensearch.action.get.GetRequest +import org.opensearch.action.get.GetResponse +import org.opensearch.action.support.ActionFilters +import 
org.opensearch.action.support.HandledTransportAction +import org.opensearch.alerting.settings.AlertingSettings +import org.opensearch.cluster.service.ClusterService +import org.opensearch.common.inject.Inject +import org.opensearch.common.settings.Settings +import org.opensearch.common.xcontent.LoggingDeprecationHandler +import org.opensearch.common.xcontent.XContentHelper +import org.opensearch.common.xcontent.XContentType +import org.opensearch.commons.alerting.action.AlertingActions +import org.opensearch.commons.alerting.action.GetMonitorV2Request +import org.opensearch.commons.alerting.action.GetMonitorV2Response +import org.opensearch.commons.alerting.model.MonitorV2 +import org.opensearch.commons.alerting.model.ScheduledJob +import org.opensearch.commons.alerting.util.AlertingException +import org.opensearch.core.action.ActionListener +import org.opensearch.core.rest.RestStatus +import org.opensearch.core.xcontent.NamedXContentRegistry +import org.opensearch.tasks.Task +import org.opensearch.transport.TransportService +import org.opensearch.transport.client.Client + +private val log = LogManager.getLogger(TransportGetMonitorAction::class.java) +private val scope: CoroutineScope = CoroutineScope(Dispatchers.IO) + +class TransportGetMonitorV2Action @Inject constructor( + transportService: TransportService, + val client: Client, + actionFilters: ActionFilters, + val xContentRegistry: NamedXContentRegistry, + val clusterService: ClusterService, + settings: Settings, +) : HandledTransportAction( + AlertingActions.GET_MONITOR_V2_ACTION_NAME, + transportService, + actionFilters, + ::GetMonitorV2Request +), + SecureTransportAction { + + @Volatile + override var filterByEnabled = AlertingSettings.FILTER_BY_BACKEND_ROLES.get(settings) + + init { + listenFilterBySettingChange(clusterService) + } + + override fun doExecute(task: Task, request: GetMonitorV2Request, actionListener: ActionListener) { + val getRequest = GetRequest(ScheduledJob.SCHEDULED_JOBS_INDEX, 
request.monitorV2Id) + .version(request.version) + .fetchSourceContext(request.srcContext) + +// if (!validateUserBackendRoles(user, actionListener)) { +// return +// } + +// client.threadPool().threadContext.stashContext().use { + client.get( + getRequest, + object : ActionListener { + override fun onResponse(response: GetResponse) { + if (!response.isExists) { + actionListener.onFailure( + AlertingException.wrap(OpenSearchStatusException("MonitorV2 not found.", RestStatus.NOT_FOUND)) + ) + return + } + + var monitorV2: MonitorV2? = null + if (!response.isSourceEmpty) { + XContentHelper.createParser( + xContentRegistry, + LoggingDeprecationHandler.INSTANCE, + response.sourceAsBytesRef, + XContentType.JSON + ).use { xcp -> + monitorV2 = ScheduledJob.parse(xcp, response.id, response.version) as MonitorV2 +// +// // security is enabled and filterby is enabled +// if (!checkUserPermissionsWithResource( +// user, +// monitor?.user, +// actionListener, +// "monitor", +// transformedRequest.monitorId +// ) +// ) { +// return +// } + } + } + + actionListener.onResponse( + GetMonitorV2Response( + response.id, + response.version, + response.seqNo, + response.primaryTerm, + monitorV2 + ) + ) + } + + override fun onFailure(t: Exception) { + actionListener.onFailure(AlertingException.wrap(t)) + } + } + ) +// } + } +} diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportIndexMonitorV2Action.kt b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportIndexMonitorV2Action.kt index 4236791ea..bf05af6ec 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportIndexMonitorV2Action.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportIndexMonitorV2Action.kt @@ -12,6 +12,8 @@ import org.opensearch.action.admin.cluster.health.ClusterHealthAction import org.opensearch.action.admin.cluster.health.ClusterHealthRequest import org.opensearch.action.admin.cluster.health.ClusterHealthResponse import 
org.opensearch.action.admin.indices.create.CreateIndexResponse +import org.opensearch.action.get.GetRequest +import org.opensearch.action.get.GetResponse import org.opensearch.action.index.IndexRequest import org.opensearch.action.index.IndexResponse import org.opensearch.action.search.SearchRequest @@ -29,12 +31,17 @@ import org.opensearch.alerting.util.IndexUtils import org.opensearch.cluster.service.ClusterService import org.opensearch.common.inject.Inject import org.opensearch.common.settings.Settings +import org.opensearch.common.xcontent.LoggingDeprecationHandler import org.opensearch.common.xcontent.XContentFactory.jsonBuilder +import org.opensearch.common.xcontent.XContentHelper +import org.opensearch.common.xcontent.XContentType import org.opensearch.commons.alerting.action.AlertingActions import org.opensearch.commons.alerting.action.IndexMonitorV2Request import org.opensearch.commons.alerting.action.IndexMonitorV2Response import org.opensearch.commons.alerting.model.Monitor +import org.opensearch.commons.alerting.model.MonitorV2 import org.opensearch.commons.alerting.model.PPLMonitor +import org.opensearch.commons.alerting.model.ScheduledJob import org.opensearch.commons.alerting.model.ScheduledJob.Companion.SCHEDULED_JOBS_INDEX import org.opensearch.commons.alerting.util.AlertingException import org.opensearch.core.action.ActionListener @@ -43,6 +50,7 @@ import org.opensearch.core.rest.RestStatus import org.opensearch.core.xcontent.NamedXContentRegistry import org.opensearch.core.xcontent.ToXContent import org.opensearch.index.query.QueryBuilders +import org.opensearch.rest.RestRequest import org.opensearch.search.builder.SearchSourceBuilder import org.opensearch.tasks.Task import org.opensearch.transport.TransportService @@ -174,7 +182,7 @@ class TransportIndexMonitorV2Action @Inject constructor( // Below check needs to be async operations and needs to be refactored issue#269 // checkForDisallowedDestinations(allowList) - // TODO: checks for 
throttling/suppression + // TODO: checks for throttling/suppression, should not be needed here, done in common utils when parsing PPLTriggers // try { // validateActionThrottle(request.monitor, maxActionThrottle, TimeValue.timeValueMinutes(1)) // } catch (e: RuntimeException) { @@ -182,48 +190,116 @@ class TransportIndexMonitorV2Action @Inject constructor( // return // } - val query = QueryBuilders.boolQuery().filter(QueryBuilders.termQuery("${Monitor.MONITOR_TYPE}.type", Monitor.MONITOR_TYPE)) - val searchSource = SearchSourceBuilder().query(query).timeout(requestTimeout) - val searchRequest = SearchRequest(SCHEDULED_JOBS_INDEX).source(searchSource) + if (indexMonitorRequest.method == RestRequest.Method.PUT) { // update monitor case + scope.launch { + updateMonitor(indexMonitorRequest, actionListener) + } + } else { // create monitor case + val query = QueryBuilders.boolQuery().filter(QueryBuilders.termQuery("${Monitor.MONITOR_TYPE}.type", Monitor.MONITOR_TYPE)) + val searchSource = SearchSourceBuilder().query(query).timeout(requestTimeout) + val searchRequest = SearchRequest(SCHEDULED_JOBS_INDEX).source(searchSource) - client.search( - searchRequest, - object : ActionListener { - override fun onResponse(searchResponse: SearchResponse) { - onMonitorCountSearchResponse(searchResponse, indexMonitorRequest, actionListener) - } + client.search( + searchRequest, + object : ActionListener { + override fun onResponse(searchResponse: SearchResponse) { + onMonitorCountSearchResponse(searchResponse, indexMonitorRequest, actionListener) + } - override fun onFailure(t: Exception) { - actionListener.onFailure(AlertingException.wrap(t)) + override fun onFailure(t: Exception) { + actionListener.onFailure(AlertingException.wrap(t)) + } } + ) + } + } + + /* Functions for Update Monitor flow */ + + private suspend fun updateMonitor(indexMonitorRequest: IndexMonitorV2Request, actionListener: ActionListener) { + val getRequest = GetRequest(SCHEDULED_JOBS_INDEX, 
indexMonitorRequest.monitorId) + try { + val getResponse: GetResponse = client.suspendUntil { client.get(getRequest, it) } + if (!getResponse.isExists) { + actionListener.onFailure( + AlertingException.wrap( + OpenSearchStatusException("MonitorV2 with ${indexMonitorRequest.monitorId} is not found", RestStatus.NOT_FOUND) + ) + ) + return } + val xcp = XContentHelper.createParser( + xContentRegistry, LoggingDeprecationHandler.INSTANCE, + getResponse.sourceAsBytesRef, XContentType.JSON + ) + val monitorV2 = ScheduledJob.parse(xcp, getResponse.id, getResponse.version) as MonitorV2 + onGetMonitorResponseForUpdate(monitorV2, indexMonitorRequest, actionListener) + } catch (t: Exception) { + actionListener.onFailure(AlertingException.wrap(t)) + } + } + + private suspend fun onGetMonitorResponseForUpdate( + currentMonitorV2: MonitorV2, + indexMonitorRequest: IndexMonitorV2Request, + actionListener: ActionListener + ) { + var newMonitorV2 = when (indexMonitorRequest.monitorV2) { + is PPLMonitor -> indexMonitorRequest.monitorV2 as PPLMonitor + else -> throw IllegalArgumentException("received unsupported monitor type to index: ${indexMonitorRequest.monitorV2.javaClass}") + } + + if (currentMonitorV2 !is PPLMonitor) { + throw IllegalStateException( + "During update, existing monitor ${currentMonitorV2.id} had unexpected type ${currentMonitorV2::class.java}" + ) + } + + if (newMonitorV2.enabled && currentMonitorV2.enabled) { + newMonitorV2 = newMonitorV2.copy(enabledTime = currentMonitorV2.enabledTime) + } + + // TODO: add schemaVersion field to MonitorV2 model +// newPplMonitor = newPplMonitor.copy(schemaVersion = IndexUtils.scheduledJobIndexSchemaVersion) + + val indexRequest = IndexRequest(SCHEDULED_JOBS_INDEX) + .setRefreshPolicy(indexMonitorRequest.refreshPolicy) + .source(newMonitorV2.toXContent(jsonBuilder(), ToXContent.MapParams(mapOf("with_type" to "true")))) + .id(indexMonitorRequest.monitorId) + .setIfSeqNo(indexMonitorRequest.seqNo) + 
.setIfPrimaryTerm(indexMonitorRequest.primaryTerm) + .timeout(indexTimeout) + + log.info( + "Updating monitor, ${currentMonitorV2.id}, from: ${currentMonitorV2.toXContent( + jsonBuilder(), + ToXContent.MapParams(mapOf("with_type" to "true")) + )} \n to: ${newMonitorV2.toXContent(jsonBuilder(), ToXContent.MapParams(mapOf("with_type" to "true")))}" ) - // TODO: this if else forks between update or create monitor, come back to this when supporting update monitor -// if (request.method == RestRequest.Method.PUT) { -// scope.launch { -// updateMonitor() -// } -// } else { -// val query = QueryBuilders.boolQuery().filter(QueryBuilders.termQuery("${Monitor.MONITOR_TYPE}.type", Monitor.MONITOR_TYPE)) -// val searchSource = SearchSourceBuilder().query(query).timeout(requestTimeout) -// val searchRequest = SearchRequest(SCHEDULED_JOBS_INDEX).source(searchSource) -// -// client.search( -// searchRequest, -// object : ActionListener { -// override fun onResponse(searchResponse: SearchResponse) { -// onSearchResponse(searchResponse) -// } -// -// override fun onFailure(t: Exception) { -// actionListener.onFailure(AlertingException.wrap(t)) -// } -// } -// ) -// } + try { + val indexResponse: IndexResponse = client.suspendUntil { client.index(indexRequest, it) } + val failureReasons = checkShardsFailure(indexResponse) + if (failureReasons != null) { + actionListener.onFailure( + AlertingException.wrap(OpenSearchStatusException(failureReasons.toString(), indexResponse.status())) + ) + return + } + + actionListener.onResponse( + IndexMonitorV2Response( + indexResponse.id, indexResponse.version, indexResponse.seqNo, + indexResponse.primaryTerm, newMonitorV2 + ) + ) + } catch (e: Exception) { + actionListener.onFailure(AlertingException.wrap(e)) + } } + /* Functions for Create Monitor flow */ + /** * After searching for all existing monitors we validate the system can support another monitor to be created. 
*/ @@ -265,7 +341,7 @@ class TransportIndexMonitorV2Action @Inject constructor( // } var monitorV2 = when (indexMonitorRequest.monitorV2) { is PPLMonitor -> indexMonitorRequest.monitorV2 as PPLMonitor - else -> throw IllegalStateException("received unsupported monitor type to index: ${indexMonitorRequest.monitorV2.javaClass}") + else -> throw IllegalArgumentException("received unsupported monitor type to index: ${indexMonitorRequest.monitorV2.javaClass}") } val indexRequest = IndexRequest(SCHEDULED_JOBS_INDEX) @@ -304,6 +380,7 @@ class TransportIndexMonitorV2Action @Inject constructor( } } + // TODO: copied from V1 TransportIndexMonitorAction, abstract this out into a util function private fun checkShardsFailure(response: IndexResponse): String? { val failureReasons = StringBuilder() if (response.shardInfo.failed > 0) { diff --git a/core/src/main/kotlin/org/opensearch/alerting/core/JobSweeper.kt b/core/src/main/kotlin/org/opensearch/alerting/core/JobSweeper.kt index d3ecf1fb6..e08b1360f 100644 --- a/core/src/main/kotlin/org/opensearch/alerting/core/JobSweeper.kt +++ b/core/src/main/kotlin/org/opensearch/alerting/core/JobSweeper.kt @@ -428,7 +428,6 @@ class JobSweeper( "Unable to parse ScheduledJob source: {}", Strings.cleanTruncate(jobSource.utf8ToString(), 1000) ) - logger.warn("exception thrown: $e") sweep(shardId, jobId, jobVersion, null, true) null } diff --git a/core/src/main/kotlin/org/opensearch/alerting/core/action/node/ScheduledJobsStatsRequest.kt b/core/src/main/kotlin/org/opensearch/alerting/core/action/node/ScheduledJobsStatsRequest.kt index 6a82e8204..79e36c3b0 100644 --- a/core/src/main/kotlin/org/opensearch/alerting/core/action/node/ScheduledJobsStatsRequest.kt +++ b/core/src/main/kotlin/org/opensearch/alerting/core/action/node/ScheduledJobsStatsRequest.kt @@ -17,18 +17,24 @@ import java.io.IOException class ScheduledJobsStatsRequest : BaseNodesRequest { var jobSchedulingMetrics: Boolean = true var jobsInfo: Boolean = true + var 
showAlertingV2ScheduledJobs: Boolean = false // show Alerting V2 scheduled jobs if true, Alerting V1 scheduled jobs if false constructor(si: StreamInput) : super(si) { jobSchedulingMetrics = si.readBoolean() jobsInfo = si.readBoolean() + showAlertingV2ScheduledJobs = si.readBoolean() + } + + constructor(nodeIds: Array, showAlertingV2ScheduledJobs: Boolean) : super(*nodeIds) { + this.showAlertingV2ScheduledJobs = showAlertingV2ScheduledJobs } - constructor(nodeIds: Array) : super(*nodeIds) @Throws(IOException::class) override fun writeTo(out: StreamOutput) { super.writeTo(out) out.writeBoolean(jobSchedulingMetrics) out.writeBoolean(jobsInfo) + out.writeBoolean(showAlertingV2ScheduledJobs) } fun all(): ScheduledJobsStatsRequest { diff --git a/core/src/main/kotlin/org/opensearch/alerting/core/action/node/ScheduledJobsStatsTransportAction.kt b/core/src/main/kotlin/org/opensearch/alerting/core/action/node/ScheduledJobsStatsTransportAction.kt index f2ed94623..42343e296 100644 --- a/core/src/main/kotlin/org/opensearch/alerting/core/action/node/ScheduledJobsStatsTransportAction.kt +++ b/core/src/main/kotlin/org/opensearch/alerting/core/action/node/ScheduledJobsStatsTransportAction.kt @@ -93,7 +93,12 @@ class ScheduledJobsStatsTransportAction : TransportNodesAction Unit>( + val METRICS = mapOf Unit>( JOB_SCHEDULING_METRICS to { it -> it.jobSchedulingMetrics = true }, JOBS_INFO to { it -> it.jobsInfo = true } ) @@ -84,7 +84,7 @@ class RestScheduledJobStatsHandler(private val path: String) : BaseRestHandler() private fun getRequest(request: RestRequest): ScheduledJobsStatsRequest { val nodesIds = Strings.splitStringByCommaToArray(request.param("nodeId")) val metrics = Strings.tokenizeByCommaToSet(request.param("metric")) - val scheduledJobsStatsRequest = ScheduledJobsStatsRequest(nodesIds) + val scheduledJobsStatsRequest = ScheduledJobsStatsRequest(nodeIds = nodesIds, showAlertingV2ScheduledJobs = false) scheduledJobsStatsRequest.timeout(request.param("timeout")) if 
(metrics.isEmpty()) { diff --git a/core/src/main/kotlin/org/opensearch/alerting/core/resthandler/RestScheduledJobStatsV2Handler.kt b/core/src/main/kotlin/org/opensearch/alerting/core/resthandler/RestScheduledJobStatsV2Handler.kt new file mode 100644 index 000000000..91e06097a --- /dev/null +++ b/core/src/main/kotlin/org/opensearch/alerting/core/resthandler/RestScheduledJobStatsV2Handler.kt @@ -0,0 +1,95 @@ +package org.opensearch.alerting.core.resthandler + +import java.util.* +import org.opensearch.alerting.core.action.node.ScheduledJobsStatsAction +import org.opensearch.alerting.core.action.node.ScheduledJobsStatsRequest +import org.opensearch.alerting.core.resthandler.RestScheduledJobStatsHandler.Companion.METRICS +import org.opensearch.core.common.Strings +import org.opensearch.rest.BaseRestHandler +import org.opensearch.rest.RestHandler +import org.opensearch.rest.RestHandler.Route +import org.opensearch.rest.RestRequest +import org.opensearch.rest.RestRequest.Method.GET +import org.opensearch.rest.action.RestActions +import org.opensearch.transport.client.node.NodeClient + +/** + * RestScheduledJobStatsHandler is handler for getting ScheduledJob Stats for Alerting V2 Scheduled Jobs. 
+ */ +class RestScheduledJobStatsV2Handler : BaseRestHandler() { + + override fun getName(): String { + return "alerting_jobs_stats_v2" + } + + override fun routes(): List { + return listOf( + Route( + GET, + "/_plugins/_alerting/v2/stats/" + ), + Route( + GET, + "/_plugins/_alerting/v2/stats/{metric}" + ), + Route( + GET, + "/_plugins/_alerting/v2/{nodeId}/stats/" + ), + Route( + GET, + "/_plugins/_alerting/v2/{nodeId}/stats/{metric}" + ) + ) + } + + override fun prepareRequest(request: RestRequest, client: NodeClient): RestChannelConsumer { + val scheduledJobNodesStatsRequest = getRequest(request) + return RestChannelConsumer { channel -> + client.execute( + ScheduledJobsStatsAction.INSTANCE, + scheduledJobNodesStatsRequest, + RestActions.NodesResponseRestListener(channel) + ) + } + } + + private fun getRequest(request: RestRequest): ScheduledJobsStatsRequest { + val nodesIds = Strings.splitStringByCommaToArray(request.param("nodeId")) + val metrics = Strings.tokenizeByCommaToSet(request.param("metric")) + val scheduledJobsStatsRequest = ScheduledJobsStatsRequest(nodeIds = nodesIds, showAlertingV2ScheduledJobs = true) + scheduledJobsStatsRequest.timeout(request.param("timeout")) + + if (metrics.isEmpty()) { + return scheduledJobsStatsRequest + } else if (metrics.size == 1 && metrics.contains("_all")) { + scheduledJobsStatsRequest.all() + } else if (metrics.contains("_all")) { + throw IllegalArgumentException( + String.format( + Locale.ROOT, + "request [%s] contains _all and individual metrics [%s]", + request.path(), + request.param("metric") + ) + ) + } else { + // use a sorted set so the unrecognized parameters appear in a reliable sorted order + scheduledJobsStatsRequest.clear() + val invalidMetrics = TreeSet() + for (metric in metrics) { + val handler = METRICS[metric] + if (handler != null) { + handler.invoke(scheduledJobsStatsRequest) + } else { + invalidMetrics.add(metric) + } + } + + if (!invalidMetrics.isEmpty()) { + throw 
IllegalArgumentException(unrecognized(request, invalidMetrics, METRICS.keys, "metric")) + } + } + return scheduledJobsStatsRequest + } +} diff --git a/core/src/main/kotlin/org/opensearch/alerting/core/schedule/JobScheduler.kt b/core/src/main/kotlin/org/opensearch/alerting/core/schedule/JobScheduler.kt index a4a729121..e3bd454c5 100644 --- a/core/src/main/kotlin/org/opensearch/alerting/core/schedule/JobScheduler.kt +++ b/core/src/main/kotlin/org/opensearch/alerting/core/schedule/JobScheduler.kt @@ -8,6 +8,8 @@ package org.opensearch.alerting.core.schedule import org.apache.logging.log4j.LogManager import org.opensearch.alerting.core.JobRunner import org.opensearch.common.unit.TimeValue +import org.opensearch.commons.alerting.model.MonitorV2 +import org.opensearch.commons.alerting.model.PPLMonitor import org.opensearch.commons.alerting.model.ScheduledJob import org.opensearch.threadpool.Scheduler import org.opensearch.threadpool.ThreadPool @@ -203,6 +205,19 @@ class JobScheduler(private val threadPool: ThreadPool, private val jobRunner: Jo .collect(Collectors.toList()) } + fun getJobSchedulerV2Metric(): List { + return scheduledJobIdToInfo.entries.filter { it.value.scheduledJob is MonitorV2 } + .stream() + .map { entry -> + JobSchedulerMetrics( + entry.value.scheduledJobId, + entry.value.actualPreviousExecutionTime?.toEpochMilli(), + entry.value.scheduledJob.schedule.runningOnTime(entry.value.actualPreviousExecutionTime) + ) + } + .collect(Collectors.toList()) + } + fun postIndex(job: ScheduledJob) { jobRunner.postIndex(job) } From 98702022607488a99bf2387f1be7a407b5c9d624 Mon Sep 17 00:00:00 2001 From: Dennis Toepker Date: Wed, 3 Sep 2025 15:27:34 -0700 Subject: [PATCH 12/40] misc cleanup --- .../main/kotlin/org/opensearch/alerting/AlertV2Expirer.kt | 7 ++----- .../core/resthandler/RestScheduledJobStatsV2Handler.kt | 4 ++-- .../org/opensearch/alerting/core/schedule/JobScheduler.kt | 1 - 3 files changed, 4 insertions(+), 8 deletions(-) diff --git 
a/alerting/src/main/kotlin/org/opensearch/alerting/AlertV2Expirer.kt b/alerting/src/main/kotlin/org/opensearch/alerting/AlertV2Expirer.kt index bbf3c5676..d92df62a7 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/AlertV2Expirer.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/AlertV2Expirer.kt @@ -45,9 +45,6 @@ class AlertV2Expirer( private val checkForExpirationInterval = TimeValue(1L, TimeUnit.MINUTES) override fun clusterChanged(event: ClusterChangedEvent) { - // Instead of using a LocalNodeClusterManagerListener to track clustermanager changes, this service will - // track them here to avoid conditions where clustermanager listener events run after other - // listeners that depend on what happened in the clustermanager listener if (this.isClusterManager != event.localNodeClusterManager()) { this.isClusterManager = event.localNodeClusterManager() if (this.isClusterManager) { @@ -63,7 +60,7 @@ class AlertV2Expirer( fun onManager() { try { - // try to sweep current AlertV2s immediately as we might be restarting the cluster + // try to sweep current AlertV2s for expiration immediately as we might be restarting the cluster expireAlertV2s() // schedule expiration checks and expirations to happen repeatedly at some interval scheduledAlertsV2CheckAndExpire = threadPool @@ -71,7 +68,7 @@ class AlertV2Expirer( } catch (e: Exception) { // This should be run on cluster startup logger.error( - "Error creating comments indices. Comments can't be recorded until clustermanager node is restarted.", + "Error sweeping AlertV2s for expiration. 
This cannot be done until clustermanager node is restarted.", e ) } diff --git a/core/src/main/kotlin/org/opensearch/alerting/core/resthandler/RestScheduledJobStatsV2Handler.kt b/core/src/main/kotlin/org/opensearch/alerting/core/resthandler/RestScheduledJobStatsV2Handler.kt index 91e06097a..43a0f1a78 100644 --- a/core/src/main/kotlin/org/opensearch/alerting/core/resthandler/RestScheduledJobStatsV2Handler.kt +++ b/core/src/main/kotlin/org/opensearch/alerting/core/resthandler/RestScheduledJobStatsV2Handler.kt @@ -1,17 +1,17 @@ package org.opensearch.alerting.core.resthandler -import java.util.* import org.opensearch.alerting.core.action.node.ScheduledJobsStatsAction import org.opensearch.alerting.core.action.node.ScheduledJobsStatsRequest import org.opensearch.alerting.core.resthandler.RestScheduledJobStatsHandler.Companion.METRICS import org.opensearch.core.common.Strings import org.opensearch.rest.BaseRestHandler -import org.opensearch.rest.RestHandler import org.opensearch.rest.RestHandler.Route import org.opensearch.rest.RestRequest import org.opensearch.rest.RestRequest.Method.GET import org.opensearch.rest.action.RestActions import org.opensearch.transport.client.node.NodeClient +import java.util.Locale +import java.util.TreeSet /** * RestScheduledJobStatsHandler is handler for getting ScheduledJob Stats for Alerting V2 Scheduled Jobs. 
diff --git a/core/src/main/kotlin/org/opensearch/alerting/core/schedule/JobScheduler.kt b/core/src/main/kotlin/org/opensearch/alerting/core/schedule/JobScheduler.kt index e3bd454c5..1d528384a 100644 --- a/core/src/main/kotlin/org/opensearch/alerting/core/schedule/JobScheduler.kt +++ b/core/src/main/kotlin/org/opensearch/alerting/core/schedule/JobScheduler.kt @@ -9,7 +9,6 @@ import org.apache.logging.log4j.LogManager import org.opensearch.alerting.core.JobRunner import org.opensearch.common.unit.TimeValue import org.opensearch.commons.alerting.model.MonitorV2 -import org.opensearch.commons.alerting.model.PPLMonitor import org.opensearch.commons.alerting.model.ScheduledJob import org.opensearch.threadpool.Scheduler import org.opensearch.threadpool.ThreadPool From e748065e4995542eae913d176f42957c46d1a491 Mon Sep 17 00:00:00 2001 From: Dennis Toepker Date: Thu, 4 Sep 2025 16:25:10 -0700 Subject: [PATCH 13/40] update monitor and lookback window implementations --- .../opensearch/alerting/PPLMonitorRunner.kt | 30 +++++++++++++++---- .../resthandler/RestIndexMonitorV2Action.kt | 12 ++++---- .../TransportIndexMonitorV2Action.kt | 4 +-- 3 files changed, 31 insertions(+), 15 deletions(-) diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt b/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt index 1c5ab2dc9..0740ba3e6 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt @@ -14,6 +14,7 @@ import org.opensearch.alerting.alerts.AlertIndices import org.opensearch.alerting.opensearchapi.retry import org.opensearch.alerting.opensearchapi.suspendUntil import org.opensearch.alerting.script.PPLTriggerExecutionContext +import org.opensearch.common.unit.TimeValue import org.opensearch.common.xcontent.XContentFactory import org.opensearch.commons.alerting.model.ActionRunResult import org.opensearch.commons.alerting.model.Alert @@ -94,8 
+95,12 @@ object PPLMonitorRunner : MonitorV2Runner() { } // only query data between now and the last PPL Monitor execution - // do this by injecting a time filtering where statement into PPL Monitor query - val timeFilteredQuery = addTimeFilter(pplMonitor.query, periodStart, periodEnd) + // unless a look back window is specified, in which case use that instead, + // then inject a time filter where statement into PPL Monitor query. + // if the given monitor query already has any time check whatsoever, this + // simply returns the original query itself + val timeFilteredQuery = addTimeFilter(pplMonitor.query, periodStart, periodEnd, pplMonitor.lookBackWindow) + logger.info("time filtered query: $timeFilteredQuery") // run each trigger for (trigger in pplMonitor.triggers) { @@ -231,16 +236,31 @@ object PPLMonitorRunner : MonitorV2Runner() { } // adds monitor schedule-based time filter - private fun addTimeFilter(query: String, periodStart: Instant, periodEnd: Instant): String { + // query: the raw PPL Monitor query + // periodStart: the lower bound of the initially computed query interval based on monitor schedule + // periodEnd: the upper bound of the initially computed query interval based on monitor schedule + // lookBackWindow: customer's desired query look back window, overrides [periodStart, periodEnd] if not null + private fun addTimeFilter(query: String, periodStart: Instant, periodEnd: Instant, lookBackWindow: TimeValue?): String { // inject time filter into PPL query to only query for data within the (periodStart, periodEnd) interval // TODO: if query contains "_time", "span", "earliest", "latest", skip adding filter // TODO: pending https://github.com/opensearch-project/sql/issues/3969 // for now assume "_time" field is always present in customer data + // if the raw query contained any time check whatsoever, skip adding a time filter internally + // and return query as is, customer's in-query time checks instantly and automatically overrides + if 
(query.contains("_time")) { // TODO: replace with PPL time keyword checks after that's GA + return query + } + + // if customer passed in a look back window, override the precomputed interval with it + val updatedPeriodStart = lookBackWindow?.let { window -> + periodEnd.minus(window.millis, ChronoUnit.MILLIS) + } ?: periodStart + // PPL plugin only accepts timestamp strings in this format val formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss").withZone(UTC) - val periodStartPplTimestamp = formatter.format(periodStart) + val periodStartPplTimestamp = formatter.format(updatedPeriodStart) val periodEndPplTimeStamp = formatter.format(periodEnd) val timeFilterReplace = "| where _time > TIMESTAMP('$periodStartPplTimestamp') and _time < TIMESTAMP('$periodEndPplTimeStamp') |" @@ -257,8 +277,6 @@ object PPLMonitorRunner : MonitorV2Runner() { query + timeFilterAppend } - logger.info("time filtered query: $timeFilteredQuery") - return timeFilteredQuery } diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestIndexMonitorV2Action.kt b/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestIndexMonitorV2Action.kt index 8f109b017..15ee7cc22 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestIndexMonitorV2Action.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestIndexMonitorV2Action.kt @@ -17,6 +17,7 @@ import org.opensearch.rest.BaseRestHandler import org.opensearch.rest.RestHandler.Route import org.opensearch.rest.RestRequest import org.opensearch.rest.RestRequest.Method.POST +import org.opensearch.rest.RestRequest.Method.PUT import org.opensearch.rest.action.RestToXContentListener import org.opensearch.transport.client.node.NodeClient import java.io.IOException @@ -37,11 +38,10 @@ class RestIndexMonitorV2Action : BaseRestHandler() { POST, AlertingPlugin.MONITOR_V2_BASE_URI ), - // TODO: support UpdateMonitor -// Route( -// PUT, -// "${AlertingPlugin.PPL_MONITOR_BASE_URI}/{monitorID}" 
-// ) + Route( + PUT, + "${AlertingPlugin.MONITOR_V2_BASE_URI}/{monitorV2Id}" + ) ) } @@ -59,7 +59,7 @@ class RestIndexMonitorV2Action : BaseRestHandler() { throw AlertingException.wrap(e) } - val id = request.param("monitorID", MonitorV2.NO_ID) + val id = request.param("monitorV2Id", MonitorV2.NO_ID) val seqNo = request.paramAsLong(IF_SEQ_NO, SequenceNumbers.UNASSIGNED_SEQ_NO) val primaryTerm = request.paramAsLong(IF_PRIMARY_TERM, SequenceNumbers.UNASSIGNED_PRIMARY_TERM) val refreshPolicy = if (request.hasParam(REFRESH)) { diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportIndexMonitorV2Action.kt b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportIndexMonitorV2Action.kt index bf05af6ec..021831419 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportIndexMonitorV2Action.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportIndexMonitorV2Action.kt @@ -259,9 +259,7 @@ class TransportIndexMonitorV2Action @Inject constructor( newMonitorV2 = newMonitorV2.copy(enabledTime = currentMonitorV2.enabledTime) } - // TODO: add schemaVersion field to MonitorV2 model -// newPplMonitor = newPplMonitor.copy(schemaVersion = IndexUtils.scheduledJobIndexSchemaVersion) - + newMonitorV2 = newMonitorV2.copy(schemaVersion = IndexUtils.scheduledJobIndexSchemaVersion) val indexRequest = IndexRequest(SCHEDULED_JOBS_INDEX) .setRefreshPolicy(indexMonitorRequest.refreshPolicy) .source(newMonitorV2.toXContent(jsonBuilder(), ToXContent.MapParams(mapOf("with_type" to "true")))) From 6fb26e8ba9050fc84bb207e29d47383ad75ddc2a Mon Sep 17 00:00:00 2001 From: Dennis Toepker Date: Thu, 4 Sep 2025 19:21:05 -0700 Subject: [PATCH 14/40] per results notifications --- .../opensearch/alerting/PPLMonitorRunner.kt | 145 ++++++++++++------ 1 file changed, 98 insertions(+), 47 deletions(-) diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt 
b/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt index 0740ba3e6..e5c0be9f3 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt @@ -1,6 +1,7 @@ package org.opensearch.alerting import org.apache.logging.log4j.LogManager +import org.json.JSONArray import org.json.JSONObject import org.opensearch.ExceptionsHelper import org.opensearch.action.DocWriteRequest @@ -143,14 +144,22 @@ object PPLMonitorRunner : MonitorV2Runner() { val queryResponseJson = executePplQuery(queryToExecute, nodeClient) logger.info("query execution results for trigger ${pplTrigger.name}: $queryResponseJson") + // retrieve only the relevant query response rows. + // for num_results triggers, that's the entire response + // for custom triggers, that's only rows that evaluated to true + val relevantQueryResultRows = if (pplTrigger.conditionType == ConditionType.NUMBER_OF_RESULTS) { + // number of results trigger + queryResponseJson + } else { + // custom condition trigger + evaluateCustomConditionTrigger(queryResponseJson, pplTrigger) + } + // retrieve the number of results // for number of results triggers, this is simply the number of PPL query results // for custom triggers, this is the number of rows in the query response's eval result column that evaluated to true - val numResults = if (pplTrigger.conditionType == ConditionType.NUMBER_OF_RESULTS) { // number of results trigger - queryResponseJson.getLong("total") - } else { // custom condition trigger - evaluateCustomConditionTrigger(queryResponseJson, pplTrigger) - } + val numResults = relevantQueryResultRows.getLong("total") + logger.info("number of results: $numResults") // determine if the trigger condition has been met val triggered = if (pplTrigger.conditionType == ConditionType.NUMBER_OF_RESULTS) { // number of results trigger @@ -167,32 +176,44 @@ object PPLMonitorRunner : MonitorV2Runner() { 
pplQueryResults[pplTrigger.id] = queryResponseJson if (triggered) { + // generate alerts based on trigger mode + // if this trigger is on result_set mode, this list contains exactly 1 alert + // if this trigger is on per_result mode, this list has any alerts as there are relevant query results + val thisTriggersGeneratedAlerts = generateAlerts( + pplTrigger, + pplMonitor, + relevantQueryResultRows, + timeOfCurrentExecution + ) + // collect the generated alerts to be written to alerts index - generatedAlerts.addAll(generateAlerts(pplTrigger, pplMonitor, numResults, timeOfCurrentExecution)) + // if the trigger is on result_set mode + generatedAlerts.addAll(thisTriggersGeneratedAlerts) // update the trigger's last execution time for future suppression checks pplTrigger.lastTriggeredTime = timeOfCurrentExecution - // TODO: this is purely a result set implementation - // TODO: when trigger is per result, need to send for every individual result, not just the whole thing // send alert notifications - val pplTriggerExecutionContext = PPLTriggerExecutionContext( - monitorV2, - periodStart, - periodEnd, - null, - pplTrigger, - pplQueryResults[pplTrigger.id]!!.toMap() - ) for (action in pplTrigger.actions) { - runAction( - action, - pplTriggerExecutionContext, - monitorCtx, - pplMonitor, - dryRun, - timeOfCurrentExecution - ) + for (alert in generatedAlerts) { + val pplTriggerExecutionContext = PPLTriggerExecutionContext( + monitorV2, + periodStart, + periodEnd, + null, + pplTrigger, + alert.queryResults + ) + + runAction( + action, + pplTriggerExecutionContext, + monitorCtx, + pplMonitor, + dryRun, + timeOfCurrentExecution + ) + } } } } catch (e: Exception) { @@ -318,7 +339,7 @@ object PPLMonitorRunner : MonitorV2Runner() { } } - private fun evaluateCustomConditionTrigger(customConditionQueryResponse: JSONObject, pplTrigger: PPLTrigger): Long { + private fun evaluateCustomConditionTrigger(customConditionQueryResponse: JSONObject, pplTrigger: PPLTrigger): JSONObject { // a 
PPL query with custom condition returning 0 results should imply a valid but not useful query. // do not trigger alert, but warn that query likely is not functioning as user intended if (customConditionQueryResponse.getLong("total") == 0L) { @@ -326,9 +347,18 @@ object PPLMonitorRunner : MonitorV2Runner() { "During execution of PPL Trigger ${pplTrigger.name}, PPL query with custom " + "condition returned no results. Proceeding without generating alert." ) - return 0L + return customConditionQueryResponse } + // this will eventually store just the rows that triggered the custom condition + val relevantQueryResultRows = JSONObject() + + // first add a deep copy of the schema + relevantQueryResultRows.put("schema", JSONArray(customConditionQueryResponse.getJSONArray("schema").toList())) + + // append empty datarows list, to be populated later + relevantQueryResultRows.put("datarows", JSONArray()) + // find the name of the eval result variable defined in custom condition val evalResultVarName = findEvalResultVar(pplTrigger.customCondition!!) @@ -361,16 +391,20 @@ object PPLMonitorRunner : MonitorV2Runner() { } val dataRowList = customConditionQueryResponse.getJSONArray("datarows") - var numTriggered = 0L // the number of query result rows that evaluated to true for (i in 0 until dataRowList.length()) { val dataRow = dataRowList.getJSONArray(i) val evalResult = dataRow.getBoolean(evalResultVarIdx) if (evalResult) { - numTriggered++ + // if the row triggered the custom condition + // add it to the relevant results deep copy + relevantQueryResultRows.getJSONArray("datarows").put(JSONArray(dataRow.toList())) } } - return numTriggered + relevantQueryResultRows.put("total", relevantQueryResultRows.getJSONArray("datarows").length()) + + // return only the rows that triggered the custom condition + return relevantQueryResultRows } // TODO: is there maybe some PPL plugin util function we can use to replace this? 
@@ -390,33 +424,50 @@ object PPLMonitorRunner : MonitorV2Runner() { private fun generateAlerts( pplTrigger: PPLTrigger, pplMonitor: PPLMonitor, - numAlertsToGenerate: Long, + relevantQueryResultRows: JSONObject, timeOfCurrentExecution: Instant ): List { - // TODO: currently naively generates an alert and action every time - // TODO: maintain alert state, check for COMPLETED alert and suppression condition, like query level monitor - val expirationTime = pplTrigger.expireDuration?.millis?.let { timeOfCurrentExecution.plus(it, ChronoUnit.MILLIS) } - val alertV2 = AlertV2( - monitorId = pplMonitor.id, - monitorName = pplMonitor.name, - monitorVersion = pplMonitor.version, - triggerId = pplTrigger.id, - triggerName = pplTrigger.name, - state = Alert.State.ACTIVE, - startTime = timeOfCurrentExecution, - expirationTime = expirationTime, - errorHistory = listOf(), - severity = pplTrigger.severity.value, - actionExecutionResults = listOf(), - ) - val alertV2s = mutableListOf() if (pplTrigger.mode == TriggerMode.RESULT_SET) { + val alertV2 = AlertV2( + monitorId = pplMonitor.id, + monitorName = pplMonitor.name, + monitorVersion = pplMonitor.version, + triggerId = pplTrigger.id, + triggerName = pplTrigger.name, + queryResults = relevantQueryResultRows.toMap(), + state = Alert.State.ACTIVE, + startTime = timeOfCurrentExecution, + expirationTime = expirationTime, + errorHistory = listOf(), + severity = pplTrigger.severity.value, + actionExecutionResults = listOf(), + ) alertV2s.add(alertV2) } else { // TriggerMode.PER_RESULT + val numAlertsToGenerate = relevantQueryResultRows.getInt("total") + for (i in 0 until numAlertsToGenerate) { + val individualRow = JSONObject() + individualRow.put("schema", JSONArray(relevantQueryResultRows.getJSONArray("schema").toList())) + individualRow.put("datarows", JSONArray(relevantQueryResultRows.getJSONArray("datarows").getJSONArray(i).toList())) + + val alertV2 = AlertV2( + monitorId = pplMonitor.id, + monitorName = pplMonitor.name, + 
monitorVersion = pplMonitor.version, + triggerId = pplTrigger.id, + triggerName = pplTrigger.name, + queryResults = individualRow.toMap(), + state = Alert.State.ACTIVE, + startTime = timeOfCurrentExecution, + expirationTime = expirationTime, + errorHistory = listOf(), + severity = pplTrigger.severity.value, + actionExecutionResults = listOf(), + ) alertV2s.add(alertV2) } } From 6eed1cd1cebaeb4ad066150f0259d5755a4f1f0b Mon Sep 17 00:00:00 2001 From: Dennis Toepker Date: Sat, 6 Sep 2025 10:31:00 -0700 Subject: [PATCH 15/40] alert generation refactors --- .../opensearch/alerting/PPLMonitorRunner.kt | 86 ++++++++++--------- .../alerting/alerts/alert_mapping.json | 12 +++ 2 files changed, 59 insertions(+), 39 deletions(-) diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt b/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt index e5c0be9f3..c4d43dfb2 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt @@ -32,10 +32,10 @@ import org.opensearch.commons.alerting.model.PPLTriggerRunResult import org.opensearch.commons.alerting.model.ScheduledJob.Companion.SCHEDULED_JOBS_INDEX import org.opensearch.commons.alerting.model.action.Action import org.opensearch.commons.ppl.PPLPluginInterface -import org.opensearch.commons.ppl.action.TransportPPLQueryRequest import org.opensearch.core.common.Strings import org.opensearch.core.rest.RestStatus import org.opensearch.core.xcontent.ToXContent +import org.opensearch.sql.plugin.transport.TransportPPLQueryRequest import org.opensearch.transport.TransportService import org.opensearch.transport.client.node.NodeClient import java.time.Instant @@ -77,7 +77,7 @@ object PPLMonitorRunner : MonitorV2Runner() { // create some objects that will be used later val triggerResults = mutableMapOf() val pplQueryResults = mutableMapOf() - val generatedAlerts = mutableListOf() +// val generatedAlerts = 
mutableListOf() // TODO: Instant.ofEpochMilli(monitorCtx.threadPool!!.absoluteTimeInMillis()) alternative? // set the current execution time @@ -176,26 +176,30 @@ object PPLMonitorRunner : MonitorV2Runner() { pplQueryResults[pplTrigger.id] = queryResponseJson if (triggered) { + // if trigger is on result set mode, this list will have exactly 1 element + // if trigger is on per result mode, this list will have as many elements as the query results had rows + val preparedQueryResults = prepareQueryResults(relevantQueryResultRows, pplTrigger.mode) + // generate alerts based on trigger mode // if this trigger is on result_set mode, this list contains exactly 1 alert // if this trigger is on per_result mode, this list has any alerts as there are relevant query results val thisTriggersGeneratedAlerts = generateAlerts( pplTrigger, pplMonitor, - relevantQueryResultRows, + preparedQueryResults, timeOfCurrentExecution ) // collect the generated alerts to be written to alerts index // if the trigger is on result_set mode - generatedAlerts.addAll(thisTriggersGeneratedAlerts) +// generatedAlerts.addAll(thisTriggersGeneratedAlerts) // update the trigger's last execution time for future suppression checks pplTrigger.lastTriggeredTime = timeOfCurrentExecution // send alert notifications for (action in pplTrigger.actions) { - for (alert in generatedAlerts) { + for (alert in thisTriggersGeneratedAlerts) { val pplTriggerExecutionContext = PPLTriggerExecutionContext( monitorV2, periodStart, @@ -205,6 +209,7 @@ object PPLMonitorRunner : MonitorV2Runner() { alert.queryResults ) + // TODO: store this in trigger action results and store in alerts runAction( action, pplTriggerExecutionContext, @@ -215,6 +220,12 @@ object PPLMonitorRunner : MonitorV2Runner() { ) } } + + // TODO: what if retry policy null? 
+ // write the alerts to the alerts index + monitorCtx.retryPolicy?.let { + saveAlertsV2(thisTriggersGeneratedAlerts, pplMonitor, it, nodeClient) + } } } catch (e: Exception) { logger.error("failed to run PPL Trigger for PPL Monitor ${pplMonitor.name}", e) @@ -222,12 +233,6 @@ object PPLMonitorRunner : MonitorV2Runner() { } } - // TODO: what if retry policy null? - // write the alerts to the alerts index - monitorCtx.retryPolicy?.let { - saveAlertsV2(generatedAlerts, pplMonitor, it, nodeClient) - } - // TODO: collect all triggers that were throttled, and if none were throttled, skip update monitor? saves on write requests // for suppression checking purposes, update the PPL Monitor in the alerting-config index // with updated last triggered times for each of its triggers @@ -350,6 +355,8 @@ object PPLMonitorRunner : MonitorV2Runner() { return customConditionQueryResponse } + // TODO: dont create a copy, just remove irrelevant rows from the original reference in-place + // this will eventually store just the rows that triggered the custom condition val relevantQueryResultRows = JSONObject() @@ -421,23 +428,45 @@ object PPLMonitorRunner : MonitorV2Runner() { return customCondition.substring(startIdx, endIdx) } + // prepares the query results to be passed into alerts and notifications based on trigger mode + // if result set, alert and notification simply stores all of the query results + // if per result, each alert and notification stores a single row of the query results + private fun prepareQueryResults(relevantQueryResultRows: JSONObject, triggerMode: TriggerMode): List { + // case: result set + if (triggerMode == TriggerMode.RESULT_SET) { + return listOf(relevantQueryResultRows) + } + + // case: per result + val individualRows = mutableListOf() + val numAlertsToGenerate = relevantQueryResultRows.getInt("total") + for (i in 0 until numAlertsToGenerate) { + val individualRow = JSONObject() + individualRow.put("schema", 
JSONArray(relevantQueryResultRows.getJSONArray("schema").toList())) + individualRow.put("datarows", JSONArray(relevantQueryResultRows.getJSONArray("datarows").getJSONArray(i).toList())) + individualRows.add(individualRow) + } + + return individualRows + } + private fun generateAlerts( pplTrigger: PPLTrigger, pplMonitor: PPLMonitor, - relevantQueryResultRows: JSONObject, + preparedQueryResults: List, timeOfCurrentExecution: Instant ): List { val expirationTime = pplTrigger.expireDuration?.millis?.let { timeOfCurrentExecution.plus(it, ChronoUnit.MILLIS) } val alertV2s = mutableListOf() - if (pplTrigger.mode == TriggerMode.RESULT_SET) { + for (queryResult in preparedQueryResults) { val alertV2 = AlertV2( monitorId = pplMonitor.id, monitorName = pplMonitor.name, monitorVersion = pplMonitor.version, triggerId = pplTrigger.id, triggerName = pplTrigger.name, - queryResults = relevantQueryResultRows.toMap(), + queryResults = queryResult.toMap(), state = Alert.State.ACTIVE, startTime = timeOfCurrentExecution, expirationTime = expirationTime, @@ -446,33 +475,9 @@ object PPLMonitorRunner : MonitorV2Runner() { actionExecutionResults = listOf(), ) alertV2s.add(alertV2) - } else { // TriggerMode.PER_RESULT - val numAlertsToGenerate = relevantQueryResultRows.getInt("total") - - for (i in 0 until numAlertsToGenerate) { - val individualRow = JSONObject() - individualRow.put("schema", JSONArray(relevantQueryResultRows.getJSONArray("schema").toList())) - individualRow.put("datarows", JSONArray(relevantQueryResultRows.getJSONArray("datarows").getJSONArray(i).toList())) - - val alertV2 = AlertV2( - monitorId = pplMonitor.id, - monitorName = pplMonitor.name, - monitorVersion = pplMonitor.version, - triggerId = pplTrigger.id, - triggerName = pplTrigger.name, - queryResults = individualRow.toMap(), - state = Alert.State.ACTIVE, - startTime = timeOfCurrentExecution, - expirationTime = expirationTime, - errorHistory = listOf(), - severity = pplTrigger.severity.value, - 
actionExecutionResults = listOf(), - ) - alertV2s.add(alertV2) - } } - return alertV2s.toList() // return an immutable list + return alertV2s.toList() // return as immutable list } private suspend fun saveAlertsV2( @@ -508,6 +513,9 @@ object PPLMonitorRunner : MonitorV2Runner() { val bulkRequest = BulkRequest().add(requestsToRetry).setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE) val bulkResponse: BulkResponse = client.suspendUntil { client.bulk(bulkRequest, it) } val failedResponses = (bulkResponse.items ?: arrayOf()).filter { it.isFailed } + failedResponses.forEach { + logger.info("write alerts failed responses: ${it.failureMessage}") + } requestsToRetry = failedResponses.filter { it.status() == RestStatus.TOO_MANY_REQUESTS } .map { bulkRequest.requests()[it.itemId] as IndexRequest } diff --git a/alerting/src/main/resources/org/opensearch/alerting/alerts/alert_mapping.json b/alerting/src/main/resources/org/opensearch/alerting/alerts/alert_mapping.json index a1cbb45c8..7e2592286 100644 --- a/alerting/src/main/resources/org/opensearch/alerting/alerts/alert_mapping.json +++ b/alerting/src/main/resources/org/opensearch/alerting/alerts/alert_mapping.json @@ -137,6 +137,18 @@ "error_message": { "type": "text" }, + "query_results": { + "type": "nested", + "properties": { + "schema": { + "type": "nested", + "dynamic": true + }, + "datarows": { + "type": "text" + } + } + }, "alert_history": { "type": "nested", "properties": { From dc5f445f28f747b634dee4e8b5ab4257f4e31713 Mon Sep 17 00:00:00 2001 From: Dennis Toepker Date: Sat, 6 Sep 2025 11:58:33 -0700 Subject: [PATCH 16/40] cleaning up based on deleted AlertV2 fields --- .../opensearch/alerting/PPLMonitorRunner.kt | 23 ++++++++----------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt b/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt index c4d43dfb2..059d33b1b 100644 --- 
a/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt @@ -190,6 +190,8 @@ object PPLMonitorRunner : MonitorV2Runner() { timeOfCurrentExecution ) + // TODO: save alert error message and action results in alert + // collect the generated alerts to be written to alerts index // if the trigger is on result_set mode // generatedAlerts.addAll(thisTriggersGeneratedAlerts) @@ -198,6 +200,7 @@ object PPLMonitorRunner : MonitorV2Runner() { pplTrigger.lastTriggeredTime = timeOfCurrentExecution // send alert notifications +// val actionExecutionResults = mutableListOf() for (action in pplTrigger.actions) { for (alert in thisTriggersGeneratedAlerts) { val pplTriggerExecutionContext = PPLTriggerExecutionContext( @@ -467,10 +470,7 @@ object PPLMonitorRunner : MonitorV2Runner() { triggerId = pplTrigger.id, triggerName = pplTrigger.name, queryResults = queryResult.toMap(), - state = Alert.State.ACTIVE, - startTime = timeOfCurrentExecution, expirationTime = expirationTime, - errorHistory = listOf(), severity = pplTrigger.severity.value, actionExecutionResults = listOf(), ) @@ -494,17 +494,12 @@ object PPLMonitorRunner : MonitorV2Runner() { // back we're ok if that acknowledgement is lost. It's easier to get the user to retry than for the runner to // spend time reloading the alert and writing it back. 
- when (alert.state) { - Alert.State.ACTIVE, Alert.State.ERROR -> { - listOf>( - IndexRequest(AlertIndices.ALERT_INDEX) - .routing(pplMonitor.id) // set routing ID to PPL Monitor ID - .source(alert.toXContent(XContentFactory.jsonBuilder(), ToXContent.EMPTY_PARAMS)) - .id(if (alert.id != Alert.NO_ID) alert.id else null) - ) - } - else -> throw IllegalStateException("trying to save non ACTIVE alert, unimplemented territory") - } + listOf>( + IndexRequest(AlertIndices.ALERT_INDEX) + .routing(pplMonitor.id) // set routing ID to PPL Monitor ID + .source(alert.toXContent(XContentFactory.jsonBuilder(), ToXContent.EMPTY_PARAMS)) + .id(if (alert.id != Alert.NO_ID) alert.id else null) + ) } if (requestsToRetry.isEmpty()) return From 2f9974c2476b34a7bcc6e372581437a107137adf Mon Sep 17 00:00:00 2001 From: Dennis Toepker Date: Mon, 8 Sep 2025 06:23:46 -0700 Subject: [PATCH 17/40] various cleanup --- .../org/opensearch/alerting/AlertV2Expirer.kt | 7 +- .../opensearch/alerting/PPLMonitorRunner.kt | 138 +++++++++++------- .../alerting/alerts/alert_mapping.json | 3 + 3 files changed, 88 insertions(+), 60 deletions(-) diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/AlertV2Expirer.kt b/alerting/src/main/kotlin/org/opensearch/alerting/AlertV2Expirer.kt index d92df62a7..905e25755 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/AlertV2Expirer.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/AlertV2Expirer.kt @@ -79,8 +79,7 @@ class AlertV2Expirer( } private fun expireAlertV2s() { - if (!areAlertsIndicesInitialized()) { - // TODO: edge case: what if alert history indices are present but regular alerts index is absent + if (!areAlertsIndicesPresent()) { return } @@ -107,7 +106,7 @@ class AlertV2Expirer( } } - private fun areAlertsIndicesInitialized(): Boolean { - return alertIndexInitialized && alertHistoryIndexInitialized + private fun areAlertsIndicesPresent(): Boolean { + return alertIndexInitialized || alertHistoryIndexInitialized } } diff --git 
a/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt b/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt index 059d33b1b..f5149e611 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt @@ -17,7 +17,7 @@ import org.opensearch.alerting.opensearchapi.suspendUntil import org.opensearch.alerting.script.PPLTriggerExecutionContext import org.opensearch.common.unit.TimeValue import org.opensearch.common.xcontent.XContentFactory -import org.opensearch.commons.alerting.model.ActionRunResult +import org.opensearch.commons.alerting.alerts.AlertError import org.opensearch.commons.alerting.model.Alert import org.opensearch.commons.alerting.model.AlertV2 import org.opensearch.commons.alerting.model.MonitorV2 @@ -30,7 +30,9 @@ import org.opensearch.commons.alerting.model.PPLTrigger.NumResultsCondition import org.opensearch.commons.alerting.model.PPLTrigger.TriggerMode import org.opensearch.commons.alerting.model.PPLTriggerRunResult import org.opensearch.commons.alerting.model.ScheduledJob.Companion.SCHEDULED_JOBS_INDEX +import org.opensearch.commons.alerting.model.TriggerV2 import org.opensearch.commons.alerting.model.action.Action +import org.opensearch.commons.alerting.model.userErrorMessage import org.opensearch.commons.ppl.PPLPluginInterface import org.opensearch.core.common.Strings import org.opensearch.core.rest.RestStatus @@ -76,15 +78,13 @@ object PPLMonitorRunner : MonitorV2Runner() { // create some objects that will be used later val triggerResults = mutableMapOf() - val pplQueryResults = mutableMapOf() -// val generatedAlerts = mutableListOf() + val pplQueryResults = mutableMapOf>() - // TODO: Instant.ofEpochMilli(monitorCtx.threadPool!!.absoluteTimeInMillis()) alternative? 
// set the current execution time - val timeOfCurrentExecution = Instant.now() + // use threadpool time for cross node consistency + val timeOfCurrentExecution = Instant.ofEpochMilli(MonitorRunnerService.monitorCtx.threadPool!!.absoluteTimeInMillis()) // TODO: should alerting v1 and v2 alerts index be separate? - // TODO: should alerting v1 and v2 alerting-config index be separate? try { // TODO: write generated V2 alerts to existing alerts v1 index for now, revisit this decision monitorCtx.alertIndices!!.createOrUpdateAlertIndex() @@ -104,15 +104,17 @@ object PPLMonitorRunner : MonitorV2Runner() { logger.info("time filtered query: $timeFilteredQuery") // run each trigger - for (trigger in pplMonitor.triggers) { + for (pplTrigger in pplMonitor.triggers) { try { - val pplTrigger = trigger as PPLTrigger - // check for suppression and skip execution // before even running the trigger itself val suppressed = checkForSuppress(pplTrigger, timeOfCurrentExecution) if (suppressed) { logger.info("suppressing trigger ${pplTrigger.name} from monitor ${pplMonitor.name}") + + // automatically set this trigger to untriggered + triggerResults[pplTrigger.id] = PPLTriggerRunResult(pplTrigger.name, false, null) + continue } logger.info("suppression check passed, executing trigger ${pplTrigger.name} from monitor ${pplMonitor.name}") @@ -173,7 +175,7 @@ object PPLMonitorRunner : MonitorV2Runner() { // store the trigger execution and ppl query results for // trigger execution response and notification message context triggerResults[pplTrigger.id] = PPLTriggerRunResult(pplTrigger.name, triggered, null) - pplQueryResults[pplTrigger.id] = queryResponseJson + pplQueryResults[pplTrigger.id] = queryResponseJson.toMap() if (triggered) { // if trigger is on result set mode, this list will have exactly 1 element @@ -190,8 +192,6 @@ object PPLMonitorRunner : MonitorV2Runner() { timeOfCurrentExecution ) - // TODO: save alert error message and action results in alert - // collect the generated 
alerts to be written to alerts index // if the trigger is on result_set mode // generatedAlerts.addAll(thisTriggersGeneratedAlerts) @@ -212,34 +212,43 @@ object PPLMonitorRunner : MonitorV2Runner() { alert.queryResults ) - // TODO: store this in trigger action results and store in alerts runAction( action, pplTriggerExecutionContext, monitorCtx, pplMonitor, - dryRun, - timeOfCurrentExecution + dryRun ) } } - // TODO: what if retry policy null? // write the alerts to the alerts index monitorCtx.retryPolicy?.let { saveAlertsV2(thisTriggersGeneratedAlerts, pplMonitor, it, nodeClient) } } } catch (e: Exception) { - logger.error("failed to run PPL Trigger for PPL Monitor ${pplMonitor.name}", e) + logger.error("failed to run PPL Trigger ${pplTrigger.name} for PPL Monitor ${pplMonitor.name}", e) + + // generate an alert with an error message + monitorCtx.retryPolicy?.let { + saveAlertsV2( + generateErrorAlert(pplTrigger, pplMonitor, e, timeOfCurrentExecution), + pplMonitor, + it, + nodeClient + ) + } + continue } } - // TODO: collect all triggers that were throttled, and if none were throttled, skip update monitor? 
saves on write requests - // for suppression checking purposes, update the PPL Monitor in the alerting-config index + // for suppression checking purposes, reindex the PPL Monitor into the alerting-config index // with updated last triggered times for each of its triggers - updateMonitorWithLastTriggeredTimes(pplMonitor, nodeClient) + if (triggerResults.any { it.value.triggered }) { + updateMonitorWithLastTriggeredTimes(pplMonitor, nodeClient) + } return PPLMonitorRunResult( pplMonitor.name, @@ -247,7 +256,7 @@ object PPLMonitorRunner : MonitorV2Runner() { periodStart, periodEnd, triggerResults, - pplQueryResults.map { it.key to it.value.toMap() }.toMap() + pplQueryResults ) } @@ -272,7 +281,7 @@ object PPLMonitorRunner : MonitorV2Runner() { private fun addTimeFilter(query: String, periodStart: Instant, periodEnd: Instant, lookBackWindow: TimeValue?): String { // inject time filter into PPL query to only query for data within the (periodStart, periodEnd) interval // TODO: if query contains "_time", "span", "earliest", "latest", skip adding filter - // TODO: pending https://github.com/opensearch-project/sql/issues/3969 + // pending https://github.com/opensearch-project/sql/issues/3969 // for now assume "_time" field is always present in customer data // if the raw query contained any time check whatsoever, skip adding a time filter internally @@ -319,7 +328,7 @@ object PPLMonitorRunner : MonitorV2Runner() { // call PPL plugin to execute time filtered query val transportPplQueryRequest = TransportPPLQueryRequest( query, - JSONObject(mapOf(PPL_SQL_QUERY_FIELD to query)), // TODO: what is the purpose of this arg? 
+ JSONObject(mapOf(PPL_SQL_QUERY_FIELD to query)), null // null path falls back to a default path internal to SQL/PPL Plugin ) @@ -358,8 +367,6 @@ object PPLMonitorRunner : MonitorV2Runner() { return customConditionQueryResponse } - // TODO: dont create a copy, just remove irrelevant rows from the original reference in-place - // this will eventually store just the rows that triggered the custom condition val relevantQueryResultRows = JSONObject() @@ -470,9 +477,9 @@ object PPLMonitorRunner : MonitorV2Runner() { triggerId = pplTrigger.id, triggerName = pplTrigger.name, queryResults = queryResult.toMap(), + triggeredTime = timeOfCurrentExecution, expirationTime = expirationTime, - severity = pplTrigger.severity.value, - actionExecutionResults = listOf(), + severity = pplTrigger.severity.value ) alertV2s.add(alertV2) } @@ -480,6 +487,34 @@ object PPLMonitorRunner : MonitorV2Runner() { return alertV2s.toList() // return as immutable list } + private fun generateErrorAlert( + pplTrigger: PPLTrigger, + pplMonitor: PPLMonitor, + exception: Exception, + timeOfCurrentExecution: Instant + ): List { + val expirationTime = pplTrigger.expireDuration?.millis?.let { timeOfCurrentExecution.plus(it, ChronoUnit.MILLIS) } + + val errorMessage = "Failed to run PPL Trigger ${pplTrigger.name} from PPL Monitor ${pplMonitor.name}: " + + exception.userErrorMessage() + val obfuscatedErrorMessage = AlertError.obfuscateIPAddresses(errorMessage) + + val alertV2 = AlertV2( + monitorId = pplMonitor.id, + monitorName = pplMonitor.name, + monitorVersion = pplMonitor.version, + triggerId = pplTrigger.id, + triggerName = pplTrigger.name, + queryResults = mapOf(), + triggeredTime = timeOfCurrentExecution, + expirationTime = expirationTime, + errorMessage = obfuscatedErrorMessage, + severity = TriggerV2.Severity.ERROR.value + ) + + return listOf(alertV2) + } + private suspend fun saveAlertsV2( alerts: List, pplMonitor: PPLMonitor, @@ -489,11 +524,6 @@ object PPLMonitorRunner : MonitorV2Runner() { 
logger.info("received alerts: $alerts") var requestsToRetry = alerts.flatMap { alert -> - // We don't want to set the version when saving alerts because the MonitorRunner has first priority when writing alerts. - // In the rare event that a user acknowledges an alert between when it's read and when it's written - // back we're ok if that acknowledgement is lost. It's easier to get the user to retry than for the runner to - // spend time reloading the alert and writing it back. - listOf>( IndexRequest(AlertIndices.ALERT_INDEX) .routing(pplMonitor.id) // set routing ID to PPL Monitor ID @@ -537,28 +567,28 @@ object PPLMonitorRunner : MonitorV2Runner() { triggerCtx: PPLTriggerExecutionContext, monitorCtx: MonitorRunnerExecutionContext, pplMonitor: PPLMonitor, - dryrun: Boolean, - timeOfCurrentExecution: Instant - ): ActionRunResult { - return try { - val actionOutput = mutableMapOf() - actionOutput[Action.SUBJECT] = if (action.subjectTemplate != null) - MonitorRunnerService.compileTemplateV2(action.subjectTemplate!!, triggerCtx) - else "" - actionOutput[Action.MESSAGE] = MonitorRunnerService.compileTemplateV2(action.messageTemplate, triggerCtx) - if (Strings.isNullOrEmpty(actionOutput[Action.MESSAGE])) { - throw IllegalStateException("Message content missing in the Destination with id: ${action.destinationId}") - } + dryrun: Boolean + ) { + // this function can throw an exception, which is caught by the try + // catch in runMonitor() to generate an error alert + val actionOutput = mutableMapOf() + actionOutput[Action.SUBJECT] = if (action.subjectTemplate != null) + MonitorRunnerService.compileTemplateV2(action.subjectTemplate!!, triggerCtx) + else "" + actionOutput[Action.MESSAGE] = MonitorRunnerService.compileTemplateV2(action.messageTemplate, triggerCtx) + if (Strings.isNullOrEmpty(actionOutput[Action.MESSAGE])) { + throw IllegalStateException("Message content missing in the Destination with id: ${action.destinationId}") + } - if (!dryrun) { + if (!dryrun) { // val 
client = monitorCtx.client - actionOutput[Action.MESSAGE_ID] = getConfigAndSendNotification( - action, - monitorCtx, - actionOutput[Action.SUBJECT], - actionOutput[Action.MESSAGE]!! - ) - // TODO: use this block when security plugin is enabled + actionOutput[Action.MESSAGE_ID] = getConfigAndSendNotification( + action, + monitorCtx, + actionOutput[Action.SUBJECT], + actionOutput[Action.MESSAGE]!! + ) + // TODO: use this block when security plugin is enabled // client!!.threadPool().threadContext.stashContext().use { // withClosableContext( // InjectorContextElement( @@ -577,10 +607,6 @@ object PPLMonitorRunner : MonitorV2Runner() { // ) // } // } - } - ActionRunResult(action.id, action.name, actionOutput, false, timeOfCurrentExecution, null) - } catch (e: Exception) { - ActionRunResult(action.id, action.name, mapOf(), false, timeOfCurrentExecution, e) } } } diff --git a/alerting/src/main/resources/org/opensearch/alerting/alerts/alert_mapping.json b/alerting/src/main/resources/org/opensearch/alerting/alerts/alert_mapping.json index 7e2592286..19c4f6348 100644 --- a/alerting/src/main/resources/org/opensearch/alerting/alerts/alert_mapping.json +++ b/alerting/src/main/resources/org/opensearch/alerting/alerts/alert_mapping.json @@ -131,6 +131,9 @@ "end_time": { "type": "date" }, + "triggered_time": { + "type": "date" + }, "expiration_time": { "type": "date" }, From 2f57c5c2fe391ab7bdd4bf3c9cc1560a6dafc620 Mon Sep 17 00:00:00 2001 From: Dennis Toepker Date: Mon, 8 Sep 2025 08:25:40 -0700 Subject: [PATCH 18/40] removing execute monitor actions to move to common utils, add to alert_mapping --- .../org/opensearch/alerting/AlertingPlugin.kt | 3 +- .../alerting/MonitorRunnerService.kt | 10 ++- .../alerting/action/ExecuteMonitorV2Action.kt | 11 --- .../action/ExecuteMonitorV2Request.kt | 69 ------------------- .../action/ExecuteMonitorV2Response.kt | 33 --------- .../resthandler/RestExecuteMonitorV2Action.kt | 8 +-- .../TransportExecuteMonitorV2Action.kt | 10 +-- 
.../alerting/alerts/alert_mapping.json | 3 + 8 files changed, 17 insertions(+), 130 deletions(-) delete mode 100644 alerting/src/main/kotlin/org/opensearch/alerting/action/ExecuteMonitorV2Action.kt delete mode 100644 alerting/src/main/kotlin/org/opensearch/alerting/action/ExecuteMonitorV2Request.kt delete mode 100644 alerting/src/main/kotlin/org/opensearch/alerting/action/ExecuteMonitorV2Response.kt diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/AlertingPlugin.kt b/alerting/src/main/kotlin/org/opensearch/alerting/AlertingPlugin.kt index d5e9b59d5..0ec8b6ca8 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/AlertingPlugin.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/AlertingPlugin.kt @@ -7,7 +7,6 @@ package org.opensearch.alerting import org.opensearch.action.ActionRequest import org.opensearch.alerting.action.ExecuteMonitorAction -import org.opensearch.alerting.action.ExecuteMonitorV2Action import org.opensearch.alerting.action.ExecuteWorkflowAction import org.opensearch.alerting.action.GetDestinationsAction import org.opensearch.alerting.action.GetEmailAccountAction @@ -263,7 +262,7 @@ internal class AlertingPlugin : PainlessExtension, ActionPlugin, ScriptPlugin, R ActionPlugin.ActionHandler(AlertingActions.GET_MONITOR_V2_ACTION_TYPE, TransportGetMonitorV2Action::class.java), ActionPlugin.ActionHandler(AlertingActions.SEARCH_MONITORS_V2_ACTION_TYPE, TransportSearchMonitorV2Action::class.java), ActionPlugin.ActionHandler(AlertingActions.DELETE_MONITOR_V2_ACTION_TYPE, TransportDeleteMonitorV2Action::class.java), - ActionPlugin.ActionHandler(ExecuteMonitorV2Action.INSTANCE, TransportExecuteMonitorV2Action::class.java), + ActionPlugin.ActionHandler(AlertingActions.EXECUTE_MONITOR_V2_ACTION_TYPE, TransportExecuteMonitorV2Action::class.java), ) } diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/MonitorRunnerService.kt b/alerting/src/main/kotlin/org/opensearch/alerting/MonitorRunnerService.kt index 493891977..bfb956367 
100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/MonitorRunnerService.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/MonitorRunnerService.kt @@ -18,9 +18,6 @@ import org.opensearch.action.support.clustermanager.AcknowledgedResponse import org.opensearch.alerting.action.ExecuteMonitorAction import org.opensearch.alerting.action.ExecuteMonitorRequest import org.opensearch.alerting.action.ExecuteMonitorResponse -import org.opensearch.alerting.action.ExecuteMonitorV2Action -import org.opensearch.alerting.action.ExecuteMonitorV2Request -import org.opensearch.alerting.action.ExecuteMonitorV2Response import org.opensearch.alerting.action.ExecuteWorkflowAction import org.opensearch.alerting.action.ExecuteWorkflowRequest import org.opensearch.alerting.action.ExecuteWorkflowResponse @@ -63,6 +60,9 @@ import org.opensearch.cluster.service.ClusterService import org.opensearch.common.lifecycle.AbstractLifecycleComponent import org.opensearch.common.settings.Settings import org.opensearch.common.unit.TimeValue +import org.opensearch.commons.alerting.action.AlertingActions +import org.opensearch.commons.alerting.action.ExecuteMonitorV2Request +import org.opensearch.commons.alerting.action.ExecuteMonitorV2Response import org.opensearch.commons.alerting.model.Alert import org.opensearch.commons.alerting.model.DocLevelMonitorInput import org.opensearch.commons.alerting.model.Monitor @@ -443,7 +443,7 @@ object MonitorRunnerService : JobRunner, CoroutineScope, AbstractLifecycleCompon ) monitorCtx.client!!.suspendUntil { monitorCtx.client!!.execute( - ExecuteMonitorV2Action.INSTANCE, + AlertingActions.EXECUTE_MONITOR_V2_ACTION_TYPE, executeMonitorV2Request, it ) @@ -594,8 +594,6 @@ object MonitorRunnerService : JobRunner, CoroutineScope, AbstractLifecycleCompon dryrun: Boolean, transportService: TransportService, ): MonitorV2RunResult<*> { - // Updating the scheduled job index at the start of monitor execution runs for when there is an upgrade the the schema 
mapping - // has not been updated. if (!IndexUtils.scheduledJobIndexUpdated && monitorCtx.clusterService != null && monitorCtx.client != null) { IndexUtils.updateIndexMapping( ScheduledJob.SCHEDULED_JOBS_INDEX, diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/action/ExecuteMonitorV2Action.kt b/alerting/src/main/kotlin/org/opensearch/alerting/action/ExecuteMonitorV2Action.kt deleted file mode 100644 index 3d7320f7c..000000000 --- a/alerting/src/main/kotlin/org/opensearch/alerting/action/ExecuteMonitorV2Action.kt +++ /dev/null @@ -1,11 +0,0 @@ -package org.opensearch.alerting.action - -import org.opensearch.action.ActionType - -// TODO: should this and the ExecuteMonitorV2Request/Response be moved to common utils? -class ExecuteMonitorV2Action private constructor() : ActionType(NAME, ::ExecuteMonitorV2Response) { - companion object { - val INSTANCE = ExecuteMonitorV2Action() - const val NAME = "cluster:admin/opensearch/alerting/v2/monitor/execute" - } -} diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/action/ExecuteMonitorV2Request.kt b/alerting/src/main/kotlin/org/opensearch/alerting/action/ExecuteMonitorV2Request.kt deleted file mode 100644 index f1dded881..000000000 --- a/alerting/src/main/kotlin/org/opensearch/alerting/action/ExecuteMonitorV2Request.kt +++ /dev/null @@ -1,69 +0,0 @@ -package org.opensearch.alerting.action - -import org.opensearch.action.ActionRequest -import org.opensearch.action.ActionRequestValidationException -import org.opensearch.action.ValidateActions -import org.opensearch.common.unit.TimeValue -import org.opensearch.commons.alerting.model.MonitorV2 -import org.opensearch.core.common.io.stream.StreamInput -import org.opensearch.core.common.io.stream.StreamOutput -import java.io.IOException - -class ExecuteMonitorV2Request : ActionRequest { - val dryrun: Boolean - val monitorId: String? // exactly one of monitorId or monitor must be non-null - val monitorV2: MonitorV2? - val requestStart: TimeValue? 
- val requestEnd: TimeValue - - constructor( - dryrun: Boolean, - monitorId: String?, - monitorV2: MonitorV2?, - requestStart: TimeValue? = null, - requestEnd: TimeValue, - ) : super() { - this.dryrun = dryrun - this.monitorId = monitorId - this.monitorV2 = monitorV2 - this.requestStart = requestStart - this.requestEnd = requestEnd - } - - @Throws(IOException::class) - constructor(sin: StreamInput) : this( - sin.readBoolean(), // dryrun - sin.readOptionalString(), // monitorId - if (sin.readBoolean()) { - MonitorV2.readFrom(sin) // monitor - } else null, - sin.readOptionalTimeValue(), - sin.readTimeValue(), // requestEnd - ) - - override fun validate(): ActionRequestValidationException? { - // ensure exactly one of monitor ID or monitorV2 is supplied - var exception: ActionRequestValidationException? = null - if (monitorV2 == null && monitorId == null) { - exception = ValidateActions.addValidationError( - "Neither a monitor ID or monitor object was supplied", - exception - ) - } - return exception - } - - @Throws(IOException::class) - override fun writeTo(out: StreamOutput) { - out.writeBoolean(dryrun) - out.writeOptionalString(monitorId) - if (monitorV2 != null) { - out.writeBoolean(true) - monitorV2.writeTo(out) - } else { - out.writeBoolean(false) - } - out.writeOptionalTimeValue(requestStart) - out.writeTimeValue(requestEnd) - } -} diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/action/ExecuteMonitorV2Response.kt b/alerting/src/main/kotlin/org/opensearch/alerting/action/ExecuteMonitorV2Response.kt deleted file mode 100644 index 5c2cf7ead..000000000 --- a/alerting/src/main/kotlin/org/opensearch/alerting/action/ExecuteMonitorV2Response.kt +++ /dev/null @@ -1,33 +0,0 @@ -package org.opensearch.alerting.action - -import org.opensearch.commons.alerting.model.MonitorV2RunResult -import org.opensearch.core.action.ActionResponse -import org.opensearch.core.common.io.stream.StreamInput -import org.opensearch.core.common.io.stream.StreamOutput -import 
org.opensearch.core.xcontent.ToXContent -import org.opensearch.core.xcontent.ToXContentObject -import org.opensearch.core.xcontent.XContentBuilder -import java.io.IOException - -class ExecuteMonitorV2Response : ActionResponse, ToXContentObject { - val monitorV2RunResult: MonitorV2RunResult<*> - - constructor(monitorV2RunResult: MonitorV2RunResult<*>) : super() { - this.monitorV2RunResult = monitorV2RunResult - } - - @Throws(IOException::class) - constructor(sin: StreamInput) : this( - MonitorV2RunResult.readFrom(sin) // monitorRunResult - ) - - @Throws(IOException::class) - override fun writeTo(out: StreamOutput) { - monitorV2RunResult.writeTo(out) - } - - @Throws(IOException::class) - override fun toXContent(builder: XContentBuilder, params: ToXContent.Params): XContentBuilder { - return monitorV2RunResult.toXContent(builder, ToXContent.EMPTY_PARAMS) - } -} diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestExecuteMonitorV2Action.kt b/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestExecuteMonitorV2Action.kt index da2a59a38..d05126c1a 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestExecuteMonitorV2Action.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestExecuteMonitorV2Action.kt @@ -2,9 +2,9 @@ package org.opensearch.alerting.resthandler import org.apache.logging.log4j.LogManager import org.opensearch.alerting.AlertingPlugin -import org.opensearch.alerting.action.ExecuteMonitorV2Action -import org.opensearch.alerting.action.ExecuteMonitorV2Request import org.opensearch.common.unit.TimeValue +import org.opensearch.commons.alerting.action.AlertingActions +import org.opensearch.commons.alerting.action.ExecuteMonitorV2Request import org.opensearch.commons.alerting.model.MonitorV2 import org.opensearch.commons.alerting.util.AlertingException import org.opensearch.core.xcontent.XContentParser.Token.START_OBJECT @@ -46,7 +46,7 @@ class RestExecuteMonitorV2Action : 
BaseRestHandler() { if (request.hasParam("monitorV2Id")) { val monitorV2Id = request.param("monitorV2Id") val execMonitorV2Request = ExecuteMonitorV2Request(dryrun, monitorV2Id, null, null, requestEnd) - client.execute(ExecuteMonitorV2Action.INSTANCE, execMonitorV2Request, RestToXContentListener(channel)) + client.execute(AlertingActions.EXECUTE_MONITOR_V2_ACTION_TYPE, execMonitorV2Request, RestToXContentListener(channel)) } else { val xcp = request.contentParser() ensureExpectedToken(START_OBJECT, xcp.nextToken(), xcp) @@ -59,7 +59,7 @@ class RestExecuteMonitorV2Action : BaseRestHandler() { } val execMonitorV2Request = ExecuteMonitorV2Request(dryrun, null, monitorV2, null, requestEnd) - client.execute(ExecuteMonitorV2Action.INSTANCE, execMonitorV2Request, RestToXContentListener(channel)) + client.execute(AlertingActions.EXECUTE_MONITOR_V2_ACTION_TYPE, execMonitorV2Request, RestToXContentListener(channel)) } } } diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportExecuteMonitorV2Action.kt b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportExecuteMonitorV2Action.kt index 28587ca0f..4e831de3e 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportExecuteMonitorV2Action.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportExecuteMonitorV2Action.kt @@ -10,9 +10,6 @@ import org.opensearch.action.get.GetResponse import org.opensearch.action.support.ActionFilters import org.opensearch.action.support.HandledTransportAction import org.opensearch.alerting.MonitorRunnerService -import org.opensearch.alerting.action.ExecuteMonitorV2Action -import org.opensearch.alerting.action.ExecuteMonitorV2Request -import org.opensearch.alerting.action.ExecuteMonitorV2Response import org.opensearch.alerting.settings.AlertingSettings import org.opensearch.cluster.service.ClusterService import org.opensearch.common.inject.Inject @@ -20,6 +17,9 @@ import org.opensearch.common.settings.Settings 
import org.opensearch.common.xcontent.LoggingDeprecationHandler import org.opensearch.common.xcontent.XContentHelper import org.opensearch.common.xcontent.XContentType +import org.opensearch.commons.alerting.action.AlertingActions +import org.opensearch.commons.alerting.action.ExecuteMonitorV2Request +import org.opensearch.commons.alerting.action.ExecuteMonitorV2Response import org.opensearch.commons.alerting.model.MonitorV2 import org.opensearch.commons.alerting.model.PPLMonitor import org.opensearch.commons.alerting.model.PPLMonitor.Companion.PPL_MONITOR_TYPE @@ -44,7 +44,7 @@ class TransportExecuteMonitorV2Action @Inject constructor( val xContentRegistry: NamedXContentRegistry, private val settings: Settings ) : HandledTransportAction( - ExecuteMonitorV2Action.NAME, transportService, actionFilters, ::ExecuteMonitorV2Request + AlertingActions.EXECUTE_MONITOR_V2_ACTION_NAME, transportService, actionFilters, ::ExecuteMonitorV2Request ) { @Volatile private var indexTimeout = AlertingSettings.INDEX_TIMEOUT.get(settings) @@ -60,7 +60,7 @@ class TransportExecuteMonitorV2Action @Inject constructor( // get execution time interval val (periodStart, periodEnd) = if (execMonitorV2Request.requestStart != null) { Pair( - Instant.ofEpochMilli(execMonitorV2Request.requestStart.millis), + Instant.ofEpochMilli(execMonitorV2Request.requestStart!!.millis), Instant.ofEpochMilli(execMonitorV2Request.requestEnd.millis) ) } else { diff --git a/alerting/src/main/resources/org/opensearch/alerting/alerts/alert_mapping.json b/alerting/src/main/resources/org/opensearch/alerting/alerts/alert_mapping.json index 19c4f6348..6ae614f71 100644 --- a/alerting/src/main/resources/org/opensearch/alerting/alerts/alert_mapping.json +++ b/alerting/src/main/resources/org/opensearch/alerting/alerts/alert_mapping.json @@ -149,6 +149,9 @@ }, "datarows": { "type": "text" + }, + "total": { + "type": "integer" } } }, From 7b6d90df71ad4fe8b028718731c5f7b362a1389e Mon Sep 17 00:00:00 2001 From: Dennis Toepker 
Date: Mon, 8 Sep 2025 08:50:39 -0700 Subject: [PATCH 19/40] adding executionId to alert generations --- .../org/opensearch/alerting/PPLMonitorRunner.kt | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt b/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt index f5149e611..a1ed0d5cf 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt @@ -189,6 +189,7 @@ object PPLMonitorRunner : MonitorV2Runner() { pplTrigger, pplMonitor, preparedQueryResults, + executionId, timeOfCurrentExecution ) @@ -233,7 +234,7 @@ object PPLMonitorRunner : MonitorV2Runner() { // generate an alert with an error message monitorCtx.retryPolicy?.let { saveAlertsV2( - generateErrorAlert(pplTrigger, pplMonitor, e, timeOfCurrentExecution), + generateErrorAlert(pplTrigger, pplMonitor, e, executionId, timeOfCurrentExecution), pplMonitor, it, nodeClient @@ -464,6 +465,7 @@ object PPLMonitorRunner : MonitorV2Runner() { pplTrigger: PPLTrigger, pplMonitor: PPLMonitor, preparedQueryResults: List, + executionId: String, timeOfCurrentExecution: Instant ): List { val expirationTime = pplTrigger.expireDuration?.millis?.let { timeOfCurrentExecution.plus(it, ChronoUnit.MILLIS) } @@ -479,7 +481,8 @@ object PPLMonitorRunner : MonitorV2Runner() { queryResults = queryResult.toMap(), triggeredTime = timeOfCurrentExecution, expirationTime = expirationTime, - severity = pplTrigger.severity.value + severity = pplTrigger.severity.value, + executionId = executionId ) alertV2s.add(alertV2) } @@ -491,6 +494,7 @@ object PPLMonitorRunner : MonitorV2Runner() { pplTrigger: PPLTrigger, pplMonitor: PPLMonitor, exception: Exception, + executionId: String, timeOfCurrentExecution: Instant ): List { val expirationTime = pplTrigger.expireDuration?.millis?.let { timeOfCurrentExecution.plus(it, ChronoUnit.MILLIS) } @@ 
-509,7 +513,8 @@ object PPLMonitorRunner : MonitorV2Runner() { triggeredTime = timeOfCurrentExecution, expirationTime = expirationTime, errorMessage = obfuscatedErrorMessage, - severity = TriggerV2.Severity.ERROR.value + severity = TriggerV2.Severity.ERROR.value, + executionId = executionId ) return listOf(alertV2) From a66165c42922adad7c9778a184106f1802c2c071 Mon Sep 17 00:00:00 2001 From: Dennis Toepker Date: Mon, 8 Sep 2025 10:58:16 -0700 Subject: [PATCH 20/40] misc cleanup --- .../alerting/MonitorRunnerService.kt | 2 ++ .../opensearch/alerting/PPLMonitorRunner.kt | 27 +++++++++++++++-- .../resthandler/RestSearchMonitorV2Action.kt | 5 ---- .../alerting/service/DeleteMonitorService.kt | 24 +++++++++++---- .../TransportDeleteMonitorV2Action.kt | 8 ++--- .../TransportIndexMonitorV2Action.kt | 29 ++----------------- .../opensearch/alerting/util/IndexUtils.kt | 14 +++++++++ 7 files changed, 65 insertions(+), 44 deletions(-) diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/MonitorRunnerService.kt b/alerting/src/main/kotlin/org/opensearch/alerting/MonitorRunnerService.kt index bfb956367..a310eb430 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/MonitorRunnerService.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/MonitorRunnerService.kt @@ -331,6 +331,8 @@ object MonitorRunnerService : JobRunner, CoroutineScope, AbstractLifecycleCompon } } + // TODO: if MonitorV2 was deleted, skip trying to move alerts + // cluster throws failed to move alerts exception whenever a MonitorV2 is deleted override fun postDelete(jobId: String) { launch { try { diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt b/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt index a1ed0d5cf..23327effd 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt @@ -151,7 +151,7 @@ object PPLMonitorRunner : MonitorV2Runner() 
{ // for custom triggers, that's only rows that evaluated to true val relevantQueryResultRows = if (pplTrigger.conditionType == ConditionType.NUMBER_OF_RESULTS) { // number of results trigger - queryResponseJson + getQueryResponseWithoutSize(queryResponseJson) } else { // custom condition trigger evaluateCustomConditionTrigger(queryResponseJson, pplTrigger) @@ -357,6 +357,28 @@ object PPLMonitorRunner : MonitorV2Runner() { } } + private fun getQueryResponseWithoutSize(queryResponseJson: JSONObject): JSONObject { + // this will eventually store a deep copy of just the rows that triggered the custom condition + val queryResponseDeepCopy = JSONObject() + + // first add a deep copy of the schema + queryResponseDeepCopy.put("schema", JSONArray(queryResponseJson.getJSONArray("schema").toList())) + + // append empty datarows list, to be populated later + queryResponseDeepCopy.put("datarows", JSONArray()) + + val dataRowList = queryResponseJson.getJSONArray("datarows") + for (i in 0 until dataRowList.length()) { + val dataRow = dataRowList.getJSONArray(i) + queryResponseDeepCopy.getJSONArray("datarows").put(JSONArray(dataRow.toList())) + } + + // include the total but not the size field of the PPL Query response + queryResponseDeepCopy.put("total", queryResponseJson.getLong("total")) + + return queryResponseDeepCopy + } + private fun evaluateCustomConditionTrigger(customConditionQueryResponse: JSONObject, pplTrigger: PPLTrigger): JSONObject { // a PPL query with custom condition returning 0 results should imply a valid but not useful query. 
// do not trigger alert, but warn that query likely is not functioning as user intended @@ -368,7 +390,7 @@ object PPLMonitorRunner : MonitorV2Runner() { return customConditionQueryResponse } - // this will eventually store just the rows that triggered the custom condition + // this will eventually store a deep copy of just the rows that triggered the custom condition val relevantQueryResultRows = JSONObject() // first add a deep copy of the schema @@ -419,6 +441,7 @@ object PPLMonitorRunner : MonitorV2Runner() { } } + // include the total but not the size field of the PPL Query response relevantQueryResultRows.put("total", relevantQueryResultRows.getJSONArray("datarows").length()) // return only the rows that triggered the custom condition diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestSearchMonitorV2Action.kt b/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestSearchMonitorV2Action.kt index 9622165e1..7cb79a171 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestSearchMonitorV2Action.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestSearchMonitorV2Action.kt @@ -65,11 +65,6 @@ class RestSearchMonitorV2Action( override fun prepareRequest(request: RestRequest, client: NodeClient): RestChannelConsumer { log.debug("${request.method()} ${AlertingPlugin.MONITOR_V2_BASE_URI}/_search") -// val index = request.param("index", SCHEDULED_JOBS_INDEX) -// if (index != SCHEDULED_JOBS_INDEX && index != ALL_ALERT_INDEX_PATTERN) { -// throw IllegalArgumentException("Invalid index name.") -// } - val searchSourceBuilder = SearchSourceBuilder() searchSourceBuilder.parseXContent(request.contentOrSourceParamParser()) searchSourceBuilder.fetchSource(context(request)) diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/service/DeleteMonitorService.kt b/alerting/src/main/kotlin/org/opensearch/alerting/service/DeleteMonitorService.kt index 2711c1805..cd6c96516 100644 --- 
a/alerting/src/main/kotlin/org/opensearch/alerting/service/DeleteMonitorService.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/service/DeleteMonitorService.kt @@ -29,6 +29,7 @@ import org.opensearch.alerting.util.ScheduledJobUtils.Companion.WORKFLOW_DELEGAT import org.opensearch.alerting.util.ScheduledJobUtils.Companion.WORKFLOW_MONITOR_PATH import org.opensearch.alerting.util.use import org.opensearch.commons.alerting.action.DeleteMonitorResponse +import org.opensearch.commons.alerting.action.DeleteMonitorV2Response import org.opensearch.commons.alerting.model.Monitor import org.opensearch.commons.alerting.model.ScheduledJob import org.opensearch.commons.alerting.util.AlertingException @@ -51,7 +52,7 @@ object DeleteMonitorService : private val log = LogManager.getLogger(this.javaClass) private lateinit var client: Client - lateinit var lockService: LockService + private lateinit var lockService: LockService fun initialize( client: Client, @@ -74,8 +75,20 @@ object DeleteMonitorService : return DeleteMonitorResponse(deleteResponse.id, deleteResponse.version) } - // both Alerting v1 and v2 workflows use this function - suspend fun deleteMonitor(monitorId: String, refreshPolicy: RefreshPolicy): DeleteResponse { + /** + * Deletes the monitorV2, which does not come with other metadata and queries + * like doc level monitors + * @param monitorV2Id monitorV2 ID to be deleted + * @param refreshPolicy + */ + suspend fun deleteMonitorV2(monitorV2Id: String, refreshPolicy: RefreshPolicy): DeleteMonitorV2Response { + val deleteResponse = deleteMonitor(monitorV2Id, refreshPolicy) + deleteLock(monitorV2Id) + return DeleteMonitorV2Response(deleteResponse.id, deleteResponse.version) + } + + // both Alerting v1 and v2 workflows flow through this function + private suspend fun deleteMonitor(monitorId: String, refreshPolicy: RefreshPolicy): DeleteResponse { val deleteMonitorRequest = DeleteRequest(ScheduledJob.SCHEDULED_JOBS_INDEX, monitorId) 
.setRefreshPolicy(refreshPolicy) return client.suspendUntil { delete(deleteMonitorRequest, it) } @@ -167,11 +180,12 @@ object DeleteMonitorService : } } - suspend fun deleteLock(monitor: Monitor) { + private suspend fun deleteLock(monitor: Monitor) { deleteLock(monitor.id) } - suspend fun deleteLock(monitorId: String) { + // both Alerting v1 and v2 workflows flow through this function + private suspend fun deleteLock(monitorId: String) { client.suspendUntil { lockService.deleteLock(LockModel.generateLockId(monitorId), it) } } diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportDeleteMonitorV2Action.kt b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportDeleteMonitorV2Action.kt index 4af9ac095..4abe2b21c 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportDeleteMonitorV2Action.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportDeleteMonitorV2Action.kt @@ -50,15 +50,13 @@ class TransportDeleteMonitorV2Action @Inject constructor( // TOOD: when monitor is deleted, immediately expire all alerts it generated scope.launch { try { - // TODO: might be able to simply run DeleteMonitorService.deleteMonitor() directly, even if PPLMonitors have no metadata - val deleteResponse = DeleteMonitorService.deleteMonitor(request.monitorV2Id, request.refreshPolicy) - DeleteMonitorService.deleteLock(request.monitorV2Id) - actionListener.onResponse(DeleteMonitorV2Response(deleteResponse.id, deleteResponse.version)) + val deleteResponse = DeleteMonitorService.deleteMonitorV2(request.monitorV2Id, request.refreshPolicy) + actionListener.onResponse(deleteResponse) } catch (e: Exception) { actionListener.onFailure(e) } - // TODO: logic for deleting alerts + // we do not expire the alerts associated with the deleted monitor, but instead let its expiration time delete it } } } diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportIndexMonitorV2Action.kt 
b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportIndexMonitorV2Action.kt index 021831419..be5e1ed5e 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportIndexMonitorV2Action.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportIndexMonitorV2Action.kt @@ -178,18 +178,6 @@ class TransportIndexMonitorV2Action @Inject constructor( * and compare this to the [maxMonitorCount]. Requests that breach this threshold will be rejected. */ private fun prepareMonitorIndexing(indexMonitorRequest: IndexMonitorV2Request, actionListener: ActionListener) { - - // Below check needs to be async operations and needs to be refactored issue#269 - // checkForDisallowedDestinations(allowList) - - // TODO: checks for throttling/suppression, should not be needed here, done in common utils when parsing PPLTriggers -// try { -// validateActionThrottle(request.monitor, maxActionThrottle, TimeValue.timeValueMinutes(1)) -// } catch (e: RuntimeException) { -// actionListener.onFailure(AlertingException.wrap(e)) -// return -// } - if (indexMonitorRequest.method == RestRequest.Method.PUT) { // update monitor case scope.launch { updateMonitor(indexMonitorRequest, actionListener) @@ -277,7 +265,7 @@ class TransportIndexMonitorV2Action @Inject constructor( try { val indexResponse: IndexResponse = client.suspendUntil { client.index(indexRequest, it) } - val failureReasons = checkShardsFailure(indexResponse) + val failureReasons = IndexUtils.checkShardsFailure(indexResponse) if (failureReasons != null) { actionListener.onFailure( AlertingException.wrap(OpenSearchStatusException(failureReasons.toString(), indexResponse.status())) @@ -358,7 +346,7 @@ class TransportIndexMonitorV2Action @Inject constructor( try { val indexResponse: IndexResponse = client.suspendUntil { client.index(indexRequest, it) } - val failureReasons = checkShardsFailure(indexResponse) + val failureReasons = IndexUtils.checkShardsFailure(indexResponse) if 
(failureReasons != null) { log.info(failureReasons.toString()) actionListener.onFailure( @@ -377,17 +365,4 @@ class TransportIndexMonitorV2Action @Inject constructor( actionListener.onFailure(AlertingException.wrap(t)) } } - - // TODO: copied from V1 TransportIndexMonitorAction, abstract this out into a util function - private fun checkShardsFailure(response: IndexResponse): String? { - val failureReasons = StringBuilder() - if (response.shardInfo.failed > 0) { - response.shardInfo.failures.forEach { - entry -> - failureReasons.append(entry.reason()) - } - return failureReasons.toString() - } - return null - } } diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/util/IndexUtils.kt b/alerting/src/main/kotlin/org/opensearch/alerting/util/IndexUtils.kt index 093b0bd39..df5c0adce 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/util/IndexUtils.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/util/IndexUtils.kt @@ -6,6 +6,7 @@ package org.opensearch.alerting.util import org.opensearch.action.admin.indices.mapping.put.PutMappingRequest +import org.opensearch.action.index.IndexResponse import org.opensearch.action.support.IndicesOptions import org.opensearch.action.support.clustermanager.AcknowledgedResponse import org.opensearch.alerting.alerts.AlertIndices @@ -205,5 +206,18 @@ class IndexUtils { fun getCreationDateForIndex(index: String, clusterState: ClusterState): Long { return clusterState.metadata.index(index).creationDate } + + @JvmStatic + fun checkShardsFailure(response: IndexResponse): String? 
{ + val failureReasons = StringBuilder() + if (response.shardInfo.failed > 0) { + response.shardInfo.failures.forEach { + entry -> + failureReasons.append(entry.reason()) + } + return failureReasons.toString() + } + return null + } } } From f1b4c37422f56696103a732ade6330fabd32b02b Mon Sep 17 00:00:00 2001 From: Dennis Toepker Date: Mon, 8 Sep 2025 11:59:21 -0700 Subject: [PATCH 21/40] more misc cleanup --- .../org/opensearch/alerting/AlertingPlugin.kt | 4 +- .../alerting/MonitorRunnerService.kt | 52 +++++++++--------- .../opensearch/alerting/MonitorV2Runner.kt | 4 +- .../opensearch/alerting/PPLMonitorRunner.kt | 2 +- .../script/PPLTriggerExecutionContext.kt | 13 ----- .../TransportExecuteMonitorV2Action.kt | 34 ++++++++---- .../RestScheduledJobStatsHandler.kt | 44 +-------------- .../RestScheduledJobStatsV2Handler.kt | 46 +--------------- .../core/resthandler/StatsRequestUtils.kt | 53 +++++++++++++++++++ .../alerting/core/schedule/JobScheduler.kt | 3 +- 10 files changed, 111 insertions(+), 144 deletions(-) create mode 100644 core/src/main/kotlin/org/opensearch/alerting/core/resthandler/StatsRequestUtils.kt diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/AlertingPlugin.kt b/alerting/src/main/kotlin/org/opensearch/alerting/AlertingPlugin.kt index 0ec8b6ca8..1d12a549a 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/AlertingPlugin.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/AlertingPlugin.kt @@ -192,14 +192,14 @@ internal class AlertingPlugin : PainlessExtension, ActionPlugin, ScriptPlugin, R // Alerting V1 // RestGetMonitorAction(), // RestDeleteMonitorAction(), - RestIndexMonitorAction(), +// RestIndexMonitorAction(), // RestIndexWorkflowAction(), // RestSearchMonitorAction(settings, clusterService), // RestExecuteMonitorAction(), // RestExecuteWorkflowAction(), // RestAcknowledgeAlertAction(), // RestAcknowledgeChainedAlertAction(), - RestScheduledJobStatsHandler("_alerting"), +// RestScheduledJobStatsHandler("_alerting"), 
// RestSearchEmailAccountAction(), // RestGetEmailAccountAction(), // RestSearchEmailGroupAction(), diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/MonitorRunnerService.kt b/alerting/src/main/kotlin/org/opensearch/alerting/MonitorRunnerService.kt index a310eb430..a1737c913 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/MonitorRunnerService.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/MonitorRunnerService.kt @@ -333,6 +333,7 @@ object MonitorRunnerService : JobRunner, CoroutineScope, AbstractLifecycleCompon // TODO: if MonitorV2 was deleted, skip trying to move alerts // cluster throws failed to move alerts exception whenever a MonitorV2 is deleted + // because Alerting V2's stateless alerts don't need to be moved override fun postDelete(jobId: String) { launch { try { @@ -483,20 +484,7 @@ object MonitorRunnerService : JobRunner, CoroutineScope, AbstractLifecycleCompon ): MonitorRunResult<*> { // Updating the scheduled job index at the start of monitor execution runs for when there is an upgrade the the schema mapping // has not been updated. 
- if (!IndexUtils.scheduledJobIndexUpdated && monitorCtx.clusterService != null && monitorCtx.client != null) { - IndexUtils.updateIndexMapping( - ScheduledJob.SCHEDULED_JOBS_INDEX, - ScheduledJobIndices.scheduledJobMappings(), monitorCtx.clusterService!!.state(), monitorCtx.client!!.admin().indices(), - object : ActionListener { - override fun onResponse(response: AcknowledgedResponse) { - } - - override fun onFailure(t: Exception) { - logger.error("Failed to update config index schema", t) - } - } - ) - } + updateAlertingConfigIndexSchema() if (job is Workflow) { logger.info("Executing scheduled workflow - id: ${job.id}, periodStart: $periodStart, periodEnd: $periodEnd, dryrun: $dryrun") @@ -589,6 +577,9 @@ object MonitorRunnerService : JobRunner, CoroutineScope, AbstractLifecycleCompon } } + // after the above JobRunner interface override runJob calls ExecuteMonitorV2 API, + // the ExecuteMonitorV2 transport action calls this function to call the PPLMonitorRunner, + // where the core PPL Monitor execution logic resides suspend fun runJobV2( monitorV2: MonitorV2, periodStart: Instant, @@ -596,20 +587,7 @@ object MonitorRunnerService : JobRunner, CoroutineScope, AbstractLifecycleCompon dryrun: Boolean, transportService: TransportService, ): MonitorV2RunResult<*> { - if (!IndexUtils.scheduledJobIndexUpdated && monitorCtx.clusterService != null && monitorCtx.client != null) { - IndexUtils.updateIndexMapping( - ScheduledJob.SCHEDULED_JOBS_INDEX, - ScheduledJobIndices.scheduledJobMappings(), monitorCtx.clusterService!!.state(), monitorCtx.client!!.admin().indices(), - object : ActionListener { - override fun onResponse(response: AcknowledgedResponse) { - } - - override fun onFailure(t: Exception) { - logger.error("Failed to update config index schema", t) - } - } - ) - } + updateAlertingConfigIndexSchema() val executionId = "${monitorV2.id}_${LocalDateTime.now(ZoneOffset.UTC)}_${UUID.randomUUID()}" val monitorV2Type = when (monitorV2) { @@ -622,6 +600,7 @@ object 
MonitorRunnerService : JobRunner, CoroutineScope, AbstractLifecycleCompon "periodEnd: $periodEnd, dryrun: $dryrun, executionId: $executionId" ) + // for now, always call PPLMonitorRunner since only PPL Monitors are initially supported // to introduce new MonitorV2 type, create its MonitorRunner, and if/else branch // to the corresponding MonitorRunners based on type. For now, default to PPLMonitorRunner val runResult = PPLMonitorRunner.runMonitorV2( @@ -685,4 +664,21 @@ object MonitorRunnerService : JobRunner, CoroutineScope, AbstractLifecycleCompon .newInstance(template.params + mapOf("ctx" to ctx.asTemplateArg())) .execute() } + + private fun updateAlertingConfigIndexSchema() { + if (!IndexUtils.scheduledJobIndexUpdated && monitorCtx.clusterService != null && monitorCtx.client != null) { + IndexUtils.updateIndexMapping( + ScheduledJob.SCHEDULED_JOBS_INDEX, + ScheduledJobIndices.scheduledJobMappings(), monitorCtx.clusterService!!.state(), monitorCtx.client!!.admin().indices(), + object : ActionListener { + override fun onResponse(response: AcknowledgedResponse) { + } + + override fun onFailure(t: Exception) { + logger.error("Failed to update config index schema", t) + } + } + ) + } + } } diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/MonitorV2Runner.kt b/alerting/src/main/kotlin/org/opensearch/alerting/MonitorV2Runner.kt index 57c33efbd..6474b3b8f 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/MonitorV2Runner.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/MonitorV2Runner.kt @@ -5,8 +5,8 @@ import org.opensearch.commons.alerting.model.MonitorV2RunResult import org.opensearch.transport.TransportService import java.time.Instant -abstract class MonitorV2Runner { - abstract suspend fun runMonitorV2( +interface MonitorV2Runner { + suspend fun runMonitorV2( monitorV2: MonitorV2, monitorCtx: MonitorRunnerExecutionContext, // MonitorV2 reads from same context as Monitor periodStart: Instant, diff --git 
a/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt b/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt index 23327effd..720168e94 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt @@ -45,7 +45,7 @@ import java.time.ZoneOffset.UTC import java.time.format.DateTimeFormatter import java.time.temporal.ChronoUnit -object PPLMonitorRunner : MonitorV2Runner() { +object PPLMonitorRunner : MonitorV2Runner { private val logger = LogManager.getLogger(javaClass) private const val PPL_SQL_QUERY_FIELD = "query" // name of PPL query field when passing into PPL/SQL Execute API call diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/script/PPLTriggerExecutionContext.kt b/alerting/src/main/kotlin/org/opensearch/alerting/script/PPLTriggerExecutionContext.kt index 1a6e139fe..a8b236673 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/script/PPLTriggerExecutionContext.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/script/PPLTriggerExecutionContext.kt @@ -15,19 +15,6 @@ data class PPLTriggerExecutionContext( val pplQueryResults: Map // keys are PPL query result fields, not trigger ID ) : TriggerV2ExecutionContext(monitorV2, periodStart, periodEnd, error) { -// constructor( -// pplMonitor: PPLMonitor, -// pplTrigger: PPLTrigger, -// pplMonitorRunResult: PPLMonitorRunResult -// ) : this( -// pplMonitor, -// pplMonitorRunResult.periodStart, -// pplMonitorRunResult.periodEnd, -// pplMonitorRunResult.error, -// pplTrigger, -// pplMonitorRunResult.pplQueryResults[pplTrigger.id]!! 
-// ) - override fun asTemplateArg(): Map { val templateArg = super.asTemplateArg().toMutableMap() templateArg[PPL_TRIGGER_FIELD] = pplTrigger.asTemplateArg() diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportExecuteMonitorV2Action.kt b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportExecuteMonitorV2Action.kt index 4e831de3e..984fa0c96 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportExecuteMonitorV2Action.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportExecuteMonitorV2Action.kt @@ -129,13 +129,25 @@ class TransportExecuteMonitorV2Action @Inject constructor( xContentRegistry, LoggingDeprecationHandler.INSTANCE, getMonitorV2Response.sourceAsBytesRef, XContentType.JSON ).use { xcp -> - val monitorV2 = ScheduledJob.parse( - xcp, - getMonitorV2Response.id, - getMonitorV2Response.version - ) as MonitorV2 - // TODO: validate that this is a MonitorV2 and not a Monitor - executeMonitorV2(monitorV2) + try { + val monitorV2 = ScheduledJob.parse( + xcp, + getMonitorV2Response.id, + getMonitorV2Response.version + ) as MonitorV2 + executeMonitorV2(monitorV2) + } catch (e: ClassCastException) { + actionListener.onFailure( + AlertingException.wrap( + IllegalArgumentException( + "Passed in Monitor ID is a legacy Alerting Monitor, please pass in an " + + "Alerting V2 Monitor" + ) + ) + ) + } catch (e: Exception) { + actionListener.onFailure(AlertingException.wrap(e)) + } } } } @@ -146,8 +158,12 @@ class TransportExecuteMonitorV2Action @Inject constructor( } ) } else { // execute with monitor object case - val monitorV2 = execMonitorV2Request.monitorV2 as MonitorV2 - executeMonitorV2(monitorV2) + try { + val monitorV2 = execMonitorV2Request.monitorV2 as MonitorV2 + executeMonitorV2(monitorV2) + } catch (e: Exception) { + actionListener.onFailure(AlertingException.wrap(e)) + } } // } } diff --git 
a/core/src/main/kotlin/org/opensearch/alerting/core/resthandler/RestScheduledJobStatsHandler.kt b/core/src/main/kotlin/org/opensearch/alerting/core/resthandler/RestScheduledJobStatsHandler.kt index 5fbb925e8..a8fce9bf3 100644 --- a/core/src/main/kotlin/org/opensearch/alerting/core/resthandler/RestScheduledJobStatsHandler.kt +++ b/core/src/main/kotlin/org/opensearch/alerting/core/resthandler/RestScheduledJobStatsHandler.kt @@ -7,7 +7,6 @@ package org.opensearch.alerting.core.resthandler import org.opensearch.alerting.core.action.node.ScheduledJobsStatsAction import org.opensearch.alerting.core.action.node.ScheduledJobsStatsRequest -import org.opensearch.core.common.Strings import org.opensearch.rest.BaseRestHandler import org.opensearch.rest.BaseRestHandler.RestChannelConsumer import org.opensearch.rest.RestHandler @@ -16,8 +15,6 @@ import org.opensearch.rest.RestRequest import org.opensearch.rest.RestRequest.Method.GET import org.opensearch.rest.action.RestActions import org.opensearch.transport.client.node.NodeClient -import java.util.Locale -import java.util.TreeSet /** * RestScheduledJobStatsHandler is handler for getting ScheduledJob Stats. 
@@ -71,7 +68,7 @@ class RestScheduledJobStatsHandler(private val path: String) : BaseRestHandler() } override fun prepareRequest(request: RestRequest, client: NodeClient): RestChannelConsumer { - val scheduledJobNodesStatsRequest = getRequest(request) + val scheduledJobNodesStatsRequest = StatsRequestUtils.getStatsRequest(request, false, this::unrecognized) return RestChannelConsumer { channel -> client.execute( ScheduledJobsStatsAction.INSTANCE, @@ -80,43 +77,4 @@ class RestScheduledJobStatsHandler(private val path: String) : BaseRestHandler() ) } } - - private fun getRequest(request: RestRequest): ScheduledJobsStatsRequest { - val nodesIds = Strings.splitStringByCommaToArray(request.param("nodeId")) - val metrics = Strings.tokenizeByCommaToSet(request.param("metric")) - val scheduledJobsStatsRequest = ScheduledJobsStatsRequest(nodeIds = nodesIds, showAlertingV2ScheduledJobs = false) - scheduledJobsStatsRequest.timeout(request.param("timeout")) - - if (metrics.isEmpty()) { - return scheduledJobsStatsRequest - } else if (metrics.size == 1 && metrics.contains("_all")) { - scheduledJobsStatsRequest.all() - } else if (metrics.contains("_all")) { - throw IllegalArgumentException( - String.format( - Locale.ROOT, - "request [%s] contains _all and individual metrics [%s]", - request.path(), - request.param("metric") - ) - ) - } else { - // use a sorted set so the unrecognized parameters appear in a reliable sorted order - scheduledJobsStatsRequest.clear() - val invalidMetrics = TreeSet() - for (metric in metrics) { - val handler = METRICS[metric] - if (handler != null) { - handler.invoke(scheduledJobsStatsRequest) - } else { - invalidMetrics.add(metric) - } - } - - if (!invalidMetrics.isEmpty()) { - throw IllegalArgumentException(unrecognized(request, invalidMetrics, METRICS.keys, "metric")) - } - } - return scheduledJobsStatsRequest - } } diff --git a/core/src/main/kotlin/org/opensearch/alerting/core/resthandler/RestScheduledJobStatsV2Handler.kt 
b/core/src/main/kotlin/org/opensearch/alerting/core/resthandler/RestScheduledJobStatsV2Handler.kt index 43a0f1a78..7bfbfc47c 100644 --- a/core/src/main/kotlin/org/opensearch/alerting/core/resthandler/RestScheduledJobStatsV2Handler.kt +++ b/core/src/main/kotlin/org/opensearch/alerting/core/resthandler/RestScheduledJobStatsV2Handler.kt @@ -1,17 +1,12 @@ package org.opensearch.alerting.core.resthandler import org.opensearch.alerting.core.action.node.ScheduledJobsStatsAction -import org.opensearch.alerting.core.action.node.ScheduledJobsStatsRequest -import org.opensearch.alerting.core.resthandler.RestScheduledJobStatsHandler.Companion.METRICS -import org.opensearch.core.common.Strings import org.opensearch.rest.BaseRestHandler import org.opensearch.rest.RestHandler.Route import org.opensearch.rest.RestRequest import org.opensearch.rest.RestRequest.Method.GET import org.opensearch.rest.action.RestActions import org.opensearch.transport.client.node.NodeClient -import java.util.Locale -import java.util.TreeSet /** * RestScheduledJobStatsHandler is handler for getting ScheduledJob Stats for Alerting V2 Scheduled Jobs. 
@@ -44,7 +39,7 @@ class RestScheduledJobStatsV2Handler : BaseRestHandler() { } override fun prepareRequest(request: RestRequest, client: NodeClient): RestChannelConsumer { - val scheduledJobNodesStatsRequest = getRequest(request) + val scheduledJobNodesStatsRequest = StatsRequestUtils.getStatsRequest(request, true, this::unrecognized) return RestChannelConsumer { channel -> client.execute( ScheduledJobsStatsAction.INSTANCE, @@ -53,43 +48,4 @@ class RestScheduledJobStatsV2Handler : BaseRestHandler() { ) } } - - private fun getRequest(request: RestRequest): ScheduledJobsStatsRequest { - val nodesIds = Strings.splitStringByCommaToArray(request.param("nodeId")) - val metrics = Strings.tokenizeByCommaToSet(request.param("metric")) - val scheduledJobsStatsRequest = ScheduledJobsStatsRequest(nodeIds = nodesIds, showAlertingV2ScheduledJobs = true) - scheduledJobsStatsRequest.timeout(request.param("timeout")) - - if (metrics.isEmpty()) { - return scheduledJobsStatsRequest - } else if (metrics.size == 1 && metrics.contains("_all")) { - scheduledJobsStatsRequest.all() - } else if (metrics.contains("_all")) { - throw IllegalArgumentException( - String.format( - Locale.ROOT, - "request [%s] contains _all and individual metrics [%s]", - request.path(), - request.param("metric") - ) - ) - } else { - // use a sorted set so the unrecognized parameters appear in a reliable sorted order - scheduledJobsStatsRequest.clear() - val invalidMetrics = TreeSet() - for (metric in metrics) { - val handler = METRICS[metric] - if (handler != null) { - handler.invoke(scheduledJobsStatsRequest) - } else { - invalidMetrics.add(metric) - } - } - - if (!invalidMetrics.isEmpty()) { - throw IllegalArgumentException(unrecognized(request, invalidMetrics, METRICS.keys, "metric")) - } - } - return scheduledJobsStatsRequest - } } diff --git a/core/src/main/kotlin/org/opensearch/alerting/core/resthandler/StatsRequestUtils.kt b/core/src/main/kotlin/org/opensearch/alerting/core/resthandler/StatsRequestUtils.kt 
new file mode 100644
index 000000000..db05dd5e6
--- /dev/null
+++ b/core/src/main/kotlin/org/opensearch/alerting/core/resthandler/StatsRequestUtils.kt
@@ -0,0 +1,70 @@
+package org.opensearch.alerting.core.resthandler
+
+import org.opensearch.alerting.core.action.node.ScheduledJobsStatsRequest
+import org.opensearch.alerting.core.resthandler.RestScheduledJobStatsHandler.Companion.METRICS
+import org.opensearch.core.common.Strings
+import org.opensearch.rest.RestRequest
+import java.util.Locale
+import java.util.TreeSet
+
+/**
+ * Shared request-building logic for the scheduled job stats REST handlers.
+ */
+internal object StatsRequestUtils {
+    /**
+     * Builds a [ScheduledJobsStatsRequest] from the `nodeId`, `metric` and `timeout` parameters of [request].
+     *
+     * @param request the incoming REST request whose parameters are read
+     * @param showAlertingV2ScheduledJobs passed through to the [ScheduledJobsStatsRequest] constructor
+     * @param unrecognizedFn builds the error message for unknown metrics; handlers pass `this::unrecognized`
+     * @return the stats request configured according to the `metric` parameter
+     * @throws IllegalArgumentException if `_all` is mixed with other metrics or a metric is not in [METRICS]
+     */
+    fun getStatsRequest(
+        request: RestRequest,
+        showAlertingV2ScheduledJobs: Boolean,
+        unrecognizedFn: (RestRequest, Set<String>, Set<String>, String) -> String
+    ): ScheduledJobsStatsRequest {
+        val nodesIds = Strings.splitStringByCommaToArray(request.param("nodeId"))
+        val metrics = Strings.tokenizeByCommaToSet(request.param("metric"))
+        val scheduledJobsStatsRequest = ScheduledJobsStatsRequest(
+            nodeIds = nodesIds,
+            showAlertingV2ScheduledJobs = showAlertingV2ScheduledJobs
+        )
+        scheduledJobsStatsRequest.timeout(request.param("timeout"))
+
+        if (metrics.isEmpty()) {
+            // no metric filter supplied: return the request exactly as constructed
+            return scheduledJobsStatsRequest
+        } else if (metrics.size == 1 && metrics.contains("_all")) {
+            scheduledJobsStatsRequest.all()
+        } else if (metrics.contains("_all")) {
+            // `_all` cannot be combined with individual metric names
+            throw IllegalArgumentException(
+                String.format(
+                    Locale.ROOT,
+                    "request [%s] contains _all and individual metrics [%s]",
+                    request.path(),
+                    request.param("metric")
+                )
+            )
+        } else {
+            // use a sorted set so the unrecognized parameters appear in a reliable sorted order
+            scheduledJobsStatsRequest.clear()
+            val invalidMetrics = TreeSet<String>()
+            for (metric in metrics) {
+                val handler = METRICS[metric]
+                if (handler != null) {
+                    handler.invoke(scheduledJobsStatsRequest)
+                } else {
+                    invalidMetrics.add(metric)
+                }
+            }
+
+            if (invalidMetrics.isNotEmpty()) {
+                throw IllegalArgumentException(unrecognizedFn(request, invalidMetrics, METRICS.keys, "metric"))
+            }
+        }
+        return scheduledJobsStatsRequest
+    }
+}
diff --git
a/core/src/main/kotlin/org/opensearch/alerting/core/schedule/JobScheduler.kt b/core/src/main/kotlin/org/opensearch/alerting/core/schedule/JobScheduler.kt index 1d528384a..dd3676b66 100644 --- a/core/src/main/kotlin/org/opensearch/alerting/core/schedule/JobScheduler.kt +++ b/core/src/main/kotlin/org/opensearch/alerting/core/schedule/JobScheduler.kt @@ -193,7 +193,8 @@ class JobScheduler(private val threadPool: ThreadPool, private val jobRunner: Jo } fun getJobSchedulerMetric(): List { - return scheduledJobIdToInfo.entries.stream() + return scheduledJobIdToInfo.entries.filter { it.value.scheduledJob !is MonitorV2 } + .stream() .map { entry -> JobSchedulerMetrics( entry.value.scheduledJobId, From b9fc041ce8cde8052bef2de1bc45cfc5b83041d6 Mon Sep 17 00:00:00 2001 From: Dennis Toepker Date: Mon, 8 Sep 2025 12:25:49 -0700 Subject: [PATCH 22/40] removing unused import --- .../src/main/kotlin/org/opensearch/alerting/AlertingPlugin.kt | 1 - 1 file changed, 1 deletion(-) diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/AlertingPlugin.kt b/alerting/src/main/kotlin/org/opensearch/alerting/AlertingPlugin.kt index 1d12a549a..2c4c99661 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/AlertingPlugin.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/AlertingPlugin.kt @@ -23,7 +23,6 @@ import org.opensearch.alerting.core.ScheduledJobIndices import org.opensearch.alerting.core.action.node.ScheduledJobsStatsAction import org.opensearch.alerting.core.action.node.ScheduledJobsStatsTransportAction import org.opensearch.alerting.core.lock.LockService -import org.opensearch.alerting.core.resthandler.RestScheduledJobStatsHandler import org.opensearch.alerting.core.resthandler.RestScheduledJobStatsV2Handler import org.opensearch.alerting.core.schedule.JobScheduler import org.opensearch.alerting.core.settings.LegacyOpenDistroScheduledJobSettings From c270057fe28ce91c5baec197fc191b7f8d945914 Mon Sep 17 00:00:00 2001 From: Dennis Toepker Date: Wed, 10 Sep 2025 
14:43:30 -0700 Subject: [PATCH 23/40] moving all common utils models and dependencies to alerting --- alerting/build.gradle | 66 +++ .../org/opensearch/alerting/AlertV2Expirer.kt | 2 +- .../org/opensearch/alerting/AlertingPlugin.kt | 17 +- .../alerting/MonitorRunnerService.kt | 16 +- .../opensearch/alerting/MonitorV2Runner.kt | 4 +- .../opensearch/alerting/PPLMonitorRunner.kt | 36 +- .../actionv2/DeleteMonitorV2Action.kt | 10 + .../actionv2/DeleteMonitorV2Request.kt | 34 ++ .../actionv2/DeleteMonitorV2Response.kt | 38 ++ .../actionv2/ExecuteMonitorV2Action.kt | 10 + .../actionv2/ExecuteMonitorV2Request.kt | 66 +++ .../actionv2/ExecuteMonitorV2Response.kt | 33 ++ .../alerting/actionv2/GetMonitorV2Action.kt | 10 + .../alerting/actionv2/GetMonitorV2Request.kt | 47 +++ .../alerting/actionv2/GetMonitorV2Response.kt | 75 ++++ .../alerting/actionv2/IndexMonitorV2Action.kt | 10 + .../actionv2/IndexMonitorV2Request.kt | 64 +++ .../actionv2/IndexMonitorV2Response.kt | 68 ++++ .../actionv2/SearchMonitorV2Action.kt | 11 + .../actionv2/SearchMonitorV2Request.kt | 32 ++ .../resthandler/RestDeleteMonitorV2Action.kt | 6 +- .../resthandler/RestExecuteMonitorV2Action.kt | 10 +- .../resthandler/RestGetMonitorV2Action.kt | 6 +- .../resthandler/RestIndexMonitorV2Action.kt | 8 +- .../resthandler/RestSearchMonitorV2Action.kt | 6 +- .../script/PPLTriggerExecutionContext.kt | 12 +- .../script/TriggerV2ExecutionContext.kt | 6 +- .../alerting/service/DeleteMonitorService.kt | 2 +- .../TransportDeleteMonitorV2Action.kt | 8 +- .../TransportExecuteMonitorV2Action.kt | 14 +- .../transport/TransportGetMonitorV2Action.kt | 10 +- .../TransportIndexMonitorV2Action.kt | 12 +- .../TransportSearchMonitorV2Action.kt | 8 +- core/build.gradle | 70 ++++ .../alerting/core/modelv2/AlertV2.kt | 239 +++++++++++ .../alerting/core/modelv2/MonitorV2.kt | 108 +++++ .../core/modelv2/MonitorV2RunResult.kt | 44 ++ .../alerting/core/modelv2/PPLMonitor.kt | 336 ++++++++++++++++ .../core/modelv2/PPLMonitorRunResult.kt | 
61 +++ .../alerting/core/modelv2/PPLTrigger.kt | 380 ++++++++++++++++++ .../core/modelv2/PPLTriggerRunResult.kt | 51 +++ .../alerting/core/modelv2/TriggerV2.kt | 51 +++ .../core/modelv2/TriggerV2RunResult.kt | 17 + .../alerting/core/ppl/PPLPluginInterface.kt | 50 +++ .../alerting/core/schedule/JobScheduler.kt | 2 +- .../opensearchapi/OpenSearchExtensions.kt | 2 +- 46 files changed, 2077 insertions(+), 91 deletions(-) create mode 100644 alerting/src/main/kotlin/org/opensearch/alerting/actionv2/DeleteMonitorV2Action.kt create mode 100644 alerting/src/main/kotlin/org/opensearch/alerting/actionv2/DeleteMonitorV2Request.kt create mode 100644 alerting/src/main/kotlin/org/opensearch/alerting/actionv2/DeleteMonitorV2Response.kt create mode 100644 alerting/src/main/kotlin/org/opensearch/alerting/actionv2/ExecuteMonitorV2Action.kt create mode 100644 alerting/src/main/kotlin/org/opensearch/alerting/actionv2/ExecuteMonitorV2Request.kt create mode 100644 alerting/src/main/kotlin/org/opensearch/alerting/actionv2/ExecuteMonitorV2Response.kt create mode 100644 alerting/src/main/kotlin/org/opensearch/alerting/actionv2/GetMonitorV2Action.kt create mode 100644 alerting/src/main/kotlin/org/opensearch/alerting/actionv2/GetMonitorV2Request.kt create mode 100644 alerting/src/main/kotlin/org/opensearch/alerting/actionv2/GetMonitorV2Response.kt create mode 100644 alerting/src/main/kotlin/org/opensearch/alerting/actionv2/IndexMonitorV2Action.kt create mode 100644 alerting/src/main/kotlin/org/opensearch/alerting/actionv2/IndexMonitorV2Request.kt create mode 100644 alerting/src/main/kotlin/org/opensearch/alerting/actionv2/IndexMonitorV2Response.kt create mode 100644 alerting/src/main/kotlin/org/opensearch/alerting/actionv2/SearchMonitorV2Action.kt create mode 100644 alerting/src/main/kotlin/org/opensearch/alerting/actionv2/SearchMonitorV2Request.kt create mode 100644 core/src/main/kotlin/org/opensearch/alerting/core/modelv2/AlertV2.kt create mode 100644 
core/src/main/kotlin/org/opensearch/alerting/core/modelv2/MonitorV2.kt create mode 100644 core/src/main/kotlin/org/opensearch/alerting/core/modelv2/MonitorV2RunResult.kt create mode 100644 core/src/main/kotlin/org/opensearch/alerting/core/modelv2/PPLMonitor.kt create mode 100644 core/src/main/kotlin/org/opensearch/alerting/core/modelv2/PPLMonitorRunResult.kt create mode 100644 core/src/main/kotlin/org/opensearch/alerting/core/modelv2/PPLTrigger.kt create mode 100644 core/src/main/kotlin/org/opensearch/alerting/core/modelv2/PPLTriggerRunResult.kt create mode 100644 core/src/main/kotlin/org/opensearch/alerting/core/modelv2/TriggerV2.kt create mode 100644 core/src/main/kotlin/org/opensearch/alerting/core/modelv2/TriggerV2RunResult.kt create mode 100644 core/src/main/kotlin/org/opensearch/alerting/core/ppl/PPLPluginInterface.kt diff --git a/alerting/build.gradle b/alerting/build.gradle index 53723ac80..3731c6b22 100644 --- a/alerting/build.gradle +++ b/alerting/build.gradle @@ -21,6 +21,24 @@ import org.opensearch.gradle.test.RestIntegTestTask import org.opensearch.gradle.testclusters.OpenSearchCluster import org.opensearch.gradle.testclusters.StandaloneRestIntegTestTask +buildscript { + ext { + opensearch_group = "org.opensearch" + opensearch_version = System.getProperty("opensearch.version", "3.1.0-SNAPSHOT") + isSnapshot = "true" == System.getProperty("build.snapshot", "true") + buildVersionQualifier = System.getProperty("build.version_qualifier", "") + kotlin_version = System.getProperty("kotlin.version", "1.9.25") + version_tokens = opensearch_version.tokenize('-') + opensearch_build = version_tokens[0] + '.0' + if (buildVersionQualifier) { + opensearch_build += "-${buildVersionQualifier}" + } + if (isSnapshot) { + opensearch_build += "-SNAPSHOT" + } + } +} + apply plugin: 'java' apply plugin: 'idea' apply plugin: 'org.jetbrains.kotlin.jvm' @@ -147,6 +165,18 @@ def bwcVersion = bwcVersionShort + ".0" def bwcOpenSearchVersion = bwcVersionShort + "-SNAPSHOT" def 
bwcPluginVersion = bwcVersion + "-SNAPSHOT" +def sqlJarDirectory = "$buildDir/dependencies/opensearch-sql-plugin" + +task addJarsToClasspath(type: Copy) { + from(fileTree(dir: sqlJarDirectory)) { + include "opensearch-sql-${opensearch_build}.jar" + include "ppl-${opensearch_build}.jar" + include "protocol-${opensearch_build}.jar" + include "core-${opensearch_build}.jar" + } + into("$buildDir/classes") +} + dependencies { // Needed for integ tests zipArchive group: 'org.opensearch.plugin', name:'opensearch-notifications-core', version: "${opensearch_build}" @@ -168,6 +198,11 @@ dependencies { implementation "org.jetbrains.kotlin:kotlin-stdlib-common:${kotlin_version}" implementation "org.jetbrains:annotations:13.0" + // SQL/PPL plugin dependencies + implementation 'org.json:json:20240303' +// implementation fileTree(dir: sqlJarDirectory, include: ["opensearch-sql-thin-${opensearch_build}.jar", "ppl-${opensearch_build}.jar", "protocol-${opensearch_build}.jar", "core-${opensearch_build}.jar"]) +// zipArchive group: 'org.opensearch.plugin', name:'opensearch-sql-plugin', version: "${opensearch_build}" + api project(":alerting-core") implementation "com.github.seancfoley:ipaddress:5.4.1" implementation project(path: ":alerting-spi", configuration: 'shadow') @@ -181,6 +216,29 @@ dependencies { testImplementation "org.opensearch.plugin:lang-mustache-client:${opensearch_version}" } +task extractSqlJar(type: Copy) { + mustRunAfter() + from(zipTree(configurations.zipArchive.find { it.name.startsWith("opensearch-sql-plugin") })) + into sqlJarDirectory +} + +task extractSqlClass(type: Copy, dependsOn: [extractSqlJar]) { + from zipTree("${sqlJarDirectory}/opensearch-sql-${opensearch_build}.jar") + into("$buildDir/opensearch-sql") + include 'org/opensearch/sql/**' +} + +task replaceSqlJar(type: Jar, dependsOn: [extractSqlClass]) { + from("$buildDir/opensearch-sql") + archiveFileName = "opensearch-sql-thin-${opensearch_build}.jar" + destinationDirectory = file(sqlJarDirectory) + 
doLast { + file("${sqlJarDirectory}/opensearch-sql-${opensearch_build}.jar").delete() + } +} + +tasks.addJarsToClasspath.dependsOn(replaceSqlJar) + javadoc.enabled = false // turn off javadoc as it barfs on Kotlin code licenseHeaders.enabled = true dependencyLicenses.enabled = false @@ -656,3 +714,11 @@ def waitForClusterSetup(OpenSearchCluster cluster, Boolean securityEnabled) { } apply from: '../build-tools/pkgbuild.gradle' + +//compileJava { +// dependsOn addJarsToClasspath +//} +// +//compileKotlin { +// dependsOn addJarsToClasspath +//} diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/AlertV2Expirer.kt b/alerting/src/main/kotlin/org/opensearch/alerting/AlertV2Expirer.kt index 905e25755..8f411ca36 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/AlertV2Expirer.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/AlertV2Expirer.kt @@ -4,11 +4,11 @@ import org.apache.logging.log4j.LogManager import org.opensearch.alerting.alerts.AlertIndices.Companion.ALERT_HISTORY_WRITE_INDEX import org.opensearch.alerting.alerts.AlertIndices.Companion.ALERT_INDEX import org.opensearch.alerting.alerts.AlertIndices.Companion.ALL_ALERT_INDEX_PATTERN +import org.opensearch.alerting.core.modelv2.AlertV2.Companion.EXPIRATION_TIME_FIELD import org.opensearch.cluster.ClusterChangedEvent import org.opensearch.cluster.ClusterStateListener import org.opensearch.cluster.service.ClusterService import org.opensearch.common.unit.TimeValue -import org.opensearch.commons.alerting.model.AlertV2.Companion.EXPIRATION_TIME_FIELD import org.opensearch.core.action.ActionListener import org.opensearch.index.query.QueryBuilders import org.opensearch.index.reindex.BulkByScrollResponse diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/AlertingPlugin.kt b/alerting/src/main/kotlin/org/opensearch/alerting/AlertingPlugin.kt index 2c4c99661..bdb59886f 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/AlertingPlugin.kt +++ 
b/alerting/src/main/kotlin/org/opensearch/alerting/AlertingPlugin.kt @@ -14,6 +14,11 @@ import org.opensearch.alerting.action.GetEmailGroupAction import org.opensearch.alerting.action.GetRemoteIndexesAction import org.opensearch.alerting.action.SearchEmailAccountAction import org.opensearch.alerting.action.SearchEmailGroupAction +import org.opensearch.alerting.actionv2.DeleteMonitorV2Action +import org.opensearch.alerting.actionv2.ExecuteMonitorV2Action +import org.opensearch.alerting.actionv2.GetMonitorV2Action +import org.opensearch.alerting.actionv2.IndexMonitorV2Action +import org.opensearch.alerting.actionv2.SearchMonitorV2Action import org.opensearch.alerting.alerts.AlertIndices import org.opensearch.alerting.alerts.AlertIndices.Companion.ALL_ALERT_INDEX_PATTERN import org.opensearch.alerting.comments.CommentsIndices @@ -23,6 +28,7 @@ import org.opensearch.alerting.core.ScheduledJobIndices import org.opensearch.alerting.core.action.node.ScheduledJobsStatsAction import org.opensearch.alerting.core.action.node.ScheduledJobsStatsTransportAction import org.opensearch.alerting.core.lock.LockService +import org.opensearch.alerting.core.modelv2.MonitorV2 import org.opensearch.alerting.core.resthandler.RestScheduledJobStatsV2Handler import org.opensearch.alerting.core.schedule.JobScheduler import org.opensearch.alerting.core.settings.LegacyOpenDistroScheduledJobSettings @@ -94,7 +100,6 @@ import org.opensearch.commons.alerting.model.ClusterMetricsInput import org.opensearch.commons.alerting.model.DocLevelMonitorInput import org.opensearch.commons.alerting.model.DocumentLevelTrigger import org.opensearch.commons.alerting.model.Monitor -import org.opensearch.commons.alerting.model.MonitorV2 import org.opensearch.commons.alerting.model.QueryLevelTrigger import org.opensearch.commons.alerting.model.ScheduledJob import org.opensearch.commons.alerting.model.ScheduledJob.Companion.SCHEDULED_JOBS_INDEX @@ -257,11 +262,11 @@ internal class AlertingPlugin : PainlessExtension, 
ActionPlugin, ScriptPlugin, R ActionPlugin.ActionHandler(DocLevelMonitorFanOutAction.INSTANCE, TransportDocLevelMonitorFanOutAction::class.java), // Alerting V2 - ActionPlugin.ActionHandler(AlertingActions.INDEX_MONITOR_V2_ACTION_TYPE, TransportIndexMonitorV2Action::class.java), - ActionPlugin.ActionHandler(AlertingActions.GET_MONITOR_V2_ACTION_TYPE, TransportGetMonitorV2Action::class.java), - ActionPlugin.ActionHandler(AlertingActions.SEARCH_MONITORS_V2_ACTION_TYPE, TransportSearchMonitorV2Action::class.java), - ActionPlugin.ActionHandler(AlertingActions.DELETE_MONITOR_V2_ACTION_TYPE, TransportDeleteMonitorV2Action::class.java), - ActionPlugin.ActionHandler(AlertingActions.EXECUTE_MONITOR_V2_ACTION_TYPE, TransportExecuteMonitorV2Action::class.java), + ActionPlugin.ActionHandler(IndexMonitorV2Action.INSTANCE, TransportIndexMonitorV2Action::class.java), + ActionPlugin.ActionHandler(GetMonitorV2Action.INSTANCE, TransportGetMonitorV2Action::class.java), + ActionPlugin.ActionHandler(SearchMonitorV2Action.INSTANCE, TransportSearchMonitorV2Action::class.java), + ActionPlugin.ActionHandler(DeleteMonitorV2Action.INSTANCE, TransportDeleteMonitorV2Action::class.java), + ActionPlugin.ActionHandler(ExecuteMonitorV2Action.INSTANCE, TransportExecuteMonitorV2Action::class.java), ) } diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/MonitorRunnerService.kt b/alerting/src/main/kotlin/org/opensearch/alerting/MonitorRunnerService.kt index a1737c913..e917fd048 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/MonitorRunnerService.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/MonitorRunnerService.kt @@ -21,12 +21,19 @@ import org.opensearch.alerting.action.ExecuteMonitorResponse import org.opensearch.alerting.action.ExecuteWorkflowAction import org.opensearch.alerting.action.ExecuteWorkflowRequest import org.opensearch.alerting.action.ExecuteWorkflowResponse +import org.opensearch.alerting.actionv2.ExecuteMonitorV2Action +import 
org.opensearch.alerting.actionv2.ExecuteMonitorV2Request +import org.opensearch.alerting.actionv2.ExecuteMonitorV2Response import org.opensearch.alerting.alerts.AlertIndices import org.opensearch.alerting.alerts.AlertMover.Companion.moveAlerts import org.opensearch.alerting.core.JobRunner import org.opensearch.alerting.core.ScheduledJobIndices import org.opensearch.alerting.core.lock.LockModel import org.opensearch.alerting.core.lock.LockService +import org.opensearch.alerting.core.modelv2.MonitorV2 +import org.opensearch.alerting.core.modelv2.MonitorV2RunResult +import org.opensearch.alerting.core.modelv2.PPLMonitor +import org.opensearch.alerting.core.modelv2.PPLMonitor.Companion.PPL_MONITOR_TYPE import org.opensearch.alerting.model.destination.DestinationContextFactory import org.opensearch.alerting.opensearchapi.retry import org.opensearch.alerting.opensearchapi.suspendUntil @@ -60,17 +67,10 @@ import org.opensearch.cluster.service.ClusterService import org.opensearch.common.lifecycle.AbstractLifecycleComponent import org.opensearch.common.settings.Settings import org.opensearch.common.unit.TimeValue -import org.opensearch.commons.alerting.action.AlertingActions -import org.opensearch.commons.alerting.action.ExecuteMonitorV2Request -import org.opensearch.commons.alerting.action.ExecuteMonitorV2Response import org.opensearch.commons.alerting.model.Alert import org.opensearch.commons.alerting.model.DocLevelMonitorInput import org.opensearch.commons.alerting.model.Monitor import org.opensearch.commons.alerting.model.MonitorRunResult -import org.opensearch.commons.alerting.model.MonitorV2 -import org.opensearch.commons.alerting.model.MonitorV2RunResult -import org.opensearch.commons.alerting.model.PPLMonitor -import org.opensearch.commons.alerting.model.PPLMonitor.Companion.PPL_MONITOR_TYPE import org.opensearch.commons.alerting.model.ScheduledJob import org.opensearch.commons.alerting.model.TriggerRunResult import org.opensearch.commons.alerting.model.Workflow @@ 
-446,7 +446,7 @@ object MonitorRunnerService : JobRunner, CoroutineScope, AbstractLifecycleCompon ) monitorCtx.client!!.suspendUntil { monitorCtx.client!!.execute( - AlertingActions.EXECUTE_MONITOR_V2_ACTION_TYPE, + ExecuteMonitorV2Action.INSTANCE, executeMonitorV2Request, it ) diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/MonitorV2Runner.kt b/alerting/src/main/kotlin/org/opensearch/alerting/MonitorV2Runner.kt index 6474b3b8f..410076f19 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/MonitorV2Runner.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/MonitorV2Runner.kt @@ -1,7 +1,7 @@ package org.opensearch.alerting -import org.opensearch.commons.alerting.model.MonitorV2 -import org.opensearch.commons.alerting.model.MonitorV2RunResult +import org.opensearch.alerting.core.modelv2.MonitorV2 +import org.opensearch.alerting.core.modelv2.MonitorV2RunResult import org.opensearch.transport.TransportService import java.time.Instant diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt b/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt index 720168e94..6b262074f 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt @@ -12,6 +12,18 @@ import org.opensearch.action.index.IndexRequest import org.opensearch.action.support.WriteRequest import org.opensearch.alerting.QueryLevelMonitorRunner.getConfigAndSendNotification import org.opensearch.alerting.alerts.AlertIndices +import org.opensearch.alerting.core.modelv2.AlertV2 +import org.opensearch.alerting.core.modelv2.MonitorV2 +import org.opensearch.alerting.core.modelv2.MonitorV2RunResult +import org.opensearch.alerting.core.modelv2.PPLMonitor +import org.opensearch.alerting.core.modelv2.PPLMonitorRunResult +import org.opensearch.alerting.core.modelv2.PPLTrigger +import org.opensearch.alerting.core.modelv2.PPLTrigger.ConditionType +import 
org.opensearch.alerting.core.modelv2.PPLTrigger.NumResultsCondition +import org.opensearch.alerting.core.modelv2.PPLTrigger.TriggerMode +import org.opensearch.alerting.core.modelv2.PPLTriggerRunResult +import org.opensearch.alerting.core.modelv2.TriggerV2.Severity +import org.opensearch.alerting.core.ppl.PPLPluginInterface import org.opensearch.alerting.opensearchapi.retry import org.opensearch.alerting.opensearchapi.suspendUntil import org.opensearch.alerting.script.PPLTriggerExecutionContext @@ -19,21 +31,9 @@ import org.opensearch.common.unit.TimeValue import org.opensearch.common.xcontent.XContentFactory import org.opensearch.commons.alerting.alerts.AlertError import org.opensearch.commons.alerting.model.Alert -import org.opensearch.commons.alerting.model.AlertV2 -import org.opensearch.commons.alerting.model.MonitorV2 -import org.opensearch.commons.alerting.model.MonitorV2RunResult -import org.opensearch.commons.alerting.model.PPLMonitor -import org.opensearch.commons.alerting.model.PPLMonitorRunResult -import org.opensearch.commons.alerting.model.PPLTrigger -import org.opensearch.commons.alerting.model.PPLTrigger.ConditionType -import org.opensearch.commons.alerting.model.PPLTrigger.NumResultsCondition -import org.opensearch.commons.alerting.model.PPLTrigger.TriggerMode -import org.opensearch.commons.alerting.model.PPLTriggerRunResult import org.opensearch.commons.alerting.model.ScheduledJob.Companion.SCHEDULED_JOBS_INDEX -import org.opensearch.commons.alerting.model.TriggerV2 import org.opensearch.commons.alerting.model.action.Action import org.opensearch.commons.alerting.model.userErrorMessage -import org.opensearch.commons.ppl.PPLPluginInterface import org.opensearch.core.common.Strings import org.opensearch.core.rest.RestStatus import org.opensearch.core.xcontent.ToXContent @@ -205,7 +205,7 @@ object PPLMonitorRunner : MonitorV2Runner { for (action in pplTrigger.actions) { for (alert in thisTriggersGeneratedAlerts) { val pplTriggerExecutionContext = 
PPLTriggerExecutionContext( - monitorV2, + pplMonitor, periodStart, periodEnd, null, @@ -463,7 +463,7 @@ object PPLMonitorRunner : MonitorV2Runner { } // prepares the query results to be passed into alerts and notifications based on trigger mode - // if result set, alert and notification simply stores all of the query results + // if result set, alert and notification simply stores all query results // if per result, each alert and notification stores a single row of the query results private fun prepareQueryResults(relevantQueryResultRows: JSONObject, triggerMode: TriggerMode): List { // case: result set @@ -491,7 +491,7 @@ object PPLMonitorRunner : MonitorV2Runner { executionId: String, timeOfCurrentExecution: Instant ): List { - val expirationTime = pplTrigger.expireDuration?.millis?.let { timeOfCurrentExecution.plus(it, ChronoUnit.MILLIS) } + val expirationTime = pplTrigger.expireDuration.millis.let { timeOfCurrentExecution.plus(it, ChronoUnit.MILLIS) } val alertV2s = mutableListOf() for (queryResult in preparedQueryResults) { @@ -504,7 +504,7 @@ object PPLMonitorRunner : MonitorV2Runner { queryResults = queryResult.toMap(), triggeredTime = timeOfCurrentExecution, expirationTime = expirationTime, - severity = pplTrigger.severity.value, + severity = pplTrigger.severity, executionId = executionId ) alertV2s.add(alertV2) @@ -520,7 +520,7 @@ object PPLMonitorRunner : MonitorV2Runner { executionId: String, timeOfCurrentExecution: Instant ): List { - val expirationTime = pplTrigger.expireDuration?.millis?.let { timeOfCurrentExecution.plus(it, ChronoUnit.MILLIS) } + val expirationTime = pplTrigger.expireDuration.millis.let { timeOfCurrentExecution.plus(it, ChronoUnit.MILLIS) } val errorMessage = "Failed to run PPL Trigger ${pplTrigger.name} from PPL Monitor ${pplMonitor.name}: " + exception.userErrorMessage() @@ -536,7 +536,7 @@ object PPLMonitorRunner : MonitorV2Runner { triggeredTime = timeOfCurrentExecution, expirationTime = expirationTime, errorMessage = 
obfuscatedErrorMessage, - severity = TriggerV2.Severity.ERROR.value, + severity = Severity.ERROR, executionId = executionId ) diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/actionv2/DeleteMonitorV2Action.kt b/alerting/src/main/kotlin/org/opensearch/alerting/actionv2/DeleteMonitorV2Action.kt new file mode 100644 index 000000000..2cd1ba703 --- /dev/null +++ b/alerting/src/main/kotlin/org/opensearch/alerting/actionv2/DeleteMonitorV2Action.kt @@ -0,0 +1,10 @@ +package org.opensearch.alerting.actionv2 + +import org.opensearch.action.ActionType + +class DeleteMonitorV2Action private constructor() : ActionType(NAME, ::DeleteMonitorV2Response) { + companion object { + val INSTANCE = DeleteMonitorV2Action() + const val NAME = "cluster:admin/opensearch/alerting/v2/monitor/delete" + } +} diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/actionv2/DeleteMonitorV2Request.kt b/alerting/src/main/kotlin/org/opensearch/alerting/actionv2/DeleteMonitorV2Request.kt new file mode 100644 index 000000000..601d83588 --- /dev/null +++ b/alerting/src/main/kotlin/org/opensearch/alerting/actionv2/DeleteMonitorV2Request.kt @@ -0,0 +1,34 @@ +package org.opensearch.alerting.actionv2 + +import org.opensearch.action.ActionRequest +import org.opensearch.action.ActionRequestValidationException +import org.opensearch.action.support.WriteRequest +import org.opensearch.core.common.io.stream.StreamInput +import org.opensearch.core.common.io.stream.StreamOutput +import java.io.IOException + +class DeleteMonitorV2Request : ActionRequest { + val monitorV2Id: String + val refreshPolicy: WriteRequest.RefreshPolicy + + constructor(monitorV2Id: String, refreshPolicy: WriteRequest.RefreshPolicy) : super() { + this.monitorV2Id = monitorV2Id + this.refreshPolicy = refreshPolicy + } + + @Throws(IOException::class) + constructor(sin: StreamInput) : this( + monitorV2Id = sin.readString(), + refreshPolicy = WriteRequest.RefreshPolicy.readFrom(sin) + ) + + override fun validate(): 
ActionRequestValidationException? { + return null + } + + @Throws(IOException::class) + override fun writeTo(out: StreamOutput) { + out.writeString(monitorV2Id) + refreshPolicy.writeTo(out) + } +} diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/actionv2/DeleteMonitorV2Response.kt b/alerting/src/main/kotlin/org/opensearch/alerting/actionv2/DeleteMonitorV2Response.kt new file mode 100644 index 000000000..71dcfcbd4 --- /dev/null +++ b/alerting/src/main/kotlin/org/opensearch/alerting/actionv2/DeleteMonitorV2Response.kt @@ -0,0 +1,38 @@ +package org.opensearch.alerting.actionv2 + +import org.opensearch.commons.alerting.util.IndexUtils +import org.opensearch.commons.notifications.action.BaseResponse +import org.opensearch.core.common.io.stream.StreamInput +import org.opensearch.core.common.io.stream.StreamOutput +import org.opensearch.core.xcontent.ToXContent +import org.opensearch.core.xcontent.XContentBuilder + +class DeleteMonitorV2Response : BaseResponse { + var id: String + var version: Long + + constructor( + id: String, + version: Long + ) : super() { + this.id = id + this.version = version + } + + constructor(sin: StreamInput) : this( + sin.readString(), // id + sin.readLong() // version + ) + + override fun writeTo(out: StreamOutput) { + out.writeString(id) + out.writeLong(version) + } + + override fun toXContent(builder: XContentBuilder, params: ToXContent.Params): XContentBuilder { + return builder.startObject() + .field(IndexUtils._ID, id) + .field(IndexUtils._VERSION, version) + .endObject() + } +} diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/actionv2/ExecuteMonitorV2Action.kt b/alerting/src/main/kotlin/org/opensearch/alerting/actionv2/ExecuteMonitorV2Action.kt new file mode 100644 index 000000000..c3ba7968b --- /dev/null +++ b/alerting/src/main/kotlin/org/opensearch/alerting/actionv2/ExecuteMonitorV2Action.kt @@ -0,0 +1,10 @@ +package org.opensearch.alerting.actionv2 + +import org.opensearch.action.ActionType + +class 
ExecuteMonitorV2Action private constructor() : ActionType(NAME, ::ExecuteMonitorV2Response) { + companion object { + val INSTANCE = ExecuteMonitorV2Action() + const val NAME = "cluster:admin/opensearch/alerting/v2/monitor/execute" + } +} diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/actionv2/ExecuteMonitorV2Request.kt b/alerting/src/main/kotlin/org/opensearch/alerting/actionv2/ExecuteMonitorV2Request.kt new file mode 100644 index 000000000..8084430c5 --- /dev/null +++ b/alerting/src/main/kotlin/org/opensearch/alerting/actionv2/ExecuteMonitorV2Request.kt @@ -0,0 +1,66 @@ +package org.opensearch.alerting.actionv2 + +import org.opensearch.action.ActionRequest +import org.opensearch.action.ActionRequestValidationException +import org.opensearch.action.ValidateActions +import org.opensearch.alerting.core.modelv2.MonitorV2 +import org.opensearch.common.unit.TimeValue +import org.opensearch.core.common.io.stream.StreamInput +import org.opensearch.core.common.io.stream.StreamOutput +import java.io.IOException + +class ExecuteMonitorV2Request : ActionRequest { + val dryrun: Boolean + val monitorId: String? // exactly one of monitorId or monitor must be non-null + val monitorV2: MonitorV2? + val requestStart: TimeValue? + val requestEnd: TimeValue + + constructor( + dryrun: Boolean, + monitorId: String?, + monitorV2: MonitorV2?, + requestStart: TimeValue? = null, + requestEnd: TimeValue + ) : super() { + this.dryrun = dryrun + this.monitorId = monitorId + this.monitorV2 = monitorV2 + this.requestStart = requestStart + this.requestEnd = requestEnd + } + + @Throws(IOException::class) + constructor(sin: StreamInput) : this( + sin.readBoolean(), // dryrun + sin.readOptionalString(), // monitorId + if (sin.readBoolean()) { + MonitorV2.readFrom(sin) // monitor + } else { + null + }, + sin.readOptionalTimeValue(), + sin.readTimeValue() // requestEnd + ) + + override fun validate(): ActionRequestValidationException? 
= + if (monitorV2 == null && monitorId == null) { + ValidateActions.addValidationError("Neither a monitor ID nor monitor object was supplied", null) + } else { + null + } + + @Throws(IOException::class) + override fun writeTo(out: StreamOutput) { + out.writeBoolean(dryrun) + out.writeOptionalString(monitorId) + if (monitorV2 != null) { + out.writeBoolean(true) + monitorV2.writeTo(out) + } else { + out.writeBoolean(false) + } + out.writeOptionalTimeValue(requestStart) + out.writeTimeValue(requestEnd) + } +} diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/actionv2/ExecuteMonitorV2Response.kt b/alerting/src/main/kotlin/org/opensearch/alerting/actionv2/ExecuteMonitorV2Response.kt new file mode 100644 index 000000000..6635f3791 --- /dev/null +++ b/alerting/src/main/kotlin/org/opensearch/alerting/actionv2/ExecuteMonitorV2Response.kt @@ -0,0 +1,33 @@ +package org.opensearch.alerting.actionv2 + +import org.opensearch.alerting.core.modelv2.MonitorV2RunResult +import org.opensearch.core.action.ActionResponse +import org.opensearch.core.common.io.stream.StreamInput +import org.opensearch.core.common.io.stream.StreamOutput +import org.opensearch.core.xcontent.ToXContent +import org.opensearch.core.xcontent.ToXContentObject +import org.opensearch.core.xcontent.XContentBuilder +import java.io.IOException + +class ExecuteMonitorV2Response : ActionResponse, ToXContentObject { + val monitorV2RunResult: MonitorV2RunResult<*> + + constructor(monitorV2RunResult: MonitorV2RunResult<*>) : super() { + this.monitorV2RunResult = monitorV2RunResult + } + + @Throws(IOException::class) + constructor(sin: StreamInput) : this( + MonitorV2RunResult.readFrom(sin) // monitorRunResult + ) + + @Throws(IOException::class) + override fun writeTo(out: StreamOutput) { + monitorV2RunResult.writeTo(out) + } + + @Throws(IOException::class) + override fun toXContent(builder: XContentBuilder, params: ToXContent.Params): XContentBuilder { + return monitorV2RunResult.toXContent(builder, 
ToXContent.EMPTY_PARAMS) + } +} diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/actionv2/GetMonitorV2Action.kt b/alerting/src/main/kotlin/org/opensearch/alerting/actionv2/GetMonitorV2Action.kt new file mode 100644 index 000000000..9fb0915c6 --- /dev/null +++ b/alerting/src/main/kotlin/org/opensearch/alerting/actionv2/GetMonitorV2Action.kt @@ -0,0 +1,10 @@ +package org.opensearch.alerting.actionv2 + +import org.opensearch.action.ActionType + +class GetMonitorV2Action private constructor() : ActionType(NAME, ::GetMonitorV2Response) { + companion object { + val INSTANCE = GetMonitorV2Action() + const val NAME = "cluster:admin/opensearch/alerting/v2/monitor/get" + } +} diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/actionv2/GetMonitorV2Request.kt b/alerting/src/main/kotlin/org/opensearch/alerting/actionv2/GetMonitorV2Request.kt new file mode 100644 index 000000000..a14f482e7 --- /dev/null +++ b/alerting/src/main/kotlin/org/opensearch/alerting/actionv2/GetMonitorV2Request.kt @@ -0,0 +1,47 @@ +package org.opensearch.alerting.actionv2 + +import org.opensearch.action.ActionRequest +import org.opensearch.action.ActionRequestValidationException +import org.opensearch.core.common.io.stream.StreamInput +import org.opensearch.core.common.io.stream.StreamOutput +import org.opensearch.search.fetch.subphase.FetchSourceContext +import java.io.IOException + +class GetMonitorV2Request : ActionRequest { + val monitorV2Id: String + val version: Long + val srcContext: FetchSourceContext? + + constructor( + monitorV2Id: String, + version: Long, + srcContext: FetchSourceContext? 
+ ) : super() { + this.monitorV2Id = monitorV2Id + this.version = version + this.srcContext = srcContext + } + + @Throws(IOException::class) + constructor(sin: StreamInput) : this( + sin.readString(), // monitorV2Id + sin.readLong(), // version + if (sin.readBoolean()) { + FetchSourceContext(sin) // srcContext + } else { + null + } + ) + + override fun validate(): ActionRequestValidationException? { + return null + } + + @Throws(IOException::class) + override fun writeTo(out: StreamOutput) { + out.writeString(monitorV2Id) + out.writeLong(version) + out.writeBoolean(srcContext != null) + srcContext?.writeTo(out) + } +} diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/actionv2/GetMonitorV2Response.kt b/alerting/src/main/kotlin/org/opensearch/alerting/actionv2/GetMonitorV2Response.kt new file mode 100644 index 000000000..fe083f5e0 --- /dev/null +++ b/alerting/src/main/kotlin/org/opensearch/alerting/actionv2/GetMonitorV2Response.kt @@ -0,0 +1,75 @@ +package org.opensearch.alerting.actionv2 + +import org.opensearch.alerting.core.modelv2.MonitorV2 +import org.opensearch.commons.alerting.util.IndexUtils.Companion._ID +import org.opensearch.commons.alerting.util.IndexUtils.Companion._PRIMARY_TERM +import org.opensearch.commons.alerting.util.IndexUtils.Companion._SEQ_NO +import org.opensearch.commons.alerting.util.IndexUtils.Companion._VERSION +import org.opensearch.commons.notifications.action.BaseResponse +import org.opensearch.core.common.io.stream.StreamInput +import org.opensearch.core.common.io.stream.StreamOutput +import org.opensearch.core.xcontent.ToXContent +import org.opensearch.core.xcontent.XContentBuilder +import java.io.IOException + +class GetMonitorV2Response : BaseResponse { + var id: String + var version: Long + var seqNo: Long + var primaryTerm: Long + var monitorV2: MonitorV2? + + constructor( + id: String, + version: Long, + seqNo: Long, + primaryTerm: Long, + monitorV2: MonitorV2? 
+ ) : super() { + this.id = id + this.version = version + this.seqNo = seqNo + this.primaryTerm = primaryTerm + this.monitorV2 = monitorV2 + } + + @Throws(IOException::class) + constructor(sin: StreamInput) : this( + id = sin.readString(), // id + version = sin.readLong(), // version + seqNo = sin.readLong(), // seqNo + primaryTerm = sin.readLong(), // primaryTerm + monitorV2 = if (sin.readBoolean()) { + MonitorV2.readFrom(sin) // monitorV2 + } else { + null + } + ) + + @Throws(IOException::class) + override fun writeTo(out: StreamOutput) { + out.writeString(id) + out.writeLong(version) + out.writeLong(seqNo) + out.writeLong(primaryTerm) + if (monitorV2 != null) { + out.writeBoolean(true) + monitorV2?.writeTo(out) + } else { + out.writeBoolean(false) + } + } + + @Throws(IOException::class) + override fun toXContent(builder: XContentBuilder, params: ToXContent.Params): XContentBuilder { + builder.startObject() + .field(_ID, id) + .field(_VERSION, version) + .field(_SEQ_NO, seqNo) + .field(_PRIMARY_TERM, primaryTerm) + if (monitorV2 != null) { + builder.field("monitorV2", monitorV2) + } + return builder.endObject() + } +} diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/actionv2/IndexMonitorV2Action.kt b/alerting/src/main/kotlin/org/opensearch/alerting/actionv2/IndexMonitorV2Action.kt new file mode 100644 index 000000000..cff851598 --- /dev/null +++ b/alerting/src/main/kotlin/org/opensearch/alerting/actionv2/IndexMonitorV2Action.kt @@ -0,0 +1,10 @@ +package org.opensearch.alerting.actionv2 + +import org.opensearch.action.ActionType + +class IndexMonitorV2Action private constructor() : ActionType(NAME, ::IndexMonitorV2Response) { + companion object { + val INSTANCE = IndexMonitorV2Action() + const val NAME = "cluster:admin/opensearch/alerting/v2/monitor/write" + } +} diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/actionv2/IndexMonitorV2Request.kt b/alerting/src/main/kotlin/org/opensearch/alerting/actionv2/IndexMonitorV2Request.kt new file 
mode 100644 index 000000000..39b0d594e --- /dev/null +++ b/alerting/src/main/kotlin/org/opensearch/alerting/actionv2/IndexMonitorV2Request.kt @@ -0,0 +1,64 @@ +package org.opensearch.alerting.actionv2 + +import org.opensearch.action.ActionRequest +import org.opensearch.action.ActionRequestValidationException +import org.opensearch.action.support.WriteRequest +import org.opensearch.alerting.core.modelv2.MonitorV2 +import org.opensearch.core.common.io.stream.StreamInput +import org.opensearch.core.common.io.stream.StreamOutput +import org.opensearch.rest.RestRequest +import java.io.IOException + +class IndexMonitorV2Request : ActionRequest { + val monitorId: String + val seqNo: Long + val primaryTerm: Long + val refreshPolicy: WriteRequest.RefreshPolicy + val method: RestRequest.Method + var monitorV2: MonitorV2 +// val rbacRoles: List? + + constructor( + monitorId: String, + seqNo: Long, + primaryTerm: Long, + refreshPolicy: WriteRequest.RefreshPolicy, + method: RestRequest.Method, + monitorV2: MonitorV2 +// rbacRoles: List? = null + ) : super() { + this.monitorId = monitorId + this.seqNo = seqNo + this.primaryTerm = primaryTerm + this.refreshPolicy = refreshPolicy + this.method = method + this.monitorV2 = monitorV2 +// this.rbacRoles = rbacRoles + } + + @Throws(IOException::class) + constructor(sin: StreamInput) : this( + monitorId = sin.readString(), + seqNo = sin.readLong(), + primaryTerm = sin.readLong(), + refreshPolicy = WriteRequest.RefreshPolicy.readFrom(sin), + method = sin.readEnum(RestRequest.Method::class.java), + monitorV2 = MonitorV2.readFrom(sin) +// rbacRoles = sin.readOptionalStringList() + ) + + override fun validate(): ActionRequestValidationException? 
{ + return null + } + + @Throws(IOException::class) + override fun writeTo(out: StreamOutput) { + out.writeString(monitorId) + out.writeLong(seqNo) + out.writeLong(primaryTerm) + refreshPolicy.writeTo(out) + out.writeEnum(method) + MonitorV2.writeTo(out, monitorV2) +// out.writeOptionalStringCollection(rbacRoles) + } +} diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/actionv2/IndexMonitorV2Response.kt b/alerting/src/main/kotlin/org/opensearch/alerting/actionv2/IndexMonitorV2Response.kt new file mode 100644 index 000000000..35640330b --- /dev/null +++ b/alerting/src/main/kotlin/org/opensearch/alerting/actionv2/IndexMonitorV2Response.kt @@ -0,0 +1,68 @@ +package org.opensearch.alerting.actionv2 + +import org.opensearch.alerting.core.modelv2.MonitorV2 +import org.opensearch.commons.alerting.util.IndexUtils.Companion._ID +import org.opensearch.commons.alerting.util.IndexUtils.Companion._PRIMARY_TERM +import org.opensearch.commons.alerting.util.IndexUtils.Companion._SEQ_NO +import org.opensearch.commons.alerting.util.IndexUtils.Companion._VERSION +import org.opensearch.commons.notifications.action.BaseResponse +import org.opensearch.core.common.io.stream.StreamInput +import org.opensearch.core.common.io.stream.StreamOutput +import org.opensearch.core.xcontent.ToXContent +import org.opensearch.core.xcontent.XContentBuilder +import java.io.IOException + +class IndexMonitorV2Response : BaseResponse { + var id: String + var version: Long + var seqNo: Long + var primaryTerm: Long + var monitorV2: MonitorV2 + + constructor( + id: String, + version: Long, + seqNo: Long, + primaryTerm: Long, + monitorV2: MonitorV2 + ) : super() { + this.id = id + this.version = version + this.seqNo = seqNo + this.primaryTerm = primaryTerm + this.monitorV2 = monitorV2 + } + + @Throws(IOException::class) + constructor(sin: StreamInput) : this( + sin.readString(), // id + sin.readLong(), // version + sin.readLong(), // seqNo + sin.readLong(), // primaryTerm + MonitorV2.readFrom(sin) 
// monitorV2 + ) + + @Throws(IOException::class) + override fun writeTo(out: StreamOutput) { + out.writeString(id) + out.writeLong(version) + out.writeLong(seqNo) + out.writeLong(primaryTerm) + MonitorV2.writeTo(out, monitorV2) + } + + @Throws(IOException::class) + override fun toXContent(builder: XContentBuilder, params: ToXContent.Params): XContentBuilder { + return builder.startObject() + .field(_ID, id) + .field(_VERSION, version) + .field(_SEQ_NO, seqNo) + .field(_PRIMARY_TERM, primaryTerm) + .field(MONITOR_V2_FIELD, monitorV2) + .endObject() + } + + companion object { + const val MONITOR_V2_FIELD = "monitor_v2" + } +} diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/actionv2/SearchMonitorV2Action.kt b/alerting/src/main/kotlin/org/opensearch/alerting/actionv2/SearchMonitorV2Action.kt new file mode 100644 index 000000000..d83ffd510 --- /dev/null +++ b/alerting/src/main/kotlin/org/opensearch/alerting/actionv2/SearchMonitorV2Action.kt @@ -0,0 +1,11 @@ +package org.opensearch.alerting.actionv2 + +import org.opensearch.action.ActionType +import org.opensearch.action.search.SearchResponse + +class SearchMonitorV2Action private constructor() : ActionType(NAME, ::SearchResponse) { + companion object { + val INSTANCE = SearchMonitorV2Action() + const val NAME = "cluster:admin/opensearch/alerting/v2/monitor/search" + } +} diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/actionv2/SearchMonitorV2Request.kt b/alerting/src/main/kotlin/org/opensearch/alerting/actionv2/SearchMonitorV2Request.kt new file mode 100644 index 000000000..51fba09aa --- /dev/null +++ b/alerting/src/main/kotlin/org/opensearch/alerting/actionv2/SearchMonitorV2Request.kt @@ -0,0 +1,32 @@ +package org.opensearch.alerting.actionv2 + +import org.opensearch.action.ActionRequest +import org.opensearch.action.ActionRequestValidationException +import org.opensearch.action.search.SearchRequest +import org.opensearch.core.common.io.stream.StreamInput +import 
org.opensearch.core.common.io.stream.StreamOutput +import java.io.IOException + +class SearchMonitorV2Request : ActionRequest { + val searchRequest: SearchRequest + + constructor( + searchRequest: SearchRequest + ) : super() { + this.searchRequest = searchRequest + } + + @Throws(IOException::class) + constructor(sin: StreamInput) : this( + searchRequest = SearchRequest(sin) + ) + + override fun validate(): ActionRequestValidationException? { + return null + } + + @Throws(IOException::class) + override fun writeTo(out: StreamOutput) { + searchRequest.writeTo(out) + } +} diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestDeleteMonitorV2Action.kt b/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestDeleteMonitorV2Action.kt index c27d4795b..76b3c656e 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestDeleteMonitorV2Action.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestDeleteMonitorV2Action.kt @@ -4,9 +4,9 @@ import org.apache.logging.log4j.LogManager import org.apache.logging.log4j.Logger import org.opensearch.action.support.WriteRequest.RefreshPolicy import org.opensearch.alerting.AlertingPlugin +import org.opensearch.alerting.actionv2.DeleteMonitorV2Action +import org.opensearch.alerting.actionv2.DeleteMonitorV2Request import org.opensearch.alerting.util.REFRESH -import org.opensearch.commons.alerting.action.AlertingActions -import org.opensearch.commons.alerting.action.DeleteMonitorV2Request import org.opensearch.rest.BaseRestHandler import org.opensearch.rest.RestHandler.Route import org.opensearch.rest.RestRequest @@ -41,7 +41,7 @@ class RestDeleteMonitorV2Action : BaseRestHandler() { val deleteMonitorV2Request = DeleteMonitorV2Request(monitorV2Id, refreshPolicy) return RestChannelConsumer { channel -> - client.execute(AlertingActions.DELETE_MONITOR_V2_ACTION_TYPE, deleteMonitorV2Request, RestToXContentListener(channel)) + client.execute(DeleteMonitorV2Action.INSTANCE, 
deleteMonitorV2Request, RestToXContentListener(channel)) } } } diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestExecuteMonitorV2Action.kt b/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestExecuteMonitorV2Action.kt index d05126c1a..6c74ada9f 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestExecuteMonitorV2Action.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestExecuteMonitorV2Action.kt @@ -2,10 +2,10 @@ package org.opensearch.alerting.resthandler import org.apache.logging.log4j.LogManager import org.opensearch.alerting.AlertingPlugin +import org.opensearch.alerting.actionv2.ExecuteMonitorV2Action +import org.opensearch.alerting.actionv2.ExecuteMonitorV2Request +import org.opensearch.alerting.core.modelv2.MonitorV2 import org.opensearch.common.unit.TimeValue -import org.opensearch.commons.alerting.action.AlertingActions -import org.opensearch.commons.alerting.action.ExecuteMonitorV2Request -import org.opensearch.commons.alerting.model.MonitorV2 import org.opensearch.commons.alerting.util.AlertingException import org.opensearch.core.xcontent.XContentParser.Token.START_OBJECT import org.opensearch.core.xcontent.XContentParserUtils.ensureExpectedToken @@ -46,7 +46,7 @@ class RestExecuteMonitorV2Action : BaseRestHandler() { if (request.hasParam("monitorV2Id")) { val monitorV2Id = request.param("monitorV2Id") val execMonitorV2Request = ExecuteMonitorV2Request(dryrun, monitorV2Id, null, null, requestEnd) - client.execute(AlertingActions.EXECUTE_MONITOR_V2_ACTION_TYPE, execMonitorV2Request, RestToXContentListener(channel)) + client.execute(ExecuteMonitorV2Action.INSTANCE, execMonitorV2Request, RestToXContentListener(channel)) } else { val xcp = request.contentParser() ensureExpectedToken(START_OBJECT, xcp.nextToken(), xcp) @@ -59,7 +59,7 @@ class RestExecuteMonitorV2Action : BaseRestHandler() { } val execMonitorV2Request = ExecuteMonitorV2Request(dryrun, null, monitorV2, 
null, requestEnd) - client.execute(AlertingActions.EXECUTE_MONITOR_V2_ACTION_TYPE, execMonitorV2Request, RestToXContentListener(channel)) + client.execute(ExecuteMonitorV2Action.INSTANCE, execMonitorV2Request, RestToXContentListener(channel)) } } } diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestGetMonitorV2Action.kt b/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestGetMonitorV2Action.kt index efbb3f044..a6a7e6e0b 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestGetMonitorV2Action.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestGetMonitorV2Action.kt @@ -2,9 +2,9 @@ package org.opensearch.alerting.resthandler import org.apache.logging.log4j.LogManager import org.opensearch.alerting.AlertingPlugin +import org.opensearch.alerting.actionv2.GetMonitorV2Action +import org.opensearch.alerting.actionv2.GetMonitorV2Request import org.opensearch.alerting.util.context -import org.opensearch.commons.alerting.action.AlertingActions -import org.opensearch.commons.alerting.action.GetMonitorV2Request import org.opensearch.rest.BaseRestHandler import org.opensearch.rest.RestHandler.Route import org.opensearch.rest.RestRequest @@ -48,7 +48,7 @@ class RestGetMonitorV2Action : BaseRestHandler() { val getMonitorV2Request = GetMonitorV2Request(monitorV2Id, RestActions.parseVersion(request), srcContext) return RestChannelConsumer { channel -> - client.execute(AlertingActions.GET_MONITOR_V2_ACTION_TYPE, getMonitorV2Request, RestToXContentListener(channel)) + client.execute(GetMonitorV2Action.INSTANCE, getMonitorV2Request, RestToXContentListener(channel)) } } } diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestIndexMonitorV2Action.kt b/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestIndexMonitorV2Action.kt index 15ee7cc22..fd1db99c6 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestIndexMonitorV2Action.kt +++ 
b/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestIndexMonitorV2Action.kt @@ -3,12 +3,12 @@ package org.opensearch.alerting.resthandler import org.apache.logging.log4j.LogManager import org.opensearch.action.support.WriteRequest import org.opensearch.alerting.AlertingPlugin +import org.opensearch.alerting.actionv2.IndexMonitorV2Action +import org.opensearch.alerting.actionv2.IndexMonitorV2Request +import org.opensearch.alerting.core.modelv2.MonitorV2 import org.opensearch.alerting.util.IF_PRIMARY_TERM import org.opensearch.alerting.util.IF_SEQ_NO import org.opensearch.alerting.util.REFRESH -import org.opensearch.commons.alerting.action.AlertingActions -import org.opensearch.commons.alerting.action.IndexMonitorV2Request -import org.opensearch.commons.alerting.model.MonitorV2 import org.opensearch.commons.alerting.util.AlertingException import org.opensearch.core.xcontent.XContentParser.Token import org.opensearch.core.xcontent.XContentParserUtils.ensureExpectedToken @@ -71,7 +71,7 @@ class RestIndexMonitorV2Action : BaseRestHandler() { val indexMonitorV2Request = IndexMonitorV2Request(id, seqNo, primaryTerm, refreshPolicy, request.method(), monitorV2) return RestChannelConsumer { channel -> - client.execute(AlertingActions.INDEX_MONITOR_V2_ACTION_TYPE, indexMonitorV2Request, RestToXContentListener(channel)) + client.execute(IndexMonitorV2Action.INSTANCE, indexMonitorV2Request, RestToXContentListener(channel)) } } } diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestSearchMonitorV2Action.kt b/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestSearchMonitorV2Action.kt index 7cb79a171..2b8600bca 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestSearchMonitorV2Action.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestSearchMonitorV2Action.kt @@ -4,6 +4,8 @@ import org.apache.logging.log4j.LogManager import org.opensearch.action.search.SearchRequest import 
org.opensearch.action.search.SearchResponse import org.opensearch.alerting.AlertingPlugin +import org.opensearch.alerting.actionv2.SearchMonitorV2Action +import org.opensearch.alerting.actionv2.SearchMonitorV2Request import org.opensearch.alerting.settings.AlertingSettings import org.opensearch.alerting.util.context import org.opensearch.cluster.service.ClusterService @@ -11,8 +13,6 @@ import org.opensearch.common.settings.Settings import org.opensearch.common.xcontent.LoggingDeprecationHandler import org.opensearch.common.xcontent.XContentFactory.jsonBuilder import org.opensearch.common.xcontent.XContentType -import org.opensearch.commons.alerting.action.AlertingActions -import org.opensearch.commons.alerting.action.SearchMonitorV2Request import org.opensearch.commons.alerting.model.ScheduledJob import org.opensearch.commons.alerting.model.ScheduledJob.Companion.SCHEDULED_JOBS_INDEX import org.opensearch.core.common.bytes.BytesReference @@ -75,7 +75,7 @@ class RestSearchMonitorV2Action( val searchMonitorV2Request = SearchMonitorV2Request(searchRequest) return RestChannelConsumer { channel -> - client.execute(AlertingActions.SEARCH_MONITORS_V2_ACTION_TYPE, searchMonitorV2Request, searchMonitorResponse(channel)) + client.execute(SearchMonitorV2Action.INSTANCE, searchMonitorV2Request, searchMonitorResponse(channel)) } } diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/script/PPLTriggerExecutionContext.kt b/alerting/src/main/kotlin/org/opensearch/alerting/script/PPLTriggerExecutionContext.kt index a8b236673..5eac38663 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/script/PPLTriggerExecutionContext.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/script/PPLTriggerExecutionContext.kt @@ -1,18 +1,18 @@ package org.opensearch.alerting.script -import org.opensearch.commons.alerting.model.MonitorV2 -import org.opensearch.commons.alerting.model.PPLMonitorRunResult.Companion.PPL_QUERY_RESULTS_FIELD -import 
org.opensearch.commons.alerting.model.PPLTrigger -import org.opensearch.commons.alerting.model.PPLTrigger.Companion.PPL_TRIGGER_FIELD +import org.opensearch.alerting.core.modelv2.PPLMonitor +import org.opensearch.alerting.core.modelv2.PPLMonitorRunResult.Companion.PPL_QUERY_RESULTS_FIELD +import org.opensearch.alerting.core.modelv2.PPLTrigger +import org.opensearch.alerting.core.modelv2.PPLTrigger.Companion.PPL_TRIGGER_FIELD import java.time.Instant data class PPLTriggerExecutionContext( - override val monitorV2: MonitorV2, + override val monitorV2: PPLMonitor, override val periodStart: Instant, override val periodEnd: Instant, override val error: Exception? = null, val pplTrigger: PPLTrigger, - val pplQueryResults: Map // keys are PPL query result fields, not trigger ID + val pplQueryResults: Map // keys are PPL query result fields ) : TriggerV2ExecutionContext(monitorV2, periodStart, periodEnd, error) { override fun asTemplateArg(): Map { diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/script/TriggerV2ExecutionContext.kt b/alerting/src/main/kotlin/org/opensearch/alerting/script/TriggerV2ExecutionContext.kt index 8e1cd8b9f..ebbbbfc2d 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/script/TriggerV2ExecutionContext.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/script/TriggerV2ExecutionContext.kt @@ -1,8 +1,8 @@ package org.opensearch.alerting.script -import org.opensearch.commons.alerting.model.MonitorV2 -import org.opensearch.commons.alerting.model.MonitorV2RunResult -import org.opensearch.commons.alerting.model.TriggerV2 +import org.opensearch.alerting.core.modelv2.MonitorV2 +import org.opensearch.alerting.core.modelv2.MonitorV2RunResult +import org.opensearch.alerting.core.modelv2.TriggerV2 import java.time.Instant abstract class TriggerV2ExecutionContext( diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/service/DeleteMonitorService.kt 
b/alerting/src/main/kotlin/org/opensearch/alerting/service/DeleteMonitorService.kt index cd6c96516..c1a56a4c6 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/service/DeleteMonitorService.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/service/DeleteMonitorService.kt @@ -22,6 +22,7 @@ import org.opensearch.action.support.IndicesOptions import org.opensearch.action.support.WriteRequest.RefreshPolicy import org.opensearch.action.support.clustermanager.AcknowledgedResponse import org.opensearch.alerting.MonitorMetadataService +import org.opensearch.alerting.actionv2.DeleteMonitorV2Response import org.opensearch.alerting.core.lock.LockModel import org.opensearch.alerting.core.lock.LockService import org.opensearch.alerting.opensearchapi.suspendUntil @@ -29,7 +30,6 @@ import org.opensearch.alerting.util.ScheduledJobUtils.Companion.WORKFLOW_DELEGAT import org.opensearch.alerting.util.ScheduledJobUtils.Companion.WORKFLOW_MONITOR_PATH import org.opensearch.alerting.util.use import org.opensearch.commons.alerting.action.DeleteMonitorResponse -import org.opensearch.commons.alerting.action.DeleteMonitorV2Response import org.opensearch.commons.alerting.model.Monitor import org.opensearch.commons.alerting.model.ScheduledJob import org.opensearch.commons.alerting.util.AlertingException diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportDeleteMonitorV2Action.kt b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportDeleteMonitorV2Action.kt index 4abe2b21c..c2852b022 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportDeleteMonitorV2Action.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportDeleteMonitorV2Action.kt @@ -6,14 +6,14 @@ import kotlinx.coroutines.launch import org.apache.logging.log4j.LogManager import org.opensearch.action.support.ActionFilters import org.opensearch.action.support.HandledTransportAction +import 
org.opensearch.alerting.actionv2.DeleteMonitorV2Action +import org.opensearch.alerting.actionv2.DeleteMonitorV2Request +import org.opensearch.alerting.actionv2.DeleteMonitorV2Response import org.opensearch.alerting.service.DeleteMonitorService import org.opensearch.alerting.settings.AlertingSettings import org.opensearch.cluster.service.ClusterService import org.opensearch.common.inject.Inject import org.opensearch.common.settings.Settings -import org.opensearch.commons.alerting.action.AlertingActions -import org.opensearch.commons.alerting.action.DeleteMonitorV2Request -import org.opensearch.commons.alerting.action.DeleteMonitorV2Response import org.opensearch.core.action.ActionListener import org.opensearch.core.xcontent.NamedXContentRegistry import org.opensearch.tasks.Task @@ -31,7 +31,7 @@ class TransportDeleteMonitorV2Action @Inject constructor( settings: Settings, val xContentRegistry: NamedXContentRegistry ) : HandledTransportAction( - AlertingActions.DELETE_MONITOR_V2_ACTION_NAME, transportService, actionFilters, ::DeleteMonitorV2Request + DeleteMonitorV2Action.NAME, transportService, actionFilters, ::DeleteMonitorV2Request ), SecureTransportAction { diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportExecuteMonitorV2Action.kt b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportExecuteMonitorV2Action.kt index 984fa0c96..6a5654ad6 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportExecuteMonitorV2Action.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportExecuteMonitorV2Action.kt @@ -10,6 +10,12 @@ import org.opensearch.action.get.GetResponse import org.opensearch.action.support.ActionFilters import org.opensearch.action.support.HandledTransportAction import org.opensearch.alerting.MonitorRunnerService +import org.opensearch.alerting.actionv2.ExecuteMonitorV2Action +import org.opensearch.alerting.actionv2.ExecuteMonitorV2Request +import 
org.opensearch.alerting.actionv2.ExecuteMonitorV2Response +import org.opensearch.alerting.core.modelv2.MonitorV2 +import org.opensearch.alerting.core.modelv2.PPLMonitor +import org.opensearch.alerting.core.modelv2.PPLMonitor.Companion.PPL_MONITOR_TYPE import org.opensearch.alerting.settings.AlertingSettings import org.opensearch.cluster.service.ClusterService import org.opensearch.common.inject.Inject @@ -17,12 +23,6 @@ import org.opensearch.common.settings.Settings import org.opensearch.common.xcontent.LoggingDeprecationHandler import org.opensearch.common.xcontent.XContentHelper import org.opensearch.common.xcontent.XContentType -import org.opensearch.commons.alerting.action.AlertingActions -import org.opensearch.commons.alerting.action.ExecuteMonitorV2Request -import org.opensearch.commons.alerting.action.ExecuteMonitorV2Response -import org.opensearch.commons.alerting.model.MonitorV2 -import org.opensearch.commons.alerting.model.PPLMonitor -import org.opensearch.commons.alerting.model.PPLMonitor.Companion.PPL_MONITOR_TYPE import org.opensearch.commons.alerting.model.ScheduledJob import org.opensearch.commons.alerting.util.AlertingException import org.opensearch.core.action.ActionListener @@ -44,7 +44,7 @@ class TransportExecuteMonitorV2Action @Inject constructor( val xContentRegistry: NamedXContentRegistry, private val settings: Settings ) : HandledTransportAction( - AlertingActions.EXECUTE_MONITOR_V2_ACTION_NAME, transportService, actionFilters, ::ExecuteMonitorV2Request + ExecuteMonitorV2Action.NAME, transportService, actionFilters, ::ExecuteMonitorV2Request ) { @Volatile private var indexTimeout = AlertingSettings.INDEX_TIMEOUT.get(settings) diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportGetMonitorV2Action.kt b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportGetMonitorV2Action.kt index 87e19db2b..034a90d99 100644 --- 
a/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportGetMonitorV2Action.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportGetMonitorV2Action.kt @@ -8,6 +8,10 @@ import org.opensearch.action.get.GetRequest import org.opensearch.action.get.GetResponse import org.opensearch.action.support.ActionFilters import org.opensearch.action.support.HandledTransportAction +import org.opensearch.alerting.actionv2.GetMonitorV2Action +import org.opensearch.alerting.actionv2.GetMonitorV2Request +import org.opensearch.alerting.actionv2.GetMonitorV2Response +import org.opensearch.alerting.core.modelv2.MonitorV2 import org.opensearch.alerting.settings.AlertingSettings import org.opensearch.cluster.service.ClusterService import org.opensearch.common.inject.Inject @@ -15,10 +19,6 @@ import org.opensearch.common.settings.Settings import org.opensearch.common.xcontent.LoggingDeprecationHandler import org.opensearch.common.xcontent.XContentHelper import org.opensearch.common.xcontent.XContentType -import org.opensearch.commons.alerting.action.AlertingActions -import org.opensearch.commons.alerting.action.GetMonitorV2Request -import org.opensearch.commons.alerting.action.GetMonitorV2Response -import org.opensearch.commons.alerting.model.MonitorV2 import org.opensearch.commons.alerting.model.ScheduledJob import org.opensearch.commons.alerting.util.AlertingException import org.opensearch.core.action.ActionListener @@ -39,7 +39,7 @@ class TransportGetMonitorV2Action @Inject constructor( val clusterService: ClusterService, settings: Settings, ) : HandledTransportAction( - AlertingActions.GET_MONITOR_V2_ACTION_NAME, + GetMonitorV2Action.NAME, transportService, actionFilters, ::GetMonitorV2Request diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportIndexMonitorV2Action.kt b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportIndexMonitorV2Action.kt index be5e1ed5e..373d4311e 100644 --- 
a/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportIndexMonitorV2Action.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportIndexMonitorV2Action.kt @@ -21,7 +21,12 @@ import org.opensearch.action.search.SearchResponse import org.opensearch.action.support.ActionFilters import org.opensearch.action.support.HandledTransportAction import org.opensearch.action.support.clustermanager.AcknowledgedResponse +import org.opensearch.alerting.actionv2.IndexMonitorV2Action +import org.opensearch.alerting.actionv2.IndexMonitorV2Request +import org.opensearch.alerting.actionv2.IndexMonitorV2Response import org.opensearch.alerting.core.ScheduledJobIndices +import org.opensearch.alerting.core.modelv2.MonitorV2 +import org.opensearch.alerting.core.modelv2.PPLMonitor import org.opensearch.alerting.opensearchapi.suspendUntil import org.opensearch.alerting.settings.AlertingSettings import org.opensearch.alerting.settings.AlertingSettings.Companion.ALERTING_MAX_MONITORS @@ -35,12 +40,7 @@ import org.opensearch.common.xcontent.LoggingDeprecationHandler import org.opensearch.common.xcontent.XContentFactory.jsonBuilder import org.opensearch.common.xcontent.XContentHelper import org.opensearch.common.xcontent.XContentType -import org.opensearch.commons.alerting.action.AlertingActions -import org.opensearch.commons.alerting.action.IndexMonitorV2Request -import org.opensearch.commons.alerting.action.IndexMonitorV2Response import org.opensearch.commons.alerting.model.Monitor -import org.opensearch.commons.alerting.model.MonitorV2 -import org.opensearch.commons.alerting.model.PPLMonitor import org.opensearch.commons.alerting.model.ScheduledJob import org.opensearch.commons.alerting.model.ScheduledJob.Companion.SCHEDULED_JOBS_INDEX import org.opensearch.commons.alerting.util.AlertingException @@ -69,7 +69,7 @@ class TransportIndexMonitorV2Action @Inject constructor( val xContentRegistry: NamedXContentRegistry, val namedWriteableRegistry: 
NamedWriteableRegistry, ) : HandledTransportAction( - AlertingActions.INDEX_MONITOR_V2_ACTION_NAME, transportService, actionFilters, ::IndexMonitorV2Request + IndexMonitorV2Action.NAME, transportService, actionFilters, ::IndexMonitorV2Request ), SecureTransportAction { diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportSearchMonitorV2Action.kt b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportSearchMonitorV2Action.kt index 1a262ac94..eb05cc0c8 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportSearchMonitorV2Action.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportSearchMonitorV2Action.kt @@ -4,13 +4,13 @@ import org.apache.logging.log4j.LogManager import org.opensearch.action.search.SearchResponse import org.opensearch.action.support.ActionFilters import org.opensearch.action.support.HandledTransportAction +import org.opensearch.alerting.actionv2.SearchMonitorV2Action +import org.opensearch.alerting.actionv2.SearchMonitorV2Request +import org.opensearch.alerting.core.modelv2.MonitorV2.Companion.MONITOR_V2_TYPE import org.opensearch.alerting.settings.AlertingSettings import org.opensearch.cluster.service.ClusterService import org.opensearch.common.inject.Inject import org.opensearch.common.settings.Settings -import org.opensearch.commons.alerting.action.AlertingActions -import org.opensearch.commons.alerting.action.SearchMonitorV2Request -import org.opensearch.commons.alerting.model.MonitorV2.Companion.MONITOR_V2_TYPE import org.opensearch.commons.alerting.util.AlertingException import org.opensearch.core.action.ActionListener import org.opensearch.core.common.io.stream.NamedWriteableRegistry @@ -30,7 +30,7 @@ class TransportSearchMonitorV2Action @Inject constructor( actionFilters: ActionFilters, val namedWriteableRegistry: NamedWriteableRegistry ) : HandledTransportAction( - AlertingActions.SEARCH_MONITORS_V2_ACTION_NAME, transportService, actionFilters, 
// Copies selected jars extracted from the opensearch-sql plugin (under sqlJarDirectory) into $buildDir/classes.
// NOTE(review): replaceSqlJar below deletes opensearch-sql-${opensearch_build}.jar and produces
// opensearch-sql-thin-${opensearch_build}.jar, and this task depends on replaceSqlJar, so the first
// include looks like it can never match — confirm whether the thin jar was intended here.
task addJarsToClasspath(type: Copy) {
    from(fileTree(dir: sqlJarDirectory)) {
        include "opensearch-sql-${opensearch_build}.jar"
        include "ppl-${opensearch_build}.jar"
        include "protocol-${opensearch_build}.jar"
        include "core-${opensearch_build}.jar"
    }
    into("$buildDir/classes")
}

// Unpacks the opensearch-sql plugin zip (resolved through the zipArchive configuration) into sqlJarDirectory.
task extractSqlJar(type: Copy) {
    // The closure defers resolving zipArchive until the task's inputs are needed, instead of
    // resolving the configuration during the configuration phase of every build invocation.
    // The former argument-less mustRunAfter() call declared no ordering and has been dropped.
    from {
        zipTree(configurations.zipArchive.find { it.name.startsWith("opensearch-sql-plugin") })
    }
    into sqlJarDirectory
}

// Extracts only the org.opensearch.sql classes from the full opensearch-sql jar.
task extractSqlClass(type: Copy, dependsOn: [extractSqlJar]) {
    from zipTree("${sqlJarDirectory}/opensearch-sql-${opensearch_build}.jar")
    into("$buildDir/opensearch-sql")
    include 'org/opensearch/sql/**'
}

// Repackages the extracted classes as the "thin" jar and removes the original full jar.
task replaceSqlJar(type: Jar, dependsOn: [extractSqlClass]) {
    from("$buildDir/opensearch-sql")
    archiveFileName = "opensearch-sql-thin-${opensearch_build}.jar"
    destinationDirectory = file(sqlJarDirectory)
    doLast {
        file("${sqlJarDirectory}/opensearch-sql-${opensearch_build}.jar").delete()
    }
}

tasks.addJarsToClasspath.dependsOn(replaceSqlJar)

// Both compilers need the SQL plugin jars prepared before they run.
compileJava {
    dependsOn addJarsToClasspath
}

compileKotlin {
    dependsOn addJarsToClasspath
}
/**
 * Alert generated by Alerting V2.
 * An alert is created when a Trigger's trigger conditions are met.
 *
 * @property id Alert ID. Defaults to [NO_ID].
 * @property version Version number of the Alert. Defaults to [NO_VERSION].
 * @property schemaVersion Version of the alerting-alerts index schema when this Alert was indexed. Defaults to [NO_SCHEMA_VERSION].
 * @property monitorId ID of the Monitor that generated this Alert.
 * @property monitorName Name of the Monitor that generated this Alert.
 * @property monitorVersion Version of the Monitor at the time it generated this Alert.
 * @property triggerId ID of the specific Trigger that generated this alert.
 * @property triggerName Name of the trigger that generated this alert.
 * @property queryResults Results from the Monitor's query that caused the Trigger to fire.
 *                        Stored as a map of field names to their values.
 * @property triggeredTime Timestamp when the Alert was generated.
 * @property expirationTime Timestamp when the Alert should be considered expired.
 * @property errorMessage Optional error message if there were issues during Trigger execution.
 *                        Null indicates no errors occurred.
 * @property severity Severity level of the alert (e.g., "HIGH", "MEDIUM", "LOW").
 * @property executionId Optional ID for the Monitor execution that generated this Alert.
 *
 * @see MonitorV2 For the monitor that generates alerts
 * @see TriggerV2 For the trigger conditions that create alerts
 *
 * Lifecycle:
 * 1. Created when a TriggerV2's condition is met. The TriggerV2 fires and forgets the Alert.
 * 2. Stored in the alerts index. AlertV2s are stateless. (e.g. they are never ACTIVE or COMPLETED)
 * 3. Alert is permanently deleted at [expirationTime]
 */
data class AlertV2(
    val id: String = NO_ID,
    val version: Long = NO_VERSION,
    val schemaVersion: Int = NO_SCHEMA_VERSION,
    val monitorId: String,
    val monitorName: String,
    val monitorVersion: Long,
//    val monitorUser: User?,
    val triggerId: String,
    val triggerName: String,
    val queryResults: Map<String, Any>,
    val triggeredTime: Instant,
    val expirationTime: Instant,
    val errorMessage: String? = null,
    val severity: Severity,
    val executionId: String? = null
) : Writeable, ToXContent {

    /**
     * Deserializes an alert from the transport stream.
     * Fields are read in exactly the order [writeTo] writes them.
     */
    @Throws(IOException::class)
    constructor(sin: StreamInput) : this(
        id = sin.readString(),
        version = sin.readLong(),
        schemaVersion = sin.readInt(),
        monitorId = sin.readString(),
        monitorName = sin.readString(),
        monitorVersion = sin.readLong(),
//        monitorUser = if (sin.readBoolean()) {
//            User(sin)
//        } else {
//            null
//        },
        triggerId = sin.readString(),
        triggerName = sin.readString(),
        // writeTo always writes the (non-null) queryResults map, so a null here means a corrupt stream
        queryResults = sin.readMap()!!.toMap(),
        triggeredTime = sin.readInstant(),
        expirationTime = sin.readInstant(),
        errorMessage = sin.readOptionalString(),
        severity = sin.readEnum(Severity::class.java),
        executionId = sin.readOptionalString()
    )

    /** Serializes this alert to the transport stream; the field order must match the [StreamInput] constructor. */
    @Throws(IOException::class)
    override fun writeTo(out: StreamOutput) {
        out.writeString(id)
        out.writeLong(version)
        out.writeInt(schemaVersion)
        out.writeString(monitorId)
        out.writeString(monitorName)
        out.writeLong(monitorVersion)
//        out.writeBoolean(monitorUser != null)
//        monitorUser?.writeTo(out)
        out.writeString(triggerId)
        out.writeString(triggerName)
        out.writeMap(queryResults)
        out.writeInstant(triggeredTime)
        out.writeInstant(expirationTime)
        out.writeOptionalString(errorMessage)
        out.writeEnum(severity)
        out.writeOptionalString(executionId)
    }

    /** Writes this alert as a single XContent object; [parse] reads the same field names back. */
    override fun toXContent(builder: XContentBuilder, params: ToXContent.Params): XContentBuilder {
        builder.startObject()
            .field(ALERT_ID_FIELD, id)
            .field(ALERT_VERSION_FIELD, version)
            .field(MONITOR_ID_FIELD, monitorId)
            .field(SCHEMA_VERSION_FIELD, schemaVersion)
            .field(MONITOR_VERSION_FIELD, monitorVersion)
            .field(MONITOR_NAME_FIELD, monitorName)
            .field(EXECUTION_ID_FIELD, executionId)
            .field(TRIGGER_ID_FIELD, triggerId)
            .field(TRIGGER_NAME_FIELD, triggerName)
            .field(QUERY_RESULTS_FIELD, queryResults)
            .field(ERROR_MESSAGE_FIELD, errorMessage)
            .field(SEVERITY_FIELD, severity.value)
            .nonOptionalTimeField(TRIGGERED_TIME_FIELD, triggeredTime)
            .nonOptionalTimeField(EXPIRATION_TIME_FIELD, expirationTime)
            .endObject()

//        if (!secure) {
//            builder.optionalUserField(MONITOR_USER_FIELD, monitorUser)
//        }

        return builder
    }

    /** Returns the subset of alert fields exposed to message templates, keyed by field name. */
    fun asTemplateArg(): Map<String, Any?> {
        return mapOf(
            ALERT_ID_FIELD to id,
            ALERT_VERSION_FIELD to version,
            ERROR_MESSAGE_FIELD to errorMessage,
            EXECUTION_ID_FIELD to executionId,
            // expirationTime is non-null, so no safe call is needed
            EXPIRATION_TIME_FIELD to expirationTime.toEpochMilli(),
            SEVERITY_FIELD to severity
        )
    }

    companion object {
        const val TRIGGERED_TIME_FIELD = "triggered_time"
        const val EXPIRATION_TIME_FIELD = "expiration_time"
        const val QUERY_RESULTS_FIELD = "query_results"

        /**
         * Parses an alert from an XContent object whose START_OBJECT token is the parser's current token.
         *
         * @param xcp parser positioned on the alert object's START_OBJECT token
         * @param id alert ID to assign (taken from document metadata); defaults to [NO_ID]
         * @param version alert version to assign; defaults to [NO_VERSION]
         * @throws IllegalArgumentException if a required field is absent
         * @throws IllegalStateException if the severity value is not a supported [Severity]
         */
        @JvmStatic
        @JvmOverloads
        @Throws(IOException::class)
        fun parse(xcp: XContentParser, id: String = NO_ID, version: Long = NO_VERSION): AlertV2 {
            var schemaVersion = NO_SCHEMA_VERSION
            // Required fields start out null so that a missing one is reported by name below,
            // rather than surfacing as an uninitialized-lateinit access.
            var monitorId: String? = null
            var monitorName: String? = null
            var monitorVersion: Long = Versions.NOT_FOUND
//            var monitorUser: User? = null
            var triggerId: String? = null
            var triggerName: String? = null
            var queryResults: Map<String, Any> = mapOf()
            var severity: Severity? = null
            var triggeredTime: Instant? = null
            var expirationTime: Instant? = null
            var errorMessage: String? = null
            var executionId: String? = null

            ensureExpectedToken(XContentParser.Token.START_OBJECT, xcp.currentToken(), xcp)
            while (xcp.nextToken() != XContentParser.Token.END_OBJECT) {
                val fieldName = xcp.currentName()
                xcp.nextToken()

                when (fieldName) {
                    MONITOR_ID_FIELD -> monitorId = xcp.text()
                    SCHEMA_VERSION_FIELD -> schemaVersion = xcp.intValue()
                    MONITOR_NAME_FIELD -> monitorName = xcp.text()
                    MONITOR_VERSION_FIELD -> monitorVersion = xcp.longValue()
//                    MONITOR_USER_FIELD ->
//                        monitorUser = if (xcp.currentToken() == XContentParser.Token.VALUE_NULL) {
//                            null
//                        } else {
//                            User.parse(xcp)
//                        }
                    TRIGGER_ID_FIELD -> triggerId = xcp.text()
                    TRIGGER_NAME_FIELD -> triggerName = xcp.text()
                    QUERY_RESULTS_FIELD -> queryResults = xcp.map()
                    TRIGGERED_TIME_FIELD -> triggeredTime = xcp.instant()
                    EXPIRATION_TIME_FIELD -> expirationTime = xcp.instant()
                    ERROR_MESSAGE_FIELD -> errorMessage = xcp.textOrNull()
                    EXECUTION_ID_FIELD -> executionId = xcp.textOrNull()
                    // NOTE(review): toXContent writes the severity under Alert's SEVERITY_FIELD while this
                    // branch matches TriggerV2.SEVERITY_FIELD — confirm both constants hold the same name.
                    TriggerV2.SEVERITY_FIELD -> {
                        val input = xcp.text()
                        severity = Severity.enumFromString(input)
                            ?: throw IllegalStateException(
                                "Invalid value for ${TriggerV2.SEVERITY_FIELD}: $input. " +
                                    "Supported values are ${Severity.entries.map { it.value }}"
                            )
                    }
                    // Fields not consumed here (e.g. the id/version written by toXContent, or fields from a
                    // newer schema) are skipped wholesale so nested values cannot desynchronize the parser.
                    else -> xcp.skipChildren()
                }
            }

            return AlertV2(
                id = id,
                version = version,
                schemaVersion = schemaVersion,
                monitorId = requireNotNull(monitorId) { "AlertV2 is missing required field: $MONITOR_ID_FIELD" },
                monitorName = requireNotNull(monitorName) { "AlertV2 is missing required field: $MONITOR_NAME_FIELD" },
                monitorVersion = monitorVersion,
//                monitorUser = monitorUser,
                triggerId = requireNotNull(triggerId) { "AlertV2 is missing required field: $TRIGGER_ID_FIELD" },
                triggerName = requireNotNull(triggerName) { "AlertV2 is missing required field: $TRIGGER_NAME_FIELD" },
                queryResults = queryResults,
                triggeredTime = requireNotNull(triggeredTime) { "AlertV2 is missing required field: $TRIGGERED_TIME_FIELD" },
                expirationTime = requireNotNull(expirationTime) { "AlertV2 is missing required field: $EXPIRATION_TIME_FIELD" },
                errorMessage = errorMessage,
                severity = requireNotNull(severity) { "AlertV2 is missing required field: ${TriggerV2.SEVERITY_FIELD}" },
                executionId = executionId
            )
        }

        /** Stream-reader form of the [StreamInput] constructor, usable as a `Writeable.Reader`. */
        @JvmStatic
        @Throws(IOException::class)
        fun readFrom(sin: StreamInput): AlertV2 {
            return AlertV2(sin)
        }
    }
}
Schedule + override val lastUpdateTime: Instant // required for scheduled job maintenance + override val enabledTime: Instant? // required for scheduled job maintenance + val triggers: List + val schemaVersion: Int // for updating monitors + val lookBackWindow: TimeValue? // how far back to look when querying data during monitor execution + + fun asTemplateArg(): Map + + enum class MonitorV2Type(val value: String) { + PPL_MONITOR(PPL_MONITOR_TYPE); + + override fun toString(): String { + return value + } + + companion object { + fun enumFromString(value: String): MonitorV2Type? { + return MonitorV2Type.entries.find { it.value == value } + } + } + } + + companion object { + // scheduled job field names + const val MONITOR_V2_TYPE = "monitor_v2" // scheduled job type is MonitorV2 + + // field names + const val NAME_FIELD = "name" + const val MONITOR_TYPE_FIELD = "monitor_type" + const val ENABLED_FIELD = "enabled" + const val SCHEDULE_FIELD = "schedule" + const val LAST_UPDATE_TIME_FIELD = "last_update_time" + const val ENABLED_TIME_FIELD = "enabled_time" + const val TRIGGERS_FIELD = "triggers" + const val LOOK_BACK_WINDOW_FIELD = "look_back_window" + + // default values + const val NO_ID = "" + const val NO_VERSION = 1L + + val XCONTENT_REGISTRY = NamedXContentRegistry.Entry( + ScheduledJob::class.java, + ParseField(MONITOR_V2_TYPE), + CheckedFunction { parse(it) } + ) + + @JvmStatic + @Throws(IOException::class) + fun parse(xcp: XContentParser): MonitorV2 { + /* parse outer object for monitorV2 type, then delegate to correct monitorV2 parser */ + + XContentParserUtils.ensureExpectedToken( // outer monitor object start + XContentParser.Token.START_OBJECT, + xcp.currentToken(), + xcp + ) + + XContentParserUtils.ensureExpectedToken(XContentParser.Token.FIELD_NAME, xcp.nextToken(), xcp) // monitor type field name + val monitorTypeText = xcp.currentName() + val monitorType = MonitorV2Type.enumFromString(monitorTypeText) + ?: throw IllegalStateException("when parsing 
MonitorV2, received invalid monitor type: $monitorTypeText") + + XContentParserUtils.ensureExpectedToken(XContentParser.Token.START_OBJECT, xcp.nextToken(), xcp) // inner monitor object start + + return when (monitorType) { + MonitorV2Type.PPL_MONITOR -> PPLMonitor.parse(xcp) + } + } + + fun readFrom(sin: StreamInput): MonitorV2 { + return when (val monitorType = sin.readEnum(MonitorV2Type::class.java)) { + MonitorV2Type.PPL_MONITOR -> PPLMonitor(sin) + else -> throw IllegalStateException("Unexpected input \"$monitorType\" when reading MonitorV2") + } + } + + fun writeTo(out: StreamOutput, monitorV2: MonitorV2) { + when (monitorV2) { + is PPLMonitor -> { + out.writeEnum(MonitorV2Type.PPL_MONITOR) + monitorV2.writeTo(out) + } + } + } + } +} diff --git a/core/src/main/kotlin/org/opensearch/alerting/core/modelv2/MonitorV2RunResult.kt b/core/src/main/kotlin/org/opensearch/alerting/core/modelv2/MonitorV2RunResult.kt new file mode 100644 index 000000000..0db8b9ce8 --- /dev/null +++ b/core/src/main/kotlin/org/opensearch/alerting/core/modelv2/MonitorV2RunResult.kt @@ -0,0 +1,44 @@ +package org.opensearch.alerting.core.modelv2 + +import org.opensearch.core.common.io.stream.StreamInput +import org.opensearch.core.common.io.stream.StreamOutput +import org.opensearch.core.common.io.stream.Writeable +import org.opensearch.core.xcontent.ToXContent +import java.time.Instant + +interface MonitorV2RunResult : Writeable, ToXContent { + val monitorName: String + val error: Exception? 
+ val periodStart: Instant + val periodEnd: Instant + val triggerResults: Map + + enum class MonitorV2RunResultType() { + PPL_MONITOR_RUN_RESULT; + } + + companion object { + const val MONITOR_V2_NAME_FIELD = "monitor_v2_name" + const val ERROR_FIELD = "error" + const val PERIOD_START_FIELD = "period_start" + const val PERIOD_END_FIELD = "period_end" + const val TRIGGER_RESULTS_FIELD = "trigger_results" + + fun readFrom(sin: StreamInput): MonitorV2RunResult { + val monitorRunResultType = sin.readEnum(MonitorV2RunResultType::class.java) + return when (monitorRunResultType) { + MonitorV2RunResultType.PPL_MONITOR_RUN_RESULT -> PPLMonitorRunResult(sin) + else -> throw IllegalStateException("Unexpected input [$monitorRunResultType] when reading MonitorV2RunResult") + } + } + + fun writeTo(out: StreamOutput, monitorV2RunResult: MonitorV2RunResult) { + when (monitorV2RunResult) { + is PPLMonitorRunResult -> { + out.writeEnum(MonitorV2RunResultType.PPL_MONITOR_RUN_RESULT) + monitorV2RunResult.writeTo(out) + } + } + } + } +} diff --git a/core/src/main/kotlin/org/opensearch/alerting/core/modelv2/PPLMonitor.kt b/core/src/main/kotlin/org/opensearch/alerting/core/modelv2/PPLMonitor.kt new file mode 100644 index 000000000..16b3b6fea --- /dev/null +++ b/core/src/main/kotlin/org/opensearch/alerting/core/modelv2/PPLMonitor.kt @@ -0,0 +1,336 @@ +package org.opensearch.alerting.core.modelv2 + +import org.opensearch.common.unit.TimeValue +import org.opensearch.commons.alerting.model.CronSchedule +import org.opensearch.commons.alerting.model.IntervalSchedule +import org.opensearch.commons.alerting.model.Monitor +import org.opensearch.commons.alerting.model.Schedule +import org.opensearch.commons.alerting.util.IndexUtils +import org.opensearch.commons.alerting.util.instant +import org.opensearch.commons.alerting.util.nonOptionalTimeField +import org.opensearch.commons.alerting.util.optionalTimeField +import org.opensearch.core.common.io.stream.StreamInput +import 
org.opensearch.core.common.io.stream.StreamOutput +import org.opensearch.core.xcontent.ToXContent +import org.opensearch.core.xcontent.XContentBuilder +import org.opensearch.core.xcontent.XContentParser +import org.opensearch.core.xcontent.XContentParserUtils +import java.io.IOException +import java.time.Instant + +// TODO: probably change this to be called PPLSQLMonitor. A PPL Monitor and SQL Monitor +// would have the exact same functionality, except the choice of language +// when calling PPL/SQL plugin's execute API would be different. +// we dont need 2 different monitor types for that, just a simple if check +// for query language at monitor execution time +/** + * PPL (Piped Processing Language) Monitor for OpenSearch Alerting V2 + * + * @property id Monitor ID. Defaults to [NO_ID]. + * @property version Version number of the monitor. Defaults to [NO_VERSION]. + * @property name Display name of the monitor. + * @property enabled Boolean flag indicating whether the monitor is currently on or off. + * @property schedule Defines when and how often the monitor should run. Can be a CRON or interval schedule. + * @property lookBackWindow How far back each Monitor execution's query should look back when searching data. + * Only applicable if Monitor uses CRON schedule. Optional even if CRON schedule is used. + * @property lastUpdateTime Timestamp of the last update to this monitor. + * @property enabledTime Timestamp when the monitor was last enabled. Null if never enabled. + * @property triggers List of [PPLTrigger]s associated with this monitor. + * @property schemaVersion Version of the alerting-config index schema used when this Monitor was indexed. Defaults to [NO_SCHEMA_VERSION]. + * @property queryLanguage The query language used. Defaults to [QueryLanguage.PPL]. + * @property query The PPL query string to be executed by this monitor. 
+ */ +data class PPLMonitor( + override val id: String = MonitorV2.NO_ID, + override val version: Long = MonitorV2.NO_VERSION, + override val name: String, + override val enabled: Boolean, + override val schedule: Schedule, + override val lookBackWindow: TimeValue? = null, + override val lastUpdateTime: Instant, + override val enabledTime: Instant?, + override val triggers: List, + override val schemaVersion: Int = IndexUtils.NO_SCHEMA_VERSION, + val queryLanguage: QueryLanguage = QueryLanguage.PPL, // default to PPL, SQL not currently supported + val query: String +) : MonitorV2 { + + // specify scheduled job type + override val type = MonitorV2.MONITOR_V2_TYPE + + override fun fromDocument(id: String, version: Long): PPLMonitor = copy(id = id, version = version) + + init { + // SQL monitors are not yet supported + if (this.queryLanguage == QueryLanguage.SQL) { + throw IllegalStateException("Monitors with SQL queries are not supported") + } + + // for checking trigger ID uniqueness + val triggerIds = mutableSetOf() + triggers.forEach { trigger -> + require(triggerIds.add(trigger.id)) { "Duplicate trigger id: ${trigger.id}. Trigger ids must be unique." 
} + } + + if (enabled) { + requireNotNull(enabledTime) + } else { + require(enabledTime == null) + } + + triggers.forEach { trigger -> + require(trigger is PPLTrigger) { "Incompatible trigger [${trigger.id}] for monitor type [$PPL_MONITOR_TYPE]" } + } + + // TODO: create setting for max triggers and check for max triggers here + } + + @Throws(IOException::class) + constructor(sin: StreamInput) : this( + id = sin.readString(), + version = sin.readLong(), + name = sin.readString(), + enabled = sin.readBoolean(), + schedule = Schedule.readFrom(sin), + lookBackWindow = TimeValue.parseTimeValue(sin.readString(), PLACEHOLDER_LOOK_BACK_WINDOW_SETTING_NAME), + lastUpdateTime = sin.readInstant(), + enabledTime = sin.readOptionalInstant(), + triggers = sin.readList(PPLTrigger::readFrom), + schemaVersion = sin.readInt(), + queryLanguage = sin.readEnum(QueryLanguage::class.java), + query = sin.readString() + ) + + override fun toXContent(builder: XContentBuilder, params: ToXContent.Params): XContentBuilder { + builder.startObject() // overall start object + + // if this is being written as ScheduledJob, add extra object layer and add ScheduledJob + // related metadata, default to false + if (params.paramAsBoolean("with_type", false)) { + builder.startObject(MonitorV2.MONITOR_V2_TYPE) + } + + // wrap PPLMonitor in outer object named after its monitor type + // required for MonitorV2 XContentParser to first encounter this, + // read in monitor type, then delegate to correct parse() function + builder.startObject(PPL_MONITOR_TYPE) // monitor type start object + + builder.field(MonitorV2.NAME_FIELD, name) + builder.field(MonitorV2.SCHEDULE_FIELD, schedule) + builder.field(MonitorV2.LOOK_BACK_WINDOW_FIELD, lookBackWindow?.toHumanReadableString(0)) + builder.field(MonitorV2.ENABLED_FIELD, enabled) + builder.nonOptionalTimeField(MonitorV2.LAST_UPDATE_TIME_FIELD, lastUpdateTime) + builder.optionalTimeField(MonitorV2.ENABLED_TIME_FIELD, enabledTime) + 
builder.field(MonitorV2.TRIGGERS_FIELD, triggers.toTypedArray())
+        builder.field(Monitor.SCHEMA_VERSION_FIELD, schemaVersion)
+        builder.field(QUERY_LANGUAGE_FIELD, queryLanguage.value)
+        builder.field(QUERY_FIELD, query)
+
+        builder.endObject() // monitor type end object
+
+        // if ScheduledJob metadata was added, end the extra object layer that was created
+        if (params.paramAsBoolean("with_type", false)) {
+            builder.endObject()
+        }
+
+        builder.endObject() // overall end object
+
+        return builder
+    }
+
+    @Throws(IOException::class)
+    override fun writeTo(out: StreamOutput) {
+        out.writeString(id)
+        out.writeLong(version)
+        out.writeString(name)
+        out.writeBoolean(enabled)
+        // The StreamInput constructor rebuilds the schedule via Schedule.readFrom(sin), so the
+        // schedule's own payload must follow its type tag; writing only the tag would leave
+        // every later field misaligned for the reader.
+        out.writeEnum(if (schedule is CronSchedule) Schedule.TYPE.CRON else Schedule.TYPE.INTERVAL)
+        schedule.writeTo(out)
+
+        out.writeBoolean(lookBackWindow != null)
+        lookBackWindow?.let { out.writeString(lookBackWindow.toHumanReadableString(0)) }
+
+        out.writeInstant(lastUpdateTime)
+        out.writeOptionalInstant(enabledTime)
+        out.writeVInt(triggers.size)
+        triggers.forEach { it.writeTo(out) }
+        out.writeInt(schemaVersion)
+        out.writeEnum(queryLanguage)
+        out.writeString(query)
+    }
+
+    override fun asTemplateArg(): Map {
+        return mapOf(
+            IndexUtils._ID to id,
+            IndexUtils._VERSION to version,
+            MonitorV2.NAME_FIELD to name,
+            MonitorV2.ENABLED_FIELD to enabled,
+            MonitorV2.SCHEDULE_FIELD to schedule,
+            MonitorV2.LOOK_BACK_WINDOW_FIELD to lookBackWindow?.toHumanReadableString(0),
+            MonitorV2.LAST_UPDATE_TIME_FIELD to lastUpdateTime.toEpochMilli(),
+            MonitorV2.ENABLED_TIME_FIELD to enabledTime?.toEpochMilli(),
+            MonitorV2.TRIGGERS_FIELD to triggers,
+            QUERY_LANGUAGE_FIELD to queryLanguage.value,
+            QUERY_FIELD to query
+        )
+    }
+
+    enum class QueryLanguage(val value: String) {
+        PPL(PPL_QUERY_LANGUAGE),
+        SQL(SQL_QUERY_LANGUAGE);
+
+        companion object {
+            fun enumFromString(value: String): QueryLanguage? 
= QueryLanguage.entries.firstOrNull { it.value == value } + } + } + + companion object { + // monitor type name + const val PPL_MONITOR_TYPE = "ppl_monitor" // TODO: eventually change to SQL_PPL_MONITOR_TYPE + + // query languages + const val PPL_QUERY_LANGUAGE = "ppl" + const val SQL_QUERY_LANGUAGE = "sql" + + // field names + const val QUERY_LANGUAGE_FIELD = "query_language" + const val QUERY_FIELD = "query" + + // mock setting name used when parsing TimeValue + // TimeValue class is usually reserved for declaring settings, but we're using it + // outside that use case here, which is why we need these placeholders + private const val PLACEHOLDER_LOOK_BACK_WINDOW_SETTING_NAME = "ppl_monitor_look_back_window" + + @JvmStatic + @JvmOverloads + @Throws(IOException::class) + fun parse(xcp: XContentParser, id: String = MonitorV2.NO_ID, version: Long = MonitorV2.NO_VERSION): PPLMonitor { + var name: String? = null + var monitorType: String = PPL_MONITOR_TYPE + var enabled = true + var schedule: Schedule? = null + var lookBackWindow: TimeValue? = null + var lastUpdateTime: Instant? = null + var enabledTime: Instant? = null + val triggers: MutableList = mutableListOf() + var schemaVersion = IndexUtils.NO_SCHEMA_VERSION + var queryLanguage: QueryLanguage = QueryLanguage.PPL // default to PPL + var query: String? 
= null + + /* parse */ + XContentParserUtils.ensureExpectedToken(XContentParser.Token.START_OBJECT, xcp.currentToken(), xcp) + while (xcp.nextToken() != XContentParser.Token.END_OBJECT) { + val fieldName = xcp.currentName() + xcp.nextToken() + + when (fieldName) { + MonitorV2.NAME_FIELD -> name = xcp.text() + MonitorV2.MONITOR_TYPE_FIELD -> monitorType = xcp.text() + MonitorV2.ENABLED_FIELD -> enabled = xcp.booleanValue() + MonitorV2.SCHEDULE_FIELD -> schedule = Schedule.parse(xcp) + MonitorV2.LOOK_BACK_WINDOW_FIELD -> { + lookBackWindow = if (xcp.currentToken() == XContentParser.Token.VALUE_NULL) { + null + } else { + val input = xcp.text() + // throws IllegalArgumentException if there's parsing error + TimeValue.parseTimeValue(input, PLACEHOLDER_LOOK_BACK_WINDOW_SETTING_NAME) + } + } + MonitorV2.LAST_UPDATE_TIME_FIELD -> lastUpdateTime = xcp.instant() + MonitorV2.ENABLED_TIME_FIELD -> enabledTime = xcp.instant() + MonitorV2.TRIGGERS_FIELD -> { + XContentParserUtils.ensureExpectedToken( + XContentParser.Token.START_ARRAY, + xcp.currentToken(), + xcp + ) + while (xcp.nextToken() != XContentParser.Token.END_ARRAY) { + triggers.add(PPLTrigger.parseInner(xcp)) + } + } + Monitor.SCHEMA_VERSION_FIELD -> schemaVersion = xcp.intValue() + QUERY_LANGUAGE_FIELD -> { + val input = xcp.text() + val enumMatchResult = QueryLanguage.enumFromString(input) + ?: throw IllegalArgumentException( + "Invalid value for $QUERY_LANGUAGE_FIELD: $input. 
" + + "Supported values are ${QueryLanguage.entries.map { it.value }}" + ) + queryLanguage = enumMatchResult + } + QUERY_FIELD -> query = xcp.text() + else -> throw IllegalArgumentException("Unexpected field \"$fieldName\" when parsing PPL Monitor") + } + } + + /* validations */ + + // ensure MonitorV2 XContent being parsed by PPLMonitor class is PPL Monitor type + if (monitorType != PPL_MONITOR_TYPE) { + throw IllegalArgumentException("Invalid monitor type: $monitorType") + } + + // ensure there's at least 1 trigger + if (triggers.isEmpty()) { + throw IllegalArgumentException("Monitor must include at least 1 trigger") + } + + // ensure the trigger suppress durations are valid + triggers.forEach { trigger -> + trigger.suppressDuration?.let { suppressDuration -> + // TODO: these max and min values are completely arbitrary, make them settings + val minValue = TimeValue.timeValueMinutes(1) + val maxValue = TimeValue.timeValueDays(5) + + require(suppressDuration <= maxValue) { "Suppress duration must be at most $maxValue but was $suppressDuration" } + + require(suppressDuration >= minValue) { "Suppress duration must be at least $minValue but was $suppressDuration" } + } + } + + // if enabled, set time of MonitorV2 creation/update is set as enable time + if (enabled && enabledTime == null) { + enabledTime = Instant.now() + } else if (!enabled) { + enabledTime = null + } + + lastUpdateTime = lastUpdateTime ?: Instant.now() + + // check for required fields + requireNotNull(name) { "Monitor name is null" } + requireNotNull(schedule) { "Schedule is null" } + requireNotNull(queryLanguage) { "Query language is null" } + requireNotNull(query) { "Query is null" } + requireNotNull(lastUpdateTime) { "Last update time is null" } + + if (schedule is IntervalSchedule && lookBackWindow != null) { + throw IllegalArgumentException("Look back windows only supported for CRON schedules") + } + + if (queryLanguage == QueryLanguage.SQL) { + throw IllegalArgumentException("SQL queries are 
not supported. Please use a PPL query.") + } + + /* return PPLMonitor */ + return PPLMonitor( + id, + version, + name, + enabled, + schedule, + lookBackWindow, + lastUpdateTime, + enabledTime, + triggers, + schemaVersion, + queryLanguage, + query + ) + } + } +} diff --git a/core/src/main/kotlin/org/opensearch/alerting/core/modelv2/PPLMonitorRunResult.kt b/core/src/main/kotlin/org/opensearch/alerting/core/modelv2/PPLMonitorRunResult.kt new file mode 100644 index 000000000..29b5744d6 --- /dev/null +++ b/core/src/main/kotlin/org/opensearch/alerting/core/modelv2/PPLMonitorRunResult.kt @@ -0,0 +1,61 @@ +package org.opensearch.alerting.core.modelv2 + +import org.opensearch.alerting.core.modelv2.MonitorV2RunResult.Companion.ERROR_FIELD +import org.opensearch.alerting.core.modelv2.MonitorV2RunResult.Companion.MONITOR_V2_NAME_FIELD +import org.opensearch.alerting.core.modelv2.MonitorV2RunResult.Companion.PERIOD_END_FIELD +import org.opensearch.alerting.core.modelv2.MonitorV2RunResult.Companion.PERIOD_START_FIELD +import org.opensearch.alerting.core.modelv2.MonitorV2RunResult.Companion.TRIGGER_RESULTS_FIELD +import org.opensearch.commons.alerting.util.nonOptionalTimeField +import org.opensearch.core.common.io.stream.StreamInput +import org.opensearch.core.common.io.stream.StreamOutput +import org.opensearch.core.xcontent.ToXContent +import org.opensearch.core.xcontent.XContentBuilder +import java.io.IOException +import java.time.Instant + +data class PPLMonitorRunResult( + override val monitorName: String, + override val error: Exception?, + override val periodStart: Instant, + override val periodEnd: Instant, + override val triggerResults: Map, + val pplQueryResults: Map> // key: trigger id, value: query results +) : MonitorV2RunResult { + + @Throws(IOException::class) + @Suppress("UNCHECKED_CAST") + constructor(sin: StreamInput) : this( + sin.readString(), // monitorName + sin.readException(), // error + sin.readInstant(), // periodStart + sin.readInstant(), // periodEnd + 
sin.readMap() as Map, // triggerResults + sin.readMap() as Map> // pplQueryResults + ) + + override fun toXContent(builder: XContentBuilder, params: ToXContent.Params): XContentBuilder { + builder.startObject() + builder.field(MONITOR_V2_NAME_FIELD, monitorName) + builder.nonOptionalTimeField(PERIOD_START_FIELD, periodStart) + builder.nonOptionalTimeField(PERIOD_END_FIELD, periodEnd) + builder.field(ERROR_FIELD, error?.message) + builder.field(TRIGGER_RESULTS_FIELD, triggerResults) + builder.field(PPL_QUERY_RESULTS_FIELD, pplQueryResults) + builder.endObject() + return builder + } + + @Throws(IOException::class) + override fun writeTo(out: StreamOutput) { + out.writeString(monitorName) + out.writeException(error) + out.writeInstant(periodStart) + out.writeInstant(periodEnd) + out.writeMap(triggerResults) + out.writeMap(pplQueryResults) + } + + companion object { + const val PPL_QUERY_RESULTS_FIELD = "ppl_query_results" + } +} diff --git a/core/src/main/kotlin/org/opensearch/alerting/core/modelv2/PPLTrigger.kt b/core/src/main/kotlin/org/opensearch/alerting/core/modelv2/PPLTrigger.kt new file mode 100644 index 000000000..b9ebd776d --- /dev/null +++ b/core/src/main/kotlin/org/opensearch/alerting/core/modelv2/PPLTrigger.kt @@ -0,0 +1,380 @@ +package org.opensearch.alerting.core.modelv2 + +import org.opensearch.alerting.core.modelv2.TriggerV2.Companion.ACTIONS_FIELD +import org.opensearch.alerting.core.modelv2.TriggerV2.Companion.EXPIRE_FIELD +import org.opensearch.alerting.core.modelv2.TriggerV2.Companion.ID_FIELD +import org.opensearch.alerting.core.modelv2.TriggerV2.Companion.LAST_TRIGGERED_FIELD +import org.opensearch.alerting.core.modelv2.TriggerV2.Companion.NAME_FIELD +import org.opensearch.alerting.core.modelv2.TriggerV2.Companion.SEVERITY_FIELD +import org.opensearch.alerting.core.modelv2.TriggerV2.Companion.SUPPRESS_FIELD +import org.opensearch.alerting.core.modelv2.TriggerV2.Severity +import org.opensearch.common.CheckedFunction +import 
org.opensearch.common.UUIDs +import org.opensearch.common.unit.TimeValue +import org.opensearch.commons.alerting.model.action.Action +import org.opensearch.commons.alerting.util.instant +import org.opensearch.commons.alerting.util.optionalTimeField +import org.opensearch.core.ParseField +import org.opensearch.core.common.io.stream.StreamInput +import org.opensearch.core.common.io.stream.StreamOutput +import org.opensearch.core.xcontent.NamedXContentRegistry +import org.opensearch.core.xcontent.ToXContent +import org.opensearch.core.xcontent.XContentBuilder +import org.opensearch.core.xcontent.XContentParser +import org.opensearch.core.xcontent.XContentParserUtils +import java.io.IOException +import java.time.Instant + +/** + * The PPL Trigger for PPL Monitors + * + * There are two types of PPLTrigger conditions: NUMBER_OF_RESULT and CUSTOM + * NUMBER_OF_RESULTS: triggers based on if the number of query results returned by the PPLMonitor + * query meets some threshold + * CUSTOM: triggers based on a custom condition that user specifies + * This trigger can operate in either result set or per-result mode and supports + * both numeric result conditions and custom conditions. + * + * PPLTriggers can run on two modes: RESULT_SET and PER_RESULT + * RESULT_SET: exactly one Alert is generated when the Trigger condition is met + * PER_RESULT: one Alert is generated per trigger condition-meeting query result row + * + * @property id Trigger ID, defaults to a base64 UUID. + * @property name Display name of the Trigger. + * @property severity The severity level of the Trigger. + * @property suppressDuration Optional duration for which alerts from this Trigger should be suppressed. + * Null indicates no suppression. + * @property expireDuration Duration after which alerts from this Trigger should be deleted permanently. + * @property lastTriggeredTime The last time this Trigger generated an Alert. Null if Trigger hasn't generated an Alert yet. 
+ * @property actions List of notification-sending actions to run when the Trigger condition is met. + * @property mode Specifies whether the trigger evaluates the entire result set or each result individually. + * Can be either [TriggerMode.RESULT_SET] or [TriggerMode.PER_RESULT]. + * @property conditionType The type of condition to evaluate. + * Can be either [ConditionType.NUMBER_OF_RESULTS] or [ConditionType.CUSTOM]. + * @property numResultsCondition The comparison operator for NUMBER_OF_RESULTS conditions. Required if using NUMBER_OF_RESULTS conditions, + * null otherwise. + * @property numResultsValue The threshold value for NUMBER_OF_RESULTS conditions. Required if using NUMBER_OF_RESULTS conditions, + * null otherwise. + * @property customCondition A custom condition expression. Required if using CUSTOM conditions, + * null otherwise. + */ +data class PPLTrigger( + override val id: String = UUIDs.base64UUID(), + override val name: String, + override val severity: Severity, + override val suppressDuration: TimeValue?, + override val expireDuration: TimeValue, + override var lastTriggeredTime: Instant?, + override val actions: List, + val mode: TriggerMode, // result_set or per_result + val conditionType: ConditionType, + val numResultsCondition: NumResultsCondition?, + val numResultsValue: Long?, + val customCondition: String? 
+) : TriggerV2 {
+
+    @Throws(IOException::class)
+    constructor(sin: StreamInput) : this(
+        sin.readString(), // id
+        sin.readString(), // name
+        sin.readEnum(Severity::class.java), // severity
+        // parseTimeValue() is typically used to parse OpenSearch settings; its second param is meant to be
+        // a setting name, so we pass our own. Only parse when a value was sent: no suppress duration stays null
+        sin.readOptionalString()?.let { TimeValue.parseTimeValue(it, PLACEHOLDER_SUPPRESS_SETTING_NAME) }, // suppressDuration
+        TimeValue.parseTimeValue(sin.readString(), PLACEHOLDER_EXPIRE_SETTING_NAME), // expireDuration
+        sin.readOptionalInstant(), // lastTriggeredTime
+        sin.readList(::Action), // actions
+        sin.readEnum(TriggerMode::class.java), // trigger mode
+        sin.readEnum(ConditionType::class.java), // condition type
+        if (sin.readBoolean()) sin.readEnum(NumResultsCondition::class.java) else null, // num results condition
+        sin.readOptionalLong(), // num results value
+        sin.readOptionalString() // custom condition
+    )
+
+    @Throws(IOException::class)
+    override fun writeTo(out: StreamOutput) {
+        out.writeString(id)
+        out.writeString(name)
+        out.writeEnum(severity)
+
+        out.writeBoolean(suppressDuration != null)
+        suppressDuration?.let { out.writeString(suppressDuration.toHumanReadableString(0)) }
+
+        out.writeString(expireDuration.toHumanReadableString(0))
+        out.writeOptionalInstant(lastTriggeredTime)
+        out.writeCollection(actions)
+        out.writeEnum(mode)
+        out.writeEnum(conditionType)
+
+        out.writeBoolean(numResultsCondition != null)
+        numResultsCondition?.let { out.writeEnum(numResultsCondition) }
+
+        out.writeOptionalLong(numResultsValue)
+        out.writeOptionalString(customCondition)
+    }
+
+    override fun toXContent(builder: XContentBuilder, params: ToXContent.Params?): XContentBuilder {
+        builder.startObject()
+        builder.field(ID_FIELD, id)
+        builder.field(NAME_FIELD, name)
+        builder.field(SEVERITY_FIELD, severity.value)
+        builder.field(SUPPRESS_FIELD, suppressDuration?.toHumanReadableString(0))
+        builder.field(EXPIRE_FIELD, 
expireDuration.toHumanReadableString(0)) + builder.optionalTimeField(LAST_TRIGGERED_FIELD, lastTriggeredTime) + builder.field(ACTIONS_FIELD, actions.toTypedArray()) + builder.field(MODE_FIELD, mode.value) + builder.field(CONDITION_TYPE_FIELD, conditionType.value) + numResultsCondition?.let { builder.field(NUM_RESULTS_CONDITION_FIELD, numResultsCondition.value) } + numResultsValue?.let { builder.field(NUM_RESULTS_VALUE_FIELD, numResultsValue) } + customCondition?.let { builder.field(CUSTOM_CONDITION_FIELD, customCondition) } + builder.endObject() + return builder + } + + fun asTemplateArg(): Map { + return mapOf( + ID_FIELD to id, + NAME_FIELD to name, + SEVERITY_FIELD to severity.value, + SUPPRESS_FIELD to suppressDuration?.toHumanReadableString(0), + EXPIRE_FIELD to expireDuration?.toHumanReadableString(0), + ACTIONS_FIELD to actions.map { it.asTemplateArg() }, + MODE_FIELD to mode.value, + CONDITION_TYPE_FIELD to conditionType.value, + NUM_RESULTS_CONDITION_FIELD to numResultsCondition?.value, + NUM_RESULTS_VALUE_FIELD to numResultsValue, + CUSTOM_CONDITION_FIELD to customCondition + ) + } + + enum class TriggerMode(val value: String) { + RESULT_SET("result_set"), + PER_RESULT("per_result"); + + companion object { + fun enumFromString(value: String): TriggerMode? = entries.firstOrNull { it.value == value } + } + } + + enum class ConditionType(val value: String) { + NUMBER_OF_RESULTS("number_of_results"), + CUSTOM("custom"); + + companion object { + fun enumFromString(value: String): ConditionType? = entries.firstOrNull { it.value == value } + } + } + + enum class NumResultsCondition(val value: String) { + GREATER_THAN(">"), + GREATER_THAN_EQUAL(">="), + LESS_THAN("<"), + LESS_THAN_EQUAL("<="), + EQUAL("=="), + NOT_EQUAL("!="); + + companion object { + fun enumFromString(value: String): NumResultsCondition? 
= entries.firstOrNull { it.value == value } + } + } + + companion object { + // trigger wrapper object field name + const val PPL_TRIGGER_FIELD = "ppl_trigger" + + // field names + const val MODE_FIELD = "mode" + const val CONDITION_TYPE_FIELD = "type" + const val NUM_RESULTS_CONDITION_FIELD = "num_results_condition" + const val NUM_RESULTS_VALUE_FIELD = "num_results_value" + const val CUSTOM_CONDITION_FIELD = "custom_condition" + + // mock setting name used when parsing TimeValue + // TimeValue class is usually reserved for declaring settings, but we're using it + // outside that use case here, which is why we need these placeholders + private const val PLACEHOLDER_SUPPRESS_SETTING_NAME = "ppl_trigger_suppress_duration" + private const val PLACEHOLDER_EXPIRE_SETTING_NAME = "ppl_trigger_expire_duration" + + val XCONTENT_REGISTRY = NamedXContentRegistry.Entry( + TriggerV2::class.java, + ParseField(PPL_TRIGGER_FIELD), + CheckedFunction { parseInner(it) } + ) + + @JvmStatic + @Throws(IOException::class) + fun parseInner(xcp: XContentParser): PPLTrigger { + var id = UUIDs.base64UUID() // assign a default triggerId if one is not specified + var name: String? = null + var severity: Severity? = null + var suppressDuration: TimeValue? = null + var expireDuration: TimeValue = + TimeValue.timeValueDays(7) // default to 7 days // TODO: add this as a setting + var lastTriggeredTime: Instant? = null + val actions: MutableList = mutableListOf() + var mode: TriggerMode? = null + var conditionType: ConditionType? = null + var numResultsCondition: NumResultsCondition? = null + var numResultsValue: Long? = null + var customCondition: String? 
= null + + /* parse */ + XContentParserUtils.ensureExpectedToken( // outer trigger object start + XContentParser.Token.START_OBJECT, + xcp.currentToken(), xcp + ) + + while (xcp.nextToken() != XContentParser.Token.END_OBJECT) { + val fieldName = xcp.currentName() + xcp.nextToken() + + when (fieldName) { + ID_FIELD -> id = xcp.text() + NAME_FIELD -> name = xcp.text() + SEVERITY_FIELD -> { + val input = xcp.text() + val enumMatchResult = Severity.enumFromString(input) + ?: throw IllegalArgumentException( + "Invalid value for $SEVERITY_FIELD: $input. " + + "Supported values are ${Severity.entries.map { it.value }}" + ) + severity = enumMatchResult + } + MODE_FIELD -> { + val input = xcp.text() + val enumMatchResult = TriggerMode.enumFromString(input) + ?: throw IllegalArgumentException( + "Invalid value for $MODE_FIELD: $input. " + + "Supported values are ${TriggerMode.entries.map { it.value }}" + ) + mode = enumMatchResult + } + CONDITION_TYPE_FIELD -> { + val input = xcp.text() + val enumMatchResult = ConditionType.enumFromString(input) + ?: throw IllegalArgumentException( + "Invalid value for $CONDITION_TYPE_FIELD: $input. " + + "Supported values are ${ConditionType.entries.map { it.value }}" + ) + conditionType = enumMatchResult + } + NUM_RESULTS_CONDITION_FIELD -> { + numResultsCondition = if (xcp.currentToken() == XContentParser.Token.VALUE_NULL) { + null + } else { + val input = xcp.text() + val enumMatchResult = NumResultsCondition.enumFromString(input) + ?: throw IllegalArgumentException( + "Invalid value for $NUM_RESULTS_CONDITION_FIELD: $input. 
" + + "Supported values are ${NumResultsCondition.entries.map { it.value }}" + ) + enumMatchResult + } + } + NUM_RESULTS_VALUE_FIELD -> { + numResultsValue = if (xcp.currentToken() == XContentParser.Token.VALUE_NULL) { + null + } else { + xcp.longValue() + } + } + CUSTOM_CONDITION_FIELD -> { + customCondition = if (xcp.currentToken() == XContentParser.Token.VALUE_NULL) { + null + } else { + xcp.text() + } + } + SUPPRESS_FIELD -> { + suppressDuration = if (xcp.currentToken() == XContentParser.Token.VALUE_NULL) { + null + } else { + val input = xcp.text() + // throws IllegalArgumentException if there's parsing error + TimeValue.parseTimeValue(input, PLACEHOLDER_SUPPRESS_SETTING_NAME) + } + } + EXPIRE_FIELD -> { + if (xcp.currentToken() != XContentParser.Token.VALUE_NULL) { + // if expire field is null, skip reading it and let it retain the default value + val input = xcp.text() + // throws IllegalArgumentException if there's parsing error + expireDuration = TimeValue.parseTimeValue(input, PLACEHOLDER_EXPIRE_SETTING_NAME) + } + } + LAST_TRIGGERED_FIELD -> lastTriggeredTime = xcp.instant() + ACTIONS_FIELD -> { + XContentParserUtils.ensureExpectedToken( + XContentParser.Token.START_ARRAY, + xcp.currentToken(), + xcp + ) + while (xcp.nextToken() != XContentParser.Token.END_ARRAY) { + actions.add(Action.parse(xcp)) + } + } + else -> throw IllegalArgumentException("Unexpected field $fieldName when parsing PPL Trigger") + } + } + + /* validations */ + requireNotNull(name) { "Trigger name must be included" } + requireNotNull(severity) { "Trigger severity must be included" } + requireNotNull(mode) { "Trigger mode must be included" } + requireNotNull(conditionType) { "Trigger condition type must be included" } + + when (conditionType) { + ConditionType.NUMBER_OF_RESULTS -> { + requireNotNull(numResultsCondition) { + "if trigger condition is of type ${ConditionType.NUMBER_OF_RESULTS.value}," + + "$NUM_RESULTS_CONDITION_FIELD must be included" + } + 
requireNotNull(numResultsValue) { + "if trigger condition is of type ${ConditionType.NUMBER_OF_RESULTS.value}," + + "$NUM_RESULTS_VALUE_FIELD must be included" + } + require(customCondition == null) { + "if trigger condition is of type ${ConditionType.NUMBER_OF_RESULTS.value}," + + "$CUSTOM_CONDITION_FIELD must not be included" + } + } + ConditionType.CUSTOM -> { + requireNotNull(customCondition) { + "if trigger condition is of type ${ConditionType.CUSTOM.value}," + + "$CUSTOM_CONDITION_FIELD must be included" + } + require(numResultsCondition == null) { + "if trigger condition is of type ${ConditionType.CUSTOM.value}," + + "$NUM_RESULTS_CONDITION_FIELD must not be included" + } + require(numResultsValue == null) { + "if trigger condition is of type ${ConditionType.CUSTOM.value}," + + "$NUM_RESULTS_VALUE_FIELD must not be included" + } + } + } + + // 3. prepare and return PPLTrigger object + return PPLTrigger( + id, + name, + severity, + suppressDuration, + expireDuration, + lastTriggeredTime, + actions, + mode, + conditionType, + numResultsCondition, + numResultsValue, + customCondition + ) + } + + @JvmStatic + @Throws(IOException::class) + fun readFrom(sin: StreamInput): PPLTrigger { + return PPLTrigger(sin) + } + } +} diff --git a/core/src/main/kotlin/org/opensearch/alerting/core/modelv2/PPLTriggerRunResult.kt b/core/src/main/kotlin/org/opensearch/alerting/core/modelv2/PPLTriggerRunResult.kt new file mode 100644 index 000000000..175275a97 --- /dev/null +++ b/core/src/main/kotlin/org/opensearch/alerting/core/modelv2/PPLTriggerRunResult.kt @@ -0,0 +1,51 @@ +package org.opensearch.alerting.core.modelv2 + +import org.opensearch.alerting.core.modelv2.TriggerV2RunResult.Companion.ERROR_FIELD +import org.opensearch.alerting.core.modelv2.TriggerV2RunResult.Companion.NAME_FIELD +import org.opensearch.alerting.core.modelv2.TriggerV2RunResult.Companion.TRIGGERED_FIELD +import org.opensearch.commons.alerting.model.QueryLevelTriggerRunResult +import 
org.opensearch.commons.alerting.model.TriggerRunResult
+import org.opensearch.core.common.io.stream.StreamInput
+import org.opensearch.core.common.io.stream.StreamOutput
+import org.opensearch.core.xcontent.ToXContent
+import org.opensearch.core.xcontent.XContentBuilder
+import java.io.IOException
+
+data class PPLTriggerRunResult(
+    override var triggerName: String,
+    override var triggered: Boolean,
+    override var error: Exception?,
+) : TriggerV2RunResult {
+
+    @Throws(IOException::class)
+    @Suppress("UNCHECKED_CAST")
+    constructor(sin: StreamInput) : this(
+        triggerName = sin.readString(),
+        triggered = sin.readBoolean(),
+        error = sin.readException()
+    )
+
+    override fun toXContent(builder: XContentBuilder, params: ToXContent.Params): XContentBuilder {
+        builder.startObject()
+        builder.field(NAME_FIELD, triggerName)
+        builder.field(TRIGGERED_FIELD, triggered)
+        builder.field(ERROR_FIELD, error?.message)
+        builder.endObject()
+        return builder
+    }
+
+    @Throws(IOException::class)
+    override fun writeTo(out: StreamOutput) {
+        out.writeString(triggerName)
+        out.writeBoolean(triggered)
+        out.writeException(error)
+    }
+
+    companion object {
+        @JvmStatic
+        @Throws(IOException::class)
+        fun readFrom(sin: StreamInput): PPLTriggerRunResult {
+            return PPLTriggerRunResult(sin) // reads the fields in the order writeTo() wrote them
+        }
+    }
+}
diff --git a/core/src/main/kotlin/org/opensearch/alerting/core/modelv2/TriggerV2.kt b/core/src/main/kotlin/org/opensearch/alerting/core/modelv2/TriggerV2.kt
new file mode 100644
index 000000000..c544bf275
--- /dev/null
+++ b/core/src/main/kotlin/org/opensearch/alerting/core/modelv2/TriggerV2.kt
@@ -0,0 +1,51 @@
+package org.opensearch.alerting.core.modelv2
+
+import org.opensearch.alerting.core.modelv2.PPLTrigger.Companion.PPL_TRIGGER_FIELD
+import org.opensearch.common.unit.TimeValue
+import org.opensearch.commons.alerting.model.action.Action
+import org.opensearch.commons.notifications.model.BaseModel
+import java.time.Instant
+
+interface TriggerV2 : BaseModel {
+
+    val id: String
+ 
val name: String + val severity: Severity + val suppressDuration: TimeValue? + val expireDuration: TimeValue? + var lastTriggeredTime: Instant? + val actions: List + + enum class TriggerV2Type(val value: String) { + PPL_TRIGGER(PPL_TRIGGER_FIELD); + + override fun toString(): String { + return value + } + } + + enum class Severity(val value: String) { + INFO("info"), + ERROR("error"), + LOW("low"), + MEDIUM("medium"), + HIGH("high"), + CRITICAL("critical"); + + companion object { + fun enumFromString(value: String): Severity? { + return entries.find { it.value == value } + } + } + } + + companion object { + const val ID_FIELD = "id" + const val NAME_FIELD = "name" + const val SEVERITY_FIELD = "severity" + const val SUPPRESS_FIELD = "suppress" + const val LAST_TRIGGERED_FIELD = "last_triggered_time" + const val EXPIRE_FIELD = "expires" + const val ACTIONS_FIELD = "actions" + } +} diff --git a/core/src/main/kotlin/org/opensearch/alerting/core/modelv2/TriggerV2RunResult.kt b/core/src/main/kotlin/org/opensearch/alerting/core/modelv2/TriggerV2RunResult.kt new file mode 100644 index 000000000..37ea50c90 --- /dev/null +++ b/core/src/main/kotlin/org/opensearch/alerting/core/modelv2/TriggerV2RunResult.kt @@ -0,0 +1,17 @@ +package org.opensearch.alerting.core.modelv2 + +import org.opensearch.core.common.io.stream.Writeable +import org.opensearch.core.xcontent.ToXContent + +interface TriggerV2RunResult : Writeable, ToXContent { + + val triggerName: String + val triggered: Boolean + val error: Exception? 
+ + companion object { + const val NAME_FIELD = "name" + const val TRIGGERED_FIELD = "triggered" + const val ERROR_FIELD = "error" + } +} diff --git a/core/src/main/kotlin/org/opensearch/alerting/core/ppl/PPLPluginInterface.kt b/core/src/main/kotlin/org/opensearch/alerting/core/ppl/PPLPluginInterface.kt new file mode 100644 index 000000000..477b417ce --- /dev/null +++ b/core/src/main/kotlin/org/opensearch/alerting/core/ppl/PPLPluginInterface.kt @@ -0,0 +1,50 @@ +package org.opensearch.alerting.core.ppl + +import org.opensearch.commons.utils.recreateObject +import org.opensearch.core.action.ActionListener +import org.opensearch.core.action.ActionResponse +import org.opensearch.core.common.io.stream.Writeable +import org.opensearch.sql.plugin.transport.PPLQueryAction +import org.opensearch.sql.plugin.transport.TransportPPLQueryRequest +import org.opensearch.sql.plugin.transport.TransportPPLQueryResponse +import org.opensearch.transport.client.node.NodeClient + +/** + * Various transport action plugin interfaces for the SQL/PPL plugin + */ +object PPLPluginInterface { + fun executeQuery( + client: NodeClient, + request: TransportPPLQueryRequest, + listener: ActionListener + ) { + client.execute( + PPLQueryAction.INSTANCE, + request, + wrapActionListener(listener) { response -> recreateObject(response) { TransportPPLQueryResponse(it) } } + ) + } + + /** + * Wrap action listener on concrete response class by a new created one on ActionResponse. + * This is required because the response may be loaded by different classloader across plugins. + * The onResponse(ActionResponse) avoids type cast exception and give a chance to recreate + * the response object. 
+ */ + @Suppress("UNCHECKED_CAST") + private fun wrapActionListener( + listener: ActionListener, + recreate: (Writeable) -> Response + ): ActionListener { + return object : ActionListener { + override fun onResponse(response: ActionResponse) { + val recreated = recreate(response) + listener.onResponse(recreated) + } + + override fun onFailure(exception: java.lang.Exception) { + listener.onFailure(exception) + } + } as ActionListener + } +} diff --git a/core/src/main/kotlin/org/opensearch/alerting/core/schedule/JobScheduler.kt b/core/src/main/kotlin/org/opensearch/alerting/core/schedule/JobScheduler.kt index dd3676b66..0dddb80b1 100644 --- a/core/src/main/kotlin/org/opensearch/alerting/core/schedule/JobScheduler.kt +++ b/core/src/main/kotlin/org/opensearch/alerting/core/schedule/JobScheduler.kt @@ -7,8 +7,8 @@ package org.opensearch.alerting.core.schedule import org.apache.logging.log4j.LogManager import org.opensearch.alerting.core.JobRunner +import org.opensearch.alerting.core.modelv2.MonitorV2 import org.opensearch.common.unit.TimeValue -import org.opensearch.commons.alerting.model.MonitorV2 import org.opensearch.commons.alerting.model.ScheduledJob import org.opensearch.threadpool.Scheduler import org.opensearch.threadpool.ThreadPool diff --git a/core/src/main/kotlin/org/opensearch/alerting/opensearchapi/OpenSearchExtensions.kt b/core/src/main/kotlin/org/opensearch/alerting/opensearchapi/OpenSearchExtensions.kt index 077bfc6ef..fd500ef1d 100644 --- a/core/src/main/kotlin/org/opensearch/alerting/opensearchapi/OpenSearchExtensions.kt +++ b/core/src/main/kotlin/org/opensearch/alerting/opensearchapi/OpenSearchExtensions.kt @@ -14,6 +14,7 @@ import org.opensearch.OpenSearchException import org.opensearch.action.bulk.BackoffPolicy import org.opensearch.action.search.SearchResponse import org.opensearch.action.search.ShardSearchFailure +import org.opensearch.alerting.core.ppl.PPLPluginInterface import org.opensearch.common.settings.Settings import 
org.opensearch.common.util.concurrent.ThreadContext import org.opensearch.common.xcontent.XContentHelper @@ -21,7 +22,6 @@ import org.opensearch.common.xcontent.XContentType import org.opensearch.commons.InjectSecurity import org.opensearch.commons.authuser.User import org.opensearch.commons.notifications.NotificationsPluginInterface -import org.opensearch.commons.ppl.PPLPluginInterface import org.opensearch.core.action.ActionListener import org.opensearch.core.rest.RestStatus import org.opensearch.core.rest.RestStatus.BAD_GATEWAY From 45f92502af8a2700aba5d86ca9149faa6430a30c Mon Sep 17 00:00:00 2001 From: Dennis Toepker Date: Wed, 10 Sep 2025 15:24:25 -0700 Subject: [PATCH 24/40] moving nonOptionalTimeField function from common utils to alerting --- .../org/opensearch/alerting/core/modelv2/AlertV2.kt | 2 +- .../org/opensearch/alerting/core/modelv2/PPLMonitor.kt | 2 +- .../alerting/core/modelv2/PPLMonitorRunResult.kt | 2 +- .../opensearch/alerting/core/util/XContentExtensions.kt | 8 ++++++++ 4 files changed, 11 insertions(+), 3 deletions(-) create mode 100644 core/src/main/kotlin/org/opensearch/alerting/core/util/XContentExtensions.kt diff --git a/core/src/main/kotlin/org/opensearch/alerting/core/modelv2/AlertV2.kt b/core/src/main/kotlin/org/opensearch/alerting/core/modelv2/AlertV2.kt index aa7155a66..1dae3dc73 100644 --- a/core/src/main/kotlin/org/opensearch/alerting/core/modelv2/AlertV2.kt +++ b/core/src/main/kotlin/org/opensearch/alerting/core/modelv2/AlertV2.kt @@ -1,6 +1,7 @@ package org.opensearch.alerting.core.modelv2 import org.opensearch.alerting.core.modelv2.TriggerV2.Severity +import org.opensearch.alerting.core.util.nonOptionalTimeField import org.opensearch.common.lucene.uid.Versions import org.opensearch.commons.alerting.model.Alert.Companion.ALERT_ID_FIELD import org.opensearch.commons.alerting.model.Alert.Companion.ALERT_VERSION_FIELD @@ -17,7 +18,6 @@ import org.opensearch.commons.alerting.model.Alert.Companion.TRIGGER_ID_FIELD import 
org.opensearch.commons.alerting.model.Alert.Companion.TRIGGER_NAME_FIELD import org.opensearch.commons.alerting.util.IndexUtils.Companion.NO_SCHEMA_VERSION import org.opensearch.commons.alerting.util.instant -import org.opensearch.commons.alerting.util.nonOptionalTimeField import org.opensearch.core.common.io.stream.StreamInput import org.opensearch.core.common.io.stream.StreamOutput import org.opensearch.core.common.io.stream.Writeable diff --git a/core/src/main/kotlin/org/opensearch/alerting/core/modelv2/PPLMonitor.kt b/core/src/main/kotlin/org/opensearch/alerting/core/modelv2/PPLMonitor.kt index 16b3b6fea..7464ce0d4 100644 --- a/core/src/main/kotlin/org/opensearch/alerting/core/modelv2/PPLMonitor.kt +++ b/core/src/main/kotlin/org/opensearch/alerting/core/modelv2/PPLMonitor.kt @@ -1,5 +1,6 @@ package org.opensearch.alerting.core.modelv2 +import org.opensearch.alerting.core.util.nonOptionalTimeField import org.opensearch.common.unit.TimeValue import org.opensearch.commons.alerting.model.CronSchedule import org.opensearch.commons.alerting.model.IntervalSchedule @@ -7,7 +8,6 @@ import org.opensearch.commons.alerting.model.Monitor import org.opensearch.commons.alerting.model.Schedule import org.opensearch.commons.alerting.util.IndexUtils import org.opensearch.commons.alerting.util.instant -import org.opensearch.commons.alerting.util.nonOptionalTimeField import org.opensearch.commons.alerting.util.optionalTimeField import org.opensearch.core.common.io.stream.StreamInput import org.opensearch.core.common.io.stream.StreamOutput diff --git a/core/src/main/kotlin/org/opensearch/alerting/core/modelv2/PPLMonitorRunResult.kt b/core/src/main/kotlin/org/opensearch/alerting/core/modelv2/PPLMonitorRunResult.kt index 29b5744d6..135186abb 100644 --- a/core/src/main/kotlin/org/opensearch/alerting/core/modelv2/PPLMonitorRunResult.kt +++ b/core/src/main/kotlin/org/opensearch/alerting/core/modelv2/PPLMonitorRunResult.kt @@ -5,7 +5,7 @@ import 
org.opensearch.alerting.core.modelv2.MonitorV2RunResult.Companion.MONITOR import org.opensearch.alerting.core.modelv2.MonitorV2RunResult.Companion.PERIOD_END_FIELD import org.opensearch.alerting.core.modelv2.MonitorV2RunResult.Companion.PERIOD_START_FIELD import org.opensearch.alerting.core.modelv2.MonitorV2RunResult.Companion.TRIGGER_RESULTS_FIELD -import org.opensearch.commons.alerting.util.nonOptionalTimeField +import org.opensearch.alerting.core.util.nonOptionalTimeField import org.opensearch.core.common.io.stream.StreamInput import org.opensearch.core.common.io.stream.StreamOutput import org.opensearch.core.xcontent.ToXContent diff --git a/core/src/main/kotlin/org/opensearch/alerting/core/util/XContentExtensions.kt b/core/src/main/kotlin/org/opensearch/alerting/core/util/XContentExtensions.kt new file mode 100644 index 000000000..c36ed2dc5 --- /dev/null +++ b/core/src/main/kotlin/org/opensearch/alerting/core/util/XContentExtensions.kt @@ -0,0 +1,8 @@ +package org.opensearch.alerting.core.util + +import org.opensearch.core.xcontent.XContentBuilder +import java.time.Instant + +fun XContentBuilder.nonOptionalTimeField(name: String, instant: Instant): XContentBuilder { + return this.timeField(name, "${name}_in_millis", instant.toEpochMilli()) +} From ad9b57e182b3446c9b94ae499093e3673c64e732 Mon Sep 17 00:00:00 2001 From: Dennis Toepker Date: Thu, 11 Sep 2025 13:27:15 -0700 Subject: [PATCH 25/40] adding PPL Monitor query validations before creating PPL Monitors --- .../opensearch/alerting/PPLMonitorRunner.kt | 153 +++++++++--------- .../TransportIndexMonitorV2Action.kt | 87 ++++++++++ .../alerting/core/modelv2/MonitorV2.kt | 1 - .../alerting/core/modelv2/PPLMonitor.kt | 9 +- .../alerting/core/modelv2/PPLTrigger.kt | 2 +- 5 files changed, 167 insertions(+), 85 deletions(-) diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt b/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt index 6b262074f..8da1d523d 100644 --- 
a/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt @@ -50,6 +50,8 @@ object PPLMonitorRunner : MonitorV2Runner { private const val PPL_SQL_QUERY_FIELD = "query" // name of PPL query field when passing into PPL/SQL Execute API call + private const val TIMESTAMP_FIELD = "timestamp" // TODO: this should be deleted once PPL plugin side time keywords are introduced + override suspend fun runMonitorV2( monitorV2: MonitorV2, monitorCtx: MonitorRunnerExecutionContext, // MonitorV2 reads from same context as Monitor @@ -141,7 +143,6 @@ object PPLMonitorRunner : MonitorV2Runner { appendCustomCondition(timeFilteredQuery, pplTrigger.customCondition!!) } - // TODO: does this handle pagination? does it need to? // execute the PPL query val queryResponseJson = executePplQuery(queryToExecute, nodeClient) logger.info("query execution results for trigger ${pplTrigger.name}: $queryResponseJson") @@ -229,7 +230,7 @@ object PPLMonitorRunner : MonitorV2Runner { } } } catch (e: Exception) { - logger.error("failed to run PPL Trigger ${pplTrigger.name} for PPL Monitor ${pplMonitor.name}", e) + logger.error("failed to run PPL Trigger ${pplTrigger.name} from PPL Monitor ${pplMonitor.name}", e) // generate an alert with an error message monitorCtx.retryPolicy?.let { @@ -283,11 +284,11 @@ object PPLMonitorRunner : MonitorV2Runner { // inject time filter into PPL query to only query for data within the (periodStart, periodEnd) interval // TODO: if query contains "_time", "span", "earliest", "latest", skip adding filter // pending https://github.com/opensearch-project/sql/issues/3969 - // for now assume "_time" field is always present in customer data + // for now assume TIMESTAMP_FIELD field is always present in customer data // if the raw query contained any time check whatsoever, skip adding a time filter internally // and return query as is, customer's in-query time checks instantly and 
automatically overrides - if (query.contains("_time")) { // TODO: replace with PPL time keyword checks after that's GA + if (query.contains(TIMESTAMP_FIELD)) { // TODO: replace with PPL time keyword checks after that's GA return query } @@ -302,8 +303,9 @@ object PPLMonitorRunner : MonitorV2Runner { val periodStartPplTimestamp = formatter.format(updatedPeriodStart) val periodEndPplTimeStamp = formatter.format(periodEnd) - val timeFilterReplace = "| where _time > TIMESTAMP('$periodStartPplTimestamp') and _time < TIMESTAMP('$periodEndPplTimeStamp') |" - val timeFilterAppend = "| where _time > TIMESTAMP('$periodStartPplTimestamp') and _time < TIMESTAMP('$periodEndPplTimeStamp')" + val timeFilterAppend = "| where $TIMESTAMP_FIELD > TIMESTAMP('$periodStartPplTimestamp') and " + + "$TIMESTAMP_FIELD < TIMESTAMP('$periodEndPplTimeStamp')" + val timeFilterReplace = "$timeFilterAppend |" val timeFilteredQuery: String = if (query.contains("|")) { // if Monitor query contains piped statements, inject the time filter @@ -319,33 +321,6 @@ object PPLMonitorRunner : MonitorV2Runner { return timeFilteredQuery } - // appends user-defined custom trigger condition to PPL query, only for custom condition Triggers - private fun appendCustomCondition(query: String, customCondition: String): String { - return "$query | $customCondition" - } - - // returns PPL query response as parsable JSONObject - private suspend fun executePplQuery(query: String, client: NodeClient): JSONObject { - // call PPL plugin to execute time filtered query - val transportPplQueryRequest = TransportPPLQueryRequest( - query, - JSONObject(mapOf(PPL_SQL_QUERY_FIELD to query)), - null // null path falls back to a default path internal to SQL/PPL Plugin - ) - - val transportPplQueryResponse = PPLPluginInterface.suspendUntil { - this.executeQuery( - client, - transportPplQueryRequest, - it - ) - } - - val queryResponseJson = JSONObject(transportPplQueryResponse.result) - - return queryResponseJson - } - private fun 
evaluateNumResultsTrigger(numResults: Long, numResultsCondition: NumResultsCondition, numResultsValue: Long): Boolean { return when (numResultsCondition) { NumResultsCondition.GREATER_THAN -> numResults > numResultsValue @@ -402,33 +377,8 @@ object PPLMonitorRunner : MonitorV2Runner { // find the name of the eval result variable defined in custom condition val evalResultVarName = findEvalResultVar(pplTrigger.customCondition!!) - // find the eval statement result variable in the PPL query response schema - val schemaList = customConditionQueryResponse.getJSONArray("schema") - var evalResultVarIdx = -1 - for (i in 0 until schemaList.length()) { - val schemaObj = schemaList.getJSONObject(i) - val columnName = schemaObj.getString("name") - - if (columnName == evalResultVarName) { - if (schemaObj.getString("type") != "boolean") { - throw IllegalStateException( - "parsing results of PPL query with custom condition failed," + - "eval statement variable was not type boolean, but instead type: ${schemaObj.getString("type")}" - ) - } - - evalResultVarIdx = i - break - } - } - - // eval statement result variable should always be found - if (evalResultVarIdx == -1) { - throw IllegalStateException( - "expected to find eval statement results variable \"$evalResultVarName\" in results " + - "of PPL query with custom condition, but did not." - ) - } + // find the index eval statement result variable in the PPL query response schema + val evalResultVarIdx = findEvalResultVarIdxInSchema(customConditionQueryResponse, evalResultVarName) val dataRowList = customConditionQueryResponse.getJSONArray("datarows") for (i in 0 until dataRowList.length()) { @@ -448,20 +398,6 @@ object PPLMonitorRunner : MonitorV2Runner { return relevantQueryResultRows } - // TODO: is there maybe some PPL plugin util function we can use to replace this? 
- // searches a given custom condition eval statement for the name of the result - // variable and returns it - private fun findEvalResultVar(customCondition: String): String { - // the PPL keyword "eval", followed by a whitespace must be present, otherwise a syntax error from PPL plugin would've - // been thrown when executing the query (without the whitespace, the query would've had something like "evalresult", - // which is invalid PPL - val startOfEvalStatement = "eval " - - val startIdx = customCondition.indexOf(startOfEvalStatement) + startOfEvalStatement.length - val endIdx = startIdx + customCondition.substring(startIdx).indexOfFirst { it == ' ' || it == '=' } - return customCondition.substring(startIdx, endIdx) - } - // prepares the query results to be passed into alerts and notifications based on trigger mode // if result set, alert and notification simply stores all query results // if per result, each alert and notification stores a single row of the query results @@ -480,7 +416,6 @@ object PPLMonitorRunner : MonitorV2Runner { individualRow.put("datarows", JSONArray(relevantQueryResultRows.getJSONArray("datarows").getJSONArray(i).toList())) individualRows.add(individualRow) } - return individualRows } @@ -637,4 +572,72 @@ object PPLMonitorRunner : MonitorV2Runner { // } } } + + /* public util functions */ + + // appends user-defined custom trigger condition to PPL query, only for custom condition Triggers + fun appendCustomCondition(query: String, customCondition: String): String { + return "$query | $customCondition" + } + + // returns PPL query response as parsable JSONObject + suspend fun executePplQuery(query: String, client: NodeClient): JSONObject { + // call PPL plugin to execute time filtered query + val transportPplQueryRequest = TransportPPLQueryRequest( + query, + JSONObject(mapOf(PPL_SQL_QUERY_FIELD to query)), + null // null path falls back to a default path internal to SQL/PPL Plugin + ) + + val transportPplQueryResponse = 
PPLPluginInterface.suspendUntil { + this.executeQuery( + client, + transportPplQueryRequest, + it + ) + } + + val queryResponseJson = JSONObject(transportPplQueryResponse.result) + + return queryResponseJson + } + + // TODO: is there maybe some PPL plugin util function we can use to replace this? + // searches a given custom condition eval statement for the name of + // the eval result variable and returns it + fun findEvalResultVar(customCondition: String): String { + // the PPL keyword "eval", followed by a whitespace must be present, otherwise a syntax error from PPL plugin would've + // been thrown when executing the query (without the whitespace, the query would've had something like "evalresult", + // which is invalid PPL + val startOfEvalStatement = "eval " + + val startIdx = customCondition.indexOf(startOfEvalStatement) + startOfEvalStatement.length + val endIdx = startIdx + customCondition.substring(startIdx).indexOfFirst { it == ' ' || it == '=' } + return customCondition.substring(startIdx, endIdx) + } + + fun findEvalResultVarIdxInSchema(customConditionQueryResponse: JSONObject, evalResultVarName: String): Int { + // find the index eval statement result variable in the PPL query response schema + val schemaList = customConditionQueryResponse.getJSONArray("schema") + var evalResultVarIdx = -1 + for (i in 0 until schemaList.length()) { + val schemaObj = schemaList.getJSONObject(i) + val columnName = schemaObj.getString("name") + + if (columnName == evalResultVarName) { + evalResultVarIdx = i + break + } + } + + // eval statement result variable should always be found + if (evalResultVarIdx == -1) { + throw IllegalStateException( + "expected to find eval statement results variable \"$evalResultVarName\" in results " + + "of PPL query with custom condition, but did not." 
+ ) + } + + return evalResultVarIdx + } } diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportIndexMonitorV2Action.kt b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportIndexMonitorV2Action.kt index 373d4311e..604fa238b 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportIndexMonitorV2Action.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportIndexMonitorV2Action.kt @@ -21,12 +21,17 @@ import org.opensearch.action.search.SearchResponse import org.opensearch.action.support.ActionFilters import org.opensearch.action.support.HandledTransportAction import org.opensearch.action.support.clustermanager.AcknowledgedResponse +import org.opensearch.alerting.PPLMonitorRunner.appendCustomCondition +import org.opensearch.alerting.PPLMonitorRunner.executePplQuery +import org.opensearch.alerting.PPLMonitorRunner.findEvalResultVar +import org.opensearch.alerting.PPLMonitorRunner.findEvalResultVarIdxInSchema import org.opensearch.alerting.actionv2.IndexMonitorV2Action import org.opensearch.alerting.actionv2.IndexMonitorV2Request import org.opensearch.alerting.actionv2.IndexMonitorV2Response import org.opensearch.alerting.core.ScheduledJobIndices import org.opensearch.alerting.core.modelv2.MonitorV2 import org.opensearch.alerting.core.modelv2.PPLMonitor +import org.opensearch.alerting.core.modelv2.PPLTrigger.ConditionType import org.opensearch.alerting.opensearchapi.suspendUntil import org.opensearch.alerting.settings.AlertingSettings import org.opensearch.alerting.settings.AlertingSettings.Companion.ALERTING_MAX_MONITORS @@ -43,6 +48,7 @@ import org.opensearch.common.xcontent.XContentType import org.opensearch.commons.alerting.model.Monitor import org.opensearch.commons.alerting.model.ScheduledJob import org.opensearch.commons.alerting.model.ScheduledJob.Companion.SCHEDULED_JOBS_INDEX +import org.opensearch.commons.alerting.model.userErrorMessage import 
org.opensearch.commons.alerting.util.AlertingException import org.opensearch.core.action.ActionListener import org.opensearch.core.common.io.stream.NamedWriteableRegistry @@ -55,6 +61,7 @@ import org.opensearch.search.builder.SearchSourceBuilder import org.opensearch.tasks.Task import org.opensearch.transport.TransportService import org.opensearch.transport.client.Client +import org.opensearch.transport.client.node.NodeClient private val log = LogManager.getLogger(TransportIndexMonitorV2Action::class.java) private val scope: CoroutineScope = CoroutineScope(Dispatchers.IO) @@ -81,6 +88,86 @@ class TransportIndexMonitorV2Action @Inject constructor( @Volatile override var filterByEnabled = AlertingSettings.FILTER_BY_BACKEND_ROLES.get(settings) override fun doExecute(task: Task, indexMonitorRequest: IndexMonitorV2Request, actionListener: ActionListener) { + // validate the MonitorV2 based on its type + when (indexMonitorRequest.monitorV2) { + is PPLMonitor -> validateMonitorPplQuery( + indexMonitorRequest.monitorV2 as PPLMonitor, + object : ActionListener { // validationListener + override fun onResponse(response: Unit) { + checkScheduledJobIndex(indexMonitorRequest, actionListener) + } + + override fun onFailure(e: Exception) { + actionListener.onFailure(e) + } + } + ) + else -> actionListener.onFailure( + AlertingException.wrap( + IllegalStateException( + "unexpected MonitorV2 type: ${indexMonitorRequest.monitorV2.javaClass.name}" + ) + ) + ) + } + } + + private fun validateMonitorPplQuery(pplMonitor: PPLMonitor, validationListener: ActionListener) { + scope.launch { + try { + val nodeClient = client as NodeClient + + // first attempt to run the base query + // if there are any PPL syntax errors, this will throw an exception + executePplQuery(pplMonitor.query, nodeClient) + + // now scan all the triggers with custom conditions, and ensure each query constructed + // from the base query + custom condition is valid + val allCustomTriggersValid = true + for (pplTrigger 
in pplMonitor.triggers) { + if (pplTrigger.conditionType == ConditionType.NUMBER_OF_RESULTS) { + continue + } + + val evalResultVar = findEvalResultVar(pplTrigger.customCondition!!) + + val queryWithCustomCondition = appendCustomCondition(pplMonitor.query, pplTrigger.customCondition!!) + + val executePplQueryResponse = executePplQuery(queryWithCustomCondition, nodeClient) + + val evalResultVarIdx = findEvalResultVarIdxInSchema(executePplQueryResponse, evalResultVar) + + val resultVarType = executePplQueryResponse + .getJSONArray("schema") + .getJSONObject(evalResultVarIdx) + .getString("type") + + // custom conditions must evaluate to a boolean result, otherwise it's invalid + if (resultVarType != "boolean") { + validationListener.onFailure( + AlertingException.wrap( + IllegalArgumentException( + "Custom condition in trigger ${pplTrigger.name} is invalid because it does not " + + "evaluate to a boolean, but instead to type: $resultVarType" + ) + ) + ) + return@launch + } + } + + validationListener.onResponse(Unit) + } catch (e: Exception) { + validationListener.onFailure( + AlertingException.wrap( + IllegalArgumentException("Invalid PPL Query in PPL Monitor: ${e.userErrorMessage()}") + ) + ) + } + } + } + + private fun checkScheduledJobIndex(indexMonitorRequest: IndexMonitorV2Request, actionListener: ActionListener) { /* check to see if alerting-config index (scheduled job index) is created and updated before indexing MonitorV2 into it */ if (!scheduledJobIndices.scheduledJobIndexExists()) { // if alerting-config index doesn't exist, send request to create it scheduledJobIndices.initScheduledJobIndex(object : ActionListener { diff --git a/core/src/main/kotlin/org/opensearch/alerting/core/modelv2/MonitorV2.kt b/core/src/main/kotlin/org/opensearch/alerting/core/modelv2/MonitorV2.kt index b626d0602..427ff8dee 100644 --- a/core/src/main/kotlin/org/opensearch/alerting/core/modelv2/MonitorV2.kt +++ b/core/src/main/kotlin/org/opensearch/alerting/core/modelv2/MonitorV2.kt 
@@ -48,7 +48,6 @@ interface MonitorV2 : ScheduledJob { // field names const val NAME_FIELD = "name" - const val MONITOR_TYPE_FIELD = "monitor_type" const val ENABLED_FIELD = "enabled" const val SCHEDULE_FIELD = "schedule" const val LAST_UPDATE_TIME_FIELD = "last_update_time" diff --git a/core/src/main/kotlin/org/opensearch/alerting/core/modelv2/PPLMonitor.kt b/core/src/main/kotlin/org/opensearch/alerting/core/modelv2/PPLMonitor.kt index 7464ce0d4..d134dd3ed 100644 --- a/core/src/main/kotlin/org/opensearch/alerting/core/modelv2/PPLMonitor.kt +++ b/core/src/main/kotlin/org/opensearch/alerting/core/modelv2/PPLMonitor.kt @@ -209,7 +209,6 @@ data class PPLMonitor( @Throws(IOException::class) fun parse(xcp: XContentParser, id: String = MonitorV2.NO_ID, version: Long = MonitorV2.NO_VERSION): PPLMonitor { var name: String? = null - var monitorType: String = PPL_MONITOR_TYPE var enabled = true var schedule: Schedule? = null var lookBackWindow: TimeValue? = null @@ -228,7 +227,6 @@ data class PPLMonitor( when (fieldName) { MonitorV2.NAME_FIELD -> name = xcp.text() - MonitorV2.MONITOR_TYPE_FIELD -> monitorType = xcp.text() MonitorV2.ENABLED_FIELD -> enabled = xcp.booleanValue() MonitorV2.SCHEDULE_FIELD -> schedule = Schedule.parse(xcp) MonitorV2.LOOK_BACK_WINDOW_FIELD -> { @@ -263,17 +261,12 @@ data class PPLMonitor( queryLanguage = enumMatchResult } QUERY_FIELD -> query = xcp.text() - else -> throw IllegalArgumentException("Unexpected field \"$fieldName\" when parsing PPL Monitor") + else -> throw IllegalArgumentException("Unexpected field when parsing PPL Monitor: $fieldName") } } /* validations */ - // ensure MonitorV2 XContent being parsed by PPLMonitor class is PPL Monitor type - if (monitorType != PPL_MONITOR_TYPE) { - throw IllegalArgumentException("Invalid monitor type: $monitorType") - } - // ensure there's at least 1 trigger if (triggers.isEmpty()) { throw IllegalArgumentException("Monitor must include at least 1 trigger") diff --git 
a/core/src/main/kotlin/org/opensearch/alerting/core/modelv2/PPLTrigger.kt b/core/src/main/kotlin/org/opensearch/alerting/core/modelv2/PPLTrigger.kt index b9ebd776d..4f3f5237d 100644 --- a/core/src/main/kotlin/org/opensearch/alerting/core/modelv2/PPLTrigger.kt +++ b/core/src/main/kotlin/org/opensearch/alerting/core/modelv2/PPLTrigger.kt @@ -313,7 +313,7 @@ data class PPLTrigger( actions.add(Action.parse(xcp)) } } - else -> throw IllegalArgumentException("Unexpected field $fieldName when parsing PPL Trigger") + else -> throw IllegalArgumentException("Unexpected field when parsing PPL Trigger: $fieldName") } } From 9130713a43adcae8832bb05be2216613a6ff19e2 Mon Sep 17 00:00:00 2001 From: Dennis Toepker Date: Fri, 12 Sep 2025 11:47:01 -0700 Subject: [PATCH 26/40] initial alert v2 history implementation --- .../org/opensearch/alerting/AlertV2Expirer.kt | 112 ----- .../org/opensearch/alerting/AlertingPlugin.kt | 10 +- .../alerting/MonitorRunnerExecutionContext.kt | 2 + .../alerting/MonitorRunnerService.kt | 6 + .../opensearch/alerting/PPLMonitorRunner.kt | 40 +- .../alerting/alertsv2/AlertV2Indices.kt | 426 ++++++++++++++++++ .../alerting/alertsv2/AlertV2Mover.kt | 227 ++++++++++ .../alerting/settings/AlertingSettings.kt | 30 ++ .../opensearch/alerting/util/IndexUtils.kt | 13 + 9 files changed, 723 insertions(+), 143 deletions(-) delete mode 100644 alerting/src/main/kotlin/org/opensearch/alerting/AlertV2Expirer.kt create mode 100644 alerting/src/main/kotlin/org/opensearch/alerting/alertsv2/AlertV2Indices.kt create mode 100644 alerting/src/main/kotlin/org/opensearch/alerting/alertsv2/AlertV2Mover.kt diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/AlertV2Expirer.kt b/alerting/src/main/kotlin/org/opensearch/alerting/AlertV2Expirer.kt deleted file mode 100644 index 8f411ca36..000000000 --- a/alerting/src/main/kotlin/org/opensearch/alerting/AlertV2Expirer.kt +++ /dev/null @@ -1,112 +0,0 @@ -package org.opensearch.alerting - -import 
org.apache.logging.log4j.LogManager -import org.opensearch.alerting.alerts.AlertIndices.Companion.ALERT_HISTORY_WRITE_INDEX -import org.opensearch.alerting.alerts.AlertIndices.Companion.ALERT_INDEX -import org.opensearch.alerting.alerts.AlertIndices.Companion.ALL_ALERT_INDEX_PATTERN -import org.opensearch.alerting.core.modelv2.AlertV2.Companion.EXPIRATION_TIME_FIELD -import org.opensearch.cluster.ClusterChangedEvent -import org.opensearch.cluster.ClusterStateListener -import org.opensearch.cluster.service.ClusterService -import org.opensearch.common.unit.TimeValue -import org.opensearch.core.action.ActionListener -import org.opensearch.index.query.QueryBuilders -import org.opensearch.index.reindex.BulkByScrollResponse -import org.opensearch.index.reindex.DeleteByQueryAction -import org.opensearch.index.reindex.DeleteByQueryRequestBuilder -import org.opensearch.threadpool.Scheduler -import org.opensearch.threadpool.ThreadPool -import org.opensearch.transport.client.Client -import java.time.Instant -import java.util.concurrent.TimeUnit - -private val logger = LogManager.getLogger(AlertV2Expirer::class.java) - -class AlertV2Expirer( - private val client: Client, - private val threadPool: ThreadPool, - private val clusterService: ClusterService, -) : ClusterStateListener { - - init { - clusterService.addListener(this) - } - - @Volatile private var isClusterManager = false - - private var alertIndexInitialized = false - - private var alertHistoryIndexInitialized = false - - private var scheduledAlertsV2CheckAndExpire: Scheduler.Cancellable? 
= null - - private val executorName = ThreadPool.Names.MANAGEMENT - - private val checkForExpirationInterval = TimeValue(1L, TimeUnit.MINUTES) - - override fun clusterChanged(event: ClusterChangedEvent) { - if (this.isClusterManager != event.localNodeClusterManager()) { - this.isClusterManager = event.localNodeClusterManager() - if (this.isClusterManager) { - onManager() - } else { - offManager() - } - } - - alertIndexInitialized = event.state().routingTable().hasIndex(ALERT_INDEX) - alertHistoryIndexInitialized = event.state().metadata().hasAlias(ALERT_HISTORY_WRITE_INDEX) - } - - fun onManager() { - try { - // try to sweep current AlertV2s for expiration immediately as we might be restarting the cluster - expireAlertV2s() - // schedule expiration checks and expirations to happen repeatedly at some interval - scheduledAlertsV2CheckAndExpire = threadPool - .scheduleWithFixedDelay({ expireAlertV2s() }, checkForExpirationInterval, executorName) - } catch (e: Exception) { - // This should be run on cluster startup - logger.error( - "Error sweeping AlertV2s for expiration. 
This cannot be done until clustermanager node is restarted.", - e - ) - } - } - - fun offManager() { - scheduledAlertsV2CheckAndExpire?.cancel() - } - - private fun expireAlertV2s() { - if (!areAlertsIndicesPresent()) { - return - } - - try { - val deleteByQuery = QueryBuilders.rangeQuery(EXPIRATION_TIME_FIELD) - .lte(Instant.now().toEpochMilli()) - - DeleteByQueryRequestBuilder(client, DeleteByQueryAction.INSTANCE) - .source(ALL_ALERT_INDEX_PATTERN) - .filter(deleteByQuery) - .refresh(true) - .execute( - object : ActionListener { - override fun onResponse(response: BulkByScrollResponse) { - logger.info("Deleted ${response.deleted} expired alerts") - } - override fun onFailure(e: Exception) { - logger.error("Failed to delete expired alerts", e) - } - } - ) - } catch (e: Exception) { - logger.error("Error during alert cleanup", e) - } - } - - private fun areAlertsIndicesPresent(): Boolean { - return alertIndexInitialized || alertHistoryIndexInitialized - } -} diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/AlertingPlugin.kt b/alerting/src/main/kotlin/org/opensearch/alerting/AlertingPlugin.kt index bdb59886f..eeca338dc 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/AlertingPlugin.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/AlertingPlugin.kt @@ -135,6 +135,8 @@ import org.opensearch.threadpool.ThreadPool import org.opensearch.transport.client.Client import org.opensearch.watcher.ResourceWatcherService import java.util.function.Supplier +import org.opensearch.alerting.alertsv2.AlertV2Indices +import org.opensearch.alerting.alertsv2.AlertV2Mover /** * Entry point of the OpenDistro for Elasticsearch alerting plugin @@ -178,9 +180,10 @@ internal class AlertingPlugin : PainlessExtension, ActionPlugin, ScriptPlugin, R lateinit var docLevelMonitorQueries: DocLevelMonitorQueries lateinit var threadPool: ThreadPool lateinit var alertIndices: AlertIndices + lateinit var alertV2Indices: AlertV2Indices lateinit var clusterService: 
ClusterService lateinit var destinationMigrationCoordinator: DestinationMigrationCoordinator - lateinit var alertV2Expirer: AlertV2Expirer + lateinit var alertV2Mover: AlertV2Mover var monitorTypeToMonitorRunners: MutableMap = mutableMapOf() override fun getRestHandlers( @@ -314,6 +317,7 @@ internal class AlertingPlugin : PainlessExtension, ActionPlugin, ScriptPlugin, R .registerSettings(settings) .registerThreadPool(threadPool) .registerAlertIndices(alertIndices) + .registerAlertV2Indices(alertV2Indices) .registerInputService( InputService( client, @@ -340,7 +344,7 @@ internal class AlertingPlugin : PainlessExtension, ActionPlugin, ScriptPlugin, R scheduler = JobScheduler(threadPool, runner) sweeper = JobSweeper(environment.settings(), client, clusterService, threadPool, xContentRegistry, scheduler, ALERTING_JOB_TYPES) destinationMigrationCoordinator = DestinationMigrationCoordinator(client, clusterService, threadPool, scheduledJobIndices) - alertV2Expirer = AlertV2Expirer(client, threadPool, clusterService) + alertV2Mover = AlertV2Mover(environment.settings(), client, threadPool, clusterService) this.threadPool = threadPool this.clusterService = clusterService @@ -368,7 +372,7 @@ internal class AlertingPlugin : PainlessExtension, ActionPlugin, ScriptPlugin, R commentsIndices, docLevelMonitorQueries, destinationMigrationCoordinator, - alertV2Expirer, + alertV2Mover, lockService, alertService, triggerService diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/MonitorRunnerExecutionContext.kt b/alerting/src/main/kotlin/org/opensearch/alerting/MonitorRunnerExecutionContext.kt index a890ec1a6..5c5e24070 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/MonitorRunnerExecutionContext.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/MonitorRunnerExecutionContext.kt @@ -7,6 +7,7 @@ package org.opensearch.alerting import org.opensearch.action.bulk.BackoffPolicy import org.opensearch.alerting.alerts.AlertIndices +import 
org.opensearch.alerting.alertsv2.AlertV2Indices import org.opensearch.alerting.core.lock.LockService import org.opensearch.alerting.model.destination.DestinationContextFactory import org.opensearch.alerting.remote.monitors.RemoteMonitorRegistry @@ -35,6 +36,7 @@ data class MonitorRunnerExecutionContext( var settings: Settings? = null, var threadPool: ThreadPool? = null, var alertIndices: AlertIndices? = null, + var alertV2Indices: AlertV2Indices? = null, var inputService: InputService? = null, var triggerService: TriggerService? = null, var alertService: AlertService? = null, diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/MonitorRunnerService.kt b/alerting/src/main/kotlin/org/opensearch/alerting/MonitorRunnerService.kt index e917fd048..024a03e13 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/MonitorRunnerService.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/MonitorRunnerService.kt @@ -94,6 +94,7 @@ import java.time.Instant import java.time.LocalDateTime import java.time.ZoneOffset import java.util.UUID +import org.opensearch.alerting.alertsv2.AlertV2Indices import kotlin.coroutines.CoroutineContext object MonitorRunnerService : JobRunner, CoroutineScope, AbstractLifecycleComponent() { @@ -145,6 +146,11 @@ object MonitorRunnerService : JobRunner, CoroutineScope, AbstractLifecycleCompon return this } + fun registerAlertV2Indices(alertV2Indices: AlertV2Indices): MonitorRunnerService { + this.monitorCtx.alertV2Indices = alertV2Indices + return this + } + fun registerInputService(inputService: InputService): MonitorRunnerService { this.monitorCtx.inputService = inputService return this diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt b/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt index 8da1d523d..3aca2db90 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt @@ -44,6 
+44,7 @@ import java.time.Instant import java.time.ZoneOffset.UTC import java.time.format.DateTimeFormatter import java.time.temporal.ChronoUnit +import org.opensearch.alerting.alertsv2.AlertV2Indices object PPLMonitorRunner : MonitorV2Runner { private val logger = LogManager.getLogger(javaClass) @@ -86,11 +87,10 @@ object PPLMonitorRunner : MonitorV2Runner { // use threadpool time for cross node consistency val timeOfCurrentExecution = Instant.ofEpochMilli(MonitorRunnerService.monitorCtx.threadPool!!.absoluteTimeInMillis()) - // TODO: should alerting v1 and v2 alerts index be separate? + // TODO: put alertV2s in their own index try { - // TODO: write generated V2 alerts to existing alerts v1 index for now, revisit this decision - monitorCtx.alertIndices!!.createOrUpdateAlertIndex() - monitorCtx.alertIndices!!.createOrUpdateInitialAlertHistoryIndex() + monitorCtx.alertV2Indices!!.createOrUpdateAlertV2Index() + monitorCtx.alertV2Indices!!.createOrUpdateInitialAlertV2HistoryIndex() } catch (e: Exception) { val id = if (pplMonitor.id.trim().isEmpty()) "_na_" else pplMonitor.id logger.error("Error loading alerts for monitorV2: $id", e) @@ -121,21 +121,6 @@ object PPLMonitorRunner : MonitorV2Runner { } logger.info("suppression check passed, executing trigger ${pplTrigger.name} from monitor ${pplMonitor.name}") -// internal fun isActionActionable(action: Action, alert: Alert?): Boolean { -// if (alert != null && alert.state == Alert.State.AUDIT) -// return false -// if (alert == null || action.throttle == null) { -// return true -// } -// if (action.throttleEnabled) { -// val result = alert.actionExecutionResults.firstOrNull { r -> r.actionId == action.id } -// val lastExecutionTime: Instant? 
= result?.lastExecutionTime -// val throttledTimeBound = currentTime().minus(action.throttle!!.value.toLong(), action.throttle!!.unit) -// return (lastExecutionTime == null || lastExecutionTime.isBefore(throttledTimeBound)) -// } -// return true -// } - // if trigger uses custom condition, append the custom condition to query, otherwise simply proceed val queryToExecute = if (pplTrigger.conditionType == ConditionType.NUMBER_OF_RESULTS) { // number of results trigger timeFilteredQuery @@ -147,7 +132,7 @@ object PPLMonitorRunner : MonitorV2Runner { val queryResponseJson = executePplQuery(queryToExecute, nodeClient) logger.info("query execution results for trigger ${pplTrigger.name}: $queryResponseJson") - // retrieve only the relevant query response rows. + // retrieve deep copies of only the relevant query response rows. // for num_results triggers, that's the entire response // for custom triggers, that's only rows that evaluated to true val relevantQueryResultRows = if (pplTrigger.conditionType == ConditionType.NUMBER_OF_RESULTS) { @@ -155,7 +140,7 @@ object PPLMonitorRunner : MonitorV2Runner { getQueryResponseWithoutSize(queryResponseJson) } else { // custom condition trigger - evaluateCustomConditionTrigger(queryResponseJson, pplTrigger) + collectCustomConditionResults(queryResponseJson, pplTrigger) } // retrieve the number of results @@ -194,10 +179,6 @@ object PPLMonitorRunner : MonitorV2Runner { timeOfCurrentExecution ) - // collect the generated alerts to be written to alerts index - // if the trigger is on result_set mode -// generatedAlerts.addAll(thisTriggersGeneratedAlerts) - // update the trigger's last execution time for future suppression checks pplTrigger.lastTriggeredTime = timeOfCurrentExecution @@ -354,7 +335,7 @@ object PPLMonitorRunner : MonitorV2Runner { return queryResponseDeepCopy } - private fun evaluateCustomConditionTrigger(customConditionQueryResponse: JSONObject, pplTrigger: PPLTrigger): JSONObject { + private fun 
collectCustomConditionResults(customConditionQueryResponse: JSONObject, pplTrigger: PPLTrigger): JSONObject { // a PPL query with custom condition returning 0 results should imply a valid but not useful query. // do not trigger alert, but warn that query likely is not functioning as user intended if (customConditionQueryResponse.getLong("total") == 0L) { @@ -445,7 +426,10 @@ object PPLMonitorRunner : MonitorV2Runner { alertV2s.add(alertV2) } - return alertV2s.toList() // return as immutable list + // TODO: this is a magic number right now, make it a setting + val alertsLimit = 10 + + return alertV2s.take(alertsLimit).toList() // return as immutable list } private fun generateErrorAlert( @@ -488,7 +472,7 @@ object PPLMonitorRunner : MonitorV2Runner { var requestsToRetry = alerts.flatMap { alert -> listOf>( - IndexRequest(AlertIndices.ALERT_INDEX) + IndexRequest(AlertV2Indices.ALERT_V2_INDEX) .routing(pplMonitor.id) // set routing ID to PPL Monitor ID .source(alert.toXContent(XContentFactory.jsonBuilder(), ToXContent.EMPTY_PARAMS)) .id(if (alert.id != Alert.NO_ID) alert.id else null) diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/alertsv2/AlertV2Indices.kt b/alerting/src/main/kotlin/org/opensearch/alerting/alertsv2/AlertV2Indices.kt new file mode 100644 index 000000000..b58b31178 --- /dev/null +++ b/alerting/src/main/kotlin/org/opensearch/alerting/alertsv2/AlertV2Indices.kt @@ -0,0 +1,426 @@ +package org.opensearch.alerting.alertsv2 + +import java.time.Instant +import kotlinx.coroutines.CoroutineScope +import kotlinx.coroutines.Dispatchers +import kotlinx.coroutines.launch +import org.apache.logging.log4j.LogManager +import org.opensearch.ExceptionsHelper +import org.opensearch.ResourceAlreadyExistsException +import org.opensearch.action.admin.cluster.state.ClusterStateRequest +import org.opensearch.action.admin.cluster.state.ClusterStateResponse +import org.opensearch.action.admin.indices.alias.Alias +import 
org.opensearch.action.admin.indices.create.CreateIndexRequest +import org.opensearch.action.admin.indices.create.CreateIndexResponse +import org.opensearch.action.admin.indices.delete.DeleteIndexRequest +import org.opensearch.action.admin.indices.exists.indices.IndicesExistsRequest +import org.opensearch.action.admin.indices.exists.indices.IndicesExistsResponse +import org.opensearch.action.admin.indices.mapping.put.PutMappingRequest +import org.opensearch.action.admin.indices.rollover.RolloverRequest +import org.opensearch.action.admin.indices.rollover.RolloverResponse +import org.opensearch.action.support.IndicesOptions +import org.opensearch.action.support.clustermanager.AcknowledgedResponse +import org.opensearch.alerting.alerts.AlertIndices +import org.opensearch.alerting.opensearchapi.suspendUntil +import org.opensearch.alerting.settings.AlertingSettings.Companion.ALERT_V2_HISTORY_ENABLED +import org.opensearch.alerting.settings.AlertingSettings.Companion.ALERT_V2_HISTORY_INDEX_MAX_AGE +import org.opensearch.alerting.settings.AlertingSettings.Companion.ALERT_V2_HISTORY_MAX_DOCS +import org.opensearch.alerting.settings.AlertingSettings.Companion.ALERT_V2_HISTORY_RETENTION_PERIOD +import org.opensearch.alerting.settings.AlertingSettings.Companion.ALERT_V2_HISTORY_ROLLOVER_PERIOD +import org.opensearch.alerting.settings.AlertingSettings.Companion.REQUEST_TIMEOUT +import org.opensearch.alerting.util.IndexUtils +import org.opensearch.cluster.ClusterChangedEvent +import org.opensearch.cluster.ClusterStateListener +import org.opensearch.cluster.metadata.IndexMetadata +import org.opensearch.cluster.service.ClusterService +import org.opensearch.common.settings.Settings +import org.opensearch.common.unit.TimeValue +import org.opensearch.common.xcontent.XContentType +import org.opensearch.commons.alerting.util.AlertingException +import org.opensearch.core.action.ActionListener +import org.opensearch.threadpool.Scheduler.Cancellable +import 
org.opensearch.threadpool.ThreadPool +import org.opensearch.transport.client.Client + +private val scope: CoroutineScope = CoroutineScope(Dispatchers.IO) +private val logger = LogManager.getLogger(AlertV2Indices::class.java) + +// TODO: find what you can and factor into to IndexUtils.kt +class AlertV2Indices( + settings: Settings, + private val client: Client, + private val threadPool: ThreadPool, + private val clusterService: ClusterService +) : ClusterStateListener { + + init { + clusterService.addListener(this) + clusterService.clusterSettings.addSettingsUpdateConsumer(ALERT_V2_HISTORY_ENABLED) { alertV2HistoryEnabled = it } + clusterService.clusterSettings.addSettingsUpdateConsumer(ALERT_V2_HISTORY_MAX_DOCS) { alertV2HistoryMaxDocs = it } + clusterService.clusterSettings.addSettingsUpdateConsumer(ALERT_V2_HISTORY_INDEX_MAX_AGE) { alertV2HistoryMaxAge = it } + clusterService.clusterSettings.addSettingsUpdateConsumer(ALERT_V2_HISTORY_ROLLOVER_PERIOD) { + alertV2HistoryRolloverPeriod = it + rescheduleAlertRollover() + } + clusterService.clusterSettings.addSettingsUpdateConsumer(ALERT_V2_HISTORY_RETENTION_PERIOD) { + alertV2HistoryRetentionPeriod = it + } + clusterService.clusterSettings.addSettingsUpdateConsumer(REQUEST_TIMEOUT) { requestTimeout = it } + } + + companion object { + + /** The in progress alert history index. */ + const val ALERT_V2_INDEX = ".opensearch-alerting-v2-alerts" + + /** The alias of the index in which to write alert history */ + const val ALERT_V2_HISTORY_WRITE_INDEX = ".opensearch-alerting-v2-alert-history-write" + + /** The index name pattern referring to all alert history indices */ + const val ALERT_V2_HISTORY_ALL = ".opensearch-alerting-v2-alert-history*" + + /** The index name pattern to create alert history indices */ + const val ALERT_V2_HISTORY_INDEX_PATTERN = "<.opensearch-alerting-v2-alert-history-{now/d}-1>" + + /** The index name pattern to query all alerts, history and current alerts. 
*/ + const val ALL_ALERT_V2_INDEX_PATTERN = ".opensearch-alerting-v2-alert*" + + @JvmStatic + fun alertV2Mapping() = // TODO: create alert_v2 mappings + AlertV2Indices::class.java.getResource("alert_v2_mapping.json").readText() + } + + @Volatile private var alertV2HistoryEnabled = ALERT_V2_HISTORY_ENABLED.get(settings) + + + @Volatile private var alertV2HistoryMaxDocs = ALERT_V2_HISTORY_MAX_DOCS.get(settings) + + + @Volatile private var alertV2HistoryMaxAge = ALERT_V2_HISTORY_INDEX_MAX_AGE.get(settings) + + + @Volatile private var alertV2HistoryRolloverPeriod = ALERT_V2_HISTORY_ROLLOVER_PERIOD.get(settings) + + + @Volatile private var alertV2HistoryRetentionPeriod = ALERT_V2_HISTORY_RETENTION_PERIOD.get(settings) + + + @Volatile private var requestTimeout = REQUEST_TIMEOUT.get(settings) + + @Volatile private var isClusterManager = false + + // for JobsMonitor to report + var lastRolloverTime: TimeValue? = null + + private var alertV2HistoryIndexInitialized: Boolean = false + + private var alertV2IndexInitialized: Boolean = false + + private var scheduledAlertV2Rollover: Cancellable? = null + + fun onClusterManager() { + try { + // try to rollover immediately as we might be restarting the cluster + rolloverAlertV2HistoryIndex() + + // schedule the next rollover for approx MAX_AGE later + scheduledAlertV2Rollover = threadPool + .scheduleWithFixedDelay({ rolloverAndDeleteAlertV2HistoryIndices() }, alertV2HistoryRolloverPeriod, executorName()) + } catch (e: Exception) { + logger.error( + "Error creating alert/finding indices. 
" + + "Alerts/Findings can't be recorded until clustermanager node is restarted.", + e + ) + } + } + + fun offClusterManager() { + scheduledAlertV2Rollover?.cancel() + } + + private fun executorName(): String { + return ThreadPool.Names.MANAGEMENT + } + + override fun clusterChanged(event: ClusterChangedEvent) { + // Instead of using a LocalNodeClusterManagerListener to track clustermanager changes, this service will + // track them here to avoid conditions where clustermanager listener events run after other + // listeners that depend on what happened in the clustermanager listener + if (this.isClusterManager != event.localNodeClusterManager()) { + this.isClusterManager = event.localNodeClusterManager() + if (this.isClusterManager) { + onClusterManager() + } else { + offClusterManager() + } + } + + // if the indexes have been deleted they need to be reinitialized + alertV2IndexInitialized = event.state().routingTable().hasIndex(ALERT_V2_INDEX) + alertV2HistoryIndexInitialized = event.state().metadata().hasAlias(ALERT_V2_HISTORY_WRITE_INDEX) + } + + private fun rescheduleAlertRollover() { + if (clusterService.state().nodes.isLocalNodeElectedClusterManager) { + scheduledAlertV2Rollover?.cancel() + scheduledAlertV2Rollover = threadPool + .scheduleWithFixedDelay({ rolloverAndDeleteAlertV2HistoryIndices() }, alertV2HistoryRolloverPeriod, executorName()) + } + } + + fun isAlertV2Initialized(): Boolean { + return alertV2IndexInitialized && alertV2HistoryIndexInitialized + } + + fun isAlertV2HistoryEnabled(): Boolean { + return alertV2HistoryEnabled + } + + suspend fun createOrUpdateAlertV2Index() { + if (!alertV2IndexInitialized) { + alertV2IndexInitialized = createIndex(ALERT_V2_INDEX, alertV2Mapping()) + if (alertV2IndexInitialized) IndexUtils.alertV2IndexUpdated() + } else { + if (!IndexUtils.alertV2IndexUpdated) updateIndexMapping(ALERT_V2_INDEX, alertV2Mapping()) + } + alertV2IndexInitialized + } + + suspend fun createOrUpdateInitialAlertV2HistoryIndex() { + if
(!alertV2HistoryIndexInitialized) { + alertV2HistoryIndexInitialized = createIndex(ALERT_V2_HISTORY_INDEX_PATTERN, alertV2Mapping(), ALERT_V2_HISTORY_WRITE_INDEX) + if (alertV2HistoryIndexInitialized) + IndexUtils.lastUpdatedAlertV2HistoryIndex = IndexUtils.getIndexNameWithAlias( + clusterService.state(), + ALERT_V2_HISTORY_WRITE_INDEX + ) + } else { + updateIndexMapping(ALERT_V2_HISTORY_WRITE_INDEX, alertV2Mapping(), true) + } + alertV2HistoryIndexInitialized // TODO: potentially delete this + } + + private fun rolloverAndDeleteAlertV2HistoryIndices() { + if (alertV2HistoryEnabled) rolloverAlertV2HistoryIndex() + deleteOldIndices("History", ALERT_V2_HISTORY_ALL) + } + + private suspend fun createIndex(index: String, schemaMapping: String, alias: String? = null): Boolean { + // This should be a fast check of local cluster state. Should be exceedingly rare that the local cluster + // state does not contain the index and multiple nodes concurrently try to create the index. + // If it does happen that error is handled we catch the ResourceAlreadyExistsException + val existsResponse: IndicesExistsResponse = client.admin().indices().suspendUntil { + exists(IndicesExistsRequest(index).local(true), it) + } + if (existsResponse.isExists) return true + + logger.debug("index: [$index] schema mappings: [$schemaMapping]") + val request = CreateIndexRequest(index) + .mapping(schemaMapping) + .settings(Settings.builder().put("index.hidden", true).build()) + + if (alias != null) request.alias(Alias(alias)) + return try { + val createIndexResponse: CreateIndexResponse = client.admin().indices().suspendUntil { create(request, it) } + createIndexResponse.isAcknowledged + } catch (t: Exception) { + if (ExceptionsHelper.unwrapCause(t) is ResourceAlreadyExistsException) { + true + } else { + throw AlertingException.wrap(t) + } + } + } + + private suspend fun updateIndexMapping(index: String, mapping: String, alias: Boolean = false) { + val clusterState = clusterService.state() + var 
targetIndex = index + if (alias) { + targetIndex = IndexUtils.getIndexNameWithAlias(clusterState, index) + } + + // TODO call getMapping and compare actual mappings here instead of this + if (targetIndex == IndexUtils.lastUpdatedAlertV2HistoryIndex) { + return + } + + val putMappingRequest: PutMappingRequest = PutMappingRequest(targetIndex) + .source(mapping, XContentType.JSON) + val updateResponse: AcknowledgedResponse = client.admin().indices().suspendUntil { putMapping(putMappingRequest, it) } + if (updateResponse.isAcknowledged) { + logger.info("Index mapping of $targetIndex is updated") + setIndexUpdateFlag(index, targetIndex) + } else { + logger.info("Failed to update index mapping of $targetIndex") + } + } + + private fun setIndexUpdateFlag(index: String, targetIndex: String) { + when (index) { + ALERT_V2_INDEX -> IndexUtils.alertV2IndexUpdated() + ALERT_V2_HISTORY_WRITE_INDEX -> IndexUtils.lastUpdatedAlertV2HistoryIndex = targetIndex + } + } + + private fun rolloverAndDeleteAlertHistoryIndices() { + if (alertV2HistoryEnabled) rolloverAlertV2HistoryIndex() + deleteOldIndices("History", ALERT_V2_HISTORY_ALL) + } + + private fun rolloverIndex( + initialized: Boolean, + index: String, + pattern: String, + map: String, + docsCondition: Long, + ageCondition: TimeValue, + writeIndex: String + ) { + if (!initialized) { + return + } + + // We have to pass null for newIndexName in order to get Elastic to increment the index count. + val request = RolloverRequest(index, null) + request.createIndexRequest.index(pattern) + .mapping(map) + .settings(Settings.builder().put("index.hidden", true).build()) + request.addMaxIndexDocsCondition(docsCondition) + request.addMaxIndexAgeCondition(ageCondition) + client.admin().indices().rolloverIndex( + request, + object : ActionListener { + override fun onResponse(response: RolloverResponse) { + if (!response.isRolledOver) { + logger.info("$writeIndex not rolled over. 
Conditions were: ${response.conditionStatus}") + } else { + lastRolloverTime = TimeValue.timeValueMillis(threadPool.absoluteTimeInMillis()) + } + } + override fun onFailure(e: Exception) { + logger.error("$writeIndex not roll over failed.", e) + } + } + ) + } + + private fun rolloverAlertV2HistoryIndex() { + rolloverIndex( + alertV2HistoryIndexInitialized, + ALERT_V2_HISTORY_WRITE_INDEX, + ALERT_V2_HISTORY_INDEX_PATTERN, + alertV2Mapping(), + alertV2HistoryMaxDocs, + alertV2HistoryMaxAge, + ALERT_V2_HISTORY_WRITE_INDEX + ) + } + + private fun deleteOldIndices(tag: String, indices: String) { + logger.info("info deleteOldIndices") + val clusterStateRequest = ClusterStateRequest() + .clear() + .indices(indices) + .metadata(true) + .local(true) + .indicesOptions(IndicesOptions.strictExpand()) + client.admin().cluster().state( + clusterStateRequest, + object : ActionListener { + override fun onResponse(clusterStateResponse: ClusterStateResponse) { + if (clusterStateResponse.state.metadata.indices.isNotEmpty()) { + scope.launch { + val indicesToDelete = getIndicesToDelete(clusterStateResponse) + logger.info("Deleting old $tag indices viz $indicesToDelete") + deleteAllOldHistoryIndices(indicesToDelete) + } + } else { + logger.info("No Old $tag Indices to delete") + } + } + override fun onFailure(e: Exception) { + logger.error("Error fetching cluster state", e) + } + } + ) + } + + private fun getIndicesToDelete(clusterStateResponse: ClusterStateResponse): List { + val indicesToDelete = mutableListOf() + for (entry in clusterStateResponse.state.metadata.indices) { + val indexMetaData = entry.value + getHistoryIndexToDelete(indexMetaData, alertV2HistoryRetentionPeriod.millis, ALERT_V2_HISTORY_WRITE_INDEX, alertV2HistoryEnabled) + ?.let { indicesToDelete.add(it) } + } + return indicesToDelete + } + + private fun getHistoryIndexToDelete( + indexMetadata: IndexMetadata, + retentionPeriodMillis: Long, + writeIndex: String, + historyEnabled: Boolean + ): String?
{ + val creationTime = indexMetadata.creationDate + if ((Instant.now().toEpochMilli() - creationTime) > retentionPeriodMillis) { + val alias = indexMetadata.aliases.entries.firstOrNull { writeIndex == it.value.alias } + if (alias != null) { + if (historyEnabled) { + // If the index has the write alias and history is enabled, don't delete the index + return null + } else if (writeIndex == ALERT_V2_HISTORY_WRITE_INDEX) { + // Otherwise reset alertV2HistoryIndexInitialized since index will be deleted + alertV2HistoryIndexInitialized = false + } + } + + return indexMetadata.index.name + } + return null + } + + private fun deleteAllOldHistoryIndices(indicesToDelete: List) { + if (indicesToDelete.isNotEmpty()) { + val deleteIndexRequest = DeleteIndexRequest(*indicesToDelete.toTypedArray()) + client.admin().indices().delete( + deleteIndexRequest, + object : ActionListener { + override fun onResponse(deleteIndicesResponse: AcknowledgedResponse) { + if (!deleteIndicesResponse.isAcknowledged) { + logger.error( + "Could not delete one or more Alerting V2 history indices: $indicesToDelete. Retrying one by one." + ) + deleteOldHistoryIndex(indicesToDelete) + } + } + override fun onFailure(e: Exception) { + logger.error("Delete for Alerting V2 History Indices $indicesToDelete Failed. Retrying one By one.", e) + deleteOldHistoryIndex(indicesToDelete) + } + } + ) + } + } + + private fun deleteOldHistoryIndex(indicesToDelete: List) { + for (index in indicesToDelete) { + val singleDeleteRequest = DeleteIndexRequest(index) + client.admin().indices().delete( + singleDeleteRequest, + object : ActionListener { + override fun onResponse(acknowledgedResponse: AcknowledgedResponse?)
{ + if (acknowledgedResponse != null) { + if (!acknowledgedResponse.isAcknowledged) { + logger.error("Could not delete one or more Alerting V2 history indices: $index") + } + } + } + override fun onFailure(e: Exception) { + logger.debug("Exception ${e.message} while deleting the index $index") + } + } + ) + } + } +} diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/alertsv2/AlertV2Mover.kt b/alerting/src/main/kotlin/org/opensearch/alerting/alertsv2/AlertV2Mover.kt new file mode 100644 index 000000000..961e98b4f --- /dev/null +++ b/alerting/src/main/kotlin/org/opensearch/alerting/alertsv2/AlertV2Mover.kt @@ -0,0 +1,227 @@ +package org.opensearch.alerting.alertsv2 + +import java.time.Instant +import java.util.concurrent.TimeUnit +import kotlinx.coroutines.CoroutineScope +import kotlinx.coroutines.Dispatchers +import kotlinx.coroutines.launch +import org.apache.logging.log4j.LogManager +import org.opensearch.action.bulk.BulkRequest +import org.opensearch.action.bulk.BulkResponse +import org.opensearch.action.delete.DeleteRequest +import org.opensearch.action.index.IndexRequest +import org.opensearch.action.search.SearchRequest +import org.opensearch.action.search.SearchResponse +import org.opensearch.alerting.alerts.AlertIndices +import org.opensearch.alerting.core.modelv2.AlertV2 +import org.opensearch.alerting.opensearchapi.suspendUntil +import org.opensearch.alerting.settings.AlertingSettings.Companion.ALERT_V2_HISTORY_ENABLED +import org.opensearch.cluster.ClusterChangedEvent +import org.opensearch.cluster.ClusterStateListener +import org.opensearch.cluster.service.ClusterService +import org.opensearch.common.settings.Settings +import org.opensearch.common.unit.TimeValue +import org.opensearch.common.xcontent.LoggingDeprecationHandler +import org.opensearch.common.xcontent.XContentFactory +import org.opensearch.common.xcontent.XContentHelper +import org.opensearch.common.xcontent.XContentType +import org.opensearch.commons.alerting.model.Alert +import 
org.opensearch.commons.alerting.model.ScheduledJob +import org.opensearch.core.action.ActionListener +import org.opensearch.core.common.bytes.BytesReference +import org.opensearch.core.rest.RestStatus +import org.opensearch.core.xcontent.NamedXContentRegistry +import org.opensearch.core.xcontent.ToXContent +import org.opensearch.core.xcontent.XContentParser +import org.opensearch.core.xcontent.XContentParserUtils +import org.opensearch.index.VersionType +import org.opensearch.index.query.QueryBuilders +import org.opensearch.index.query.RangeQueryBuilder +import org.opensearch.index.reindex.BulkByScrollResponse +import org.opensearch.index.reindex.DeleteByQueryAction +import org.opensearch.index.reindex.DeleteByQueryRequestBuilder +import org.opensearch.search.builder.SearchSourceBuilder +import org.opensearch.threadpool.Scheduler +import org.opensearch.threadpool.ThreadPool +import org.opensearch.transport.client.Client + +private val scope: CoroutineScope = CoroutineScope(kotlinx.coroutines.SupervisorJob() + Dispatchers.IO) +private val logger = LogManager.getLogger(AlertV2Mover::class.java) + +class AlertV2Mover( + settings: Settings, + private val client: Client, + private val threadPool: ThreadPool, + private val clusterService: ClusterService, +) : ClusterStateListener { + init { + clusterService.addListener(this) + clusterService.clusterSettings.addSettingsUpdateConsumer(ALERT_V2_HISTORY_ENABLED) { alertV2HistoryEnabled = it } + } + + @Volatile private var isClusterManager = false + + private var alertV2IndexInitialized = false + + private var alertV2HistoryIndexInitialized = false + + @Volatile private var alertV2HistoryEnabled = ALERT_V2_HISTORY_ENABLED.get(settings) + + private var scheduledAlertsV2CheckAndExpire: Scheduler.Cancellable?
= null + + private val executorName = ThreadPool.Names.MANAGEMENT + + private val checkForExpirationInterval = TimeValue(1L, TimeUnit.MINUTES) + + override fun clusterChanged(event: ClusterChangedEvent) { + if (this.isClusterManager != event.localNodeClusterManager()) { + this.isClusterManager = event.localNodeClusterManager() + if (this.isClusterManager) { + onManager() + } else { + offManager() + } + } + + alertV2IndexInitialized = event.state().routingTable().hasIndex(AlertV2Indices.ALERT_V2_INDEX) + alertV2HistoryIndexInitialized = event.state().metadata().hasAlias(AlertV2Indices.ALERT_V2_HISTORY_WRITE_INDEX) + } + + fun onManager() { + try { + // try to sweep current AlertV2s for expiration immediately as we might be restarting the cluster + moveOrDeleteAlertV2s() + // schedule expiration checks and expirations to happen repeatedly at some interval + scheduledAlertsV2CheckAndExpire = threadPool + .scheduleWithFixedDelay({ moveOrDeleteAlertV2s() }, checkForExpirationInterval, executorName) + } catch (e: Exception) { + // This should be run on cluster startup + logger.error( + "Error sweeping AlertV2s for expiration. This cannot be done until clustermanager node is restarted.", + e + ) + } + } + + fun offManager() { + scheduledAlertsV2CheckAndExpire?.cancel() + } + + // if alertV2 history is enabled, move expired alerts to alertV2 history indices + // if alertV2 history is disabled, permanently delete expired alerts + private fun moveOrDeleteAlertV2s() { + if (!areAlertV2IndicesPresent()) { + return + } + + scope.launch { + val expiredAlertsSearchResponse = searchForExpiredAlerts() + + var copyResponse: BulkResponse? = null + val deleteResponse: BulkResponse? 
+ if (!alertV2HistoryEnabled) { + deleteResponse = deleteExpiredAlerts(expiredAlertsSearchResponse) + } else { + copyResponse = copyExpiredAlerts(expiredAlertsSearchResponse) + deleteResponse = deleteExpiredAlertsThatWereCopied(copyResponse) + } + checkForFailures(copyResponse) + checkForFailures(deleteResponse) + } + } + + private suspend fun searchForExpiredAlerts(): SearchResponse { + val now = Instant.now().toEpochMilli() + val expiredAlertsQuery = QueryBuilders.rangeQuery(AlertV2.EXPIRATION_TIME_FIELD).lte(now) + + val expiredAlertsSearchQuery = SearchSourceBuilder.searchSource() + .query(expiredAlertsQuery) + .version(true) + + val activeAlertsRequest = SearchRequest(AlertV2Indices.ALERT_V2_INDEX) +// .routing(monitorId) + .source(expiredAlertsSearchQuery) + return client.suspendUntil { search(activeAlertsRequest, it) } + } + + private suspend fun copyExpiredAlerts(expiredAlertsSearchResponse: SearchResponse): BulkResponse? { + // If no alerts are found, simply return + if (expiredAlertsSearchResponse.hits.totalHits?.value == 0L) { + return null + } + + val indexRequests = expiredAlertsSearchResponse.hits.map { hit -> + IndexRequest(AlertV2Indices.ALERT_V2_HISTORY_WRITE_INDEX) +// .routing(monitorId) + .source( + AlertV2.parse(alertV2ContentParser(hit.sourceRef), hit.id, hit.version) + .toXContent(XContentFactory.jsonBuilder(), ToXContent.EMPTY_PARAMS) + ) + .version(hit.version) + .versionType(VersionType.EXTERNAL_GTE) + .id(hit.id) + } + + val copyRequest = BulkRequest().add(indexRequests) + val copyResponse: BulkResponse = client.suspendUntil { bulk(copyRequest, it) } + + return copyResponse + } + + private suspend fun deleteExpiredAlerts(expiredAlertsSearchResponse: SearchResponse): BulkResponse { + val deleteRequests = expiredAlertsSearchResponse.hits.map { + DeleteRequest(AlertV2Indices.ALERT_V2_INDEX, it.id) +// .routing(monitorId) + .version(it.version) + .versionType(VersionType.EXTERNAL_GTE) + } + + val deleteResponse: BulkResponse = 
client.suspendUntil { bulk(BulkRequest().add(deleteRequests), it) } + return deleteResponse + } + + private suspend fun deleteExpiredAlertsThatWereCopied(copyResponse: BulkResponse?): BulkResponse? { + // if there were no expired alerts, skip deleting anything + if (copyResponse == null) { + return null + } + + val deleteRequests = copyResponse.items.filterNot { it.isFailed }.map { + DeleteRequest(AlertV2Indices.ALERT_V2_INDEX, it.id) +// .routing(monitorId) + .version(it.version) + .versionType(VersionType.EXTERNAL_GTE) + } + val deleteResponse: BulkResponse = client.suspendUntil { bulk(BulkRequest().add(deleteRequests), it) } + return deleteResponse + } + + private fun checkForFailures(bulkResponse: BulkResponse?) { + bulkResponse?.let { + if (bulkResponse.hasFailures()) { + val retryCause = bulkResponse.items.filter { it.isFailed } + .firstOrNull { it.status() == RestStatus.TOO_MANY_REQUESTS } + ?.failure?.cause + throw RuntimeException( + "Failed to move or delete alert v2s: " + + bulkResponse.buildFailureMessage(), + retryCause + ) + } + } + } + + private fun alertV2ContentParser(bytesReference: BytesReference): XContentParser { + val xcp = XContentHelper.createParser( + NamedXContentRegistry.EMPTY, LoggingDeprecationHandler.INSTANCE, + bytesReference, XContentType.JSON + ) + XContentParserUtils.ensureExpectedToken(XContentParser.Token.START_OBJECT, xcp.nextToken(), xcp) + return xcp + } + + + private fun areAlertV2IndicesPresent(): Boolean { + return alertV2IndexInitialized && alertV2HistoryIndexInitialized + } +} diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/settings/AlertingSettings.kt b/alerting/src/main/kotlin/org/opensearch/alerting/settings/AlertingSettings.kt index 5a50ce632..3549da49f 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/settings/AlertingSettings.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/settings/AlertingSettings.kt @@ -293,5 +293,35 @@ class AlertingSettings { 0, Setting.Property.NodeScope, 
Setting.Property.Dynamic ) + + val ALERT_V2_HISTORY_ENABLED = Setting.boolSetting( + "plugins.alerting.v2.alert_history_enabled", + true, + Setting.Property.NodeScope, Setting.Property.Dynamic + ) + + val ALERT_V2_HISTORY_ROLLOVER_PERIOD = Setting.positiveTimeSetting( + "plugins.alerting.v2.alert_history_rollover_period", + TimeValue(12, TimeUnit.HOURS), + Setting.Property.NodeScope, Setting.Property.Dynamic + ) + + val ALERT_V2_HISTORY_INDEX_MAX_AGE = Setting.positiveTimeSetting( + "plugins.alerting.v2.alert_history_max_age", + TimeValue(30, TimeUnit.DAYS), + Setting.Property.NodeScope, Setting.Property.Dynamic + ) + + val ALERT_V2_HISTORY_MAX_DOCS = Setting.longSetting( + "plugins.alerting.v2.alert_history_max_docs", + 1000L, 0L, + Setting.Property.NodeScope, Setting.Property.Dynamic + ) + + val ALERT_V2_HISTORY_RETENTION_PERIOD = Setting.positiveTimeSetting( + "plugins.alerting.v2.alert_history_retention_period", + TimeValue(60, TimeUnit.DAYS), + Setting.Property.NodeScope, Setting.Property.Dynamic + ) } } diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/util/IndexUtils.kt b/alerting/src/main/kotlin/org/opensearch/alerting/util/IndexUtils.kt index df5c0adce..b388ae757 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/util/IndexUtils.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/util/IndexUtils.kt @@ -10,6 +10,7 @@ import org.opensearch.action.index.IndexResponse import org.opensearch.action.support.IndicesOptions import org.opensearch.action.support.clustermanager.AcknowledgedResponse import org.opensearch.alerting.alerts.AlertIndices +import org.opensearch.alerting.alertsv2.AlertV2Indices import org.opensearch.alerting.comments.CommentsIndices import org.opensearch.alerting.core.ScheduledJobIndices import org.opensearch.cluster.ClusterState @@ -39,6 +40,8 @@ class IndexUtils { private set var alertingCommentIndexSchemaVersion: Int private set + var alertV2IndexSchemaVersion: Int + private set var scheduledJobIndexUpdated: 
Boolean = false private set @@ -48,15 +51,20 @@ class IndexUtils { private set var commentsIndexUpdated: Boolean = false private set + var alertV2IndexUpdated: Boolean = false + private set + var lastUpdatedAlertHistoryIndex: String? = null var lastUpdatedFindingHistoryIndex: String? = null var lastUpdatedCommentsHistoryIndex: String? = null + var lastUpdatedAlertV2HistoryIndex: String? = null init { scheduledJobIndexSchemaVersion = getSchemaVersion(ScheduledJobIndices.scheduledJobMappings()) alertIndexSchemaVersion = getSchemaVersion(AlertIndices.alertMapping()) findingIndexSchemaVersion = getSchemaVersion(AlertIndices.findingMapping()) alertingCommentIndexSchemaVersion = getSchemaVersion(CommentsIndices.commentsMapping()) + alertV2IndexSchemaVersion = getSchemaVersion(AlertV2Indices.alertV2Mapping()) } @JvmStatic @@ -79,6 +87,11 @@ class IndexUtils { commentsIndexUpdated = true } + @JvmStatic + fun alertV2IndexUpdated() { + alertV2IndexUpdated = true + } + @JvmStatic fun getSchemaVersion(mapping: String): Int { val xcp = XContentType.JSON.xContent().createParser( From 97ebe37508201146ce2024306608d577738cfc45 Mon Sep 17 00:00:00 2001 From: Dennis Toepker Date: Fri, 12 Sep 2025 13:58:41 -0700 Subject: [PATCH 27/40] cleaning up build.gradle --- alerting/build.gradle | 68 +---------- .../alerting/alertsv2/alert_v2_mapping.json | 114 ++++++++++++++++++ 2 files changed, 117 insertions(+), 65 deletions(-) create mode 100644 alerting/src/main/resources/org/opensearch/alerting/alertsv2/alert_v2_mapping.json diff --git a/alerting/build.gradle b/alerting/build.gradle index 3731c6b22..f38428457 100644 --- a/alerting/build.gradle +++ b/alerting/build.gradle @@ -21,24 +21,6 @@ import org.opensearch.gradle.test.RestIntegTestTask import org.opensearch.gradle.testclusters.OpenSearchCluster import org.opensearch.gradle.testclusters.StandaloneRestIntegTestTask -buildscript { - ext { - opensearch_group = "org.opensearch" - opensearch_version =
System.getProperty("opensearch.version", "3.1.0-SNAPSHOT") - isSnapshot = "true" == System.getProperty("build.snapshot", "true") - buildVersionQualifier = System.getProperty("build.version_qualifier", "") - kotlin_version = System.getProperty("kotlin.version", "1.9.25") - version_tokens = opensearch_version.tokenize('-') - opensearch_build = version_tokens[0] + '.0' - if (buildVersionQualifier) { - opensearch_build += "-${buildVersionQualifier}" - } - if (isSnapshot) { - opensearch_build += "-SNAPSHOT" - } - } -} - apply plugin: 'java' apply plugin: 'idea' apply plugin: 'org.jetbrains.kotlin.jvm' @@ -99,7 +81,6 @@ publishing { } repositories { - mavenLocal() maven { name = "Snapshots" url = "https://aws.oss.sonatype.org/content/repositories/snapshots" @@ -165,18 +146,6 @@ def bwcVersion = bwcVersionShort + ".0" def bwcOpenSearchVersion = bwcVersionShort + "-SNAPSHOT" def bwcPluginVersion = bwcVersion + "-SNAPSHOT" -def sqlJarDirectory = "$buildDir/dependencies/opensearch-sql-plugin" - -task addJarsToClasspath(type: Copy) { - from(fileTree(dir: sqlJarDirectory)) { - include "opensearch-sql-${opensearch_build}.jar" - include "ppl-${opensearch_build}.jar" - include "protocol-${opensearch_build}.jar" - include "core-${opensearch_build}.jar" - } - into("$buildDir/classes") -} - dependencies { // Needed for integ tests zipArchive group: 'org.opensearch.plugin', name:'opensearch-notifications-core', version: "${opensearch_build}" @@ -200,10 +169,8 @@ dependencies { // SQL/PPL plugin dependencies implementation 'org.json:json:20240303' -// implementation fileTree(dir: sqlJarDirectory, include: ["opensearch-sql-thin-${opensearch_build}.jar", "ppl-${opensearch_build}.jar", "protocol-${opensearch_build}.jar", "core-${opensearch_build}.jar"]) -// zipArchive group: 'org.opensearch.plugin', name:'opensearch-sql-plugin', version: "${opensearch_build}" - api project(":alerting-core") + implementation "com.github.seancfoley:ipaddress:5.4.1" implementation project(path: 
":alerting-spi", configuration: 'shadow') @@ -216,29 +183,6 @@ dependencies { testImplementation "org.opensearch.plugin:lang-mustache-client:${opensearch_version}" } -task extractSqlJar(type: Copy) { - mustRunAfter() - from(zipTree(configurations.zipArchive.find { it.name.startsWith("opensearch-sql-plugin") })) - into sqlJarDirectory -} - -task extractSqlClass(type: Copy, dependsOn: [extractSqlJar]) { - from zipTree("${sqlJarDirectory}/opensearch-sql-${opensearch_build}.jar") - into("$buildDir/opensearch-sql") - include 'org/opensearch/sql/**' -} - -task replaceSqlJar(type: Jar, dependsOn: [extractSqlClass]) { - from("$buildDir/opensearch-sql") - archiveFileName = "opensearch-sql-thin-${opensearch_build}.jar" - destinationDirectory = file(sqlJarDirectory) - doLast { - file("${sqlJarDirectory}/opensearch-sql-${opensearch_build}.jar").delete() - } -} - -tasks.addJarsToClasspath.dependsOn(replaceSqlJar) - javadoc.enabled = false // turn off javadoc as it barfs on Kotlin code licenseHeaders.enabled = true dependencyLicenses.enabled = false @@ -570,6 +514,8 @@ task bwcTestSuite(type: StandaloneRestIntegTestTask) { run { doFirst { + dependsOn addJarsToClasspath + // There seems to be an issue when running multi node run or integ tasks with unicast_hosts // not being written, the waitForAllConditions ensures it's written getClusters().forEach { cluster -> @@ -714,11 +660,3 @@ def waitForClusterSetup(OpenSearchCluster cluster, Boolean securityEnabled) { } apply from: '../build-tools/pkgbuild.gradle' - -//compileJava { -// dependsOn addJarsToClasspath -//} -// -//compileKotlin { -// dependsOn addJarsToClasspath -//} diff --git a/alerting/src/main/resources/org/opensearch/alerting/alertsv2/alert_v2_mapping.json b/alerting/src/main/resources/org/opensearch/alerting/alertsv2/alert_v2_mapping.json new file mode 100644 index 000000000..fb4f3e6dd --- /dev/null +++ b/alerting/src/main/resources/org/opensearch/alerting/alertsv2/alert_v2_mapping.json @@ -0,0 +1,114 @@ +{ + 
"dynamic": "strict", + "_routing": { + "required": true + }, + "_meta" : { + "schema_version": 5 + }, + "properties": { + "schema_version": { + "type": "integer" + }, + "monitor_id": { + "type": "keyword" + }, + "monitor_version": { + "type": "long" + }, + "id": { + "type": "keyword" + }, + "version": { + "type": "long" + }, + "severity": { + "type": "keyword" + }, + "monitor_name": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "monitor_user": { + "properties": { + "name": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "backend_roles": { + "type" : "text", + "fields" : { + "keyword" : { + "type" : "keyword" + } + } + }, + "roles": { + "type" : "text", + "fields" : { + "keyword" : { + "type" : "keyword" + } + } + }, + "custom_attribute_names": { + "type" : "text", + "fields" : { + "keyword" : { + "type" : "keyword" + } + } + } + } + }, + "execution_id": { + "type": "keyword" + }, + "trigger_id": { + "type": "keyword" + }, + "trigger_name": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "triggered_time": { + "type": "date" + }, + "expiration_time": { + "type": "date" + }, + "error_message": { + "type": "text" + }, + "query_results": { + "type": "nested", + "properties": { + "schema": { + "type": "nested", + "dynamic": true + }, + "datarows": { + "type": "text" + }, + "total": { + "type": "integer" + } + } + } + } +} \ No newline at end of file From bb73e226ff6482411acfb766eb1015e72334b8a3 Mon Sep 17 00:00:00 2001 From: Dennis Toepker Date: Fri, 12 Sep 2025 14:00:06 -0700 Subject: [PATCH 28/40] more cleanup --- alerting/build.gradle | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/alerting/build.gradle b/alerting/build.gradle index f38428457..50ea4265d 100644 --- a/alerting/build.gradle +++ b/alerting/build.gradle @@ -167,8 +167,7 @@ dependencies { implementation 
"org.jetbrains.kotlin:kotlin-stdlib-common:${kotlin_version}" implementation "org.jetbrains:annotations:13.0" - // SQL/PPL plugin dependencies - implementation 'org.json:json:20240303' + // SQL/PPL plugin dependencies are included in alerting-core api project(":alerting-core") implementation "com.github.seancfoley:ipaddress:5.4.1" From 99bc75beb7c64bbacc004e5c0c7d89d8da6468b2 Mon Sep 17 00:00:00 2001 From: Dennis Toepker Date: Fri, 12 Sep 2025 14:00:59 -0700 Subject: [PATCH 29/40] more cleanup --- alerting/build.gradle | 2 -- 1 file changed, 2 deletions(-) diff --git a/alerting/build.gradle b/alerting/build.gradle index 50ea4265d..13862f760 100644 --- a/alerting/build.gradle +++ b/alerting/build.gradle @@ -513,8 +513,6 @@ task bwcTestSuite(type: StandaloneRestIntegTestTask) { run { doFirst { - dependsOn addJarsToClasspath - // There seems to be an issue when running multi node run or integ tasks with unicast_hosts // not being written, the waitForAllConditions ensures it's written getClusters().forEach { cluster -> From 6a054f6441158ee82ec93afdf507e557589a946b Mon Sep 17 00:00:00 2001 From: Dennis Toepker Date: Sat, 13 Sep 2025 11:20:38 -0700 Subject: [PATCH 30/40] gave AlertV2s their own index and history indices and rewired Get Alerts V2 to use those --- alerting/build.gradle | 1 + .../org/opensearch/alerting/AlertingPlugin.kt | 15 +- .../alerting/MonitorRunnerService.kt | 2 +- .../opensearch/alerting/PPLMonitorRunner.kt | 3 +- .../alerting/actionv2/GetAlertsV2Action.kt | 10 + .../alerting/actionv2/GetAlertsV2Request.kt | 63 ++++++ .../alerting/actionv2/GetAlertsV2Response.kt | 47 +++++ .../alerting/alertsv2/AlertV2Indices.kt | 18 +- .../alerting/alertsv2/AlertV2Mover.kt | 27 +-- .../resthandler/RestGetAlertsV2Action.kt | 21 +- .../transport/TransportGetAlertsV2Action.kt | 192 ++++++++++++++++++ .../alerting/alerts/alert_mapping.json | 21 -- .../alerting/alertsv2/alert_v2_mapping.json | 2 +- 13 files changed, 349 insertions(+), 73 deletions(-) create mode 100644 
alerting/src/main/kotlin/org/opensearch/alerting/actionv2/GetAlertsV2Action.kt create mode 100644 alerting/src/main/kotlin/org/opensearch/alerting/actionv2/GetAlertsV2Request.kt create mode 100644 alerting/src/main/kotlin/org/opensearch/alerting/actionv2/GetAlertsV2Response.kt create mode 100644 alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportGetAlertsV2Action.kt diff --git a/alerting/build.gradle b/alerting/build.gradle index 13862f760..678a1749b 100644 --- a/alerting/build.gradle +++ b/alerting/build.gradle @@ -169,6 +169,7 @@ dependencies { // SQL/PPL plugin dependencies are included in alerting-core api project(":alerting-core") + implementation 'org.json:json:20240303' implementation "com.github.seancfoley:ipaddress:5.4.1" implementation project(path: ":alerting-spi", configuration: 'shadow') diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/AlertingPlugin.kt b/alerting/src/main/kotlin/org/opensearch/alerting/AlertingPlugin.kt index eeca338dc..f826e96e8 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/AlertingPlugin.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/AlertingPlugin.kt @@ -16,11 +16,14 @@ import org.opensearch.alerting.action.SearchEmailAccountAction import org.opensearch.alerting.action.SearchEmailGroupAction import org.opensearch.alerting.actionv2.DeleteMonitorV2Action import org.opensearch.alerting.actionv2.ExecuteMonitorV2Action +import org.opensearch.alerting.actionv2.GetAlertsV2Action import org.opensearch.alerting.actionv2.GetMonitorV2Action import org.opensearch.alerting.actionv2.IndexMonitorV2Action import org.opensearch.alerting.actionv2.SearchMonitorV2Action import org.opensearch.alerting.alerts.AlertIndices import org.opensearch.alerting.alerts.AlertIndices.Companion.ALL_ALERT_INDEX_PATTERN +import org.opensearch.alerting.alertsv2.AlertV2Indices +import org.opensearch.alerting.alertsv2.AlertV2Mover import org.opensearch.alerting.comments.CommentsIndices import 
org.opensearch.alerting.comments.CommentsIndices.Companion.ALL_COMMENTS_INDEX_PATTERN import org.opensearch.alerting.core.JobSweeper @@ -62,6 +65,7 @@ import org.opensearch.alerting.transport.TransportExecuteMonitorAction import org.opensearch.alerting.transport.TransportExecuteMonitorV2Action import org.opensearch.alerting.transport.TransportExecuteWorkflowAction import org.opensearch.alerting.transport.TransportGetAlertsAction +import org.opensearch.alerting.transport.TransportGetAlertsV2Action import org.opensearch.alerting.transport.TransportGetDestinationsAction import org.opensearch.alerting.transport.TransportGetEmailAccountAction import org.opensearch.alerting.transport.TransportGetEmailGroupAction @@ -135,8 +139,6 @@ import org.opensearch.threadpool.ThreadPool import org.opensearch.transport.client.Client import org.opensearch.watcher.ResourceWatcherService import java.util.function.Supplier -import org.opensearch.alerting.alertsv2.AlertV2Indices -import org.opensearch.alerting.alertsv2.AlertV2Mover /** * Entry point of the OpenDistro for Elasticsearch alerting plugin @@ -270,6 +272,7 @@ internal class AlertingPlugin : PainlessExtension, ActionPlugin, ScriptPlugin, R ActionPlugin.ActionHandler(SearchMonitorV2Action.INSTANCE, TransportSearchMonitorV2Action::class.java), ActionPlugin.ActionHandler(DeleteMonitorV2Action.INSTANCE, TransportDeleteMonitorV2Action::class.java), ActionPlugin.ActionHandler(ExecuteMonitorV2Action.INSTANCE, TransportExecuteMonitorV2Action::class.java), + ActionPlugin.ActionHandler(GetAlertsV2Action.INSTANCE, TransportGetAlertsV2Action::class.java) ) } @@ -306,6 +309,7 @@ internal class AlertingPlugin : PainlessExtension, ActionPlugin, ScriptPlugin, R val settings = environment.settings() val lockService = LockService(client, clusterService) alertIndices = AlertIndices(settings, client, threadPool, clusterService) + alertV2Indices = AlertV2Indices(settings, client, threadPool, clusterService) val alertService = AlertService(client, 
xContentRegistry, alertIndices) val triggerService = TriggerService(scriptService) runner = MonitorRunnerService @@ -455,7 +459,12 @@ internal class AlertingPlugin : PainlessExtension, ActionPlugin, ScriptPlugin, R AlertingSettings.COMMENTS_HISTORY_RETENTION_PERIOD, AlertingSettings.COMMENTS_MAX_CONTENT_SIZE, AlertingSettings.MAX_COMMENTS_PER_ALERT, - AlertingSettings.MAX_COMMENTS_PER_NOTIFICATION + AlertingSettings.MAX_COMMENTS_PER_NOTIFICATION, + AlertingSettings.ALERT_V2_HISTORY_ENABLED, + AlertingSettings.ALERT_V2_HISTORY_ROLLOVER_PERIOD, + AlertingSettings.ALERT_V2_HISTORY_INDEX_MAX_AGE, + AlertingSettings.ALERT_V2_HISTORY_MAX_DOCS, + AlertingSettings.ALERT_V2_HISTORY_RETENTION_PERIOD ) } diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/MonitorRunnerService.kt b/alerting/src/main/kotlin/org/opensearch/alerting/MonitorRunnerService.kt index 024a03e13..262e83a94 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/MonitorRunnerService.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/MonitorRunnerService.kt @@ -26,6 +26,7 @@ import org.opensearch.alerting.actionv2.ExecuteMonitorV2Request import org.opensearch.alerting.actionv2.ExecuteMonitorV2Response import org.opensearch.alerting.alerts.AlertIndices import org.opensearch.alerting.alerts.AlertMover.Companion.moveAlerts +import org.opensearch.alerting.alertsv2.AlertV2Indices import org.opensearch.alerting.core.JobRunner import org.opensearch.alerting.core.ScheduledJobIndices import org.opensearch.alerting.core.lock.LockModel @@ -94,7 +95,6 @@ import java.time.Instant import java.time.LocalDateTime import java.time.ZoneOffset import java.util.UUID -import org.opensearch.alerting.alertsv2.AlertV2Indices import kotlin.coroutines.CoroutineContext object MonitorRunnerService : JobRunner, CoroutineScope, AbstractLifecycleComponent() { diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt b/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt 
index 3aca2db90..53e886a12 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt @@ -11,7 +11,7 @@ import org.opensearch.action.bulk.BulkResponse import org.opensearch.action.index.IndexRequest import org.opensearch.action.support.WriteRequest import org.opensearch.alerting.QueryLevelMonitorRunner.getConfigAndSendNotification -import org.opensearch.alerting.alerts.AlertIndices +import org.opensearch.alerting.alertsv2.AlertV2Indices import org.opensearch.alerting.core.modelv2.AlertV2 import org.opensearch.alerting.core.modelv2.MonitorV2 import org.opensearch.alerting.core.modelv2.MonitorV2RunResult @@ -44,7 +44,6 @@ import java.time.Instant import java.time.ZoneOffset.UTC import java.time.format.DateTimeFormatter import java.time.temporal.ChronoUnit -import org.opensearch.alerting.alertsv2.AlertV2Indices object PPLMonitorRunner : MonitorV2Runner { private val logger = LogManager.getLogger(javaClass) diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/actionv2/GetAlertsV2Action.kt b/alerting/src/main/kotlin/org/opensearch/alerting/actionv2/GetAlertsV2Action.kt new file mode 100644 index 000000000..6255d6ddd --- /dev/null +++ b/alerting/src/main/kotlin/org/opensearch/alerting/actionv2/GetAlertsV2Action.kt @@ -0,0 +1,10 @@ +package org.opensearch.alerting.actionv2 + +import org.opensearch.action.ActionType + +class GetAlertsV2Action private constructor() : ActionType<GetAlertsV2Response>(NAME, ::GetAlertsV2Response) { + companion object { + val INSTANCE = GetAlertsV2Action() + const val NAME = "cluster:admin/opensearch/alerting/v2/alerts/get" + } +} diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/actionv2/GetAlertsV2Request.kt b/alerting/src/main/kotlin/org/opensearch/alerting/actionv2/GetAlertsV2Request.kt new file mode 100644 index 000000000..d85aec2e5 --- /dev/null +++ b/alerting/src/main/kotlin/org/opensearch/alerting/actionv2/GetAlertsV2Request.kt @@ -0,0
+1,63 @@ +package org.opensearch.alerting.actionv2 + +import org.opensearch.action.ActionRequest +import org.opensearch.action.ActionRequestValidationException +import org.opensearch.commons.alerting.model.Table +import org.opensearch.core.common.io.stream.StreamInput +import org.opensearch.core.common.io.stream.StreamOutput +import org.opensearch.index.query.BoolQueryBuilder +import java.io.IOException + +class GetAlertsV2Request : ActionRequest { + val table: Table + val severityLevel: String + val monitorV2Id: String? + val monitorV2Ids: List<String>? + val alertV2Ids: List<String>? + val boolQueryBuilder: BoolQueryBuilder? + + constructor( + table: Table, + severityLevel: String, + monitorV2Id: String?, + monitorV2Ids: List<String>? = null, + alertV2Ids: List<String>? = null, + boolQueryBuilder: BoolQueryBuilder? = null + ) : super() { + this.table = table + this.severityLevel = severityLevel + this.monitorV2Id = monitorV2Id + this.monitorV2Ids = monitorV2Ids + this.alertV2Ids = alertV2Ids + this.boolQueryBuilder = boolQueryBuilder + } + + @Throws(IOException::class) + constructor(sin: StreamInput) : this( + table = Table.readFrom(sin), + severityLevel = sin.readString(), + monitorV2Id = sin.readOptionalString(), + monitorV2Ids = sin.readOptionalStringList(), + alertV2Ids = sin.readOptionalStringList(), + boolQueryBuilder = if (sin.readOptionalBoolean() == true) BoolQueryBuilder(sin) else null + ) + + override fun validate(): ActionRequestValidationException?
{ + return null + } + + @Throws(IOException::class) + override fun writeTo(out: StreamOutput) { + table.writeTo(out) + out.writeString(severityLevel) + out.writeOptionalString(monitorV2Id) + out.writeOptionalStringCollection(monitorV2Ids) + out.writeOptionalStringCollection(alertV2Ids) + if (boolQueryBuilder != null) { + out.writeOptionalBoolean(true) + boolQueryBuilder.writeTo(out) + } else { + out.writeOptionalBoolean(false) + } + } +} diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/actionv2/GetAlertsV2Response.kt b/alerting/src/main/kotlin/org/opensearch/alerting/actionv2/GetAlertsV2Response.kt new file mode 100644 index 000000000..39b9faf53 --- /dev/null +++ b/alerting/src/main/kotlin/org/opensearch/alerting/actionv2/GetAlertsV2Response.kt @@ -0,0 +1,47 @@ +package org.opensearch.alerting.actionv2 + +import org.opensearch.alerting.core.modelv2.AlertV2 +import org.opensearch.commons.notifications.action.BaseResponse +import org.opensearch.core.common.io.stream.StreamInput +import org.opensearch.core.common.io.stream.StreamOutput +import org.opensearch.core.xcontent.ToXContent +import org.opensearch.core.xcontent.XContentBuilder +import java.io.IOException +import java.util.Collections + +class GetAlertsV2Response : BaseResponse { + val alertV2s: List<AlertV2> + + // totalAlertV2s is not the same as the size of alerts because there can be 30 alerts from the request, but + // the request only asked for 5 alerts, so totalAlertV2s will be 30, but alerts will only contain 5 alerts + val totalAlertV2s: Int? + + constructor( + alertV2s: List<AlertV2>, + totalAlertV2s: Int?
+ ) : super() { + this.alertV2s = alertV2s + this.totalAlertV2s = totalAlertV2s + } + + @Throws(IOException::class) + constructor(sin: StreamInput) : this( + alertV2s = Collections.unmodifiableList(sin.readList(::AlertV2)), + totalAlertV2s = sin.readOptionalInt() + ) + + @Throws(IOException::class) + override fun writeTo(out: StreamOutput) { + out.writeCollection(alertV2s) + out.writeOptionalInt(totalAlertV2s) + } + + @Throws(IOException::class) + override fun toXContent(builder: XContentBuilder, params: ToXContent.Params): XContentBuilder { + builder.startObject() + .field("alertV2s", alertV2s) + .field("totalAlertV2s", totalAlertV2s) + + return builder.endObject() + } +} diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/alertsv2/AlertV2Indices.kt b/alerting/src/main/kotlin/org/opensearch/alerting/alertsv2/AlertV2Indices.kt index b58b31178..3921be51d 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/alertsv2/AlertV2Indices.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/alertsv2/AlertV2Indices.kt @@ -1,6 +1,5 @@ package org.opensearch.alerting.alertsv2 -import java.time.Instant import kotlinx.coroutines.CoroutineScope import kotlinx.coroutines.Dispatchers import kotlinx.coroutines.launch @@ -20,7 +19,6 @@ import org.opensearch.action.admin.indices.rollover.RolloverRequest import org.opensearch.action.admin.indices.rollover.RolloverResponse import org.opensearch.action.support.IndicesOptions import org.opensearch.action.support.clustermanager.AcknowledgedResponse -import org.opensearch.alerting.alerts.AlertIndices import org.opensearch.alerting.opensearchapi.suspendUntil import org.opensearch.alerting.settings.AlertingSettings.Companion.ALERT_V2_HISTORY_ENABLED import org.opensearch.alerting.settings.AlertingSettings.Companion.ALERT_V2_HISTORY_INDEX_MAX_AGE @@ -41,6 +39,7 @@ import org.opensearch.core.action.ActionListener import org.opensearch.threadpool.Scheduler.Cancellable import org.opensearch.threadpool.ThreadPool import 
org.opensearch.transport.client.Client +import java.time.Instant private val scope: CoroutineScope = CoroutineScope(Dispatchers.IO) private val logger = LogManager.getLogger(AlertV2Indices::class.java) @@ -86,25 +85,20 @@ class AlertV2Indices( const val ALL_ALERT_V2_INDEX_PATTERN = ".opensearch-alerting-v2-alert*" @JvmStatic - fun alertV2Mapping() = // TODO: create alert_v2 mappings + fun alertV2Mapping() = AlertV2Indices::class.java.getResource("alert_v2_mapping.json").readText() } @Volatile private var alertV2HistoryEnabled = ALERT_V2_HISTORY_ENABLED.get(settings) - @Volatile private var alertV2HistoryMaxDocs = ALERT_V2_HISTORY_MAX_DOCS.get(settings) - @Volatile private var alertV2HistoryMaxAge = ALERT_V2_HISTORY_INDEX_MAX_AGE.get(settings) - @Volatile private var alertV2HistoryRolloverPeriod = ALERT_V2_HISTORY_ROLLOVER_PERIOD.get(settings) - @Volatile private var alertV2HistoryRetentionPeriod = ALERT_V2_HISTORY_RETENTION_PERIOD.get(settings) - @Volatile private var requestTimeout = REQUEST_TIMEOUT.get(settings) @Volatile private var isClusterManager = false @@ -350,8 +344,12 @@ class AlertV2Indices( val indicesToDelete = mutableListOf() for (entry in clusterStateResponse.state.metadata.indices) { val indexMetaData = entry.value - getHistoryIndexToDelete(indexMetaData, alertV2HistoryRetentionPeriod.millis, ALERT_V2_HISTORY_WRITE_INDEX, alertV2HistoryEnabled) - ?.let { indicesToDelete.add(it) } + getHistoryIndexToDelete( + indexMetaData, + alertV2HistoryRetentionPeriod.millis, + ALERT_V2_HISTORY_WRITE_INDEX, + alertV2HistoryEnabled + )?.let { indicesToDelete.add(it) } } return indicesToDelete } diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/alertsv2/AlertV2Mover.kt b/alerting/src/main/kotlin/org/opensearch/alerting/alertsv2/AlertV2Mover.kt index 961e98b4f..ec72bddd7 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/alertsv2/AlertV2Mover.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/alertsv2/AlertV2Mover.kt @@ -1,7 +1,5 @@ 
package org.opensearch.alerting.alertsv2 -import java.time.Instant -import java.util.concurrent.TimeUnit import kotlinx.coroutines.CoroutineScope import kotlinx.coroutines.Dispatchers import kotlinx.coroutines.launch @@ -12,7 +10,6 @@ import org.opensearch.action.delete.DeleteRequest import org.opensearch.action.index.IndexRequest import org.opensearch.action.search.SearchRequest import org.opensearch.action.search.SearchResponse -import org.opensearch.alerting.alerts.AlertIndices import org.opensearch.alerting.core.modelv2.AlertV2 import org.opensearch.alerting.opensearchapi.suspendUntil import org.opensearch.alerting.settings.AlertingSettings.Companion.ALERT_V2_HISTORY_ENABLED @@ -25,9 +22,6 @@ import org.opensearch.common.xcontent.LoggingDeprecationHandler import org.opensearch.common.xcontent.XContentFactory import org.opensearch.common.xcontent.XContentHelper import org.opensearch.common.xcontent.XContentType -import org.opensearch.commons.alerting.model.Alert -import org.opensearch.commons.alerting.model.ScheduledJob -import org.opensearch.core.action.ActionListener import org.opensearch.core.common.bytes.BytesReference import org.opensearch.core.rest.RestStatus import org.opensearch.core.xcontent.NamedXContentRegistry @@ -36,14 +30,12 @@ import org.opensearch.core.xcontent.XContentParser import org.opensearch.core.xcontent.XContentParserUtils import org.opensearch.index.VersionType import org.opensearch.index.query.QueryBuilders -import org.opensearch.index.query.RangeQueryBuilder -import org.opensearch.index.reindex.BulkByScrollResponse -import org.opensearch.index.reindex.DeleteByQueryAction -import org.opensearch.index.reindex.DeleteByQueryRequestBuilder import org.opensearch.search.builder.SearchSourceBuilder import org.opensearch.threadpool.Scheduler import org.opensearch.threadpool.ThreadPool import org.opensearch.transport.client.Client +import java.time.Instant +import java.util.concurrent.TimeUnit private val scope: CoroutineScope = 
CoroutineScope(Dispatchers.IO) private val logger = LogManager.getLogger(AlertV2Mover::class.java) @@ -139,9 +131,9 @@ class AlertV2Mover( .version(true) val activeAlertsRequest = SearchRequest(AlertV2Indices.ALERT_V2_INDEX) -// .routing(monitorId) .source(expiredAlertsSearchQuery) - return client.suspendUntil { search(activeAlertsRequest, it) } + val searchResponse: SearchResponse = client.suspendUntil { search(activeAlertsRequest, it) } + return searchResponse } private suspend fun copyExpiredAlerts(expiredAlertsSearchResponse: SearchResponse): BulkResponse? { @@ -152,7 +144,6 @@ class AlertV2Mover( val indexRequests = expiredAlertsSearchResponse.hits.map { hit -> IndexRequest(AlertV2Indices.ALERT_V2_HISTORY_WRITE_INDEX) -// .routing(monitorId) .source( AlertV2.parse(alertV2ContentParser(hit.sourceRef), hit.id, hit.version) .toXContent(XContentFactory.jsonBuilder(), ToXContent.EMPTY_PARAMS) @@ -171,28 +162,29 @@ class AlertV2Mover( private suspend fun deleteExpiredAlerts(expiredAlertsSearchResponse: SearchResponse): BulkResponse { val deleteRequests = expiredAlertsSearchResponse.hits.map { DeleteRequest(AlertV2Indices.ALERT_V2_INDEX, it.id) -// .routing(monitorId) .version(it.version) .versionType(VersionType.EXTERNAL_GTE) } - val deleteResponse: BulkResponse = client.suspendUntil { bulk(BulkRequest().add(deleteRequests), it) } + val deleteRequest = BulkRequest().add(deleteRequests) + val deleteResponse: BulkResponse = client.suspendUntil { bulk(deleteRequest, it) } + return deleteResponse } private suspend fun deleteExpiredAlertsThatWereCopied(copyResponse: BulkResponse?): BulkResponse? 
{ - // if there were no expired alerts, skip deleting anything + // if there were no expired alerts to copy, skip deleting anything if (copyResponse == null) { return null } val deleteRequests = copyResponse.items.filterNot { it.isFailed }.map { DeleteRequest(AlertV2Indices.ALERT_V2_INDEX, it.id) -// .routing(monitorId) .version(it.version) .versionType(VersionType.EXTERNAL_GTE) } val deleteResponse: BulkResponse = client.suspendUntil { bulk(BulkRequest().add(deleteRequests), it) } + return deleteResponse } @@ -220,7 +212,6 @@ class AlertV2Mover( return xcp } - private fun areAlertV2IndicesPresent(): Boolean { return alertV2IndexInitialized && alertV2HistoryIndexInitialized } diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestGetAlertsV2Action.kt b/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestGetAlertsV2Action.kt index 2962e5447..246ac46c2 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestGetAlertsV2Action.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestGetAlertsV2Action.kt @@ -2,11 +2,10 @@ package org.opensearch.alerting.resthandler import org.apache.logging.log4j.LogManager import org.opensearch.alerting.AlertingPlugin -import org.opensearch.commons.alerting.action.AlertingActions -import org.opensearch.commons.alerting.action.GetAlertsRequest +import org.opensearch.alerting.actionv2.GetAlertsV2Action +import org.opensearch.alerting.actionv2.GetAlertsV2Request import org.opensearch.commons.alerting.model.Table import org.opensearch.rest.BaseRestHandler -import org.opensearch.rest.RestHandler.ReplacedRoute import org.opensearch.rest.RestHandler.Route import org.opensearch.rest.RestRequest import org.opensearch.rest.RestRequest.Method.GET @@ -33,10 +32,6 @@ class RestGetAlertsV2Action : BaseRestHandler() { ) } - override fun replacedRoutes(): MutableList { - return mutableListOf() - } - // TODO: this is an Get Alerts V2 rest handler that points to the Get Alerts V1 
Transport action // TODO: for now for playground, separate the 2 for GA override fun prepareRequest(request: RestRequest, client: NodeClient): RestChannelConsumer { @@ -49,15 +44,7 @@ class RestGetAlertsV2Action : BaseRestHandler() { val startIndex = request.paramAsInt("startIndex", 0) val searchString = request.param("searchString", "") val severityLevel = request.param("severityLevel", "ALL") - val alertState = request.param("alertState", "ALL") val monitorId: String? = request.param("monitorId") - val workflowId: String? = request.param("workflowIds") - val workflowIds = mutableListOf() - if (workflowId.isNullOrEmpty() == false) { - workflowIds.add(workflowId) - } else { - workflowIds.add("") - } val table = Table( sortOrder, sortString, @@ -67,10 +54,10 @@ class RestGetAlertsV2Action : BaseRestHandler() { searchString ) - val getAlertsRequest = GetAlertsRequest(table, severityLevel, alertState, monitorId, null, workflowIds = workflowIds) + val getAlertsV2Request = GetAlertsV2Request(table, severityLevel, monitorId, null) return RestChannelConsumer { channel -> - client.execute(AlertingActions.GET_ALERTS_ACTION_TYPE, getAlertsRequest, RestToXContentListener(channel)) + client.execute(GetAlertsV2Action.INSTANCE, getAlertsV2Request, RestToXContentListener(channel)) } } } diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportGetAlertsV2Action.kt b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportGetAlertsV2Action.kt new file mode 100644 index 000000000..3bc68491b --- /dev/null +++ b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportGetAlertsV2Action.kt @@ -0,0 +1,192 @@ +package org.opensearch.alerting.transport + +import kotlinx.coroutines.CoroutineScope +import kotlinx.coroutines.Dispatchers +import kotlinx.coroutines.launch +import org.apache.logging.log4j.LogManager +import org.opensearch.action.search.SearchRequest +import org.opensearch.action.search.SearchResponse +import 
org.opensearch.action.support.ActionFilters +import org.opensearch.action.support.HandledTransportAction +import org.opensearch.alerting.actionv2.GetAlertsV2Action +import org.opensearch.alerting.actionv2.GetAlertsV2Request +import org.opensearch.alerting.actionv2.GetAlertsV2Response +import org.opensearch.alerting.alertsv2.AlertV2Indices +import org.opensearch.alerting.core.modelv2.AlertV2 +import org.opensearch.alerting.settings.AlertingSettings +import org.opensearch.alerting.util.use +import org.opensearch.cluster.service.ClusterService +import org.opensearch.common.inject.Inject +import org.opensearch.common.settings.Settings +import org.opensearch.common.xcontent.LoggingDeprecationHandler +import org.opensearch.common.xcontent.XContentHelper +import org.opensearch.common.xcontent.XContentType +import org.opensearch.commons.alerting.util.AlertingException +import org.opensearch.core.action.ActionListener +import org.opensearch.core.common.io.stream.NamedWriteableRegistry +import org.opensearch.core.xcontent.NamedXContentRegistry +import org.opensearch.core.xcontent.XContentParser +import org.opensearch.core.xcontent.XContentParserUtils +import org.opensearch.index.query.Operator +import org.opensearch.index.query.QueryBuilders +import org.opensearch.search.builder.SearchSourceBuilder +import org.opensearch.search.sort.SortBuilders +import org.opensearch.search.sort.SortOrder +import org.opensearch.tasks.Task +import org.opensearch.transport.TransportService +import org.opensearch.transport.client.Client +import java.io.IOException + +private val log = LogManager.getLogger(TransportGetAlertsV2Action::class.java) +private val scope: CoroutineScope = CoroutineScope(Dispatchers.IO) + +class TransportGetAlertsV2Action @Inject constructor( + transportService: TransportService, + val client: Client, + clusterService: ClusterService, + actionFilters: ActionFilters, + val settings: Settings, + val xContentRegistry: NamedXContentRegistry, + val namedWriteableRegistry: 
NamedWriteableRegistry +) : HandledTransportAction( + GetAlertsV2Action.NAME, + transportService, + actionFilters, + ::GetAlertsV2Request +), + SecureTransportAction { + + @Volatile + override var filterByEnabled = AlertingSettings.FILTER_BY_BACKEND_ROLES.get(settings) + + init { + listenFilterBySettingChange(clusterService) + } + + override fun doExecute( + task: Task, + getAlertsV2Request: GetAlertsV2Request, + actionListener: ActionListener, + ) { +// val user = readUserFromThreadContext(client) + + val tableProp = getAlertsV2Request.table + val sortBuilder = SortBuilders + .fieldSort(tableProp.sortString) + .order(SortOrder.fromString(tableProp.sortOrder)) + if (!tableProp.missing.isNullOrBlank()) { + sortBuilder.missing(tableProp.missing) + } + + val queryBuilder = getAlertsV2Request.boolQueryBuilder ?: QueryBuilders.boolQuery() + + if (getAlertsV2Request.severityLevel != "ALL") { + queryBuilder.filter(QueryBuilders.termQuery("severity", getAlertsV2Request.severityLevel)) + } + + if (getAlertsV2Request.alertV2Ids.isNullOrEmpty() == false) { + queryBuilder.filter(QueryBuilders.termsQuery("_id", getAlertsV2Request.alertV2Ids)) + } + + if (getAlertsV2Request.monitorV2Id != null) { + queryBuilder.filter(QueryBuilders.termQuery("monitor_id", getAlertsV2Request.monitorV2Id)) + } else if (!getAlertsV2Request.monitorV2Ids.isNullOrEmpty()) { + queryBuilder.filter(QueryBuilders.termsQuery("monitor_id", getAlertsV2Request.monitorV2Ids)) + } + + if (!tableProp.searchString.isNullOrBlank()) { + queryBuilder + .must( + QueryBuilders + .queryStringQuery(tableProp.searchString) + .defaultOperator(Operator.AND) + .field("monitor_name") + .field("trigger_name") + ) + } + val searchSourceBuilder = SearchSourceBuilder() + .version(true) + .seqNoAndPrimaryTerm(true) + .query(queryBuilder) + .sort(sortBuilder) + .size(tableProp.size) + .from(tableProp.startIndex) + + client.threadPool().threadContext.stashContext().use { + scope.launch { + try { + 
getAlerts(AlertV2Indices.ALERT_V2_INDEX, searchSourceBuilder, actionListener/*, user*/) + } catch (t: Exception) { + log.error("Failed to get alerts", t) + if (t is AlertingException) { + actionListener.onFailure(t) + } else { + actionListener.onFailure(AlertingException.wrap(t)) + } + } + } + } + } + + fun getAlerts( + alertIndex: String, + searchSourceBuilder: SearchSourceBuilder, + actionListener: ActionListener, +// user: User?, + ) { + // TODO: when implementing RBAC, pivot to this implementation +// // user is null when: 1/ security is disabled. 2/when user is super-admin. +// if (user == null) { +// // user is null when: 1/ security is disabled. 2/when user is super-admin. +// search(alertIndex, searchSourceBuilder, actionListener) +// } else if (!doFilterForUser(user)) { +// // security is enabled and filterby is disabled. +// search(alertIndex, searchSourceBuilder, actionListener) +// } else { +// // security is enabled and filterby is enabled. +// try { +// log.info("Filtering result by: ${user.backendRoles}") +// addFilter(user, searchSourceBuilder, "monitor_user.backend_roles.keyword") +// search(alertIndex, searchSourceBuilder, actionListener) +// } catch (ex: IOException) { +// actionListener.onFailure(AlertingException.wrap(ex)) +// } +// } + try { + search(alertIndex, searchSourceBuilder, actionListener) + } catch (ex: IOException) { + actionListener.onFailure(AlertingException.wrap(ex)) + } + } + + fun search(alertIndex: String, searchSourceBuilder: SearchSourceBuilder, actionListener: ActionListener) { + val searchRequest = SearchRequest() + .indices(alertIndex) + .source(searchSourceBuilder) + + client.search( + searchRequest, + object : ActionListener { + override fun onResponse(response: SearchResponse) { + val totalAlertCount = response.hits.totalHits?.value?.toInt() + val alerts = response.hits.map { hit -> + val xcp = XContentHelper.createParser( + xContentRegistry, + LoggingDeprecationHandler.INSTANCE, + hit.sourceRef, + XContentType.JSON + 
) + XContentParserUtils.ensureExpectedToken(XContentParser.Token.START_OBJECT, xcp.nextToken(), xcp) + val alertV2 = AlertV2.parse(xcp, hit.id, hit.version) + alertV2 + } + actionListener.onResponse(GetAlertsV2Response(alerts, totalAlertCount)) + } + + override fun onFailure(t: Exception) { + actionListener.onFailure(t) + } + } + ) + } +} diff --git a/alerting/src/main/resources/org/opensearch/alerting/alerts/alert_mapping.json b/alerting/src/main/resources/org/opensearch/alerting/alerts/alert_mapping.json index 6ae614f71..76e5104cc 100644 --- a/alerting/src/main/resources/org/opensearch/alerting/alerts/alert_mapping.json +++ b/alerting/src/main/resources/org/opensearch/alerting/alerts/alert_mapping.json @@ -131,30 +131,9 @@ "end_time": { "type": "date" }, - "triggered_time": { - "type": "date" - }, - "expiration_time": { - "type": "date" - }, "error_message": { "type": "text" }, - "query_results": { - "type": "nested", - "properties": { - "schema": { - "type": "nested", - "dynamic": true - }, - "datarows": { - "type": "text" - }, - "total": { - "type": "integer" - } - } - }, "alert_history": { "type": "nested", "properties": { diff --git a/alerting/src/main/resources/org/opensearch/alerting/alertsv2/alert_v2_mapping.json b/alerting/src/main/resources/org/opensearch/alerting/alertsv2/alert_v2_mapping.json index fb4f3e6dd..388b82715 100644 --- a/alerting/src/main/resources/org/opensearch/alerting/alertsv2/alert_v2_mapping.json +++ b/alerting/src/main/resources/org/opensearch/alerting/alertsv2/alert_v2_mapping.json @@ -1,7 +1,7 @@ { "dynamic": "strict", "_routing": { - "required": true + "required": false }, "_meta" : { "schema_version": 5 From 364693171a0bc31dc5e015991641af213e07db4d Mon Sep 17 00:00:00 2001 From: Dennis Toepker Date: Mon, 15 Sep 2025 12:58:18 -0700 Subject: [PATCH 31/40] initial create/update rbac implementation --- .../actionv2/IndexMonitorV2Request.kt | 14 +- .../resthandler/RestIndexMonitorV2Action.kt | 4 +- .../TransportIndexMonitorV2Action.kt 
| 303 +++++++++++++++--- .../TransportSearchMonitorV2Action.kt | 3 + .../alerting/core/modelv2/MonitorV2.kt | 4 + .../alerting/core/modelv2/PPLMonitor.kt | 71 ++-- 6 files changed, 322 insertions(+), 77 deletions(-) diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/actionv2/IndexMonitorV2Request.kt b/alerting/src/main/kotlin/org/opensearch/alerting/actionv2/IndexMonitorV2Request.kt index 39b0d594e..a5ad591c1 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/actionv2/IndexMonitorV2Request.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/actionv2/IndexMonitorV2Request.kt @@ -16,7 +16,7 @@ class IndexMonitorV2Request : ActionRequest { val refreshPolicy: WriteRequest.RefreshPolicy val method: RestRequest.Method var monitorV2: MonitorV2 -// val rbacRoles: List? + val rbacRoles: List? constructor( monitorId: String, @@ -24,8 +24,8 @@ class IndexMonitorV2Request : ActionRequest { primaryTerm: Long, refreshPolicy: WriteRequest.RefreshPolicy, method: RestRequest.Method, - monitorV2: MonitorV2 -// rbacRoles: List? = null + monitorV2: MonitorV2, + rbacRoles: List? = null ) : super() { this.monitorId = monitorId this.seqNo = seqNo @@ -33,7 +33,7 @@ class IndexMonitorV2Request : ActionRequest { this.refreshPolicy = refreshPolicy this.method = method this.monitorV2 = monitorV2 -// this.rbacRoles = rbacRoles + this.rbacRoles = rbacRoles } @Throws(IOException::class) @@ -43,8 +43,8 @@ class IndexMonitorV2Request : ActionRequest { primaryTerm = sin.readLong(), refreshPolicy = WriteRequest.RefreshPolicy.readFrom(sin), method = sin.readEnum(RestRequest.Method::class.java), - monitorV2 = MonitorV2.readFrom(sin) -// rbacRoles = sin.readOptionalStringList() + monitorV2 = MonitorV2.readFrom(sin), + rbacRoles = sin.readOptionalStringList() ) override fun validate(): ActionRequestValidationException? 
{ @@ -59,6 +59,6 @@ class IndexMonitorV2Request : ActionRequest { refreshPolicy.writeTo(out) out.writeEnum(method) MonitorV2.writeTo(out, monitorV2) -// out.writeOptionalStringCollection(rbacRoles) + out.writeOptionalStringCollection(rbacRoles) } } diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestIndexMonitorV2Action.kt b/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestIndexMonitorV2Action.kt index fd1db99c6..52cbf20ad 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestIndexMonitorV2Action.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestIndexMonitorV2Action.kt @@ -53,8 +53,10 @@ class RestIndexMonitorV2Action : BaseRestHandler() { ensureExpectedToken(Token.START_OBJECT, xcp.nextToken(), xcp) val monitorV2: MonitorV2 + val rbacRoles: List? try { monitorV2 = MonitorV2.parse(xcp) + rbacRoles = request.contentParser().map()["rbac_roles"] as List? } catch (e: Exception) { throw AlertingException.wrap(e) } @@ -68,7 +70,7 @@ class RestIndexMonitorV2Action : BaseRestHandler() { WriteRequest.RefreshPolicy.IMMEDIATE } - val indexMonitorV2Request = IndexMonitorV2Request(id, seqNo, primaryTerm, refreshPolicy, request.method(), monitorV2) + val indexMonitorV2Request = IndexMonitorV2Request(id, seqNo, primaryTerm, refreshPolicy, request.method(), monitorV2, rbacRoles) return RestChannelConsumer { channel -> client.execute(IndexMonitorV2Action.INSTANCE, indexMonitorV2Request, RestToXContentListener(channel)) diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportIndexMonitorV2Action.kt b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportIndexMonitorV2Action.kt index 604fa238b..68fb3169e 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportIndexMonitorV2Action.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportIndexMonitorV2Action.kt @@ -6,6 +6,7 @@ import kotlinx.coroutines.launch 
import org.apache.logging.log4j.LogManager import org.opensearch.ExceptionsHelper import org.opensearch.OpenSearchException +import org.opensearch.OpenSearchSecurityException import org.opensearch.OpenSearchStatusException import org.opensearch.ResourceAlreadyExistsException import org.opensearch.action.admin.cluster.health.ClusterHealthAction @@ -38,6 +39,7 @@ import org.opensearch.alerting.settings.AlertingSettings.Companion.ALERTING_MAX_ import org.opensearch.alerting.settings.AlertingSettings.Companion.INDEX_TIMEOUT import org.opensearch.alerting.settings.AlertingSettings.Companion.REQUEST_TIMEOUT import org.opensearch.alerting.util.IndexUtils +import org.opensearch.alerting.util.use import org.opensearch.cluster.service.ClusterService import org.opensearch.common.inject.Inject import org.opensearch.common.settings.Settings @@ -50,6 +52,7 @@ import org.opensearch.commons.alerting.model.ScheduledJob import org.opensearch.commons.alerting.model.ScheduledJob.Companion.SCHEDULED_JOBS_INDEX import org.opensearch.commons.alerting.model.userErrorMessage import org.opensearch.commons.alerting.util.AlertingException +import org.opensearch.commons.authuser.User import org.opensearch.core.action.ActionListener import org.opensearch.core.common.io.stream.NamedWriteableRegistry import org.opensearch.core.rest.RestStatus @@ -87,14 +90,18 @@ class TransportIndexMonitorV2Action @Inject constructor( // @Volatile private var maxActionThrottle = MAX_ACTION_THROTTLE_VALUE.get(settings) @Volatile override var filterByEnabled = AlertingSettings.FILTER_BY_BACKEND_ROLES.get(settings) - override fun doExecute(task: Task, indexMonitorRequest: IndexMonitorV2Request, actionListener: ActionListener) { + override fun doExecute( + task: Task, + indexMonitorV2Request: IndexMonitorV2Request, + actionListener: ActionListener + ) { // validate the MonitorV2 based on its type - when (indexMonitorRequest.monitorV2) { + when (indexMonitorV2Request.monitorV2) { is PPLMonitor -> 
validateMonitorPplQuery( - indexMonitorRequest.monitorV2 as PPLMonitor, + indexMonitorV2Request.monitorV2 as PPLMonitor, object : ActionListener { // validationListener override fun onResponse(response: Unit) { - checkScheduledJobIndex(indexMonitorRequest, actionListener) + checkUserAndIndicesAccess(client, actionListener, indexMonitorV2Request) } override fun onFailure(e: Exception) { @@ -105,13 +112,14 @@ class TransportIndexMonitorV2Action @Inject constructor( else -> actionListener.onFailure( AlertingException.wrap( IllegalStateException( - "unexpected MonitorV2 type: ${indexMonitorRequest.monitorV2.javaClass.name}" + "unexpected MonitorV2 type: ${indexMonitorV2Request.monitorV2.javaClass.name}" ) ) ) } } + // validates the PPL Monitor query by submitting it to SQL/PPL plugin private fun validateMonitorPplQuery(pplMonitor: PPLMonitor, validationListener: ActionListener) { scope.launch { try { @@ -167,12 +175,121 @@ class TransportIndexMonitorV2Action @Inject constructor( } } - private fun checkScheduledJobIndex(indexMonitorRequest: IndexMonitorV2Request, actionListener: ActionListener) { + private fun checkUserAndIndicesAccess( + client: Client, + actionListener: ActionListener, + indexMonitorV2Request: IndexMonitorV2Request + ) { + /* check initial user permissions */ + val user = readUserFromThreadContext(client) + + if (!validateUserBackendRoles(user, actionListener)) { + return + } + + if ( + user != null && + !isAdmin(user) && + indexMonitorV2Request.rbacRoles != null + ) { + if (indexMonitorV2Request.rbacRoles.stream().anyMatch { !user.backendRoles.contains(it) }) { + log.debug( + "User specified backend roles, ${indexMonitorV2Request.rbacRoles}, " + + "that they don't have access to. User backend roles: ${user.backendRoles}" + ) + actionListener.onFailure( + AlertingException.wrap( + OpenSearchStatusException( + "User specified backend roles that they don't have access to. 
Contact administrator", RestStatus.FORBIDDEN + ) + ) + ) + return + } else if (indexMonitorV2Request.rbacRoles.isEmpty() == true) { + log.debug( + "Non-admin user are not allowed to specify an empty set of backend roles. " + + "Please don't pass in the parameter or pass in at least one backend role." + ) + actionListener.onFailure( + AlertingException.wrap( + OpenSearchStatusException( + "Non-admin user are not allowed to specify an empty set of backend roles.", RestStatus.FORBIDDEN + ) + ) + ) + return + } + } + + /* check user access to indices */ + when (indexMonitorV2Request.monitorV2) { + is PPLMonitor -> { + checkPplQueryIndices(indexMonitorV2Request, client, actionListener, user) + } + } + } + + private fun checkPplQueryIndices( + indexMonitorV2Request: IndexMonitorV2Request, + client: Client, + actionListener: ActionListener, + user: User? + ) { + val pplMonitor = indexMonitorV2Request.monitorV2 as PPLMonitor + val pplQuery = pplMonitor.query + val indices = getIndicesFromPplQuery(pplQuery) + + val searchRequest = SearchRequest().indices(*indices.toTypedArray()) + .source(SearchSourceBuilder.searchSource().size(1).query(QueryBuilders.matchAllQuery())) + client.search( + searchRequest, + object : ActionListener { + override fun onResponse(searchResponse: SearchResponse) { + // User has read access to configured indices in the monitor, now create monitor with out user context. + client.threadPool().threadContext.stashContext().use { + if (user == null) { + // Security is disabled, add empty user to Monitor. user is null for older versions. 
+ indexMonitorV2Request.monitorV2 = pplMonitor + .copy(user = User("", listOf(), listOf(), listOf())) + checkScheduledJobIndex(indexMonitorV2Request, actionListener, user) + } else { + indexMonitorV2Request.monitorV2 = pplMonitor + .copy(user = User(user.name, user.backendRoles, user.roles, user.customAttNames)) + checkScheduledJobIndex(indexMonitorV2Request, actionListener, user) + } + } + } + + // Due to below issue with security plugin, we get security_exception when invalid index name is mentioned. + // https://github.com/opendistro-for-elasticsearch/security/issues/718 + override fun onFailure(t: Exception) { + actionListener.onFailure( + AlertingException.wrap( + when (t is OpenSearchSecurityException) { + true -> OpenSearchStatusException( + "User doesn't have read permissions for one or more configured index " + + "$indices", + RestStatus.FORBIDDEN + ) + false -> t + } + ) + ) + } + } + ) + } + + private fun checkScheduledJobIndex( + indexMonitorRequest: IndexMonitorV2Request, + actionListener: ActionListener, + user: User? 
+ ) { /* check to see if alerting-config index (scheduled job index) is created and updated before indexing MonitorV2 into it */ if (!scheduledJobIndices.scheduledJobIndexExists()) { // if alerting-config index doesn't exist, send request to create it scheduledJobIndices.initScheduledJobIndex(object : ActionListener { override fun onResponse(response: CreateIndexResponse) { - onCreateMappingsResponse(response.isAcknowledged, indexMonitorRequest, actionListener) + onCreateMappingsResponse(response.isAcknowledged, indexMonitorRequest, actionListener, user) } override fun onFailure(e: Exception) { @@ -191,7 +308,7 @@ class TransportIndexMonitorV2Action @Inject constructor( ) } // Retry mapping of monitor - onCreateMappingsResponse(true, indexMonitorRequest, actionListener) + onCreateMappingsResponse(true, indexMonitorRequest, actionListener, user) } } else { actionListener.onFailure(AlertingException.wrap(e)) @@ -204,7 +321,7 @@ class TransportIndexMonitorV2Action @Inject constructor( ScheduledJobIndices.scheduledJobMappings(), clusterService.state(), client.admin().indices(), object : ActionListener { override fun onResponse(response: AcknowledgedResponse) { - onUpdateMappingsResponse(response, indexMonitorRequest, actionListener) + onUpdateMappingsResponse(response, indexMonitorRequest, actionListener, user) } override fun onFailure(t: Exception) { actionListener.onFailure(AlertingException.wrap(t)) @@ -212,18 +329,19 @@ class TransportIndexMonitorV2Action @Inject constructor( } ) } else { - prepareMonitorIndexing(indexMonitorRequest, actionListener) + prepareMonitorIndexing(indexMonitorRequest, actionListener, user) } } private fun onCreateMappingsResponse( isAcknowledged: Boolean, request: IndexMonitorV2Request, - actionListener: ActionListener + actionListener: ActionListener, + user: User? 
) { if (isAcknowledged) { log.info("Created $SCHEDULED_JOBS_INDEX with mappings.") - prepareMonitorIndexing(request, actionListener) + prepareMonitorIndexing(request, actionListener, user) IndexUtils.scheduledJobIndexUpdated() } else { log.info("Create $SCHEDULED_JOBS_INDEX mappings call not acknowledged.") @@ -240,12 +358,13 @@ class TransportIndexMonitorV2Action @Inject constructor( private fun onUpdateMappingsResponse( response: AcknowledgedResponse, indexMonitorRequest: IndexMonitorV2Request, - actionListener: ActionListener + actionListener: ActionListener, + user: User? ) { if (response.isAcknowledged) { log.info("Updated $SCHEDULED_JOBS_INDEX with mappings.") IndexUtils.scheduledJobIndexUpdated() - prepareMonitorIndexing(indexMonitorRequest, actionListener) + prepareMonitorIndexing(indexMonitorRequest, actionListener, user) } else { log.info("Update $SCHEDULED_JOBS_INDEX mappings call not acknowledged.") actionListener.onFailure( @@ -264,10 +383,14 @@ class TransportIndexMonitorV2Action @Inject constructor( * If this is an update request we can simply update the monitor. Otherwise we first check to see how many monitors already exist, * and compare this to the [maxMonitorCount]. Requests that breach this threshold will be rejected. */ - private fun prepareMonitorIndexing(indexMonitorRequest: IndexMonitorV2Request, actionListener: ActionListener) { + private fun prepareMonitorIndexing( + indexMonitorRequest: IndexMonitorV2Request, + actionListener: ActionListener, + user: User? 
+ ) { if (indexMonitorRequest.method == RestRequest.Method.PUT) { // update monitor case scope.launch { - updateMonitor(indexMonitorRequest, actionListener) + updateMonitor(indexMonitorRequest, actionListener, user) } } else { // create monitor case val query = QueryBuilders.boolQuery().filter(QueryBuilders.termQuery("${Monitor.MONITOR_TYPE}.type", Monitor.MONITOR_TYPE)) @@ -278,7 +401,7 @@ class TransportIndexMonitorV2Action @Inject constructor( searchRequest, object : ActionListener { override fun onResponse(searchResponse: SearchResponse) { - onMonitorCountSearchResponse(searchResponse, indexMonitorRequest, actionListener) + onMonitorCountSearchResponse(searchResponse, indexMonitorRequest, actionListener, user) } override fun onFailure(t: Exception) { @@ -291,7 +414,11 @@ class TransportIndexMonitorV2Action @Inject constructor( /* Functions for Update Monitor flow */ - private suspend fun updateMonitor(indexMonitorRequest: IndexMonitorV2Request, actionListener: ActionListener) { + private suspend fun updateMonitor( + indexMonitorRequest: IndexMonitorV2Request, + actionListener: ActionListener, + user: User? 
+ ) { val getRequest = GetRequest(SCHEDULED_JOBS_INDEX, indexMonitorRequest.monitorId) try { val getResponse: GetResponse = client.suspendUntil { client.get(getRequest, it) } @@ -308,46 +435,97 @@ class TransportIndexMonitorV2Action @Inject constructor( getResponse.sourceAsBytesRef, XContentType.JSON ) val monitorV2 = ScheduledJob.parse(xcp, getResponse.id, getResponse.version) as MonitorV2 - onGetMonitorResponseForUpdate(monitorV2, indexMonitorRequest, actionListener) + onGetMonitorResponseForUpdate(monitorV2, indexMonitorRequest, actionListener, user) } catch (t: Exception) { actionListener.onFailure(AlertingException.wrap(t)) } } private suspend fun onGetMonitorResponseForUpdate( - currentMonitorV2: MonitorV2, + existingMonitorV2: MonitorV2, indexMonitorRequest: IndexMonitorV2Request, - actionListener: ActionListener + actionListener: ActionListener, + user: User? ) { - var newMonitorV2 = when (indexMonitorRequest.monitorV2) { - is PPLMonitor -> indexMonitorRequest.monitorV2 as PPLMonitor - else -> throw IllegalArgumentException("received unsupported monitor type to index: ${indexMonitorRequest.monitorV2.javaClass}") + if ( + !checkUserPermissionsWithResource( + user, + existingMonitorV2.user, + actionListener, + "monitor", + indexMonitorRequest.monitorId + ) + ) { + return } - if (currentMonitorV2 !is PPLMonitor) { - throw IllegalStateException( - "During update, existing monitor ${currentMonitorV2.id} had unexpected type ${currentMonitorV2::class.java}" - ) + var newMonitorV2: MonitorV2 + val currentMonitorV2: MonitorV2 // this is the same as existingMonitorV2, but will be cast to a specific MonitorV2 type + + when (indexMonitorRequest.monitorV2) { + is PPLMonitor -> { + newMonitorV2 = indexMonitorRequest.monitorV2 as PPLMonitor + currentMonitorV2 = existingMonitorV2 as PPLMonitor + } + else -> throw IllegalStateException("received unsupported monitor type to index: ${indexMonitorRequest.monitorV2.javaClass}") } + // If both are enabled, use the current 
existing monitor enabled time, otherwise the next execution will be + // incorrect. if (newMonitorV2.enabled && currentMonitorV2.enabled) { newMonitorV2 = newMonitorV2.copy(enabledTime = currentMonitorV2.enabledTime) } + /** + * On update monitor check which backend roles to associate to the monitor. + * Below are 2 examples of how the logic works + * + * Example 1, say we have a Monitor with backend roles [a, b, c, d] associated with it. + * If I'm User A (non-admin user) and I have backend roles [a, b, c] associated with me and I make a request to update + * the Monitor's backend roles to [a, b]. This would mean that the roles to remove are [c] and the roles to add are [a, b]. + * The Monitor's backend roles would then be [a, b, d]. + * + * Example 2, say we have a Monitor with backend roles [a, b, c, d] associated with it. + * If I'm User A (admin user) and I have backend roles [a, b, c] associated with me and I make a request to update + * the Monitor's backend roles to [a, b]. This would mean that the roles to remove are [c, d] and the roles to add are [a, b]. + * The Monitor's backend roles would then be [a, b]. 
+ */ + if (user != null) { + if (indexMonitorRequest.rbacRoles != null) { + if (isAdmin(user)) { + newMonitorV2 = newMonitorV2.copy( + user = User(user.name, indexMonitorRequest.rbacRoles, user.roles, user.customAttNames) + ) + } else { + // rolesToRemove: these are the backend roles to remove from the monitor + val rolesToRemove = user.backendRoles - indexMonitorRequest.rbacRoles.orEmpty() + // remove the monitor's roles with rolesToRemove and add any roles passed into the request.rbacRoles + val updatedRbac = currentMonitorV2.user?.backendRoles.orEmpty() - rolesToRemove + indexMonitorRequest.rbacRoles + newMonitorV2 = newMonitorV2.copy( + user = User(user.name, updatedRbac, user.roles, user.customAttNames) + ) + } + } else { + newMonitorV2 = newMonitorV2 + .copy(user = User(user.name, currentMonitorV2.user!!.backendRoles, user.roles, user.customAttNames)) + } + log.debug("Update monitor backend roles to: ${newMonitorV2.user?.backendRoles}") + } + newMonitorV2 = newMonitorV2.copy(schemaVersion = IndexUtils.scheduledJobIndexSchemaVersion) val indexRequest = IndexRequest(SCHEDULED_JOBS_INDEX) .setRefreshPolicy(indexMonitorRequest.refreshPolicy) - .source(newMonitorV2.toXContent(jsonBuilder(), ToXContent.MapParams(mapOf("with_type" to "true")))) + .source(newMonitorV2.toXContentWithUser(jsonBuilder(), ToXContent.MapParams(mapOf("with_type" to "true")))) .id(indexMonitorRequest.monitorId) .setIfSeqNo(indexMonitorRequest.seqNo) .setIfPrimaryTerm(indexMonitorRequest.primaryTerm) .timeout(indexTimeout) log.info( - "Updating monitor, ${currentMonitorV2.id}, from: ${currentMonitorV2.toXContent( + "Updating monitor, ${currentMonitorV2.id}, from: ${currentMonitorV2.toXContentWithUser( jsonBuilder(), ToXContent.MapParams(mapOf("with_type" to "true")) - )} \n to: ${newMonitorV2.toXContent(jsonBuilder(), ToXContent.MapParams(mapOf("with_type" to "true")))}" + )} \n to: ${newMonitorV2.toXContentWithUser(jsonBuilder(), ToXContent.MapParams(mapOf("with_type" to "true")))}" ) try { 
@@ -379,7 +557,8 @@ class TransportIndexMonitorV2Action @Inject constructor( private fun onMonitorCountSearchResponse( monitorCountSearchResponse: SearchResponse, indexMonitorRequest: IndexMonitorV2Request, - actionListener: ActionListener + actionListener: ActionListener, + user: User? ) { val totalHits = monitorCountSearchResponse.hits.totalHits?.value if (totalHits != null && totalHits >= maxMonitors) { @@ -393,39 +572,49 @@ class TransportIndexMonitorV2Action @Inject constructor( ) } else { scope.launch { - indexMonitor(indexMonitorRequest, actionListener) + indexMonitor(indexMonitorRequest, actionListener, user) } } } - private suspend fun indexMonitor(indexMonitorRequest: IndexMonitorV2Request, actionListener: ActionListener) { - // TODO: user permissions for PPL alerting -// if (user != null) { -// // Use the backend roles which is an intersection of the requested backend roles and the user's backend roles. -// // Admins can pass in any backend role. Also if no backend role is passed in, all the user's backend roles are used. -// val rbacRoles = if (request.rbacRoles == null) user.backendRoles.toSet() -// else if (!isAdmin(user)) request.rbacRoles?.intersect(user.backendRoles)?.toSet() -// else request.rbacRoles -// -// request.monitor = request.monitor.copy( -// user = User(user.name, rbacRoles.orEmpty().toList(), user.roles, user.customAttNames) -// ) -// log.debug("Created monitor's backend roles: $rbacRoles") -// } + private suspend fun indexMonitor( + indexMonitorRequest: IndexMonitorV2Request, + actionListener: ActionListener, + user: User? + ) { var monitorV2 = when (indexMonitorRequest.monitorV2) { is PPLMonitor -> indexMonitorRequest.monitorV2 as PPLMonitor else -> throw IllegalArgumentException("received unsupported monitor type to index: ${indexMonitorRequest.monitorV2.javaClass}") } + if (user != null) { + // Use the backend roles which is an intersection of the requested backend roles and the user's backend roles. 
+ // Admins can pass in any backend role. Also if no backend role is passed in, all the user's backend roles are used. + val rbacRoles = if (indexMonitorRequest.rbacRoles == null) user.backendRoles.toSet() + else if (!isAdmin(user)) indexMonitorRequest.rbacRoles.intersect(user.backendRoles).toSet() + else indexMonitorRequest.rbacRoles + + monitorV2 = when (monitorV2) { + is PPLMonitor -> monitorV2.copy( + user = User(user.name, rbacRoles.toList(), user.roles, user.customAttNames) + ) + else -> throw IllegalArgumentException( + "received unsupported monitor type when resolving backend roles: ${indexMonitorRequest.monitorV2.javaClass}" + ) + } + log.debug("Created monitor's backend roles: $rbacRoles") + } + + // TODO: only works because monitorV2 is always of type PPLMonitor, not extensible to other potential MonitorV2 types val indexRequest = IndexRequest(SCHEDULED_JOBS_INDEX) .setRefreshPolicy(indexMonitorRequest.refreshPolicy) - .source(monitorV2.toXContent(jsonBuilder(), ToXContent.MapParams(mapOf("with_type" to "true")))) + .source(monitorV2.toXContentWithUser(jsonBuilder(), ToXContent.MapParams(mapOf("with_type" to "true")))) .setIfSeqNo(indexMonitorRequest.seqNo) .setIfPrimaryTerm(indexMonitorRequest.primaryTerm) .timeout(indexTimeout) log.info( - "Creating new monitorV2: ${monitorV2.toXContent( + "Creating new monitorV2: ${monitorV2.toXContentWithUser( jsonBuilder(), ToXContent.MapParams(mapOf("with_type" to "true")) )}" @@ -452,4 +641,20 @@ class TransportIndexMonitorV2Action @Inject constructor( actionListener.onFailure(AlertingException.wrap(t)) } } + + /* Utils */ + private fun getIndicesFromPplQuery(pplQuery: String): List { + // captures comma-separated concrete indices, index patterns, and index aliases + val indicesRegex = """(?i)source(?:\s*)=(?:\s*)([-\w.*'+]+(?:\*)?(?:\s*,\s*[-\w.*'+]+\*?)*)\s*\|*""".toRegex() + + // use find() instead of findAll() because a PPL query only ever has one source statement + // the only capture group specified in the 
regex captures the comma separated list of indices/index patterns + val indices = indicesRegex.find(pplQuery)?.groupValues?.get(1)?.split(",")?.map { it.trim() } + ?: throw IllegalStateException( + "Could not find indices that PPL Monitor query searches even " + + "after validating the query through SQL/PPL plugin" + ) + + return indices + } } diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportSearchMonitorV2Action.kt b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportSearchMonitorV2Action.kt index eb05cc0c8..3b58b4bdd 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportSearchMonitorV2Action.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportSearchMonitorV2Action.kt @@ -43,6 +43,9 @@ class TransportSearchMonitorV2Action @Inject constructor( override fun doExecute(task: Task, request: SearchMonitorV2Request, actionListener: ActionListener) { + // TODO: if alerting-config index doesnt exist, OS error is thrown to customer saying that much, try to catch that and + // throw more explicit error like "no monitorV2 exists" + val searchSourceBuilder = request.searchRequest.source() val queryBuilder = if (searchSourceBuilder.query() == null) BoolQueryBuilder() diff --git a/core/src/main/kotlin/org/opensearch/alerting/core/modelv2/MonitorV2.kt b/core/src/main/kotlin/org/opensearch/alerting/core/modelv2/MonitorV2.kt index 427ff8dee..5f0d803bd 100644 --- a/core/src/main/kotlin/org/opensearch/alerting/core/modelv2/MonitorV2.kt +++ b/core/src/main/kotlin/org/opensearch/alerting/core/modelv2/MonitorV2.kt @@ -5,6 +5,7 @@ import org.opensearch.common.CheckedFunction import org.opensearch.common.unit.TimeValue import org.opensearch.commons.alerting.model.Schedule import org.opensearch.commons.alerting.model.ScheduledJob +import org.opensearch.commons.authuser.User import org.opensearch.core.ParseField import org.opensearch.core.common.io.stream.StreamInput import 
org.opensearch.core.common.io.stream.StreamOutput @@ -22,6 +23,7 @@ interface MonitorV2 : ScheduledJob { override val schedule: Schedule override val lastUpdateTime: Instant // required for scheduled job maintenance override val enabledTime: Instant? // required for scheduled job maintenance + val user: User? val triggers: List val schemaVersion: Int // for updating monitors val lookBackWindow: TimeValue? // how far back to look when querying data during monitor execution @@ -52,8 +54,10 @@ interface MonitorV2 : ScheduledJob { const val SCHEDULE_FIELD = "schedule" const val LAST_UPDATE_TIME_FIELD = "last_update_time" const val ENABLED_TIME_FIELD = "enabled_time" + const val USER_FIELD = "user" const val TRIGGERS_FIELD = "triggers" const val LOOK_BACK_WINDOW_FIELD = "look_back_window" + const val SCHEMA_VERSION_FIELD = "schema_version" // default values const val NO_ID = "" diff --git a/core/src/main/kotlin/org/opensearch/alerting/core/modelv2/PPLMonitor.kt b/core/src/main/kotlin/org/opensearch/alerting/core/modelv2/PPLMonitor.kt index d134dd3ed..e02ce4069 100644 --- a/core/src/main/kotlin/org/opensearch/alerting/core/modelv2/PPLMonitor.kt +++ b/core/src/main/kotlin/org/opensearch/alerting/core/modelv2/PPLMonitor.kt @@ -1,5 +1,14 @@ package org.opensearch.alerting.core.modelv2 +import org.opensearch.alerting.core.modelv2.MonitorV2.Companion.ENABLED_FIELD +import org.opensearch.alerting.core.modelv2.MonitorV2.Companion.ENABLED_TIME_FIELD +import org.opensearch.alerting.core.modelv2.MonitorV2.Companion.LAST_UPDATE_TIME_FIELD +import org.opensearch.alerting.core.modelv2.MonitorV2.Companion.LOOK_BACK_WINDOW_FIELD +import org.opensearch.alerting.core.modelv2.MonitorV2.Companion.NAME_FIELD +import org.opensearch.alerting.core.modelv2.MonitorV2.Companion.SCHEDULE_FIELD +import org.opensearch.alerting.core.modelv2.MonitorV2.Companion.SCHEMA_VERSION_FIELD +import org.opensearch.alerting.core.modelv2.MonitorV2.Companion.TRIGGERS_FIELD +import 
org.opensearch.alerting.core.modelv2.MonitorV2.Companion.USER_FIELD import org.opensearch.alerting.core.util.nonOptionalTimeField import org.opensearch.common.unit.TimeValue import org.opensearch.commons.alerting.model.CronSchedule @@ -9,6 +18,8 @@ import org.opensearch.commons.alerting.model.Schedule import org.opensearch.commons.alerting.util.IndexUtils import org.opensearch.commons.alerting.util.instant import org.opensearch.commons.alerting.util.optionalTimeField +import org.opensearch.commons.alerting.util.optionalUserField +import org.opensearch.commons.authuser.User import org.opensearch.core.common.io.stream.StreamInput import org.opensearch.core.common.io.stream.StreamOutput import org.opensearch.core.xcontent.ToXContent @@ -49,6 +60,7 @@ data class PPLMonitor( override val lookBackWindow: TimeValue? = null, override val lastUpdateTime: Instant, override val enabledTime: Instant?, + override val user: User?, override val triggers: List, override val schemaVersion: Int = IndexUtils.NO_SCHEMA_VERSION, val queryLanguage: QueryLanguage = QueryLanguage.PPL, // default to PPL, SQL not currently supported @@ -78,10 +90,6 @@ data class PPLMonitor( require(enabledTime == null) } - triggers.forEach { trigger -> - require(trigger is PPLTrigger) { "Incompatible trigger [${trigger.id}] for monitor type [$PPL_MONITOR_TYPE]" } - } - // TODO: create setting for max triggers and check for max triggers here } @@ -95,13 +103,26 @@ data class PPLMonitor( lookBackWindow = TimeValue.parseTimeValue(sin.readString(), PLACEHOLDER_LOOK_BACK_WINDOW_SETTING_NAME), lastUpdateTime = sin.readInstant(), enabledTime = sin.readOptionalInstant(), + user = if (sin.readBoolean()) { + User(sin) + } else { + null + }, triggers = sin.readList(PPLTrigger::readFrom), schemaVersion = sin.readInt(), queryLanguage = sin.readEnum(QueryLanguage::class.java), query = sin.readString() ) + fun toXContentWithUser(builder: XContentBuilder, params: ToXContent.Params): XContentBuilder { + return 
createXContentBuilder(builder, params, true) + } + override fun toXContent(builder: XContentBuilder, params: ToXContent.Params): XContentBuilder { + return createXContentBuilder(builder, params, false) + } + + private fun createXContentBuilder(builder: XContentBuilder, params: ToXContent.Params, withUser: Boolean): XContentBuilder { builder.startObject() // overall start object // if this is being written as ScheduledJob, add extra object layer and add ScheduledJob @@ -115,6 +136,10 @@ data class PPLMonitor( // read in monitor type, then delegate to correct parse() function builder.startObject(PPL_MONITOR_TYPE) // monitor type start object + if (withUser) { + builder.optionalUserField(USER_FIELD, user) + } + builder.field(MonitorV2.NAME_FIELD, name) builder.field(MonitorV2.SCHEDULE_FIELD, schedule) builder.field(MonitorV2.LOOK_BACK_WINDOW_FIELD, lookBackWindow?.toHumanReadableString(0)) @@ -155,6 +180,10 @@ data class PPLMonitor( out.writeInstant(lastUpdateTime) out.writeOptionalInstant(enabledTime) + + out.writeBoolean(user != null) + user?.writeTo(out) + out.writeVInt(triggers.size) triggers.forEach { it.writeTo(out) } out.writeInt(schemaVersion) @@ -166,13 +195,13 @@ data class PPLMonitor( return mapOf( IndexUtils._ID to id, IndexUtils._VERSION to version, - MonitorV2.NAME_FIELD to name, - MonitorV2.ENABLED_FIELD to enabled, - MonitorV2.SCHEDULE_FIELD to schedule, - MonitorV2.LOOK_BACK_WINDOW_FIELD to lookBackWindow?.toHumanReadableString(0), - MonitorV2.LAST_UPDATE_TIME_FIELD to lastUpdateTime.toEpochMilli(), - MonitorV2.ENABLED_TIME_FIELD to enabledTime?.toEpochMilli(), - MonitorV2.TRIGGERS_FIELD to triggers, + NAME_FIELD to name, + ENABLED_FIELD to enabled, + SCHEDULE_FIELD to schedule, + LOOK_BACK_WINDOW_FIELD to lookBackWindow?.toHumanReadableString(0), + LAST_UPDATE_TIME_FIELD to lastUpdateTime.toEpochMilli(), + ENABLED_TIME_FIELD to enabledTime?.toEpochMilli(), + TRIGGERS_FIELD to triggers, QUERY_LANGUAGE_FIELD to queryLanguage.value, QUERY_FIELD to query 
) @@ -214,6 +243,7 @@ data class PPLMonitor( var lookBackWindow: TimeValue? = null var lastUpdateTime: Instant? = null var enabledTime: Instant? = null + var user: User? = null val triggers: MutableList = mutableListOf() var schemaVersion = IndexUtils.NO_SCHEMA_VERSION var queryLanguage: QueryLanguage = QueryLanguage.PPL // default to PPL @@ -226,10 +256,10 @@ data class PPLMonitor( xcp.nextToken() when (fieldName) { - MonitorV2.NAME_FIELD -> name = xcp.text() - MonitorV2.ENABLED_FIELD -> enabled = xcp.booleanValue() - MonitorV2.SCHEDULE_FIELD -> schedule = Schedule.parse(xcp) - MonitorV2.LOOK_BACK_WINDOW_FIELD -> { + NAME_FIELD -> name = xcp.text() + ENABLED_FIELD -> enabled = xcp.booleanValue() + SCHEDULE_FIELD -> schedule = Schedule.parse(xcp) + LOOK_BACK_WINDOW_FIELD -> { lookBackWindow = if (xcp.currentToken() == XContentParser.Token.VALUE_NULL) { null } else { @@ -238,9 +268,10 @@ data class PPLMonitor( TimeValue.parseTimeValue(input, PLACEHOLDER_LOOK_BACK_WINDOW_SETTING_NAME) } } - MonitorV2.LAST_UPDATE_TIME_FIELD -> lastUpdateTime = xcp.instant() - MonitorV2.ENABLED_TIME_FIELD -> enabledTime = xcp.instant() - MonitorV2.TRIGGERS_FIELD -> { + LAST_UPDATE_TIME_FIELD -> lastUpdateTime = xcp.instant() + ENABLED_TIME_FIELD -> enabledTime = xcp.instant() + Monitor.USER_FIELD -> user = if (xcp.currentToken() == XContentParser.Token.VALUE_NULL) null else User.parse(xcp) + TRIGGERS_FIELD -> { XContentParserUtils.ensureExpectedToken( XContentParser.Token.START_ARRAY, xcp.currentToken(), @@ -250,7 +281,7 @@ data class PPLMonitor( triggers.add(PPLTrigger.parseInner(xcp)) } } - Monitor.SCHEMA_VERSION_FIELD -> schemaVersion = xcp.intValue() + SCHEMA_VERSION_FIELD -> schemaVersion = xcp.intValue() QUERY_LANGUAGE_FIELD -> { val input = xcp.text() val enumMatchResult = QueryLanguage.enumFromString(input) @@ -297,7 +328,6 @@ data class PPLMonitor( // check for required fields requireNotNull(name) { "Monitor name is null" } requireNotNull(schedule) { "Schedule is null" } - 
requireNotNull(queryLanguage) { "Query language is null" } requireNotNull(query) { "Query is null" } requireNotNull(lastUpdateTime) { "Last update time is null" } @@ -319,6 +349,7 @@ data class PPLMonitor( lookBackWindow, lastUpdateTime, enabledTime, + user, triggers, schemaVersion, queryLanguage, From 53afc6cb7023f645491503a224113d5bb51262c7 Mon Sep 17 00:00:00 2001 From: Dennis Toepker Date: Tue, 16 Sep 2025 08:16:13 -0700 Subject: [PATCH 32/40] removing requirement for lookback to be for cron only --- .../alerting/core/modelv2/PPLMonitor.kt | 32 ++++++++----------- 1 file changed, 14 insertions(+), 18 deletions(-) diff --git a/core/src/main/kotlin/org/opensearch/alerting/core/modelv2/PPLMonitor.kt b/core/src/main/kotlin/org/opensearch/alerting/core/modelv2/PPLMonitor.kt index e02ce4069..2df7e46b5 100644 --- a/core/src/main/kotlin/org/opensearch/alerting/core/modelv2/PPLMonitor.kt +++ b/core/src/main/kotlin/org/opensearch/alerting/core/modelv2/PPLMonitor.kt @@ -5,6 +5,8 @@ import org.opensearch.alerting.core.modelv2.MonitorV2.Companion.ENABLED_TIME_FIE import org.opensearch.alerting.core.modelv2.MonitorV2.Companion.LAST_UPDATE_TIME_FIELD import org.opensearch.alerting.core.modelv2.MonitorV2.Companion.LOOK_BACK_WINDOW_FIELD import org.opensearch.alerting.core.modelv2.MonitorV2.Companion.NAME_FIELD +import org.opensearch.alerting.core.modelv2.MonitorV2.Companion.NO_ID +import org.opensearch.alerting.core.modelv2.MonitorV2.Companion.NO_VERSION import org.opensearch.alerting.core.modelv2.MonitorV2.Companion.SCHEDULE_FIELD import org.opensearch.alerting.core.modelv2.MonitorV2.Companion.SCHEMA_VERSION_FIELD import org.opensearch.alerting.core.modelv2.MonitorV2.Companion.TRIGGERS_FIELD @@ -12,8 +14,6 @@ import org.opensearch.alerting.core.modelv2.MonitorV2.Companion.USER_FIELD import org.opensearch.alerting.core.util.nonOptionalTimeField import org.opensearch.common.unit.TimeValue import org.opensearch.commons.alerting.model.CronSchedule -import 
org.opensearch.commons.alerting.model.IntervalSchedule -import org.opensearch.commons.alerting.model.Monitor import org.opensearch.commons.alerting.model.Schedule import org.opensearch.commons.alerting.util.IndexUtils import org.opensearch.commons.alerting.util.instant @@ -52,8 +52,8 @@ import java.time.Instant * @property query The PPL query string to be executed by this monitor. */ data class PPLMonitor( - override val id: String = MonitorV2.NO_ID, - override val version: Long = MonitorV2.NO_VERSION, + override val id: String = NO_ID, + override val version: Long = NO_VERSION, override val name: String, override val enabled: Boolean, override val schedule: Schedule, @@ -140,14 +140,14 @@ data class PPLMonitor( builder.optionalUserField(USER_FIELD, user) } - builder.field(MonitorV2.NAME_FIELD, name) - builder.field(MonitorV2.SCHEDULE_FIELD, schedule) - builder.field(MonitorV2.LOOK_BACK_WINDOW_FIELD, lookBackWindow?.toHumanReadableString(0)) - builder.field(MonitorV2.ENABLED_FIELD, enabled) - builder.nonOptionalTimeField(MonitorV2.LAST_UPDATE_TIME_FIELD, lastUpdateTime) - builder.optionalTimeField(MonitorV2.ENABLED_TIME_FIELD, enabledTime) - builder.field(MonitorV2.TRIGGERS_FIELD, triggers.toTypedArray()) - builder.field(Monitor.SCHEMA_VERSION_FIELD, schemaVersion) + builder.field(NAME_FIELD, name) + builder.field(SCHEDULE_FIELD, schedule) + builder.field(LOOK_BACK_WINDOW_FIELD, lookBackWindow?.toHumanReadableString(0)) + builder.field(ENABLED_FIELD, enabled) + builder.nonOptionalTimeField(LAST_UPDATE_TIME_FIELD, lastUpdateTime) + builder.optionalTimeField(ENABLED_TIME_FIELD, enabledTime) + builder.field(TRIGGERS_FIELD, triggers.toTypedArray()) + builder.field(SCHEMA_VERSION_FIELD, schemaVersion) builder.field(QUERY_LANGUAGE_FIELD, queryLanguage.value) builder.field(QUERY_FIELD, query) @@ -236,7 +236,7 @@ data class PPLMonitor( @JvmStatic @JvmOverloads @Throws(IOException::class) - fun parse(xcp: XContentParser, id: String = MonitorV2.NO_ID, version: Long = 
MonitorV2.NO_VERSION): PPLMonitor { + fun parse(xcp: XContentParser, id: String = NO_ID, version: Long = NO_VERSION): PPLMonitor { var name: String? = null var enabled = true var schedule: Schedule? = null @@ -270,7 +270,7 @@ data class PPLMonitor( } LAST_UPDATE_TIME_FIELD -> lastUpdateTime = xcp.instant() ENABLED_TIME_FIELD -> enabledTime = xcp.instant() - Monitor.USER_FIELD -> user = if (xcp.currentToken() == XContentParser.Token.VALUE_NULL) null else User.parse(xcp) + USER_FIELD -> user = if (xcp.currentToken() == XContentParser.Token.VALUE_NULL) null else User.parse(xcp) TRIGGERS_FIELD -> { XContentParserUtils.ensureExpectedToken( XContentParser.Token.START_ARRAY, @@ -331,10 +331,6 @@ data class PPLMonitor( requireNotNull(query) { "Query is null" } requireNotNull(lastUpdateTime) { "Last update time is null" } - if (schedule is IntervalSchedule && lookBackWindow != null) { - throw IllegalArgumentException("Look back windows only supported for CRON schedules") - } - if (queryLanguage == QueryLanguage.SQL) { throw IllegalArgumentException("SQL queries are not supported. 
Please use a PPL query.") } From 25ca4daa073d0266ce89c72a5300e89f7b90e1b5 Mon Sep 17 00:00:00 2001 From: Dennis Toepker Date: Thu, 18 Sep 2025 16:38:21 -0700 Subject: [PATCH 33/40] initial rbac implementation and alerting v2 doesnt affect alerting v1 entities --- .../org/opensearch/alerting/AlertingPlugin.kt | 69 +++--- .../opensearch/alerting/PPLMonitorRunner.kt | 78 ++++--- .../resthandler/RestIndexMonitorV2Action.kt | 2 +- .../resthandler/RestSearchMonitorV2Action.kt | 8 +- .../transport/SecureTransportAction.kt | 2 +- .../TransportDeleteMonitorV2Action.kt | 74 ++++++- .../TransportExecuteMonitorV2Action.kt | 197 +++++++++--------- .../transport/TransportGetAlertsV2Action.kt | 36 ++-- .../transport/TransportGetMonitorV2Action.kt | 115 +++++----- .../TransportIndexMonitorV2Action.kt | 32 ++- .../TransportSearchMonitorV2Action.kt | 35 ++-- .../alerting/core/modelv2/AlertV2.kt | 52 +++-- .../alerting/core/modelv2/PPLMonitor.kt | 8 +- 13 files changed, 431 insertions(+), 277 deletions(-) diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/AlertingPlugin.kt b/alerting/src/main/kotlin/org/opensearch/alerting/AlertingPlugin.kt index f826e96e8..1a32c22af 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/AlertingPlugin.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/AlertingPlugin.kt @@ -32,19 +32,40 @@ import org.opensearch.alerting.core.action.node.ScheduledJobsStatsAction import org.opensearch.alerting.core.action.node.ScheduledJobsStatsTransportAction import org.opensearch.alerting.core.lock.LockService import org.opensearch.alerting.core.modelv2.MonitorV2 +import org.opensearch.alerting.core.resthandler.RestScheduledJobStatsHandler import org.opensearch.alerting.core.resthandler.RestScheduledJobStatsV2Handler import org.opensearch.alerting.core.schedule.JobScheduler import org.opensearch.alerting.core.settings.LegacyOpenDistroScheduledJobSettings import org.opensearch.alerting.core.settings.ScheduledJobSettings import 
org.opensearch.alerting.remote.monitors.RemoteMonitorRegistry +import org.opensearch.alerting.resthandler.RestAcknowledgeAlertAction +import org.opensearch.alerting.resthandler.RestAcknowledgeChainedAlertAction +import org.opensearch.alerting.resthandler.RestDeleteAlertingCommentAction import org.opensearch.alerting.resthandler.RestDeleteMonitorAction import org.opensearch.alerting.resthandler.RestDeleteMonitorV2Action +import org.opensearch.alerting.resthandler.RestDeleteWorkflowAction +import org.opensearch.alerting.resthandler.RestExecuteMonitorAction import org.opensearch.alerting.resthandler.RestExecuteMonitorV2Action +import org.opensearch.alerting.resthandler.RestExecuteWorkflowAction +import org.opensearch.alerting.resthandler.RestGetAlertsAction import org.opensearch.alerting.resthandler.RestGetAlertsV2Action +import org.opensearch.alerting.resthandler.RestGetDestinationsAction +import org.opensearch.alerting.resthandler.RestGetEmailAccountAction +import org.opensearch.alerting.resthandler.RestGetEmailGroupAction +import org.opensearch.alerting.resthandler.RestGetFindingsAction import org.opensearch.alerting.resthandler.RestGetMonitorAction import org.opensearch.alerting.resthandler.RestGetMonitorV2Action +import org.opensearch.alerting.resthandler.RestGetRemoteIndexesAction +import org.opensearch.alerting.resthandler.RestGetWorkflowAction +import org.opensearch.alerting.resthandler.RestGetWorkflowAlertsAction +import org.opensearch.alerting.resthandler.RestIndexAlertingCommentAction import org.opensearch.alerting.resthandler.RestIndexMonitorAction import org.opensearch.alerting.resthandler.RestIndexMonitorV2Action +import org.opensearch.alerting.resthandler.RestIndexWorkflowAction +import org.opensearch.alerting.resthandler.RestSearchAlertingCommentAction +import org.opensearch.alerting.resthandler.RestSearchEmailAccountAction +import org.opensearch.alerting.resthandler.RestSearchEmailGroupAction +import 
org.opensearch.alerting.resthandler.RestSearchMonitorAction import org.opensearch.alerting.resthandler.RestSearchMonitorV2Action import org.opensearch.alerting.script.TriggerScript import org.opensearch.alerting.service.DeleteMonitorService @@ -199,30 +220,30 @@ internal class AlertingPlugin : PainlessExtension, ActionPlugin, ScriptPlugin, R ): List { return listOf( // Alerting V1 -// RestGetMonitorAction(), -// RestDeleteMonitorAction(), -// RestIndexMonitorAction(), -// RestIndexWorkflowAction(), -// RestSearchMonitorAction(settings, clusterService), -// RestExecuteMonitorAction(), -// RestExecuteWorkflowAction(), -// RestAcknowledgeAlertAction(), -// RestAcknowledgeChainedAlertAction(), -// RestScheduledJobStatsHandler("_alerting"), -// RestSearchEmailAccountAction(), -// RestGetEmailAccountAction(), -// RestSearchEmailGroupAction(), -// RestGetEmailGroupAction(), -// RestGetDestinationsAction(), -// RestGetAlertsAction(), -// RestGetWorkflowAlertsAction(), -// RestGetFindingsAction(), -// RestGetWorkflowAction(), -// RestDeleteWorkflowAction(), -// RestGetRemoteIndexesAction(), -// RestIndexAlertingCommentAction(), -// RestSearchAlertingCommentAction(), -// RestDeleteAlertingCommentAction(), + RestGetMonitorAction(), + RestDeleteMonitorAction(), + RestIndexMonitorAction(), + RestIndexWorkflowAction(), + RestSearchMonitorAction(settings, clusterService), + RestExecuteMonitorAction(), + RestExecuteWorkflowAction(), + RestAcknowledgeAlertAction(), + RestAcknowledgeChainedAlertAction(), + RestScheduledJobStatsHandler("_alerting"), + RestSearchEmailAccountAction(), + RestGetEmailAccountAction(), + RestSearchEmailGroupAction(), + RestGetEmailGroupAction(), + RestGetDestinationsAction(), + RestGetAlertsAction(), + RestGetWorkflowAlertsAction(), + RestGetFindingsAction(), + RestGetWorkflowAction(), + RestDeleteWorkflowAction(), + RestGetRemoteIndexesAction(), + RestIndexAlertingCommentAction(), + RestSearchAlertingCommentAction(), + RestDeleteAlertingCommentAction(), 
// Alerting V2 RestIndexMonitorV2Action(), diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt b/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt index 53e886a12..d76ea3024 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt @@ -24,8 +24,10 @@ import org.opensearch.alerting.core.modelv2.PPLTrigger.TriggerMode import org.opensearch.alerting.core.modelv2.PPLTriggerRunResult import org.opensearch.alerting.core.modelv2.TriggerV2.Severity import org.opensearch.alerting.core.ppl.PPLPluginInterface +import org.opensearch.alerting.opensearchapi.InjectorContextElement import org.opensearch.alerting.opensearchapi.retry import org.opensearch.alerting.opensearchapi.suspendUntil +import org.opensearch.alerting.opensearchapi.withClosableContext import org.opensearch.alerting.script.PPLTriggerExecutionContext import org.opensearch.common.unit.TimeValue import org.opensearch.common.xcontent.XContentFactory @@ -414,6 +416,7 @@ object PPLMonitorRunner : MonitorV2Runner { monitorId = pplMonitor.id, monitorName = pplMonitor.name, monitorVersion = pplMonitor.version, + monitorUser = pplMonitor.user, triggerId = pplTrigger.id, triggerName = pplTrigger.name, queryResults = queryResult.toMap(), @@ -448,6 +451,7 @@ object PPLMonitorRunner : MonitorV2Runner { monitorId = pplMonitor.id, monitorName = pplMonitor.name, monitorVersion = pplMonitor.version, + monitorUser = pplMonitor.user, triggerId = pplTrigger.id, triggerName = pplTrigger.name, queryResults = mapOf(), @@ -473,7 +477,7 @@ object PPLMonitorRunner : MonitorV2Runner { listOf>( IndexRequest(AlertV2Indices.ALERT_V2_INDEX) .routing(pplMonitor.id) // set routing ID to PPL Monitor ID - .source(alert.toXContent(XContentFactory.jsonBuilder(), ToXContent.EMPTY_PARAMS)) + .source(alert.toXContentWithUser(XContentFactory.jsonBuilder())) .id(if (alert.id != Alert.NO_ID) alert.id else null) 
) } @@ -500,9 +504,15 @@ object PPLMonitorRunner : MonitorV2Runner { private suspend fun updateMonitorWithLastTriggeredTimes(pplMonitor: PPLMonitor, client: NodeClient) { val indexRequest = IndexRequest(SCHEDULED_JOBS_INDEX) .id(pplMonitor.id) - .source(pplMonitor.toXContentWithType(XContentFactory.jsonBuilder())) + .source( + pplMonitor.toXContentWithUser( + XContentFactory.jsonBuilder(), + ToXContent.MapParams( + mapOf("with_type" to "true") + ) + ) + ) .routing(pplMonitor.id) - val indexResponse = client.suspendUntil { index(indexRequest, it) } logger.info("PPLMonitor update with last execution times index response: ${indexResponse.result}") @@ -517,42 +527,44 @@ object PPLMonitorRunner : MonitorV2Runner { ) { // this function can throw an exception, which is caught by the try // catch in runMonitor() to generate an error alert - val actionOutput = mutableMapOf() - actionOutput[Action.SUBJECT] = if (action.subjectTemplate != null) + // TODO: is actionOutput even needed, we dont store action run results in alert +// val actionOutput = mutableMapOf() + val notifSubject = if (action.subjectTemplate != null) MonitorRunnerService.compileTemplateV2(action.subjectTemplate!!, triggerCtx) else "" - actionOutput[Action.MESSAGE] = MonitorRunnerService.compileTemplateV2(action.messageTemplate, triggerCtx) - if (Strings.isNullOrEmpty(actionOutput[Action.MESSAGE])) { + // TODO: check query results size, truncate accordingly, and append to notifMessage before sending + // TODO: maybe remove actionOutput, p sure that was for storing action execution results, which we arent doing + val notifMessage = MonitorRunnerService.compileTemplateV2(action.messageTemplate, triggerCtx) + if (Strings.isNullOrEmpty(notifMessage)) { throw IllegalStateException("Message content missing in the Destination with id: ${action.destinationId}") } if (!dryrun) { -// val client = monitorCtx.client - actionOutput[Action.MESSAGE_ID] = getConfigAndSendNotification( - action, - monitorCtx, - 
actionOutput[Action.SUBJECT], - actionOutput[Action.MESSAGE]!! - ) - // TODO: use this block when security plugin is enabled -// client!!.threadPool().threadContext.stashContext().use { -// withClosableContext( -// InjectorContextElement( -// pplMonitor.id, -// monitorCtx.settings!!, -// monitorCtx.threadPool!!.threadContext, -// pplMonitor.user?.roles, -// pplMonitor.user -// ) -// ) { -// actionOutput[Action.MESSAGE_ID] = getConfigAndSendNotification( -// action, -// monitorCtx, -// actionOutput[Action.SUBJECT], -// actionOutput[Action.MESSAGE]!! -// ) -// } -// } + val client = monitorCtx.client +// actionOutput[Action.MESSAGE_ID] = getConfigAndSendNotification( +// action, +// monitorCtx, +// actionOutput[Action.SUBJECT], +// actionOutput[Action.MESSAGE]!! +// ) + client!!.threadPool().threadContext.stashContext().use { + withClosableContext( + InjectorContextElement( + pplMonitor.id, + monitorCtx.settings!!, + monitorCtx.threadPool!!.threadContext, + pplMonitor.user?.roles, + pplMonitor.user + ) + ) { + getConfigAndSendNotification( + action, + monitorCtx, + notifSubject, + notifMessage + ) + } + } } } diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestIndexMonitorV2Action.kt b/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestIndexMonitorV2Action.kt index 52cbf20ad..0544a8b22 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestIndexMonitorV2Action.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestIndexMonitorV2Action.kt @@ -53,7 +53,7 @@ class RestIndexMonitorV2Action : BaseRestHandler() { ensureExpectedToken(Token.START_OBJECT, xcp.nextToken(), xcp) val monitorV2: MonitorV2 - val rbacRoles: List? + val rbacRoles: List? // TODO: do we want to support specifying rbac roles in monitor body? try { monitorV2 = MonitorV2.parse(xcp) rbacRoles = request.contentParser().map()["rbac_roles"] as List? 
diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestSearchMonitorV2Action.kt b/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestSearchMonitorV2Action.kt index 2b8600bca..b15e5b95d 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestSearchMonitorV2Action.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestSearchMonitorV2Action.kt @@ -79,7 +79,7 @@ class RestSearchMonitorV2Action( } } - // once ths search response is received, rewrite the search hits to remove the extra "monitor_v2" JSON object wrapper + // once the search response is received, rewrite the search hits to remove the extra "monitor_v2" JSON object wrapper // that is used as ScheduledJob metadata private fun searchMonitorResponse(channel: RestChannel): RestResponseListener { return object : RestResponseListener(channel) { @@ -89,13 +89,16 @@ class RestSearchMonitorV2Action( return BytesRestResponse(RestStatus.REQUEST_TIMEOUT, response.toString()) } - // Swallow exception and return response as is try { for (hit in response.hits) { XContentType.JSON.xContent().createParser( channel.request().xContentRegistry, LoggingDeprecationHandler.INSTANCE, hit.sourceAsString ).use { hitsParser -> + // when reconstructing XContent, intentionally leave out + // user field in response for security reasons by + // calling ScheduledJob.toXContent instead of + // a MonitorV2's toXContentWithUser val monitorV2 = ScheduledJob.parse(hitsParser, hit.id, hit.version) val xcb = monitorV2.toXContent(jsonBuilder(), EMPTY_PARAMS) @@ -105,6 +108,7 @@ class RestSearchMonitorV2Action( } } } catch (e: Exception) { + // Swallow exception and return response as is log.error("The monitor_v2 parsing failed. 
Will return response as is.") } return BytesRestResponse(RestStatus.OK, response.toXContent(channel.newBuilder(), EMPTY_PARAMS)) diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/transport/SecureTransportAction.kt b/alerting/src/main/kotlin/org/opensearch/alerting/transport/SecureTransportAction.kt index 54667e125..627181188 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/transport/SecureTransportAction.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/transport/SecureTransportAction.kt @@ -45,7 +45,7 @@ interface SecureTransportAction { fun readUserFromThreadContext(client: Client): User? { val userStr = client.threadPool().threadContext.getTransient(ConfigConstants.OPENSEARCH_SECURITY_USER_INFO_THREAD_CONTEXT) - log.debug("User and roles string from thread context: $userStr") + log.info("User and roles string from thread context: $userStr") return User.parse(userStr) } diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportDeleteMonitorV2Action.kt b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportDeleteMonitorV2Action.kt index c2852b022..80b93a494 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportDeleteMonitorV2Action.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportDeleteMonitorV2Action.kt @@ -4,17 +4,28 @@ import kotlinx.coroutines.CoroutineScope import kotlinx.coroutines.Dispatchers import kotlinx.coroutines.launch import org.apache.logging.log4j.LogManager +import org.opensearch.OpenSearchStatusException +import org.opensearch.action.get.GetRequest +import org.opensearch.action.get.GetResponse import org.opensearch.action.support.ActionFilters import org.opensearch.action.support.HandledTransportAction import org.opensearch.alerting.actionv2.DeleteMonitorV2Action import org.opensearch.alerting.actionv2.DeleteMonitorV2Request import org.opensearch.alerting.actionv2.DeleteMonitorV2Response +import 
org.opensearch.alerting.core.modelv2.MonitorV2 +import org.opensearch.alerting.opensearchapi.suspendUntil import org.opensearch.alerting.service.DeleteMonitorService import org.opensearch.alerting.settings.AlertingSettings import org.opensearch.cluster.service.ClusterService import org.opensearch.common.inject.Inject import org.opensearch.common.settings.Settings +import org.opensearch.common.xcontent.LoggingDeprecationHandler +import org.opensearch.common.xcontent.XContentHelper +import org.opensearch.common.xcontent.XContentType +import org.opensearch.commons.alerting.model.ScheduledJob +import org.opensearch.commons.alerting.util.AlertingException import org.opensearch.core.action.ActionListener +import org.opensearch.core.rest.RestStatus import org.opensearch.core.xcontent.NamedXContentRegistry import org.opensearch.tasks.Task import org.opensearch.transport.TransportService @@ -42,16 +53,27 @@ class TransportDeleteMonitorV2Action @Inject constructor( } override fun doExecute(task: Task, request: DeleteMonitorV2Request, actionListener: ActionListener) { -// val user = readUserFromThreadContext(client) -// -// if (!validateUserBackendRoles(user, actionListener)) { -// return -// } - // TOOD: when monitor is deleted, immediately expire all alerts it generated + val user = readUserFromThreadContext(client) + + if (!validateUserBackendRoles(user, actionListener)) { + return + } + scope.launch { try { - val deleteResponse = DeleteMonitorService.deleteMonitorV2(request.monitorV2Id, request.refreshPolicy) - actionListener.onResponse(deleteResponse) + val monitorV2 = getMonitorV2(request.monitorV2Id, actionListener) ?: return@launch + + val canDelete = user == null || !doFilterForUser(user) || + checkUserPermissionsWithResource(user, monitorV2!!.user, actionListener, "monitor_v2", request.monitorV2Id) + + if (canDelete) { + val deleteResponse = DeleteMonitorService.deleteMonitorV2(request.monitorV2Id, request.refreshPolicy) + actionListener.onResponse(deleteResponse) + 
} else { + actionListener.onFailure( + AlertingException("Not allowed to delete this monitor_v2", RestStatus.FORBIDDEN, IllegalStateException()) + ) + } } catch (e: Exception) { actionListener.onFailure(e) } @@ -59,4 +81,40 @@ class TransportDeleteMonitorV2Action @Inject constructor( // we do not expire the alerts associated with the deleted monitor, but instead let its expiration time delete it } } + + private suspend fun getMonitorV2(monitorV2Id: String, actionListener: ActionListener): MonitorV2? { + val getRequest = GetRequest(ScheduledJob.SCHEDULED_JOBS_INDEX, monitorV2Id) + + val getResponse: GetResponse = client.suspendUntil { get(getRequest, it) } + if (!getResponse.isExists) { + actionListener.onFailure( + AlertingException.wrap( + OpenSearchStatusException("MonitorV2 with $monitorV2Id is not found", RestStatus.NOT_FOUND) + ) + ) + return null + } + val xcp = XContentHelper.createParser( + xContentRegistry, LoggingDeprecationHandler.INSTANCE, + getResponse.sourceAsBytesRef, XContentType.JSON + ) + + val monitorV2: MonitorV2? 
+ try { + monitorV2 = ScheduledJob.parse(xcp, getResponse.id, getResponse.version) as MonitorV2 + } catch (e: ClassCastException) { + // if ScheduledJob parsed the object and could not cast it to MonitorV2, we must + // have gotten a Monitor V1 from the given ID + actionListener.onFailure( + AlertingException.wrap( + IllegalArgumentException( + "The ID given corresponds to a V1 Monitor, please pass in the ID of a V2 Monitor" + ) + ) + ) + return null + } + + return monitorV2 + } } diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportExecuteMonitorV2Action.kt b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportExecuteMonitorV2Action.kt index 6a5654ad6..5b1cb436f 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportExecuteMonitorV2Action.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportExecuteMonitorV2Action.kt @@ -23,8 +23,10 @@ import org.opensearch.common.settings.Settings import org.opensearch.common.xcontent.LoggingDeprecationHandler import org.opensearch.common.xcontent.XContentHelper import org.opensearch.common.xcontent.XContentType +import org.opensearch.commons.ConfigConstants import org.opensearch.commons.alerting.model.ScheduledJob import org.opensearch.commons.alerting.util.AlertingException +import org.opensearch.commons.authuser.User import org.opensearch.core.action.ActionListener import org.opensearch.core.rest.RestStatus import org.opensearch.core.xcontent.NamedXContentRegistry @@ -53,118 +55,127 @@ class TransportExecuteMonitorV2Action @Inject constructor( execMonitorV2Request: ExecuteMonitorV2Request, actionListener: ActionListener ) { -// client.threadPool().threadContext.stashContext().use { // TODO: include this when security plugin enabled - /* first define a function that will be used later to run MonitorV2s */ - val executeMonitorV2 = fun (monitorV2: MonitorV2) { - runner.launch { - // get execution time interval - val (periodStart, 
periodEnd) = if (execMonitorV2Request.requestStart != null) { - Pair( - Instant.ofEpochMilli(execMonitorV2Request.requestStart!!.millis), - Instant.ofEpochMilli(execMonitorV2Request.requestEnd.millis) - ) - } else { - monitorV2.schedule.getPeriodEndingAt(Instant.ofEpochMilli(execMonitorV2Request.requestEnd.millis)) - } + val userStr = client.threadPool().threadContext.getTransient(ConfigConstants.OPENSEARCH_SECURITY_USER_INFO_THREAD_CONTEXT) + log.debug("User and roles string from thread context: $userStr") + val user: User? = User.parse(userStr) - // call the MonitorRunnerService to execute the MonitorV2 - try { - val monitorV2Type = when (monitorV2) { - is PPLMonitor -> PPL_MONITOR_TYPE - else -> throw IllegalStateException("Unexpected MonitorV2 type: ${monitorV2.javaClass.name}") - } - log.info( - "Executing MonitorV2 from API - id: ${monitorV2.id}, type: $monitorV2Type, " + - "periodStart: $periodStart, periodEnd: $periodEnd, dryrun: ${execMonitorV2Request.dryrun}" - ) - val monitorV2RunResult = runner.runJobV2( - monitorV2, - periodStart, - periodEnd, - execMonitorV2Request.dryrun, - transportService - ) - withContext(Dispatchers.IO) { - actionListener.onResponse(ExecuteMonitorV2Response(monitorV2RunResult)) + client.threadPool().threadContext.stashContext().use { + /* first define a function that will be used later to run MonitorV2s */ + val executeMonitorV2 = fun (monitorV2: MonitorV2) { + runner.launch { + // get execution time interval + val (periodStart, periodEnd) = if (execMonitorV2Request.requestStart != null) { + Pair( + Instant.ofEpochMilli(execMonitorV2Request.requestStart!!.millis), + Instant.ofEpochMilli(execMonitorV2Request.requestEnd.millis) + ) + } else { + monitorV2.schedule.getPeriodEndingAt(Instant.ofEpochMilli(execMonitorV2Request.requestEnd.millis)) } - } catch (e: Exception) { - log.error("Unexpected error running monitor", e) - withContext(Dispatchers.IO) { - actionListener.onFailure(AlertingException.wrap(e)) + + // call the 
MonitorRunnerService to execute the MonitorV2 + try { + val monitorV2Type = when (monitorV2) { + is PPLMonitor -> PPL_MONITOR_TYPE + else -> throw IllegalStateException("Unexpected MonitorV2 type: ${monitorV2.javaClass.name}") + } + log.info( + "Executing MonitorV2 from API - id: ${monitorV2.id}, type: $monitorV2Type, " + + "periodStart: $periodStart, periodEnd: $periodEnd, dryrun: ${execMonitorV2Request.dryrun}" + ) + val monitorV2RunResult = runner.runJobV2( + monitorV2, + periodStart, + periodEnd, + execMonitorV2Request.dryrun, + transportService + ) + withContext(Dispatchers.IO) { + actionListener.onResponse(ExecuteMonitorV2Response(monitorV2RunResult)) + } + } catch (e: Exception) { + log.error("Unexpected error running monitor", e) + withContext(Dispatchers.IO) { + actionListener.onFailure(AlertingException.wrap(e)) + } } } } - } - /* now execute the MonitorV2 */ + /* now execute the MonitorV2 */ - // if both monitor_v2 id and object were passed in, ignore object and proceed with id - if (execMonitorV2Request.monitorId != null && execMonitorV2Request.monitorV2 != null) { - log.info( - "Both a monitor_v2 id and monitor_v2 object were passed in to ExecuteMonitorV2" + - "request. Proceeding to execute by monitor_v2 ID and ignoring monitor_v2 object." - ) - } + // if both monitor_v2 id and object were passed in, ignore object and proceed with id + if (execMonitorV2Request.monitorId != null && execMonitorV2Request.monitorV2 != null) { + log.info( + "Both a monitor_v2 id and monitor_v2 object were passed in to ExecuteMonitorV2" + + "request. Proceeding to execute by monitor_v2 ID and ignoring monitor_v2 object." 
+ ) + } - if (execMonitorV2Request.monitorId != null) { // execute with monitor ID case - // search the alerting-config index for the MonitorV2 with this ID - val getMonitorV2Request = GetRequest(ScheduledJob.SCHEDULED_JOBS_INDEX).id(execMonitorV2Request.monitorId) - client.get( - getMonitorV2Request, - object : ActionListener { - override fun onResponse(getMonitorV2Response: GetResponse) { - if (!getMonitorV2Response.isExists) { - actionListener.onFailure( - AlertingException.wrap( - OpenSearchStatusException( - "Can't find monitorV2 with id: ${getMonitorV2Response.id}", - RestStatus.NOT_FOUND + if (execMonitorV2Request.monitorId != null) { // execute with monitor ID case + // search the alerting-config index for the MonitorV2 with this ID + val getMonitorV2Request = GetRequest(ScheduledJob.SCHEDULED_JOBS_INDEX).id(execMonitorV2Request.monitorId) + client.get( + getMonitorV2Request, + object : ActionListener { + override fun onResponse(getMonitorV2Response: GetResponse) { + if (!getMonitorV2Response.isExists) { + actionListener.onFailure( + AlertingException.wrap( + OpenSearchStatusException( + "Can't find monitorV2 with id: ${getMonitorV2Response.id}", + RestStatus.NOT_FOUND + ) ) ) - ) - return - } - if (!getMonitorV2Response.isSourceEmpty) { - XContentHelper.createParser( - xContentRegistry, LoggingDeprecationHandler.INSTANCE, - getMonitorV2Response.sourceAsBytesRef, XContentType.JSON - ).use { xcp -> - try { - val monitorV2 = ScheduledJob.parse( - xcp, - getMonitorV2Response.id, - getMonitorV2Response.version - ) as MonitorV2 - executeMonitorV2(monitorV2) - } catch (e: ClassCastException) { - actionListener.onFailure( - AlertingException.wrap( - IllegalArgumentException( - "Passed in Monitor ID is a legacy Alerting Monitor, please pass in an " + - "Alerting V2 Monitor" + return + } + if (!getMonitorV2Response.isSourceEmpty) { + XContentHelper.createParser( + xContentRegistry, LoggingDeprecationHandler.INSTANCE, + getMonitorV2Response.sourceAsBytesRef, 
XContentType.JSON + ).use { xcp -> + try { + val monitorV2 = ScheduledJob.parse( + xcp, + getMonitorV2Response.id, + getMonitorV2Response.version + ) as MonitorV2 + executeMonitorV2(monitorV2) + } catch (e: ClassCastException) { + actionListener.onFailure( + AlertingException.wrap( + IllegalArgumentException( + "Passed in Monitor ID is a legacy Alerting Monitor, please pass in an " + + "Alerting V2 Monitor" + ) ) ) - ) - } catch (e: Exception) { - actionListener.onFailure(AlertingException.wrap(e)) + } catch (e: Exception) { + actionListener.onFailure(AlertingException.wrap(e)) + } } } } - } - override fun onFailure(t: Exception) { - actionListener.onFailure(AlertingException.wrap(t)) + override fun onFailure(t: Exception) { + actionListener.onFailure(AlertingException.wrap(t)) + } } + ) + } else { // execute with monitor object case + try { + val monitorV2 = when (execMonitorV2Request.monitorV2) { + is PPLMonitor -> execMonitorV2Request.monitorV2.copy(user = user) + else -> throw IllegalStateException( + "unexpected MonitorV2 type: ${execMonitorV2Request.monitorV2!!.javaClass.name}" + ) + } + executeMonitorV2(monitorV2) + } catch (e: Exception) { + actionListener.onFailure(AlertingException.wrap(e)) } - ) - } else { // execute with monitor object case - try { - val monitorV2 = execMonitorV2Request.monitorV2 as MonitorV2 - executeMonitorV2(monitorV2) - } catch (e: Exception) { - actionListener.onFailure(AlertingException.wrap(e)) } } -// } } } diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportGetAlertsV2Action.kt b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportGetAlertsV2Action.kt index 3bc68491b..04ee04047 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportGetAlertsV2Action.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportGetAlertsV2Action.kt @@ -13,6 +13,7 @@ import org.opensearch.alerting.actionv2.GetAlertsV2Request import 
org.opensearch.alerting.actionv2.GetAlertsV2Response import org.opensearch.alerting.alertsv2.AlertV2Indices import org.opensearch.alerting.core.modelv2.AlertV2 +import org.opensearch.alerting.opensearchapi.addFilter import org.opensearch.alerting.settings.AlertingSettings import org.opensearch.alerting.util.use import org.opensearch.cluster.service.ClusterService @@ -22,6 +23,7 @@ import org.opensearch.common.xcontent.LoggingDeprecationHandler import org.opensearch.common.xcontent.XContentHelper import org.opensearch.common.xcontent.XContentType import org.opensearch.commons.alerting.util.AlertingException +import org.opensearch.commons.authuser.User import org.opensearch.core.action.ActionListener import org.opensearch.core.common.io.stream.NamedWriteableRegistry import org.opensearch.core.xcontent.NamedXContentRegistry @@ -68,7 +70,7 @@ class TransportGetAlertsV2Action @Inject constructor( getAlertsV2Request: GetAlertsV2Request, actionListener: ActionListener, ) { -// val user = readUserFromThreadContext(client) + val user = readUserFromThreadContext(client) val tableProp = getAlertsV2Request.table val sortBuilder = SortBuilders @@ -84,7 +86,7 @@ class TransportGetAlertsV2Action @Inject constructor( queryBuilder.filter(QueryBuilders.termQuery("severity", getAlertsV2Request.severityLevel)) } - if (getAlertsV2Request.alertV2Ids.isNullOrEmpty() == false) { + if (!getAlertsV2Request.alertV2Ids.isNullOrEmpty()) { queryBuilder.filter(QueryBuilders.termsQuery("_id", getAlertsV2Request.alertV2Ids)) } @@ -115,7 +117,7 @@ class TransportGetAlertsV2Action @Inject constructor( client.threadPool().threadContext.stashContext().use { scope.launch { try { - getAlerts(AlertV2Indices.ALERT_V2_INDEX, searchSourceBuilder, actionListener/*, user*/) + getAlerts(AlertV2Indices.ALERT_V2_INDEX, searchSourceBuilder, actionListener, user) } catch (t: Exception) { log.error("Failed to get alerts", t) if (t is AlertingException) { @@ -132,27 +134,17 @@ class TransportGetAlertsV2Action 
@Inject constructor( alertIndex: String, searchSourceBuilder: SearchSourceBuilder, actionListener: ActionListener, -// user: User?, + user: User? ) { - // TODO: when implementing RBAC, pivot to this implementation -// // user is null when: 1/ security is disabled. 2/when user is super-admin. -// if (user == null) { -// // user is null when: 1/ security is disabled. 2/when user is super-admin. -// search(alertIndex, searchSourceBuilder, actionListener) -// } else if (!doFilterForUser(user)) { -// // security is enabled and filterby is disabled. -// search(alertIndex, searchSourceBuilder, actionListener) -// } else { -// // security is enabled and filterby is enabled. -// try { -// log.info("Filtering result by: ${user.backendRoles}") -// addFilter(user, searchSourceBuilder, "monitor_user.backend_roles.keyword") -// search(alertIndex, searchSourceBuilder, actionListener) -// } catch (ex: IOException) { -// actionListener.onFailure(AlertingException.wrap(ex)) -// } -// } try { + // if user is null, security plugin is disabled or user is super-admin + // if doFilterForUser() is false, security is enabled but filterby is disabled + if (user != null && doFilterForUser(user)) { + // if security is enabled and filterby is enabled, add search filter + log.info("Filtering result by: ${user.backendRoles}") + addFilter(user, searchSourceBuilder, "monitor.user.backend_roles.keyword") + } + search(alertIndex, searchSourceBuilder, actionListener) } catch (ex: IOException) { actionListener.onFailure(AlertingException.wrap(ex)) diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportGetMonitorV2Action.kt b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportGetMonitorV2Action.kt index 034a90d99..3e430be66 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportGetMonitorV2Action.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportGetMonitorV2Action.kt @@ -58,62 +58,77 @@ class 
TransportGetMonitorV2Action @Inject constructor( .version(request.version) .fetchSourceContext(request.srcContext) -// if (!validateUserBackendRoles(user, actionListener)) { -// return -// } + val user = readUserFromThreadContext(client) -// client.threadPool().threadContext.stashContext().use { - client.get( - getRequest, - object : ActionListener { - override fun onResponse(response: GetResponse) { - if (!response.isExists) { - actionListener.onFailure( - AlertingException.wrap(OpenSearchStatusException("MonitorV2 not found.", RestStatus.NOT_FOUND)) - ) - return - } + if (!validateUserBackendRoles(user, actionListener)) { + return + } - var monitorV2: MonitorV2? = null - if (!response.isSourceEmpty) { - XContentHelper.createParser( - xContentRegistry, - LoggingDeprecationHandler.INSTANCE, - response.sourceAsBytesRef, - XContentType.JSON - ).use { xcp -> - monitorV2 = ScheduledJob.parse(xcp, response.id, response.version) as MonitorV2 -// -// // security is enabled and filterby is enabled -// if (!checkUserPermissionsWithResource( -// user, -// monitor?.user, -// actionListener, -// "monitor", -// transformedRequest.monitorId -// ) -// ) { -// return -// } + client.threadPool().threadContext.stashContext().use { + client.get( + getRequest, + object : ActionListener { + override fun onResponse(response: GetResponse) { + if (!response.isExists) { + actionListener.onFailure( + AlertingException.wrap(OpenSearchStatusException("MonitorV2 not found.", RestStatus.NOT_FOUND)) + ) + return + } + + var monitorV2: MonitorV2? 
= null + try { + if (!response.isSourceEmpty) { + XContentHelper.createParser( + xContentRegistry, + LoggingDeprecationHandler.INSTANCE, + response.sourceAsBytesRef, + XContentType.JSON + ).use { xcp -> + monitorV2 = ScheduledJob.parse(xcp, response.id, response.version) as MonitorV2 + } + } + } catch (e: ClassCastException) { + // if ScheduledJob parsed the object and could not cast it to MonitorV2, we must + // have gotten a Monitor V1 from the given ID + actionListener.onFailure( + AlertingException.wrap( + IllegalArgumentException( + "The ID given corresponds to a V1 Monitor, please pass in the ID of a V2 Monitor" + ) + ) + ) + return } - } - actionListener.onResponse( - GetMonitorV2Response( - response.id, - response.version, - response.seqNo, - response.primaryTerm, - monitorV2 + // security is enabled and filterby is enabled + if (!checkUserPermissionsWithResource( + user, + monitorV2?.user, + actionListener, + "monitor", + request.monitorV2Id + ) + ) { + return + } + + actionListener.onResponse( + GetMonitorV2Response( + response.id, + response.version, + response.seqNo, + response.primaryTerm, + monitorV2 + ) ) - ) - } + } - override fun onFailure(t: Exception) { - actionListener.onFailure(AlertingException.wrap(t)) + override fun onFailure(t: Exception) { + actionListener.onFailure(AlertingException.wrap(t)) + } } - } - ) -// } + ) + } } } diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportIndexMonitorV2Action.kt b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportIndexMonitorV2Action.kt index 68fb3169e..a2901a8df 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportIndexMonitorV2Action.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportIndexMonitorV2Action.kt @@ -181,8 +181,13 @@ class TransportIndexMonitorV2Action @Inject constructor( indexMonitorV2Request: IndexMonitorV2Request ) { /* check initial user permissions */ + val headers = 
client.threadPool().threadContext.headers + log.info("Headers in transport layer: $headers") + val user = readUserFromThreadContext(client) + log.info("user in checkUserAndIndicesAccess: $user") + if (!validateUserBackendRoles(user, actionListener)) { return } @@ -205,7 +210,7 @@ class TransportIndexMonitorV2Action @Inject constructor( ) ) return - } else if (indexMonitorV2Request.rbacRoles.isEmpty() == true) { + } else if (indexMonitorV2Request.rbacRoles.isEmpty()) { log.debug( "Non-admin user are not allowed to specify an empty set of backend roles. " + "Please don't pass in the parameter or pass in at least one backend role." @@ -235,6 +240,7 @@ class TransportIndexMonitorV2Action @Inject constructor( actionListener: ActionListener, user: User? ) { + log.info("user in checkPplQueryIndices: $user") val pplMonitor = indexMonitorV2Request.monitorV2 as PPLMonitor val pplQuery = pplMonitor.query val indices = getIndicesFromPplQuery(pplQuery) @@ -245,7 +251,7 @@ class TransportIndexMonitorV2Action @Inject constructor( searchRequest, object : ActionListener { override fun onResponse(searchResponse: SearchResponse) { - // User has read access to configured indices in the monitor, now create monitor with out user context. + // User has read access to configured indices in the monitor, now create monitor without user context. client.threadPool().threadContext.stashContext().use { if (user == null) { // Security is disabled, add empty user to Monitor. user is null for older versions. @@ -285,6 +291,7 @@ class TransportIndexMonitorV2Action @Inject constructor( actionListener: ActionListener, user: User? 
) { + log.info("user in checkScheduledJobIndex: $user") /* check to see if alerting-config index (scheduled job index) is created and updated before indexing MonitorV2 into it */ if (!scheduledJobIndices.scheduledJobIndexExists()) { // if alerting-config index doesn't exist, send request to create it scheduledJobIndices.initScheduledJobIndex(object : ActionListener { @@ -339,6 +346,7 @@ class TransportIndexMonitorV2Action @Inject constructor( actionListener: ActionListener, user: User? ) { + log.info("user in onCreateMappingsResponse: $user") if (isAcknowledged) { log.info("Created $SCHEDULED_JOBS_INDEX with mappings.") prepareMonitorIndexing(request, actionListener, user) @@ -361,6 +369,7 @@ class TransportIndexMonitorV2Action @Inject constructor( actionListener: ActionListener, user: User? ) { + log.info("user in onUpdateMappingsResponse: $user") if (response.isAcknowledged) { log.info("Updated $SCHEDULED_JOBS_INDEX with mappings.") IndexUtils.scheduledJobIndexUpdated() @@ -388,6 +397,7 @@ class TransportIndexMonitorV2Action @Inject constructor( actionListener: ActionListener, user: User? 
) { + log.info("user in prepareMonitorIndexing: $user") if (indexMonitorRequest.method == RestRequest.Method.PUT) { // update monitor case scope.launch { updateMonitor(indexMonitorRequest, actionListener, user) @@ -436,8 +446,18 @@ class TransportIndexMonitorV2Action @Inject constructor( ) val monitorV2 = ScheduledJob.parse(xcp, getResponse.id, getResponse.version) as MonitorV2 onGetMonitorResponseForUpdate(monitorV2, indexMonitorRequest, actionListener, user) - } catch (t: Exception) { - actionListener.onFailure(AlertingException.wrap(t)) + } catch (e: ClassCastException) { + // if ScheduledJob parsed the object and could not cast it to MonitorV2, we must + // have gotten a Monitor V1 from the given ID + actionListener.onFailure( + AlertingException.wrap( + IllegalArgumentException( + "The ID given corresponds to a V1 Monitor, please pass in the ID of a V2 Monitor" + ) + ) + ) + } catch (e: Exception) { + actionListener.onFailure(AlertingException.wrap(e)) } } @@ -517,6 +537,7 @@ class TransportIndexMonitorV2Action @Inject constructor( .setRefreshPolicy(indexMonitorRequest.refreshPolicy) .source(newMonitorV2.toXContentWithUser(jsonBuilder(), ToXContent.MapParams(mapOf("with_type" to "true")))) .id(indexMonitorRequest.monitorId) + .routing(indexMonitorRequest.monitorId) .setIfSeqNo(indexMonitorRequest.seqNo) .setIfPrimaryTerm(indexMonitorRequest.primaryTerm) .timeout(indexTimeout) @@ -560,6 +581,7 @@ class TransportIndexMonitorV2Action @Inject constructor( actionListener: ActionListener, user: User? ) { + log.info("user in onMonitorCountSearchResponse: $user") val totalHits = monitorCountSearchResponse.hits.totalHits?.value if (totalHits != null && totalHits >= maxMonitors) { log.info("This request would create more than the allowed monitors [$maxMonitors].") @@ -582,6 +604,7 @@ class TransportIndexMonitorV2Action @Inject constructor( actionListener: ActionListener, user: User? 
) { + log.info("user in indexMonitor: $user") var monitorV2 = when (indexMonitorRequest.monitorV2) { is PPLMonitor -> indexMonitorRequest.monitorV2 as PPLMonitor else -> throw IllegalArgumentException("received unsupported monitor type to index: ${indexMonitorRequest.monitorV2.javaClass}") @@ -609,6 +632,7 @@ class TransportIndexMonitorV2Action @Inject constructor( val indexRequest = IndexRequest(SCHEDULED_JOBS_INDEX) .setRefreshPolicy(indexMonitorRequest.refreshPolicy) .source(monitorV2.toXContentWithUser(jsonBuilder(), ToXContent.MapParams(mapOf("with_type" to "true")))) + .routing(indexMonitorRequest.monitorId) .setIfSeqNo(indexMonitorRequest.seqNo) .setIfPrimaryTerm(indexMonitorRequest.primaryTerm) .timeout(indexTimeout) diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportSearchMonitorV2Action.kt b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportSearchMonitorV2Action.kt index 3b58b4bdd..bf109e9a7 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportSearchMonitorV2Action.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportSearchMonitorV2Action.kt @@ -7,6 +7,7 @@ import org.opensearch.action.support.HandledTransportAction import org.opensearch.alerting.actionv2.SearchMonitorV2Action import org.opensearch.alerting.actionv2.SearchMonitorV2Request import org.opensearch.alerting.core.modelv2.MonitorV2.Companion.MONITOR_V2_TYPE +import org.opensearch.alerting.opensearchapi.addFilter import org.opensearch.alerting.settings.AlertingSettings import org.opensearch.cluster.service.ClusterService import org.opensearch.common.inject.Inject @@ -59,23 +60,27 @@ class TransportSearchMonitorV2Action @Inject constructor( .seqNoAndPrimaryTerm(true) .version(true) -// addOwnerFieldIfNotExists(transformedRequest.searchRequest) -// val user = readUserFromThreadContext(client) -// client.threadPool().threadContext.stashContext().use { -// resolve(transformedRequest, actionListener, 
user) -// } + val user = readUserFromThreadContext(client) + client.threadPool().threadContext.stashContext().use { + // if user is null, security plugin is disabled or user is super-admin + // if doFilterForUser() is false, security is enabled but filterby is disabled + if (user != null && doFilterForUser(user)) { + log.info("Filtering result by: ${user.backendRoles}") + addFilter(user, request.searchRequest.source(), "monitor.user.backend_roles.keyword") + } - client.search( - request.searchRequest, - object : ActionListener { - override fun onResponse(response: SearchResponse) { - actionListener.onResponse(response) - } + client.search( + request.searchRequest, + object : ActionListener { + override fun onResponse(response: SearchResponse) { + actionListener.onResponse(response) + } - override fun onFailure(t: Exception) { - actionListener.onFailure(AlertingException.wrap(t)) + override fun onFailure(t: Exception) { + actionListener.onFailure(AlertingException.wrap(t)) + } } - } - ) + ) + } } } diff --git a/core/src/main/kotlin/org/opensearch/alerting/core/modelv2/AlertV2.kt b/core/src/main/kotlin/org/opensearch/alerting/core/modelv2/AlertV2.kt index 1dae3dc73..4e153f780 100644 --- a/core/src/main/kotlin/org/opensearch/alerting/core/modelv2/AlertV2.kt +++ b/core/src/main/kotlin/org/opensearch/alerting/core/modelv2/AlertV2.kt @@ -9,6 +9,7 @@ import org.opensearch.commons.alerting.model.Alert.Companion.ERROR_MESSAGE_FIELD import org.opensearch.commons.alerting.model.Alert.Companion.EXECUTION_ID_FIELD import org.opensearch.commons.alerting.model.Alert.Companion.MONITOR_ID_FIELD import org.opensearch.commons.alerting.model.Alert.Companion.MONITOR_NAME_FIELD +import org.opensearch.commons.alerting.model.Alert.Companion.MONITOR_USER_FIELD import org.opensearch.commons.alerting.model.Alert.Companion.MONITOR_VERSION_FIELD import org.opensearch.commons.alerting.model.Alert.Companion.NO_ID import org.opensearch.commons.alerting.model.Alert.Companion.NO_VERSION @@ -18,6 
+19,8 @@ import org.opensearch.commons.alerting.model.Alert.Companion.TRIGGER_ID_FIELD import org.opensearch.commons.alerting.model.Alert.Companion.TRIGGER_NAME_FIELD import org.opensearch.commons.alerting.util.IndexUtils.Companion.NO_SCHEMA_VERSION import org.opensearch.commons.alerting.util.instant +import org.opensearch.commons.alerting.util.optionalUserField +import org.opensearch.commons.authuser.User import org.opensearch.core.common.io.stream.StreamInput import org.opensearch.core.common.io.stream.StreamOutput import org.opensearch.core.common.io.stream.Writeable @@ -64,7 +67,7 @@ data class AlertV2( val monitorId: String, val monitorName: String, val monitorVersion: Long, -// val monitorUser: User?, + val monitorUser: User?, val triggerId: String, val triggerName: String, val queryResults: Map, @@ -82,11 +85,11 @@ data class AlertV2( monitorId = sin.readString(), monitorName = sin.readString(), monitorVersion = sin.readLong(), -// monitorUser = if (sin.readBoolean()) { -// User(sin) -// } else { -// null -// }, + monitorUser = if (sin.readBoolean()) { + User(sin) + } else { + null + }, triggerId = sin.readString(), triggerName = sin.readString(), queryResults = sin.readMap()!!.toMap(), @@ -105,8 +108,8 @@ data class AlertV2( out.writeString(monitorId) out.writeString(monitorName) out.writeLong(monitorVersion) -// out.writeBoolean(monitorUser != null) -// monitorUser?.writeTo(out) + out.writeBoolean(monitorUser != null) + monitorUser?.writeTo(out) out.writeString(triggerId) out.writeString(triggerName) out.writeMap(queryResults) @@ -118,6 +121,14 @@ data class AlertV2( } override fun toXContent(builder: XContentBuilder, params: ToXContent.Params): XContentBuilder { + return createXContentBuilder(builder, false) + } + + fun toXContentWithUser(builder: XContentBuilder): XContentBuilder { + return createXContentBuilder(builder, true) + } + + private fun createXContentBuilder(builder: XContentBuilder, withUser: Boolean): XContentBuilder { builder.startObject() 
.field(ALERT_ID_FIELD, id) .field(ALERT_VERSION_FIELD, version) @@ -133,11 +144,12 @@ data class AlertV2( .field(SEVERITY_FIELD, severity.value) .nonOptionalTimeField(TRIGGERED_TIME_FIELD, triggeredTime) .nonOptionalTimeField(EXPIRATION_TIME_FIELD, expirationTime) - .endObject() -// if (!secure) { -// builder.optionalUserField(MONITOR_USER_FIELD, monitorUser) -// } + if (withUser) { + builder.optionalUserField(MONITOR_USER_FIELD, monitorUser) + } + + builder.endObject() return builder } @@ -166,7 +178,7 @@ data class AlertV2( lateinit var monitorId: String lateinit var monitorName: String var monitorVersion: Long = Versions.NOT_FOUND -// var monitorUser: User? = null + var monitorUser: User? = null lateinit var triggerId: String lateinit var triggerName: String var queryResults: Map = mapOf() @@ -186,12 +198,12 @@ data class AlertV2( SCHEMA_VERSION_FIELD -> schemaVersion = xcp.intValue() MONITOR_NAME_FIELD -> monitorName = xcp.text() MONITOR_VERSION_FIELD -> monitorVersion = xcp.longValue() -// MONITOR_USER_FIELD -> -// monitorUser = if (xcp.currentToken() == XContentParser.Token.VALUE_NULL) { -// null -// } else { -// User.parse(xcp) -// } + MONITOR_USER_FIELD -> + monitorUser = if (xcp.currentToken() == XContentParser.Token.VALUE_NULL) { + null + } else { + User.parse(xcp) + } TRIGGER_ID_FIELD -> triggerId = xcp.text() TRIGGER_NAME_FIELD -> triggerName = xcp.text() QUERY_RESULTS_FIELD -> queryResults = xcp.map() @@ -218,7 +230,7 @@ data class AlertV2( monitorId = requireNotNull(monitorId), monitorName = requireNotNull(monitorName), monitorVersion = monitorVersion, -// monitorUser = monitorUser, + monitorUser = monitorUser, triggerId = requireNotNull(triggerId), triggerName = requireNotNull(triggerName), queryResults = requireNotNull(queryResults), diff --git a/core/src/main/kotlin/org/opensearch/alerting/core/modelv2/PPLMonitor.kt b/core/src/main/kotlin/org/opensearch/alerting/core/modelv2/PPLMonitor.kt index 2df7e46b5..7d882fedf 100644 --- 
a/core/src/main/kotlin/org/opensearch/alerting/core/modelv2/PPLMonitor.kt +++ b/core/src/main/kotlin/org/opensearch/alerting/core/modelv2/PPLMonitor.kt @@ -136,10 +136,6 @@ data class PPLMonitor( // read in monitor type, then delegate to correct parse() function builder.startObject(PPL_MONITOR_TYPE) // monitor type start object - if (withUser) { - builder.optionalUserField(USER_FIELD, user) - } - builder.field(NAME_FIELD, name) builder.field(SCHEDULE_FIELD, schedule) builder.field(LOOK_BACK_WINDOW_FIELD, lookBackWindow?.toHumanReadableString(0)) @@ -151,6 +147,10 @@ data class PPLMonitor( builder.field(QUERY_LANGUAGE_FIELD, queryLanguage.value) builder.field(QUERY_FIELD, query) + if (withUser) { + builder.optionalUserField(USER_FIELD, user) + } + builder.endObject() // monitor type end object // if ScheduledJob metadata was added, end the extra object layer that was created From 50b570f74ec36800d04698b348bd6434dc5d4873 Mon Sep 17 00:00:00 2001 From: Dennis Toepker Date: Fri, 19 Sep 2025 10:48:57 -0700 Subject: [PATCH 34/40] separate alerting v1 and v2 --- .../opensearch/alerting/AlertingV2Utils.kt | 30 ++++++++++++++ .../opensearch/alerting/PPLMonitorRunner.kt | 1 + .../resthandler/RestGetMonitorV2Action.kt | 2 +- .../transport/TransportDeleteMonitorAction.kt | 18 +++++++-- .../TransportDeleteMonitorV2Action.kt | 20 ++++------ .../transport/TransportGetMonitorAction.kt | 10 ++++- .../transport/TransportGetMonitorV2Action.kt | 40 +++++++++---------- .../transport/TransportIndexMonitorAction.kt | 11 ++++- .../TransportIndexMonitorV2Action.kt | 24 +++++------ 9 files changed, 101 insertions(+), 55 deletions(-) create mode 100644 alerting/src/main/kotlin/org/opensearch/alerting/AlertingV2Utils.kt diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/AlertingV2Utils.kt b/alerting/src/main/kotlin/org/opensearch/alerting/AlertingV2Utils.kt new file mode 100644 index 000000000..70df08ffe --- /dev/null +++ 
b/alerting/src/main/kotlin/org/opensearch/alerting/AlertingV2Utils.kt @@ -0,0 +1,30 @@ +package org.opensearch.alerting + +import org.opensearch.alerting.core.modelv2.MonitorV2 +import org.opensearch.commons.alerting.model.Monitor +import org.opensearch.commons.alerting.model.ScheduledJob + +object AlertingV2Utils { + + // Validates that the given scheduled job is a Monitor + // returns the exception to pass into actionListener.onFailure if not. + fun validateMonitorV1(scheduledJob: ScheduledJob): Exception? { + if (scheduledJob is MonitorV2) { + return IllegalArgumentException("The ID given corresponds to a V2 Monitor, please pass in the ID of a V1 Monitor") + } else if (scheduledJob !is Monitor) { + return IllegalArgumentException("The ID given corresponds to a scheduled job of unknown type: ${scheduledJob.javaClass.name}") + } + return null + } + + // Validates that the given scheduled job is a MonitorV2 + // returns the exception to pass into actionListener.onFailure if not. + fun validateMonitorV2(scheduledJob: ScheduledJob): Exception? 
{ + if (scheduledJob is Monitor) { + return IllegalArgumentException("The ID given corresponds to a V1 Monitor, please pass in the ID of a V2 Monitor") + } else if (scheduledJob !is MonitorV2) { + return IllegalArgumentException("The ID given corresponds to a scheduled job of unknown type: ${scheduledJob.javaClass.name}") + } + return null + } +} diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt b/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt index d76ea3024..912d54548 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt @@ -501,6 +501,7 @@ object PPLMonitorRunner : MonitorV2Runner { } } + // TODO: every time this is done, trigger and action IDs change, figure out how to retain IDs private suspend fun updateMonitorWithLastTriggeredTimes(pplMonitor: PPLMonitor, client: NodeClient) { val indexRequest = IndexRequest(SCHEDULED_JOBS_INDEX) .id(pplMonitor.id) diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestGetMonitorV2Action.kt b/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestGetMonitorV2Action.kt index a6a7e6e0b..d053e42f0 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestGetMonitorV2Action.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestGetMonitorV2Action.kt @@ -24,7 +24,7 @@ class RestGetMonitorV2Action : BaseRestHandler() { } override fun routes(): List { - return mutableListOf( + return listOf( Route( GET, "${AlertingPlugin.MONITOR_V2_BASE_URI}/{monitorV2Id}" diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportDeleteMonitorAction.kt b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportDeleteMonitorAction.kt index b28311bd0..39b96f2b0 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportDeleteMonitorAction.kt +++ 
b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportDeleteMonitorAction.kt @@ -16,6 +16,7 @@ import org.opensearch.action.get.GetResponse import org.opensearch.action.support.ActionFilters import org.opensearch.action.support.HandledTransportAction import org.opensearch.action.support.WriteRequest.RefreshPolicy +import org.opensearch.alerting.AlertingV2Utils.validateMonitorV1 import org.opensearch.alerting.opensearchapi.suspendUntil import org.opensearch.alerting.service.DeleteMonitorService import org.opensearch.alerting.settings.AlertingSettings @@ -87,7 +88,7 @@ class TransportDeleteMonitorAction @Inject constructor( ) { suspend fun resolveUserAndStart(refreshPolicy: RefreshPolicy) { try { - val monitor = getMonitor() + val monitor = getMonitor() ?: return // null means there was an issue retrieving the Monitor val canDelete = user == null || !doFilterForUser(user) || checkUserPermissionsWithResource(user, monitor.user, actionListener, "monitor", monitorId) @@ -115,11 +116,11 @@ class TransportDeleteMonitorAction @Inject constructor( } } - private suspend fun getMonitor(): Monitor { + private suspend fun getMonitor(): Monitor? 
{ val getRequest = GetRequest(ScheduledJob.SCHEDULED_JOBS_INDEX, monitorId) val getResponse: GetResponse = client.suspendUntil { get(getRequest, it) } - if (getResponse.isExists == false) { + if (!getResponse.isExists) { actionListener.onFailure( AlertingException.wrap( OpenSearchStatusException("Monitor with $monitorId is not found", RestStatus.NOT_FOUND) @@ -130,7 +131,16 @@ class TransportDeleteMonitorAction @Inject constructor( xContentRegistry, LoggingDeprecationHandler.INSTANCE, getResponse.sourceAsBytesRef, XContentType.JSON ) - return ScheduledJob.parse(xcp, getResponse.id, getResponse.version) as Monitor + val scheduledJob = ScheduledJob.parse(xcp, getResponse.id, getResponse.version) + + validateMonitorV1(scheduledJob)?.let { + actionListener.onFailure(AlertingException.wrap(it)) + return null + } + + val monitor = scheduledJob as Monitor + + return monitor } } } diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportDeleteMonitorV2Action.kt b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportDeleteMonitorV2Action.kt index 80b93a494..b764bb035 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportDeleteMonitorV2Action.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportDeleteMonitorV2Action.kt @@ -9,6 +9,7 @@ import org.opensearch.action.get.GetRequest import org.opensearch.action.get.GetResponse import org.opensearch.action.support.ActionFilters import org.opensearch.action.support.HandledTransportAction +import org.opensearch.alerting.AlertingV2Utils.validateMonitorV2 import org.opensearch.alerting.actionv2.DeleteMonitorV2Action import org.opensearch.alerting.actionv2.DeleteMonitorV2Request import org.opensearch.alerting.actionv2.DeleteMonitorV2Response @@ -94,27 +95,20 @@ class TransportDeleteMonitorV2Action @Inject constructor( ) return null } + val xcp = XContentHelper.createParser( xContentRegistry, LoggingDeprecationHandler.INSTANCE, 
getResponse.sourceAsBytesRef, XContentType.JSON ) + val scheduledJob = ScheduledJob.parse(xcp, getResponse.id, getResponse.version) - val monitorV2: MonitorV2? - try { - monitorV2 = ScheduledJob.parse(xcp, getResponse.id, getResponse.version) as MonitorV2 - } catch (e: ClassCastException) { - // if ScheduledJob parsed the object and could not cast it to MonitorV2, we must - // have gotten a Monitor V1 from the given ID - actionListener.onFailure( - AlertingException.wrap( - IllegalArgumentException( - "The ID given corresponds to a V1 Monitor, please pass in the ID of a V2 Monitor" - ) - ) - ) + validateMonitorV2(scheduledJob)?.let { + actionListener.onFailure(AlertingException.wrap(it)) return null } + val monitorV2 = scheduledJob as MonitorV2 + return monitorV2 } } diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportGetMonitorAction.kt b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportGetMonitorAction.kt index ce42e6157..59049da44 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportGetMonitorAction.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportGetMonitorAction.kt @@ -18,6 +18,7 @@ import org.opensearch.action.search.SearchRequest import org.opensearch.action.search.SearchResponse import org.opensearch.action.support.ActionFilters import org.opensearch.action.support.HandledTransportAction +import org.opensearch.alerting.AlertingV2Utils.validateMonitorV1 import org.opensearch.alerting.opensearchapi.suspendUntil import org.opensearch.alerting.settings.AlertingSettings import org.opensearch.alerting.util.ScheduledJobUtils.Companion.WORKFLOW_DELEGATE_PATH @@ -114,7 +115,14 @@ class TransportGetMonitorAction @Inject constructor( response.sourceAsBytesRef, XContentType.JSON ).use { xcp -> - monitor = ScheduledJob.parse(xcp, response.id, response.version) as Monitor + val scheduledJob = ScheduledJob.parse(xcp, response.id, response.version) + + 
validateMonitorV1(scheduledJob)?.let { + actionListener.onFailure(AlertingException.wrap(it)) + return + } + + monitor = scheduledJob as Monitor // security is enabled and filterby is enabled if (!checkUserPermissionsWithResource( diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportGetMonitorV2Action.kt b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportGetMonitorV2Action.kt index 3e430be66..d74e29167 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportGetMonitorV2Action.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportGetMonitorV2Action.kt @@ -8,6 +8,7 @@ import org.opensearch.action.get.GetRequest import org.opensearch.action.get.GetResponse import org.opensearch.action.support.ActionFilters import org.opensearch.action.support.HandledTransportAction +import org.opensearch.alerting.AlertingV2Utils.validateMonitorV2 import org.opensearch.alerting.actionv2.GetMonitorV2Action import org.opensearch.alerting.actionv2.GetMonitorV2Request import org.opensearch.alerting.actionv2.GetMonitorV2Response @@ -76,35 +77,32 @@ class TransportGetMonitorV2Action @Inject constructor( return } - var monitorV2: MonitorV2? 
= null - try { - if (!response.isSourceEmpty) { - XContentHelper.createParser( - xContentRegistry, - LoggingDeprecationHandler.INSTANCE, - response.sourceAsBytesRef, - XContentType.JSON - ).use { xcp -> - monitorV2 = ScheduledJob.parse(xcp, response.id, response.version) as MonitorV2 - } - } - } catch (e: ClassCastException) { - // if ScheduledJob parsed the object and could not cast it to MonitorV2, we must - // have gotten a Monitor V1 from the given ID + if (response.isSourceEmpty) { actionListener.onFailure( - AlertingException.wrap( - IllegalArgumentException( - "The ID given corresponds to a V1 Monitor, please pass in the ID of a V2 Monitor" - ) - ) + AlertingException.wrap(OpenSearchStatusException("MonitorV2 found but was empty.", RestStatus.NO_CONTENT)) ) return } + val xcp = XContentHelper.createParser( + xContentRegistry, + LoggingDeprecationHandler.INSTANCE, + response.sourceAsBytesRef, + XContentType.JSON + ) + val scheduledJob = ScheduledJob.parse(xcp, response.id, response.version) + + validateMonitorV2(scheduledJob)?.let { + actionListener.onFailure(AlertingException.wrap(it)) + return + } + + val monitorV2 = scheduledJob as MonitorV2 + // security is enabled and filterby is enabled if (!checkUserPermissionsWithResource( user, - monitorV2?.user, + monitorV2.user, actionListener, "monitor", request.monitorV2Id diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportIndexMonitorAction.kt b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportIndexMonitorAction.kt index a5b849a67..4c747e737 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportIndexMonitorAction.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportIndexMonitorAction.kt @@ -29,6 +29,7 @@ import org.opensearch.action.support.ActionFilters import org.opensearch.action.support.HandledTransportAction import org.opensearch.action.support.WriteRequest.RefreshPolicy import 
org.opensearch.action.support.clustermanager.AcknowledgedResponse +import org.opensearch.alerting.AlertingV2Utils.validateMonitorV1 import org.opensearch.alerting.MonitorMetadataService import org.opensearch.alerting.core.ScheduledJobIndices import org.opensearch.alerting.opensearchapi.suspendUntil @@ -614,7 +615,15 @@ class TransportIndexMonitorAction @Inject constructor( xContentRegistry, LoggingDeprecationHandler.INSTANCE, getResponse.sourceAsBytesRef, XContentType.JSON ) - val monitor = ScheduledJob.parse(xcp, getResponse.id, getResponse.version) as Monitor + val scheduledJob = ScheduledJob.parse(xcp, getResponse.id, getResponse.version) + + validateMonitorV1(scheduledJob)?.let { + actionListener.onFailure(AlertingException.wrap(it)) + return + } + + val monitor = scheduledJob as Monitor + onGetResponse(monitor) } catch (t: Exception) { actionListener.onFailure(AlertingException.wrap(t)) diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportIndexMonitorV2Action.kt b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportIndexMonitorV2Action.kt index a2901a8df..41fb62259 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportIndexMonitorV2Action.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportIndexMonitorV2Action.kt @@ -22,6 +22,7 @@ import org.opensearch.action.search.SearchResponse import org.opensearch.action.support.ActionFilters import org.opensearch.action.support.HandledTransportAction import org.opensearch.action.support.clustermanager.AcknowledgedResponse +import org.opensearch.alerting.AlertingV2Utils.validateMonitorV2 import org.opensearch.alerting.PPLMonitorRunner.appendCustomCondition import org.opensearch.alerting.PPLMonitorRunner.executePplQuery import org.opensearch.alerting.PPLMonitorRunner.findEvalResultVar @@ -181,9 +182,6 @@ class TransportIndexMonitorV2Action @Inject constructor( indexMonitorV2Request: IndexMonitorV2Request ) { /* check initial 
user permissions */ - val headers = client.threadPool().threadContext.headers - log.info("Headers in transport layer: $headers") - val user = readUserFromThreadContext(client) log.info("user in checkUserAndIndicesAccess: $user") @@ -444,18 +442,16 @@ class TransportIndexMonitorV2Action @Inject constructor( xContentRegistry, LoggingDeprecationHandler.INSTANCE, getResponse.sourceAsBytesRef, XContentType.JSON ) - val monitorV2 = ScheduledJob.parse(xcp, getResponse.id, getResponse.version) as MonitorV2 + val scheduledJob = ScheduledJob.parse(xcp, getResponse.id, getResponse.version) + + validateMonitorV2(scheduledJob)?.let { + actionListener.onFailure(AlertingException.wrap(it)) + return + } + + val monitorV2 = scheduledJob as MonitorV2 + onGetMonitorResponseForUpdate(monitorV2, indexMonitorRequest, actionListener, user) - } catch (e: ClassCastException) { - // if ScheduledJob parsed the object and could not cast it to MonitorV2, we must - // have gotten a Monitor V1 from the given ID - actionListener.onFailure( - AlertingException.wrap( - IllegalArgumentException( - "The ID given corresponds to a V1 Monitor, please pass in the ID of a V2 Monitor" - ) - ) - ) } catch (e: Exception) { actionListener.onFailure(AlertingException.wrap(e)) } From d457ba862446afd815acad9d899f22d9903cf2ba Mon Sep 17 00:00:00 2001 From: Dennis Toepker Date: Fri, 19 Sep 2025 12:35:28 -0700 Subject: [PATCH 35/40] misc rbac bug fixes --- .../TransportDeleteMonitorV2Action.kt | 4 +-- .../TransportIndexMonitorV2Action.kt | 29 +++++++++++++------ .../TransportSearchMonitorV2Action.kt | 2 +- 3 files changed, 23 insertions(+), 12 deletions(-) diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportDeleteMonitorV2Action.kt b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportDeleteMonitorV2Action.kt index b764bb035..072f92cb6 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportDeleteMonitorV2Action.kt +++ 
b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportDeleteMonitorV2Action.kt @@ -72,7 +72,7 @@ class TransportDeleteMonitorV2Action @Inject constructor( actionListener.onResponse(deleteResponse) } else { actionListener.onFailure( - AlertingException("Not allowed to delete this monitor_v2", RestStatus.FORBIDDEN, IllegalStateException()) + AlertingException("Not allowed to delete this Monitor V2", RestStatus.FORBIDDEN, IllegalStateException()) ) } } catch (e: Exception) { @@ -90,7 +90,7 @@ class TransportDeleteMonitorV2Action @Inject constructor( if (!getResponse.isExists) { actionListener.onFailure( AlertingException.wrap( - OpenSearchStatusException("MonitorV2 with $monitorV2Id is not found", RestStatus.NOT_FOUND) + OpenSearchStatusException("Monitor V2 with $monitorV2Id is not found", RestStatus.NOT_FOUND) ) ) return null diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportIndexMonitorV2Action.kt b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportIndexMonitorV2Action.kt index 41fb62259..c9df6be36 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportIndexMonitorV2Action.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportIndexMonitorV2Action.kt @@ -88,21 +88,31 @@ class TransportIndexMonitorV2Action @Inject constructor( @Volatile private var maxMonitors = ALERTING_MAX_MONITORS.get(settings) @Volatile private var requestTimeout = REQUEST_TIMEOUT.get(settings) @Volatile private var indexTimeout = INDEX_TIMEOUT.get(settings) -// @Volatile private var maxActionThrottle = MAX_ACTION_THROTTLE_VALUE.get(settings) @Volatile override var filterByEnabled = AlertingSettings.FILTER_BY_BACKEND_ROLES.get(settings) + init { + clusterService.clusterSettings.addSettingsUpdateConsumer(ALERTING_MAX_MONITORS) { maxMonitors = it } + clusterService.clusterSettings.addSettingsUpdateConsumer(REQUEST_TIMEOUT) { requestTimeout = it } + 
clusterService.clusterSettings.addSettingsUpdateConsumer(INDEX_TIMEOUT) { indexTimeout = it } + listenFilterBySettingChange(clusterService) + } + override fun doExecute( task: Task, indexMonitorV2Request: IndexMonitorV2Request, actionListener: ActionListener ) { + // read the user from thread context immediately, before + // downstream flows spin up new threads with fresh context + val user = readUserFromThreadContext(client) + // validate the MonitorV2 based on its type when (indexMonitorV2Request.monitorV2) { is PPLMonitor -> validateMonitorPplQuery( indexMonitorV2Request.monitorV2 as PPLMonitor, object : ActionListener { // validationListener override fun onResponse(response: Unit) { - checkUserAndIndicesAccess(client, actionListener, indexMonitorV2Request) + checkUserAndIndicesAccess(client, actionListener, indexMonitorV2Request, user) } override fun onFailure(e: Exception) { @@ -134,7 +144,7 @@ class TransportIndexMonitorV2Action @Inject constructor( // from the base query + custom condition is valid val allCustomTriggersValid = true for (pplTrigger in pplMonitor.triggers) { - if (pplTrigger.conditionType == ConditionType.NUMBER_OF_RESULTS) { + if (pplTrigger.conditionType != ConditionType.CUSTOM) { continue } @@ -179,11 +189,10 @@ class TransportIndexMonitorV2Action @Inject constructor( private fun checkUserAndIndicesAccess( client: Client, actionListener: ActionListener, - indexMonitorV2Request: IndexMonitorV2Request + indexMonitorV2Request: IndexMonitorV2Request, + user: User? ) { /* check initial user permissions */ - val user = readUserFromThreadContext(client) - log.info("user in checkUserAndIndicesAccess: $user") if (!validateUserBackendRoles(user, actionListener)) { @@ -463,12 +472,14 @@ class TransportIndexMonitorV2Action @Inject constructor( actionListener: ActionListener, user: User? 
) { + log.info("user: $user") + log.info("monitor user: ${existingMonitorV2.user}") if ( !checkUserPermissionsWithResource( user, existingMonitorV2.user, actionListener, - "monitor", + "monitor_v2", indexMonitorRequest.monitorId ) ) { @@ -514,7 +525,7 @@ class TransportIndexMonitorV2Action @Inject constructor( ) } else { // rolesToRemove: these are the backend roles to remove from the monitor - val rolesToRemove = user.backendRoles - indexMonitorRequest.rbacRoles.orEmpty() + val rolesToRemove = user.backendRoles - indexMonitorRequest.rbacRoles // remove the monitor's roles with rolesToRemove and add any roles passed into the request.rbacRoles val updatedRbac = currentMonitorV2.user?.backendRoles.orEmpty() - rolesToRemove + indexMonitorRequest.rbacRoles newMonitorV2 = newMonitorV2.copy( @@ -525,7 +536,7 @@ class TransportIndexMonitorV2Action @Inject constructor( newMonitorV2 = newMonitorV2 .copy(user = User(user.name, currentMonitorV2.user!!.backendRoles, user.roles, user.customAttNames)) } - log.debug("Update monitor backend roles to: ${newMonitorV2.user?.backendRoles}") + log.info("Update monitor backend roles to: ${newMonitorV2.user?.backendRoles}") } newMonitorV2 = newMonitorV2.copy(schemaVersion = IndexUtils.scheduledJobIndexSchemaVersion) diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportSearchMonitorV2Action.kt b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportSearchMonitorV2Action.kt index bf109e9a7..3f164932c 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportSearchMonitorV2Action.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportSearchMonitorV2Action.kt @@ -66,7 +66,7 @@ class TransportSearchMonitorV2Action @Inject constructor( // if doFilterForUser() is false, security is enabled but filterby is disabled if (user != null && doFilterForUser(user)) { log.info("Filtering result by: ${user.backendRoles}") - addFilter(user, 
request.searchRequest.source(), "monitor.user.backend_roles.keyword") + addFilter(user, request.searchRequest.source(), "monitor_v2.ppl_monitor.user.backend_roles.keyword") } client.search( From e505e124b8a0f681a7bdf2ae62dd7ede6519f2d2 Mon Sep 17 00:00:00 2001 From: Dennis Toepker Date: Mon, 22 Sep 2025 17:05:50 -0700 Subject: [PATCH 36/40] removing query results from alert --- .../opensearch/alerting/PPLMonitorRunner.kt | 20 ++++++++++++----- .../script/PPLTriggerExecutionContext.kt | 5 +++-- .../alerting/settings/AlertingSettings.kt | 17 +++++++++----- .../TransportIndexMonitorV2Action.kt | 9 -------- .../alerting/alertsv2/alert_v2_mapping.json | 16 ++------------ .../alerting/core/modelv2/AlertV2.kt | 16 +++++++------- .../alerting/core/modelv2/PPLTrigger.kt | 22 ++++++++++++++----- 7 files changed, 56 insertions(+), 49 deletions(-) diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt b/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt index 912d54548..120bebc4a 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt @@ -184,16 +184,15 @@ object PPLMonitorRunner : MonitorV2Runner { pplTrigger.lastTriggeredTime = timeOfCurrentExecution // send alert notifications -// val actionExecutionResults = mutableListOf() for (action in pplTrigger.actions) { - for (alert in thisTriggersGeneratedAlerts) { + for (queryResult in preparedQueryResults) { val pplTriggerExecutionContext = PPLTriggerExecutionContext( pplMonitor, periodStart, periodEnd, null, pplTrigger, - alert.queryResults + queryResult ) runAction( @@ -419,7 +418,7 @@ object PPLMonitorRunner : MonitorV2Runner { monitorUser = pplMonitor.user, triggerId = pplTrigger.id, triggerName = pplTrigger.name, - queryResults = queryResult.toMap(), + query = pplMonitor.query, triggeredTime = timeOfCurrentExecution, expirationTime = expirationTime, severity = pplTrigger.severity, 
@@ -454,7 +453,7 @@ object PPLMonitorRunner : MonitorV2Runner { monitorUser = pplMonitor.user, triggerId = pplTrigger.id, triggerName = pplTrigger.name, - queryResults = mapOf(), + query = pplMonitor.query, triggeredTime = timeOfCurrentExecution, expirationTime = expirationTime, errorMessage = obfuscatedErrorMessage, @@ -528,8 +527,17 @@ object PPLMonitorRunner : MonitorV2Runner { ) { // this function can throw an exception, which is caught by the try // catch in runMonitor() to generate an error alert - // TODO: is actionOutput even needed, we dont store action run results in alert // val actionOutput = mutableMapOf() + + // TODO: make queryResults a JSON +// val pplQueryResultsToInclude: Map +// val size = triggerCtx.pplQueryResults.toString().length +// val maxSize = monitorCtx.clusterService!!.clusterSettings.get(AlertingSettings.ALERT_V2_NOTIF_QUERY_RESULTS_MAX_SIZE) +// +// if (size < maxSize) { +// +// } + val notifSubject = if (action.subjectTemplate != null) MonitorRunnerService.compileTemplateV2(action.subjectTemplate!!, triggerCtx) else "" diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/script/PPLTriggerExecutionContext.kt b/alerting/src/main/kotlin/org/opensearch/alerting/script/PPLTriggerExecutionContext.kt index 5eac38663..c6f17fedf 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/script/PPLTriggerExecutionContext.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/script/PPLTriggerExecutionContext.kt @@ -1,5 +1,6 @@ package org.opensearch.alerting.script +import org.json.JSONObject import org.opensearch.alerting.core.modelv2.PPLMonitor import org.opensearch.alerting.core.modelv2.PPLMonitorRunResult.Companion.PPL_QUERY_RESULTS_FIELD import org.opensearch.alerting.core.modelv2.PPLTrigger @@ -12,13 +13,13 @@ data class PPLTriggerExecutionContext( override val periodEnd: Instant, override val error: Exception? 
= null, val pplTrigger: PPLTrigger, - val pplQueryResults: Map // keys are PPL query result fields + val pplQueryResults: JSONObject // can be a full set of PPL query results, or an individual result row ) : TriggerV2ExecutionContext(monitorV2, periodStart, periodEnd, error) { override fun asTemplateArg(): Map { val templateArg = super.asTemplateArg().toMutableMap() templateArg[PPL_TRIGGER_FIELD] = pplTrigger.asTemplateArg() - templateArg[PPL_QUERY_RESULTS_FIELD] = pplQueryResults + templateArg[PPL_QUERY_RESULTS_FIELD] = pplQueryResults.toMap() return templateArg.toMap() } } diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/settings/AlertingSettings.kt b/alerting/src/main/kotlin/org/opensearch/alerting/settings/AlertingSettings.kt index 3549da49f..0be56d183 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/settings/AlertingSettings.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/settings/AlertingSettings.kt @@ -295,33 +295,40 @@ class AlertingSettings { ) val ALERT_V2_HISTORY_ENABLED = Setting.boolSetting( - "plugins.alerting.v2.alert_history_enabled", + "plugins.alerting_v2.alert_history_enabled", true, Setting.Property.NodeScope, Setting.Property.Dynamic ) val ALERT_V2_HISTORY_ROLLOVER_PERIOD = Setting.positiveTimeSetting( - "plugins.alerting.v2.alert_history_rollover_period", + "plugins.alerting_v2.alert_history_rollover_period", TimeValue(12, TimeUnit.HOURS), Setting.Property.NodeScope, Setting.Property.Dynamic ) val ALERT_V2_HISTORY_INDEX_MAX_AGE = Setting.positiveTimeSetting( - "plugins.alerting.v2.alert_history_max_age", + "plugins.alerting_v2.alert_history_max_age", TimeValue(30, TimeUnit.DAYS), Setting.Property.NodeScope, Setting.Property.Dynamic ) val ALERT_V2_HISTORY_MAX_DOCS = Setting.longSetting( - "plugins.alerting.v2.alert_history_max_docs", + "plugins.alerting_v2.alert_history_max_docs", 1000L, 0L, Setting.Property.NodeScope, Setting.Property.Dynamic ) val ALERT_V2_HISTORY_RETENTION_PERIOD = 
Setting.positiveTimeSetting( - "plugins.alerting.v2.alert_history_retention_period", + "plugins.alerting_v2.alert_history_retention_period", TimeValue(60, TimeUnit.DAYS), Setting.Property.NodeScope, Setting.Property.Dynamic ) + + val ALERT_V2_NOTIF_QUERY_RESULTS_MAX_SIZE = Setting.longSetting( + "plugins.alerting_v2.notif_query_results_max_size", + 3000L, + 0L, + Setting.Property.NodeScope, Setting.Property.Dynamic + ) } } diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportIndexMonitorV2Action.kt b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportIndexMonitorV2Action.kt index c9df6be36..23681ac14 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportIndexMonitorV2Action.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportIndexMonitorV2Action.kt @@ -193,8 +193,6 @@ class TransportIndexMonitorV2Action @Inject constructor( user: User? ) { /* check initial user permissions */ - log.info("user in checkUserAndIndicesAccess: $user") - if (!validateUserBackendRoles(user, actionListener)) { return } @@ -247,7 +245,6 @@ class TransportIndexMonitorV2Action @Inject constructor( actionListener: ActionListener, user: User? ) { - log.info("user in checkPplQueryIndices: $user") val pplMonitor = indexMonitorV2Request.monitorV2 as PPLMonitor val pplQuery = pplMonitor.query val indices = getIndicesFromPplQuery(pplQuery) @@ -298,7 +295,6 @@ class TransportIndexMonitorV2Action @Inject constructor( actionListener: ActionListener, user: User? 
) { - log.info("user in checkScheduledJobIndex: $user") /* check to see if alerting-config index (scheduled job index) is created and updated before indexing MonitorV2 into it */ if (!scheduledJobIndices.scheduledJobIndexExists()) { // if alerting-config index doesn't exist, send request to create it scheduledJobIndices.initScheduledJobIndex(object : ActionListener { @@ -353,7 +349,6 @@ class TransportIndexMonitorV2Action @Inject constructor( actionListener: ActionListener, user: User? ) { - log.info("user in onCreateMappingsResponse: $user") if (isAcknowledged) { log.info("Created $SCHEDULED_JOBS_INDEX with mappings.") prepareMonitorIndexing(request, actionListener, user) @@ -376,7 +371,6 @@ class TransportIndexMonitorV2Action @Inject constructor( actionListener: ActionListener, user: User? ) { - log.info("user in onUpdateMappingsResponse: $user") if (response.isAcknowledged) { log.info("Updated $SCHEDULED_JOBS_INDEX with mappings.") IndexUtils.scheduledJobIndexUpdated() @@ -404,7 +398,6 @@ class TransportIndexMonitorV2Action @Inject constructor( actionListener: ActionListener, user: User? ) { - log.info("user in prepareMonitorIndexing: $user") if (indexMonitorRequest.method == RestRequest.Method.PUT) { // update monitor case scope.launch { updateMonitor(indexMonitorRequest, actionListener, user) @@ -588,7 +581,6 @@ class TransportIndexMonitorV2Action @Inject constructor( actionListener: ActionListener, user: User? ) { - log.info("user in onMonitorCountSearchResponse: $user") val totalHits = monitorCountSearchResponse.hits.totalHits?.value if (totalHits != null && totalHits >= maxMonitors) { log.info("This request would create more than the allowed monitors [$maxMonitors].") @@ -611,7 +603,6 @@ class TransportIndexMonitorV2Action @Inject constructor( actionListener: ActionListener, user: User? 
) { - log.info("user in indexMonitor: $user") var monitorV2 = when (indexMonitorRequest.monitorV2) { is PPLMonitor -> indexMonitorRequest.monitorV2 as PPLMonitor else -> throw IllegalArgumentException("received unsupported monitor type to index: ${indexMonitorRequest.monitorV2.javaClass}") diff --git a/alerting/src/main/resources/org/opensearch/alerting/alertsv2/alert_v2_mapping.json b/alerting/src/main/resources/org/opensearch/alerting/alertsv2/alert_v2_mapping.json index 388b82715..b658dd579 100644 --- a/alerting/src/main/resources/org/opensearch/alerting/alertsv2/alert_v2_mapping.json +++ b/alerting/src/main/resources/org/opensearch/alerting/alertsv2/alert_v2_mapping.json @@ -95,20 +95,8 @@ "error_message": { "type": "text" }, - "query_results": { - "type": "nested", - "properties": { - "schema": { - "type": "nested", - "dynamic": true - }, - "datarows": { - "type": "text" - }, - "total": { - "type": "integer" - } - } + "query": { + "type": "text" } } } \ No newline at end of file diff --git a/core/src/main/kotlin/org/opensearch/alerting/core/modelv2/AlertV2.kt b/core/src/main/kotlin/org/opensearch/alerting/core/modelv2/AlertV2.kt index 4e153f780..42c568863 100644 --- a/core/src/main/kotlin/org/opensearch/alerting/core/modelv2/AlertV2.kt +++ b/core/src/main/kotlin/org/opensearch/alerting/core/modelv2/AlertV2.kt @@ -70,7 +70,7 @@ data class AlertV2( val monitorUser: User?, val triggerId: String, val triggerName: String, - val queryResults: Map, + val query: String, val triggeredTime: Instant, val expirationTime: Instant, val errorMessage: String? 
= null, @@ -92,7 +92,7 @@ data class AlertV2( }, triggerId = sin.readString(), triggerName = sin.readString(), - queryResults = sin.readMap()!!.toMap(), + query = sin.readString(), triggeredTime = sin.readInstant(), expirationTime = sin.readInstant(), errorMessage = sin.readOptionalString(), @@ -112,7 +112,7 @@ data class AlertV2( monitorUser?.writeTo(out) out.writeString(triggerId) out.writeString(triggerName) - out.writeMap(queryResults) + out.writeString(query) out.writeInstant(triggeredTime) out.writeInstant(expirationTime) out.writeOptionalString(errorMessage) @@ -139,7 +139,7 @@ data class AlertV2( .field(EXECUTION_ID_FIELD, executionId) .field(TRIGGER_ID_FIELD, triggerId) .field(TRIGGER_NAME_FIELD, triggerName) - .field(QUERY_RESULTS_FIELD, queryResults) + .field(QUERY_FIELD, query) .field(ERROR_MESSAGE_FIELD, errorMessage) .field(SEVERITY_FIELD, severity.value) .nonOptionalTimeField(TRIGGERED_TIME_FIELD, triggeredTime) @@ -168,7 +168,7 @@ data class AlertV2( companion object { const val TRIGGERED_TIME_FIELD = "triggered_time" const val EXPIRATION_TIME_FIELD = "expiration_time" - const val QUERY_RESULTS_FIELD = "query_results" + const val QUERY_FIELD = "query" @JvmStatic @JvmOverloads @@ -181,7 +181,7 @@ data class AlertV2( var monitorUser: User? = null lateinit var triggerId: String lateinit var triggerName: String - var queryResults: Map = mapOf() + lateinit var query: String lateinit var severity: Severity var triggeredTime: Instant? = null var expirationTime: Instant? 
= null @@ -206,7 +206,7 @@ data class AlertV2( } TRIGGER_ID_FIELD -> triggerId = xcp.text() TRIGGER_NAME_FIELD -> triggerName = xcp.text() - QUERY_RESULTS_FIELD -> queryResults = xcp.map() + QUERY_FIELD -> query = xcp.text() TRIGGERED_TIME_FIELD -> triggeredTime = xcp.instant() EXPIRATION_TIME_FIELD -> expirationTime = xcp.instant() ERROR_MESSAGE_FIELD -> errorMessage = xcp.textOrNull() @@ -233,7 +233,7 @@ data class AlertV2( monitorUser = monitorUser, triggerId = requireNotNull(triggerId), triggerName = requireNotNull(triggerName), - queryResults = requireNotNull(queryResults), + query = requireNotNull(query), triggeredTime = requireNotNull(triggeredTime), expirationTime = requireNotNull(expirationTime), errorMessage = errorMessage, diff --git a/core/src/main/kotlin/org/opensearch/alerting/core/modelv2/PPLTrigger.kt b/core/src/main/kotlin/org/opensearch/alerting/core/modelv2/PPLTrigger.kt index 4f3f5237d..655761870 100644 --- a/core/src/main/kotlin/org/opensearch/alerting/core/modelv2/PPLTrigger.kt +++ b/core/src/main/kotlin/org/opensearch/alerting/core/modelv2/PPLTrigger.kt @@ -12,6 +12,7 @@ import org.opensearch.common.CheckedFunction import org.opensearch.common.UUIDs import org.opensearch.common.unit.TimeValue import org.opensearch.commons.alerting.model.action.Action +import org.opensearch.commons.alerting.util.AlertingException import org.opensearch.commons.alerting.util.instant import org.opensearch.commons.alerting.util.optionalTimeField import org.opensearch.core.ParseField @@ -287,19 +288,30 @@ data class PPLTrigger( } SUPPRESS_FIELD -> { suppressDuration = if (xcp.currentToken() == XContentParser.Token.VALUE_NULL) { + // if expire field is null, skip reading it and let it retain the default value null } else { val input = xcp.text() - // throws IllegalArgumentException if there's parsing error - TimeValue.parseTimeValue(input, PLACEHOLDER_SUPPRESS_SETTING_NAME) + try { + TimeValue.parseTimeValue(input, PLACEHOLDER_SUPPRESS_SETTING_NAME) + } catch (e: 
Exception) { + throw AlertingException.wrap( + IllegalArgumentException("Invalid value for field: $SUPPRESS_FIELD", e) + ) + } } } EXPIRE_FIELD -> { + // if expire field is null, skip reading it and let it retain the default value if (xcp.currentToken() != XContentParser.Token.VALUE_NULL) { - // if expire field is null, skip reading it and let it retain the default value val input = xcp.text() - // throws IllegalArgumentException if there's parsing error - expireDuration = TimeValue.parseTimeValue(input, PLACEHOLDER_EXPIRE_SETTING_NAME) + try { + expireDuration = TimeValue.parseTimeValue(input, PLACEHOLDER_EXPIRE_SETTING_NAME) + } catch (e: Exception) { + throw AlertingException.wrap( + IllegalArgumentException("Invalid value for field: $EXPIRE_FIELD", e) + ) + } } } LAST_TRIGGERED_FIELD -> lastTriggeredTime = xcp.instant() From 19bc2dcf95a868d1920c2c3dea040dbd26e5b08a Mon Sep 17 00:00:00 2001 From: Dennis Toepker Date: Tue, 23 Sep 2025 15:39:37 -0700 Subject: [PATCH 37/40] now checks query results size and truncates before passing into notification --- .../org/opensearch/alerting/AlertingPlugin.kt | 3 +- .../opensearch/alerting/PPLMonitorRunner.kt | 68 +++++++++++++------ .../script/PPLTriggerExecutionContext.kt | 2 +- .../alerting/core/modelv2/PPLMonitor.kt | 2 +- 4 files changed, 53 insertions(+), 22 deletions(-) diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/AlertingPlugin.kt b/alerting/src/main/kotlin/org/opensearch/alerting/AlertingPlugin.kt index 1a32c22af..1cc035f6e 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/AlertingPlugin.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/AlertingPlugin.kt @@ -485,7 +485,8 @@ internal class AlertingPlugin : PainlessExtension, ActionPlugin, ScriptPlugin, R AlertingSettings.ALERT_V2_HISTORY_ROLLOVER_PERIOD, AlertingSettings.ALERT_V2_HISTORY_INDEX_MAX_AGE, AlertingSettings.ALERT_V2_HISTORY_MAX_DOCS, - AlertingSettings.ALERT_V2_HISTORY_RETENTION_PERIOD + 
AlertingSettings.ALERT_V2_HISTORY_RETENTION_PERIOD, + AlertingSettings.ALERT_V2_NOTIF_QUERY_RESULTS_MAX_SIZE ) } diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt b/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt index 120bebc4a..f11661710 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt @@ -29,6 +29,7 @@ import org.opensearch.alerting.opensearchapi.retry import org.opensearch.alerting.opensearchapi.suspendUntil import org.opensearch.alerting.opensearchapi.withClosableContext import org.opensearch.alerting.script.PPLTriggerExecutionContext +import org.opensearch.alerting.settings.AlertingSettings import org.opensearch.common.unit.TimeValue import org.opensearch.common.xcontent.XContentFactory import org.opensearch.commons.alerting.alerts.AlertError @@ -529,34 +530,63 @@ object PPLMonitorRunner : MonitorV2Runner { // catch in runMonitor() to generate an error alert // val actionOutput = mutableMapOf() - // TODO: make queryResults a JSON -// val pplQueryResultsToInclude: Map -// val size = triggerCtx.pplQueryResults.toString().length -// val maxSize = monitorCtx.clusterService!!.clusterSettings.get(AlertingSettings.ALERT_V2_NOTIF_QUERY_RESULTS_MAX_SIZE) -// -// if (size < maxSize) { -// -// } +// JSONArray(relevantQueryResultRows.getJSONArray("datarows").getJSONArray(i).toList()) + + val pplQueryResultsToInclude: Map + + // these are the full query results we got from the monitor's + // query execution + val pplQueryFullResults = triggerCtx.pplQueryResults + + // make a deep copy of the original query results with only a single data row + // do this by serializing the full results into a string, then creating a new JSONObject from the string, + // then remove all but one row in the deep copy's datarows + val pplQueryResultsSingleRow = JSONObject(pplQueryFullResults.toString()) + 
pplQueryResultsSingleRow.getJSONArray("datarows").apply { + for (i in length() - 1 downTo 1) { + remove(i) + } + } + + // estimate byte size with string length + val size = pplQueryFullResults.toString().length + val oneRowSize = pplQueryResultsSingleRow.toString().length + + logger.info("size: $size") + logger.info("oneRowSize: $oneRowSize") + + // retrieve the size limit from cluster settings + val maxSize = monitorCtx.clusterService!!.clusterSettings.get(AlertingSettings.ALERT_V2_NOTIF_QUERY_RESULTS_MAX_SIZE) + + var truncatedToSingleRow = false + var truncatedEntirely = false + if (size > maxSize && oneRowSize <= maxSize) { + triggerCtx.pplQueryResults = pplQueryResultsSingleRow + truncatedToSingleRow = true + } else if (size > maxSize && oneRowSize > maxSize) { + triggerCtx.pplQueryResults = JSONObject() + truncatedEntirely = true + } val notifSubject = if (action.subjectTemplate != null) MonitorRunnerService.compileTemplateV2(action.subjectTemplate!!, triggerCtx) else "" - // TODO: check query results size, truncate accordingly, and append to notifMessage before sending - // TODO: maybe remove actionOutput, p sure that was for storing action execution results, which we arent doing - val notifMessage = MonitorRunnerService.compileTemplateV2(action.messageTemplate, triggerCtx) + + var notifMessage = MonitorRunnerService.compileTemplateV2(action.messageTemplate, triggerCtx) if (Strings.isNullOrEmpty(notifMessage)) { throw IllegalStateException("Message content missing in the Destination with id: ${action.destinationId}") } + if (truncatedToSingleRow) { + notifMessage += "\n\n(Note from Alerting Plugin: the full query results were too large, " + + "only one query result row was passed into this notification)" + } else if (truncatedEntirely) { + notifMessage += "\n\n(Note from Alerting Plugin: the query results were too large, " + + "no query results were passed into this notification)" + } + if (!dryrun) { - val client = monitorCtx.client -// 
actionOutput[Action.MESSAGE_ID] = getConfigAndSendNotification( -// action, -// monitorCtx, -// actionOutput[Action.SUBJECT], -// actionOutput[Action.MESSAGE]!! -// ) - client!!.threadPool().threadContext.stashContext().use { + monitorCtx.client!!.threadPool().threadContext.stashContext().use { withClosableContext( InjectorContextElement( pplMonitor.id, diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/script/PPLTriggerExecutionContext.kt b/alerting/src/main/kotlin/org/opensearch/alerting/script/PPLTriggerExecutionContext.kt index c6f17fedf..23f746ace 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/script/PPLTriggerExecutionContext.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/script/PPLTriggerExecutionContext.kt @@ -13,7 +13,7 @@ data class PPLTriggerExecutionContext( override val periodEnd: Instant, override val error: Exception? = null, val pplTrigger: PPLTrigger, - val pplQueryResults: JSONObject // can be a full set of PPL query results, or an individual result row + var pplQueryResults: JSONObject // can be a full set of PPL query results, or an individual result row ) : TriggerV2ExecutionContext(monitorV2, periodStart, periodEnd, error) { override fun asTemplateArg(): Map { diff --git a/core/src/main/kotlin/org/opensearch/alerting/core/modelv2/PPLMonitor.kt b/core/src/main/kotlin/org/opensearch/alerting/core/modelv2/PPLMonitor.kt index 7d882fedf..2a9e77788 100644 --- a/core/src/main/kotlin/org/opensearch/alerting/core/modelv2/PPLMonitor.kt +++ b/core/src/main/kotlin/org/opensearch/alerting/core/modelv2/PPLMonitor.kt @@ -240,7 +240,7 @@ data class PPLMonitor( var name: String? = null var enabled = true var schedule: Schedule? = null - var lookBackWindow: TimeValue? = null + var lookBackWindow: TimeValue? = null // TODO: default value var lastUpdateTime: Instant? = null var enabledTime: Instant? = null var user: User? 
= null From 6093ad0064cf2a98a34d60a88930034a7a0e7720 Mon Sep 17 00:00:00 2001 From: Dennis Toepker Date: Thu, 25 Sep 2025 11:53:06 -0700 Subject: [PATCH 38/40] refactored limiting the number of alerts from a per_result trigger --- .../org/opensearch/alerting/AlertingPlugin.kt | 3 +- .../opensearch/alerting/PPLMonitorRunner.kt | 40 +++++++++++++------ .../alerting/settings/AlertingSettings.kt | 7 ++++ 3 files changed, 36 insertions(+), 14 deletions(-) diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/AlertingPlugin.kt b/alerting/src/main/kotlin/org/opensearch/alerting/AlertingPlugin.kt index 1cc035f6e..94d2e5f83 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/AlertingPlugin.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/AlertingPlugin.kt @@ -486,7 +486,8 @@ internal class AlertingPlugin : PainlessExtension, ActionPlugin, ScriptPlugin, R AlertingSettings.ALERT_V2_HISTORY_INDEX_MAX_AGE, AlertingSettings.ALERT_V2_HISTORY_MAX_DOCS, AlertingSettings.ALERT_V2_HISTORY_RETENTION_PERIOD, - AlertingSettings.ALERT_V2_NOTIF_QUERY_RESULTS_MAX_SIZE + AlertingSettings.ALERT_V2_NOTIF_QUERY_RESULTS_MAX_SIZE, + AlertingSettings.ALERT_V2_PER_RESULT_TRIGGER_MAX_ALERTS ) } diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt b/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt index f11661710..5a5bf830f 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt @@ -89,7 +89,6 @@ object PPLMonitorRunner : MonitorV2Runner { // use threadpool time for cross node consistency val timeOfCurrentExecution = Instant.ofEpochMilli(MonitorRunnerService.monitorCtx.threadPool!!.absoluteTimeInMillis()) - // TODO: put alertV2s in their own index try { monitorCtx.alertV2Indices!!.createOrUpdateAlertV2Index() monitorCtx.alertV2Indices!!.createOrUpdateInitialAlertV2HistoryIndex() @@ -168,7 +167,8 @@ object PPLMonitorRunner : 
MonitorV2Runner { if (triggered) { // if trigger is on result set mode, this list will have exactly 1 element // if trigger is on per result mode, this list will have as many elements as the query results had rows - val preparedQueryResults = prepareQueryResults(relevantQueryResultRows, pplTrigger.mode) + // up to the max number of alerts a per result trigger can generate + val preparedQueryResults = prepareQueryResults(relevantQueryResultRows, pplTrigger.mode, monitorCtx) // generate alerts based on trigger mode // if this trigger is on result_set mode, this list contains exactly 1 alert @@ -383,22 +383,41 @@ object PPLMonitorRunner : MonitorV2Runner { // prepares the query results to be passed into alerts and notifications based on trigger mode // if result set, alert and notification simply stores all query results // if per result, each alert and notification stores a single row of the query results - private fun prepareQueryResults(relevantQueryResultRows: JSONObject, triggerMode: TriggerMode): List { + private fun prepareQueryResults( + relevantQueryResultRows: JSONObject, + triggerMode: TriggerMode, + monitorCtx: MonitorRunnerExecutionContext + ): List { // case: result set + // return the results as a single set of all the results if (triggerMode == TriggerMode.RESULT_SET) { return listOf(relevantQueryResultRows) } // case: per result + // prepare to generate an alert for each query result row val individualRows = mutableListOf() val numAlertsToGenerate = relevantQueryResultRows.getInt("total") for (i in 0 until numAlertsToGenerate) { val individualRow = JSONObject() individualRow.put("schema", JSONArray(relevantQueryResultRows.getJSONArray("schema").toList())) - individualRow.put("datarows", JSONArray(relevantQueryResultRows.getJSONArray("datarows").getJSONArray(i).toList())) + individualRow.put( + "datarows", + JSONArray().put( + JSONArray(relevantQueryResultRows.getJSONArray("datarows").getJSONArray(i).toList()) + ) + ) individualRows.add(individualRow) 
} - return individualRows + + logger.info("individualRows: $individualRows") + + // there may be many query result rows, and generating an alert for each of them could lead to cluster issues, + // so limit the number of per_result alerts that are generated + val maxAlerts = monitorCtx.clusterService!!.clusterSettings.get(AlertingSettings.ALERT_V2_PER_RESULT_TRIGGER_MAX_ALERTS) + val reducedIndividualRows = individualRows.take(maxAlerts) + + return reducedIndividualRows } private fun generateAlerts( @@ -428,10 +447,7 @@ object PPLMonitorRunner : MonitorV2Runner { alertV2s.add(alertV2) } - // TODO: this is a magic number right now, make it a setting - val alertsLimit = 10 - - return alertV2s.take(alertsLimit).toList() // return as immutable list + return alertV2s.toList() // return as immutable list } private fun generateErrorAlert( @@ -532,8 +548,6 @@ object PPLMonitorRunner : MonitorV2Runner { // JSONArray(relevantQueryResultRows.getJSONArray("datarows").getJSONArray(i).toList()) - val pplQueryResultsToInclude: Map - // these are the full query results we got from the monitor's // query execution val pplQueryFullResults = triggerCtx.pplQueryResults @@ -552,8 +566,8 @@ object PPLMonitorRunner : MonitorV2Runner { val size = pplQueryFullResults.toString().length val oneRowSize = pplQueryResultsSingleRow.toString().length - logger.info("size: $size") - logger.info("oneRowSize: $oneRowSize") + logger.info("pplQueryFullResults: $pplQueryFullResults") + logger.info("pplQueryResultsSingleRow: $pplQueryResultsSingleRow") // retrieve the size limit from cluster settings val maxSize = monitorCtx.clusterService!!.clusterSettings.get(AlertingSettings.ALERT_V2_NOTIF_QUERY_RESULTS_MAX_SIZE) diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/settings/AlertingSettings.kt b/alerting/src/main/kotlin/org/opensearch/alerting/settings/AlertingSettings.kt index 0be56d183..21dfba936 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/settings/AlertingSettings.kt +++ 
b/alerting/src/main/kotlin/org/opensearch/alerting/settings/AlertingSettings.kt @@ -330,5 +330,12 @@ class AlertingSettings { 0L, Setting.Property.NodeScope, Setting.Property.Dynamic ) + + val ALERT_V2_PER_RESULT_TRIGGER_MAX_ALERTS = Setting.intSetting( + "plugins.alerting_v2.per_result_trigger_max_alerts", + 10, + 1, + Setting.Property.NodeScope, Setting.Property.Dynamic + ) } } From e3e18327f3b34fd895782878427b8616eac5d960 Mon Sep 17 00:00:00 2001 From: Dennis Toepker Date: Thu, 25 Sep 2025 12:28:46 -0700 Subject: [PATCH 39/40] manually calling execute monitor API should no longer get suppressed --- .../alerting/MonitorRunnerService.kt | 7 +++-- .../opensearch/alerting/MonitorV2Runner.kt | 1 + .../opensearch/alerting/PPLMonitorRunner.kt | 11 ++++++-- .../actionv2/ExecuteMonitorV2Request.kt | 5 ++++ .../resthandler/RestExecuteMonitorV2Action.kt | 4 +-- .../TransportExecuteMonitorV2Action.kt | 27 ++++++++----------- 6 files changed, 33 insertions(+), 22 deletions(-) diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/MonitorRunnerService.kt b/alerting/src/main/kotlin/org/opensearch/alerting/MonitorRunnerService.kt index 262e83a94..27dee9537 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/MonitorRunnerService.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/MonitorRunnerService.kt @@ -444,6 +444,7 @@ object MonitorRunnerService : JobRunner, CoroutineScope, AbstractLifecycleCompon monitorCtx.clusterService!!.state().nodes().localNode.id ) val executeMonitorV2Request = ExecuteMonitorV2Request( + false, false, job.id, // only need to pass in MonitorV2 ID null, // no need to pass in MonitorV2 object itself @@ -460,7 +461,7 @@ object MonitorRunnerService : JobRunner, CoroutineScope, AbstractLifecycleCompon } catch (e: Exception) { logger.error("MonitorV2 run failed for monitor with id ${job.id}", e) } finally { - monitorCtx.client!!.suspendUntil { monitorCtx.lockService!!.release(monitorLock, it) } + monitorCtx.client!!.suspendUntil { 
monitorCtx.lockService!!.release(monitorLock, it) } logger.debug("lock ${monitorLock?.lockId} released") } } @@ -591,6 +592,7 @@ object MonitorRunnerService : JobRunner, CoroutineScope, AbstractLifecycleCompon periodStart: Instant, periodEnd: Instant, dryrun: Boolean, + manual: Boolean, transportService: TransportService, ): MonitorV2RunResult<*> { updateAlertingConfigIndexSchema() @@ -603,7 +605,7 @@ object MonitorRunnerService : JobRunner, CoroutineScope, AbstractLifecycleCompon logger.info( "Executing scheduled monitor - id: ${monitorV2.id}, type: $monitorV2Type, periodStart: $periodStart, " + - "periodEnd: $periodEnd, dryrun: $dryrun, executionId: $executionId" + "periodEnd: $periodEnd, dryrun: $dryrun, manual: $manual, executionId: $executionId" ) // for now, always call PPLMonitorRunner since only PPL Monitors are initially supported @@ -615,6 +617,7 @@ object MonitorRunnerService : JobRunner, CoroutineScope, AbstractLifecycleCompon periodStart, periodEnd, dryrun, + manual, executionId = executionId, transportService = transportService, ) diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/MonitorV2Runner.kt b/alerting/src/main/kotlin/org/opensearch/alerting/MonitorV2Runner.kt index 410076f19..ef2b3fcfa 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/MonitorV2Runner.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/MonitorV2Runner.kt @@ -12,6 +12,7 @@ interface MonitorV2Runner { periodStart: Instant, periodEnd: Instant, dryRun: Boolean, + manual: Boolean, executionId: String, transportService: TransportService ): MonitorV2RunResult<*> diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt b/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt index 5a5bf830f..a41d9aaf6 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt @@ -61,6 +61,7 @@ object PPLMonitorRunner : MonitorV2Runner { 
periodStart: Instant, periodEnd: Instant, dryRun: Boolean, + manual: Boolean, executionId: String, transportService: TransportService, ): MonitorV2RunResult<*> { @@ -111,7 +112,7 @@ object PPLMonitorRunner : MonitorV2Runner { try { // check for suppression and skip execution // before even running the trigger itself - val suppressed = checkForSuppress(pplTrigger, timeOfCurrentExecution) + val suppressed = checkForSuppress(pplTrigger, timeOfCurrentExecution, manual) if (suppressed) { logger.info("suppressing trigger ${pplTrigger.name} from monitor ${pplMonitor.name}") @@ -244,7 +245,13 @@ object PPLMonitorRunner : MonitorV2Runner { ) } - private fun checkForSuppress(pplTrigger: PPLTrigger, timeOfCurrentExecution: Instant): Boolean { + // returns true if the pplTrigger should be suppressed + private fun checkForSuppress(pplTrigger: PPLTrigger, timeOfCurrentExecution: Instant, manual: Boolean): Boolean { + // manual calls from the user to execute a monitor should never be suppressed + if (manual) { + return false + } + // the interval between throttledTimeBound and now is the suppression window // i.e. any PPLTrigger whose last trigger time is in this window must be suppressed val suppressTimeBound = pplTrigger.suppressDuration?.let { diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/actionv2/ExecuteMonitorV2Request.kt b/alerting/src/main/kotlin/org/opensearch/alerting/actionv2/ExecuteMonitorV2Request.kt index 8084430c5..dbf5d62b9 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/actionv2/ExecuteMonitorV2Request.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/actionv2/ExecuteMonitorV2Request.kt @@ -11,6 +11,7 @@ import java.io.IOException class ExecuteMonitorV2Request : ActionRequest { val dryrun: Boolean + val manual: Boolean val monitorId: String? // exactly one of monitorId or monitor must be non-null val monitorV2: MonitorV2? val requestStart: TimeValue? 
@@ -18,12 +19,14 @@ class ExecuteMonitorV2Request : ActionRequest { constructor( dryrun: Boolean, + manual: Boolean, // if execute was called by user or by scheduled job monitorId: String?, monitorV2: MonitorV2?, requestStart: TimeValue? = null, requestEnd: TimeValue ) : super() { this.dryrun = dryrun + this.manual = manual this.monitorId = monitorId this.monitorV2 = monitorV2 this.requestStart = requestStart @@ -33,6 +36,7 @@ class ExecuteMonitorV2Request : ActionRequest { @Throws(IOException::class) constructor(sin: StreamInput) : this( sin.readBoolean(), // dryrun + sin.readBoolean(), // manual sin.readOptionalString(), // monitorId if (sin.readBoolean()) { MonitorV2.readFrom(sin) // monitor @@ -53,6 +57,7 @@ class ExecuteMonitorV2Request : ActionRequest { @Throws(IOException::class) override fun writeTo(out: StreamOutput) { out.writeBoolean(dryrun) + out.writeBoolean(manual) out.writeOptionalString(monitorId) if (monitorV2 != null) { out.writeBoolean(true) diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestExecuteMonitorV2Action.kt b/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestExecuteMonitorV2Action.kt index 6c74ada9f..058cd7a1f 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestExecuteMonitorV2Action.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestExecuteMonitorV2Action.kt @@ -45,7 +45,7 @@ class RestExecuteMonitorV2Action : BaseRestHandler() { if (request.hasParam("monitorV2Id")) { val monitorV2Id = request.param("monitorV2Id") - val execMonitorV2Request = ExecuteMonitorV2Request(dryrun, monitorV2Id, null, null, requestEnd) + val execMonitorV2Request = ExecuteMonitorV2Request(dryrun, true, monitorV2Id, null, null, requestEnd) client.execute(ExecuteMonitorV2Action.INSTANCE, execMonitorV2Request, RestToXContentListener(channel)) } else { val xcp = request.contentParser() @@ -58,7 +58,7 @@ class RestExecuteMonitorV2Action : BaseRestHandler() { throw 
AlertingException.wrap(e) } - val execMonitorV2Request = ExecuteMonitorV2Request(dryrun, null, monitorV2, null, requestEnd) + val execMonitorV2Request = ExecuteMonitorV2Request(dryrun, true, null, monitorV2, null, requestEnd) client.execute(ExecuteMonitorV2Action.INSTANCE, execMonitorV2Request, RestToXContentListener(channel)) } } diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportExecuteMonitorV2Action.kt b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportExecuteMonitorV2Action.kt index 5b1cb436f..19fb8ce7d 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportExecuteMonitorV2Action.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportExecuteMonitorV2Action.kt @@ -34,6 +34,7 @@ import org.opensearch.tasks.Task import org.opensearch.transport.TransportService import org.opensearch.transport.client.Client import java.time.Instant +import org.opensearch.alerting.AlertingV2Utils.validateMonitorV2 private val log = LogManager.getLogger(TransportExecuteMonitorV2Action::class.java) @@ -66,7 +67,7 @@ class TransportExecuteMonitorV2Action @Inject constructor( // get execution time interval val (periodStart, periodEnd) = if (execMonitorV2Request.requestStart != null) { Pair( - Instant.ofEpochMilli(execMonitorV2Request.requestStart!!.millis), + Instant.ofEpochMilli(execMonitorV2Request.requestStart.millis), Instant.ofEpochMilli(execMonitorV2Request.requestEnd.millis) ) } else { @@ -81,13 +82,14 @@ class TransportExecuteMonitorV2Action @Inject constructor( } log.info( "Executing MonitorV2 from API - id: ${monitorV2.id}, type: $monitorV2Type, " + - "periodStart: $periodStart, periodEnd: $periodEnd, dryrun: ${execMonitorV2Request.dryrun}" + "periodStart: $periodStart, periodEnd: $periodEnd, manual: ${execMonitorV2Request.manual}" ) val monitorV2RunResult = runner.runJobV2( monitorV2, periodStart, periodEnd, execMonitorV2Request.dryrun, + execMonitorV2Request.manual, 
transportService ) withContext(Dispatchers.IO) { @@ -135,22 +137,15 @@ class TransportExecuteMonitorV2Action @Inject constructor( xContentRegistry, LoggingDeprecationHandler.INSTANCE, getMonitorV2Response.sourceAsBytesRef, XContentType.JSON ).use { xcp -> + val scheduledJob = ScheduledJob.parse(xcp, getMonitorV2Response.id, getMonitorV2Response.version) + validateMonitorV2(scheduledJob)?.let { + actionListener.onFailure(AlertingException.wrap(it)) + return + } + val monitorV2 = scheduledJob as MonitorV2 + executeMonitorV2(monitorV2) try { - val monitorV2 = ScheduledJob.parse( - xcp, - getMonitorV2Response.id, - getMonitorV2Response.version - ) as MonitorV2 executeMonitorV2(monitorV2) - } catch (e: ClassCastException) { - actionListener.onFailure( - AlertingException.wrap( - IllegalArgumentException( - "Passed in Monitor ID is a legacy Alerting Monitor, please pass in an " + - "Alerting V2 Monitor" - ) - ) - ) } catch (e: Exception) { actionListener.onFailure(AlertingException.wrap(e)) } From 7366e0066094ddcc774514d77684c6fd5efe7b9d Mon Sep 17 00:00:00 2001 From: Dennis Toepker Date: Thu, 25 Sep 2025 14:53:45 -0700 Subject: [PATCH 40/40] deploy cleanup --- .../kotlin/org/opensearch/alerting/PPLMonitorRunner.kt | 8 ++++---- .../alerting/transport/TransportExecuteMonitorV2Action.kt | 3 +-- .../org/opensearch/alerting/core/modelv2/MonitorV2.kt | 2 +- .../org/opensearch/alerting/core/modelv2/PPLMonitor.kt | 6 +++++- 4 files changed, 11 insertions(+), 8 deletions(-) diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt b/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt index a41d9aaf6..e495800ef 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/PPLMonitorRunner.kt @@ -104,6 +104,7 @@ object PPLMonitorRunner : MonitorV2Runner { // then inject a time filter where statement into PPL Monitor query. 
// if the given monitor query already has any time check whatsoever, this // simply returns the original query itself + // TODO: get lookback window based start time and put that in execution results instead of periodStart val timeFilteredQuery = addTimeFilter(pplMonitor.query, periodStart, periodEnd, pplMonitor.lookBackWindow) logger.info("time filtered query: $timeFilteredQuery") @@ -269,12 +270,13 @@ object PPLMonitorRunner : MonitorV2Runner { // periodStart: the lower bound of the initially computed query interval based on monitor schedule // periodEnd: the upper bound of the initially computed query interval based on monitor schedule // lookBackWindow: customer's desired query look back window, overrides [periodStart, periodEnd] if not null - private fun addTimeFilter(query: String, periodStart: Instant, periodEnd: Instant, lookBackWindow: TimeValue?): String { + private fun addTimeFilter(query: String, periodStart: Instant, periodEnd: Instant, lookBackWindow: TimeValue): String { // inject time filter into PPL query to only query for data within the (periodStart, periodEnd) interval // TODO: if query contains "_time", "span", "earliest", "latest", skip adding filter // pending https://github.com/opensearch-project/sql/issues/3969 // for now assume TIMESTAMP_FIELD field is always present in customer data + // TODO: delete this, add lookback window time filter always // if the raw query contained any time check whatsoever, skip adding a time filter internally // and return query as is, customer's in-query time checks instantly and automatically overrides if (query.contains(TIMESTAMP_FIELD)) { // TODO: replace with PPL time keyword checks after that's GA @@ -282,9 +284,7 @@ object PPLMonitorRunner : MonitorV2Runner { } // if customer passed in a look back window, override the precomputed interval with it - val updatedPeriodStart = lookBackWindow?.let { window -> - periodEnd.minus(window.millis, ChronoUnit.MILLIS) - } ?: periodStart + val updatedPeriodStart = 
periodEnd.minus(lookBackWindow.millis, ChronoUnit.MILLIS) // PPL plugin only accepts timestamp strings in this format val formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss").withZone(UTC) diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportExecuteMonitorV2Action.kt b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportExecuteMonitorV2Action.kt index 19fb8ce7d..c7f27d029 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportExecuteMonitorV2Action.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportExecuteMonitorV2Action.kt @@ -9,6 +9,7 @@ import org.opensearch.action.get.GetRequest import org.opensearch.action.get.GetResponse import org.opensearch.action.support.ActionFilters import org.opensearch.action.support.HandledTransportAction +import org.opensearch.alerting.AlertingV2Utils.validateMonitorV2 import org.opensearch.alerting.MonitorRunnerService import org.opensearch.alerting.actionv2.ExecuteMonitorV2Action import org.opensearch.alerting.actionv2.ExecuteMonitorV2Request @@ -34,7 +35,6 @@ import org.opensearch.tasks.Task import org.opensearch.transport.TransportService import org.opensearch.transport.client.Client import java.time.Instant -import org.opensearch.alerting.AlertingV2Utils.validateMonitorV2 private val log = LogManager.getLogger(TransportExecuteMonitorV2Action::class.java) @@ -143,7 +143,6 @@ class TransportExecuteMonitorV2Action @Inject constructor( return } val monitorV2 = scheduledJob as MonitorV2 - executeMonitorV2(monitorV2) try { executeMonitorV2(monitorV2) } catch (e: Exception) { diff --git a/core/src/main/kotlin/org/opensearch/alerting/core/modelv2/MonitorV2.kt b/core/src/main/kotlin/org/opensearch/alerting/core/modelv2/MonitorV2.kt index 5f0d803bd..eaa3bfcce 100644 --- a/core/src/main/kotlin/org/opensearch/alerting/core/modelv2/MonitorV2.kt +++ b/core/src/main/kotlin/org/opensearch/alerting/core/modelv2/MonitorV2.kt @@ -26,7 +26,7 @@ 
interface MonitorV2 : ScheduledJob { val user: User? val triggers: List val schemaVersion: Int // for updating monitors - val lookBackWindow: TimeValue? // how far back to look when querying data during monitor execution + val lookBackWindow: TimeValue // how far back to look when querying data during monitor execution fun asTemplateArg(): Map diff --git a/core/src/main/kotlin/org/opensearch/alerting/core/modelv2/PPLMonitor.kt b/core/src/main/kotlin/org/opensearch/alerting/core/modelv2/PPLMonitor.kt index 2a9e77788..3e3eeded5 100644 --- a/core/src/main/kotlin/org/opensearch/alerting/core/modelv2/PPLMonitor.kt +++ b/core/src/main/kotlin/org/opensearch/alerting/core/modelv2/PPLMonitor.kt @@ -57,7 +57,7 @@ data class PPLMonitor( override val name: String, override val enabled: Boolean, override val schedule: Schedule, - override val lookBackWindow: TimeValue? = null, + override val lookBackWindow: TimeValue, override val lastUpdateTime: Instant, override val enabledTime: Instant?, override val user: User?, @@ -316,6 +316,9 @@ data class PPLMonitor( } } + // if no lookback window was given, set a default one + lookBackWindow = lookBackWindow ?: TimeValue.timeValueHours(1L) + // if enabled, set time of MonitorV2 creation/update is set as enable time if (enabled && enabledTime == null) { enabledTime = Instant.now() @@ -330,6 +333,7 @@ data class PPLMonitor( requireNotNull(schedule) { "Schedule is null" } requireNotNull(query) { "Query is null" } requireNotNull(lastUpdateTime) { "Last update time is null" } + requireNotNull(lookBackWindow) { "Look back window is null" } if (queryLanguage == QueryLanguage.SQL) { throw IllegalArgumentException("SQL queries are not supported. Please use a PPL query.")