From af5fab2b5a74959399cf35004f46056daf50392c Mon Sep 17 00:00:00 2001 From: Allen Date: Wed, 16 Apr 2025 10:57:40 +0100 Subject: [PATCH 1/9] enable automlx --- ads/opctl/operator/lowcode/anomaly/const.py | 2 + .../lowcode/anomaly/model/anomaly_merlion.py | 40 +++++++- .../lowcode/anomaly/model/base_model.py | 9 +- .../operator/lowcode/anomaly/model/factory.py | 2 + .../operator/lowcode/anomaly/schema.yaml | 1 + ads/opctl/operator/lowcode/anomaly/utils.py | 98 +++++++++++++++++++ 6 files changed, 146 insertions(+), 6 deletions(-) diff --git a/ads/opctl/operator/lowcode/anomaly/const.py b/ads/opctl/operator/lowcode/anomaly/const.py index 0f9156f4a..96c19f94b 100644 --- a/ads/opctl/operator/lowcode/anomaly/const.py +++ b/ads/opctl/operator/lowcode/anomaly/const.py @@ -36,6 +36,8 @@ class SupportedModels(ExtendedEnum): # changepoint BOCPD = "bocpd" + AUTOMLX = "automlx" + class NonTimeADSupportedModels(ExtendedEnum): """Supported non time-based anomaly detection models.""" diff --git a/ads/opctl/operator/lowcode/anomaly/model/anomaly_merlion.py b/ads/opctl/operator/lowcode/anomaly/model/anomaly_merlion.py index 8999b2674..9fd57ee6d 100644 --- a/ads/opctl/operator/lowcode/anomaly/model/anomaly_merlion.py +++ b/ads/opctl/operator/lowcode/anomaly/model/anomaly_merlion.py @@ -62,6 +62,13 @@ def _get_config_model(self, model_name): model_config_map[model_name] = [model_config, model] return model_config_map + def _preprocess_data(self, df, date_column): + df[date_column] = pd.to_datetime(df[date_column]) + if df[date_column].dt.tz is not None: + df[date_column] = df[date_column].dt.tz_convert(None) + data = df.set_index(date_column) + return TimeSeries.from_pd(data) + def _build_model(self) -> AnomalyOutput: """ Builds a Merlion anomaly detection model and trains it using the given data. @@ -85,9 +92,8 @@ def _build_model(self) -> AnomalyOutput: anomaly_output = AnomalyOutput(date_column=date_column) # model_objects = defaultdict(list) - for target, df in self.datasets.full_data_dict.items(): - data = df.set_index(date_column) - data = TimeSeries.from_pd(data) + for s_id, df in self.datasets.full_data_dict.items(): + data = self._preprocess_data(df, date_column) for _, (model_config, model) in model_config_map.items(): if self.spec.model == SupportedModels.BOCPD: model_config = model_config(**self.spec.model_kwargs) @@ -107,8 +113,32 @@ def _build_model(self) -> AnomalyOutput: self.spec.target_column ) model = model(model_config) + scores = None + if ( + self.X_valid_dict.get(s_id, None) is not None + and self.y_valid_dict.get(s_id, None) is not None + ): + try: + # Do we need to set datetime col as index? + # TODO: get data by series id + v_df = self.X_valid_dict[s_id] + v_labels = self.y_valid_dict[s_id] + v_data = self._preprocess_data(v_df, date_column) + v_labels.index = v_data.index + print(v_data, v_labels) + scores_v = model.train( + train_data=v_data, anomaly_labels=v_labels + ) + print("checkpoint 1") + scores = model.get_anomaly_score( + train_data=data, anomaly_labels=None + ) + except Exception as e: + logging.debug(f"Failed to use validation data with error: {e}") + if scores is None: + scores = model.train(train_data=data, anomaly_labels=None) - scores = model.train(train_data=data, anomaly_labels=None) + # Normalize scores out of 100 scores = scores.to_pd().reset_index() scores["anom_score"] = ( scores["anom_score"] - scores["anom_score"].min() @@ -140,7 +170,7 @@ def _build_model(self) -> AnomalyOutput: } ).reset_index(drop=True) - anomaly_output.add_output(target, anomaly, score) + anomaly_output.add_output(s_id, anomaly, score) return anomaly_output def _generate_report(self): diff --git a/ads/opctl/operator/lowcode/anomaly/model/base_model.py b/ads/opctl/operator/lowcode/anomaly/model/base_model.py index 4f28bb8d2..16876fdfa 100644 --- a/ads/opctl/operator/lowcode/anomaly/model/base_model.py +++ b/ads/opctl/operator/lowcode/anomaly/model/base_model.py @@ -163,7 +163,14 @@ def generate_report(self): title_text = rc.Heading("Anomaly Detection Report", level=1) yaml_appendix_title = rc.Heading("Reference: YAML File", level=2) - yaml_appendix = rc.Yaml(self.config.to_dict()) + config_dict = self.config.to_dict() + # pop the data incase it isn't json serializable + config_dict["spec"]["input_data"].pop("data") + if config_dict["spec"].get("validation_data"): + config_dict["spec"]["validation_data"].pop("data") + if config_dict["spec"].get("test_data"): + config_dict["spec"]["test_data"].pop("data") + yaml_appendix = rc.Yaml(config_dict) summary = rc.Block( rc.Group( rc.Text(f"You selected the **`{self.spec.model}`** model.\n"), diff --git a/ads/opctl/operator/lowcode/anomaly/model/factory.py b/ads/opctl/operator/lowcode/anomaly/model/factory.py index 4bb59d02b..1883d7ce8 100644 --- a/ads/opctl/operator/lowcode/anomaly/model/factory.py +++ b/ads/opctl/operator/lowcode/anomaly/model/factory.py @@ -9,6 +9,7 @@ from ..operator_config import AnomalyOperatorConfig from .anomaly_dataset import AnomalyDatasets from .anomaly_merlion import AnomalyMerlionOperatorModel +from .automlx import AutoMLXOperatorModel from .autots import AutoTSOperatorModel from .base_model import AnomalyOperatorBaseModel from .isolationforest import IsolationForestOperatorModel @@ -61,6 +62,7 @@ class AnomalyOperatorModelFactory: SupportedModels.PROPHET: AnomalyMerlionOperatorModel, SupportedModels.SARIMA: AnomalyMerlionOperatorModel, SupportedModels.BOCPD: AnomalyMerlionOperatorModel, + SupportedModels.AUTOMLX: AutoMLXOperatorModel, } _NonTime_MAP = { diff --git a/ads/opctl/operator/lowcode/anomaly/schema.yaml b/ads/opctl/operator/lowcode/anomaly/schema.yaml index aba6c4e82..00130e16d 100644 --- a/ads/opctl/operator/lowcode/anomaly/schema.yaml +++ b/ads/opctl/operator/lowcode/anomaly/schema.yaml @@ -361,6 +361,7 @@ spec: allowed: - autots - auto + - automlx - oneclasssvm - isolationforest - randomcutforest diff --git a/ads/opctl/operator/lowcode/anomaly/utils.py b/ads/opctl/operator/lowcode/anomaly/utils.py index 396d34c57..17037d8d7 100644 --- a/ads/opctl/operator/lowcode/anomaly/utils.py +++ b/ads/opctl/operator/lowcode/anomaly/utils.py @@ -86,3 +86,101 @@ def select_auto_model(operator_config): if operator_config.spec.datetime_column is not None: return SupportedModels.AutoTS return NonTimeADSupportedModels.IsolationForest + + +import plotly.graph_objects as go +from sklearn.metrics import f1_score + + +def plot_anomaly_threshold_gain( + scores, threshold=None, labels=None, title="Threshold Analysis" +): + """ + Plots: + - Anomalies detected vs. threshold (always) + - F1 Score vs. threshold (if labels provided) + - % of data flagged vs. threshold (if labels not provided) + + Args: + scores (array-like): Anomaly scores (higher = more anomalous) + threshold (float): Optional current threshold to highlight + labels (array-like): Optional true labels (1=anomaly, 0=normal) + title (str): Chart title + """ + scores = np.array(scores) + thresholds = np.linspace(min(scores), max(scores), 100) + + anomalies_found = [] + y_secondary = [] + y_secondary_label = "" + + for t in thresholds: + predicted = (scores >= t).astype(int) + + # Count anomalies detected + if labels is not None: + true_anomalies = np.sum((predicted == 1) & (np.array(labels) == 1)) + # Compute F1 score + y_secondary.append(f1_score(labels, predicted, zero_division=0)) + y_secondary_label = "F1 Score" + else: + true_anomalies = np.sum(predicted) + # Compute % of data flagged + y_secondary.append(100 * np.mean(predicted)) + y_secondary_label = "% of Data Flagged" + + anomalies_found.append(true_anomalies) + + # Start building the plot + fig = go.Figure() + + # Primary Y: Anomalies detected + fig.add_trace( + go.Scatter( + x=thresholds, + y=anomalies_found, + name="Anomalies Detected", + mode="lines", + line=dict(color="royalblue"), + yaxis="y1", + ) + ) + + # Secondary Y: F1 or % flagged + fig.add_trace( + go.Scatter( + x=thresholds, + y=y_secondary, + name=y_secondary_label, + mode="lines", + line=dict(color="orange", dash="dot"), + yaxis="y2", + ) + ) + + # Vertical line for current threshold + if threshold is not None: + fig.add_trace( + go.Scatter( + x=[threshold, threshold], + y=[0, max(anomalies_found)], + mode="lines", + name=f"Current Threshold ({threshold:.2f})", + line=dict(color="firebrick", dash="dash"), + yaxis="y1", + ) + ) + + # Layout + fig.update_layout( + title=title, + xaxis=dict(title="Anomaly Score Threshold"), + yaxis=dict(title="Anomalies Detected", side="left"), + yaxis2=dict( + title=y_secondary_label, overlaying="y", side="right", showgrid=False + ), + legend=dict(x=0.01, y=0.99), + template="plotly_white", + ) + + fig.show() From e2e3ae23f016eaf92b5bc0f9fe89d839ee87d3c2 Mon Sep 17 00:00:00 2001 From: Allen Date: Wed, 16 Apr 2025 13:02:45 +0100 Subject: [PATCH 2/9] error handling --- ads/opctl/operator/lowcode/anomaly/const.py | 2 +- .../lowcode/anomaly/model/anomaly_merlion.py | 52 +++++++++++-------- 2 files changed, 31 insertions(+), 23 deletions(-) diff --git a/ads/opctl/operator/lowcode/anomaly/const.py b/ads/opctl/operator/lowcode/anomaly/const.py index 96c19f94b..0c0bf1e63 100644 --- a/ads/opctl/operator/lowcode/anomaly/const.py +++ b/ads/opctl/operator/lowcode/anomaly/const.py @@ -166,4 +166,4 @@ class OutputColumns(ExtendedEnum): TODS_DEFAULT_MODEL = "ocsvm" -SUBSAMPLE_THRESHOLD = 1000 +SUBSAMPLE_THRESHOLD = 10 diff --git a/ads/opctl/operator/lowcode/anomaly/model/anomaly_merlion.py b/ads/opctl/operator/lowcode/anomaly/model/anomaly_merlion.py index 9fd57ee6d..c2b5d770a 100644 --- a/ads/opctl/operator/lowcode/anomaly/model/anomaly_merlion.py +++ b/ads/opctl/operator/lowcode/anomaly/model/anomaly_merlion.py @@ -67,7 +67,7 @@ def _preprocess_data(self, df, date_column): if df[date_column].dt.tz is not None: df[date_column] = df[date_column].dt.tz_convert(None) data = df.set_index(date_column) - return TimeSeries.from_pd(data) + return data def _build_model(self) -> AnomalyOutput: """ @@ -82,6 +82,17 @@ def _build_model(self) -> AnomalyOutput: AnomalyOutput An AnomalyOutput object containing the anomaly detection results. """ + + def _inject_train_data( + v_data: pd.DataFrame, train_data: pd.DataFrame + ) -> pd.DataFrame: + # Step 1: Get index from train data not already present in validation data + v_index_set = set(v_data.index) + filtered_train = train_data[~train_data.index.isin(v_index_set)] + + combined_data = pd.concat([filtered_train, v_data]).sort_index() + return combined_data + model_kwargs = self.spec.model_kwargs anomaly_output = AnomalyOutput(date_column="index") anomaly_threshold = model_kwargs.get("anomaly_threshold", 95) @@ -93,7 +104,8 @@ def _build_model(self) -> AnomalyOutput: anomaly_output = AnomalyOutput(date_column=date_column) # model_objects = defaultdict(list) for s_id, df in self.datasets.full_data_dict.items(): - data = self._preprocess_data(df, date_column) + df_clean = self._preprocess_data(df, date_column) + data = TimeSeries.from_pd(df_clean) for _, (model_config, model) in model_config_map.items(): if self.spec.model == SupportedModels.BOCPD: model_config = model_config(**self.spec.model_kwargs) @@ -115,28 +127,24 @@ def _build_model(self) -> AnomalyOutput: model = model(model_config) scores = None if ( - self.X_valid_dict.get(s_id, None) is not None - and self.y_valid_dict.get(s_id, None) is not None + hasattr(self.datasets, "valid_data") + and self.datasets.valid_data.get_data_for_series(s_id) is not None ): - try: - # Do we need to set datetime col as index? - # TODO: get data by series id - v_df = self.X_valid_dict[s_id] - v_labels = self.y_valid_dict[s_id] - v_data = self._preprocess_data(v_df, date_column) - v_labels.index = v_data.index - print(v_data, v_labels) - scores_v = model.train( - train_data=v_data, anomaly_labels=v_labels - ) - print("checkpoint 1") - scores = model.get_anomaly_score( - train_data=data, anomaly_labels=None - ) - except Exception as e: - logging.debug(f"Failed to use validation data with error: {e}") + # try: + v_df = self.datasets.valid_data.get_data_for_series(s_id) + v_data = self._preprocess_data(v_df, date_column) + + v_labels = TimeSeries.from_pd(v_data["anomaly"]) + v_data = v_data.drop("anomaly", axis=1) + v_data = _inject_train_data(v_data, df_clean) + scores_v = model.train( + train_data=TimeSeries.from_pd(v_data), anomaly_labels=v_labels + ) + scores = TimeSeries.from_pd(scores_v.to_pd().loc[df_clean.index]) + # except Exception as e: + # logging.debug(f"Failed to use validation data with error: {e}") if scores is None: - scores = model.train(train_data=data, anomaly_labels=None) + scores = model.train(train_data=data) # Normalize scores out of 100 scores = scores.to_pd().reset_index() From e7402eccf7e86d9e15ecb30deb6ffa7b46747bc9 Mon Sep 17 00:00:00 2001 From: Allen Date: Wed, 16 Apr 2025 14:55:14 +0100 Subject: [PATCH 3/9] update model names --- ads/opctl/operator/lowcode/anomaly/const.py | 57 ++++-- .../lowcode/anomaly/model/anomaly_merlion.py | 181 ++++++++++-------- .../operator/lowcode/anomaly/model/factory.py | 14 +- .../operator/lowcode/anomaly/schema.yaml | 6 + 4 files changed, 160 insertions(+), 98 deletions(-) diff --git a/ads/opctl/operator/lowcode/anomaly/const.py b/ads/opctl/operator/lowcode/anomaly/const.py index 0c0bf1e63..e9640f3a6 100644 --- a/ads/opctl/operator/lowcode/anomaly/const.py +++ b/ads/opctl/operator/lowcode/anomaly/const.py @@ -10,15 +10,31 @@ class SupportedModels(ExtendedEnum): """Supported anomaly models.""" + # Autots Models AutoTS = "autots" Auto = "auto" IQR = "iqr" - LOF = "lof" ZSCORE = "zscore" ROLLING_ZSCORE = "rolling_zscore" MAD = "mad" EE = "ee" - ISOLATIONFOREST = "isolationforest" + + # univariate Merlion + StatThreshold = "StatThreshold" + WindStats = "WindStats" + DBL = "DynamicBaseline" + SPECTRAL_RESIDUAL = "spectral_residual" + ZMS = "ZMS" + + # multivariate Merlion + DAGMM = "dagmm" + DEEP_POINT_ANOMALY_DETECTOR = "deep_point_anomaly_detector" + LSTM_ED = "lstm_ed" + VAE = "vae" + RandomCutForest = "RandomCutForest" + AutoEncoder = "AutoEncoder" + LOF = "lof" + IsolationForest = "IsolationForest" # point anomaly DAGMM = "dagmm" @@ -32,6 +48,7 @@ class SupportedModels(ExtendedEnum): ETS = "ets" PROPHET = "prophet" SARIMA = "sarima" + MSES = "mses" # changepoint BOCPD = "bocpd" @@ -82,37 +99,34 @@ class TODSSubModels(ExtendedEnum): class MerlionADModels(ExtendedEnum): """Supported Merlion AD sub models.""" + # univariate + StatThreshold = "StatThreshold" + WindStats = "WindStats" + DBL = "DynamicBaseline" + SPECTRAL_RESIDUAL = "spectral_residual" + ZMS = "ZMS" + # point anomaly DAGMM = "dagmm" DEEP_POINT_ANOMALY_DETECTOR = "deep_point_anomaly_detector" LSTM_ED = "lstm_ed" - SPECTRAL_RESIDUAL = "spectral_residual" VAE = "vae" + RandomCutForest = "RandomCutForest" + AutoEncoder = "AutoEncoder" + LOF = "LocalOutlierFactor" + IsolationForest = "IsolationForest" # forecast_based ARIMA = "arima" ETS = "ets" PROPHET = "prophet" SARIMA = "sarima" + MSES = "mses" # changepoint BOCPD = "bocpd" -MERLIONAD_IMPORT_MODEL_MAP = { - MerlionADModels.DAGMM: ".dagmm", - MerlionADModels.DEEP_POINT_ANOMALY_DETECTOR: ".deep_point_anomaly_detector", - MerlionADModels.LSTM_ED: ".lstm_ed", - MerlionADModels.SPECTRAL_RESIDUAL: ".spectral_residual", - MerlionADModels.VAE: ".vae", - MerlionADModels.ARIMA: ".forecast_based.arima", - MerlionADModels.ETS: ".forecast_based.ets", - MerlionADModels.PROPHET: ".forecast_based.prophet", - MerlionADModels.SARIMA: ".forecast_based.sarima", - MerlionADModels.BOCPD: ".change_point.bocpd", -} - - MERLIONAD_MODEL_MAP = { MerlionADModels.DAGMM: "DAGMM", MerlionADModels.DEEP_POINT_ANOMALY_DETECTOR: "DeepPointAnomalyDetector", @@ -123,7 +137,16 @@ class MerlionADModels(ExtendedEnum): MerlionADModels.ETS: "ETSDetector", MerlionADModels.PROPHET: "ProphetDetector", MerlionADModels.SARIMA: "SarimaDetector", + MerlionADModels.MSES: "MSESDetector", MerlionADModels.BOCPD: "BOCPD", + MerlionADModels.ZMS: "ZMS", + MerlionADModels.RandomCutForest: "RandomCutForest", + MerlionADModels.AutoEncoder: "AutoEncoder", + MerlionADModels.LOF: "LocalOutlierFactor", + MerlionADModels.IsolationForest: "IsolationForest", + MerlionADModels.StatThreshold: "StatThreshold", + MerlionADModels.WindStats: "WindStats", + MerlionADModels.DBL: "DynamicBaseline", } diff --git a/ads/opctl/operator/lowcode/anomaly/model/anomaly_merlion.py b/ads/opctl/operator/lowcode/anomaly/model/anomaly_merlion.py index c2b5d770a..65c21a7cd 100644 --- a/ads/opctl/operator/lowcode/anomaly/model/anomaly_merlion.py +++ b/ads/opctl/operator/lowcode/anomaly/model/anomaly_merlion.py @@ -1,6 +1,6 @@ #!/usr/bin/env python -# Copyright (c) 2023, 2024 Oracle and/or its affiliates. +# Copyright (c) 2023, 2025 Oracle and/or its affiliates. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ import importlib @@ -26,6 +26,48 @@ logging.getLogger("report_creator").setLevel(logging.WARNING) +def prepare_model_kwargs(model_name, model_kwargs): + model_name = MERLIONAD_MODEL_MAP.get(model_name) + + # individual handling by model + if model_name == SupportedModels.BOCPD: + if ( + model_kwargs.get("threshold", None) is None + and model_kwargs.get("alm_threshold") is not None + ): + model_kwargs["threshold"] = AggregateAlarms( + alm_threshold=model_kwargs.get("alm_threshold") + ) + elif ( + model_name == SupportedModels.MSES + and model_kwargs.get("max_forecast_steps", None) is None + ): + model_kwargs["max_forecast_steps"] = 1 + return model_kwargs + + +def init_merlion_model(model_name, model_kwargs): + from merlion.models.factory import ModelFactory + + model_name = MERLIONAD_MODEL_MAP.get(model_name) + + if model_name == "DeepPointAnomalyDetector": + from merlion.models.anomaly.deep_point_anomaly_detector import ( + DeepPointAnomalyDetector, + ) + + model = DeepPointAnomalyDetector( + DeepPointAnomalyDetector.config_class(**model_kwargs) + ) + unused_model_kwargs = model_kwargs + else: + model, unused_model_kwargs = ModelFactory.create( + model_name, return_unused_kwargs=True, **model_kwargs + ) + + return model, unused_model_kwargs + + class AnomalyMerlionOperatorModel(AnomalyOperatorBaseModel): """Class representing Merlion Anomaly Detection operator model.""" @@ -93,92 +135,75 @@ def _inject_train_data( combined_data = pd.concat([filtered_train, v_data]).sort_index() return combined_data - model_kwargs = self.spec.model_kwargs + model_kwargs = prepare_model_kwargs( + self.spec.model, self.spec.model_kwargs.copy() + ) anomaly_output = AnomalyOutput(date_column="index") anomaly_threshold = model_kwargs.get("anomaly_threshold", 95) - model_config_map = {} - model_config_map = self._get_config_model(self.spec.model) - date_column = self.spec.datetime_column.name anomaly_output = AnomalyOutput(date_column=date_column) - # model_objects = defaultdict(list) + for s_id, df in self.datasets.full_data_dict.items(): df_clean = self._preprocess_data(df, date_column) data = TimeSeries.from_pd(df_clean) - for _, (model_config, model) in model_config_map.items(): - if self.spec.model == SupportedModels.BOCPD: - model_config = model_config(**self.spec.model_kwargs) - else: - model_config = model_config( - **{ - **self.spec.model_kwargs, - "threshold": AggregateAlarms( - alm_threshold=model_kwargs.get("alm_threshold") - if model_kwargs.get("alm_threshold") - else None - ), - } - ) - if hasattr(model_config, "target_seq_index"): - model_config.target_seq_index = df.columns.get_loc( - self.spec.target_column + target_seq_index = df_clean.columns.get_loc(self.spec.target_column) + model, unused_model_kwargs = init_merlion_model( + self.spec.model, target_seq_index, model_kwargs + ) + scores = None + + if ( + hasattr(self.datasets, "valid_data") + and self.datasets.valid_data.get_data_for_series(s_id) is not None + ): + # try: + v_df = self.datasets.valid_data.get_data_for_series(s_id) + v_data = self._preprocess_data(v_df, date_column) + + v_labels = TimeSeries.from_pd(v_data["anomaly"]) + v_data = v_data.drop("anomaly", axis=1) + v_data = _inject_train_data(v_data, df_clean) + scores_v = model.train( + train_data=TimeSeries.from_pd(v_data), anomaly_labels=v_labels + ) + scores = TimeSeries.from_pd(scores_v.to_pd().loc[df_clean.index]) + # except Exception as e: + # logging.debug(f"Failed to use validation data with error: {e}") + if scores is None: + scores = model.train(train_data=data) + + # Normalize scores out of 100 + scores = scores.to_pd().reset_index() + scores["anom_score"] = ( + scores["anom_score"] - scores["anom_score"].min() + ) / (scores["anom_score"].max() - scores["anom_score"].min()) + + try: + y_pred = model.get_anomaly_label(data) + y_pred = (y_pred.to_pd().reset_index()["anom_score"] > 0).astype(int) + except Exception: + y_pred = ( + scores["anom_score"] + > np.percentile( + scores["anom_score"], + anomaly_threshold, ) - model = model(model_config) - scores = None - if ( - hasattr(self.datasets, "valid_data") - and self.datasets.valid_data.get_data_for_series(s_id) is not None - ): - # try: - v_df = self.datasets.valid_data.get_data_for_series(s_id) - v_data = self._preprocess_data(v_df, date_column) - - v_labels = TimeSeries.from_pd(v_data["anomaly"]) - v_data = v_data.drop("anomaly", axis=1) - v_data = _inject_train_data(v_data, df_clean) - scores_v = model.train( - train_data=TimeSeries.from_pd(v_data), anomaly_labels=v_labels - ) - scores = TimeSeries.from_pd(scores_v.to_pd().loc[df_clean.index]) - # except Exception as e: - # logging.debug(f"Failed to use validation data with error: {e}") - if scores is None: - scores = model.train(train_data=data) - - # Normalize scores out of 100 - scores = scores.to_pd().reset_index() - scores["anom_score"] = ( - scores["anom_score"] - scores["anom_score"].min() - ) / (scores["anom_score"].max() - scores["anom_score"].min()) - - try: - y_pred = model.get_anomaly_label(data) - y_pred = (y_pred.to_pd().reset_index()["anom_score"] > 0).astype( - int - ) - except Exception: - y_pred = ( - scores["anom_score"] - > np.percentile( - scores["anom_score"], - anomaly_threshold, - ) - ).astype(int) - - index_col = df.columns[0] - - anomaly = pd.DataFrame( - {index_col: df[index_col], OutputColumns.ANOMALY_COL: y_pred} - ).reset_index(drop=True) - score = pd.DataFrame( - { - index_col: df[index_col], - OutputColumns.SCORE_COL: scores["anom_score"], - } - ).reset_index(drop=True) - - anomaly_output.add_output(s_id, anomaly, score) + ).astype(int) + + index_col = df.columns[0] + + anomaly = pd.DataFrame( + {index_col: df[index_col], OutputColumns.ANOMALY_COL: y_pred} + ).reset_index(drop=True) + score = pd.DataFrame( + { + index_col: df[index_col], + OutputColumns.SCORE_COL: scores["anom_score"], + } + ).reset_index(drop=True) + + anomaly_output.add_output(s_id, anomaly, score) return anomaly_output def _generate_report(self): diff --git a/ads/opctl/operator/lowcode/anomaly/model/factory.py b/ads/opctl/operator/lowcode/anomaly/model/factory.py index 1883d7ce8..1a347a4da 100644 --- a/ads/opctl/operator/lowcode/anomaly/model/factory.py +++ b/ads/opctl/operator/lowcode/anomaly/model/factory.py @@ -45,22 +45,30 @@ class AnomalyOperatorModelFactory: _MAP = { SupportedModels.AutoTS: AutoTSOperatorModel, + SupportedModels.Auto: AutoTSOperatorModel, SupportedModels.IQR: AutoTSOperatorModel, - SupportedModels.LOF: AutoTSOperatorModel, - SupportedModels.ISOLATIONFOREST: AutoTSOperatorModel, SupportedModels.ZSCORE: AutoTSOperatorModel, SupportedModels.ROLLING_ZSCORE: AutoTSOperatorModel, SupportedModels.EE: AutoTSOperatorModel, SupportedModels.MAD: AutoTSOperatorModel, + SupportedModels.StatThreshold: AnomalyMerlionOperatorModel, + SupportedModels.WindStats: AnomalyMerlionOperatorModel, + SupportedModels.DBL: AnomalyMerlionOperatorModel, + SupportedModels.SPECTRAL_RESIDUAL: AnomalyMerlionOperatorModel, + SupportedModels.ZMS: AnomalyMerlionOperatorModel, SupportedModels.DAGMM: AnomalyMerlionOperatorModel, SupportedModels.DEEP_POINT_ANOMALY_DETECTOR: AnomalyMerlionOperatorModel, SupportedModels.LSTM_ED: AnomalyMerlionOperatorModel, - SupportedModels.SPECTRAL_RESIDUAL: AnomalyMerlionOperatorModel, SupportedModels.VAE: AnomalyMerlionOperatorModel, + SupportedModels.RandomCutForest: AnomalyMerlionOperatorModel, + SupportedModels.AutoEncoder: AnomalyMerlionOperatorModel, + SupportedModels.LOF: AnomalyMerlionOperatorModel, + SupportedModels.IsolationForest: AnomalyMerlionOperatorModel, SupportedModels.ARIMA: AnomalyMerlionOperatorModel, SupportedModels.ETS: AnomalyMerlionOperatorModel, SupportedModels.PROPHET: AnomalyMerlionOperatorModel, SupportedModels.SARIMA: AnomalyMerlionOperatorModel, + SupportedModels.MSES: AnomalyMerlionOperatorModel, SupportedModels.BOCPD: AnomalyMerlionOperatorModel, SupportedModels.AUTOMLX: AutoMLXOperatorModel, } diff --git a/ads/opctl/operator/lowcode/anomaly/schema.yaml b/ads/opctl/operator/lowcode/anomaly/schema.yaml index 00130e16d..b039102db 100644 --- a/ads/opctl/operator/lowcode/anomaly/schema.yaml +++ b/ads/opctl/operator/lowcode/anomaly/schema.yaml @@ -379,8 +379,14 @@ spec: - arima - ets - sarima + - mses - bocpd - prophet + - statthreshold + - windstats + - dynamicbaseline + - zms + - autoencoder meta: description: "The model to be used for anomaly detection" From 0b80a6c7fd575c440e9fb19fccd70efe54cd6072 Mon Sep 17 00:00:00 2001 From: Allen Date: Wed, 16 Apr 2025 15:22:57 +0100 Subject: [PATCH 4/9] move to merlion --- ads/opctl/operator/lowcode/anomaly/const.py | 36 ++++----- .../lowcode/anomaly/model/anomaly_merlion.py | 75 ++++++++++--------- .../operator/lowcode/anomaly/model/autots.py | 8 +- .../operator/lowcode/anomaly/schema.yaml | 14 ++-- 4 files changed, 67 insertions(+), 66 deletions(-) diff --git a/ads/opctl/operator/lowcode/anomaly/const.py b/ads/opctl/operator/lowcode/anomaly/const.py index e9640f3a6..e731391ad 100644 --- a/ads/opctl/operator/lowcode/anomaly/const.py +++ b/ads/opctl/operator/lowcode/anomaly/const.py @@ -20,21 +20,21 @@ class SupportedModels(ExtendedEnum): EE = "ee" # univariate Merlion - StatThreshold = "StatThreshold" - WindStats = "WindStats" - DBL = "DynamicBaseline" + StatThreshold = "stat_threshold" + WindStats = "wind_stats" + DBL = "dynamic_baseline" SPECTRAL_RESIDUAL = "spectral_residual" - ZMS = "ZMS" + ZMS = "zms" # multivariate Merlion DAGMM = "dagmm" DEEP_POINT_ANOMALY_DETECTOR = "deep_point_anomaly_detector" LSTM_ED = "lstm_ed" VAE = "vae" - RandomCutForest = "RandomCutForest" - AutoEncoder = "AutoEncoder" + RandomCutForest = "random_cut_forest" + AutoEncoder = "auto_encoder" LOF = "lof" - IsolationForest = "IsolationForest" + IsolationForest = "isolation_forest" # point anomaly DAGMM = "dagmm" @@ -59,9 +59,9 @@ class SupportedModels(ExtendedEnum): class NonTimeADSupportedModels(ExtendedEnum): """Supported non time-based anomaly detection models.""" - OneClassSVM = "oneclasssvm" - IsolationForest = "isolationforest" - RandomCutForest = "randomcutforest" + OneClassSVM = "one_class_svm" + IsolationForest = "isolation_forest" + RandomCutForest = "random_cut_forest" # TODO : Add DBScan # DBScan = "dbscan" @@ -100,21 +100,21 @@ class MerlionADModels(ExtendedEnum): """Supported Merlion AD sub models.""" # univariate - StatThreshold = "StatThreshold" - WindStats = "WindStats" - DBL = "DynamicBaseline" + StatThreshold = "stat_threshold" + WindStats = "wind_stats" + DBL = "dynamic_baseline" SPECTRAL_RESIDUAL = "spectral_residual" - ZMS = "ZMS" + ZMS = "zms" # point anomaly DAGMM = "dagmm" DEEP_POINT_ANOMALY_DETECTOR = "deep_point_anomaly_detector" LSTM_ED = "lstm_ed" VAE = "vae" - RandomCutForest = "RandomCutForest" - AutoEncoder = "AutoEncoder" - LOF = "LocalOutlierFactor" - IsolationForest = "IsolationForest" + RandomCutForest = "random_cut_forest" + AutoEncoder = "auto_encoder" + LOF = "lof" + IsolationForest = "isolation_forest" # forecast_based ARIMA = "arima" diff --git a/ads/opctl/operator/lowcode/anomaly/model/anomaly_merlion.py b/ads/opctl/operator/lowcode/anomaly/model/anomaly_merlion.py index 65c21a7cd..5ba929f2e 100644 --- a/ads/opctl/operator/lowcode/anomaly/model/anomaly_merlion.py +++ b/ads/opctl/operator/lowcode/anomaly/model/anomaly_merlion.py @@ -3,7 +3,6 @@ # Copyright (c) 2023, 2025 Oracle and/or its affiliates. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ -import importlib import logging import numpy as np @@ -14,7 +13,6 @@ from ads.common.decorator.runtime_dependency import runtime_dependency from ads.opctl.operator.lowcode.anomaly.const import ( - MERLIONAD_IMPORT_MODEL_MAP, MERLIONAD_MODEL_MAP, OutputColumns, SupportedModels, @@ -46,11 +44,20 @@ def prepare_model_kwargs(model_name, model_kwargs): return model_kwargs -def init_merlion_model(model_name, model_kwargs): +@runtime_dependency( + module="merlion", + err_msg=( + "Please run `pip3 install salesforce-merlion[all]` to " + "install the required packages." + ), +) +def init_merlion_model(model_name, target_seq_index, model_kwargs): from merlion.models.factory import ModelFactory model_name = MERLIONAD_MODEL_MAP.get(model_name) + model_kwargs["target_seq_index"] = target_seq_index + if model_name == "DeepPointAnomalyDetector": from merlion.models.anomaly.deep_point_anomaly_detector import ( DeepPointAnomalyDetector, @@ -71,38 +78,31 @@ def init_merlion_model(model_name, model_kwargs): class AnomalyMerlionOperatorModel(AnomalyOperatorBaseModel): """Class representing Merlion Anomaly Detection operator model.""" - @runtime_dependency( - module="merlion", - err_msg=( - "Please run `pip3 install salesforce-merlion[all]` to " - "install the required packages." - ), - ) - def _get_config_model(self, model_name): - """ - Returns a dictionary with model names as keys and a list of model config and model object as values. - - Parameters - ---------- - model_name : str - model name from the Merlion model list. - - Returns - ------- - dict - A dictionary with model names as keys and a list of model config and model object as values. - """ - model_config_map = {} - model_module = importlib.import_module( - name=MERLIONAD_IMPORT_MODEL_MAP.get(model_name), - package="merlion.models.anomaly", - ) - model_config = getattr( - model_module, MERLIONAD_MODEL_MAP.get(model_name) + "Config" - ) - model = getattr(model_module, MERLIONAD_MODEL_MAP.get(model_name)) - model_config_map[model_name] = [model_config, model] - return model_config_map + # def _get_config_model(self, model_name): + # """ + # Returns a dictionary with model names as keys and a list of model config and model object as values. + + # Parameters + # ---------- + # model_name : str + # model name from the Merlion model list. + + # Returns + # ------- + # dict + # A dictionary with model names as keys and a list of model config and model object as values. + # """ + # model_config_map = {} + # model_module = importlib.import_module( + # name=MERLIONAD_IMPORT_MODEL_MAP.get(model_name), + # package="merlion.models.anomaly", + # ) + # model_config = getattr( + # model_module, MERLIONAD_MODEL_MAP.get(model_name) + "Config" + # ) + # model = getattr(model_module, MERLIONAD_MODEL_MAP.get(model_name)) + # model_config_map[model_name] = [model_config, model] + # return model_config_map def _preprocess_data(self, df, date_column): df[date_column] = pd.to_datetime(df[date_column]) @@ -152,7 +152,6 @@ def _inject_train_data( self.spec.model, target_seq_index, model_kwargs ) scores = None - if ( hasattr(self.datasets, "valid_data") and self.datasets.valid_data.get_data_for_series(s_id) is not None @@ -167,7 +166,9 @@ def _inject_train_data( scores_v = model.train( train_data=TimeSeries.from_pd(v_data), anomaly_labels=v_labels ) - scores = TimeSeries.from_pd(scores_v.to_pd().loc[df_clean.index]) + scores_df = scores_v.to_pd() + available_index = scores_df.index.intersection(df_clean.index) + scores = TimeSeries.from_pd(scores_df.loc[available_index]) # except Exception as e: # logging.debug(f"Failed to use validation data with error: {e}") if scores is None: diff --git a/ads/opctl/operator/lowcode/anomaly/model/autots.py b/ads/opctl/operator/lowcode/anomaly/model/autots.py index 550833a67..68a7a73f4 100644 --- a/ads/opctl/operator/lowcode/anomaly/model/autots.py +++ b/ads/opctl/operator/lowcode/anomaly/model/autots.py @@ -1,6 +1,6 @@ #!/usr/bin/env python -# Copyright (c) 2023, 2024 Oracle and/or its affiliates. +# Copyright (c) 2023, 2025 Oracle and/or its affiliates. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ import logging @@ -22,7 +22,7 @@ class AutoTSOperatorModel(AnomalyOperatorBaseModel): """Class representing AutoTS Anomaly Detection operator model.""" model_mapping = { - "isolationforest": "IsolationForest", + # "isolationforest": "IsolationForest", "lof": "LOF", "ee": "EE", "zscore": "zscore", @@ -43,7 +43,7 @@ def _build_model(self) -> AnomalyOutput: from autots.evaluator.anomaly_detector import AnomalyDetector method = ( - SupportedModels.ISOLATIONFOREST + SupportedModels.IQR if self.spec.model == SupportedModels.AutoTS else self.spec.model ) @@ -55,7 +55,7 @@ def _build_model(self) -> AnomalyOutput: } # Supported methods with contamination param if method in [ - SupportedModels.ISOLATIONFOREST, + # SupportedModels.IsolationForest, SupportedModels.LOF, SupportedModels.EE, ]: diff --git a/ads/opctl/operator/lowcode/anomaly/schema.yaml b/ads/opctl/operator/lowcode/anomaly/schema.yaml index b039102db..c8bac56db 100644 --- a/ads/opctl/operator/lowcode/anomaly/schema.yaml +++ b/ads/opctl/operator/lowcode/anomaly/schema.yaml @@ -362,9 +362,9 @@ spec: - autots - auto - automlx - - oneclasssvm - - isolationforest - - randomcutforest + - one_class_svm + - isolation_forest + - random_cut_forest - iqr - lof - zscore @@ -382,11 +382,11 @@ spec: - mses - bocpd - prophet - - statthreshold - - windstats - - dynamicbaseline + - stat_threshold + - wind_stats + - dynamic_baseline - zms - - autoencoder + - auto_encoder meta: description: "The model to be used for anomaly detection" From 18441cf26ea258e1ec72de7f745b452e1a2644e7 Mon Sep 17 00:00:00 2001 From: Allen Date: Wed, 16 Apr 2025 16:22:07 +0100 Subject: [PATCH 5/9] add auto-select --- ads/opctl/operator/lowcode/anomaly/const.py | 3 +- .../lowcode/anomaly/model/autoselect.py | 122 ++++++++++++++++++ .../lowcode/anomaly/model/base_model.py | 27 ++-- .../operator/lowcode/anomaly/model/factory.py | 2 + .../operator/lowcode/anomaly/schema.yaml | 1 + 5 files changed, 142 insertions(+), 13 deletions(-) create mode 100644 ads/opctl/operator/lowcode/anomaly/model/autoselect.py diff --git a/ads/opctl/operator/lowcode/anomaly/const.py b/ads/opctl/operator/lowcode/anomaly/const.py index e731391ad..97960c9e7 100644 --- a/ads/opctl/operator/lowcode/anomaly/const.py +++ b/ads/opctl/operator/lowcode/anomaly/const.py @@ -13,6 +13,7 @@ class SupportedModels(ExtendedEnum): # Autots Models AutoTS = "autots" Auto = "auto" + AutoSelect = "auto-select" IQR = "iqr" ZSCORE = "zscore" ROLLING_ZSCORE = "rolling_zscore" @@ -189,4 +190,4 @@ class OutputColumns(ExtendedEnum): TODS_DEFAULT_MODEL = "ocsvm" -SUBSAMPLE_THRESHOLD = 10 +SUBSAMPLE_THRESHOLD = 1000 diff --git a/ads/opctl/operator/lowcode/anomaly/model/autoselect.py b/ads/opctl/operator/lowcode/anomaly/model/autoselect.py new file mode 100644 index 000000000..4669aabf7 --- /dev/null +++ b/ads/opctl/operator/lowcode/anomaly/model/autoselect.py @@ -0,0 +1,122 @@ +#!/usr/bin/env python + +# Copyright (c) 2025 Oracle and/or its affiliates. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ + +import logging + +import report_creator as rc + +from ads.opctl.operator.lowcode.anomaly.const import ( + SUBSAMPLE_THRESHOLD, + OutputColumns, +) + +from .anomaly_merlion import AnomalyMerlionOperatorModel +from .base_model import AnomalyOperatorBaseModel + +logging.getLogger("report_creator").setLevel(logging.WARNING) + + +class AutoSelectOperatorModel(AnomalyOperatorBaseModel): + def _build_model(**kwargs): + pass + + def generate_report(self): + anom_outputs = {} + all_plots = {} + model_list = self.spec.model_kwargs.pop("model_list", ["lof", "prophet"]) + for m in model_list: + config_i = self.config + config_i.spec.model = m + try: + anom_outputs[m] = AnomalyMerlionOperatorModel( + config_i, self.datasets + )._build_model() + all_plots[m] = self._get_plots_from_output(anom_outputs[m], m) + except: + logging.debug(f"Model {m} failed. skipping.") + return self._generate_report(all_plots, anom_outputs, model_list) + + def _get_plots_from_output(self, anomaly_output, model): + import matplotlib.pyplot as plt + + plt.rcParams.update({"figure.max_open_warning": 0}) + + blocks = [] + date_column = self.spec.datetime_column.name + for target, df in self.datasets.full_data_dict.items(): + if target in anomaly_output.list_categories(): + figure_blocks = [] + time_col = df[date_column].reset_index(drop=True) + anomaly_col = anomaly_output.get_anomalies_by_cat(category=target)[ + OutputColumns.ANOMALY_COL + ] + anomaly_indices = [ + i for i, index in enumerate(anomaly_col) if index == 1 + ] + downsampled_time_col = time_col + selected_indices = list(range(len(time_col))) + if self.spec.subsample_report_data: + non_anomaly_indices = [ + i for i in range(len(time_col)) if i not in anomaly_indices + ] + # Downsample non-anomalous data if it exceeds the threshold (1000) + if len(non_anomaly_indices) > SUBSAMPLE_THRESHOLD: + downsampled_non_anomaly_indices = non_anomaly_indices[ + :: len(non_anomaly_indices) // SUBSAMPLE_THRESHOLD + ] + selected_indices = ( + anomaly_indices + downsampled_non_anomaly_indices + ) + selected_indices.sort() + downsampled_time_col = time_col[selected_indices] + + columns = set(df.columns).difference({date_column}) + for col in columns: + y = df[col].reset_index(drop=True) + + downsampled_y = y[selected_indices] + + fig, ax = plt.subplots(figsize=(8, 3), layout="constrained") + ax.grid() + ax.plot(downsampled_time_col, downsampled_y, color="black") + # Plot anomalies + for i in anomaly_indices: + ax.scatter(time_col[i], y[i], color="red", marker="o") + plt.xlabel(date_column) + plt.ylabel(col) + plt.title(f"`{col}` with reference to anomalies") + figure_blocks.append(rc.Widget(ax)) + else: + figure_blocks = None + + blocks.append( + rc.Group(*figure_blocks, label=f"{target}_{model}") + ) if figure_blocks else None + plots = rc.Select(blocks) + return plots + + def _generate_report(self, all_plots, anomaly_outputs, model_list): + """Genreates a report for the model.""" + title_text = rc.Heading("Auto-Select Report", level=2) + summary = rc.Text( + "This report presents the results of multiple model tuning experiments, visualized to facilitate comparative analysis and informed model selection. Each modeling framework has been systematically trained, hyperparameter-tuned, and validated to optimize performance based on the characteristics of your dataset." + ) + + model_sections = [] + for m in model_list: + model_sections.append(all_plots[m]) + sec_text = rc.Heading(f"Train Evaluation Metrics for {m}", level=3) + sec = rc.DataTable(self._evaluation_metrics(anomaly_outputs[m]), index=True) + model_sections.append(sec_text) + model_sections.append(sec) + + report_sections = [title_text, summary] + model_sections + + # save the report and result CSV + self._save_report( + report_sections=report_sections, + anomaly_output=None, + test_metrics=None, + ) diff --git a/ads/opctl/operator/lowcode/anomaly/model/base_model.py b/ads/opctl/operator/lowcode/anomaly/model/base_model.py index 16876fdfa..f7bbabf1f 100644 --- a/ads/opctl/operator/lowcode/anomaly/model/base_model.py +++ b/ads/opctl/operator/lowcode/anomaly/model/base_model.py @@ -327,23 +327,26 @@ def _save_report( ) as f2: f2.write(f1.read()) - if self.spec.generate_inliers: - inliers = anomaly_output.get_inliers(self.datasets) + if anomaly_output is not None: + if self.spec.generate_inliers: + inliers = anomaly_output.get_inliers(self.datasets) + write_data( + data=inliers, + filename=os.path.join( + unique_output_dir, self.spec.inliers_filename + ), + format="csv", + storage_options=storage_options, + ) + + outliers = anomaly_output.get_outliers(self.datasets) write_data( - data=inliers, - filename=os.path.join(unique_output_dir, self.spec.inliers_filename), + data=outliers, + filename=os.path.join(unique_output_dir, self.spec.outliers_filename), format="csv", storage_options=storage_options, ) - outliers = anomaly_output.get_outliers(self.datasets) - write_data( - data=outliers, - filename=os.path.join(unique_output_dir, self.spec.outliers_filename), - format="csv", - storage_options=storage_options, - ) - if test_metrics is not None and not test_metrics.empty: write_data( data=test_metrics.rename_axis("metrics").reset_index(), diff --git a/ads/opctl/operator/lowcode/anomaly/model/factory.py b/ads/opctl/operator/lowcode/anomaly/model/factory.py index 1a347a4da..2e8b57abc 100644 --- a/ads/opctl/operator/lowcode/anomaly/model/factory.py +++ b/ads/opctl/operator/lowcode/anomaly/model/factory.py @@ -10,6 +10,7 @@ from .anomaly_dataset import AnomalyDatasets from .anomaly_merlion import AnomalyMerlionOperatorModel from .automlx import AutoMLXOperatorModel +from .autoselect import AutoSelectOperatorModel from .autots import AutoTSOperatorModel from .base_model import AnomalyOperatorBaseModel from .isolationforest import IsolationForestOperatorModel @@ -71,6 +72,7 @@ class AnomalyOperatorModelFactory: SupportedModels.MSES: AnomalyMerlionOperatorModel, SupportedModels.BOCPD: AnomalyMerlionOperatorModel, SupportedModels.AUTOMLX: AutoMLXOperatorModel, + SupportedModels.AutoSelect: AutoSelectOperatorModel, } _NonTime_MAP = { diff --git a/ads/opctl/operator/lowcode/anomaly/schema.yaml b/ads/opctl/operator/lowcode/anomaly/schema.yaml index c8bac56db..bb225364a 100644 --- a/ads/opctl/operator/lowcode/anomaly/schema.yaml +++ b/ads/opctl/operator/lowcode/anomaly/schema.yaml @@ -361,6 +361,7 @@ spec: allowed: - autots - auto + - auto-select - automlx - one_class_svm - isolation_forest From a4957efdbfd5a3cbe1488d89c84d58bfbd46b3e7 Mon Sep 17 00:00:00 2001 From: Allen Date: Wed, 16 Apr 2025 16:33:20 +0100 Subject: [PATCH 6/9] enable any model for autoselect --- ads/opctl/operator/lowcode/anomaly/model/autoselect.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/ads/opctl/operator/lowcode/anomaly/model/autoselect.py b/ads/opctl/operator/lowcode/anomaly/model/autoselect.py index 4669aabf7..3e6406c22 100644 --- a/ads/opctl/operator/lowcode/anomaly/model/autoselect.py +++ b/ads/opctl/operator/lowcode/anomaly/model/autoselect.py @@ -12,7 +12,6 @@ OutputColumns, ) -from .anomaly_merlion import AnomalyMerlionOperatorModel from .base_model import AnomalyOperatorBaseModel logging.getLogger("report_creator").setLevel(logging.WARNING) @@ -23,6 +22,8 @@ def _build_model(**kwargs): pass def generate_report(self): + from .factory import AnomalyOperatorModelFactory + anom_outputs = {} all_plots = {} model_list = self.spec.model_kwargs.pop("model_list", ["lof", "prophet"]) @@ -30,7 +31,7 @@ def generate_report(self): config_i = self.config config_i.spec.model = m try: - anom_outputs[m] = AnomalyMerlionOperatorModel( + anom_outputs[m] = AnomalyOperatorModelFactory.get_model( config_i, self.datasets )._build_model() all_plots[m] = self._get_plots_from_output(anom_outputs[m], m) From 7e1695fd8095fa9208114716762ae909f7ade301 Mon Sep 17 00:00:00 2001 From: Allen Date: Wed, 16 Apr 2025 16:49:53 +0100 Subject: [PATCH 7/9] add lift gain chart --- ads/opctl/operator/lowcode/anomaly/model/autoselect.py | 8 ++++++++ ads/opctl/operator/lowcode/anomaly/utils.py | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/ads/opctl/operator/lowcode/anomaly/model/autoselect.py b/ads/opctl/operator/lowcode/anomaly/model/autoselect.py index 3e6406c22..5b393ed62 100644 --- a/ads/opctl/operator/lowcode/anomaly/model/autoselect.py +++ b/ads/opctl/operator/lowcode/anomaly/model/autoselect.py @@ -12,6 +12,7 @@ OutputColumns, ) +from ..utils import plot_anomaly_threshold_gain from .base_model import AnomalyOperatorBaseModel logging.getLogger("report_creator").setLevel(logging.WARNING) @@ -112,6 +113,13 @@ def _generate_report(self, all_plots, anomaly_outputs, model_list): sec = rc.DataTable(self._evaluation_metrics(anomaly_outputs[m]), index=True) model_sections.append(sec_text) model_sections.append(sec) + cat1 = anomaly_outputs[m].list_categories()[0] + print(anomaly_outputs[m].get_scores_by_cat(cat1)) + fig = plot_anomaly_threshold_gain( + anomaly_outputs[m].get_scores_by_cat(cat1)["score"], + title=f"Threshold Analysis for {m}", + ) + model_sections.append(rc.Widget(fig)) report_sections = [title_text, summary] + model_sections diff --git a/ads/opctl/operator/lowcode/anomaly/utils.py b/ads/opctl/operator/lowcode/anomaly/utils.py index 17037d8d7..3892946a4 100644 --- a/ads/opctl/operator/lowcode/anomaly/utils.py +++ b/ads/opctl/operator/lowcode/anomaly/utils.py @@ -183,4 +183,4 @@ def plot_anomaly_threshold_gain( template="plotly_white", ) - fig.show() + return fig From 62fe3e3f5ce1c27067c7e21950ba989b19346084 Mon Sep 17 00:00:00 2001 From: Allen Date: Wed, 16 Apr 2025 16:59:12 +0100 Subject: [PATCH 8/9] remove print --- ads/opctl/operator/lowcode/anomaly/model/autoselect.py | 1 - 1 file changed, 1 deletion(-) diff --git a/ads/opctl/operator/lowcode/anomaly/model/autoselect.py b/ads/opctl/operator/lowcode/anomaly/model/autoselect.py index 5b393ed62..30f1f5547 100644 --- a/ads/opctl/operator/lowcode/anomaly/model/autoselect.py +++ b/ads/opctl/operator/lowcode/anomaly/model/autoselect.py @@ -114,7 +114,6 @@ def _generate_report(self, all_plots, anomaly_outputs, model_list): model_sections.append(sec_text) model_sections.append(sec) cat1 = anomaly_outputs[m].list_categories()[0] - print(anomaly_outputs[m].get_scores_by_cat(cat1)) fig = plot_anomaly_threshold_gain( anomaly_outputs[m].get_scores_by_cat(cat1)["score"], title=f"Threshold Analysis for {m}", From 9c07e02dc3cc6b73ae0752084c3f940b1434f81f Mon Sep 17 00:00:00 2001 From: Allen Date: Mon, 28 Apr 2025 15:08:54 +0100 Subject: [PATCH 9/9] inital commit of ad auto-select --- ads/opctl/operator/lowcode/anomaly/const.py | 4 ++-- ads/opctl/operator/lowcode/anomaly/model/autoselect.py | 9 ++++----- ads/opctl/operator/lowcode/anomaly/schema.yaml | 2 +- 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/ads/opctl/operator/lowcode/anomaly/const.py b/ads/opctl/operator/lowcode/anomaly/const.py index 97960c9e7..4623b165e 100644 --- a/ads/opctl/operator/lowcode/anomaly/const.py +++ b/ads/opctl/operator/lowcode/anomaly/const.py @@ -33,7 +33,7 @@ class SupportedModels(ExtendedEnum): LSTM_ED = "lstm_ed" VAE = "vae" RandomCutForest = "random_cut_forest" - AutoEncoder = "auto_encoder" + AutoEncoder = "autoencoder" LOF = "lof" IsolationForest = "isolation_forest" @@ -113,7 +113,7 @@ class MerlionADModels(ExtendedEnum): LSTM_ED = "lstm_ed" VAE = "vae" RandomCutForest = "random_cut_forest" - AutoEncoder = "auto_encoder" + AutoEncoder = "autoencoder" LOF = "lof" IsolationForest = "isolation_forest" diff --git a/ads/opctl/operator/lowcode/anomaly/model/autoselect.py b/ads/opctl/operator/lowcode/anomaly/model/autoselect.py index 30f1f5547..eb04ca0bf 100644 --- a/ads/opctl/operator/lowcode/anomaly/model/autoselect.py +++ b/ads/opctl/operator/lowcode/anomaly/model/autoselect.py @@ -27,8 +27,7 @@ def generate_report(self): anom_outputs = {} all_plots = {} - model_list = self.spec.model_kwargs.pop("model_list", ["lof", "prophet"]) - for m in model_list: + for m in self.spec.model_kwargs.pop("model_list", ["lof", "prophet"]): config_i = self.config config_i.spec.model = m try: @@ -38,7 +37,7 @@ def generate_report(self): all_plots[m] = self._get_plots_from_output(anom_outputs[m], m) except: logging.debug(f"Model {m} failed. skipping.") - return self._generate_report(all_plots, anom_outputs, model_list) + return self._generate_report(all_plots, anom_outputs) def _get_plots_from_output(self, anomaly_output, model): import matplotlib.pyplot as plt @@ -99,7 +98,7 @@ def _get_plots_from_output(self, anomaly_output, model): plots = rc.Select(blocks) return plots - def _generate_report(self, all_plots, anomaly_outputs, model_list): + def _generate_report(self, all_plots, anomaly_outputs): """Genreates a report for the model.""" title_text = rc.Heading("Auto-Select Report", level=2) summary = rc.Text( @@ -107,7 +106,7 @@ def _generate_report(self, all_plots, anomaly_outputs, model_list): ) model_sections = [] - for m in model_list: + for m in anomaly_outputs: model_sections.append(all_plots[m]) sec_text = rc.Heading(f"Train Evaluation Metrics for {m}", level=3) sec = rc.DataTable(self._evaluation_metrics(anomaly_outputs[m]), index=True) diff --git a/ads/opctl/operator/lowcode/anomaly/schema.yaml b/ads/opctl/operator/lowcode/anomaly/schema.yaml index bb225364a..c0629a088 100644 --- a/ads/opctl/operator/lowcode/anomaly/schema.yaml +++ b/ads/opctl/operator/lowcode/anomaly/schema.yaml @@ -387,7 +387,7 @@ spec: - wind_stats - dynamic_baseline - zms - - auto_encoder + - autoencoder meta: description: "The model to be used for anomaly detection"