diff --git a/ads/opctl/operator/lowcode/anomaly/const.py b/ads/opctl/operator/lowcode/anomaly/const.py index 0f9156f4a..4623b165e 100644 --- a/ads/opctl/operator/lowcode/anomaly/const.py +++ b/ads/opctl/operator/lowcode/anomaly/const.py @@ -10,15 +10,32 @@ class SupportedModels(ExtendedEnum): """Supported anomaly models.""" + # Autots Models AutoTS = "autots" Auto = "auto" + AutoSelect = "auto-select" IQR = "iqr" - LOF = "lof" ZSCORE = "zscore" ROLLING_ZSCORE = "rolling_zscore" MAD = "mad" EE = "ee" - ISOLATIONFOREST = "isolationforest" + + # univariate Merlion + StatThreshold = "stat_threshold" + WindStats = "wind_stats" + DBL = "dynamic_baseline" + SPECTRAL_RESIDUAL = "spectral_residual" + ZMS = "zms" + + # multivariate Merlion + DAGMM = "dagmm" + DEEP_POINT_ANOMALY_DETECTOR = "deep_point_anomaly_detector" + LSTM_ED = "lstm_ed" + VAE = "vae" + RandomCutForest = "random_cut_forest" + AutoEncoder = "autoencoder" + LOF = "lof" + IsolationForest = "isolation_forest" # point anomaly DAGMM = "dagmm" @@ -32,17 +49,20 @@ class SupportedModels(ExtendedEnum): ETS = "ets" PROPHET = "prophet" SARIMA = "sarima" + MSES = "mses" # changepoint BOCPD = "bocpd" + AUTOMLX = "automlx" + class NonTimeADSupportedModels(ExtendedEnum): """Supported non time-based anomaly detection models.""" - OneClassSVM = "oneclasssvm" - IsolationForest = "isolationforest" - RandomCutForest = "randomcutforest" + OneClassSVM = "one_class_svm" + IsolationForest = "isolation_forest" + RandomCutForest = "random_cut_forest" # TODO : Add DBScan # DBScan = "dbscan" @@ -80,37 +100,34 @@ class TODSSubModels(ExtendedEnum): class MerlionADModels(ExtendedEnum): """Supported Merlion AD sub models.""" + # univariate + StatThreshold = "stat_threshold" + WindStats = "wind_stats" + DBL = "dynamic_baseline" + SPECTRAL_RESIDUAL = "spectral_residual" + ZMS = "zms" + # point anomaly DAGMM = "dagmm" DEEP_POINT_ANOMALY_DETECTOR = "deep_point_anomaly_detector" LSTM_ED = "lstm_ed" - SPECTRAL_RESIDUAL = "spectral_residual" VAE = "vae" + RandomCutForest = "random_cut_forest" + AutoEncoder = "autoencoder" + LOF = "lof" + IsolationForest = "isolation_forest" # forecast_based ARIMA = "arima" ETS = "ets" PROPHET = "prophet" SARIMA = "sarima" + MSES = "mses" # changepoint BOCPD = "bocpd" -MERLIONAD_IMPORT_MODEL_MAP = { - MerlionADModels.DAGMM: ".dagmm", - MerlionADModels.DEEP_POINT_ANOMALY_DETECTOR: ".deep_point_anomaly_detector", - MerlionADModels.LSTM_ED: ".lstm_ed", - MerlionADModels.SPECTRAL_RESIDUAL: ".spectral_residual", - MerlionADModels.VAE: ".vae", - MerlionADModels.ARIMA: ".forecast_based.arima", - MerlionADModels.ETS: ".forecast_based.ets", - MerlionADModels.PROPHET: ".forecast_based.prophet", - MerlionADModels.SARIMA: ".forecast_based.sarima", - MerlionADModels.BOCPD: ".change_point.bocpd", -} - - MERLIONAD_MODEL_MAP = { MerlionADModels.DAGMM: "DAGMM", MerlionADModels.DEEP_POINT_ANOMALY_DETECTOR: "DeepPointAnomalyDetector", @@ -121,7 +138,16 @@ class MerlionADModels(ExtendedEnum): MerlionADModels.ETS: "ETSDetector", MerlionADModels.PROPHET: "ProphetDetector", MerlionADModels.SARIMA: "SarimaDetector", + MerlionADModels.MSES: "MSESDetector", MerlionADModels.BOCPD: "BOCPD", + MerlionADModels.ZMS: "ZMS", + MerlionADModels.RandomCutForest: "RandomCutForest", + MerlionADModels.AutoEncoder: "AutoEncoder", + MerlionADModels.LOF: "LocalOutlierFactor", + MerlionADModels.IsolationForest: "IsolationForest", + MerlionADModels.StatThreshold: "StatThreshold", + MerlionADModels.WindStats: "WindStats", + MerlionADModels.DBL: "DynamicBaseline", } diff --git a/ads/opctl/operator/lowcode/anomaly/model/anomaly_merlion.py b/ads/opctl/operator/lowcode/anomaly/model/anomaly_merlion.py index 8999b2674..5ba929f2e 100644 --- a/ads/opctl/operator/lowcode/anomaly/model/anomaly_merlion.py +++ b/ads/opctl/operator/lowcode/anomaly/model/anomaly_merlion.py @@ -1,9 +1,8 @@ #!/usr/bin/env python -# Copyright (c) 2023, 2024 Oracle and/or its affiliates. +# Copyright (c) 2023, 2025 Oracle and/or its affiliates. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ -import importlib import logging import numpy as np @@ -14,7 +13,6 @@ from ads.common.decorator.runtime_dependency import runtime_dependency from ads.opctl.operator.lowcode.anomaly.const import ( - MERLIONAD_IMPORT_MODEL_MAP, MERLIONAD_MODEL_MAP, OutputColumns, SupportedModels, @@ -26,41 +24,92 @@ logging.getLogger("report_creator").setLevel(logging.WARNING) -class AnomalyMerlionOperatorModel(AnomalyOperatorBaseModel): - """Class representing Merlion Anomaly Detection operator model.""" +def prepare_model_kwargs(model_name, model_kwargs): + model_name = MERLIONAD_MODEL_MAP.get(model_name) + + # individual handling by model + if model_name == SupportedModels.BOCPD: + if ( + model_kwargs.get("threshold", None) is None + and model_kwargs.get("alm_threshold") is not None + ): + model_kwargs["threshold"] = AggregateAlarms( + alm_threshold=model_kwargs.get("alm_threshold") + ) + elif ( + model_name == SupportedModels.MSES + and model_kwargs.get("max_forecast_steps", None) is None + ): + model_kwargs["max_forecast_steps"] = 1 + return model_kwargs + + +@runtime_dependency( + module="merlion", + err_msg=( + "Please run `pip3 install salesforce-merlion[all]` to " + "install the required packages." + ), +) +def init_merlion_model(model_name, target_seq_index, model_kwargs): + from merlion.models.factory import ModelFactory - @runtime_dependency( - module="merlion", - err_msg=( - "Please run `pip3 install salesforce-merlion[all]` to " - "install the required packages." - ), - ) - def _get_config_model(self, model_name): - """ - Returns a dictionary with model names as keys and a list of model config and model object as values. + model_name = MERLIONAD_MODEL_MAP.get(model_name) - Parameters - ---------- - model_name : str - model name from the Merlion model list. + model_kwargs["target_seq_index"] = target_seq_index - Returns - ------- - dict - A dictionary with model names as keys and a list of model config and model object as values. - """ - model_config_map = {} - model_module = importlib.import_module( - name=MERLIONAD_IMPORT_MODEL_MAP.get(model_name), - package="merlion.models.anomaly", + if model_name == "DeepPointAnomalyDetector": + from merlion.models.anomaly.deep_point_anomaly_detector import ( + DeepPointAnomalyDetector, + ) + + model = DeepPointAnomalyDetector( + DeepPointAnomalyDetector.config_class(**model_kwargs) ) - model_config = getattr( - model_module, MERLIONAD_MODEL_MAP.get(model_name) + "Config" + unused_model_kwargs = model_kwargs + else: + model, unused_model_kwargs = ModelFactory.create( + model_name, return_unused_kwargs=True, **model_kwargs ) - model = getattr(model_module, MERLIONAD_MODEL_MAP.get(model_name)) - model_config_map[model_name] = [model_config, model] - return model_config_map + + return model, unused_model_kwargs + + +class AnomalyMerlionOperatorModel(AnomalyOperatorBaseModel): + """Class representing Merlion Anomaly Detection operator model.""" + + # def _get_config_model(self, model_name): + # """ + # Returns a dictionary with model names as keys and a list of model config and model object as values. + + # Parameters + # ---------- + # model_name : str + # model name from the Merlion model list. + + # Returns + # ------- + # dict + # A dictionary with model names as keys and a list of model config and model object as values. + # """ + # model_config_map = {} + # model_module = importlib.import_module( + # name=MERLIONAD_IMPORT_MODEL_MAP.get(model_name), + # package="merlion.models.anomaly", + # ) + # model_config = getattr( + # model_module, MERLIONAD_MODEL_MAP.get(model_name) + "Config" + # ) + # model = getattr(model_module, MERLIONAD_MODEL_MAP.get(model_name)) + # model_config_map[model_name] = [model_config, model] + # return model_config_map + + def _preprocess_data(self, df, date_column): + df[date_column] = pd.to_datetime(df[date_column]) + if df[date_column].dt.tz is not None: + df[date_column] = df[date_column].dt.tz_convert(None) + data = df.set_index(date_column) + return data def _build_model(self) -> AnomalyOutput: """ @@ -75,72 +124,87 @@ def _build_model(self) -> AnomalyOutput: AnomalyOutput An AnomalyOutput object containing the anomaly detection results. """ - model_kwargs = self.spec.model_kwargs + + def _inject_train_data( + v_data: pd.DataFrame, train_data: pd.DataFrame + ) -> pd.DataFrame: + # Step 1: Get index from train data not already present in validation data + v_index_set = set(v_data.index) + filtered_train = train_data[~train_data.index.isin(v_index_set)] + + combined_data = pd.concat([filtered_train, v_data]).sort_index() + return combined_data + + model_kwargs = prepare_model_kwargs( + self.spec.model, self.spec.model_kwargs.copy() + ) anomaly_output = AnomalyOutput(date_column="index") anomaly_threshold = model_kwargs.get("anomaly_threshold", 95) - model_config_map = {} - model_config_map = self._get_config_model(self.spec.model) - date_column = self.spec.datetime_column.name anomaly_output = AnomalyOutput(date_column=date_column) - # model_objects = defaultdict(list) - for target, df in self.datasets.full_data_dict.items(): - data = df.set_index(date_column) - data = TimeSeries.from_pd(data) - for _, (model_config, model) in model_config_map.items(): - if self.spec.model == SupportedModels.BOCPD: - model_config = model_config(**self.spec.model_kwargs) - else: - model_config = model_config( - **{ - **self.spec.model_kwargs, - "threshold": AggregateAlarms( - alm_threshold=model_kwargs.get("alm_threshold") - if model_kwargs.get("alm_threshold") - else None - ), - } - ) - if hasattr(model_config, "target_seq_index"): - model_config.target_seq_index = df.columns.get_loc( - self.spec.target_column - ) - model = model(model_config) - - scores = model.train(train_data=data, anomaly_labels=None) - scores = scores.to_pd().reset_index() - scores["anom_score"] = ( - scores["anom_score"] - scores["anom_score"].min() - ) / (scores["anom_score"].max() - scores["anom_score"].min()) - - try: - y_pred = model.get_anomaly_label(data) - y_pred = (y_pred.to_pd().reset_index()["anom_score"] > 0).astype( - int + + for s_id, df in self.datasets.full_data_dict.items(): + df_clean = self._preprocess_data(df, date_column) + data = TimeSeries.from_pd(df_clean) + target_seq_index = df_clean.columns.get_loc(self.spec.target_column) + model, unused_model_kwargs = init_merlion_model( + self.spec.model, target_seq_index, model_kwargs + ) + scores = None + if ( + hasattr(self.datasets, "valid_data") + and self.datasets.valid_data.get_data_for_series(s_id) is not None + ): + # try: + v_df = self.datasets.valid_data.get_data_for_series(s_id) + v_data = self._preprocess_data(v_df, date_column) + + v_labels = TimeSeries.from_pd(v_data["anomaly"]) + v_data = v_data.drop("anomaly", axis=1) + v_data = _inject_train_data(v_data, df_clean) + scores_v = model.train( + train_data=TimeSeries.from_pd(v_data), anomaly_labels=v_labels + ) + scores_df = scores_v.to_pd() + available_index = scores_df.index.intersection(df_clean.index) + scores = TimeSeries.from_pd(scores_df.loc[available_index]) + # except Exception as e: + # logging.debug(f"Failed to use validation data with error: {e}") + if scores is None: + scores = model.train(train_data=data) + + # Normalize scores out of 100 + scores = scores.to_pd().reset_index() + scores["anom_score"] = ( + scores["anom_score"] - scores["anom_score"].min() + ) / (scores["anom_score"].max() - scores["anom_score"].min()) + + try: + y_pred = model.get_anomaly_label(data) + y_pred = (y_pred.to_pd().reset_index()["anom_score"] > 0).astype(int) + except Exception: + y_pred = ( + scores["anom_score"] + > np.percentile( + scores["anom_score"], + anomaly_threshold, ) - except Exception: - y_pred = ( - scores["anom_score"] - > np.percentile( - scores["anom_score"], - anomaly_threshold, - ) - ).astype(int) - - index_col = df.columns[0] - - anomaly = pd.DataFrame( - {index_col: df[index_col], OutputColumns.ANOMALY_COL: y_pred} - ).reset_index(drop=True) - score = pd.DataFrame( - { - index_col: df[index_col], - OutputColumns.SCORE_COL: scores["anom_score"], - } - ).reset_index(drop=True) - - anomaly_output.add_output(target, anomaly, score) + ).astype(int) + + index_col = df.columns[0] + + anomaly = pd.DataFrame( + {index_col: df[index_col], OutputColumns.ANOMALY_COL: y_pred} + ).reset_index(drop=True) + score = pd.DataFrame( + { + index_col: df[index_col], + OutputColumns.SCORE_COL: scores["anom_score"], + } + ).reset_index(drop=True) + + anomaly_output.add_output(s_id, anomaly, score) return anomaly_output def _generate_report(self): diff --git a/ads/opctl/operator/lowcode/anomaly/model/autoselect.py b/ads/opctl/operator/lowcode/anomaly/model/autoselect.py new file mode 100644 index 000000000..eb04ca0bf --- /dev/null +++ b/ads/opctl/operator/lowcode/anomaly/model/autoselect.py @@ -0,0 +1,129 @@ +#!/usr/bin/env python + +# Copyright (c) 2025 Oracle and/or its affiliates. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ + +import logging + +import report_creator as rc + +from ads.opctl.operator.lowcode.anomaly.const import ( + SUBSAMPLE_THRESHOLD, + OutputColumns, +) + +from ..utils import plot_anomaly_threshold_gain +from .base_model import AnomalyOperatorBaseModel + +logging.getLogger("report_creator").setLevel(logging.WARNING) + + +class AutoSelectOperatorModel(AnomalyOperatorBaseModel): + def _build_model(**kwargs): + pass + + def generate_report(self): + from .factory import AnomalyOperatorModelFactory + + anom_outputs = {} + all_plots = {} + for m in self.spec.model_kwargs.pop("model_list", ["lof", "prophet"]): + config_i = self.config + config_i.spec.model = m + try: + anom_outputs[m] = AnomalyOperatorModelFactory.get_model( + config_i, self.datasets + )._build_model() + all_plots[m] = self._get_plots_from_output(anom_outputs[m], m) + except: + logging.debug(f"Model {m} failed. skipping.") + return self._generate_report(all_plots, anom_outputs) + + def _get_plots_from_output(self, anomaly_output, model): + import matplotlib.pyplot as plt + + plt.rcParams.update({"figure.max_open_warning": 0}) + + blocks = [] + date_column = self.spec.datetime_column.name + for target, df in self.datasets.full_data_dict.items(): + if target in anomaly_output.list_categories(): + figure_blocks = [] + time_col = df[date_column].reset_index(drop=True) + anomaly_col = anomaly_output.get_anomalies_by_cat(category=target)[ + OutputColumns.ANOMALY_COL + ] + anomaly_indices = [ + i for i, index in enumerate(anomaly_col) if index == 1 + ] + downsampled_time_col = time_col + selected_indices = list(range(len(time_col))) + if self.spec.subsample_report_data: + non_anomaly_indices = [ + i for i in range(len(time_col)) if i not in anomaly_indices + ] + # Downsample non-anomalous data if it exceeds the threshold (1000) + if len(non_anomaly_indices) > SUBSAMPLE_THRESHOLD: + downsampled_non_anomaly_indices = non_anomaly_indices[ + :: len(non_anomaly_indices) // SUBSAMPLE_THRESHOLD + ] + selected_indices = ( + anomaly_indices + downsampled_non_anomaly_indices + ) + selected_indices.sort() + downsampled_time_col = time_col[selected_indices] + + columns = set(df.columns).difference({date_column}) + for col in columns: + y = df[col].reset_index(drop=True) + + downsampled_y = y[selected_indices] + + fig, ax = plt.subplots(figsize=(8, 3), layout="constrained") + ax.grid() + ax.plot(downsampled_time_col, downsampled_y, color="black") + # Plot anomalies + for i in anomaly_indices: + ax.scatter(time_col[i], y[i], color="red", marker="o") + plt.xlabel(date_column) + plt.ylabel(col) + plt.title(f"`{col}` with reference to anomalies") + figure_blocks.append(rc.Widget(ax)) + else: + figure_blocks = None + + blocks.append( + rc.Group(*figure_blocks, label=f"{target}_{model}") + ) if figure_blocks else None + plots = rc.Select(blocks) + return plots + + def _generate_report(self, all_plots, anomaly_outputs): + """Genreates a report for the model.""" + title_text = rc.Heading("Auto-Select Report", level=2) + summary = rc.Text( + "This report presents the results of multiple model tuning experiments, visualized to facilitate comparative analysis and informed model selection. Each modeling framework has been systematically trained, hyperparameter-tuned, and validated to optimize performance based on the characteristics of your dataset." + ) + + model_sections = [] + for m in anomaly_outputs: + model_sections.append(all_plots[m]) + sec_text = rc.Heading(f"Train Evaluation Metrics for {m}", level=3) + sec = rc.DataTable(self._evaluation_metrics(anomaly_outputs[m]), index=True) + model_sections.append(sec_text) + model_sections.append(sec) + cat1 = anomaly_outputs[m].list_categories()[0] + fig = plot_anomaly_threshold_gain( + anomaly_outputs[m].get_scores_by_cat(cat1)["score"], + title=f"Threshold Analysis for {m}", + ) + model_sections.append(rc.Widget(fig)) + + report_sections = [title_text, summary] + model_sections + + # save the report and result CSV + self._save_report( + report_sections=report_sections, + anomaly_output=None, + test_metrics=None, + ) diff --git a/ads/opctl/operator/lowcode/anomaly/model/autots.py b/ads/opctl/operator/lowcode/anomaly/model/autots.py index 550833a67..68a7a73f4 100644 --- a/ads/opctl/operator/lowcode/anomaly/model/autots.py +++ b/ads/opctl/operator/lowcode/anomaly/model/autots.py @@ -1,6 +1,6 @@ #!/usr/bin/env python -# Copyright (c) 2023, 2024 Oracle and/or its affiliates. +# Copyright (c) 2023, 2025 Oracle and/or its affiliates. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ import logging @@ -22,7 +22,7 @@ class AutoTSOperatorModel(AnomalyOperatorBaseModel): """Class representing AutoTS Anomaly Detection operator model.""" model_mapping = { - "isolationforest": "IsolationForest", + # "isolationforest": "IsolationForest", "lof": "LOF", "ee": "EE", "zscore": "zscore", @@ -43,7 +43,7 @@ def _build_model(self) -> AnomalyOutput: from autots.evaluator.anomaly_detector import AnomalyDetector method = ( - SupportedModels.ISOLATIONFOREST + SupportedModels.IQR if self.spec.model == SupportedModels.AutoTS else self.spec.model ) @@ -55,7 +55,7 @@ def _build_model(self) -> AnomalyOutput: } # Supported methods with contamination param if method in [ - SupportedModels.ISOLATIONFOREST, + # SupportedModels.IsolationForest, SupportedModels.LOF, SupportedModels.EE, ]: diff --git a/ads/opctl/operator/lowcode/anomaly/model/base_model.py b/ads/opctl/operator/lowcode/anomaly/model/base_model.py index 4f28bb8d2..f7bbabf1f 100644 --- a/ads/opctl/operator/lowcode/anomaly/model/base_model.py +++ b/ads/opctl/operator/lowcode/anomaly/model/base_model.py @@ -163,7 +163,14 @@ def generate_report(self): title_text = rc.Heading("Anomaly Detection Report", level=1) yaml_appendix_title = rc.Heading("Reference: YAML File", level=2) - yaml_appendix = rc.Yaml(self.config.to_dict()) + config_dict = self.config.to_dict() + # pop the data incase it isn't json serializable + config_dict["spec"]["input_data"].pop("data") + if config_dict["spec"].get("validation_data"): + config_dict["spec"]["validation_data"].pop("data") + if config_dict["spec"].get("test_data"): + config_dict["spec"]["test_data"].pop("data") + yaml_appendix = rc.Yaml(config_dict) summary = rc.Block( rc.Group( rc.Text(f"You selected the **`{self.spec.model}`** model.\n"), @@ -320,23 +327,26 @@ def _save_report( ) as f2: f2.write(f1.read()) - if self.spec.generate_inliers: - inliers = anomaly_output.get_inliers(self.datasets) + if anomaly_output is not None: + if self.spec.generate_inliers: + inliers = anomaly_output.get_inliers(self.datasets) + write_data( + data=inliers, + filename=os.path.join( + unique_output_dir, self.spec.inliers_filename + ), + format="csv", + storage_options=storage_options, + ) + + outliers = anomaly_output.get_outliers(self.datasets) write_data( - data=inliers, - filename=os.path.join(unique_output_dir, self.spec.inliers_filename), + data=outliers, + filename=os.path.join(unique_output_dir, self.spec.outliers_filename), format="csv", storage_options=storage_options, ) - outliers = anomaly_output.get_outliers(self.datasets) - write_data( - data=outliers, - filename=os.path.join(unique_output_dir, self.spec.outliers_filename), - format="csv", - storage_options=storage_options, - ) - if test_metrics is not None and not test_metrics.empty: write_data( data=test_metrics.rename_axis("metrics").reset_index(), diff --git a/ads/opctl/operator/lowcode/anomaly/model/factory.py b/ads/opctl/operator/lowcode/anomaly/model/factory.py index 4bb59d02b..2e8b57abc 100644 --- a/ads/opctl/operator/lowcode/anomaly/model/factory.py +++ b/ads/opctl/operator/lowcode/anomaly/model/factory.py @@ -9,6 +9,8 @@ from ..operator_config import AnomalyOperatorConfig from .anomaly_dataset import AnomalyDatasets from .anomaly_merlion import AnomalyMerlionOperatorModel +from .automlx import AutoMLXOperatorModel +from .autoselect import AutoSelectOperatorModel from .autots import AutoTSOperatorModel from .base_model import AnomalyOperatorBaseModel from .isolationforest import IsolationForestOperatorModel @@ -44,23 +46,33 @@ class AnomalyOperatorModelFactory: _MAP = { SupportedModels.AutoTS: AutoTSOperatorModel, + SupportedModels.Auto: AutoTSOperatorModel, SupportedModels.IQR: AutoTSOperatorModel, - SupportedModels.LOF: AutoTSOperatorModel, - SupportedModels.ISOLATIONFOREST: AutoTSOperatorModel, SupportedModels.ZSCORE: AutoTSOperatorModel, SupportedModels.ROLLING_ZSCORE: AutoTSOperatorModel, SupportedModels.EE: AutoTSOperatorModel, SupportedModels.MAD: AutoTSOperatorModel, + SupportedModels.StatThreshold: AnomalyMerlionOperatorModel, + SupportedModels.WindStats: AnomalyMerlionOperatorModel, + SupportedModels.DBL: AnomalyMerlionOperatorModel, + SupportedModels.SPECTRAL_RESIDUAL: AnomalyMerlionOperatorModel, + SupportedModels.ZMS: AnomalyMerlionOperatorModel, SupportedModels.DAGMM: AnomalyMerlionOperatorModel, SupportedModels.DEEP_POINT_ANOMALY_DETECTOR: AnomalyMerlionOperatorModel, SupportedModels.LSTM_ED: AnomalyMerlionOperatorModel, - SupportedModels.SPECTRAL_RESIDUAL: AnomalyMerlionOperatorModel, SupportedModels.VAE: AnomalyMerlionOperatorModel, + SupportedModels.RandomCutForest: AnomalyMerlionOperatorModel, + SupportedModels.AutoEncoder: AnomalyMerlionOperatorModel, + SupportedModels.LOF: AnomalyMerlionOperatorModel, + SupportedModels.IsolationForest: AnomalyMerlionOperatorModel, SupportedModels.ARIMA: AnomalyMerlionOperatorModel, SupportedModels.ETS: AnomalyMerlionOperatorModel, SupportedModels.PROPHET: AnomalyMerlionOperatorModel, SupportedModels.SARIMA: AnomalyMerlionOperatorModel, + SupportedModels.MSES: AnomalyMerlionOperatorModel, SupportedModels.BOCPD: AnomalyMerlionOperatorModel, + SupportedModels.AUTOMLX: AutoMLXOperatorModel, + SupportedModels.AutoSelect: AutoSelectOperatorModel, } _NonTime_MAP = { diff --git a/ads/opctl/operator/lowcode/anomaly/schema.yaml b/ads/opctl/operator/lowcode/anomaly/schema.yaml index aba6c4e82..c0629a088 100644 --- a/ads/opctl/operator/lowcode/anomaly/schema.yaml +++ b/ads/opctl/operator/lowcode/anomaly/schema.yaml @@ -361,9 +361,11 @@ spec: allowed: - autots - auto - - oneclasssvm - - isolationforest - - randomcutforest + - auto-select + - automlx + - one_class_svm + - isolation_forest + - random_cut_forest - iqr - lof - zscore @@ -378,8 +380,14 @@ spec: - arima - ets - sarima + - mses - bocpd - prophet + - stat_threshold + - wind_stats + - dynamic_baseline + - zms + - autoencoder meta: description: "The model to be used for anomaly detection" diff --git a/ads/opctl/operator/lowcode/anomaly/utils.py b/ads/opctl/operator/lowcode/anomaly/utils.py index 396d34c57..3892946a4 100644 --- a/ads/opctl/operator/lowcode/anomaly/utils.py +++ b/ads/opctl/operator/lowcode/anomaly/utils.py @@ -86,3 +86,101 @@ def select_auto_model(operator_config): if operator_config.spec.datetime_column is not None: return SupportedModels.AutoTS return NonTimeADSupportedModels.IsolationForest + + +import plotly.graph_objects as go +from sklearn.metrics import f1_score + + +def plot_anomaly_threshold_gain( + scores, threshold=None, labels=None, title="Threshold Analysis" +): + """ + Plots: + - Anomalies detected vs. threshold (always) + - F1 Score vs. threshold (if labels provided) + - % of data flagged vs. threshold (if labels not provided) + + Args: + scores (array-like): Anomaly scores (higher = more anomalous) + threshold (float): Optional current threshold to highlight + labels (array-like): Optional true labels (1=anomaly, 0=normal) + title (str): Chart title + """ + scores = np.array(scores) + thresholds = np.linspace(min(scores), max(scores), 100) + + anomalies_found = [] + y_secondary = [] + y_secondary_label = "" + + for t in thresholds: + predicted = (scores >= t).astype(int) + + # Count anomalies detected + if labels is not None: + true_anomalies = np.sum((predicted == 1) & (np.array(labels) == 1)) + # Compute F1 score + y_secondary.append(f1_score(labels, predicted, zero_division=0)) + y_secondary_label = "F1 Score" + else: + true_anomalies = np.sum(predicted) + # Compute % of data flagged + y_secondary.append(100 * np.mean(predicted)) + y_secondary_label = "% of Data Flagged" + + anomalies_found.append(true_anomalies) + + # Start building the plot + fig = go.Figure() + + # Primary Y: Anomalies detected + fig.add_trace( + go.Scatter( + x=thresholds, + y=anomalies_found, + name="Anomalies Detected", + mode="lines", + line=dict(color="royalblue"), + yaxis="y1", + ) + ) + + # Secondary Y: F1 or % flagged + fig.add_trace( + go.Scatter( + x=thresholds, + y=y_secondary, + name=y_secondary_label, + mode="lines", + line=dict(color="orange", dash="dot"), + yaxis="y2", + ) + ) + + # Vertical line for current threshold + if threshold is not None: + fig.add_trace( + go.Scatter( + x=[threshold, threshold], + y=[0, max(anomalies_found)], + mode="lines", + name=f"Current Threshold ({threshold:.2f})", + line=dict(color="firebrick", dash="dash"), + yaxis="y1", + ) + ) + + # Layout + fig.update_layout( + title=title, + xaxis=dict(title="Anomaly Score Threshold"), + yaxis=dict(title="Anomalies Detected", side="left"), + yaxis2=dict( + title=y_secondary_label, overlaying="y", side="right", showgrid=False + ), + legend=dict(x=0.01, y=0.99), + template="plotly_white", + ) + + return fig