|
| 1 | +#!/usr/bin/env python |
| 2 | + |
| 3 | +# Copyright (c) 2023, 2024 Oracle and/or its affiliates. |
| 4 | +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ |
| 5 | + |
| 6 | +import numpy as np |
| 7 | +import pandas as pd |
| 8 | + |
| 9 | +from ads.common.decorator.runtime_dependency import runtime_dependency |
| 10 | +from ads.opctl.operator.lowcode.anomaly.const import OutputColumns |
| 11 | + |
| 12 | +from .anomaly_dataset import AnomalyOutput |
| 13 | +from .base_model import AnomalyOperatorBaseModel |
| 14 | + |
| 15 | + |
class RandomCutForestOperatorModel(AnomalyOperatorBaseModel):
    """
    Class representing Random Cut Forest Anomaly Detection operator model.
    """

    @runtime_dependency(
        module="rrcf",
        err_msg=(
            "Please run `pip install rrcf` to "
            "install the required dependencies for RandomCutForest."
        ),
    )
    def _build_model(self) -> AnomalyOutput:
        """Build one model per target and collect anomaly labels and scores.

        For every ``(target, df)`` pair in ``self.datasets.full_data_dict`` a
        new ``RCTree`` is created from ``self.spec.model_kwargs``, fitted on
        ``df``, and used to produce a label and a score for each row of the
        same ``df``. The results are registered on an ``AnomalyOutput``.

        Returns
        -------
        AnomalyOutput
            Holds, per target, one DataFrame of anomaly labels and one
            DataFrame of anomaly scores.
        """
        from rrcf import RCTree

        # Fall back to an empty mapping so ``**model_kwargs`` below cannot
        # fail when the spec carries no model kwargs.
        model_kwargs = self.spec.model_kwargs or {}

        # Re-map predicted labels to the anomaly dataset convention
        # (1: outlier, 0: inlier): a prediction of -1 becomes 1, 1 becomes 0.
        self.outlier_map = {1: 0, -1: 1}

        anomaly_output = AnomalyOutput(date_column="index")

        for target, df in self.datasets.full_data_dict.items():
            # NOTE(review): this uses a scikit-learn style interface
            # (fit / predict / score_samples). The rrcf documentation lists
            # insert_point / codisp on RCTree instead -- confirm these calls
            # against the installed rrcf version.
            model = RCTree(**model_kwargs)
            model.fit(df)
            y_pred = model.predict(df)
            y_pred = np.vectorize(self.outlier_map.get)(y_pred)

            scores = model.score_samples(df)

            # The first column of the dataset is used as the index column.
            index_col = df.columns[0]

            # The label frame keeps the original name of the index column,
            # while the score frame stores the same values under "index".
            anomaly = pd.DataFrame(
                {index_col: df[index_col], OutputColumns.ANOMALY_COL: y_pred}
            ).reset_index(drop=True)
            score = pd.DataFrame(
                {"index": df[index_col], OutputColumns.SCORE_COL: scores}
            ).reset_index(drop=True)

            anomaly_output.add_output(target, anomaly, score)

        return anomaly_output

    def _generate_report(self):
        """Generates the report.

        Returns
        -------
        tuple
            ``(model_description, other_sections)``: a ``report_creator``
            text element describing the Random Cut Forest algorithm, and a
            list holding the heading and introductory text of the
            "Selected Models Overview" section.
        """
        import report_creator as rc

        other_sections = [
            rc.Heading("Selected Models Overview", level=2),
            rc.Text(
                "The following tables provide information regarding the chosen model."
            ),
        ]

        model_description = rc.Text(
            "The Random Cut Forest (RCF) is an unsupervised machine learning algorithm that is used for anomaly detection."
            " It works by building an ensemble of binary trees (random cut trees) and using them to compute anomaly scores for data points."
        )

        return (
            model_description,
            other_sections,
        )
0 commit comments