Skip to content

Commit d56fddd

Browse files
authored
Monthly windstats (#172)
* windstats with monthly features: add post rules feature to windstats runner.
* windstats runner comments
* Add new feature to return only anomaly scores
* re-check pr
* Update numpy version: avoid using numpy v2.0 to avoid version conflicts. numpy v2.0 was published on 2024-06-17.
* numpy version: keep numpy version under v2.0, which was released on 2024-06-17 and will lead to conflicts.
1 parent bb7c349 commit d56fddd

File tree

3 files changed

+55
-22
lines changed

3 files changed

+55
-22
lines changed

merlion/models/anomaly/windstats_monthly.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
logger = logging.getLogger(__name__)
2323

2424

25-
class WindStatsConfig(DetectorConfig):
25+
class MonthlyWindStatsConfig(DetectorConfig):
2626
"""
2727
Config class for `MonthlyWindStats`.
2828
"""
@@ -61,15 +61,15 @@ class MonthlyWindStats(DetectorBase):
6161
minimum of the scores is returned.
6262
"""
6363

64-
config_class = WindStatsConfig
64+
config_class = MonthlyWindStatsConfig
6565

66-
def __init__(self, config: WindStatsConfig = None):
66+
def __init__(self, config: MonthlyWindStatsConfig = None):
6767
"""
6868
config.wind_sz: the window size in minutes, default is 30 minute window
6969
config.max_days: maximum number of days stored in memory (only mean and std of each window are stored), default is 4 days
7070
here the days are first bucketized and then bucketized by window id.
7171
"""
72-
super().__init__(WindStatsConfig() if config is None else config)
72+
super().__init__(MonthlyWindStatsConfig() if config is None else config)
7373
self.table = {}
7474

7575
@property

merlion/models/anomaly/windstats_run.py

+50-17
Original file line numberDiff line numberDiff line change
@@ -5,50 +5,83 @@
55
To use only weekly or only monthly seasonality, set the "enable_weekly" or "enable_monthly" argument of RunWindStats().
66
"""
77

8-
from windstats import WindStats, WindStatsConfig
9-
from windstats_monthly import MonthlyWindStats, MonthlyWindStatsConfig
10-
from ts_datasets.anomaly import NAB
8+
from merlion.models.anomaly.windstats import WindStats, WindStatsConfig
9+
from merlion.models.anomaly.windstats_monthly import MonthlyWindStats, MonthlyWindStatsConfig
1110
from merlion.utils import TimeSeries
12-
from merlion.post_process.threshold import AggregateAlarms
1311

1412
class RunWindStats:
15-
def __init__(self, threshold, enable_weekly = True, enable_monthly = True, WeeklyWindStatsConfig = WindStatsConfig(), MonthlyWindStatsConfig = MonthlyWindStatsConfig()):
13+
def __init__(
14+
self,
15+
threshold,
16+
enable_weekly = True,
17+
enable_monthly = True,
18+
post_rule_on_anom_score = False,
19+
WeeklyWindStatsConfig = WindStatsConfig(),
20+
MonthlyWindStatsConfig = MonthlyWindStatsConfig(),
21+
return_score = True
22+
):
1623
"""
1724
Users can customize the configuration for weekly or monthly-based windstats. If not, then the default configuration will apply.
1825
"""
1926

2027
self.enable_weekly = enable_weekly
2128
self.enable_monthly = enable_monthly
29+
self.return_score = return_score
2230
assert self.enable_weekly == True or self.enable_monthly == True, "Must enable either weekly or monthly seasonality, or both!"
2331

2432
# Threshold on identifying anomaly based on anomaly score.
2533
self.threshold = threshold
34+
# Whether to apply the model's post rules to the anomaly scores
35+
self.post_rule = post_rule_on_anom_score
2636

37+
# Initialize the corresponding model when weekly/monthly analysis is enabled
2738
if self.enable_weekly:
2839
self.model_weekly = WindStats(WeeklyWindStatsConfig)
29-
3040
if self.enable_monthly:
3141
self.model_monthly = MonthlyWindStats(MonthlyWindStatsConfig)
3242

43+
# Identify anomaly based on the hard threshold.
3344
def anomalyByScore(self, scores, threshold):
34-
scores.loc[abs(scores["anom_score"]) <= threshold] = 0
35-
scores.loc[abs(scores["anom_score"]) > threshold] = 1
45+
labels = scores.copy()
46+
labels.loc[abs(labels["anom_score"]) <= threshold] = 0
47+
labels.loc[abs(labels["anom_score"]) > threshold] = 1
3648

37-
scores.rename(columns = {"anom_score": "anomaly"}, inplace = True)
38-
return scores
49+
labels.rename(columns = {"anom_score": "anomaly"}, inplace = True)
50+
return labels
51+
52+
# Filter anomaly scores based on post rules. Same as "get_anomaly_label" in WindStats
53+
def get_anomaly_label(self, model, ts):
54+
scores = model.train(ts)
55+
return model.post_rule(scores) if model.post_rule is not None else scores
3956

4057
def run(self, ts):
4158
if self.enable_weekly:
42-
scores_weekly = self.model_weekly.train(ts).to_pd()
43-
scores_weekly = self.anomalyByScore(scores_weekly, self.threshold)
59+
if self.post_rule:
60+
scores_weekly = self.get_anomaly_label(self.model_weekly, ts).to_pd()
61+
else:
62+
scores_weekly = self.model_weekly.train(ts).to_pd()
63+
labels_weekly = self.anomalyByScore(scores_weekly, self.threshold)
4464

4565
if self.enable_monthly:
46-
scores_monthly = self.model_monthly.train(ts).to_pd()
47-
scores_monthly = self.anomalyByScore(scores_monthly, self.threshold)
66+
if self.post_rule:
67+
scores_monthly = self.get_anomaly_label(self.model_monthly, ts).to_pd()
68+
else:
69+
scores_monthly = self.model_monthly.train(ts).to_pd()
70+
labels_monthly = self.anomalyByScore(scores_monthly, self.threshold)
4871

72+
# Anomaly is identified if and only if it's detected in both weekly and monthly patterns.
4973
if self.enable_weekly and self.enable_monthly:
50-
return scores_weekly * scores_monthly
74+
if self.return_score:
75+
return scores_weekly, scores_monthly, scores_weekly * scores_monthly
76+
else:
77+
return scores_weekly, scores_monthly, labels_weekly * labels_monthly
5178
elif self.enable_weekly:
52-
return scores_weekly
79+
if self.return_score:
80+
return scores_weekly, None, scores_weekly
81+
else:
82+
return scores_weekly, None, labels_weekly
5383
else:
54-
return scores_monthly
84+
if self.return_score:
85+
return None, scores_monthly, scores_monthly
86+
else:
87+
return None, scores_monthly, labels_monthly

setup.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ def read_file(fname):
6767
"py4j",
6868
"matplotlib",
6969
"plotly>=4.13",
70-
"numpy>=1.21", # 1.21 remediates a security risk
70+
"numpy>=1.21,<2.0", # 1.21 remediates a security risk
7171
"packaging",
7272
"pandas>=1.1.0", # >=1.1.0 for origin kwarg to df.resample()
7373
"prophet>=1.1", # 1.1 removes dependency on pystan

0 commit comments

Comments
 (0)