From 0f1a8b41951585bfb4112d502badbfbfca21b402 Mon Sep 17 00:00:00 2001
From: priyanshu-8789 <priyanshumishra8789@gmail.com>
Date: Sat, 24 May 2025 16:37:22 +0530
Subject: [PATCH 01/10] Added Random Forest Regressor as an additional
 prediction model.

---
 machine_learning/forecasting/run.py | 24 +++++++++++++++++++++++-
 1 file changed, 23 insertions(+), 1 deletion(-)

diff --git a/machine_learning/forecasting/run.py b/machine_learning/forecasting/run.py
index 9d81b03cd09e..8ef4151b0417 100644
--- a/machine_learning/forecasting/run.py
+++ b/machine_learning/forecasting/run.py
@@ -10,11 +10,11 @@
          for the next 3 months sales or something,
          u can just adjust it for ur own purpose
 """
-
 from warnings import simplefilter
 
 import numpy as np
 import pandas as pd
+from sklearn.ensemble import RandomForestRegressor
 from sklearn.preprocessing import Normalizer
 from sklearn.svm import SVR
 from statsmodels.tsa.statespace.sarimax import SARIMAX
@@ -77,6 +77,28 @@ def support_vector_regressor(x_train: list, x_test: list, train_user: list) -> f
     y_pred = regressor.predict(x_test)
     return float(y_pred[0])
 
+def random_forest_regressor(x_train: list, x_test: list, train_user: list) -> float:
+    """
+    Fourth method: Random Forest Regressor
+    Random Forest is an ensemble learning method for regression that operates
+    by constructing a multitude of decision trees at training time and outputting
+    the mean prediction of the individual trees.
+
+    It is more robust than a single decision tree and less prone to overfitting.
+    Good for capturing nonlinear relationships in data.
+
+    input : training data (date, total_event) in list of float
+            where x = list of set (date and total event)
+    output : list of total user prediction in float
+    
+    >>> random_forest_regressor([[5,2],[1,5],[6,2]], [[3,2]], [2,1,4])
+    2.3333333333333335
+    """
+    model = RandomForestRegressor(n_estimators=100, random_state=42)
+    model.fit(x_train, train_user)
+    prediction = model.predict(x_test)
+    return float(prediction[0])
+
 
 def interquartile_range_checker(train_user: list) -> float:
     """

From f2793d33abb7ebd476f22ce8db5ea839e67f25ba Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sat, 24 May 2025 11:14:06 +0000
Subject: [PATCH 02/10] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 machine_learning/forecasting/run.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/machine_learning/forecasting/run.py b/machine_learning/forecasting/run.py
index 8ef4151b0417..125476003c16 100644
--- a/machine_learning/forecasting/run.py
+++ b/machine_learning/forecasting/run.py
@@ -10,6 +10,7 @@
          for the next 3 months sales or something,
          u can just adjust it for ur own purpose
 """
+
 from warnings import simplefilter
 
 import numpy as np
@@ -77,6 +78,7 @@ def support_vector_regressor(x_train: list, x_test: list, train_user: list) -> f
     y_pred = regressor.predict(x_test)
     return float(y_pred[0])
 
+
 def random_forest_regressor(x_train: list, x_test: list, train_user: list) -> float:
     """
     Fourth method: Random Forest Regressor
@@ -90,7 +92,7 @@ def random_forest_regressor(x_train: list, x_test: list, train_user: list) -> fl
     input : training data (date, total_event) in list of float
             where x = list of set (date and total event)
     output : list of total user prediction in float
-    
+
     >>> random_forest_regressor([[5,2],[1,5],[6,2]], [[3,2]], [2,1,4])
     2.3333333333333335
     """

From 67b1d801cfde24e8c85830fce21df764774a6f6d Mon Sep 17 00:00:00 2001
From: priyanshu-8789 <priyanshumishra8789@gmail.com>
Date: Sat, 24 May 2025 17:04:56 +0530
Subject: [PATCH 03/10] Added Random Forest Regressor to main voting

---
 machine_learning/forecasting/run.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/machine_learning/forecasting/run.py b/machine_learning/forecasting/run.py
index 8ef4151b0417..e1918cec4cac 100644
--- a/machine_learning/forecasting/run.py
+++ b/machine_learning/forecasting/run.py
@@ -177,6 +177,7 @@ def data_safety_checker(list_vote: list, actual_result: float) -> bool:
         ),
         sarimax_predictor(train_user, train_match, test_match),
         support_vector_regressor(x_train, x_test, train_user),
+        random_forest_regressor(x_train, x_test, train_user),  # Added Random Forest Regressor
     ]
 
     # check the safety of today's data

From 4ad2464f3803e66869085bf712957f8ba41275dd Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sat, 24 May 2025 11:35:40 +0000
Subject: [PATCH 04/10] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 machine_learning/forecasting/run.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/machine_learning/forecasting/run.py b/machine_learning/forecasting/run.py
index ee9ca6b8ac8d..18ee0d896a9a 100644
--- a/machine_learning/forecasting/run.py
+++ b/machine_learning/forecasting/run.py
@@ -179,7 +179,9 @@ def data_safety_checker(list_vote: list, actual_result: float) -> bool:
         ),
         sarimax_predictor(train_user, train_match, test_match),
         support_vector_regressor(x_train, x_test, train_user),
-        random_forest_regressor(x_train, x_test, train_user),  # Added Random Forest Regressor
+        random_forest_regressor(
+            x_train, x_test, train_user
+        ),  # Added Random Forest Regressor
     ]
 
     # check the safety of today's data

From 691d98dd459e2894584c8a8d404a18711451eef5 Mon Sep 17 00:00:00 2001
From: Priyanshu Mishra <118528432+priyanshu-8789@users.noreply.github.com>
Date: Sat, 24 May 2025 17:32:35 +0530
Subject: [PATCH 05/10] Update run.py

---
 machine_learning/forecasting/run.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/machine_learning/forecasting/run.py b/machine_learning/forecasting/run.py
index 18ee0d896a9a..1f4bcc1abbe3 100644
--- a/machine_learning/forecasting/run.py
+++ b/machine_learning/forecasting/run.py
@@ -90,7 +90,7 @@ def random_forest_regressor(x_train: list, x_test: list, train_user: list) -> fl
     Good for capturing nonlinear relationships in data.
 
     input : training data (date, total_event) in list of float
-            where x = list of set (date and total event)
+    where x = list of set (date and total event)
     output : list of total user prediction in float
 
     >>> random_forest_regressor([[5,2],[1,5],[6,2]], [[3,2]], [2,1,4])

From 062e04652c06f7c0fc62d418de1b84abf8304bde Mon Sep 17 00:00:00 2001
From: Priyanshu Mishra <118528432+priyanshu-8789@users.noreply.github.com>
Date: Sat, 24 May 2025 17:40:40 +0530
Subject: [PATCH 06/10] Update run.py

---
 machine_learning/forecasting/run.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/machine_learning/forecasting/run.py b/machine_learning/forecasting/run.py
index 1f4bcc1abbe3..729ad3380ba7 100644
--- a/machine_learning/forecasting/run.py
+++ b/machine_learning/forecasting/run.py
@@ -94,7 +94,7 @@ def random_forest_regressor(x_train: list, x_test: list, train_user: list) -> fl
     output : list of total user prediction in float
 
     >>> random_forest_regressor([[5,2],[1,5],[6,2]], [[3,2]], [2,1,4])
-    2.3333333333333335
+    1.95
     """
     model = RandomForestRegressor(n_estimators=100, random_state=42)
     model.fit(x_train, train_user)

From 6c2f7b48ead29db0005e09649091d102b989d33d Mon Sep 17 00:00:00 2001
From: Priyanshu Mishra <118528432+priyanshu-8789@users.noreply.github.com>
Date: Sun, 25 May 2025 19:47:05 +0530
Subject: [PATCH 07/10] Update run.py

Used matplotlib to plot the actual user counts alongside each model's predicted value (forecast confidence intervals and IQR outlier thresholds are not plotted yet).
Replaced print() with logging, because print() does not scale in production.
---
 machine_learning/forecasting/run.py | 27 +++++++++++++++++++++++----
 1 file changed, 23 insertions(+), 4 deletions(-)

diff --git a/machine_learning/forecasting/run.py b/machine_learning/forecasting/run.py
index 729ad3380ba7..7594fcb192a2 100644
--- a/machine_learning/forecasting/run.py
+++ b/machine_learning/forecasting/run.py
@@ -11,6 +11,7 @@
          u can just adjust it for ur own purpose
 """
 
+import logging
 from warnings import simplefilter
 
 import numpy as np
@@ -19,7 +20,10 @@
 from sklearn.preprocessing import Normalizer
 from sklearn.svm import SVR
 from statsmodels.tsa.statespace.sarimax import SARIMAX
+import matplotlib.pyplot as plt
 
+logging.basicConfig(level=logging.Info)
+logger = logging.getLogger(__name__)
 
 def linear_regression_prediction(
     train_dt: list, train_usr: list, train_mtch: list, test_dt: list, test_mtch: list
@@ -143,6 +147,21 @@ def data_safety_checker(list_vote: list, actual_result: float) -> bool:
             not_safe += 1
     return safe > not_safe
 
+def plot_forecast(actual, predictions):
+    plt.figure(figsize=(10, 5))
+    plt.plot(range(len(actual)), actual, label="Actual")
+    plt.plot(len(actual), predictions[0], 'ro', label="Linear Reg")
+    plt.plot(len(actual), predictions[1], 'go', label="SARIMAX")
+    plt.plot(len(actual), predictions[2], 'bo', label="SVR")
+    plt.plot(len(actual), predictions[3], 'yo', label="RF")
+    plt.legend()
+    plt.title("Data Safety Forecast")
+    plt.xlabel("Days")
+    plt.ylabel("Normalized User Count")
+    plt.grid(True)
+    plt.tight_layout()
+    plt.show()
+
 
 if __name__ == "__main__":
     """
@@ -179,11 +198,11 @@ def data_safety_checker(list_vote: list, actual_result: float) -> bool:
         ),
         sarimax_predictor(train_user, train_match, test_match),
         support_vector_regressor(x_train, x_test, train_user),
-        random_forest_regressor(
-            x_train, x_test, train_user
-        ),  # Added Random Forest Regressor
+        random_forest_regressor(x_train, x_test, train_user),
     ]
 
     # check the safety of today's data
     not_str = "" if data_safety_checker(res_vote, test_user[0]) else "not "
-    print(f"Today's data is {not_str}safe.")
+    logger.info(f"Today's data is {not_str}safe.")
+        
+    plot_forecast(train_user, res_vote)

From c8df2ccf38e382707855f83e2c10cf12a162de8e Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sun, 25 May 2025 14:17:30 +0000
Subject: [PATCH 08/10] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 machine_learning/forecasting/run.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/machine_learning/forecasting/run.py b/machine_learning/forecasting/run.py
index 7594fcb192a2..2a460761c9f3 100644
--- a/machine_learning/forecasting/run.py
+++ b/machine_learning/forecasting/run.py
@@ -25,6 +25,7 @@
 logging.basicConfig(level=logging.Info)
 logger = logging.getLogger(__name__)
 
+
 def linear_regression_prediction(
     train_dt: list, train_usr: list, train_mtch: list, test_dt: list, test_mtch: list
 ) -> float:
@@ -147,13 +148,14 @@ def data_safety_checker(list_vote: list, actual_result: float) -> bool:
             not_safe += 1
     return safe > not_safe
 
+
 def plot_forecast(actual, predictions):
     plt.figure(figsize=(10, 5))
     plt.plot(range(len(actual)), actual, label="Actual")
-    plt.plot(len(actual), predictions[0], 'ro', label="Linear Reg")
-    plt.plot(len(actual), predictions[1], 'go', label="SARIMAX")
-    plt.plot(len(actual), predictions[2], 'bo', label="SVR")
-    plt.plot(len(actual), predictions[3], 'yo', label="RF")
+    plt.plot(len(actual), predictions[0], "ro", label="Linear Reg")
+    plt.plot(len(actual), predictions[1], "go", label="SARIMAX")
+    plt.plot(len(actual), predictions[2], "bo", label="SVR")
+    plt.plot(len(actual), predictions[3], "yo", label="RF")
     plt.legend()
     plt.title("Data Safety Forecast")
     plt.xlabel("Days")
@@ -204,5 +206,5 @@ def plot_forecast(actual, predictions):
     # check the safety of today's data
     not_str = "" if data_safety_checker(res_vote, test_user[0]) else "not "
     logger.info(f"Today's data is {not_str}safe.")
-        
+
     plot_forecast(train_user, res_vote)

From 39bc4463a5c255dfbdc94f58490912c0b29b31f7 Mon Sep 17 00:00:00 2001
From: Priyanshu Mishra <118528432+priyanshu-8789@users.noreply.github.com>
Date: Sun, 25 May 2025 19:56:52 +0530
Subject: [PATCH 09/10] Update run.py

---
 machine_learning/forecasting/run.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/machine_learning/forecasting/run.py b/machine_learning/forecasting/run.py
index 2a460761c9f3..ac6b610aa986 100644
--- a/machine_learning/forecasting/run.py
+++ b/machine_learning/forecasting/run.py
@@ -11,16 +11,16 @@
          u can just adjust it for ur own purpose
 """
 
-import logging
 from warnings import simplefilter
 
+import logging
+import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
 from sklearn.ensemble import RandomForestRegressor
 from sklearn.preprocessing import Normalizer
 from sklearn.svm import SVR
 from statsmodels.tsa.statespace.sarimax import SARIMAX
-import matplotlib.pyplot as plt
 
 logging.basicConfig(level=logging.Info)
 logger = logging.getLogger(__name__)

From f882dfc1a95961c2fa386c02f820fdbbe417bf2b Mon Sep 17 00:00:00 2001
From: Priyanshu Mishra <118528432+priyanshu-8789@users.noreply.github.com>
Date: Sun, 25 May 2025 19:59:39 +0530
Subject: [PATCH 10/10] Update run.py

---
 machine_learning/forecasting/run.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/machine_learning/forecasting/run.py b/machine_learning/forecasting/run.py
index ac6b610aa986..787806abb94f 100644
--- a/machine_learning/forecasting/run.py
+++ b/machine_learning/forecasting/run.py
@@ -13,7 +13,6 @@
 
 from warnings import simplefilter
 
-import logging
 import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
@@ -22,9 +21,6 @@
 from sklearn.svm import SVR
 from statsmodels.tsa.statespace.sarimax import SARIMAX
 
-logging.basicConfig(level=logging.Info)
-logger = logging.getLogger(__name__)
-
 
 def linear_regression_prediction(
     train_dt: list, train_usr: list, train_mtch: list, test_dt: list, test_mtch: list
@@ -205,6 +201,6 @@ def plot_forecast(actual, predictions):
 
     # check the safety of today's data
     not_str = "" if data_safety_checker(res_vote, test_user[0]) else "not "
-    logger.info(f"Today's data is {not_str}safe.")
+    print(f"Today's data is {not_str}safe.")
 
     plot_forecast(train_user, res_vote)