From 0f1a8b41951585bfb4112d502badbfbfca21b402 Mon Sep 17 00:00:00 2001 From: priyanshu-8789 Date: Sat, 24 May 2025 16:37:22 +0530 Subject: [PATCH 01/10] Added Random Forest Regressor as an additional prediction model. --- machine_learning/forecasting/run.py | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/machine_learning/forecasting/run.py b/machine_learning/forecasting/run.py index 9d81b03cd09e..8ef4151b0417 100644 --- a/machine_learning/forecasting/run.py +++ b/machine_learning/forecasting/run.py @@ -10,11 +10,11 @@ for the next 3 months sales or something, u can just adjust it for ur own purpose """ - from warnings import simplefilter import numpy as np import pandas as pd +from sklearn.ensemble import RandomForestRegressor from sklearn.preprocessing import Normalizer from sklearn.svm import SVR from statsmodels.tsa.statespace.sarimax import SARIMAX @@ -77,6 +77,28 @@ def support_vector_regressor(x_train: list, x_test: list, train_user: list) -> f y_pred = regressor.predict(x_test) return float(y_pred[0]) +def random_forest_regressor(x_train: list, x_test: list, train_user: list) -> float: + """ + Fourth method: Random Forest Regressor + Random Forest is an ensemble learning method for regression that operates + by constructing a multitude of decision trees at training time and outputting + the mean prediction of the individual trees. + + It is more robust than a single decision tree and less prone to overfitting. + Good for capturing nonlinear relationships in data. + + input : training data (date, total_event) in list of float + where x = list of set (date and total event) + output : list of total user prediction in float + + >>> random_forest_regressor([[5,2],[1,5],[6,2]], [[3,2]], [2,1,4]) + 2.3333333333333335 + """ + model = RandomForestRegressor(n_estimators=100, random_state=42) + model.fit(x_train, train_user) + prediction = model.predict(x_test) + return float(prediction[0]) + def interquartile_range_checker(train_user: list) -> float: """ From f2793d33abb7ebd476f22ce8db5ea839e67f25ba Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 24 May 2025 11:14:06 +0000 Subject: [PATCH 02/10] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- machine_learning/forecasting/run.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/machine_learning/forecasting/run.py b/machine_learning/forecasting/run.py index 8ef4151b0417..125476003c16 100644 --- a/machine_learning/forecasting/run.py +++ b/machine_learning/forecasting/run.py @@ -10,6 +10,7 @@ for the next 3 months sales or something, u can just adjust it for ur own purpose """ + from warnings import simplefilter import numpy as np @@ -77,6 +78,7 @@ def support_vector_regressor(x_train: list, x_test: list, train_user: list) -> f y_pred = regressor.predict(x_test) return float(y_pred[0]) + def random_forest_regressor(x_train: list, x_test: list, train_user: list) -> float: """ Fourth method: Random Forest Regressor @@ -90,7 +92,7 @@ def random_forest_regressor(x_train: list, x_test: list, train_user: list) -> fl input : training data (date, total_event) in list of float where x = list of set (date and total event) output : list of total user prediction in float - + >>> random_forest_regressor([[5,2],[1,5],[6,2]], [[3,2]], [2,1,4]) 2.3333333333333335 """ From 67b1d801cfde24e8c85830fce21df764774a6f6d Mon Sep 17 00:00:00 2001 From: priyanshu-8789 Date: Sat, 24 May 2025 17:04:56 +0530 Subject: [PATCH 03/10] Added Random Forest Regressor to main voting --- machine_learning/forecasting/run.py | 1 + 1 file changed, 1 insertion(+) diff --git a/machine_learning/forecasting/run.py b/machine_learning/forecasting/run.py index 8ef4151b0417..e1918cec4cac 100644 --- a/machine_learning/forecasting/run.py +++ b/machine_learning/forecasting/run.py @@ -177,6 +177,7 @@ def data_safety_checker(list_vote: list, actual_result: float) -> bool: ), sarimax_predictor(train_user, train_match, test_match), support_vector_regressor(x_train, x_test, train_user), + random_forest_regressor(x_train, x_test, train_user), # Added Random Forest Regressor ] # check the safety of today's data From 4ad2464f3803e66869085bf712957f8ba41275dd Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 24 May 2025 11:35:40 +0000 Subject: [PATCH 04/10] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- machine_learning/forecasting/run.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/machine_learning/forecasting/run.py b/machine_learning/forecasting/run.py index ee9ca6b8ac8d..18ee0d896a9a 100644 --- a/machine_learning/forecasting/run.py +++ b/machine_learning/forecasting/run.py @@ -179,7 +179,9 @@ def data_safety_checker(list_vote: list, actual_result: float) -> bool: ), sarimax_predictor(train_user, train_match, test_match), support_vector_regressor(x_train, x_test, train_user), - random_forest_regressor(x_train, x_test, train_user), # Added Random Forest Regressor + random_forest_regressor( + x_train, x_test, train_user + ), # Added Random Forest Regressor ] # check the safety of today's data From 691d98dd459e2894584c8a8d404a18711451eef5 Mon Sep 17 00:00:00 2001 From: Priyanshu Mishra <118528432+priyanshu-8789@users.noreply.github.com> Date: Sat, 24 May 2025 17:32:35 +0530 Subject: [PATCH 05/10] Update run.py --- machine_learning/forecasting/run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/machine_learning/forecasting/run.py b/machine_learning/forecasting/run.py index 18ee0d896a9a..1f4bcc1abbe3 100644 --- a/machine_learning/forecasting/run.py +++ b/machine_learning/forecasting/run.py @@ -90,7 +90,7 @@ def random_forest_regressor(x_train: list, x_test: list, train_user: list) -> fl Good for capturing nonlinear relationships in data. input : training data (date, total_event) in list of float - where x = list of set (date and total event) + where x = list of set (date and total event) output : list of total user prediction in float >>> random_forest_regressor([[5,2],[1,5],[6,2]], [[3,2]], [2,1,4]) From 062e04652c06f7c0fc62d418de1b84abf8304bde Mon Sep 17 00:00:00 2001 From: Priyanshu Mishra <118528432+priyanshu-8789@users.noreply.github.com> Date: Sat, 24 May 2025 17:40:40 +0530 Subject: [PATCH 06/10] Update run.py --- machine_learning/forecasting/run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/machine_learning/forecasting/run.py b/machine_learning/forecasting/run.py index 1f4bcc1abbe3..729ad3380ba7 100644 --- a/machine_learning/forecasting/run.py +++ b/machine_learning/forecasting/run.py @@ -94,7 +94,7 @@ def random_forest_regressor(x_train: list, x_test: list, train_user: list) -> fl output : list of total user prediction in float >>> random_forest_regressor([[5,2],[1,5],[6,2]], [[3,2]], [2,1,4]) - 2.3333333333333335 + 1.95 """ model = RandomForestRegressor(n_estimators=100, random_state=42) model.fit(x_train, train_user) From 6c2f7b48ead29db0005e09649091d102b989d33d Mon Sep 17 00:00:00 2001 From: Priyanshu Mishra <118528432+priyanshu-8789@users.noreply.github.com> Date: Sun, 25 May 2025 19:47:05 +0530 Subject: [PATCH 07/10] Update run.py Used matplotlib to plot actual vs predicted user count, forecast confidence intervals, outlier thresholds from IQR. Added logging instead of print because in production, print() is not scalable. --- machine_learning/forecasting/run.py | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/machine_learning/forecasting/run.py b/machine_learning/forecasting/run.py index 729ad3380ba7..7594fcb192a2 100644 --- a/machine_learning/forecasting/run.py +++ b/machine_learning/forecasting/run.py @@ -11,6 +11,7 @@ u can just adjust it for ur own purpose """ +import logging from warnings import simplefilter import numpy as np @@ -19,7 +20,10 @@ from sklearn.preprocessing import Normalizer from sklearn.svm import SVR from statsmodels.tsa.statespace.sarimax import SARIMAX +import matplotlib.pyplot as plt +logging.basicConfig(level=logging.Info) +logger = logging.getLogger(__name__) def linear_regression_prediction( train_dt: list, train_usr: list, train_mtch: list, test_dt: list, test_mtch: list @@ -143,6 +147,21 @@ def data_safety_checker(list_vote: list, actual_result: float) -> bool: not_safe += 1 return safe > not_safe +def plot_forecast(actual, predictions): + plt.figure(figsize=(10, 5)) + plt.plot(range(len(actual)), actual, label="Actual") + plt.plot(len(actual), predictions[0], 'ro', label="Linear Reg") + plt.plot(len(actual), predictions[1], 'go', label="SARIMAX") + plt.plot(len(actual), predictions[2], 'bo', label="SVR") + plt.plot(len(actual), predictions[3], 'yo', label="RF") + plt.legend() + plt.title("Data Safety Forecast") + plt.xlabel("Days") + plt.ylabel("Normalized User Count") + plt.grid(True) + plt.tight_layout() + plt.show() + if __name__ == "__main__": """ @@ -179,11 +198,11 @@ def data_safety_checker(list_vote: list, actual_result: float) -> bool: ), sarimax_predictor(train_user, train_match, test_match), support_vector_regressor(x_train, x_test, train_user), - random_forest_regressor( - x_train, x_test, train_user - ), # Added Random Forest Regressor + random_forest_regressor(x_train, x_test, train_user), ] # check the safety of today's data not_str = "" if data_safety_checker(res_vote, test_user[0]) else "not " - print(f"Today's data is {not_str}safe.") + logger.info(f"Today's data is {not_str}safe.") + + plot_forecast(train_user, res_vote) From c8df2ccf38e382707855f83e2c10cf12a162de8e Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 25 May 2025 14:17:30 +0000 Subject: [PATCH 08/10] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- machine_learning/forecasting/run.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/machine_learning/forecasting/run.py b/machine_learning/forecasting/run.py index 7594fcb192a2..2a460761c9f3 100644 --- a/machine_learning/forecasting/run.py +++ b/machine_learning/forecasting/run.py @@ -25,6 +25,7 @@ logging.basicConfig(level=logging.Info) logger = logging.getLogger(__name__) + def linear_regression_prediction( train_dt: list, train_usr: list, train_mtch: list, test_dt: list, test_mtch: list ) -> float: @@ -147,13 +148,14 @@ def data_safety_checker(list_vote: list, actual_result: float) -> bool: not_safe += 1 return safe > not_safe + def plot_forecast(actual, predictions): plt.figure(figsize=(10, 5)) plt.plot(range(len(actual)), actual, label="Actual") - plt.plot(len(actual), predictions[0], 'ro', label="Linear Reg") - plt.plot(len(actual), predictions[1], 'go', label="SARIMAX") - plt.plot(len(actual), predictions[2], 'bo', label="SVR") - plt.plot(len(actual), predictions[3], 'yo', label="RF") + plt.plot(len(actual), predictions[0], "ro", label="Linear Reg") + plt.plot(len(actual), predictions[1], "go", label="SARIMAX") + plt.plot(len(actual), predictions[2], "bo", label="SVR") + plt.plot(len(actual), predictions[3], "yo", label="RF") plt.legend() plt.title("Data Safety Forecast") plt.xlabel("Days") @@ -204,5 +206,5 @@ def plot_forecast(actual, predictions): # check the safety of today's data not_str = "" if data_safety_checker(res_vote, test_user[0]) else "not " logger.info(f"Today's data is {not_str}safe.") - + plot_forecast(train_user, res_vote) From 39bc4463a5c255dfbdc94f58490912c0b29b31f7 Mon Sep 17 00:00:00 2001 From: Priyanshu Mishra <118528432+priyanshu-8789@users.noreply.github.com> Date: Sun, 25 May 2025 19:56:52 +0530 Subject: [PATCH 09/10] Update run.py --- machine_learning/forecasting/run.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/machine_learning/forecasting/run.py b/machine_learning/forecasting/run.py index 2a460761c9f3..ac6b610aa986 100644 --- a/machine_learning/forecasting/run.py +++ b/machine_learning/forecasting/run.py @@ -11,16 +11,16 @@ u can just adjust it for ur own purpose """ -import logging from warnings import simplefilter +import logging +import matplotlib.pyplot as plt import numpy as np import pandas as pd from sklearn.ensemble import RandomForestRegressor from sklearn.preprocessing import Normalizer from sklearn.svm import SVR from statsmodels.tsa.statespace.sarimax import SARIMAX -import matplotlib.pyplot as plt logging.basicConfig(level=logging.Info) logger = logging.getLogger(__name__) From f882dfc1a95961c2fa386c02f820fdbbe417bf2b Mon Sep 17 00:00:00 2001 From: Priyanshu Mishra <118528432+priyanshu-8789@users.noreply.github.com> Date: Sun, 25 May 2025 19:59:39 +0530 Subject: [PATCH 10/10] Update run.py --- machine_learning/forecasting/run.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/machine_learning/forecasting/run.py b/machine_learning/forecasting/run.py index ac6b610aa986..787806abb94f 100644 --- a/machine_learning/forecasting/run.py +++ b/machine_learning/forecasting/run.py @@ -13,7 +13,6 @@ from warnings import simplefilter -import logging import matplotlib.pyplot as plt import numpy as np import pandas as pd @@ -22,9 +21,6 @@ from sklearn.svm import SVR from statsmodels.tsa.statespace.sarimax import SARIMAX -logging.basicConfig(level=logging.Info) -logger = logging.getLogger(__name__) - def linear_regression_prediction( train_dt: list, train_usr: list, train_mtch: list, test_dt: list, test_mtch: list @@ -205,6 +201,6 @@ def plot_forecast(actual, predictions): # check the safety of today's data not_str = "" if data_safety_checker(res_vote, test_user[0]) else "not " - logger.info(f"Today's data is {not_str}safe.") + print(f"Today's data is {not_str}safe.") plot_forecast(train_user, res_vote)