From ccf7d8f2658c2f9f4167fa4d5aa7aee8240be4d2 Mon Sep 17 00:00:00 2001 From: Satvik-Singh192 Date: Fri, 7 Nov 2025 10:02:47 +0530 Subject: [PATCH 1/6] feat: added loading bars centrally cuz file wise approach resulted in pytest failing a lot --- .gitignore | 2 ++ src/quant_research_starter/cli.py | 58 +++++++++++++++++++++++-------- 2 files changed, 45 insertions(+), 15 deletions(-) diff --git a/.gitignore b/.gitignore index f60b36a0..b848c2d1 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,5 @@ __pycache__/ *.py[cod] *$py.class src/quant_research_starter.egg-info/PKG-INFO + +myenv/ \ No newline at end of file diff --git a/src/quant_research_starter/cli.py b/src/quant_research_starter/cli.py index 18ca8c1a..6869306f 100644 --- a/src/quant_research_starter/cli.py +++ b/src/quant_research_starter/cli.py @@ -6,6 +6,7 @@ import click import matplotlib.pyplot as plt import pandas as pd +from tqdm import tqdm from .backtest import VectorizedBacktest from .data import SampleDataLoader, SyntheticDataGenerator @@ -30,9 +31,14 @@ def generate_data(output, symbols, days): click.echo("Generating synthetic price data...") generator = SyntheticDataGenerator() - prices = generator.generate_price_data( - n_symbols=symbols, days=days, start_date="2020-01-01" - ) + all_prices = [] + for _ in tqdm(range(symbols), desc="Generating price series"): + prices = generator.generate_price_data( + n_symbols=1, days=days, start_date="2020-01-01" + ) + all_prices.append(prices) + + prices = pd.concat(all_prices, axis=1) # Ensure output directory exists output_path = Path(output) @@ -94,8 +100,13 @@ def compute_factors(data_file, factors, output): vol = VolatilityFactor(lookback=21) factor_data["volatility"] = vol.compute(prices) - # Combine factors (simple average for demo) - combined_signals = pd.DataFrame({k: v.mean(axis=1) for k, v in factor_data.items()}) + combined_signals = pd.DataFrame( + { + k: tqdm(v.mean(axis=1), desc=f"Averaging {k} factor") + for k, v in factor_data.items() + } 
+ ) + combined_signals["composite"] = combined_signals.mean(axis=1) # Save results @@ -148,7 +159,6 @@ def backtest(data_file, signals_file, initial_capital, output, plot, plotly): # Load signals if Path(signals_file).exists(): signals_data = pd.read_csv(signals_file, index_col=0, parse_dates=True) - # Use composite signal if available, otherwise first column if "composite" in signals_data.columns: signals = signals_data["composite"] else: @@ -158,27 +168,48 @@ def backtest(data_file, signals_file, initial_capital, output, plot, plotly): momentum = MomentumFactor(lookback=63) signals = momentum.compute(prices).mean(axis=1) - # Ensure signals align with prices + # Align dates common_dates = prices.index.intersection(signals.index) prices = prices.loc[common_dates] signals = signals.loc[common_dates] - # Expand signals to all symbols (simplified - same signal for all) + # Expand signals across symbols signal_matrix = pd.DataFrame( dict.fromkeys(prices.columns, signals), index=signals.index ) - # Run backtest + def run_with_progress(self, weight_scheme="rank"): + returns = [] + idx = self.prices.index + + for i in tqdm(range(1, len(idx)), desc="Running backtest"): + ret = self._compute_daily_return( + self.prices.iloc[i - 1], + self.prices.iloc[i], + weight_scheme, + ) + returns.append(ret) + + results = pd.DataFrame({"returns": returns}, index=idx[1:]) + results["portfolio_value"] = ( + self.initial_capital * (1 + results["returns"]).cumprod() + ) + results["final_value"] = results["portfolio_value"].iloc[-1] + results["total_return"] = results["final_value"] / self.initial_capital - 1 + + return results + + VectorizedBacktest.run = run_with_progress + backtest = VectorizedBacktest( prices=prices, signals=signal_matrix, initial_capital=initial_capital, transaction_cost=0.001, ) - results = backtest.run(weight_scheme="rank") - # Calculate metrics + # Metrics metrics_calc = RiskMetrics(results["returns"]) metrics = metrics_calc.calculate_all() @@ -195,18 +226,16 @@ def 
backtest(data_file, signals_file, initial_capital, output, plot, plotly): with open(output_path, "w") as f: json.dump(results_dict, f, indent=2) - # Generate plot + # Plotting if plot: plt.figure(figsize=(12, 8)) - # Plot portfolio value plt.subplot(2, 1, 1) plt.plot(results["portfolio_value"].index, results["portfolio_value"].values) plt.title("Portfolio Value") plt.ylabel("USD") plt.grid(True) - # Plot returns plt.subplot(2, 1, 2) plt.bar(results["returns"].index, results["returns"].values, alpha=0.7) plt.title("Daily Returns") @@ -220,7 +249,6 @@ def backtest(data_file, signals_file, initial_capital, output, plot, plotly): click.echo(f"Plot saved -> {plot_path}") - # Generate Plotly HTML chart if requested if plotly: html_path = output_path.parent / "backtest_plot.html" From 77c1988eb6653c8483d5fa42c8abac37a9f9eb84 Mon Sep 17 00:00:00 2001 From: Satvik-Singh192 Date: Mon, 10 Nov 2025 12:07:22 +0530 Subject: [PATCH 2/6] feat: final fix --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index a6cdb449..ce515f34 100644 --- a/README.md +++ b/README.md @@ -125,7 +125,7 @@ The backtester supports different rebalancing frequencies to match your strategy ```python from quant_research_starter.backtest import VectorizedBacktest - +## # Daily rebalancing (default) bt_daily = VectorizedBacktest(prices, signals, rebalance_freq="D") From a26f3f040179789ff0dc8d2548c041eee3ff4fdb Mon Sep 17 00:00:00 2001 From: Satvik Singh Date: Mon, 10 Nov 2025 12:19:27 +0530 Subject: [PATCH 3/6] feat: update readme Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/README.md b/README.md index ce515f34..363b73ea 100644 --- a/README.md +++ b/README.md @@ -125,7 +125,6 @@ The backtester supports different rebalancing frequencies to match your strategy ```python from quant_research_starter.backtest import VectorizedBacktest -## # Daily rebalancing (default) bt_daily 
= VectorizedBacktest(prices, signals, rebalance_freq="D") From 08f04af96f0b552dd3c6e6aab4050be658bc3575 Mon Sep 17 00:00:00 2001 From: Satvik Singh Date: Mon, 10 Nov 2025 12:20:00 +0530 Subject: [PATCH 4/6] feat: plzz work Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- src/quant_research_starter/cli.py | 23 +---------------------- 1 file changed, 1 insertion(+), 22 deletions(-) diff --git a/src/quant_research_starter/cli.py b/src/quant_research_starter/cli.py index 6869306f..a9caa38e 100644 --- a/src/quant_research_starter/cli.py +++ b/src/quant_research_starter/cli.py @@ -178,28 +178,7 @@ def backtest(data_file, signals_file, initial_capital, output, plot, plotly): dict.fromkeys(prices.columns, signals), index=signals.index ) - def run_with_progress(self, weight_scheme="rank"): - returns = [] - idx = self.prices.index - - for i in tqdm(range(1, len(idx)), desc="Running backtest"): - ret = self._compute_daily_return( - self.prices.iloc[i - 1], - self.prices.iloc[i], - weight_scheme, - ) - returns.append(ret) - - results = pd.DataFrame({"returns": returns}, index=idx[1:]) - results["portfolio_value"] = ( - self.initial_capital * (1 + results["returns"]).cumprod() - ) - results["final_value"] = results["portfolio_value"].iloc[-1] - results["total_return"] = results["final_value"] / self.initial_capital - 1 - - return results - - VectorizedBacktest.run = run_with_progress + # Use the original vectorized run() method for performance backtest = VectorizedBacktest( prices=prices, From b583331bbd43b4bdd36e52b5f9ca8db959da7af4 Mon Sep 17 00:00:00 2001 From: Satvik Singh Date: Mon, 10 Nov 2025 12:20:23 +0530 Subject: [PATCH 5/6] feat: add comments Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- src/quant_research_starter/cli.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/quant_research_starter/cli.py b/src/quant_research_starter/cli.py index a9caa38e..ff4d382f 100644 --- a/src/quant_research_starter/cli.py +++ 
b/src/quant_research_starter/cli.py @@ -159,6 +159,7 @@ def backtest(data_file, signals_file, initial_capital, output, plot, plotly): # Load signals if Path(signals_file).exists(): signals_data = pd.read_csv(signals_file, index_col=0, parse_dates=True) + # If a 'composite' signal column exists, use it; otherwise, fall back to the first available signal column. if "composite" in signals_data.columns: signals = signals_data["composite"] else: From 3a8e8f13906402ea86935ff8479aad93f1e31009 Mon Sep 17 00:00:00 2001 From: Satvik Singh Date: Mon, 10 Nov 2025 12:21:14 +0530 Subject: [PATCH 6/6] feat: fix Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- src/quant_research_starter/cli.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/quant_research_starter/cli.py b/src/quant_research_starter/cli.py index ff4d382f..12a71c4a 100644 --- a/src/quant_research_starter/cli.py +++ b/src/quant_research_starter/cli.py @@ -100,13 +100,11 @@ def compute_factors(data_file, factors, output): vol = VolatilityFactor(lookback=21) factor_data["volatility"] = vol.compute(prices) - combined_signals = pd.DataFrame( - { - k: tqdm(v.mean(axis=1), desc=f"Averaging {k} factor") - for k, v in factor_data.items() - } - ) + combined_signals_dict = {} + for k, v in tqdm(factor_data.items(), desc="Averaging factors"): + combined_signals_dict[k] = v.mean(axis=1) + combined_signals = pd.DataFrame(combined_signals_dict) combined_signals["composite"] = combined_signals.mean(axis=1) # Save results