From f812c0fafc7c164dd04c95ab8dfcd8f2a34e3673 Mon Sep 17 00:00:00 2001 From: Linlang Date: Mon, 28 Apr 2025 18:54:36 +0800 Subject: [PATCH 01/10] fixed a problem with multi index caused by the default value of groupkey --- qlib/utils/paral.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/qlib/utils/paral.py b/qlib/utils/paral.py index 3b6671ddbe6..60992174d07 100644 --- a/qlib/utils/paral.py +++ b/qlib/utils/paral.py @@ -25,7 +25,7 @@ def __init__(self, *args, **kwargs): def datetime_groupby_apply( - df, apply_func: Union[Callable, Text], axis=0, level="datetime", resample_rule="M", n_jobs=-1 + df, apply_func: Union[Callable, Text], axis=0, level="datetime", resample_rule="ME", n_jobs=-1 ): """datetime_groupby_apply This function will apply the `apply_func` on the datetime level index. @@ -52,11 +52,11 @@ def datetime_groupby_apply( def _naive_group_apply(df): if isinstance(apply_func, str): return getattr(df.groupby(axis=axis, level=level), apply_func)() - return df.groupby(axis=axis, level=level).apply(apply_func) + return df.groupby(level=level, group_keys=False).apply(apply_func) if n_jobs != 1: dfs = ParallelExt(n_jobs=n_jobs)( - delayed(_naive_group_apply)(sub_df) for idx, sub_df in df.resample(resample_rule, axis=axis, level=level) + delayed(_naive_group_apply)(sub_df) for idx, sub_df in df.resample(resample_rule, level=level) ) return pd.concat(dfs, axis=axis).sort_index() else: From 8d332af382bd716b698db644787c9a160a611364 Mon Sep 17 00:00:00 2001 From: Linlang Date: Thu, 8 May 2025 17:29:14 +0800 Subject: [PATCH 02/10] modify group_key default value --- examples/benchmarks/TFT/libs/tft_model.py | 4 ++-- examples/benchmarks/TFT/tft.py | 4 ++-- examples/benchmarks/TRA/src/dataset.py | 2 +- examples/highfreq/highfreq_ops.py | 2 +- .../scripts/gen_training_orders.py | 4 ++-- qlib/backtest/high_performance_ds.py | 4 ++-- qlib/backtest/position.py | 2 +- qlib/backtest/report.py | 2 +- qlib/contrib/data/dataset.py | 2 +- 
qlib/contrib/data/processor.py | 20 +++++++++---------- qlib/contrib/eva/alpha.py | 18 ++++++++--------- qlib/contrib/meta/data_selection/dataset.py | 4 ++-- qlib/contrib/meta/data_selection/model.py | 2 +- qlib/contrib/model/double_ensemble.py | 2 +- qlib/contrib/model/pytorch_adarnn.py | 4 ++-- qlib/contrib/model/pytorch_add.py | 6 +++--- qlib/contrib/model/pytorch_gats.py | 2 +- qlib/contrib/model/pytorch_gats_ts.py | 4 ++-- qlib/contrib/model/pytorch_hist.py | 2 +- qlib/contrib/model/pytorch_igmtf.py | 2 +- qlib/contrib/model/pytorch_krnn.py | 2 +- qlib/contrib/ops/high_freq.py | 4 ++-- .../analysis_model_performance.py | 20 +++++++++---------- .../analysis_position/cumulative_return.py | 2 +- .../analysis_position/parse_position.py | 2 +- .../report/analysis_position/rank_label.py | 2 +- .../report/analysis_position/risk_analysis.py | 6 +++--- .../report/analysis_position/score_ic.py | 4 ++-- qlib/contrib/report/data/ana.py | 16 +++++++-------- qlib/contrib/strategy/rule_strategy.py | 4 ++-- qlib/data/cache.py | 2 +- qlib/data/dataset/processor.py | 2 +- qlib/data/dataset/storage.py | 2 +- qlib/model/ens/ensemble.py | 2 +- qlib/tests/__init__.py | 2 +- qlib/utils/paral.py | 2 +- qlib/utils/resam.py | 4 ++-- qlib/workflow/record_temp.py | 2 +- scripts/data_collector/utils.py | 2 +- scripts/dump_bin.py | 2 +- tests/dataset_tests/test_datalayer.py | 4 ++-- 41 files changed, 90 insertions(+), 90 deletions(-) diff --git a/examples/benchmarks/TFT/libs/tft_model.py b/examples/benchmarks/TFT/libs/tft_model.py index 2a1a2fa1522..f3b6dda34fe 100644 --- a/examples/benchmarks/TFT/libs/tft_model.py +++ b/examples/benchmarks/TFT/libs/tft_model.py @@ -599,7 +599,7 @@ def _batch_sampled_data(self, data, max_samples): print("Getting valid sampling locations.") valid_sampling_locations = [] split_data_map = {} - for identifier, df in data.groupby(id_col): + for identifier, df in data.groupby(id_col, group_keys=False): print("Getting locations for {}".format(identifier))
num_entries = len(df) if num_entries >= self.time_steps: @@ -678,7 +678,7 @@ def _batch_single_entity(input_data): input_cols = [tup[0] for tup in self.column_definition if tup[2] not in {InputTypes.ID, InputTypes.TIME}] data_map = {} - for _, sliced in data.groupby(id_col): + for _, sliced in data.groupby(id_col, group_keys=False): col_mappings = {"identifier": [id_col], "time": [time_col], "outputs": [target_col], "inputs": input_cols} for k in col_mappings: diff --git a/examples/benchmarks/TFT/tft.py b/examples/benchmarks/TFT/tft.py index e6dd27e83e4..95c31755fd3 100644 --- a/examples/benchmarks/TFT/tft.py +++ b/examples/benchmarks/TFT/tft.py @@ -78,13 +78,13 @@ def get_shifted_label(data_df, shifts=5, col_shift="LABEL0"): - return data_df[[col_shift]].groupby("instrument").apply(lambda df: df.shift(shifts)) + return data_df[[col_shift]].groupby("instrument", group_keys=False).apply(lambda df: df.shift(shifts)) def fill_test_na(test_df): test_df_res = test_df.copy() feature_cols = ~test_df_res.columns.str.contains("label", case=False) - test_feature_fna = test_df_res.loc[:, feature_cols].groupby("datetime").apply(lambda df: df.fillna(df.mean())) + test_feature_fna = test_df_res.loc[:, feature_cols].groupby("datetime", group_keys=False).apply(lambda df: df.fillna(df.mean())) test_df_res.loc[:, feature_cols] = test_feature_fna return test_df_res diff --git a/examples/benchmarks/TRA/src/dataset.py b/examples/benchmarks/TRA/src/dataset.py index de4b2ad4119..47cde9f3fd3 100644 --- a/examples/benchmarks/TRA/src/dataset.py +++ b/examples/benchmarks/TRA/src/dataset.py @@ -29,7 +29,7 @@ def _create_ts_slices(index, seq_len): assert index.is_lexsorted(), "index should be sorted" # number of dates for each code - sample_count_by_codes = pd.Series(0, index=index).groupby(level=0).size().values + sample_count_by_codes = pd.Series(0, index=index).groupby(level=0, group_keys=False).size().values # start_index for each code start_index_of_codes = 
np.roll(np.cumsum(sample_count_by_codes), 1) diff --git a/examples/highfreq/highfreq_ops.py b/examples/highfreq/highfreq_ops.py index a9947f91d85..25d7321bba4 100644 --- a/examples/highfreq/highfreq_ops.py +++ b/examples/highfreq/highfreq_ops.py @@ -25,7 +25,7 @@ class DayLast(ElemOperator): def _load_internal(self, instrument, start_index, end_index, freq): _calendar = get_calendar_day(freq=freq) series = self.feature.load(instrument, start_index, end_index, freq) - return series.groupby(_calendar[series.index]).transform("last") + return series.groupby(_calendar[series.index], group_keys=False).transform("last") class FFillNan(ElemOperator): diff --git a/examples/rl_order_execution/scripts/gen_training_orders.py b/examples/rl_order_execution/scripts/gen_training_orders.py index 85217d717a0..b03ce6e5a85 100755 --- a/examples/rl_order_execution/scripts/gen_training_orders.py +++ b/examples/rl_order_execution/scripts/gen_training_orders.py @@ -19,9 +19,9 @@ def generate_order(stock: str, start_idx: int, end_idx: int) -> bool: df["date"] = df["datetime"].dt.date.astype("datetime64") df = df.set_index(["instrument", "datetime", "date"]) - df = df.groupby("date").take(range(start_idx, end_idx)).droplevel(level=0) + df = df.groupby("date", group_keys=False).take(range(start_idx, end_idx)).droplevel(level=0) - order_all = pd.DataFrame(df.groupby(level=(2, 0)).mean().dropna()) + order_all = pd.DataFrame(df.groupby(level=(2, 0), group_keys=False).mean().dropna()) order_all["amount"] = np.random.lognormal(-3.28, 1.14) * order_all["$volume0"] order_all = order_all[order_all["amount"] > 0.0] order_all["order_type"] = 0 diff --git a/qlib/backtest/high_performance_ds.py b/qlib/backtest/high_performance_ds.py index 67acc7adde3..f149f13dd5c 100644 --- a/qlib/backtest/high_performance_ds.py +++ b/qlib/backtest/high_performance_ds.py @@ -104,7 +104,7 @@ class PandasQuote(BaseQuote): def __init__(self, quote_df: pd.DataFrame, freq: str) -> None: super().__init__(quote_df=quote_df, 
freq=freq) quote_dict = {} - for stock_id, stock_val in quote_df.groupby(level="instrument"): + for stock_id, stock_val in quote_df.groupby(level="instrument", group_keys=False): quote_dict[stock_id] = stock_val.droplevel(level="instrument") self.data = quote_dict @@ -137,7 +137,7 @@ def __init__(self, quote_df: pd.DataFrame, freq: str, region: str = "cn") -> Non """ super().__init__(quote_df=quote_df, freq=freq) quote_dict = {} - for stock_id, stock_val in quote_df.groupby(level="instrument"): + for stock_id, stock_val in quote_df.groupby(level="instrument", group_keys=False): quote_dict[stock_id] = idd.MultiData(stock_val.droplevel(level="instrument")) quote_dict[stock_id].sort_index() # To support more flexible slicing, we must sort data first self.data = quote_dict diff --git a/qlib/backtest/position.py b/qlib/backtest/position.py index 18b084fb647..e6f46279f3b 100644 --- a/qlib/backtest/position.py +++ b/qlib/backtest/position.py @@ -311,7 +311,7 @@ def fill_stock_value(self, start_time: Union[str, pd.Timestamp], freq: str, last freq=freq, disk_cache=True, ).dropna() - price_dict = price_df.groupby(["instrument"]).tail(1).reset_index(level=1, drop=True)["$close"].to_dict() + price_dict = price_df.groupby(["instrument"], group_keys=False).tail(1)["$close"].to_dict() if len(price_dict) < len(stock_list): lack_stock = set(stock_list) - set(price_dict) diff --git a/qlib/backtest/report.py b/qlib/backtest/report.py index d0d327d03c2..b6fd9b07bf7 100644 --- a/qlib/backtest/report.py +++ b/qlib/backtest/report.py @@ -114,7 +114,7 @@ def _cal_benchmark(benchmark_config: Optional[dict], freq: str) -> Optional[pd.S _temp_result, _ = get_higher_eq_freq_feature(_codes, fields, start_time, end_time, freq=freq) if len(_temp_result) == 0: raise ValueError(f"The benchmark {_codes} does not exist. 
Please provide the right benchmark") - return _temp_result.groupby(level="datetime")[_temp_result.columns.tolist()[0]].mean().fillna(0) + return _temp_result.groupby(level="datetime", group_keys=False)[_temp_result.columns.tolist()[0]].mean().fillna(0) def _sample_benchmark( self, diff --git a/qlib/contrib/data/dataset.py b/qlib/contrib/data/dataset.py index 8b40dba1fc0..24160d7ba02 100644 --- a/qlib/contrib/data/dataset.py +++ b/qlib/contrib/data/dataset.py @@ -32,7 +32,7 @@ def _create_ts_slices(index, seq_len): assert index.is_monotonic_increasing, "index should be sorted" # number of dates for each instrument - sample_count_by_insts = index.to_series().groupby(level=0).size().values + sample_count_by_insts = index.to_series().groupby(level=0, group_keys=False).size().values # start index for each instrument start_index_of_insts = np.roll(np.cumsum(sample_count_by_insts), 1) diff --git a/qlib/contrib/data/processor.py b/qlib/contrib/data/processor.py index 875e9b5319d..5444bf34f8b 100644 --- a/qlib/contrib/data/processor.py +++ b/qlib/contrib/data/processor.py @@ -55,14 +55,14 @@ def _feature_norm(x): # Label cols = df_focus.columns[df_focus.columns.str.contains("^LABEL")] - df_focus[cols] = df_focus[cols].groupby(level="datetime").apply(_label_norm) + df_focus[cols] = df_focus[cols].groupby(level="datetime", group_keys=False).apply(_label_norm) # Features cols = df_focus.columns[df_focus.columns.str.contains("^KLEN|^KLOW|^KUP")] - df_focus[cols] = df_focus[cols].apply(lambda x: x**0.25).groupby(level="datetime").apply(_feature_norm) + df_focus[cols] = df_focus[cols].apply(lambda x: x**0.25).groupby(level="datetime", group_keys=False).apply(_feature_norm) cols = df_focus.columns[df_focus.columns.str.contains("^KLOW2|^KUP2")] - df_focus[cols] = df_focus[cols].apply(lambda x: x**0.5).groupby(level="datetime").apply(_feature_norm) + df_focus[cols] = df_focus[cols].apply(lambda x: x**0.5).groupby(level="datetime", group_keys=False).apply(_feature_norm) _cols = [ 
"KMID", @@ -88,25 +88,25 @@ def _feature_norm(x): ] pat = "|".join(["^" + x for x in _cols]) cols = df_focus.columns[df_focus.columns.str.contains(pat) & (~df_focus.columns.isin(["HIGH0", "LOW0"]))] - df_focus[cols] = df_focus[cols].groupby(level="datetime").apply(_feature_norm) + df_focus[cols] = df_focus[cols].groupby(level="datetime", group_keys=False).apply(_feature_norm) cols = df_focus.columns[df_focus.columns.str.contains("^STD|^VOLUME|^VMA|^VSTD")] - df_focus[cols] = df_focus[cols].apply(np.log).groupby(level="datetime").apply(_feature_norm) + df_focus[cols] = df_focus[cols].apply(np.log).groupby(level="datetime", group_keys=False).apply(_feature_norm) cols = df_focus.columns[df_focus.columns.str.contains("^RSQR")] - df_focus[cols] = df_focus[cols].fillna(0).groupby(level="datetime").apply(_feature_norm) + df_focus[cols] = df_focus[cols].fillna(0).groupby(level="datetime", group_keys=False).apply(_feature_norm) cols = df_focus.columns[df_focus.columns.str.contains("^MAX|^HIGH0")] - df_focus[cols] = df_focus[cols].apply(lambda x: (x - 1) ** 0.5).groupby(level="datetime").apply(_feature_norm) + df_focus[cols] = df_focus[cols].apply(lambda x: (x - 1) ** 0.5).groupby(level="datetime", group_keys=False).apply(_feature_norm) cols = df_focus.columns[df_focus.columns.str.contains("^MIN|^LOW0")] - df_focus[cols] = df_focus[cols].apply(lambda x: (1 - x) ** 0.5).groupby(level="datetime").apply(_feature_norm) + df_focus[cols] = df_focus[cols].apply(lambda x: (1 - x) ** 0.5).groupby(level="datetime", group_keys=False).apply(_feature_norm) cols = df_focus.columns[df_focus.columns.str.contains("^CORR|^CORD")] - df_focus[cols] = df_focus[cols].apply(np.exp).groupby(level="datetime").apply(_feature_norm) + df_focus[cols] = df_focus[cols].apply(np.exp).groupby(level="datetime", group_keys=False).apply(_feature_norm) cols = df_focus.columns[df_focus.columns.str.contains("^WVMA")] - df_focus[cols] = df_focus[cols].apply(np.log1p).groupby(level="datetime").apply(_feature_norm) 
+ df_focus[cols] = df_focus[cols].apply(np.log1p).groupby(level="datetime", group_keys=False).apply(_feature_norm) df[selected_cols] = df_focus.values diff --git a/qlib/contrib/eva/alpha.py b/qlib/contrib/eva/alpha.py index 86d366d205b..a38fbccbb38 100644 --- a/qlib/contrib/eva/alpha.py +++ b/qlib/contrib/eva/alpha.py @@ -47,23 +47,23 @@ def calc_long_short_prec( if dropna: df.dropna(inplace=True) - group = df.groupby(level=date_col) + group = df.groupby(level=date_col, group_keys=False) def N(x): return int(len(x) * quantile) # find the top/low quantile of prediction and treat them as long and short target - long = group.apply(lambda x: x.nlargest(N(x), columns="pred").label).reset_index(level=0, drop=True) - short = group.apply(lambda x: x.nsmallest(N(x), columns="pred").label).reset_index(level=0, drop=True) + long = group.apply(lambda x: x.nlargest(N(x), columns="pred").label) + short = group.apply(lambda x: x.nsmallest(N(x), columns="pred").label) - groupll = long.groupby(date_col) + groupll = long.groupby(date_col, group_keys=False) l_dom = groupll.apply(lambda x: x > 0) l_c = groupll.count() - groups = short.groupby(date_col) + groups = short.groupby(date_col, group_keys=False) s_dom = groups.apply(lambda x: x < 0) s_c = groups.count() - return (l_dom.groupby(date_col).sum() / l_c), (s_dom.groupby(date_col).sum() / s_c) + return (l_dom.groupby(date_col, group_keys=False).sum() / l_c), (s_dom.groupby(date_col, group_keys=False).sum() / s_c) def calc_long_short_return( @@ -100,7 +100,7 @@ def calc_long_short_return( df = pd.DataFrame({"pred": pred, "label": label}) if dropna: df.dropna(inplace=True) - group = df.groupby(level=date_col) + group = df.groupby(level=date_col, group_keys=False) def N(x): return int(len(x) * quantile) @@ -173,8 +173,8 @@ def calc_ic(pred: pd.Series, label: pd.Series, date_col="datetime", dropna=False ic and rank ic """ df = pd.DataFrame({"pred": pred, "label": label}) - ic = df.groupby(date_col).apply(lambda df: 
df["pred"].corr(df["label"])) - ric = df.groupby(date_col).apply(lambda df: df["pred"].corr(df["label"], method="spearman")) + ic = df.groupby(date_col, group_keys=False).apply(lambda df: df["pred"].corr(df["label"])) + ric = df.groupby(date_col, group_keys=False).apply(lambda df: df["pred"].corr(df["label"], method="spearman")) if dropna: return ic.dropna(), ric.dropna() else: diff --git a/qlib/contrib/meta/data_selection/dataset.py b/qlib/contrib/meta/data_selection/dataset.py index 58e160f110a..61efdd63cfb 100644 --- a/qlib/contrib/meta/data_selection/dataset.py +++ b/qlib/contrib/meta/data_selection/dataset.py @@ -106,7 +106,7 @@ def setup(self, trainer=TrainerR, trainer_kwargs={}): def _calc_perf(self, pred, label): df = pd.DataFrame({"pred": pred, "label": label}) - df = df.groupby("datetime").corr(method="spearman") + df = df.groupby("datetime", group_keys=False).corr(method="spearman") corr = df.loc(axis=0)[:, "pred"]["label"].droplevel(axis=0, level=-1) return corr @@ -161,7 +161,7 @@ def __init__(self, task: dict, meta_info: pd.DataFrame, mode: str = MetaTask.PRO raise ValueError(f"Most of samples are dropped. Please check this task: {task}") assert ( - d_test.groupby("datetime").size().shape[0] >= 5 + d_test.groupby("datetime", group_keys=False).size().shape[0] >= 5 ), "In this segment, this trading dates is less than 5, you'd better check the data." 
sample_time_belong = np.zeros((d_train.shape[0], time_perf.shape[1])) diff --git a/qlib/contrib/meta/data_selection/model.py b/qlib/contrib/meta/data_selection/model.py index 7aaa0cad798..f531aedffc3 100644 --- a/qlib/contrib/meta/data_selection/model.py +++ b/qlib/contrib/meta/data_selection/model.py @@ -125,7 +125,7 @@ def run_epoch(self, phase, task_list, epoch, opt, loss_l, ignore_weight=False): loss_l.setdefault(phase, []).append(running_loss) pred_y_all = pd.concat(pred_y_all) - ic = pred_y_all.groupby("datetime").apply(lambda df: df["pred"].corr(df["label"], method="spearman")).mean() + ic = pred_y_all.groupby("datetime", group_keys=False).apply(lambda df: df["pred"].corr(df["label"], method="spearman")).mean() R.log_metrics(**{f"loss/{phase}": running_loss, "step": epoch}) R.log_metrics(**{f"ic/{phase}": ic, "step": epoch}) diff --git a/qlib/contrib/model/double_ensemble.py b/qlib/contrib/model/double_ensemble.py index de737b56da9..168ab25667d 100644 --- a/qlib/contrib/model/double_ensemble.py +++ b/qlib/contrib/model/double_ensemble.py @@ -166,7 +166,7 @@ def sample_reweight(self, loss_curve, loss_values, k_th): # calculate weights h["bins"] = pd.cut(h["h_value"], self.bins_sr) - h_avg = h.groupby("bins")["h_value"].mean() + h_avg = h.groupby("bins", group_keys=False)["h_value"].mean() weights = pd.Series(np.zeros(N, dtype=float)) for b in h_avg.index: weights[h["bins"] == b] = 1.0 / (self.decay**k_th * h_avg[b] + 0.1) diff --git a/qlib/contrib/model/pytorch_adarnn.py b/qlib/contrib/model/pytorch_adarnn.py index 6988837efb8..5a83a37b190 100644 --- a/qlib/contrib/model/pytorch_adarnn.py +++ b/qlib/contrib/model/pytorch_adarnn.py @@ -214,8 +214,8 @@ def train_AdaRNN(self, train_loader_list, epoch, dist_old=None, weight_mat=None) def calc_all_metrics(pred): """pred is a pandas dataframe that has two attributes: score (pred) and label (real)""" res = {} - ic = pred.groupby(level="datetime").apply(lambda x: x.label.corr(x.score)) - rank_ic = 
pred.groupby(level="datetime").apply(lambda x: x.label.corr(x.score, method="spearman")) + ic = pred.groupby(level="datetime", group_keys=False).apply(lambda x: x.label.corr(x.score)) + rank_ic = pred.groupby(level="datetime", group_keys=False).apply(lambda x: x.label.corr(x.score, method="spearman")) res["ic"] = ic.mean() res["icir"] = ic.mean() / ic.std() res["ric"] = rank_ic.mean() diff --git a/qlib/contrib/model/pytorch_add.py b/qlib/contrib/model/pytorch_add.py index e97621157a2..c94a03ecc31 100644 --- a/qlib/contrib/model/pytorch_add.py +++ b/qlib/contrib/model/pytorch_add.py @@ -226,7 +226,7 @@ def loss_rec(self, x, rec_x, record=None): def get_daily_inter(self, df, shuffle=False): # organize the train data into daily batches - daily_count = df.groupby(level=0).size().values + daily_count = df.groupby(level=0, group_keys=False).size().values daily_index = np.roll(np.cumsum(daily_count), 1) daily_index[0] = 0 if shuffle: @@ -349,7 +349,7 @@ def bootstrap_fit(self, x_train, y_train, m_train, x_valid, y_valid, m_valid): return best_score def gen_market_label(self, df, raw_label): - market_label = raw_label.groupby("datetime").mean().squeeze() + market_label = raw_label.groupby("datetime", group_keys=False).mean().squeeze() bins = [-np.inf, self.lo, self.hi, np.inf] market_label = pd.cut(market_label, bins, labels=False) market_label.name = ("market_return", "market_return") @@ -357,7 +357,7 @@ def gen_market_label(self, df, raw_label): return df def fit_thresh(self, train_label): - market_label = train_label.groupby("datetime").mean().squeeze() + market_label = train_label.groupby("datetime", group_keys=False).mean().squeeze() self.lo, self.hi = market_label.quantile([1 / 3, 2 / 3]) def fit( diff --git a/qlib/contrib/model/pytorch_gats.py b/qlib/contrib/model/pytorch_gats.py index 2a39e4b0ff6..16439b3783a 100644 --- a/qlib/contrib/model/pytorch_gats.py +++ b/qlib/contrib/model/pytorch_gats.py @@ -163,7 +163,7 @@ def metric_fn(self, pred, label): def 
get_daily_inter(self, df, shuffle=False): # organize the train data into daily batches - daily_count = df.groupby(level=0).size().values + daily_count = df.groupby(level=0, group_keys=False).size().values daily_index = np.roll(np.cumsum(daily_count), 1) daily_index[0] = 0 if shuffle: diff --git a/qlib/contrib/model/pytorch_gats_ts.py b/qlib/contrib/model/pytorch_gats_ts.py index 3bcb73c5517..d081994589d 100644 --- a/qlib/contrib/model/pytorch_gats_ts.py +++ b/qlib/contrib/model/pytorch_gats_ts.py @@ -27,7 +27,7 @@ class DailyBatchSampler(Sampler): def __init__(self, data_source): self.data_source = data_source # calculate number of samples in each batch - self.daily_count = pd.Series(index=self.data_source.get_index()).groupby("datetime").size().values + self.daily_count = pd.Series(index=self.data_source.get_index()).groupby("datetime", group_keys=False).size().values self.daily_index = np.roll(np.cumsum(self.daily_count), 1) # calculate begin index of each batch self.daily_index[0] = 0 @@ -181,7 +181,7 @@ def metric_fn(self, pred, label): def get_daily_inter(self, df, shuffle=False): # organize the train data into daily batches - daily_count = df.groupby(level=0).size().values + daily_count = df.groupby(level=0, group_keys=False).size().values daily_index = np.roll(np.cumsum(daily_count), 1) daily_index[0] = 0 if shuffle: diff --git a/qlib/contrib/model/pytorch_hist.py b/qlib/contrib/model/pytorch_hist.py index e4220d0556f..779cde9c859 100644 --- a/qlib/contrib/model/pytorch_hist.py +++ b/qlib/contrib/model/pytorch_hist.py @@ -177,7 +177,7 @@ def metric_fn(self, pred, label): def get_daily_inter(self, df, shuffle=False): # organize the train data into daily batches - daily_count = df.groupby(level=0).size().values + daily_count = df.groupby(level=0, group_keys=False).size().values daily_index = np.roll(np.cumsum(daily_count), 1) daily_index[0] = 0 if shuffle: diff --git a/qlib/contrib/model/pytorch_igmtf.py b/qlib/contrib/model/pytorch_igmtf.py index 
3bc5ac78d91..0bddc5a0f5f 100644 --- a/qlib/contrib/model/pytorch_igmtf.py +++ b/qlib/contrib/model/pytorch_igmtf.py @@ -170,7 +170,7 @@ def metric_fn(self, pred, label): def get_daily_inter(self, df, shuffle=False): # organize the train data into daily batches - daily_count = df.groupby(level=0).size().values + daily_count = df.groupby(level=0, group_keys=False).size().values daily_index = np.roll(np.cumsum(daily_count), 1) daily_index[0] = 0 if shuffle: diff --git a/qlib/contrib/model/pytorch_krnn.py b/qlib/contrib/model/pytorch_krnn.py index f69d1d23b1b..d97920b4dc5 100644 --- a/qlib/contrib/model/pytorch_krnn.py +++ b/qlib/contrib/model/pytorch_krnn.py @@ -368,7 +368,7 @@ def metric_fn(self, pred, label): def get_daily_inter(self, df, shuffle=False): # organize the train data into daily batches - daily_count = df.groupby(level=0).size().values + daily_count = df.groupby(level=0, group_keys=False).size().values daily_index = np.roll(np.cumsum(daily_count), 1) daily_index[0] = 0 if shuffle: diff --git a/qlib/contrib/ops/high_freq.py b/qlib/contrib/ops/high_freq.py index 65b84fed6c1..25e66570f09 100644 --- a/qlib/contrib/ops/high_freq.py +++ b/qlib/contrib/ops/high_freq.py @@ -96,7 +96,7 @@ def period_cusum(self, df): def _load_internal(self, instrument, start_index, end_index, freq): _calendar = get_calendar_day(freq=freq) series = self.feature.load(instrument, start_index, end_index, freq) - return series.groupby(_calendar[series.index]).transform(self.period_cusum) + return series.groupby(_calendar[series.index], group_keys=False).transform(self.period_cusum) class DayLast(ElemOperator): @@ -116,7 +116,7 @@ class DayLast(ElemOperator): def _load_internal(self, instrument, start_index, end_index, freq): _calendar = get_calendar_day(freq=freq) series = self.feature.load(instrument, start_index, end_index, freq) - return series.groupby(_calendar[series.index]).transform("last") + return series.groupby(_calendar[series.index], group_keys=False).transform("last") 
class FFillNan(ElemOperator): diff --git a/qlib/contrib/report/analysis_model/analysis_model_performance.py b/qlib/contrib/report/analysis_model/analysis_model_performance.py index 6efe49e576a..3fd0dc3f6da 100644 --- a/qlib/contrib/report/analysis_model/analysis_model_performance.py +++ b/qlib/contrib/report/analysis_model/analysis_model_performance.py @@ -38,7 +38,7 @@ def _group_return(pred_label: pd.DataFrame = None, reverse: bool = False, N: int t_df = pd.DataFrame( { "Group%d" - % (i + 1): pred_label_drop.groupby(level="datetime")["label"].apply( + % (i + 1): pred_label_drop.groupby(level="datetime", group_keys=False)["label"].apply( lambda x: x[len(x) // N * i : len(x) // N * (i + 1)].mean() # pylint: disable=W0640 ) for i in range(N) @@ -50,7 +50,7 @@ def _group_return(pred_label: pd.DataFrame = None, reverse: bool = False, N: int t_df["long-short"] = t_df["Group1"] - t_df["Group%d" % N] # Long-Average - t_df["long-average"] = t_df["Group1"] - pred_label.groupby(level="datetime")["label"].mean() + t_df["long-average"] = t_df["Group1"] - pred_label.groupby(level="datetime", group_keys=False)["label"].mean() t_df = t_df.dropna(how="all") # for days which does not contain label # Cumulative Return By Group @@ -137,7 +137,7 @@ def _corr_series(x, method): ic_df = pd.concat( [ - pred_label.groupby(level="datetime").apply(partial(_corr_series, method=_methods_mapping[m])).rename(m) + pred_label.groupby(level="datetime", group_keys=False).apply(partial(_corr_series, method=_methods_mapping[m])).rename(m) for m in methods ], axis=1, @@ -145,7 +145,7 @@ def _corr_series(x, method): _ic = ic_df.iloc(axis=1)[0] _index = _ic.index.get_level_values(0).astype("str").str.replace("-", "").str.slice(0, 6) - _monthly_ic = _ic.groupby(_index).mean() + _monthly_ic = _ic.groupby(_index, group_keys=False).mean() _monthly_ic.index = pd.MultiIndex.from_arrays( [_monthly_ic.index.str.slice(0, 4), _monthly_ic.index.str.slice(4, 6)], names=["year", "month"], @@ -220,8 +220,8 @@ def 
_corr_series(x, method): def _pred_autocorr(pred_label: pd.DataFrame, lag=1, **kwargs) -> tuple: pred = pred_label.copy() - pred["score_last"] = pred.groupby(level="instrument")["score"].shift(lag) - ac = pred.groupby(level="datetime").apply(lambda x: x["score"].rank(pct=True).corr(x["score_last"].rank(pct=True))) + pred["score_last"] = pred.groupby(level="instrument", group_keys=False)["score"].shift(lag) + ac = pred.groupby(level="datetime", group_keys=False).apply(lambda x: x["score"].rank(pct=True).corr(x["score_last"].rank(pct=True))) _df = ac.to_frame("value") ac_figure = ScatterGraph( _df, @@ -235,13 +235,13 @@ def _pred_autocorr(pred_label: pd.DataFrame, lag=1, **kwargs) -> tuple: def _pred_turnover(pred_label: pd.DataFrame, N=5, lag=1, **kwargs) -> tuple: pred = pred_label.copy() - pred["score_last"] = pred.groupby(level="instrument")["score"].shift(lag) - top = pred.groupby(level="datetime").apply( + pred["score_last"] = pred.groupby(level="instrument", group_keys=False)["score"].shift(lag) + top = pred.groupby(level="datetime", group_keys=False).apply( lambda x: 1 - x.nlargest(len(x) // N, columns="score").index.isin(x.nlargest(len(x) // N, columns="score_last").index).sum() / (len(x) // N) ) - bottom = pred.groupby(level="datetime").apply( + bottom = pred.groupby(level="datetime", group_keys=False).apply( lambda x: 1 - x.nsmallest(len(x) // N, columns="score") .index.isin(x.nsmallest(len(x) // N, columns="score_last").index) @@ -313,7 +313,7 @@ def model_performance_graph( 2017-12-15 -0.102778 -0.102778 - :param lag: `pred.groupby(level='instrument')['score'].shift(lag)`. It will be only used in the auto-correlation computing. + :param lag: `pred.groupby(level='instrument', group_keys=False)['score'].shift(lag)`. It will be only used in the auto-correlation computing. :param N: group number, default 5. :param reverse: if `True`, `pred['score'] *= -1`. :param rank: if **True**, calculate rank ic. 
diff --git a/qlib/contrib/report/analysis_position/cumulative_return.py b/qlib/contrib/report/analysis_position/cumulative_return.py index b15ea15c05e..4f325aa1a36 100644 --- a/qlib/contrib/report/analysis_position/cumulative_return.py +++ b/qlib/contrib/report/analysis_position/cumulative_return.py @@ -38,7 +38,7 @@ def _get_cum_return_data_with_position( _cumulative_return_df["label"] = _cumulative_return_df["label"] - _cumulative_return_df["bench"] _cumulative_return_df = _cumulative_return_df.dropna() - df_gp = _cumulative_return_df.groupby(level="datetime") + df_gp = _cumulative_return_df.groupby(level="datetime", group_keys=False) result_list = [] for gp in df_gp: date = gp[0] diff --git a/qlib/contrib/report/analysis_position/parse_position.py b/qlib/contrib/report/analysis_position/parse_position.py index 61064d3e6af..2b64a5bf439 100644 --- a/qlib/contrib/report/analysis_position/parse_position.py +++ b/qlib/contrib/report/analysis_position/parse_position.py @@ -132,7 +132,7 @@ def _calculate_day_value(g_df: pd.DataFrame): g_df["excess_return"] = g_df[_label_name] - g_df[_label_name].mean() return g_df - return df.groupby(level="datetime").apply(_calculate_day_value) + return df.groupby(level="datetime", group_keys=False).apply(_calculate_day_value) def get_position_data( diff --git a/qlib/contrib/report/analysis_position/rank_label.py b/qlib/contrib/report/analysis_position/rank_label.py index fb2fcc6d8b4..3e94d174c43 100644 --- a/qlib/contrib/report/analysis_position/rank_label.py +++ b/qlib/contrib/report/analysis_position/rank_label.py @@ -31,7 +31,7 @@ def _get_figure_with_position( ) res_dict = dict() - _pos_gp = _position_df.groupby(level=1) + _pos_gp = _position_df.groupby(level=1, group_keys=False) for _item in _pos_gp: _date = _item[0] _day_df = _item[1] diff --git a/qlib/contrib/report/analysis_position/risk_analysis.py b/qlib/contrib/report/analysis_position/risk_analysis.py index 70bd7dcff82..27a5aa477cb 100644 --- 
a/qlib/contrib/report/analysis_position/risk_analysis.py +++ b/qlib/contrib/report/analysis_position/risk_analysis.py @@ -63,9 +63,9 @@ def _get_monthly_risk_analysis_with_report(report_normal_df: pd.DataFrame) -> pd """ # Group by month - report_normal_gp = report_normal_df.groupby([report_normal_df.index.year, report_normal_df.index.month]) + report_normal_gp = report_normal_df.groupby([report_normal_df.index.year, report_normal_df.index.month], group_keys=False) # report_long_short_gp = report_long_short_df.groupby( - # [report_long_short_df.index.year, report_long_short_df.index.month] + # [report_long_short_df.index.year, report_long_short_df.index.month], group_keys=False # ) gp_month = sorted(set(report_normal_gp.size().index)) @@ -97,7 +97,7 @@ def _get_monthly_analysis_with_feature(monthly_df: pd.DataFrame, feature: str = :param feature: :return: """ - _monthly_df_gp = monthly_df.reset_index().groupby(["level_1"]) + _monthly_df_gp = monthly_df.reset_index().groupby(["level_1"], group_keys=False) _name_df = _monthly_df_gp.get_group(feature).set_index(["level_0", "level_1"]) _temp_df = _name_df.pivot_table(index="date", values=["risk"], columns=_name_df.index) diff --git a/qlib/contrib/report/analysis_position/score_ic.py b/qlib/contrib/report/analysis_position/score_ic.py index bd2d10a2299..ff80c274513 100644 --- a/qlib/contrib/report/analysis_position/score_ic.py +++ b/qlib/contrib/report/analysis_position/score_ic.py @@ -15,8 +15,8 @@ def _get_score_ic(pred_label: pd.DataFrame): """ concat_data = pred_label.copy() concat_data.dropna(axis=0, how="any", inplace=True) - _ic = concat_data.groupby(level="datetime").apply(lambda x: x["label"].corr(x["score"])) - _rank_ic = concat_data.groupby(level="datetime").apply(lambda x: x["label"].corr(x["score"], method="spearman")) + _ic = concat_data.groupby(level="datetime", group_keys=False).apply(lambda x: x["label"].corr(x["score"])) + _rank_ic = concat_data.groupby(level="datetime", group_keys=False).apply(lambda 
x: x["label"].corr(x["score"], method="spearman")) return pd.DataFrame({"ic": _ic, "rank_ic": _rank_ic}) diff --git a/qlib/contrib/report/data/ana.py b/qlib/contrib/report/data/ana.py index d01e852ceeb..e93b07612ad 100644 --- a/qlib/contrib/report/data/ana.py +++ b/qlib/contrib/report/data/ana.py @@ -72,10 +72,10 @@ def calc_stat_values(self): self._val_cnt = {} for col, item in self._dataset.items(): if not super().skip(col): - self._val_cnt[col] = item.groupby(DT_COL_NAME).apply(lambda s: len(s.unique())) + self._val_cnt[col] = item.groupby(DT_COL_NAME, group_keys=False).apply(lambda s: len(s.unique())) self._val_cnt = pd.DataFrame(self._val_cnt) if self.ratio: - self._val_cnt = self._val_cnt.div(self._dataset.groupby(DT_COL_NAME).size(), axis=0) + self._val_cnt = self._val_cnt.div(self._dataset.groupby(DT_COL_NAME, group_keys=False).size(), axis=0) # TODO: transfer this feature to other analysers ymin, ymax = self._val_cnt.min().min(), self._val_cnt.max().max() @@ -98,7 +98,7 @@ def calc_stat_values(self): self._inf_cnt = {} for col, item in self._dataset.items(): if not super().skip(col): - self._inf_cnt[col] = item.apply(np.isinf).astype(np.int).groupby(DT_COL_NAME).sum() + self._inf_cnt[col] = item.apply(np.isinf).astype(np.int).groupby(DT_COL_NAME, group_keys=False).sum() self._inf_cnt = pd.DataFrame(self._inf_cnt) def skip(self, col): @@ -111,7 +111,7 @@ def plot_single(self, col, ax): class FeaNanAna(FeaAnalyser): def calc_stat_values(self): - self._nan_cnt = self._dataset.isna().groupby(DT_COL_NAME).sum() + self._nan_cnt = self._dataset.isna().groupby(DT_COL_NAME, group_keys=False).sum() def skip(self, col): return (col not in self._nan_cnt) or (self._nan_cnt[col].sum() == 0) @@ -123,8 +123,8 @@ def plot_single(self, col, ax): class FeaNanAnaRatio(FeaAnalyser): def calc_stat_values(self): - self._nan_cnt = self._dataset.isna().groupby(DT_COL_NAME).sum() - self._total_cnt = self._dataset.groupby(DT_COL_NAME).size() + self._nan_cnt = 
self._dataset.isna().groupby(DT_COL_NAME, group_keys=False).sum() + self._total_cnt = self._dataset.groupby(DT_COL_NAME, group_keys=False).size() def skip(self, col): return (col not in self._nan_cnt) or (self._nan_cnt[col].sum() == 0) @@ -176,8 +176,8 @@ def plot_single(self, col, ax): class FeaMeanStd(NumFeaAnalyser): def calc_stat_values(self): - self._std = self._dataset.groupby(DT_COL_NAME).std() - self._mean = self._dataset.groupby(DT_COL_NAME).mean() + self._std = self._dataset.groupby(DT_COL_NAME, group_keys=False).std() + self._mean = self._dataset.groupby(DT_COL_NAME, group_keys=False).mean() def plot_single(self, col, ax): self._mean[col].plot(ax=ax, label="mean") diff --git a/qlib/contrib/strategy/rule_strategy.py b/qlib/contrib/strategy/rule_strategy.py index 4c1fc2f16c7..2cac662f76c 100644 --- a/qlib/contrib/strategy/rule_strategy.py +++ b/qlib/contrib/strategy/rule_strategy.py @@ -347,7 +347,7 @@ def _reset_signal(self): self.signal = {} if not signal_df.empty: - for stock_id, stock_val in signal_df.groupby(level="instrument"): + for stock_id, stock_val in signal_df.groupby(level="instrument", group_keys=False): self.signal[stock_id] = stock_val["signal"].droplevel(level="instrument") def reset_level_infra(self, level_infra): @@ -434,7 +434,7 @@ def _reset_signal(self): self.signal = {} if not signal_df.empty: - for stock_id, stock_val in signal_df.groupby(level="instrument"): + for stock_id, stock_val in signal_df.groupby(level="instrument", group_keys=False): self.signal[stock_id] = stock_val["volatility"].droplevel(level="instrument") def reset_level_infra(self, level_infra): diff --git a/qlib/data/cache.py b/qlib/data/cache.py index 3264dcd0204..9ba87f3d267 100644 --- a/qlib/data/cache.py +++ b/qlib/data/cache.py @@ -842,7 +842,7 @@ def append_index(self, data, to_disk=True): def build_index_from_data(data, start_index=0): if data.empty: return pd.DataFrame() - line_data = data.groupby("datetime").size() + line_data = data.groupby("datetime", 
group_keys=False).size() line_data.sort_index(inplace=True) index_end = line_data.cumsum() index_start = index_end.shift(1, fill_value=0) diff --git a/qlib/data/dataset/processor.py b/qlib/data/dataset/processor.py index dd0dd700c24..d05dbe381c5 100644 --- a/qlib/data/dataset/processor.py +++ b/qlib/data/dataset/processor.py @@ -352,7 +352,7 @@ def __init__(self, fields_group=None): def __call__(self, df): # try not modify original dataframe cols = get_group_columns(df, self.fields_group) - t = df[cols].groupby("datetime").rank(pct=True) + t = df[cols].groupby("datetime", group_keys=False).rank(pct=True) t -= 0.5 t *= 3.46 # NOTE: towards unit std df[cols] = t diff --git a/qlib/data/dataset/storage.py b/qlib/data/dataset/storage.py index 2adf6cd62a6..62e7ba7e432 100644 --- a/qlib/data/dataset/storage.py +++ b/qlib/data/dataset/storage.py @@ -77,7 +77,7 @@ class HashingStockStorage(BaseHandlerStorage): def __init__(self, df): self.hash_df = dict() self.stock_level = get_level_index(df, "instrument") - for k, v in df.groupby(level="instrument"): + for k, v in df.groupby(level="instrument", group_keys=False): self.hash_df[k] = v self.columns = df.columns diff --git a/qlib/model/ens/ensemble.py b/qlib/model/ens/ensemble.py index 1ebb16f18bc..1670a6538ef 100644 --- a/qlib/model/ens/ensemble.py +++ b/qlib/model/ens/ensemble.py @@ -126,7 +126,7 @@ def __call__(self, ensemble_dict: dict) -> pd.DataFrame: # NOTE: this may change the style underlying data!!!! 
# from pd.DataFrame to pd.Series results = pd.concat(values, axis=1) - results = results.groupby("datetime").apply(lambda df: (df - df.mean()) / df.std()) + results = results.groupby("datetime", group_keys=False).apply(lambda df: (df - df.mean()) / df.std()) results = results.mean(axis=1) results = results.sort_index() return results diff --git a/qlib/tests/__init__.py b/qlib/tests/__init__.py index 97ff00c579b..f9793cdabde 100644 --- a/qlib/tests/__init__.py +++ b/qlib/tests/__init__.py @@ -187,7 +187,7 @@ class MockInstrumentStorage(MockStorageBase, InstrumentStorage): def __init__(self, **kwargs): super().__init__() instruments = {} - for symbol, group in self.df.groupby(by="symbol"): + for symbol, group in self.df.groupby(by="symbol", group_keys=False): start = group["datetime"].iloc[0] end = group["datetime"].iloc[-1] instruments[symbol] = [(start, end)] diff --git a/qlib/utils/paral.py b/qlib/utils/paral.py index 60992174d07..018556995d4 100644 --- a/qlib/utils/paral.py +++ b/qlib/utils/paral.py @@ -51,7 +51,7 @@ def datetime_groupby_apply( def _naive_group_apply(df): if isinstance(apply_func, str): - return getattr(df.groupby(axis=axis, level=level), apply_func)() + return getattr(df.groupby(axis=axis, level=level, group_keys=False), apply_func)() return df.groupby(level=level, group_keys=False).apply(apply_func) if n_jobs != 1: diff --git a/qlib/utils/resam.py b/qlib/utils/resam.py index 018cf7e099d..9fe38ad6629 100644 --- a/qlib/utils/resam.py +++ b/qlib/utils/resam.py @@ -194,9 +194,9 @@ def resam_ts_data( if isinstance(feature.index, pd.MultiIndex): if callable(method): method_func = method - return feature.groupby(level="instrument").apply(method_func, **method_kwargs) + return feature.groupby(level="instrument", group_keys=False).apply(method_func, **method_kwargs) elif isinstance(method, str): - return getattr(feature.groupby(level="instrument"), method)(**method_kwargs) + return getattr(feature.groupby(level="instrument", group_keys=False), 
method)(**method_kwargs) else: if callable(method): method_func = method diff --git a/qlib/workflow/record_temp.py b/qlib/workflow/record_temp.py index 4c230e6e5e4..844914d469f 100644 --- a/qlib/workflow/record_temp.py +++ b/qlib/workflow/record_temp.py @@ -652,7 +652,7 @@ def _generate(self, **kwargs): combined_df = pd.concat(risk_analysis_df_map[_analysis_freq]) # Calculate return and information ratio's mean, std and mean/std - multi_pass_port_analysis_df = combined_df.groupby(level=[0, 1]).apply( + multi_pass_port_analysis_df = combined_df.groupby(level=[0, 1], group_keys=False).apply( lambda x: pd.Series( {"mean": x["risk"].mean(), "std": x["risk"].std(), "mean_std": x["risk"].mean() / x["risk"].std()} ) diff --git a/scripts/data_collector/utils.py b/scripts/data_collector/utils.py index f32a3065192..f25b1ec7a23 100644 --- a/scripts/data_collector/utils.py +++ b/scripts/data_collector/utils.py @@ -808,7 +808,7 @@ def calc_paused_num(df: pd.DataFrame, _date_field_name, _symbol_field_name): all_nan_nums = 0 # Record the number of consecutive occurrences of trading days that are not nan throughout the day not_nan_nums = 0 - for _date, _df in df.groupby("_tmp_date"): + for _date, _df in df.groupby("_tmp_date", group_keys=False): _df["paused"] = 0 if not _df.loc[_df["volume"] < 0].empty: logger.warning(f"volume < 0, will fill np.nan: {_date} {_symbol}") diff --git a/scripts/dump_bin.py b/scripts/dump_bin.py index a65b1f58ee4..6e501e83e40 100644 --- a/scripts/dump_bin.py +++ b/scripts/dump_bin.py @@ -458,7 +458,7 @@ def _dump_features(self): error_code = {} with ProcessPoolExecutor(max_workers=self.works) as executor: futures = {} - for _code, _df in self._all_data.groupby(self.symbol_field_name): + for _code, _df in self._all_data.groupby(self.symbol_field_name, group_keys=False): _code = fname_to_code(str(_code).lower()).upper() _start, _end = self._get_date(_df, is_begin_end=True) if not (isinstance(_start, pd.Timestamp) and isinstance(_end, pd.Timestamp)): diff 
--git a/tests/dataset_tests/test_datalayer.py b/tests/dataset_tests/test_datalayer.py index bdd0d915bfe..6509a77dec5 100644 --- a/tests/dataset_tests/test_datalayer.py +++ b/tests/dataset_tests/test_datalayer.py @@ -7,8 +7,8 @@ class TestDataset(TestAutoData): def testCSI300(self): close_p = D.features(D.instruments("csi300"), ["$close"]) - size = close_p.groupby("datetime").size() - cnt = close_p.groupby("datetime").count()["$close"] + size = close_p.groupby("datetime", group_keys=False).size() + cnt = close_p.groupby("datetime", group_keys=False).count()["$close"] size_desc = size.describe(percentiles=np.arange(0.1, 1.0, 0.1)) cnt_desc = cnt.describe(percentiles=np.arange(0.1, 1.0, 0.1)) From 823ad8d394996eefbaeff1ae1c3c648ce0f1f615 Mon Sep 17 00:00:00 2001 From: Linlang Date: Thu, 8 May 2025 17:33:47 +0800 Subject: [PATCH 03/10] limit pandas version --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 275d632be04..d6a79b973a1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,7 +26,7 @@ readme = {file = "README.md", content-type = "text/markdown"} dependencies = [ "pyyaml", "numpy", - "pandas", + "pandas>=2.0.0", "mlflow", "filelock>=3.16.0", "redis", From 002e393e38a2175d60bd810c60e42c102071535e Mon Sep 17 00:00:00 2001 From: Linlang Date: Thu, 8 May 2025 17:48:25 +0800 Subject: [PATCH 04/10] format with black --- examples/benchmarks/TFT/tft.py | 4 +++- qlib/backtest/report.py | 6 ++++- qlib/contrib/data/processor.py | 22 +++++++++++++++---- qlib/contrib/eva/alpha.py | 4 +++- qlib/contrib/meta/data_selection/model.py | 6 ++++- qlib/contrib/model/pytorch_adarnn.py | 4 +++- qlib/contrib/model/pytorch_gats_ts.py | 4 +++- .../analysis_model_performance.py | 8 +++++-- .../report/analysis_position/risk_analysis.py | 4 +++- .../report/analysis_position/score_ic.py | 4 +++- 10 files changed, 52 insertions(+), 14 deletions(-) diff --git a/examples/benchmarks/TFT/tft.py 
b/examples/benchmarks/TFT/tft.py index 95c31755fd3..633a875c0fa 100644 --- a/examples/benchmarks/TFT/tft.py +++ b/examples/benchmarks/TFT/tft.py @@ -84,7 +84,9 @@ def get_shifted_label(data_df, shifts=5, col_shift="LABEL0"): def fill_test_na(test_df): test_df_res = test_df.copy() feature_cols = ~test_df_res.columns.str.contains("label", case=False) - test_feature_fna = test_df_res.loc[:, feature_cols].groupby("datetime", group_keys=False).apply(lambda df: df.fillna(df.mean())) + test_feature_fna = ( + test_df_res.loc[:, feature_cols].groupby("datetime", group_keys=False).apply(lambda df: df.fillna(df.mean())) + ) test_df_res.loc[:, feature_cols] = test_feature_fna return test_df_res diff --git a/qlib/backtest/report.py b/qlib/backtest/report.py index b6fd9b07bf7..f1016e24e2a 100644 --- a/qlib/backtest/report.py +++ b/qlib/backtest/report.py @@ -114,7 +114,11 @@ def _cal_benchmark(benchmark_config: Optional[dict], freq: str) -> Optional[pd.S _temp_result, _ = get_higher_eq_freq_feature(_codes, fields, start_time, end_time, freq=freq) if len(_temp_result) == 0: raise ValueError(f"The benchmark {_codes} does not exist. 
Please provide the right benchmark") - return _temp_result.groupby(level="datetime", group_keys=False)[_temp_result.columns.tolist()[0]].mean().fillna(0) + return ( + _temp_result.groupby(level="datetime", group_keys=False)[_temp_result.columns.tolist()[0]] + .mean() + .fillna(0) + ) def _sample_benchmark( self, diff --git a/qlib/contrib/data/processor.py b/qlib/contrib/data/processor.py index 5444bf34f8b..4047ed73ae4 100644 --- a/qlib/contrib/data/processor.py +++ b/qlib/contrib/data/processor.py @@ -59,10 +59,14 @@ def _feature_norm(x): # Features cols = df_focus.columns[df_focus.columns.str.contains("^KLEN|^KLOW|^KUP")] - df_focus[cols] = df_focus[cols].apply(lambda x: x**0.25).groupby(level="datetime", group_keys=False).apply(_feature_norm) + df_focus[cols] = ( + df_focus[cols].apply(lambda x: x**0.25).groupby(level="datetime", group_keys=False).apply(_feature_norm) + ) cols = df_focus.columns[df_focus.columns.str.contains("^KLOW2|^KUP2")] - df_focus[cols] = df_focus[cols].apply(lambda x: x**0.5).groupby(level="datetime", group_keys=False).apply(_feature_norm) + df_focus[cols] = ( + df_focus[cols].apply(lambda x: x**0.5).groupby(level="datetime", group_keys=False).apply(_feature_norm) + ) _cols = [ "KMID", @@ -97,10 +101,20 @@ def _feature_norm(x): df_focus[cols] = df_focus[cols].fillna(0).groupby(level="datetime", group_keys=False).apply(_feature_norm) cols = df_focus.columns[df_focus.columns.str.contains("^MAX|^HIGH0")] - df_focus[cols] = df_focus[cols].apply(lambda x: (x - 1) ** 0.5).groupby(level="datetime", group_keys=False).apply(_feature_norm) + df_focus[cols] = ( + df_focus[cols] + .apply(lambda x: (x - 1) ** 0.5) + .groupby(level="datetime", group_keys=False) + .apply(_feature_norm) + ) cols = df_focus.columns[df_focus.columns.str.contains("^MIN|^LOW0")] - df_focus[cols] = df_focus[cols].apply(lambda x: (1 - x) ** 0.5).groupby(level="datetime", group_keys=False).apply(_feature_norm) + df_focus[cols] = ( + df_focus[cols] + .apply(lambda x: (1 - x) ** 
0.5) + .groupby(level="datetime", group_keys=False) + .apply(_feature_norm) + ) cols = df_focus.columns[df_focus.columns.str.contains("^CORR|^CORD")] df_focus[cols] = df_focus[cols].apply(np.exp).groupby(level="datetime", group_keys=False).apply(_feature_norm) diff --git a/qlib/contrib/eva/alpha.py b/qlib/contrib/eva/alpha.py index a38fbccbb38..09e0d08cffe 100644 --- a/qlib/contrib/eva/alpha.py +++ b/qlib/contrib/eva/alpha.py @@ -63,7 +63,9 @@ def N(x): groups = short.groupby(date_col, group_keys=False) s_dom = groups.apply(lambda x: x < 0) s_c = groups.count() - return (l_dom.groupby(date_col, group_keys=False).sum() / l_c), (s_dom.groupby(date_col, group_keys=False).sum() / s_c) + return (l_dom.groupby(date_col, group_keys=False).sum() / l_c), ( + s_dom.groupby(date_col, group_keys=False).sum() / s_c + ) def calc_long_short_return( diff --git a/qlib/contrib/meta/data_selection/model.py b/qlib/contrib/meta/data_selection/model.py index f531aedffc3..ed3ff9397e7 100644 --- a/qlib/contrib/meta/data_selection/model.py +++ b/qlib/contrib/meta/data_selection/model.py @@ -125,7 +125,11 @@ def run_epoch(self, phase, task_list, epoch, opt, loss_l, ignore_weight=False): loss_l.setdefault(phase, []).append(running_loss) pred_y_all = pd.concat(pred_y_all) - ic = pred_y_all.groupby("datetime", group_keys=False).apply(lambda df: df["pred"].corr(df["label"], method="spearman")).mean() + ic = ( + pred_y_all.groupby("datetime", group_keys=False) + .apply(lambda df: df["pred"].corr(df["label"], method="spearman")) + .mean() + ) R.log_metrics(**{f"loss/{phase}": running_loss, "step": epoch}) R.log_metrics(**{f"ic/{phase}": ic, "step": epoch}) diff --git a/qlib/contrib/model/pytorch_adarnn.py b/qlib/contrib/model/pytorch_adarnn.py index 5a83a37b190..c1585a6ac0a 100644 --- a/qlib/contrib/model/pytorch_adarnn.py +++ b/qlib/contrib/model/pytorch_adarnn.py @@ -215,7 +215,9 @@ def calc_all_metrics(pred): """pred is a pandas dataframe that has two attributes: score (pred) and label 
(real)""" res = {} ic = pred.groupby(level="datetime", group_keys=False).apply(lambda x: x.label.corr(x.score)) - rank_ic = pred.groupby(level="datetime", group_keys=False).apply(lambda x: x.label.corr(x.score, method="spearman")) + rank_ic = pred.groupby(level="datetime", group_keys=False).apply( + lambda x: x.label.corr(x.score, method="spearman") + ) res["ic"] = ic.mean() res["icir"] = ic.mean() / ic.std() res["ric"] = rank_ic.mean() diff --git a/qlib/contrib/model/pytorch_gats_ts.py b/qlib/contrib/model/pytorch_gats_ts.py index d081994589d..09f0ac08b25 100644 --- a/qlib/contrib/model/pytorch_gats_ts.py +++ b/qlib/contrib/model/pytorch_gats_ts.py @@ -27,7 +27,9 @@ class DailyBatchSampler(Sampler): def __init__(self, data_source): self.data_source = data_source # calculate number of samples in each batch - self.daily_count = pd.Series(index=self.data_source.get_index()).groupby("datetime", group_keys=False).size().values + self.daily_count = ( + pd.Series(index=self.data_source.get_index()).groupby("datetime", group_keys=False).size().values + ) self.daily_index = np.roll(np.cumsum(self.daily_count), 1) # calculate begin index of each batch self.daily_index[0] = 0 diff --git a/qlib/contrib/report/analysis_model/analysis_model_performance.py b/qlib/contrib/report/analysis_model/analysis_model_performance.py index 3fd0dc3f6da..cac1f1b8eea 100644 --- a/qlib/contrib/report/analysis_model/analysis_model_performance.py +++ b/qlib/contrib/report/analysis_model/analysis_model_performance.py @@ -137,7 +137,9 @@ def _corr_series(x, method): ic_df = pd.concat( [ - pred_label.groupby(level="datetime", group_keys=False).apply(partial(_corr_series, method=_methods_mapping[m])).rename(m) + pred_label.groupby(level="datetime", group_keys=False) + .apply(partial(_corr_series, method=_methods_mapping[m])) + .rename(m) for m in methods ], axis=1, @@ -221,7 +223,9 @@ def _corr_series(x, method): def _pred_autocorr(pred_label: pd.DataFrame, lag=1, **kwargs) -> tuple: pred = 
pred_label.copy() pred["score_last"] = pred.groupby(level="instrument", group_keys=False)["score"].shift(lag) - ac = pred.groupby(level="datetime", group_keys=False).apply(lambda x: x["score"].rank(pct=True).corr(x["score_last"].rank(pct=True))) + ac = pred.groupby(level="datetime", group_keys=False).apply( + lambda x: x["score"].rank(pct=True).corr(x["score_last"].rank(pct=True)) + ) _df = ac.to_frame("value") ac_figure = ScatterGraph( _df, diff --git a/qlib/contrib/report/analysis_position/risk_analysis.py b/qlib/contrib/report/analysis_position/risk_analysis.py index 27a5aa477cb..c7cb99c7a35 100644 --- a/qlib/contrib/report/analysis_position/risk_analysis.py +++ b/qlib/contrib/report/analysis_position/risk_analysis.py @@ -63,7 +63,9 @@ def _get_monthly_risk_analysis_with_report(report_normal_df: pd.DataFrame) -> pd """ # Group by month - report_normal_gp = report_normal_df.groupby([report_normal_df.index.year, report_normal_df.index.month], group_keys=False) + report_normal_gp = report_normal_df.groupby( + [report_normal_df.index.year, report_normal_df.index.month], group_keys=False + ) # report_long_short_gp = report_long_short_df.groupby( # [report_long_short_df.index.year, report_long_short_df.index.month], group_keys=False # ) diff --git a/qlib/contrib/report/analysis_position/score_ic.py b/qlib/contrib/report/analysis_position/score_ic.py index ff80c274513..52f45c9cbab 100644 --- a/qlib/contrib/report/analysis_position/score_ic.py +++ b/qlib/contrib/report/analysis_position/score_ic.py @@ -16,7 +16,9 @@ def _get_score_ic(pred_label: pd.DataFrame): concat_data = pred_label.copy() concat_data.dropna(axis=0, how="any", inplace=True) _ic = concat_data.groupby(level="datetime", group_keys=False).apply(lambda x: x["label"].corr(x["score"])) - _rank_ic = concat_data.groupby(level="datetime", group_keys=False).apply(lambda x: x["label"].corr(x["score"], method="spearman")) + _rank_ic = concat_data.groupby(level="datetime", group_keys=False).apply( + lambda x: 
x["label"].corr(x["score"], method="spearman") + ) return pd.DataFrame({"ic": _ic, "rank_ic": _rank_ic}) From 2b65113a60877ad0c5dca9c776bdba37efc9e88e Mon Sep 17 00:00:00 2001 From: Linlang Date: Thu, 8 May 2025 17:54:57 +0800 Subject: [PATCH 05/10] fix docs error --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index d6a79b973a1..2e83f437988 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -71,6 +71,7 @@ docs = [ "sphinx", "sphinx_rtd_theme", "readthedocs_sphinx_ext", + "snowballstemmer", ] package = [ "twine", From f0731bc88424d6e0ddb68dc2d4fd78e95c054711 Mon Sep 17 00:00:00 2001 From: Linlang Date: Thu, 8 May 2025 18:15:26 +0800 Subject: [PATCH 06/10] fix docs error --- pyproject.toml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 2e83f437988..3cbc7d9f8a9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -67,11 +67,13 @@ lint = [ "flake8", "nbqa", ] +# snowballstemmer, a dependency of sphinx, was released on 2025-05-08 with version 3.0.0, +# which causes errors in the build process. So we've limited the version for now. 
docs = [ "sphinx", "sphinx_rtd_theme", "readthedocs_sphinx_ext", - "snowballstemmer", + "snowballstemmer<3.0", ] package = [ "twine", From 59a7bc35c37cb1fee5d86a3e55b29894882425f7 Mon Sep 17 00:00:00 2001 From: Linlang Date: Mon, 12 May 2025 22:11:38 +0800 Subject: [PATCH 07/10] fixed bugs caused by pandas upgrade --- examples/highfreq/highfreq_ops.py | 4 ++-- qlib/contrib/eva/alpha.py | 2 +- qlib/contrib/model/double_ensemble.py | 2 +- qlib/contrib/model/highfreq_gdbt_model.py | 8 ++++++-- qlib/workflow/cli.py | 11 ++++++++++- 5 files changed, 20 insertions(+), 7 deletions(-) diff --git a/examples/highfreq/highfreq_ops.py b/examples/highfreq/highfreq_ops.py index 25d7321bba4..36e15a37559 100644 --- a/examples/highfreq/highfreq_ops.py +++ b/examples/highfreq/highfreq_ops.py @@ -44,7 +44,7 @@ class FFillNan(ElemOperator): def _load_internal(self, instrument, start_index, end_index, freq): series = self.feature.load(instrument, start_index, end_index, freq) - return series.fillna(method="ffill") + return series.ffill() class BFillNan(ElemOperator): @@ -63,7 +63,7 @@ class BFillNan(ElemOperator): def _load_internal(self, instrument, start_index, end_index, freq): series = self.feature.load(instrument, start_index, end_index, freq) - return series.fillna(method="bfill") + return series.bfill() class Date(ElemOperator): diff --git a/qlib/contrib/eva/alpha.py b/qlib/contrib/eva/alpha.py index 09e0d08cffe..2c2f9ea7e18 100644 --- a/qlib/contrib/eva/alpha.py +++ b/qlib/contrib/eva/alpha.py @@ -39,7 +39,7 @@ def calc_long_short_prec( long precision and short precision in time level """ if is_alpha: - label = label - label.mean(level=date_col) + label = label - label.groupby(level=date_col, group_keys=False).mean() if int(1 / quantile) >= len(label.index.get_level_values(1).unique()): raise ValueError("Need more instruments to calculate precision") diff --git a/qlib/contrib/model/double_ensemble.py b/qlib/contrib/model/double_ensemble.py index 168ab25667d..85d4418f4db 100644 
--- a/qlib/contrib/model/double_ensemble.py +++ b/qlib/contrib/model/double_ensemble.py @@ -166,7 +166,7 @@ def sample_reweight(self, loss_curve, loss_values, k_th): # calculate weights h["bins"] = pd.cut(h["h_value"], self.bins_sr) - h_avg = h.groupby("bins", group_keys=False)["h_value"].mean() + h_avg = h.groupby("bins", group_keys=False, observed=False)["h_value"].mean() weights = pd.Series(np.zeros(N, dtype=float)) for b in h_avg.index: weights[h["bins"] == b] = 1.0 / (self.decay**k_th * h_avg[b] + 0.1) diff --git a/qlib/contrib/model/highfreq_gdbt_model.py b/qlib/contrib/model/highfreq_gdbt_model.py index 324ea1e3917..c12e7ddd853 100644 --- a/qlib/contrib/model/highfreq_gdbt_model.py +++ b/qlib/contrib/model/highfreq_gdbt_model.py @@ -90,8 +90,12 @@ def _prepare_data(self, dataset: DatasetH): if y_train.values.ndim == 2 and y_train.values.shape[1] == 1: l_name = df_train["label"].columns[0] # Convert label into alpha - df_train["label"][l_name] = df_train["label"][l_name] - df_train["label"][l_name].mean(level=0) - df_valid["label"][l_name] = df_valid["label"][l_name] - df_valid["label"][l_name].mean(level=0) + df_train.loc[:, ("label", l_name)] = ( + df_train.loc[:, ("label", l_name)] - df_train.loc[:, ("label", l_name)].groupby(level=0, group_keys=False).mean() + ) + df_valid.loc[:, ("label", l_name)] = ( + df_valid.loc[:, ("label", l_name)] - df_valid.loc[:, ("label", l_name)].groupby(level=0, group_keys=False).mean() + ) def mapping_fn(x): return 0 if x < 0 else 1 diff --git a/qlib/workflow/cli.py b/qlib/workflow/cli.py index d6e401e010d..01ae2021c99 100644 --- a/qlib/workflow/cli.py +++ b/qlib/workflow/cli.py @@ -154,4 +154,13 @@ def run(): if __name__ == "__main__": - run() + # run() + # workflow(config_path="C:/Users/v-lvlinlang/Desktop/qlib/1917/qlib/examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha360.yaml") + # 
workflow(config_path="C:/Users/v-lvlinlang/Desktop/qlib/1917/qlib/examples/benchmarks/DoubleEnsemble/workflow_config_doubleensemble_Alpha158.yaml") + # workflow(config_path="C:/Users/v-lvlinlang/Desktop/qlib/1917/qlib/examples/benchmarks/ADARNN/workflow_config_adarnn_Alpha360.yaml") + workflow(config_path="C:/Users/v-lvlinlang/Desktop/qlib/1917/qlib/examples/benchmarks/ADD/workflow_config_add_Alpha360.yaml") + # workflow(config_path="C:/Users/v-lvlinlang/Desktop/qlib/1917/qlib/examples/benchmarks/GATs/workflow_config_gats_Alpha158.yaml") + # workflow(config_path="C:/Users/v-lvlinlang/Desktop/qlib/1917/qlib/examples/benchmarks/GATs/workflow_config_gats_Alpha360.yaml") + # workflow(config_path="C:/Users/v-lvlinlang/Desktop/qlib/1917/qlib/examples/benchmarks/HIST/workflow_config_hist_Alpha360.yaml") + # workflow(config_path="C:/Users/v-lvlinlang/Desktop/qlib/1917/qlib/examples/benchmarks/IGMTF/workflow_config_igmtf_Alpha360.yaml") + # workflow(config_path="C:/Users/v-lvlinlang/Desktop/qlib/1917/qlib/examples/benchmarks/KRNN/workflow_config_krnn_Alpha360.yaml") From f01ed3519563dc55b8eca935598363b3d4fc6ca8 Mon Sep 17 00:00:00 2001 From: Linlang Date: Mon, 12 May 2025 22:19:34 +0800 Subject: [PATCH 08/10] remove needless code --- qlib/workflow/cli.py | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/qlib/workflow/cli.py b/qlib/workflow/cli.py index 01ae2021c99..d6e401e010d 100644 --- a/qlib/workflow/cli.py +++ b/qlib/workflow/cli.py @@ -154,13 +154,4 @@ def run(): if __name__ == "__main__": - # run() - # workflow(config_path="C:/Users/v-lvlinlang/Desktop/qlib/1917/qlib/examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha360.yaml") - # workflow(config_path="C:/Users/v-lvlinlang/Desktop/qlib/1917/qlib/examples/benchmarks/DoubleEnsemble/workflow_config_doubleensemble_Alpha158.yaml") - # workflow(config_path="C:/Users/v-lvlinlang/Desktop/qlib/1917/qlib/examples/benchmarks/ADARNN/workflow_config_adarnn_Alpha360.yaml") - 
workflow(config_path="C:/Users/v-lvlinlang/Desktop/qlib/1917/qlib/examples/benchmarks/ADD/workflow_config_add_Alpha360.yaml") - # workflow(config_path="C:/Users/v-lvlinlang/Desktop/qlib/1917/qlib/examples/benchmarks/GATs/workflow_config_gats_Alpha158.yaml") - # workflow(config_path="C:/Users/v-lvlinlang/Desktop/qlib/1917/qlib/examples/benchmarks/GATs/workflow_config_gats_Alpha360.yaml") - # workflow(config_path="C:/Users/v-lvlinlang/Desktop/qlib/1917/qlib/examples/benchmarks/HIST/workflow_config_hist_Alpha360.yaml") - # workflow(config_path="C:/Users/v-lvlinlang/Desktop/qlib/1917/qlib/examples/benchmarks/IGMTF/workflow_config_igmtf_Alpha360.yaml") - # workflow(config_path="C:/Users/v-lvlinlang/Desktop/qlib/1917/qlib/examples/benchmarks/KRNN/workflow_config_krnn_Alpha360.yaml") + run() From e734b8dac874c0bf6e7fb28412540d5ad271a6b1 Mon Sep 17 00:00:00 2001 From: Linlang Date: Mon, 12 May 2025 22:23:38 +0800 Subject: [PATCH 09/10] reformat with black --- qlib/contrib/model/highfreq_gdbt_model.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/qlib/contrib/model/highfreq_gdbt_model.py b/qlib/contrib/model/highfreq_gdbt_model.py index c12e7ddd853..ad0641136f2 100644 --- a/qlib/contrib/model/highfreq_gdbt_model.py +++ b/qlib/contrib/model/highfreq_gdbt_model.py @@ -91,10 +91,12 @@ def _prepare_data(self, dataset: DatasetH): l_name = df_train["label"].columns[0] # Convert label into alpha df_train.loc[:, ("label", l_name)] = ( - df_train.loc[:, ("label", l_name)] - df_train.loc[:, ("label", l_name)].groupby(level=0, group_keys=False).mean() + df_train.loc[:, ("label", l_name)] + - df_train.loc[:, ("label", l_name)].groupby(level=0, group_keys=False).mean() ) df_valid.loc[:, ("label", l_name)] = ( - df_valid.loc[:, ("label", l_name)] - df_valid.loc[:, ("label", l_name)].groupby(level=0, group_keys=False).mean() + df_valid.loc[:, ("label", l_name)] + - df_valid.loc[:, ("label", l_name)].groupby(level=0, group_keys=False).mean() ) def mapping_fn(x): 
From c1278a5868f2241132b00d916745d97b0a41440a Mon Sep 17 00:00:00 2001 From: Linlang Date: Tue, 13 May 2025 15:13:44 +0800 Subject: [PATCH 10/10] limit version & add docs --- README.md | 8 ++++++++ pyproject.toml | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index c6657b6251a..f3bf81b2429 100644 --- a/README.md +++ b/README.md @@ -462,6 +462,14 @@ python run_all_model.py run 10 It also provides the API to run specific models at once. For more use cases, please refer to the file's [docstrings](examples/run_all_model.py). +### Breaking change +In `pandas`, `group_keys` is one of the parameters of the `groupby` method. From version 1.5 to 2.0 of `pandas`, the default value of `group_keys` was changed from `no default` to `True`, which causes qlib to report an error during operation. So we set `group_keys=False`, but this does not guarantee that the following programs will run correctly: +* qlib\examples\rl_order_execution\scripts\gen_training_orders.py +* qlib\examples\benchmarks\TRA\src\dataset.MTSDatasetH.py +* qlib\examples\benchmarks\TFT\tft.py + + + ## [Adapting to Market Dynamics](examples/benchmarks_dynamic) Due to the non-stationary nature of the environment of the financial market, the data distribution may change in different periods, which makes the performance of models build on training data decays in the future test data. diff --git a/pyproject.toml b/pyproject.toml index 3cbc7d9f8a9..ada0071cfcd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,7 +26,7 @@ readme = {file = "README.md", content-type = "text/markdown"} dependencies = [ "pyyaml", "numpy", - "pandas>=2.0.0", + "pandas>=0.24", "mlflow", "filelock>=3.16.0", "redis",