fixed a problem with multi index caused by the default value of groupkey #1917

Merged
merged 11 commits on May 13, 2025
8 changes: 8 additions & 0 deletions README.md
@@ -462,6 +462,14 @@ python run_all_model.py run 10

It also provides the API to run specific models at once. For more use cases, please refer to the file's [docstrings](examples/run_all_model.py).

### Breaking Change
In `pandas`, `group_keys` is one of the parameters of the `groupby` method. Between `pandas` 1.5 and 2.0 its default value changed from `no_default` to `True`, which causes qlib to raise errors at runtime. We therefore pass `group_keys=False` explicitly, but this does not guarantee that every program runs correctly; the following may still be affected (a minimal illustration of the behavior change is shown after this list):
* qlib/examples/rl_order_execution/scripts/gen_training_orders.py
* qlib/examples/benchmarks/TRA/src/dataset.MTSDatasetH.py
* qlib/examples/benchmarks/TFT/tft.py
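
As a minimal sketch of the underlying behavior change (the DataFrame below is hypothetical, not taken from qlib), `groupby(...).apply(...)` with the pandas >= 2.0 default `group_keys=True` prepends the group label as an extra index level, while `group_keys=False` keeps the original index shape that the code paths listed above rely on:

```python
import pandas as pd

df = pd.DataFrame({"instrument": ["A", "A", "B", "B"], "LABEL0": [1.0, 2.0, 3.0, 4.0]})

# pandas >= 2.0: group_keys defaults to True, so apply() prepends the group
# label as an extra index level and the result no longer aligns with df.
with_keys = df.groupby("instrument", group_keys=True)[["LABEL0"]].apply(lambda g: g.shift(1))
print(with_keys.index.nlevels)  # 2 -> MultiIndex of (instrument, original row)

# group_keys=False keeps the original index, which is what the callers expect.
without_keys = df.groupby("instrument", group_keys=False)[["LABEL0"]].apply(lambda g: g.shift(1))
print(without_keys.index.nlevels)  # 1 -> original RangeIndex
```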



## [Adapting to Market Dynamics](examples/benchmarks_dynamic)

Due to the non-stationary nature of the financial market environment, the data distribution may change across periods, which makes the performance of models built on the training data decay on future test data.
4 changes: 2 additions & 2 deletions examples/benchmarks/TFT/libs/tft_model.py
@@ -599,7 +599,7 @@ def _batch_sampled_data(self, data, max_samples):
print("Getting valid sampling locations.")
valid_sampling_locations = []
split_data_map = {}
for identifier, df in data.groupby(id_col):
for identifier, df in data.groupby(id_col, group_keys=False):
print("Getting locations for {}".format(identifier))
num_entries = len(df)
if num_entries >= self.time_steps:
@@ -678,7 +678,7 @@ def _batch_single_entity(input_data):
input_cols = [tup[0] for tup in self.column_definition if tup[2] not in {InputTypes.ID, InputTypes.TIME}]

data_map = {}
for _, sliced in data.groupby(id_col):
for _, sliced in data.groupby(id_col, group_keys=False):
col_mappings = {"identifier": [id_col], "time": [time_col], "outputs": [target_col], "inputs": input_cols}

for k in col_mappings:
6 changes: 4 additions & 2 deletions examples/benchmarks/TFT/tft.py
@@ -78,13 +78,15 @@


def get_shifted_label(data_df, shifts=5, col_shift="LABEL0"):
return data_df[[col_shift]].groupby("instrument").apply(lambda df: df.shift(shifts))
return data_df[[col_shift]].groupby("instrument", group_keys=False).apply(lambda df: df.shift(shifts))


def fill_test_na(test_df):
test_df_res = test_df.copy()
feature_cols = ~test_df_res.columns.str.contains("label", case=False)
test_feature_fna = test_df_res.loc[:, feature_cols].groupby("datetime").apply(lambda df: df.fillna(df.mean()))
test_feature_fna = (
test_df_res.loc[:, feature_cols].groupby("datetime", group_keys=False).apply(lambda df: df.fillna(df.mean()))
)
test_df_res.loc[:, feature_cols] = test_feature_fna
return test_df_res

2 changes: 1 addition & 1 deletion examples/benchmarks/TRA/src/dataset.py
@@ -29,7 +29,7 @@ def _create_ts_slices(index, seq_len):
assert index.is_lexsorted(), "index should be sorted"

# number of dates for each code
sample_count_by_codes = pd.Series(0, index=index).groupby(level=0).size().values
sample_count_by_codes = pd.Series(0, index=index).groupby(level=0, group_keys=False).size().values

# start_index for each code
start_index_of_codes = np.roll(np.cumsum(sample_count_by_codes), 1)
6 changes: 3 additions & 3 deletions examples/highfreq/highfreq_ops.py
@@ -25,7 +25,7 @@ class DayLast(ElemOperator):
def _load_internal(self, instrument, start_index, end_index, freq):
_calendar = get_calendar_day(freq=freq)
series = self.feature.load(instrument, start_index, end_index, freq)
return series.groupby(_calendar[series.index]).transform("last")
return series.groupby(_calendar[series.index], group_keys=False).transform("last")


class FFillNan(ElemOperator):
@@ -44,7 +44,7 @@ class FFillNan(ElemOperator):

def _load_internal(self, instrument, start_index, end_index, freq):
series = self.feature.load(instrument, start_index, end_index, freq)
return series.fillna(method="ffill")
return series.ffill()


class BFillNan(ElemOperator):
@@ -63,7 +63,7 @@ class BFillNan(ElemOperator):

def _load_internal(self, instrument, start_index, end_index, freq):
series = self.feature.load(instrument, start_index, end_index, freq)
return series.fillna(method="bfill")
return series.bfill()


class Date(ElemOperator):
4 changes: 2 additions & 2 deletions examples/rl_order_execution/scripts/gen_training_orders.py
@@ -19,9 +19,9 @@ def generate_order(stock: str, start_idx: int, end_idx: int) -> bool:

df["date"] = df["datetime"].dt.date.astype("datetime64")
df = df.set_index(["instrument", "datetime", "date"])
df = df.groupby("date").take(range(start_idx, end_idx)).droplevel(level=0)
df = df.groupby("date", group_keys=False).take(range(start_idx, end_idx)).droplevel(level=0)

order_all = pd.DataFrame(df.groupby(level=(2, 0)).mean().dropna())
order_all = pd.DataFrame(df.groupby(level=(2, 0), group_keys=False).mean().dropna())
order_all["amount"] = np.random.lognormal(-3.28, 1.14) * order_all["$volume0"]
order_all = order_all[order_all["amount"] > 0.0]
order_all["order_type"] = 0
5 changes: 4 additions & 1 deletion pyproject.toml
@@ -26,7 +26,7 @@ readme = {file = "README.md", content-type = "text/markdown"}
dependencies = [
"pyyaml",
"numpy",
"pandas",
"pandas>=0.24",
"mlflow",
"filelock>=3.16.0",
"redis",
@@ -67,10 +67,13 @@ lint = [
"flake8",
"nbqa",
]
# snowballstemmer, a dependency of sphinx, released version 3.0.0 on 2025-05-08,
# which breaks the docs build, so we pin the version below 3.0 for now.
docs = [
"sphinx",
"sphinx_rtd_theme",
"readthedocs_sphinx_ext",
"snowballstemmer<3.0",
]
package = [
"twine",
4 changes: 2 additions & 2 deletions qlib/backtest/high_performance_ds.py
@@ -104,7 +104,7 @@ class PandasQuote(BaseQuote):
def __init__(self, quote_df: pd.DataFrame, freq: str) -> None:
super().__init__(quote_df=quote_df, freq=freq)
quote_dict = {}
for stock_id, stock_val in quote_df.groupby(level="instrument"):
for stock_id, stock_val in quote_df.groupby(level="instrument", group_keys=False):
quote_dict[stock_id] = stock_val.droplevel(level="instrument")
self.data = quote_dict

@@ -137,7 +137,7 @@ def __init__(self, quote_df: pd.DataFrame, freq: str, region: str = "cn") -> Non
"""
super().__init__(quote_df=quote_df, freq=freq)
quote_dict = {}
for stock_id, stock_val in quote_df.groupby(level="instrument"):
for stock_id, stock_val in quote_df.groupby(level="instrument", group_keys=False):
quote_dict[stock_id] = idd.MultiData(stock_val.droplevel(level="instrument"))
quote_dict[stock_id].sort_index() # To support more flexible slicing, we must sort data first
self.data = quote_dict
2 changes: 1 addition & 1 deletion qlib/backtest/position.py
@@ -311,7 +311,7 @@ def fill_stock_value(self, start_time: Union[str, pd.Timestamp], freq: str, last
freq=freq,
disk_cache=True,
).dropna()
price_dict = price_df.groupby(["instrument"]).tail(1).reset_index(level=1, drop=True)["$close"].to_dict()
price_dict = price_df.groupby(["instrument"], group_keys=False).tail(1)["$close"].to_dict()

if len(price_dict) < len(stock_list):
lack_stock = set(stock_list) - set(price_dict)
6 changes: 5 additions & 1 deletion qlib/backtest/report.py
@@ -114,7 +114,11 @@ def _cal_benchmark(benchmark_config: Optional[dict], freq: str) -> Optional[pd.S
_temp_result, _ = get_higher_eq_freq_feature(_codes, fields, start_time, end_time, freq=freq)
if len(_temp_result) == 0:
raise ValueError(f"The benchmark {_codes} does not exist. Please provide the right benchmark")
return _temp_result.groupby(level="datetime")[_temp_result.columns.tolist()[0]].mean().fillna(0)
return (
_temp_result.groupby(level="datetime", group_keys=False)[_temp_result.columns.tolist()[0]]
.mean()
.fillna(0)
)

def _sample_benchmark(
self,
2 changes: 1 addition & 1 deletion qlib/contrib/data/dataset.py
@@ -32,7 +32,7 @@ def _create_ts_slices(index, seq_len):
assert index.is_monotonic_increasing, "index should be sorted"

# number of dates for each instrument
sample_count_by_insts = index.to_series().groupby(level=0).size().values
sample_count_by_insts = index.to_series().groupby(level=0, group_keys=False).size().values

# start index for each instrument
start_index_of_insts = np.roll(np.cumsum(sample_count_by_insts), 1)
34 changes: 24 additions & 10 deletions qlib/contrib/data/processor.py
@@ -55,14 +55,18 @@ def _feature_norm(x):

# Label
cols = df_focus.columns[df_focus.columns.str.contains("^LABEL")]
df_focus[cols] = df_focus[cols].groupby(level="datetime").apply(_label_norm)
df_focus[cols] = df_focus[cols].groupby(level="datetime", group_keys=False).apply(_label_norm)

# Features
cols = df_focus.columns[df_focus.columns.str.contains("^KLEN|^KLOW|^KUP")]
df_focus[cols] = df_focus[cols].apply(lambda x: x**0.25).groupby(level="datetime").apply(_feature_norm)
df_focus[cols] = (
df_focus[cols].apply(lambda x: x**0.25).groupby(level="datetime", group_keys=False).apply(_feature_norm)
)

cols = df_focus.columns[df_focus.columns.str.contains("^KLOW2|^KUP2")]
df_focus[cols] = df_focus[cols].apply(lambda x: x**0.5).groupby(level="datetime").apply(_feature_norm)
df_focus[cols] = (
df_focus[cols].apply(lambda x: x**0.5).groupby(level="datetime", group_keys=False).apply(_feature_norm)
)

_cols = [
"KMID",
@@ -88,25 +92,35 @@ def _feature_norm(x):
]
pat = "|".join(["^" + x for x in _cols])
cols = df_focus.columns[df_focus.columns.str.contains(pat) & (~df_focus.columns.isin(["HIGH0", "LOW0"]))]
df_focus[cols] = df_focus[cols].groupby(level="datetime").apply(_feature_norm)
df_focus[cols] = df_focus[cols].groupby(level="datetime", group_keys=False).apply(_feature_norm)

cols = df_focus.columns[df_focus.columns.str.contains("^STD|^VOLUME|^VMA|^VSTD")]
df_focus[cols] = df_focus[cols].apply(np.log).groupby(level="datetime").apply(_feature_norm)
df_focus[cols] = df_focus[cols].apply(np.log).groupby(level="datetime", group_keys=False).apply(_feature_norm)

cols = df_focus.columns[df_focus.columns.str.contains("^RSQR")]
df_focus[cols] = df_focus[cols].fillna(0).groupby(level="datetime").apply(_feature_norm)
df_focus[cols] = df_focus[cols].fillna(0).groupby(level="datetime", group_keys=False).apply(_feature_norm)

cols = df_focus.columns[df_focus.columns.str.contains("^MAX|^HIGH0")]
df_focus[cols] = df_focus[cols].apply(lambda x: (x - 1) ** 0.5).groupby(level="datetime").apply(_feature_norm)
df_focus[cols] = (
df_focus[cols]
.apply(lambda x: (x - 1) ** 0.5)
.groupby(level="datetime", group_keys=False)
.apply(_feature_norm)
)

cols = df_focus.columns[df_focus.columns.str.contains("^MIN|^LOW0")]
df_focus[cols] = df_focus[cols].apply(lambda x: (1 - x) ** 0.5).groupby(level="datetime").apply(_feature_norm)
df_focus[cols] = (
df_focus[cols]
.apply(lambda x: (1 - x) ** 0.5)
.groupby(level="datetime", group_keys=False)
.apply(_feature_norm)
)

cols = df_focus.columns[df_focus.columns.str.contains("^CORR|^CORD")]
df_focus[cols] = df_focus[cols].apply(np.exp).groupby(level="datetime").apply(_feature_norm)
df_focus[cols] = df_focus[cols].apply(np.exp).groupby(level="datetime", group_keys=False).apply(_feature_norm)

cols = df_focus.columns[df_focus.columns.str.contains("^WVMA")]
df_focus[cols] = df_focus[cols].apply(np.log1p).groupby(level="datetime").apply(_feature_norm)
df_focus[cols] = df_focus[cols].apply(np.log1p).groupby(level="datetime", group_keys=False).apply(_feature_norm)

df[selected_cols] = df_focus.values

22 changes: 12 additions & 10 deletions qlib/contrib/eva/alpha.py
@@ -39,31 +39,33 @@ def calc_long_short_prec(
long precision and short precision in time level
"""
if is_alpha:
label = label - label.mean(level=date_col)
label = label - label.groupby(level=date_col, group_keys=False).mean()
if int(1 / quantile) >= len(label.index.get_level_values(1).unique()):
raise ValueError("Need more instruments to calculate precision")

df = pd.DataFrame({"pred": pred, "label": label})
if dropna:
df.dropna(inplace=True)

group = df.groupby(level=date_col)
group = df.groupby(level=date_col, group_keys=False)

def N(x):
return int(len(x) * quantile)

# find the top/low quantile of prediction and treat them as long and short target
long = group.apply(lambda x: x.nlargest(N(x), columns="pred").label).reset_index(level=0, drop=True)
short = group.apply(lambda x: x.nsmallest(N(x), columns="pred").label).reset_index(level=0, drop=True)
long = group.apply(lambda x: x.nlargest(N(x), columns="pred").label)
short = group.apply(lambda x: x.nsmallest(N(x), columns="pred").label)

groupll = long.groupby(date_col)
groupll = long.groupby(date_col, group_keys=False)
l_dom = groupll.apply(lambda x: x > 0)
l_c = groupll.count()

groups = short.groupby(date_col)
groups = short.groupby(date_col, group_keys=False)
s_dom = groups.apply(lambda x: x < 0)
s_c = groups.count()
return (l_dom.groupby(date_col).sum() / l_c), (s_dom.groupby(date_col).sum() / s_c)
return (l_dom.groupby(date_col, group_keys=False).sum() / l_c), (
s_dom.groupby(date_col, group_keys=False).sum() / s_c
)


def calc_long_short_return(
@@ -100,7 +102,7 @@ def calc_long_short_return(
df = pd.DataFrame({"pred": pred, "label": label})
if dropna:
df.dropna(inplace=True)
group = df.groupby(level=date_col)
group = df.groupby(level=date_col, group_keys=False)

def N(x):
return int(len(x) * quantile)
@@ -173,8 +175,8 @@ def calc_ic(pred: pd.Series, label: pd.Series, date_col="datetime", dropna=False
ic and rank ic
"""
df = pd.DataFrame({"pred": pred, "label": label})
ic = df.groupby(date_col).apply(lambda df: df["pred"].corr(df["label"]))
ric = df.groupby(date_col).apply(lambda df: df["pred"].corr(df["label"], method="spearman"))
ic = df.groupby(date_col, group_keys=False).apply(lambda df: df["pred"].corr(df["label"]))
ric = df.groupby(date_col, group_keys=False).apply(lambda df: df["pred"].corr(df["label"], method="spearman"))
if dropna:
return ic.dropna(), ric.dropna()
else:
4 changes: 2 additions & 2 deletions qlib/contrib/meta/data_selection/dataset.py
@@ -106,7 +106,7 @@ def setup(self, trainer=TrainerR, trainer_kwargs={}):

def _calc_perf(self, pred, label):
df = pd.DataFrame({"pred": pred, "label": label})
df = df.groupby("datetime").corr(method="spearman")
df = df.groupby("datetime", group_keys=False).corr(method="spearman")
corr = df.loc(axis=0)[:, "pred"]["label"].droplevel(axis=0, level=-1)
return corr

@@ -161,7 +161,7 @@ def __init__(self, task: dict, meta_info: pd.DataFrame, mode: str = MetaTask.PRO
raise ValueError(f"Most of samples are dropped. Please check this task: {task}")

assert (
d_test.groupby("datetime").size().shape[0] >= 5
d_test.groupby("datetime", group_keys=False).size().shape[0] >= 5
), "In this segment, this trading dates is less than 5, you'd better check the data."

sample_time_belong = np.zeros((d_train.shape[0], time_perf.shape[1]))
6 changes: 5 additions & 1 deletion qlib/contrib/meta/data_selection/model.py
@@ -125,7 +125,11 @@ def run_epoch(self, phase, task_list, epoch, opt, loss_l, ignore_weight=False):
loss_l.setdefault(phase, []).append(running_loss)

pred_y_all = pd.concat(pred_y_all)
ic = pred_y_all.groupby("datetime").apply(lambda df: df["pred"].corr(df["label"], method="spearman")).mean()
ic = (
pred_y_all.groupby("datetime", group_keys=False)
.apply(lambda df: df["pred"].corr(df["label"], method="spearman"))
.mean()
)

R.log_metrics(**{f"loss/{phase}": running_loss, "step": epoch})
R.log_metrics(**{f"ic/{phase}": ic, "step": epoch})
2 changes: 1 addition & 1 deletion qlib/contrib/model/double_ensemble.py
@@ -166,7 +166,7 @@ def sample_reweight(self, loss_curve, loss_values, k_th):

# calculate weights
h["bins"] = pd.cut(h["h_value"], self.bins_sr)
h_avg = h.groupby("bins")["h_value"].mean()
h_avg = h.groupby("bins", group_keys=False, observed=False)["h_value"].mean()
weights = pd.Series(np.zeros(N, dtype=float))
for b in h_avg.index:
weights[h["bins"] == b] = 1.0 / (self.decay**k_th * h_avg[b] + 0.1)
10 changes: 8 additions & 2 deletions qlib/contrib/model/highfreq_gdbt_model.py
@@ -90,8 +90,14 @@ def _prepare_data(self, dataset: DatasetH):
if y_train.values.ndim == 2 and y_train.values.shape[1] == 1:
l_name = df_train["label"].columns[0]
# Convert label into alpha
df_train["label"][l_name] = df_train["label"][l_name] - df_train["label"][l_name].mean(level=0)
df_valid["label"][l_name] = df_valid["label"][l_name] - df_valid["label"][l_name].mean(level=0)
df_train.loc[:, ("label", l_name)] = (
df_train.loc[:, ("label", l_name)]
- df_train.loc[:, ("label", l_name)].groupby(level=0, group_keys=False).mean()
)
df_valid.loc[:, ("label", l_name)] = (
df_valid.loc[:, ("label", l_name)]
- df_valid.loc[:, ("label", l_name)].groupby(level=0, group_keys=False).mean()
)

def mapping_fn(x):
return 0 if x < 0 else 1
6 changes: 4 additions & 2 deletions qlib/contrib/model/pytorch_adarnn.py
@@ -214,8 +214,10 @@ def train_AdaRNN(self, train_loader_list, epoch, dist_old=None, weight_mat=None)
def calc_all_metrics(pred):
"""pred is a pandas dataframe that has two attributes: score (pred) and label (real)"""
res = {}
ic = pred.groupby(level="datetime").apply(lambda x: x.label.corr(x.score))
rank_ic = pred.groupby(level="datetime").apply(lambda x: x.label.corr(x.score, method="spearman"))
ic = pred.groupby(level="datetime", group_keys=False).apply(lambda x: x.label.corr(x.score))
rank_ic = pred.groupby(level="datetime", group_keys=False).apply(
lambda x: x.label.corr(x.score, method="spearman")
)
res["ic"] = ic.mean()
res["icir"] = ic.mean() / ic.std()
res["ric"] = rank_ic.mean()
6 changes: 3 additions & 3 deletions qlib/contrib/model/pytorch_add.py
@@ -226,7 +226,7 @@ def loss_rec(self, x, rec_x, record=None):

def get_daily_inter(self, df, shuffle=False):
# organize the train data into daily batches
daily_count = df.groupby(level=0).size().values
daily_count = df.groupby(level=0, group_keys=False).size().values
daily_index = np.roll(np.cumsum(daily_count), 1)
daily_index[0] = 0
if shuffle:
@@ -349,15 +349,15 @@ def bootstrap_fit(self, x_train, y_train, m_train, x_valid, y_valid, m_valid):
return best_score

def gen_market_label(self, df, raw_label):
market_label = raw_label.groupby("datetime").mean().squeeze()
market_label = raw_label.groupby("datetime", group_keys=False).mean().squeeze()
bins = [-np.inf, self.lo, self.hi, np.inf]
market_label = pd.cut(market_label, bins, labels=False)
market_label.name = ("market_return", "market_return")
df = df.join(market_label)
return df

def fit_thresh(self, train_label):
market_label = train_label.groupby("datetime").mean().squeeze()
market_label = train_label.groupby("datetime", group_keys=False).mean().squeeze()
self.lo, self.hi = market_label.quantile([1 / 3, 2 / 3])

def fit(
Expand Down
Loading
Loading