From fdc1784b11533a53fe147327e5fa48476bff9a07 Mon Sep 17 00:00:00 2001 From: Samuel Huang Date: Tue, 14 Oct 2025 18:40:08 -0400 Subject: [PATCH 1/2] update signature for _batch_setitems --- src/datasets/utils/_dill.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/datasets/utils/_dill.py b/src/datasets/utils/_dill.py index fad95f7edf5..c77082095d8 100644 --- a/src/datasets/utils/_dill.py +++ b/src/datasets/utils/_dill.py @@ -69,9 +69,9 @@ def save(self, obj, save_persistent_id=True): obj = getattr(obj, "_torchdynamo_orig_callable", obj) dill.Pickler.save(self, obj, save_persistent_id=save_persistent_id) - def _batch_setitems(self, items): + def _batch_setitems(self, items, obj): if self._legacy_no_dict_keys_sorting: - return super()._batch_setitems(items) + return super()._batch_setitems(items, obj) # Ignore the order of keys in a dict try: # Faster, but fails for unorderable elements @@ -80,7 +80,7 @@ def _batch_setitems(self, items): from datasets.fingerprint import Hasher items = sorted(items, key=lambda x: Hasher.hash(x[0])) - dill.Pickler._batch_setitems(self, items) + dill.Pickler._batch_setitems(self, items, obj) def memoize(self, obj): # Don't memoize strings since two identical strings can have different Python ids From 55e94d85fb49ed0a4ac0e79f50dbdafbadd86feb Mon Sep 17 00:00:00 2001 From: Samuel Huang Date: Wed, 15 Oct 2025 15:18:11 -0400 Subject: [PATCH 2/2] arguments passthrough --- src/datasets/utils/_dill.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/datasets/utils/_dill.py b/src/datasets/utils/_dill.py index c77082095d8..f3a4baba681 100644 --- a/src/datasets/utils/_dill.py +++ b/src/datasets/utils/_dill.py @@ -69,9 +69,7 @@ def save(self, obj, save_persistent_id=True): obj = getattr(obj, "_torchdynamo_orig_callable", obj) dill.Pickler.save(self, obj, save_persistent_id=save_persistent_id) - def _batch_setitems(self, items, obj): - if self._legacy_no_dict_keys_sorting: - return super()._batch_setitems(items, obj) + def _batch_setitems(self, items, *args, **kwargs): # Ignore the order of keys in a dict try: # Faster, but fails for unorderable elements @@ -80,7 +78,7 @@ def _batch_setitems(self, items, obj): from datasets.fingerprint import Hasher items = sorted(items, key=lambda x: Hasher.hash(x[0])) - dill.Pickler._batch_setitems(self, items, obj) + return super()._batch_setitems(items, *args, **kwargs) def memoize(self, obj): # Don't memoize strings since two identical strings can have different Python ids