From fdc1784b11533a53fe147327e5fa48476bff9a07 Mon Sep 17 00:00:00 2001
From: Samuel Huang <hi@sgh.ng>
Date: Tue, 14 Oct 2025 18:40:08 -0400
Subject: [PATCH 1/2] update signature for _batch_setitems

---
 src/datasets/utils/_dill.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/datasets/utils/_dill.py b/src/datasets/utils/_dill.py
index fad95f7edf5..c77082095d8 100644
--- a/src/datasets/utils/_dill.py
+++ b/src/datasets/utils/_dill.py
@@ -69,9 +69,9 @@ def save(self, obj, save_persistent_id=True):
             obj = getattr(obj, "_torchdynamo_orig_callable", obj)
         dill.Pickler.save(self, obj, save_persistent_id=save_persistent_id)
 
-    def _batch_setitems(self, items):
+    def _batch_setitems(self, items, obj):
         if self._legacy_no_dict_keys_sorting:
-            return super()._batch_setitems(items)
+            return super()._batch_setitems(items, obj)
         # Ignore the order of keys in a dict
         try:
             # Faster, but fails for unorderable elements
@@ -80,7 +80,7 @@ def _batch_setitems(self, items):
             from datasets.fingerprint import Hasher
 
             items = sorted(items, key=lambda x: Hasher.hash(x[0]))
-        dill.Pickler._batch_setitems(self, items)
+        dill.Pickler._batch_setitems(self, items, obj)
 
     def memoize(self, obj):
         # Don't memoize strings since two identical strings can have different Python ids

From 55e94d85fb49ed0a4ac0e79f50dbdafbadd86feb Mon Sep 17 00:00:00 2001
From: Samuel Huang <hi@sgh.ng>
Date: Wed, 15 Oct 2025 15:18:11 -0400
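Subject: [PATCH 2/2] forward extra _batch_setitems arguments via *args/**kwargs

Also drops the _legacy_no_dict_keys_sorting early return from _batch_setitems.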

---
 src/datasets/utils/_dill.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/src/datasets/utils/_dill.py b/src/datasets/utils/_dill.py
index c77082095d8..f3a4baba681 100644
--- a/src/datasets/utils/_dill.py
+++ b/src/datasets/utils/_dill.py
@@ -69,9 +69,7 @@ def save(self, obj, save_persistent_id=True):
             obj = getattr(obj, "_torchdynamo_orig_callable", obj)
         dill.Pickler.save(self, obj, save_persistent_id=save_persistent_id)
 
-    def _batch_setitems(self, items, obj):
-        if self._legacy_no_dict_keys_sorting:
-            return super()._batch_setitems(items, obj)
+    def _batch_setitems(self, items, *args, **kwargs):
         # Ignore the order of keys in a dict
         try:
             # Faster, but fails for unorderable elements
@@ -80,7 +78,7 @@ def _batch_setitems(self, items, obj):
             from datasets.fingerprint import Hasher
 
             items = sorted(items, key=lambda x: Hasher.hash(x[0]))
-        dill.Pickler._batch_setitems(self, items, obj)
+        return super()._batch_setitems(items, *args, **kwargs)
 
     def memoize(self, obj):
         # Don't memoize strings since two identical strings can have different Python ids