
Commit 99e6996

elronbandel and dafnapension authored and committed
Another try
Signed-off-by: elronbandel <[email protected]>
1 parent a167633 commit 99e6996

File tree

1 file changed: +37 -23 lines changed

src/unitxt/api.py

Lines changed: 37 additions & 23 deletions
@@ -1,12 +1,17 @@
 import hashlib
 import inspect
 import json
+import os
+import random
+import time
 from datetime import datetime
 from functools import lru_cache
 from typing import Any, Dict, List, Optional, Union

+import filelock
 from datasets import Dataset, DatasetDict, IterableDataset, IterableDatasetDict
 from datasets.exceptions import DatasetGenerationError
+from huggingface_hub import constants as hf_constants

 from .artifact import fetch_artifact
 from .card import TaskCard
@@ -171,16 +176,21 @@ def _source_to_dataset(
     streaming=False,
     lock_timeout=60,  # Timeout in seconds for acquiring the lock
 ):
-    import json
-    import os
-
-    import filelock
-
     from .dataset import Dataset as UnitxtDataset

     # Generate a unique signature for the source
     source_signature = json.dumps(to_dict(source, object_to_str_without_addresses), sort_keys=True)
     config_name = "recipe-" + short_hex_hash(source_signature)
+    hf_cache_home = hf_constants.HF_HOME
+    lock_dir = os.path.join(hf_cache_home, "locks")
+    os.makedirs(lock_dir, exist_ok=True)
+
+    # Create a lock file path based on the dataset configuration
+    lock_file = os.path.join(lock_dir, f"unitxt_{config_name}.lock")
+
+    # Add retry logic
+    max_attempts = 5
+    base_wait = 5  # seconds

     stream = source()

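The additions in this hunk move the lock file out of the hard-coded per-user `~/.cache/unitxt` location (removed in the next hunk) into a `locks` directory under the Hugging Face cache home, so every process sharing the HF cache also shares the lock. A minimal sketch of the resulting path, using a hypothetical hash in place of the real `short_hex_hash` output:

```python
import os

from huggingface_hub import constants as hf_constants

# HF_HOME defaults to ~/.cache/huggingface and can be overridden via the
# HF_HOME environment variable; the commit nests its lock files beneath it.
config_name = "recipe-1a2b3c4d"  # hypothetical short_hex_hash output
lock_file = os.path.join(hf_constants.HF_HOME, "locks", f"unitxt_{config_name}.lock")
print(lock_file)  # e.g. /home/user/.cache/huggingface/locks/unitxt_recipe-1a2b3c4d.lock
```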
@@ -196,28 +206,32 @@ def _source_to_dataset(

         ds_builder._generators = stream

-        # Create a lock file path based on the dataset configuration
-        lock_file = os.path.join(os.path.expanduser("~"), ".cache", "unitxt", f"{config_name}.lock")
-        os.makedirs(os.path.dirname(lock_file), exist_ok=True)

-        # Create a file lock
-        lock = filelock.FileLock(lock_file, timeout=lock_timeout)
+        for attempt in range(max_attempts):
+            # Create a file lock with appropriate timeout
+            lock = filelock.FileLock(lock_file, timeout=300)  # 5 minutes

-        # Only protect the download_and_prepare operation with the lock
-        try:
-            with lock:
-                ds_builder.download_and_prepare(
-                    verification_mode="no_checks",
-                    download_mode=None if use_cache else "force_redownload",
+            try:
+                with lock:
+                    ds_builder.download_and_prepare(
+                        verification_mode="no_checks",
+                        download_mode=None if use_cache else "force_redownload",
+                    )
+
+                # If we reach here, the lock was successfully acquired and released
+                if streaming:
+                    return ds_builder.as_streaming_dataset(split=split)
+                return ds_builder.as_dataset(
+                    split=split, run_post_process=False, verification_mode="no_checks"
                 )
-        except filelock.Timeout:
-            raise TimeoutError(f"Could not acquire lock for {config_name} within {lock_timeout} seconds. Another process may be preparing the same dataset.")

-        if streaming:
-            return ds_builder.as_streaming_dataset(split=split)
-        return ds_builder.as_dataset(
-            split=split, run_post_process=False, verification_mode="no_checks"
-        )
+            except filelock.Timeout:
+                if attempt < max_attempts - 1:  # Not the last attempt
+                    wait_time = base_wait * (2 ** attempt) + random.uniform(0, 1)
+                    time.sleep(wait_time)
+                else:
+                    raise TimeoutError(f"Could not acquire lock for {config_name} after {max_attempts} attempts")
+
     except DatasetGenerationError as e:
         raise e.__cause__

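Taken together, the new code wraps `download_and_prepare` in up to five lock attempts, each blocking for as long as 300 seconds on the lock itself (note that this hard-coded value now supersedes the `lock_timeout` parameter), and sleeps between attempts with exponential backoff plus jitter (roughly 5 s, 10 s, 20 s, 40 s, plus up to 1 s of randomness) so competing processes desynchronize. A self-contained sketch of the same pattern, with a hypothetical `prepare` callable standing in for the builder step:

```python
import random
import time

import filelock


def prepare_with_retries(lock_file: str, prepare, max_attempts: int = 5, base_wait: float = 5.0):
    """Run `prepare()` under a file lock, retrying with exponential backoff.

    `prepare` is a hypothetical zero-argument callable standing in for
    ds_builder.download_and_prepare in the commit above.
    """
    for attempt in range(max_attempts):
        lock = filelock.FileLock(lock_file, timeout=300)  # block up to 5 minutes per attempt
        try:
            with lock:
                return prepare()
        except filelock.Timeout:
            if attempt < max_attempts - 1:
                # Backoff doubles each round (5 s, 10 s, 20 s, ...) plus jitter.
                time.sleep(base_wait * (2 ** attempt) + random.uniform(0, 1))
            else:
                raise TimeoutError(
                    f"Could not acquire lock {lock_file} after {max_attempts} attempts"
                )
```

Worst case, that is five 300-second waits plus about 75 seconds of accumulated backoff, so a permanently stuck lock surfaces as a `TimeoutError` after roughly 26 minutes.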