Commit a9589e1

Merge branch 'main' into transformers-save-dtensor
2 parents 94abfb4 + 785835f

22 files changed: +263 −37 lines

.github/conda/meta.yaml (+2)

@@ -21,6 +21,7 @@ requirements:
     - typing-extensions
     - packaging
     - pyyaml
+    - hf-xet >=1.1.0,<2.0.0
   run:
     - python
     - pip
@@ -30,6 +31,7 @@ requirements:
     - typing-extensions
     - packaging
     - pyyaml
+    - hf-xet >=1.1.0,<2.0.0
 
 test:
   imports:

docs/source/en/guides/manage-cache.md (+1 −1)

@@ -21,7 +21,7 @@ The caching system is designed as follows:
 ├─ <SPACES>
 ```
 
-The `<CACHE_DIR>` is usually your user's home directory. However, it is customizable with the `cache_dir` argument on all methods, or by specifying either `HF_HOME` or `HF_HUB_CACHE` environment variable.
+The default `<CACHE_DIR>` is `~/.cache/huggingface/hub`. However, it is customizable with the `cache_dir` argument on all methods, or by specifying either `HF_HOME` or `HF_HUB_CACHE` environment variable.
 
 Models, datasets and spaces share a common root. Each of these repositories contains the
 repository type, the namespace (organization or username) if it exists and the
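For reference, a minimal sketch of the two override mechanisms the changed line mentions; the repo, filename, and paths are placeholders:

```python
import os

# Option 1: redirect the whole cache via environment variable
# (set before huggingface_hub is imported).
os.environ["HF_HUB_CACHE"] = "/data/hf-cache"

from huggingface_hub import hf_hub_download

# Option 2: override per call with the `cache_dir` argument.
path = hf_hub_download(repo_id="gpt2", filename="config.json", cache_dir="/tmp/hf-cache")
print(path)  # resolves under /tmp/hf-cache instead of the default <CACHE_DIR>
```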

docs/source/en/package_reference/environment_variables.md (+5)

@@ -86,6 +86,7 @@ Integer value to define the number of seconds to wait for server response when d
 ## Xet
 
 ### Other Xet environment variables
+* [`HF_HUB_DISABLE_XET`](../package_reference/environment_variables#hfhubdisablexet)
 * [`HF_XET_CACHE`](../package_reference/environment_variables#hfxetcache)
 * [`HF_XET_HIGH_PERFORMANCE`](../package_reference/environment_variables#hfxethighperformance)
 * [`HF_XET_RECONSTRUCT_WRITE_SEQUENTIALLY`](../package_reference/environment_variables#hfxetreconstructwritesequentially)
@@ -164,6 +165,10 @@ Each library defines its own policy (i.e. which usage to monitor) but the core i
 
 You can set `HF_HUB_DISABLE_TELEMETRY=1` as environment variable to globally disable telemetry.
 
+### HF_HUB_DISABLE_XET
+
+Set to disable using `hf-xet`, even if it is available in your Python environment. Since `hf-xet` is used automatically whenever it is installed, this variable allows explicitly opting out of it.
+
 ### HF_HUB_ENABLE_HF_TRANSFER
 
 Set to `True` for faster uploads and downloads from the Hub using `hf_transfer`.
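A minimal sketch of the new opt-out, using the same import path the CLI change in this commit relies on; the variable is read from the environment each time availability is checked, per the `_runtime.py` change at the end of this diff:

```python
import os

os.environ["HF_HUB_DISABLE_XET"] = "1"  # opt out even if hf-xet is installed

from huggingface_hub.utils._runtime import is_xet_available

assert not is_xet_available()  # transfers fall back to regular HTTP
```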

setup.py (+2 −2)

@@ -14,7 +14,7 @@ def get_version() -> str:
 install_requires = [
     "filelock",
     "fsspec>=2023.5.0",
-    "hf-xet>=1.0.2,<2.0.0; platform_machine=='x86_64' or platform_machine=='amd64' or platform_machine=='arm64' or platform_machine=='aarch64'",
+    "hf-xet>=1.1.0,<2.0.0; platform_machine=='x86_64' or platform_machine=='amd64' or platform_machine=='arm64' or platform_machine=='aarch64'",
     "packaging>=20.9",
     "pyyaml>=5.1",
     "requests",
@@ -56,7 +56,7 @@ def get_version() -> str:
     "keras<3.0",
 ]
 
-extras["hf_xet"] = ["hf_xet>=1.0.2,<2.0.0"]
+extras["hf_xet"] = ["hf_xet>=1.1.0,<2.0.0"]
 
 extras["testing"] = (
     extras["cli"]

src/huggingface_hub/_commit_api.py (+23 −4)

@@ -530,7 +530,7 @@ def _upload_xet_files(
     if len(additions) == 0:
         return
     # at this point, we know that hf_xet is installed
-    from hf_xet import upload_files
+    from hf_xet import upload_bytes, upload_files
 
     try:
         xet_connection_info = fetch_xet_connection_info_from_repo_info(
@@ -571,8 +571,10 @@ def token_refresher() -> Tuple[str, int]:
     num_chunks_num_digits = int(math.log10(num_chunks)) + 1
     for i, chunk in enumerate(chunk_iterable(additions, chunk_size=UPLOAD_BATCH_MAX_NUM_FILES)):
         _chunk = [op for op in chunk]
-        paths = [str(op.path_or_fileobj) for op in _chunk]
-        expected_size = sum([os.path.getsize(path) for path in paths])
+
+        bytes_ops = [op for op in _chunk if isinstance(op.path_or_fileobj, bytes)]
+        paths_ops = [op for op in _chunk if isinstance(op.path_or_fileobj, (str, Path))]
+        expected_size = sum(op.upload_info.size for op in bytes_ops + paths_ops)
 
         if num_chunks > 1:
             description = f"Uploading Batch [{str(i + 1).zfill(num_chunks_num_digits)}/{num_chunks}]..."
@@ -592,7 +594,24 @@ def token_refresher() -> Tuple[str, int]:
         def update_progress(increment: int):
             progress.update(increment)
 
-        upload_files(paths, xet_endpoint, access_token_info, token_refresher, update_progress, repo_type)
+        if len(paths_ops) > 0:
+            upload_files(
+                [str(op.path_or_fileobj) for op in paths_ops],
+                xet_endpoint,
+                access_token_info,
+                token_refresher,
+                update_progress,
+                repo_type,
+            )
+        if len(bytes_ops) > 0:
+            upload_bytes(
+                [op.path_or_fileobj for op in bytes_ops],
+                xet_endpoint,
+                access_token_info,
+                token_refresher,
+                update_progress,
+                repo_type,
+            )
     return
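A standalone sketch of the dispatch introduced above: in-memory `bytes` payloads are routed to `upload_bytes`, on-disk paths to `upload_files`. The values below are hypothetical stand-ins for `CommitOperationAdd.path_or_fileobj`:

```python
from pathlib import Path

chunk = [b"raw in-memory payload", "weights/model.safetensors", Path("tokenizer.json")]

bytes_ops = [op for op in chunk if isinstance(op, bytes)]
paths_ops = [op for op in chunk if isinstance(op, (str, Path))]

# Every addition is routed exactly once, so progress accounting stays consistent
# with expected_size computed over bytes_ops + paths_ops.
assert len(bytes_ops) + len(paths_ops) == len(chunk)
```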

src/huggingface_hub/_upload_large_folder.py (+1 −1)

@@ -209,7 +209,7 @@ def target_chunk(self) -> int:
     def update_chunk(self, success: bool, nb_items: int, duration: float) -> None:
         with self._chunk_lock:
             if not success:
-                logger.warn(f"Failed to commit {nb_items} files at once. Will retry with less files in next batch.")
+                logger.warning(f"Failed to commit {nb_items} files at once. Will retry with less files in next batch.")
                 self._chunk_idx -= 1
             elif nb_items >= COMMIT_SIZE_SCALE[self._chunk_idx] and duration < 40:
                 logger.info(f"Successfully committed {nb_items} at once. Increasing the limit for next batch.")
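Context for the one-liner above: `Logger.warn` is a deprecated alias of `Logger.warning` in the standard library (Python 3.13 emits a `DeprecationWarning` when it is called), hence the rename:

```python
import logging

logger = logging.getLogger(__name__)

logger.warning("Failed to commit %d files at once.", 3)  # preferred spelling
# logger.warn(...) is the deprecated alias the diff removes
```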

src/huggingface_hub/commands/upload.py (+2 −1)

@@ -59,6 +59,7 @@
 from huggingface_hub.errors import RevisionNotFoundError
 from huggingface_hub.hf_api import HfApi
 from huggingface_hub.utils import disable_progress_bars, enable_progress_bars
+from huggingface_hub.utils._runtime import is_xet_available
 
 
 logger = logging.get_logger(__name__)
@@ -215,7 +216,7 @@ def _upload(self) -> str:
         if self.delete is not None and len(self.delete) > 0:
             warnings.warn("Ignoring `--delete` since a single file is uploaded.")
 
-        if not HF_HUB_ENABLE_HF_TRANSFER:
+        if not is_xet_available() and not HF_HUB_ENABLE_HF_TRANSFER:
             logger.info(
                 "Consider using `hf_transfer` for faster uploads. This solution comes with some limitations. See"
                 " https://huggingface.co/docs/huggingface_hub/hf_transfer for more details."

src/huggingface_hub/file_download.py (+6 −4)

@@ -582,7 +582,7 @@ def xet_get(
     """
     try:
-        from hf_xet import PyPointerFile, download_files  # type: ignore[no-redef]
+        from hf_xet import PyXetDownloadInfo, download_files  # type: ignore[no-redef]
     except ImportError:
         raise ValueError(
             "To use optimized download using Xet storage, you need to install the hf_xet package. "
@@ -597,8 +597,10 @@ def token_refresher() -> Tuple[str, int]:
             raise ValueError("Failed to refresh token using xet metadata.")
         return connection_info.access_token, connection_info.expiration_unix_epoch
 
-    pointer_files = [
-        PyPointerFile(path=str(incomplete_path.absolute()), hash=xet_file_data.file_hash, filesize=expected_size)
+    xet_download_info = [
+        PyXetDownloadInfo(
+            destination_path=str(incomplete_path.absolute()), hash=xet_file_data.file_hash, file_size=expected_size
+        )
     ]
 
     if not displayed_filename:
@@ -623,7 +625,7 @@ def progress_updater(progress_bytes: float):
         progress.update(progress_bytes)
 
     download_files(
-        pointer_files,
+        xet_download_info,
         endpoint=connection_info.endpoint,
         token_info=(connection_info.access_token, connection_info.expiration_unix_epoch),
         token_refresher=token_refresher,

src/huggingface_hub/hf_api.py (+21 −12)

@@ -4475,18 +4475,17 @@ def preupload_lfs_files(
             expand="xetEnabled",
             token=token,
         ).xet_enabled
-        has_binary_data = any(
-            isinstance(addition.path_or_fileobj, (bytes, io.BufferedIOBase))
-            for addition in new_lfs_additions_to_upload
+        has_buffered_io_data = any(
+            isinstance(addition.path_or_fileobj, io.BufferedIOBase) for addition in new_lfs_additions_to_upload
         )
-        if xet_enabled and not has_binary_data and is_xet_available():
+        if xet_enabled and not has_buffered_io_data and is_xet_available():
             logger.info("Uploading files using Xet Storage..")
             _upload_xet_files(**upload_kwargs, create_pr=create_pr)  # type: ignore [arg-type]
         else:
             if xet_enabled and is_xet_available():
-                if has_binary_data:
+                if has_buffered_io_data:
                     logger.warning(
-                        "Uploading files as bytes or binary IO objects is not supported by Xet Storage. "
+                        "Uploading files as a binary IO buffer is not supported by Xet Storage. "
                         "Falling back to HTTP upload."
                     )
             _upload_lfs_files(**upload_kwargs, num_threads=num_threads)  # type: ignore [arg-type]
@@ -7573,6 +7572,7 @@ def create_inference_endpoint(
         revision: Optional[str] = None,
         task: Optional[str] = None,
         custom_image: Optional[Dict] = None,
+        env: Optional[Dict[str, str]] = None,
         secrets: Optional[Dict[str, str]] = None,
         type: InferenceEndpointType = InferenceEndpointType.PROTECTED,
         domain: Optional[str] = None,
@@ -7616,6 +7616,8 @@ def create_inference_endpoint(
             custom_image (`Dict`, *optional*):
                 A custom Docker image to use for the Inference Endpoint. This is useful if you want to deploy an
                 Inference Endpoint running on the `text-generation-inference` (TGI) framework (see examples).
+            env (`Dict[str, str]`, *optional*):
+                Non-secret environment variables to inject in the container environment.
             secrets (`Dict[str, str]`, *optional*):
                 Secret values to inject in the container environment.
             type ([`InferenceEndpointType]`, *optional*):
@@ -7678,14 +7680,14 @@ def create_inference_endpoint(
         ...     type="protected",
         ...     instance_size="x1",
         ...     instance_type="nvidia-a10g",
+        ...     env={
+        ...         "MAX_BATCH_PREFILL_TOKENS": "2048",
+        ...         "MAX_INPUT_LENGTH": "1024",
+        ...         "MAX_TOTAL_TOKENS": "1512",
+        ...         "MODEL_ID": "/repository"
+        ...     },
         ...     custom_image={
         ...         "health_route": "/health",
-        ...         "env": {
-        ...             "MAX_BATCH_PREFILL_TOKENS": "2048",
-        ...             "MAX_INPUT_LENGTH": "1024",
-        ...             "MAX_TOTAL_TOKENS": "1512",
-        ...             "MODEL_ID": "/repository"
-        ...         },
         ...         "url": "ghcr.io/huggingface/text-generation-inference:1.1.0",
         ...     },
         ...     secrets={"MY_SECRET_KEY": "secret_value"},
@@ -7723,6 +7725,8 @@ def create_inference_endpoint(
             },
             "type": type,
         }
+        if env:
+            payload["model"]["env"] = env
         if secrets:
             payload["model"]["secrets"] = secrets
         if domain is not None or path is not None:
@@ -7897,6 +7901,7 @@ def update_inference_endpoint(
         revision: Optional[str] = None,
         task: Optional[str] = None,
         custom_image: Optional[Dict] = None,
+        env: Optional[Dict[str, str]] = None,
        secrets: Optional[Dict[str, str]] = None,
         # Route update
         domain: Optional[str] = None,
@@ -7942,6 +7947,8 @@ def update_inference_endpoint(
             custom_image (`Dict`, *optional*):
                 A custom Docker image to use for the Inference Endpoint. This is useful if you want to deploy an
                 Inference Endpoint running on the `text-generation-inference` (TGI) framework (see examples).
+            env (`Dict[str, str]`, *optional*):
+                Non-secret environment variables to inject in the container environment.
             secrets (`Dict[str, str]`, *optional*):
                 Secret values to inject in the container environment.
@@ -7992,6 +7999,8 @@ def update_inference_endpoint(
             payload["model"]["task"] = task
         if custom_image is not None:
             payload["model"]["image"] = {"custom": custom_image}
+        if env is not None:
+            payload["model"]["env"] = env
         if secrets is not None:
             payload["model"]["secrets"] = secrets
         if domain is not None:
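The docstring example above covers `create_inference_endpoint`; a matching hedged sketch for the updated `update_inference_endpoint`, with a hypothetical endpoint name:

```python
from huggingface_hub import HfApi

api = HfApi()
# Update only the non-secret container environment of an existing endpoint.
endpoint = api.update_inference_endpoint(
    "my-endpoint-name",  # hypothetical endpoint
    env={"MAX_INPUT_LENGTH": "2048"},
)
print(endpoint.status)
```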

src/huggingface_hub/inference/_client.py (+7 −1)

@@ -883,7 +883,13 @@ def chat_completion(
         payload_model = model or self.model
 
         # Get the provider helper
-        provider_helper = get_provider_helper(self.provider, task="conversational", model=payload_model)
+        provider_helper = get_provider_helper(
+            self.provider,
+            task="conversational",
+            model=model_id_or_url
+            if model_id_or_url is not None and model_id_or_url.startswith(("http://", "https://"))
+            else payload_model,
+        )
 
         # Prepare the payload
         parameters = {
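The case this change targets, sketched: when the `model` argument is a raw URL, provider resolution now keys off the URL (routing to `hf-inference`, per the `_providers/__init__.py` change below) instead of the configured provider. The local server is a placeholder:

```python
from huggingface_hub import InferenceClient

client = InferenceClient()
response = client.chat_completion(
    messages=[{"role": "user", "content": "Hello!"}],
    model="http://localhost:8080",  # hypothetical local TGI server
)
print(response.choices[0].message.content)
```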

src/huggingface_hub/inference/_generated/_async_client.py (+7 −1)

@@ -923,7 +923,13 @@ async def chat_completion(
         payload_model = model or self.model
 
         # Get the provider helper
-        provider_helper = get_provider_helper(self.provider, task="conversational", model=payload_model)
+        provider_helper = get_provider_helper(
+            self.provider,
+            task="conversational",
+            model=model_id_or_url
+            if model_id_or_url is not None and model_id_or_url.startswith(("http://", "https://"))
+            else payload_model,
+        )
 
         # Prepare the payload
         parameters = {

src/huggingface_hub/inference/_generated/types/chat_completion.py (+1 −1)

@@ -45,7 +45,7 @@ class ChatCompletionInputMessage(BaseInferenceType):
     tool_calls: Optional[List[ChatCompletionInputToolCall]] = None
 
 
-ChatCompletionInputGrammarTypeType = Literal["json", "regex"]
+ChatCompletionInputGrammarTypeType = Literal["json", "regex", "json_schema"]
 
 
 @dataclass_with_extra
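A hedged sketch of what the widened literal permits, assuming the serving backend accepts the `json_schema` grammar type; the schema itself is a placeholder:

```python
# Passed as `response_format=...` to chat_completion; mirrors
# ChatCompletionInputGrammarType(type="json_schema", value=<schema>).
response_format = {
    "type": "json_schema",
    "value": {
        "type": "object",
        "properties": {"name": {"type": "string"}},
        "required": ["name"],
    },
}
```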

src/huggingface_hub/inference/_generated/types/text_generation.py (+1 −1)

@@ -8,7 +8,7 @@
 from .base import BaseInferenceType, dataclass_with_extra
 
 
-TypeEnum = Literal["json", "regex"]
+TypeEnum = Literal["json", "regex", "json_schema"]
 
 
 @dataclass_with_extra

src/huggingface_hub/inference/_providers/__init__.py (+5 −3)

@@ -23,7 +23,7 @@
 from .nebius import NebiusConversationalTask, NebiusTextGenerationTask, NebiusTextToImageTask
 from .novita import NovitaConversationalTask, NovitaTextGenerationTask, NovitaTextToVideoTask
 from .openai import OpenAIConversationalTask
-from .replicate import ReplicateTask, ReplicateTextToSpeechTask
+from .replicate import ReplicateTask, ReplicateTextToImageTask, ReplicateTextToSpeechTask
 from .sambanova import SambanovaConversationalTask, SambanovaFeatureExtractionTask
 from .together import TogetherConversationalTask, TogetherTextGenerationTask, TogetherTextToImageTask
 
@@ -115,7 +115,7 @@
         "conversational": OpenAIConversationalTask(),
     },
     "replicate": {
-        "text-to-image": ReplicateTask("text-to-image"),
+        "text-to-image": ReplicateTextToImageTask(),
         "text-to-speech": ReplicateTextToSpeechTask(),
         "text-to-video": ReplicateTask("text-to-video"),
     },
@@ -147,7 +147,9 @@ def get_provider_helper(
         ValueError: If provider or task is not supported
     """
 
-    if model is None and provider in (None, "auto"):
+    if (model is None and provider in (None, "auto")) or (
+        model is not None and model.startswith(("http://", "https://"))
+    ):
         provider = "hf-inference"
 
     if provider is None:
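A standalone sketch of the new resolution rule; `resolve_provider` is a hypothetical stand-in for the condition inside `get_provider_helper`:

```python
def resolve_provider(provider, model):
    # A URL model always short-circuits to the hf-inference provider.
    if (model is None and provider in (None, "auto")) or (
        model is not None and model.startswith(("http://", "https://"))
    ):
        return "hf-inference"
    return provider

assert resolve_provider(None, None) == "hf-inference"
assert resolve_provider("replicate", "https://my-endpoint.example") == "hf-inference"
assert resolve_provider("replicate", "some-org/some-model") == "replicate"
```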

src/huggingface_hub/inference/_providers/hf_inference.py (+1 −1)

@@ -42,7 +42,7 @@ def _prepare_url(self, api_key: str, mapped_model: str) -> str:
             return mapped_model
         return (
             # Feature-extraction and sentence-similarity are the only cases where we handle models with several tasks.
-            f"{self.base_url}/pipeline/{self.task}/{mapped_model}"
+            f"{self.base_url}/models/{mapped_model}/pipeline/{self.task}"
             if self.task in ("feature-extraction", "sentence-similarity")
             # Otherwise, we use the default endpoint
             else f"{self.base_url}/models/{mapped_model}"

src/huggingface_hub/inference/_providers/replicate.py (+13)

@@ -47,6 +47,19 @@ def get_response(self, response: Union[bytes, Dict], request_params: Optional[Re
         return get_session().get(output_url).content
 
 
+class ReplicateTextToImageTask(ReplicateTask):
+    def __init__(self):
+        super().__init__("text-to-image")
+
+    def _prepare_payload_as_dict(
+        self, inputs: Any, parameters: Dict, provider_mapping_info: InferenceProviderMapping
+    ) -> Optional[Dict]:
+        payload: Dict = super()._prepare_payload_as_dict(inputs, parameters, provider_mapping_info)  # type: ignore[assignment]
+        if provider_mapping_info.adapter_weights_path is not None:
+            payload["input"]["lora_weights"] = f"https://huggingface.co/{provider_mapping_info.hf_model_id}"
+        return payload
+
+
 class ReplicateTextToSpeechTask(ReplicateTask):
     def __init__(self):
         super().__init__("text-to-speech")
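A minimal sketch of the payload shape the override produces when a LoRA adapter is mapped; the prompt and model id are hypothetical, and the base `{"input": ...}` layout is assumed from the parent `ReplicateTask`:

```python
payload = {"input": {"prompt": "an astronaut riding a horse"}}  # assumed base payload

hf_model_id = "user/my-flux-lora"  # hypothetical adapter repo on the Hub
payload["input"]["lora_weights"] = f"https://huggingface.co/{hf_model_id}"
# -> Replicate loads the adapter weights directly from the Hub URL
```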

src/huggingface_hub/utils/_runtime.py (+4)

@@ -154,6 +154,10 @@ def get_hf_transfer_version() -> str:
 
 # xet
 def is_xet_available() -> bool:
+    # since hf_xet is automatically used if available, allow explicit disabling via environment variable
+    if constants._is_true(os.environ.get("HF_HUB_DISABLE_XET")):  # type: ignore
+        return False
+
     return is_package_available("hf_xet")
