86 changes: 86 additions & 0 deletions examples/Advanced/huggingface_tutorial.py
@@ -0,0 +1,86 @@
"""
Hugging Face Integration Tutorial
=================================

This example demonstrates how to use the experimental Hugging Face integration
to push models to the Hugging Face Hub and link them to OpenML runs.

Requirements:
    pip install openml[huggingface]
    or
    pip install huggingface_hub transformers
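
A Hugging Face account and access token (e.g. via `huggingface-cli login`) are
additionally required for the push step.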
"""
import logging
import sys

import openml
from openml.extensions.huggingface_integration import (
    push_model_to_hub_for_run,
    load_model_from_run,
    run_task_with_hf_sync,
    is_hf_transformer,
)

# Configure logging
logging.basicConfig(level=logging.INFO)

def main():
    # Check that the optional Hugging Face dependencies are available
    try:
        import huggingface_hub  # noqa: F401
        from transformers import AutoModel, AutoConfig
    except ImportError:
        print("This example requires 'transformers' and 'huggingface_hub'.")
        print("Please install them with: pip install openml[huggingface]")
        sys.exit(0)

    print("Hugging Face integration is available.")

    # 1. Create a dummy model (or load one)
    # For demonstration, we'll create a tiny random model
    config = AutoConfig.from_pretrained("bert-base-uncased")
    config.num_hidden_layers = 1
    config.hidden_size = 32
    config.num_attention_heads = 2
    config.vocab_size = 100

    model = AutoModel.from_config(config)

    if is_hf_transformer(model):
        print("Model is recognized as a Hugging Face transformer.")

    # 2. Set up a dummy run (in a real scenario, you would run a task)
    # Here we just simulate a run object
    run = openml.runs.OpenMLRun(task_id=1, flow_id=1, dataset_id=1)
    run.run_id = 12345  # Fake run ID

    # 3. Push model to Hub
    # NOTE: You need to be logged in to the Hugging Face Hub or provide a token.
    # You can log in with `huggingface-cli login`.
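    # Alternatively, you can pass an explicit token (token=...) to
    # push_model_to_hub_for_run and load_model_from_run instead of relying on
    # the CLI login. The environment variable name below is only an
    # illustration; use whatever mechanism you normally use to store secrets:
    # import os
    # token = os.environ.get("HF_TOKEN")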

    repo_id = "your-username/openml-test-model"  # CHANGE THIS

    print(f"\nAttempting to push to {repo_id}...")
    print("Note: This will fail if you don't have write access to the repo or aren't logged in.")

    try:
        # token defaults to None, so the locally stored Hugging Face token is used
        run = push_model_to_hub_for_run(model, run, repo_id=repo_id)

        print("\nRun tags after push:")
        print(run.tags)

        # 4. Load model back
        print("\nLoading model back from run...")
        loaded_model = load_model_from_run(run.run_id)
        print(f"Loaded model: {type(loaded_model)}")

    except Exception as e:
        print(f"\nSkipping the actual push/load in this tutorial due to an error (likely auth): {e}")
        print("To run the full example, ensure you are logged in to the HF Hub and set a valid repo_id.")

    # 5. Convenience wrapper usage: runs the task, pushes the model, and publishes the run
    # run = run_task_with_hf_sync(model, task_id=31, repo_id=repo_id)

if __name__ == "__main__":
    main()
169 changes: 169 additions & 0 deletions openml/extensions/huggingface_integration.py
@@ -0,0 +1,169 @@
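"""Experimental Hugging Face Hub integration for OpenML runs.

Utilities to push Hugging Face Transformers models to the Hub, link them to
OpenML runs via run tags, and load the linked models back from a run.
"""
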
from __future__ import annotations

import logging
from typing import TYPE_CHECKING, Any

import openml

if TYPE_CHECKING:
    from openml.runs import OpenMLRun

logger = logging.getLogger(__name__)

try:
    from huggingface_hub import HfApi
    from transformers import AutoModel, PreTrainedModel

    _HF_AVAILABLE = True
except ImportError:
    _HF_AVAILABLE = False
    PreTrainedModel = object  # type: ignore


def is_hf_transformer(model: Any) -> bool:
    """Check if a model is a Hugging Face Transformers model."""
    if not _HF_AVAILABLE:
        return False
    return isinstance(model, PreTrainedModel)


def push_model_to_hub_for_run(
    model: Any,
    run: OpenMLRun,
    repo_id: str,
    token: str | None = None,
) -> OpenMLRun:
    """
    Push a Hugging Face model to the Hub and link it to an OpenML run.

    If the model is not a Hugging Face model, the run is returned unchanged.
    If the model is a Hugging Face model, it is pushed to the Hub, and a tag
    referencing the commit is added to the run.

    Parameters
    ----------
    model : Any
        The model to push.
    run : OpenMLRun
        The OpenML run to link.
    repo_id : str
        The ID of the repository to push to (e.g. "username/repo_name").
    token : str, optional
        The Hugging Face authentication token.

    Returns
    -------
    OpenMLRun
        The updated OpenML run.
    """
    if not is_hf_transformer(model):
        return run

    if not _HF_AVAILABLE:
        # Should be unreachable if is_hf_transformer works correctly,
        # but kept as a safety net in case the logic changes.
        logger.warning("Hugging Face integration dependencies not found. Skipping push.")
        return run

    # 1. Push to Hub
    model.push_to_hub(repo_id, commit_message=f"OpenML Run {run.run_id}", token=token)

    # 2. Get latest commit
    api = HfApi(token=token)
    commit_sha = api.list_repo_commits(repo_id)[0].commit_id

    # 3. Construct URI
    # Format: hf://{user_or_org}/{repo_name}@{commit_sha}
    hf_uri = f"hf://{repo_id}@{commit_sha}"

    # 4. Store URI in tags (runs fetched from the server may have tags=None)
    if run.tags is None:
        run.tags = []
    run.tags.append(f"hf_uri={hf_uri}")
    run.tags.append("hf-integrated")

    return run


def load_model_from_run(
    run_id: int,
    token: str | None = None,
) -> Any:
    """
    Load a Hugging Face model linked to an OpenML run.

    Parameters
    ----------
    run_id : int
        The ID of the OpenML run.
    token : str, optional
        The Hugging Face authentication token.

    Returns
    -------
    Any
        The loaded Hugging Face model.

    Raises
    ------
    ImportError
        If Hugging Face dependencies are not installed.
    ValueError
        If the run does not have a linked Hugging Face model.
    """
    if not _HF_AVAILABLE:
        raise ImportError(
            "Hugging Face integration requires 'huggingface_hub' and 'transformers'."
        )

    run = openml.runs.get_run(run_id)

    hf_uri = None
    for tag in run.tags or []:
        if tag.startswith("hf_uri="):
            hf_uri = tag.split("=", 1)[1]
            break

    if not hf_uri:
        raise ValueError(
            f"Run {run_id} does not have a linked Hugging Face model (no 'hf_uri' tag)."
        )

    # Parse URI: hf://{repo_id}@{commit_sha}
    # Strip the leading "hf://"
    uri_path = hf_uri[5:]
    if "@" not in uri_path:
        raise ValueError(f"Invalid HF URI format: {hf_uri}")

    repo_id, commit_sha = uri_path.split("@", 1)

    # Load the model at the exact commit recorded for the run
    return AutoModel.from_pretrained(repo_id, revision=commit_sha, token=token)


def run_task_with_hf_sync(
    model: Any,
    task_id: int,
    repo_id: str,
    hf_token: str | None = None,
) -> OpenMLRun:
    """
    Run a task and sync the model to Hugging Face Hub.

    Parameters
    ----------
    model : Any
        The model to run.
    task_id : int
        The ID of the task to run.
    repo_id : str
        The Hugging Face repository ID to push to.
    hf_token : str, optional
        The Hugging Face authentication token.

    Returns
    -------
    OpenMLRun
        The published OpenML run.
    """
    task = openml.tasks.get_task(task_id)
    run = openml.runs.run_model_on_task(model, task)
    run = push_model_to_hub_for_run(model, run, repo_id=repo_id, token=hf_token)
    run.publish()
    return run
5 changes: 5 additions & 0 deletions pyproject.toml
@@ -109,6 +109,11 @@ docs=[
"mike"
]

huggingface=[
"huggingface_hub",
"transformers"
]

[project.urls]
home="https://openml.org/"
documentation = "https://openml.github.io/openml-python/"
116 changes: 116 additions & 0 deletions tests/test_huggingface_integration.py
@@ -0,0 +1,116 @@
import unittest
from unittest.mock import MagicMock, patch

import pytest

# The integration module guards its Hugging Face imports with a try/except, so
# it can always be imported. The tests below patch _HF_AVAILABLE to exercise
# both the "dependencies missing" and "dependencies present" code paths.
from openml.runs import OpenMLRun
import openml.extensions.huggingface_integration as hf_int

class TestHuggingFaceIntegration(unittest.TestCase):

    def setUp(self):
        self.run = OpenMLRun(task_id=1, flow_id=1, dataset_id=1)
        self.run.run_id = 123
        self.run.tags = []

    def test_is_hf_transformer_no_deps(self):
        # Force _HF_AVAILABLE to False
        with patch("openml.extensions.huggingface_integration._HF_AVAILABLE", False):
            self.assertFalse(hf_int.is_hf_transformer(MagicMock()))

    def test_push_model_no_deps(self):
        with patch("openml.extensions.huggingface_integration._HF_AVAILABLE", False):
            model = MagicMock()
            run = hf_int.push_model_to_hub_for_run(model, self.run, "repo")
            self.assertEqual(run.tags, [])

    def test_load_model_no_deps(self):
        with patch("openml.extensions.huggingface_integration._HF_AVAILABLE", False):
            with self.assertRaises(ImportError):
                hf_int.load_model_from_run(123)

    # Exercising the positive is_hf_transformer() path requires the real
    # transformers.PreTrainedModel class; patching that name inside the module
    # would defeat the isinstance() check. That path is therefore covered by
    # TestHuggingFaceIntegrationWithDeps below, which is skipped when the
    # optional dependencies are not installed.

@pytest.mark.skipif(not hf_int._HF_AVAILABLE, reason="Hugging Face dependencies not installed")
class TestHuggingFaceIntegrationWithDeps(unittest.TestCase):

    def setUp(self):
        self.run = OpenMLRun(task_id=1, flow_id=1, dataset_id=1)
        self.run.run_id = 123
        self.run.tags = []

    def test_is_hf_transformer(self):
        from transformers import PreTrainedModel

        # Dummy subclass; PreTrainedModel.__init__ is skipped on purpose since a
        # mocked config is enough for the isinstance() check in the integration.
        class DummyModel(PreTrainedModel):
            def __init__(self):
                self.config = MagicMock()

        model = DummyModel()
        self.assertTrue(hf_int.is_hf_transformer(model))
        self.assertFalse(hf_int.is_hf_transformer("string"))

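    def test_push_non_hf_model_is_noop(self):
        # Extra check, not part of the original test set: a plain object is not
        # a PreTrainedModel, so the run should be returned unchanged.
        run = hf_int.push_model_to_hub_for_run(object(), self.run, "user/repo")
        self.assertEqual(run.tags, [])
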
    @patch("openml.extensions.huggingface_integration.HfApi")
    def test_push_model_to_hub_for_run(self, MockHfApi):
        from transformers import PreTrainedModel

        model = MagicMock(spec=PreTrainedModel)

        # Mock HfApi
        mock_api = MockHfApi.return_value
        mock_commit = MagicMock()
        mock_commit.commit_id = "sha123"
        mock_api.list_repo_commits.return_value = [mock_commit]

        run = hf_int.push_model_to_hub_for_run(model, self.run, "user/repo")

        model.push_to_hub.assert_called_with("user/repo", commit_message="OpenML Run 123", token=None)
        self.assertIn("hf_uri=hf://user/repo@sha123", run.tags)
        self.assertIn("hf-integrated", run.tags)

    @patch("openml.extensions.huggingface_integration.AutoModel")
    @patch("openml.runs.get_run")
    def test_load_model_from_run(self, mock_get_run, MockAutoModel):
        self.run.tags = ["hf_uri=hf://user/repo@sha123"]
        mock_get_run.return_value = self.run

        hf_int.load_model_from_run(123)

        MockAutoModel.from_pretrained.assert_called_with("user/repo", revision="sha123", token=None)

    @patch("openml.runs.get_run")
    def test_load_model_from_run_missing_tag(self, mock_get_run):
        mock_get_run.return_value = self.run
        with self.assertRaises(ValueError):
            hf_int.load_model_from_run(123)

    @patch("openml.runs.get_run")
    def test_load_model_from_run_bad_uri(self, mock_get_run):
        self.run.tags = ["hf_uri=hf://bad_uri"]
        mock_get_run.return_value = self.run
        with self.assertRaises(ValueError):
            hf_int.load_model_from_run(123)