From 1a1536da6dad734ddc4b946384c44a2d6f6660f5 Mon Sep 17 00:00:00 2001 From: Nathan McDougall Date: Sat, 20 Jul 2024 12:12:24 +1200 Subject: [PATCH 1/9] Add polars as a testing dependency --- pyproject.toml | 1 + requirements/dev.txt | 2 ++ 2 files changed, 3 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 3497c51a..215bf07f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -60,6 +60,7 @@ test = [ "pytest-dotenv", "pytest-parallel", "s3fs", + "polars>=1.0.0", ] [build-system] diff --git a/requirements/dev.txt b/requirements/dev.txt index 2b2a43a0..401b48eb 100644 --- a/requirements/dev.txt +++ b/requirements/dev.txt @@ -266,6 +266,8 @@ pluggy==1.5.0 # via pytest plum-dispatch==2.5.1.post1 # via quartodoc +polars==1.2.1 + # via pins (setup.cfg) portalocker==2.10.1 # via msal-extensions pre-commit==3.7.1 From 9667fc02c1af2fcd8c24949296ab5428e128db93 Mon Sep 17 00:00:00 2001 From: Nathan McDougall Date: Sat, 20 Jul 2024 12:54:29 +1200 Subject: [PATCH 2/9] Support writing `polars.DataFrame` to parquet. --- pins/drivers.py | 44 ++++++++++++++++++++++++++++++++------ pins/tests/test_drivers.py | 30 ++++++++++++++++++++++++++ 2 files changed, 68 insertions(+), 6 deletions(-) diff --git a/pins/drivers.py b/pins/drivers.py index 5aa3e186..9ae64662 100644 --- a/pins/drivers.py +++ b/pins/drivers.py @@ -1,5 +1,5 @@ from pathlib import Path -from typing import Sequence +from typing import Literal, Sequence, assert_never from .config import PINS_ENV_INSECURE_READ, get_allow_pickle_read from .errors import PinsInsecureReadError @@ -14,14 +14,38 @@ def _assert_is_pandas_df(x, file_type: str) -> None: - import pandas as pd + df_family = _get_df_family(x) - if not isinstance(x, pd.DataFrame): + if df_family != "pandas": raise NotImplementedError( f"Currently only pandas.DataFrame can be saved as type {file_type!r}." ) +def _get_df_family(df) -> Literal["unknown", "pandas", "polars"]: + try: + import polars as pl + except ModuleNotFoundError: + is_polars_df = False + else: + is_polars_df = isinstance(df, pl.DataFrame) + + import pandas as pd + + is_pandas_df = isinstance(df, pd.DataFrame) + + if not is_polars_df and not is_pandas_df: + return "unknown" + if is_polars_df and is_pandas_df: # Hybrid DataFrame type! + return "unknown" + elif is_polars_df: + return "polars" + elif is_pandas_df: + return "pandas" + else: + assert_never(df) + + def load_path(meta, path_to_version): # Check that only a single file name was given fnames = [meta.file] if isinstance(meta.file, str) else meta.file @@ -171,9 +195,17 @@ def save_data(obj, fname, type=None, apply_suffix: bool = True) -> "str | Sequen ) elif type == "parquet": - _assert_is_pandas_df(obj, file_type=type) - - obj.to_parquet(final_name) + df_family = _get_df_family(obj) + if df_family == "polars": + obj.write_parquet(final_name) + elif df_family == "pandas": + obj.to_parquet(final_name) + else: + msg = ( + "Currently only pandas.DataFrame and polars.DataFrame can be saved to " + "a parquet file." + ) + raise NotImplementedError(msg) elif type == "joblib": import joblib diff --git a/pins/tests/test_drivers.py b/pins/tests/test_drivers.py index 230f0e80..5f7d66b6 100644 --- a/pins/tests/test_drivers.py +++ b/pins/tests/test_drivers.py @@ -76,6 +76,36 @@ def test_driver_roundtrip(tmp_path: Path, type_): assert df.equals(obj) +@pytest.mark.parametrize( + "type_", + [ + "parquet", + ], +) +def test_driver_polars_roundtrip(tmp_dir2, type_): + import polars as pl + + df = pl.DataFrame({"x": [1, 2, 3]}) + + fname = "some_df" + full_file = f"{fname}.{type_}" + + p_obj = tmp_dir2 / fname + res_fname = save_data(df, p_obj, type_) + + assert Path(res_fname).name == full_file + + meta = MetaRaw(full_file, type_, "my_pin") + pandas_df = load_data( + meta, fsspec.filesystem("file"), tmp_dir2, allow_pickle_read=True + ) + + # Convert from pandas to polars + obj = pl.DataFrame(pandas_df) + + assert df.equals(obj) + + @pytest.mark.parametrize( "type_", [ From 998662a75ff18f2786462b52adca98896f9a30f9 Mon Sep 17 00:00:00 2001 From: Nathan McDougall Date: Sat, 20 Jul 2024 12:57:41 +1200 Subject: [PATCH 3/9] Support `polars.DataFrame` in `default_title` --- pins/drivers.py | 8 +++++--- pins/tests/test_drivers.py | 2 ++ 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/pins/drivers.py b/pins/drivers.py index 9ae64662..08ec5f39 100644 --- a/pins/drivers.py +++ b/pins/drivers.py @@ -232,13 +232,15 @@ def save_data(obj, fname, type=None, apply_suffix: bool = True) -> "str | Sequen def default_title(obj, name): - import pandas as pd + df_family = _get_df_family(obj) - if isinstance(obj, pd.DataFrame): + if df_family in ("pandas", "polars"): # TODO(compat): title says CSV rather than data.frame # see https://github.com/machow/pins-python/issues/5 shape_str = " x ".join(map(str, obj.shape)) return f"{name}: a pinned {shape_str} DataFrame" - else: + elif df_family == "unknown": obj_name = type(obj).__qualname__ return f"{name}: a pinned {obj_name} object" + else: + assert_never(df_family) diff --git a/pins/tests/test_drivers.py b/pins/tests/test_drivers.py index 5f7d66b6..b82fcad3 100644 --- a/pins/tests/test_drivers.py +++ b/pins/tests/test_drivers.py @@ -2,6 +2,7 @@ import fsspec import pandas as pd +import polars as pl import pytest from pins.config import PINS_ENV_INSECURE_READ @@ -34,6 +35,7 @@ class D: [ (pd.DataFrame({"x": [1, 2]}), "somename: a pinned 2 x 1 DataFrame"), (pd.DataFrame({"x": [1], "y": [2]}), "somename: a pinned 1 x 2 DataFrame"), + (pl.DataFrame({"x": [1, 2]}), "somename: a pinned 2 x 1 DataFrame"), (ExC(), "somename: a pinned ExC object"), (ExC().D(), "somename: a pinned ExC.D object"), ([1, 2, 3], "somename: a pinned list object"), From f319afdbe1501397eeace15b8331835079ced0b3 Mon Sep 17 00:00:00 2001 From: Nathan McDougall Date: Sat, 20 Jul 2024 13:01:13 +1200 Subject: [PATCH 4/9] Update docs. --- README.md | 5 +++-- README.qmd | 3 ++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 31955ddb..a9b7924b 100644 --- a/README.md +++ b/README.md @@ -67,9 +67,10 @@ board.pin_write(mtcars.head(), "mtcars", type="csv") Above, we saved the data as a CSV, but depending on what you’re saving and who else you want to read it, you might use the `type` argument to -instead save it as a `joblib`, `parquet`, or `json` file. +instead save it as a `joblib`, `parquet`, or `json` file. If you're using +a `polars.DataFrame`, you can save to `parquet`. -You can later retrieve the pinned data with `.pin_read()`: +You can later retrieve the pinned data as a `pandas.DataFrame` with `.pin_read()`: ``` python board.pin_read("mtcars") diff --git a/README.qmd b/README.qmd index 98af5826..a1982e99 100644 --- a/README.qmd +++ b/README.qmd @@ -66,8 +66,9 @@ board.pin_write(mtcars.head(), "mtcars", type="csv") Above, we saved the data as a CSV, but depending on what you’re saving and who else you want to read it, you might use the `type` argument to instead save it as a `joblib`, `parquet`, or `json` file. +If you're using a `polars.DataFrame`, you can save to `parquet`. -You can later retrieve the pinned data with `.pin_read()`: +You can later retrieve the pinned data as a `pandas.DataFrame` with `.pin_read()`: ```{python} board.pin_read("mtcars") From 63895dc4337c9672afdc04e092267c01aa8a81a9 Mon Sep 17 00:00:00 2001 From: Nathan McDougall Date: Sat, 20 Jul 2024 13:47:45 +1200 Subject: [PATCH 5/9] Use `assert_never` from `typing_extensions` not `typing`. --- pins/drivers.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pins/drivers.py b/pins/drivers.py index 08ec5f39..a52f5265 100644 --- a/pins/drivers.py +++ b/pins/drivers.py @@ -1,5 +1,7 @@ from pathlib import Path -from typing import Literal, Sequence, assert_never +from typing import Literal, Sequence + +from typing_extensions import assert_never from .config import PINS_ENV_INSECURE_READ, get_allow_pickle_read from .errors import PinsInsecureReadError From 036c7e94a266cba14b637eeb038cf40ef8f57849 Mon Sep 17 00:00:00 2001 From: Nathan McDougall Date: Wed, 24 Jul 2024 10:50:50 +1200 Subject: [PATCH 6/9] Refactor _get_df_family to raise an error instead. --- pins/drivers.py | 34 +++++++++++++++------------------- 1 file changed, 15 insertions(+), 19 deletions(-) diff --git a/pins/drivers.py b/pins/drivers.py index a52f5265..ab352120 100644 --- a/pins/drivers.py +++ b/pins/drivers.py @@ -1,8 +1,6 @@ from pathlib import Path from typing import Literal, Sequence -from typing_extensions import assert_never - from .config import PINS_ENV_INSECURE_READ, get_allow_pickle_read from .errors import PinsInsecureReadError from .meta import Meta @@ -24,7 +22,8 @@ def _assert_is_pandas_df(x, file_type: str) -> None: ) -def _get_df_family(df) -> Literal["unknown", "pandas", "polars"]: +def _get_df_family(df) -> Literal["pandas", "polars"]: + """Return the type of DataFrame, or raise NotImplementedError if we can't decide.""" try: import polars as pl except ModuleNotFoundError: @@ -36,16 +35,15 @@ def _get_df_family(df) -> Literal["unknown", "pandas", "polars"]: is_pandas_df = isinstance(df, pd.DataFrame) - if not is_polars_df and not is_pandas_df: - return "unknown" - if is_polars_df and is_pandas_df: # Hybrid DataFrame type! - return "unknown" + if is_polars_df and is_pandas_df: + raise NotImplementedError( + "Hybrid DataFrames (simultaneously pandas and polars) are not supported." + ) elif is_polars_df: return "polars" elif is_pandas_df: return "pandas" - else: - assert_never(df) + raise NotImplementedError(f"Unrecognized DataFrame type: {type(df)}") def load_path(meta, path_to_version): @@ -234,15 +232,13 @@ def save_data(obj, fname, type=None, apply_suffix: bool = True) -> "str | Sequen def default_title(obj, name): - df_family = _get_df_family(obj) - - if df_family in ("pandas", "polars"): - # TODO(compat): title says CSV rather than data.frame - # see https://github.com/machow/pins-python/issues/5 - shape_str = " x ".join(map(str, obj.shape)) - return f"{name}: a pinned {shape_str} DataFrame" - elif df_family == "unknown": + try: + _get_df_family(obj) + except NotImplementedError: obj_name = type(obj).__qualname__ return f"{name}: a pinned {obj_name} object" - else: - assert_never(df_family) + + # TODO(compat): title says CSV rather than data.frame + # see https://github.com/machow/pins-python/issues/5 + shape_str = " x ".join(map(str, obj.shape)) + return f"{name}: a pinned {shape_str} DataFrame" From 85bd8c621dc9c17a66ea9b97555727c4c4f407ca Mon Sep 17 00:00:00 2001 From: Nathan McDougall Date: Thu, 25 Jul 2024 10:33:44 +1200 Subject: [PATCH 7/9] Write more robust df-library choosing logic. --- pins/drivers.py | 131 ++++++++++++++++++++++++++++++------------------ 1 file changed, 82 insertions(+), 49 deletions(-) diff --git a/pins/drivers.py b/pins/drivers.py index ab352120..abf9d01e 100644 --- a/pins/drivers.py +++ b/pins/drivers.py @@ -1,5 +1,5 @@ from pathlib import Path -from typing import Literal, Sequence +from typing import Literal, Sequence, TypeAlias from .config import PINS_ENV_INSECURE_READ, get_allow_pickle_read from .errors import PinsInsecureReadError @@ -11,39 +11,7 @@ UNSAFE_TYPES = frozenset(["joblib"]) REQUIRES_SINGLE_FILE = frozenset(["csv", "joblib", "file"]) - - -def _assert_is_pandas_df(x, file_type: str) -> None: - df_family = _get_df_family(x) - - if df_family != "pandas": - raise NotImplementedError( - f"Currently only pandas.DataFrame can be saved as type {file_type!r}." - ) - - -def _get_df_family(df) -> Literal["pandas", "polars"]: - """Return the type of DataFrame, or raise NotImplementedError if we can't decide.""" - try: - import polars as pl - except ModuleNotFoundError: - is_polars_df = False - else: - is_polars_df = isinstance(df, pl.DataFrame) - - import pandas as pd - - is_pandas_df = isinstance(df, pd.DataFrame) - - if is_polars_df and is_pandas_df: - raise NotImplementedError( - "Hybrid DataFrames (simultaneously pandas and polars) are not supported." - ) - elif is_polars_df: - return "polars" - elif is_pandas_df: - return "pandas" - raise NotImplementedError(f"Unrecognized DataFrame type: {type(df)}") +_DFLib: TypeAlias = Literal["pandas", "polars"] def load_path(meta, path_to_version): @@ -176,36 +144,31 @@ def save_data(obj, fname, type=None, apply_suffix: bool = True) -> "str | Sequen final_name = f"{fname}{suffix}" if type == "csv": - _assert_is_pandas_df(obj, file_type=type) - + _choose_df_lib(obj, supported_libs=["pandas"], file_type=type) obj.to_csv(final_name, index=False) elif type == "arrow": # NOTE: R pins accepts the type arrow, and saves it as feather. # we allow reading this type, but raise an error for writing. - _assert_is_pandas_df(obj, file_type=type) - + _choose_df_lib(obj, supported_libs=["pandas"], file_type=type) obj.to_feather(final_name) elif type == "feather": - _assert_is_pandas_df(obj, file_type=type) + _choose_df_lib(obj, supported_libs=["pandas"], file_type=type) raise NotImplementedError( 'Saving data as type "feather" no longer supported. Use type "arrow" instead.' ) elif type == "parquet": - df_family = _get_df_family(obj) - if df_family == "polars": - obj.write_parquet(final_name) - elif df_family == "pandas": + df_lib = _choose_df_lib(obj, supported_libs=["pandas", "polars"], file_type=type) + + if df_lib == "pandas": obj.to_parquet(final_name) + elif df_lib == "polars": + obj.write_parquet(final_name) else: - msg = ( - "Currently only pandas.DataFrame and polars.DataFrame can be saved to " - "a parquet file." - ) - raise NotImplementedError(msg) + raise NotImplementedError elif type == "joblib": import joblib @@ -233,7 +196,7 @@ def save_data(obj, fname, type=None, apply_suffix: bool = True) -> "str | Sequen def default_title(obj, name): try: - _get_df_family(obj) + _choose_df_lib(obj) except NotImplementedError: obj_name = type(obj).__qualname__ return f"{name}: a pinned {obj_name} object" @@ -242,3 +205,73 @@ def default_title(obj, name): # see https://github.com/machow/pins-python/issues/5 shape_str = " x ".join(map(str, obj.shape)) return f"{name}: a pinned {shape_str} DataFrame" + + +def _choose_df_lib( + df, + *, + supported_libs: list[_DFLib] = ["pandas", "polars"], + file_type: str | None = None, +) -> _DFLib: + """Return the type of DataFrame library used in the given DataFrame. + + Args: + df: + The object to check - might not be a DataFrame necessarily. + supported_libs: + The DataFrame libraries to accept for this df. + file_type: + The file type we're trying to save to - used to give more specific error messages. + + Raises: + NotImplementedError: If the DataFrame type is not recognized. + """ + df_libs: list[_DFLib] = [] + + # pandas + import pandas as pd + + if isinstance(df, pd.DataFrame): + df_libs.append("pandas") + + # polars + try: + import polars as pl + except ModuleNotFoundError: + pass + else: + if isinstance(df, pl.DataFrame): + df_libs.append("polars") + + if len(df_libs) == 1: + (df_lib,) = df_libs + elif len(df_libs) > 1: + msg = ( + f"Hybrid DataFrames are not supported: " + f"should only be one of {supported_libs!r}, " + f"but got an object from multiple libraries {df_libs!r}." + ) + raise NotImplementedError(msg) + else: + raise NotImplementedError(f"Unrecognized DataFrame type: {type(df)}") + + if df_lib not in supported_libs: + if file_type is None: + ftype_clause = "in pins" + else: + ftype_clause = f"for type {file_type!r}" + + if len(supported_libs) == 1: + msg = ( + f"Currently only {supported_libs[0]} DataFrames can be saved " + f"{ftype_clause}. {df_lib} DataFrames are not yet supported." + ) + else: + msg = ( + f"Currently only DataFrames from the following libraries can be saved " + f"{ftype_clause}: {supported_libs!r}." + ) + + raise NotImplementedError(msg) + + return df_lib From 280cf5271165e7015ba209a0774844e7b1fe97d7 Mon Sep 17 00:00:00 2001 From: Nathan McDougall Date: Thu, 25 Jul 2024 11:11:30 +1200 Subject: [PATCH 8/9] Use tmp_path instead of tmp_dir2 --- pins/tests/test_drivers.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pins/tests/test_drivers.py b/pins/tests/test_drivers.py index b82fcad3..5186e635 100644 --- a/pins/tests/test_drivers.py +++ b/pins/tests/test_drivers.py @@ -84,7 +84,7 @@ def test_driver_roundtrip(tmp_path: Path, type_): "parquet", ], ) -def test_driver_polars_roundtrip(tmp_dir2, type_): +def test_driver_polars_roundtrip(tmp_path, type_): import polars as pl df = pl.DataFrame({"x": [1, 2, 3]}) @@ -92,14 +92,14 @@ def test_driver_polars_roundtrip(tmp_dir2, type_): fname = "some_df" full_file = f"{fname}.{type_}" - p_obj = tmp_dir2 / fname + p_obj = tmp_path / fname res_fname = save_data(df, p_obj, type_) assert Path(res_fname).name == full_file meta = MetaRaw(full_file, type_, "my_pin") pandas_df = load_data( - meta, fsspec.filesystem("file"), tmp_dir2, allow_pickle_read=True + meta, fsspec.filesystem("file"), tmp_path, allow_pickle_read=True ) # Convert from pandas to polars From fd2a12e3aedcbd6175c642746102ddcaa7151759 Mon Sep 17 00:00:00 2001 From: Nathan McDougall Date: Thu, 25 Jul 2024 11:35:26 +1200 Subject: [PATCH 9/9] Refactoring and adding tests. --- pins/drivers.py | 35 ++++++++++++++++-------- pins/tests/test_drivers.py | 56 +++++++++++++++++++++++++++++++++++++- 2 files changed, 78 insertions(+), 13 deletions(-) diff --git a/pins/drivers.py b/pins/drivers.py index abf9d01e..57cafdf1 100644 --- a/pins/drivers.py +++ b/pins/drivers.py @@ -196,24 +196,32 @@ def save_data(obj, fname, type=None, apply_suffix: bool = True) -> "str | Sequen def default_title(obj, name): try: - _choose_df_lib(obj) + df_lib = _choose_df_lib(obj) except NotImplementedError: obj_name = type(obj).__qualname__ return f"{name}: a pinned {obj_name} object" + _df_lib_to_objname: dict[_DFLib, str] = { + "polars": "DataFrame", + "pandas": "DataFrame", + } + # TODO(compat): title says CSV rather than data.frame # see https://github.com/machow/pins-python/issues/5 shape_str = " x ".join(map(str, obj.shape)) - return f"{name}: a pinned {shape_str} DataFrame" + return f"{name}: a pinned {shape_str} {_df_lib_to_objname[df_lib]}" def _choose_df_lib( df, *, - supported_libs: list[_DFLib] = ["pandas", "polars"], + supported_libs: list[_DFLib] | None = None, file_type: str | None = None, ) -> _DFLib: - """Return the type of DataFrame library used in the given DataFrame. + """Return the library associated with a DataFrame, e.g. "pandas". + + The arguments `supported_libs` and `file_type` must be specified together, and are + meant to be used when saving an object, to choose the appropriate library. Args: df: @@ -221,11 +229,15 @@ def _choose_df_lib( supported_libs: The DataFrame libraries to accept for this df. file_type: - The file type we're trying to save to - used to give more specific error messages. + The file type we're trying to save to - used to give more specific error + messages. Raises: - NotImplementedError: If the DataFrame type is not recognized. + NotImplementedError: If the DataFrame type is not recognized, or not supported. """ + if (supported_libs is None) + (file_type is None) == 1: + raise ValueError("Must provide both or neither of supported_libs and file_type") + df_libs: list[_DFLib] = [] # pandas @@ -243,6 +255,7 @@ def _choose_df_lib( if isinstance(df, pl.DataFrame): df_libs.append("polars") + # Make sure there's only one library associated with the dataframe if len(df_libs) == 1: (df_lib,) = df_libs elif len(df_libs) > 1: @@ -255,16 +268,14 @@ def _choose_df_lib( else: raise NotImplementedError(f"Unrecognized DataFrame type: {type(df)}") - if df_lib not in supported_libs: - if file_type is None: - ftype_clause = "in pins" - else: - ftype_clause = f"for type {file_type!r}" + # Raise if the library is not supported + if supported_libs is not None and df_lib not in supported_libs: + ftype_clause = f"for type {file_type!r}" if len(supported_libs) == 1: msg = ( f"Currently only {supported_libs[0]} DataFrames can be saved " - f"{ftype_clause}. {df_lib} DataFrames are not yet supported." + f"{ftype_clause}. DataFrames from {df_lib} are not yet supported." ) else: msg = ( diff --git a/pins/tests/test_drivers.py b/pins/tests/test_drivers.py index 5186e635..5f8d92b3 100644 --- a/pins/tests/test_drivers.py +++ b/pins/tests/test_drivers.py @@ -6,7 +6,7 @@ import pytest from pins.config import PINS_ENV_INSECURE_READ -from pins.drivers import default_title, load_data, save_data +from pins.drivers import _choose_df_lib, default_title, load_data, save_data from pins.errors import PinsInsecureReadError from pins.meta import MetaRaw from pins.tests.helpers import rm_env @@ -191,3 +191,57 @@ def test_driver_apply_suffix_false(tmp_path: Path): res_fname = save_data(df, p_obj, type_, apply_suffix=False) assert Path(res_fname).name == "some_df" + + +class TestChooseDFLib: + def test_pandas(self): + assert _choose_df_lib(pd.DataFrame({"x": [1]})) == "pandas" + + def test_polars(self): + assert _choose_df_lib(pl.DataFrame({"x": [1]})) == "polars" + + def test_list_raises(self): + with pytest.raises( + NotImplementedError, match="Unrecognized DataFrame type: " + ): + _choose_df_lib([]) + + def test_pandas_subclass(self): + class MyDataFrame(pd.DataFrame): + pass + + assert _choose_df_lib(MyDataFrame({"x": [1]})) == "pandas" + + def test_ftype_compatible(self): + assert ( + _choose_df_lib( + pd.DataFrame({"x": [1]}), supported_libs=["pandas"], file_type="csv" + ) + == "pandas" + ) + + def test_ftype_incompatible(self): + with pytest.raises( + NotImplementedError, + match=( + "Currently only pandas DataFrames can be saved for type 'csv'. " + "DataFrames from polars are not yet supported." + ), + ): + _choose_df_lib( + pl.DataFrame({"x": [1]}), supported_libs=["pandas"], file_type="csv" + ) + + def test_supported_alone_raises(self): + with pytest.raises( + ValueError, + match="Must provide both or neither of supported_libs and file_type", + ): + _choose_df_lib(..., supported_libs=["pandas"]) + + def test_file_type_alone_raises(self): + with pytest.raises( + ValueError, + match="Must provide both or neither of supported_libs and file_type", + ): + _choose_df_lib(..., file_type="csv")