Skip to content

GH: 624 - Added dtype_backend to all read_* functions #655

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Merged
merged 14 commits into from
Apr 24, 2023
2 changes: 2 additions & 0 deletions pandas-stubs/core/generic.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ from pandas._typing import (
CSVQuoting,
Dtype,
DtypeArg,
DtypeBackend,
FilePath,
FileWriteMode,
FillnaOptions,
Expand Down Expand Up @@ -373,6 +374,7 @@ class NDFrame(PandasObject, indexing.IndexingMixin):
convert_string: _bool = ...,
convert_integer: _bool = ...,
convert_boolean: _bool = ...,
dtype_backend: DtypeBackend = ...,
) -> NDFrameT: ...
def fillna(
self,
Expand Down
3 changes: 1 addition & 2 deletions pandas-stubs/core/series.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,6 @@ from typing_extensions import (
import xarray as xr

from pandas._libs.interval import Interval
from pandas._libs.lib import NoDefault
from pandas._libs.missing import NAType
from pandas._libs.tslibs import BaseOffset
from pandas._typing import (
Expand Down Expand Up @@ -1135,7 +1134,7 @@ class Series(IndexOpsMixin, NDFrame, Generic[S1]):
convert_string: _bool = ...,
convert_integer: _bool = ...,
convert_boolean: _bool = ...,
dtype_backend: DtypeBackend | NoDefault = ...,
dtype_backend: DtypeBackend = ...,
) -> Series[S1]: ...
@overload
def ffill(
Expand Down
6 changes: 3 additions & 3 deletions pandas-stubs/io/clipboards.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ from pandas.io.parsers import TextFileReader
def read_clipboard(
sep: str | None = ...,
*,
dtype_backend: DtypeBackend | NoDefault = ...,
delimiter: str | None = ...,
header: int | Sequence[int] | Literal["infer"] | None = ...,
names: ListLikeHashable | None = ...,
Expand Down Expand Up @@ -82,12 +83,12 @@ def read_clipboard(
memory_map: bool = ...,
float_precision: Literal["high", "legacy", "round_trip"] | None = ...,
storage_options: StorageOptions | None = ...,
dtype_backend: DtypeBackend | NoDefault = ...,
) -> TextFileReader: ...
@overload
def read_clipboard(
sep: str | None = ...,
*,
dtype_backend: DtypeBackend | NoDefault = ...,
delimiter: str | None = ...,
header: int | Sequence[int] | Literal["infer"] | None = ...,
names: ListLikeHashable | None = ...,
Expand Down Expand Up @@ -140,12 +141,12 @@ def read_clipboard(
memory_map: bool = ...,
float_precision: Literal["high", "legacy", "round_trip"] | None = ...,
storage_options: StorageOptions | None = ...,
dtype_backend: DtypeBackend | NoDefault = ...,
) -> TextFileReader: ...
@overload
def read_clipboard(
sep: str | None = ...,
*,
dtype_backend: DtypeBackend | NoDefault = ...,
delimiter: str | None = ...,
header: int | Sequence[int] | Literal["infer"] | None = ...,
names: ListLikeHashable | None = ...,
Expand Down Expand Up @@ -198,7 +199,6 @@ def read_clipboard(
memory_map: bool = ...,
float_precision: Literal["high", "legacy", "round_trip"] | None = ...,
storage_options: StorageOptions | None = ...,
dtype_backend: DtypeBackend | NoDefault = ...,
) -> DataFrame: ...
def to_clipboard(
obj, excel: bool = ..., sep: str | None = ..., **kwargs: Any
Expand Down
6 changes: 3 additions & 3 deletions pandas-stubs/io/parsers/readers.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -398,9 +398,9 @@ def read_fwf(
colspecs: Sequence[tuple[int, int]] | Literal["infer"] | None = ...,
widths: Sequence[int] | None = ...,
infer_nrows: int = ...,
dtype_backend: DtypeBackend | NoDefault = ...,
iterator: Literal[True],
chunksize: int | None = ...,
dtype_backend: DtypeBackend | NoDefault = ...,
**kwds: Any,
) -> TextFileReader: ...
@overload
Expand All @@ -410,9 +410,9 @@ def read_fwf(
colspecs: Sequence[tuple[int, int]] | Literal["infer"] | None = ...,
widths: Sequence[int] | None = ...,
infer_nrows: int = ...,
dtype_backend: DtypeBackend | NoDefault = ...,
iterator: bool = ...,
chunksize: int,
dtype_backend: DtypeBackend | NoDefault = ...,
**kwds: Any,
) -> TextFileReader: ...
@overload
Expand All @@ -422,9 +422,9 @@ def read_fwf(
colspecs: Sequence[tuple[int, int]] | Literal["infer"] | None = ...,
widths: Sequence[int] | None = ...,
infer_nrows: int = ...,
dtype_backend: DtypeBackend | NoDefault = ...,
iterator: Literal[False] = ...,
chunksize: None = ...,
dtype_backend: DtypeBackend | NoDefault = ...,
**kwds: Any,
) -> DataFrame: ...

Expand Down
6 changes: 6 additions & 0 deletions tests/test_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -2597,3 +2597,9 @@ def test_suffix_prefix_index() -> None:
check(
assert_type(df.add_prefix("_col", axis="columns"), pd.DataFrame), pd.DataFrame
)


def test_convert_dtypes_dtype_backend() -> None:
    """Check that ``DataFrame.convert_dtypes`` accepts ``dtype_backend``.

    The converted result must be typed as, and at runtime be, a
    ``pd.DataFrame``.
    """
    frame = pd.DataFrame({"A": [1, 2, 3, 4], "B": [3, 4, 5, 6]})
    check(
        assert_type(
            frame.convert_dtypes(dtype_backend="numpy_nullable"), pd.DataFrame
        ),
        pd.DataFrame,
    )
59 changes: 34 additions & 25 deletions tests/test_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -1272,8 +1272,7 @@ def test_read_sql_dtype_backend() -> None:
conn2.close()


@lxml_skip
def test_all_read_dtype_backend() -> None:
def test_all_read_without_lxml_dtype_backend() -> None:
with ensure_clean() as path:
check(assert_type(DF.to_csv(path), None), type(None))
s1 = read_csv(path, iterator=True, dtype_backend="pyarrow")
Expand All @@ -1285,20 +1284,6 @@ def test_all_read_dtype_backend() -> None:
assert_type(read_fwf(path, dtype_backend="pyarrow"), DataFrame), DataFrame
)

with ensure_clean() as path:
check(assert_type(DF.to_html(path), None), type(None))
check(
assert_type(
read_html(path, dtype_backend="numpy_nullable"), List[DataFrame]
),
list,
)

check(assert_type(DF.to_xml(path), None), type(None))
check(
assert_type(read_xml(path, dtype_backend="pyarrow"), DataFrame), DataFrame
)

check(assert_type(DF.to_json(path), None), type(None))
check(
assert_type(read_json(path, dtype_backend="pyarrow"), DataFrame), DataFrame
Expand All @@ -1321,11 +1306,12 @@ def test_all_read_dtype_backend() -> None:
)
con.close()

check(assert_type(DF.to_orc(path), None), type(None))
check(
assert_type(read_orc(path, dtype_backend="numpy_nullable"), DataFrame),
DataFrame,
)
if not WINDOWS:
check(assert_type(DF.to_orc(path), None), type(None))
check(
assert_type(read_orc(path, dtype_backend="numpy_nullable"), DataFrame),
DataFrame,
)

check(assert_type(DF.to_feather(path), None), type(None))
check(
Expand All @@ -1343,10 +1329,6 @@ def test_all_read_dtype_backend() -> None:
np.ndarray,
)

# con = sqlite3.connect(path)
# check(assert_type(DF.to_sql("test", con=con), Union[int, None]))
# assert_type(read_sql_table("test", con=con), DataFrame)

with ensure_clean(".xlsx") as path:
as_str: str = path
DF.to_excel(path)
Expand All @@ -1365,3 +1347,30 @@ def test_all_read_dtype_backend() -> None:
),
TextFileReader,
)

if TYPE_CHECKING:
with ensure_clean() as path:
co1 = sqlite3.connect(path)
assert_type(DF.to_sql("test", con=co1), Union[int, None])
assert_type(
read_sql_table("test", con=co1, dtype_backend="numpy_nullable"),
DataFrame,
)
co1.close()


@lxml_skip
def test_read_with_lxml_dtype_backend() -> None:
    """Check ``dtype_backend`` on ``read_html`` and ``read_xml``.

    Each reader is exercised right after the matching writer has put ``DF``
    into the temporary path, so the write/read order below matters.
    """
    with ensure_clean() as path:
        # HTML round trip: the writer returns None, the reader a list of frames.
        check(assert_type(DF.to_html(path), None), type(None))
        html_tables = read_html(path, dtype_backend="numpy_nullable")
        check(assert_type(html_tables, List[DataFrame]), list)

        # XML round trip: the writer returns None, the reader a single frame.
        check(assert_type(DF.to_xml(path), None), type(None))
        xml_frame = read_xml(path, dtype_backend="pyarrow")
        check(assert_type(xml_frame, DataFrame), DataFrame)
6 changes: 6 additions & 0 deletions tests/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -1830,3 +1830,9 @@ def test_prefix_summix_axis() -> None:
if TYPE_CHECKING_INVALID_USAGE:
check(assert_type(s.add_prefix("_item", axis=1), pd.Series), pd.Series) # type: ignore[arg-type] # pyright: ignore[reportGeneralTypeIssues]
check(assert_type(s.add_suffix("_item", axis="columns"), pd.Series), pd.Series) # type: ignore[arg-type] # pyright: ignore[reportGeneralTypeIssues]


def test_convert_dtypes_dtype_backend() -> None:
    """Check that ``Series.convert_dtypes`` accepts ``dtype_backend``.

    The converted result must be typed as, and at runtime be, a ``pd.Series``.
    """
    series = pd.Series([1, 2, 3, 4])
    check(
        assert_type(
            series.convert_dtypes(dtype_backend="numpy_nullable"), pd.Series
        ),
        pd.Series,
    )