Skip to content

Commit ad06bbb

Browse files
authored
CLN: Remove deprecated read_*(date_parser=) (#58624)
1 parent e6edc47 commit ad06bbb

File tree

8 files changed

+30
-946
lines changed

8 files changed

+30
-946
lines changed

doc/source/user_guide/io.rst

-13
Original file line number | Diff line number | Diff line change
@@ -279,19 +279,6 @@ parse_dates : boolean or list of ints or names or list of lists or dict, default
279279
keep_date_col : boolean, default ``False``
280280
If ``True`` and parse_dates specifies combining multiple columns then keep the
281281
original columns.
282-
date_parser : function, default ``None``
283-
Function to use for converting a sequence of string columns to an array of
284-
datetime instances. The default uses ``dateutil.parser.parser`` to do the
285-
conversion. pandas will try to call date_parser in three different ways,
286-
advancing to the next if an exception occurs: 1) Pass one or more arrays (as
287-
defined by parse_dates) as arguments; 2) concatenate (row-wise) the string
288-
values from the columns defined by parse_dates into a single array and pass
289-
that; and 3) call date_parser once for each row using one or more strings
290-
(corresponding to the columns defined by parse_dates) as arguments.
291-
292-
.. deprecated:: 2.0.0
293-
Use ``date_format`` instead, or read in as ``object`` and then apply
294-
:func:`to_datetime` as-needed.
295282
date_format : str or dict of column -> format, default ``None``
296283
If used in conjunction with ``parse_dates``, will parse dates according to this
297284
format. For anything more complex,

doc/source/whatsnew/v3.0.0.rst

+1
Original file line number | Diff line number | Diff line change
@@ -254,6 +254,7 @@ Removal of prior version deprecations/changes
254254
- Enforced deprecation of :meth:`offsets.Tick.delta`, use ``pd.Timedelta(obj)`` instead (:issue:`55498`)
255255
- Enforced deprecation of ``axis=None`` acting the same as ``axis=0`` in the DataFrame reductions ``sum``, ``prod``, ``std``, ``var``, and ``sem``, passing ``axis=None`` will now reduce over both axes; this is particularly the case when doing e.g. ``numpy.sum(df)`` (:issue:`21597`)
256256
- Enforced deprecation of ``core.internals`` members ``Block``, ``ExtensionBlock``, and ``DatetimeTZBlock`` (:issue:`58467`)
257+
- Enforced deprecation of ``date_parser`` in :func:`read_csv`, :func:`read_table`, :func:`read_fwf`, and :func:`read_excel` in favour of ``date_format`` (:issue:`50601`)
257258
- Enforced deprecation of ``quantile`` keyword in :meth:`.Rolling.quantile` and :meth:`.Expanding.quantile`, renamed to ``q`` instead. (:issue:`52550`)
258259
- Enforced deprecation of argument ``infer_datetime_format`` in :func:`read_csv`, as a strict version of it is now the default (:issue:`48621`)
259260
- Enforced deprecation of non-standard (``np.ndarray``, :class:`ExtensionArray`, :class:`Index`, or :class:`Series`) argument to :func:`api.extensions.take` (:issue:`52981`)

pandas/io/excel/_base.py

-38
Original file line number | Diff line number | Diff line change
@@ -240,20 +240,6 @@
240240
For non-standard datetime parsing, use ``pd.to_datetime`` after ``pd.read_excel``.
241241
242242
Note: A fast-path exists for iso8601-formatted dates.
243-
date_parser : function, optional
244-
Function to use for converting a sequence of string columns to an array of
245-
datetime instances. The default uses ``dateutil.parser.parser`` to do the
246-
conversion. Pandas will try to call `date_parser` in three different ways,
247-
advancing to the next if an exception occurs: 1) Pass one or more arrays
248-
(as defined by `parse_dates`) as arguments; 2) concatenate (row-wise) the
249-
string values from the columns defined by `parse_dates` into a single array
250-
and pass that; and 3) call `date_parser` once for each row using one or
251-
more strings (corresponding to the columns defined by `parse_dates`) as
252-
arguments.
253-
254-
.. deprecated:: 2.0.0
255-
Use ``date_format`` instead, or read in as ``object`` and then apply
256-
:func:`to_datetime` as-needed.
257243
date_format : str or dict of column -> format, default ``None``
258244
If used in conjunction with ``parse_dates``, will parse dates according to this
259245
format. For anything more complex,
@@ -398,7 +384,6 @@ def read_excel(
398384
na_filter: bool = ...,
399385
verbose: bool = ...,
400386
parse_dates: list | dict | bool = ...,
401-
date_parser: Callable | lib.NoDefault = ...,
402387
date_format: dict[Hashable, str] | str | None = ...,
403388
thousands: str | None = ...,
404389
decimal: str = ...,
@@ -436,7 +421,6 @@ def read_excel(
436421
na_filter: bool = ...,
437422
verbose: bool = ...,
438423
parse_dates: list | dict | bool = ...,
439-
date_parser: Callable | lib.NoDefault = ...,
440424
date_format: dict[Hashable, str] | str | None = ...,
441425
thousands: str | None = ...,
442426
decimal: str = ...,
@@ -474,7 +458,6 @@ def read_excel(
474458
na_filter: bool = True,
475459
verbose: bool = False,
476460
parse_dates: list | dict | bool = False,
477-
date_parser: Callable | lib.NoDefault = lib.no_default,
478461
date_format: dict[Hashable, str] | str | None = None,
479462
thousands: str | None = None,
480463
decimal: str = ".",
@@ -521,7 +504,6 @@ def read_excel(
521504
na_filter=na_filter,
522505
verbose=verbose,
523506
parse_dates=parse_dates,
524-
date_parser=date_parser,
525507
date_format=date_format,
526508
thousands=thousands,
527509
decimal=decimal,
@@ -726,7 +708,6 @@ def parse(
726708
na_values=None,
727709
verbose: bool = False,
728710
parse_dates: list | dict | bool = False,
729-
date_parser: Callable | lib.NoDefault = lib.no_default,
730711
date_format: dict[Hashable, str] | str | None = None,
731712
thousands: str | None = None,
732713
decimal: str = ".",
@@ -795,7 +776,6 @@ def parse(
795776
false_values=false_values,
796777
na_values=na_values,
797778
parse_dates=parse_dates,
798-
date_parser=date_parser,
799779
date_format=date_format,
800780
thousands=thousands,
801781
decimal=decimal,
@@ -829,7 +809,6 @@ def _parse_sheet(
829809
false_values: Iterable[Hashable] | None = None,
830810
na_values=None,
831811
parse_dates: list | dict | bool = False,
832-
date_parser: Callable | lib.NoDefault = lib.no_default,
833812
date_format: dict[Hashable, str] | str | None = None,
834813
thousands: str | None = None,
835814
decimal: str = ".",
@@ -942,7 +921,6 @@ def _parse_sheet(
942921
na_values=na_values,
943922
skip_blank_lines=False, # GH 39808
944923
parse_dates=parse_dates,
945-
date_parser=date_parser,
946924
date_format=date_format,
947925
thousands=thousands,
948926
decimal=decimal,
@@ -1648,7 +1626,6 @@ def parse(
16481626
nrows: int | None = None,
16491627
na_values=None,
16501628
parse_dates: list | dict | bool = False,
1651-
date_parser: Callable | lib.NoDefault = lib.no_default,
16521629
date_format: str | dict[Hashable, str] | None = None,
16531630
thousands: str | None = None,
16541631
comment: str | None = None,
@@ -1737,20 +1714,6 @@ def parse(
17371714
``pd.to_datetime`` after ``pd.read_excel``.
17381715
17391716
Note: A fast-path exists for iso8601-formatted dates.
1740-
date_parser : function, optional
1741-
Function to use for converting a sequence of string columns to an array of
1742-
datetime instances. The default uses ``dateutil.parser.parser`` to do the
1743-
conversion. Pandas will try to call `date_parser` in three different ways,
1744-
advancing to the next if an exception occurs: 1) Pass one or more arrays
1745-
(as defined by `parse_dates`) as arguments; 2) concatenate (row-wise) the
1746-
string values from the columns defined by `parse_dates` into a single array
1747-
and pass that; and 3) call `date_parser` once for each row using one or
1748-
more strings (corresponding to the columns defined by `parse_dates`) as
1749-
arguments.
1750-
1751-
.. deprecated:: 2.0.0
1752-
Use ``date_format`` instead, or read in as ``object`` and then apply
1753-
:func:`to_datetime` as-needed.
17541717
date_format : str or dict of column -> format, default ``None``
17551718
If used in conjunction with ``parse_dates``, will parse dates
17561719
according to this format. For anything more complex,
@@ -1810,7 +1773,6 @@ def parse(
18101773
nrows=nrows,
18111774
na_values=na_values,
18121775
parse_dates=parse_dates,
1813-
date_parser=date_parser,
18141776
date_format=date_format,
18151777
thousands=thousands,
18161778
comment=comment,

pandas/io/parsers/base_parser.py

+23 -72
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,6 @@
33
from collections import defaultdict
44
from copy import copy
55
import csv
6-
import datetime
76
from enum import Enum
87
import itertools
98
from typing import (
@@ -127,7 +126,6 @@ def __init__(self, kwds) -> None:
127126

128127
self.parse_dates = _validate_parse_dates_arg(kwds.pop("parse_dates", False))
129128
self._parse_date_cols: Iterable = []
130-
self.date_parser = kwds.pop("date_parser", lib.no_default)
131129
self.date_format = kwds.pop("date_format", None)
132130
self.dayfirst = kwds.pop("dayfirst", False)
133131
self.keep_date_col = kwds.pop("keep_date_col", False)
@@ -146,7 +144,6 @@ def __init__(self, kwds) -> None:
146144
self.cache_dates = kwds.pop("cache_dates", True)
147145

148146
self._date_conv = _make_date_converter(
149-
date_parser=self.date_parser,
150147
date_format=self.date_format,
151148
dayfirst=self.dayfirst,
152149
cache_dates=self.cache_dates,
@@ -1120,84 +1117,39 @@ def _get_empty_meta(
11201117

11211118

11221119
def _make_date_converter(
1123-
date_parser=lib.no_default,
11241120
dayfirst: bool = False,
11251121
cache_dates: bool = True,
11261122
date_format: dict[Hashable, str] | str | None = None,
11271123
):
1128-
if date_parser is not lib.no_default:
1129-
warnings.warn(
1130-
"The argument 'date_parser' is deprecated and will "
1131-
"be removed in a future version. "
1132-
"Please use 'date_format' instead, or read your data in as 'object' dtype "
1133-
"and then call 'to_datetime'.",
1134-
FutureWarning,
1135-
stacklevel=find_stack_level(),
1136-
)
1137-
if date_parser is not lib.no_default and date_format is not None:
1138-
raise TypeError("Cannot use both 'date_parser' and 'date_format'")
1139-
1140-
def unpack_if_single_element(arg):
1141-
# NumPy 1.25 deprecation: https://github.com/numpy/numpy/pull/10615
1142-
if isinstance(arg, np.ndarray) and arg.ndim == 1 and len(arg) == 1:
1143-
return arg[0]
1144-
return arg
1145-
11461124
def converter(*date_cols, col: Hashable):
11471125
if len(date_cols) == 1 and date_cols[0].dtype.kind in "Mm":
11481126
return date_cols[0]
1127+
# TODO: Can we remove concat_date_cols after deprecation of parsing
1128+
# multiple cols?
1129+
strs = parsing.concat_date_cols(date_cols)
1130+
date_fmt = (
1131+
date_format.get(col) if isinstance(date_format, dict) else date_format
1132+
)
11491133

1150-
if date_parser is lib.no_default:
1151-
strs = parsing.concat_date_cols(date_cols)
1152-
date_fmt = (
1153-
date_format.get(col) if isinstance(date_format, dict) else date_format
1134+
str_objs = ensure_object(strs)
1135+
try:
1136+
result = tools.to_datetime(
1137+
str_objs,
1138+
format=date_fmt,
1139+
utc=False,
1140+
dayfirst=dayfirst,
1141+
cache=cache_dates,
11541142
)
1143+
except (ValueError, TypeError):
1144+
# test_usecols_with_parse_dates4
1145+
# test_multi_index_parse_dates
1146+
return str_objs
11551147

1156-
str_objs = ensure_object(strs)
1157-
try:
1158-
result = tools.to_datetime(
1159-
str_objs,
1160-
format=date_fmt,
1161-
utc=False,
1162-
dayfirst=dayfirst,
1163-
cache=cache_dates,
1164-
)
1165-
except (ValueError, TypeError):
1166-
# test_usecols_with_parse_dates4
1167-
return str_objs
1168-
1169-
if isinstance(result, DatetimeIndex):
1170-
arr = result.to_numpy()
1171-
arr.flags.writeable = True
1172-
return arr
1173-
return result._values
1174-
else:
1175-
try:
1176-
pre_parsed = date_parser(
1177-
*(unpack_if_single_element(arg) for arg in date_cols)
1178-
)
1179-
try:
1180-
result = tools.to_datetime(
1181-
pre_parsed,
1182-
cache=cache_dates,
1183-
)
1184-
except (ValueError, TypeError):
1185-
# test_read_csv_with_custom_date_parser
1186-
result = pre_parsed
1187-
if isinstance(result, datetime.datetime):
1188-
raise Exception("scalar parser")
1189-
return result
1190-
except Exception:
1191-
# e.g. test_datetime_fractional_seconds
1192-
pre_parsed = parsing.try_parse_dates(
1193-
parsing.concat_date_cols(date_cols),
1194-
parser=date_parser,
1195-
)
1196-
try:
1197-
return tools.to_datetime(pre_parsed)
1198-
except (ValueError, TypeError):
1199-
# TODO: not reached in tests 2023-10-27; needed?
1200-
return pre_parsed
1148+
if isinstance(result, DatetimeIndex):
1149+
arr = result.to_numpy()
1150+
arr.flags.writeable = True
1151+
return arr
1152+
return result._values
12011153

12021154
return converter
12031155

@@ -1230,7 +1182,6 @@ def converter(*date_cols, col: Hashable):
12301182
"parse_dates": False,
12311183
"keep_date_col": False,
12321184
"dayfirst": False,
1233-
"date_parser": lib.no_default,
12341185
"date_format": None,
12351186
"usecols": None,
12361187
# 'iterator': False,

pandas/io/parsers/readers.py

+3 -27
Original file line number | Diff line number | Diff line change
@@ -119,7 +119,6 @@ class _read_shared(TypedDict, Generic[HashableT], total=False):
119119
skip_blank_lines: bool
120120
parse_dates: bool | Sequence[Hashable] | None
121121
keep_date_col: bool | lib.NoDefault
122-
date_parser: Callable | lib.NoDefault
123122
date_format: str | dict[Hashable, str] | None
124123
dayfirst: bool
125124
cache_dates: bool
@@ -306,8 +305,7 @@ class _read_shared(TypedDict, Generic[HashableT], total=False):
306305
The behavior is as follows:
307306
308307
* ``bool``. If ``True`` -> try parsing the index.
309-
* ``None``. Behaves like ``True`` if ``date_parser`` or ``date_format`` are
310-
specified.
308+
* ``None``. Behaves like ``True`` if ``date_format`` is specified.
311309
* ``list`` of ``int`` or names. e.g. If ``[1, 2, 3]`` -> try parsing columns 1, 2, 3
312310
each as a separate date column.
313311
* ``list`` of ``list``. e.g. If ``[[1, 3]]`` -> combine columns 1 and 3 and parse
@@ -325,20 +323,6 @@ class _read_shared(TypedDict, Generic[HashableT], total=False):
325323
keep_date_col : bool, default False
326324
If ``True`` and ``parse_dates`` specifies combining multiple columns then
327325
keep the original columns.
328-
date_parser : Callable, optional
329-
Function to use for converting a sequence of string columns to an array of
330-
``datetime`` instances. The default uses ``dateutil.parser.parser`` to do the
331-
conversion. pandas will try to call ``date_parser`` in three different ways,
332-
advancing to the next if an exception occurs: 1) Pass one or more arrays
333-
(as defined by ``parse_dates``) as arguments; 2) concatenate (row-wise) the
334-
string values from the columns defined by ``parse_dates`` into a single array
335-
and pass that; and 3) call ``date_parser`` once for each row using one or
336-
more strings (corresponding to the columns defined by ``parse_dates``) as
337-
arguments.
338-
339-
.. deprecated:: 2.0.0
340-
Use ``date_format`` instead, or read in as ``object`` and then apply
341-
:func:`~pandas.to_datetime` as-needed.
342326
date_format : str or dict of column -> format, optional
343327
Format to use for parsing dates when used in conjunction with ``parse_dates``.
344328
The strftime to parse time, e.g. :const:`"%d/%m/%Y"`. See
@@ -624,13 +608,10 @@ def _read(
624608
filepath_or_buffer: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str], kwds
625609
) -> DataFrame | TextFileReader:
626610
"""Generic reader of line files."""
627-
# if we pass a date_parser and parse_dates=False, we should not parse the
611+
# if we pass a date_format and parse_dates=False, we should not parse the
628612
# dates GH#44366
629613
if kwds.get("parse_dates", None) is None:
630-
if (
631-
kwds.get("date_parser", lib.no_default) is lib.no_default
632-
and kwds.get("date_format", None) is None
633-
):
614+
if kwds.get("date_format", None) is None:
634615
kwds["parse_dates"] = False
635616
else:
636617
kwds["parse_dates"] = True
@@ -749,7 +730,6 @@ def read_csv(
749730
# Datetime Handling
750731
parse_dates: bool | Sequence[Hashable] | None = None,
751732
keep_date_col: bool | lib.NoDefault = lib.no_default,
752-
date_parser: Callable | lib.NoDefault = lib.no_default,
753733
date_format: str | dict[Hashable, str] | None = None,
754734
dayfirst: bool = False,
755735
cache_dates: bool = True,
@@ -928,7 +908,6 @@ def read_table(
928908
# Datetime Handling
929909
parse_dates: bool | Sequence[Hashable] | None = None,
930910
keep_date_col: bool | lib.NoDefault = lib.no_default,
931-
date_parser: Callable | lib.NoDefault = lib.no_default,
932911
date_format: str | dict[Hashable, str] | None = None,
933912
dayfirst: bool = False,
934913
cache_dates: bool = True,
@@ -1638,9 +1617,6 @@ def TextParser(*args, **kwds) -> TextFileReader:
16381617
Comment out remainder of line
16391618
parse_dates : bool, default False
16401619
keep_date_col : bool, default False
1641-
date_parser : function, optional
1642-
1643-
.. deprecated:: 2.0.0
16441620
date_format : str or dict of column -> format, default ``None``
16451621
16461622
.. versionadded:: 2.0.0

pandas/tests/io/excel/test_writers.py

-13
Original file line number | Diff line number | Diff line change
@@ -295,19 +295,6 @@ def test_read_excel_parse_dates(self, tmp_excel):
295295
res = pd.read_excel(tmp_excel, parse_dates=["date_strings"], index_col=0)
296296
tm.assert_frame_equal(df, res)
297297

298-
date_parser = lambda x: datetime.strptime(x, "%m/%d/%Y")
299-
with tm.assert_produces_warning(
300-
FutureWarning,
301-
match="use 'date_format' instead",
302-
raise_on_extra_warnings=False,
303-
):
304-
res = pd.read_excel(
305-
tmp_excel,
306-
parse_dates=["date_strings"],
307-
date_parser=date_parser,
308-
index_col=0,
309-
)
310-
tm.assert_frame_equal(df, res)
311298
res = pd.read_excel(
312299
tmp_excel, parse_dates=["date_strings"], date_format="%m/%d/%Y", index_col=0
313300
)

0 commit comments

Comments
 (0)