From c576f430e2e64c0db469980e2f569a5c682356e5 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Thu, 20 May 2021 16:11:38 -0700 Subject: [PATCH 01/10] Add test for GH 15452 --- pandas/tests/indexing/multiindex/test_loc.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pandas/tests/indexing/multiindex/test_loc.py b/pandas/tests/indexing/multiindex/test_loc.py index 0c6f2faf77f00..45989f1941110 100644 --- a/pandas/tests/indexing/multiindex/test_loc.py +++ b/pandas/tests/indexing/multiindex/test_loc.py @@ -801,3 +801,10 @@ def test_mi_partial_indexing_list_raises(): frame.columns.names = ["state", "color"] with pytest.raises(KeyError, match="\\[2\\] not in index"): frame.loc[["b", 2], "Colorado"] + + +def test_mi_indexing_list_nonexistent_raises(): + # GH 15452 + s = Series(range(4), index=MultiIndex.from_product([[1, 2], ["a", "b"]])) + with pytest.raises(KeyError, match="\\['not' 'found'\\] not in index"): + s.loc[["not", "found"]] From 9839583ffff217a73e54bd2c5571acfa47258112 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Thu, 20 May 2021 16:20:14 -0700 Subject: [PATCH 02/10] Add test for GH 15613 --- pandas/tests/frame/indexing/test_where.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/pandas/tests/frame/indexing/test_where.py b/pandas/tests/frame/indexing/test_where.py index 7244624e563e3..32a499f6e9168 100644 --- a/pandas/tests/frame/indexing/test_where.py +++ b/pandas/tests/frame/indexing/test_where.py @@ -745,3 +745,15 @@ def test_where_bool_comparison(): } ) tm.assert_frame_equal(result, expected) + + +def test_where_none_nan_coerce(): + # GH 15613 + expected = DataFrame( + { + "A": [Timestamp("20130101"), pd.NaT, Timestamp("20130103")], + "B": [1, 2, np.nan], + } + ) + result = expected.where(expected.notnull(), None) + tm.assert_frame_equal(result, expected) From e89587d8907a09869fb5ef52e53797258de0ec9d Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Thu, 20 May 2021 16:23:06 -0700 Subject: [PATCH 03/10] Add test for GH 15829 --- pandas/tests/scalar/timestamp/test_constructors.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pandas/tests/scalar/timestamp/test_constructors.py b/pandas/tests/scalar/timestamp/test_constructors.py index 2340d154e9e10..83e40aa5cb96b 100644 --- a/pandas/tests/scalar/timestamp/test_constructors.py +++ b/pandas/tests/scalar/timestamp/test_constructors.py @@ -437,6 +437,13 @@ def test_bounds_with_different_units(self): dt64 = np.datetime64(date_string, unit) Timestamp(dt64) + @pytest.mark.parametrize("arg", ["001-01-01", "0001-01-01"]) + def test_out_of_bounds_string_consistency(self, arg): + # GH 15829 + msg = "Out of bounds" + with pytest.raises(OutOfBoundsDatetime, match=msg): + Timestamp(arg) + def test_min_valid(self): # Ensure that Timestamp.min is a valid Timestamp Timestamp(Timestamp.min) From cbd908e8f0d11bf40c02e6565920139826313f80 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Thu, 20 May 2021 16:28:41 -0700 Subject: [PATCH 04/10] Add test for GH 15891 --- pandas/tests/frame/methods/test_to_csv.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pandas/tests/frame/methods/test_to_csv.py b/pandas/tests/frame/methods/test_to_csv.py index 3b2668aea001c..f6dff6eeb6640 100644 --- a/pandas/tests/frame/methods/test_to_csv.py +++ b/pandas/tests/frame/methods/test_to_csv.py @@ -1330,3 +1330,11 @@ def test_to_csv_numpy_16_bug(self): result = buf.getvalue() assert "2000-01-01" in result + + def test_to_csv_na_quoting(self): + # GH 15891 + result = DataFrame([None, None]).to_csv( + None, header=False, index=False, na_rep="" + ) + expected = '""\n""\n' + assert result == expected From 9419cf7feaac847f32fba721a26de713eddf376b Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Thu, 20 May 2021 16:33:32 -0700 Subject: [PATCH 05/10] Add test for GH 15898 --- pandas/tests/tools/test_to_numeric.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pandas/tests/tools/test_to_numeric.py b/pandas/tests/tools/test_to_numeric.py index e863fb45b1f81..eecb9492f29e3 100644 --- a/pandas/tests/tools/test_to_numeric.py +++ b/pandas/tests/tools/test_to_numeric.py @@ -780,3 +780,10 @@ def test_downcast_nullable_mask_is_copied(): arr[1] = pd.NA # should not modify result tm.assert_extension_array_equal(result, expected) + + +def test_to_numeric_scientific_notation(): + # GH 15898 + result = to_numeric("1.7e+308") + expected = np.float64(1.7e308) + assert result == expected From 70adb6dc8207be8110de1234178601aa17120a4e Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Thu, 20 May 2021 16:43:20 -0700 Subject: [PATCH 06/10] Add test for GH 16018 --- pandas/tests/indexing/multiindex/test_loc.py | 23 ++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/pandas/tests/indexing/multiindex/test_loc.py b/pandas/tests/indexing/multiindex/test_loc.py index 45989f1941110..558270ac86532 100644 --- a/pandas/tests/indexing/multiindex/test_loc.py +++ b/pandas/tests/indexing/multiindex/test_loc.py @@ -808,3 +808,26 @@ def test_mi_indexing_list_nonexistent_raises(): s = Series(range(4), index=MultiIndex.from_product([[1, 2], ["a", "b"]])) with pytest.raises(KeyError, match="\\['not' 'found'\\] not in index"): s.loc[["not", "found"]] + + +def test_mi_add_cell_missing_row_non_unique(): + # GH 16018 + result = DataFrame( + [[1, 2, 5, 6], [3, 4, 7, 8]], + index=["a", "a"], + columns=MultiIndex.from_product([[1, 2], ["A", "B"]]), + ) + result.loc["c"] = -1 + result.loc["c", (1, "A")] = 3 + result.loc["d", (1, "A")] = 3 + expected = DataFrame( + [ + [1.0, 2.0, 5.0, 6.0], + [3.0, 4.0, 7.0, 8.0], + [3.0, -1.0, -1, -1], + [3.0, np.nan, np.nan, np.nan], + ], + index=["a", "a", "c", "d"], + columns=MultiIndex.from_product([[1, 2], ["A", "B"]]), + ) + tm.assert_frame_equal(result, expected) From fb305a916a0e37488a9c75353b78ac6d66602168 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Thu, 20 May 2021 16:52:32 -0700 Subject: [PATCH 07/10] Add test for GH 16174 --- pandas/tests/groupby/test_groupby.py | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index d256b19dbb148..7bea5a9d60cc8 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -1657,7 +1657,7 @@ def test_index_label_overlaps_location(): expected = ser.take([1, 3, 4]) tm.assert_series_equal(actual, expected) - # ... and again, with a generic Index of floats + # and again, with a generic Index of floats df.index = df.index.astype(float) g = df.groupby(list("ababb")) actual = g.filter(lambda x: len(x) > 2) @@ -2282,3 +2282,23 @@ def test_groupby_empty_multi_column(): [], columns=["C"], index=MultiIndex([[], []], [[], []], names=["A", "B"]) ) tm.assert_frame_equal(result, expected) + + +def test_groupby_filtered_df_std(): + # GH 16174 + dicts = [ + {"filter_col": False, "groupby_col": True, "bool_col": True, "float_col": 10.5}, + {"filter_col": True, "groupby_col": True, "bool_col": True, "float_col": 20.5}, + {"filter_col": True, "groupby_col": True, "bool_col": True, "float_col": 30.5}, + ] + df = DataFrame(dicts) + + df_filter = df[df["filter_col"] == True] # noqa:E712 + dfgb = df_filter.groupby("groupby_col") + result = dfgb.std() + expected = DataFrame( + [[0.0, 0.0, 7.071068]], + columns=["filter_col", "bool_col", "float_col"], + index=Index([True], name="groupby_col"), + ) + tm.assert_frame_equal(result, expected) From f1e2d4a8f9e1b1741aac9a65661781301a6c4769 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Fri, 21 May 2021 10:13:03 -0700 Subject: [PATCH 08/10] Add test for GH 16309 --- pandas/tests/indexing/test_loc.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 48bc3e18d9883..0b5697fcedaec 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -2684,3 +2684,13 @@ def test_loc_assign_dict_to_row(self, dtype): expected = DataFrame({"A": ["newA", "def"], "B": ["newB", "jkl"]}, dtype=dtype) tm.assert_frame_equal(df, expected) + + def test_loc_setitem_dict_timedelta_multple_set(self): + # GH 16309 + result = DataFrame(columns=["time", "value"]) + result.loc[1] = {"time": Timedelta(6, unit="s"), "value": "foo"} + result.loc[1] = {"time": Timedelta(6, unit="s"), "value": "foo"} + expected = DataFrame( + [[Timedelta(6, unit="s"), "foo"]], columns=["time", "value"], index=[1] + ) + tm.assert_frame_equal(result, expected) From 14e78fd50221c93cc91eb40dafdc0061c2e54160 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Fri, 21 May 2021 15:00:37 -0700 Subject: [PATCH 09/10] normalize carriage return for windows --- pandas/tests/frame/methods/test_to_csv.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/pandas/tests/frame/methods/test_to_csv.py b/pandas/tests/frame/methods/test_to_csv.py index f6dff6eeb6640..769b08373b890 100644 --- a/pandas/tests/frame/methods/test_to_csv.py +++ b/pandas/tests/frame/methods/test_to_csv.py @@ -1333,8 +1333,11 @@ def test_to_csv_numpy_16_bug(self): def test_to_csv_na_quoting(self): # GH 15891 - result = DataFrame([None, None]).to_csv( - None, header=False, index=False, na_rep="" + # Normalize carriage return for Windows OS + result = ( + DataFrame([None, None]) + .to_csv(None, header=False, index=False, na_rep="") + .replace("\r\n", "\n") ) expected = '""\n""\n' assert result == expected From 4a91a2fd2b09a75a020aeb6ea8bb85db2f3b0bd5 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Sun, 23 May 2021 22:28:13 -0700 Subject: [PATCH 10/10] Skip for arraymanager --- pandas/tests/indexing/test_loc.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 0b5697fcedaec..b8e7b83c97ddb 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -2685,7 +2685,8 @@ def test_loc_assign_dict_to_row(self, dtype): tm.assert_frame_equal(df, expected) - def test_loc_setitem_dict_timedelta_multple_set(self): + @td.skip_array_manager_invalid_test + def test_loc_setitem_dict_timedelta_multiple_set(self): # GH 16309 result = DataFrame(columns=["time", "value"]) result.loc[1] = {"time": Timedelta(6, unit="s"), "value": "foo"}