From bddb335e66a25d9444f980c8f785a67a8e0bc4d3 Mon Sep 17 00:00:00 2001 From: udit710 Date: Sun, 23 Mar 2025 23:58:03 +1100 Subject: [PATCH 1/2] Allow groupby without 'by' instead of raising TypeError --- pandas/core/frame.py | 5 ++- pandas/core/series.py | 5 ++- pandas/tests/groupby/test_grouping.py | 55 ++++++++++++++++++++++++--- 3 files changed, 55 insertions(+), 10 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 8f65277f660f7..ff867a08a01d9 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -9153,8 +9153,9 @@ def groupby( ) -> DataFrameGroupBy: from pandas.core.groupby.generic import DataFrameGroupBy - if level is None and by is None: - raise TypeError("You have to supply one of 'by' and 'level'") + if level is None and (by is None or by == []): + by = Series(0, index=self.index) + # raise TypeError("You have to supply one of 'by' and 'level'") return DataFrameGroupBy( obj=self, diff --git a/pandas/core/series.py b/pandas/core/series.py index 258e0100a8558..ff4012861d09e 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1973,8 +1973,9 @@ def groupby( ) -> SeriesGroupBy: from pandas.core.groupby.generic import SeriesGroupBy - if level is None and by is None: - raise TypeError("You have to supply one of 'by' and 'level'") + if level is None and (by is None or by == []): + by = Series(0, index=self.index) + # raise TypeError("You have to supply one of 'by' and 'level'") if not as_index: raise TypeError("as_index=False only valid with DataFrame") diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py index 53e9c53efebf7..6e4fedc60a5c9 100644 --- a/pandas/tests/groupby/test_grouping.py +++ b/pandas/tests/groupby/test_grouping.py @@ -695,17 +695,60 @@ def test_groupby_level_with_nas(self, sort): expected = Series([6.0, 18.0], index=[0.0, 1.0]) tm.assert_series_equal(result, expected) + def test_groupby_without_by(self): + # Test DataFrame.groupby() without any 
fields (global aggregation) + df = DataFrame({"A": [1, 2, 3, 4], "B": [10, 20, 30, 40]}) + + # Test basic aggregation with no fields + result = df.groupby().sum() + expected = df.sum().to_frame().T + tm.assert_frame_equal(result, expected) + + # Test with multiple aggregations + result = df.groupby().agg(["sum", "mean"]) + expected = df.agg(["sum", "mean"]) + tm.assert_frame_equal(result, expected) + + # Test Series.groupby() without any fields + s = Series([1, 2, 3, 4]) + result = s.groupby().sum() + expected = Series([10]) # Sum of the values + tm.assert_series_equal(result, expected) + + # Test with conditional logic - should work with None/empty list too + groupby_fields = None + result = df.groupby(groupby_fields).sum() + expected = df.sum().to_frame().T + tm.assert_frame_equal(result, expected) + + # Test with empty list + result = df.groupby([]).sum() + tm.assert_frame_equal(result, expected) + def test_groupby_args(self, multiindex_dataframe_random_data): # PR8618 and issue 8015 frame = multiindex_dataframe_random_data - msg = "You have to supply one of 'by' and 'level'" - with pytest.raises(TypeError, match=msg): - frame.groupby() + # No longer expecting errors when groupby() is called with no arguments + # This is now valid behavior that puts all rows in a single group + result = frame.groupby().sum() + expected = frame.sum().to_frame().T + tm.assert_frame_equal(result, expected) - msg = "You have to supply one of 'by' and 'level'" - with pytest.raises(TypeError, match=msg): - frame.groupby(by=None, level=None) + result = frame.groupby(by=None, level=None).sum() + tm.assert_frame_equal(result, expected) + + # def test_groupby_args(self, multiindex_dataframe_random_data): + # # PR8618 and issue 8015 + # frame = multiindex_dataframe_random_data + + # msg = "You have to supply one of 'by' and 'level'" + # with pytest.raises(TypeError, match=msg): + # frame.groupby() + + # msg = "You have to supply one of 'by' and 'level'" + # with pytest.raises(TypeError, 
match=msg): + # frame.groupby(by=None, level=None) @pytest.mark.parametrize( "sort,labels", From f23101f74678acb5a325531ae6c71b49b27117f1 Mon Sep 17 00:00:00 2001 From: udit710 Date: Mon, 24 Mar 2025 00:18:29 +1100 Subject: [PATCH 2/2] Add info to docs --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/core/frame.py | 1 - pandas/core/series.py | 1 - pandas/tests/groupby/test_grouping.py | 19 ++----------------- 4 files changed, 3 insertions(+), 19 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index bad06329c4bfa..acf6970e2e1ca 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -56,6 +56,7 @@ Other enhancements - :meth:`DataFrame.cummin`, :meth:`DataFrame.cummax`, :meth:`DataFrame.cumprod` and :meth:`DataFrame.cumsum` methods now have a ``numeric_only`` parameter (:issue:`53072`) - :meth:`DataFrame.ewm` now allows ``adjust=False`` when ``times`` is provided (:issue:`54328`) - :meth:`DataFrame.fillna` and :meth:`Series.fillna` can now accept ``value=None``; for non-object dtype the corresponding NA value will be used (:issue:`57723`) +- :meth:`DataFrame.groupby` and :meth:`Series.groupby` can now be called without ``by`` or ``level`` (or with ``by=[]``), which places all rows in a single group (:issue:`61160`) - :meth:`DataFrame.pivot_table` and :func:`pivot_table` now allow the passing of keyword arguments to ``aggfunc`` through ``**kwargs`` (:issue:`57884`) - :meth:`DataFrame.to_json` now encodes ``Decimal`` as strings instead of floats (:issue:`60698`) - :meth:`Series.cummin` and :meth:`Series.cummax` now supports :class:`CategoricalDtype` (:issue:`52335`) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index ff867a08a01d9..66d1fc7815ac1 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -9155,7 +9155,6 @@ def groupby( if level is None and (by is None or by == []): by = Series(0, index=self.index) - # raise TypeError("You have to supply one of 'by' and 'level'") return DataFrameGroupBy( obj=self, diff --git a/pandas/core/series.py 
b/pandas/core/series.py index ff4012861d09e..21a35784c0b4d 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1975,7 +1975,6 @@ def groupby( if level is None and (by is None or by == []): by = Series(0, index=self.index) - # raise TypeError("You have to supply one of 'by' and 'level'") if not as_index: raise TypeError("as_index=False only valid with DataFrame") diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py index 6e4fedc60a5c9..d9a80a1855152 100644 --- a/pandas/tests/groupby/test_grouping.py +++ b/pandas/tests/groupby/test_grouping.py @@ -696,7 +696,7 @@ def test_groupby_level_with_nas(self, sort): tm.assert_series_equal(result, expected) def test_groupby_without_by(self): - # Test DataFrame.groupby() without any fields (global aggregation) + # GH 61160 df = DataFrame({"A": [1, 2, 3, 4], "B": [10, 20, 30, 40]}) # Test basic aggregation with no fields @@ -715,13 +715,12 @@ def test_groupby_without_by(self): expected = Series([10]) # Sum of the values tm.assert_series_equal(result, expected) - # Test with conditional logic - should work with None/empty list too + # Test with conditional logic - should work with None/empty list groupby_fields = None result = df.groupby(groupby_fields).sum() expected = df.sum().to_frame().T tm.assert_frame_equal(result, expected) - # Test with empty list result = df.groupby([]).sum() tm.assert_frame_equal(result, expected) @@ -729,8 +728,6 @@ def test_groupby_args(self, multiindex_dataframe_random_data): # PR8618 and issue 8015 frame = multiindex_dataframe_random_data - # No longer expecting errors when groupby() is called with no arguments - # This is now valid behavior that puts all rows in a single group result = frame.groupby().sum() expected = frame.sum().to_frame().T tm.assert_frame_equal(result, expected) @@ -738,18 +735,6 @@ def test_groupby_args(self, multiindex_dataframe_random_data): result = frame.groupby(by=None, level=None).sum() tm.assert_frame_equal(result, 
expected) - # def test_groupby_args(self, multiindex_dataframe_random_data): - # # PR8618 and issue 8015 - # frame = multiindex_dataframe_random_data - - # msg = "You have to supply one of 'by' and 'level'" - # with pytest.raises(TypeError, match=msg): - # frame.groupby() - - # msg = "You have to supply one of 'by' and 'level'" - # with pytest.raises(TypeError, match=msg): - # frame.groupby(by=None, level=None) - @pytest.mark.parametrize( "sort,labels", [