From 5dd77866c28d1a95958628ba0412d1dfdb10f99b Mon Sep 17 00:00:00 2001 From: Venkat <87000728+kpvenkat47@users.noreply.github.com> Date: Tue, 18 Mar 2025 19:25:39 +0530 Subject: [PATCH 1/4] Fix from_records() to preserve columns when nrows=0 --- pandas/core/frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 8f65277f660f7..7ee8992e1c39d 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2231,7 +2231,7 @@ def maybe_reorder( if is_iterator(data): if nrows == 0: - return cls() + return cls(columns=columns) try: first_row = next(data) From cc21929647532edd4eff03401c863cb33349cc09 Mon Sep 17 00:00:00 2001 From: kpvenkat47 Date: Tue, 18 Mar 2025 20:43:36 +0530 Subject: [PATCH 2/4] Update empty DataFrame initialization to preserve columns - Changed the 'if nrows == 0' branch to return cls(columns=columns) in core/frame.py. - Added test to verify column preservation. --- pandas/core/frame.py | 2 +- pandas/core/frame_test_constructors.py | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) create mode 100644 pandas/core/frame_test_constructors.py diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 8f65277f660f7..7ee8992e1c39d 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2231,7 +2231,7 @@ def maybe_reorder( if is_iterator(data): if nrows == 0: - return cls() + return cls(columns=columns) try: first_row = next(data) diff --git a/pandas/core/frame_test_constructors.py b/pandas/core/frame_test_constructors.py new file mode 100644 index 0000000000000..b542c8b21ee79 --- /dev/null +++ b/pandas/core/frame_test_constructors.py @@ -0,0 +1,7 @@ +import pandas as pd +def test_empty_df_preserve_col(): + rows = [] + df = pd.DataFrame.from_records(iter(rows), columns=['col_1', 'Col_2'], nrows=0) + assert list(df.columns)==['col_1', 'Col_2'] + assert len(df) == 0 + \ No newline at end of file From e2fbcebd61b1bc5d68489b75e94bd3d0338c671a Mon Sep 17 00:00:00 2001 From: 
kpvenkat47 Date: Wed, 26 Mar 2025 18:50:40 +0530 Subject: [PATCH 3/4] Updated PR #6114 based on review feedback --- pandas/tests/frame/test_constructors.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 037a2ae294bb2..400a36305bb96 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -2780,6 +2780,12 @@ def test_construction_nan_value_timedelta64_dtype(self): ) tm.assert_frame_equal(result, expected) + def test_from_records_empty_iterator_with_preserve_columns(self): + + rows = [] + df = pd.DataFrame.from_records(iter(rows), columns=["col_1", "Col_2"], nrows=0) + assert list(df.columns) == ["col_1", "Col_2"] + assert len(df) == 0 class TestDataFrameConstructorIndexInference: def test_frame_from_dict_of_series_overlapping_monthly_period_indexes(self): From 5dcc1ac81005c5139d86705d46f615cf45ee9bfc Mon Sep 17 00:00:00 2001 From: kpvenkat47 Date: Tue, 1 Apr 2025 18:43:54 +0530 Subject: [PATCH 4/4] All review comments are addressed. Please check --- pandas/core/frame_test_constructors.py | 7 ------- pandas/tests/frame/constructors/test_from_records.py | 8 ++++++++ pandas/tests/frame/test_constructors.py | 8 +------- 3 files changed, 9 insertions(+), 14 deletions(-) delete mode 100644 pandas/core/frame_test_constructors.py diff --git a/pandas/core/frame_test_constructors.py b/pandas/core/frame_test_constructors.py deleted file mode 100644 index b542c8b21ee79..0000000000000 --- a/pandas/core/frame_test_constructors.py +++ /dev/null @@ -1,7 +0,0 @@ -import pandas as pd -def test_empty_df_preserve_col(): - rows = [] - df = pd.DataFrame.from_records(iter(rows), columns=['col_1', 'Col_2'], nrows=0) - assert list(df.columns)==['col_1', 'Col_2'] - assert len(df) == 0 - \ No newline at end of file diff --git a/pandas/tests/frame/constructors/test_from_records.py b/pandas/tests/frame/constructors/test_from_records.py index 
1d4a2c0075e3e..57bd7a3c877ea 100644 --- a/pandas/tests/frame/constructors/test_from_records.py +++ b/pandas/tests/frame/constructors/test_from_records.py @@ -492,3 +492,11 @@ def test_from_records_structured_array(self): expected_result = DataFrame(modified_data) tm.assert_frame_equal(actual_result, expected_result) + + + def test_from_records_empty_iterator_with_preserve_columns(self): + # GH#61140 + rows = [] + result = DataFrame.from_records(iter(rows), columns=["col_1", "Col_2"], nrows=0) + expected = DataFrame([],columns=["col_1", "Col_2"]) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 400a36305bb96..18f0d4a864c04 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -2779,13 +2779,7 @@ def test_construction_nan_value_timedelta64_dtype(self): ["NaT", "0 days 00:00:00.000000001"], dtype="timedelta64[ns]" ) tm.assert_frame_equal(result, expected) - - def test_from_records_empty_iterator_with_preserve_columns(self): - - rows = [] - df = pd.DataFrame.from_records(iter(rows), columns=["col_1", "Col_2"], nrows=0) - assert list(df.columns) == ["col_1", "Col_2"] - assert len(df) == 0 + class TestDataFrameConstructorIndexInference: def test_frame_from_dict_of_series_overlapping_monthly_period_indexes(self):