Skip to content

Commit 26a4fc8

Browse files
BUG: ensure we still honor copy=True in Series constructor in all cases
1 parent 47fea80 commit 26a4fc8

File tree

2 files changed: 53 additions & 10 deletions

pandas/core/series.py

Lines changed: 15 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -265,8 +265,12 @@ class Series(base.IndexOpsMixin, NDFrame): # type: ignore[misc]
265265
See the :ref:`user guide <basics.dtypes>` for more usages.
266266
name : Hashable, default None
267267
The name to give to the Series.
268-
copy : bool, default False
269-
Copy input data. Only affects Series or 1d ndarray input. See examples.
268+
copy : bool, default None
269+
Copy input data. By default, will copy if the input data is a numpy or
270+
pandas array.
271+
Set to False to avoid copying, at your own risk (if you know the input
272+
data won't be modified elsewhere).
273+
Only affects Series or 1d ndarray input. See examples.
270274
271275
See Also
272276
--------
@@ -397,6 +401,7 @@ def __init__(
397401
if copy is not False:
398402
if dtype is None or astype_is_view(data.dtype, pandas_dtype(dtype)):
399403
data = data.copy()
404+
copy = False
400405
if copy is None:
401406
copy = False
402407

@@ -411,6 +416,7 @@ def __init__(
411416
Pandas4Warning,
412417
stacklevel=2,
413418
)
419+
allow_mgr = True
414420

415421
name = ibase.maybe_extract_name(name, data, type(self))
416422

@@ -436,9 +442,8 @@ def __init__(
436442
if isinstance(data, Index):
437443
if dtype is not None:
438444
data = data.astype(dtype)
439-
440-
refs = data._references
441-
copy = False
445+
if not copy:
446+
refs = data._references
442447

443448
elif isinstance(data, np.ndarray):
444449
if len(data.dtype):
@@ -454,8 +459,9 @@ def __init__(
454459
data = data._mgr.copy(deep=False)
455460
else:
456461
data = data.reindex(index)
457-
copy = False
458462
data = data._mgr
463+
if data._has_no_reference(0):
464+
copy = False
459465
elif isinstance(data, Mapping):
460466
data, index = self._init_dict(data, index, dtype)
461467
dtype = None
@@ -500,8 +506,10 @@ def __init__(
500506
# create/copy the manager
501507
if isinstance(data, SingleBlockManager):
502508
if dtype is not None:
509+
if not astype_is_view(data.dtype, pandas_dtype(dtype)):
510+
copy = False
503511
data = data.astype(dtype=dtype)
504-
elif copy:
512+
if copy:
505513
data = data.copy(deep=True)
506514
else:
507515
data = sanitize_array(data, index, dtype, copy)

pandas/tests/copy_view/test_constructors.py

Lines changed: 38 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -47,14 +47,24 @@ def test_series_from_series(dtype):
4747
ser.iloc[0] = 0
4848
assert result.iloc[0] == 1
4949

50+
# forcing copy=False still gives a CoW shallow copy
51+
result = Series(ser, dtype=dtype, copy=False)
52+
assert np.shares_memory(get_array(ser), get_array(result))
53+
assert result._mgr.blocks[0].refs.has_reference()
54+
55+
# forcing copy=True still results in an actual hard copy up front
56+
result = Series(ser, dtype=dtype, copy=True)
57+
assert not np.shares_memory(get_array(ser), get_array(result))
58+
assert ser._mgr._has_no_reference(0)
59+
5060

5161
def test_series_from_series_with_reindex():
5262
# Case: constructing a Series from another Series with specifying an index
5363
# that potentially requires a reindex of the values
5464
ser = Series([1, 2, 3], name="name")
5565

5666
# passing an index that doesn't actually require a reindex of the values
57-
# -> without CoW we get an actual mutating view
67+
# -> still getting a CoW shallow copy
5868
for index in [
5969
ser.index,
6070
ser.index.copy(),
@@ -66,6 +76,11 @@ def test_series_from_series_with_reindex():
6676
result.iloc[0] = 0
6777
assert ser.iloc[0] == 1
6878

79+
# forcing copy=True still results in an actual hard copy up front
80+
result = Series(ser, index=index, copy=True)
81+
assert not np.shares_memory(ser.values, result.values)
82+
assert not result._mgr.blocks[0].refs.has_reference()
83+
6984
# ensure that if an actual reindex is needed, we don't have any refs
7085
# (mutating the result wouldn't trigger CoW)
7186
result = Series(ser, index=[0, 1, 2, 3])
@@ -87,6 +102,13 @@ def test_series_from_array(idx, dtype, arr):
87102
arr[0] = 100
88103
tm.assert_series_equal(ser, ser_orig)
89104

105+
# if the user explicitly passes copy=False, we get an actual view
106+
# not protected by CoW
107+
ser = Series(arr, dtype=dtype, index=idx, copy=False)
108+
assert np.shares_memory(get_array(ser), data)
109+
arr[0] = 50
110+
assert ser.iloc[0] == 50
111+
90112

91113
@pytest.mark.parametrize("copy", [True, False, None])
92114
def test_series_from_array_different_dtype(copy):
@@ -112,9 +134,22 @@ def test_series_from_index(idx):
112134
ser.iloc[0] = ser.iloc[1]
113135
tm.assert_index_equal(idx, expected)
114136

137+
# forcing copy=False still gives a CoW shallow copy
138+
ser = Series(idx, copy=False)
139+
assert np.shares_memory(get_array(ser), get_array(idx))
140+
assert not ser._mgr._has_no_reference(0)
141+
ser.iloc[0] = ser.iloc[1]
142+
tm.assert_index_equal(idx, expected)
143+
144+
# forcing copy=True still results in a copy
145+
ser = Series(idx, copy=True)
146+
assert not np.shares_memory(get_array(ser), get_array(idx))
147+
assert ser._mgr._has_no_reference(0)
115148

116-
def test_series_from_index_different_dtypes():
117-
idx = Index([1, 2, 3], dtype="int64")
149+
150+
@pytest.mark.parametrize("copy", [True, False, None])
151+
def test_series_from_index_different_dtypes(copy):
152+
idx = Index([1, 2, 3], dtype="int64", copy=copy)
118153
ser = Series(idx, dtype="int32")
119154
assert not np.shares_memory(get_array(ser), get_array(idx))
120155
assert ser._mgr._has_no_reference(0)

0 commit comments

Comments
 (0)