Skip to content

Commit c5f0eb4

Browse files
committed
1
1 parent 944c527 commit c5f0eb4

File tree

2 files changed

+59
-4
lines changed

2 files changed

+59
-4
lines changed

pandas/core/arrays/arrow/accessors.py

Lines changed: 39 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -156,10 +156,45 @@ def __getitem__(self, key: int | slice) -> Series:
156156
from pandas import Series
157157

158158
if isinstance(key, int):
159-
# TODO: Support negative key but pyarrow does not allow
160-
# element index to be an array.
161-
# if key < 0:
162-
# key = pc.add(key, pc.list_value_length(self._pa_array))
159+
if key < 0:
160+
arr = self._pa_array
161+
lengths = pc.list_value_length(arr)
162+
not_null = pc.is_valid(arr)
163+
length_zero = pc.equal(lengths, 0)
164+
length_too_short = pc.less(lengths, abs(key))
165+
should_error = pc.and_(not_null, pc.or_(length_zero, length_too_short))
166+
if pc.any(should_error).as_py():
167+
for i in range(len(arr)):
168+
if not arr.is_null()[i].as_py():
169+
current_length = lengths[i].as_py()
170+
if current_length == 0:
171+
raise IndexError(f"Index {key} is out of bounds: should be in [0, 0)")
172+
if current_length < abs(key):
173+
raise IndexError(f"Index {key} is out of bounds: should be in [{-current_length}, {current_length})")
174+
chunks = arr.chunks if isinstance(arr, pa.ChunkedArray) else [arr]
175+
all_results = []
176+
for chunk in chunks:
177+
if len(chunk) == 0:
178+
continue
179+
chunk_lengths = pc.list_value_length(chunk)
180+
chunk_offsets = chunk.offsets
181+
offsets = chunk_offsets.slice(0, len(chunk))
182+
indices = pc.add(pc.add(offsets, chunk_lengths), key)
183+
taken_values = chunk.values.take(indices)
184+
if chunk.null_count > 0:
185+
mask = chunk.is_null()
186+
null_scalar = pa.scalar(None, type=chunk.type.value_type)
187+
chunk_result = pc.if_else(mask, null_scalar, taken_values)
188+
else:
189+
chunk_result = taken_values
190+
all_results.append(chunk_result)
191+
result_values = pa.concat_arrays(all_results) if all_results else pa.array([],type=arr.type.value_type)
192+
return Series(
193+
result_values,
194+
dtype=ArrowDtype(result_values.type),
195+
index=self._data.index,
196+
name=self._data.name,
197+
)
163198
element = pc.list_element(self._pa_array, key)
164199
return Series(
165200
element,

pandas/tests/series/accessors/test_list_accessor.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,26 @@ def test_list_getitem(list_dtype):
3030
tm.assert_series_equal(actual, expected)
3131

3232

33+
def test_list_getitem_negative_index():
    """Exercise ``Series.list[-1]`` on pyarrow list-typed Series.

    Covers three scenarios in order: a named Series containing a null row,
    a Series whose only row is an empty list (expected to raise
    ``IndexError``), and a Series whose rows have different lengths.
    """
    int_list_dtype = ArrowDtype(pa.list_(pa.int64()))

    # The last element of each list is returned; the null row yields a null
    # and the Series name is carried over to the result.
    with_null = Series(
        [[1, 2, 3], [4, None, 5], None],
        dtype=int_list_dtype,
        name="a",
    )
    tm.assert_series_equal(
        with_null.list[-1],
        Series([3, 5, None], dtype="int64[pyarrow]", name="a"),
    )

    # An empty list has no last element, so indexing must fail loudly.
    only_empty = Series([[]], dtype=int_list_dtype)
    with pytest.raises(
        IndexError,
        match="Index -1 is out of bounds: should be in \\[0, 0\\)",
    ):
        only_empty.list[-1]

    # Rows of length 1, 2 and 3: -1 resolves against each row's own length.
    ragged = Series([[1], [1, 2], [1, 2, 3]], dtype=int_list_dtype)
    tm.assert_series_equal(
        ragged.list[-1],
        Series([1, 2, 3], dtype="int64[pyarrow]"),
    )
51+
52+
3353
def test_list_getitem_index():
3454
# GH 58425
3555
ser = Series(

0 commit comments

Comments
 (0)