Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
86 changes: 86 additions & 0 deletions Doc/library/stdtypes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3173,6 +3173,92 @@ objects.

.. versionadded:: 3.14

.. method:: take_bytes(n=None, /)

Take the first *n* bytes as an immutable :class:`bytes`. Defaults to all
bytes.

If *n* is negative, it indexes from the end and takes the first :func:`len`
plus *n* bytes. If *n* is out of bounds, :exc:`IndexError` is raised.

Taking less than the full length will leave remaining bytes in the
:class:`bytearray`, which requires a copy. If the remaining bytes should be
discarded, use :meth:`~bytearray.resize` or :keyword:`del` to truncate,
then call :meth:`~bytearray.take_bytes` without a size.

.. impl-detail::

Taking all bytes is a zero-copy operation.

.. list-table:: Suggested Replacements
:header-rows: 1

* - Description
- Old
- New

* - Return :class:`bytes` after working with :class:`bytearray`
- .. code:: python


def read() -> bytes:
buffer = bytearray(1024)
...
return bytes(buffer)
- .. code:: python

def read() -> bytes:
buffer = bytearray(1024)
...
return buffer.take_bytes()

* - Empty a buffer, getting the bytes
- .. code:: python

buffer = bytearray(1024)
...
data = bytes(buffer)
buffer.clear()
- .. code:: python

buffer = bytearray(1024)
...
data = buffer.take_bytes()

* - Split a buffer at a specific separator
- .. code:: python

buffer = bytearray(b'abc\ndef')
n = buffer.find(b'\n')
data = bytes(buffer[:n + 1])
del buffer[:n + 1]
assert buffer == bytearray(b'def')

- .. code:: python

buffer = bytearray(b'abc\ndef')
n = buffer.find(b'\n')
data = buffer.take_bytes(n + 1)

* - Split a buffer at a specific separator; discard the separator and everything after it
- .. code:: python

buffer = bytearray(b'abc\ndef')
n = buffer.find(b'\n')
data = bytes(buffer[:n])
buffer.clear()
assert data == b'abc'
assert len(buffer) == 0

- .. code:: python

buffer = bytearray(b'abc\ndef')
n = buffer.find(b'\n')
buffer.resize(n)
data = buffer.take_bytes()

.. versionadded:: next

Since bytearray objects are sequences of integers (akin to a list), for a
bytearray object *b*, ``b[0]`` will be an integer, while ``b[0:1]`` will be
a bytearray object of length 1. (This contrasts with text strings, where
Expand Down
1 change: 1 addition & 0 deletions Include/cpython/bytearrayobject.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ typedef struct {
char *ob_bytes; /* Physical backing buffer */
char *ob_start; /* Logical start inside ob_bytes */
Py_ssize_t ob_exports; /* How many buffer exports */
PyObject *ob_bytes_object; /* PyBytes for zero-copy bytes conversion */
} PyByteArrayObject;

PyAPI_DATA(char) _PyByteArray_empty_string[];
Expand Down
71 changes: 71 additions & 0 deletions Lib/test/test_bytes.py
Original file line number Diff line number Diff line change
Expand Up @@ -1390,6 +1390,16 @@ def test_clear(self):
b.append(ord('p'))
self.assertEqual(b, b'p')

# Cleared object should be empty.
b = bytearray(b'abc')
b.clear()
self.assertEqual(b.__alloc__(), 0)
base_size = sys.getsizeof(bytearray())
self.assertEqual(sys.getsizeof(b), base_size)
c = b.copy()
self.assertEqual(c.__alloc__(), 0)
self.assertEqual(sys.getsizeof(c), base_size)

def test_copy(self):
b = bytearray(b'abc')
bb = b.copy()
Expand Down Expand Up @@ -1451,6 +1461,61 @@ def test_resize(self):
self.assertRaises(MemoryError, bytearray().resize, sys.maxsize)
self.assertRaises(MemoryError, bytearray(1000).resize, sys.maxsize)

def test_take_bytes(self):
    # Taking with no argument returns every byte and leaves the
    # bytearray empty, with no allocation left behind.
    ba = bytearray(b'ab')
    self.assertEqual(ba.take_bytes(), b'ab')
    self.assertEqual(len(ba), 0)
    self.assertEqual(ba, bytearray(b''))
    self.assertEqual(ba.__alloc__(), 0)
    # The emptied object should report the same size as a fresh bytearray.
    base_size = sys.getsizeof(bytearray())
    self.assertEqual(sys.getsizeof(ba), base_size)

    # Positive and negative slicing.
    # Each take removes the returned prefix from ba, so every check below
    # depends on the state left behind by the previous call.
    ba = bytearray(b'abcdef')
    self.assertEqual(ba.take_bytes(1), b'a')
    self.assertEqual(ba, bytearray(b'bcdef'))
    self.assertEqual(len(ba), 5)
    # -5 on a 5-byte buffer means len + n == 0 bytes: nothing is taken.
    self.assertEqual(ba.take_bytes(-5), b'')
    self.assertEqual(ba, bytearray(b'bcdef'))
    self.assertEqual(len(ba), 5)
    # -3 on a 5-byte buffer takes the first len + n == 2 bytes.
    self.assertEqual(ba.take_bytes(-3), b'bc')
    self.assertEqual(ba, bytearray(b'def'))
    self.assertEqual(len(ba), 3)
    # Taking exactly the remaining length empties the buffer.
    self.assertEqual(ba.take_bytes(3), b'def')
    self.assertEqual(ba, bytearray(b''))
    self.assertEqual(len(ba), 0)

    # Take nothing from emptiness.
    self.assertEqual(ba.take_bytes(0), b'')
    self.assertEqual(ba.take_bytes(), b'')
    self.assertEqual(ba.take_bytes(None), b'')

    # Out of bounds, bad take value.
    # ba is still empty here, so even -1 is out of range.
    self.assertRaises(IndexError, ba.take_bytes, -1)
    # A float is not accepted as a count.
    self.assertRaises(TypeError, ba.take_bytes, 3.14)
    # Asking for more bytes than the buffer holds (7 > 6) is an error.
    ba = bytearray(b'abcdef')
    self.assertRaises(IndexError, ba.take_bytes, 7)

    # Offset between physical and logical start (ob_bytes != ob_start).
    ba = bytearray(b'abcde')
    del ba[:2]
    self.assertEqual(ba, bytearray(b'cde'))
    self.assertEqual(ba.take_bytes(), b'cde')

    # Overallocation at end.
    # Shrink the buffer two ways (slice deletion and resize) and check
    # that only the logical contents are returned.
    ba = bytearray(b'abcde')
    del ba[-2:]
    self.assertEqual(ba, bytearray(b'abc'))
    self.assertEqual(ba.take_bytes(), b'abc')
    ba = bytearray(b'abcde')
    ba.resize(4)
    self.assertEqual(ba.take_bytes(), b'abcd')

    # Take of a bytearray with references should fail.
    # While the memoryview is open the take must raise BufferError; once
    # the with block has exited, the same take succeeds.
    ba = bytearray(b'abc')
    with memoryview(ba) as mv:
        self.assertRaises(BufferError, ba.take_bytes)
    self.assertEqual(ba.take_bytes(), b'abc')

def test_setitem(self):
def setitem_as_mapping(b, i, val):
Expand Down Expand Up @@ -2557,6 +2622,11 @@ def zfill(b, a):
c = a.zfill(0x400000)
assert not c or c[-1] not in (0xdd, 0xcd)

def take_bytes(b, a):
    # Worker passed to check(): block on b.wait() first (b is presumably
    # a threading.Barrier shared by all workers -- confirm against
    # check()), then take everything out of the shared bytearray a.
    b.wait()
    taken = a.take_bytes()
    # An empty result is acceptable; otherwise the data must still start
    # with the fill byte.
    if taken:
        assert taken[0] == 48  # '0'

def check(funcs, a=None, *args):
if a is None:
a = bytearray(b'0' * 0x400000)
Expand Down Expand Up @@ -2617,6 +2687,7 @@ def check(funcs, a=None, *args):
check([clear] + [splitlines] * 10, bytearray(b'\n' * 0x400))
check([clear] + [startswith] * 10)
check([clear] + [strip] * 10)
check([clear] + [take_bytes] * 10)

check([clear] + [contains] * 10)
check([clear] + [subscript] * 10)
Expand Down
2 changes: 1 addition & 1 deletion Lib/test/test_sys.py
Original file line number Diff line number Diff line change
Expand Up @@ -1583,7 +1583,7 @@ def test_objecttypes(self):
samples = [b'', b'u'*100000]
for sample in samples:
x = bytearray(sample)
check(x, vsize('n2Pi') + x.__alloc__())
check(x, vsize('n2PiP') + x.__alloc__())
# bytearray_iterator
check(iter(bytearray()), size('nP'))
# bytes
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Update :class:`bytearray` to use a :class:`bytes` under the hood as its buffer
and add :meth:`bytearray.take_bytes` to take that buffer out as :class:`bytes`.
Loading
Loading