- 
          
- 
                Notifications
    You must be signed in to change notification settings 
- Fork 33.2k
          gh-139871: Add bytearray.take_bytes([n]) to efficiently extract bytes
          #140128
        
          New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 7 commits
daf8e02
              39b2d15
              86faf1d
              a9328f4
              e9f5ca9
              4784957
              db19def
              451c302
              bab7151
              cb2377c
              20175f8
              e485595
              b5535d0
              7c6e8a8
              4e27d13
              File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | 
|---|---|---|
| @@ -0,0 +1,2 @@ | ||
| Update :class:`bytearray` to use a :class:`bytes` under the hood as its buffer | ||
| and add :func:`bytearray.take_bytes` to take it out. | 
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|  | @@ -141,22 +141,26 @@ PyByteArray_FromStringAndSize(const char *bytes, Py_ssize_t size) | |||||
| } | ||||||
|  | ||||||
| new = PyObject_New(PyByteArrayObject, &PyByteArray_Type); | ||||||
| if (new == NULL) | ||||||
| if (new == NULL) { | ||||||
| return NULL; | ||||||
| } | ||||||
|  | ||||||
| if (size == 0) { | ||||||
| new->ob_bytes_object = NULL; | ||||||
| new->ob_bytes = NULL; | ||||||
| alloc = 0; | ||||||
| } | ||||||
| else { | ||||||
| alloc = size + 1; | ||||||
| new->ob_bytes = PyMem_Malloc(alloc); | ||||||
| new->ob_bytes_object = PyBytes_FromStringAndSize(NULL, alloc); | ||||||
|         
                  cmaloney marked this conversation as resolved.
              Show resolved
            Hide resolved         
                  cmaloney marked this conversation as resolved.
              Show resolved
            Hide resolved | ||||||
| new->ob_bytes = PyBytes_AsString(new->ob_bytes_object); | ||||||
| if (new->ob_bytes == NULL) { | ||||||
| Py_DECREF(new); | ||||||
| return PyErr_NoMemory(); | ||||||
| } | ||||||
| if (bytes != NULL && size > 0) | ||||||
| if (bytes != NULL && size > 0) { | ||||||
| memcpy(new->ob_bytes, bytes, size); | ||||||
| } | ||||||
| new->ob_bytes[size] = '\0'; /* Trailing null byte */ | ||||||
| There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 
 Line 133 in 1bfe86c 
 I've tried implementing that twice now and a lot more code moves (ex. lots of branches can go away; amount of space allocated changes a lot); I'd like to not do in this PR although am happy to work on doing in a separate PR There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. in particular, what  | ||||||
| } | ||||||
| Py_SET_SIZE(new, size); | ||||||
|  | @@ -189,7 +193,6 @@ static int | |||||
| bytearray_resize_lock_held(PyObject *self, Py_ssize_t requested_size) | ||||||
| { | ||||||
| _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(self); | ||||||
| void *sval; | ||||||
| PyByteArrayObject *obj = ((PyByteArrayObject *)self); | ||||||
| /* All computations are done unsigned to avoid integer overflows | ||||||
| (see issue #22335). */ | ||||||
|  | @@ -244,25 +247,28 @@ bytearray_resize_lock_held(PyObject *self, Py_ssize_t requested_size) | |||||
| return -1; | ||||||
| } | ||||||
|  | ||||||
| /* re-align data to the start of the allocation. */ | ||||||
| if (logical_offset > 0) { | ||||||
| sval = PyMem_Malloc(alloc); | ||||||
| if (sval == NULL) { | ||||||
| PyErr_NoMemory(); | ||||||
| memmove(obj->ob_bytes, obj->ob_start, | ||||||
| Py_MIN(requested_size, Py_SIZE(self))); | ||||||
| } | ||||||
|  | ||||||
| if (obj->ob_bytes_object == NULL) { | ||||||
| obj->ob_bytes_object = PyBytes_FromStringAndSize(NULL, alloc); | ||||||
| if (obj->ob_bytes_object == NULL) { | ||||||
| return -1; | ||||||
| } | ||||||
| memcpy(sval, PyByteArray_AS_STRING(self), | ||||||
| Py_MIN((size_t)requested_size, (size_t)Py_SIZE(self))); | ||||||
| PyMem_Free(obj->ob_bytes); | ||||||
| } | ||||||
| else { | ||||||
| sval = PyMem_Realloc(obj->ob_bytes, alloc); | ||||||
| if (sval == NULL) { | ||||||
| PyErr_NoMemory(); | ||||||
| if (_PyBytes_Resize(&obj->ob_bytes_object, alloc) == -1) { | ||||||
| Py_SET_SIZE(self, 0); | ||||||
| obj->ob_bytes = obj->ob_start = NULL; | ||||||
| FT_ATOMIC_STORE_SSIZE_RELAXED(obj->ob_alloc, 0); | ||||||
| return -1; | ||||||
| } | ||||||
| } | ||||||
|  | ||||||
| obj->ob_bytes = obj->ob_start = sval; | ||||||
| obj->ob_bytes = obj->ob_start = PyBytes_AS_STRING(obj->ob_bytes_object); | ||||||
| Py_SET_SIZE(self, size); | ||||||
| FT_ATOMIC_STORE_SSIZE_RELAXED(obj->ob_alloc, alloc); | ||||||
| obj->ob_bytes[size] = '\0'; /* Trailing null byte */ | ||||||
|  | @@ -1169,9 +1175,7 @@ bytearray_dealloc(PyObject *op) | |||||
| "deallocated bytearray object has exported buffers"); | ||||||
| PyErr_Print(); | ||||||
| } | ||||||
| if (self->ob_bytes != 0) { | ||||||
| PyMem_Free(self->ob_bytes); | ||||||
| } | ||||||
| Py_CLEAR(self->ob_bytes_object); | ||||||
| There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 
        Suggested change
       
 no need to set it to NULL here. | ||||||
| Py_TYPE(self)->tp_free((PyObject *)self); | ||||||
| } | ||||||
|  | ||||||
|  | @@ -1491,6 +1495,95 @@ bytearray_resize_impl(PyByteArrayObject *self, Py_ssize_t size) | |||||
| } | ||||||
|  | ||||||
|  | ||||||
| /*[clinic input] | ||||||
| @critical_section | ||||||
| bytearray.take_bytes | ||||||
| n: object = None | ||||||
| Bytes to take, negative indexes from end. None indicates all bytes. | ||||||
| / | ||||||
| Take *n* bytes from the bytearray and return them as a bytes object. | ||||||
| [clinic start generated code]*/ | ||||||
|  | ||||||
| static PyObject * | ||||||
| bytearray_take_bytes_impl(PyByteArrayObject *self, PyObject *n) | ||||||
| /*[clinic end generated code: output=3147fbc0bbbe8d94 input=b15b5172cdc6deda]*/ | ||||||
| { | ||||||
| Py_ssize_t to_take, original; | ||||||
| Py_ssize_t size = Py_SIZE(self); | ||||||
| if (Py_IsNone(n)) { | ||||||
| to_take = original = size; | ||||||
| } | ||||||
| // Integer index, from start (zero, positive) or end (negative). | ||||||
| else if (_PyIndex_Check(n)) { | ||||||
| to_take = original = PyNumber_AsSsize_t(n, PyExc_IndexError); | ||||||
| if (to_take == -1 && PyErr_Occurred()) { | ||||||
| return NULL; | ||||||
| } | ||||||
| if (to_take < 0) { | ||||||
| to_take += size; | ||||||
| } | ||||||
| } else { | ||||||
| PyErr_SetString(PyExc_TypeError, "n must be an integer or None"); | ||||||
| return NULL; | ||||||
| } | ||||||
|  | ||||||
| if (to_take < 0 || to_take > size) { | ||||||
| PyErr_Format(PyExc_IndexError, | ||||||
| "can't take %d(%d) outside size %d", | ||||||
|         
                  cmaloney marked this conversation as resolved.
              Outdated
          
            Show resolved
            Hide resolved | ||||||
| original, to_take, size); | ||||||
| return NULL; | ||||||
| } | ||||||
|  | ||||||
| // Exports may change the contents, No mutable bytes allowed. | ||||||
| if (!_canresize(self)) { | ||||||
| return NULL; | ||||||
| } | ||||||
|  | ||||||
| if (to_take == 0 || size == 0) { | ||||||
| return Py_GetConstant(Py_CONSTANT_EMPTY_BYTES); | ||||||
| } | ||||||
|  | ||||||
| // Copy remaining bytes to a new bytes. | ||||||
| PyObject *remaining = NULL; | ||||||
| Py_ssize_t remaining_length = size - to_take; | ||||||
| if (remaining_length > 0) { | ||||||
| // +1 to copy across the null which always ends a bytearray. | ||||||
| remaining = PyBytes_FromStringAndSize(self->ob_start + to_take, | ||||||
| remaining_length + 1); | ||||||
| if (remaining == NULL) { | ||||||
| return NULL; | ||||||
| } | ||||||
| } | ||||||
|  | ||||||
| // If the bytes are offset inside the buffer must first align. | ||||||
| if (self->ob_start != self->ob_bytes) { | ||||||
| memmove(self->ob_bytes, self->ob_start, to_take); | ||||||
| self->ob_start = self->ob_bytes; | ||||||
| } | ||||||
|  | ||||||
| if (_PyBytes_Resize(&self->ob_bytes_object, to_take) == -1) { | ||||||
| Py_CLEAR(remaining); | ||||||
| return NULL; | ||||||
| } | ||||||
|  | ||||||
| // Point the bytearray towards the buffer with the remaining data. | ||||||
| PyObject *result = self->ob_bytes_object; | ||||||
| self->ob_bytes_object = remaining; | ||||||
| if (remaining) { | ||||||
| self->ob_bytes = self->ob_start = PyBytes_AS_STRING(self->ob_bytes_object); | ||||||
| Py_SET_SIZE(self, size - to_take); | ||||||
| FT_ATOMIC_STORE_SSIZE_RELAXED(self->ob_alloc, size - to_take + 1); | ||||||
| } | ||||||
| else { | ||||||
| self->ob_bytes = self->ob_start = NULL; | ||||||
| Py_SET_SIZE(self, 0); | ||||||
| FT_ATOMIC_STORE_SSIZE_RELAXED(self->ob_alloc, 0); | ||||||
| } | ||||||
|  | ||||||
| return result; | ||||||
| } | ||||||
|  | ||||||
|  | ||||||
| /*[clinic input] | ||||||
| @critical_section | ||||||
| bytearray.translate | ||||||
|  | @@ -2686,6 +2779,7 @@ static PyMethodDef bytearray_methods[] = { | |||||
| BYTEARRAY_STARTSWITH_METHODDEF | ||||||
| BYTEARRAY_STRIP_METHODDEF | ||||||
| {"swapcase", bytearray_swapcase, METH_NOARGS, _Py_swapcase__doc__}, | ||||||
| BYTEARRAY_TAKE_BYTES_METHODDEF | ||||||
| {"title", bytearray_title, METH_NOARGS, _Py_title__doc__}, | ||||||
| BYTEARRAY_TRANSLATE_METHODDEF | ||||||
| {"upper", bytearray_upper, METH_NOARGS, _Py_upper__doc__}, | ||||||
|  | ||||||
Uh oh!
There was an error while loading. Please reload this page.