From db81df6b738beaa1f4fb281fa1253ba56c048354 Mon Sep 17 00:00:00 2001 From: Cody Maloney Date: Sun, 9 Nov 2025 17:54:43 -0800 Subject: [PATCH 1/9] gh-141311: Avoid assertion in BytesIO readinto Account for when self->pos is equal to PY_SSIZE_T_MAX. The length of read was correctly set to zero but the asserts assumed self->pos couldn't reach PY_SSIZE_T_MAX. Return early to avoid edge cases. --- Lib/test/test_io/test_memoryio.py | 12 ++++++++++++ .../2025-11-09-18-55-13.gh-issue-141311.qZ3swc.rst | 3 +++ Modules/_io/bytesio.c | 5 +++-- 3 files changed, 18 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-11-09-18-55-13.gh-issue-141311.qZ3swc.rst diff --git a/Lib/test/test_io/test_memoryio.py b/Lib/test/test_io/test_memoryio.py index 63998a86c45b53..4c349c00e4c601 100644 --- a/Lib/test/test_io/test_memoryio.py +++ b/Lib/test/test_io/test_memoryio.py @@ -12,6 +12,7 @@ import pickle import sys import weakref +from test.support.import_helper import import_module class IntLike: def __init__(self, num): @@ -552,6 +553,17 @@ def test_relative_seek(self): memio.seek(1, 1) self.assertEqual(memio.read(), buf[1:]) + def test_issue141311(self): + _testcapi = import_module("_testcapi") + + memio = self.ioclass() + # Seek should allow PY_SSIZE_T_MAX, read should be capped to buffer size. + # Past end of buffer read should always return empty bytes (EOF). 
+ self.assertEqual(_testcapi.PY_SSIZE_T_MAX, + memio.seek(_testcapi.PY_SSIZE_T_MAX)) + buf = bytearray(2) + self.assertEqual(0, memio.readinto(buf)) + def test_unicode(self): memio = self.ioclass() diff --git a/Misc/NEWS.d/next/Library/2025-11-09-18-55-13.gh-issue-141311.qZ3swc.rst b/Misc/NEWS.d/next/Library/2025-11-09-18-55-13.gh-issue-141311.qZ3swc.rst new file mode 100644 index 00000000000000..8cc943a7831573 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-11-09-18-55-13.gh-issue-141311.qZ3swc.rst @@ -0,0 +1,3 @@ +Fix assertion failure in :class:`io.BytesIO` implementation of +:func:`~io.BufferedIOBase.readinto` when the current offset is at the max +offset and readinto is called. diff --git a/Modules/_io/bytesio.c b/Modules/_io/bytesio.c index 30d61f9d68e610..3641822880463c 100644 --- a/Modules/_io/bytesio.c +++ b/Modules/_io/bytesio.c @@ -609,8 +609,9 @@ _io_BytesIO_readinto_impl(bytesio *self, Py_buffer *buffer) n = self->string_size - self->pos; if (len > n) { len = n; - if (len < 0) - len = 0; + if (len < 0) { + return PyLong_FromSsize_t(0); + } } assert(self->pos + len < PY_SSIZE_T_MAX); From 34ed126850aafd9901546ae515d6a838b5f1b93c Mon Sep 17 00:00:00 2001 From: Cody Maloney Date: Sun, 9 Nov 2025 20:59:39 -0800 Subject: [PATCH 2/9] Tweak comment for correctness. --- Lib/test/test_io/test_memoryio.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_io/test_memoryio.py b/Lib/test/test_io/test_memoryio.py index 4c349c00e4c601..82a04516b5094c 100644 --- a/Lib/test/test_io/test_memoryio.py +++ b/Lib/test/test_io/test_memoryio.py @@ -557,8 +557,8 @@ def test_issue141311(self): _testcapi = import_module("_testcapi") memio = self.ioclass() - # Seek should allow PY_SSIZE_T_MAX, read should be capped to buffer size. - # Past end of buffer read should always return empty bytes (EOF). + # Seek allows PY_SSIZE_T_MAX, read handle that. + # Past end of buffer read should always return 0 (EOF). 
self.assertEqual(_testcapi.PY_SSIZE_T_MAX, memio.seek(_testcapi.PY_SSIZE_T_MAX)) buf = bytearray(2) From 2c84efe798458699c379fca0eb6a965284a17fdc Mon Sep 17 00:00:00 2001 From: Cody Maloney Date: Mon, 10 Nov 2025 09:55:24 -0800 Subject: [PATCH 3/9] Simplify by using sys.maxsize --- Lib/test/test_io/test_memoryio.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/Lib/test/test_io/test_memoryio.py b/Lib/test/test_io/test_memoryio.py index 82a04516b5094c..842081d1329c57 100644 --- a/Lib/test/test_io/test_memoryio.py +++ b/Lib/test/test_io/test_memoryio.py @@ -12,7 +12,6 @@ import pickle import sys import weakref -from test.support.import_helper import import_module class IntLike: def __init__(self, num): @@ -554,13 +553,10 @@ def test_relative_seek(self): self.assertEqual(memio.read(), buf[1:]) def test_issue141311(self): - _testcapi = import_module("_testcapi") - memio = self.ioclass() - # Seek allows PY_SSIZE_T_MAX, read handle that. + # Seek allows PY_SSIZE_T_MAX, read should handle that. # Past end of buffer read should always return 0 (EOF). 
- self.assertEqual(_testcapi.PY_SSIZE_T_MAX, - memio.seek(_testcapi.PY_SSIZE_T_MAX)) + self.assertEqual(sys.maxsize, memio.seek(sys.maxsize)) buf = bytearray(2) self.assertEqual(0, memio.readinto(buf)) From d119904111cae9b1c1dc0506d044ffb7e921c2b2 Mon Sep 17 00:00:00 2001 From: Cody Maloney Date: Mon, 10 Nov 2025 10:14:07 -0800 Subject: [PATCH 4/9] Reference io.BytesIO.readinto directly --- .../Library/2025-11-09-18-55-13.gh-issue-141311.qZ3swc.rst | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/Misc/NEWS.d/next/Library/2025-11-09-18-55-13.gh-issue-141311.qZ3swc.rst b/Misc/NEWS.d/next/Library/2025-11-09-18-55-13.gh-issue-141311.qZ3swc.rst index 8cc943a7831573..a14dc87b338ec1 100644 --- a/Misc/NEWS.d/next/Library/2025-11-09-18-55-13.gh-issue-141311.qZ3swc.rst +++ b/Misc/NEWS.d/next/Library/2025-11-09-18-55-13.gh-issue-141311.qZ3swc.rst @@ -1,3 +1,2 @@ -Fix assertion failure in :class:`io.BytesIO` implementation of -:func:`~io.BufferedIOBase.readinto` when the current offset is at the max -offset and readinto is called. +Fix assertion failure in :class:`!io.BytesIO.readinto` when the current offset +is at the max offset and readinto is called. From d78cc84018c982cbdf746cdde67814414b74bf92 Mon Sep 17 00:00:00 2001 From: Cody Maloney Date: Mon, 10 Nov 2025 16:01:46 -0800 Subject: [PATCH 5/9] Resolve read_bytes_lock_held and audit other cases For `read_bytes_lock_held` the `size` parameter is always set to 0 by calling code currently if self->pos > self->string_size. I added a range check for self->pos as an extra safety as codepaths change but can remove if it's too much. `write_bytes_lock_held`: `endpos` is a `size_t` so can hold `2 * PY_SSIZE_T_MAX`. Value is bounds checked in `resize_buffer_lock_held`. A number of cases use `scan_eol_lock_held` to move forward. That has code which checks `self->pos >= self->string_size` and returns `0`. Callsites all seem to handle that correctly. 
--- Lib/test/test_io/test_memoryio.py | 6 ++++++ Modules/_io/bytesio.c | 10 ++++++++++ 2 files changed, 16 insertions(+) diff --git a/Lib/test/test_io/test_memoryio.py b/Lib/test/test_io/test_memoryio.py index 842081d1329c57..bb023735e21398 100644 --- a/Lib/test/test_io/test_memoryio.py +++ b/Lib/test/test_io/test_memoryio.py @@ -54,6 +54,12 @@ def testSeek(self): self.assertEqual(buf[3:], bytesIo.read()) self.assertRaises(TypeError, bytesIo.seek, 0.0) + self.assertEqual(sys.maxsize, bytesIo.seek(sys.maxsize)) + self.assertEqual(self.EOF, bytesIo.read(4)) + + self.assertEqual(sys.maxsize - 2, bytesIo.seek(sys.maxsize - 2)) + self.assertEqual(self.EOF, bytesIo.read(4)) + def testTell(self): buf = self.buftype("1234567890") bytesIo = self.ioclass(buf) diff --git a/Modules/_io/bytesio.c b/Modules/_io/bytesio.c index 3641822880463c..0b177b336afa04 100644 --- a/Modules/_io/bytesio.c +++ b/Modules/_io/bytesio.c @@ -436,6 +436,15 @@ read_bytes_lock_held(bytesio *self, Py_ssize_t size) return Py_NewRef(self->buf); } + /* gh-141311: avoid overflow with self->buf + self->pos */ + if (self->pos >= PY_SSIZE_T_MAX - size) { + self->pos = PY_SSIZE_T_MAX; + size = 0; + } + if (size == 0) { + return PyBytes_FromStringAndSize(NULL, 0); + } + output = PyBytes_AS_STRING(self->buf) + self->pos; self->pos += size; return PyBytes_FromStringAndSize(output, size); @@ -610,6 +619,7 @@ _io_BytesIO_readinto_impl(bytesio *self, Py_buffer *buffer) if (len > n) { len = n; if (len < 0) { + /* gh-141311: avoid overflow with self->buf + self->pos */ return PyLong_FromSsize_t(0); } } From 4c12be7ba819dc7a084a9093ef54abdeb4228000 Mon Sep 17 00:00:00 2001 From: Cody Maloney Date: Mon, 10 Nov 2025 23:17:11 -0800 Subject: [PATCH 6/9] Update blurb for additional change --- .../Library/2025-11-09-18-55-13.gh-issue-141311.qZ3swc.rst | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Misc/NEWS.d/next/Library/2025-11-09-18-55-13.gh-issue-141311.qZ3swc.rst 
b/Misc/NEWS.d/next/Library/2025-11-09-18-55-13.gh-issue-141311.qZ3swc.rst index a14dc87b338ec1..bacc05cbea2b82 100644 --- a/Misc/NEWS.d/next/Library/2025-11-09-18-55-13.gh-issue-141311.qZ3swc.rst +++ b/Misc/NEWS.d/next/Library/2025-11-09-18-55-13.gh-issue-141311.qZ3swc.rst @@ -1,2 +1,3 @@ -Fix assertion failure in :class:`!io.BytesIO.readinto` when the current offset -is at the max offset and readinto is called. +Fix assertion failure in :func:`!io.BytesIO.readinto` and possible +undefined behavior in :class:`io.BytesIO` when the current position +is at or near :data:`sys.maxsize`. From e06d274092971c7e5bb2fa977dc4c2226bcf7521 Mon Sep 17 00:00:00 2001 From: Cody Maloney Date: Tue, 11 Nov 2025 12:50:32 -0800 Subject: [PATCH 7/9] Update for review comments --- Modules/_io/bytesio.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/Modules/_io/bytesio.c b/Modules/_io/bytesio.c index 0b177b336afa04..48c829f419d3d3 100644 --- a/Modules/_io/bytesio.c +++ b/Modules/_io/bytesio.c @@ -436,11 +436,7 @@ read_bytes_lock_held(bytesio *self, Py_ssize_t size) return Py_NewRef(self->buf); } - /* gh-141311: avoid overflow with self->buf + self->pos */ - if (self->pos >= PY_SSIZE_T_MAX - size) { - self->pos = PY_SSIZE_T_MAX; - size = 0; - } + /* gh-141311: avoid past end of self->buf access */ if (size == 0) { return PyBytes_FromStringAndSize(NULL, 0); } @@ -619,12 +615,12 @@ _io_BytesIO_readinto_impl(bytesio *self, Py_buffer *buffer) if (len > n) { len = n; if (len < 0) { - /* gh-141311: avoid overflow with self->buf + self->pos */ + /* gh-141311: avoid past end of self->buf access */ return PyLong_FromSsize_t(0); } } - assert(self->pos + len < PY_SSIZE_T_MAX); + assert(self->pos + len <= PY_SSIZE_T_MAX); assert(len >= 0); memcpy(buffer->buf, PyBytes_AS_STRING(self->buf) + self->pos, len); self->pos += len; From 6cc0ab344203a2cdca8ae2a16bb6e1087bc25261 Mon Sep 17 00:00:00 2001 From: Cody Maloney Date: Tue, 11 Nov 2025 12:53:48 -0800 Subject: [PATCH 8/9] 
tweak blurb --- .../Library/2025-11-09-18-55-13.gh-issue-141311.qZ3swc.rst | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/Misc/NEWS.d/next/Library/2025-11-09-18-55-13.gh-issue-141311.qZ3swc.rst b/Misc/NEWS.d/next/Library/2025-11-09-18-55-13.gh-issue-141311.qZ3swc.rst index bacc05cbea2b82..affc84bb652a70 100644 --- a/Misc/NEWS.d/next/Library/2025-11-09-18-55-13.gh-issue-141311.qZ3swc.rst +++ b/Misc/NEWS.d/next/Library/2025-11-09-18-55-13.gh-issue-141311.qZ3swc.rst @@ -1,3 +1,2 @@ -Fix assertion failure in :func:`!io.BytesIO.readinto` and possible -undefined behavior in :class:`io.BytesIO` when the current position -is at or near :data:`sys.maxsize`. +Fix assertion failure in :func:`!io.BytesIO.readinto` and possible out of bounds +read when position is near :data:`sys.maxsize` in :class:`io.BytesIO`. From d205d4afb926595a09315751ac964017b7de8b2f Mon Sep 17 00:00:00 2001 From: Cody Maloney Date: Tue, 11 Nov 2025 14:24:38 -0800 Subject: [PATCH 9/9] Improve precision of comments around the undefined behavior and safeties --- .../Library/2025-11-09-18-55-13.gh-issue-141311.qZ3swc.rst | 4 ++-- Modules/_io/bytesio.c | 7 +++++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/Misc/NEWS.d/next/Library/2025-11-09-18-55-13.gh-issue-141311.qZ3swc.rst b/Misc/NEWS.d/next/Library/2025-11-09-18-55-13.gh-issue-141311.qZ3swc.rst index affc84bb652a70..bb425ce5df309d 100644 --- a/Misc/NEWS.d/next/Library/2025-11-09-18-55-13.gh-issue-141311.qZ3swc.rst +++ b/Misc/NEWS.d/next/Library/2025-11-09-18-55-13.gh-issue-141311.qZ3swc.rst @@ -1,2 +1,2 @@ -Fix assertion failure in :func:`!io.BytesIO.readinto` and possible out of bounds -read when position is near :data:`sys.maxsize` in :class:`io.BytesIO`. +Fix assertion failure in :func:`!io.BytesIO.readinto` and undefined behavior +arising when read position is above capacity in :class:`io.BytesIO`. 
diff --git a/Modules/_io/bytesio.c b/Modules/_io/bytesio.c index 48c829f419d3d3..715ade7b743c37 100644 --- a/Modules/_io/bytesio.c +++ b/Modules/_io/bytesio.c @@ -436,7 +436,9 @@ read_bytes_lock_held(bytesio *self, Py_ssize_t size) return Py_NewRef(self->buf); } - /* gh-141311: avoid past end of self->buf access */ + /* gh-141311: Avoid undefined behavior when self->pos (limit PY_SSIZE_T_MAX) + is beyond the size of self->buf. Assert above validates size is always in + bounds. When self->pos is out of bounds calling code sets size to 0. */ if (size == 0) { return PyBytes_FromStringAndSize(NULL, 0); } @@ -615,7 +617,8 @@ _io_BytesIO_readinto_impl(bytesio *self, Py_buffer *buffer) if (len > n) { len = n; if (len < 0) { - /* gh-141311: avoid past end of self->buf access */ + /* gh-141311: Avoid undefined behavior when self->pos (limit + PY_SSIZE_T_MAX) points beyond the size of self->buf. */ return PyLong_FromSsize_t(0); } }