Skip to content

Commit 4f3edd6

Browse files
[3.12] gh-105235: Prevent reading outside buffer during mmap.find() (GH-105252) (#106708)
gh-105235: Prevent reading outside buffer during mmap.find() (GH-105252). Changes: (1) add a special case for s[-m:] == p in _PyBytes_Find; (2) add tests for _PyBytes_Find; (3) make sure that start <= end in mmap.find. (cherry picked from commit ab86426) Co-authored-by: Dennis Sweeney <[email protected]>
1 parent 30f6274 commit 4f3edd6

File tree

5 files changed

+161
-3
lines changed

5 files changed

+161
-3
lines changed

Lib/test/test_mmap.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -299,6 +299,27 @@ def test_find_end(self):
299299
self.assertEqual(m.find(b'one', 1, -2), -1)
300300
self.assertEqual(m.find(bytearray(b'one')), 0)
301301

302+
for i in range(-n-1, n+1):
303+
for j in range(-n-1, n+1):
304+
for p in [b"o", b"on", b"two", b"ones", b"s"]:
305+
expected = data.find(p, i, j)
306+
self.assertEqual(m.find(p, i, j), expected, (p, i, j))
307+
308+
def test_find_does_not_access_beyond_buffer(self):
309+
try:
310+
flags = mmap.MAP_PRIVATE | mmap.MAP_ANONYMOUS
311+
PAGESIZE = mmap.PAGESIZE
312+
PROT_NONE = 0
313+
PROT_READ = mmap.PROT_READ
314+
except AttributeError as e:
315+
raise unittest.SkipTest("mmap flags unavailable") from e
316+
for i in range(0, 2049):
317+
with mmap.mmap(-1, PAGESIZE * (i + 1),
318+
flags=flags, prot=PROT_NONE) as guard:
319+
with mmap.mmap(-1, PAGESIZE * (i + 2048),
320+
flags=flags, prot=PROT_READ) as fm:
321+
fm.find(b"fo", -2)
322+
302323

303324
def test_rfind(self):
304325
# test the new 'end' parameter works as expected
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Prevent out-of-bounds memory access during ``mmap.find()`` calls.

Modules/_testinternalcapi.c

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include "frameobject.h"
1616
#include "pycore_atomic_funcs.h" // _Py_atomic_int_get()
1717
#include "pycore_bitutils.h" // _Py_bswap32()
18+
#include "pycore_bytesobject.h" // _PyBytes_Find()
1819
#include "pycore_compile.h" // _PyCompile_CodeGen, _PyCompile_OptimizeCfg, _PyCompile_Assemble
1920
#include "pycore_ceval.h" // _PyEval_AddPendingCall
2021
#include "pycore_fileutils.h" // _Py_normpath
@@ -441,6 +442,118 @@ test_edit_cost(PyObject *self, PyObject *Py_UNUSED(args))
441442
}
442443

443444

445+
static int
446+
check_bytes_find(const char *haystack0, const char *needle0,
447+
int offset, Py_ssize_t expected)
448+
{
449+
Py_ssize_t len_haystack = strlen(haystack0);
450+
Py_ssize_t len_needle = strlen(needle0);
451+
Py_ssize_t result_1 = _PyBytes_Find(haystack0, len_haystack,
452+
needle0, len_needle, offset);
453+
if (result_1 != expected) {
454+
PyErr_Format(PyExc_AssertionError,
455+
"Incorrect result_1: '%s' in '%s' (offset=%zd)",
456+
needle0, haystack0, offset);
457+
return -1;
458+
}
459+
// Allocate new buffer with no NULL terminator.
460+
char *haystack = PyMem_Malloc(len_haystack);
461+
if (haystack == NULL) {
462+
PyErr_NoMemory();
463+
return -1;
464+
}
465+
char *needle = PyMem_Malloc(len_needle);
466+
if (needle == NULL) {
467+
PyMem_Free(haystack);
468+
PyErr_NoMemory();
469+
return -1;
470+
}
471+
memcpy(haystack, haystack0, len_haystack);
472+
memcpy(needle, needle0, len_needle);
473+
Py_ssize_t result_2 = _PyBytes_Find(haystack, len_haystack,
474+
needle, len_needle, offset);
475+
PyMem_Free(haystack);
476+
PyMem_Free(needle);
477+
if (result_2 != expected) {
478+
PyErr_Format(PyExc_AssertionError,
479+
"Incorrect result_2: '%s' in '%s' (offset=%zd)",
480+
needle0, haystack0, offset);
481+
return -1;
482+
}
483+
return 0;
484+
}
485+
486+
static int
487+
check_bytes_find_large(Py_ssize_t len_haystack, Py_ssize_t len_needle,
488+
const char *needle)
489+
{
490+
char *zeros = PyMem_RawCalloc(len_haystack, 1);
491+
if (zeros == NULL) {
492+
PyErr_NoMemory();
493+
return -1;
494+
}
495+
Py_ssize_t res = _PyBytes_Find(zeros, len_haystack, needle, len_needle, 0);
496+
PyMem_RawFree(zeros);
497+
if (res != -1) {
498+
PyErr_Format(PyExc_AssertionError,
499+
"check_bytes_find_large(%zd, %zd) found %zd",
500+
len_haystack, len_needle, res);
501+
return -1;
502+
}
503+
return 0;
504+
}
505+
506+
/* Run the _PyBytes_Find() checks: a list of small haystack/needle pairs
 * with known results, followed by searches over large zero-filled buffers.
 *
 * Returns None when every check passes; returns NULL (with the exception
 * set by the failing helper) on the first check that fails.
 */
static PyObject *
test_bytes_find(PyObject *self, PyObject *Py_UNUSED(args))
{
    // Small cases, evaluated in order: {haystack, needle, offset, expected}.
    static const struct {
        const char *haystack;
        const char *needle;
        int offset;
        Py_ssize_t expected;
    } small_cases[] = {
        {"", "", 0, 0},
        {"Python", "", 0, 0},
        {"Python", "", 3, 3},
        {"Python", "", 6, 6},
        {"Python", "yth", 0, 1},
        {"ython", "yth", 1, 1},
        {"thon", "yth", 2, -1},
        {"Python", "thon", 0, 2},
        {"ython", "thon", 1, 2},
        {"thon", "thon", 2, 2},
        {"hon", "thon", 3, -1},
        {"Pytho", "zz", 0, -1},
        {"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "ab", 0, -1},
        {"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "ba", 0, -1},
        {"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "bb", 0, -1},
        {"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab", "ab", 0, 30},
        {"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaba", "ba", 0, 30},
        {"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaabb", "bb", 0, 30},
    };
    const size_t n_small = sizeof(small_cases) / sizeof(small_cases[0]);
    for (size_t i = 0; i < n_small; i++) {
        if (check_bytes_find(small_cases[i].haystack, small_cases[i].needle,
                             small_cases[i].offset,
                             small_cases[i].expected) < 0) {
            return NULL;
        }
    }

    // Hunt for segfaults
    // n, m chosen here so that (n - m) % (m + 1) == 0
    // This would make default_find in fastsearch.h access haystack[n].
    static const struct {
        Py_ssize_t len_haystack;
        Py_ssize_t len_needle;
        const char *needle;
    } large_cases[] = {
        {2048, 2, "ab"},
        {4096, 16, "0123456789abcdef"},
        {8192, 2, "ab"},
        {16384, 4, "abcd"},
        {32768, 2, "ab"},
    };
    const size_t n_large = sizeof(large_cases) / sizeof(large_cases[0]);
    for (size_t i = 0; i < n_large; i++) {
        if (check_bytes_find_large(large_cases[i].len_haystack,
                                   large_cases[i].len_needle,
                                   large_cases[i].needle) < 0) {
            return NULL;
        }
    }
    Py_RETURN_NONE;
}
555+
556+
444557
static PyObject *
445558
normalize_path(PyObject *self, PyObject *filename)
446559
{
@@ -950,6 +1063,7 @@ static PyMethodDef module_functions[] = {
9501063
{"reset_path_config", test_reset_path_config, METH_NOARGS},
9511064
{"test_atomic_funcs", test_atomic_funcs, METH_NOARGS},
9521065
{"test_edit_cost", test_edit_cost, METH_NOARGS},
1066+
{"test_bytes_find", test_bytes_find, METH_NOARGS},
9531067
{"normalize_path", normalize_path, METH_O, NULL},
9541068
{"get_getpath_codeobject", get_getpath_codeobject, METH_NOARGS, NULL},
9551069
{"EncodeLocaleEx", encode_locale_ex, METH_VARARGS},

Modules/mmapmodule.c

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -343,12 +343,17 @@ mmap_gfind(mmap_object *self,
343343

344344
Py_ssize_t res;
345345
CHECK_VALID_OR_RELEASE(NULL, view);
346-
if (reverse) {
346+
if (end < start) {
347+
res = -1;
348+
}
349+
else if (reverse) {
350+
assert(0 <= start && start <= end && end <= self->size);
347351
res = _PyBytes_ReverseFind(
348352
self->data + start, end - start,
349353
view.buf, view.len, start);
350354
}
351355
else {
356+
assert(0 <= start && start <= end && end <= self->size);
352357
res = _PyBytes_Find(
353358
self->data + start, end - start,
354359
view.buf, view.len, start);

Objects/bytesobject.c

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1274,8 +1274,25 @@ _PyBytes_Find(const char *haystack, Py_ssize_t len_haystack,
12741274
const char *needle, Py_ssize_t len_needle,
12751275
Py_ssize_t offset)
12761276
{
1277-
return stringlib_find(haystack, len_haystack,
1278-
needle, len_needle, offset);
1277+
assert(len_haystack >= 0);
1278+
assert(len_needle >= 0);
1279+
// Extra checks because stringlib_find accesses haystack[len_haystack].
1280+
if (len_needle == 0) {
1281+
return offset;
1282+
}
1283+
if (len_needle > len_haystack) {
1284+
return -1;
1285+
}
1286+
assert(len_haystack >= 1);
1287+
Py_ssize_t res = stringlib_find(haystack, len_haystack - 1,
1288+
needle, len_needle, offset);
1289+
if (res == -1) {
1290+
Py_ssize_t last_align = len_haystack - len_needle;
1291+
if (memcmp(haystack + last_align, needle, len_needle) == 0) {
1292+
return offset + last_align;
1293+
}
1294+
}
1295+
return res;
12791296
}
12801297

12811298
Py_ssize_t

0 commit comments

Comments
 (0)