Skip to content

Commit fb75d01

Browse files
corona10 and methane authored
gh-91146: More reduce allocation size of list from str.split/rsplit (gh-95493)
Co-authored-by: Inada Naoki <[email protected]>
1 parent 347c783 commit fb75d01

File tree

2 files changed

+23
-10
lines changed

2 files changed

+23
-10
lines changed
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
Reduce allocation size of :class:`list` from :meth:`str.split`
2-
and :meth:`str.rsplit`. Patch by Dong-hee Na.
2+
and :meth:`str.rsplit`. Patch by Dong-hee Na and Inada Naoki.

Objects/unicodeobject.c

Lines changed: 22 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -9698,11 +9698,11 @@ split(PyObject *self,
96989698
PyObject* out;
96999699
len1 = PyUnicode_GET_LENGTH(self);
97009700
kind1 = PyUnicode_KIND(self);
9701-
if (maxcount < 0) {
9702-
maxcount = len1;
9703-
}
97049701

9705-
if (substring == NULL)
9702+
if (substring == NULL) {
9703+
if (maxcount < 0) {
9704+
maxcount = (len1 - 1) / 2 + 1;
9705+
}
97069706
switch (kind1) {
97079707
case PyUnicode_1BYTE_KIND:
97089708
if (PyUnicode_IS_ASCII(self))
@@ -9728,9 +9728,16 @@ split(PyObject *self,
97289728
default:
97299729
Py_UNREACHABLE();
97309730
}
9731+
}
97319732

97329733
kind2 = PyUnicode_KIND(substring);
97339734
len2 = PyUnicode_GET_LENGTH(substring);
9735+
if (maxcount < 0) {
9736+
// if len2 == 0, it will raise ValueError.
9737+
maxcount = len2 == 0 ? 0 : (len1 / len2) + 1;
9738+
// handle expected overflow case: (Py_SSIZE_T_MAX / 1) + 1
9739+
maxcount = maxcount < 0 ? len1 : maxcount;
9740+
}
97349741
if (kind1 < kind2 || len1 < len2) {
97359742
out = PyList_New(1);
97369743
if (out == NULL)
@@ -9785,11 +9792,11 @@ rsplit(PyObject *self,
97859792

97869793
len1 = PyUnicode_GET_LENGTH(self);
97879794
kind1 = PyUnicode_KIND(self);
9788-
if (maxcount < 0) {
9789-
maxcount = len1;
9790-
}
97919795

9792-
if (substring == NULL)
9796+
if (substring == NULL) {
9797+
if (maxcount < 0) {
9798+
maxcount = (len1 - 1) / 2 + 1;
9799+
}
97939800
switch (kind1) {
97949801
case PyUnicode_1BYTE_KIND:
97959802
if (PyUnicode_IS_ASCII(self))
@@ -9815,9 +9822,15 @@ rsplit(PyObject *self,
98159822
default:
98169823
Py_UNREACHABLE();
98179824
}
9818-
9825+
}
98199826
kind2 = PyUnicode_KIND(substring);
98209827
len2 = PyUnicode_GET_LENGTH(substring);
9828+
if (maxcount < 0) {
9829+
// if len2 == 0, it will raise ValueError.
9830+
maxcount = len2 == 0 ? 0 : (len1 / len2) + 1;
9831+
// handle expected overflow case: (Py_SSIZE_T_MAX / 1) + 1
9832+
maxcount = maxcount < 0 ? len1 : maxcount;
9833+
}
98219834
if (kind1 < kind2 || len1 < len2) {
98229835
out = PyList_New(1);
98239836
if (out == NULL)

0 commit comments

Comments
 (0)