Skip to content

Commit 666ecfb

Browse files
authored
bpo-1635741: Release Unicode interned strings at exit (GH-21269)
* PyUnicode_InternInPlace() now ensures that interned strings are ready.
* Add _PyUnicode_ClearInterned().
* Py_Finalize() now releases Unicode interned strings: call _PyUnicode_ClearInterned().
1 parent 90db465 commit 666ecfb

File tree

3 files changed

+30
-32
lines changed

3 files changed

+30
-32
lines changed

Include/internal/pycore_pylifecycle.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@ extern void _PyGC_Fini(PyThreadState *tstate);
7878
extern void _PyType_Fini(void);
7979
extern void _Py_HashRandomization_Fini(void);
8080
extern void _PyUnicode_Fini(PyThreadState *tstate);
81+
extern void _PyUnicode_ClearInterned(PyThreadState *tstate);
8182
extern void _PyLong_Fini(PyThreadState *tstate);
8283
extern void _PyFaulthandler_Fini(void);
8384
extern void _PyHash_Fini(void);

Objects/unicodeobject.c

Lines changed: 28 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -55,8 +55,8 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
5555
#include <windows.h>
5656
#endif
5757

58-
/* Uncomment to display statistics on interned strings at exit when
59-
using Valgrind or Insecure++. */
58+
/* Uncomment to display statistics on interned strings at exit
59+
in _PyUnicode_ClearInterned(). */
6060
/* #define INTERNED_STATS 1 */
6161

6262

@@ -15681,6 +15681,11 @@ PyUnicode_InternInPlace(PyObject **p)
1568115681
}
1568215682

1568315683
#ifdef INTERNED_STRINGS
15684+
if (PyUnicode_READY(s) == -1) {
15685+
PyErr_Clear();
15686+
return;
15687+
}
15688+
1568415689
if (interned == NULL) {
1568515690
interned = PyDict_New();
1568615691
if (interned == NULL) {
@@ -15733,23 +15738,29 @@ PyUnicode_InternFromString(const char *cp)
1573315738
}
1573415739

1573515740

15736-
#if defined(WITH_VALGRIND) || defined(__INSURE__)
15737-
static void
15738-
unicode_release_interned(void)
15741+
void
15742+
_PyUnicode_ClearInterned(PyThreadState *tstate)
1573915743
{
15740-
if (interned == NULL || !PyDict_Check(interned)) {
15744+
if (!_Py_IsMainInterpreter(tstate)) {
15745+
// interned dict is shared by all interpreters
15746+
return;
15747+
}
15748+
15749+
if (interned == NULL) {
1574115750
return;
1574215751
}
15752+
assert(PyDict_CheckExact(interned));
15753+
1574315754
PyObject *keys = PyDict_Keys(interned);
15744-
if (keys == NULL || !PyList_Check(keys)) {
15755+
if (keys == NULL) {
1574515756
PyErr_Clear();
1574615757
return;
1574715758
}
15759+
assert(PyList_CheckExact(keys));
1574815760

15749-
/* Since unicode_release_interned() is intended to help a leak
15750-
detector, interned unicode strings are not forcibly deallocated;
15751-
rather, we give them their stolen references back, and then clear
15752-
and DECREF the interned dict. */
15761+
/* Interned unicode strings are not forcibly deallocated; rather, we give
15762+
them their stolen references back, and then clear and DECREF the
15763+
interned dict. */
1575315764

1575415765
Py_ssize_t n = PyList_GET_SIZE(keys);
1575515766
#ifdef INTERNED_STATS
@@ -15759,9 +15770,8 @@ unicode_release_interned(void)
1575915770
#endif
1576015771
for (Py_ssize_t i = 0; i < n; i++) {
1576115772
PyObject *s = PyList_GET_ITEM(keys, i);
15762-
if (PyUnicode_READY(s) == -1) {
15763-
Py_UNREACHABLE();
15764-
}
15773+
assert(PyUnicode_IS_READY(s));
15774+
1576515775
switch (PyUnicode_CHECK_INTERNED(s)) {
1576615776
case SSTATE_INTERNED_IMMORTAL:
1576715777
Py_SET_REFCNT(s, Py_REFCNT(s) + 1);
@@ -15788,10 +15798,10 @@ unicode_release_interned(void)
1578815798
mortal_size, immortal_size);
1578915799
#endif
1579015800
Py_DECREF(keys);
15801+
1579115802
PyDict_Clear(interned);
1579215803
Py_CLEAR(interned);
1579315804
}
15794-
#endif
1579515805

1579615806

1579715807
/********************* Unicode Iterator **************************/
@@ -16160,31 +16170,17 @@ _PyUnicode_EnableLegacyWindowsFSEncoding(void)
1616016170
void
1616116171
_PyUnicode_Fini(PyThreadState *tstate)
1616216172
{
16163-
struct _Py_unicode_state *state = &tstate->interp->unicode;
16173+
// _PyUnicode_ClearInterned() must be called before _PyUnicode_Fini()
1616416174

16165-
int is_main_interp = _Py_IsMainInterpreter(tstate);
16166-
if (is_main_interp) {
16167-
#if defined(WITH_VALGRIND) || defined(__INSURE__)
16168-
/* Insure++ is a memory analysis tool that aids in discovering
16169-
* memory leaks and other memory problems. On Python exit, the
16170-
* interned string dictionaries are flagged as being in use at exit
16171-
* (which it is). Under normal circumstances, this is fine because
16172-
* the memory will be automatically reclaimed by the system. Under
16173-
* memory debugging, it's a huge source of useless noise, so we
16174-
* trade off slower shutdown for less distraction in the memory
16175-
* reports. -baw
16176-
*/
16177-
unicode_release_interned();
16178-
#endif /* __INSURE__ */
16179-
}
16175+
struct _Py_unicode_state *state = &tstate->interp->unicode;
1618016176

1618116177
Py_CLEAR(state->empty_string);
1618216178

1618316179
for (Py_ssize_t i = 0; i < 256; i++) {
1618416180
Py_CLEAR(state->latin1[i]);
1618516181
}
1618616182

16187-
if (is_main_interp) {
16183+
if (_Py_IsMainInterpreter(tstate)) {
1618816184
unicode_clear_static_strings();
1618916185
}
1619016186

Python/pylifecycle.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1263,6 +1263,7 @@ finalize_interp_types(PyThreadState *tstate)
12631263
_PyFrame_Fini(tstate);
12641264
_PyAsyncGen_Fini(tstate);
12651265
_PyContext_Fini(tstate);
1266+
_PyUnicode_ClearInterned(tstate);
12661267

12671268
_PyDict_Fini(tstate);
12681269
_PyList_Fini(tstate);

0 commit comments

Comments
 (0)