Skip to content

Commit 9454060

Browse files
authored
bpo-29240, bpo-32030: Py_Main() re-reads config if encoding changes (#4899)
bpo-29240, bpo-32030: If the encoding changes (C locale coerced or UTF-8 Mode changed), Py_Main() now re-reads the configuration with the new encoding. Changes: * Add _Py_UnixMain() called by main(). * Rename pymain_free_pymain() to pymain_clear_pymain(), it can now be called multiple times. * Rename pymain_parse_cmdline_envvars() to pymain_read_conf(). * Py_Main() now clears orig_argc and orig_argv at exit. * Remove argv_copy2, Py_Main() doesn't modify argv anymore. There is no longer any need to keep two copies of the wchar_t** argv. * _PyCoreConfig: add coerce_c_locale and coerce_c_locale_warn. * Py_UTF8Mode is now initialized to -1. * Locale coercion (PEP 538) now respects -I and -E options.
1 parent e796b2f commit 9454060

File tree

12 files changed

+324
-214
lines changed

12 files changed

+324
-214
lines changed

Doc/using/cmdline.rst

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -779,9 +779,7 @@ conflict.
779779

780780
If set to the value ``0``, causes the main Python command line application
781781
to skip coercing the legacy ASCII-based C locale to a more capable UTF-8
782-
based alternative. Note that this setting is checked even when the
783-
:option:`-E` or :option:`-I` options are used, as it is handled prior to
784-
the processing of command line options.
782+
based alternative.
785783

786784
If this variable is *not* set, or is set to a value other than ``0``, and
787785
the current locale reported for the ``LC_CTYPE`` category is the default

Include/pylifecycle.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,9 @@ PyAPI_FUNC(int) Py_FdIsInteractive(FILE *, const char *);
105105

106106
/* Bootstrap __main__ (defined in Modules/main.c) */
107107
PyAPI_FUNC(int) Py_Main(int argc, wchar_t **argv);
108+
#ifdef Py_BUILD_CORE
109+
PyAPI_FUNC(int) _Py_UnixMain(int argc, char **argv);
110+
#endif
108111

109112
/* In getpath.c */
110113
PyAPI_FUNC(wchar_t *) Py_GetProgramFullPath(void);
@@ -194,7 +197,7 @@ PyAPI_FUNC(int) _PyOS_URandomNonblock(void *buffer, Py_ssize_t size);
194197

195198
/* Legacy locale support */
196199
#ifndef Py_LIMITED_API
197-
PyAPI_FUNC(void) _Py_CoerceLegacyLocale(void);
200+
PyAPI_FUNC(void) _Py_CoerceLegacyLocale(const _PyCoreConfig *config);
198201
PyAPI_FUNC(int) _Py_LegacyLocaleDetected(void);
199202
PyAPI_FUNC(char *) _Py_SetLocaleFromEnv(int category);
200203
#endif

Include/pystate.h

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,15 +38,19 @@ typedef struct {
3838
int show_alloc_count; /* -X showalloccount */
3939
int dump_refs; /* PYTHONDUMPREFS */
4040
int malloc_stats; /* PYTHONMALLOCSTATS */
41-
int utf8_mode; /* -X utf8 or PYTHONUTF8 environment variable */
41+
int coerce_c_locale; /* PYTHONCOERCECLOCALE, -1 means unknown */
42+
int coerce_c_locale_warn; /* PYTHONCOERCECLOCALE=warn */
43+
int utf8_mode; /* -X utf8 or PYTHONUTF8 environment variable,
44+
-1 means unknown */
4245

4346
wchar_t *module_search_path_env; /* PYTHONPATH environment variable */
4447
wchar_t *home; /* PYTHONHOME environment variable,
4548
see also Py_SetPythonHome(). */
4649
wchar_t *program_name; /* Program name, see also Py_GetProgramName() */
4750
} _PyCoreConfig;
4851

49-
#define _PyCoreConfig_INIT (_PyCoreConfig){.use_hash_seed = -1}
52+
#define _PyCoreConfig_INIT \
53+
(_PyCoreConfig){.use_hash_seed = -1, .coerce_c_locale = -1, .utf8_mode = -1}
5054
/* Note: _PyCoreConfig_INIT sets other fields to 0/NULL */
5155

5256
/* Placeholders while working on the new configuration API

Lib/test/test_c_locale_coercion.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ def _set_locale_in_subprocess(locale_name):
6565
# If there's no valid CODESET, we expect coercion to be skipped
6666
cmd_fmt += "; import sys; sys.exit(not locale.nl_langinfo(locale.CODESET))"
6767
cmd = cmd_fmt.format(locale_name)
68-
result, py_cmd = run_python_until_end("-c", cmd, __isolated=True)
68+
result, py_cmd = run_python_until_end("-c", cmd, PYTHONCOERCECLOCALE='')
6969
return result.rc == 0
7070

7171

@@ -131,7 +131,6 @@ def get_child_details(cls, env_vars):
131131
"""
132132
result, py_cmd = run_python_until_end(
133133
"-X", "utf8=0", "-c", cls.CHILD_PROCESS_SCRIPT,
134-
__isolated=True,
135134
**env_vars
136135
)
137136
if not result.rc == 0:
@@ -236,6 +235,7 @@ def test_external_target_locale_configuration(self):
236235
"LANG": "",
237236
"LC_CTYPE": "",
238237
"LC_ALL": "",
238+
"PYTHONCOERCECLOCALE": "",
239239
}
240240
for env_var in ("LANG", "LC_CTYPE"):
241241
for locale_to_set in AVAILABLE_TARGETS:
@@ -294,6 +294,7 @@ def _check_c_locale_coercion(self,
294294
"LANG": "",
295295
"LC_CTYPE": "",
296296
"LC_ALL": "",
297+
"PYTHONCOERCECLOCALE": "",
297298
}
298299
base_var_dict.update(extra_vars)
299300
for env_var in ("LANG", "LC_CTYPE"):

Lib/test/test_cmd_line.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -551,7 +551,7 @@ def test_xdev(self):
551551
self.assertEqual(out, "True")
552552

553553
# Warnings
554-
code = ("import sys, warnings; "
554+
code = ("import warnings; "
555555
"print(' '.join('%s::%s' % (f[0], f[2].__name__) "
556556
"for f in warnings.filters))")
557557
if Py_DEBUG:

Lib/test/test_utf8_mode.py

Lines changed: 26 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,16 +7,19 @@
77
import sys
88
import textwrap
99
import unittest
10+
from test import support
1011
from test.support.script_helper import assert_python_ok, assert_python_failure
1112

1213

1314
MS_WINDOWS = (sys.platform == 'win32')
1415

1516

1617
class UTF8ModeTests(unittest.TestCase):
17-
# Override PYTHONUTF8 and PYTHONLEGACYWINDOWSFSENCODING environment
18-
# variables by default
19-
DEFAULT_ENV = {'PYTHONUTF8': '', 'PYTHONLEGACYWINDOWSFSENCODING': ''}
18+
DEFAULT_ENV = {
19+
'PYTHONUTF8': '',
20+
'PYTHONLEGACYWINDOWSFSENCODING': '',
21+
'PYTHONCOERCECLOCALE': '0',
22+
}
2023

2124
def posix_locale(self):
2225
loc = locale.setlocale(locale.LC_CTYPE, None)
@@ -53,7 +56,7 @@ def test_xoption(self):
5356
self.assertEqual(out, '0')
5457

5558
if MS_WINDOWS:
56-
# PYTHONLEGACYWINDOWSFSENCODING disables the UTF-8
59+
# PYTHONLEGACYWINDOWSFSENCODING disables the UTF-8 Mode
5760
# and has the priority over -X utf8
5861
out = self.get_output('-X', 'utf8', '-c', code,
5962
PYTHONLEGACYWINDOWSFSENCODING='1')
@@ -201,6 +204,25 @@ def test_locale_getpreferredencoding(self):
201204
out = self.get_output('-X', 'utf8', '-c', code, LC_ALL='C')
202205
self.assertEqual(out, 'UTF-8 UTF-8')
203206

207+
@unittest.skipIf(MS_WINDOWS, 'test specific to Unix')
208+
def test_cmd_line(self):
209+
arg = 'h\xe9\u20ac'.encode('utf-8')
210+
arg_utf8 = arg.decode('utf-8')
211+
arg_ascii = arg.decode('ascii', 'surrogateescape')
212+
code = 'import locale, sys; print("%s:%s" % (locale.getpreferredencoding(), ascii(sys.argv[1:])))'
213+
214+
def check(utf8_opt, expected, **kw):
215+
out = self.get_output('-X', utf8_opt, '-c', code, arg, **kw)
216+
args = out.partition(':')[2].rstrip()
217+
self.assertEqual(args, ascii(expected), out)
218+
219+
check('utf8', [arg_utf8])
220+
if sys.platform == 'darwin' or support.is_android:
221+
c_arg = arg_utf8
222+
else:
223+
c_arg = arg_ascii
224+
check('utf8=0', [c_arg], LC_ALL='C')
225+
204226

205227
if __name__ == "__main__":
206228
unittest.main()

Modules/getpath.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@ extern "C" {
112112

113113
#define DECODE_LOCALE_ERR(NAME, LEN) \
114114
((LEN) == (size_t)-2) \
115-
? _Py_INIT_USER_ERR("cannot decode " #NAME) \
115+
? _Py_INIT_USER_ERR("cannot decode " NAME) \
116116
: _Py_INIT_NO_MEMORY()
117117

118118
typedef struct {

0 commit comments

Comments
 (0)