Skip to content

Commit ad3eac1

Browse files
gh-52551: Fix encoding issues in strftime() (GH-125193)
Fix time.strftime(), the strftime() method and formatting of the datetime classes datetime, date and time. * Characters not encodable in the current locale are now acceptable in the format string. * Surrogate pairs and sequences of surrogateescape-encoded bytes are no longer recombined. * An embedded null character no longer terminates the format string. This also fixes gh-78662 and gh-124531.
1 parent 0cb20f2 commit ad3eac1

File tree

5 files changed

+307
-232
lines changed

5 files changed

+307
-232
lines changed

Lib/test/datetimetester.py

Lines changed: 55 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2949,11 +2949,32 @@ def test_more_strftime(self):
29492949
self.assertEqual(t.strftime("%z"), "-0200" + z)
29502950
self.assertEqual(t.strftime("%:z"), "-02:00:" + z)
29512951

2952-
# bpo-34482: Check that surrogates don't cause a crash.
2953-
try:
2954-
t.strftime('%y\ud800%m %H\ud800%M')
2955-
except UnicodeEncodeError:
2956-
pass
2952+
def test_strftime_special(self):
2953+
t = self.theclass(2004, 12, 31, 6, 22, 33, 47)
2954+
s1 = t.strftime('%c')
2955+
s2 = t.strftime('%B')
2956+
# gh-52551, gh-78662: Unicode strings should pass through strftime,
2957+
# independently from locale.
2958+
self.assertEqual(t.strftime('\U0001f40d'), '\U0001f40d')
2959+
self.assertEqual(t.strftime('\U0001f4bb%c\U0001f40d%B'), f'\U0001f4bb{s1}\U0001f40d{s2}')
2960+
self.assertEqual(t.strftime('%c\U0001f4bb%B\U0001f40d'), f'{s1}\U0001f4bb{s2}\U0001f40d')
2961+
# Lone surrogates should pass through.
2962+
self.assertEqual(t.strftime('\ud83d'), '\ud83d')
2963+
self.assertEqual(t.strftime('\udc0d'), '\udc0d')
2964+
self.assertEqual(t.strftime('\ud83d%c\udc0d%B'), f'\ud83d{s1}\udc0d{s2}')
2965+
self.assertEqual(t.strftime('%c\ud83d%B\udc0d'), f'{s1}\ud83d{s2}\udc0d')
2966+
self.assertEqual(t.strftime('%c\udc0d%B\ud83d'), f'{s1}\udc0d{s2}\ud83d')
2967+
# Surrogate pairs should not recombine.
2968+
self.assertEqual(t.strftime('\ud83d\udc0d'), '\ud83d\udc0d')
2969+
self.assertEqual(t.strftime('%c\ud83d\udc0d%B'), f'{s1}\ud83d\udc0d{s2}')
2970+
# Surrogate-escaped bytes should not recombine.
2971+
self.assertEqual(t.strftime('\udcf0\udc9f\udc90\udc8d'), '\udcf0\udc9f\udc90\udc8d')
2972+
self.assertEqual(t.strftime('%c\udcf0\udc9f\udc90\udc8d%B'), f'{s1}\udcf0\udc9f\udc90\udc8d{s2}')
2973+
# gh-124531: The null character should not terminate the format string.
2974+
self.assertEqual(t.strftime('\0'), '\0')
2975+
self.assertEqual(t.strftime('\0'*1000), '\0'*1000)
2976+
self.assertEqual(t.strftime('\0%c\0%B'), f'\0{s1}\0{s2}')
2977+
self.assertEqual(t.strftime('%c\0%B\0'), f'{s1}\0{s2}\0')
29572978

29582979
def test_extract(self):
29592980
dt = self.theclass(2002, 3, 4, 18, 45, 3, 1234)
@@ -3736,6 +3757,33 @@ def test_strftime(self):
37363757
# gh-85432: The parameter was named "fmt" in the pure-Python impl.
37373758
t.strftime(format="%f")
37383759

3760+
def test_strftime_special(self):
3761+
t = self.theclass(1, 2, 3, 4)
3762+
s1 = t.strftime('%I%p%Z')
3763+
s2 = t.strftime('%X')
3764+
# gh-52551, gh-78662: Unicode strings should pass through strftime,
3765+
# independently from locale.
3766+
self.assertEqual(t.strftime('\U0001f40d'), '\U0001f40d')
3767+
self.assertEqual(t.strftime('\U0001f4bb%I%p%Z\U0001f40d%X'), f'\U0001f4bb{s1}\U0001f40d{s2}')
3768+
self.assertEqual(t.strftime('%I%p%Z\U0001f4bb%X\U0001f40d'), f'{s1}\U0001f4bb{s2}\U0001f40d')
3769+
# Lone surrogates should pass through.
3770+
self.assertEqual(t.strftime('\ud83d'), '\ud83d')
3771+
self.assertEqual(t.strftime('\udc0d'), '\udc0d')
3772+
self.assertEqual(t.strftime('\ud83d%I%p%Z\udc0d%X'), f'\ud83d{s1}\udc0d{s2}')
3773+
self.assertEqual(t.strftime('%I%p%Z\ud83d%X\udc0d'), f'{s1}\ud83d{s2}\udc0d')
3774+
self.assertEqual(t.strftime('%I%p%Z\udc0d%X\ud83d'), f'{s1}\udc0d{s2}\ud83d')
3775+
# Surrogate pairs should not recombine.
3776+
self.assertEqual(t.strftime('\ud83d\udc0d'), '\ud83d\udc0d')
3777+
self.assertEqual(t.strftime('%I%p%Z\ud83d\udc0d%X'), f'{s1}\ud83d\udc0d{s2}')
3778+
# Surrogate-escaped bytes should not recombine.
3779+
self.assertEqual(t.strftime('\udcf0\udc9f\udc90\udc8d'), '\udcf0\udc9f\udc90\udc8d')
3780+
self.assertEqual(t.strftime('%I%p%Z\udcf0\udc9f\udc90\udc8d%X'), f'{s1}\udcf0\udc9f\udc90\udc8d{s2}')
3781+
# gh-124531: The null character should not terminate the format string.
3782+
self.assertEqual(t.strftime('\0'), '\0')
3783+
self.assertEqual(t.strftime('\0'*1000), '\0'*1000)
3784+
self.assertEqual(t.strftime('\0%I%p%Z\0%X'), f'\0{s1}\0{s2}')
3785+
self.assertEqual(t.strftime('%I%p%Z\0%X\0'), f'{s1}\0{s2}\0')
3786+
37393787
def test_format(self):
37403788
t = self.theclass(1, 2, 3, 4)
37413789
self.assertEqual(t.__format__(''), str(t))
@@ -4259,9 +4307,8 @@ def tzname(self, dt): return self.tz
42594307
self.assertRaises(TypeError, t.strftime, "%Z")
42604308

42614309
# Issue #6697:
4262-
if '_Fast' in self.__class__.__name__:
4263-
Badtzname.tz = '\ud800'
4264-
self.assertRaises(ValueError, t.strftime, "%Z")
4310+
Badtzname.tz = '\ud800'
4311+
self.assertEqual(t.strftime("%Z"), '\ud800')
42654312

42664313
def test_hash_edge_cases(self):
42674314
# Offsets that overflow a basic time.

Lib/test/test_time.py

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -181,8 +181,33 @@ def test_strftime(self):
181181
self.fail('conversion specifier: %r failed.' % format)
182182

183183
self.assertRaises(TypeError, time.strftime, b'%S', tt)
184-
# embedded null character
185-
self.assertRaises(ValueError, time.strftime, '%S\0', tt)
184+
185+
def test_strftime_special(self):
186+
tt = time.gmtime(self.t)
187+
s1 = time.strftime('%c', tt)
188+
s2 = time.strftime('%B', tt)
189+
# gh-52551, gh-78662: Unicode strings should pass through strftime,
190+
# independently from locale.
191+
self.assertEqual(time.strftime('\U0001f40d', tt), '\U0001f40d')
192+
self.assertEqual(time.strftime('\U0001f4bb%c\U0001f40d%B', tt), f'\U0001f4bb{s1}\U0001f40d{s2}')
193+
self.assertEqual(time.strftime('%c\U0001f4bb%B\U0001f40d', tt), f'{s1}\U0001f4bb{s2}\U0001f40d')
194+
# Lone surrogates should pass through.
195+
self.assertEqual(time.strftime('\ud83d', tt), '\ud83d')
196+
self.assertEqual(time.strftime('\udc0d', tt), '\udc0d')
197+
self.assertEqual(time.strftime('\ud83d%c\udc0d%B', tt), f'\ud83d{s1}\udc0d{s2}')
198+
self.assertEqual(time.strftime('%c\ud83d%B\udc0d', tt), f'{s1}\ud83d{s2}\udc0d')
199+
self.assertEqual(time.strftime('%c\udc0d%B\ud83d', tt), f'{s1}\udc0d{s2}\ud83d')
200+
# Surrogate pairs should not recombine.
201+
self.assertEqual(time.strftime('\ud83d\udc0d', tt), '\ud83d\udc0d')
202+
self.assertEqual(time.strftime('%c\ud83d\udc0d%B', tt), f'{s1}\ud83d\udc0d{s2}')
203+
# Surrogate-escaped bytes should not recombine.
204+
self.assertEqual(time.strftime('\udcf0\udc9f\udc90\udc8d', tt), '\udcf0\udc9f\udc90\udc8d')
205+
self.assertEqual(time.strftime('%c\udcf0\udc9f\udc90\udc8d%B', tt), f'{s1}\udcf0\udc9f\udc90\udc8d{s2}')
206+
# gh-124531: The null character should not terminate the format string.
207+
self.assertEqual(time.strftime('\0', tt), '\0')
208+
self.assertEqual(time.strftime('\0'*1000, tt), '\0'*1000)
209+
self.assertEqual(time.strftime('\0%c\0%B', tt), f'\0{s1}\0{s2}')
210+
self.assertEqual(time.strftime('%c\0%B\0', tt), f'{s1}\0{s2}\0')
186211

187212
def _bounds_checking(self, func):
188213
# Make sure that strftime() checks the bounds of the various parts
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
Fix encoding issues in :func:`time.strftime`, the
2+
:meth:`~datetime.datetime.strftime` method of the :mod:`datetime` classes
3+
:class:`~datetime.datetime`, :class:`~datetime.date` and
4+
:class:`~datetime.time` and formatting of these classes. Characters not
5+
encodable in the current locale are now acceptable in the format string.
6+
Surrogate pairs and sequences of surrogateescape-encoded bytes are no longer
7+
recombined. An embedded null character no longer terminates the format
8+
string.

0 commit comments

Comments
 (0)