Skip to content

Commit 26ae210

Browse files
committed
gh-116609: Ignore UTF-16 BOM in importlib.resources._functional tests
To test the `errors` argument, we read a UTF-16 file as UTF-8 with "backslashreplace" error handling. However, the `utf-16` codec prepends an endian-specific byte-order mark (BOM), so on big-endian machines the expected value doesn't match the test file (which was saved on a little-endian machine). Build the expected value with `utf-16-le` (which emits no BOM) and compare only the end of the result, via a new `assertEndsWith` helper, so the BOM is ignored.
1 parent abfa16b commit 26ae210

File tree

1 file changed

+10
-4
lines changed

1 file changed

+10
-4
lines changed

Lib/test/test_importlib/resources/test_functional.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,12 @@ def _gen_resourcetxt_path_parts(self):
3232
with self.subTest(path_parts=path_parts):
3333
yield path_parts
3434

35+
def assertEndsWith(self, string, suffix):
    """Assert that `string` ends with `suffix`.

    Used to ignore an architecture-specific UTF-16 byte-order mark
    at the start of `string`.

    An empty `suffix` always passes: every string ends with the empty
    string.  (A bare ``string[-len(suffix):]`` slice would get this
    wrong, because ``-0`` slices the *whole* string.)

    Fails with the usual `assertEqual` diff between the tail of
    `string` and `suffix` when the suffix does not match.
    """
    if not string.endswith(suffix):
        # endswith() is false only for a non-empty suffix, so the
        # negative slice below is well-defined; assertEqual is used
        # purely to produce an informative failure message.
        self.assertEqual(string[-len(suffix):], suffix)
40+
3541
def test_read_text(self):
3642
self.assertEqual(
3743
resources.read_text(self.anchor01, 'utf-8.file'),
@@ -65,12 +71,12 @@ def test_read_text(self):
6571
),
6672
'\x00\x01\x02\x03',
6773
)
68-
self.assertEqual(
74+
self.assertEndsWith( # ignore the BOM
6975
resources.read_text(
7076
self.anchor01, 'utf-16.file',
7177
errors='backslashreplace',
7278
),
73-
'Hello, UTF-16 world!\n'.encode('utf-16').decode(
79+
'Hello, UTF-16 world!\n'.encode('utf-16-le').decode(
7480
errors='backslashreplace',
7581
),
7682
)
@@ -112,9 +118,9 @@ def test_open_text(self):
112118
self.anchor01, 'utf-16.file',
113119
errors='backslashreplace',
114120
) as f:
115-
self.assertEqual(
121+
self.assertEndsWith( # ignore the BOM
116122
f.read(),
117-
'Hello, UTF-16 world!\n'.encode('utf-16').decode(
123+
'Hello, UTF-16 world!\n'.encode('utf-16-le').decode(
118124
errors='backslashreplace',
119125
),
120126
)

0 commit comments

Comments
 (0)