@@ -3263,7 +3263,11 @@ def test_code_page_name(self):
3263
3263
codecs .code_page_decode , self .CP_UTF8 , b'\xff ' , 'strict' , True )
3264
3264
3265
3265
def check_decode (self , cp , tests ):
3266
- for raw , errors , expected in tests :
3266
+ for raw , errors , expected , * rest in tests :
3267
+ if rest :
3268
+ altexpected , = rest
3269
+ else :
3270
+ altexpected = expected
3267
3271
if expected is not None :
3268
3272
try :
3269
3273
decoded = codecs .code_page_decode (cp , raw , errors , True )
@@ -3280,8 +3284,21 @@ def check_decode(self, cp, tests):
3280
3284
self .assertRaises (UnicodeDecodeError ,
3281
3285
codecs .code_page_decode , cp , raw , errors , True )
3282
3286
3287
+ if altexpected is not None :
3288
+ decoded = raw .decode (f'cp{ cp } ' , errors )
3289
+ self .assertEqual (decoded , altexpected ,
3290
+ '%a.decode("cp%s", %r)=%a != %a'
3291
+ % (raw , cp , errors , decoded , altexpected ))
3292
+ else :
3293
+ self .assertRaises (UnicodeDecodeError ,
3294
+ raw .decode , f'cp{ cp } ' , errors )
3295
+
3283
3296
def check_encode (self , cp , tests ):
3284
- for text , errors , expected in tests :
3297
+ for text , errors , expected , * rest in tests :
3298
+ if rest :
3299
+ altexpected , = rest
3300
+ else :
3301
+ altexpected = expected
3285
3302
if expected is not None :
3286
3303
try :
3287
3304
encoded = codecs .code_page_encode (cp , text , errors )
@@ -3292,18 +3309,26 @@ def check_encode(self, cp, tests):
3292
3309
'%a.encode("cp%s", %r)=%a != %a'
3293
3310
% (text , cp , errors , encoded [0 ], expected ))
3294
3311
self .assertEqual (encoded [1 ], len (text ))
3312
+
3313
+ encoded = text .encode (f'cp{ cp } ' , errors )
3314
+ self .assertEqual (encoded , altexpected ,
3315
+ '%a.encode("cp%s", %r)=%a != %a'
3316
+ % (text , cp , errors , encoded , altexpected ))
3295
3317
else :
3296
3318
self .assertRaises (UnicodeEncodeError ,
3297
3319
codecs .code_page_encode , cp , text , errors )
3320
+ self .assertRaises (UnicodeEncodeError ,
3321
+ text .encode , f'cp{ cp } ' , errors )
3298
3322
3299
3323
def test_cp932 (self ):
3300
3324
self .check_encode (932 , (
3301
3325
('abc' , 'strict' , b'abc' ),
3302
3326
('\uff44 \u9a3e ' , 'strict' , b'\x82 \x84 \xe9 \x80 ' ),
3327
+ ('\uf8f3 ' , 'strict' , b'\xff ' ),
3303
3328
# test error handlers
3304
3329
('\xff ' , 'strict' , None ),
3305
3330
('[\xff ]' , 'ignore' , b'[]' ),
3306
- ('[\xff ]' , 'replace' , b'[y]' ),
3331
+ ('[\xff ]' , 'replace' , b'[y]' , b'[?]' ),
3307
3332
('[\u20ac ]' , 'replace' , b'[?]' ),
3308
3333
('[\xff ]' , 'backslashreplace' , b'[\\ xff]' ),
3309
3334
('[\xff ]' , 'namereplace' ,
@@ -3317,12 +3342,12 @@ def test_cp932(self):
3317
3342
(b'abc' , 'strict' , 'abc' ),
3318
3343
(b'\x82 \x84 \xe9 \x80 ' , 'strict' , '\uff44 \u9a3e ' ),
3319
3344
# invalid bytes
3320
- (b'[\xff ]' , 'strict' , None ),
3321
- (b'[\xff ]' , 'ignore' , '[]' ),
3322
- (b'[\xff ]' , 'replace' , '[\ufffd ]' ),
3323
- (b'[\xff ]' , 'backslashreplace' , '[\\ xff]' ),
3324
- (b'[\xff ]' , 'surrogateescape' , '[\udcff ]' ),
3325
- (b'[\xff ]' , 'surrogatepass' , None ),
3345
+ (b'[\xff ]' , 'strict' , None , '[ \uf8f3 ]' ),
3346
+ (b'[\xff ]' , 'ignore' , '[]' , '[ \uf8f3 ]' ),
3347
+ (b'[\xff ]' , 'replace' , '[\ufffd ]' , '[ \uf8f3 ]' ),
3348
+ (b'[\xff ]' , 'backslashreplace' , '[\\ xff]' , '[ \uf8f3 ]' ),
3349
+ (b'[\xff ]' , 'surrogateescape' , '[\udcff ]' , '[ \uf8f3 ]' ),
3350
+ (b'[\xff ]' , 'surrogatepass' , None , '[ \uf8f3 ]' ),
3326
3351
(b'\x81 \x00 abc' , 'strict' , None ),
3327
3352
(b'\x81 \x00 abc' , 'ignore' , '\x00 abc' ),
3328
3353
(b'\x81 \x00 abc' , 'replace' , '\ufffd \x00 abc' ),
@@ -3337,7 +3362,7 @@ def test_cp1252(self):
3337
3362
# test error handlers
3338
3363
('\u0141 ' , 'strict' , None ),
3339
3364
('\u0141 ' , 'ignore' , b'' ),
3340
- ('\u0141 ' , 'replace' , b'L' ),
3365
+ ('\u0141 ' , 'replace' , b'L' , b'?' ),
3341
3366
('\udc98 ' , 'surrogateescape' , b'\x98 ' ),
3342
3367
('\udc98 ' , 'surrogatepass' , None ),
3343
3368
))
@@ -3347,6 +3372,59 @@ def test_cp1252(self):
3347
3372
(b'\xff ' , 'strict' , '\xff ' ),
3348
3373
))
3349
3374
3375
def test_cp708(self):
    # Exercise code page 708 through the check_encode()/check_decode()
    # helpers.  Each entry is (input, error handler, expected result);
    # an expected value of None means the conversion must raise
    # UnicodeEncodeError / UnicodeDecodeError respectively.
    #
    # The literals must not contain stray whitespace: the bracketed
    # error-handler cases expect exactly b'[]', b'[?]', '[\\xa0]', etc.
    self.check_encode(708, (
        ('abc2%', 'strict', b'abc2%'),
        ('\u060c\u0621\u064a', 'strict', b'\xac\xc1\xea'),
        ('\u2562\xe7\xa0', 'strict', b'\x86\x87\xff'),
        ('\x9a\x9f', 'strict', b'\x9a\x9f'),
        ('\u256b', 'strict', b'\xc0'),
        # test error handlers
        ('[\u0662]', 'strict', None),
        ('[\u0662]', 'ignore', b'[]'),
        ('[\u0662]', 'replace', b'[?]'),
        ('\udca0', 'surrogateescape', b'\xa0'),
        ('\udca0', 'surrogatepass', None),
    ))
    # The decode table mirrors the encode table above for the valid
    # inputs, then feeds the byte 0xA0 through each error handler.
    self.check_decode(708, (
        (b'abc2%', 'strict', 'abc2%'),
        (b'\xac\xc1\xea', 'strict', '\u060c\u0621\u064a'),
        (b'\x86\x87\xff', 'strict', '\u2562\xe7\xa0'),
        (b'\x9a\x9f', 'strict', '\x9a\x9f'),
        (b'\xc0', 'strict', '\u256b'),
        # test error handlers
        (b'\xa0', 'strict', None),
        (b'[\xa0]', 'ignore', '[]'),
        (b'[\xa0]', 'replace', '[\ufffd]'),
        (b'[\xa0]', 'backslashreplace', '[\\xa0]'),
        (b'[\xa0]', 'surrogateescape', '[\udca0]'),
        (b'[\xa0]', 'surrogatepass', None),
    ))
3403
+
3404
def test_cp20106(self):
    # Exercise code page 20106 through the check_encode()/check_decode()
    # helpers.  Each entry is (input, error handler, expected result);
    # an expected value of None means the conversion must raise
    # UnicodeEncodeError / UnicodeDecodeError respectively.
    #
    # The literals must not contain stray whitespace: the error-handler
    # cases expect exactly '()', '(\ufffd)', '(\\xbf)', etc.
    self.check_encode(20106, (
        ('abc', 'strict', b'abc'),
        ('\xa7\xc4\xdf', 'strict', b'@[~'),
        # test error handlers
        # '@' itself is expected to be unencodable here, since the byte
        # b'@' is the encoding of '\xa7' (see the entry above).
        ('@', 'strict', None),
        ('@', 'ignore', b''),
        ('@', 'replace', b'?'),
        ('\udcbf', 'surrogateescape', b'\xbf'),
        ('\udcbf', 'surrogatepass', None),
    ))
    self.check_decode(20106, (
        (b'abc', 'strict', 'abc'),
        (b'@[~', 'strict', '\xa7\xc4\xdf'),
        (b'\xe1\xfe', 'strict', 'a\xdf'),
        # test error handlers
        (b'(\xbf)', 'strict', None),
        (b'(\xbf)', 'ignore', '()'),
        (b'(\xbf)', 'replace', '(\ufffd)'),
        (b'(\xbf)', 'backslashreplace', '(\\xbf)'),
        (b'(\xbf)', 'surrogateescape', '(\udcbf)'),
        (b'(\xbf)', 'surrogatepass', None),
    ))
3427
+
3350
3428
def test_cp_utf7 (self ):
3351
3429
cp = 65000
3352
3430
self .check_encode (cp , (
@@ -3419,17 +3497,15 @@ def test_incremental(self):
3419
3497
False )
3420
3498
self .assertEqual (decoded , ('abc' , 3 ))
3421
3499
3422
- def test_mbcs_alias (self ):
3423
- # Check that looking up our 'default' codepage will return
3424
- # mbcs when we don't have a more specific one available
3425
- code_page = 99_999
3426
- name = f'cp{ code_page } '
3427
- with mock .patch ('_winapi.GetACP' , return_value = code_page ):
3428
- try :
3429
- codec = codecs .lookup (name )
3430
- self .assertEqual (codec .name , 'mbcs' )
3431
- finally :
3432
- codecs .unregister (name )
3500
def test_mbcs_code_page(self):
    # Check that a codec for the current Windows (ANSI) code page is
    # always available.
    try:
        from _winapi import GetACP
    except ImportError:
        # Without _winapi.GetACP there is no code page to query, so
        # the test is skipped (skipTest() raises, ending the test here).
        self.skipTest('requires _winapi.GetACP')
    cp = GetACP()
    # codecs.lookup() raises LookupError when no codec is found under
    # this name; that exception is what fails the test.
    codecs.lookup(f'cp{cp}')
3433
3509
3434
3510
@support .bigmemtest (size = 2 ** 31 , memuse = 7 , dry_run = False )
3435
3511
def test_large_input (self , size ):
0 commit comments