@@ -873,16 +873,30 @@ def test_ignore_case(self):
873
873
self .assertEqual (re .match (r"((a)\s(abc|a))" , "a a" , re .I ).group (1 ), "a a" )
874
874
self .assertEqual (re .match (r"((a)\s(abc|a)*)" , "a aa" , re .I ).group (1 ), "a aa" )
875
875
876
- assert '\u212a ' .lower () == 'k' # 'K'
876
+ # Two different characters have the same lowercase.
877
+ assert 'K' .lower () == '\u212a ' .lower () == 'k' # 'K'
877
878
self .assertTrue (re .match (r'K' , '\u212a ' , re .I ))
878
879
self .assertTrue (re .match (r'k' , '\u212a ' , re .I ))
879
880
self .assertTrue (re .match (r'\u212a' , 'K' , re .I ))
880
881
self .assertTrue (re .match (r'\u212a' , 'k' , re .I ))
881
- assert '\u017f ' .upper () == 'S' # 'ſ'
882
+
883
+ # Two different characters have the same uppercase.
884
+ assert 's' .upper () == '\u017f ' .upper () == 'S' # 'ſ'
882
885
self .assertTrue (re .match (r'S' , '\u017f ' , re .I ))
883
886
self .assertTrue (re .match (r's' , '\u017f ' , re .I ))
884
887
self .assertTrue (re .match (r'\u017f' , 'S' , re .I ))
885
888
self .assertTrue (re .match (r'\u017f' , 's' , re .I ))
889
+
890
+ # Two different characters have the same uppercase. Unicode 9.0+.
891
+ assert '\u0432 ' .upper () == '\u1c80 ' .upper () == '\u0412 ' # 'в', 'ᲀ', 'В'
892
+ self .assertTrue (re .match (r'\u0412' , '\u0432 ' , re .I ))
893
+ self .assertTrue (re .match (r'\u0412' , '\u1c80 ' , re .I ))
894
+ self .assertTrue (re .match (r'\u0432' , '\u0412 ' , re .I ))
895
+ self .assertTrue (re .match (r'\u0432' , '\u1c80 ' , re .I ))
896
+ self .assertTrue (re .match (r'\u1c80' , '\u0412 ' , re .I ))
897
+ self .assertTrue (re .match (r'\u1c80' , '\u0432 ' , re .I ))
898
+
899
+ # Two different characters have the same multicharacter uppercase.
886
900
assert '\ufb05 ' .upper () == '\ufb06 ' .upper () == 'ST' # 'ſt', 'st'
887
901
self .assertTrue (re .match (r'\ufb05' , '\ufb06 ' , re .I ))
888
902
self .assertTrue (re .match (r'\ufb06' , '\ufb05 ' , re .I ))
@@ -896,16 +910,31 @@ def test_ignore_case_set(self):
896
910
self .assertTrue (re .match (br'[19a]' , b'a' , re .I ))
897
911
self .assertTrue (re .match (br'[19a]' , b'A' , re .I ))
898
912
self .assertTrue (re .match (br'[19A]' , b'a' , re .I ))
899
- assert '\u212a ' .lower () == 'k' # 'K'
913
+
914
+ # Two different characters have the same lowercase.
915
+ assert 'K' .lower () == '\u212a ' .lower () == 'k' # 'K'
900
916
self .assertTrue (re .match (r'[19K]' , '\u212a ' , re .I ))
901
917
self .assertTrue (re .match (r'[19k]' , '\u212a ' , re .I ))
902
918
self .assertTrue (re .match (r'[19\u212a]' , 'K' , re .I ))
903
919
self .assertTrue (re .match (r'[19\u212a]' , 'k' , re .I ))
904
- assert '\u017f ' .upper () == 'S' # 'ſ'
920
+
921
+ # Two different characters have the same uppercase.
922
+ assert 's' .upper () == '\u017f ' .upper () == 'S' # 'ſ'
905
923
self .assertTrue (re .match (r'[19S]' , '\u017f ' , re .I ))
906
924
self .assertTrue (re .match (r'[19s]' , '\u017f ' , re .I ))
907
925
self .assertTrue (re .match (r'[19\u017f]' , 'S' , re .I ))
908
926
self .assertTrue (re .match (r'[19\u017f]' , 's' , re .I ))
927
+
928
+ # Two different characters have the same uppercase. Unicode 9.0+.
929
+ assert '\u0432 ' .upper () == '\u1c80 ' .upper () == '\u0412 ' # 'в', 'ᲀ', 'В'
930
+ self .assertTrue (re .match (r'[19\u0412]' , '\u0432 ' , re .I ))
931
+ self .assertTrue (re .match (r'[19\u0412]' , '\u1c80 ' , re .I ))
932
+ self .assertTrue (re .match (r'[19\u0432]' , '\u0412 ' , re .I ))
933
+ self .assertTrue (re .match (r'[19\u0432]' , '\u1c80 ' , re .I ))
934
+ self .assertTrue (re .match (r'[19\u1c80]' , '\u0412 ' , re .I ))
935
+ self .assertTrue (re .match (r'[19\u1c80]' , '\u0432 ' , re .I ))
936
+
937
+ # Two different characters have the same multicharacter uppercase.
909
938
assert '\ufb05 ' .upper () == '\ufb06 ' .upper () == 'ST' # 'ſt', 'st'
910
939
self .assertTrue (re .match (r'[19\ufb05]' , '\ufb06 ' , re .I ))
911
940
self .assertTrue (re .match (r'[19\ufb06]' , '\ufb05 ' , re .I ))
@@ -929,16 +958,30 @@ def test_ignore_case_range(self):
929
958
self .assertTrue (re .match (r'[\U00010400-\U00010427]' , '\U00010428 ' , re .I ))
930
959
self .assertTrue (re .match (r'[\U00010400-\U00010427]' , '\U00010400 ' , re .I ))
931
960
932
- assert '\u212a ' .lower () == 'k' # 'K'
961
+ # Two different characters have the same lowercase.
962
+ assert 'K' .lower () == '\u212a ' .lower () == 'k' # 'K'
933
963
self .assertTrue (re .match (r'[J-M]' , '\u212a ' , re .I ))
934
964
self .assertTrue (re .match (r'[j-m]' , '\u212a ' , re .I ))
935
965
self .assertTrue (re .match (r'[\u2129-\u212b]' , 'K' , re .I ))
936
966
self .assertTrue (re .match (r'[\u2129-\u212b]' , 'k' , re .I ))
937
- assert '\u017f ' .upper () == 'S' # 'ſ'
967
+
968
+ # Two different characters have the same uppercase.
969
+ assert 's' .upper () == '\u017f ' .upper () == 'S' # 'ſ'
938
970
self .assertTrue (re .match (r'[R-T]' , '\u017f ' , re .I ))
939
971
self .assertTrue (re .match (r'[r-t]' , '\u017f ' , re .I ))
940
972
self .assertTrue (re .match (r'[\u017e-\u0180]' , 'S' , re .I ))
941
973
self .assertTrue (re .match (r'[\u017e-\u0180]' , 's' , re .I ))
974
+
975
+ # Two different characters have the same uppercase. Unicode 9.0+.
976
+ assert '\u0432 ' .upper () == '\u1c80 ' .upper () == '\u0412 ' # 'в', 'ᲀ', 'В'
977
+ self .assertTrue (re .match (r'[\u0411-\u0413]' , '\u0432 ' , re .I ))
978
+ self .assertTrue (re .match (r'[\u0411-\u0413]' , '\u1c80 ' , re .I ))
979
+ self .assertTrue (re .match (r'[\u0431-\u0433]' , '\u0412 ' , re .I ))
980
+ self .assertTrue (re .match (r'[\u0431-\u0433]' , '\u1c80 ' , re .I ))
981
+ self .assertTrue (re .match (r'[\u1c80-\u1c82]' , '\u0412 ' , re .I ))
982
+ self .assertTrue (re .match (r'[\u1c80-\u1c82]' , '\u0432 ' , re .I ))
983
+
984
+ # Two different characters have the same multicharacter uppercase.
942
985
assert '\ufb05 ' .upper () == '\ufb06 ' .upper () == 'ST' # 'ſt', 'st'
943
986
self .assertTrue (re .match (r'[\ufb04-\ufb05]' , '\ufb06 ' , re .I ))
944
987
self .assertTrue (re .match (r'[\ufb06-\ufb07]' , '\ufb05 ' , re .I ))
0 commit comments