Skip to content

Commit 24d2f68

Browse files
authored
+ lexer.rl: parse meta-control-hex chars in regexes starting from 3.1 (#828)
This commit tracks upstream commit ruby/ruby@11ae581.
1 parent 547d731 commit 24d2f68

File tree

3 files changed

+76
-3
lines changed

3 files changed

+76
-3
lines changed

lib/parser/lexer.rl

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -738,12 +738,14 @@ class Parser::Lexer
738738

739739
maybe_escaped_char = (
740740
'\\' c_any %unescape_char
741+
| '\\x' xdigit{1,2} % { @escape = encode_escape(tok(p - 2, p).to_i(16)) } %slash_c_char
741742
| ( c_any - [\\] ) %read_post_meta_or_ctrl_char
742743
);
743744

744745
maybe_escaped_ctrl_char = ( # why?!
745746
'\\' c_any %unescape_char %slash_c_char
746747
| '?' % { @escape = "\x7f" }
748+
| '\\x' xdigit{1,2} % { @escape = encode_escape(tok(p - 2, p).to_i(16)) } %slash_c_char
747749
| ( c_any - [\\?] ) %read_post_meta_or_ctrl_char %slash_c_char
748750
);
749751

@@ -935,7 +937,7 @@ class Parser::Lexer
935937
# b"
936938
# must be parsed as "ab"
937939
current_literal.extend_string(tok.gsub("\\\n".freeze, ''.freeze), @ts, @te)
938-
elsif current_literal.regexp?
940+
elsif current_literal.regexp? && @version < 31
939941
# Regular expressions should include escape sequences in their
940942
# escaped form. On the other hand, escaped newlines are removed (in cases like "\\C-\\\n\\M-x")
941943
current_literal.extend_string(tok.gsub("\\\n".freeze, ''.freeze), @ts, @te)

test/test_lexer.rb

Lines changed: 25 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3719,4 +3719,29 @@ def test_meta_escape_slash_u__after_30
37193719
refute_scanned_meta_escape_slash_u('"\M-\u0000"')
37203720
refute_scanned_meta_escape_slash_u('"\M-\U0000"')
37213721
end
3722+
3723+
def test_meta_control_hex_escaped_char
3724+
setup_lexer(19)
3725+
3726+
assert_scanned("\"\\c\\xFF\"",
3727+
:tSTRING, "\x9F", [0, 8])
3728+
3729+
assert_scanned("\"\\c\\M-\\xFF\"",
3730+
:tSTRING, "\x9F", [0, 11])
3731+
3732+
assert_scanned("\"\\C-\\xFF\"",
3733+
:tSTRING, "\x9F", [0, 9])
3734+
3735+
assert_scanned("\"\\C-\\M-\\xFF\"",
3736+
:tSTRING, "\x9F", [0, 12])
3737+
3738+
assert_scanned("\"\\M-\\xFF\"",
3739+
:tSTRING, "\x9F", [0, 9])
3740+
3741+
assert_scanned("\"\\M-\\C-\\xFF\"",
3742+
:tSTRING, "\x9F", [0, 12])
3743+
3744+
assert_scanned("\"\\M-\\c\\xFF\"",
3745+
:tSTRING, "\x9F", [0, 11])
3746+
end
37223747
end

test/test_parser.rb

Lines changed: 48 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -5608,7 +5608,7 @@ def test_regexp_encoding
56085608
s(:str, "")),
56095609
%q{/\xa8/n =~ ""}.dup.force_encoding(Encoding::UTF_8),
56105610
%{},
5611-
SINCE_1_9)
5611+
SINCE_1_9 - SINCE_3_1)
56125612
end
56135613

56145614
#
@@ -6513,7 +6513,7 @@ def test_parser_bug_198
65136513
s(:str, "#")),
65146514
%q{[/()\\1/, ?#]},
65156515
%q{},
6516-
SINCE_1_9)
6516+
SINCE_1_9 - SINCE_3_1)
65176517
end
65186518

65196519
def test_parser_bug_272
@@ -10672,4 +10672,50 @@ def test_warn_on_duplicate_hash_key
1067210672
%q{ ~~~~~ location},
1067310673
SINCE_3_1)
1067410674
end
10675+
10676+
def test_control_meta_escape_chars_in_regexp
10677+
x9f = "\x9F".dup.force_encoding('ascii-8bit')
10678+
10679+
assert_parses(
10680+
s(:regexp, s(:str, x9f), s(:regopt)),
10681+
%q{/\c\xFF/}.dup.force_encoding('ascii-8bit'),
10682+
%q{},
10683+
SINCE_3_1)
10684+
10685+
assert_parses(
10686+
s(:regexp, s(:str, x9f), s(:regopt)),
10687+
%q{/\c\M-\xFF/}.dup.force_encoding('ascii-8bit'),
10688+
%q{},
10689+
SINCE_3_1)
10690+
10691+
assert_parses(
10692+
s(:regexp, s(:str, x9f), s(:regopt)),
10693+
%q{/\C-\xFF/}.dup.force_encoding('ascii-8bit'),
10694+
%q{},
10695+
SINCE_3_1)
10696+
10697+
assert_parses(
10698+
s(:regexp, s(:str, x9f), s(:regopt)),
10699+
%q{/\C-\M-\xFF/}.dup.force_encoding('ascii-8bit'),
10700+
%q{},
10701+
SINCE_3_1)
10702+
10703+
assert_parses(
10704+
s(:regexp, s(:str, x9f), s(:regopt)),
10705+
%q{/\M-\xFF/}.dup.force_encoding('ascii-8bit'),
10706+
%q{},
10707+
SINCE_3_1)
10708+
10709+
assert_parses(
10710+
s(:regexp, s(:str, x9f), s(:regopt)),
10711+
%q{/\M-\C-\xFF/}.dup.force_encoding('ascii-8bit'),
10712+
%q{},
10713+
SINCE_3_1)
10714+
10715+
assert_parses(
10716+
s(:regexp, s(:str, x9f), s(:regopt)),
10717+
%q{/\M-\c\xFF/}.dup.force_encoding('ascii-8bit'),
10718+
%q{},
10719+
SINCE_3_1)
10720+
end
1067510721
end

0 commit comments

Comments
 (0)