Skip to content

Commit 7dc747e

Browse files
authored
fix escaping of ctrl- and meta-chars in regexes (#831)
1 parent a0108e8 commit 7dc747e

File tree

2 files changed

+60
-2
lines changed

2 files changed

+60
-2
lines changed

lib/parser/lexer.rl

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -937,7 +937,11 @@ class Parser::Lexer
937937
# b"
938938
# must be parsed as "ab"
939939
current_literal.extend_string(tok.gsub("\\\n".freeze, ''.freeze), @ts, @te)
940-
elsif current_literal.regexp? && @version < 31
940+
elsif current_literal.regexp? && @version >= 31 && %w[c C m M].include?(escaped_char)
941+
# Ruby >= 3.1 escapes control (\c, \C-) and meta (\M-) chars; that's the only escape sequence
942+
# supported by regexes so far, so it needs a separate branch.
943+
current_literal.extend_string(@escape, @ts, @te)
944+
elsif current_literal.regexp?
941945
# Regular expressions should include escape sequences in their
942946
# escaped form. On the other hand, escaped newlines are removed (in cases like "\\C-\\\n\\M-x")
943947
current_literal.extend_string(tok.gsub("\\\n".freeze, ''.freeze), @ts, @te)

test/test_parser.rb

Lines changed: 55 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10673,7 +10673,61 @@ def test_warn_on_duplicate_hash_key
1067310673
SINCE_3_1)
1067410674
end
1067510675

10676-
def test_control_meta_escape_chars_in_regexp
10676+
def test_parser_bug_830
10677+
assert_parses(
10678+
s(:regexp,
10679+
s(:str, "\\("),
10680+
s(:regopt)),
10681+
%q{/\(/},
10682+
%q{},
10683+
ALL_VERSIONS)
10684+
end
10685+
10686+
def test_control_meta_escape_chars_in_regexp__before_31
10687+
assert_parses(
10688+
s(:regexp, s(:str, "\\c\\xFF"), s(:regopt)),
10689+
%q{/\c\xFF/}.dup.force_encoding('ascii-8bit'),
10690+
%q{},
10691+
ALL_VERSIONS - SINCE_3_1)
10692+
10693+
assert_parses(
10694+
s(:regexp, s(:str, "\\c\\M-\\xFF"), s(:regopt)),
10695+
%q{/\c\M-\xFF/}.dup.force_encoding('ascii-8bit'),
10696+
%q{},
10697+
ALL_VERSIONS - SINCE_3_1)
10698+
10699+
assert_parses(
10700+
s(:regexp, s(:str, "\\C-\\xFF"), s(:regopt)),
10701+
%q{/\C-\xFF/}.dup.force_encoding('ascii-8bit'),
10702+
%q{},
10703+
ALL_VERSIONS - SINCE_3_1)
10704+
10705+
assert_parses(
10706+
s(:regexp, s(:str, "\\C-\\M-\\xFF"), s(:regopt)),
10707+
%q{/\C-\M-\xFF/}.dup.force_encoding('ascii-8bit'),
10708+
%q{},
10709+
ALL_VERSIONS - SINCE_3_1)
10710+
10711+
assert_parses(
10712+
s(:regexp, s(:str, "\\M-\\xFF"), s(:regopt)),
10713+
%q{/\M-\xFF/}.dup.force_encoding('ascii-8bit'),
10714+
%q{},
10715+
ALL_VERSIONS - SINCE_3_1)
10716+
10717+
assert_parses(
10718+
s(:regexp, s(:str, "\\M-\\C-\\xFF"), s(:regopt)),
10719+
%q{/\M-\C-\xFF/}.dup.force_encoding('ascii-8bit'),
10720+
%q{},
10721+
ALL_VERSIONS - SINCE_3_1)
10722+
10723+
assert_parses(
10724+
s(:regexp, s(:str, "\\M-\\c\\xFF"), s(:regopt)),
10725+
%q{/\M-\c\xFF/}.dup.force_encoding('ascii-8bit'),
10726+
%q{},
10727+
ALL_VERSIONS - SINCE_3_1)
10728+
end
10729+
10730+
def test_control_meta_escape_chars_in_regexp__since_31
1067710731
x9f = "\x9F".dup.force_encoding('ascii-8bit')
1067810732

1067910733
assert_parses(

0 commit comments

Comments
 (0)