Ignore:
Timestamp:
Aug 24, 2017, 2:14:43 PM (8 years ago)
Author:
[email protected]
Message:

Add support for RegExp "dotAll" flag
https://bugs.webkit.org/show_bug.cgi?id=175924

Reviewed by Keith Miller.

JSTests:

Updated tests for new dotAll ('s' flag) changes.

  • es6/Proxy_internal_get_calls_RegExp.prototype.flags.js:
  • stress/static-getter-in-names.js:

Source/JavaScriptCore:

The dotAll RegExp flag, 's', changes . to match any character including line terminators.
Added the "dotAll" identifier as well as the RegExp.prototype.dotAll getter.
Added a new any character CharacterClass that is used to match . terms in a dotAll flagged
RegExp. In the YARR pattern and parsing code, changed the NewlineClassID, which was only
used for '.' processing, to DotClassID. The selection of which builtin character class
that DotClassID resolves to when generating the pattern is conditional on the dotAll flag.
This NewlineClassID to DotClassID refactoring includes the atomBuiltInCharacterClass() in
the WebCore content extensions code in the PatternParser class.

As an optimization, the Yarr JIT actually doesn't perform match checks against the builtin
any character CharacterClass; it merely reads the character. There is another optimization
in our DotStar enclosure processing where, for a non-capturing regular expression in the form
of .*<expression>.*, with an optional leading ^ and/or trailing $, we match the contained
expression and then look for the extents of the surrounding .*'s. When used with the
dotAll flag, that processing always results in the beginning of the string and the end
of the string. Therefore we short circuit finding the beginning and end of the line
or string with dotAll patterns.

  • bytecode/BytecodeDumper.cpp:

(JSC::regexpToSourceString):

  • runtime/CommonIdentifiers.h:
  • runtime/RegExp.cpp:

(JSC::regExpFlags):
(JSC::RegExpFunctionalTestCollector::outputOneTest):

  • runtime/RegExp.h:
  • runtime/RegExpKey.h:
  • runtime/RegExpPrototype.cpp:

(JSC::RegExpPrototype::finishCreation):
(JSC::flagsString):
(JSC::regExpProtoGetterDotAll):

  • yarr/YarrInterpreter.cpp:

(JSC::Yarr::Interpreter::matchDotStarEnclosure):

  • yarr/YarrInterpreter.h:

(JSC::Yarr::BytecodePattern::dotAll const):

  • yarr/YarrJIT.cpp:

(JSC::Yarr::YarrGenerator::optimizeAlternative):
(JSC::Yarr::YarrGenerator::generateCharacterClassOnce):
(JSC::Yarr::YarrGenerator::generateCharacterClassFixed):
(JSC::Yarr::YarrGenerator::generateCharacterClassGreedy):
(JSC::Yarr::YarrGenerator::backtrackCharacterClassNonGreedy):
(JSC::Yarr::YarrGenerator::generateDotStarEnclosure):

  • yarr/YarrParser.h:

(JSC::Yarr::Parser::parseTokens):

  • yarr/YarrPattern.cpp:

(JSC::Yarr::YarrPatternConstructor::atomBuiltInCharacterClass):
(JSC::Yarr::YarrPatternConstructor::atomCharacterClassBuiltIn):
(JSC::Yarr::YarrPatternConstructor::optimizeDotStarWrappedExpressions):
(JSC::Yarr::YarrPattern::YarrPattern):
(JSC::Yarr::PatternTerm::dump):
(JSC::Yarr::anycharCreate):

  • yarr/YarrPattern.h:

(JSC::Yarr::YarrPattern::reset):
(JSC::Yarr::YarrPattern::anyCharacterClass):
(JSC::Yarr::YarrPattern::dotAll const):

Source/WebCore:

Changed due to refactoring NewlineClassID to DotClassID.

No new tests. No change in behavior.

  • contentextensions/URLFilterParser.cpp:

(WebCore::ContentExtensions::PatternParser::atomBuiltInCharacterClass):

LayoutTests:

  • js/regexp-dotall-expected.txt: Added.
  • js/regexp-dotall.html: Added.
  • js/script-tests/Object-getOwnPropertyNames.js:
  • js/script-tests/regexp-dotall.js: Added.

New tests.

  • js/Object-getOwnPropertyNames-expected.txt:

Updated tests for new dotAll ('s' flag) changes.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/Source/JavaScriptCore/yarr/YarrJIT.cpp

    r221111 r221160  
    11701170        JumpList matchDest;
    11711171        readCharacter(m_checkedOffset - term->inputPosition, character);
    1172         matchCharacterClass(character, matchDest, term->characterClass);
    1173 
    1174         if (term->invert())
    1175             op.m_jumps.append(matchDest);
    1176         else {
    1177             op.m_jumps.append(jump());
    1178             matchDest.link(this);
    1179         }
    1180 
     1172        // If we are matching the "any character" builtin class we only need to read the
     1173        // character and don't need to match as it will always succeed.
     1174        if (term->invert() || term->characterClass != m_pattern.anyCharacterClass()) {
     1175            matchCharacterClass(character, matchDest, term->characterClass);
     1176
     1177            if (term->invert())
     1178                op.m_jumps.append(matchDest);
     1179            else {
     1180                op.m_jumps.append(jump());
     1181                matchDest.link(this);
     1182            }
     1183        }
    11811184#ifdef JIT_UNICODE_EXPRESSIONS
    11821185        if (m_decodeSurrogatePairs) {
     
    12161219        JumpList matchDest;
    12171220        readCharacter(m_checkedOffset - term->inputPosition - term->quantityMaxCount, character, countRegister);
    1218         matchCharacterClass(character, matchDest, term->characterClass);
    1219 
    1220         if (term->invert())
    1221             op.m_jumps.append(matchDest);
    1222         else {
    1223             op.m_jumps.append(jump());
    1224             matchDest.link(this);
     1221        // If we are matching the "any character" builtin class we only need to read the
     1222        // character and don't need to match as it will always succeed.
     1223        if (term->invert() || term->characterClass != m_pattern.anyCharacterClass()) {
     1224            matchCharacterClass(character, matchDest, term->characterClass);
     1225
     1226            if (term->invert())
     1227                op.m_jumps.append(matchDest);
     1228            else {
     1229                op.m_jumps.append(jump());
     1230                matchDest.link(this);
     1231            }
    12251232        }
    12261233
     
    12641271            JumpList matchDest;
    12651272            readCharacter(m_checkedOffset - term->inputPosition, character);
    1266             matchCharacterClass(character, matchDest, term->characterClass);
    1267             failures.append(jump());
     1273            // If we are matching the "any character" builtin class we only need to read the
     1274            // character and don't need to match as it will always succeed.
     1275            if (term->characterClass != m_pattern.anyCharacterClass()) {
     1276                matchCharacterClass(character, matchDest, term->characterClass);
     1277                failures.append(jump());
     1278            }
    12681279            matchDest.link(this);
    12691280        }
     
    13661377        JumpList matchDest;
    13671378        readCharacter(m_checkedOffset - term->inputPosition, character);
    1368         matchCharacterClass(character, matchDest, term->characterClass);
    1369 
    1370         if (term->invert())
    1371             nonGreedyFailures.append(matchDest);
    1372         else {
    1373             nonGreedyFailures.append(jump());
    1374             matchDest.link(this);
     1379        // If we are matching the "any character" builtin class we only need to read the
     1380        // character and don't need to match as it will always succeed.
     1381        if (term->invert() || term->characterClass != m_pattern.anyCharacterClass()) {
     1382            matchCharacterClass(character, matchDest, term->characterClass);
     1383
     1384            if (term->invert())
     1385                nonGreedyFailures.append(matchDest);
     1386            else {
     1387                nonGreedyFailures.append(jump());
     1388                matchDest.link(this);
     1389            }
    13751390        }
    13761391
     
    14071422        JumpList saveStartIndex;
    14081423        JumpList foundEndingNewLine;
     1424
     1425        if (m_pattern.dotAll()) {
     1426            move(TrustedImm32(0), matchPos);
     1427            setMatchStart(matchPos);
     1428            move(length, index);
     1429            return;
     1430        }
    14091431
    14101432        ASSERT(!m_pattern.m_body->m_hasFixedSize);
Note: See TracChangeset for help on using the changeset viewer.