Ignore:
Timestamp:
Dec 8, 2017, 12:32:42 PM (7 years ago)
Author:
[email protected]
Message:

YARR: JIT RegExps with greedy parenthesized sub patterns
https://bugs.webkit.org/show_bug.cgi?id=180538

Reviewed by JF Bastien.

This patch adds JIT support for regular expressions containing greedy counted
parenthesis. An example expression that couldn't be JIT'ed before is /q(a|b)*q/.

Just like in the interpreter, expressions with nested parenthetical subpatterns
require saving the results of previous matches of the parentheses contents along
with any associated state. This saved state is needed in the case that we need
to backtrack. This state is called ParenContext within the code space allocated
for this ParenContext is managed using a simple block allocator within the JIT'ed
code. The raw space managed by this allocator is passed into the JIT'ed function.

Since this fixed sized space may be exceeded, this patch adds a fallback mechanism.
If the JIT'ed code exhausts all its ParenContext space, it returns a new error
JSRegExpJITCodeFailure. The caller will then bytecompile and interpret the
expression.

Due to increased register usage by the parenthesis handling code, the use of
registers by the JIT engine was restructured, with registers used for Unicode
pattern matching replaced with constants.

Reworked some of the context structures that are used across the interpreter
and JIT implementations to make them a little more uniform and to handle the
needs of JIT'ing the new parentheses forms.

To help with development and debugging of this code, compiled patterns dumping
code was enhanced. Also added the ability to also dump interpreter ByteCodes.

  • runtime/RegExp.cpp:

(JSC::byteCodeCompilePattern):
(JSC::RegExp::byteCodeCompileIfNecessary):
(JSC::RegExp::compile):
(JSC::RegExp::compileMatchOnly):

  • runtime/RegExp.h:
  • runtime/RegExpInlines.h:

(JSC::RegExp::matchInline):

  • testRegExp.cpp:

(parseRegExpLine):
(runFromFiles):

  • yarr/Yarr.h:
  • yarr/YarrInterpreter.cpp:

(JSC::Yarr::ByteCompiler::compile):
(JSC::Yarr::ByteCompiler::dumpDisjunction):

  • yarr/YarrJIT.cpp:

(JSC::Yarr::YarrGenerator::ParenContextSizes::ParenContextSizes):
(JSC::Yarr::YarrGenerator::ParenContextSizes::numSubpatterns):
(JSC::Yarr::YarrGenerator::ParenContextSizes::frameSlots):
(JSC::Yarr::YarrGenerator::ParenContext::sizeFor):
(JSC::Yarr::YarrGenerator::ParenContext::nextOffset):
(JSC::Yarr::YarrGenerator::ParenContext::beginOffset):
(JSC::Yarr::YarrGenerator::ParenContext::matchAmountOffset):
(JSC::Yarr::YarrGenerator::ParenContext::subpatternOffset):
(JSC::Yarr::YarrGenerator::ParenContext::savedFrameOffset):
(JSC::Yarr::YarrGenerator::initParenContextFreeList):
(JSC::Yarr::YarrGenerator::allocatePatternContext):
(JSC::Yarr::YarrGenerator::freePatternContext):
(JSC::Yarr::YarrGenerator::savePatternContext):
(JSC::Yarr::YarrGenerator::restorePatternContext):
(JSC::Yarr::YarrGenerator::tryReadUnicodeCharImpl):
(JSC::Yarr::YarrGenerator::storeToFrame):
(JSC::Yarr::YarrGenerator::generateJITFailReturn):
(JSC::Yarr::YarrGenerator::clearMatches):
(JSC::Yarr::YarrGenerator::generate):
(JSC::Yarr::YarrGenerator::backtrack):
(JSC::Yarr::YarrGenerator::opCompileParenthesesSubpattern):
(JSC::Yarr::YarrGenerator::generateEnter):
(JSC::Yarr::YarrGenerator::generateReturn):
(JSC::Yarr::YarrGenerator::YarrGenerator):
(JSC::Yarr::YarrGenerator::compile):

  • yarr/YarrJIT.h:

(JSC::Yarr::YarrCodeBlock::execute):

  • yarr/YarrPattern.cpp:

(JSC::Yarr::indentForNestingLevel):
(JSC::Yarr::dumpUChar32):
(JSC::Yarr::dumpCharacterClass):
(JSC::Yarr::PatternTerm::dump):
(JSC::Yarr::YarrPattern::dumpPattern):

  • yarr/YarrPattern.h:

(JSC::Yarr::PatternTerm::containsAnyCaptures):
(JSC::Yarr::BackTrackInfoParenthesesOnce::returnAddressIndex):
(JSC::Yarr::BackTrackInfoParentheses::beginIndex):
(JSC::Yarr::BackTrackInfoParentheses::returnAddressIndex):
(JSC::Yarr::BackTrackInfoParentheses::matchAmountIndex):
(JSC::Yarr::BackTrackInfoParentheses::patternContextHeadIndex):
(JSC::Yarr::BackTrackInfoAlternative::offsetIndex): Deleted.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/Source/JavaScriptCore/yarr/YarrJIT.h

    r221052 r225695  
    3939#endif
    4040
     41#if CPU(ARM64) || (CPU(X86_64) && !OS(WINDOWS))
     42#define JIT_ALL_PARENS_EXPRESSIONS
     43constexpr size_t patternContextBufferSize = 8192; // Space caller allocates to save nested parenthesis context
     44#endif
     45
    4146namespace JSC {
    4247
     
    4853class YarrCodeBlock {
    4954#if CPU(X86_64) || CPU(ARM64)
     55#ifdef JIT_ALL_PARENS_EXPRESSIONS
     56    typedef MatchResult (*YarrJITCode8)(const LChar* input, unsigned start, unsigned length, int* output, void* freeParenContext, unsigned parenContextSize) YARR_CALL;
     57    typedef MatchResult (*YarrJITCode16)(const UChar* input, unsigned start, unsigned length, int* output, void* freeParenContext, unsigned parenContextSize) YARR_CALL;
     58    typedef MatchResult (*YarrJITCodeMatchOnly8)(const LChar* input, unsigned start, unsigned length, void*, void* freeParenContext, unsigned parenContextSize) YARR_CALL;
     59    typedef MatchResult (*YarrJITCodeMatchOnly16)(const UChar* input, unsigned start, unsigned length, void*, void* freeParenContext, unsigned parenContextSize) YARR_CALL;
     60#else
    5061    typedef MatchResult (*YarrJITCode8)(const LChar* input, unsigned start, unsigned length, int* output) YARR_CALL;
    5162    typedef MatchResult (*YarrJITCode16)(const UChar* input, unsigned start, unsigned length, int* output) YARR_CALL;
    5263    typedef MatchResult (*YarrJITCodeMatchOnly8)(const LChar* input, unsigned start, unsigned length) YARR_CALL;
    5364    typedef MatchResult (*YarrJITCodeMatchOnly16)(const UChar* input, unsigned start, unsigned length) YARR_CALL;
     65#endif
    5466#else
    5567    typedef EncodedMatchResult (*YarrJITCode8)(const LChar* input, unsigned start, unsigned length, int* output) YARR_CALL;
     
    8294    void set16BitCodeMatchOnly(MacroAssemblerCodeRef matchOnly) { m_matchOnly16 = matchOnly; }
    8395
     96#ifdef JIT_ALL_PARENS_EXPRESSIONS
     97    MatchResult execute(const LChar* input, unsigned start, unsigned length, int* output, void* freeParenContext, unsigned parenContextSize)
     98    {
     99        ASSERT(has8BitCode());
     100        return MatchResult(reinterpret_cast<YarrJITCode8>(m_ref8.code().executableAddress())(input, start, length, output, freeParenContext, parenContextSize));
     101    }
     102
     103    MatchResult execute(const UChar* input, unsigned start, unsigned length, int* output, void* freeParenContext, unsigned parenContextSize)
     104    {
     105        ASSERT(has16BitCode());
     106        return MatchResult(reinterpret_cast<YarrJITCode16>(m_ref16.code().executableAddress())(input, start, length, output, freeParenContext, parenContextSize));
     107    }
     108
     109    MatchResult execute(const LChar* input, unsigned start, unsigned length, void* freeParenContext, unsigned parenContextSize)
     110    {
     111        ASSERT(has8BitCodeMatchOnly());
     112        return MatchResult(reinterpret_cast<YarrJITCodeMatchOnly8>(m_matchOnly8.code().executableAddress())(input, start, length, 0, freeParenContext, parenContextSize));
     113    }
     114
     115    MatchResult execute(const UChar* input, unsigned start, unsigned length, void* freeParenContext, unsigned parenContextSize)
     116    {
     117        ASSERT(has16BitCodeMatchOnly());
     118        return MatchResult(reinterpret_cast<YarrJITCodeMatchOnly16>(m_matchOnly16.code().executableAddress())(input, start, length, 0, freeParenContext, parenContextSize));
     119    }
     120#else
    84121    MatchResult execute(const LChar* input, unsigned start, unsigned length, int* output)
    85122    {
     
    105142        return MatchResult(reinterpret_cast<YarrJITCodeMatchOnly16>(m_matchOnly16.code().executableAddress())(input, start, length));
    106143    }
     144#endif
    107145
    108146#if ENABLE(REGEXP_TRACING)
Note: See TracChangeset for help on using the changeset viewer.