Ignore:
Timestamp:
Aug 24, 2018, 11:22:29 AM (7 years ago)
Author:
[email protected]
Message:

YARR: JIT RegExps with non-greedy parenthesized sub patterns
https://bugs.webkit.org/show_bug.cgi?id=180876

Reviewed by Filip Pizlo.

Implemented the non-greedy nested parenthesis based on the prior greedy nested parenthesis work.
For the matching code, the greedy path was correct except that we don't try matching for the
non-greedy case. Added a jump out to the term after the parenthesis and a label to perform the
first / next match when we backtrack. The backtracking code needs to check to see if we have
tried the first match or if we can do another match.

Updated the disassembly annotations to include parenthesis capturing info, quantifier type and
count. Did other minor cleanup as well.

Fixed function name typo, added missing 't' in "setUsesPaternContextBuffer()".

Updated the text in some comments, both for this change as well as accuracy for existing code.

  • yarr/YarrJIT.cpp:

(JSC::Yarr::YarrGenerator::generate):
(JSC::Yarr::YarrGenerator::backtrack):
(JSC::Yarr::YarrGenerator::opCompileParenthesesSubpattern):
(JSC::Yarr::YarrGenerator::compile):
(JSC::Yarr::dumpCompileFailure):
(JSC::Yarr::jitCompile):

  • yarr/YarrJIT.h:

(JSC::Yarr::YarrCodeBlock::setUsesPatternContextBuffer):
(JSC::Yarr::YarrCodeBlock::setUsesPaternContextBuffer): Deleted.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/Source/JavaScriptCore/yarr/YarrJIT.cpp

    r235238 r235322  
    22352235
    22362236                // If the parentheses are quantified Greedy then add a label to jump back
    2237                 // to if get a failed match from after the parentheses. For NonGreedy
     2237                // to if we get a failed match from after the parentheses. For NonGreedy
    22382238                // parentheses, link the jump from before the subpattern to here.
    22392239                if (term->quantityType == QuantifierGreedy)
     
    22992299                //  - To check for empty matches, which must be rejected.
    23002300                //
    2301                 // At the head of a NonGreedy set of parentheses we'll immediately set the
    2302                 // value on the stack to -1 (indicating a match skipping the subpattern),
     2301                // At the head of a NonGreedy set of parentheses we'll immediately set 'begin'
     2302                // in the backtrack info to -1 (indicating a match skipping the subpattern),
    23032303                // and plant a jump to the end. We'll also plant a label to backtrack to
    2304                 // to reenter the subpattern later, with a store to set up index on the
    2305                 // second iteration.
     2304                // to reenter the subpattern later, with a store to set 'begin' to current index
     2305                // on the second iteration.
    23062306                //
    23072307                // FIXME: for capturing parens, could use the index in the capture array?
     
    23902390
    23912391                // If the parentheses are quantified Greedy then add a label to jump back
    2392                 // to if get a failed match from after the parentheses. For NonGreedy
     2392                // to if we get a failed match from after the parentheses. For NonGreedy
    23932393                // parentheses, link the jump from before the subpattern to here.
    23942394                if (term->quantityType == QuantifierGreedy) {
     
    24022402                    YarrOp& beginOp = m_ops[op.m_previousOp];
    24032403                    beginOp.m_jumps.link(this);
     2404                    op.m_reentry = label();
    24042405                }
    24052406#else // !YARR_JIT_ALL_PARENS_EXPRESSIONS
     
    29632964                    m_backtrackingState.link(this);
    29642965
    2965                     if (term->quantityType == QuantifierGreedy) {
    2966                         RegisterID currParenContextReg = regT0;
    2967                         RegisterID newParenContextReg = regT1;
    2968 
    2969                         loadFromFrame(parenthesesFrameLocation + BackTrackInfoParentheses::parenContextHeadIndex(), currParenContextReg);
    2970 
    2971                         restoreParenContext(currParenContextReg, regT2, term->parentheses.subpatternId, term->parentheses.lastSubpatternId, parenthesesFrameLocation);
    2972 
    2973                         freeParenContext(currParenContextReg, newParenContextReg);
    2974                         storeToFrame(newParenContextReg, parenthesesFrameLocation + BackTrackInfoParentheses::parenContextHeadIndex());
    2975                         const RegisterID countTemporary = regT0;
    2976                         loadFromFrame(parenthesesFrameLocation + BackTrackInfoParentheses::matchAmountIndex(), countTemporary);
    2977                         Jump zeroLengthMatch = branchTest32(Zero, countTemporary);
    2978 
    2979                         sub32(TrustedImm32(1), countTemporary);
    2980                         storeToFrame(countTemporary, parenthesesFrameLocation + BackTrackInfoParentheses::matchAmountIndex());
    2981 
     2966                    RegisterID currParenContextReg = regT0;
     2967                    RegisterID newParenContextReg = regT1;
     2968
     2969                    loadFromFrame(parenthesesFrameLocation + BackTrackInfoParentheses::parenContextHeadIndex(), currParenContextReg);
     2970                   
     2971                    restoreParenContext(currParenContextReg, regT2, term->parentheses.subpatternId, term->parentheses.lastSubpatternId, parenthesesFrameLocation);
     2972                   
     2973                    freeParenContext(currParenContextReg, newParenContextReg);
     2974                    storeToFrame(newParenContextReg, parenthesesFrameLocation + BackTrackInfoParentheses::parenContextHeadIndex());
     2975
     2976                    const RegisterID countTemporary = regT0;
     2977                    loadFromFrame(parenthesesFrameLocation + BackTrackInfoParentheses::matchAmountIndex(), countTemporary);
     2978                    Jump zeroLengthMatch = branchTest32(Zero, countTemporary);
     2979
     2980                    sub32(TrustedImm32(1), countTemporary);
     2981                    storeToFrame(countTemporary, parenthesesFrameLocation + BackTrackInfoParentheses::matchAmountIndex());
     2982
     2983                    jump(m_ops[op.m_nextOp].m_reentry);
     2984
     2985                    zeroLengthMatch.link(this);
     2986
     2987                    // Clear the flag in the stackframe indicating we didn't run through the subpattern.
     2988                    storeToFrame(TrustedImm32(-1), parenthesesFrameLocation + BackTrackInfoParentheses::beginIndex());
     2989
     2990                    if (term->quantityType == QuantifierGreedy)
    29822991                        jump(m_ops[op.m_nextOp].m_reentry);
    2983 
    2984                         zeroLengthMatch.link(this);
    2985 
    2986                         // Clear the flag in the stackframe indicating we didn't run through the subpattern.
    2987                         storeToFrame(TrustedImm32(-1), parenthesesFrameLocation + BackTrackInfoParentheses::beginIndex());
    2988 
    2989                         jump(m_ops[op.m_nextOp].m_reentry);
    2990                     }
    29912992
    29922993                    // If Greedy, jump to the end.
     
    30113012                    m_backtrackingState.link(this);
    30123013
    3013                     // Check whether we should backtrack back into the parentheses, or if we
    3014                     // are currently in a state where we had skipped over the subpattern
    3015                     // (in which case the flag value on the stack will be -1).
    30163014                    unsigned parenthesesFrameLocation = term->frameLocation;
    3017                     Jump hadSkipped = branch32(Equal, Address(stackPointerRegister, (parenthesesFrameLocation  + BackTrackInfoParentheses::beginIndex()) * sizeof(void*)), TrustedImm32(-1));
    30183015
    30193016                    if (term->quantityType == QuantifierGreedy) {
     3017                        // Check whether we should backtrack back into the parentheses, or if we
     3018                        // are currently in a state where we had skipped over the subpattern
     3019                        // (in which case the flag value on the stack will be -1).
     3020                        Jump hadSkipped = branch32(Equal, Address(stackPointerRegister, (parenthesesFrameLocation  + BackTrackInfoParentheses::beginIndex()) * sizeof(void*)), TrustedImm32(-1));
     3021
    30203022                        // For Greedy parentheses, we skip after having already tried going
    30213023                        // through the subpattern, so if we get here we're done.
     
    30283030                        // matching path.
    30293031                        ASSERT(term->quantityType == QuantifierNonGreedy);
     3032
     3033                        const RegisterID beginTemporary = regT0;
     3034                        const RegisterID countTemporary = regT1;
     3035
    30303036                        YarrOp& beginOp = m_ops[op.m_previousOp];
    3031                         hadSkipped.linkTo(beginOp.m_reentry, this);
     3037
     3038                        loadFromFrame(parenthesesFrameLocation + BackTrackInfoParentheses::beginIndex(), beginTemporary);
     3039                        branch32(Equal, beginTemporary, TrustedImm32(-1)).linkTo(beginOp.m_reentry, this);
     3040
     3041                        JumpList exceededMatchLimit;
     3042
     3043                        if (term->quantityMaxCount != quantifyInfinite) {
     3044                            loadFromFrame(parenthesesFrameLocation + BackTrackInfoParentheses::matchAmountIndex(), countTemporary);
     3045                            exceededMatchLimit.append(branch32(AboveOrEqual, countTemporary, Imm32(term->quantityMaxCount.unsafeGet())));
     3046                        }
     3047
     3048                        branch32(Above, index, beginTemporary).linkTo(beginOp.m_reentry, this);
     3049
     3050                        exceededMatchLimit.link(this);
    30323051                    }
    30333052
     
    31033122    // Supported types of parentheses are 'Once' (quantityMaxCount == 1),
    31043123    // 'Terminal' (non-capturing parentheses quantified as greedy
    3105     // and infinite), and 0 based greedy quantified parentheses.
     3124    // and infinite), and 0 based greedy / non-greedy quantified parentheses.
    31063125    // Alternatives will use the 'Simple' set of ops if either the
    31073126    // subpattern is terminal (in which case we will never need to
     
    31253144            m_failureReason = JITFailureReason::VariableCountedParenthesisWithNonZeroMinimum;
    31263145            return;
    3127         } if (term->quantityMaxCount == 1 && !term->parentheses.isCopy) {
     3146        }
     3147       
     3148        if (term->quantityMaxCount == 1 && !term->parentheses.isCopy) {
    31283149            // Select the 'Once' nodes.
    31293150            parenthesesBeginOpCode = OpParenthesesSubpatternOnceBegin;
     
    31423163        } else {
    31433164#if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS)
    3144             // We only handle generic parenthesis with greedy counts.
    3145             if (term->quantityType != QuantifierGreedy) {
     3165            // We only handle generic parenthesis with non-fixed counts.
     3166            if (term->quantityType == QuantifierFixedCount) {
    31463167                // This subpattern is not supported by the JIT.
    3147                 m_failureReason = JITFailureReason::NonGreedyParenthesizedSubpattern;
     3168                m_failureReason = JITFailureReason::FixedCountParenthesizedSubpattern;
    31483169                return;
    31493170            }
     
    35633584#if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS)
    35643585        if (m_containsNestedSubpatterns)
    3565             codeBlock.setUsesPaternContextBuffer();
     3586            codeBlock.setUsesPatternContextBuffer();
    35663587#endif
    35673588
     
    37753796            out.print("OpParenthesesSubpatternOnceBegin ");
    37763797            if (term->capture())
    3777                 out.printf("capturing pattern #%u\n", op.m_term->parentheses.subpatternId);
     3798                out.printf("capturing pattern #%u", term->parentheses.subpatternId);
     3799            else
     3800                out.print("non-capturing");
     3801            term->dumpQuantifier(out);
     3802            out.print("\n");
     3803            return(0);
     3804
     3805        case OpParenthesesSubpatternOnceEnd:
     3806            out.print("OpParenthesesSubpatternOnceEnd ");
     3807            if (term->capture())
     3808                out.printf("capturing pattern #%u", term->parentheses.subpatternId);
     3809            else
     3810                out.print("non-capturing");
     3811            term->dumpQuantifier(out);
     3812            out.print("\n");
     3813            return(0);
     3814
     3815        case OpParenthesesSubpatternTerminalBegin:
     3816            out.print("OpParenthesesSubpatternTerminalBegin ");
     3817            if (term->capture())
     3818                out.printf("capturing pattern #%u\n", term->parentheses.subpatternId);
    37783819            else
    37793820                out.print("non-capturing\n");
    37803821            return(0);
    37813822
    3782         case OpParenthesesSubpatternOnceEnd:
    3783             out.print("OpParenthesesSubpatternOnceEnd\n");
    3784             return(0);
    3785 
    3786         case OpParenthesesSubpatternTerminalBegin:
    3787             out.print("OpParenthesesSubpatternTerminalBegin ");
     3823        case OpParenthesesSubpatternTerminalEnd:
     3824            out.print("OpParenthesesSubpatternTerminalEnd ");
    37883825            if (term->capture())
    3789                 out.printf("capturing pattern #%u\n", op.m_term->parentheses.subpatternId);
     3826                out.printf("capturing pattern #%u\n", term->parentheses.subpatternId);
    37903827            else
    37913828                out.print("non-capturing\n");
    37923829            return(0);
    37933830
    3794         case OpParenthesesSubpatternTerminalEnd:
    3795             out.print("OpParenthesesSubpatternTerminalEnd ");
    3796             if (term->capture())
    3797                 out.printf("capturing pattern #%u\n", op.m_term->parentheses.subpatternId);
    3798             else
    3799                 out.print("non-capturing\n");
    3800             return(0);
    3801 
    38023831        case OpParenthesesSubpatternBegin:
    38033832            out.print("OpParenthesesSubpatternBegin ");
    38043833            if (term->capture())
    3805                 out.printf("capturing pattern #%u\n", op.m_term->parentheses.subpatternId);
     3834                out.printf("capturing pattern #%u", term->parentheses.subpatternId);
    38063835            else
    3807                 out.print("non-capturing\n");
     3836                out.print("non-capturing");
     3837            term->dumpQuantifier(out);
     3838            out.print("\n");
    38083839            return(0);
    38093840
     
    38113842            out.print("OpParenthesesSubpatternEnd ");
    38123843            if (term->capture())
    3813                 out.printf("capturing pattern #%u\n", op.m_term->parentheses.subpatternId);
     3844                out.printf("capturing pattern #%u", term->parentheses.subpatternId);
    38143845            else
    3815                 out.print("non-capturing\n");
     3846                out.print("non-capturing");
     3847            term->dumpQuantifier(out);
     3848            out.print("\n");
    38163849            return(0);
    38173850
    38183851        case OpParentheticalAssertionBegin:
    3819             out.printf("OpParentheticalAssertionBegin%s\n", op.m_term->invert() ? " inverted" : "");
     3852            out.printf("OpParentheticalAssertionBegin%s\n", term->invert() ? " inverted" : "");
    38203853            return(0);
    38213854
    38223855        case OpParentheticalAssertionEnd:
    3823             out.print("OpParentheticalAssertionEnd%s\n", op.m_term->invert() ? " inverted" : "");
     3856            out.print("OpParentheticalAssertionEnd%s\n", term->invert() ? " inverted" : "");
    38243857            return(0);
    38253858
     
    38933926        dataLog("Can't JIT a pattern containing parenthesized subpatterns\n");
    38943927        break;
    3895     case JITFailureReason::NonGreedyParenthesizedSubpattern:
    3896         dataLog("Can't JIT a pattern containing non-greedy parenthesized subpatterns\n");
     3928    case JITFailureReason::FixedCountParenthesizedSubpattern:
     3929        dataLog("Can't JIT a pattern containing fixed count parenthesized subpatterns\n");
    38973930        break;
    38983931    case JITFailureReason::ExecutableMemoryAllocationFailure:
     
    39103943
    39113944    if (auto failureReason = codeBlock.failureReason()) {
    3912         if (Options::dumpCompiledRegExpPatterns())
     3945        if (Options::dumpCompiledRegExpPatterns()) {
     3946            pattern.dumpPatternString(WTF::dataFile(), patternString);
     3947            dataLog(" : ");
    39133948            dumpCompileFailure(*failureReason);
     3949        }
    39143950    }
    39153951}
Note: See TracChangeset for help on using the changeset viewer.