Ignore:
Timestamp:
Jul 13, 2011, 4:05:40 PM (14 years ago)
Author:
[email protected]
Message:

https://bugs.webkit.org/show_bug.cgi?id=64202
Enh: Improve handling of RegExp in the form of /.*blah.*/

Reviewed by Gavin Barraclough.

../../../../Volumes/Data/src/webkit/LayoutTests:

New tests to support /.*<expression>.*/ enhancement.

  • fast/regex/dotstar-expected.txt: Added.
  • fast/regex/dotstar.html: Added.
  • fast/regex/script-tests/dotstar.js: Added.

../../../../Volumes/Data/src/webkit/Source/JavaScriptCore:

Added code to both the Yarr interpreter and JIT to handle
these expressions a little differently. First off, the terms
in between the leading and trailing .*'s cannot capture and
also this enhancement is limited to single alternative expressions.
If an expression is of the right form with the aforementioned
restrictions, we process the inner terms and then look for the
beginning of the string and end of the string. There is handling
for multiline expressions to allow the beginning and end to be
right after and right before newlines.

This enhancement speeds up expressions of this type 12x on
a MacBookPro.

Cleaned up 'case' statement indentation.

A new set of tests was added as LayoutTests/fast/regex/dotstar.html

  • yarr/YarrInterpreter.cpp:

(JSC::Yarr::Interpreter::InputStream::end):
(JSC::Yarr::Interpreter::matchDotStarEnclosure):
(JSC::Yarr::Interpreter::matchDisjunction):
(JSC::Yarr::ByteCompiler::assertionDotStarEnclosure):
(JSC::Yarr::ByteCompiler::emitDisjunction):

  • yarr/YarrInterpreter.h:

(JSC::Yarr::ByteTerm::DotStarEnclosure):

  • yarr/YarrJIT.cpp:

(JSC::Yarr::YarrGenerator::generateDotStarEnclosure):
(JSC::Yarr::YarrGenerator::backtrackDotStarEnclosure):
(JSC::Yarr::YarrGenerator::generateTerm):
(JSC::Yarr::YarrGenerator::backtrackTerm):

  • yarr/YarrPattern.cpp:

(JSC::Yarr::YarrPatternConstructor::setupAlternativeOffsets):
(JSC::Yarr::YarrPatternConstructor::containsCapturingTerms):
(JSC::Yarr::YarrPatternConstructor::optimizeDotStarWrappedExpressions):
(JSC::Yarr::YarrPattern::compile):

  • yarr/YarrPattern.h:

(JSC::Yarr::PatternTerm::PatternTerm):

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/Source/JavaScriptCore/yarr/YarrPattern.cpp

    r87109 r90962  
    583583                currentCallFrameSize = setupDisjunctionOffsets(term.parentheses.disjunction, currentCallFrameSize + YarrStackSpaceForBackTrackInfoParentheticalAssertion, currentInputPosition);
    584584                break;
     585
     586            case PatternTerm::TypeDotStarEnclosure:
     587                alternative->m_hasFixedSize = false;
     588                term.inputPosition = initialInputPosition;
     589                break;
    585590            }
    586591        }
     
    676681    }
    677682
     683    bool containsCapturingTerms(PatternAlternative* alternative, size_t firstTermIndex, size_t lastTermIndex)
     684    {
     685        Vector<PatternTerm>& terms = alternative->m_terms;
     686
     687        for (size_t termIndex = firstTermIndex; termIndex <= lastTermIndex; ++termIndex) {
     688            PatternTerm& term = terms[termIndex];
     689
     690            if (term.m_capture)
     691                return true;
     692
     693            if (term.type == PatternTerm::TypeParenthesesSubpattern) {
     694                PatternDisjunction* nestedDisjunction = term.parentheses.disjunction;
     695                for (unsigned alt = 0; alt < nestedDisjunction->m_alternatives.size(); ++alt) {
     696                    if (containsCapturingTerms(nestedDisjunction->m_alternatives[alt], 0, nestedDisjunction->m_alternatives[alt]->m_terms.size() - 1))
     697                        return true;
     698                }
     699            }
     700        }
     701
     702        return false;
     703    }
     704
     705    // This optimization identifies alternatives in the form of
     706    // [^].*[?]<expression>.*[$] for expressions that don't have any
     707    // capturing terms. The alternative is changed to <expression>
     708    // followed by processing of the dot stars to find and adjust the
     709    // beginning and the end of the match.
     710    void optimizeDotStarWrappedExpressions()
     711    {
     712        Vector<PatternAlternative*>& alternatives = m_pattern.m_body->m_alternatives;
     713        if (alternatives.size() != 1)
     714            return;
     715
     716        PatternAlternative* alternative = alternatives[0];
     717        Vector<PatternTerm>& terms = alternative->m_terms;
     718        if (terms.size() >= 3) {
     719            bool startsWithBOL = false;
     720            bool endsWithEOL = false;
     721            size_t termIndex, firstExpressionTerm, lastExpressionTerm;
     722
     723            termIndex = 0;
     724            if (terms[termIndex].type == PatternTerm::TypeAssertionBOL) {
     725                startsWithBOL = true;
     726                ++termIndex;
     727            }
     728           
     729            PatternTerm& firstNonAnchorTerm = terms[termIndex];
     730            if ((firstNonAnchorTerm.type != PatternTerm::TypeCharacterClass) || (firstNonAnchorTerm.characterClass != m_pattern.newlineCharacterClass()) || !((firstNonAnchorTerm.quantityType == QuantifierGreedy) || (firstNonAnchorTerm.quantityType == QuantifierNonGreedy)))
     731                return;
     732           
     733            firstExpressionTerm = termIndex + 1;
     734           
     735            termIndex = terms.size() - 1;
     736            if (terms[termIndex].type == PatternTerm::TypeAssertionEOL) {
     737                endsWithEOL = true;
     738                --termIndex;
     739            }
     740           
     741            PatternTerm& lastNonAnchorTerm = terms[termIndex];
     742            if ((lastNonAnchorTerm.type != PatternTerm::TypeCharacterClass) || (lastNonAnchorTerm.characterClass != m_pattern.newlineCharacterClass()) || (lastNonAnchorTerm.quantityType != QuantifierGreedy))
     743                return;
     744           
     745            lastExpressionTerm = termIndex - 1;
     746
     747            if (firstExpressionTerm > lastExpressionTerm)
     748                return;
     749
     750            if (!containsCapturingTerms(alternative, firstExpressionTerm, lastExpressionTerm)) {
     751                for (termIndex = terms.size() - 1; termIndex > lastExpressionTerm; --termIndex)
     752                    terms.remove(termIndex);
     753
     754                for (termIndex = firstExpressionTerm; termIndex > 0; --termIndex)
     755                    terms.remove(termIndex - 1);
     756
     757                terms.append(PatternTerm(startsWithBOL, endsWithEOL));
     758               
     759                m_pattern.m_containsBOL = false;
     760            }
     761        }
     762    }
     763
    678764private:
    679765    YarrPattern& m_pattern;
     
    709795
    710796    constructor.checkForTerminalParentheses();
     797    constructor.optimizeDotStarWrappedExpressions();
    711798    constructor.optimizeBOL();
    712799       
Note: See TracChangeset for help on using the changeset viewer.