Ignore:
Timestamp:
Jun 13, 2019, 11:47:22 AM (6 years ago)
Author:
[email protected]
Message:

Yarr bytecode compilation failure should be gracefully handled
https://bugs.webkit.org/show_bug.cgi?id=198700

Reviewed by Michael Saboff.

JSTests:

  • stress/regexp-bytecode-compilation-fail.js: Added.

(shouldThrow):

Source/JavaScriptCore:

Currently, we assume that Yarr bytecode compilation never fails, but in fact it can.
We should handle this failure gracefully as a runtime error, as we did for parse errors in [1].
We also harden Yarr's consumed-character count calculation against overflow by using Checked arithmetic.

[1]: https://bugs.webkit.org/show_bug.cgi?id=185755

  • inspector/ContentSearchUtilities.cpp:

(Inspector::ContentSearchUtilities::findMagicComment):

  • runtime/RegExp.cpp:

(JSC::RegExp::byteCodeCompileIfNecessary):
(JSC::RegExp::compile):
(JSC::RegExp::compileMatchOnly):

  • runtime/RegExpInlines.h:

(JSC::RegExp::matchInline):

  • yarr/YarrErrorCode.cpp:

(JSC::Yarr::errorMessage):
(JSC::Yarr::errorToThrow):

  • yarr/YarrErrorCode.h:
  • yarr/YarrInterpreter.cpp:

(JSC::Yarr::ByteCompiler::ByteCompiler):
(JSC::Yarr::ByteCompiler::compile):
(JSC::Yarr::ByteCompiler::atomCharacterClass):
(JSC::Yarr::ByteCompiler::atomBackReference):
(JSC::Yarr::ByteCompiler::atomParenthesesOnceBegin):
(JSC::Yarr::ByteCompiler::atomParenthesesTerminalBegin):
(JSC::Yarr::ByteCompiler::atomParenthesesSubpatternBegin):
(JSC::Yarr::ByteCompiler::atomParentheticalAssertionBegin):
(JSC::Yarr::ByteCompiler::popParenthesesStack):
(JSC::Yarr::ByteCompiler::closeAlternative):
(JSC::Yarr::ByteCompiler::closeBodyAlternative):
(JSC::Yarr::ByteCompiler::alternativeBodyDisjunction):
(JSC::Yarr::ByteCompiler::alternativeDisjunction):
(JSC::Yarr::ByteCompiler::emitDisjunction):

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/Source/JavaScriptCore/yarr/YarrInterpreter.cpp

    r243642 r246408  
    16911691        : m_pattern(pattern)
    16921692    {
    1693         m_currentAlternativeIndex = 0;
    1694     }
    1695 
    1696     std::unique_ptr<BytecodePattern> compile(BumpPointerAllocator* allocator, ConcurrentJSLock* lock)
     1693    }
     1694
     1695    std::unique_ptr<BytecodePattern> compile(BumpPointerAllocator* allocator, ConcurrentJSLock* lock, ErrorCode& errorCode)
    16971696    {
    16981697        regexBegin(m_pattern.m_numSubpatterns, m_pattern.m_body->m_callFrameSize, m_pattern.m_body->m_alternatives[0]->onceThrough());
    1699         emitDisjunction(m_pattern.m_body);
     1698        if (auto error = emitDisjunction(m_pattern.m_body, 0, 0)) {
     1699            errorCode = error.value();
     1700            return nullptr;
     1701        }
    17001702        regexEnd();
    17011703
     
    17521754        m_bodyDisjunction->terms.append(ByteTerm(characterClass, invert, inputPosition));
    17531755
    1754         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityMaxCount = quantityMaxCount.unsafeGet();
    1755         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityType = quantityType;
    1756         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
     1756        m_bodyDisjunction->terms.last().atom.quantityMaxCount = quantityMaxCount.unsafeGet();
     1757        m_bodyDisjunction->terms.last().atom.quantityType = quantityType;
     1758        m_bodyDisjunction->terms.last().frameLocation = frameLocation;
    17571759    }
    17581760
     
    17631765        m_bodyDisjunction->terms.append(ByteTerm::BackReference(subpatternId, inputPosition));
    17641766
    1765         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityMaxCount = quantityMaxCount.unsafeGet();
    1766         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityType = quantityType;
    1767         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
     1767        m_bodyDisjunction->terms.last().atom.quantityMaxCount = quantityMaxCount.unsafeGet();
     1768        m_bodyDisjunction->terms.last().atom.quantityType = quantityType;
     1769        m_bodyDisjunction->terms.last().frameLocation = frameLocation;
    17681770    }
    17691771
    17701772    void atomParenthesesOnceBegin(unsigned subpatternId, bool capture, unsigned inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation)
    17711773    {
    1772         int beginTerm = m_bodyDisjunction->terms.size();
     1774        unsigned beginTerm = m_bodyDisjunction->terms.size();
    17731775
    17741776        m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceBegin, subpatternId, capture, false, inputPosition));
    1775         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
     1777        m_bodyDisjunction->terms.last().frameLocation = frameLocation;
    17761778        m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
    1777         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
     1779        m_bodyDisjunction->terms.last().frameLocation = alternativeFrameLocation;
    17781780
    17791781        m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));
     
    17831785    void atomParenthesesTerminalBegin(unsigned subpatternId, bool capture, unsigned inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation)
    17841786    {
    1785         int beginTerm = m_bodyDisjunction->terms.size();
     1787        unsigned beginTerm = m_bodyDisjunction->terms.size();
    17861788
    17871789        m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternTerminalBegin, subpatternId, capture, false, inputPosition));
    1788         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
     1790        m_bodyDisjunction->terms.last().frameLocation = frameLocation;
    17891791        m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
    1790         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
     1792        m_bodyDisjunction->terms.last().frameLocation = alternativeFrameLocation;
    17911793
    17921794        m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));
     
    18001802        // https://bugs.webkit.org/show_bug.cgi?id=50136
    18011803
    1802         int beginTerm = m_bodyDisjunction->terms.size();
     1804        unsigned beginTerm = m_bodyDisjunction->terms.size();
    18031805
    18041806        m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceBegin, subpatternId, capture, false, inputPosition));
    1805         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
     1807        m_bodyDisjunction->terms.last().frameLocation = frameLocation;
    18061808        m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
    1807         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
     1809        m_bodyDisjunction->terms.last().frameLocation = alternativeFrameLocation;
    18081810
    18091811        m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));
     
    18131815    void atomParentheticalAssertionBegin(unsigned subpatternId, bool invert, unsigned frameLocation, unsigned alternativeFrameLocation)
    18141816    {
    1815         int beginTerm = m_bodyDisjunction->terms.size();
     1817        unsigned beginTerm = m_bodyDisjunction->terms.size();
    18161818
    18171819        m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParentheticalAssertionBegin, subpatternId, false, invert, 0));
    1818         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
     1820        m_bodyDisjunction->terms.last().frameLocation = frameLocation;
    18191821        m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
    1820         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
     1822        m_bodyDisjunction->terms.last().frameLocation = alternativeFrameLocation;
    18211823
    18221824        m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));
     
    18541856    {
    18551857        ASSERT(m_parenthesesStack.size());
    1856         int stackEnd = m_parenthesesStack.size() - 1;
    1857         unsigned beginTerm = m_parenthesesStack[stackEnd].beginTerm;
    1858         m_currentAlternativeIndex = m_parenthesesStack[stackEnd].savedAlternativeIndex;
    1859         m_parenthesesStack.shrink(stackEnd);
     1858        unsigned beginTerm = m_parenthesesStack.last().beginTerm;
     1859        m_currentAlternativeIndex = m_parenthesesStack.last().savedAlternativeIndex;
     1860        m_parenthesesStack.removeLast();
    18601861
    18611862        ASSERT(beginTerm < m_bodyDisjunction->terms.size());
     
    18651866    }
    18661867
    1867     void closeAlternative(int beginTerm)
    1868     {
    1869         int origBeginTerm = beginTerm;
     1868    void closeAlternative(unsigned beginTerm)
     1869    {
     1870        unsigned origBeginTerm = beginTerm;
    18701871        ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeAlternativeBegin);
    1871         int endIndex = m_bodyDisjunction->terms.size();
     1872        unsigned endIndex = m_bodyDisjunction->terms.size();
    18721873
    18731874        unsigned frameLocation = m_bodyDisjunction->terms[beginTerm].frameLocation;
     
    18921893    void closeBodyAlternative()
    18931894    {
    1894         int beginTerm = 0;
    1895         int origBeginTerm = 0;
     1895        unsigned beginTerm = 0;
     1896        unsigned origBeginTerm = 0;
    18961897        ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeBodyAlternativeBegin);
    1897         int endIndex = m_bodyDisjunction->terms.size();
     1898        unsigned endIndex = m_bodyDisjunction->terms.size();
    18981899
    18991900        unsigned frameLocation = m_bodyDisjunction->terms[beginTerm].frameLocation;
     
    20102011    void alternativeBodyDisjunction(bool onceThrough)
    20112012    {
    2012         int newAlternativeIndex = m_bodyDisjunction->terms.size();
     2013        unsigned newAlternativeIndex = m_bodyDisjunction->terms.size();
    20132014        m_bodyDisjunction->terms[m_currentAlternativeIndex].alternative.next = newAlternativeIndex - m_currentAlternativeIndex;
    20142015        m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeDisjunction(onceThrough));
     
    20192020    void alternativeDisjunction()
    20202021    {
    2021         int newAlternativeIndex = m_bodyDisjunction->terms.size();
     2022        unsigned newAlternativeIndex = m_bodyDisjunction->terms.size();
    20222023        m_bodyDisjunction->terms[m_currentAlternativeIndex].alternative.next = newAlternativeIndex - m_currentAlternativeIndex;
    20232024        m_bodyDisjunction->terms.append(ByteTerm::AlternativeDisjunction());
     
    20262027    }
    20272028
    2028     void emitDisjunction(PatternDisjunction* disjunction, unsigned inputCountAlreadyChecked = 0, unsigned parenthesesInputCountAlreadyChecked = 0)
     2029    Optional<ErrorCode> WARN_UNUSED_RETURN emitDisjunction(PatternDisjunction* disjunction, Checked<unsigned, RecordOverflow> inputCountAlreadyChecked, unsigned parenthesesInputCountAlreadyChecked)
    20292030    {
    20302031        for (unsigned alt = 0; alt < disjunction->m_alternatives.size(); ++alt) {
    2031             unsigned currentCountAlreadyChecked = inputCountAlreadyChecked;
     2032            auto currentCountAlreadyChecked = inputCountAlreadyChecked;
    20322033
    20332034            PatternAlternative* alternative = disjunction->m_alternatives[alt].get();
     
    20472048                checkInput(countToCheck);
    20482049                currentCountAlreadyChecked += countToCheck;
     2050                if (currentCountAlreadyChecked.hasOverflowed())
     2051                    return ErrorCode::OffsetTooLarge;
    20492052            }
    20502053
     
    20522055                switch (term.type) {
    20532056                case PatternTerm::TypeAssertionBOL:
    2054                     assertionBOL(currentCountAlreadyChecked - term.inputPosition);
     2057                    assertionBOL((currentCountAlreadyChecked - term.inputPosition).unsafeGet());
    20552058                    break;
    20562059
    20572060                case PatternTerm::TypeAssertionEOL:
    2058                     assertionEOL(currentCountAlreadyChecked - term.inputPosition);
     2061                    assertionEOL((currentCountAlreadyChecked - term.inputPosition).unsafeGet());
    20592062                    break;
    20602063
    20612064                case PatternTerm::TypeAssertionWordBoundary:
    2062                     assertionWordBoundary(term.invert(), currentCountAlreadyChecked - term.inputPosition);
     2065                    assertionWordBoundary(term.invert(), (currentCountAlreadyChecked - term.inputPosition).unsafeGet());
    20632066                    break;
    20642067
    20652068                case PatternTerm::TypePatternCharacter:
    2066                     atomPatternCharacter(term.patternCharacter, currentCountAlreadyChecked - term.inputPosition, term.frameLocation, term.quantityMaxCount, term.quantityType);
     2069                    atomPatternCharacter(term.patternCharacter, (currentCountAlreadyChecked - term.inputPosition).unsafeGet(), term.frameLocation, term.quantityMaxCount, term.quantityType);
    20672070                    break;
    20682071
    20692072                case PatternTerm::TypeCharacterClass:
    2070                     atomCharacterClass(term.characterClass, term.invert(), currentCountAlreadyChecked- term.inputPosition, term.frameLocation, term.quantityMaxCount, term.quantityType);
     2073                    atomCharacterClass(term.characterClass, term.invert(), (currentCountAlreadyChecked - term.inputPosition).unsafeGet(), term.frameLocation, term.quantityMaxCount, term.quantityType);
    20712074                    break;
    20722075
    20732076                case PatternTerm::TypeBackReference:
    2074                     atomBackReference(term.backReferenceSubpatternId, currentCountAlreadyChecked - term.inputPosition, term.frameLocation, term.quantityMaxCount, term.quantityType);
    2075                         break;
     2077                    atomBackReference(term.backReferenceSubpatternId, (currentCountAlreadyChecked - term.inputPosition).unsafeGet(), term.frameLocation, term.quantityMaxCount, term.quantityType);
     2078                    break;
    20762079
    20772080                case PatternTerm::TypeForwardReference:
     
    20872090                        else
    20882091                            alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce;
    2089                         ASSERT(currentCountAlreadyChecked >= term.inputPosition);
    2090                         unsigned delegateEndInputOffset = currentCountAlreadyChecked - term.inputPosition;
     2092                        unsigned delegateEndInputOffset = (currentCountAlreadyChecked - term.inputPosition).unsafeGet();
    20912093                        atomParenthesesOnceBegin(term.parentheses.subpatternId, term.capture(), disjunctionAlreadyCheckedCount + delegateEndInputOffset, term.frameLocation, alternativeFrameLocation);
    2092                         emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, disjunctionAlreadyCheckedCount);
     2094                        if (auto error = emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, disjunctionAlreadyCheckedCount))
     2095                            return error;
    20932096                        atomParenthesesOnceEnd(delegateEndInputOffset, term.frameLocation, term.quantityMinCount, term.quantityMaxCount, term.quantityType);
    20942097                    } else if (term.parentheses.isTerminal) {
    2095                         ASSERT(currentCountAlreadyChecked >= term.inputPosition);
    2096                         unsigned delegateEndInputOffset = currentCountAlreadyChecked - term.inputPosition;
     2098                        unsigned delegateEndInputOffset = (currentCountAlreadyChecked - term.inputPosition).unsafeGet();
    20972099                        atomParenthesesTerminalBegin(term.parentheses.subpatternId, term.capture(), disjunctionAlreadyCheckedCount + delegateEndInputOffset, term.frameLocation, term.frameLocation + YarrStackSpaceForBackTrackInfoParenthesesTerminal);
    2098                         emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, disjunctionAlreadyCheckedCount);
     2100                        if (auto error = emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, disjunctionAlreadyCheckedCount))
     2101                            return error;
    20992102                        atomParenthesesTerminalEnd(delegateEndInputOffset, term.frameLocation, term.quantityMinCount, term.quantityMaxCount, term.quantityType);
    21002103                    } else {
    2101                         ASSERT(currentCountAlreadyChecked >= term.inputPosition);
    2102                         unsigned delegateEndInputOffset = currentCountAlreadyChecked - term.inputPosition;
     2104                        unsigned delegateEndInputOffset = (currentCountAlreadyChecked - term.inputPosition).unsafeGet();
    21032105                        atomParenthesesSubpatternBegin(term.parentheses.subpatternId, term.capture(), disjunctionAlreadyCheckedCount + delegateEndInputOffset, term.frameLocation, 0);
    2104                         emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, 0);
     2106                        if (auto error = emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, 0))
     2107                            return error;
    21052108                        atomParenthesesSubpatternEnd(term.parentheses.lastSubpatternId, delegateEndInputOffset, term.frameLocation, term.quantityMinCount, term.quantityMaxCount, term.quantityType, term.parentheses.disjunction->m_callFrameSize);
    21062109                    }
     
    21102113                case PatternTerm::TypeParentheticalAssertion: {
    21112114                    unsigned alternativeFrameLocation = term.frameLocation + YarrStackSpaceForBackTrackInfoParentheticalAssertion;
    2112 
    2113                     ASSERT(currentCountAlreadyChecked >= term.inputPosition);
    2114                     unsigned positiveInputOffset = currentCountAlreadyChecked - term.inputPosition;
     2115                    unsigned positiveInputOffset = (currentCountAlreadyChecked - term.inputPosition).unsafeGet();
    21152116                    unsigned uncheckAmount = 0;
    21162117                    if (positiveInputOffset > term.parentheses.disjunction->m_minimumSize) {
     
    21182119                        uncheckInput(uncheckAmount);
    21192120                        currentCountAlreadyChecked -= uncheckAmount;
     2121                        if (currentCountAlreadyChecked.hasOverflowed())
     2122                            return ErrorCode::OffsetTooLarge;
    21202123                    }
    21212124
    21222125                    atomParentheticalAssertionBegin(term.parentheses.subpatternId, term.invert(), term.frameLocation, alternativeFrameLocation);
    2123                     emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, positiveInputOffset - uncheckAmount);
     2126                    if (auto error = emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, positiveInputOffset - uncheckAmount))
     2127                        return error;
    21242128                    atomParentheticalAssertionEnd(0, term.frameLocation, term.quantityMaxCount, term.quantityType);
    21252129                    if (uncheckAmount) {
    21262130                        checkInput(uncheckAmount);
    21272131                        currentCountAlreadyChecked += uncheckAmount;
     2132                        if (currentCountAlreadyChecked.hasOverflowed())
     2133                            return ErrorCode::OffsetTooLarge;
    21282134                    }
    21292135                    break;
     
    21362142            }
    21372143        }
     2144        return WTF::nullopt;
    21382145    }
    21392146#ifndef NDEBUG
     
    24012408    YarrPattern& m_pattern;
    24022409    std::unique_ptr<ByteDisjunction> m_bodyDisjunction;
    2403     unsigned m_currentAlternativeIndex;
     2410    unsigned m_currentAlternativeIndex { 0 };
    24042411    Vector<ParenthesesStackEntry> m_parenthesesStack;
    24052412    Vector<std::unique_ptr<ByteDisjunction>> m_allParenthesesInfo;
    24062413};
    24072414
    2408 std::unique_ptr<BytecodePattern> byteCompile(YarrPattern& pattern, BumpPointerAllocator* allocator, ConcurrentJSLock* lock)
     2415std::unique_ptr<BytecodePattern> byteCompile(YarrPattern& pattern, BumpPointerAllocator* allocator, ErrorCode& errorCode, ConcurrentJSLock* lock)
    24092416{
    2410     return ByteCompiler(pattern).compile(allocator, lock);
     2417    return ByteCompiler(pattern).compile(allocator, lock, errorCode);
    24112418}
    24122419
Note: See TracChangeset for help on using the changeset viewer.