Ignore:
Timestamp:
Nov 17, 2010, 3:03:40 AM (15 years ago)
Author:
Csaba Osztrogonác
Message:

Extend YARR Interpreter with beginning character look-up optimization
https://bugs.webkit.org/show_bug.cgi?id=45751

Patch by Peter Varga <[email protected]> on 2010-11-17
Reviewed by Gavin Barraclough.

Add beginning character look-up optimization which sets the start
index to the first possible successful pattern match.
Extend YARR Interpreter with lookupForBeginChars function which
implements the beginning character look-up optimization.

  • yarr/RegexInterpreter.cpp:

(JSC::Yarr::Interpreter::InputStream::readPair):
(JSC::Yarr::Interpreter::InputStream::isNotAvailableInput):
(JSC::Yarr::Interpreter::lookupForBeginChars):
(JSC::Yarr::Interpreter::matchDisjunction):
(JSC::Yarr::Interpreter::interpret):

  • yarr/RegexInterpreter.h:

(JSC::Yarr::BytecodePattern::BytecodePattern):

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/JavaScriptCore/yarr/RegexInterpreter.cpp

    r72140 r72186  
    197197        }
    198198
     199        int readPair()
     200        {
     201            ASSERT(pos + 1 < length);
     202            return input[pos] | input[pos + 1] << 16;
     203        }
     204
    199205        int readChecked(int position)
    200206        {
     
    262268        {
    263269            return (pos + position) == length;
     270        }
     271
     272        bool isNotAvailableInput(int position)
     273        {
     274            return (pos + position) > length;
    264275        }
    265276
     
    9941005    }
    9951006
     1007    void lookupForBeginChars()
     1008    {
     1009        int character;
     1010        bool firstSingleCharFound;
     1011
     1012        while (true) {
     1013            if (input.isNotAvailableInput(2))
     1014                return;
     1015
     1016            firstSingleCharFound = false;
     1017
     1018            character = input.readPair();
     1019
     1020            for (unsigned i = 0; i < pattern->m_beginChars.size(); ++i) {
     1021                BeginChar bc = pattern->m_beginChars[i];
     1022
     1023                if (!firstSingleCharFound && bc.value <= 0xFFFF) {
     1024                    firstSingleCharFound = true;
     1025                    character &= 0xFFFF;
     1026                }
     1027
     1028                if ((character | bc.mask) == bc.value)
     1029                    return;
     1030            }
     1031
     1032            input.next();
     1033        }
     1034    }
     1035
    9961036#define MATCH_NEXT() { ++context->term; goto matchAgain; }
    9971037#define BACKTRACK() { --context->term; goto backtrack; }
    9981038#define currentTerm() (disjunction->terms[context->term])
    999     JSRegExpResult matchDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false)
     1039    JSRegExpResult matchDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false, bool isBody = false)
    10001040    {
    10011041        if (!--remainingMatchCount)
     
    10041044        if (btrack)
    10051045            BACKTRACK();
     1046
     1047        if (pattern->m_containsBeginChars && isBody)
     1048            lookupForBeginChars();
    10061049
    10071050        context->matchBegin = input.getPos();
     
    11691212
    11701213            input.next();
     1214
     1215            if (pattern->m_containsBeginChars && isBody)
     1216                lookupForBeginChars();
     1217
    11711218            context->matchBegin = input.getPos();
    11721219
     
    12851332        DisjunctionContext* context = allocDisjunctionContext(pattern->m_body.get());
    12861333
    1287         JSRegExpResult result = matchDisjunction(pattern->m_body.get(), context);
     1334        JSRegExpResult result = matchDisjunction(pattern->m_body.get(), context, false, true);
    12881335        if (result == JSRegExpMatch) {
    12891336            output[0] = context->matchBegin;
Note: See TracChangeset for help on using the changeset viewer.