Changeset 42853 in webkit for trunk/JavaScriptCore


Ignore:
Timestamp:
Apr 24, 2009, 4:51:47 PM (16 years ago)
Author:
[email protected]
Message:

2009-04-24 Gavin Barraclough <[email protected]>

Reviewed by Sam "Wesley" Weinig.

Improve performance to YARR interpreter.
(From about 3x slower than PCRE on regex-dna to about 30% slower).

  • yarr/RegexCompiler.cpp: (JSC::Yarr::RegexPatternConstructor::setupAlternativeOffsets):
  • yarr/RegexInterpreter.cpp: (JSC::Yarr::Interpreter::checkCharacter): (JSC::Yarr::Interpreter::checkCasedCharacter): (JSC::Yarr::Interpreter::backtrackPatternCharacter): (JSC::Yarr::Interpreter::backtrackPatternCasedCharacter): (JSC::Yarr::Interpreter::matchParentheticalAssertionBegin): (JSC::Yarr::Interpreter::matchParentheticalAssertionEnd): (JSC::Yarr::Interpreter::backtrackParentheticalAssertionBegin): (JSC::Yarr::Interpreter::backtrackParentheticalAssertionEnd): (JSC::Yarr::Interpreter::matchDisjunction): (JSC::Yarr::Interpreter::interpret): (JSC::Yarr::ByteCompiler::atomPatternCharacter): (JSC::Yarr::ByteCompiler::atomParenthesesSubpatternBegin): (JSC::Yarr::ByteCompiler::atomParentheticalAssertionBegin): (JSC::Yarr::ByteCompiler::closeAlternative): (JSC::Yarr::ByteCompiler::closeBodyAlternative): (JSC::Yarr::ByteCompiler::atomParenthesesEnd): (JSC::Yarr::ByteCompiler::regexBegin): (JSC::Yarr::ByteCompiler::regexEnd): (JSC::Yarr::ByteCompiler::alterantiveBodyDisjunction): (JSC::Yarr::ByteCompiler::alterantiveDisjunction): (JSC::Yarr::ByteCompiler::emitDisjunction):
  • yarr/RegexInterpreter.h: (JSC::Yarr::ByteTerm::): (JSC::Yarr::ByteTerm::ByteTerm): (JSC::Yarr::ByteTerm::BodyAlternativeBegin): (JSC::Yarr::ByteTerm::BodyAlternativeDisjunction): (JSC::Yarr::ByteTerm::BodyAlternativeEnd): (JSC::Yarr::ByteTerm::AlternativeBegin): (JSC::Yarr::ByteTerm::AlternativeDisjunction): (JSC::Yarr::ByteTerm::AlternativeEnd): (JSC::Yarr::ByteTerm::SubpatternBegin): (JSC::Yarr::ByteTerm::SubpatternEnd):
  • yarr/RegexJIT.cpp: (JSC::Yarr::RegexGenerator::generateParentheticalAssertion):
  • yarr/RegexPattern.h:
Location:
trunk/JavaScriptCore
Files:
6 edited

Legend:

Unmodified
Added
Removed
  • trunk/JavaScriptCore/ChangeLog

    r42851 r42853  
     12009-04-24  Gavin Barraclough  <[email protected]>
     2
     3        Reviewed by Sam "Wesley" Weinig.
     4
     5        Improve performance to YARR interpreter.
     6        (From about 3x slower than PCRE on regex-dna to about 30% slower).
     7
     8        * yarr/RegexCompiler.cpp:
     9        (JSC::Yarr::RegexPatternConstructor::setupAlternativeOffsets):
     10        * yarr/RegexInterpreter.cpp:
     11        (JSC::Yarr::Interpreter::checkCharacter):
     12        (JSC::Yarr::Interpreter::checkCasedCharacter):
     13        (JSC::Yarr::Interpreter::backtrackPatternCharacter):
     14        (JSC::Yarr::Interpreter::backtrackPatternCasedCharacter):
     15        (JSC::Yarr::Interpreter::matchParentheticalAssertionBegin):
     16        (JSC::Yarr::Interpreter::matchParentheticalAssertionEnd):
     17        (JSC::Yarr::Interpreter::backtrackParentheticalAssertionBegin):
     18        (JSC::Yarr::Interpreter::backtrackParentheticalAssertionEnd):
     19        (JSC::Yarr::Interpreter::matchDisjunction):
     20        (JSC::Yarr::Interpreter::interpret):
     21        (JSC::Yarr::ByteCompiler::atomPatternCharacter):
     22        (JSC::Yarr::ByteCompiler::atomParenthesesSubpatternBegin):
     23        (JSC::Yarr::ByteCompiler::atomParentheticalAssertionBegin):
     24        (JSC::Yarr::ByteCompiler::closeAlternative):
     25        (JSC::Yarr::ByteCompiler::closeBodyAlternative):
     26        (JSC::Yarr::ByteCompiler::atomParenthesesEnd):
     27        (JSC::Yarr::ByteCompiler::regexBegin):
     28        (JSC::Yarr::ByteCompiler::regexEnd):
     29        (JSC::Yarr::ByteCompiler::alterantiveBodyDisjunction):
     30        (JSC::Yarr::ByteCompiler::alterantiveDisjunction):
     31        (JSC::Yarr::ByteCompiler::emitDisjunction):
     32        * yarr/RegexInterpreter.h:
     33        (JSC::Yarr::ByteTerm::):
     34        (JSC::Yarr::ByteTerm::ByteTerm):
     35        (JSC::Yarr::ByteTerm::BodyAlternativeBegin):
     36        (JSC::Yarr::ByteTerm::BodyAlternativeDisjunction):
     37        (JSC::Yarr::ByteTerm::BodyAlternativeEnd):
     38        (JSC::Yarr::ByteTerm::AlternativeBegin):
     39        (JSC::Yarr::ByteTerm::AlternativeDisjunction):
     40        (JSC::Yarr::ByteTerm::AlternativeEnd):
     41        (JSC::Yarr::ByteTerm::SubpatternBegin):
     42        (JSC::Yarr::ByteTerm::SubpatternEnd):
     43        * yarr/RegexJIT.cpp:
     44        (JSC::Yarr::RegexGenerator::generateParentheticalAssertion):
     45        * yarr/RegexPattern.h:
     46
    1472009-04-24  Rob Raguet-Schofield  <[email protected]>
    248
  • trunk/JavaScriptCore/yarr/RegexCompiler.cpp

    r42800 r42853  
    631631                term.inputPosition = currentInputPosition;
    632632                term.frameLocation = currentCallFrameSize;
    633                 currentCallFrameSize = setupDisjunctionOffsets(term.parentheses.disjunction, currentCallFrameSize + RegexStackSpaceForBackTrackInfoParentheticalAssertionOnce, currentInputPosition);
     633                currentCallFrameSize = setupDisjunctionOffsets(term.parentheses.disjunction, currentCallFrameSize + RegexStackSpaceForBackTrackInfoParentheticalAssertion, currentInputPosition);
    634634                break;
    635635            }
  • trunk/JavaScriptCore/yarr/RegexInterpreter.cpp

    r42805 r42853  
    5353        uintptr_t offset;
    5454    };
    55     struct BackTrackInfoParentheticalAssertionOnce {
     55    struct BackTrackInfoParentheticalAssertion {
    5656        uintptr_t begin;
    5757    };
     
    293293    bool checkCharacter(int testChar, int inputPosition)
    294294    {
     295        return testChar == input.readChecked(inputPosition);
     296    }
     297
     298    bool checkCasedCharacter(int loChar, int hiChar, int inputPosition)
     299    {
    295300        int ch = input.readChecked(inputPosition);
    296         return (pattern->m_ignoreCase ? ((Unicode::toLower(testChar) == ch) || (Unicode::toUpper(testChar) == ch)) : (testChar == ch));
     301        return (loChar == ch) || (hiChar == ch);
    297302    }
    298303
     
    363368    }
    364369
    365     bool matchPatternCharacter(ByteTerm& term, DisjunctionContext* context)
    366     {
    367         ASSERT(term.type == ByteTerm::TypePatternCharacter);
    368         BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
    369 
    370         switch (term.atom.quantityType) {
    371         case QuantifierFixedCount: {
    372             for (unsigned matchAmount = 0; matchAmount < term.atom.quantityCount; ++matchAmount) {
    373                 if (!checkCharacter(term.atom.patternCharacter, term.inputPosition + matchAmount))
    374                     return false;
    375             }
    376             return true;
    377         }
    378 
    379         case QuantifierGreedy: {
    380             unsigned matchAmount = 0;
    381             while ((matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
    382                 if (!checkCharacter(term.atom.patternCharacter, term.inputPosition - 1)) {
    383                     input.uncheckInput(1);
    384                     break;
    385                 }
    386                 ++matchAmount;
    387             }
    388             backTrack->matchAmount = matchAmount;
    389 
    390             return true;
    391         }
    392 
    393         case QuantifierNonGreedy:
    394             backTrack->matchAmount = 0;
    395             return true;
    396         }
    397 
    398         ASSERT_NOT_REACHED();
    399         return false;
    400     }
    401 
    402370    bool backtrackPatternCharacter(ByteTerm& term, DisjunctionContext* context)
    403371    {
    404         ASSERT(term.type == ByteTerm::TypePatternCharacter);
    405372        BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
    406373
     
    421388                ++backTrack->matchAmount;
    422389                if (checkCharacter(term.atom.patternCharacter, term.inputPosition - 1))
     390                    return true;
     391            }
     392            input.uncheckInput(backTrack->matchAmount);
     393            break;
     394        }
     395
     396        return false;
     397    }
     398
     399    bool backtrackPatternCasedCharacter(ByteTerm& term, DisjunctionContext* context)
     400    {
     401        BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
     402
     403        switch (term.atom.quantityType) {
     404        case QuantifierFixedCount:
     405            break;
     406
     407        case QuantifierGreedy:
     408            if (backTrack->matchAmount) {
     409                --backTrack->matchAmount;
     410                input.uncheckInput(1);
     411                return true;
     412            }
     413            break;
     414
     415        case QuantifierNonGreedy:
     416            if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
     417                ++backTrack->matchAmount;
     418                if (checkCasedCharacter(term.atom.casedCharacter.lo, term.atom.casedCharacter.hi, term.inputPosition - 1))
    423419                    return true;
    424420            }
     
    718714    }
    719715
    720     bool matchParentheticalAssertionOnceBegin(ByteTerm& term, DisjunctionContext* context)
    721     {
    722         ASSERT(term.type == ByteTerm::TypeParentheticalAssertionOnceBegin);
     716    bool matchParentheticalAssertionBegin(ByteTerm& term, DisjunctionContext* context)
     717    {
     718        ASSERT(term.type == ByteTerm::TypeParentheticalAssertionBegin);
    723719        ASSERT(term.atom.quantityCount == 1);
    724720
    725         BackTrackInfoParentheticalAssertionOnce* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertionOnce*>(context->frame + term.frameLocation);
     721        BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);
    726722
    727723        backTrack->begin = input.getPos();
     
    729725    }
    730726
    731     bool matchParentheticalAssertionOnceEnd(ByteTerm& term, DisjunctionContext* context)
    732     {
    733         ASSERT(term.type == ByteTerm::TypeParentheticalAssertionOnceEnd);
     727    bool matchParentheticalAssertionEnd(ByteTerm& term, DisjunctionContext* context)
     728    {
     729        ASSERT(term.type == ByteTerm::TypeParentheticalAssertionEnd);
    734730        ASSERT(term.atom.quantityCount == 1);
    735731
    736         BackTrackInfoParentheticalAssertionOnce* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertionOnce*>(context->frame + term.frameLocation);
     732        BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);
    737733
    738734        input.setPos(backTrack->begin);
     
    747743    }
    748744
    749     bool backtrackParentheticalAssertionOnceBegin(ByteTerm& term, DisjunctionContext* context)
    750     {
    751         ASSERT(term.type == ByteTerm::TypeParentheticalAssertionOnceBegin);
     745    bool backtrackParentheticalAssertionBegin(ByteTerm& term, DisjunctionContext* context)
     746    {
     747        ASSERT(term.type == ByteTerm::TypeParentheticalAssertionBegin);
    752748        ASSERT(term.atom.quantityCount == 1);
    753749
     
    761757    }
    762758
    763     bool backtrackParentheticalAssertionOnceEnd(ByteTerm& term, DisjunctionContext* context)
    764     {
    765         ASSERT(term.type == ByteTerm::TypeParentheticalAssertionOnceEnd);
     759    bool backtrackParentheticalAssertionEnd(ByteTerm& term, DisjunctionContext* context)
     760    {
     761        ASSERT(term.type == ByteTerm::TypeParentheticalAssertionEnd);
    766762        ASSERT(term.atom.quantityCount == 1);
    767763
    768         BackTrackInfoParentheticalAssertionOnce* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertionOnce*>(context->frame + term.frameLocation);
     764        BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);
    769765
    770766        input.setPos(backTrack->begin);
     
    962958    }
    963959
    964     bool matchTerm(ByteDisjunction* disjunction, DisjunctionContext* context)
    965     {
    966         ByteTerm& term = disjunction->terms[context->term];
    967 
    968         switch (term.type) {
    969             case ByteTerm::TypeAlternativeBegin:
    970                 return true;
    971             case ByteTerm::TypeAlternativeDisjunction:
    972             case ByteTerm::TypeAlternativeEnd: {
    973                 int offset = term.alternative.end;
    974                 if (term.alternative.isParentheses) {
    975                     BackTrackInfoAlternative* backTrack = reinterpret_cast<BackTrackInfoAlternative*>(context->frame + term.frameLocation);
    976                     backTrack->offset = offset;
     960#define MATCH_NEXT() { ++context->term; goto matchAgain; }
     961#define BACKTRACK() { --context->term; goto backtrack; }
     962#define currentTerm() (disjunction->terms[context->term])
     963    bool matchDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false)
     964    {
     965        if (btrack)
     966            BACKTRACK();
     967
     968        context->matchBegin = input.getPos();
     969        context->term = 0;
     970
     971    matchAgain:
     972        ASSERT(context->term < static_cast<int>(disjunction->terms.size()));
     973
     974        switch (currentTerm().type) {
     975        case ByteTerm::TypeSubpatternBegin:
     976            MATCH_NEXT();
     977        case ByteTerm::TypeSubpatternEnd:
     978            context->matchEnd = input.getPos();
     979            return true;
     980
     981        case ByteTerm::TypeBodyAlternativeBegin:
     982            MATCH_NEXT();
     983        case ByteTerm::TypeBodyAlternativeDisjunction:
     984        case ByteTerm::TypeBodyAlternativeEnd:
     985            context->matchEnd = input.getPos();
     986            return true;
     987
     988        case ByteTerm::TypeAlternativeBegin:
     989            MATCH_NEXT();
     990        case ByteTerm::TypeAlternativeDisjunction:
     991        case ByteTerm::TypeAlternativeEnd: {
     992            int offset = currentTerm().alternative.end;
     993            BackTrackInfoAlternative* backTrack = reinterpret_cast<BackTrackInfoAlternative*>(context->frame + currentTerm().frameLocation);
     994            backTrack->offset = offset;
     995            context->term += offset;
     996            MATCH_NEXT();
     997        }
     998
     999        case ByteTerm::TypeAssertionBOL:
     1000            if (matchAssertionBOL(currentTerm()))
     1001                MATCH_NEXT();
     1002            BACKTRACK();
     1003        case ByteTerm::TypeAssertionEOL:
     1004            if (matchAssertionEOL(currentTerm()))
     1005                MATCH_NEXT();
     1006            BACKTRACK();
     1007        case ByteTerm::TypeAssertionWordBoundary:
     1008            if (matchAssertionWordBoundary(currentTerm()))
     1009                MATCH_NEXT();
     1010            BACKTRACK();
     1011
     1012        case ByteTerm::TypePatternCharacterOnce:
     1013        case ByteTerm::TypePatternCharacterFixed: {
     1014            for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityCount; ++matchAmount) {
     1015                if (!checkCharacter(currentTerm().atom.patternCharacter, currentTerm().inputPosition + matchAmount))
     1016                    BACKTRACK();
     1017            }
     1018            MATCH_NEXT();
     1019        }
     1020        case ByteTerm::TypePatternCharacterGreedy: {
     1021            BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
     1022            unsigned matchAmount = 0;
     1023            while ((matchAmount < currentTerm().atom.quantityCount) && input.checkInput(1)) {
     1024                if (!checkCharacter(currentTerm().atom.patternCharacter, currentTerm().inputPosition - 1)) {
     1025                    input.uncheckInput(1);
     1026                    break;
    9771027                }
    978                 context->term += offset;
    979                 return true;
    980             }
    981 
    982             case ByteTerm::TypeAssertionBOL:
    983                 return matchAssertionBOL(term);
    984             case ByteTerm::TypeAssertionEOL:
    985                 return matchAssertionEOL(term);
    986             case ByteTerm::TypeAssertionWordBoundary:
    987                 return matchAssertionWordBoundary(term);
    988 
    989             case ByteTerm::TypePatternCharacter:
    990                 return matchPatternCharacter(term, context);
    991             case ByteTerm::TypeCharacterClass:
    992                 return matchCharacterClass(term, context);
    993             case ByteTerm::TypeBackReference:
    994                 return matchBackReference(term, context);
    995             case ByteTerm::TypeParenthesesSubpattern:
    996                 return matchParentheses(term, context);
    997             case ByteTerm::TypeParenthesesSubpatternOnceBegin:
    998                 return matchParenthesesOnceBegin(term, context);
    999             case ByteTerm::TypeParenthesesSubpatternOnceEnd:
    1000                 return matchParenthesesOnceEnd(term, context);
    1001             case ByteTerm::TypeParentheticalAssertionOnceBegin:
    1002                 return matchParentheticalAssertionOnceBegin(term, context);
    1003             case ByteTerm::TypeParentheticalAssertionOnceEnd:
    1004                 return matchParentheticalAssertionOnceEnd(term, context);
    1005 
    1006             case ByteTerm::TypeCheckInput:
    1007                 if (input.checkInput(term.checkInputCount))
    1008                     return true;
    1009                 else
    1010                     return false;
    1011 
    1012             default:
    1013                 ASSERT_NOT_REACHED();
     1028                ++matchAmount;
     1029            }
     1030            backTrack->matchAmount = matchAmount;
     1031
     1032            MATCH_NEXT();
     1033        }
     1034        case ByteTerm::TypePatternCharacterNonGreedy: {
     1035            BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
     1036            backTrack->matchAmount = 0;
     1037            MATCH_NEXT();
     1038        }
     1039
     1040        case ByteTerm::TypePatternCasedCharacterOnce:
     1041        case ByteTerm::TypePatternCasedCharacterFixed: {
     1042            for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityCount; ++matchAmount) {
     1043                if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition + matchAmount))
     1044                    BACKTRACK();
     1045            }
     1046            MATCH_NEXT();
     1047        }
     1048        case ByteTerm::TypePatternCasedCharacterGreedy: {
     1049            BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
     1050            unsigned matchAmount = 0;
     1051            while ((matchAmount < currentTerm().atom.quantityCount) && input.checkInput(1)) {
     1052                if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition - 1)) {
     1053                    input.uncheckInput(1);
     1054                    break;
     1055                }
     1056                ++matchAmount;
     1057            }
     1058            backTrack->matchAmount = matchAmount;
     1059
     1060            MATCH_NEXT();
     1061        }
     1062        case ByteTerm::TypePatternCasedCharacterNonGreedy: {
     1063            BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
     1064            backTrack->matchAmount = 0;
     1065            MATCH_NEXT();
     1066        }
     1067
     1068        case ByteTerm::TypeCharacterClass:
     1069            if (matchCharacterClass(currentTerm(), context))
     1070                MATCH_NEXT();
     1071            BACKTRACK();
     1072        case ByteTerm::TypeBackReference:
     1073            if (matchBackReference(currentTerm(), context))
     1074                MATCH_NEXT();
     1075            BACKTRACK();
     1076        case ByteTerm::TypeParenthesesSubpattern:
     1077            if (matchParentheses(currentTerm(), context))
     1078                MATCH_NEXT();
     1079            BACKTRACK();
     1080        case ByteTerm::TypeParenthesesSubpatternOnceBegin:
     1081            if (matchParenthesesOnceBegin(currentTerm(), context))
     1082                MATCH_NEXT();
     1083            BACKTRACK();
     1084        case ByteTerm::TypeParenthesesSubpatternOnceEnd:
     1085            if (matchParenthesesOnceEnd(currentTerm(), context))
     1086                MATCH_NEXT();
     1087            BACKTRACK();
     1088        case ByteTerm::TypeParentheticalAssertionBegin:
     1089            if (matchParentheticalAssertionBegin(currentTerm(), context))
     1090                MATCH_NEXT();
     1091            BACKTRACK();
     1092        case ByteTerm::TypeParentheticalAssertionEnd:
     1093            if (matchParentheticalAssertionEnd(currentTerm(), context))
     1094                MATCH_NEXT();
     1095            BACKTRACK();
     1096
     1097        case ByteTerm::TypeCheckInput:
     1098            if (input.checkInput(currentTerm().checkInputCount))
     1099                MATCH_NEXT();
     1100            BACKTRACK();
     1101        }
     1102
     1103        // We should never fall-through to here.
     1104        ASSERT_NOT_REACHED();
     1105
     1106    backtrack:
     1107        ASSERT(context->term < static_cast<int>(disjunction->terms.size()));
     1108
     1109        switch (currentTerm().type) {
     1110        case ByteTerm::TypeSubpatternBegin:
     1111            return false;
     1112        case ByteTerm::TypeSubpatternEnd:
     1113            ASSERT_NOT_REACHED();
     1114
     1115        case ByteTerm::TypeBodyAlternativeBegin:
     1116        case ByteTerm::TypeBodyAlternativeDisjunction: {
     1117            int offset = currentTerm().alternative.next;
     1118            context->term += offset;
     1119            if (offset > 0)
     1120                MATCH_NEXT();
     1121
     1122            if (input.atEnd())
    10141123                return false;
    1015         }
    1016     }
    1017 
    1018     bool backtrackTerm(ByteDisjunction* disjunction, DisjunctionContext* context)
    1019     {
    1020         ByteTerm& term = disjunction->terms[context->term];
    1021 
    1022         switch (term.type) {
     1124
     1125            input.next();
     1126            context->matchBegin = input.getPos();
     1127            MATCH_NEXT();
     1128        }
     1129        case ByteTerm::TypeBodyAlternativeEnd:
     1130            ASSERT_NOT_REACHED();
     1131
    10231132            case ByteTerm::TypeAlternativeBegin:
    10241133            case ByteTerm::TypeAlternativeDisjunction: {
    1025                 int offset = term.alternative.next;
     1134                int offset = currentTerm().alternative.next;
    10261135                context->term += offset;
    1027                 return (offset > 0);
     1136                if (offset > 0)
     1137                    MATCH_NEXT();
     1138                BACKTRACK();
    10281139            }
    10291140            case ByteTerm::TypeAlternativeEnd: {
    10301141                // We should never backtrack back into an alternative of the main body of the regex.
    1031                 ASSERT(term.alternative.isParentheses);
    1032                 BackTrackInfoAlternative* backTrack = reinterpret_cast<BackTrackInfoAlternative*>(context->frame + term.frameLocation);
     1142                BackTrackInfoAlternative* backTrack = reinterpret_cast<BackTrackInfoAlternative*>(context->frame + currentTerm().frameLocation);
    10331143                unsigned offset = backTrack->offset;
    10341144                context->term -= offset;
    1035                 return false;
     1145                BACKTRACK();
    10361146            }
    10371147
     
    10391149            case ByteTerm::TypeAssertionEOL:
    10401150            case ByteTerm::TypeAssertionWordBoundary:
    1041                 return false;
    1042 
    1043             case ByteTerm::TypePatternCharacter:
    1044                 return backtrackPatternCharacter(term, context);
     1151                BACKTRACK();
     1152
     1153            case ByteTerm::TypePatternCharacterOnce:
     1154            case ByteTerm::TypePatternCharacterFixed:
     1155            case ByteTerm::TypePatternCharacterGreedy:
     1156            case ByteTerm::TypePatternCharacterNonGreedy:
     1157                if (backtrackPatternCharacter(currentTerm(), context))
     1158                    MATCH_NEXT();
     1159                BACKTRACK();
     1160            case ByteTerm::TypePatternCasedCharacterOnce:
     1161            case ByteTerm::TypePatternCasedCharacterFixed:
     1162            case ByteTerm::TypePatternCasedCharacterGreedy:
     1163            case ByteTerm::TypePatternCasedCharacterNonGreedy:
     1164                if (backtrackPatternCasedCharacter(currentTerm(), context))
     1165                    MATCH_NEXT();
     1166                BACKTRACK();
    10451167            case ByteTerm::TypeCharacterClass:
    1046                 return backtrackCharacterClass(term, context);
     1168                if (backtrackCharacterClass(currentTerm(), context))
     1169                    MATCH_NEXT();
     1170                BACKTRACK();
    10471171            case ByteTerm::TypeBackReference:
    1048                 return backtrackBackReference(term, context);
     1172                if (backtrackBackReference(currentTerm(), context))
     1173                    MATCH_NEXT();
     1174                BACKTRACK();
    10491175            case ByteTerm::TypeParenthesesSubpattern:
    1050                 return backtrackParentheses(term, context);
     1176                if (backtrackParentheses(currentTerm(), context))
     1177                    MATCH_NEXT();
     1178                BACKTRACK();
    10511179            case ByteTerm::TypeParenthesesSubpatternOnceBegin:
    1052                 return backtrackParenthesesOnceBegin(term, context);
     1180                if (backtrackParenthesesOnceBegin(currentTerm(), context))
     1181                    MATCH_NEXT();
     1182                BACKTRACK();
    10531183            case ByteTerm::TypeParenthesesSubpatternOnceEnd:
    1054                 return backtrackParenthesesOnceEnd(term, context);
    1055             case ByteTerm::TypeParentheticalAssertionOnceBegin:
    1056                 return backtrackParentheticalAssertionOnceBegin(term, context);
    1057             case ByteTerm::TypeParentheticalAssertionOnceEnd:
    1058                 return backtrackParentheticalAssertionOnceEnd(term, context);
     1184                if (backtrackParenthesesOnceEnd(currentTerm(), context))
     1185                    MATCH_NEXT();
     1186                BACKTRACK();
     1187            case ByteTerm::TypeParentheticalAssertionBegin:
     1188                if (backtrackParentheticalAssertionBegin(currentTerm(), context))
     1189                    MATCH_NEXT();
     1190                BACKTRACK();
     1191            case ByteTerm::TypeParentheticalAssertionEnd:
     1192                if (backtrackParentheticalAssertionEnd(currentTerm(), context))
     1193                    MATCH_NEXT();
     1194                BACKTRACK();
    10591195
    10601196            case ByteTerm::TypeCheckInput:
    1061                 input.uncheckInput(term.checkInputCount);
    1062                 return false;
    1063 
    1064             default:
    1065                 ASSERT_NOT_REACHED();
    1066                 return false;
    1067         }
    1068     }
    1069 
    1070     bool matchAlternative(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false)
    1071     {
    1072         if (btrack)
    1073             goto backtrack;
    1074 
    1075         context->term = -1;
    1076 
    1077         while (true) {
    1078             do {
    1079                 ++context->term;
    1080                 ASSERT(context->term < static_cast<int>(disjunction->terms.size()));
    1081                 if (disjunction->terms[context->term].type == ByteTerm::TypePatternEnd)
    1082                     return true;
    1083             } while (matchTerm(disjunction, context));
    1084 
    1085         backtrack:
    1086             do {
    1087                 if (!context->term--)
    1088                     return false;
    1089             } while (!backtrackTerm(disjunction, context));
    1090         }
    1091     }
    1092 
    1093     bool matchDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false)
    1094     {
    1095         if (btrack) {
    1096             bool matched = matchAlternative(disjunction, context, true);
    1097             if (matched) {
    1098                 context->matchEnd = input.getPos();
    1099                 return true;
    1100             }
    1101 
    1102         } else {
    1103             unsigned begin = input.getPos();
    1104             bool matched = matchAlternative(disjunction, context);
    1105             if (matched) {
    1106                 context->matchBegin = begin;
    1107                 context->matchEnd = input.getPos();
    1108                 return true;
    1109             }
    1110             input.setPos(begin);
    1111         }
    1112 
     1197                input.uncheckInput(currentTerm().checkInputCount);
     1198                BACKTRACK();
     1199        }
     1200
     1201        ASSERT_NOT_REACHED();
    11131202        return false;
    11141203    }
     
    11291218    int interpret()
    11301219    {
    1131         DisjunctionContext* context = allocDisjunctionContext(pattern->m_body.get());
    1132 
    11331220        for (unsigned i = 0; i < ((pattern->m_body->m_numSubpatterns + 1) << 1); ++i)
    11341221            output[i] = -1;
    11351222
    1136         while (true) {
    1137             bool matched = matchDisjunction(pattern->m_body.get(), context);
    1138 
    1139             if (matched) {
    1140                 output[0] = context->matchBegin;
    1141                 output[1] = context->matchEnd;
    1142 
    1143                 unsigned begin = context->matchBegin;
    1144                 freeDisjunctionContext(context);
    1145                 return begin;
    1146             }
    1147 
    1148             if (input.atEnd()) {
    1149                 freeDisjunctionContext(context);
    1150                 return -1;
    1151             }
    1152 
    1153             input.next();
    1154         }
     1223        DisjunctionContext* context = allocDisjunctionContext(pattern->m_body.get());
     1224
     1225        if (matchDisjunction(pattern->m_body.get(), context)) {
     1226            output[0] = context->matchBegin;
     1227            output[1] = context->matchEnd;
     1228        }
     1229
     1230        freeDisjunctionContext(context);
     1231
     1232        return output[0];
    11551233    }
    11561234
     
    12201298    void atomPatternCharacter(UChar ch, int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType)
    12211299    {
    1222         bodyDisjunction->terms.append(ByteTerm(ch, inputPosition));
     1300        if (m_pattern.m_ignoreCase) {
     1301            UChar lo = Unicode::toLower(ch);
     1302            UChar hi = Unicode::toUpper(ch);
     1303           
     1304            if (lo != hi) {
     1305                bodyDisjunction->terms.append(ByteTerm(lo, hi, inputPosition, frameLocation, quantityCount, quantityType));
     1306                return;
     1307            }
     1308        }
     1309
     1310        bodyDisjunction->terms.append(ByteTerm(ch, inputPosition, frameLocation, quantityCount, quantityType));
     1311    }
     1312   
     1313    void atomCharacterClass(CharacterClass* characterClass, bool invert, int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType)
     1314    {
     1315        bodyDisjunction->terms.append(ByteTerm(characterClass, invert, inputPosition));
    12231316
    12241317        bodyDisjunction->terms[bodyDisjunction->terms.size() - 1].atom.quantityCount = quantityCount;
     
    12261319        bodyDisjunction->terms[bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
    12271320    }
    1228    
    1229     void atomCharacterClass(CharacterClass* characterClass, bool invert, int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType)
    1230     {
    1231         bodyDisjunction->terms.append(ByteTerm(characterClass, invert, inputPosition));
     1321
     1322    void atomBackReference(unsigned subpatternId, int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType)
     1323    {
     1324        ASSERT(subpatternId);
     1325
     1326        bodyDisjunction->terms.append(ByteTerm::BackReference(subpatternId, inputPosition));
    12321327
    12331328        bodyDisjunction->terms[bodyDisjunction->terms.size() - 1].atom.quantityCount = quantityCount;
     
    12361331    }
    12371332
    1238     void atomBackReference(unsigned subpatternId, int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType)
    1239     {
    1240         ASSERT(subpatternId);
    1241 
    1242         bodyDisjunction->terms.append(ByteTerm::BackReference(subpatternId, inputPosition));
    1243 
    1244         bodyDisjunction->terms[bodyDisjunction->terms.size() - 1].atom.quantityCount = quantityCount;
    1245         bodyDisjunction->terms[bodyDisjunction->terms.size() - 1].atom.quantityType = quantityType;
    1246         bodyDisjunction->terms[bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
    1247     }
    1248 
    12491333    void atomParenthesesSubpatternBegin(unsigned subpatternId, bool capture, int inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation)
    12501334    {
     
    12531337        bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceBegin, subpatternId, capture, inputPosition));
    12541338        bodyDisjunction->terms[bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
    1255         bodyDisjunction->terms.append(ByteTerm::AlternativeBegin(true));
     1339        bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
    12561340        bodyDisjunction->terms[bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
    12571341
     
    12641348        int beginTerm = bodyDisjunction->terms.size();
    12651349
    1266         bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParentheticalAssertionOnceBegin, subpatternId, invert, 0));
     1350        bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParentheticalAssertionBegin, subpatternId, invert, 0));
    12671351        bodyDisjunction->terms[bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
    1268         bodyDisjunction->terms.append(ByteTerm::AlternativeBegin(true));
     1352        bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
    12691353        bodyDisjunction->terms[bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
    12701354
     
    12971381#endif
    12981382
    1299     void closeAlternative(bool inParentheses, int beginTerm)
     1383    void closeAlternative(int beginTerm)
    13001384    {
    13011385        int origBeginTerm = beginTerm;
     
    13171401            bodyDisjunction->terms[beginTerm].alternative.next = origBeginTerm - beginTerm;
    13181402
    1319             bodyDisjunction->terms.append(ByteTerm::AlternativeEnd(inParentheses));
     1403            bodyDisjunction->terms.append(ByteTerm::AlternativeEnd());
    13201404            bodyDisjunction->terms[endIndex].frameLocation = frameLocation;
    13211405        }
    13221406    }
    13231407
    1324     void atomParenthesesEnd(unsigned lastSubpatternId, int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType, unsigned callFrameSize = 0)
     1408    void closeBodyAlternative()
     1409    {
     1410        int beginTerm = 0;
     1411        int origBeginTerm = 0;
     1412        ASSERT(bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeBodyAlternativeBegin);
     1413        int endIndex = bodyDisjunction->terms.size();
     1414
     1415        unsigned frameLocation = bodyDisjunction->terms[beginTerm].frameLocation;
     1416
     1417        while (bodyDisjunction->terms[beginTerm].alternative.next) {
     1418            beginTerm += bodyDisjunction->terms[beginTerm].alternative.next;
     1419            ASSERT(bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeBodyAlternativeDisjunction);
     1420            bodyDisjunction->terms[beginTerm].alternative.end = endIndex - beginTerm;
     1421            bodyDisjunction->terms[beginTerm].frameLocation = frameLocation;
     1422        }
     1423       
     1424        bodyDisjunction->terms[beginTerm].alternative.next = origBeginTerm - beginTerm;
     1425
     1426        bodyDisjunction->terms.append(ByteTerm::BodyAlternativeEnd());
     1427        bodyDisjunction->terms[endIndex].frameLocation = frameLocation;
     1428    }
     1429
     1430    void atomParenthesesEnd(bool doInline, unsigned lastSubpatternId, int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType, unsigned callFrameSize = 0)
    13251431    {
    13261432        unsigned beginTerm = popParenthesesStack();
    1327         closeAlternative(true, beginTerm + 1);
     1433        closeAlternative(beginTerm + 1);
    13281434        unsigned endTerm = bodyDisjunction->terms.size();
    13291435
    1330         bool isAssertion = bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParentheticalAssertionOnceBegin;
     1436        bool isAssertion = bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParentheticalAssertionBegin;
    13311437        bool invertOrCapture = bodyDisjunction->terms[beginTerm].invertOrCapture;
    13321438        unsigned subpatternId = bodyDisjunction->terms[beginTerm].atom.subpatternId;
    13331439
    1334         bodyDisjunction->terms.append(ByteTerm(isAssertion ? ByteTerm::TypeParentheticalAssertionOnceEnd : ByteTerm::TypeParenthesesSubpatternOnceEnd, subpatternId, invertOrCapture, inputPosition));
     1440        bodyDisjunction->terms.append(ByteTerm(isAssertion ? ByteTerm::TypeParentheticalAssertionEnd : ByteTerm::TypeParenthesesSubpatternOnceEnd, subpatternId, invertOrCapture, inputPosition));
    13351441        bodyDisjunction->terms[beginTerm].atom.parenthesesWidth = endTerm - beginTerm;
    13361442        bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm;
    13371443        bodyDisjunction->terms[endTerm].frameLocation = frameLocation;
    13381444
    1339         if (isAssertion || (quantityCount == 1)) {
     1445        if (doInline) {
    13401446            bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount;
    13411447            bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
     
    13511457            unsigned numSubpatterns = lastSubpatternId - subpatternId + 1;
    13521458            ByteDisjunction* parenthesesDisjunction = new ByteDisjunction(numSubpatterns, callFrameSize);
     1459
     1460            parenthesesDisjunction->terms.append(ByteTerm::SubpatternBegin());
    13531461            for (unsigned termInParentheses = beginTerm + 1; termInParentheses < endTerm; ++termInParentheses)
    13541462                parenthesesDisjunction->terms.append(bodyDisjunction->terms[termInParentheses]);
    1355             parenthesesDisjunction->terms.append(ByteTerm::PatternEnd());
     1463            parenthesesDisjunction->terms.append(ByteTerm::SubpatternEnd());
    13561464
    13571465            bodyDisjunction->terms.shrink(beginTerm);
     
    13691477    {
    13701478        bodyDisjunction = new ByteDisjunction(numSubpatterns, callFrameSize);
    1371         bodyDisjunction->terms.append(ByteTerm::AlternativeBegin(false));
     1479        bodyDisjunction->terms.append(ByteTerm::BodyAlternativeBegin());
    13721480        bodyDisjunction->terms[0].frameLocation = 0;
    13731481        currentAlternativeIndex = 0;
     
    13761484    void regexEnd()
    13771485    {
    1378         closeAlternative(false, 0);
    1379         bodyDisjunction->terms.append(ByteTerm::PatternEnd());
    1380     }
    1381 
    1382     void alterantiveDisjunction()
     1486        closeBodyAlternative();
     1487    }
     1488
     1489    void alterantiveBodyDisjunction()
    13831490    {
    13841491        int newAlternativeIndex = bodyDisjunction->terms.size();
    13851492        bodyDisjunction->terms[currentAlternativeIndex].alternative.next = newAlternativeIndex - currentAlternativeIndex;
    1386         bodyDisjunction->terms.append(ByteTerm::AlternativeDisjunction(m_parenthesesStack.size()));
     1493        bodyDisjunction->terms.append(ByteTerm::BodyAlternativeDisjunction());
     1494
     1495        currentAlternativeIndex = newAlternativeIndex;
     1496    }
     1497
     1498    void alterantiveDisjunction()
     1499    {
     1500        int newAlternativeIndex = bodyDisjunction->terms.size();
     1501        bodyDisjunction->terms[currentAlternativeIndex].alternative.next = newAlternativeIndex - currentAlternativeIndex;
     1502        bodyDisjunction->terms.append(ByteTerm::AlternativeDisjunction());
    13871503
    13881504        currentAlternativeIndex = newAlternativeIndex;
     
    13941510            unsigned currentCountAlreadyChecked = inputCountAlreadyChecked;
    13951511       
    1396             if (alt)
    1397                 alterantiveDisjunction();
     1512            if (alt) {
     1513                if (disjunction == m_pattern.m_body)
     1514                    alterantiveBodyDisjunction();
     1515                else
     1516                    alterantiveDisjunction();
     1517            }
    13981518
    13991519            PatternAlternative* alternative = disjunction->m_alternatives[alt];
     
    14421562                            atomParenthesesSubpatternBegin(term.parentheses.subpatternId, term.invertOrCapture, delegateEndInputOffset - disjunctionAlreadyCheckedCount, term.frameLocation, term.frameLocation);
    14431563                            emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, term.parentheses.disjunction->m_minimumSize);
    1444                             atomParenthesesEnd(term.parentheses.lastSubpatternId, delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType, term.parentheses.disjunction->m_callFrameSize);
     1564                            atomParenthesesEnd(true, term.parentheses.lastSubpatternId, delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType, term.parentheses.disjunction->m_callFrameSize);
    14451565                        } else {
    14461566                            unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked;
    14471567                            atomParenthesesSubpatternBegin(term.parentheses.subpatternId, term.invertOrCapture, delegateEndInputOffset - disjunctionAlreadyCheckedCount, term.frameLocation, term.frameLocation + RegexStackSpaceForBackTrackInfoParenthesesOnce);
    14481568                            emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, 0);
    1449                             atomParenthesesEnd(term.parentheses.lastSubpatternId, delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType, term.parentheses.disjunction->m_callFrameSize);
     1569                            atomParenthesesEnd(true, term.parentheses.lastSubpatternId, delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType, term.parentheses.disjunction->m_callFrameSize);
    14501570                        }
    14511571                    } else {
     
    14531573                        atomParenthesesSubpatternBegin(term.parentheses.subpatternId, term.invertOrCapture, delegateEndInputOffset - disjunctionAlreadyCheckedCount, term.frameLocation, 0);
    14541574                        emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, 0);
    1455                         atomParenthesesEnd(term.parentheses.lastSubpatternId, delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType, term.parentheses.disjunction->m_callFrameSize);
     1575                        atomParenthesesEnd(false, term.parentheses.lastSubpatternId, delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType, term.parentheses.disjunction->m_callFrameSize);
    14561576                    }
    14571577                    break;
     
    14591579
    14601580                case PatternTerm::TypeParentheticalAssertion: {
    1461                     unsigned alternativeFrameLocation = term.inputPosition + RegexStackSpaceForBackTrackInfoParentheticalAssertionOnce;
     1581                    unsigned alternativeFrameLocation = term.inputPosition + RegexStackSpaceForBackTrackInfoParentheticalAssertion;
    14621582                   
    14631583                    atomParentheticalAssertionBegin(term.parentheses.subpatternId, term.invertOrCapture, term.frameLocation, alternativeFrameLocation);
    14641584                    emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, 0);
    1465                     atomParenthesesEnd(term.parentheses.lastSubpatternId, 0, term.frameLocation, term.quantityCount, term.quantityType);
     1585                    atomParenthesesEnd(true, term.parentheses.lastSubpatternId, 0, term.frameLocation, term.quantityCount, term.quantityType);
    14661586                    break;
    14671587                }
     
    15021622COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoBackReference) == (RegexStackSpaceForBackTrackInfoBackReference * sizeof(uintptr_t)), CheckRegexStackSpaceForBackTrackInfoBackReference);
    15031623COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoAlternative) == (RegexStackSpaceForBackTrackInfoAlternative * sizeof(uintptr_t)), CheckRegexStackSpaceForBackTrackInfoAlternative);
    1504 COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoParentheticalAssertionOnce) == (RegexStackSpaceForBackTrackInfoParentheticalAssertionOnce * sizeof(uintptr_t)), CheckRegexStackSpaceForBackTrackInfoParentheticalAssertionOnce);
     1624COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoParentheticalAssertion) == (RegexStackSpaceForBackTrackInfoParentheticalAssertion * sizeof(uintptr_t)), CheckRegexStackSpaceForBackTrackInfoParentheticalAssertion);
    15051625COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoParenthesesOnce) == (RegexStackSpaceForBackTrackInfoParenthesesOnce * sizeof(uintptr_t)), CheckRegexStackSpaceForBackTrackInfoParenthesesOnce);
    15061626COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoParentheses) == (RegexStackSpaceForBackTrackInfoParentheses * sizeof(uintptr_t)), CheckRegexStackSpaceForBackTrackInfoParentheses);
  • trunk/JavaScriptCore/yarr/RegexInterpreter.h

    r42481 r42853  
    4141struct ByteTerm {
    4242    enum Type {
     43        TypeBodyAlternativeBegin,
     44        TypeBodyAlternativeDisjunction,
     45        TypeBodyAlternativeEnd,
    4346        TypeAlternativeBegin,
    4447        TypeAlternativeDisjunction,
    4548        TypeAlternativeEnd,
    46         TypePatternEnd,
     49        TypeSubpatternBegin,
     50        TypeSubpatternEnd,
    4751        TypeAssertionBOL,
    4852        TypeAssertionEOL,
    4953        TypeAssertionWordBoundary,
    50         TypePatternCharacter,
     54        TypePatternCharacterOnce,
     55        TypePatternCharacterFixed,
     56        TypePatternCharacterGreedy,
     57        TypePatternCharacterNonGreedy,
     58        TypePatternCasedCharacterOnce,
     59        TypePatternCasedCharacterFixed,
     60        TypePatternCasedCharacterGreedy,
     61        TypePatternCasedCharacterNonGreedy,
    5162        TypeCharacterClass,
    5263        TypeBackReference,
     
    5465        TypeParenthesesSubpatternOnceBegin,
    5566        TypeParenthesesSubpatternOnceEnd,
    56         TypeParentheticalAssertionOnceBegin,
    57         TypeParentheticalAssertionOnceEnd,
     67        TypeParentheticalAssertionBegin,
     68        TypeParentheticalAssertionEnd,
    5869        TypeCheckInput,
    5970    } type;
     
    6374            union {
    6475                UChar patternCharacter;
     76                struct {
     77                    UChar lo;
     78                    UChar hi;
     79                } casedCharacter;
    6580                CharacterClass* characterClass;
    6681                unsigned subpatternId;
     
    7489        } atom;
    7590        struct {
    76             bool isParentheses;
    7791            int next;
    7892            int end;
     
    8397    int inputPosition;
    8498
    85     ByteTerm(UChar ch, int inputPos)
    86         : type(ByteTerm::TypePatternCharacter)
    87     {
     99    ByteTerm(UChar ch, int inputPos, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType)
     100        : frameLocation(frameLocation)
     101    {
     102        switch (quantityType) {
     103        case QuantifierFixedCount:
     104            type = (quantityCount == 1) ? ByteTerm::TypePatternCharacterOnce : ByteTerm::TypePatternCharacterFixed;
     105            break;
     106        case QuantifierGreedy:
     107            type = ByteTerm::TypePatternCharacterGreedy;
     108            break;
     109        case QuantifierNonGreedy:
     110            type = ByteTerm::TypePatternCharacterNonGreedy;
     111            break;
     112        }
     113
    88114        atom.patternCharacter = ch;
    89         atom.quantityType = QuantifierFixedCount;
    90         atom.quantityCount = 1;
     115        atom.quantityType = quantityType;
     116        atom.quantityCount = quantityCount;
     117        inputPosition = inputPos;
     118    }
     119
     120    ByteTerm(UChar lo, UChar hi, int inputPos, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType)
     121        : frameLocation(frameLocation)
     122    {
     123        switch (quantityType) {
     124        case QuantifierFixedCount:
     125            type = (quantityCount == 1) ? ByteTerm::TypePatternCasedCharacterOnce : ByteTerm::TypePatternCasedCharacterFixed;
     126            break;
     127        case QuantifierGreedy:
     128            type = ByteTerm::TypePatternCasedCharacterGreedy;
     129            break;
     130        case QuantifierNonGreedy:
     131            type = ByteTerm::TypePatternCasedCharacterNonGreedy;
     132            break;
     133        }
     134
     135        atom.casedCharacter.lo = lo;
     136        atom.casedCharacter.hi = hi;
     137        atom.quantityType = quantityType;
     138        atom.quantityCount = quantityCount;
    91139        inputPosition = inputPos;
    92140    }
     
    164212    }
    165213
    166     static ByteTerm AlternativeBegin(bool isParentheses)
     214    static ByteTerm BodyAlternativeBegin()
     215    {
     216        ByteTerm term(TypeBodyAlternativeBegin);
     217        term.alternative.next = 0;
     218        term.alternative.end = 0;
     219        return term;
     220    }
     221
     222    static ByteTerm BodyAlternativeDisjunction()
     223    {
     224        ByteTerm term(TypeBodyAlternativeDisjunction);
     225        term.alternative.next = 0;
     226        term.alternative.end = 0;
     227        return term;
     228    }
     229
     230    static ByteTerm BodyAlternativeEnd()
     231    {
     232        ByteTerm term(TypeBodyAlternativeEnd);
     233        term.alternative.next = 0;
     234        term.alternative.end = 0;
     235        return term;
     236    }
     237
     238    static ByteTerm AlternativeBegin()
    167239    {
    168240        ByteTerm term(TypeAlternativeBegin);
    169         term.alternative.isParentheses = isParentheses;
    170         term.alternative.next = 0;
    171         term.alternative.end = 0;
    172         return term;
    173     }
    174 
    175     static ByteTerm AlternativeDisjunction(bool isParentheses)
     241        term.alternative.next = 0;
     242        term.alternative.end = 0;
     243        return term;
     244    }
     245
     246    static ByteTerm AlternativeDisjunction()
    176247    {
    177248        ByteTerm term(TypeAlternativeDisjunction);
    178         term.alternative.isParentheses = isParentheses;
    179         term.alternative.next = 0;
    180         term.alternative.end = 0;
    181         return term;
    182     }
    183 
    184     static ByteTerm AlternativeEnd(bool isParentheses)
     249        term.alternative.next = 0;
     250        term.alternative.end = 0;
     251        return term;
     252    }
     253
     254    static ByteTerm AlternativeEnd()
    185255    {
    186256        ByteTerm term(TypeAlternativeEnd);
    187         term.alternative.isParentheses = isParentheses;
    188         term.alternative.next = 0;
    189         term.alternative.end = 0;
    190         return term;
    191     }
    192 
    193     static ByteTerm PatternEnd()
    194     {
    195         return ByteTerm(TypePatternEnd);
     257        term.alternative.next = 0;
     258        term.alternative.end = 0;
     259        return term;
     260    }
     261
     262    static ByteTerm SubpatternBegin()
     263    {
     264        return ByteTerm(TypeSubpatternBegin);
     265    }
     266
     267    static ByteTerm SubpatternEnd()
     268    {
     269        return ByteTerm(TypeSubpatternEnd);
    196270    }
    197271
  • trunk/JavaScriptCore/yarr/RegexJIT.cpp

    r42800 r42853  
    961961
    962962        unsigned parenthesesFrameLocation = term.frameLocation;
    963         unsigned alternativeFrameLocation = parenthesesFrameLocation + RegexStackSpaceForBackTrackInfoParentheticalAssertionOnce;
     963        unsigned alternativeFrameLocation = parenthesesFrameLocation + RegexStackSpaceForBackTrackInfoParentheticalAssertion;
    964964
    965965        int countCheckedAfterAssertion = state.checkedTotal - term.inputPosition;
  • trunk/JavaScriptCore/yarr/RegexPattern.h

    r42800 r42853  
    4141#define RegexStackSpaceForBackTrackInfoBackReference 2
    4242#define RegexStackSpaceForBackTrackInfoAlternative 1 // One per alternative.
    43 #define RegexStackSpaceForBackTrackInfoParentheticalAssertionOnce 1
     43#define RegexStackSpaceForBackTrackInfoParentheticalAssertion 1
    4444#define RegexStackSpaceForBackTrackInfoParenthesesOnce 1 // Only for !fixed quantifiers.
    4545#define RegexStackSpaceForBackTrackInfoParentheses 4
Note: See TracChangeset for help on using the changeset viewer.