Ignore:
Timestamp:
Apr 24, 2009, 4:51:47 PM (16 years ago)
Author:
[email protected]
Message:

2009-04-24 Gavin Barraclough <[email protected]>

Reviewed by Sam "Wesley" Weinig.

Improve performance to YARR interpreter.
(From about 3x slower than PCRE on regex-dna to about 30% slower).

  • yarr/RegexCompiler.cpp: (JSC::Yarr::RegexPatternConstructor::setupAlternativeOffsets):
  • yarr/RegexInterpreter.cpp: (JSC::Yarr::Interpreter::checkCharacter): (JSC::Yarr::Interpreter::checkCasedCharacter): (JSC::Yarr::Interpreter::backtrackPatternCharacter): (JSC::Yarr::Interpreter::backtrackPatternCasedCharacter): (JSC::Yarr::Interpreter::matchParentheticalAssertionBegin): (JSC::Yarr::Interpreter::matchParentheticalAssertionEnd): (JSC::Yarr::Interpreter::backtrackParentheticalAssertionBegin): (JSC::Yarr::Interpreter::backtrackParentheticalAssertionEnd): (JSC::Yarr::Interpreter::matchDisjunction): (JSC::Yarr::Interpreter::interpret): (JSC::Yarr::ByteCompiler::atomPatternCharacter): (JSC::Yarr::ByteCompiler::atomParenthesesSubpatternBegin): (JSC::Yarr::ByteCompiler::atomParentheticalAssertionBegin): (JSC::Yarr::ByteCompiler::closeAlternative): (JSC::Yarr::ByteCompiler::closeBodyAlternative): (JSC::Yarr::ByteCompiler::atomParenthesesEnd): (JSC::Yarr::ByteCompiler::regexBegin): (JSC::Yarr::ByteCompiler::regexEnd): (JSC::Yarr::ByteCompiler::alterantiveBodyDisjunction): (JSC::Yarr::ByteCompiler::alterantiveDisjunction): (JSC::Yarr::ByteCompiler::emitDisjunction):
  • yarr/RegexInterpreter.h: (JSC::Yarr::ByteTerm::): (JSC::Yarr::ByteTerm::ByteTerm): (JSC::Yarr::ByteTerm::BodyAlternativeBegin): (JSC::Yarr::ByteTerm::BodyAlternativeDisjunction): (JSC::Yarr::ByteTerm::BodyAlternativeEnd): (JSC::Yarr::ByteTerm::AlternativeBegin): (JSC::Yarr::ByteTerm::AlternativeDisjunction): (JSC::Yarr::ByteTerm::AlternativeEnd): (JSC::Yarr::ByteTerm::SubpatternBegin): (JSC::Yarr::ByteTerm::SubpatternEnd):
  • yarr/RegexJIT.cpp: (JSC::Yarr::RegexGenerator::generateParentheticalAssertion):
  • yarr/RegexPattern.h:
File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/JavaScriptCore/yarr/RegexInterpreter.cpp

    r42805 r42853  
    5353        uintptr_t offset;
    5454    };
    55     struct BackTrackInfoParentheticalAssertionOnce {
     55    struct BackTrackInfoParentheticalAssertion {
    5656        uintptr_t begin;
    5757    };
     
    293293    bool checkCharacter(int testChar, int inputPosition)
    294294    {
     295        return testChar == input.readChecked(inputPosition);
     296    }
     297
     298    bool checkCasedCharacter(int loChar, int hiChar, int inputPosition)
     299    {
    295300        int ch = input.readChecked(inputPosition);
    296         return (pattern->m_ignoreCase ? ((Unicode::toLower(testChar) == ch) || (Unicode::toUpper(testChar) == ch)) : (testChar == ch));
     301        return (loChar == ch) || (hiChar == ch);
    297302    }
    298303
     
    363368    }
    364369
    365     bool matchPatternCharacter(ByteTerm& term, DisjunctionContext* context)
    366     {
    367         ASSERT(term.type == ByteTerm::TypePatternCharacter);
    368         BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
    369 
    370         switch (term.atom.quantityType) {
    371         case QuantifierFixedCount: {
    372             for (unsigned matchAmount = 0; matchAmount < term.atom.quantityCount; ++matchAmount) {
    373                 if (!checkCharacter(term.atom.patternCharacter, term.inputPosition + matchAmount))
    374                     return false;
    375             }
    376             return true;
    377         }
    378 
    379         case QuantifierGreedy: {
    380             unsigned matchAmount = 0;
    381             while ((matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
    382                 if (!checkCharacter(term.atom.patternCharacter, term.inputPosition - 1)) {
    383                     input.uncheckInput(1);
    384                     break;
    385                 }
    386                 ++matchAmount;
    387             }
    388             backTrack->matchAmount = matchAmount;
    389 
    390             return true;
    391         }
    392 
    393         case QuantifierNonGreedy:
    394             backTrack->matchAmount = 0;
    395             return true;
    396         }
    397 
    398         ASSERT_NOT_REACHED();
    399         return false;
    400     }
    401 
    402370    bool backtrackPatternCharacter(ByteTerm& term, DisjunctionContext* context)
    403371    {
    404         ASSERT(term.type == ByteTerm::TypePatternCharacter);
    405372        BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
    406373
     
    421388                ++backTrack->matchAmount;
    422389                if (checkCharacter(term.atom.patternCharacter, term.inputPosition - 1))
     390                    return true;
     391            }
     392            input.uncheckInput(backTrack->matchAmount);
     393            break;
     394        }
     395
     396        return false;
     397    }
     398
     399    bool backtrackPatternCasedCharacter(ByteTerm& term, DisjunctionContext* context)
     400    {
     401        BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
     402
     403        switch (term.atom.quantityType) {
     404        case QuantifierFixedCount:
     405            break;
     406
     407        case QuantifierGreedy:
     408            if (backTrack->matchAmount) {
     409                --backTrack->matchAmount;
     410                input.uncheckInput(1);
     411                return true;
     412            }
     413            break;
     414
     415        case QuantifierNonGreedy:
     416            if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
     417                ++backTrack->matchAmount;
     418                if (checkCasedCharacter(term.atom.casedCharacter.lo, term.atom.casedCharacter.hi, term.inputPosition - 1))
    423419                    return true;
    424420            }
     
    718714    }
    719715
    720     bool matchParentheticalAssertionOnceBegin(ByteTerm& term, DisjunctionContext* context)
    721     {
    722         ASSERT(term.type == ByteTerm::TypeParentheticalAssertionOnceBegin);
     716    bool matchParentheticalAssertionBegin(ByteTerm& term, DisjunctionContext* context)
     717    {
     718        ASSERT(term.type == ByteTerm::TypeParentheticalAssertionBegin);
    723719        ASSERT(term.atom.quantityCount == 1);
    724720
    725         BackTrackInfoParentheticalAssertionOnce* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertionOnce*>(context->frame + term.frameLocation);
     721        BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);
    726722
    727723        backTrack->begin = input.getPos();
     
    729725    }
    730726
    731     bool matchParentheticalAssertionOnceEnd(ByteTerm& term, DisjunctionContext* context)
    732     {
    733         ASSERT(term.type == ByteTerm::TypeParentheticalAssertionOnceEnd);
     727    bool matchParentheticalAssertionEnd(ByteTerm& term, DisjunctionContext* context)
     728    {
     729        ASSERT(term.type == ByteTerm::TypeParentheticalAssertionEnd);
    734730        ASSERT(term.atom.quantityCount == 1);
    735731
    736         BackTrackInfoParentheticalAssertionOnce* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertionOnce*>(context->frame + term.frameLocation);
     732        BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);
    737733
    738734        input.setPos(backTrack->begin);
     
    747743    }
    748744
    749     bool backtrackParentheticalAssertionOnceBegin(ByteTerm& term, DisjunctionContext* context)
    750     {
    751         ASSERT(term.type == ByteTerm::TypeParentheticalAssertionOnceBegin);
     745    bool backtrackParentheticalAssertionBegin(ByteTerm& term, DisjunctionContext* context)
     746    {
     747        ASSERT(term.type == ByteTerm::TypeParentheticalAssertionBegin);
    752748        ASSERT(term.atom.quantityCount == 1);
    753749
     
    761757    }
    762758
    763     bool backtrackParentheticalAssertionOnceEnd(ByteTerm& term, DisjunctionContext* context)
    764     {
    765         ASSERT(term.type == ByteTerm::TypeParentheticalAssertionOnceEnd);
     759    bool backtrackParentheticalAssertionEnd(ByteTerm& term, DisjunctionContext* context)
     760    {
     761        ASSERT(term.type == ByteTerm::TypeParentheticalAssertionEnd);
    766762        ASSERT(term.atom.quantityCount == 1);
    767763
    768         BackTrackInfoParentheticalAssertionOnce* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertionOnce*>(context->frame + term.frameLocation);
     764        BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);
    769765
    770766        input.setPos(backTrack->begin);
     
    962958    }
    963959
    964     bool matchTerm(ByteDisjunction* disjunction, DisjunctionContext* context)
    965     {
    966         ByteTerm& term = disjunction->terms[context->term];
    967 
    968         switch (term.type) {
    969             case ByteTerm::TypeAlternativeBegin:
    970                 return true;
    971             case ByteTerm::TypeAlternativeDisjunction:
    972             case ByteTerm::TypeAlternativeEnd: {
    973                 int offset = term.alternative.end;
    974                 if (term.alternative.isParentheses) {
    975                     BackTrackInfoAlternative* backTrack = reinterpret_cast<BackTrackInfoAlternative*>(context->frame + term.frameLocation);
    976                     backTrack->offset = offset;
     960#define MATCH_NEXT() { ++context->term; goto matchAgain; }
     961#define BACKTRACK() { --context->term; goto backtrack; }
     962#define currentTerm() (disjunction->terms[context->term])
     963    bool matchDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false)
     964    {
     965        if (btrack)
     966            BACKTRACK();
     967
     968        context->matchBegin = input.getPos();
     969        context->term = 0;
     970
     971    matchAgain:
     972        ASSERT(context->term < static_cast<int>(disjunction->terms.size()));
     973
     974        switch (currentTerm().type) {
     975        case ByteTerm::TypeSubpatternBegin:
     976            MATCH_NEXT();
     977        case ByteTerm::TypeSubpatternEnd:
     978            context->matchEnd = input.getPos();
     979            return true;
     980
     981        case ByteTerm::TypeBodyAlternativeBegin:
     982            MATCH_NEXT();
     983        case ByteTerm::TypeBodyAlternativeDisjunction:
     984        case ByteTerm::TypeBodyAlternativeEnd:
     985            context->matchEnd = input.getPos();
     986            return true;
     987
     988        case ByteTerm::TypeAlternativeBegin:
     989            MATCH_NEXT();
     990        case ByteTerm::TypeAlternativeDisjunction:
     991        case ByteTerm::TypeAlternativeEnd: {
     992            int offset = currentTerm().alternative.end;
     993            BackTrackInfoAlternative* backTrack = reinterpret_cast<BackTrackInfoAlternative*>(context->frame + currentTerm().frameLocation);
     994            backTrack->offset = offset;
     995            context->term += offset;
     996            MATCH_NEXT();
     997        }
     998
     999        case ByteTerm::TypeAssertionBOL:
     1000            if (matchAssertionBOL(currentTerm()))
     1001                MATCH_NEXT();
     1002            BACKTRACK();
     1003        case ByteTerm::TypeAssertionEOL:
     1004            if (matchAssertionEOL(currentTerm()))
     1005                MATCH_NEXT();
     1006            BACKTRACK();
     1007        case ByteTerm::TypeAssertionWordBoundary:
     1008            if (matchAssertionWordBoundary(currentTerm()))
     1009                MATCH_NEXT();
     1010            BACKTRACK();
     1011
     1012        case ByteTerm::TypePatternCharacterOnce:
     1013        case ByteTerm::TypePatternCharacterFixed: {
     1014            for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityCount; ++matchAmount) {
     1015                if (!checkCharacter(currentTerm().atom.patternCharacter, currentTerm().inputPosition + matchAmount))
     1016                    BACKTRACK();
     1017            }
     1018            MATCH_NEXT();
     1019        }
     1020        case ByteTerm::TypePatternCharacterGreedy: {
     1021            BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
     1022            unsigned matchAmount = 0;
     1023            while ((matchAmount < currentTerm().atom.quantityCount) && input.checkInput(1)) {
     1024                if (!checkCharacter(currentTerm().atom.patternCharacter, currentTerm().inputPosition - 1)) {
     1025                    input.uncheckInput(1);
     1026                    break;
    9771027                }
    978                 context->term += offset;
    979                 return true;
    980             }
    981 
    982             case ByteTerm::TypeAssertionBOL:
    983                 return matchAssertionBOL(term);
    984             case ByteTerm::TypeAssertionEOL:
    985                 return matchAssertionEOL(term);
    986             case ByteTerm::TypeAssertionWordBoundary:
    987                 return matchAssertionWordBoundary(term);
    988 
    989             case ByteTerm::TypePatternCharacter:
    990                 return matchPatternCharacter(term, context);
    991             case ByteTerm::TypeCharacterClass:
    992                 return matchCharacterClass(term, context);
    993             case ByteTerm::TypeBackReference:
    994                 return matchBackReference(term, context);
    995             case ByteTerm::TypeParenthesesSubpattern:
    996                 return matchParentheses(term, context);
    997             case ByteTerm::TypeParenthesesSubpatternOnceBegin:
    998                 return matchParenthesesOnceBegin(term, context);
    999             case ByteTerm::TypeParenthesesSubpatternOnceEnd:
    1000                 return matchParenthesesOnceEnd(term, context);
    1001             case ByteTerm::TypeParentheticalAssertionOnceBegin:
    1002                 return matchParentheticalAssertionOnceBegin(term, context);
    1003             case ByteTerm::TypeParentheticalAssertionOnceEnd:
    1004                 return matchParentheticalAssertionOnceEnd(term, context);
    1005 
    1006             case ByteTerm::TypeCheckInput:
    1007                 if (input.checkInput(term.checkInputCount))
    1008                     return true;
    1009                 else
    1010                     return false;
    1011 
    1012             default:
    1013                 ASSERT_NOT_REACHED();
     1028                ++matchAmount;
     1029            }
     1030            backTrack->matchAmount = matchAmount;
     1031
     1032            MATCH_NEXT();
     1033        }
     1034        case ByteTerm::TypePatternCharacterNonGreedy: {
     1035            BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
     1036            backTrack->matchAmount = 0;
     1037            MATCH_NEXT();
     1038        }
     1039
     1040        case ByteTerm::TypePatternCasedCharacterOnce:
     1041        case ByteTerm::TypePatternCasedCharacterFixed: {
     1042            for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityCount; ++matchAmount) {
     1043                if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition + matchAmount))
     1044                    BACKTRACK();
     1045            }
     1046            MATCH_NEXT();
     1047        }
     1048        case ByteTerm::TypePatternCasedCharacterGreedy: {
     1049            BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
     1050            unsigned matchAmount = 0;
     1051            while ((matchAmount < currentTerm().atom.quantityCount) && input.checkInput(1)) {
     1052                if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition - 1)) {
     1053                    input.uncheckInput(1);
     1054                    break;
     1055                }
     1056                ++matchAmount;
     1057            }
     1058            backTrack->matchAmount = matchAmount;
     1059
     1060            MATCH_NEXT();
     1061        }
     1062        case ByteTerm::TypePatternCasedCharacterNonGreedy: {
     1063            BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
     1064            backTrack->matchAmount = 0;
     1065            MATCH_NEXT();
     1066        }
     1067
     1068        case ByteTerm::TypeCharacterClass:
     1069            if (matchCharacterClass(currentTerm(), context))
     1070                MATCH_NEXT();
     1071            BACKTRACK();
     1072        case ByteTerm::TypeBackReference:
     1073            if (matchBackReference(currentTerm(), context))
     1074                MATCH_NEXT();
     1075            BACKTRACK();
     1076        case ByteTerm::TypeParenthesesSubpattern:
     1077            if (matchParentheses(currentTerm(), context))
     1078                MATCH_NEXT();
     1079            BACKTRACK();
     1080        case ByteTerm::TypeParenthesesSubpatternOnceBegin:
     1081            if (matchParenthesesOnceBegin(currentTerm(), context))
     1082                MATCH_NEXT();
     1083            BACKTRACK();
     1084        case ByteTerm::TypeParenthesesSubpatternOnceEnd:
     1085            if (matchParenthesesOnceEnd(currentTerm(), context))
     1086                MATCH_NEXT();
     1087            BACKTRACK();
     1088        case ByteTerm::TypeParentheticalAssertionBegin:
     1089            if (matchParentheticalAssertionBegin(currentTerm(), context))
     1090                MATCH_NEXT();
     1091            BACKTRACK();
     1092        case ByteTerm::TypeParentheticalAssertionEnd:
     1093            if (matchParentheticalAssertionEnd(currentTerm(), context))
     1094                MATCH_NEXT();
     1095            BACKTRACK();
     1096
     1097        case ByteTerm::TypeCheckInput:
     1098            if (input.checkInput(currentTerm().checkInputCount))
     1099                MATCH_NEXT();
     1100            BACKTRACK();
     1101        }
     1102
     1103        // We should never fall-through to here.
     1104        ASSERT_NOT_REACHED();
     1105
     1106    backtrack:
     1107        ASSERT(context->term < static_cast<int>(disjunction->terms.size()));
     1108
     1109        switch (currentTerm().type) {
     1110        case ByteTerm::TypeSubpatternBegin:
     1111            return false;
     1112        case ByteTerm::TypeSubpatternEnd:
     1113            ASSERT_NOT_REACHED();
     1114
     1115        case ByteTerm::TypeBodyAlternativeBegin:
     1116        case ByteTerm::TypeBodyAlternativeDisjunction: {
     1117            int offset = currentTerm().alternative.next;
     1118            context->term += offset;
     1119            if (offset > 0)
     1120                MATCH_NEXT();
     1121
     1122            if (input.atEnd())
    10141123                return false;
    1015         }
    1016     }
    1017 
    1018     bool backtrackTerm(ByteDisjunction* disjunction, DisjunctionContext* context)
    1019     {
    1020         ByteTerm& term = disjunction->terms[context->term];
    1021 
    1022         switch (term.type) {
     1124
     1125            input.next();
     1126            context->matchBegin = input.getPos();
     1127            MATCH_NEXT();
     1128        }
     1129        case ByteTerm::TypeBodyAlternativeEnd:
     1130            ASSERT_NOT_REACHED();
     1131
    10231132            case ByteTerm::TypeAlternativeBegin:
    10241133            case ByteTerm::TypeAlternativeDisjunction: {
    1025                 int offset = term.alternative.next;
     1134                int offset = currentTerm().alternative.next;
    10261135                context->term += offset;
    1027                 return (offset > 0);
     1136                if (offset > 0)
     1137                    MATCH_NEXT();
     1138                BACKTRACK();
    10281139            }
    10291140            case ByteTerm::TypeAlternativeEnd: {
    10301141                // We should never backtrack back into an alternative of the main body of the regex.
    1031                 ASSERT(term.alternative.isParentheses);
    1032                 BackTrackInfoAlternative* backTrack = reinterpret_cast<BackTrackInfoAlternative*>(context->frame + term.frameLocation);
     1142                BackTrackInfoAlternative* backTrack = reinterpret_cast<BackTrackInfoAlternative*>(context->frame + currentTerm().frameLocation);
    10331143                unsigned offset = backTrack->offset;
    10341144                context->term -= offset;
    1035                 return false;
     1145                BACKTRACK();
    10361146            }
    10371147
     
    10391149            case ByteTerm::TypeAssertionEOL:
    10401150            case ByteTerm::TypeAssertionWordBoundary:
    1041                 return false;
    1042 
    1043             case ByteTerm::TypePatternCharacter:
    1044                 return backtrackPatternCharacter(term, context);
     1151                BACKTRACK();
     1152
     1153            case ByteTerm::TypePatternCharacterOnce:
     1154            case ByteTerm::TypePatternCharacterFixed:
     1155            case ByteTerm::TypePatternCharacterGreedy:
     1156            case ByteTerm::TypePatternCharacterNonGreedy:
     1157                if (backtrackPatternCharacter(currentTerm(), context))
     1158                    MATCH_NEXT();
     1159                BACKTRACK();
     1160            case ByteTerm::TypePatternCasedCharacterOnce:
     1161            case ByteTerm::TypePatternCasedCharacterFixed:
     1162            case ByteTerm::TypePatternCasedCharacterGreedy:
     1163            case ByteTerm::TypePatternCasedCharacterNonGreedy:
     1164                if (backtrackPatternCasedCharacter(currentTerm(), context))
     1165                    MATCH_NEXT();
     1166                BACKTRACK();
    10451167            case ByteTerm::TypeCharacterClass:
    1046                 return backtrackCharacterClass(term, context);
     1168                if (backtrackCharacterClass(currentTerm(), context))
     1169                    MATCH_NEXT();
     1170                BACKTRACK();
    10471171            case ByteTerm::TypeBackReference:
    1048                 return backtrackBackReference(term, context);
     1172                if (backtrackBackReference(currentTerm(), context))
     1173                    MATCH_NEXT();
     1174                BACKTRACK();
    10491175            case ByteTerm::TypeParenthesesSubpattern:
    1050                 return backtrackParentheses(term, context);
     1176                if (backtrackParentheses(currentTerm(), context))
     1177                    MATCH_NEXT();
     1178                BACKTRACK();
    10511179            case ByteTerm::TypeParenthesesSubpatternOnceBegin:
    1052                 return backtrackParenthesesOnceBegin(term, context);
     1180                if (backtrackParenthesesOnceBegin(currentTerm(), context))
     1181                    MATCH_NEXT();
     1182                BACKTRACK();
    10531183            case ByteTerm::TypeParenthesesSubpatternOnceEnd:
    1054                 return backtrackParenthesesOnceEnd(term, context);
    1055             case ByteTerm::TypeParentheticalAssertionOnceBegin:
    1056                 return backtrackParentheticalAssertionOnceBegin(term, context);
    1057             case ByteTerm::TypeParentheticalAssertionOnceEnd:
    1058                 return backtrackParentheticalAssertionOnceEnd(term, context);
     1184                if (backtrackParenthesesOnceEnd(currentTerm(), context))
     1185                    MATCH_NEXT();
     1186                BACKTRACK();
     1187            case ByteTerm::TypeParentheticalAssertionBegin:
     1188                if (backtrackParentheticalAssertionBegin(currentTerm(), context))
     1189                    MATCH_NEXT();
     1190                BACKTRACK();
     1191            case ByteTerm::TypeParentheticalAssertionEnd:
     1192                if (backtrackParentheticalAssertionEnd(currentTerm(), context))
     1193                    MATCH_NEXT();
     1194                BACKTRACK();
    10591195
    10601196            case ByteTerm::TypeCheckInput:
    1061                 input.uncheckInput(term.checkInputCount);
    1062                 return false;
    1063 
    1064             default:
    1065                 ASSERT_NOT_REACHED();
    1066                 return false;
    1067         }
    1068     }
    1069 
    1070     bool matchAlternative(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false)
    1071     {
    1072         if (btrack)
    1073             goto backtrack;
    1074 
    1075         context->term = -1;
    1076 
    1077         while (true) {
    1078             do {
    1079                 ++context->term;
    1080                 ASSERT(context->term < static_cast<int>(disjunction->terms.size()));
    1081                 if (disjunction->terms[context->term].type == ByteTerm::TypePatternEnd)
    1082                     return true;
    1083             } while (matchTerm(disjunction, context));
    1084 
    1085         backtrack:
    1086             do {
    1087                 if (!context->term--)
    1088                     return false;
    1089             } while (!backtrackTerm(disjunction, context));
    1090         }
    1091     }
    1092 
    1093     bool matchDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false)
    1094     {
    1095         if (btrack) {
    1096             bool matched = matchAlternative(disjunction, context, true);
    1097             if (matched) {
    1098                 context->matchEnd = input.getPos();
    1099                 return true;
    1100             }
    1101 
    1102         } else {
    1103             unsigned begin = input.getPos();
    1104             bool matched = matchAlternative(disjunction, context);
    1105             if (matched) {
    1106                 context->matchBegin = begin;
    1107                 context->matchEnd = input.getPos();
    1108                 return true;
    1109             }
    1110             input.setPos(begin);
    1111         }
    1112 
     1197                input.uncheckInput(currentTerm().checkInputCount);
     1198                BACKTRACK();
     1199        }
     1200
     1201        ASSERT_NOT_REACHED();
    11131202        return false;
    11141203    }
     
    11291218    int interpret()
    11301219    {
    1131         DisjunctionContext* context = allocDisjunctionContext(pattern->m_body.get());
    1132 
    11331220        for (unsigned i = 0; i < ((pattern->m_body->m_numSubpatterns + 1) << 1); ++i)
    11341221            output[i] = -1;
    11351222
    1136         while (true) {
    1137             bool matched = matchDisjunction(pattern->m_body.get(), context);
    1138 
    1139             if (matched) {
    1140                 output[0] = context->matchBegin;
    1141                 output[1] = context->matchEnd;
    1142 
    1143                 unsigned begin = context->matchBegin;
    1144                 freeDisjunctionContext(context);
    1145                 return begin;
    1146             }
    1147 
    1148             if (input.atEnd()) {
    1149                 freeDisjunctionContext(context);
    1150                 return -1;
    1151             }
    1152 
    1153             input.next();
    1154         }
     1223        DisjunctionContext* context = allocDisjunctionContext(pattern->m_body.get());
     1224
     1225        if (matchDisjunction(pattern->m_body.get(), context)) {
     1226            output[0] = context->matchBegin;
     1227            output[1] = context->matchEnd;
     1228        }
     1229
     1230        freeDisjunctionContext(context);
     1231
     1232        return output[0];
    11551233    }
    11561234
     
    12201298    void atomPatternCharacter(UChar ch, int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType)
    12211299    {
    1222         bodyDisjunction->terms.append(ByteTerm(ch, inputPosition));
     1300        if (m_pattern.m_ignoreCase) {
     1301            UChar lo = Unicode::toLower(ch);
     1302            UChar hi = Unicode::toUpper(ch);
     1303           
     1304            if (lo != hi) {
     1305                bodyDisjunction->terms.append(ByteTerm(lo, hi, inputPosition, frameLocation, quantityCount, quantityType));
     1306                return;
     1307            }
     1308        }
     1309
     1310        bodyDisjunction->terms.append(ByteTerm(ch, inputPosition, frameLocation, quantityCount, quantityType));
     1311    }
     1312   
     1313    void atomCharacterClass(CharacterClass* characterClass, bool invert, int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType)
     1314    {
     1315        bodyDisjunction->terms.append(ByteTerm(characterClass, invert, inputPosition));
    12231316
    12241317        bodyDisjunction->terms[bodyDisjunction->terms.size() - 1].atom.quantityCount = quantityCount;
     
    12261319        bodyDisjunction->terms[bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
    12271320    }
    1228    
    1229     void atomCharacterClass(CharacterClass* characterClass, bool invert, int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType)
    1230     {
    1231         bodyDisjunction->terms.append(ByteTerm(characterClass, invert, inputPosition));
     1321
     1322    void atomBackReference(unsigned subpatternId, int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType)
     1323    {
     1324        ASSERT(subpatternId);
     1325
     1326        bodyDisjunction->terms.append(ByteTerm::BackReference(subpatternId, inputPosition));
    12321327
    12331328        bodyDisjunction->terms[bodyDisjunction->terms.size() - 1].atom.quantityCount = quantityCount;
     
    12361331    }
    12371332
    1238     void atomBackReference(unsigned subpatternId, int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType)
    1239     {
    1240         ASSERT(subpatternId);
    1241 
    1242         bodyDisjunction->terms.append(ByteTerm::BackReference(subpatternId, inputPosition));
    1243 
    1244         bodyDisjunction->terms[bodyDisjunction->terms.size() - 1].atom.quantityCount = quantityCount;
    1245         bodyDisjunction->terms[bodyDisjunction->terms.size() - 1].atom.quantityType = quantityType;
    1246         bodyDisjunction->terms[bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
    1247     }
    1248 
    12491333    void atomParenthesesSubpatternBegin(unsigned subpatternId, bool capture, int inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation)
    12501334    {
     
    12531337        bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceBegin, subpatternId, capture, inputPosition));
    12541338        bodyDisjunction->terms[bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
    1255         bodyDisjunction->terms.append(ByteTerm::AlternativeBegin(true));
     1339        bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
    12561340        bodyDisjunction->terms[bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
    12571341
     
    12641348        int beginTerm = bodyDisjunction->terms.size();
    12651349
    1266         bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParentheticalAssertionOnceBegin, subpatternId, invert, 0));
     1350        bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParentheticalAssertionBegin, subpatternId, invert, 0));
    12671351        bodyDisjunction->terms[bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
    1268         bodyDisjunction->terms.append(ByteTerm::AlternativeBegin(true));
     1352        bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
    12691353        bodyDisjunction->terms[bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
    12701354
     
    12971381#endif
    12981382
    1299     void closeAlternative(bool inParentheses, int beginTerm)
     1383    void closeAlternative(int beginTerm)
    13001384    {
    13011385        int origBeginTerm = beginTerm;
     
    13171401            bodyDisjunction->terms[beginTerm].alternative.next = origBeginTerm - beginTerm;
    13181402
    1319             bodyDisjunction->terms.append(ByteTerm::AlternativeEnd(inParentheses));
     1403            bodyDisjunction->terms.append(ByteTerm::AlternativeEnd());
    13201404            bodyDisjunction->terms[endIndex].frameLocation = frameLocation;
    13211405        }
    13221406    }
    13231407
    1324     void atomParenthesesEnd(unsigned lastSubpatternId, int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType, unsigned callFrameSize = 0)
     1408    void closeBodyAlternative()
     1409    {
     1410        int beginTerm = 0;
     1411        int origBeginTerm = 0;
     1412        ASSERT(bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeBodyAlternativeBegin);
     1413        int endIndex = bodyDisjunction->terms.size();
     1414
     1415        unsigned frameLocation = bodyDisjunction->terms[beginTerm].frameLocation;
     1416
     1417        while (bodyDisjunction->terms[beginTerm].alternative.next) {
     1418            beginTerm += bodyDisjunction->terms[beginTerm].alternative.next;
     1419            ASSERT(bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeBodyAlternativeDisjunction);
     1420            bodyDisjunction->terms[beginTerm].alternative.end = endIndex - beginTerm;
     1421            bodyDisjunction->terms[beginTerm].frameLocation = frameLocation;
     1422        }
     1423       
     1424        bodyDisjunction->terms[beginTerm].alternative.next = origBeginTerm - beginTerm;
     1425
     1426        bodyDisjunction->terms.append(ByteTerm::BodyAlternativeEnd());
     1427        bodyDisjunction->terms[endIndex].frameLocation = frameLocation;
     1428    }
     1429
     1430    void atomParenthesesEnd(bool doInline, unsigned lastSubpatternId, int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType, unsigned callFrameSize = 0)
    13251431    {
    13261432        unsigned beginTerm = popParenthesesStack();
    1327         closeAlternative(true, beginTerm + 1);
     1433        closeAlternative(beginTerm + 1);
    13281434        unsigned endTerm = bodyDisjunction->terms.size();
    13291435
    1330         bool isAssertion = bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParentheticalAssertionOnceBegin;
     1436        bool isAssertion = bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParentheticalAssertionBegin;
    13311437        bool invertOrCapture = bodyDisjunction->terms[beginTerm].invertOrCapture;
    13321438        unsigned subpatternId = bodyDisjunction->terms[beginTerm].atom.subpatternId;
    13331439
    1334         bodyDisjunction->terms.append(ByteTerm(isAssertion ? ByteTerm::TypeParentheticalAssertionOnceEnd : ByteTerm::TypeParenthesesSubpatternOnceEnd, subpatternId, invertOrCapture, inputPosition));
     1440        bodyDisjunction->terms.append(ByteTerm(isAssertion ? ByteTerm::TypeParentheticalAssertionEnd : ByteTerm::TypeParenthesesSubpatternOnceEnd, subpatternId, invertOrCapture, inputPosition));
    13351441        bodyDisjunction->terms[beginTerm].atom.parenthesesWidth = endTerm - beginTerm;
    13361442        bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm;
    13371443        bodyDisjunction->terms[endTerm].frameLocation = frameLocation;
    13381444
    1339         if (isAssertion || (quantityCount == 1)) {
     1445        if (doInline) {
    13401446            bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount;
    13411447            bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
     
    13511457            unsigned numSubpatterns = lastSubpatternId - subpatternId + 1;
    13521458            ByteDisjunction* parenthesesDisjunction = new ByteDisjunction(numSubpatterns, callFrameSize);
     1459
     1460            parenthesesDisjunction->terms.append(ByteTerm::SubpatternBegin());
    13531461            for (unsigned termInParentheses = beginTerm + 1; termInParentheses < endTerm; ++termInParentheses)
    13541462                parenthesesDisjunction->terms.append(bodyDisjunction->terms[termInParentheses]);
    1355             parenthesesDisjunction->terms.append(ByteTerm::PatternEnd());
     1463            parenthesesDisjunction->terms.append(ByteTerm::SubpatternEnd());
    13561464
    13571465            bodyDisjunction->terms.shrink(beginTerm);
     
    13691477    {
    13701478        bodyDisjunction = new ByteDisjunction(numSubpatterns, callFrameSize);
    1371         bodyDisjunction->terms.append(ByteTerm::AlternativeBegin(false));
     1479        bodyDisjunction->terms.append(ByteTerm::BodyAlternativeBegin());
    13721480        bodyDisjunction->terms[0].frameLocation = 0;
    13731481        currentAlternativeIndex = 0;
     
    13761484    void regexEnd()
    13771485    {
    1378         closeAlternative(false, 0);
    1379         bodyDisjunction->terms.append(ByteTerm::PatternEnd());
    1380     }
    1381 
    1382     void alterantiveDisjunction()
     1486        closeBodyAlternative();
     1487    }
     1488
     1489    void alterantiveBodyDisjunction()
    13831490    {
    13841491        int newAlternativeIndex = bodyDisjunction->terms.size();
    13851492        bodyDisjunction->terms[currentAlternativeIndex].alternative.next = newAlternativeIndex - currentAlternativeIndex;
    1386         bodyDisjunction->terms.append(ByteTerm::AlternativeDisjunction(m_parenthesesStack.size()));
     1493        bodyDisjunction->terms.append(ByteTerm::BodyAlternativeDisjunction());
     1494
     1495        currentAlternativeIndex = newAlternativeIndex;
     1496    }
     1497
     1498    void alterantiveDisjunction()
     1499    {
     1500        int newAlternativeIndex = bodyDisjunction->terms.size();
     1501        bodyDisjunction->terms[currentAlternativeIndex].alternative.next = newAlternativeIndex - currentAlternativeIndex;
     1502        bodyDisjunction->terms.append(ByteTerm::AlternativeDisjunction());
    13871503
    13881504        currentAlternativeIndex = newAlternativeIndex;
     
    13941510            unsigned currentCountAlreadyChecked = inputCountAlreadyChecked;
    13951511       
    1396             if (alt)
    1397                 alterantiveDisjunction();
     1512            if (alt) {
     1513                if (disjunction == m_pattern.m_body)
     1514                    alterantiveBodyDisjunction();
     1515                else
     1516                    alterantiveDisjunction();
     1517            }
    13981518
    13991519            PatternAlternative* alternative = disjunction->m_alternatives[alt];
     
    14421562                            atomParenthesesSubpatternBegin(term.parentheses.subpatternId, term.invertOrCapture, delegateEndInputOffset - disjunctionAlreadyCheckedCount, term.frameLocation, term.frameLocation);
    14431563                            emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, term.parentheses.disjunction->m_minimumSize);
    1444                             atomParenthesesEnd(term.parentheses.lastSubpatternId, delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType, term.parentheses.disjunction->m_callFrameSize);
     1564                            atomParenthesesEnd(true, term.parentheses.lastSubpatternId, delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType, term.parentheses.disjunction->m_callFrameSize);
    14451565                        } else {
    14461566                            unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked;
    14471567                            atomParenthesesSubpatternBegin(term.parentheses.subpatternId, term.invertOrCapture, delegateEndInputOffset - disjunctionAlreadyCheckedCount, term.frameLocation, term.frameLocation + RegexStackSpaceForBackTrackInfoParenthesesOnce);
    14481568                            emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, 0);
    1449                             atomParenthesesEnd(term.parentheses.lastSubpatternId, delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType, term.parentheses.disjunction->m_callFrameSize);
     1569                            atomParenthesesEnd(true, term.parentheses.lastSubpatternId, delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType, term.parentheses.disjunction->m_callFrameSize);
    14501570                        }
    14511571                    } else {
     
    14531573                        atomParenthesesSubpatternBegin(term.parentheses.subpatternId, term.invertOrCapture, delegateEndInputOffset - disjunctionAlreadyCheckedCount, term.frameLocation, 0);
    14541574                        emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, 0);
    1455                         atomParenthesesEnd(term.parentheses.lastSubpatternId, delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType, term.parentheses.disjunction->m_callFrameSize);
     1575                        atomParenthesesEnd(false, term.parentheses.lastSubpatternId, delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType, term.parentheses.disjunction->m_callFrameSize);
    14561576                    }
    14571577                    break;
     
    14591579
    14601580                case PatternTerm::TypeParentheticalAssertion: {
    1461                     unsigned alternativeFrameLocation = term.inputPosition + RegexStackSpaceForBackTrackInfoParentheticalAssertionOnce;
     1581                    unsigned alternativeFrameLocation = term.inputPosition + RegexStackSpaceForBackTrackInfoParentheticalAssertion;
    14621582                   
    14631583                    atomParentheticalAssertionBegin(term.parentheses.subpatternId, term.invertOrCapture, term.frameLocation, alternativeFrameLocation);
    14641584                    emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, 0);
    1465                     atomParenthesesEnd(term.parentheses.lastSubpatternId, 0, term.frameLocation, term.quantityCount, term.quantityType);
     1585                    atomParenthesesEnd(true, term.parentheses.lastSubpatternId, 0, term.frameLocation, term.quantityCount, term.quantityType);
    14661586                    break;
    14671587                }
     
    15021622COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoBackReference) == (RegexStackSpaceForBackTrackInfoBackReference * sizeof(uintptr_t)), CheckRegexStackSpaceForBackTrackInfoBackReference);
    15031623COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoAlternative) == (RegexStackSpaceForBackTrackInfoAlternative * sizeof(uintptr_t)), CheckRegexStackSpaceForBackTrackInfoAlternative);
    1504 COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoParentheticalAssertionOnce) == (RegexStackSpaceForBackTrackInfoParentheticalAssertionOnce * sizeof(uintptr_t)), CheckRegexStackSpaceForBackTrackInfoParentheticalAssertionOnce);
     1624COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoParentheticalAssertion) == (RegexStackSpaceForBackTrackInfoParentheticalAssertion * sizeof(uintptr_t)), CheckRegexStackSpaceForBackTrackInfoParentheticalAssertion);
    15051625COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoParenthesesOnce) == (RegexStackSpaceForBackTrackInfoParenthesesOnce * sizeof(uintptr_t)), CheckRegexStackSpaceForBackTrackInfoParenthesesOnce);
    15061626COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoParentheses) == (RegexStackSpaceForBackTrackInfoParentheses * sizeof(uintptr_t)), CheckRegexStackSpaceForBackTrackInfoParentheses);
Note: See TracChangeset for help on using the changeset viewer.