Ignore:
Timestamp:
Sep 4, 2018, 2:18:58 PM (7 years ago)
Author:
[email protected]
Message:

YARR: JIT RegExps with back references
https://bugs.webkit.org/show_bug.cgi?id=180874

Reviewed by Filip Pizlo.

Source/JavaScriptCore:

Implemented JIT'ed back references for all counted types. The only type of back references
not handled in the JIT are 16bit matches that ignore case. Such support would require the
canonicalization that is currently handled in the Yarr interpreter via a C funtion call.
The back reference processing for surrogate pairs is implemented by individually comparing
each surrogate ala memcmp.

Added a generated canonicalization table for the LChar (8bit) domain to process case
ignored back references.

Added macro assembler load16(ExtendedAddress) for indexed access to the canonicalization table.

Added a new JIT failure reason for forward references as the check to JIT expressions with
forward references we're handled synonimously those containing back references.

This change is only enabled for 64 bit platforms.

  • assembler/MacroAssemblerARM64.h:

(JSC::MacroAssemblerARM64::load16):

  • assembler/MacroAssemblerX86_64.h:

(JSC::MacroAssemblerX86_64::load16):

  • runtime/RegExp.cpp:

(JSC::RegExp::compile):
(JSC::RegExp::compileMatchOnly):

  • yarr/YarrCanonicalize.h:
  • yarr/YarrCanonicalizeUCS2.cpp:
  • yarr/YarrCanonicalizeUCS2.js:

(set characters.hex.set string_appeared_here):

  • yarr/YarrJIT.cpp:

(JSC::Yarr::YarrGenerator::checkNotEnoughInput):
(JSC::Yarr::YarrGenerator::readCharacterDontDecodeSurrogates):
(JSC::Yarr::YarrGenerator::matchBackreference):
(JSC::Yarr::YarrGenerator::generateBackReference):
(JSC::Yarr::YarrGenerator::backtrackBackReference):
(JSC::Yarr::YarrGenerator::generateTerm):
(JSC::Yarr::YarrGenerator::backtrackTerm):
(JSC::Yarr::YarrGenerator::compile):
(JSC::Yarr::dumpCompileFailure):

  • yarr/YarrJIT.h:
  • yarr/YarrPattern.h:

(JSC::Yarr::BackTrackInfoBackReference::beginIndex):
(JSC::Yarr::BackTrackInfoBackReference::matchAmountIndex):

Source/WTF:

Added ENABLE_YARR_JIT_BACKREFERENCES to enable RegExp JIT'ing of back references
for 64 bit platforms only.

  • wtf/Platform.h:
File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/Source/JavaScriptCore/yarr/YarrJIT.cpp

    r235322 r235636  
    465465    }
    466466
     467    Jump checkNotEnoughInput(RegisterID additionalAmount)
     468    {
     469        add32(index, additionalAmount);
     470        return branch32(Above, additionalAmount, length);
     471    }
     472
    467473    Jump checkInput()
    468474    {
     
    547553#endif
    548554
     555    void readCharacterDontDecodeSurrogates(Checked<unsigned> negativeCharacterOffset, RegisterID resultReg, RegisterID indexReg = index)
     556    {
     557        BaseIndex address = negativeOffsetIndexedAddress(negativeCharacterOffset, resultReg, indexReg);
     558       
     559        if (m_charSize == Char8)
     560            load8(address, resultReg);
     561        else
     562            load16Unaligned(address, resultReg);
     563    }
     564   
    549565    void readCharacter(Checked<unsigned> negativeCharacterOffset, RegisterID resultReg, RegisterID indexReg = index)
    550566    {
     
    11071123    }
    11081124
     1125#if ENABLE(YARR_JIT_BACKREFERENCES)
     1126    void matchBackreference(size_t opIndex, JumpList& characterMatchFails, RegisterID character, RegisterID patternIndex, RegisterID patternCharacter)
     1127    {
     1128        YarrOp& op = m_ops[opIndex];
     1129        PatternTerm* term = op.m_term;
     1130        unsigned subpatternId = term->backReferenceSubpatternId;
     1131
     1132        Label loop(this);
     1133
     1134            readCharacterDontDecodeSurrogates(0, patternCharacter, patternIndex);
     1135            readCharacterDontDecodeSurrogates(m_checkedOffset - term->inputPosition, character);
     1136       
     1137        if (!m_pattern.ignoreCase())
     1138            characterMatchFails.append(branch32(NotEqual, character, patternCharacter));
     1139        else {
     1140            Jump charactersMatch = branch32(Equal, character, patternCharacter);
     1141            ExtendedAddress characterTableEntry(character, reinterpret_cast<intptr_t>(&canonicalTableLChar));
     1142            load16(characterTableEntry, character);
     1143            ExtendedAddress patternTableEntry(patternCharacter, reinterpret_cast<intptr_t>(&canonicalTableLChar));
     1144            load16(patternTableEntry, patternCharacter);
     1145            characterMatchFails.append(branch32(NotEqual, character, patternCharacter));
     1146            charactersMatch.link(this);
     1147        }
     1148
     1149       
     1150        add32(TrustedImm32(1), index);
     1151        add32(TrustedImm32(1), patternIndex);
     1152       
     1153        branch32(NotEqual, patternIndex, Address(output, ((subpatternId << 1) + 1) * sizeof(int))).linkTo(loop, this);
     1154    }
     1155
     1156    void generateBackReference(size_t opIndex)
     1157    {
     1158        YarrOp& op = m_ops[opIndex];
     1159        PatternTerm* term = op.m_term;
     1160
     1161        if (m_pattern.ignoreCase() && m_charSize != Char8) {
     1162            m_failureReason = JITFailureReason::BackReference;
     1163            return;
     1164        }
     1165
     1166        unsigned subpatternId = term->backReferenceSubpatternId;
     1167        unsigned parenthesesFrameLocation = term->frameLocation;
     1168
     1169        const RegisterID characterOrTemp = regT0;
     1170        const RegisterID patternIndex = regT1;
     1171        const RegisterID patternTemp = regT2;
     1172
     1173        storeToFrame(index, parenthesesFrameLocation + BackTrackInfoBackReference::beginIndex());
     1174        if (term->quantityType != QuantifierFixedCount || term->quantityMaxCount != 1)
     1175            storeToFrame(TrustedImm32(0), parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex());
     1176
     1177        JumpList matches;
     1178
     1179        if (term->quantityType != QuantifierNonGreedy) {
     1180            load32(Address(output, (subpatternId << 1) * sizeof(int)), patternIndex);
     1181            load32(Address(output, ((subpatternId << 1) + 1) * sizeof(int)), patternTemp);
     1182
     1183            // An empty match is successful without consuming characters
     1184            if (term->quantityType != QuantifierFixedCount || term->quantityMaxCount != 1) {
     1185                matches.append(branch32(Equal, TrustedImm32(-1), patternIndex));
     1186                matches.append(branch32(Equal, patternIndex, patternTemp));
     1187            } else {
     1188                Jump zeroLengthMatch = branch32(Equal, TrustedImm32(-1), patternIndex);
     1189                Jump tryNonZeroMatch = branch32(NotEqual, patternIndex, patternTemp);
     1190                zeroLengthMatch.link(this);
     1191                storeToFrame(TrustedImm32(1), parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex());
     1192                matches.append(jump());
     1193                tryNonZeroMatch.link(this);
     1194            }
     1195        }
     1196
     1197        switch (term->quantityType) {
     1198        case QuantifierFixedCount: {
     1199            Label outerLoop(this);
     1200
     1201            // PatternTemp should contain pattern end index at this point
     1202            sub32(patternIndex, patternTemp);
     1203            if (m_checkedOffset - term->inputPosition)
     1204                sub32(Imm32((m_checkedOffset - term->inputPosition).unsafeGet()), patternTemp);
     1205            op.m_jumps.append(checkNotEnoughInput(patternTemp));
     1206
     1207            matchBackreference(opIndex, op.m_jumps, characterOrTemp, patternIndex, patternTemp);
     1208
     1209            if (term->quantityMaxCount != 1) {
     1210                loadFromFrame(parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex(), characterOrTemp);
     1211                add32(TrustedImm32(1), characterOrTemp);
     1212                storeToFrame(characterOrTemp, parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex());
     1213                matches.append(branch32(Equal, Imm32(term->quantityMaxCount.unsafeGet()), characterOrTemp));
     1214                load32(Address(output, (subpatternId << 1) * sizeof(int)), patternIndex);
     1215                load32(Address(output, ((subpatternId << 1) + 1) * sizeof(int)), patternTemp);
     1216                jump(outerLoop);
     1217            }
     1218            matches.link(this);
     1219            break;
     1220        }
     1221
     1222        case QuantifierGreedy: {
     1223            JumpList incompleteMatches;
     1224
     1225            Label outerLoop(this);
     1226
     1227            // PatternTemp should contain pattern end index at this point
     1228            sub32(patternIndex, patternTemp);
     1229            if (m_checkedOffset - term->inputPosition)
     1230                sub32(Imm32((m_checkedOffset - term->inputPosition).unsafeGet()), patternTemp);
     1231            matches.append(checkNotEnoughInput(patternTemp));
     1232
     1233            matchBackreference(opIndex, incompleteMatches, characterOrTemp, patternIndex, patternTemp);
     1234
     1235            loadFromFrame(parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex(), characterOrTemp);
     1236            add32(TrustedImm32(1), characterOrTemp);
     1237            storeToFrame(characterOrTemp, parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex());
     1238            if (term->quantityMaxCount != quantifyInfinite)
     1239                matches.append(branch32(Equal, Imm32(term->quantityMaxCount.unsafeGet()), characterOrTemp));
     1240            load32(Address(output, (subpatternId << 1) * sizeof(int)), patternIndex);
     1241            load32(Address(output, ((subpatternId << 1) + 1) * sizeof(int)), patternTemp);
     1242
     1243            // Store current index in frame for restoring after a partial match
     1244            storeToFrame(index, parenthesesFrameLocation + BackTrackInfoBackReference::beginIndex());
     1245            jump(outerLoop);
     1246
     1247            incompleteMatches.link(this);
     1248            loadFromFrame(parenthesesFrameLocation + BackTrackInfoBackReference::beginIndex(), index);
     1249
     1250            matches.link(this);
     1251            op.m_reentry = label();
     1252            break;
     1253        }
     1254
     1255        case QuantifierNonGreedy: {
     1256            JumpList incompleteMatches;
     1257
     1258            matches.append(jump());
     1259
     1260            op.m_reentry = label();
     1261
     1262            load32(Address(output, (subpatternId << 1) * sizeof(int)), patternIndex);
     1263            load32(Address(output, ((subpatternId << 1) + 1) * sizeof(int)), patternTemp);
     1264
     1265            // An empty match is successful without consuming characters
     1266            Jump zeroLengthMatch = branch32(Equal, TrustedImm32(-1), patternIndex);
     1267            Jump tryNonZeroMatch = branch32(NotEqual, patternIndex, patternTemp);
     1268            zeroLengthMatch.link(this);
     1269            storeToFrame(TrustedImm32(1), parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex());
     1270            matches.append(jump());
     1271            tryNonZeroMatch.link(this);
     1272
     1273            // Check if we have input remaining to match
     1274            sub32(patternIndex, patternTemp);
     1275            if (m_checkedOffset - term->inputPosition)
     1276                sub32(Imm32((m_checkedOffset - term->inputPosition).unsafeGet()), patternTemp);
     1277            matches.append(checkNotEnoughInput(patternTemp));
     1278
     1279            storeToFrame(index, parenthesesFrameLocation + BackTrackInfoBackReference::beginIndex());
     1280
     1281            matchBackreference(opIndex, incompleteMatches, characterOrTemp, patternIndex, patternTemp);
     1282
     1283            matches.append(jump());
     1284
     1285            incompleteMatches.link(this);
     1286            loadFromFrame(parenthesesFrameLocation + BackTrackInfoBackReference::beginIndex(), index);
     1287
     1288            matches.link(this);
     1289            break;
     1290        }
     1291        }
     1292    }
     1293    void backtrackBackReference(size_t opIndex)
     1294    {
     1295        YarrOp& op = m_ops[opIndex];
     1296        PatternTerm* term = op.m_term;
     1297
     1298        unsigned subpatternId = term->backReferenceSubpatternId;
     1299
     1300        m_backtrackingState.link(this);
     1301        op.m_jumps.link(this);
     1302
     1303        JumpList failures;
     1304
     1305        unsigned parenthesesFrameLocation = term->frameLocation;
     1306        switch (term->quantityType) {
     1307        case QuantifierFixedCount:
     1308            loadFromFrame(parenthesesFrameLocation + BackTrackInfoBackReference::beginIndex(), index);
     1309            break;
     1310
     1311        case QuantifierGreedy: {
     1312            const RegisterID matchAmount = regT0;
     1313            const RegisterID patternStartIndex = regT1;
     1314            const RegisterID patternEndIndexOrLen = regT2;
     1315
     1316            loadFromFrame(parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex(), matchAmount);
     1317            failures.append(branchTest32(Zero, matchAmount));
     1318
     1319            load32(Address(output, (subpatternId << 1) * sizeof(int)), patternStartIndex);
     1320            load32(Address(output, ((subpatternId << 1) + 1) * sizeof(int)), patternEndIndexOrLen);
     1321            sub32(patternStartIndex, patternEndIndexOrLen);
     1322            sub32(patternEndIndexOrLen, index);
     1323
     1324            sub32(TrustedImm32(1), matchAmount);
     1325            storeToFrame(matchAmount, parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex());
     1326            jump(op.m_reentry);
     1327            break;
     1328        }
     1329
     1330        case QuantifierNonGreedy: {
     1331            const RegisterID matchAmount = regT0;
     1332
     1333            loadFromFrame(parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex(), matchAmount);
     1334            if (term->quantityMaxCount != quantifyInfinite)
     1335                failures.append(branch32(AboveOrEqual, Imm32(term->quantityMaxCount.unsafeGet()), matchAmount));
     1336            add32(TrustedImm32(1), matchAmount);
     1337            storeToFrame(matchAmount, parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex());
     1338            jump(op.m_reentry);
     1339            break;
     1340        }
     1341        }
     1342        failures.link(this);
     1343        m_backtrackingState.fallthrough();
     1344    }
     1345#endif
     1346
    11091347    void generatePatternCharacterOnce(size_t opIndex)
    11101348    {
     
    18552093
    18562094        case PatternTerm::TypeForwardReference:
     2095            m_failureReason = JITFailureReason::ForwardReference;
    18572096            break;
    18582097
     
    18602099        case PatternTerm::TypeParentheticalAssertion:
    18612100            RELEASE_ASSERT_NOT_REACHED();
     2101
    18622102        case PatternTerm::TypeBackReference:
     2103#if ENABLE(YARR_JIT_BACKREFERENCES)
     2104            generateBackReference(opIndex);
     2105#else
    18632106            m_failureReason = JITFailureReason::BackReference;
     2107#endif
    18642108            break;
    18652109        case PatternTerm::TypeDotStarEnclosure:
     
    19212165
    19222166        case PatternTerm::TypeForwardReference:
     2167            m_failureReason = JITFailureReason::ForwardReference;
    19232168            break;
    19242169
     
    19272172            RELEASE_ASSERT_NOT_REACHED();
    19282173
     2174        case PatternTerm::TypeBackReference:
     2175#if ENABLE(YARR_JIT_BACKREFERENCES)
     2176            backtrackBackReference(opIndex);
     2177#else
     2178            m_failureReason = JITFailureReason::BackReference;
     2179#endif
     2180            break;
     2181
    19292182        case PatternTerm::TypeDotStarEnclosure:
    19302183            backtrackDotStarEnclosure(opIndex);
    1931             break;
    1932 
    1933         case PatternTerm::TypeBackReference:
    1934             m_failureReason = JITFailureReason::BackReference;
    19352184            break;
    19362185        }
     
    35673816#endif
    35683817
     3818        if (m_pattern.m_containsBackreferences
     3819#if ENABLE(YARR_JIT_BACKREFERENCES)
     3820            && (compileMode == MatchOnly || (m_pattern.ignoreCase() && m_charSize != Char8))
     3821#endif
     3822            ) {
     3823                codeBlock.setFallBackWithFailureReason(JITFailureReason::BackReference);
     3824                return;
     3825        }
     3826
    35693827        // We need to compile before generating code since we set flags based on compilation that
    35703828        // are used during generation.
     
    37143972                break;
    37153973
     3974            case PatternTerm::TypeBackReference:
     3975                out.printf("BackReference pattern #%u", term->backReferenceSubpatternId);
     3976                term->dumpQuantifier(out);
     3977                break;
     3978
    37163979            case PatternTerm::TypePatternCharacter:
    37173980                out.print("TypePatternCharacter ");
     
    37404003
    37414004            case PatternTerm::TypeForwardReference:
    3742             case PatternTerm::TypeBackReference:
     4005                out.print("TypeForwardReference <not handled>");
     4006                break;
     4007
    37434008            case PatternTerm::TypeParenthesesSubpattern:
    37444009            case PatternTerm::TypeParentheticalAssertion:
     
    38544119
    38554120        case OpParentheticalAssertionEnd:
    3856             out.print("OpParentheticalAssertionEnd%s\n", term->invert() ? " inverted" : "");
     4121            out.printf("OpParentheticalAssertionEnd%s\n", term->invert() ? " inverted" : "");
    38574122            return(0);
    38584123
     
    39184183        break;
    39194184    case JITFailureReason::BackReference:
    3920         dataLog("Can't JIT a pattern containing back references\n");
     4185        dataLog("Can't JIT some patterns containing back references\n");
     4186        break;
     4187    case JITFailureReason::ForwardReference:
     4188        dataLog("Can't JIT a pattern containing forward references\n");
    39214189        break;
    39224190    case JITFailureReason::VariableCountedParenthesisWithNonZeroMinimum:
Note: See TracChangeset for help on using the changeset viewer.