Ignore:
Timestamp:
Apr 14, 2009, 12:06:41 AM (16 years ago)
Author:
[email protected]
Message:

2009-04-13 Gavin Barraclough <[email protected]>

Reviewed by Cap'n Geoff Garen.

Yarr!
(Yet another regex runtime).

Currently disabled by default since the interpreter, whilst awesomely
functional, has not been optimized and is likely slower than PCRE, and
the JIT, whilst faster than WREC, is presently incomplete and does not
fall back to using an interpreter for the cases it cannot handle.

  • JavaScriptCore.xcodeproj/project.pbxproj:
  • assembler/MacroAssemblerX86Common.h: (JSC::MacroAssemblerX86Common::move): (JSC::MacroAssemblerX86Common::swap): (JSC::MacroAssemblerX86Common::signExtend32ToPtr): (JSC::MacroAssemblerX86Common::zeroExtend32ToPtr): (JSC::MacroAssemblerX86Common::branch32): (JSC::MacroAssemblerX86Common::branch16):
  • assembler/X86Assembler.h: (JSC::X86Assembler::cmpw_im): (JSC::X86Assembler::testw_rr): (JSC::X86Assembler::X86InstructionFormatter::immediate16):
  • runtime/RegExp.cpp: (JSC::RegExp::RegExp): (JSC::RegExp::~RegExp): (JSC::RegExp::create): (JSC::RegExp::compile): (JSC::RegExp::match):
  • runtime/RegExp.h:
  • wtf/Platform.h:
  • yarr: Added.
  • yarr/RegexCompiler.cpp: Added. (JSC::Yarr::CharacterClassConstructor::CharacterClassConstructor): (JSC::Yarr::CharacterClassConstructor::reset): (JSC::Yarr::CharacterClassConstructor::append): (JSC::Yarr::CharacterClassConstructor::putChar): (JSC::Yarr::CharacterClassConstructor::isUnicodeUpper): (JSC::Yarr::CharacterClassConstructor::isUnicodeLower): (JSC::Yarr::CharacterClassConstructor::putRange): (JSC::Yarr::CharacterClassConstructor::charClass): (JSC::Yarr::CharacterClassConstructor::addSorted): (JSC::Yarr::CharacterClassConstructor::addSortedRange): (JSC::Yarr::newlineCreate): (JSC::Yarr::digitsCreate): (JSC::Yarr::spacesCreate): (JSC::Yarr::wordcharCreate): (JSC::Yarr::nondigitsCreate): (JSC::Yarr::nonspacesCreate): (JSC::Yarr::nonwordcharCreate): (JSC::Yarr::RegexPatternConstructor::RegexPatternConstructor): (JSC::Yarr::RegexPatternConstructor::~RegexPatternConstructor): (JSC::Yarr::RegexPatternConstructor::reset): (JSC::Yarr::RegexPatternConstructor::assertionBOL): (JSC::Yarr::RegexPatternConstructor::assertionEOL): (JSC::Yarr::RegexPatternConstructor::assertionWordBoundary): (JSC::Yarr::RegexPatternConstructor::atomPatternCharacter): (JSC::Yarr::RegexPatternConstructor::atomBuiltInCharacterClass): (JSC::Yarr::RegexPatternConstructor::atomCharacterClassBegin): (JSC::Yarr::RegexPatternConstructor::atomCharacterClassAtom): (JSC::Yarr::RegexPatternConstructor::atomCharacterClassRange): (JSC::Yarr::RegexPatternConstructor::atomCharacterClassBuiltIn): (JSC::Yarr::RegexPatternConstructor::atomCharacterClassEnd): (JSC::Yarr::RegexPatternConstructor::atomParenthesesSubpatternBegin): (JSC::Yarr::RegexPatternConstructor::atomParentheticalAssertionBegin): (JSC::Yarr::RegexPatternConstructor::atomParenthesesEnd): (JSC::Yarr::RegexPatternConstructor::atomBackReference): (JSC::Yarr::RegexPatternConstructor::copyDisjunction): (JSC::Yarr::RegexPatternConstructor::copyTerm): (JSC::Yarr::RegexPatternConstructor::quantifyAtom): 
(JSC::Yarr::RegexPatternConstructor::disjunction): (JSC::Yarr::RegexPatternConstructor::regexBegin): (JSC::Yarr::RegexPatternConstructor::regexEnd): (JSC::Yarr::RegexPatternConstructor::regexError): (JSC::Yarr::RegexPatternConstructor::setupAlternativeOffsets): (JSC::Yarr::RegexPatternConstructor::setupDisjunctionOffsets): (JSC::Yarr::RegexPatternConstructor::setupOffsets): (JSC::Yarr::compileRegex):
  • yarr/RegexCompiler.h: Added.
  • yarr/RegexInterpreter.cpp: Added. (JSC::Yarr::Interpreter::appendParenthesesDisjunctionContext): (JSC::Yarr::Interpreter::popParenthesesDisjunctionContext): (JSC::Yarr::Interpreter::DisjunctionContext::DisjunctionContext): (JSC::Yarr::Interpreter::DisjunctionContext::operator new): (JSC::Yarr::Interpreter::allocDisjunctionContext): (JSC::Yarr::Interpreter::freeDisjunctionContext): (JSC::Yarr::Interpreter::ParenthesesDisjunctionContext::ParenthesesDisjunctionContext): (JSC::Yarr::Interpreter::ParenthesesDisjunctionContext::operator new): (JSC::Yarr::Interpreter::ParenthesesDisjunctionContext::restoreOutput): (JSC::Yarr::Interpreter::ParenthesesDisjunctionContext::getDisjunctionContext): (JSC::Yarr::Interpreter::allocParenthesesDisjunctionContext): (JSC::Yarr::Interpreter::freeParenthesesDisjunctionContext): (JSC::Yarr::Interpreter::InputStream::InputStream): (JSC::Yarr::Interpreter::InputStream::next): (JSC::Yarr::Interpreter::InputStream::rewind): (JSC::Yarr::Interpreter::InputStream::read): (JSC::Yarr::Interpreter::InputStream::readChecked): (JSC::Yarr::Interpreter::InputStream::reread): (JSC::Yarr::Interpreter::InputStream::prev): (JSC::Yarr::Interpreter::InputStream::getPos): (JSC::Yarr::Interpreter::InputStream::setPos): (JSC::Yarr::Interpreter::InputStream::atStart): (JSC::Yarr::Interpreter::InputStream::atEnd): (JSC::Yarr::Interpreter::InputStream::checkInput): (JSC::Yarr::Interpreter::InputStream::uncheckInput): (JSC::Yarr::Interpreter::testCharacterClass): (JSC::Yarr::Interpreter::tryConsumeCharacter): (JSC::Yarr::Interpreter::checkCharacter): (JSC::Yarr::Interpreter::tryConsumeCharacterClass): (JSC::Yarr::Interpreter::checkCharacterClass): (JSC::Yarr::Interpreter::tryConsumeBackReference): (JSC::Yarr::Interpreter::matchAssertionBOL): (JSC::Yarr::Interpreter::matchAssertionEOL): (JSC::Yarr::Interpreter::matchAssertionWordBoundary): (JSC::Yarr::Interpreter::matchPatternCharacter): (JSC::Yarr::Interpreter::backtrackPatternCharacter): 
(JSC::Yarr::Interpreter::matchCharacterClass): (JSC::Yarr::Interpreter::backtrackCharacterClass): (JSC::Yarr::Interpreter::matchBackReference): (JSC::Yarr::Interpreter::backtrackBackReference): (JSC::Yarr::Interpreter::recordParenthesesMatch): (JSC::Yarr::Interpreter::resetMatches): (JSC::Yarr::Interpreter::resetAssertionMatches): (JSC::Yarr::Interpreter::parenthesesDoBacktrack): (JSC::Yarr::Interpreter::matchParenthesesOnceBegin): (JSC::Yarr::Interpreter::matchParenthesesOnceEnd): (JSC::Yarr::Interpreter::backtrackParenthesesOnceBegin): (JSC::Yarr::Interpreter::backtrackParenthesesOnceEnd): (JSC::Yarr::Interpreter::matchParentheticalAssertionOnceBegin): (JSC::Yarr::Interpreter::matchParentheticalAssertionOnceEnd): (JSC::Yarr::Interpreter::backtrackParentheticalAssertionOnceBegin): (JSC::Yarr::Interpreter::backtrackParentheticalAssertionOnceEnd): (JSC::Yarr::Interpreter::matchParentheses): (JSC::Yarr::Interpreter::backtrackParentheses): (JSC::Yarr::Interpreter::matchTerm): (JSC::Yarr::Interpreter::backtrackTerm): (JSC::Yarr::Interpreter::matchAlternative): (JSC::Yarr::Interpreter::matchDisjunction): (JSC::Yarr::Interpreter::matchNonZeroDisjunction): (JSC::Yarr::Interpreter::interpret): (JSC::Yarr::Interpreter::Interpreter): (JSC::Yarr::ByteCompiler::ParenthesesStackEntry::ParenthesesStackEntry): (JSC::Yarr::ByteCompiler::ByteCompiler): (JSC::Yarr::ByteCompiler::compile): (JSC::Yarr::ByteCompiler::checkInput): (JSC::Yarr::ByteCompiler::assertionBOL): (JSC::Yarr::ByteCompiler::assertionEOL): (JSC::Yarr::ByteCompiler::assertionWordBoundary): (JSC::Yarr::ByteCompiler::atomPatternCharacter): (JSC::Yarr::ByteCompiler::atomCharacterClass): (JSC::Yarr::ByteCompiler::atomBackReference): (JSC::Yarr::ByteCompiler::atomParenthesesSubpatternBegin): (JSC::Yarr::ByteCompiler::atomParentheticalAssertionBegin): (JSC::Yarr::ByteCompiler::popParenthesesStack): (JSC::Yarr::ByteCompiler::dumpDisjunction): (JSC::Yarr::ByteCompiler::closeAlternative): 
(JSC::Yarr::ByteCompiler::atomParenthesesEnd): (JSC::Yarr::ByteCompiler::regexBegin): (JSC::Yarr::ByteCompiler::regexEnd): (JSC::Yarr::ByteCompiler::alterantiveDisjunction): (JSC::Yarr::ByteCompiler::emitDisjunction): (JSC::Yarr::byteCompileRegex): (JSC::Yarr::interpretRegex):
  • yarr/RegexInterpreter.h: Added. (JSC::Yarr::ByteTerm::): (JSC::Yarr::ByteTerm::ByteTerm): (JSC::Yarr::ByteTerm::BOL): (JSC::Yarr::ByteTerm::CheckInput): (JSC::Yarr::ByteTerm::EOL): (JSC::Yarr::ByteTerm::WordBoundary): (JSC::Yarr::ByteTerm::BackReference): (JSC::Yarr::ByteTerm::AlternativeBegin): (JSC::Yarr::ByteTerm::AlternativeDisjunction): (JSC::Yarr::ByteTerm::AlternativeEnd): (JSC::Yarr::ByteTerm::PatternEnd): (JSC::Yarr::ByteTerm::invert): (JSC::Yarr::ByteTerm::capture): (JSC::Yarr::ByteDisjunction::ByteDisjunction): (JSC::Yarr::BytecodePattern::BytecodePattern): (JSC::Yarr::BytecodePattern::~BytecodePattern):
  • yarr/RegexJIT.cpp: Added. (JSC::Yarr::RegexGenerator::optimizeAlternative): (JSC::Yarr::RegexGenerator::matchCharacterClassRange): (JSC::Yarr::RegexGenerator::matchCharacterClass): (JSC::Yarr::RegexGenerator::jumpIfNoAvailableInput): (JSC::Yarr::RegexGenerator::jumpIfAvailableInput): (JSC::Yarr::RegexGenerator::checkInput): (JSC::Yarr::RegexGenerator::atEndOfInput): (JSC::Yarr::RegexGenerator::notAtEndOfInput): (JSC::Yarr::RegexGenerator::jumpIfCharEquals): (JSC::Yarr::RegexGenerator::jumpIfCharNotEquals): (JSC::Yarr::RegexGenerator::readCharacter): (JSC::Yarr::RegexGenerator::storeToFrame): (JSC::Yarr::RegexGenerator::loadFromFrame): (JSC::Yarr::RegexGenerator::TermGenerationState::TermGenerationState): (JSC::Yarr::RegexGenerator::TermGenerationState::resetAlternative): (JSC::Yarr::RegexGenerator::TermGenerationState::alternativeValid): (JSC::Yarr::RegexGenerator::TermGenerationState::nextAlternative): (JSC::Yarr::RegexGenerator::TermGenerationState::alternative): (JSC::Yarr::RegexGenerator::TermGenerationState::resetTerm): (JSC::Yarr::RegexGenerator::TermGenerationState::termValid): (JSC::Yarr::RegexGenerator::TermGenerationState::nextTerm): (JSC::Yarr::RegexGenerator::TermGenerationState::term): (JSC::Yarr::RegexGenerator::TermGenerationState::lookaheadTerm): (JSC::Yarr::RegexGenerator::TermGenerationState::isSinglePatternCharacterLookaheadTerm): (JSC::Yarr::RegexGenerator::TermGenerationState::inputOffset): (JSC::Yarr::RegexGenerator::TermGenerationState::jumpToBacktrack): (JSC::Yarr::RegexGenerator::TermGenerationState::setBacktrackGenerated): (JSC::Yarr::RegexGenerator::jumpToBacktrackCheckEmitPending): (JSC::Yarr::RegexGenerator::genertateAssertionBOL): (JSC::Yarr::RegexGenerator::genertateAssertionEOL): (JSC::Yarr::RegexGenerator::matchAssertionWordchar): (JSC::Yarr::RegexGenerator::genertateAssertionWordBoundary): (JSC::Yarr::RegexGenerator::genertatePatternCharacterSingle): (JSC::Yarr::RegexGenerator::genertatePatternCharacterPair): 
(JSC::Yarr::RegexGenerator::genertatePatternCharacterFixed): (JSC::Yarr::RegexGenerator::genertatePatternCharacterGreedy): (JSC::Yarr::RegexGenerator::genertatePatternCharacterNonGreedy): (JSC::Yarr::RegexGenerator::genertateCharacterClassSingle): (JSC::Yarr::RegexGenerator::genertateCharacterClassFixed): (JSC::Yarr::RegexGenerator::genertateCharacterClassGreedy): (JSC::Yarr::RegexGenerator::genertateCharacterClassNonGreedy): (JSC::Yarr::RegexGenerator::generateParenthesesSingleDisjunctionOneAlternative): (JSC::Yarr::RegexGenerator::generateParenthesesSingle): (JSC::Yarr::RegexGenerator::generateTerm): (JSC::Yarr::RegexGenerator::generateDisjunction): (JSC::Yarr::RegexGenerator::RegexGenerator): (JSC::Yarr::RegexGenerator::generate): (JSC::Yarr::jitCompileRegex): (JSC::Yarr::executeRegex):
  • yarr/RegexJIT.h: Added. (JSC::Yarr::RegexCodeBlock::RegexCodeBlock):
  • yarr/RegexParser.h: Added. (JSC::Yarr::): (JSC::Yarr::Parser::): (JSC::Yarr::Parser::CharacterClassParserDelegate::CharacterClassParserDelegate): (JSC::Yarr::Parser::CharacterClassParserDelegate::begin): (JSC::Yarr::Parser::CharacterClassParserDelegate::atomPatternCharacterUnescaped): (JSC::Yarr::Parser::CharacterClassParserDelegate::atomPatternCharacter): (JSC::Yarr::Parser::CharacterClassParserDelegate::atomBuiltInCharacterClass): (JSC::Yarr::Parser::CharacterClassParserDelegate::end): (JSC::Yarr::Parser::CharacterClassParserDelegate::assertionWordBoundary): (JSC::Yarr::Parser::CharacterClassParserDelegate::atomBackReference): (JSC::Yarr::Parser::CharacterClassParserDelegate::flush): (JSC::Yarr::Parser::CharacterClassParserDelegate::): (JSC::Yarr::Parser::Parser): (JSC::Yarr::Parser::parseEscape): (JSC::Yarr::Parser::parseAtomEscape): (JSC::Yarr::Parser::parseCharacterClassEscape): (JSC::Yarr::Parser::parseCharacterClass): (JSC::Yarr::Parser::parseParenthesesBegin): (JSC::Yarr::Parser::parseParenthesesEnd): (JSC::Yarr::Parser::parseQuantifier): (JSC::Yarr::Parser::parseTokens): (JSC::Yarr::Parser::parse): (JSC::Yarr::Parser::saveState): (JSC::Yarr::Parser::restoreState): (JSC::Yarr::Parser::atEndOfPattern): (JSC::Yarr::Parser::peek): (JSC::Yarr::Parser::peekIsDigit): (JSC::Yarr::Parser::peekDigit): (JSC::Yarr::Parser::consume): (JSC::Yarr::Parser::consumeDigit): (JSC::Yarr::Parser::consumeNumber): (JSC::Yarr::Parser::consumeOctal): (JSC::Yarr::Parser::tryConsume): (JSC::Yarr::Parser::tryConsumeHex): (JSC::Yarr::parse):
  • yarr/RegexPattern.h: Added. (JSC::Yarr::CharacterRange::CharacterRange): (JSC::Yarr::): (JSC::Yarr::PatternTerm::): (JSC::Yarr::PatternTerm::PatternTerm): (JSC::Yarr::PatternTerm::BOL): (JSC::Yarr::PatternTerm::EOL): (JSC::Yarr::PatternTerm::WordBoundary): (JSC::Yarr::PatternTerm::invert): (JSC::Yarr::PatternTerm::capture): (JSC::Yarr::PatternTerm::quantify): (JSC::Yarr::PatternAlternative::PatternAlternative): (JSC::Yarr::PatternAlternative::lastTerm): (JSC::Yarr::PatternAlternative::removeLastTerm): (JSC::Yarr::PatternDisjunction::PatternDisjunction): (JSC::Yarr::PatternDisjunction::~PatternDisjunction): (JSC::Yarr::PatternDisjunction::addNewAlternative): (JSC::Yarr::RegexPattern::RegexPattern): (JSC::Yarr::RegexPattern::~RegexPattern): (JSC::Yarr::RegexPattern::reset): (JSC::Yarr::RegexPattern::containsIllegalBackReference): (JSC::Yarr::RegexPattern::newlineCharacterClass): (JSC::Yarr::RegexPattern::digitsCharacterClass): (JSC::Yarr::RegexPattern::spacesCharacterClass): (JSC::Yarr::RegexPattern::wordcharCharacterClass): (JSC::Yarr::RegexPattern::nondigitsCharacterClass): (JSC::Yarr::RegexPattern::nonspacesCharacterClass): (JSC::Yarr::RegexPattern::nonwordcharCharacterClass):
File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/JavaScriptCore/assembler/MacroAssemblerX86Common.h

    r41089 r42481  
    328328    void move(RegisterID src, RegisterID dest)
    329329    {
    330         m_assembler.movl_rr(src, dest);
     330        if (src != dest)
     331            m_assembler.movl_rr(src, dest);
    331332    }
    332333
     
    338339    void swap(RegisterID reg1, RegisterID reg2)
    339340    {
    340         m_assembler.xchgl_rr(reg1, reg2);
     341        if (reg1 != reg2)
     342            m_assembler.xchgl_rr(reg1, reg2);
    341343    }
    342344
    343345    void signExtend32ToPtr(RegisterID src, RegisterID dest)
    344346    {
    345         if (src != dest)
    346             move(src, dest);
     347        move(src, dest);
    347348    }
    348349
    349350    void zeroExtend32ToPtr(RegisterID src, RegisterID dest)
    350351    {
    351         if (src != dest)
    352             move(src, dest);
     352        move(src, dest);
    353353    }
    354354#endif
     
    407407    }
    408408
     409    Jump branch32(Condition cond, BaseIndex left, Imm32 right)
     410    {
     411        m_assembler.cmpl_im(right.m_value, left.offset, left.base, left.index, left.scale);
     412        return Jump(m_assembler.jCC(cond));
     413    }
     414
    409415    Jump branch16(Condition cond, BaseIndex left, RegisterID right)
    410416    {
    411417        m_assembler.cmpw_rm(right, left.offset, left.base, left.index, left.scale);
     418        return Jump(m_assembler.jCC(cond));
     419    }
     420
     421    Jump branch16(Condition cond, BaseIndex left, Imm32 right)
     422    {
     423        ASSERT(!(right.m_value & 0xFFFF0000));
     424
     425        m_assembler.cmpw_im(right.m_value, left.offset, left.base, left.index, left.scale);
    412426        return Jump(m_assembler.jCC(cond));
    413427    }
Note: See TracChangeset for help on using the changeset viewer.