Changeset 57925 in webkit for trunk/JavaScriptCore/yarr


Ignore:
Timestamp:
Apr 20, 2010, 3:05:43 PM (15 years ago)
Author:
[email protected]
Message:

2010-04-20 Oliver Hunt <[email protected]>

Reviewed by Gavin Barraclough.

Autogenerate yarr character tables
https://bugs.webkit.org/show_bug.cgi?id=37877

Use a python script to automatically generate character tables
for the builtin YARR character classes. This allows us to generate
actual tables as well, by using these tables we can both increase
performance of the check (for complex builtins) and reduce the actual
code size.

4-8% win on string-unpack-code, but lots of noise on other tests so
i'm only confident saying its a 1% win overall.

  • DerivedSources.make:
  • JavaScriptCore.xcodeproj/project.pbxproj:
  • assembler/AbstractMacroAssembler.h: (JSC::AbstractMacroAssembler::ExtendedAddress::ExtendedAddress):
  • assembler/MacroAssembler.h: (JSC::MacroAssembler::branchTest8):
  • assembler/MacroAssemblerX86Common.h: (JSC::MacroAssemblerX86Common::branchTest8):
  • assembler/MacroAssemblerX86_64.h: (JSC::MacroAssemblerX86_64::branchTest8):
  • assembler/X86Assembler.h: (JSC::X86Assembler::cmpb_im): (JSC::X86Assembler::testb_im):
  • bytecode/SamplingTool.cpp: (JSC::SamplingTool::dump):
  • create_regex_tables: Added.
  • yarr/RegexCompiler.cpp: (JSC::Yarr::CharacterClassConstructor::charClass):
  • yarr/RegexJIT.cpp: (JSC::Yarr::RegexGenerator::matchCharacterClass): (JSC::Yarr::RegexGenerator::generatePatternCharacterGreedy): (JSC::Yarr::RegexGenerator::generatePatternCharacterNonGreedy): (JSC::Yarr::RegexGenerator::generateCharacterClassGreedy):
  • yarr/RegexPattern.h: (JSC::Yarr::CharacterClassTable::create): (JSC::Yarr::CharacterClassTable::CharacterClassTable): (JSC::Yarr::CharacterClass::CharacterClass):
Location:
trunk/JavaScriptCore/yarr
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • trunk/JavaScriptCore/yarr/RegexCompiler.cpp

    r57608 r57925  
    3737namespace JSC { namespace Yarr {
    3838
     39#include "RegExpJitTables.h"
     40
    3941class CharacterClassConstructor {
    4042public:
     
    142144    CharacterClass* charClass()
    143145    {
    144         CharacterClass* characterClass = new CharacterClass();
     146        CharacterClass* characterClass = new CharacterClass(0);
    145147
    146148        characterClass->m_matches.append(m_matches);
     
    234236};
    235237
    236 
    237 CharacterClass* newlineCreate()
    238 {
    239     CharacterClass* characterClass = new CharacterClass();
    240 
    241     characterClass->m_matches.append('\n');
    242     characterClass->m_matches.append('\r');
    243     characterClass->m_matchesUnicode.append(0x2028);
    244     characterClass->m_matchesUnicode.append(0x2029);
    245    
    246     return characterClass;
    247 }
    248 
    249 CharacterClass* digitsCreate()
    250 {
    251     CharacterClass* characterClass = new CharacterClass();
    252 
    253     characterClass->m_ranges.append(CharacterRange('0', '9'));
    254    
    255     return characterClass;
    256 }
    257 
    258 CharacterClass* spacesCreate()
    259 {
    260     CharacterClass* characterClass = new CharacterClass();
    261 
    262     characterClass->m_matches.append(' ');
    263     characterClass->m_ranges.append(CharacterRange('\t', '\r'));
    264     characterClass->m_matchesUnicode.append(0x00a0);
    265     characterClass->m_matchesUnicode.append(0x1680);
    266     characterClass->m_matchesUnicode.append(0x180e);
    267     characterClass->m_matchesUnicode.append(0x2028);
    268     characterClass->m_matchesUnicode.append(0x2029);
    269     characterClass->m_matchesUnicode.append(0x202f);
    270     characterClass->m_matchesUnicode.append(0x205f);
    271     characterClass->m_matchesUnicode.append(0x3000);
    272     characterClass->m_rangesUnicode.append(CharacterRange(0x2000, 0x200a));
    273    
    274     return characterClass;
    275 }
    276 
    277 CharacterClass* wordcharCreate()
    278 {
    279     CharacterClass* characterClass = new CharacterClass();
    280 
    281     characterClass->m_matches.append('_');
    282     characterClass->m_ranges.append(CharacterRange('0', '9'));
    283     characterClass->m_ranges.append(CharacterRange('A', 'Z'));
    284     characterClass->m_ranges.append(CharacterRange('a', 'z'));
    285    
    286     return characterClass;
    287 }
    288 
    289 CharacterClass* nondigitsCreate()
    290 {
    291     CharacterClass* characterClass = new CharacterClass();
    292 
    293     characterClass->m_ranges.append(CharacterRange(0, '0' - 1));
    294     characterClass->m_ranges.append(CharacterRange('9' + 1, 0x7f));
    295     characterClass->m_rangesUnicode.append(CharacterRange(0x80, 0xffff));
    296    
    297     return characterClass;
    298 }
    299 
    300 CharacterClass* nonspacesCreate()
    301 {
    302     CharacterClass* characterClass = new CharacterClass();
    303 
    304     characterClass->m_ranges.append(CharacterRange(0, '\t' - 1));
    305     characterClass->m_ranges.append(CharacterRange('\r' + 1, ' ' - 1));
    306     characterClass->m_ranges.append(CharacterRange(' ' + 1, 0x7f));
    307     characterClass->m_rangesUnicode.append(CharacterRange(0x0080, 0x009f));
    308     characterClass->m_rangesUnicode.append(CharacterRange(0x00a1, 0x167f));
    309     characterClass->m_rangesUnicode.append(CharacterRange(0x1681, 0x180d));
    310     characterClass->m_rangesUnicode.append(CharacterRange(0x180f, 0x1fff));
    311     characterClass->m_rangesUnicode.append(CharacterRange(0x200b, 0x2027));
    312     characterClass->m_rangesUnicode.append(CharacterRange(0x202a, 0x202e));
    313     characterClass->m_rangesUnicode.append(CharacterRange(0x2030, 0x205e));
    314     characterClass->m_rangesUnicode.append(CharacterRange(0x2060, 0x2fff));
    315     characterClass->m_rangesUnicode.append(CharacterRange(0x3001, 0xffff));
    316    
    317     return characterClass;
    318 }
    319 
    320 CharacterClass* nonwordcharCreate()
    321 {
    322     CharacterClass* characterClass = new CharacterClass();
    323 
    324     characterClass->m_matches.append('`');
    325     characterClass->m_ranges.append(CharacterRange(0, '0' - 1));
    326     characterClass->m_ranges.append(CharacterRange('9' + 1, 'A' - 1));
    327     characterClass->m_ranges.append(CharacterRange('Z' + 1, '_' - 1));
    328     characterClass->m_ranges.append(CharacterRange('z' + 1, 0x7f));
    329     characterClass->m_rangesUnicode.append(CharacterRange(0x80, 0xffff));
    330 
    331     return characterClass;
    332 }
    333 
    334 
    335238class RegexPatternConstructor {
    336239public:
  • trunk/JavaScriptCore/yarr/RegexJIT.cpp

    r57608 r57925  
    4141namespace JSC { namespace Yarr {
    4242
    43 
    4443class RegexGenerator : private MacroAssembler {
    4544    friend void jitCompileRegex(JSGlobalData* globalData, RegexCodeBlock& jitObject, const UString& pattern, unsigned& numSubpatterns, const char*& error, bool ignoreCase, bool multiline);
     
    156155    void matchCharacterClass(RegisterID character, JumpList& matchDest, const CharacterClass* charClass)
    157156    {
     157        if (charClass->m_table) {
     158            ExtendedAddress tableEntry(character, reinterpret_cast<intptr_t>(charClass->m_table->m_table));
     159            matchDest.append(branchTest8(charClass->m_table->m_inverted ? Zero : NonZero, tableEntry));   
     160            return;
     161        }
    158162        Jump unicodeFail;
    159163        if (charClass->m_matchesUnicode.size() || charClass->m_rangesUnicode.size()) {
     
    610614            failures.append(jumpIfCharNotEquals(ch, state.inputOffset()));
    611615        }
     616
    612617        add32(Imm32(1), countRegister);
    613618        add32(Imm32(1), index);
    614         branch32(NotEqual, countRegister, Imm32(term.quantityCount)).linkTo(loop, this);
     619        if (term.quantityCount != 0xffffffff)
     620            branch32(NotEqual, countRegister, Imm32(term.quantityCount)).linkTo(loop, this);
     621        else
     622            jump(loop);
     623
    615624        failures.append(jump());
    616625
     
    647656
    648657        atEndOfInput().linkTo(hardFail, this);
    649         branch32(Equal, countRegister, Imm32(term.quantityCount), hardFail);
     658        if (term.quantityCount != 0xffffffff)
     659            branch32(Equal, countRegister, Imm32(term.quantityCount), hardFail);
    650660        if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
    651661            readCharacter(state.inputOffset(), character);
     
    733743        add32(Imm32(1), countRegister);
    734744        add32(Imm32(1), index);
    735         branch32(NotEqual, countRegister, Imm32(term.quantityCount)).linkTo(loop, this);
     745        if (term.quantityCount != 0xffffffff)
     746            branch32(NotEqual, countRegister, Imm32(term.quantityCount)).linkTo(loop, this);
     747        else
     748            jump(loop);
     749
    736750        failures.append(jump());
    737751
  • trunk/JavaScriptCore/yarr/RegexPattern.h

    r57608 r57925  
    5757};
    5858
     59struct CharacterClassTable : RefCounted<CharacterClassTable> {
     60    const char* m_table;
     61    bool m_inverted;
     62    static PassRefPtr<CharacterClassTable> create(const char* table, bool inverted)
     63    {
     64        return adoptRef(new CharacterClassTable(table, inverted));
     65    }
     66
     67private:
     68    CharacterClassTable(const char* table, bool inverted)
     69        : m_table(table)
     70        , m_inverted(inverted)
     71    {
     72    }
     73};
     74
    5975struct CharacterClass : FastAllocBase {
     76    // All CharacterClass instances have to have the full set of matches and ranges,
     77    // they may have an optional table for faster lookups (which must match the
     78    // specified matches and ranges)
     79    CharacterClass(PassRefPtr<CharacterClassTable> table)
     80        : m_table(table)
     81    {
     82    }
    6083    Vector<UChar> m_matches;
    6184    Vector<CharacterRange> m_ranges;
    6285    Vector<UChar> m_matchesUnicode;
    6386    Vector<CharacterRange> m_rangesUnicode;
     87    RefPtr<CharacterClassTable> m_table;
    6488};
    6589
Note: See TracChangeset for help on using the changeset viewer.