Ignore:
Timestamp:
Nov 17, 2010, 1:42:41 AM (15 years ago)
Author:
[email protected]
Message:

2010-11-17 Peter Varga <[email protected]>

Reviewed by Gavin Barraclough.

Collect the beginning characters in a RegExp pattern for look-up
optimization
https://bugs.webkit.org/show_bug.cgi?id=45748

Extend YARR's parser with an algorithm that collects the potential
beginning characters of a RegExp pattern for later look-up optimization.

  • yarr/RegexCompiler.cpp: (JSC::Yarr::BeginCharHelper::BeginCharHelper): (JSC::Yarr::BeginCharHelper::addBeginChar): (JSC::Yarr::BeginCharHelper::merge): (JSC::Yarr::BeginCharHelper::addCharacter): (JSC::Yarr::BeginCharHelper::linkHotTerms): (JSC::Yarr::RegexPatternConstructor::RegexPatternConstructor): (JSC::Yarr::RegexPatternConstructor::addBeginTerm): (JSC::Yarr::RegexPatternConstructor::setupDisjunctionBeginTerms): (JSC::Yarr::RegexPatternConstructor::setupAlternativeBeginTerms): (JSC::Yarr::RegexPatternConstructor::setupBeginChars): (JSC::Yarr::compileRegex):
  • yarr/RegexPattern.h: (JSC::Yarr::TermChain::TermChain): (JSC::Yarr::BeginChar::BeginChar): (JSC::Yarr::RegexPattern::RegexPattern): (JSC::Yarr::RegexPattern::reset):
File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/JavaScriptCore/yarr/RegexPattern.h

    r67790 r72180  
    11/*
    22 * Copyright (C) 2009 Apple Inc. All rights reserved.
     3 * Copyright (C) 2010 Peter Varga ([email protected]), University of Szeged
    34 *
    45 * Redistribution and use in source and binary forms, with or without
     
    284285CharacterClass* nonwordcharCreate();
    285286
     // A node that stores one PatternTerm by value together with a vector of
     // further TermChain nodes (hotTerms). Added by this changeset as part of
     // collecting a pattern's beginning characters.
     287struct TermChain {
            // Copies `term` into the node; hotTerms is default-constructed.
            // The constructor is not explicit, so a PatternTerm converts
            // implicitly to a TermChain.
     288    TermChain(PatternTerm term)
     289        : term(term)
     290    {}
     291
            // The term this node wraps.
     292    PatternTerm term;
            // Nested chain nodes attached to this one.
            // NOTE(review): presumably the terms that may follow `term`; confirm
            // against BeginCharHelper::linkHotTerms in RegexCompiler.cpp.
     293    Vector<TermChain> hotTerms;
     294};
     295
     // A (value, mask) pair of unsigned integers. RegexPattern stores these in
     // m_beginChars, the list of potential beginning characters of a pattern.
     296struct BeginChar {
            // Default entry: both value and mask are zero.
     297    BeginChar()
     298        : value(0)
     299        , mask(0)
     300    {}
     301
            // Builds an entry from an explicit value and mask.
     302    BeginChar(unsigned value, unsigned mask)
     303        : value(value)
     304        , mask(mask)
     305    {}
     306
            // NOTE(review): presumably the character code(s) to look for at the
            // start of a match; confirm against BeginCharHelper::addBeginChar.
     307    unsigned value;
            // NOTE(review): the meaning of the mask bits is not visible in this
            // header (possibly bits ignored when comparing, e.g. for
            // case-insensitive matching); confirm in RegexCompiler.cpp.
     308    unsigned mask;
     309};
     310
    286311struct RegexPattern {
    287312    RegexPattern(bool ignoreCase, bool multiline)
     
    289314        , m_multiline(multiline)
    290315        , m_containsBackreferences(false)
     316        , m_containsBeginChars(false)
    291317        , m_containsBOL(false)
    292318        , m_numSubpatterns(0)
     
    314340
    315341        m_containsBackreferences = false;
     342        m_containsBeginChars = false;
    316343        m_containsBOL = false;
    317344
     
    328355        deleteAllValues(m_userCharacterClasses);
    329356        m_userCharacterClasses.clear();
     357        m_beginChars.clear();
    330358    }
    331359
     
    381409    bool m_multiline : 1;
    382410    bool m_containsBackreferences : 1;
     411    bool m_containsBeginChars : 1;
    383412    bool m_containsBOL : 1;
    384413    unsigned m_numSubpatterns;
     
    387416    Vector<PatternDisjunction*, 4> m_disjunctions;
    388417    Vector<CharacterClass*> m_userCharacterClasses;
     418    Vector<BeginChar> m_beginChars;
    389419
    390420private:
Note: See TracChangeset for help on using the changeset viewer.