Ignore:
Timestamp:
Mar 5, 2012, 10:56:29 AM (13 years ago)
Author:
[email protected]
Message:

Lexer: Specialize character predicates for LChar, UChar
https://bugs.webkit.org/show_bug.cgi?id=79677

Reviewed by Oliver Hunt.

This patch specializes isIdentStart, isIdentPart, isWhiteSpace,
and isLineTerminator to perform fewer checks when the lexer is
instantiated to work on LChar sequences. This is about a 1.5% win
on the --parse-only suite in my local measurements.

  • parser/Lexer.cpp:

(JSC::isLatin1): New static helper, specialized for LChar and
UChar.
(JSC::typesOfLatin1Characters): Renamed from
typesOfASCIICharacters and expanded from 128 to 256 entries to
cover the full range of the LChar type. All uses of isASCII are
changed to use isLatin1. The new table entries were generated
using libunistring.
(JSC::isNonLatin1IdentStart):
(JSC::isIdentStart):
(JSC::isNonLatin1IdentPart):
(JSC::isIdentPart):
(JSC::Lexer::shiftLineTerminator):
(JSC::Lexer::parseIdentifier):
(JSC::Lexer::parseIdentifierSlowCase):
(JSC::Lexer::parseStringSlowCase):
(JSC::Lexer::parseMultilineComment):
(JSC::Lexer::lex):
(JSC::Lexer::scanRegExp):
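(JSC::Lexer::skipRegExp): Cast m_current to T with static_cast<T>(...)
before calling the character predicates, and add an explicit
m_current != -1 check in front of several of those calls so the
end-of-input value is still rejected after the cast.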

  • parser/Lexer.h:

(JSC::Lexer::isWhiteSpace):
(JSC::Lexer::isLineTerminator):

  • KeywordLookupGenerator.py:

(Trie.printAsC): Declare specialized isIdentPart static functions.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/Source/JavaScriptCore/parser/Lexer.cpp

    r108841 r109769  
    9696};
    9797
    98 // 128 ASCII codes
    99 static const unsigned short typesOfASCIICharacters[128] = {
     98// 256 Latin-1 codes
     99static const unsigned short typesOfLatin1Characters[256] = {
    100100/*   0 - Null               */ CharacterInvalid,
    101101/*   1 - Start of Heading   */ CharacterInvalid,
     
    226226/* 126 - ~                  */ CharacterTilde,
    227227/* 127 - Delete             */ CharacterInvalid,
     228/* 128 - Cc category        */ CharacterInvalid,
     229/* 129 - Cc category        */ CharacterInvalid,
     230/* 130 - Cc category        */ CharacterInvalid,
     231/* 131 - Cc category        */ CharacterInvalid,
     232/* 132 - Cc category        */ CharacterInvalid,
     233/* 133 - Cc category        */ CharacterInvalid,
     234/* 134 - Cc category        */ CharacterInvalid,
     235/* 135 - Cc category        */ CharacterInvalid,
     236/* 136 - Cc category        */ CharacterInvalid,
     237/* 137 - Cc category        */ CharacterInvalid,
     238/* 138 - Cc category        */ CharacterInvalid,
     239/* 139 - Cc category        */ CharacterInvalid,
     240/* 140 - Cc category        */ CharacterInvalid,
     241/* 141 - Cc category        */ CharacterInvalid,
     242/* 142 - Cc category        */ CharacterInvalid,
     243/* 143 - Cc category        */ CharacterInvalid,
     244/* 144 - Cc category        */ CharacterInvalid,
     245/* 145 - Cc category        */ CharacterInvalid,
     246/* 146 - Cc category        */ CharacterInvalid,
     247/* 147 - Cc category        */ CharacterInvalid,
     248/* 148 - Cc category        */ CharacterInvalid,
     249/* 149 - Cc category        */ CharacterInvalid,
     250/* 150 - Cc category        */ CharacterInvalid,
     251/* 151 - Cc category        */ CharacterInvalid,
     252/* 152 - Cc category        */ CharacterInvalid,
     253/* 153 - Cc category        */ CharacterInvalid,
     254/* 154 - Cc category        */ CharacterInvalid,
     255/* 155 - Cc category        */ CharacterInvalid,
     256/* 156 - Cc category        */ CharacterInvalid,
     257/* 157 - Cc category        */ CharacterInvalid,
     258/* 158 - Cc category        */ CharacterInvalid,
     259/* 159 - Cc category        */ CharacterInvalid,
     260/* 160 - Zs category (nbsp) */ CharacterWhiteSpace,
     261/* 161 - Po category        */ CharacterInvalid,
     262/* 162 - Sc category        */ CharacterInvalid,
     263/* 163 - Sc category        */ CharacterInvalid,
     264/* 164 - Sc category        */ CharacterInvalid,
     265/* 165 - Sc category        */ CharacterInvalid,
     266/* 166 - So category        */ CharacterInvalid,
     267/* 167 - So category        */ CharacterInvalid,
     268/* 168 - Sk category        */ CharacterInvalid,
     269/* 169 - So category        */ CharacterInvalid,
     270/* 170 - Ll category        */ CharacterIdentifierStart,
     271/* 171 - Pi category        */ CharacterInvalid,
     272/* 172 - Sm category        */ CharacterInvalid,
     273/* 173 - Cf category        */ CharacterInvalid,
     274/* 174 - So category        */ CharacterInvalid,
     275/* 175 - Sk category        */ CharacterInvalid,
     276/* 176 - So category        */ CharacterInvalid,
     277/* 177 - Sm category        */ CharacterInvalid,
     278/* 178 - No category        */ CharacterInvalid,
     279/* 179 - No category        */ CharacterInvalid,
     280/* 180 - Sk category        */ CharacterInvalid,
     281/* 181 - Ll category        */ CharacterIdentifierStart,
     282/* 182 - So category        */ CharacterInvalid,
     283/* 183 - Po category        */ CharacterInvalid,
     284/* 184 - Sk category        */ CharacterInvalid,
     285/* 185 - No category        */ CharacterInvalid,
     286/* 186 - Ll category        */ CharacterIdentifierStart,
     287/* 187 - Pf category        */ CharacterInvalid,
     288/* 188 - No category        */ CharacterInvalid,
     289/* 189 - No category        */ CharacterInvalid,
     290/* 190 - No category        */ CharacterInvalid,
     291/* 191 - Po category        */ CharacterInvalid,
     292/* 192 - Lu category        */ CharacterIdentifierStart,
     293/* 193 - Lu category        */ CharacterIdentifierStart,
     294/* 194 - Lu category        */ CharacterIdentifierStart,
     295/* 195 - Lu category        */ CharacterIdentifierStart,
     296/* 196 - Lu category        */ CharacterIdentifierStart,
     297/* 197 - Lu category        */ CharacterIdentifierStart,
     298/* 198 - Lu category        */ CharacterIdentifierStart,
     299/* 199 - Lu category        */ CharacterIdentifierStart,
     300/* 200 - Lu category        */ CharacterIdentifierStart,
     301/* 201 - Lu category        */ CharacterIdentifierStart,
     302/* 202 - Lu category        */ CharacterIdentifierStart,
     303/* 203 - Lu category        */ CharacterIdentifierStart,
     304/* 204 - Lu category        */ CharacterIdentifierStart,
     305/* 205 - Lu category        */ CharacterIdentifierStart,
     306/* 206 - Lu category        */ CharacterIdentifierStart,
     307/* 207 - Lu category        */ CharacterIdentifierStart,
     308/* 208 - Lu category        */ CharacterIdentifierStart,
     309/* 209 - Lu category        */ CharacterIdentifierStart,
     310/* 210 - Lu category        */ CharacterIdentifierStart,
     311/* 211 - Lu category        */ CharacterIdentifierStart,
     312/* 212 - Lu category        */ CharacterIdentifierStart,
     313/* 213 - Lu category        */ CharacterIdentifierStart,
     314/* 214 - Lu category        */ CharacterIdentifierStart,
     315/* 215 - Sm category        */ CharacterInvalid,
     316/* 216 - Lu category        */ CharacterIdentifierStart,
     317/* 217 - Lu category        */ CharacterIdentifierStart,
     318/* 218 - Lu category        */ CharacterIdentifierStart,
     319/* 219 - Lu category        */ CharacterIdentifierStart,
     320/* 220 - Lu category        */ CharacterIdentifierStart,
     321/* 221 - Lu category        */ CharacterIdentifierStart,
     322/* 222 - Lu category        */ CharacterIdentifierStart,
     323/* 223 - Ll category        */ CharacterIdentifierStart,
     324/* 224 - Ll category        */ CharacterIdentifierStart,
     325/* 225 - Ll category        */ CharacterIdentifierStart,
     326/* 226 - Ll category        */ CharacterIdentifierStart,
     327/* 227 - Ll category        */ CharacterIdentifierStart,
     328/* 228 - Ll category        */ CharacterIdentifierStart,
     329/* 229 - Ll category        */ CharacterIdentifierStart,
     330/* 230 - Ll category        */ CharacterIdentifierStart,
     331/* 231 - Ll category        */ CharacterIdentifierStart,
     332/* 232 - Ll category        */ CharacterIdentifierStart,
     333/* 233 - Ll category        */ CharacterIdentifierStart,
     334/* 234 - Ll category        */ CharacterIdentifierStart,
     335/* 235 - Ll category        */ CharacterIdentifierStart,
     336/* 236 - Ll category        */ CharacterIdentifierStart,
     337/* 237 - Ll category        */ CharacterIdentifierStart,
     338/* 238 - Ll category        */ CharacterIdentifierStart,
     339/* 239 - Ll category        */ CharacterIdentifierStart,
     340/* 240 - Ll category        */ CharacterIdentifierStart,
     341/* 241 - Ll category        */ CharacterIdentifierStart,
     342/* 242 - Ll category        */ CharacterIdentifierStart,
     343/* 243 - Ll category        */ CharacterIdentifierStart,
     344/* 244 - Ll category        */ CharacterIdentifierStart,
     345/* 245 - Ll category        */ CharacterIdentifierStart,
     346/* 246 - Ll category        */ CharacterIdentifierStart,
     347/* 247 - Sm category        */ CharacterInvalid,
     348/* 248 - Ll category        */ CharacterIdentifierStart,
     349/* 249 - Ll category        */ CharacterIdentifierStart,
     350/* 250 - Ll category        */ CharacterIdentifierStart,
     351/* 251 - Ll category        */ CharacterIdentifierStart,
     352/* 252 - Ll category        */ CharacterIdentifierStart,
     353/* 253 - Ll category        */ CharacterIdentifierStart,
     354/* 254 - Ll category        */ CharacterIdentifierStart,
     355/* 255 - Ll category        */ CharacterIdentifierStart
    228356};
    229357
     
    351479void Lexer<T>::shiftLineTerminator()
    352480{
    353     ASSERT(isLineTerminator(m_current));
     481    ASSERT(isLineTerminator(static_cast<T>(m_current)));
    354482
    355483    int m_prev = m_current;
     
    369497}
    370498
    371 static NEVER_INLINE bool isNonASCIIIdentStart(int c)
     499static NEVER_INLINE bool isNonLatin1IdentStart(int c)
    372500{
    373501    return category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other);
    374502}
    375503
    376 static inline bool isIdentStart(int c)
    377 {
    378     return isASCII(c) ? typesOfASCIICharacters[c] == CharacterIdentifierStart : isNonASCIIIdentStart(c);
    379 }
    380 
    381 static NEVER_INLINE bool isNonASCIIIdentPart(int c)
     504static ALWAYS_INLINE bool isLatin1(LChar)
     505{
     506    return true;
     507}
     508
     509static ALWAYS_INLINE bool isLatin1(UChar c)
     510{
     511    return c < 256;
     512}
     513
     514static inline bool isIdentStart(LChar c)
     515{
     516    return typesOfLatin1Characters[c] == CharacterIdentifierStart;
     517}
     518
     519static inline bool isIdentStart(UChar c)
     520{
     521    return isLatin1(c) ? isIdentStart(static_cast<LChar>(c)) : isNonLatin1IdentStart(c);
     522}
     523
     524static NEVER_INLINE bool isNonLatin1IdentPart(int c)
    382525{
    383526    return (category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other
     
    385528}
    386529
    387 static ALWAYS_INLINE bool isIdentPart(int c)
     530static ALWAYS_INLINE bool isIdentPart(LChar c)
    388531{
    389532    // Character types are divided into two groups depending on whether they can be part of an
    390533    // identifier or not. Those whose type value is less or equal than CharacterNumber can be
    391534    // part of an identifier. (See the CharacterType definition for more details.)
    392     return isASCII(c) ? typesOfASCIICharacters[c] <= CharacterNumber : isNonASCIIIdentPart(c);
     535    return typesOfLatin1Characters[c] <= CharacterNumber;
     536}
     537
     538static ALWAYS_INLINE bool isIdentPart(UChar c)
     539{
     540    return isLatin1(c) ? isIdentPart(static_cast<LChar>(c)) : isNonLatin1IdentPart(c);
    393541}
    394542
     
    500648    const LChar* identifierStart = currentCharacter();
    501649   
    502     while (isIdentPart(m_current))
     650    while (m_current != -1 && isIdentPart(static_cast<LChar>(m_current)))
    503651        shift();
    504652   
     
    551699    UChar orAllChars = 0;
    552700   
    553     while (isIdentPart(m_current)) {
     701    while (m_current != -1 && isIdentPart(static_cast<UChar>(m_current))) {
    554702        orAllChars |= m_current;
    555703        shift();
     
    605753
    606754    while (true) {
    607         if (LIKELY(isIdentPart(m_current))) {
     755        if (LIKELY(m_current != -1 && isIdentPart(static_cast<T>(m_current)))) {
    608756            shift();
    609757            continue;
     
    623771        if (UNLIKELY(character == -1))
    624772            return ERRORTOK;
    625         if (UNLIKELY(m_buffer16.size() ? !isIdentPart(character) : !isIdentStart(character)))
     773        UChar ucharacter = static_cast<UChar>(character);
     774        if (UNLIKELY(m_buffer16.size() ? !isIdentPart(ucharacter) : !isIdentStart(ucharacter)))
    626775            return ERRORTOK;
    627776        if (shouldCreateIdentifier)
    628             record16(character);
     777            record16(ucharacter);
    629778        identifierStart = currentCharacter();
    630779    }
     
    754903                    record16(escape);
    755904                shift();
    756             } else if (UNLIKELY(isLineTerminator(m_current)))
     905            } else if (UNLIKELY(isLineTerminator(static_cast<T>(m_current))))
    757906                shiftLineTerminator();
    758907            else if (m_current == 'x') {
     
    827976        if (UNLIKELY(((static_cast<unsigned>(m_current) - 0xE) & 0x2000))) {
    828977            // New-line or end of input is not allowed
    829             if (UNLIKELY(isLineTerminator(m_current)) || UNLIKELY(m_current == -1)) {
     978            if (UNLIKELY(m_current == -1) || UNLIKELY(isLineTerminator(static_cast<T>(m_current)))) {
    830979                m_lexErrorMessage = "Unexpected EOF";
    831980                return false;
     
    10071156            return false;
    10081157
    1009         if (isLineTerminator(m_current)) {
     1158        if (isLineTerminator(static_cast<T>(m_current))) {
    10101159            shiftLineTerminator();
    10111160            m_terminator = true;
     
    10361185
    10371186start:
    1038     while (isWhiteSpace(m_current))
     1187    while (m_current != -1 && isWhiteSpace(static_cast<T>(m_current)))
    10391188        shift();
    10401189
     
    10471196
    10481197    CharacterType type;
    1049     if (LIKELY(isASCII(m_current)))
    1050         type = static_cast<CharacterType>(typesOfASCIICharacters[m_current]);
    1051     else if (isNonASCIIIdentStart(m_current))
     1198    if (LIKELY(isLatin1(static_cast<T>(m_current))))
     1199        type = static_cast<CharacterType>(typesOfLatin1Characters[m_current]);
     1200    else if (isNonLatin1IdentStart(m_current))
    10521201        type = CharacterIdentifierStart;
    1053     else if (isLineTerminator(m_current))
     1202    else if (isLineTerminator(static_cast<T>(m_current)))
    10541203        type = CharacterLineTerminator;
    10551204    else
     
    13381487
    13391488        // No identifiers allowed directly after numeric literal, e.g. "3in" is bad.
    1340         if (UNLIKELY(isIdentStart(m_current))) {
     1489        if (UNLIKELY(m_current != -1 && isIdentStart(static_cast<T>(m_current)))) {
    13411490            m_lexErrorMessage = "At least one digit must occur after a decimal point";
    13421491            goto returnError;
     
    13581507        break;
    13591508    case CharacterIdentifierStart:
    1360         ASSERT(isIdentStart(m_current));
     1509        ASSERT(isIdentStart(static_cast<T>(m_current)));
    13611510        // Fall through into CharacterBackSlash.
    13621511    case CharacterBackSlash:
     
    13671516        break;
    13681517    case CharacterLineTerminator:
    1369         ASSERT(isLineTerminator(m_current));
     1518        ASSERT(isLineTerminator(static_cast<T>(m_current)));
    13701519        shiftLineTerminator();
    13711520        m_atLineStart = true;
     
    13851534
    13861535inSingleLineComment:
    1387     while (!isLineTerminator(m_current)) {
     1536    while (!isLineTerminator(static_cast<T>(m_current))) {
    13881537        if (UNLIKELY(m_current == -1))
    13891538            return EOFTOK;
     
    14331582        int current = m_current;
    14341583
    1435         if (isLineTerminator(current) || current == -1) {
     1584        if (isLineTerminator(static_cast<T>(current)) || current == -1) {
    14361585            m_buffer16.resize(0);
    14371586            return false;
     
    14661615    m_buffer16.resize(0);
    14671616
    1468     while (isIdentPart(m_current)) {
     1617    while (m_current != -1 && isIdentPart(static_cast<T>(m_current))) {
    14691618        record16(m_current);
    14701619        shift();
     
    14861635        int current = m_current;
    14871636
    1488         if (isLineTerminator(current) || current == -1)
     1637        if (isLineTerminator(static_cast<T>(current)) || current == -1)
    14891638            return false;
    14901639
     
    15121661    }
    15131662
    1514     while (isIdentPart(m_current))
     1663    while (m_current != -1 && isIdentPart(static_cast<T>(m_current)))
    15151664        shift();
    15161665
Note: See TracChangeset for help on using the changeset viewer.