Ignore:
Timestamp:
Jul 15, 2010, 7:10:43 AM (15 years ago)
Author:
[email protected]
Message:

Refactoring some parts of the lexer
https://bugs.webkit.org/show_bug.cgi?id=41845

Reviewed by Darin Adler.

This patch is a precursor to refactoring the identifier
parsing. That refactoring currently slows down the lexer and is
not yet ready for landing. This patch contains only those changes
that do not slow down the lexer (mainly style changes).

SunSpider: no change (529.4ms to 528.7ms)
--parse-only: no change (31.0ms to 31.2ms)

  • parser/Lexer.cpp:

(JSC::isIdentStart): using typesOfASCIICharacters to determine

whether the current character is an identifier start

(JSC::isIdentPart): using typesOfASCIICharacters to determine

whether the current character is an identifier part

(JSC::Lexer::parseString): style fix
(JSC::Lexer::lex): removing the else after the main switch,

which reduces code duplication

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/JavaScriptCore/parser/Lexer.cpp

    r63322 r63423  
    4747
    4848
    49 enum CharacterTypes {
     49enum CharacterType {
    5050    // Types for the main switch
    51     CharacterInvalid,
    52 
    53     CharacterAlpha,
     51
     52    // The first three types are fixed, and also used for identifying
     53    // ASCII alpha and alphanumeric characters (see isIdentStart and isIdentPart).
     54    CharacterIdentifierStart,
    5455    CharacterZero,
    5556    CharacterNumber,
    5657
     58    CharacterInvalid,
    5759    CharacterLineTerminator,
    5860    CharacterExclamationMark,
     
    8890};
    8991
    90 // 128 ascii codes
    91 static unsigned short AsciiCharacters[128] = {
     92// 128 ASCII codes
     93static const unsigned short typesOfASCIICharacters[128] = {
    9294/*   0 - Null               */ CharacterInvalid,
    9395/*   1 - Start of Heading   */ CharacterInvalid,
     
    126128/*  34 - "                  */ CharacterQuote,
    127129/*  35 - #                  */ CharacterInvalid,
    128 /*  36 - $                  */ CharacterAlpha,
     130/*  36 - $                  */ CharacterIdentifierStart,
    129131/*  37 - %                  */ CharacterModulo,
    130132/*  38 - &                  */ CharacterAnd,
     
    155157/*  63 - ?                  */ CharacterQuestion,
    156158/*  64 - @                  */ CharacterInvalid,
    157 /*  65 - A                  */ CharacterAlpha,
    158 /*  66 - B                  */ CharacterAlpha,
    159 /*  67 - C                  */ CharacterAlpha,
    160 /*  68 - D                  */ CharacterAlpha,
    161 /*  69 - E                  */ CharacterAlpha,
    162 /*  70 - F                  */ CharacterAlpha,
    163 /*  71 - G                  */ CharacterAlpha,
    164 /*  72 - H                  */ CharacterAlpha,
    165 /*  73 - I                  */ CharacterAlpha,
    166 /*  74 - J                  */ CharacterAlpha,
    167 /*  75 - K                  */ CharacterAlpha,
    168 /*  76 - L                  */ CharacterAlpha,
    169 /*  77 - M                  */ CharacterAlpha,
    170 /*  78 - N                  */ CharacterAlpha,
    171 /*  79 - O                  */ CharacterAlpha,
    172 /*  80 - P                  */ CharacterAlpha,
    173 /*  81 - Q                  */ CharacterAlpha,
    174 /*  82 - R                  */ CharacterAlpha,
    175 /*  83 - S                  */ CharacterAlpha,
    176 /*  84 - T                  */ CharacterAlpha,
    177 /*  85 - U                  */ CharacterAlpha,
    178 /*  86 - V                  */ CharacterAlpha,
    179 /*  87 - W                  */ CharacterAlpha,
    180 /*  88 - X                  */ CharacterAlpha,
    181 /*  89 - Y                  */ CharacterAlpha,
    182 /*  90 - Z                  */ CharacterAlpha,
     159/*  65 - A                  */ CharacterIdentifierStart,
     160/*  66 - B                  */ CharacterIdentifierStart,
     161/*  67 - C                  */ CharacterIdentifierStart,
     162/*  68 - D                  */ CharacterIdentifierStart,
     163/*  69 - E                  */ CharacterIdentifierStart,
     164/*  70 - F                  */ CharacterIdentifierStart,
     165/*  71 - G                  */ CharacterIdentifierStart,
     166/*  72 - H                  */ CharacterIdentifierStart,
     167/*  73 - I                  */ CharacterIdentifierStart,
     168/*  74 - J                  */ CharacterIdentifierStart,
     169/*  75 - K                  */ CharacterIdentifierStart,
     170/*  76 - L                  */ CharacterIdentifierStart,
     171/*  77 - M                  */ CharacterIdentifierStart,
     172/*  78 - N                  */ CharacterIdentifierStart,
     173/*  79 - O                  */ CharacterIdentifierStart,
     174/*  80 - P                  */ CharacterIdentifierStart,
     175/*  81 - Q                  */ CharacterIdentifierStart,
     176/*  82 - R                  */ CharacterIdentifierStart,
     177/*  83 - S                  */ CharacterIdentifierStart,
     178/*  84 - T                  */ CharacterIdentifierStart,
     179/*  85 - U                  */ CharacterIdentifierStart,
     180/*  86 - V                  */ CharacterIdentifierStart,
     181/*  87 - W                  */ CharacterIdentifierStart,
     182/*  88 - X                  */ CharacterIdentifierStart,
     183/*  89 - Y                  */ CharacterIdentifierStart,
     184/*  90 - Z                  */ CharacterIdentifierStart,
    183185/*  91 - [                  */ CharacterOpenBracket,
    184186/*  92 - \                  */ CharacterBackSlash,
    185187/*  93 - ]                  */ CharacterCloseBracket,
    186188/*  94 - ^                  */ CharacterXor,
    187 /*  95 - _                  */ CharacterAlpha,
     189/*  95 - _                  */ CharacterIdentifierStart,
    188190/*  96 - `                  */ CharacterInvalid,
    189 /*  97 - a                  */ CharacterAlpha,
    190 /*  98 - b                  */ CharacterAlpha,
    191 /*  99 - c                  */ CharacterAlpha,
    192 /* 100 - d                  */ CharacterAlpha,
    193 /* 101 - e                  */ CharacterAlpha,
    194 /* 102 - f                  */ CharacterAlpha,
    195 /* 103 - g                  */ CharacterAlpha,
    196 /* 104 - h                  */ CharacterAlpha,
    197 /* 105 - i                  */ CharacterAlpha,
    198 /* 106 - j                  */ CharacterAlpha,
    199 /* 107 - k                  */ CharacterAlpha,
    200 /* 108 - l                  */ CharacterAlpha,
    201 /* 109 - m                  */ CharacterAlpha,
    202 /* 110 - n                  */ CharacterAlpha,
    203 /* 111 - o                  */ CharacterAlpha,
    204 /* 112 - p                  */ CharacterAlpha,
    205 /* 113 - q                  */ CharacterAlpha,
    206 /* 114 - r                  */ CharacterAlpha,
    207 /* 115 - s                  */ CharacterAlpha,
    208 /* 116 - t                  */ CharacterAlpha,
    209 /* 117 - u                  */ CharacterAlpha,
    210 /* 118 - v                  */ CharacterAlpha,
    211 /* 119 - w                  */ CharacterAlpha,
    212 /* 120 - x                  */ CharacterAlpha,
    213 /* 121 - y                  */ CharacterAlpha,
    214 /* 122 - z                  */ CharacterAlpha,
     191/*  97 - a                  */ CharacterIdentifierStart,
     192/*  98 - b                  */ CharacterIdentifierStart,
     193/*  99 - c                  */ CharacterIdentifierStart,
     194/* 100 - d                  */ CharacterIdentifierStart,
     195/* 101 - e                  */ CharacterIdentifierStart,
     196/* 102 - f                  */ CharacterIdentifierStart,
     197/* 103 - g                  */ CharacterIdentifierStart,
     198/* 104 - h                  */ CharacterIdentifierStart,
     199/* 105 - i                  */ CharacterIdentifierStart,
     200/* 106 - j                  */ CharacterIdentifierStart,
     201/* 107 - k                  */ CharacterIdentifierStart,
     202/* 108 - l                  */ CharacterIdentifierStart,
     203/* 109 - m                  */ CharacterIdentifierStart,
     204/* 110 - n                  */ CharacterIdentifierStart,
     205/* 111 - o                  */ CharacterIdentifierStart,
     206/* 112 - p                  */ CharacterIdentifierStart,
     207/* 113 - q                  */ CharacterIdentifierStart,
     208/* 114 - r                  */ CharacterIdentifierStart,
     209/* 115 - s                  */ CharacterIdentifierStart,
     210/* 116 - t                  */ CharacterIdentifierStart,
     211/* 117 - u                  */ CharacterIdentifierStart,
     212/* 118 - v                  */ CharacterIdentifierStart,
     213/* 119 - w                  */ CharacterIdentifierStart,
     214/* 120 - x                  */ CharacterIdentifierStart,
     215/* 121 - y                  */ CharacterIdentifierStart,
     216/* 122 - z                  */ CharacterIdentifierStart,
    215217/* 123 - {                  */ CharacterOpenBrace,
    216218/* 124 - |                  */ CharacterOr,
     
    336338static inline bool isIdentStart(int c)
    337339{
    338     return isASCII(c) ? isASCIIAlpha(c) || c == '$' || c == '_' : isNonASCIIIdentStart(c);
     340    return isASCII(c) ? typesOfASCIICharacters[c] == CharacterIdentifierStart : isNonASCIIIdentStart(c);
    339341}
    340342
     
    347349static inline bool isIdentPart(int c)
    348350{
    349     return isASCII(c) ? isASCIIAlphanumeric(c) || c == '$' || c == '_' : isNonASCIIIdentPart(c);
     351    // Character types are divided into two groups depending on whether they can be part of an
     352    // identifier or not. Those whose type value is less or equal than CharacterNumber can be
     353    // part of an identifier. (See the CharacterType definition for more details.)
     354    return isASCII(c) ? typesOfASCIICharacters[c] <= CharacterNumber : isNonASCIIIdentPart(c);
    350355}
    351356
     
    457462            stringStart = currentCharacter();
    458463            continue;
    459         } else if (UNLIKELY(((static_cast<unsigned>(m_current) - 0xE) & 0x2000))) {
     464        }
     465        // Fast check for characters that require special handling.
     466        // Catches -1, \n, \r, 0x2028, and 0x2029 as efficiently
     467        // as possible, and lets through all common ASCII characters.
     468        if (UNLIKELY(((static_cast<unsigned>(m_current) - 0xE) & 0x2000))) {
    460469            // New-line or end of input is not allowed
    461470            if (UNLIKELY(isLineTerminator(m_current)) || UNLIKELY(m_current == -1))
     
    494503    m_delimited = false;
    495504
    496     if (isASCII(m_current)) {
    497         ASSERT(m_current >= 0 && m_current < 128);
    498 
    499     switch (AsciiCharacters[m_current]) {
     505    CharacterType type;
     506    if (LIKELY(isASCII(m_current)))
     507        type = static_cast<CharacterType>(typesOfASCIICharacters[m_current]);
     508    else if (isNonASCIIIdentStart(m_current))
     509        type = CharacterIdentifierStart;
     510    else if (isLineTerminator(m_current))
     511        type = CharacterLineTerminator;
     512    else
     513        type = CharacterInvalid;
     514
     515    switch (type) {
    500516    case CharacterGreater:
    501517        shift();
     
    751767        token = STRING;
    752768        break;
    753     case CharacterAlpha:
     769    case CharacterIdentifierStart:
    754770        ASSERT(isIdentStart(m_current));
    755771        goto startIdentifierOrKeyword;
     
    770786        goto returnError;
    771787    }
    772     } else {
    773         // Rare characters
    774 
    775         if (isNonASCIIIdentStart(m_current))
    776             goto startIdentifierOrKeyword;
    777         if (isLineTerminator(m_current)) {
    778             shiftLineTerminator();
    779             m_atLineStart = true;
    780             m_terminator = true;
    781             if (lastTokenWasRestrKeyword())
    782                 goto doneSemicolon;
    783             goto start;
    784         }
    785         goto returnError;
    786     }
    787788
    788789    m_atLineStart = false;
Note: See TracChangeset for help on using the changeset viewer.