Ignore:
Timestamp:
Aug 6, 2010, 3:33:10 AM (15 years ago)
Author:
[email protected]
Message:

Refactor identifier parsing in lexer
https://bugs.webkit.org/show_bug.cgi?id=41845

Reviewed by Darin Adler.

The code is refactored to avoid gotos. The new code
has the same performance as the old one.

SunSpider --parse-only: no change (from 34.0ms to 33.6ms)
SunSpider: no change (from 523.2ms to 523.5ms)

  • parser/Lexer.cpp:

(JSC::Lexer::parseIdentifier):
(JSC::Lexer::lex):

  • parser/Lexer.h:
File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/JavaScriptCore/parser/Lexer.cpp

    r63566 r64827  
    400400}
    401401
     // Scans one identifier starting at the current character, accepting
     // \uXXXX escapes anywhere in it.
     //
     // lvalp   - receives the resulting Identifier in lvalp->ident.
     // lexType - when equal to IdentifyReservedWords, an identifier that
     //           contained no escape is looked up in m_keywordTable.
     //
     // Returns ERRORTOK when a backslash is not followed by 'u', when
     // getUnicodeCharacter() yields -1, or when the escaped character is not
     // allowed at its position; the keyword table entry's lexer value when the
     // lookup hits; IDENT otherwise. On the non-error paths m_delimited is
     // set to false.
     //
     // NOTE(review): the ERRORTOK returns do not reset m_buffer16 here, while
     // the first-character check below relies on the buffer being empty on
     // entry - confirm the caller clears it on error.
     402ALWAYS_INLINE JSTokenType Lexer::parseIdentifier(JSTokenData* lvalp, LexType lexType)
     403{
            // Becomes true once an escape is seen; from then on the identifier
            // text is assembled in m_buffer16 instead of being read in place.
     404    bool bufferRequired = false;
            // Start of the current run of raw (unescaped) source characters.
     405    const UChar* identifierStart = currentCharacter();
     406    int identifierLength;
     407
     408    while (true) {
                // Common case: plain identifier character, just advance.
     409        if (LIKELY(isIdentPart(m_current))) {
     410            shift();
     411            continue;
     412        }
                // Anything other than a backslash ends the identifier.
     413        if (LIKELY(m_current != '\\'))
     414            break;
     415
     416        // \uXXXX unicode characters.
     417        bufferRequired = true;
                // Copy the raw run scanned so far into the buffer before
                // handling the escape.
     418        if (identifierStart != currentCharacter())
     419            m_buffer16.append(identifierStart, currentCharacter() - identifierStart);
     420        shift();
     421        if (UNLIKELY(m_current != 'u'))
     422            return ERRORTOK;
     423        shift();
                // -1 is treated as failure; presumably it signals a malformed
                // escape - verify against getUnicodeCharacter().
     424        int character = getUnicodeCharacter();
     425        if (UNLIKELY(character == -1))
     426            return ERRORTOK;
                // An empty buffer means the escape is the first character of
                // the identifier, so it must pass isIdentStart; otherwise
                // isIdentPart is sufficient.
     427        if (UNLIKELY(m_buffer16.size() ? !isIdentPart(character) : !isIdentStart(character)))
     428            return ERRORTOK;
                // presumably appends the decoded character to m_buffer16 - TODO confirm
     429        record16(character);
                // The next raw run begins at the current position.
     430        identifierStart = currentCharacter();
     431    }
     432
            // Without escapes the identifier is the contiguous source range;
            // with escapes, append the trailing raw run and use the buffer.
     433    if (!bufferRequired)
     434        identifierLength = currentCharacter() - identifierStart;
     435    else {
     436        if (identifierStart != currentCharacter())
     437            m_buffer16.append(identifierStart, currentCharacter() - identifierStart);
     438        identifierStart = m_buffer16.data();
     439        identifierLength = m_buffer16.size();
     440    }
     441
     442    const Identifier* ident = makeIdentifier(identifierStart, identifierLength);
     443    lvalp->ident = ident;
     444    m_delimited = false;
     445
            // This path never appended to m_buffer16, so it returns without
            // resizing it.
     446    if (LIKELY(!bufferRequired && lexType == IdentifyReservedWords)) {
     447        // Keywords must not be recognized if there was an \uXXXX in the identifier.
     448        const HashEntry* entry = m_keywordTable.entry(m_globalData, *ident);
     449        return entry ? static_cast<JSTokenType>(entry->lexerValue()) : IDENT;
     450    }
     451
            // Empty the buffer before returning.
     452    m_buffer16.resize(0);
     453    return IDENT;
     454}
     455
    402456ALWAYS_INLINE bool Lexer::parseString(JSTokenData* lvalp)
    403457{
     
    489543
    490544    JSTokenType token = ERRORTOK;
    491     int identChar = 0;
    492545    m_terminator = false;
    493546
     
    754807        token = CLOSEBRACE;
    755808        break;
    756     case CharacterBackSlash:
    757         goto startIdentifierWithBackslash;
    758809    case CharacterZero:
    759810        goto startNumberWithZeroDigit;
     
    769820    case CharacterIdentifierStart:
    770821        ASSERT(isIdentStart(m_current));
    771         goto startIdentifierOrKeyword;
     822        // Fall through into CharacterBackSlash.
     823    case CharacterBackSlash:
     824        token = parseIdentifier(lvalp, lexType);
     825        break;
    772826    case CharacterLineTerminator:
    773827        ASSERT(isLineTerminator(m_current));
     
    790844    goto returnToken;
    791845
    792 startIdentifierWithBackslash: {
    793     shift();
    794     if (UNLIKELY(m_current != 'u'))
    795         goto returnError;
    796     shift();
    797 
    798     identChar = getUnicodeCharacter();
    799     if (UNLIKELY(identChar == -1))
    800         goto returnError;
    801     if (UNLIKELY(!isIdentStart(identChar)))
    802         goto returnError;
    803     goto inIdentifierAfterCharacterCheck;
    804 }
    805 
    806 startIdentifierOrKeyword: {
    807     const UChar* identifierStart = currentCharacter();
    808     shift();
    809     while (isIdentPart(m_current))
    810         shift();
    811     if (LIKELY(m_current != '\\')) {
    812         // Fast case for idents which does not contain \uCCCC characters
    813         lvalp->ident = makeIdentifier(identifierStart, currentCharacter() - identifierStart);
    814         goto doneIdentifierOrKeyword;
    815     }
    816     m_buffer16.append(identifierStart, currentCharacter() - identifierStart);
    817 }
    818 
    819     do {
    820         shift();
    821         if (UNLIKELY(m_current != 'u'))
    822             goto returnError;
    823         shift();
    824         identChar = getUnicodeCharacter();
    825         if (UNLIKELY(identChar == -1))
    826             goto returnError;
    827         if (UNLIKELY(!isIdentPart(identChar)))
    828             goto returnError;
    829 inIdentifierAfterCharacterCheck:
    830         record16(identChar);
    831 
    832         while (isIdentPart(m_current)) {
    833             record16(m_current);
    834             shift();
    835         }
    836     } while (UNLIKELY(m_current == '\\'));
    837     goto doneIdentifier;
    838 
    839846inSingleLineComment:
    840847    while (!isLineTerminator(m_current)) {
     
    10091016    token = SEMICOLON;
    10101017    m_delimited = true;
    1011     goto returnToken;
    1012 
    1013 doneIdentifier:
    1014     m_atLineStart = false;
    1015     m_delimited = false;
    1016     lvalp->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
    1017     m_buffer16.resize(0);
    1018     token = IDENT;
    1019     goto returnToken;
    1020 
    1021 doneIdentifierOrKeyword: {
    1022     m_atLineStart = false;
    1023     m_delimited = false;
    1024     m_buffer16.resize(0);
    1025     if (lexType == IdentifyReservedWords) {
    1026         const HashEntry* entry = m_keywordTable.entry(m_globalData, *lvalp->ident);
    1027         token = entry ? static_cast<JSTokenType>(entry->lexerValue()) : IDENT;
    1028     } else
    1029         token = IDENT;
    10301018    // Fall through into returnToken.
    1031 }
    10321019
    10331020returnToken: {
Note: See TracChangeset for help on using the changeset viewer.