Ignore:
Timestamp:
Apr 21, 2012, 1:03:13 PM (13 years ago)
Author:
Darin Adler
Message:

Change JavaScript lexer to use 0 instead of -1 for sentinel, eliminating the need to put characters into ints
https://bugs.webkit.org/show_bug.cgi?id=84523

Reviewed by Oliver Hunt.

Profiles showed that checks against -1 were costly, and I saw they could be eliminated.
Streamlined this code to use standard character types and 0 rather than -1. One benefit
of this is that there's no widening and narrowing. Another is that there are many cases
where we already have the correct behavior for 0, so can eliminate a branch that was
used to test for -1 before. Also eliminates typecasts in the code.

  • parser/Lexer.cpp:

(JSC::Lexer::invalidCharacterMessage): Updated use of String::format since m_current is now a
character type, not an int.
(JSC::Lexer::setCode): Use 0 rather than -1 when past the end.
(JSC::Lexer::shift): Ditto. Also spruced up the comment a bit.
(JSC::Lexer::atEnd): Added. New function that distinguishes an actual 0 character from the end
of the code. This can be used in places where we used to check for -1.
(JSC::Lexer::peek): Updated to use 0 instead of -1. Removed meaningless comment.
(JSC::Lexer::parseFourDigitUnicodeHex): Changed to use character types instead of int.
(JSC::Lexer::shiftLineTerminator): Removed now-unneeded type casts. Changed local variable that
had a data-member-style name.
(JSC::Lexer::parseIdentifier): Removed now-unneeded explicit checks for -1, since the isIdentPart
function already returns false for the 0 character. Updated types in a couple other places. Used
the atEnd function where needed.
(JSC::Lexer::parseIdentifierSlowCase): More of the same.
(JSC::characterRequiresParseStringSlowCase): Added overloaded helper function for parseString.
(JSC::Lexer::parseString): Changed to use character types instead of int and to call the new helper function.
(JSC::Lexer::parseStringSlowCase): Ditto.
(JSC::Lexer::parseMultilineComment): Ditto.
(JSC::Lexer::lex): More of the same. Also changed code to set the startOffset directly in
the tokenInfo instead of putting it in a local variable first, saving some memory access.
(JSC::Lexer::scanRegExp): Ditto.
(JSC::Lexer::skipRegExp): Ditto.

  • parser/Lexer.h: Changed return type of the peek function and type of m_current from int to the character type. Added atEnd function.
(JSC::Lexer::setOffset): Used 0 instead of -1 and removed an overzealous attempt to optimize.
(JSC::Lexer::lexExpectIdentifier): Used 0 instead of -1.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/Source/JavaScriptCore/parser/Lexer.cpp

    r114844 r114845  
    387387        return "Invalid character: '`'";
    388388    default:
    389         return String::format("Invalid character '\\u%04u'", m_current).impl();
     389        return String::format("Invalid character '\\u%04u'", static_cast<unsigned>(m_current)).impl();
    390390    }
    391391}
     
    426426        m_current = *m_code;
    427427    else
    428         m_current = -1;
     428        m_current = 0;
    429429    ASSERT(currentOffset() == source.startOffset());
    430430}
     
    440440ALWAYS_INLINE void Lexer<T>::shift()
    441441{
    442     // Faster than an if-else sequence
    443     ASSERT(m_current != -1);
    444     m_current = -1;
    445     m_code++;
     442    // At one point timing showed that setting m_current to 0 unconditionally was faster than an if-else sequence.
     443    m_current = 0;
     444    ++m_code;
    446445    if (LIKELY(m_code < m_codeEnd))
    447446        m_current = *m_code;
     
    449448
    450449template <typename T>
    451 ALWAYS_INLINE int Lexer<T>::peek(int offset)
     450ALWAYS_INLINE bool Lexer<T>::atEnd() const
     451{
     452    ASSERT(!m_current || m_code < m_codeEnd);
     453    return UNLIKELY(UNLIKELY(!m_current) && m_code == m_codeEnd);
     454}
     455
     456template <typename T>
     457ALWAYS_INLINE T Lexer<T>::peek(int offset) const
    452458{
    453459    ASSERT(offset > 0 && offset < 5);
    454460    const T* code = m_code + offset;
    455     return (code < m_codeEnd) ? *code : -1;
     461    return (code < m_codeEnd) ? *code : 0;
    456462}
    457463
     
    459465int Lexer<T>::parseFourDigitUnicodeHex()
    460466{
    461     int char1 = peek(1);
    462     int char2 = peek(2);
    463     int char3 = peek(3);
     467    T char1 = peek(1);
     468    T char2 = peek(2);
     469    T char3 = peek(3);
    464470
    465471    if (UNLIKELY(!isASCIIHexDigit(m_current) || !isASCIIHexDigit(char1) || !isASCIIHexDigit(char2) || !isASCIIHexDigit(char3)))
     
    477483void Lexer<T>::shiftLineTerminator()
    478484{
    479     ASSERT(isLineTerminator(static_cast<T>(m_current)));
    480 
    481     int prev = m_current;
     485    ASSERT(isLineTerminator(m_current));
     486
     487    T prev = m_current;
    482488    shift();
    483489
     
    646652    const LChar* identifierStart = currentCharacter();
    647653   
    648     while (m_current != -1 && isIdentPart(static_cast<LChar>(m_current)))
     654    while (isIdentPart(m_current))
    649655        shift();
    650656   
     
    696702    UChar orAllChars = 0;
    697703   
    698     while (m_current != -1 && isIdentPart(static_cast<UChar>(m_current))) {
     704    while (isIdentPart(m_current)) {
    699705        orAllChars |= m_current;
    700706        shift();
     
    748754
    749755    while (true) {
    750         if (LIKELY(m_current != -1 && isIdentPart(static_cast<T>(m_current)))) {
     756        if (LIKELY(isIdentPart(m_current))) {
    751757            shift();
    752758            continue;
     
    808814}
    809815
     816static ALWAYS_INLINE bool characterRequiresParseStringSlowCase(LChar character)
     817{
     818    return character < 0xE;
     819}
     820
     821static ALWAYS_INLINE bool characterRequiresParseStringSlowCase(UChar character)
     822{
     823    return character < 0xE || character > 0xFF;
     824}
     825
    810826template <typename T>
    811827template <bool shouldBuildStrings> ALWAYS_INLINE bool Lexer<T>::parseString(JSTokenData* tokenData, bool strictMode)
     
    813829    int startingOffset = currentOffset();
    814830    int startingLineNumber = lineNumber();
    815     int stringQuoteCharacter = m_current;
     831    T stringQuoteCharacter = m_current;
    816832    shift();
    817833
     
    819835
    820836    while (m_current != stringQuoteCharacter) {
    821         if (UNLIKELY((m_current == '\\'))) {
     837        if (UNLIKELY(m_current == '\\')) {
    822838            if (stringStart != currentCharacter() && shouldBuildStrings)
    823839                append8(stringStart, currentCharacter() - stringStart);
     
    839855                    return false;
    840856                }
    841                 int prev = m_current;
     857                T prev = m_current;
    842858                shift();
    843859                if (shouldBuildStrings)
     
    854870        }
    855871
    856         if (UNLIKELY(((m_current > 0xff) || (m_current < 0xe)))) {
     872        if (UNLIKELY(characterRequiresParseStringSlowCase(m_current))) {
    857873            setOffset(startingOffset);
    858874            setLineNumber(startingLineNumber);
     
    878894template <bool shouldBuildStrings> bool Lexer<T>::parseStringSlowCase(JSTokenData* tokenData, bool strictMode)
    879895{
    880     int stringQuoteCharacter = m_current;
     896    T stringQuoteCharacter = m_current;
    881897    shift();
    882898
     
    896912                    record16(escape);
    897913                shift();
    898             } else if (UNLIKELY(isLineTerminator(static_cast<T>(m_current))))
     914            } else if (UNLIKELY(isLineTerminator(m_current)))
    899915                shiftLineTerminator();
    900916            else if (m_current == 'x') {
     
    904920                    return false;
    905921                }
    906                 int prev = m_current;
     922                T prev = m_current;
    907923                shift();
    908924                if (shouldBuildStrings)
     
    934950            } else if (!strictMode && isASCIIOctalDigit(m_current)) {
    935951                // Octal character sequences
    936                 int character1 = m_current;
     952                T character1 = m_current;
    937953                shift();
    938954                if (isASCIIOctalDigit(m_current)) {
    939955                    // Two octal characters
    940                     int character2 = m_current;
     956                    T character2 = m_current;
    941957                    shift();
    942958                    if (character1 >= '0' && character1 <= '3' && isASCIIOctalDigit(m_current)) {
     
    952968                        record16(character1 - '0');
    953969                }
    954             } else if (m_current != -1) {
     970            } else if (!atEnd()) {
    955971                if (shouldBuildStrings)
    956972                    record16(m_current);
     
    965981        }
    966982        // Fast check for characters that require special handling.
    967         // Catches -1, \n, \r, 0x2028, and 0x2029 as efficiently
     983        // Catches 0, \n, \r, 0x2028, and 0x2029 as efficiently
    968984        // as possible, and lets through all common ASCII characters.
    969985        if (UNLIKELY(((static_cast<unsigned>(m_current) - 0xE) & 0x2000))) {
    970986            // New-line or end of input is not allowed
    971             if (UNLIKELY(m_current == -1) || UNLIKELY(isLineTerminator(static_cast<T>(m_current)))) {
     987            if (atEnd() || isLineTerminator(m_current)) {
    972988                m_lexErrorMessage = "Unexpected EOF";
    973989                return false;
     
    11461162        }
    11471163
    1148         if (UNLIKELY(m_current == -1))
     1164        if (atEnd())
    11491165            return false;
    11501166
    1151         if (isLineTerminator(static_cast<T>(m_current))) {
     1167        if (isLineTerminator(m_current)) {
    11521168            shiftLineTerminator();
    11531169            m_terminator = true;
     
    11781194
    11791195start:
    1180     while (m_current != -1 && isWhiteSpace(static_cast<T>(m_current)))
    1181         shift();
    1182 
    1183     int startOffset = currentOffset();
    1184 
    1185     if (UNLIKELY(m_current == -1))
     1196    while (isWhiteSpace(m_current))
     1197        shift();
     1198
     1199    if (atEnd())
    11861200        return EOFTOK;
     1201   
     1202    tokenInfo->startOffset = currentOffset();
    11871203
    11881204    CharacterType type;
    1189     if (LIKELY(isLatin1(static_cast<T>(m_current))))
     1205    if (LIKELY(isLatin1(m_current)))
    11901206        type = static_cast<CharacterType>(typesOfLatin1Characters[m_current]);
    11911207    else if (isNonLatin1IdentStart(m_current))
    11921208        type = CharacterIdentifierStart;
    1193     else if (isLineTerminator(static_cast<T>(m_current)))
     1209    else if (isLineTerminator(m_current))
    11941210        type = CharacterLineTerminator;
    11951211    else
     
    14761492
    14771493        // No identifiers allowed directly after numeric literal, e.g. "3in" is bad.
    1478         if (UNLIKELY(m_current != -1 && isIdentStart(static_cast<T>(m_current)))) {
     1494        if (UNLIKELY(isIdentStart(m_current))) {
    14791495            m_lexErrorMessage = "At least one digit must occur after a decimal point";
    14801496            goto returnError;
     
    14941510        break;
    14951511    case CharacterIdentifierStart:
    1496         ASSERT(isIdentStart(static_cast<T>(m_current)));
     1512        ASSERT(isIdentStart(m_current));
    14971513        // Fall through into CharacterBackSlash.
    14981514    case CharacterBackSlash:
     
    15031519        break;
    15041520    case CharacterLineTerminator:
    1505         ASSERT(isLineTerminator(static_cast<T>(m_current)));
     1521        ASSERT(isLineTerminator(m_current));
    15061522        shiftLineTerminator();
    15071523        m_atLineStart = true;
     
    15211537
    15221538inSingleLineComment:
    1523     while (!isLineTerminator(static_cast<T>(m_current))) {
    1524         if (UNLIKELY(m_current == -1))
     1539    while (!isLineTerminator(m_current)) {
     1540        if (atEnd())
    15251541            return EOFTOK;
    15261542        shift();
     
    15371553returnToken:
    15381554    tokenInfo->line = m_lineNumber;
    1539     tokenInfo->startOffset = startOffset;
    15401555    tokenInfo->endOffset = currentOffset();
    15411556    m_lastToken = token;
     
    15451560    m_error = true;
    15461561    tokenInfo->line = m_lineNumber;
    1547     tokenInfo->startOffset = startOffset;
    15481562    tokenInfo->endOffset = currentOffset();
    15491563    return ERRORTOK;
     
    15661580
    15671581    while (true) {
    1568         int current = m_current;
    1569 
    1570         if (isLineTerminator(static_cast<T>(current)) || current == -1) {
     1582        if (isLineTerminator(m_current) || atEnd()) {
    15711583            m_buffer16.resize(0);
    15721584            return false;
    15731585        }
    15741586
    1575         shift();
    1576 
    1577         if (current == '/' && !lastWasEscape && !inBrackets)
    1578             break;
    1579 
    1580         record16(current);
     1587        T prev = m_current;
     1588       
     1589        shift();
     1590
     1591        if (prev == '/' && !lastWasEscape && !inBrackets)
     1592            break;
     1593
     1594        record16(prev);
    15811595
    15821596        if (lastWasEscape) {
     
    15851599        }
    15861600
    1587         switch (current) {
     1601        switch (prev) {
    15881602        case '[':
    15891603            inBrackets = true;
     
    16011615    m_buffer16.resize(0);
    16021616
    1603     while (m_current != -1 && isIdentPart(static_cast<T>(m_current))) {
     1617    while (isIdentPart(m_current)) {
    16041618        record16(m_current);
    16051619        shift();
     
    16191633
    16201634    while (true) {
    1621         int current = m_current;
    1622 
    1623         if (isLineTerminator(static_cast<T>(current)) || current == -1)
     1635        if (isLineTerminator(m_current) || atEnd())
    16241636            return false;
    16251637
    1626         shift();
    1627 
    1628         if (current == '/' && !lastWasEscape && !inBrackets)
     1638        T prev = m_current;
     1639       
     1640        shift();
     1641
     1642        if (prev == '/' && !lastWasEscape && !inBrackets)
    16291643            break;
    16301644
     
    16341648        }
    16351649
    1636         switch (current) {
     1650        switch (prev) {
    16371651        case '[':
    16381652            inBrackets = true;
     
    16471661    }
    16481662
    1649     while (m_current != -1 && isIdentPart(static_cast<T>(m_current)))
     1663    while (isIdentPart(m_current))
    16501664        shift();
    16511665
Note: See TracChangeset for help on using the changeset viewer.