Ignore:
Timestamp:
Apr 21, 2013, 4:26:56 PM (12 years ago)
Author:
[email protected]
Message:

JS Lexer and Parser should be more informative when they encounter errors
https://bugs.webkit.org/show_bug.cgi?id=114924

Reviewed by Filip Pizlo.

Source/JavaScriptCore:

Add new error tokens that represent the various ways in which lexing and parsing can fail.
This lets us produce better error messages in some cases, and lets us indicate
whether a failure was caused by invalid source or simply by the input ending
early (early termination).

The jsc prompt now makes use of the invalid-source versus early-termination
distinction, so that you can write functions that are more than one line long.

  • bytecompiler/BytecodeGenerator.cpp:

(JSC::BytecodeGenerator::generate):

  • jsc.cpp:

(stringFromUTF):
(jscSource):
(runInteractive):

  • parser/Lexer.cpp:

(JSC::Lexer::parseFourDigitUnicodeHex):
(JSC::Lexer::parseIdentifierSlowCase):
(JSC::Lexer::parseString):
(JSC::Lexer::parseStringSlowCase):
(JSC::Lexer::lex):

  • parser/Lexer.h:

(UnicodeHexValue):
(JSC::Lexer::UnicodeHexValue::UnicodeHexValue):
(JSC::Lexer::UnicodeHexValue::valueType):
(JSC::Lexer::UnicodeHexValue::isValid):
(JSC::Lexer::UnicodeHexValue::value):
(Lexer):

  • parser/Parser.h:

(JSC::Parser::getTokenName):
(JSC::Parser::updateErrorMessageSpecialCase):
(JSC::Parser::parse):

  • parser/ParserError.h:

(ParserError):
(JSC::ParserError::ParserError):

  • parser/ParserTokens.h:
  • runtime/Completion.cpp:

(JSC):
(JSC::checkSyntax):

  • runtime/Completion.h:

(JSC):

LayoutTests:

Update test results to cover improved error messages.

  • fast/js/kde/parse-expected.txt:
  • sputnik/Conformance/07_Lexical_Conventions/7.2_White_Space/S7.2_A5_T1-expected.txt:
  • sputnik/Conformance/07_Lexical_Conventions/7.2_White_Space/S7.2_A5_T2-expected.txt:
  • sputnik/Conformance/07_Lexical_Conventions/7.2_White_Space/S7.2_A5_T3-expected.txt:
  • sputnik/Conformance/07_Lexical_Conventions/7.2_White_Space/S7.2_A5_T4-expected.txt:
  • sputnik/Conformance/07_Lexical_Conventions/7.2_White_Space/S7.2_A5_T5-expected.txt:
  • sputnik/Conformance/07_Lexical_Conventions/7.3_Line_Terminators/S7.3_A6_T1-expected.txt:
  • sputnik/Conformance/07_Lexical_Conventions/7.3_Line_Terminators/S7.3_A6_T2-expected.txt:
  • sputnik/Conformance/07_Lexical_Conventions/7.3_Line_Terminators/S7.3_A6_T3-expected.txt:
  • sputnik/Conformance/07_Lexical_Conventions/7.3_Line_Terminators/S7.3_A6_T4-expected.txt:
  • sputnik/Conformance/07_Lexical_Conventions/7.7_Punctuators/S7.7_A2_T1-expected.txt:
  • sputnik/Conformance/07_Lexical_Conventions/7.7_Punctuators/S7.7_A2_T10-expected.txt:
  • sputnik/Conformance/07_Lexical_Conventions/7.7_Punctuators/S7.7_A2_T2-expected.txt:
  • sputnik/Conformance/07_Lexical_Conventions/7.7_Punctuators/S7.7_A2_T3-expected.txt:
  • sputnik/Conformance/07_Lexical_Conventions/7.7_Punctuators/S7.7_A2_T4-expected.txt:
  • sputnik/Conformance/07_Lexical_Conventions/7.7_Punctuators/S7.7_A2_T5-expected.txt:
  • sputnik/Conformance/07_Lexical_Conventions/7.7_Punctuators/S7.7_A2_T6-expected.txt:
  • sputnik/Conformance/07_Lexical_Conventions/7.7_Punctuators/S7.7_A2_T7-expected.txt:
  • sputnik/Conformance/07_Lexical_Conventions/7.7_Punctuators/S7.7_A2_T8-expected.txt:
  • sputnik/Conformance/07_Lexical_Conventions/7.7_Punctuators/S7.7_A2_T9-expected.txt:
  • sputnik/Conformance/13_Function_Definition/S13_A7_T3-expected.txt:
File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/Source/JavaScriptCore/parser/Lexer.cpp

    r148696 r148849  
    597597
    598598template <typename T>
    599 int Lexer<T>::parseFourDigitUnicodeHex()
     599typename Lexer<T>::UnicodeHexValue Lexer<T>::parseFourDigitUnicodeHex()
    600600{
    601601    T char1 = peek(1);
     
    604604
    605605    if (UNLIKELY(!isASCIIHexDigit(m_current) || !isASCIIHexDigit(char1) || !isASCIIHexDigit(char2) || !isASCIIHexDigit(char3)))
    606         return -1;
     606        return UnicodeHexValue((m_code + 4) >= m_codeEnd ? UnicodeHexValue::IncompleteHex : UnicodeHexValue::InvalidHex);
    607607
    608608    int result = convertUnicode(m_current, char1, char2, char3);
     
    611611    shift();
    612612    shift();
    613     return result;
     613    return UnicodeHexValue(result);
    614614}
    615615
     
    884884        shift();
    885885        if (UNLIKELY(m_current != 'u'))
    886             return ERRORTOK;
    887         shift();
    888         int character = parseFourDigitUnicodeHex();
    889         if (UNLIKELY(character == -1))
    890             return ERRORTOK;
    891         UChar ucharacter = static_cast<UChar>(character);
     886            return atEnd() ? UNTERMINATED_IDENTIFIER_ESCAPE_ERRORTOK : INVALID_IDENTIFIER_ESCAPE_ERRORTOK;
     887        shift();
     888        UnicodeHexValue character = parseFourDigitUnicodeHex();
     889        if (UNLIKELY(!character.isValid()))
     890            return character.valueType() == UnicodeHexValue::IncompleteHex ? UNTERMINATED_IDENTIFIER_UNICODE_ESCAPE_ERRORTOK : INVALID_IDENTIFIER_UNICODE_ESCAPE_ERRORTOK;
     891        UChar ucharacter = static_cast<UChar>(character.value());
    892892        if (UNLIKELY(m_buffer16.size() ? !isIdentPart(ucharacter) : !isIdentStart(ucharacter)))
    893             return ERRORTOK;
     893            return INVALID_IDENTIFIER_UNICODE_ESCAPE_ERRORTOK;
    894894        if (shouldCreateIdentifier)
    895895            record16(ucharacter);
     
    942942
    943943template <typename T>
    944 template <bool shouldBuildStrings> ALWAYS_INLINE bool Lexer<T>::parseString(JSTokenData* tokenData, bool strictMode)
     944template <bool shouldBuildStrings> ALWAYS_INLINE typename Lexer<T>::StringParseResult Lexer<T>::parseString(JSTokenData* tokenData, bool strictMode)
    945945{
    946946    int startingOffset = currentOffset();
     
    970970                if (!isASCIIHexDigit(m_current) || !isASCIIHexDigit(peek(1))) {
    971971                    m_lexErrorMessage = "\\x can only be followed by a hex character sequence";
    972                     return false;
     972                    return (atEnd() || (isASCIIHexDigit(m_current) && (m_code + 1 == m_codeEnd))) ? StringUnterminated : StringCannotBeParsed;
    973973                }
    974974                T prev = m_current;
     
    10051005        tokenData->ident = 0;
    10061006
    1007     return true;
    1008 }
    1009 
    1010 template <typename T>
    1011 template <bool shouldBuildStrings> bool Lexer<T>::parseStringSlowCase(JSTokenData* tokenData, bool strictMode)
     1007    return StringParsedSuccessfully;
     1008}
     1009
     1010template <typename T>
     1011template <bool shouldBuildStrings> typename Lexer<T>::StringParseResult Lexer<T>::parseStringSlowCase(JSTokenData* tokenData, bool strictMode)
    10121012{
    10131013    T stringQuoteCharacter = m_current;
     
    10351035                if (!isASCIIHexDigit(m_current) || !isASCIIHexDigit(peek(1))) {
    10361036                    m_lexErrorMessage = "\\x can only be followed by a hex character sequence";
    1037                     return false;
     1037                    return StringCannotBeParsed;
    10381038                }
    10391039                T prev = m_current;
     
    10441044            } else if (m_current == 'u') {
    10451045                shift();
    1046                 int character = parseFourDigitUnicodeHex();
    1047                 if (character != -1) {
     1046                UnicodeHexValue character = parseFourDigitUnicodeHex();
     1047                if (character.isValid()) {
    10481048                    if (shouldBuildStrings)
    1049                         record16(character);
     1049                        record16(character.value());
    10501050                } else if (m_current == stringQuoteCharacter) {
    10511051                    if (shouldBuildStrings)
     
    10531053                } else {
    10541054                    m_lexErrorMessage = "\\u can only be followed by a Unicode character sequence";
    1055                     return false;
     1055                    return character.valueType() == UnicodeHexValue::IncompleteHex ? StringUnterminated : StringCannotBeParsed;
    10561056                }
    10571057            } else if (strictMode && isASCIIDigit(m_current)) {
     
    10611061                if (character1 != '0' || isASCIIDigit(m_current)) {
    10621062                    m_lexErrorMessage = "The only valid numeric escape in strict mode is '\\0'";
    1063                     return false;
     1063                    return StringCannotBeParsed;
    10641064                }
    10651065                if (shouldBuildStrings)
     
    10911091            } else {
    10921092                m_lexErrorMessage = "Unterminated string constant";
    1093                 return false;
     1093                return StringUnterminated;
    10941094            }
    10951095
     
    11041104            if (atEnd() || isLineTerminator(m_current)) {
    11051105                m_lexErrorMessage = "Unexpected EOF";
    1106                 return false;
     1106                return atEnd() ? StringUnterminated : StringCannotBeParsed;
    11071107            }
    11081108            // Anything else is just a normal character
     
    11191119
    11201120    m_buffer16.resize(0);
    1121     return true;
     1121    return StringParsedSuccessfully;
    11221122}
    11231123
     
    14631463                goto start;
    14641464            m_lexErrorMessage = "Multiline comment was not closed properly";
     1465            token = UNTERMINATED_MULTILINE_COMMENT_ERRORTOK;
    14651466            goto returnError;
    14661467        }
     
    15821583                    if (strictMode) {
    15831584                        m_lexErrorMessage = "Octal escapes are forbidden in strict mode";
     1585                        token = INVALID_OCTAL_NUMBER_ERRORTOK;
    15841586                        goto returnError;
    15851587                    }
     
    16001602                    if (!parseNumberAfterExponentIndicator()) {
    16011603                        m_lexErrorMessage = "Non-number found after exponent indicator";
     1604                        token = atEnd() ? UNTERMINATED_NUMERIC_LITERAL_ERRORTOK : INVALID_NUMERIC_LITERAL_ERRORTOK;
    16021605                        goto returnError;
    16031606                    }
     
    16121615        if (UNLIKELY(isIdentStart(m_current))) {
    16131616            m_lexErrorMessage = "At least one digit must occur after a decimal point";
     1617            token = atEnd() ? UNTERMINATED_NUMERIC_LITERAL_ERRORTOK : INVALID_NUMERIC_LITERAL_ERRORTOK;
    16141618            goto returnError;
    16151619        }
     
    16181622    case CharacterQuote:
    16191623        if (lexerFlags & LexerFlagsDontBuildStrings) {
    1620             if (UNLIKELY(!parseString<false>(tokenData, strictMode)))
     1624            StringParseResult result = parseString<false>(tokenData, strictMode);
     1625            if (UNLIKELY(result != StringParsedSuccessfully)) {
     1626                token = result == StringUnterminated ? UNTERMINATED_STRING_LITERAL_ERRORTOK : INVALID_STRING_LITERAL_ERRORTOK;
    16211627                goto returnError;
     1628            }
    16221629        } else {
    1623             if (UNLIKELY(!parseString<true>(tokenData, strictMode)))
     1630            StringParseResult result = parseString<true>(tokenData, strictMode);
     1631            if (UNLIKELY(result != StringParsedSuccessfully)) {
     1632                token = result == StringUnterminated ? UNTERMINATED_STRING_LITERAL_ERRORTOK : INVALID_STRING_LITERAL_ERRORTOK;
    16241633                goto returnError;
     1634            }
    16251635        }
    16261636        shift();
     
    16441654    case CharacterInvalid:
    16451655        m_lexErrorMessage = invalidCharacterMessage();
     1656        token = ERRORTOK;
    16461657        goto returnError;
    16471658    default:
    16481659        RELEASE_ASSERT_NOT_REACHED();
    16491660        m_lexErrorMessage = "Internal Error";
     1661        token = ERRORTOK;
    16501662        goto returnError;
    16511663    }
     
    16791691    tokenLocation->line = m_lineNumber;
    16801692    tokenLocation->endOffset = currentOffset();
    1681     return ERRORTOK;
     1693    RELEASE_ASSERT(token & ErrorTokenFlag);
     1694    return token;
    16821695}
    16831696
Note: See TracChangeset for help on using the changeset viewer.