Context Navigation

← Previous Change
Next Change →

Lexer.cpp

Timestamp:

Apr 21, 2013, 4:26:56 PM (12 years ago)

Author:

Message:

JS Lexer and Parser should be more informative when they encounter errors
https://bugs.webkit.org/show_bug.cgi?id=114924

Reviewed by Filip Pizlo.

Source/JavaScriptCore:

Add new tokens to represent the various ways in which parsing and lexing can fail.
This gives us the ability to produce better error messages in some cases,
and to indicate whether the failure was due to invalid source or simply to the
source ending early (i.e. incomplete input).

The jsc prompt now makes use of this so that you can write functions that
are more than one line long.

bytecompiler/BytecodeGenerator.cpp:

(JSC::BytecodeGenerator::generate):

jsc.cpp:

(stringFromUTF):
(jscSource):
(runInteractive):

parser/Lexer.cpp:

(JSC::Lexer<T>::parseFourDigitUnicodeHex):
(JSC::Lexer<T>::parseIdentifierSlowCase):
(JSC::Lexer<T>::parseString):
(JSC::Lexer<T>::parseStringSlowCase):
(JSC::Lexer<T>::lex):

parser/Lexer.h:

(UnicodeHexValue):
(JSC::Lexer::UnicodeHexValue::UnicodeHexValue):
(JSC::Lexer::UnicodeHexValue::valueType):
(JSC::Lexer::UnicodeHexValue::isValid):
(JSC::Lexer::UnicodeHexValue::value):
(Lexer):

parser/Parser.h:

(JSC::Parser::getTokenName):
(JSC::Parser::updateErrorMessageSpecialCase):
(JSC::Parser<LexerType>::parse):

parser/ParserError.h:

(ParserError):
(JSC::ParserError::ParserError):

parser/ParserTokens.h:
runtime/Completion.cpp:

(JSC):
(JSC::checkSyntax):

runtime/Completion.h:

(JSC):

LayoutTests:

Update test results to cover improved error messages.

fast/js/kde/parse-expected.txt:
sputnik/Conformance/07_Lexical_Conventions/7.2_White_Space/S7.2_A5_T1-expected.txt:
sputnik/Conformance/07_Lexical_Conventions/7.2_White_Space/S7.2_A5_T2-expected.txt:
sputnik/Conformance/07_Lexical_Conventions/7.2_White_Space/S7.2_A5_T3-expected.txt:
sputnik/Conformance/07_Lexical_Conventions/7.2_White_Space/S7.2_A5_T4-expected.txt:
sputnik/Conformance/07_Lexical_Conventions/7.2_White_Space/S7.2_A5_T5-expected.txt:
sputnik/Conformance/07_Lexical_Conventions/7.3_Line_Terminators/S7.3_A6_T1-expected.txt:
sputnik/Conformance/07_Lexical_Conventions/7.3_Line_Terminators/S7.3_A6_T2-expected.txt:
sputnik/Conformance/07_Lexical_Conventions/7.3_Line_Terminators/S7.3_A6_T3-expected.txt:
sputnik/Conformance/07_Lexical_Conventions/7.3_Line_Terminators/S7.3_A6_T4-expected.txt:
sputnik/Conformance/07_Lexical_Conventions/7.7_Punctuators/S7.7_A2_T1-expected.txt:
sputnik/Conformance/07_Lexical_Conventions/7.7_Punctuators/S7.7_A2_T10-expected.txt:
sputnik/Conformance/07_Lexical_Conventions/7.7_Punctuators/S7.7_A2_T2-expected.txt:
sputnik/Conformance/07_Lexical_Conventions/7.7_Punctuators/S7.7_A2_T3-expected.txt:
sputnik/Conformance/07_Lexical_Conventions/7.7_Punctuators/S7.7_A2_T4-expected.txt:
sputnik/Conformance/07_Lexical_Conventions/7.7_Punctuators/S7.7_A2_T5-expected.txt:
sputnik/Conformance/07_Lexical_Conventions/7.7_Punctuators/S7.7_A2_T6-expected.txt:
sputnik/Conformance/07_Lexical_Conventions/7.7_Punctuators/S7.7_A2_T7-expected.txt:
sputnik/Conformance/07_Lexical_Conventions/7.7_Punctuators/S7.7_A2_T8-expected.txt:
sputnik/Conformance/07_Lexical_Conventions/7.7_Punctuators/S7.7_A2_T9-expected.txt:
sputnik/Conformance/13_Function_Definition/S13_A7_T3-expected.txt:

File:

: 1 edited

trunk/Source/JavaScriptCore/parser/Lexer.cpp (modified) (21 diffs)

Legend:

: Unmodified
: Added
: Removed

trunk/Source/JavaScriptCore/parser/Lexer.cpp

-              r148696
+              r148849
 template <typename T>
 int Lexer<T>::parseFourDigitUnicodeHex()
+typename Lexer<T>::UnicodeHexValue Lexer<T>::parseFourDigitUnicodeHex()
+{
     T char1 = peek(1);
 …
     if (UNLIKELY(!isASCIIHexDigit(m_current) || !isASCIIHexDigit(char1) || !isASCIIHexDigit(char2) || !isASCIIHexDigit(char3)))
         return -1;
+        return UnicodeHexValue((m_code + 4) >= m_codeEnd ? UnicodeHexValue::IncompleteHex : UnicodeHexValue::InvalidHex);
     int result = convertUnicode(m_current, char1, char2, char3);
 …
     shift();
     shift();
     return result;
+    return UnicodeHexValue(result);
+}
 …
         shift();
         if (UNLIKELY(m_current != 'u'))
             return ERRORTOK;
         shift();
         int character = parseFourDigitUnicodeHex();
         if (UNLIKELY(character == -1))
             return ERRORTOK;
         UChar ucharacter = static_cast<UChar>(character);
+            return atEnd() ? UNTERMINATED_IDENTIFIER_ESCAPE_ERRORTOK : INVALID_IDENTIFIER_ESCAPE_ERRORTOK;
+        shift();
+        UnicodeHexValue character = parseFourDigitUnicodeHex();
+        if (UNLIKELY(!character.isValid()))
+            return character.valueType() == UnicodeHexValue::IncompleteHex ? UNTERMINATED_IDENTIFIER_UNICODE_ESCAPE_ERRORTOK : INVALID_IDENTIFIER_UNICODE_ESCAPE_ERRORTOK;
+        UChar ucharacter = static_cast<UChar>(character.value());
         if (UNLIKELY(m_buffer16.size() ? !isIdentPart(ucharacter) : !isIdentStart(ucharacter)))
             return ERRORTOK;
+            return INVALID_IDENTIFIER_UNICODE_ESCAPE_ERRORTOK;
         if (shouldCreateIdentifier)
             record16(ucharacter);
 …
 template <typename T>
 template <bool shouldBuildStrings> ALWAYS_INLINE bool Lexer<T>::parseString(JSTokenData* tokenData, bool strictMode)
+template <bool shouldBuildStrings> ALWAYS_INLINE typename Lexer<T>::StringParseResult Lexer<T>::parseString(JSTokenData* tokenData, bool strictMode)
+{
     int startingOffset = currentOffset();
 …
                 if (!isASCIIHexDigit(m_current) || !isASCIIHexDigit(peek(1))) {
                     m_lexErrorMessage = "\\x can only be followed by a hex character sequence";
                     return false;
+                    return (atEnd() || (isASCIIHexDigit(m_current) && (m_code + 1 == m_codeEnd))) ? StringUnterminated : StringCannotBeParsed;
+                }
                 T prev = m_current;
 …
         tokenData->ident = 0;
     return true;
+}
 template <typename T>
 template <bool shouldBuildStrings> bool Lexer<T>::parseStringSlowCase(JSTokenData* tokenData, bool strictMode)
+    return StringParsedSuccessfully;
+}
+template <typename T>
+template <bool shouldBuildStrings> typename Lexer<T>::StringParseResult Lexer<T>::parseStringSlowCase(JSTokenData* tokenData, bool strictMode)
+{
     T stringQuoteCharacter = m_current;
 …
                 if (!isASCIIHexDigit(m_current) || !isASCIIHexDigit(peek(1))) {
                     m_lexErrorMessage = "\\x can only be followed by a hex character sequence";
                     return false;
+                    return StringCannotBeParsed;
+                }
                 T prev = m_current;
 …
             } else if (m_current == 'u') {
                 shift();
                 int character = parseFourDigitUnicodeHex();
                 if (character != -1) {
+                UnicodeHexValue character = parseFourDigitUnicodeHex();
+                if (character.isValid()) {
                     if (shouldBuildStrings)
                         record16(character);
+                        record16(character.value());
                 } else if (m_current == stringQuoteCharacter) {
                     if (shouldBuildStrings)
 …
                 } else {
                     m_lexErrorMessage = "\\u can only be followed by a Unicode character sequence";
                     return false;
+                    return character.valueType() == UnicodeHexValue::IncompleteHex ? StringUnterminated : StringCannotBeParsed;
+                }
             } else if (strictMode && isASCIIDigit(m_current)) {
 …
                 if (character1 != '0' || isASCIIDigit(m_current)) {
                     m_lexErrorMessage = "The only valid numeric escape in strict mode is '\\0'";
                     return false;
+                    return StringCannotBeParsed;
+                }
                 if (shouldBuildStrings)
 …
             } else {
                 m_lexErrorMessage = "Unterminated string constant";
                 return false;
+                return StringUnterminated;
+            }
 …
             if (atEnd() || isLineTerminator(m_current)) {
                 m_lexErrorMessage = "Unexpected EOF";
                 return false;
+                return atEnd() ? StringUnterminated : StringCannotBeParsed;
+            }
             // Anything else is just a normal character
 …
     m_buffer16.resize(0);
     return true;
+    return StringParsedSuccessfully;
+}
 …
                 goto start;
             m_lexErrorMessage = "Multiline comment was not closed properly";
+            token = UNTERMINATED_MULTILINE_COMMENT_ERRORTOK;
             goto returnError;
+        }
 …
                     if (strictMode) {
                         m_lexErrorMessage = "Octal escapes are forbidden in strict mode";
+                        token = INVALID_OCTAL_NUMBER_ERRORTOK;
                         goto returnError;
+                    }
 …
                     if (!parseNumberAfterExponentIndicator()) {
                         m_lexErrorMessage = "Non-number found after exponent indicator";
+                        token = atEnd() ? UNTERMINATED_NUMERIC_LITERAL_ERRORTOK : INVALID_NUMERIC_LITERAL_ERRORTOK;
                         goto returnError;
+                    }
 …
         if (UNLIKELY(isIdentStart(m_current))) {
             m_lexErrorMessage = "At least one digit must occur after a decimal point";
+            token = atEnd() ? UNTERMINATED_NUMERIC_LITERAL_ERRORTOK : INVALID_NUMERIC_LITERAL_ERRORTOK;
             goto returnError;
+        }
 …
     case CharacterQuote:
         if (lexerFlags & LexerFlagsDontBuildStrings) {
+            if (UNLIKELY(!parseString<false>(tokenData, strictMode)))
+            StringParseResult result = parseString<false>(tokenData, strictMode);
+            if (UNLIKELY(result != StringParsedSuccessfully)) {
+                token = result == StringUnterminated ? UNTERMINATED_STRING_LITERAL_ERRORTOK : INVALID_STRING_LITERAL_ERRORTOK;
                 goto returnError;
+            }
         } else {
+            if (UNLIKELY(!parseString<true>(tokenData, strictMode)))
+            StringParseResult result = parseString<true>(tokenData, strictMode);
+            if (UNLIKELY(result != StringParsedSuccessfully)) {
+                token = result == StringUnterminated ? UNTERMINATED_STRING_LITERAL_ERRORTOK : INVALID_STRING_LITERAL_ERRORTOK;
                 goto returnError;
+            }
+        }
         shift();
 …
     case CharacterInvalid:
         m_lexErrorMessage = invalidCharacterMessage();
+        token = ERRORTOK;
         goto returnError;
     default:
         RELEASE_ASSERT_NOT_REACHED();
         m_lexErrorMessage = "Internal Error";
+        token = ERRORTOK;
         goto returnError;
+    }
 …
     tokenLocation->line = m_lineNumber;
     tokenLocation->endOffset = currentOffset();
+    return ERRORTOK;
+    RELEASE_ASSERT(token & ErrorTokenFlag);
+    return token;
+}

Note: See TracChangeset for help on using the changeset viewer.

Context Navigation

Changeset 148849 in webkit for trunk/Source/JavaScriptCore/parser/Lexer.cpp

Legend:

trunk/Source/JavaScriptCore/parser/Lexer.cpp

Download in other formats: