Ignore:
Timestamp:
Jan 27, 2017, 7:09:12 PM (8 years ago)
Author:
Yusuke Suzuki
Message:

Lift template escape sequence restrictions in tagged templates
https://bugs.webkit.org/show_bug.cgi?id=166871

Reviewed by Saam Barati.

JSTests:

Update the error messages and add new tests.

  • ChakraCore/test/es6/unicode_6_identifier_Blue524737.baseline-jsc:
  • stress/lift-template-literal.js: Added.

(dump):
(testTag.return.tag):
(testTag):

  • stress/template-literal-syntax.js:

Source/JavaScriptCore:

This patch implements stage 3 Lifting Template Literal Restriction[1].
Prior to this patch, a template literal became a syntax error if it contained
invalid escape sequences. But this is too restrictive: a template literal
has both cooked and raw representations, and only the cooked representation
interprets escape sequences. So even if invalid escape sequences are included,
the raw representation can still be valid.

Lifting Template Literal Restriction relaxes the above restriction.
When an invalid escape sequence is included and the target template literal
is used as a tagged template, we make the cooked result of the template string
including the invalid escape sequence undefined instead of raising a SyntaxError
immediately. This allows us to accept templates that include invalid
escape sequences, through their raw representations, in tagged templates.

On the other hand, the raw representation is only used in tagged templates.
So if invalid escape sequences are included in ordinary (untagged) template literals,
we still raise a SyntaxError as before.

[1]: https://github.com/tc39/proposal-template-literal-revision

  • bytecompiler/BytecodeGenerator.cpp:

(JSC::BytecodeGenerator::emitGetTemplateObject):

  • bytecompiler/NodesCodegen.cpp:

(JSC::TemplateStringNode::emitBytecode):
(JSC::TemplateLiteralNode::emitBytecode):

  • parser/ASTBuilder.h:

(JSC::ASTBuilder::createTemplateString):

  • parser/Lexer.cpp:

(JSC::Lexer<CharacterType>::parseUnicodeEscape):
(JSC::Lexer<T>::parseTemplateLiteral):
(JSC::Lexer<T>::lex):
(JSC::Lexer<T>::scanTemplateString):
(JSC::Lexer<T>::scanTrailingTemplateString): Deleted.

  • parser/Lexer.h:
  • parser/NodeConstructors.h:

(JSC::TemplateStringNode::TemplateStringNode):

  • parser/Nodes.h:

(JSC::TemplateStringNode::cooked):
(JSC::TemplateStringNode::raw):

  • parser/Parser.cpp:

(JSC::Parser<LexerType>::parseAssignmentElement):
(JSC::Parser<LexerType>::parseTemplateString):
(JSC::Parser<LexerType>::parseTemplateLiteral):
(JSC::Parser<LexerType>::parsePrimaryExpression):
(JSC::Parser<LexerType>::parseMemberExpression):

  • parser/ParserTokens.h:
  • parser/SyntaxChecker.h:

(JSC::SyntaxChecker::createTemplateString):

  • runtime/TemplateRegistry.cpp:

(JSC::TemplateRegistry::getTemplateObject):

  • runtime/TemplateRegistryKey.h:

(JSC::TemplateRegistryKey::cookedStrings):
(JSC::TemplateRegistryKey::create):
(JSC::TemplateRegistryKey::TemplateRegistryKey):

  • runtime/TemplateRegistryKeyTable.cpp:

(JSC::TemplateRegistryKeyTable::createKey):

  • runtime/TemplateRegistryKeyTable.h:

LayoutTests:

Update the error messages.

  • inspector/runtime/parse-expected.txt:
  • js/unicode-escape-sequences-expected.txt:
File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/Source/JavaScriptCore/parser/Lexer.cpp

    r209632 r211319  
    634634};
    635635
    636 template<typename CharacterType> ParsedUnicodeEscapeValue Lexer<CharacterType>::parseUnicodeEscape()
     636template<typename CharacterType>
     637ParsedUnicodeEscapeValue Lexer<CharacterType>::parseUnicodeEscape()
    637638{
    638639    if (m_current == '{') {
     
    643644                return m_current ? ParsedUnicodeEscapeValue::Invalid : ParsedUnicodeEscapeValue::Incomplete;
    644645            codePoint = (codePoint << 4) | toASCIIHexValue(m_current);
    645             if (codePoint > UCHAR_MAX_VALUE)
    646                 return ParsedUnicodeEscapeValue::Invalid;
     646            if (codePoint > UCHAR_MAX_VALUE) {
     647                // For raw template literal syntax, we consume `NotEscapeSequence`.
     648                // Here, we consume NotCodePoint's HexDigits.
     649                //
     650                // NotEscapeSequence ::
     651                //     u { [lookahead not one of HexDigit]
     652                //     u { NotCodePoint
     653                //     u { CodePoint [lookahead != }]
     654                //
     655                // NotCodePoint ::
     656                //     HexDigits but not if MV of HexDigits <= 0x10FFFF
     657                //
     658                // CodePoint ::
     659                //     HexDigits but not if MV of HexDigits > 0x10FFFF
     660                shift();
     661                while (isASCIIHexDigit(m_current))
     662                    shift();
     663
     664                return atEnd() ? ParsedUnicodeEscapeValue::Incomplete : ParsedUnicodeEscapeValue::Invalid;
     665            }
    647666            shift();
    648667        } while (m_current != '}');
     
    654673    auto character3 = peek(2);
    655674    auto character4 = peek(3);
    656     if (UNLIKELY(!isASCIIHexDigit(m_current) || !isASCIIHexDigit(character2) || !isASCIIHexDigit(character3) || !isASCIIHexDigit(character4)))
    657         return (m_code + 4) >= m_codeEnd ? ParsedUnicodeEscapeValue::Incomplete : ParsedUnicodeEscapeValue::Invalid;
     675    if (UNLIKELY(!isASCIIHexDigit(m_current) || !isASCIIHexDigit(character2) || !isASCIIHexDigit(character3) || !isASCIIHexDigit(character4))) {
     676        auto result = (m_code + 4) >= m_codeEnd ? ParsedUnicodeEscapeValue::Incomplete : ParsedUnicodeEscapeValue::Invalid;
     677
     678        // For raw template literal syntax, we consume `NotEscapeSequence`.
     679        //
     680        // NotEscapeSequence ::
     681        //     u [lookahead not one of HexDigit][lookahead != {]
     682        //     u HexDigit [lookahead not one of HexDigit]
     683        //     u HexDigit HexDigit [lookahead not one of HexDigit]
     684        //     u HexDigit HexDigit HexDigit [lookahead not one of HexDigit]
     685        while (isASCIIHexDigit(m_current))
     686            shift();
     687
     688        return result;
     689    }
     690
    658691    auto result = convertUnicode(m_current, character2, character3, character4);
    659692    shift();
     
    11821215
    11831216template <typename T>
    1184 template <bool shouldBuildStrings> ALWAYS_INLINE auto Lexer<T>::parseComplexEscape(EscapeParseMode escapeParseMode, bool strictMode, T stringQuoteCharacter) -> StringParseResult
     1217template <bool shouldBuildStrings, LexerEscapeParseMode escapeParseMode> ALWAYS_INLINE auto Lexer<T>::parseComplexEscape(bool strictMode, T stringQuoteCharacter) -> StringParseResult
    11851218{
    11861219    if (m_current == 'x') {
    11871220        shift();
    11881221        if (!isASCIIHexDigit(m_current) || !isASCIIHexDigit(peek(1))) {
     1222            // For raw template literal syntax, we consume `NotEscapeSequence`.
     1223            //
     1224            // NotEscapeSequence ::
     1225            //     x [lookahead not one of HexDigit]
     1226            //     x HexDigit [lookahead not one of HexDigit]
     1227            if (isASCIIHexDigit(m_current))
     1228                shift();
     1229            ASSERT(!isASCIIHexDigit(m_current));
     1230
    11891231            m_lexErrorMessage = ASCIILiteral("\\x can only be followed by a hex character sequence");
    1190             return StringCannotBeParsed;
    1191         }
     1232            return atEnd() ? StringUnterminated : StringCannotBeParsed;
     1233        }
     1234
    11921235        T prev = m_current;
    11931236        shift();
     
    11951238            record16(convertHex(prev, m_current));
    11961239        shift();
     1240
    11971241        return StringParsedSuccessfully;
    11981242    }
     
    12011245        shift();
    12021246
    1203         if (escapeParseMode == EscapeParseMode::String && m_current == stringQuoteCharacter) {
     1247        if (escapeParseMode == LexerEscapeParseMode::String && m_current == stringQuoteCharacter) {
    12041248            if (shouldBuildStrings)
    12051249                record16('u');
     
    12151259
    12161260        m_lexErrorMessage = ASCIILiteral("\\u can only be followed by a Unicode character sequence");
    1217         return character.isIncomplete() ? StringUnterminated : StringCannotBeParsed;
     1261        return atEnd() ? StringUnterminated : StringCannotBeParsed;
    12181262    }
    12191263
     
    12241268            shift();
    12251269            if (character1 != '0' || isASCIIDigit(m_current)) {
     1270                // For raw template literal syntax, we consume `NotEscapeSequence`.
     1271                //
     1272                // NotEscapeSequence ::
     1273                //     0 DecimalDigit
     1274                //     DecimalDigit but not 0
     1275                if (character1 == '0')
     1276                    shift();
     1277
    12261278                m_lexErrorMessage = ASCIILiteral("The only valid numeric escape in strict mode is '\\0'");
    1227                 return StringCannotBeParsed;
     1279                return atEnd() ? StringUnterminated : StringCannotBeParsed;
    12281280            }
    12291281            if (shouldBuildStrings)
     
    12911343                shiftLineTerminator();
    12921344            else {
    1293                 StringParseResult result = parseComplexEscape<shouldBuildStrings>(EscapeParseMode::String, strictMode, stringQuoteCharacter);
     1345                StringParseResult result = parseComplexEscape<shouldBuildStrings, LexerEscapeParseMode::String>(strictMode, stringQuoteCharacter);
    12941346                if (result != StringParsedSuccessfully)
    12951347                    return result;
     
    13731425
    13741426template <typename T>
    1375 template <bool shouldBuildStrings> typename Lexer<T>::StringParseResult Lexer<T>::parseTemplateLiteral(JSTokenData* tokenData, RawStringsBuildMode rawStringsBuildMode)
    1376 {
     1427typename Lexer<T>::StringParseResult Lexer<T>::parseTemplateLiteral(JSTokenData* tokenData, RawStringsBuildMode rawStringsBuildMode)
     1428{
     1429    bool parseCookedFailed = false;
    13771430    const T* stringStart = currentSourcePtr();
    13781431    const T* rawStringStart = currentSourcePtr();
     
    13831436        if (UNLIKELY(m_current == '\\')) {
    13841437            lineNumberAdder.clear();
    1385             if (stringStart != currentSourcePtr() && shouldBuildStrings)
     1438            if (stringStart != currentSourcePtr())
    13861439                append16(stringStart, currentSourcePtr() - stringStart);
    13871440            shift();
     
    13911444            // Most common escape sequences first.
    13921445            if (escape) {
    1393                 if (shouldBuildStrings)
    1394                     record16(escape);
     1446                record16(escape);
    13951447                shift();
    13961448            } else if (UNLIKELY(isLineTerminator(m_current))) {
    13971449                // Normalize <CR>, <CR><LF> to <LF>.
    13981450                if (m_current == '\r') {
    1399                     if (shouldBuildStrings) {
    1400                         ASSERT_WITH_MESSAGE(rawStringStart != currentSourcePtr(), "We should have at least shifted the escape.");
    1401 
    1402                         if (rawStringsBuildMode == RawStringsBuildMode::BuildRawStrings) {
    1403                             m_bufferForRawTemplateString16.append(rawStringStart, currentSourcePtr() - rawStringStart);
    1404                             m_bufferForRawTemplateString16.append('\n');
    1405                         }
     1451                    ASSERT_WITH_MESSAGE(rawStringStart != currentSourcePtr(), "We should have at least shifted the escape.");
     1452
     1453                    if (rawStringsBuildMode == RawStringsBuildMode::BuildRawStrings) {
     1454                        m_bufferForRawTemplateString16.append(rawStringStart, currentSourcePtr() - rawStringStart);
     1455                        m_bufferForRawTemplateString16.append('\n');
    14061456                    }
    14071457
     
    14201470            } else {
    14211471                bool strictMode = true;
    1422                 StringParseResult result = parseComplexEscape<shouldBuildStrings>(EscapeParseMode::Template, strictMode, '`');
    1423                 if (result != StringParsedSuccessfully)
    1424                     return result;
     1472                StringParseResult result = parseComplexEscape<true, LexerEscapeParseMode::Template>(strictMode, '`');
     1473                if (result != StringParsedSuccessfully) {
     1474                    if (rawStringsBuildMode == RawStringsBuildMode::BuildRawStrings && result == StringCannotBeParsed)
     1475                        parseCookedFailed = true;
     1476                    else
     1477                        return result;
     1478                }
    14251479            }
    14261480
     
    14461500                if (m_current == '\r') {
    14471501                    // Normalize <CR>, <CR><LF> to <LF>.
    1448                     if (shouldBuildStrings) {
    1449                         if (stringStart != currentSourcePtr())
    1450                             append16(stringStart, currentSourcePtr() - stringStart);
    1451                         if (rawStringStart != currentSourcePtr() && rawStringsBuildMode == RawStringsBuildMode::BuildRawStrings)
    1452                             m_bufferForRawTemplateString16.append(rawStringStart, currentSourcePtr() - rawStringStart);
    1453 
    1454                         record16('\n');
    1455                         if (rawStringsBuildMode == RawStringsBuildMode::BuildRawStrings)
    1456                             m_bufferForRawTemplateString16.append('\n');
    1457                     }
     1502                    if (stringStart != currentSourcePtr())
     1503                        append16(stringStart, currentSourcePtr() - stringStart);
     1504                    if (rawStringStart != currentSourcePtr() && rawStringsBuildMode == RawStringsBuildMode::BuildRawStrings)
     1505                        m_bufferForRawTemplateString16.append(rawStringStart, currentSourcePtr() - rawStringStart);
     1506
     1507                    record16('\n');
     1508                    if (rawStringsBuildMode == RawStringsBuildMode::BuildRawStrings)
     1509                        m_bufferForRawTemplateString16.append('\n');
    14581510                    lineNumberAdder.add(m_current);
    14591511                    shift();
     
    14791531    bool isTail = m_current == '`';
    14801532
    1481     if (shouldBuildStrings) {
    1482         if (currentSourcePtr() != stringStart)
    1483             append16(stringStart, currentSourcePtr() - stringStart);
    1484         if (rawStringStart != currentSourcePtr() && rawStringsBuildMode == RawStringsBuildMode::BuildRawStrings)
    1485             m_bufferForRawTemplateString16.append(rawStringStart, currentSourcePtr() - rawStringStart);
    1486     }
    1487 
    1488     if (shouldBuildStrings) {
     1533    if (currentSourcePtr() != stringStart)
     1534        append16(stringStart, currentSourcePtr() - stringStart);
     1535    if (rawStringStart != currentSourcePtr() && rawStringsBuildMode == RawStringsBuildMode::BuildRawStrings)
     1536        m_bufferForRawTemplateString16.append(rawStringStart, currentSourcePtr() - rawStringStart);
     1537
     1538    if (!parseCookedFailed)
    14891539        tokenData->cooked = makeIdentifier(m_buffer16.data(), m_buffer16.size());
    1490         // Line terminator normalization (e.g. <CR> => <LF>) should be applied to both the raw and cooked representations.
    1491         if (rawStringsBuildMode == RawStringsBuildMode::BuildRawStrings)
    1492             tokenData->raw = makeIdentifier(m_bufferForRawTemplateString16.data(), m_bufferForRawTemplateString16.size());
    1493         else
    1494             tokenData->raw = makeEmptyIdentifier();
    1495     } else {
    1496         tokenData->cooked = makeEmptyIdentifier();
    1497         tokenData->raw = makeEmptyIdentifier();
    1498     }
     1540    else
     1541        tokenData->cooked = nullptr;
     1542
     1543    // Line terminator normalization (e.g. <CR> => <LF>) should be applied to both the raw and cooked representations.
     1544    if (rawStringsBuildMode == RawStringsBuildMode::BuildRawStrings)
     1545        tokenData->raw = makeIdentifier(m_bufferForRawTemplateString16.data(), m_bufferForRawTemplateString16.size());
     1546    else
     1547        tokenData->raw = nullptr;
     1548
    14991549    tokenData->isTail = isTail;
    15001550
     
    20822132        shift();
    20832133        token = SEMICOLON;
     2134        break;
     2135    case CharacterBackQuote:
     2136        shift();
     2137        token = BACKQUOTE;
    20842138        break;
    20852139    case CharacterOpenBrace:
     
    22352289        break;
    22362290        }
    2237     case CharacterBackQuote: {
    2238         // Skip backquote.
    2239         shift();
    2240         StringParseResult result = StringCannotBeParsed;
    2241         if (lexerFlags & LexerFlagsDontBuildStrings)
    2242             result = parseTemplateLiteral<false>(tokenData, RawStringsBuildMode::BuildRawStrings);
    2243         else
    2244             result = parseTemplateLiteral<true>(tokenData, RawStringsBuildMode::BuildRawStrings);
    2245 
    2246         if (UNLIKELY(result != StringParsedSuccessfully)) {
    2247             token = result == StringUnterminated ? UNTERMINATED_TEMPLATE_LITERAL_ERRORTOK : INVALID_TEMPLATE_LITERAL_ERRORTOK;
    2248             goto returnError;
    2249         }
    2250         token = TEMPLATE;
    2251         break;
    2252         }
    22532291    case CharacterIdentifierStart:
    22542292        ASSERT(isIdentStart(m_current));
     
    24212459
    24222460template <typename T>
    2423 JSTokenType Lexer<T>::scanTrailingTemplateString(JSToken* tokenRecord, RawStringsBuildMode rawStringsBuildMode)
     2461JSTokenType Lexer<T>::scanTemplateString(JSToken* tokenRecord, RawStringsBuildMode rawStringsBuildMode)
    24242462{
    24252463    JSTokenData* tokenData = &tokenRecord->m_data;
     
    24272465    ASSERT(m_buffer16.isEmpty());
    24282466
    2429     // Leading closing brace } is already shifted in the previous token scan.
     2467    // Leading backquote ` (for template head) or closing brace } (for template trailing) are already shifted in the previous token scan.
    24302468    // So in this re-scan phase, shift() is not needed here.
    2431     StringParseResult result = parseTemplateLiteral<true>(tokenData, rawStringsBuildMode);
     2469    StringParseResult result = parseTemplateLiteral(tokenData, rawStringsBuildMode);
    24322470    JSTokenType token = ERRORTOK;
    24332471    if (UNLIKELY(result != StringParsedSuccessfully)) {
Note: See TracChangeset for help on using the changeset viewer.