Changeset 183373 in webkit for trunk/Source/JavaScriptCore/parser/Lexer.cpp
- Timestamp: Apr 26, 2015, 5:27:28 PM (10 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/Source/JavaScriptCore/parser/Lexer.cpp
r181664 r183373 72 72 CharacterTilde, 73 73 CharacterQuote, 74 CharacterBackQuote, 74 75 CharacterDot, 75 76 CharacterSlash, … … 193 194 /* 94 - ^ */ CharacterXor, 194 195 /* 95 - _ */ CharacterIdentifierStart, 196 #if ENABLE(ES6_TEMPLATE_LITERAL_SYNTAX) 197 /* 96 - ` */ CharacterBackQuote, 198 #else 195 199 /* 96 - ` */ CharacterInvalid, 200 #endif 196 201 /* 97 - a */ CharacterIdentifierStart, 197 202 /* 98 - b */ CharacterIdentifierStart, … … 1020 1025 } 1021 1026 1022 m_buffer16. resize(0);1027 m_buffer16.shrink(0); 1023 1028 return IDENT; 1024 1029 } … … 1053 1058 LChar escape = singleEscape(m_current); 1054 1059 1055 // Most common escape sequences first 1060 // Most common escape sequences first. 1056 1061 if (escape) { 1057 1062 if (shouldBuildStrings) … … 1074 1079 setOffset(startingOffset, startingLineStartOffset); 1075 1080 setLineNumber(startingLineNumber); 1076 m_buffer8. resize(0);1081 m_buffer8.shrink(0); 1077 1082 return parseStringSlowCase<shouldBuildStrings>(tokenData, strictMode); 1078 1083 } … … 1084 1089 setOffset(startingOffset, startingLineStartOffset); 1085 1090 setLineNumber(startingLineNumber); 1086 m_buffer8. resize(0);1091 m_buffer8.shrink(0); 1087 1092 return parseStringSlowCase<shouldBuildStrings>(tokenData, strictMode); 1088 1093 } … … 1095 1100 if (shouldBuildStrings) { 1096 1101 tokenData->ident = makeIdentifier(m_buffer8.data(), m_buffer8.size()); 1097 m_buffer8. 
resize(0);1102 m_buffer8.shrink(0); 1098 1103 } else 1099 1104 tokenData->ident = 0; … … 1103 1108 1104 1109 template <typename T> 1105 template <bool shouldBuildStrings> typename Lexer<T>::StringParseResult Lexer<T>::parseStringSlowCase(JSTokenData* tokenData, bool strictMode) 1110 template <bool shouldBuildStrings> ALWAYS_INLINE auto Lexer<T>::parseComplexEscape(EscapeParseMode escapeParseMode, bool strictMode, T stringQuoteCharacter) -> StringParseResult 1111 { 1112 if (m_current == 'x') { 1113 shift(); 1114 if (!isASCIIHexDigit(m_current) || !isASCIIHexDigit(peek(1))) { 1115 m_lexErrorMessage = ASCIILiteral("\\x can only be followed by a hex character sequence"); 1116 return StringCannotBeParsed; 1117 } 1118 T prev = m_current; 1119 shift(); 1120 if (shouldBuildStrings) 1121 record16(convertHex(prev, m_current)); 1122 shift(); 1123 return StringParsedSuccessfully; 1124 } 1125 1126 if (m_current == 'u') { 1127 shift(); 1128 UnicodeHexValue character = parseFourDigitUnicodeHex(); 1129 if (character.isValid()) { 1130 if (shouldBuildStrings) 1131 record16(character.value()); 1132 return StringParsedSuccessfully; 1133 } 1134 1135 if (escapeParseMode == EscapeParseMode::String && m_current == stringQuoteCharacter) { 1136 if (shouldBuildStrings) 1137 record16('u'); 1138 return StringParsedSuccessfully; 1139 } 1140 1141 m_lexErrorMessage = ASCIILiteral("\\u can only be followed by a Unicode character sequence"); 1142 return character.valueType() == UnicodeHexValue::IncompleteHex ? StringUnterminated : StringCannotBeParsed; 1143 } 1144 1145 if (strictMode) { 1146 if (isASCIIDigit(m_current)) { 1147 // The only valid numeric escape in strict mode is '\0', and this must not be followed by a decimal digit. 
1148 int character1 = m_current; 1149 shift(); 1150 if (character1 != '0' || isASCIIDigit(m_current)) { 1151 m_lexErrorMessage = ASCIILiteral("The only valid numeric escape in strict mode is '\\0'"); 1152 return StringCannotBeParsed; 1153 } 1154 if (shouldBuildStrings) 1155 record16(0); 1156 return StringParsedSuccessfully; 1157 } 1158 } else { 1159 if (isASCIIOctalDigit(m_current)) { 1160 // Octal character sequences 1161 T character1 = m_current; 1162 shift(); 1163 if (isASCIIOctalDigit(m_current)) { 1164 // Two octal characters 1165 T character2 = m_current; 1166 shift(); 1167 if (character1 >= '0' && character1 <= '3' && isASCIIOctalDigit(m_current)) { 1168 if (shouldBuildStrings) 1169 record16((character1 - '0') * 64 + (character2 - '0') * 8 + m_current - '0'); 1170 shift(); 1171 } else { 1172 if (shouldBuildStrings) 1173 record16((character1 - '0') * 8 + character2 - '0'); 1174 } 1175 } else { 1176 if (shouldBuildStrings) 1177 record16(character1 - '0'); 1178 } 1179 return StringParsedSuccessfully; 1180 } 1181 } 1182 1183 if (!atEnd()) { 1184 if (shouldBuildStrings) 1185 record16(m_current); 1186 shift(); 1187 return StringParsedSuccessfully; 1188 } 1189 1190 m_lexErrorMessage = ASCIILiteral("Unterminated string constant"); 1191 return StringUnterminated; 1192 } 1193 1194 template <typename T> 1195 template <bool shouldBuildStrings> auto Lexer<T>::parseStringSlowCase(JSTokenData* tokenData, bool strictMode) -> StringParseResult 1106 1196 { 1107 1197 T stringQuoteCharacter = m_current; … … 1125 1215 } else if (UNLIKELY(isLineTerminator(m_current))) 1126 1216 shiftLineTerminator(); 1127 else if (m_current == 'x') { 1128 shift(); 1129 if (!isASCIIHexDigit(m_current) || !isASCIIHexDigit(peek(1))) { 1130 m_lexErrorMessage = ASCIILiteral("\\x can only be followed by a hex character sequence"); 1131 return StringCannotBeParsed; 1132 } 1133 T prev = m_current; 1134 shift(); 1135 if (shouldBuildStrings) 1136 record16(convertHex(prev, m_current)); 1137 shift(); 1138 } 
else if (m_current == 'u') { 1139 shift(); 1140 UnicodeHexValue character = parseFourDigitUnicodeHex(); 1141 if (character.isValid()) { 1142 if (shouldBuildStrings) 1143 record16(character.value()); 1144 } else if (m_current == stringQuoteCharacter) { 1145 if (shouldBuildStrings) 1146 record16('u'); 1147 } else { 1148 m_lexErrorMessage = ASCIILiteral("\\u can only be followed by a Unicode character sequence"); 1149 return character.valueType() == UnicodeHexValue::IncompleteHex ? StringUnterminated : StringCannotBeParsed; 1150 } 1151 } else if (strictMode && isASCIIDigit(m_current)) { 1152 // The only valid numeric escape in strict mode is '\0', and this must not be followed by a decimal digit. 1153 int character1 = m_current; 1154 shift(); 1155 if (character1 != '0' || isASCIIDigit(m_current)) { 1156 m_lexErrorMessage = ASCIILiteral("The only valid numeric escape in strict mode is '\\0'"); 1157 return StringCannotBeParsed; 1158 } 1159 if (shouldBuildStrings) 1160 record16(0); 1161 } else if (!strictMode && isASCIIOctalDigit(m_current)) { 1162 // Octal character sequences 1163 T character1 = m_current; 1164 shift(); 1165 if (isASCIIOctalDigit(m_current)) { 1166 // Two octal characters 1167 T character2 = m_current; 1168 shift(); 1169 if (character1 >= '0' && character1 <= '3' && isASCIIOctalDigit(m_current)) { 1170 if (shouldBuildStrings) 1171 record16((character1 - '0') * 64 + (character2 - '0') * 8 + m_current - '0'); 1172 shift(); 1173 } else { 1174 if (shouldBuildStrings) 1175 record16((character1 - '0') * 8 + character2 - '0'); 1176 } 1177 } else { 1178 if (shouldBuildStrings) 1179 record16(character1 - '0'); 1180 } 1181 } else if (!atEnd()) { 1182 if (shouldBuildStrings) 1183 record16(m_current); 1184 shift(); 1185 } else { 1186 m_lexErrorMessage = ASCIILiteral("Unterminated string constant"); 1187 return StringUnterminated; 1217 else { 1218 StringParseResult result = parseComplexEscape<shouldBuildStrings>(EscapeParseMode::String, strictMode, 
stringQuoteCharacter); 1219 if (result != StringParsedSuccessfully) 1220 return result; 1188 1221 } 1189 1222 … … 1212 1245 tokenData->ident = 0; 1213 1246 1214 m_buffer16. resize(0);1247 m_buffer16.shrink(0); 1215 1248 return StringParsedSuccessfully; 1216 1249 } 1250 1251 #if ENABLE(ES6_TEMPLATE_LITERAL_SYNTAX) 1252 // While the lexer accepts <LF><CR> (not <CR><LF>) sequence 1253 // as one line terminator and increments one line number, 1254 // TemplateLiteral considers it as two line terminators <LF> and <CR>. 1255 // 1256 // TemplateLiteral normalizes line terminators as follows. 1257 // 1258 // <LF> => <LF> 1259 // <CR> => <LF> 1260 // <CR><LF> => <LF> 1261 // <\u2028> => <\u2028> 1262 // <\u2029> => <\u2029> 1263 // 1264 // So, <LF><CR> should be normalized to <LF><LF>. 1265 // However, the lexer should increment the line number only once for <LF><CR>. 1266 // 1267 // To achieve this, LineNumberAdder holds the current status of line terminator sequence. 1268 // When TemplateLiteral lexer encounters a line terminator, it notifies to LineNumberAdder. 1269 // LineNumberAdder maintains the status and increments the line number when it's necessary. 1270 // For example, LineNumberAdder increments the line number only once for <LF><CR> and <CR><LF>. 
1271 template<typename CharacterType> 1272 class LineNumberAdder { 1273 public: 1274 LineNumberAdder(int& lineNumber) 1275 : m_lineNumber(lineNumber) 1276 { 1277 } 1278 1279 void clear() 1280 { 1281 m_previous = 0; 1282 } 1283 1284 void add(CharacterType character) 1285 { 1286 ASSERT(Lexer<CharacterType>::isLineTerminator(character)); 1287 if ((character + m_previous) == ('\n' + '\r')) 1288 m_previous = 0; 1289 else { 1290 ++m_lineNumber; 1291 m_previous = character; 1292 } 1293 } 1294 1295 private: 1296 int& m_lineNumber; 1297 CharacterType m_previous { 0 }; 1298 }; 1299 1300 template <typename T> 1301 template <bool shouldBuildStrings> typename Lexer<T>::StringParseResult Lexer<T>::parseTemplateLiteral(JSTokenData* tokenData) 1302 { 1303 const T* stringStart = currentSourcePtr(); 1304 const T* rawStringStart = currentSourcePtr(); 1305 1306 LineNumberAdder<T> lineNumberAdder(m_lineNumber); 1307 1308 while (m_current != '`') { 1309 if (UNLIKELY(m_current == '\\')) { 1310 lineNumberAdder.clear(); 1311 if (stringStart != currentSourcePtr() && shouldBuildStrings) 1312 append16(stringStart, currentSourcePtr() - stringStart); 1313 shift(); 1314 1315 LChar escape = singleEscape(m_current); 1316 1317 // Most common escape sequences first. 
1318 if (escape) { 1319 if (shouldBuildStrings) 1320 record16(escape); 1321 shift(); 1322 } else if (UNLIKELY(isLineTerminator(m_current))) { 1323 if (m_current == '\r') { 1324 lineNumberAdder.add(m_current); 1325 shift(); 1326 if (m_current == '\n') { 1327 lineNumberAdder.add(m_current); 1328 shift(); 1329 } 1330 } else { 1331 lineNumberAdder.add(m_current); 1332 shift(); 1333 } 1334 } else { 1335 bool strictMode = true; 1336 StringParseResult result = parseComplexEscape<shouldBuildStrings>(EscapeParseMode::Template, strictMode, '`'); 1337 if (result != StringParsedSuccessfully) 1338 return result; 1339 } 1340 1341 stringStart = currentSourcePtr(); 1342 continue; 1343 } 1344 1345 if (m_current == '$' && peek(1) == '{') 1346 break; 1347 1348 // Fast check for characters that require special handling. 1349 // Catches 0, \n, \r, 0x2028, and 0x2029 as efficiently 1350 // as possible, and lets through all common ASCII characters. 1351 if (UNLIKELY(((static_cast<unsigned>(m_current) - 0xE) & 0x2000))) { 1352 // End of input is not allowed. 1353 // Unlike String, line terminator is allowed. 1354 if (atEnd()) { 1355 m_lexErrorMessage = ASCIILiteral("Unexpected EOF"); 1356 return atEnd() ? StringUnterminated : StringCannotBeParsed; 1357 } 1358 1359 if (isLineTerminator(m_current)) { 1360 if (m_current == '\r') { 1361 // Normalize <CR>, <CR><LF> to <LF>. 
1362 if (stringStart != currentSourcePtr() && shouldBuildStrings) 1363 append16(stringStart, currentSourcePtr() - stringStart); 1364 if (shouldBuildStrings) 1365 record16('\n'); 1366 lineNumberAdder.add(m_current); 1367 shift(); 1368 if (m_current == '\n') { 1369 lineNumberAdder.add(m_current); 1370 shift(); 1371 } 1372 stringStart = currentSourcePtr(); 1373 } else { 1374 lineNumberAdder.add(m_current); 1375 shift(); 1376 } 1377 continue; 1378 } 1379 // Anything else is just a normal character 1380 } 1381 1382 lineNumberAdder.clear(); 1383 shift(); 1384 } 1385 1386 bool isTail = m_current == '`'; 1387 1388 if (currentSourcePtr() != stringStart && shouldBuildStrings) 1389 append16(stringStart, currentSourcePtr() - stringStart); 1390 1391 if (shouldBuildStrings) { 1392 tokenData->cooked = makeIdentifier(m_buffer16.data(), m_buffer16.size()); 1393 // TODO: While line terminator normalization (e.g. <CR> => <LF>) should be applied to both the raw and cooked representations, 1394 // this raw implementation just slices the source string. As a result, line terminators appear in the raw representation without normalization. 1395 // For example, when parsing `<CR>`, <CR> appears in the raw representation. 1396 // While non-tagged template literals don't use the raw representation, tagged templates use the raw representation. 1397 // So line terminator normalization should be applied to the raw representation when implementing tagged templates. 
1398 tokenData->raw = makeIdentifier(rawStringStart, currentSourcePtr() - rawStringStart); 1399 } else { 1400 tokenData->cooked = nullptr; 1401 tokenData->raw = nullptr; 1402 } 1403 tokenData->isTail = isTail; 1404 1405 m_buffer16.shrink(0); 1406 1407 if (isTail) { 1408 // Skip ` 1409 shift(); 1410 } else { 1411 // Skip $ and { 1412 shift(); 1413 shift(); 1414 } 1415 1416 return StringParsedSuccessfully; 1417 } 1418 #endif 1217 1419 1218 1420 template <typename T> … … 1737 1939 } 1738 1940 token = tokenTypeForIntegerLikeToken(tokenData->doubleValue); 1739 m_buffer8. resize(0);1941 m_buffer8.shrink(0); 1740 1942 break; 1741 1943 } … … 1757 1959 } 1758 1960 token = tokenTypeForIntegerLikeToken(tokenData->doubleValue); 1759 m_buffer8. resize(0);1961 m_buffer8.shrink(0); 1760 1962 break; 1761 1963 } … … 1778 1980 } 1779 1981 token = tokenTypeForIntegerLikeToken(tokenData->doubleValue); 1780 m_buffer8. resize(0);1982 m_buffer8.shrink(0); 1781 1983 break; 1782 1984 } … … 1825 2027 goto returnError; 1826 2028 } 1827 m_buffer8.resize(0); 1828 break; 1829 case CharacterQuote: 1830 if (lexerFlags & LexerFlagsDontBuildStrings) { 1831 StringParseResult result = parseString<false>(tokenData, strictMode); 1832 if (UNLIKELY(result != StringParsedSuccessfully)) { 1833 token = result == StringUnterminated ? UNTERMINATED_STRING_LITERAL_ERRORTOK : INVALID_STRING_LITERAL_ERRORTOK; 1834 goto returnError; 1835 } 1836 } else { 1837 StringParseResult result = parseString<true>(tokenData, strictMode); 1838 if (UNLIKELY(result != StringParsedSuccessfully)) { 1839 token = result == StringUnterminated ? 
UNTERMINATED_STRING_LITERAL_ERRORTOK : INVALID_STRING_LITERAL_ERRORTOK; 1840 goto returnError; 1841 } 2029 m_buffer8.shrink(0); 2030 break; 2031 case CharacterQuote: { 2032 StringParseResult result = StringCannotBeParsed; 2033 if (lexerFlags & LexerFlagsDontBuildStrings) 2034 result = parseString<false>(tokenData, strictMode); 2035 else 2036 result = parseString<true>(tokenData, strictMode); 2037 2038 if (UNLIKELY(result != StringParsedSuccessfully)) { 2039 token = result == StringUnterminated ? UNTERMINATED_STRING_LITERAL_ERRORTOK : INVALID_STRING_LITERAL_ERRORTOK; 2040 goto returnError; 1842 2041 } 1843 2042 shift(); 1844 2043 token = STRING; 1845 2044 break; 2045 } 2046 #if ENABLE(ES6_TEMPLATE_LITERAL_SYNTAX) 2047 case CharacterBackQuote: { 2048 // Skip backquote. 2049 shift(); 2050 StringParseResult result = StringCannotBeParsed; 2051 if (lexerFlags & LexerFlagsDontBuildStrings) 2052 result = parseTemplateLiteral<false>(tokenData); 2053 else 2054 result = parseTemplateLiteral<true>(tokenData); 2055 2056 if (UNLIKELY(result != StringParsedSuccessfully)) { 2057 token = result == StringUnterminated ? UNTERMINATED_TEMPLATE_LITERAL_ERRORTOK : INVALID_TEMPLATE_LITERAL_ERRORTOK; 2058 goto returnError; 2059 } 2060 token = TEMPLATE; 2061 break; 2062 } 2063 #endif 1846 2064 case CharacterIdentifierStart: 1847 2065 ASSERT(isIdentStart(m_current)); … … 1946 2164 while (true) { 1947 2165 if (isLineTerminator(m_current) || atEnd()) { 1948 m_buffer16. resize(0);2166 m_buffer16.shrink(0); 1949 2167 return false; 1950 2168 } … … 1980 2198 pattern = makeRightSizedIdentifier(m_buffer16.data(), m_buffer16.size(), charactersOredTogether); 1981 2199 1982 m_buffer16. resize(0);2200 m_buffer16.shrink(0); 1983 2201 charactersOredTogether = 0; 1984 2202 … … 1990 2208 1991 2209 flags = makeRightSizedIdentifier(m_buffer16.data(), m_buffer16.size(), charactersOredTogether); 1992 m_buffer16. 
resize(0);2210 m_buffer16.shrink(0); 1993 2211 1994 2212 return true; … … 2036 2254 } 2037 2255 2256 #if ENABLE(ES6_TEMPLATE_LITERAL_SYNTAX) 2257 template <typename T> 2258 JSTokenType Lexer<T>::scanTrailingTemplateString(JSToken* tokenRecord) 2259 { 2260 JSTokenData* tokenData = &tokenRecord->m_data; 2261 JSTokenLocation* tokenLocation = &tokenRecord->m_location; 2262 ASSERT(!m_error); 2263 ASSERT(m_buffer16.isEmpty()); 2264 2265 // Leading closing brace } is already shifted in the previous token scan. 2266 // So in this re-scan phase, shift() is not needed here. 2267 StringParseResult result = parseTemplateLiteral<true>(tokenData); 2268 JSTokenType token = ERRORTOK; 2269 if (UNLIKELY(result != StringParsedSuccessfully)) { 2270 token = result == StringUnterminated ? UNTERMINATED_TEMPLATE_LITERAL_ERRORTOK : INVALID_TEMPLATE_LITERAL_ERRORTOK; 2271 m_error = true; 2272 } else { 2273 token = TEMPLATE; 2274 m_lastToken = token; 2275 } 2276 2277 // Since TemplateString always ends with ` or }, m_atLineStart always becomes false. 2278 m_atLineStart = false; 2279 2280 // Adjust current tokenLocation data for TemplateString. 2281 tokenLocation->line = m_lineNumber; 2282 tokenLocation->endOffset = currentOffset(); 2283 tokenLocation->lineStartOffset = currentLineStartOffset(); 2284 ASSERT(tokenLocation->endOffset >= tokenLocation->lineStartOffset); 2285 tokenRecord->m_endPosition = currentPosition(); 2286 return token; 2287 } 2288 #endif 2289 2038 2290 template <typename T> 2039 2291 void Lexer<T>::clear()
Note: See TracChangeset for help on using the changeset viewer.