1 | /*
|
---|
2 | * Copyright (C) 1999-2000 Harri Porten ([email protected])
|
---|
3 | * Copyright (C) 2006-2019 Apple Inc. All Rights Reserved.
|
---|
4 | * Copyright (C) 2007 Cameron Zwarich ([email protected])
|
---|
5 | * Copyright (C) 2010 Zoltan Herczeg ([email protected])
|
---|
6 | * Copyright (C) 2012 Mathias Bynens ([email protected])
|
---|
7 | *
|
---|
8 | * This library is free software; you can redistribute it and/or
|
---|
9 | * modify it under the terms of the GNU Library General Public
|
---|
10 | * License as published by the Free Software Foundation; either
|
---|
11 | * version 2 of the License, or (at your option) any later version.
|
---|
12 | *
|
---|
13 | * This library is distributed in the hope that it will be useful,
|
---|
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
---|
16 | * Library General Public License for more details.
|
---|
17 | *
|
---|
18 | * You should have received a copy of the GNU Library General Public License
|
---|
19 | * along with this library; see the file COPYING.LIB. If not, write to
|
---|
20 | * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
|
---|
21 | * Boston, MA 02110-1301, USA.
|
---|
22 | *
|
---|
23 | */
|
---|
24 |
|
---|
25 | #include "config.h"
|
---|
26 | #include "Lexer.h"
|
---|
27 |
|
---|
28 | #include "BuiltinNames.h"
|
---|
29 | #include "Identifier.h"
|
---|
30 | #include "KeywordLookup.h"
|
---|
31 | #include "Lexer.lut.h"
|
---|
32 | #include "ParseInt.h"
|
---|
33 | #include <limits.h>
|
---|
34 | #include <string.h>
|
---|
35 | #include <variant>
|
---|
36 | #include <wtf/Assertions.h>
|
---|
37 | #include <wtf/HexNumber.h>
|
---|
38 | #include <wtf/dtoa.h>
|
---|
39 |
|
---|
40 | namespace JSC {
|
---|
41 |
|
---|
42 | bool isLexerKeyword(const Identifier& identifier)
|
---|
43 | {
|
---|
44 | return JSC::mainTable.entry(identifier);
|
---|
45 | }
|
---|
46 |
|
---|
// Lexical class assigned to every Latin-1 character through typesOfLatin1Characters.
// Enumerator order is significant: isIdentStart() tests for CharacterIdentifierStart and
// isIdentPart() accepts every value up to and including CharacterOtherIdentifierPart,
// so nothing may be inserted before CharacterOtherIdentifierPart without revisiting both.
47 | enum CharacterType : uint8_t {
|
---|
48 | // Types for the main switch
|
---|
49 |
|
---|
50 | // The first three types are fixed, and also used for identifying
|
---|
51 | // ASCII alpha and alphanumeric characters (see isIdentStart and isIdentPart).
|
---|
52 | CharacterIdentifierStart,
|
---|
53 | CharacterZero,
|
---|
54 | CharacterNumber,
|
---|
55 |
|
---|
56 | // For single-byte characters grandfathered into Other_ID_Continue -- namely just U+00B7 MIDDLE DOT.
|
---|
57 | // (http://unicode.org/reports/tr31/#Backward_Compatibility)
|
---|
58 | CharacterOtherIdentifierPart,
|
---|
59 |
|
---|
60 | CharacterInvalid,
|
---|
61 | CharacterLineTerminator,
|
---|
62 | CharacterExclamationMark,
|
---|
63 | CharacterOpenParen,
|
---|
64 | CharacterCloseParen,
|
---|
65 | CharacterOpenBracket,
|
---|
66 | CharacterCloseBracket,
|
---|
67 | CharacterComma,
|
---|
68 | CharacterColon,
|
---|
69 | CharacterQuestion,
|
---|
70 | CharacterTilde,
|
---|
71 | CharacterQuote,
|
---|
72 | CharacterBackQuote,
|
---|
73 | CharacterDot,
|
---|
74 | CharacterSlash,
|
---|
75 | CharacterBackSlash,
|
---|
76 | CharacterSemicolon,
|
---|
77 | CharacterOpenBrace,
|
---|
78 | CharacterCloseBrace,
|
---|
79 |
|
---|
80 | CharacterAdd,
|
---|
81 | CharacterSub,
|
---|
82 | CharacterMultiply,
|
---|
83 | CharacterModulo,
|
---|
84 | CharacterAnd,
|
---|
85 | CharacterXor,
|
---|
86 | CharacterOr,
|
---|
87 | CharacterLess,
|
---|
88 | CharacterGreater,
|
---|
89 | CharacterEqual,
|
---|
90 |
|
---|
91 | // Other types
|
---|
92 | CharacterWhiteSpace,
|
---|
93 | CharacterHash,
|
---|
94 | CharacterPrivateIdentifierStart
|
---|
95 | };
|
---|
96 |
|
---|
97 | // CharacterType for each of the 256 Latin-1 codes, indexed by the code value.
// isIdentStart() and isIdentPart() read this table with static_cast<LChar>(c) as the index.
|
---|
98 | static constexpr const CharacterType typesOfLatin1Characters[256] = {
|
---|
99 | /* 0 - Null */ CharacterInvalid,
|
---|
100 | /* 1 - Start of Heading */ CharacterInvalid,
|
---|
101 | /* 2 - Start of Text */ CharacterInvalid,
|
---|
102 | /* 3 - End of Text */ CharacterInvalid,
|
---|
103 | /* 4 - End of Transm. */ CharacterInvalid,
|
---|
104 | /* 5 - Enquiry */ CharacterInvalid,
|
---|
105 | /* 6 - Acknowledgment */ CharacterInvalid,
|
---|
106 | /* 7 - Bell */ CharacterInvalid,
|
---|
107 | /* 8 - Back Space */ CharacterInvalid,
|
---|
108 | /* 9 - Horizontal Tab */ CharacterWhiteSpace,
|
---|
109 | /* 10 - Line Feed */ CharacterLineTerminator,
|
---|
110 | /* 11 - Vertical Tab */ CharacterWhiteSpace,
|
---|
111 | /* 12 - Form Feed */ CharacterWhiteSpace,
|
---|
112 | /* 13 - Carriage Return */ CharacterLineTerminator,
|
---|
113 | /* 14 - Shift Out */ CharacterInvalid,
|
---|
114 | /* 15 - Shift In */ CharacterInvalid,
|
---|
115 | /* 16 - Data Line Escape */ CharacterInvalid,
|
---|
116 | /* 17 - Device Control 1 */ CharacterInvalid,
|
---|
117 | /* 18 - Device Control 2 */ CharacterInvalid,
|
---|
118 | /* 19 - Device Control 3 */ CharacterInvalid,
|
---|
119 | /* 20 - Device Control 4 */ CharacterInvalid,
|
---|
120 | /* 21 - Negative Ack. */ CharacterInvalid,
|
---|
121 | /* 22 - Synchronous Idle */ CharacterInvalid,
|
---|
122 | /* 23 - End of Transmit */ CharacterInvalid,
|
---|
123 | /* 24 - Cancel */ CharacterInvalid,
|
---|
124 | /* 25 - End of Medium */ CharacterInvalid,
|
---|
125 | /* 26 - Substitute */ CharacterInvalid,
|
---|
126 | /* 27 - Escape */ CharacterInvalid,
|
---|
127 | /* 28 - File Separator */ CharacterInvalid,
|
---|
128 | /* 29 - Group Separator */ CharacterInvalid,
|
---|
129 | /* 30 - Record Separator */ CharacterInvalid,
|
---|
130 | /* 31 - Unit Separator */ CharacterInvalid,
|
---|
131 | /* 32 - Space */ CharacterWhiteSpace,
|
---|
132 | /* 33 - ! */ CharacterExclamationMark,
|
---|
133 | /* 34 - " */ CharacterQuote,
|
---|
134 | /* 35 - # */ CharacterHash,
|
---|
135 | /* 36 - $ */ CharacterIdentifierStart,
|
---|
136 | /* 37 - % */ CharacterModulo,
|
---|
137 | /* 38 - & */ CharacterAnd,
|
---|
138 | /* 39 - ' */ CharacterQuote,
|
---|
139 | /* 40 - ( */ CharacterOpenParen,
|
---|
140 | /* 41 - ) */ CharacterCloseParen,
|
---|
141 | /* 42 - * */ CharacterMultiply,
|
---|
142 | /* 43 - + */ CharacterAdd,
|
---|
143 | /* 44 - , */ CharacterComma,
|
---|
144 | /* 45 - - */ CharacterSub,
|
---|
145 | /* 46 - . */ CharacterDot,
|
---|
146 | /* 47 - / */ CharacterSlash,
|
---|
147 | /* 48 - 0 */ CharacterZero,
|
---|
148 | /* 49 - 1 */ CharacterNumber,
|
---|
149 | /* 50 - 2 */ CharacterNumber,
|
---|
150 | /* 51 - 3 */ CharacterNumber,
|
---|
151 | /* 52 - 4 */ CharacterNumber,
|
---|
152 | /* 53 - 5 */ CharacterNumber,
|
---|
153 | /* 54 - 6 */ CharacterNumber,
|
---|
154 | /* 55 - 7 */ CharacterNumber,
|
---|
155 | /* 56 - 8 */ CharacterNumber,
|
---|
156 | /* 57 - 9 */ CharacterNumber,
|
---|
157 | /* 58 - : */ CharacterColon,
|
---|
158 | /* 59 - ; */ CharacterSemicolon,
|
---|
159 | /* 60 - < */ CharacterLess,
|
---|
160 | /* 61 - = */ CharacterEqual,
|
---|
161 | /* 62 - > */ CharacterGreater,
|
---|
162 | /* 63 - ? */ CharacterQuestion,
|
---|
163 | /* 64 - @ */ CharacterPrivateIdentifierStart,
|
---|
164 | /* 65 - A */ CharacterIdentifierStart,
|
---|
165 | /* 66 - B */ CharacterIdentifierStart,
|
---|
166 | /* 67 - C */ CharacterIdentifierStart,
|
---|
167 | /* 68 - D */ CharacterIdentifierStart,
|
---|
168 | /* 69 - E */ CharacterIdentifierStart,
|
---|
169 | /* 70 - F */ CharacterIdentifierStart,
|
---|
170 | /* 71 - G */ CharacterIdentifierStart,
|
---|
171 | /* 72 - H */ CharacterIdentifierStart,
|
---|
172 | /* 73 - I */ CharacterIdentifierStart,
|
---|
173 | /* 74 - J */ CharacterIdentifierStart,
|
---|
174 | /* 75 - K */ CharacterIdentifierStart,
|
---|
175 | /* 76 - L */ CharacterIdentifierStart,
|
---|
176 | /* 77 - M */ CharacterIdentifierStart,
|
---|
177 | /* 78 - N */ CharacterIdentifierStart,
|
---|
178 | /* 79 - O */ CharacterIdentifierStart,
|
---|
179 | /* 80 - P */ CharacterIdentifierStart,
|
---|
180 | /* 81 - Q */ CharacterIdentifierStart,
|
---|
181 | /* 82 - R */ CharacterIdentifierStart,
|
---|
182 | /* 83 - S */ CharacterIdentifierStart,
|
---|
183 | /* 84 - T */ CharacterIdentifierStart,
|
---|
184 | /* 85 - U */ CharacterIdentifierStart,
|
---|
185 | /* 86 - V */ CharacterIdentifierStart,
|
---|
186 | /* 87 - W */ CharacterIdentifierStart,
|
---|
187 | /* 88 - X */ CharacterIdentifierStart,
|
---|
188 | /* 89 - Y */ CharacterIdentifierStart,
|
---|
189 | /* 90 - Z */ CharacterIdentifierStart,
|
---|
190 | /* 91 - [ */ CharacterOpenBracket,
|
---|
191 | /* 92 - \ */ CharacterBackSlash,
|
---|
192 | /* 93 - ] */ CharacterCloseBracket,
|
---|
193 | /* 94 - ^ */ CharacterXor,
|
---|
194 | /* 95 - _ */ CharacterIdentifierStart,
|
---|
195 | /* 96 - ` */ CharacterBackQuote,
|
---|
196 | /* 97 - a */ CharacterIdentifierStart,
|
---|
197 | /* 98 - b */ CharacterIdentifierStart,
|
---|
198 | /* 99 - c */ CharacterIdentifierStart,
|
---|
199 | /* 100 - d */ CharacterIdentifierStart,
|
---|
200 | /* 101 - e */ CharacterIdentifierStart,
|
---|
201 | /* 102 - f */ CharacterIdentifierStart,
|
---|
202 | /* 103 - g */ CharacterIdentifierStart,
|
---|
203 | /* 104 - h */ CharacterIdentifierStart,
|
---|
204 | /* 105 - i */ CharacterIdentifierStart,
|
---|
205 | /* 106 - j */ CharacterIdentifierStart,
|
---|
206 | /* 107 - k */ CharacterIdentifierStart,
|
---|
207 | /* 108 - l */ CharacterIdentifierStart,
|
---|
208 | /* 109 - m */ CharacterIdentifierStart,
|
---|
209 | /* 110 - n */ CharacterIdentifierStart,
|
---|
210 | /* 111 - o */ CharacterIdentifierStart,
|
---|
211 | /* 112 - p */ CharacterIdentifierStart,
|
---|
212 | /* 113 - q */ CharacterIdentifierStart,
|
---|
213 | /* 114 - r */ CharacterIdentifierStart,
|
---|
214 | /* 115 - s */ CharacterIdentifierStart,
|
---|
215 | /* 116 - t */ CharacterIdentifierStart,
|
---|
216 | /* 117 - u */ CharacterIdentifierStart,
|
---|
217 | /* 118 - v */ CharacterIdentifierStart,
|
---|
218 | /* 119 - w */ CharacterIdentifierStart,
|
---|
219 | /* 120 - x */ CharacterIdentifierStart,
|
---|
220 | /* 121 - y */ CharacterIdentifierStart,
|
---|
221 | /* 122 - z */ CharacterIdentifierStart,
|
---|
222 | /* 123 - { */ CharacterOpenBrace,
|
---|
223 | /* 124 - | */ CharacterOr,
|
---|
224 | /* 125 - } */ CharacterCloseBrace,
|
---|
225 | /* 126 - ~ */ CharacterTilde,
|
---|
226 | /* 127 - Delete */ CharacterInvalid,
|
---|
227 | /* 128 - Cc category */ CharacterInvalid,
|
---|
228 | /* 129 - Cc category */ CharacterInvalid,
|
---|
229 | /* 130 - Cc category */ CharacterInvalid,
|
---|
230 | /* 131 - Cc category */ CharacterInvalid,
|
---|
231 | /* 132 - Cc category */ CharacterInvalid,
|
---|
232 | /* 133 - Cc category */ CharacterInvalid,
|
---|
233 | /* 134 - Cc category */ CharacterInvalid,
|
---|
234 | /* 135 - Cc category */ CharacterInvalid,
|
---|
235 | /* 136 - Cc category */ CharacterInvalid,
|
---|
236 | /* 137 - Cc category */ CharacterInvalid,
|
---|
237 | /* 138 - Cc category */ CharacterInvalid,
|
---|
238 | /* 139 - Cc category */ CharacterInvalid,
|
---|
239 | /* 140 - Cc category */ CharacterInvalid,
|
---|
240 | /* 141 - Cc category */ CharacterInvalid,
|
---|
241 | /* 142 - Cc category */ CharacterInvalid,
|
---|
242 | /* 143 - Cc category */ CharacterInvalid,
|
---|
243 | /* 144 - Cc category */ CharacterInvalid,
|
---|
244 | /* 145 - Cc category */ CharacterInvalid,
|
---|
245 | /* 146 - Cc category */ CharacterInvalid,
|
---|
246 | /* 147 - Cc category */ CharacterInvalid,
|
---|
247 | /* 148 - Cc category */ CharacterInvalid,
|
---|
248 | /* 149 - Cc category */ CharacterInvalid,
|
---|
249 | /* 150 - Cc category */ CharacterInvalid,
|
---|
250 | /* 151 - Cc category */ CharacterInvalid,
|
---|
251 | /* 152 - Cc category */ CharacterInvalid,
|
---|
252 | /* 153 - Cc category */ CharacterInvalid,
|
---|
253 | /* 154 - Cc category */ CharacterInvalid,
|
---|
254 | /* 155 - Cc category */ CharacterInvalid,
|
---|
255 | /* 156 - Cc category */ CharacterInvalid,
|
---|
256 | /* 157 - Cc category */ CharacterInvalid,
|
---|
257 | /* 158 - Cc category */ CharacterInvalid,
|
---|
258 | /* 159 - Cc category */ CharacterInvalid,
|
---|
259 | /* 160 - Zs category (nbsp) */ CharacterWhiteSpace,
|
---|
260 | /* 161 - Po category */ CharacterInvalid,
|
---|
261 | /* 162 - Sc category */ CharacterInvalid,
|
---|
262 | /* 163 - Sc category */ CharacterInvalid,
|
---|
263 | /* 164 - Sc category */ CharacterInvalid,
|
---|
264 | /* 165 - Sc category */ CharacterInvalid,
|
---|
265 | /* 166 - So category */ CharacterInvalid,
|
---|
266 | /* 167 - So category */ CharacterInvalid,
|
---|
267 | /* 168 - Sk category */ CharacterInvalid,
|
---|
268 | /* 169 - So category */ CharacterInvalid,
|
---|
269 | /* 170 - Ll category */ CharacterIdentifierStart,
|
---|
270 | /* 171 - Pi category */ CharacterInvalid,
|
---|
271 | /* 172 - Sm category */ CharacterInvalid,
|
---|
272 | /* 173 - Cf category */ CharacterInvalid,
|
---|
273 | /* 174 - So category */ CharacterInvalid,
|
---|
274 | /* 175 - Sk category */ CharacterInvalid,
|
---|
275 | /* 176 - So category */ CharacterInvalid,
|
---|
276 | /* 177 - Sm category */ CharacterInvalid,
|
---|
277 | /* 178 - No category */ CharacterInvalid,
|
---|
278 | /* 179 - No category */ CharacterInvalid,
|
---|
279 | /* 180 - Sk category */ CharacterInvalid,
|
---|
280 | /* 181 - Ll category */ CharacterIdentifierStart,
|
---|
281 | /* 182 - So category */ CharacterInvalid,
|
---|
282 | /* 183 - Po category */ CharacterOtherIdentifierPart,
|
---|
283 | /* 184 - Sk category */ CharacterInvalid,
|
---|
284 | /* 185 - No category */ CharacterInvalid,
|
---|
285 | /* 186 - Ll category */ CharacterIdentifierStart,
|
---|
286 | /* 187 - Pf category */ CharacterInvalid,
|
---|
287 | /* 188 - No category */ CharacterInvalid,
|
---|
288 | /* 189 - No category */ CharacterInvalid,
|
---|
289 | /* 190 - No category */ CharacterInvalid,
|
---|
290 | /* 191 - Po category */ CharacterInvalid,
|
---|
291 | /* 192 - Lu category */ CharacterIdentifierStart,
|
---|
292 | /* 193 - Lu category */ CharacterIdentifierStart,
|
---|
293 | /* 194 - Lu category */ CharacterIdentifierStart,
|
---|
294 | /* 195 - Lu category */ CharacterIdentifierStart,
|
---|
295 | /* 196 - Lu category */ CharacterIdentifierStart,
|
---|
296 | /* 197 - Lu category */ CharacterIdentifierStart,
|
---|
297 | /* 198 - Lu category */ CharacterIdentifierStart,
|
---|
298 | /* 199 - Lu category */ CharacterIdentifierStart,
|
---|
299 | /* 200 - Lu category */ CharacterIdentifierStart,
|
---|
300 | /* 201 - Lu category */ CharacterIdentifierStart,
|
---|
301 | /* 202 - Lu category */ CharacterIdentifierStart,
|
---|
302 | /* 203 - Lu category */ CharacterIdentifierStart,
|
---|
303 | /* 204 - Lu category */ CharacterIdentifierStart,
|
---|
304 | /* 205 - Lu category */ CharacterIdentifierStart,
|
---|
305 | /* 206 - Lu category */ CharacterIdentifierStart,
|
---|
306 | /* 207 - Lu category */ CharacterIdentifierStart,
|
---|
307 | /* 208 - Lu category */ CharacterIdentifierStart,
|
---|
308 | /* 209 - Lu category */ CharacterIdentifierStart,
|
---|
309 | /* 210 - Lu category */ CharacterIdentifierStart,
|
---|
310 | /* 211 - Lu category */ CharacterIdentifierStart,
|
---|
311 | /* 212 - Lu category */ CharacterIdentifierStart,
|
---|
312 | /* 213 - Lu category */ CharacterIdentifierStart,
|
---|
313 | /* 214 - Lu category */ CharacterIdentifierStart,
|
---|
314 | /* 215 - Sm category */ CharacterInvalid,
|
---|
315 | /* 216 - Lu category */ CharacterIdentifierStart,
|
---|
316 | /* 217 - Lu category */ CharacterIdentifierStart,
|
---|
317 | /* 218 - Lu category */ CharacterIdentifierStart,
|
---|
318 | /* 219 - Lu category */ CharacterIdentifierStart,
|
---|
319 | /* 220 - Lu category */ CharacterIdentifierStart,
|
---|
320 | /* 221 - Lu category */ CharacterIdentifierStart,
|
---|
321 | /* 222 - Lu category */ CharacterIdentifierStart,
|
---|
322 | /* 223 - Ll category */ CharacterIdentifierStart,
|
---|
323 | /* 224 - Ll category */ CharacterIdentifierStart,
|
---|
324 | /* 225 - Ll category */ CharacterIdentifierStart,
|
---|
325 | /* 226 - Ll category */ CharacterIdentifierStart,
|
---|
326 | /* 227 - Ll category */ CharacterIdentifierStart,
|
---|
327 | /* 228 - Ll category */ CharacterIdentifierStart,
|
---|
328 | /* 229 - Ll category */ CharacterIdentifierStart,
|
---|
329 | /* 230 - Ll category */ CharacterIdentifierStart,
|
---|
330 | /* 231 - Ll category */ CharacterIdentifierStart,
|
---|
331 | /* 232 - Ll category */ CharacterIdentifierStart,
|
---|
332 | /* 233 - Ll category */ CharacterIdentifierStart,
|
---|
333 | /* 234 - Ll category */ CharacterIdentifierStart,
|
---|
334 | /* 235 - Ll category */ CharacterIdentifierStart,
|
---|
335 | /* 236 - Ll category */ CharacterIdentifierStart,
|
---|
336 | /* 237 - Ll category */ CharacterIdentifierStart,
|
---|
337 | /* 238 - Ll category */ CharacterIdentifierStart,
|
---|
338 | /* 239 - Ll category */ CharacterIdentifierStart,
|
---|
339 | /* 240 - Ll category */ CharacterIdentifierStart,
|
---|
340 | /* 241 - Ll category */ CharacterIdentifierStart,
|
---|
341 | /* 242 - Ll category */ CharacterIdentifierStart,
|
---|
342 | /* 243 - Ll category */ CharacterIdentifierStart,
|
---|
343 | /* 244 - Ll category */ CharacterIdentifierStart,
|
---|
344 | /* 245 - Ll category */ CharacterIdentifierStart,
|
---|
345 | /* 246 - Ll category */ CharacterIdentifierStart,
|
---|
346 | /* 247 - Sm category */ CharacterInvalid,
|
---|
347 | /* 248 - Ll category */ CharacterIdentifierStart,
|
---|
348 | /* 249 - Ll category */ CharacterIdentifierStart,
|
---|
349 | /* 250 - Ll category */ CharacterIdentifierStart,
|
---|
350 | /* 251 - Ll category */ CharacterIdentifierStart,
|
---|
351 | /* 252 - Ll category */ CharacterIdentifierStart,
|
---|
352 | /* 253 - Ll category */ CharacterIdentifierStart,
|
---|
353 | /* 254 - Ll category */ CharacterIdentifierStart,
|
---|
354 | /* 255 - Ll category */ CharacterIdentifierStart
|
---|
355 | };
|
---|
356 |
|
---|
357 | // This table provides the character that results from \X, where X is an ASCII character whose code
|
---|
358 | // (0-127, starting at NUL) is the index into the table. A table value of 0 means that more processing needs to be done.
|
---|
359 | static constexpr const LChar singleCharacterEscapeValuesForASCII[128] = {
|
---|
360 | /* 0 - Null */ 0,
|
---|
361 | /* 1 - Start of Heading */ 0,
|
---|
362 | /* 2 - Start of Text */ 0,
|
---|
363 | /* 3 - End of Text */ 0,
|
---|
364 | /* 4 - End of Transm. */ 0,
|
---|
365 | /* 5 - Enquiry */ 0,
|
---|
366 | /* 6 - Acknowledgment */ 0,
|
---|
367 | /* 7 - Bell */ 0,
|
---|
368 | /* 8 - Back Space */ 0,
|
---|
369 | /* 9 - Horizontal Tab */ 0,
|
---|
370 | /* 10 - Line Feed */ 0,
|
---|
371 | /* 11 - Vertical Tab */ 0,
|
---|
372 | /* 12 - Form Feed */ 0,
|
---|
373 | /* 13 - Carriage Return */ 0,
|
---|
374 | /* 14 - Shift Out */ 0,
|
---|
375 | /* 15 - Shift In */ 0,
|
---|
376 | /* 16 - Data Line Escape */ 0,
|
---|
377 | /* 17 - Device Control 1 */ 0,
|
---|
378 | /* 18 - Device Control 2 */ 0,
|
---|
379 | /* 19 - Device Control 3 */ 0,
|
---|
380 | /* 20 - Device Control 4 */ 0,
|
---|
381 | /* 21 - Negative Ack. */ 0,
|
---|
382 | /* 22 - Synchronous Idle */ 0,
|
---|
383 | /* 23 - End of Transmit */ 0,
|
---|
384 | /* 24 - Cancel */ 0,
|
---|
385 | /* 25 - End of Medium */ 0,
|
---|
386 | /* 26 - Substitute */ 0,
|
---|
387 | /* 27 - Escape */ 0,
|
---|
388 | /* 28 - File Separator */ 0,
|
---|
389 | /* 29 - Group Separator */ 0,
|
---|
390 | /* 30 - Record Separator */ 0,
|
---|
391 | /* 31 - Unit Separator */ 0,
|
---|
392 | /* 32 - Space */ ' ',
|
---|
393 | /* 33 - ! */ '!',
|
---|
394 | /* 34 - " */ '"',
|
---|
395 | /* 35 - # */ '#',
|
---|
396 | /* 36 - $ */ '$',
|
---|
397 | /* 37 - % */ '%',
|
---|
398 | /* 38 - & */ '&',
|
---|
399 | /* 39 - ' */ '\'',
|
---|
400 | /* 40 - ( */ '(',
|
---|
401 | /* 41 - ) */ ')',
|
---|
402 | /* 42 - * */ '*',
|
---|
403 | /* 43 - + */ '+',
|
---|
404 | /* 44 - , */ ',',
|
---|
405 | /* 45 - - */ '-',
|
---|
406 | /* 46 - . */ '.',
|
---|
407 | /* 47 - / */ '/',
|
---|
408 | /* 48 - 0 */ 0,
|
---|
409 | /* 49 - 1 */ 0,
|
---|
410 | /* 50 - 2 */ 0,
|
---|
411 | /* 51 - 3 */ 0,
|
---|
412 | /* 52 - 4 */ 0,
|
---|
413 | /* 53 - 5 */ 0,
|
---|
414 | /* 54 - 6 */ 0,
|
---|
415 | /* 55 - 7 */ 0,
|
---|
416 | /* 56 - 8 */ 0,
|
---|
417 | /* 57 - 9 */ 0,
|
---|
418 | /* 58 - : */ ':',
|
---|
419 | /* 59 - ; */ ';',
|
---|
420 | /* 60 - < */ '<',
|
---|
421 | /* 61 - = */ '=',
|
---|
422 | /* 62 - > */ '>',
|
---|
423 | /* 63 - ? */ '?',
|
---|
424 | /* 64 - @ */ '@',
|
---|
425 | /* 65 - A */ 'A',
|
---|
426 | /* 66 - B */ 'B',
|
---|
427 | /* 67 - C */ 'C',
|
---|
428 | /* 68 - D */ 'D',
|
---|
429 | /* 69 - E */ 'E',
|
---|
430 | /* 70 - F */ 'F',
|
---|
431 | /* 71 - G */ 'G',
|
---|
432 | /* 72 - H */ 'H',
|
---|
433 | /* 73 - I */ 'I',
|
---|
434 | /* 74 - J */ 'J',
|
---|
435 | /* 75 - K */ 'K',
|
---|
436 | /* 76 - L */ 'L',
|
---|
437 | /* 77 - M */ 'M',
|
---|
438 | /* 78 - N */ 'N',
|
---|
439 | /* 79 - O */ 'O',
|
---|
440 | /* 80 - P */ 'P',
|
---|
441 | /* 81 - Q */ 'Q',
|
---|
442 | /* 82 - R */ 'R',
|
---|
443 | /* 83 - S */ 'S',
|
---|
444 | /* 84 - T */ 'T',
|
---|
445 | /* 85 - U */ 'U',
|
---|
446 | /* 86 - V */ 'V',
|
---|
447 | /* 87 - W */ 'W',
|
---|
448 | /* 88 - X */ 'X',
|
---|
449 | /* 89 - Y */ 'Y',
|
---|
450 | /* 90 - Z */ 'Z',
|
---|
451 | /* 91 - [ */ '[',
|
---|
452 | /* 92 - \ */ '\\',
|
---|
453 | /* 93 - ] */ ']',
|
---|
454 | /* 94 - ^ */ '^',
|
---|
455 | /* 95 - _ */ '_',
|
---|
456 | /* 96 - ` */ '`',
|
---|
457 | /* 97 - a */ 'a',
|
---|
458 | /* 98 - b */ 0x08,
|
---|
459 | /* 99 - c */ 'c',
|
---|
460 | /* 100 - d */ 'd',
|
---|
461 | /* 101 - e */ 'e',
|
---|
462 | /* 102 - f */ 0x0C,
|
---|
463 | /* 103 - g */ 'g',
|
---|
464 | /* 104 - h */ 'h',
|
---|
465 | /* 105 - i */ 'i',
|
---|
466 | /* 106 - j */ 'j',
|
---|
467 | /* 107 - k */ 'k',
|
---|
468 | /* 108 - l */ 'l',
|
---|
469 | /* 109 - m */ 'm',
|
---|
470 | /* 110 - n */ 0x0A,
|
---|
471 | /* 111 - o */ 'o',
|
---|
472 | /* 112 - p */ 'p',
|
---|
473 | /* 113 - q */ 'q',
|
---|
474 | /* 114 - r */ 0x0D,
|
---|
475 | /* 115 - s */ 's',
|
---|
476 | /* 116 - t */ 0x09,
|
---|
477 | /* 117 - u */ 0,
|
---|
478 | /* 118 - v */ 0x0B,
|
---|
479 | /* 119 - w */ 'w',
|
---|
480 | /* 120 - x */ 0,
|
---|
481 | /* 121 - y */ 'y',
|
---|
482 | /* 122 - z */ 'z',
|
---|
483 | /* 123 - { */ '{',
|
---|
484 | /* 124 - | */ '|',
|
---|
485 | /* 125 - } */ '}',
|
---|
486 | /* 126 - ~ */ '~',
|
---|
487 | /* 127 - Delete */ 0
|
---|
488 | };
|
---|
489 |
|
---|
490 | template <typename T>
|
---|
491 | Lexer<T>::Lexer(VM& vm, JSParserBuiltinMode builtinMode, JSParserScriptMode scriptMode)
|
---|
492 | : m_positionBeforeLastNewline(0,0,0)
|
---|
493 | , m_isReparsingFunction(false)
|
---|
494 | , m_vm(vm)
|
---|
495 | , m_parsingBuiltinFunction(builtinMode == JSParserBuiltinMode::Builtin)
|
---|
496 | , m_scriptMode(scriptMode)
|
---|
497 | {
|
---|
498 | }
|
---|
499 |
|
---|
500 | static inline JSTokenType tokenTypeForIntegerLikeToken(double doubleValue)
|
---|
501 | {
|
---|
502 | if ((doubleValue || !std::signbit(doubleValue)) && static_cast<int64_t>(doubleValue) == doubleValue)
|
---|
503 | return INTEGER;
|
---|
504 | return DOUBLE;
|
---|
505 | }
|
---|
506 |
|
---|
507 | template <typename T>
|
---|
508 | Lexer<T>::~Lexer()
|
---|
509 | {
|
---|
510 | }
|
---|
511 |
|
---|
512 | template <typename T>
|
---|
513 | String Lexer<T>::invalidCharacterMessage() const
|
---|
514 | {
|
---|
515 | switch (m_current) {
|
---|
516 | case 0:
|
---|
517 | return "Invalid character: '\\0'"_s;
|
---|
518 | case 10:
|
---|
519 | return "Invalid character: '\\n'"_s;
|
---|
520 | case 11:
|
---|
521 | return "Invalid character: '\\v'"_s;
|
---|
522 | case 13:
|
---|
523 | return "Invalid character: '\\r'"_s;
|
---|
524 | case 35:
|
---|
525 | return "Invalid character: '#'"_s;
|
---|
526 | case 64:
|
---|
527 | return "Invalid character: '@'"_s;
|
---|
528 | case 96:
|
---|
529 | return "Invalid character: '`'"_s;
|
---|
530 | default:
|
---|
531 | return makeString("Invalid character '\\u", hex(m_current, 4, Lowercase), '\'');
|
---|
532 | }
|
---|
533 | }
|
---|
534 |
|
---|
535 | template <typename T>
|
---|
536 | ALWAYS_INLINE const T* Lexer<T>::currentSourcePtr() const
|
---|
537 | {
|
---|
538 | ASSERT(m_code <= m_codeEnd);
|
---|
539 | return m_code;
|
---|
540 | }
|
---|
541 |
|
---|
542 | template <typename T>
|
---|
543 | void Lexer<T>::setCode(const SourceCode& source, ParserArena* arena)
|
---|
544 | {
|
---|
545 | m_arena = &arena->identifierArena();
|
---|
546 |
|
---|
547 | m_lineNumber = source.firstLine().oneBasedInt();
|
---|
548 | m_lastToken = -1;
|
---|
549 |
|
---|
550 | StringView sourceString = source.provider()->source();
|
---|
551 |
|
---|
552 | if (!sourceString.isNull())
|
---|
553 | setCodeStart(sourceString);
|
---|
554 | else
|
---|
555 | m_codeStart = nullptr;
|
---|
556 |
|
---|
557 | m_source = &source;
|
---|
558 | m_sourceOffset = source.startOffset();
|
---|
559 | m_codeStartPlusOffset = m_codeStart + source.startOffset();
|
---|
560 | m_code = m_codeStartPlusOffset;
|
---|
561 | m_codeEnd = m_codeStart + source.endOffset();
|
---|
562 | m_error = false;
|
---|
563 | m_atLineStart = true;
|
---|
564 | m_lineStart = m_code;
|
---|
565 | m_lexErrorMessage = String();
|
---|
566 | m_sourceURLDirective = String();
|
---|
567 | m_sourceMappingURLDirective = String();
|
---|
568 |
|
---|
569 | m_buffer8.reserveInitialCapacity(initialReadBufferCapacity);
|
---|
570 | m_buffer16.reserveInitialCapacity(initialReadBufferCapacity);
|
---|
571 | m_bufferForRawTemplateString16.reserveInitialCapacity(initialReadBufferCapacity);
|
---|
572 |
|
---|
573 | if (LIKELY(m_code < m_codeEnd))
|
---|
574 | m_current = *m_code;
|
---|
575 | else
|
---|
576 | m_current = 0;
|
---|
577 | ASSERT(currentOffset() == source.startOffset());
|
---|
578 | }
|
---|
579 |
|
---|
580 | template <typename T>
|
---|
581 | template <int shiftAmount> ALWAYS_INLINE void Lexer<T>::internalShift()
|
---|
582 | {
|
---|
583 | m_code += shiftAmount;
|
---|
584 | ASSERT(currentOffset() >= currentLineStartOffset());
|
---|
585 | m_current = *m_code;
|
---|
586 | }
|
---|
587 |
|
---|
588 | template <typename T>
|
---|
589 | ALWAYS_INLINE void Lexer<T>::shift()
|
---|
590 | {
|
---|
591 | // At one point timing showed that setting m_current to 0 unconditionally was faster than an if-else sequence.
|
---|
592 | m_current = 0;
|
---|
593 | ++m_code;
|
---|
594 | if (LIKELY(m_code < m_codeEnd))
|
---|
595 | m_current = *m_code;
|
---|
596 | }
|
---|
597 |
|
---|
598 | template <typename T>
|
---|
599 | ALWAYS_INLINE bool Lexer<T>::atEnd() const
|
---|
600 | {
|
---|
601 | ASSERT(!m_current || m_code < m_codeEnd);
|
---|
602 | return UNLIKELY(UNLIKELY(!m_current) && m_code == m_codeEnd);
|
---|
603 | }
|
---|
604 |
|
---|
605 | template <typename T>
|
---|
606 | ALWAYS_INLINE T Lexer<T>::peek(int offset) const
|
---|
607 | {
|
---|
608 | ASSERT(offset > 0 && offset < 5);
|
---|
609 | const T* code = m_code + offset;
|
---|
610 | return (code < m_codeEnd) ? *code : 0;
|
---|
611 | }
|
---|
612 |
|
---|
613 | struct ParsedUnicodeEscapeValue {
|
---|
614 | ParsedUnicodeEscapeValue(UChar32 value)
|
---|
615 | : m_value(value)
|
---|
616 | {
|
---|
617 | ASSERT(isValid());
|
---|
618 | }
|
---|
619 |
|
---|
620 | enum SpecialValueType { Incomplete = -2, Invalid = -1 };
|
---|
621 | ParsedUnicodeEscapeValue(SpecialValueType type)
|
---|
622 | : m_value(type)
|
---|
623 | {
|
---|
624 | }
|
---|
625 |
|
---|
626 | bool isValid() const { return m_value >= 0; }
|
---|
627 | bool isIncomplete() const { return m_value == Incomplete; }
|
---|
628 |
|
---|
629 | UChar32 value() const
|
---|
630 | {
|
---|
631 | ASSERT(isValid());
|
---|
632 | return m_value;
|
---|
633 | }
|
---|
634 |
|
---|
635 | private:
|
---|
636 | UChar32 m_value;
|
---|
637 | };
|
---|
638 |
|
---|
639 | template<typename CharacterType>
|
---|
640 | ParsedUnicodeEscapeValue Lexer<CharacterType>::parseUnicodeEscape()
|
---|
641 | {
|
---|
642 | if (m_current == '{') {
|
---|
643 | shift();
|
---|
644 | UChar32 codePoint = 0;
|
---|
645 | do {
|
---|
646 | if (!isASCIIHexDigit(m_current))
|
---|
647 | return m_current ? ParsedUnicodeEscapeValue::Invalid : ParsedUnicodeEscapeValue::Incomplete;
|
---|
648 | codePoint = (codePoint << 4) | toASCIIHexValue(m_current);
|
---|
649 | if (codePoint > UCHAR_MAX_VALUE) {
|
---|
650 | // For raw template literal syntax, we consume `NotEscapeSequence`.
|
---|
651 | // Here, we consume NotCodePoint's HexDigits.
|
---|
652 | //
|
---|
653 | // NotEscapeSequence ::
|
---|
654 | // u { [lookahread not one of HexDigit]
|
---|
655 | // u { NotCodePoint
|
---|
656 | // u { CodePoint [lookahead != }]
|
---|
657 | //
|
---|
658 | // NotCodePoint ::
|
---|
659 | // HexDigits but not if MV of HexDigits <= 0x10FFFF
|
---|
660 | //
|
---|
661 | // CodePoint ::
|
---|
662 | // HexDigits but not if MV of HexDigits > 0x10FFFF
|
---|
663 | shift();
|
---|
664 | while (isASCIIHexDigit(m_current))
|
---|
665 | shift();
|
---|
666 |
|
---|
667 | return atEnd() ? ParsedUnicodeEscapeValue::Incomplete : ParsedUnicodeEscapeValue::Invalid;
|
---|
668 | }
|
---|
669 | shift();
|
---|
670 | } while (m_current != '}');
|
---|
671 | shift();
|
---|
672 | return codePoint;
|
---|
673 | }
|
---|
674 |
|
---|
675 | auto character2 = peek(1);
|
---|
676 | auto character3 = peek(2);
|
---|
677 | auto character4 = peek(3);
|
---|
678 | if (UNLIKELY(!isASCIIHexDigit(m_current) || !isASCIIHexDigit(character2) || !isASCIIHexDigit(character3) || !isASCIIHexDigit(character4))) {
|
---|
679 | auto result = (m_code + 4) >= m_codeEnd ? ParsedUnicodeEscapeValue::Incomplete : ParsedUnicodeEscapeValue::Invalid;
|
---|
680 |
|
---|
681 | // For raw template literal syntax, we consume `NotEscapeSequence`.
|
---|
682 | //
|
---|
683 | // NotEscapeSequence ::
|
---|
684 | // u [lookahead not one of HexDigit][lookahead != {]
|
---|
685 | // u HexDigit [lookahead not one of HexDigit]
|
---|
686 | // u HexDigit HexDigit [lookahead not one of HexDigit]
|
---|
687 | // u HexDigit HexDigit HexDigit [lookahead not one of HexDigit]
|
---|
688 | while (isASCIIHexDigit(m_current))
|
---|
689 | shift();
|
---|
690 |
|
---|
691 | return result;
|
---|
692 | }
|
---|
693 |
|
---|
694 | auto result = convertUnicode(m_current, character2, character3, character4);
|
---|
695 | shift();
|
---|
696 | shift();
|
---|
697 | shift();
|
---|
698 | shift();
|
---|
699 | return result;
|
---|
700 | }
|
---|
701 |
|
---|
702 | template <typename T>
|
---|
703 | void Lexer<T>::shiftLineTerminator()
|
---|
704 | {
|
---|
705 | ASSERT(isLineTerminator(m_current));
|
---|
706 |
|
---|
707 | m_positionBeforeLastNewline = currentPosition();
|
---|
708 | T prev = m_current;
|
---|
709 | shift();
|
---|
710 |
|
---|
711 | if (prev == '\r' && m_current == '\n')
|
---|
712 | shift();
|
---|
713 |
|
---|
714 | ++m_lineNumber;
|
---|
715 | m_lineStart = m_code;
|
---|
716 | }
|
---|
717 |
|
---|
718 | template <typename T>
|
---|
719 | ALWAYS_INLINE bool Lexer<T>::lastTokenWasRestrKeyword() const
|
---|
720 | {
|
---|
721 | return m_lastToken == CONTINUE || m_lastToken == BREAK || m_lastToken == RETURN || m_lastToken == THROW;
|
---|
722 | }
|
---|
723 |
|
---|
724 | template <typename T>
|
---|
725 | ALWAYS_INLINE void Lexer<T>::skipWhitespace()
|
---|
726 | {
|
---|
727 | while (isWhiteSpace(m_current))
|
---|
728 | shift();
|
---|
729 | }
|
---|
730 |
|
---|
731 | static bool isNonLatin1IdentStart(UChar32 c)
|
---|
732 | {
|
---|
733 | return u_hasBinaryProperty(c, UCHAR_ID_START);
|
---|
734 | }
|
---|
735 |
|
---|
736 | template<typename CharacterType>
|
---|
737 | static ALWAYS_INLINE bool isIdentStart(CharacterType c)
|
---|
738 | {
|
---|
739 | static_assert(std::is_same_v<CharacterType, LChar> || std::is_same_v<CharacterType, UChar32>, "Call isSingleCharacterIdentStart for UChars that don't need to check for surrogate pairs");
|
---|
740 | if (!isLatin1(c))
|
---|
741 | return isNonLatin1IdentStart(c);
|
---|
742 | return typesOfLatin1Characters[static_cast<LChar>(c)] == CharacterIdentifierStart;
|
---|
743 | }
|
---|
744 |
|
---|
745 | static ALWAYS_INLINE UNUSED_FUNCTION bool isSingleCharacterIdentStart(UChar c)
|
---|
746 | {
|
---|
747 | if (LIKELY(isLatin1(c)))
|
---|
748 | return isIdentStart(static_cast<LChar>(c));
|
---|
749 | return !U16_IS_SURROGATE(c) && isIdentStart(static_cast<UChar32>(c));
|
---|
750 | }
|
---|
751 |
|
---|
752 | static ALWAYS_INLINE bool cannotBeIdentStart(LChar c)
|
---|
753 | {
|
---|
754 | return !isIdentStart(c) && c != '\\';
|
---|
755 | }
|
---|
756 |
|
---|
757 | static ALWAYS_INLINE bool cannotBeIdentStart(UChar c)
|
---|
758 | {
|
---|
759 | if (LIKELY(isLatin1(c)))
|
---|
760 | return cannotBeIdentStart(static_cast<LChar>(c));
|
---|
761 | return Lexer<UChar>::isWhiteSpace(c) || Lexer<UChar>::isLineTerminator(c);
|
---|
762 | }
|
---|
763 |
|
---|
764 | static NEVER_INLINE bool isNonLatin1IdentPart(UChar32 c)
|
---|
765 | {
|
---|
766 | return u_hasBinaryProperty(c, UCHAR_ID_CONTINUE) || c == 0x200C || c == 0x200D;
|
---|
767 | }
|
---|
768 |
|
---|
769 | template<typename CharacterType>
|
---|
770 | static ALWAYS_INLINE bool isIdentPart(CharacterType c)
|
---|
771 | {
|
---|
772 | static_assert(std::is_same_v<CharacterType, LChar> || std::is_same_v<CharacterType, UChar32>, "Call isSingleCharacterIdentPart for UChars that don't need to check for surrogate pairs");
|
---|
773 | if (!isLatin1(c))
|
---|
774 | return isNonLatin1IdentPart(c);
|
---|
775 |
|
---|
776 | // Character types are divided into two groups depending on whether they can be part of an
|
---|
777 | // identifier or not. Those whose type value is less or equal than CharacterOtherIdentifierPart can be
|
---|
778 | // part of an identifier. (See the CharacterType definition for more details.)
|
---|
779 | return typesOfLatin1Characters[static_cast<LChar>(c)] <= CharacterOtherIdentifierPart;
|
---|
780 | }
|
---|
781 |
|
---|
782 | static ALWAYS_INLINE bool isSingleCharacterIdentPart(UChar c)
|
---|
783 | {
|
---|
784 | if (LIKELY(isLatin1(c)))
|
---|
785 | return isIdentPart(static_cast<LChar>(c));
|
---|
786 | return !U16_IS_SURROGATE(c) && isIdentPart(static_cast<UChar32>(c));
|
---|
787 | }
|
---|
788 |
|
---|
789 | static ALWAYS_INLINE bool cannotBeIdentPartOrEscapeStart(LChar c)
|
---|
790 | {
|
---|
791 | return !isIdentPart(c) && c != '\\';
|
---|
792 | }
|
---|
793 |
|
---|
794 | // NOTE: This may give give false negatives (for non-ascii) but won't give false posititves.
|
---|
795 | // This means it can be used to detect the end of a keyword (all keywords are ascii)
|
---|
796 | static ALWAYS_INLINE bool cannotBeIdentPartOrEscapeStart(UChar c)
|
---|
797 | {
|
---|
798 | if (LIKELY(isLatin1(c)))
|
---|
799 | return cannotBeIdentPartOrEscapeStart(static_cast<LChar>(c));
|
---|
800 | return Lexer<UChar>::isWhiteSpace(c) || Lexer<UChar>::isLineTerminator(c);
|
---|
801 | }
|
---|
802 |
|
---|
803 |
|
---|
804 | template<>
|
---|
805 | ALWAYS_INLINE UChar32 Lexer<LChar>::currentCodePoint() const
|
---|
806 | {
|
---|
807 | return m_current;
|
---|
808 | }
|
---|
809 |
|
---|
810 | template<>
|
---|
811 | ALWAYS_INLINE UChar32 Lexer<UChar>::currentCodePoint() const
|
---|
812 | {
|
---|
813 | ASSERT_WITH_MESSAGE(!isIdentStart(static_cast<UChar32>(U_SENTINEL)), "error values shouldn't appear as a valid identifier start code point");
|
---|
814 | if (!U16_IS_SURROGATE(m_current))
|
---|
815 | return m_current;
|
---|
816 |
|
---|
817 | UChar trail = peek(1);
|
---|
818 | if (UNLIKELY(!U16_IS_LEAD(m_current) || !U16_IS_SURROGATE_TRAIL(trail)))
|
---|
819 | return U_SENTINEL;
|
---|
820 |
|
---|
821 | UChar32 codePoint = U16_GET_SUPPLEMENTARY(m_current, trail);
|
---|
822 | return codePoint;
|
---|
823 | }
|
---|
824 |
|
---|
// Accepts an ASCII decimal digit or the '_' separator.
template<typename CharacterType>
static inline bool isASCIIDigitOrSeparator(CharacterType character)
{
    if (character == '_')
        return true;
    return isASCIIDigit(character);
}
|
---|
830 |
|
---|
// Accepts an ASCII hexadecimal digit or the '_' separator.
template<typename CharacterType>
static inline bool isASCIIHexDigitOrSeparator(CharacterType character)
{
    if (character == '_')
        return true;
    return isASCIIHexDigit(character);
}
|
---|
836 |
|
---|
// Accepts an ASCII binary digit or the '_' separator.
template<typename CharacterType>
static inline bool isASCIIBinaryDigitOrSeparator(CharacterType character)
{
    if (character == '_')
        return true;
    return isASCIIBinaryDigit(character);
}
|
---|
842 |
|
---|
// Accepts an ASCII octal digit or the '_' separator.
template<typename CharacterType>
static inline bool isASCIIOctalDigitOrSeparator(CharacterType character)
{
    if (character == '_')
        return true;
    return isASCIIOctalDigit(character);
}
|
---|
848 |
|
---|
849 | static inline LChar singleEscape(int c)
|
---|
850 | {
|
---|
851 | if (c < 128) {
|
---|
852 | ASSERT(static_cast<size_t>(c) < WTF_ARRAY_LENGTH(singleCharacterEscapeValuesForASCII));
|
---|
853 | return singleCharacterEscapeValuesForASCII[c];
|
---|
854 | }
|
---|
855 | return 0;
|
---|
856 | }
|
---|
857 |
|
---|
858 | template <typename T>
|
---|
859 | inline void Lexer<T>::record8(int c)
|
---|
860 | {
|
---|
861 | ASSERT(isLatin1(c));
|
---|
862 | m_buffer8.append(static_cast<LChar>(c));
|
---|
863 | }
|
---|
864 |
|
---|
865 | template <typename T>
|
---|
866 | inline void Lexer<T>::append8(const T* p, size_t length)
|
---|
867 | {
|
---|
868 | size_t currentSize = m_buffer8.size();
|
---|
869 | m_buffer8.grow(currentSize + length);
|
---|
870 | LChar* rawBuffer = m_buffer8.data() + currentSize;
|
---|
871 |
|
---|
872 | for (size_t i = 0; i < length; i++) {
|
---|
873 | T c = p[i];
|
---|
874 | ASSERT(isLatin1(c));
|
---|
875 | rawBuffer[i] = c;
|
---|
876 | }
|
---|
877 | }
|
---|
878 |
|
---|
879 | template <typename T>
|
---|
880 | inline void Lexer<T>::append16(const LChar* p, size_t length)
|
---|
881 | {
|
---|
882 | size_t currentSize = m_buffer16.size();
|
---|
883 | m_buffer16.grow(currentSize + length);
|
---|
884 | UChar* rawBuffer = m_buffer16.data() + currentSize;
|
---|
885 |
|
---|
886 | for (size_t i = 0; i < length; i++)
|
---|
887 | rawBuffer[i] = p[i];
|
---|
888 | }
|
---|
889 |
|
---|
890 | template <typename T>
|
---|
891 | inline void Lexer<T>::record16(T c)
|
---|
892 | {
|
---|
893 | m_buffer16.append(c);
|
---|
894 | }
|
---|
895 |
|
---|
896 | template <typename T>
|
---|
897 | inline void Lexer<T>::record16(int c)
|
---|
898 | {
|
---|
899 | ASSERT(c >= 0);
|
---|
900 | ASSERT(c <= static_cast<int>(USHRT_MAX));
|
---|
901 | m_buffer16.append(static_cast<UChar>(c));
|
---|
902 | }
|
---|
903 |
|
---|
904 | template<typename CharacterType> inline void Lexer<CharacterType>::recordUnicodeCodePoint(UChar32 codePoint)
|
---|
905 | {
|
---|
906 | ASSERT(codePoint >= 0);
|
---|
907 | ASSERT(codePoint <= UCHAR_MAX_VALUE);
|
---|
908 | if (U_IS_BMP(codePoint))
|
---|
909 | record16(codePoint);
|
---|
910 | else {
|
---|
911 | UChar codeUnits[2] = { U16_LEAD(codePoint), U16_TRAIL(codePoint) };
|
---|
912 | append16(codeUnits, 2);
|
---|
913 | }
|
---|
914 | }
|
---|
915 |
|
---|
#if ASSERT_ENABLED
// Returns false for identifiers that builtin code must not use: the public
// names of call and apply, eval, and Function. A null identifier is
// considered safe.
bool isSafeBuiltinIdentifier(VM& vm, const Identifier* ident)
{
    if (!ident)
        return true;

    // Just block any use of suspicious identifiers. This is intended to
    // be used as a safety net while implementing builtins.
    // FIXME: How can a debug-only assertion be a safety net?
    const Identifier& name = *ident;
    const bool isDisallowed = name == vm.propertyNames->builtinNames().callPublicName()
        || name == vm.propertyNames->builtinNames().applyPublicName()
        || name == vm.propertyNames->eval
        || name == vm.propertyNames->Function;
    return !isDisallowed;
}
#endif // ASSERT_ENABLED
|
---|
936 |
|
---|
937 | template <>
|
---|
938 | template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType Lexer<LChar>::parseIdentifier(JSTokenData* tokenData, OptionSet<LexerFlags> lexerFlags, bool strictMode)
|
---|
939 | {
|
---|
940 | tokenData->escaped = false;
|
---|
941 | const ptrdiff_t remaining = m_codeEnd - m_code;
|
---|
942 | if ((remaining >= maxTokenLength) && !lexerFlags.contains(LexerFlags::IgnoreReservedWords)) {
|
---|
943 | JSTokenType keyword = parseKeyword<shouldCreateIdentifier>(tokenData);
|
---|
944 | if (keyword != IDENT) {
|
---|
945 | ASSERT((!shouldCreateIdentifier) || tokenData->ident);
|
---|
946 | return keyword == RESERVED_IF_STRICT && !strictMode ? IDENT : keyword;
|
---|
947 | }
|
---|
948 | }
|
---|
949 |
|
---|
950 | bool isPrivateName = m_current == '#';
|
---|
951 | bool isBuiltinName = m_current == '@' && m_parsingBuiltinFunction;
|
---|
952 | bool isWellKnownSymbol = false;
|
---|
953 | if (isBuiltinName) {
|
---|
954 | ASSERT(m_parsingBuiltinFunction);
|
---|
955 | shift();
|
---|
956 | if (m_current == '@') {
|
---|
957 | isWellKnownSymbol = true;
|
---|
958 | shift();
|
---|
959 | }
|
---|
960 | }
|
---|
961 |
|
---|
962 | const LChar* identifierStart = currentSourcePtr();
|
---|
963 |
|
---|
964 | if (isPrivateName)
|
---|
965 | shift();
|
---|
966 |
|
---|
967 | ASSERT(isIdentStart(m_current) || m_current == '\\');
|
---|
968 | while (isIdentPart(m_current))
|
---|
969 | shift();
|
---|
970 |
|
---|
971 | if (UNLIKELY(m_current == '\\'))
|
---|
972 | return parseIdentifierSlowCase<shouldCreateIdentifier>(tokenData, lexerFlags, strictMode, identifierStart);
|
---|
973 |
|
---|
974 | const Identifier* ident = nullptr;
|
---|
975 |
|
---|
976 | if (shouldCreateIdentifier || m_parsingBuiltinFunction) {
|
---|
977 | int identifierLength = currentSourcePtr() - identifierStart;
|
---|
978 | ident = makeIdentifier(identifierStart, identifierLength);
|
---|
979 | if (m_parsingBuiltinFunction && isBuiltinName) {
|
---|
980 | if (isWellKnownSymbol)
|
---|
981 | ident = &m_arena->makeIdentifier(m_vm, m_vm.propertyNames->builtinNames().lookUpWellKnownSymbol(identifierStart, identifierLength));
|
---|
982 | else
|
---|
983 | ident = &m_arena->makeIdentifier(m_vm, m_vm.propertyNames->builtinNames().lookUpPrivateName(identifierStart, identifierLength));
|
---|
984 | if (!ident)
|
---|
985 | return INVALID_PRIVATE_NAME_ERRORTOK;
|
---|
986 | } else {
|
---|
987 | ident = makeIdentifier(identifierStart, identifierLength);
|
---|
988 | if (m_parsingBuiltinFunction) {
|
---|
989 | if (!isSafeBuiltinIdentifier(m_vm, ident)) {
|
---|
990 | m_lexErrorMessage = makeString("The use of '", ident->string(), "' is disallowed in builtin functions.");
|
---|
991 | return ERRORTOK;
|
---|
992 | }
|
---|
993 | if (*ident == m_vm.propertyNames->undefinedKeyword)
|
---|
994 | tokenData->ident = &m_vm.propertyNames->undefinedPrivateName;
|
---|
995 | }
|
---|
996 | }
|
---|
997 | tokenData->ident = ident;
|
---|
998 | } else
|
---|
999 | tokenData->ident = nullptr;
|
---|
1000 |
|
---|
1001 | auto identType = isPrivateName ? PRIVATENAME : IDENT;
|
---|
1002 | if (UNLIKELY((remaining < maxTokenLength) && !lexerFlags.contains(LexerFlags::IgnoreReservedWords)) && !isBuiltinName) {
|
---|
1003 | ASSERT(shouldCreateIdentifier);
|
---|
1004 | if (remaining < maxTokenLength) {
|
---|
1005 | const HashTableValue* entry = JSC::mainTable.entry(*ident);
|
---|
1006 | ASSERT((remaining < maxTokenLength) || !entry);
|
---|
1007 | if (!entry)
|
---|
1008 | return identType;
|
---|
1009 | JSTokenType token = static_cast<JSTokenType>(entry->lexerValue());
|
---|
1010 | return (token != RESERVED_IF_STRICT) || strictMode ? token : identType;
|
---|
1011 | }
|
---|
1012 | return identType;
|
---|
1013 | }
|
---|
1014 |
|
---|
1015 | return identType;
|
---|
1016 | }
|
---|
1017 |
|
---|
1018 | template <>
|
---|
1019 | template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType Lexer<UChar>::parseIdentifier(JSTokenData* tokenData, OptionSet<LexerFlags> lexerFlags, bool strictMode)
|
---|
1020 | {
|
---|
1021 | ASSERT(!m_parsingBuiltinFunction);
|
---|
1022 | tokenData->escaped = false;
|
---|
1023 | const ptrdiff_t remaining = m_codeEnd - m_code;
|
---|
1024 | if ((remaining >= maxTokenLength) && !lexerFlags.contains(LexerFlags::IgnoreReservedWords)) {
|
---|
1025 | JSTokenType keyword = parseKeyword<shouldCreateIdentifier>(tokenData);
|
---|
1026 | if (keyword != IDENT) {
|
---|
1027 | ASSERT((!shouldCreateIdentifier) || tokenData->ident);
|
---|
1028 | return keyword == RESERVED_IF_STRICT && !strictMode ? IDENT : keyword;
|
---|
1029 | }
|
---|
1030 | }
|
---|
1031 |
|
---|
1032 | bool isPrivateName = m_current == '#';
|
---|
1033 | const UChar* identifierStart = currentSourcePtr();
|
---|
1034 |
|
---|
1035 | if (isPrivateName)
|
---|
1036 | shift();
|
---|
1037 |
|
---|
1038 | UChar orAllChars = 0;
|
---|
1039 | ASSERT(isSingleCharacterIdentStart(m_current) || U16_IS_SURROGATE(m_current) || m_current == '\\');
|
---|
1040 | while (isSingleCharacterIdentPart(m_current)) {
|
---|
1041 | orAllChars |= m_current;
|
---|
1042 | shift();
|
---|
1043 | }
|
---|
1044 |
|
---|
1045 | if (UNLIKELY(U16_IS_SURROGATE(m_current) || m_current == '\\'))
|
---|
1046 | return parseIdentifierSlowCase<shouldCreateIdentifier>(tokenData, lexerFlags, strictMode, identifierStart);
|
---|
1047 |
|
---|
1048 | bool isAll8Bit = !(orAllChars & ~0xff);
|
---|
1049 | const Identifier* ident = nullptr;
|
---|
1050 |
|
---|
1051 | if (shouldCreateIdentifier) {
|
---|
1052 | int identifierLength = currentSourcePtr() - identifierStart;
|
---|
1053 | if (isAll8Bit)
|
---|
1054 | ident = makeIdentifierLCharFromUChar(identifierStart, identifierLength);
|
---|
1055 | else
|
---|
1056 | ident = makeIdentifier(identifierStart, identifierLength);
|
---|
1057 | tokenData->ident = ident;
|
---|
1058 | } else
|
---|
1059 | tokenData->ident = nullptr;
|
---|
1060 |
|
---|
1061 | if (isPrivateName)
|
---|
1062 | return PRIVATENAME;
|
---|
1063 |
|
---|
1064 | if (UNLIKELY((remaining < maxTokenLength) && !lexerFlags.contains(LexerFlags::IgnoreReservedWords))) {
|
---|
1065 | ASSERT(shouldCreateIdentifier);
|
---|
1066 | if (remaining < maxTokenLength) {
|
---|
1067 | const HashTableValue* entry = JSC::mainTable.entry(*ident);
|
---|
1068 | ASSERT((remaining < maxTokenLength) || !entry);
|
---|
1069 | if (!entry)
|
---|
1070 | return IDENT;
|
---|
1071 | JSTokenType token = static_cast<JSTokenType>(entry->lexerValue());
|
---|
1072 | return (token != RESERVED_IF_STRICT) || strictMode ? token : IDENT;
|
---|
1073 | }
|
---|
1074 | return IDENT;
|
---|
1075 | }
|
---|
1076 |
|
---|
1077 | return IDENT;
|
---|
1078 | }
|
---|
1079 |
|
---|
1080 | template<typename CharacterType>
|
---|
1081 | template<bool shouldCreateIdentifier>
|
---|
1082 | JSTokenType Lexer<CharacterType>::parseIdentifierSlowCase(JSTokenData* tokenData, OptionSet<LexerFlags> lexerFlags, bool strictMode, const CharacterType* identifierStart)
|
---|
1083 | {
|
---|
1084 | ASSERT(U16_IS_SURROGATE(m_current) || m_current == '\\');
|
---|
1085 | ASSERT(m_buffer16.isEmpty());
|
---|
1086 | ASSERT(!tokenData->escaped);
|
---|
1087 |
|
---|
1088 | auto identCharsStart = identifierStart;
|
---|
1089 | bool isPrivateName = *identifierStart == '#';
|
---|
1090 | if (isPrivateName)
|
---|
1091 | ++identCharsStart;
|
---|
1092 |
|
---|
1093 | JSTokenType identType = isPrivateName ? PRIVATENAME : IDENT;
|
---|
1094 | ASSERT(!isPrivateName || identifierStart != currentSourcePtr());
|
---|
1095 |
|
---|
1096 | auto fillBuffer = [&] (bool isStart = false) {
|
---|
1097 | // \uXXXX unicode characters or Surrogate pairs.
|
---|
1098 | if (identifierStart != currentSourcePtr())
|
---|
1099 | m_buffer16.append(identifierStart, currentSourcePtr() - identifierStart);
|
---|
1100 |
|
---|
1101 | if (m_current == '\\') {
|
---|
1102 | tokenData->escaped = true;
|
---|
1103 | shift();
|
---|
1104 | if (UNLIKELY(m_current != 'u'))
|
---|
1105 | return atEnd() ? UNTERMINATED_IDENTIFIER_ESCAPE_ERRORTOK : INVALID_IDENTIFIER_ESCAPE_ERRORTOK;
|
---|
1106 | shift();
|
---|
1107 | auto character = parseUnicodeEscape();
|
---|
1108 | if (UNLIKELY(!character.isValid()))
|
---|
1109 | return character.isIncomplete() ? UNTERMINATED_IDENTIFIER_UNICODE_ESCAPE_ERRORTOK : INVALID_IDENTIFIER_UNICODE_ESCAPE_ERRORTOK;
|
---|
1110 | if (UNLIKELY(isStart ? !isIdentStart(character.value()) : !isIdentPart(character.value())))
|
---|
1111 | return INVALID_IDENTIFIER_UNICODE_ESCAPE_ERRORTOK;
|
---|
1112 | if (shouldCreateIdentifier)
|
---|
1113 | recordUnicodeCodePoint(character.value());
|
---|
1114 | identifierStart = currentSourcePtr();
|
---|
1115 | return identType;
|
---|
1116 | }
|
---|
1117 |
|
---|
1118 | ASSERT(U16_IS_SURROGATE(m_current));
|
---|
1119 | if (UNLIKELY(!U16_IS_SURROGATE_LEAD(m_current)))
|
---|
1120 | return INVALID_UNICODE_ENCODING_ERRORTOK;
|
---|
1121 |
|
---|
1122 | UChar32 codePoint = currentCodePoint();
|
---|
1123 | if (UNLIKELY(codePoint == U_SENTINEL))
|
---|
1124 | return INVALID_UNICODE_ENCODING_ERRORTOK;
|
---|
1125 | if (UNLIKELY(isStart ? !isNonLatin1IdentStart(codePoint) : !isNonLatin1IdentPart(codePoint)))
|
---|
1126 | return INVALID_IDENTIFIER_UNICODE_ERRORTOK;
|
---|
1127 | append16(m_code, 2);
|
---|
1128 | shift();
|
---|
1129 | shift();
|
---|
1130 | identifierStart = currentSourcePtr();
|
---|
1131 | return identType;
|
---|
1132 | };
|
---|
1133 |
|
---|
1134 | JSTokenType type = fillBuffer(identCharsStart == currentSourcePtr());
|
---|
1135 | if (UNLIKELY(type & CanBeErrorTokenFlag))
|
---|
1136 | return type;
|
---|
1137 |
|
---|
1138 | while (true) {
|
---|
1139 | if (LIKELY(isSingleCharacterIdentPart(m_current))) {
|
---|
1140 | shift();
|
---|
1141 | continue;
|
---|
1142 | }
|
---|
1143 | if (!U16_IS_SURROGATE(m_current) && m_current != '\\')
|
---|
1144 | break;
|
---|
1145 |
|
---|
1146 | type = fillBuffer();
|
---|
1147 | if (UNLIKELY(type & CanBeErrorTokenFlag))
|
---|
1148 | return type;
|
---|
1149 | }
|
---|
1150 |
|
---|
1151 | const Identifier* ident = nullptr;
|
---|
1152 | if (shouldCreateIdentifier) {
|
---|
1153 | if (identifierStart != currentSourcePtr())
|
---|
1154 | m_buffer16.append(identifierStart, currentSourcePtr() - identifierStart);
|
---|
1155 | ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
|
---|
1156 |
|
---|
1157 | tokenData->ident = ident;
|
---|
1158 | } else
|
---|
1159 | tokenData->ident = nullptr;
|
---|
1160 |
|
---|
1161 | m_buffer16.shrink(0);
|
---|
1162 |
|
---|
1163 | if (LIKELY(!lexerFlags.contains(LexerFlags::IgnoreReservedWords))) {
|
---|
1164 | ASSERT(shouldCreateIdentifier);
|
---|
1165 | const HashTableValue* entry = JSC::mainTable.entry(*ident);
|
---|
1166 | if (!entry)
|
---|
1167 | return identType;
|
---|
1168 | JSTokenType token = static_cast<JSTokenType>(entry->lexerValue());
|
---|
1169 | if ((token != RESERVED_IF_STRICT) || strictMode)
|
---|
1170 | return ESCAPED_KEYWORD;
|
---|
1171 | }
|
---|
1172 |
|
---|
1173 | return identType;
|
---|
1174 | }
|
---|
1175 |
|
---|
1176 | static ALWAYS_INLINE bool characterRequiresParseStringSlowCase(LChar character)
|
---|
1177 | {
|
---|
1178 | return character < 0xE;
|
---|
1179 | }
|
---|
1180 |
|
---|
1181 | static ALWAYS_INLINE bool characterRequiresParseStringSlowCase(UChar character)
|
---|
1182 | {
|
---|
1183 | return character < 0xE || !isLatin1(character);
|
---|
1184 | }
|
---|
1185 |
|
---|
1186 | template <typename T>
|
---|
1187 | template <bool shouldBuildStrings> ALWAYS_INLINE typename Lexer<T>::StringParseResult Lexer<T>::parseString(JSTokenData* tokenData, bool strictMode)
|
---|
1188 | {
|
---|
1189 | int startingOffset = currentOffset();
|
---|
1190 | int startingLineStartOffset = currentLineStartOffset();
|
---|
1191 | int startingLineNumber = lineNumber();
|
---|
1192 | T stringQuoteCharacter = m_current;
|
---|
1193 | shift();
|
---|
1194 |
|
---|
1195 | const T* stringStart = currentSourcePtr();
|
---|
1196 |
|
---|
1197 | while (m_current != stringQuoteCharacter) {
|
---|
1198 | if (UNLIKELY(m_current == '\\')) {
|
---|
1199 | if (stringStart != currentSourcePtr() && shouldBuildStrings)
|
---|
1200 | append8(stringStart, currentSourcePtr() - stringStart);
|
---|
1201 | shift();
|
---|
1202 |
|
---|
1203 | LChar escape = singleEscape(m_current);
|
---|
1204 |
|
---|
1205 | // Most common escape sequences first.
|
---|
1206 | if (escape) {
|
---|
1207 | if (shouldBuildStrings)
|
---|
1208 | record8(escape);
|
---|
1209 | shift();
|
---|
1210 | } else if (UNLIKELY(isLineTerminator(m_current)))
|
---|
1211 | shiftLineTerminator();
|
---|
1212 | else if (m_current == 'x') {
|
---|
1213 | shift();
|
---|
1214 | if (!isASCIIHexDigit(m_current) || !isASCIIHexDigit(peek(1))) {
|
---|
1215 | m_lexErrorMessage = "\\x can only be followed by a hex character sequence"_s;
|
---|
1216 | return (atEnd() || (isASCIIHexDigit(m_current) && (m_code + 1 == m_codeEnd))) ? StringUnterminated : StringCannotBeParsed;
|
---|
1217 | }
|
---|
1218 | T prev = m_current;
|
---|
1219 | shift();
|
---|
1220 | if (shouldBuildStrings)
|
---|
1221 | record8(convertHex(prev, m_current));
|
---|
1222 | shift();
|
---|
1223 | } else {
|
---|
1224 | setOffset(startingOffset, startingLineStartOffset);
|
---|
1225 | setLineNumber(startingLineNumber);
|
---|
1226 | m_buffer8.shrink(0);
|
---|
1227 | return parseStringSlowCase<shouldBuildStrings>(tokenData, strictMode);
|
---|
1228 | }
|
---|
1229 | stringStart = currentSourcePtr();
|
---|
1230 | continue;
|
---|
1231 | }
|
---|
1232 |
|
---|
1233 | if (UNLIKELY(characterRequiresParseStringSlowCase(m_current))) {
|
---|
1234 | setOffset(startingOffset, startingLineStartOffset);
|
---|
1235 | setLineNumber(startingLineNumber);
|
---|
1236 | m_buffer8.shrink(0);
|
---|
1237 | return parseStringSlowCase<shouldBuildStrings>(tokenData, strictMode);
|
---|
1238 | }
|
---|
1239 |
|
---|
1240 | shift();
|
---|
1241 | }
|
---|
1242 |
|
---|
1243 | if (currentSourcePtr() != stringStart && shouldBuildStrings)
|
---|
1244 | append8(stringStart, currentSourcePtr() - stringStart);
|
---|
1245 | if (shouldBuildStrings) {
|
---|
1246 | tokenData->ident = makeIdentifier(m_buffer8.data(), m_buffer8.size());
|
---|
1247 | m_buffer8.shrink(0);
|
---|
1248 | } else
|
---|
1249 | tokenData->ident = nullptr;
|
---|
1250 |
|
---|
1251 | return StringParsedSuccessfully;
|
---|
1252 | }
|
---|
1253 |
|
---|
1254 | template <typename T>
|
---|
1255 | template <bool shouldBuildStrings> ALWAYS_INLINE auto Lexer<T>::parseComplexEscape(bool strictMode) -> StringParseResult
|
---|
1256 | {
|
---|
1257 | if (m_current == 'x') {
|
---|
1258 | shift();
|
---|
1259 | if (!isASCIIHexDigit(m_current) || !isASCIIHexDigit(peek(1))) {
|
---|
1260 | // For raw template literal syntax, we consume `NotEscapeSequence`.
|
---|
1261 | //
|
---|
1262 | // NotEscapeSequence ::
|
---|
1263 | // x [lookahread not one of HexDigit]
|
---|
1264 | // x HexDigit [lookahread not one of HexDigit]
|
---|
1265 | if (isASCIIHexDigit(m_current))
|
---|
1266 | shift();
|
---|
1267 | ASSERT(!isASCIIHexDigit(m_current));
|
---|
1268 |
|
---|
1269 | m_lexErrorMessage = "\\x can only be followed by a hex character sequence"_s;
|
---|
1270 | return atEnd() ? StringUnterminated : StringCannotBeParsed;
|
---|
1271 | }
|
---|
1272 |
|
---|
1273 | T prev = m_current;
|
---|
1274 | shift();
|
---|
1275 | if (shouldBuildStrings)
|
---|
1276 | record16(convertHex(prev, m_current));
|
---|
1277 | shift();
|
---|
1278 |
|
---|
1279 | return StringParsedSuccessfully;
|
---|
1280 | }
|
---|
1281 |
|
---|
1282 | if (m_current == 'u') {
|
---|
1283 | shift();
|
---|
1284 |
|
---|
1285 | auto character = parseUnicodeEscape();
|
---|
1286 | if (character.isValid()) {
|
---|
1287 | if (shouldBuildStrings)
|
---|
1288 | recordUnicodeCodePoint(character.value());
|
---|
1289 | return StringParsedSuccessfully;
|
---|
1290 | }
|
---|
1291 |
|
---|
1292 | m_lexErrorMessage = "\\u can only be followed by a Unicode character sequence"_s;
|
---|
1293 | return atEnd() ? StringUnterminated : StringCannotBeParsed;
|
---|
1294 | }
|
---|
1295 |
|
---|
1296 | if (strictMode) {
|
---|
1297 | if (isASCIIDigit(m_current)) {
|
---|
1298 | // The only valid numeric escape in strict mode is '\0', and this must not be followed by a decimal digit.
|
---|
1299 | int character1 = m_current;
|
---|
1300 | shift();
|
---|
1301 | if (character1 != '0' || isASCIIDigit(m_current)) {
|
---|
1302 | // For raw template literal syntax, we consume `NotEscapeSequence`.
|
---|
1303 | //
|
---|
1304 | // NotEscapeSequence ::
|
---|
1305 | // 0 DecimalDigit
|
---|
1306 | // DecimalDigit but not 0
|
---|
1307 | if (character1 == '0')
|
---|
1308 | shift();
|
---|
1309 |
|
---|
1310 | m_lexErrorMessage = "The only valid numeric escape in strict mode is '\\0'"_s;
|
---|
1311 | return atEnd() ? StringUnterminated : StringCannotBeParsed;
|
---|
1312 | }
|
---|
1313 | if (shouldBuildStrings)
|
---|
1314 | record16(0);
|
---|
1315 | return StringParsedSuccessfully;
|
---|
1316 | }
|
---|
1317 | } else {
|
---|
1318 | if (isASCIIOctalDigit(m_current)) {
|
---|
1319 | // Octal character sequences
|
---|
1320 | T character1 = m_current;
|
---|
1321 | shift();
|
---|
1322 | if (isASCIIOctalDigit(m_current)) {
|
---|
1323 | // Two octal characters
|
---|
1324 | T character2 = m_current;
|
---|
1325 | shift();
|
---|
1326 | if (character1 >= '0' && character1 <= '3' && isASCIIOctalDigit(m_current)) {
|
---|
1327 | if (shouldBuildStrings)
|
---|
1328 | record16((character1 - '0') * 64 + (character2 - '0') * 8 + m_current - '0');
|
---|
1329 | shift();
|
---|
1330 | } else {
|
---|
1331 | if (shouldBuildStrings)
|
---|
1332 | record16((character1 - '0') * 8 + character2 - '0');
|
---|
1333 | }
|
---|
1334 | } else {
|
---|
1335 | if (shouldBuildStrings)
|
---|
1336 | record16(character1 - '0');
|
---|
1337 | }
|
---|
1338 | return StringParsedSuccessfully;
|
---|
1339 | }
|
---|
1340 | }
|
---|
1341 |
|
---|
1342 | if (!atEnd()) {
|
---|
1343 | if (shouldBuildStrings)
|
---|
1344 | record16(m_current);
|
---|
1345 | shift();
|
---|
1346 | return StringParsedSuccessfully;
|
---|
1347 | }
|
---|
1348 |
|
---|
1349 | m_lexErrorMessage = "Unterminated string constant"_s;
|
---|
1350 | return StringUnterminated;
|
---|
1351 | }
|
---|
1352 |
|
---|
1353 | template <typename T>
|
---|
1354 | template <bool shouldBuildStrings> auto Lexer<T>::parseStringSlowCase(JSTokenData* tokenData, bool strictMode) -> StringParseResult
|
---|
1355 | {
|
---|
1356 | T stringQuoteCharacter = m_current;
|
---|
1357 | shift();
|
---|
1358 |
|
---|
1359 | const T* stringStart = currentSourcePtr();
|
---|
1360 |
|
---|
1361 | while (m_current != stringQuoteCharacter) {
|
---|
1362 | if (UNLIKELY(m_current == '\\')) {
|
---|
1363 | if (stringStart != currentSourcePtr() && shouldBuildStrings)
|
---|
1364 | append16(stringStart, currentSourcePtr() - stringStart);
|
---|
1365 | shift();
|
---|
1366 |
|
---|
1367 | LChar escape = singleEscape(m_current);
|
---|
1368 |
|
---|
1369 | // Most common escape sequences first
|
---|
1370 | if (escape) {
|
---|
1371 | if (shouldBuildStrings)
|
---|
1372 | record16(escape);
|
---|
1373 | shift();
|
---|
1374 | } else if (UNLIKELY(isLineTerminator(m_current)))
|
---|
1375 | shiftLineTerminator();
|
---|
1376 | else {
|
---|
1377 | StringParseResult result = parseComplexEscape<shouldBuildStrings>(strictMode);
|
---|
1378 | if (result != StringParsedSuccessfully)
|
---|
1379 | return result;
|
---|
1380 | }
|
---|
1381 |
|
---|
1382 | stringStart = currentSourcePtr();
|
---|
1383 | continue;
|
---|
1384 | }
|
---|
1385 | // Fast check for characters that require special handling.
|
---|
1386 | // Catches 0, \n, and \r as efficiently as possible, and lets through all common ASCII characters.
|
---|
1387 | static_assert(std::is_unsigned<T>::value, "Lexer expects an unsigned character type");
|
---|
1388 | if (UNLIKELY(m_current < 0xE)) {
|
---|
1389 | // New-line or end of input is not allowed
|
---|
1390 | if (atEnd() || m_current == '\r' || m_current == '\n') {
|
---|
1391 | m_lexErrorMessage = "Unexpected EOF"_s;
|
---|
1392 | return atEnd() ? StringUnterminated : StringCannotBeParsed;
|
---|
1393 | }
|
---|
1394 | // Anything else is just a normal character
|
---|
1395 | }
|
---|
1396 | shift();
|
---|
1397 | }
|
---|
1398 |
|
---|
1399 | if (currentSourcePtr() != stringStart && shouldBuildStrings)
|
---|
1400 | append16(stringStart, currentSourcePtr() - stringStart);
|
---|
1401 | if (shouldBuildStrings)
|
---|
1402 | tokenData->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
|
---|
1403 | else
|
---|
1404 | tokenData->ident = nullptr;
|
---|
1405 |
|
---|
1406 | m_buffer16.shrink(0);
|
---|
1407 | return StringParsedSuccessfully;
|
---|
1408 | }
|
---|
1409 |
|
---|
1410 | template <typename T>
|
---|
1411 | typename Lexer<T>::StringParseResult Lexer<T>::parseTemplateLiteral(JSTokenData* tokenData, RawStringsBuildMode rawStringsBuildMode)
|
---|
1412 | {
|
---|
1413 | bool parseCookedFailed = false;
|
---|
1414 | const T* stringStart = currentSourcePtr();
|
---|
1415 | const T* rawStringStart = currentSourcePtr();
|
---|
1416 |
|
---|
1417 | while (m_current != '`') {
|
---|
1418 | if (UNLIKELY(m_current == '\\')) {
|
---|
1419 | if (stringStart != currentSourcePtr())
|
---|
1420 | append16(stringStart, currentSourcePtr() - stringStart);
|
---|
1421 | shift();
|
---|
1422 |
|
---|
1423 | LChar escape = singleEscape(m_current);
|
---|
1424 |
|
---|
1425 | // Most common escape sequences first.
|
---|
1426 | if (escape) {
|
---|
1427 | record16(escape);
|
---|
1428 | shift();
|
---|
1429 | } else if (UNLIKELY(isLineTerminator(m_current))) {
|
---|
1430 | // Normalize <CR>, <CR><LF> to <LF>.
|
---|
1431 | if (m_current == '\r') {
|
---|
1432 | ASSERT_WITH_MESSAGE(rawStringStart != currentSourcePtr(), "We should have at least shifted the escape.");
|
---|
1433 |
|
---|
1434 | if (rawStringsBuildMode == RawStringsBuildMode::BuildRawStrings) {
|
---|
1435 | m_bufferForRawTemplateString16.append(rawStringStart, currentSourcePtr() - rawStringStart);
|
---|
1436 | m_bufferForRawTemplateString16.append('\n');
|
---|
1437 | }
|
---|
1438 |
|
---|
1439 | shiftLineTerminator();
|
---|
1440 | rawStringStart = currentSourcePtr();
|
---|
1441 | } else
|
---|
1442 | shiftLineTerminator();
|
---|
1443 | } else {
|
---|
1444 | bool strictMode = true;
|
---|
1445 | StringParseResult result = parseComplexEscape<true>(strictMode);
|
---|
1446 | if (result != StringParsedSuccessfully) {
|
---|
1447 | if (rawStringsBuildMode == RawStringsBuildMode::BuildRawStrings && result == StringCannotBeParsed)
|
---|
1448 | parseCookedFailed = true;
|
---|
1449 | else
|
---|
1450 | return result;
|
---|
1451 | }
|
---|
1452 | }
|
---|
1453 |
|
---|
1454 | stringStart = currentSourcePtr();
|
---|
1455 | continue;
|
---|
1456 | }
|
---|
1457 |
|
---|
1458 | if (m_current == '$' && peek(1) == '{')
|
---|
1459 | break;
|
---|
1460 |
|
---|
1461 | // Fast check for characters that require special handling.
|
---|
1462 | // Catches 0, \n, \r, 0x2028, and 0x2029 as efficiently
|
---|
1463 | // as possible, and lets through all common ASCII characters.
|
---|
1464 | if (UNLIKELY(((static_cast<unsigned>(m_current) - 0xE) & 0x2000))) {
|
---|
1465 | // End of input is not allowed.
|
---|
1466 | // Unlike String, line terminator is allowed.
|
---|
1467 | if (atEnd()) {
|
---|
1468 | m_lexErrorMessage = "Unexpected EOF"_s;
|
---|
1469 | return StringUnterminated;
|
---|
1470 | }
|
---|
1471 |
|
---|
1472 | if (isLineTerminator(m_current)) {
|
---|
1473 | if (m_current == '\r') {
|
---|
1474 | // Normalize <CR>, <CR><LF> to <LF>.
|
---|
1475 | if (stringStart != currentSourcePtr())
|
---|
1476 | append16(stringStart, currentSourcePtr() - stringStart);
|
---|
1477 | if (rawStringStart != currentSourcePtr() && rawStringsBuildMode == RawStringsBuildMode::BuildRawStrings)
|
---|
1478 | m_bufferForRawTemplateString16.append(rawStringStart, currentSourcePtr() - rawStringStart);
|
---|
1479 |
|
---|
1480 | record16('\n');
|
---|
1481 | if (rawStringsBuildMode == RawStringsBuildMode::BuildRawStrings)
|
---|
1482 | m_bufferForRawTemplateString16.append('\n');
|
---|
1483 | shiftLineTerminator();
|
---|
1484 | stringStart = currentSourcePtr();
|
---|
1485 | rawStringStart = currentSourcePtr();
|
---|
1486 | } else
|
---|
1487 | shiftLineTerminator();
|
---|
1488 | continue;
|
---|
1489 | }
|
---|
1490 | // Anything else is just a normal character
|
---|
1491 | }
|
---|
1492 |
|
---|
1493 | shift();
|
---|
1494 | }
|
---|
1495 |
|
---|
1496 | bool isTail = m_current == '`';
|
---|
1497 |
|
---|
1498 | if (currentSourcePtr() != stringStart)
|
---|
1499 | append16(stringStart, currentSourcePtr() - stringStart);
|
---|
1500 | if (rawStringStart != currentSourcePtr() && rawStringsBuildMode == RawStringsBuildMode::BuildRawStrings)
|
---|
1501 | m_bufferForRawTemplateString16.append(rawStringStart, currentSourcePtr() - rawStringStart);
|
---|
1502 |
|
---|
1503 | if (!parseCookedFailed)
|
---|
1504 | tokenData->cooked = makeIdentifier(m_buffer16.data(), m_buffer16.size());
|
---|
1505 | else
|
---|
1506 | tokenData->cooked = nullptr;
|
---|
1507 |
|
---|
1508 | // Line terminator normalization (e.g. <CR> => <LF>) should be applied to both the raw and cooked representations.
|
---|
1509 | if (rawStringsBuildMode == RawStringsBuildMode::BuildRawStrings)
|
---|
1510 | tokenData->raw = makeIdentifier(m_bufferForRawTemplateString16.data(), m_bufferForRawTemplateString16.size());
|
---|
1511 | else
|
---|
1512 | tokenData->raw = nullptr;
|
---|
1513 |
|
---|
1514 | tokenData->isTail = isTail;
|
---|
1515 |
|
---|
1516 | m_buffer16.shrink(0);
|
---|
1517 | m_bufferForRawTemplateString16.shrink(0);
|
---|
1518 |
|
---|
1519 | if (isTail) {
|
---|
1520 | // Skip `
|
---|
1521 | shift();
|
---|
1522 | } else {
|
---|
1523 | // Skip $ and {
|
---|
1524 | shift();
|
---|
1525 | shift();
|
---|
1526 | }
|
---|
1527 |
|
---|
1528 | return StringParsedSuccessfully;
|
---|
1529 | }
|
---|
1530 |
|
---|
1531 | template <typename T>
|
---|
1532 | ALWAYS_INLINE auto Lexer<T>::parseHex() -> std::optional<NumberParseResult>
|
---|
1533 | {
|
---|
1534 | ASSERT(isASCIIHexDigit(m_current));
|
---|
1535 |
|
---|
1536 | // Optimization: most hexadecimal values fit into 4 bytes.
|
---|
1537 | uint32_t hexValue = 0;
|
---|
1538 | int maximumDigits = 7;
|
---|
1539 |
|
---|
1540 | do {
|
---|
1541 | if (m_current == '_') {
|
---|
1542 | if (UNLIKELY(!isASCIIHexDigit(peek(1))))
|
---|
1543 | return std::nullopt;
|
---|
1544 |
|
---|
1545 | shift();
|
---|
1546 | }
|
---|
1547 |
|
---|
1548 | hexValue = (hexValue << 4) + toASCIIHexValue(m_current);
|
---|
1549 | shift();
|
---|
1550 | --maximumDigits;
|
---|
1551 | } while (isASCIIHexDigitOrSeparator(m_current) && maximumDigits >= 0);
|
---|
1552 |
|
---|
1553 | if (LIKELY(maximumDigits >= 0 && m_current != 'n'))
|
---|
1554 | return NumberParseResult { static_cast<double>(hexValue) };
|
---|
1555 |
|
---|
1556 | // No more place in the hexValue buffer.
|
---|
1557 | // The values are shifted out and placed into the m_buffer8 vector.
|
---|
1558 | for (int i = 0; i < 8; ++i) {
|
---|
1559 | int digit = hexValue >> 28;
|
---|
1560 | if (digit < 10)
|
---|
1561 | record8(digit + '0');
|
---|
1562 | else
|
---|
1563 | record8(digit - 10 + 'a');
|
---|
1564 | hexValue <<= 4;
|
---|
1565 | }
|
---|
1566 |
|
---|
1567 | while (isASCIIHexDigitOrSeparator(m_current)) {
|
---|
1568 | if (m_current == '_') {
|
---|
1569 | if (UNLIKELY(!isASCIIHexDigit(peek(1))))
|
---|
1570 | return std::nullopt;
|
---|
1571 |
|
---|
1572 | shift();
|
---|
1573 | }
|
---|
1574 |
|
---|
1575 | record8(m_current);
|
---|
1576 | shift();
|
---|
1577 | }
|
---|
1578 |
|
---|
1579 | if (UNLIKELY(m_current == 'n'))
|
---|
1580 | return NumberParseResult { makeIdentifier(m_buffer8.data(), m_buffer8.size()) };
|
---|
1581 |
|
---|
1582 | return NumberParseResult { parseIntOverflow(m_buffer8.data(), m_buffer8.size(), 16) };
|
---|
1583 | }
|
---|
1584 |
|
---|
1585 | template <typename T>
|
---|
1586 | ALWAYS_INLINE auto Lexer<T>::parseBinary() -> std::optional<NumberParseResult>
|
---|
1587 | {
|
---|
1588 | ASSERT(isASCIIBinaryDigit(m_current));
|
---|
1589 |
|
---|
1590 | // Optimization: most binary values fit into 4 bytes.
|
---|
1591 | uint32_t binaryValue = 0;
|
---|
1592 | const unsigned maximumDigits = 32;
|
---|
1593 | int digit = maximumDigits - 1;
|
---|
1594 | // Temporary buffer for the digits. Makes easier
|
---|
1595 | // to reconstruct the input characters when needed.
|
---|
1596 | LChar digits[maximumDigits];
|
---|
1597 |
|
---|
1598 | do {
|
---|
1599 | if (m_current == '_') {
|
---|
1600 | if (UNLIKELY(!isASCIIBinaryDigit(peek(1))))
|
---|
1601 | return std::nullopt;
|
---|
1602 |
|
---|
1603 | shift();
|
---|
1604 | }
|
---|
1605 |
|
---|
1606 | binaryValue = (binaryValue << 1) + (m_current - '0');
|
---|
1607 | digits[digit] = m_current;
|
---|
1608 | shift();
|
---|
1609 | --digit;
|
---|
1610 | } while (isASCIIBinaryDigitOrSeparator(m_current) && digit >= 0);
|
---|
1611 |
|
---|
1612 | if (LIKELY(!isASCIIDigitOrSeparator(m_current) && digit >= 0 && m_current != 'n'))
|
---|
1613 | return NumberParseResult { static_cast<double>(binaryValue) };
|
---|
1614 |
|
---|
1615 | for (int i = maximumDigits - 1; i > digit; --i)
|
---|
1616 | record8(digits[i]);
|
---|
1617 |
|
---|
1618 | while (isASCIIBinaryDigitOrSeparator(m_current)) {
|
---|
1619 | if (m_current == '_') {
|
---|
1620 | if (UNLIKELY(!isASCIIBinaryDigit(peek(1))))
|
---|
1621 | return std::nullopt;
|
---|
1622 |
|
---|
1623 | shift();
|
---|
1624 | }
|
---|
1625 |
|
---|
1626 | record8(m_current);
|
---|
1627 | shift();
|
---|
1628 | }
|
---|
1629 |
|
---|
1630 | if (UNLIKELY(m_current == 'n'))
|
---|
1631 | return NumberParseResult { makeIdentifier(m_buffer8.data(), m_buffer8.size()) };
|
---|
1632 |
|
---|
1633 | if (isASCIIDigit(m_current))
|
---|
1634 | return std::nullopt;
|
---|
1635 |
|
---|
1636 | return NumberParseResult { parseIntOverflow(m_buffer8.data(), m_buffer8.size(), 2) };
|
---|
1637 | }
|
---|
1638 |
|
---|
1639 | template <typename T>
|
---|
1640 | ALWAYS_INLINE auto Lexer<T>::parseOctal() -> std::optional<NumberParseResult>
|
---|
1641 | {
|
---|
1642 | ASSERT(isASCIIOctalDigit(m_current));
|
---|
1643 | ASSERT(!m_buffer8.size() || (m_buffer8.size() == 1 && m_buffer8[0] == '0'));
|
---|
1644 | bool isLegacyLiteral = m_buffer8.size();
|
---|
1645 |
|
---|
1646 | // Optimization: most octal values fit into 4 bytes.
|
---|
1647 | uint32_t octalValue = 0;
|
---|
1648 | const unsigned maximumDigits = 10;
|
---|
1649 | int digit = maximumDigits - 1;
|
---|
1650 | // Temporary buffer for the digits. Makes easier
|
---|
1651 | // to reconstruct the input characters when needed.
|
---|
1652 | LChar digits[maximumDigits];
|
---|
1653 |
|
---|
1654 | do {
|
---|
1655 | if (m_current == '_') {
|
---|
1656 | if (UNLIKELY(!isASCIIOctalDigit(peek(1)) || isLegacyLiteral))
|
---|
1657 | return std::nullopt;
|
---|
1658 |
|
---|
1659 | shift();
|
---|
1660 | }
|
---|
1661 |
|
---|
1662 | octalValue = octalValue * 8 + (m_current - '0');
|
---|
1663 | digits[digit] = m_current;
|
---|
1664 | shift();
|
---|
1665 | --digit;
|
---|
1666 | } while (isASCIIOctalDigitOrSeparator(m_current) && digit >= 0);
|
---|
1667 |
|
---|
1668 | if (LIKELY(!isASCIIDigitOrSeparator(m_current) && digit >= 0 && m_current != 'n'))
|
---|
1669 | return NumberParseResult { static_cast<double>(octalValue) };
|
---|
1670 |
|
---|
1671 | for (int i = maximumDigits - 1; i > digit; --i)
|
---|
1672 | record8(digits[i]);
|
---|
1673 |
|
---|
1674 | while (isASCIIOctalDigitOrSeparator(m_current)) {
|
---|
1675 | if (m_current == '_') {
|
---|
1676 | if (UNLIKELY(!isASCIIOctalDigit(peek(1)) || isLegacyLiteral))
|
---|
1677 | return std::nullopt;
|
---|
1678 |
|
---|
1679 | shift();
|
---|
1680 | }
|
---|
1681 |
|
---|
1682 | record8(m_current);
|
---|
1683 | shift();
|
---|
1684 | }
|
---|
1685 |
|
---|
1686 | if (UNLIKELY(m_current == 'n') && !isLegacyLiteral)
|
---|
1687 | return NumberParseResult { makeIdentifier(m_buffer8.data(), m_buffer8.size()) };
|
---|
1688 |
|
---|
1689 | if (isASCIIDigit(m_current))
|
---|
1690 | return std::nullopt;
|
---|
1691 |
|
---|
1692 | return NumberParseResult { parseIntOverflow(m_buffer8.data(), m_buffer8.size(), 8) };
|
---|
1693 | }
|
---|
1694 |
|
---|
1695 | template <typename T>
|
---|
1696 | ALWAYS_INLINE auto Lexer<T>::parseDecimal() -> std::optional<NumberParseResult>
|
---|
1697 | {
|
---|
1698 | ASSERT(isASCIIDigit(m_current) || m_buffer8.size());
|
---|
1699 | bool isLegacyLiteral = m_buffer8.size() && isASCIIDigitOrSeparator(m_current);
|
---|
1700 |
|
---|
1701 | // Optimization: most decimal values fit into 4 bytes.
|
---|
1702 | uint32_t decimalValue = 0;
|
---|
1703 |
|
---|
1704 | // Since parseOctal may be executed before parseDecimal,
|
---|
1705 | // the m_buffer8 may hold ascii digits.
|
---|
1706 | if (!m_buffer8.size()) {
|
---|
1707 | const unsigned maximumDigits = 10;
|
---|
1708 | int digit = maximumDigits - 1;
|
---|
1709 | // Temporary buffer for the digits. Makes easier
|
---|
1710 | // to reconstruct the input characters when needed.
|
---|
1711 | LChar digits[maximumDigits];
|
---|
1712 |
|
---|
1713 | do {
|
---|
1714 | if (m_current == '_') {
|
---|
1715 | if (UNLIKELY(!isASCIIDigit(peek(1)) || isLegacyLiteral))
|
---|
1716 | return std::nullopt;
|
---|
1717 |
|
---|
1718 | shift();
|
---|
1719 | }
|
---|
1720 |
|
---|
1721 | decimalValue = decimalValue * 10 + (m_current - '0');
|
---|
1722 | digits[digit] = m_current;
|
---|
1723 | shift();
|
---|
1724 | --digit;
|
---|
1725 | } while (isASCIIDigitOrSeparator(m_current) && digit >= 0);
|
---|
1726 |
|
---|
1727 | if (digit >= 0 && m_current != '.' && !isASCIIAlphaCaselessEqual(m_current, 'e') && m_current != 'n')
|
---|
1728 | return NumberParseResult { static_cast<double>(decimalValue) };
|
---|
1729 |
|
---|
1730 | for (int i = maximumDigits - 1; i > digit; --i)
|
---|
1731 | record8(digits[i]);
|
---|
1732 | }
|
---|
1733 |
|
---|
1734 | while (isASCIIDigitOrSeparator(m_current)) {
|
---|
1735 | if (m_current == '_') {
|
---|
1736 | if (UNLIKELY(!isASCIIDigit(peek(1)) || isLegacyLiteral))
|
---|
1737 | return std::nullopt;
|
---|
1738 |
|
---|
1739 | shift();
|
---|
1740 | }
|
---|
1741 |
|
---|
1742 | record8(m_current);
|
---|
1743 | shift();
|
---|
1744 | }
|
---|
1745 |
|
---|
1746 | if (UNLIKELY(m_current == 'n' && !isLegacyLiteral))
|
---|
1747 | return NumberParseResult { makeIdentifier(m_buffer8.data(), m_buffer8.size()) };
|
---|
1748 |
|
---|
1749 | return std::nullopt;
|
---|
1750 | }
|
---|
1751 |
|
---|
1752 | template <typename T>
|
---|
1753 | ALWAYS_INLINE bool Lexer<T>::parseNumberAfterDecimalPoint()
|
---|
1754 | {
|
---|
1755 | ASSERT(isASCIIDigit(m_current));
|
---|
1756 | record8('.');
|
---|
1757 |
|
---|
1758 | do {
|
---|
1759 | if (m_current == '_') {
|
---|
1760 | if (UNLIKELY(!isASCIIDigit(peek(1))))
|
---|
1761 | return false;
|
---|
1762 |
|
---|
1763 | shift();
|
---|
1764 | }
|
---|
1765 |
|
---|
1766 | record8(m_current);
|
---|
1767 | shift();
|
---|
1768 | } while (isASCIIDigitOrSeparator(m_current));
|
---|
1769 |
|
---|
1770 | return true;
|
---|
1771 | }
|
---|
1772 |
|
---|
1773 | template <typename T>
|
---|
1774 | ALWAYS_INLINE bool Lexer<T>::parseNumberAfterExponentIndicator()
|
---|
1775 | {
|
---|
1776 | record8('e');
|
---|
1777 | shift();
|
---|
1778 | if (m_current == '+' || m_current == '-') {
|
---|
1779 | record8(m_current);
|
---|
1780 | shift();
|
---|
1781 | }
|
---|
1782 |
|
---|
1783 | if (!isASCIIDigit(m_current))
|
---|
1784 | return false;
|
---|
1785 |
|
---|
1786 | do {
|
---|
1787 | if (m_current == '_') {
|
---|
1788 | if (UNLIKELY(!isASCIIDigit(peek(1))))
|
---|
1789 | return false;
|
---|
1790 |
|
---|
1791 | shift();
|
---|
1792 | }
|
---|
1793 |
|
---|
1794 | record8(m_current);
|
---|
1795 | shift();
|
---|
1796 | } while (isASCIIDigitOrSeparator(m_current));
|
---|
1797 |
|
---|
1798 | return true;
|
---|
1799 | }
|
---|
1800 |
|
---|
1801 | template <typename T>
|
---|
1802 | ALWAYS_INLINE bool Lexer<T>::parseMultilineComment()
|
---|
1803 | {
|
---|
1804 | while (true) {
|
---|
1805 | while (UNLIKELY(m_current == '*')) {
|
---|
1806 | shift();
|
---|
1807 | if (m_current == '/') {
|
---|
1808 | shift();
|
---|
1809 | return true;
|
---|
1810 | }
|
---|
1811 | }
|
---|
1812 |
|
---|
1813 | if (atEnd())
|
---|
1814 | return false;
|
---|
1815 |
|
---|
1816 | if (isLineTerminator(m_current)) {
|
---|
1817 | shiftLineTerminator();
|
---|
1818 | m_hasLineTerminatorBeforeToken = true;
|
---|
1819 | } else
|
---|
1820 | shift();
|
---|
1821 | }
|
---|
1822 | }
|
---|
1823 |
|
---|
1824 | template <typename T>
|
---|
1825 | ALWAYS_INLINE void Lexer<T>::parseCommentDirective()
|
---|
1826 | {
|
---|
1827 | // sourceURL and sourceMappingURL directives.
|
---|
1828 | if (!consume("source"))
|
---|
1829 | return;
|
---|
1830 |
|
---|
1831 | if (consume("URL=")) {
|
---|
1832 | m_sourceURLDirective = parseCommentDirectiveValue();
|
---|
1833 | return;
|
---|
1834 | }
|
---|
1835 |
|
---|
1836 | if (consume("MappingURL=")) {
|
---|
1837 | m_sourceMappingURLDirective = parseCommentDirectiveValue();
|
---|
1838 | return;
|
---|
1839 | }
|
---|
1840 | }
|
---|
1841 |
|
---|
1842 | template <typename T>
|
---|
1843 | ALWAYS_INLINE String Lexer<T>::parseCommentDirectiveValue()
|
---|
1844 | {
|
---|
1845 | skipWhitespace();
|
---|
1846 | bool hasNonLatin1 = false;
|
---|
1847 | const T* stringStart = currentSourcePtr();
|
---|
1848 | while (!isWhiteSpace(m_current) && !isLineTerminator(m_current) && m_current != '"' && m_current != '\'' && !atEnd()) {
|
---|
1849 | if (!isLatin1(m_current))
|
---|
1850 | hasNonLatin1 = true;
|
---|
1851 | shift();
|
---|
1852 | }
|
---|
1853 | const T* stringEnd = currentSourcePtr();
|
---|
1854 | skipWhitespace();
|
---|
1855 |
|
---|
1856 | if (!isLineTerminator(m_current) && !atEnd())
|
---|
1857 | return String();
|
---|
1858 |
|
---|
1859 | unsigned length = stringEnd - stringStart;
|
---|
1860 | if (hasNonLatin1) {
|
---|
1861 | UChar* buffer = nullptr;
|
---|
1862 | String result = StringImpl::createUninitialized(length, buffer);
|
---|
1863 | StringImpl::copyCharacters(buffer, stringStart, length);
|
---|
1864 | return result;
|
---|
1865 | }
|
---|
1866 |
|
---|
1867 | LChar* buffer = nullptr;
|
---|
1868 | String result = StringImpl::createUninitialized(length, buffer);
|
---|
1869 | StringImpl::copyCharacters(buffer, stringStart, length);
|
---|
1870 | return result;
|
---|
1871 | }
|
---|
1872 |
|
---|
1873 | template <typename T>
|
---|
1874 | template <unsigned length>
|
---|
1875 | ALWAYS_INLINE bool Lexer<T>::consume(const char (&input)[length])
|
---|
1876 | {
|
---|
1877 | unsigned lengthToCheck = length - 1; // Ignore the ending NULL byte in the string literal.
|
---|
1878 |
|
---|
1879 | unsigned i = 0;
|
---|
1880 | for (; i < lengthToCheck && m_current == input[i]; i++)
|
---|
1881 | shift();
|
---|
1882 |
|
---|
1883 | return i == lengthToCheck;
|
---|
1884 | }
|
---|
1885 |
|
---|
1886 | template <typename T>
|
---|
1887 | bool Lexer<T>::nextTokenIsColon()
|
---|
1888 | {
|
---|
1889 | const T* code = m_code;
|
---|
1890 | while (code < m_codeEnd && (isWhiteSpace(*code) || isLineTerminator(*code)))
|
---|
1891 | code++;
|
---|
1892 |
|
---|
1893 | return code < m_codeEnd && *code == ':';
|
---|
1894 | }
|
---|
1895 |
|
---|
1896 | template <typename T>
|
---|
1897 | void Lexer<T>::fillTokenInfo(JSToken* tokenRecord, JSTokenType token, int lineNumber, int endOffset, int lineStartOffset, JSTextPosition endPosition)
|
---|
1898 | {
|
---|
1899 | JSTokenLocation* tokenLocation = &tokenRecord->m_location;
|
---|
1900 | tokenLocation->line = lineNumber;
|
---|
1901 | tokenLocation->endOffset = endOffset;
|
---|
1902 | tokenLocation->lineStartOffset = lineStartOffset;
|
---|
1903 | ASSERT(tokenLocation->endOffset >= tokenLocation->lineStartOffset);
|
---|
1904 | tokenRecord->m_endPosition = endPosition;
|
---|
1905 | m_lastToken = token;
|
---|
1906 | }
|
---|
1907 |
|
---|
1908 | template <typename T>
|
---|
1909 | JSTokenType Lexer<T>::lexWithoutClearingLineTerminator(JSToken* tokenRecord, OptionSet<LexerFlags> lexerFlags, bool strictMode)
|
---|
1910 | {
|
---|
1911 | JSTokenData* tokenData = &tokenRecord->m_data;
|
---|
1912 | JSTokenLocation* tokenLocation = &tokenRecord->m_location;
|
---|
1913 | m_lastTokenLocation = JSTokenLocation(tokenRecord->m_location);
|
---|
1914 |
|
---|
1915 | ASSERT(!m_error);
|
---|
1916 | ASSERT(m_buffer8.isEmpty());
|
---|
1917 | ASSERT(m_buffer16.isEmpty());
|
---|
1918 |
|
---|
1919 | JSTokenType token = ERRORTOK;
|
---|
1920 |
|
---|
1921 | start:
|
---|
1922 | skipWhitespace();
|
---|
1923 |
|
---|
1924 | tokenLocation->startOffset = currentOffset();
|
---|
1925 | ASSERT(currentOffset() >= currentLineStartOffset());
|
---|
1926 | tokenRecord->m_startPosition = currentPosition();
|
---|
1927 |
|
---|
1928 | if (atEnd()) {
|
---|
1929 | token = EOFTOK;
|
---|
1930 | goto returnToken;
|
---|
1931 | }
|
---|
1932 |
|
---|
1933 | CharacterType type;
|
---|
1934 | if (LIKELY(isLatin1(m_current)))
|
---|
1935 | type = static_cast<CharacterType>(typesOfLatin1Characters[m_current]);
|
---|
1936 | else {
|
---|
1937 | UChar32 codePoint;
|
---|
1938 | U16_GET(m_code, 0, 0, m_codeEnd - m_code, codePoint);
|
---|
1939 | if (isNonLatin1IdentStart(codePoint))
|
---|
1940 | type = CharacterIdentifierStart;
|
---|
1941 | else if (isLineTerminator(m_current))
|
---|
1942 | type = CharacterLineTerminator;
|
---|
1943 | else
|
---|
1944 | type = CharacterInvalid;
|
---|
1945 | }
|
---|
1946 |
|
---|
1947 | switch (type) {
|
---|
1948 | case CharacterGreater:
|
---|
1949 | shift();
|
---|
1950 | if (m_current == '>') {
|
---|
1951 | shift();
|
---|
1952 | if (m_current == '>') {
|
---|
1953 | shift();
|
---|
1954 | if (m_current == '=') {
|
---|
1955 | shift();
|
---|
1956 | token = URSHIFTEQUAL;
|
---|
1957 | break;
|
---|
1958 | }
|
---|
1959 | token = URSHIFT;
|
---|
1960 | break;
|
---|
1961 | }
|
---|
1962 | if (m_current == '=') {
|
---|
1963 | shift();
|
---|
1964 | token = RSHIFTEQUAL;
|
---|
1965 | break;
|
---|
1966 | }
|
---|
1967 | token = RSHIFT;
|
---|
1968 | break;
|
---|
1969 | }
|
---|
1970 | if (m_current == '=') {
|
---|
1971 | shift();
|
---|
1972 | token = GE;
|
---|
1973 | break;
|
---|
1974 | }
|
---|
1975 | token = GT;
|
---|
1976 | break;
|
---|
1977 | case CharacterEqual: {
|
---|
1978 | if (peek(1) == '>') {
|
---|
1979 | token = ARROWFUNCTION;
|
---|
1980 | tokenData->line = lineNumber();
|
---|
1981 | tokenData->offset = currentOffset();
|
---|
1982 | tokenData->lineStartOffset = currentLineStartOffset();
|
---|
1983 | ASSERT(tokenData->offset >= tokenData->lineStartOffset);
|
---|
1984 | shift();
|
---|
1985 | shift();
|
---|
1986 | break;
|
---|
1987 | }
|
---|
1988 |
|
---|
1989 | shift();
|
---|
1990 | if (m_current == '=') {
|
---|
1991 | shift();
|
---|
1992 | if (m_current == '=') {
|
---|
1993 | shift();
|
---|
1994 | token = STREQ;
|
---|
1995 | break;
|
---|
1996 | }
|
---|
1997 | token = EQEQ;
|
---|
1998 | break;
|
---|
1999 | }
|
---|
2000 | token = EQUAL;
|
---|
2001 | break;
|
---|
2002 | }
|
---|
2003 | case CharacterLess:
|
---|
2004 | shift();
|
---|
2005 | if (m_current == '!' && peek(1) == '-' && peek(2) == '-') {
|
---|
2006 | if (m_scriptMode == JSParserScriptMode::Classic) {
|
---|
2007 | // <!-- marks the beginning of a line comment (for www usage)
|
---|
2008 | goto inSingleLineComment;
|
---|
2009 | }
|
---|
2010 | }
|
---|
2011 | if (m_current == '<') {
|
---|
2012 | shift();
|
---|
2013 | if (m_current == '=') {
|
---|
2014 | shift();
|
---|
2015 | token = LSHIFTEQUAL;
|
---|
2016 | break;
|
---|
2017 | }
|
---|
2018 | token = LSHIFT;
|
---|
2019 | break;
|
---|
2020 | }
|
---|
2021 | if (m_current == '=') {
|
---|
2022 | shift();
|
---|
2023 | token = LE;
|
---|
2024 | break;
|
---|
2025 | }
|
---|
2026 | token = LT;
|
---|
2027 | break;
|
---|
2028 | case CharacterExclamationMark:
|
---|
2029 | shift();
|
---|
2030 | if (m_current == '=') {
|
---|
2031 | shift();
|
---|
2032 | if (m_current == '=') {
|
---|
2033 | shift();
|
---|
2034 | token = STRNEQ;
|
---|
2035 | break;
|
---|
2036 | }
|
---|
2037 | token = NE;
|
---|
2038 | break;
|
---|
2039 | }
|
---|
2040 | token = EXCLAMATION;
|
---|
2041 | break;
|
---|
2042 | case CharacterAdd:
|
---|
2043 | shift();
|
---|
2044 | if (m_current == '+') {
|
---|
2045 | shift();
|
---|
2046 | token = (!m_hasLineTerminatorBeforeToken) ? PLUSPLUS : AUTOPLUSPLUS;
|
---|
2047 | break;
|
---|
2048 | }
|
---|
2049 | if (m_current == '=') {
|
---|
2050 | shift();
|
---|
2051 | token = PLUSEQUAL;
|
---|
2052 | break;
|
---|
2053 | }
|
---|
2054 | token = PLUS;
|
---|
2055 | break;
|
---|
2056 | case CharacterSub:
|
---|
2057 | shift();
|
---|
2058 | if (m_current == '-') {
|
---|
2059 | shift();
|
---|
2060 | if ((m_atLineStart || m_hasLineTerminatorBeforeToken) && m_current == '>') {
|
---|
2061 | if (m_scriptMode == JSParserScriptMode::Classic) {
|
---|
2062 | shift();
|
---|
2063 | goto inSingleLineComment;
|
---|
2064 | }
|
---|
2065 | }
|
---|
2066 | token = (!m_hasLineTerminatorBeforeToken) ? MINUSMINUS : AUTOMINUSMINUS;
|
---|
2067 | break;
|
---|
2068 | }
|
---|
2069 | if (m_current == '=') {
|
---|
2070 | shift();
|
---|
2071 | token = MINUSEQUAL;
|
---|
2072 | break;
|
---|
2073 | }
|
---|
2074 | token = MINUS;
|
---|
2075 | break;
|
---|
2076 | case CharacterMultiply:
|
---|
2077 | shift();
|
---|
2078 | if (m_current == '=') {
|
---|
2079 | shift();
|
---|
2080 | token = MULTEQUAL;
|
---|
2081 | break;
|
---|
2082 | }
|
---|
2083 | if (m_current == '*') {
|
---|
2084 | shift();
|
---|
2085 | if (m_current == '=') {
|
---|
2086 | shift();
|
---|
2087 | token = POWEQUAL;
|
---|
2088 | break;
|
---|
2089 | }
|
---|
2090 | token = POW;
|
---|
2091 | break;
|
---|
2092 | }
|
---|
2093 | token = TIMES;
|
---|
2094 | break;
|
---|
2095 | case CharacterSlash:
|
---|
2096 | shift();
|
---|
2097 | if (m_current == '/') {
|
---|
2098 | shift();
|
---|
2099 | goto inSingleLineCommentCheckForDirectives;
|
---|
2100 | }
|
---|
2101 | if (m_current == '*') {
|
---|
2102 | shift();
|
---|
2103 | auto startLineNumber = m_lineNumber;
|
---|
2104 | auto startLineStartOffset = currentLineStartOffset();
|
---|
2105 | if (parseMultilineComment())
|
---|
2106 | goto start;
|
---|
2107 | m_lexErrorMessage = "Multiline comment was not closed properly"_s;
|
---|
2108 | token = UNTERMINATED_MULTILINE_COMMENT_ERRORTOK;
|
---|
2109 | m_error = true;
|
---|
2110 | fillTokenInfo(tokenRecord, token, startLineNumber, currentOffset(), startLineStartOffset, currentPosition());
|
---|
2111 | return token;
|
---|
2112 | }
|
---|
2113 | if (m_current == '=') {
|
---|
2114 | shift();
|
---|
2115 | token = DIVEQUAL;
|
---|
2116 | break;
|
---|
2117 | }
|
---|
2118 | token = DIVIDE;
|
---|
2119 | break;
|
---|
2120 | case CharacterAnd:
|
---|
2121 | shift();
|
---|
2122 | if (m_current == '&') {
|
---|
2123 | shift();
|
---|
2124 | if (m_current == '=') {
|
---|
2125 | shift();
|
---|
2126 | token = ANDEQUAL;
|
---|
2127 | break;
|
---|
2128 | }
|
---|
2129 | token = AND;
|
---|
2130 | break;
|
---|
2131 | }
|
---|
2132 | if (m_current == '=') {
|
---|
2133 | shift();
|
---|
2134 | token = BITANDEQUAL;
|
---|
2135 | break;
|
---|
2136 | }
|
---|
2137 | token = BITAND;
|
---|
2138 | break;
|
---|
2139 | case CharacterXor:
|
---|
2140 | shift();
|
---|
2141 | if (m_current == '=') {
|
---|
2142 | shift();
|
---|
2143 | token = BITXOREQUAL;
|
---|
2144 | break;
|
---|
2145 | }
|
---|
2146 | token = BITXOR;
|
---|
2147 | break;
|
---|
2148 | case CharacterModulo:
|
---|
2149 | shift();
|
---|
2150 | if (m_current == '=') {
|
---|
2151 | shift();
|
---|
2152 | token = MODEQUAL;
|
---|
2153 | break;
|
---|
2154 | }
|
---|
2155 | token = MOD;
|
---|
2156 | break;
|
---|
2157 | case CharacterOr:
|
---|
2158 | shift();
|
---|
2159 | if (m_current == '=') {
|
---|
2160 | shift();
|
---|
2161 | token = BITOREQUAL;
|
---|
2162 | break;
|
---|
2163 | }
|
---|
2164 | if (m_current == '|') {
|
---|
2165 | shift();
|
---|
2166 | if (m_current == '=') {
|
---|
2167 | shift();
|
---|
2168 | token = OREQUAL;
|
---|
2169 | break;
|
---|
2170 | }
|
---|
2171 | token = OR;
|
---|
2172 | break;
|
---|
2173 | }
|
---|
2174 | token = BITOR;
|
---|
2175 | break;
|
---|
2176 | case CharacterOpenParen:
|
---|
2177 | token = OPENPAREN;
|
---|
2178 | tokenData->line = lineNumber();
|
---|
2179 | tokenData->offset = currentOffset();
|
---|
2180 | tokenData->lineStartOffset = currentLineStartOffset();
|
---|
2181 | shift();
|
---|
2182 | break;
|
---|
2183 | case CharacterCloseParen:
|
---|
2184 | token = CLOSEPAREN;
|
---|
2185 | shift();
|
---|
2186 | break;
|
---|
2187 | case CharacterOpenBracket:
|
---|
2188 | token = OPENBRACKET;
|
---|
2189 | shift();
|
---|
2190 | break;
|
---|
2191 | case CharacterCloseBracket:
|
---|
2192 | token = CLOSEBRACKET;
|
---|
2193 | shift();
|
---|
2194 | break;
|
---|
2195 | case CharacterComma:
|
---|
2196 | token = COMMA;
|
---|
2197 | shift();
|
---|
2198 | break;
|
---|
2199 | case CharacterColon:
|
---|
2200 | token = COLON;
|
---|
2201 | shift();
|
---|
2202 | break;
|
---|
2203 | case CharacterQuestion:
|
---|
2204 | shift();
|
---|
2205 | if (m_current == '?') {
|
---|
2206 | shift();
|
---|
2207 | if (m_current == '=') {
|
---|
2208 | shift();
|
---|
2209 | token = COALESCEEQUAL;
|
---|
2210 | break;
|
---|
2211 | }
|
---|
2212 | token = COALESCE;
|
---|
2213 | break;
|
---|
2214 | }
|
---|
2215 | if (m_current == '.' && !isASCIIDigit(peek(1))) {
|
---|
2216 | shift();
|
---|
2217 | token = QUESTIONDOT;
|
---|
2218 | break;
|
---|
2219 | }
|
---|
2220 | token = QUESTION;
|
---|
2221 | break;
|
---|
2222 | case CharacterTilde:
|
---|
2223 | token = TILDE;
|
---|
2224 | shift();
|
---|
2225 | break;
|
---|
2226 | case CharacterSemicolon:
|
---|
2227 | shift();
|
---|
2228 | token = SEMICOLON;
|
---|
2229 | break;
|
---|
2230 | case CharacterBackQuote:
|
---|
2231 | shift();
|
---|
2232 | token = BACKQUOTE;
|
---|
2233 | break;
|
---|
2234 | case CharacterOpenBrace:
|
---|
2235 | tokenData->line = lineNumber();
|
---|
2236 | tokenData->offset = currentOffset();
|
---|
2237 | tokenData->lineStartOffset = currentLineStartOffset();
|
---|
2238 | ASSERT(tokenData->offset >= tokenData->lineStartOffset);
|
---|
2239 | shift();
|
---|
2240 | token = OPENBRACE;
|
---|
2241 | break;
|
---|
2242 | case CharacterCloseBrace:
|
---|
2243 | tokenData->line = lineNumber();
|
---|
2244 | tokenData->offset = currentOffset();
|
---|
2245 | tokenData->lineStartOffset = currentLineStartOffset();
|
---|
2246 | ASSERT(tokenData->offset >= tokenData->lineStartOffset);
|
---|
2247 | shift();
|
---|
2248 | token = CLOSEBRACE;
|
---|
2249 | break;
|
---|
2250 | case CharacterDot:
|
---|
2251 | shift();
|
---|
2252 | if (!isASCIIDigit(m_current)) {
|
---|
2253 | if (UNLIKELY((m_current == '.') && (peek(1) == '.'))) {
|
---|
2254 | shift();
|
---|
2255 | shift();
|
---|
2256 | token = DOTDOTDOT;
|
---|
2257 | break;
|
---|
2258 | }
|
---|
2259 | token = DOT;
|
---|
2260 | break;
|
---|
2261 | }
|
---|
2262 | if (UNLIKELY(!parseNumberAfterDecimalPoint())) {
|
---|
2263 | m_lexErrorMessage = "Non-number found after decimal point"_s;
|
---|
2264 | token = atEnd() ? UNTERMINATED_NUMERIC_LITERAL_ERRORTOK : INVALID_NUMERIC_LITERAL_ERRORTOK;
|
---|
2265 | goto returnError;
|
---|
2266 | }
|
---|
2267 | token = DOUBLE;
|
---|
2268 | if (UNLIKELY(isASCIIAlphaCaselessEqual(m_current, 'e') && !parseNumberAfterExponentIndicator())) {
|
---|
2269 | m_lexErrorMessage = "Non-number found after exponent indicator"_s;
|
---|
2270 | token = atEnd() ? UNTERMINATED_NUMERIC_LITERAL_ERRORTOK : INVALID_NUMERIC_LITERAL_ERRORTOK;
|
---|
2271 | goto returnError;
|
---|
2272 | }
|
---|
2273 | size_t parsedLength;
|
---|
2274 | tokenData->doubleValue = parseDouble(m_buffer8.data(), m_buffer8.size(), parsedLength);
|
---|
2275 | if (token == INTEGER)
|
---|
2276 | token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
|
---|
2277 |
|
---|
2278 | if (LIKELY(cannotBeIdentStart(m_current))) {
|
---|
2279 | m_buffer8.shrink(0);
|
---|
2280 | break;
|
---|
2281 | }
|
---|
2282 |
|
---|
2283 | if (UNLIKELY(isIdentStart(currentCodePoint()))) {
|
---|
2284 | m_lexErrorMessage = "No identifiers allowed directly after numeric literal"_s;
|
---|
2285 | token = atEnd() ? UNTERMINATED_NUMERIC_LITERAL_ERRORTOK : INVALID_NUMERIC_LITERAL_ERRORTOK;
|
---|
2286 | goto returnError;
|
---|
2287 | }
|
---|
2288 | m_buffer8.shrink(0);
|
---|
2289 | break;
|
---|
2290 | case CharacterZero:
|
---|
2291 | shift();
|
---|
2292 | if (isASCIIAlphaCaselessEqual(m_current, 'x')) {
|
---|
2293 | if (UNLIKELY(!isASCIIHexDigit(peek(1)))) {
|
---|
2294 | m_lexErrorMessage = "No hexadecimal digits after '0x'"_s;
|
---|
2295 | token = UNTERMINATED_HEX_NUMBER_ERRORTOK;
|
---|
2296 | goto returnError;
|
---|
2297 | }
|
---|
2298 |
|
---|
2299 | // Shift out the 'x' prefix.
|
---|
2300 | shift();
|
---|
2301 |
|
---|
2302 | auto parseNumberResult = parseHex();
|
---|
2303 | if (!parseNumberResult)
|
---|
2304 | tokenData->doubleValue = 0;
|
---|
2305 | else if (std::holds_alternative<double>(*parseNumberResult))
|
---|
2306 | tokenData->doubleValue = std::get<double>(*parseNumberResult);
|
---|
2307 | else {
|
---|
2308 | token = BIGINT;
|
---|
2309 | shift();
|
---|
2310 | tokenData->bigIntString = std::get<const Identifier*>(*parseNumberResult);
|
---|
2311 | tokenData->radix = 16;
|
---|
2312 | }
|
---|
2313 |
|
---|
2314 | if (LIKELY(cannotBeIdentStart(m_current))) {
|
---|
2315 | if (LIKELY(token != BIGINT))
|
---|
2316 | token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
|
---|
2317 | m_buffer8.shrink(0);
|
---|
2318 | break;
|
---|
2319 | }
|
---|
2320 |
|
---|
2321 | if (UNLIKELY(isIdentStart(currentCodePoint()))) {
|
---|
2322 | m_lexErrorMessage = "No space between hexadecimal literal and identifier"_s;
|
---|
2323 | token = UNTERMINATED_HEX_NUMBER_ERRORTOK;
|
---|
2324 | goto returnError;
|
---|
2325 | }
|
---|
2326 | if (LIKELY(token != BIGINT))
|
---|
2327 | token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
|
---|
2328 | m_buffer8.shrink(0);
|
---|
2329 | break;
|
---|
2330 | }
|
---|
2331 | if (isASCIIAlphaCaselessEqual(m_current, 'b')) {
|
---|
2332 | if (UNLIKELY(!isASCIIBinaryDigit(peek(1)))) {
|
---|
2333 | m_lexErrorMessage = "No binary digits after '0b'"_s;
|
---|
2334 | token = UNTERMINATED_BINARY_NUMBER_ERRORTOK;
|
---|
2335 | goto returnError;
|
---|
2336 | }
|
---|
2337 |
|
---|
2338 | // Shift out the 'b' prefix.
|
---|
2339 | shift();
|
---|
2340 |
|
---|
2341 | auto parseNumberResult = parseBinary();
|
---|
2342 | if (!parseNumberResult)
|
---|
2343 | tokenData->doubleValue = 0;
|
---|
2344 | else if (std::holds_alternative<double>(*parseNumberResult))
|
---|
2345 | tokenData->doubleValue = std::get<double>(*parseNumberResult);
|
---|
2346 | else {
|
---|
2347 | token = BIGINT;
|
---|
2348 | shift();
|
---|
2349 | tokenData->bigIntString = std::get<const Identifier*>(*parseNumberResult);
|
---|
2350 | tokenData->radix = 2;
|
---|
2351 | }
|
---|
2352 |
|
---|
2353 | if (LIKELY(cannotBeIdentStart(m_current))) {
|
---|
2354 | if (LIKELY(token != BIGINT))
|
---|
2355 | token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
|
---|
2356 | m_buffer8.shrink(0);
|
---|
2357 | break;
|
---|
2358 | }
|
---|
2359 |
|
---|
2360 | if (UNLIKELY(isIdentStart(currentCodePoint()))) {
|
---|
2361 | m_lexErrorMessage = "No space between binary literal and identifier"_s;
|
---|
2362 | token = UNTERMINATED_BINARY_NUMBER_ERRORTOK;
|
---|
2363 | goto returnError;
|
---|
2364 | }
|
---|
2365 | if (LIKELY(token != BIGINT))
|
---|
2366 | token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
|
---|
2367 | m_buffer8.shrink(0);
|
---|
2368 | break;
|
---|
2369 | }
|
---|
2370 |
|
---|
2371 | if (isASCIIAlphaCaselessEqual(m_current, 'o')) {
|
---|
2372 | if (UNLIKELY(!isASCIIOctalDigit(peek(1)))) {
|
---|
2373 | m_lexErrorMessage = "No octal digits after '0o'"_s;
|
---|
2374 | token = UNTERMINATED_OCTAL_NUMBER_ERRORTOK;
|
---|
2375 | goto returnError;
|
---|
2376 | }
|
---|
2377 |
|
---|
2378 | // Shift out the 'o' prefix.
|
---|
2379 | shift();
|
---|
2380 |
|
---|
2381 | auto parseNumberResult = parseOctal();
|
---|
2382 | if (!parseNumberResult)
|
---|
2383 | tokenData->doubleValue = 0;
|
---|
2384 | else if (std::holds_alternative<double>(*parseNumberResult))
|
---|
2385 | tokenData->doubleValue = std::get<double>(*parseNumberResult);
|
---|
2386 | else {
|
---|
2387 | token = BIGINT;
|
---|
2388 | shift();
|
---|
2389 | tokenData->bigIntString = std::get<const Identifier*>(*parseNumberResult);
|
---|
2390 | tokenData->radix = 8;
|
---|
2391 | }
|
---|
2392 |
|
---|
2393 | if (LIKELY(cannotBeIdentStart(m_current))) {
|
---|
2394 | if (LIKELY(token != BIGINT))
|
---|
2395 | token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
|
---|
2396 | m_buffer8.shrink(0);
|
---|
2397 | break;
|
---|
2398 | }
|
---|
2399 |
|
---|
2400 | if (UNLIKELY(isIdentStart(currentCodePoint()))) {
|
---|
2401 | m_lexErrorMessage = "No space between octal literal and identifier"_s;
|
---|
2402 | token = UNTERMINATED_OCTAL_NUMBER_ERRORTOK;
|
---|
2403 | goto returnError;
|
---|
2404 | }
|
---|
2405 | if (LIKELY(token != BIGINT))
|
---|
2406 | token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
|
---|
2407 | m_buffer8.shrink(0);
|
---|
2408 | break;
|
---|
2409 | }
|
---|
2410 |
|
---|
2411 | if (UNLIKELY(m_current == '_')) {
|
---|
2412 | m_lexErrorMessage = "Numeric literals may not begin with 0_"_s;
|
---|
2413 | token = UNTERMINATED_OCTAL_NUMBER_ERRORTOK;
|
---|
2414 | goto returnError;
|
---|
2415 | }
|
---|
2416 |
|
---|
2417 | record8('0');
|
---|
2418 | if (UNLIKELY(strictMode && isASCIIDigit(m_current))) {
|
---|
2419 | m_lexErrorMessage = "Decimal integer literals with a leading zero are forbidden in strict mode"_s;
|
---|
2420 | token = UNTERMINATED_OCTAL_NUMBER_ERRORTOK;
|
---|
2421 | goto returnError;
|
---|
2422 | }
|
---|
2423 | if (isASCIIOctalDigit(m_current)) {
|
---|
2424 | auto parseNumberResult = parseOctal();
|
---|
2425 | if (parseNumberResult && std::holds_alternative<double>(*parseNumberResult)) {
|
---|
2426 | tokenData->doubleValue = std::get<double>(*parseNumberResult);
|
---|
2427 | token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
|
---|
2428 | }
|
---|
2429 | }
|
---|
2430 | FALLTHROUGH;
|
---|
2431 | case CharacterNumber:
|
---|
2432 | if (LIKELY(token != INTEGER && token != DOUBLE)) {
|
---|
2433 | auto parseNumberResult = parseDecimal();
|
---|
2434 | if (parseNumberResult) {
|
---|
2435 | if (std::holds_alternative<double>(*parseNumberResult)) {
|
---|
2436 | tokenData->doubleValue = std::get<double>(*parseNumberResult);
|
---|
2437 | token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
|
---|
2438 | } else {
|
---|
2439 | token = BIGINT;
|
---|
2440 | shift();
|
---|
2441 | tokenData->bigIntString = std::get<const Identifier*>(*parseNumberResult);
|
---|
2442 | tokenData->radix = 10;
|
---|
2443 | }
|
---|
2444 | } else {
|
---|
2445 | token = INTEGER;
|
---|
2446 | if (m_current == '.') {
|
---|
2447 | shift();
|
---|
2448 | if (UNLIKELY(isASCIIDigit(m_current) && !parseNumberAfterDecimalPoint())) {
|
---|
2449 | m_lexErrorMessage = "Non-number found after decimal point"_s;
|
---|
2450 | token = atEnd() ? UNTERMINATED_NUMERIC_LITERAL_ERRORTOK : INVALID_NUMERIC_LITERAL_ERRORTOK;
|
---|
2451 | goto returnError;
|
---|
2452 | }
|
---|
2453 | token = DOUBLE;
|
---|
2454 | }
|
---|
2455 | if (UNLIKELY(isASCIIAlphaCaselessEqual(m_current, 'e') && !parseNumberAfterExponentIndicator())) {
|
---|
2456 | m_lexErrorMessage = "Non-number found after exponent indicator"_s;
|
---|
2457 | token = atEnd() ? UNTERMINATED_NUMERIC_LITERAL_ERRORTOK : INVALID_NUMERIC_LITERAL_ERRORTOK;
|
---|
2458 | goto returnError;
|
---|
2459 | }
|
---|
2460 | size_t parsedLength;
|
---|
2461 | tokenData->doubleValue = parseDouble(m_buffer8.data(), m_buffer8.size(), parsedLength);
|
---|
2462 | if (token == INTEGER)
|
---|
2463 | token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
|
---|
2464 | }
|
---|
2465 | }
|
---|
2466 |
|
---|
2467 | if (LIKELY(cannotBeIdentStart(m_current))) {
|
---|
2468 | m_buffer8.shrink(0);
|
---|
2469 | break;
|
---|
2470 | }
|
---|
2471 |
|
---|
2472 | if (UNLIKELY(isIdentStart(currentCodePoint()))) {
|
---|
2473 | m_lexErrorMessage = "No identifiers allowed directly after numeric literal"_s;
|
---|
2474 | token = atEnd() ? UNTERMINATED_NUMERIC_LITERAL_ERRORTOK : INVALID_NUMERIC_LITERAL_ERRORTOK;
|
---|
2475 | goto returnError;
|
---|
2476 | }
|
---|
2477 | m_buffer8.shrink(0);
|
---|
2478 | break;
|
---|
2479 | case CharacterQuote: {
|
---|
2480 | auto startLineNumber = m_lineNumber;
|
---|
2481 | auto startLineStartOffset = currentLineStartOffset();
|
---|
2482 | StringParseResult result = StringCannotBeParsed;
|
---|
2483 | if (lexerFlags.contains(LexerFlags::DontBuildStrings))
|
---|
2484 | result = parseString<false>(tokenData, strictMode);
|
---|
2485 | else
|
---|
2486 | result = parseString<true>(tokenData, strictMode);
|
---|
2487 |
|
---|
2488 | if (UNLIKELY(result != StringParsedSuccessfully)) {
|
---|
2489 | token = result == StringUnterminated ? UNTERMINATED_STRING_LITERAL_ERRORTOK : INVALID_STRING_LITERAL_ERRORTOK;
|
---|
2490 | m_error = true;
|
---|
2491 | fillTokenInfo(tokenRecord, token, startLineNumber, currentOffset(), startLineStartOffset, currentPosition());
|
---|
2492 | return token;
|
---|
2493 | }
|
---|
2494 | shift();
|
---|
2495 | token = STRING;
|
---|
2496 | m_atLineStart = false;
|
---|
2497 | fillTokenInfo(tokenRecord, token, startLineNumber, currentOffset(), startLineStartOffset, currentPosition());
|
---|
2498 | return token;
|
---|
2499 | }
|
---|
2500 | case CharacterIdentifierStart: {
|
---|
2501 | if constexpr (ASSERT_ENABLED) {
|
---|
2502 | UChar32 codePoint;
|
---|
2503 | U16_GET(m_code, 0, 0, m_codeEnd - m_code, codePoint);
|
---|
2504 | ASSERT(isIdentStart(codePoint));
|
---|
2505 | }
|
---|
2506 | FALLTHROUGH;
|
---|
2507 | }
|
---|
2508 | case CharacterBackSlash:
|
---|
2509 | parseIdent:
|
---|
2510 | if (lexerFlags.contains(LexerFlags::DontBuildKeywords))
|
---|
2511 | token = parseIdentifier<false>(tokenData, lexerFlags, strictMode);
|
---|
2512 | else
|
---|
2513 | token = parseIdentifier<true>(tokenData, lexerFlags, strictMode);
|
---|
2514 | break;
|
---|
2515 | case CharacterLineTerminator:
|
---|
2516 | ASSERT(isLineTerminator(m_current));
|
---|
2517 | shiftLineTerminator();
|
---|
2518 | m_atLineStart = true;
|
---|
2519 | m_hasLineTerminatorBeforeToken = true;
|
---|
2520 | goto start;
|
---|
2521 | case CharacterHash: {
|
---|
2522 | // Hashbang is only permitted at the start of the source text.
|
---|
2523 | auto next = peek(1);
|
---|
2524 | if (next == '!' && !currentOffset()) {
|
---|
2525 | shift();
|
---|
2526 | shift();
|
---|
2527 | goto inSingleLineComment;
|
---|
2528 | }
|
---|
2529 | // Otherwise, it could be a valid PrivateName.
|
---|
2530 | if (isSingleCharacterIdentStart(next) || next == '\\') {
|
---|
2531 | lexerFlags.remove(LexerFlags::DontBuildKeywords);
|
---|
2532 | goto parseIdent;
|
---|
2533 | }
|
---|
2534 | goto invalidCharacter;
|
---|
2535 | }
|
---|
2536 | case CharacterPrivateIdentifierStart:
|
---|
2537 | if (m_parsingBuiltinFunction)
|
---|
2538 | goto parseIdent;
|
---|
2539 | goto invalidCharacter;
|
---|
2540 | case CharacterOtherIdentifierPart:
|
---|
2541 | case CharacterInvalid:
|
---|
2542 | goto invalidCharacter;
|
---|
2543 | default:
|
---|
2544 | RELEASE_ASSERT_NOT_REACHED();
|
---|
2545 | m_lexErrorMessage = "Internal Error"_s;
|
---|
2546 | token = ERRORTOK;
|
---|
2547 | goto returnError;
|
---|
2548 | }
|
---|
2549 |
|
---|
2550 | m_atLineStart = false;
|
---|
2551 | goto returnToken;
|
---|
2552 |
|
---|
2553 | inSingleLineCommentCheckForDirectives:
|
---|
2554 | // Script comment directives like "//# sourceURL=test.js".
|
---|
2555 | if (UNLIKELY((m_current == '#' || m_current == '@') && isWhiteSpace(peek(1)))) {
|
---|
2556 | shift();
|
---|
2557 | shift();
|
---|
2558 | parseCommentDirective();
|
---|
2559 | }
|
---|
2560 | // Fall through to complete single line comment parsing.
|
---|
2561 |
|
---|
2562 | inSingleLineComment:
|
---|
2563 | {
|
---|
2564 | auto lineNumber = m_lineNumber;
|
---|
2565 | auto endOffset = currentOffset();
|
---|
2566 | auto lineStartOffset = currentLineStartOffset();
|
---|
2567 | auto endPosition = currentPosition();
|
---|
2568 |
|
---|
2569 | while (!isLineTerminator(m_current)) {
|
---|
2570 | if (atEnd()) {
|
---|
2571 | token = EOFTOK;
|
---|
2572 | fillTokenInfo(tokenRecord, token, lineNumber, endOffset, lineStartOffset, endPosition);
|
---|
2573 | return token;
|
---|
2574 | }
|
---|
2575 | shift();
|
---|
2576 | }
|
---|
2577 | shiftLineTerminator();
|
---|
2578 | m_atLineStart = true;
|
---|
2579 | m_hasLineTerminatorBeforeToken = true;
|
---|
2580 | if (!lastTokenWasRestrKeyword())
|
---|
2581 | goto start;
|
---|
2582 |
|
---|
2583 | token = SEMICOLON;
|
---|
2584 | fillTokenInfo(tokenRecord, token, lineNumber, endOffset, lineStartOffset, endPosition);
|
---|
2585 | return token;
|
---|
2586 | }
|
---|
2587 |
|
---|
2588 | returnToken:
|
---|
2589 | fillTokenInfo(tokenRecord, token, m_lineNumber, currentOffset(), currentLineStartOffset(), currentPosition());
|
---|
2590 | return token;
|
---|
2591 |
|
---|
2592 | invalidCharacter:
|
---|
2593 | m_lexErrorMessage = invalidCharacterMessage();
|
---|
2594 | token = ERRORTOK;
|
---|
2595 | // Falls through to return error.
|
---|
2596 |
|
---|
2597 | returnError:
|
---|
2598 | m_error = true;
|
---|
2599 | fillTokenInfo(tokenRecord, token, m_lineNumber, currentOffset(), currentLineStartOffset(), currentPosition());
|
---|
2600 | RELEASE_ASSERT(token & CanBeErrorTokenFlag);
|
---|
2601 | return token;
|
---|
2602 | }
|
---|
2603 |
|
---|
2604 | template <typename T>
|
---|
2605 | static inline void orCharacter(UChar&, UChar);
|
---|
2606 |
|
---|
2607 | template <>
|
---|
2608 | inline void orCharacter<LChar>(UChar&, UChar) { }
|
---|
2609 |
|
---|
2610 | template <>
|
---|
2611 | inline void orCharacter<UChar>(UChar& orAccumulator, UChar character)
|
---|
2612 | {
|
---|
2613 | orAccumulator |= character;
|
---|
2614 | }
|
---|
2615 |
|
---|
2616 | template <typename T>
|
---|
2617 | JSTokenType Lexer<T>::scanRegExp(JSToken* tokenRecord, UChar patternPrefix)
|
---|
2618 | {
|
---|
2619 | JSTokenData* tokenData = &tokenRecord->m_data;
|
---|
2620 | ASSERT(m_buffer16.isEmpty());
|
---|
2621 |
|
---|
2622 | bool lastWasEscape = false;
|
---|
2623 | bool inBrackets = false;
|
---|
2624 | UChar charactersOredTogether = 0;
|
---|
2625 |
|
---|
2626 | if (patternPrefix) {
|
---|
2627 | ASSERT(!isLineTerminator(patternPrefix));
|
---|
2628 | ASSERT(patternPrefix != '/');
|
---|
2629 | ASSERT(patternPrefix != '[');
|
---|
2630 | record16(patternPrefix);
|
---|
2631 | }
|
---|
2632 |
|
---|
2633 | while (true) {
|
---|
2634 | if (isLineTerminator(m_current) || atEnd()) {
|
---|
2635 | m_buffer16.shrink(0);
|
---|
2636 | JSTokenType token = UNTERMINATED_REGEXP_LITERAL_ERRORTOK;
|
---|
2637 | fillTokenInfo(tokenRecord, token, m_lineNumber, currentOffset(), currentLineStartOffset(), currentPosition());
|
---|
2638 | m_error = true;
|
---|
2639 | m_lexErrorMessage = makeString("Unterminated regular expression literal '", getToken(*tokenRecord), "'");
|
---|
2640 | return token;
|
---|
2641 | }
|
---|
2642 |
|
---|
2643 | T prev = m_current;
|
---|
2644 |
|
---|
2645 | shift();
|
---|
2646 |
|
---|
2647 | if (prev == '/' && !lastWasEscape && !inBrackets)
|
---|
2648 | break;
|
---|
2649 |
|
---|
2650 | record16(prev);
|
---|
2651 | orCharacter<T>(charactersOredTogether, prev);
|
---|
2652 |
|
---|
2653 | if (lastWasEscape) {
|
---|
2654 | lastWasEscape = false;
|
---|
2655 | continue;
|
---|
2656 | }
|
---|
2657 |
|
---|
2658 | switch (prev) {
|
---|
2659 | case '[':
|
---|
2660 | inBrackets = true;
|
---|
2661 | break;
|
---|
2662 | case ']':
|
---|
2663 | inBrackets = false;
|
---|
2664 | break;
|
---|
2665 | case '\\':
|
---|
2666 | lastWasEscape = true;
|
---|
2667 | break;
|
---|
2668 | }
|
---|
2669 | }
|
---|
2670 |
|
---|
2671 | tokenData->pattern = makeRightSizedIdentifier(m_buffer16.data(), m_buffer16.size(), charactersOredTogether);
|
---|
2672 | m_buffer16.shrink(0);
|
---|
2673 |
|
---|
2674 | ASSERT(m_buffer8.isEmpty());
|
---|
2675 | while (LIKELY(isLatin1(m_current)) && isIdentPart(static_cast<LChar>(m_current))) {
|
---|
2676 | record8(static_cast<LChar>(m_current));
|
---|
2677 | shift();
|
---|
2678 | }
|
---|
2679 |
|
---|
2680 | // Normally this would not be a lex error but dealing with surrogate pairs here is annoying and it's going to be an error anyway...
|
---|
2681 | if (UNLIKELY(!isLatin1(m_current) && !isWhiteSpace(m_current) && !isLineTerminator(m_current))) {
|
---|
2682 | m_buffer8.shrink(0);
|
---|
2683 | JSTokenType token = INVALID_IDENTIFIER_UNICODE_ERRORTOK;
|
---|
2684 | fillTokenInfo(tokenRecord, token, m_lineNumber, currentOffset(), currentLineStartOffset(), currentPosition());
|
---|
2685 | m_error = true;
|
---|
2686 | String codePoint = String::fromCodePoint(currentCodePoint());
|
---|
2687 | if (!codePoint)
|
---|
2688 | codePoint = "`invalid unicode character`"_s;
|
---|
2689 | m_lexErrorMessage = makeString("Invalid non-latin character in RexExp literal's flags '", getToken(*tokenRecord), codePoint, "'");
|
---|
2690 | return token;
|
---|
2691 | }
|
---|
2692 |
|
---|
2693 | tokenData->flags = makeIdentifier(m_buffer8.data(), m_buffer8.size());
|
---|
2694 | m_buffer8.shrink(0);
|
---|
2695 |
|
---|
2696 | // Since RegExp always ends with / or flags (IdentifierPart), m_atLineStart always becomes false.
|
---|
2697 | m_atLineStart = false;
|
---|
2698 |
|
---|
2699 | JSTokenType token = REGEXP;
|
---|
2700 | fillTokenInfo(tokenRecord, token, m_lineNumber, currentOffset(), currentLineStartOffset(), currentPosition());
|
---|
2701 | return token;
|
---|
2702 | }
|
---|
2703 |
|
---|
2704 | template <typename T>
|
---|
2705 | JSTokenType Lexer<T>::scanTemplateString(JSToken* tokenRecord, RawStringsBuildMode rawStringsBuildMode)
|
---|
2706 | {
|
---|
2707 | JSTokenData* tokenData = &tokenRecord->m_data;
|
---|
2708 | ASSERT(!m_error);
|
---|
2709 | ASSERT(m_buffer16.isEmpty());
|
---|
2710 |
|
---|
2711 | int startingLineStartOffset = currentLineStartOffset();
|
---|
2712 | int startingLineNumber = lineNumber();
|
---|
2713 |
|
---|
2714 | // Leading backquote ` (for template head) or closing brace } (for template trailing) are already shifted in the previous token scan.
|
---|
2715 | // So in this re-scan phase, shift() is not needed here.
|
---|
2716 | StringParseResult result = parseTemplateLiteral(tokenData, rawStringsBuildMode);
|
---|
2717 | JSTokenType token = ERRORTOK;
|
---|
2718 | if (UNLIKELY(result != StringParsedSuccessfully)) {
|
---|
2719 | token = result == StringUnterminated ? UNTERMINATED_TEMPLATE_LITERAL_ERRORTOK : INVALID_TEMPLATE_LITERAL_ERRORTOK;
|
---|
2720 | m_error = true;
|
---|
2721 | } else
|
---|
2722 | token = TEMPLATE;
|
---|
2723 |
|
---|
2724 | // Since TemplateString always ends with ` or }, m_atLineStart always becomes false.
|
---|
2725 | m_atLineStart = false;
|
---|
2726 | fillTokenInfo(tokenRecord, token, startingLineNumber, currentOffset(), startingLineStartOffset, currentPosition());
|
---|
2727 | return token;
|
---|
2728 | }
|
---|
2729 |
|
---|
2730 | template <typename T>
|
---|
2731 | void Lexer<T>::clear()
|
---|
2732 | {
|
---|
2733 | m_arena = nullptr;
|
---|
2734 |
|
---|
2735 | Vector<LChar> newBuffer8;
|
---|
2736 | m_buffer8.swap(newBuffer8);
|
---|
2737 |
|
---|
2738 | Vector<UChar> newBuffer16;
|
---|
2739 | m_buffer16.swap(newBuffer16);
|
---|
2740 |
|
---|
2741 | Vector<UChar> newBufferForRawTemplateString16;
|
---|
2742 | m_bufferForRawTemplateString16.swap(newBufferForRawTemplateString16);
|
---|
2743 |
|
---|
2744 | m_isReparsingFunction = false;
|
---|
2745 | }
|
---|
2746 |
|
---|
2747 | // Explicitly instantiate the two flavors of Lexer we need (LChar and UChar) so that the template member definitions can stay in this file instead of putting most of it in Lexer.h.
|
---|
2748 | template class Lexer<LChar>;
|
---|
2749 | template class Lexer<UChar>;
|
---|
2750 |
|
---|
2751 | } // namespace JSC
|
---|