source: webkit/trunk/JavaScriptCore/parser/Lexer.cpp@ 62628

Last change on this file since 62628 was 62628, checked in by [email protected], 15 years ago

Refactored string parsing inside the lexer
https://bugs.webkit.org/show_bug.cgi?id=41606

Reviewed by Oliver Hunt.

Does not use goto. Although the latest SunSpider
parse-only test yields a 1.044x speedup, I think the
patch gives a slight improvement at most.

  • parser/Lexer.cpp:

(JSC::singleEscape):
(JSC::Lexer::parseString):
(JSC::Lexer::lex):

  • parser/Lexer.h:
  • Property svn:eol-style set to native
File size: 31.3 KB
Line 
1/*
2 * Copyright (C) 1999-2000 Harri Porten ([email protected])
3 * Copyright (C) 2006, 2007, 2008, 2009 Apple Inc. All Rights Reserved.
4 * Copyright (C) 2007 Cameron Zwarich ([email protected])
5 * Copyright (C) 2010 Zoltan Herczeg ([email protected])
6 *
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Library General Public
9 * License as published by the Free Software Foundation; either
10 * version 2 of the License, or (at your option) any later version.
11 *
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Library General Public License for more details.
16 *
17 * You should have received a copy of the GNU Library General Public License
18 * along with this library; see the file COPYING.LIB. If not, write to
19 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20 * Boston, MA 02110-1301, USA.
21 *
22 */
23
24#include "config.h"
25#include "Lexer.h"
26
27#include "JSFunction.h"
28
29#include "JSGlobalObjectFunctions.h"
30#include "Identifier.h"
31#include "NodeInfo.h"
32#include "Nodes.h"
33#include "dtoa.h"
34#include <ctype.h>
35#include <limits.h>
36#include <string.h>
37#include <wtf/Assertions.h>
38
39using namespace WTF;
40using namespace Unicode;
41
42#include "JSParser.h"
43#include "Lookup.h"
44#include "Lexer.lut.h"
45
46namespace JSC {
47
48
// Coarse classification of a 7-bit ASCII code unit. Lexer::lex() looks the
// class up in the AsciiCharacters table and switches on it. The enumerator
// order is unchanged, so the numeric values are the same as before.
enum CharacterTypes {
    // Types for the main switch
    CharacterInvalid,

    CharacterAlpha,
    CharacterZero,
    CharacterNumber,

    CharacterLineTerminator,
    CharacterExclamationMark,
    CharacterSimple,
    CharacterQuote,
    CharacterDot,
    CharacterSlash,
    CharacterBackSlash,
    CharacterSemicolon,
    CharacterOpenBrace,
    CharacterCloseBrace,

    CharacterAdd,
    CharacterSub,
    CharacterMultiply,
    CharacterModulo,
    CharacterAnd,
    CharacterXor,
    CharacterOr,
    CharacterLess,
    CharacterGreater,
    CharacterEqual,

    // Other types (only one so far)
    // No trailing comma after the last enumerator: it is only valid from C++11.
    CharacterWhiteSpace
};
82
83// 128 ascii codes
84static unsigned char AsciiCharacters[128] = {
85/* 0 - Null */ CharacterInvalid,
86/* 1 - Start of Heading */ CharacterInvalid,
87/* 2 - Start of Text */ CharacterInvalid,
88/* 3 - End of Text */ CharacterInvalid,
89/* 4 - End of Transm. */ CharacterInvalid,
90/* 5 - Enquiry */ CharacterInvalid,
91/* 6 - Acknowledgment */ CharacterInvalid,
92/* 7 - Bell */ CharacterInvalid,
93/* 8 - Back Space */ CharacterInvalid,
94/* 9 - Horizontal Tab */ CharacterWhiteSpace,
95/* 10 - Line Feed */ CharacterLineTerminator,
96/* 11 - Vertical Tab */ CharacterWhiteSpace,
97/* 12 - Form Feed */ CharacterWhiteSpace,
98/* 13 - Carriage Return */ CharacterLineTerminator,
99/* 14 - Shift Out */ CharacterInvalid,
100/* 15 - Shift In */ CharacterInvalid,
101/* 16 - Data Line Escape */ CharacterInvalid,
102/* 17 - Device Control 1 */ CharacterInvalid,
103/* 18 - Device Control 2 */ CharacterInvalid,
104/* 19 - Device Control 3 */ CharacterInvalid,
105/* 20 - Device Control 4 */ CharacterInvalid,
106/* 21 - Negative Ack. */ CharacterInvalid,
107/* 22 - Synchronous Idle */ CharacterInvalid,
108/* 23 - End of Transmit */ CharacterInvalid,
109/* 24 - Cancel */ CharacterInvalid,
110/* 25 - End of Medium */ CharacterInvalid,
111/* 26 - Substitute */ CharacterInvalid,
112/* 27 - Escape */ CharacterInvalid,
113/* 28 - File Separator */ CharacterInvalid,
114/* 29 - Group Separator */ CharacterInvalid,
115/* 30 - Record Separator */ CharacterInvalid,
116/* 31 - Unit Separator */ CharacterInvalid,
117/* 32 - Space */ CharacterWhiteSpace,
118/* 33 - ! */ CharacterExclamationMark,
119/* 34 - " */ CharacterQuote,
120/* 35 - # */ CharacterInvalid,
121/* 36 - $ */ CharacterAlpha,
122/* 37 - % */ CharacterModulo,
123/* 38 - & */ CharacterAnd,
124/* 39 - ' */ CharacterQuote,
125/* 40 - ( */ CharacterSimple,
126/* 41 - ) */ CharacterSimple,
127/* 42 - * */ CharacterMultiply,
128/* 43 - + */ CharacterAdd,
129/* 44 - , */ CharacterSimple,
130/* 45 - - */ CharacterSub,
131/* 46 - . */ CharacterDot,
132/* 47 - / */ CharacterSlash,
133/* 48 - 0 */ CharacterZero,
134/* 49 - 1 */ CharacterNumber,
135/* 50 - 2 */ CharacterNumber,
136/* 51 - 3 */ CharacterNumber,
137/* 52 - 4 */ CharacterNumber,
138/* 53 - 5 */ CharacterNumber,
139/* 54 - 6 */ CharacterNumber,
140/* 55 - 7 */ CharacterNumber,
141/* 56 - 8 */ CharacterNumber,
142/* 57 - 9 */ CharacterNumber,
143/* 58 - : */ CharacterSimple,
144/* 59 - ; */ CharacterSemicolon,
145/* 60 - < */ CharacterLess,
146/* 61 - = */ CharacterEqual,
147/* 62 - > */ CharacterGreater,
148/* 63 - ? */ CharacterSimple,
149/* 64 - @ */ CharacterInvalid,
150/* 65 - A */ CharacterAlpha,
151/* 66 - B */ CharacterAlpha,
152/* 67 - C */ CharacterAlpha,
153/* 68 - D */ CharacterAlpha,
154/* 69 - E */ CharacterAlpha,
155/* 70 - F */ CharacterAlpha,
156/* 71 - G */ CharacterAlpha,
157/* 72 - H */ CharacterAlpha,
158/* 73 - I */ CharacterAlpha,
159/* 74 - J */ CharacterAlpha,
160/* 75 - K */ CharacterAlpha,
161/* 76 - L */ CharacterAlpha,
162/* 77 - M */ CharacterAlpha,
163/* 78 - N */ CharacterAlpha,
164/* 79 - O */ CharacterAlpha,
165/* 80 - P */ CharacterAlpha,
166/* 81 - Q */ CharacterAlpha,
167/* 82 - R */ CharacterAlpha,
168/* 83 - S */ CharacterAlpha,
169/* 84 - T */ CharacterAlpha,
170/* 85 - U */ CharacterAlpha,
171/* 86 - V */ CharacterAlpha,
172/* 87 - W */ CharacterAlpha,
173/* 88 - X */ CharacterAlpha,
174/* 89 - Y */ CharacterAlpha,
175/* 90 - Z */ CharacterAlpha,
176/* 91 - [ */ CharacterSimple,
177/* 92 - \ */ CharacterBackSlash,
178/* 93 - ] */ CharacterSimple,
179/* 94 - ^ */ CharacterXor,
180/* 95 - _ */ CharacterAlpha,
181/* 96 - ` */ CharacterInvalid,
182/* 97 - a */ CharacterAlpha,
183/* 98 - b */ CharacterAlpha,
184/* 99 - c */ CharacterAlpha,
185/* 100 - d */ CharacterAlpha,
186/* 101 - e */ CharacterAlpha,
187/* 102 - f */ CharacterAlpha,
188/* 103 - g */ CharacterAlpha,
189/* 104 - h */ CharacterAlpha,
190/* 105 - i */ CharacterAlpha,
191/* 106 - j */ CharacterAlpha,
192/* 107 - k */ CharacterAlpha,
193/* 108 - l */ CharacterAlpha,
194/* 109 - m */ CharacterAlpha,
195/* 110 - n */ CharacterAlpha,
196/* 111 - o */ CharacterAlpha,
197/* 112 - p */ CharacterAlpha,
198/* 113 - q */ CharacterAlpha,
199/* 114 - r */ CharacterAlpha,
200/* 115 - s */ CharacterAlpha,
201/* 116 - t */ CharacterAlpha,
202/* 117 - u */ CharacterAlpha,
203/* 118 - v */ CharacterAlpha,
204/* 119 - w */ CharacterAlpha,
205/* 120 - x */ CharacterAlpha,
206/* 121 - y */ CharacterAlpha,
207/* 122 - z */ CharacterAlpha,
208/* 123 - { */ CharacterOpenBrace,
209/* 124 - | */ CharacterOr,
210/* 125 - } */ CharacterCloseBrace,
211/* 126 - ~ */ CharacterSimple,
212/* 127 - Delete */ CharacterInvalid,
213};
214
215Lexer::Lexer(JSGlobalData* globalData)
216 : m_isReparsing(false)
217 , m_globalData(globalData)
218 , m_keywordTable(JSC::mainTable)
219{
220}
221
222Lexer::~Lexer()
223{
224 m_keywordTable.deleteTable();
225}
226
227ALWAYS_INLINE const UChar* Lexer::currentCharacter() const
228{
229 ASSERT(m_code <= m_codeEnd);
230 return m_code;
231}
232
233ALWAYS_INLINE int Lexer::currentOffset() const
234{
235 return currentCharacter() - m_codeStart;
236}
237
238void Lexer::setCode(const SourceCode& source, ParserArena& arena)
239{
240 m_arena = &arena.identifierArena();
241
242 m_lineNumber = source.firstLine();
243 m_delimited = false;
244 m_lastToken = -1;
245
246 const UChar* data = source.provider()->data();
247
248 m_source = &source;
249 m_codeStart = data;
250 m_code = data + source.startOffset();
251 m_codeEnd = data + source.endOffset();
252 m_error = false;
253 m_atLineStart = true;
254
255 m_buffer8.reserveInitialCapacity(initialReadBufferCapacity);
256 m_buffer16.reserveInitialCapacity((m_codeEnd - m_code) / 2);
257
258 if (LIKELY(m_code < m_codeEnd))
259 m_current = *m_code;
260 else
261 m_current = -1;
262 ASSERT(currentOffset() == source.startOffset());
263}
264
265ALWAYS_INLINE void Lexer::shift()
266{
267 // Faster than an if-else sequence
268 ASSERT(m_current != -1);
269 m_current = -1;
270 ++m_code;
271 if (LIKELY(m_code < m_codeEnd))
272 m_current = *m_code;
273}
274
275ALWAYS_INLINE int Lexer::peek(int offset)
276{
277 // Only use if necessary
278 ASSERT(offset > 0 && offset < 5);
279 const UChar* code = m_code + offset;
280 return (code < m_codeEnd) ? *code : -1;
281}
282
283int Lexer::getUnicodeCharacter()
284{
285 int char1 = peek(1);
286 int char2 = peek(2);
287 int char3 = peek(3);
288
289 if (UNLIKELY(!isASCIIHexDigit(m_current) || !isASCIIHexDigit(char1) || !isASCIIHexDigit(char2) || !isASCIIHexDigit(char3)))
290 return -1;
291
292 int result = convertUnicode(m_current, char1, char2, char3);
293 shift();
294 shift();
295 shift();
296 shift();
297 return result;
298}
299
300void Lexer::shiftLineTerminator()
301{
302 ASSERT(isLineTerminator(m_current));
303
304 int m_prev = m_current;
305 shift();
306
307 // Allow both CRLF and LFCR.
308 if (m_prev + m_current == '\n' + '\r')
309 shift();
310
311 ++m_lineNumber;
312}
313
314ALWAYS_INLINE const Identifier* Lexer::makeIdentifier(const UChar* characters, size_t length)
315{
316 return &m_arena->makeIdentifier(m_globalData, characters, length);
317}
318
319ALWAYS_INLINE bool Lexer::lastTokenWasRestrKeyword() const
320{
321 return m_lastToken == CONTINUE || m_lastToken == BREAK || m_lastToken == RETURN || m_lastToken == THROW;
322}
323
324static NEVER_INLINE bool isNonASCIIIdentStart(int c)
325{
326 return category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other);
327}
328
329static inline bool isIdentStart(int c)
330{
331 return isASCII(c) ? isASCIIAlpha(c) || c == '$' || c == '_' : isNonASCIIIdentStart(c);
332}
333
334static NEVER_INLINE bool isNonASCIIIdentPart(int c)
335{
336 return category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other
337 | Mark_NonSpacing | Mark_SpacingCombining | Number_DecimalDigit | Punctuation_Connector);
338}
339
340static inline bool isIdentPart(int c)
341{
342 return isASCII(c) ? isASCIIAlphanumeric(c) || c == '$' || c == '_' : isNonASCIIIdentPart(c);
343}
344
// Maps the character following a backslash to the character it denotes for
// the single-character escapes (\b \t \n \v \f \r \\ \' \").
// Returns 0 when |c| is not one of them, so callers can treat a zero result
// as "not a single-character escape".
static inline int singleEscape(int c)
{
    switch (c) {
    case 'b': return '\b'; // 0x08 backspace
    case 't': return '\t'; // 0x09 horizontal tab
    case 'n': return '\n'; // 0x0A line feed
    case 'v': return '\v'; // 0x0B vertical tab
    case 'f': return '\f'; // 0x0C form feed
    case 'r': return '\r'; // 0x0D carriage return
    case '\\':
    case '\'':
    case '"':
        // These escapes stand for themselves.
        return c;
    default:
        return 0;
    }
}
370
371inline void Lexer::record8(int c)
372{
373 ASSERT(c >= 0);
374 ASSERT(c <= 0xFF);
375 m_buffer8.append(static_cast<char>(c));
376}
377
378inline void Lexer::record16(UChar c)
379{
380 m_buffer16.append(c);
381}
382
383inline void Lexer::record16(int c)
384{
385 ASSERT(c >= 0);
386 ASSERT(c <= USHRT_MAX);
387 record16(UChar(static_cast<unsigned short>(c)));
388}
389
390ALWAYS_INLINE bool Lexer::parseString(void* lvalp)
391{
392 int stringQuoteCharacter = m_current;
393 shift();
394
395 const UChar* stringStart = currentCharacter();
396
397 while (m_current != stringQuoteCharacter) {
398 if (UNLIKELY(m_current == '\\')) {
399 if (stringStart != currentCharacter())
400 m_buffer16.append(stringStart, currentCharacter() - stringStart);
401 shift();
402
403 int escape = singleEscape(m_current);
404
405 // Most common escape sequences first
406 if (escape) {
407 record16(escape);
408 shift();
409 } else if (UNLIKELY(isLineTerminator(m_current)))
410 shiftLineTerminator();
411 else if (m_current == 'x') {
412 shift();
413 if (isASCIIHexDigit(m_current) && isASCIIHexDigit(peek(1))) {
414 int prev = m_current;
415 shift();
416 record16(convertHex(prev, m_current));
417 shift();
418 } else
419 record16('x');
420 } else if (m_current == 'u') {
421 shift();
422 int character = getUnicodeCharacter();
423 if (character != -1)
424 record16(character);
425 else if (m_current == stringQuoteCharacter)
426 record16('u');
427 else // Only stringQuoteCharacter allowed after \u
428 return false;
429 } else if (isASCIIOctalDigit(m_current)) {
430 // Octal character sequences
431 int character1 = m_current;
432 shift();
433 if (isASCIIOctalDigit(m_current)) {
434 // Two octal characters
435 int character2 = m_current;
436 shift();
437 if (character1 >= '0' && character1 <= '3' && isASCIIOctalDigit(m_current)) {
438 record16((character1 - '0') * 64 + (character2 - '0') * 8 + m_current - '0');
439 shift();
440 } else
441 record16((character1 - '0') * 8 + character2 - '0');
442 } else
443 record16(character1 - '0');
444 } else if (m_current != -1) {
445 record16(m_current);
446 shift();
447 } else
448 return false;
449
450 stringStart = currentCharacter();
451 continue;
452 } else if (UNLIKELY(((static_cast<unsigned>(m_current) - 0xE) & 0x2000))) {
453 // New-line or end of input is not allowed
454 if (UNLIKELY(isLineTerminator(m_current)) || UNLIKELY(m_current == -1))
455 return false;
456 // Anything else is just a normal character
457 }
458 shift();
459 }
460
461 if (currentCharacter() != stringStart)
462 m_buffer16.append(stringStart, currentCharacter() - stringStart);
463 reinterpret_cast<YYSTYPE*>(lvalp)->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
464 m_buffer16.resize(0);
465 return true;
466}
467
468int Lexer::lex(void* p1, void* p2)
469{
470 ASSERT(!m_error);
471 ASSERT(m_buffer8.isEmpty());
472 ASSERT(m_buffer16.isEmpty());
473
474 YYSTYPE* lvalp = static_cast<YYSTYPE*>(p1);
475 YYLTYPE* llocp = static_cast<YYLTYPE*>(p2);
476 int token = 0;
477 m_terminator = false;
478
479start:
480 while (isWhiteSpace(m_current))
481 shift();
482
483 int startOffset = currentOffset();
484
485 if (UNLIKELY(m_current == -1)) {
486 if (!m_terminator && !m_delimited && !m_isReparsing) {
487 // automatic semicolon insertion if program incomplete
488 goto doneSemicolon;
489 }
490 return 0;
491 }
492
493 m_delimited = false;
494
495 if (isASCII(m_current)) {
496 ASSERT(m_current >= 0 && m_current < 128);
497
498 switch (AsciiCharacters[m_current]) {
499 case CharacterGreater:
500 shift();
501 if (m_current == '>') {
502 shift();
503 if (m_current == '>') {
504 shift();
505 if (m_current == '=') {
506 shift();
507 token = URSHIFTEQUAL;
508 break;
509 }
510 token = URSHIFT;
511 break;
512 }
513 if (m_current == '=') {
514 shift();
515 token = RSHIFTEQUAL;
516 break;
517 }
518 token = RSHIFT;
519 break;
520 }
521 if (m_current == '=') {
522 shift();
523 token = GE;
524 break;
525 }
526 token = '>';
527 break;
528 case CharacterEqual:
529 shift();
530 if (m_current == '=') {
531 shift();
532 if (m_current == '=') {
533 shift();
534 token = STREQ;
535 break;
536 }
537 token = EQEQ;
538 break;
539 }
540 token = '=';
541 break;
542 case CharacterLess:
543 shift();
544 if (m_current == '!' && peek(1) == '-' && peek(2) == '-') {
545 // <!-- marks the beginning of a line comment (for www usage)
546 goto inSingleLineComment;
547 }
548 if (m_current == '<') {
549 shift();
550 if (m_current == '=') {
551 shift();
552 token = LSHIFTEQUAL;
553 break;
554 }
555 token = LSHIFT;
556 break;
557 }
558 if (m_current == '=') {
559 shift();
560 token = LE;
561 break;
562 }
563 token = '<';
564 break;
565 case CharacterExclamationMark:
566 shift();
567 if (m_current == '=') {
568 shift();
569 if (m_current == '=') {
570 shift();
571 token = STRNEQ;
572 break;
573 }
574 token = NE;
575 break;
576 }
577 token = '!';
578 break;
579 case CharacterAdd:
580 shift();
581 if (m_current == '+') {
582 shift();
583 token = (!m_terminator) ? PLUSPLUS : AUTOPLUSPLUS;
584 break;
585 }
586 if (m_current == '=') {
587 shift();
588 token = PLUSEQUAL;
589 break;
590 }
591 token = '+';
592 break;
593 case CharacterSub:
594 shift();
595 if (m_current == '-') {
596 shift();
597 if (m_atLineStart && m_current == '>') {
598 shift();
599 goto inSingleLineComment;
600 }
601 token = (!m_terminator) ? MINUSMINUS : AUTOMINUSMINUS;
602 break;
603 }
604 if (m_current == '=') {
605 shift();
606 token = MINUSEQUAL;
607 break;
608 }
609 token = '-';
610 break;
611 case CharacterMultiply:
612 shift();
613 if (m_current == '=') {
614 shift();
615 token = MULTEQUAL;
616 break;
617 }
618 token = '*';
619 break;
620 case CharacterSlash:
621 shift();
622 if (m_current == '/') {
623 shift();
624 goto inSingleLineComment;
625 }
626 if (m_current == '*') {
627 shift();
628 goto inMultiLineComment;
629 }
630 if (m_current == '=') {
631 shift();
632 token = DIVEQUAL;
633 break;
634 }
635 token = '/';
636 break;
637 case CharacterAnd:
638 shift();
639 if (m_current == '&') {
640 shift();
641 token = AND;
642 break;
643 }
644 if (m_current == '=') {
645 shift();
646 token = ANDEQUAL;
647 break;
648 }
649 token = '&';
650 break;
651 case CharacterXor:
652 shift();
653 if (m_current == '=') {
654 shift();
655 token = XOREQUAL;
656 break;
657 }
658 token = '^';
659 break;
660 case CharacterModulo:
661 shift();
662 if (m_current == '=') {
663 shift();
664 token = MODEQUAL;
665 break;
666 }
667 token = '%';
668 break;
669 case CharacterOr:
670 shift();
671 if (m_current == '=') {
672 shift();
673 token = OREQUAL;
674 break;
675 }
676 if (m_current == '|') {
677 shift();
678 token = OR;
679 break;
680 }
681 token = '|';
682 break;
683 case CharacterDot:
684 shift();
685 if (isASCIIDigit(m_current)) {
686 record8('.');
687 goto inNumberAfterDecimalPoint;
688 }
689 token = '.';
690 break;
691 case CharacterSimple:
692 token = m_current;
693 shift();
694 break;
695 case CharacterSemicolon:
696 m_delimited = true;
697 shift();
698 token = ';';
699 break;
700 case CharacterOpenBrace:
701 lvalp->intValue = currentOffset();
702 shift();
703 token = OPENBRACE;
704 break;
705 case CharacterCloseBrace:
706 lvalp->intValue = currentOffset();
707 m_delimited = true;
708 shift();
709 token = CLOSEBRACE;
710 break;
711 case CharacterBackSlash:
712 goto startIdentifierWithBackslash;
713 case CharacterZero:
714 goto startNumberWithZeroDigit;
715 case CharacterNumber:
716 goto startNumber;
717 case CharacterQuote:
718 if (UNLIKELY(!parseString(lvalp)))
719 goto returnError;
720 shift();
721 m_delimited = false;
722 token = STRING;
723 break;
724 case CharacterAlpha:
725 ASSERT(isIdentStart(m_current));
726 goto startIdentifierOrKeyword;
727 case CharacterLineTerminator:
728 ASSERT(isLineTerminator(m_current));
729 shiftLineTerminator();
730 m_atLineStart = true;
731 m_terminator = true;
732 if (lastTokenWasRestrKeyword()) {
733 token = ';';
734 goto doneSemicolon;
735 }
736 goto start;
737 case CharacterInvalid:
738 goto returnError;
739 default:
740 ASSERT_NOT_REACHED();
741 goto returnError;
742 }
743 } else {
744 // Rare characters
745
746 if (isNonASCIIIdentStart(m_current))
747 goto startIdentifierOrKeyword;
748 if (isLineTerminator(m_current)) {
749 shiftLineTerminator();
750 m_atLineStart = true;
751 m_terminator = true;
752 if (lastTokenWasRestrKeyword())
753 goto doneSemicolon;
754 goto start;
755 }
756 goto returnError;
757 }
758
759 m_atLineStart = false;
760 goto returnToken;
761
762startIdentifierWithBackslash: {
763 shift();
764 if (UNLIKELY(m_current != 'u'))
765 goto returnError;
766 shift();
767
768 token = getUnicodeCharacter();
769 if (UNLIKELY(token == -1))
770 goto returnError;
771 if (UNLIKELY(!isIdentStart(token)))
772 goto returnError;
773 goto inIdentifierAfterCharacterCheck;
774}
775
776startIdentifierOrKeyword: {
777 const UChar* identifierStart = currentCharacter();
778 shift();
779 while (isIdentPart(m_current))
780 shift();
781 if (LIKELY(m_current != '\\')) {
782 // Fast case for idents which does not contain \uCCCC characters
783 lvalp->ident = makeIdentifier(identifierStart, currentCharacter() - identifierStart);
784 goto doneIdentifierOrKeyword;
785 }
786 m_buffer16.append(identifierStart, currentCharacter() - identifierStart);
787}
788
789 do {
790 shift();
791 if (UNLIKELY(m_current != 'u'))
792 goto returnError;
793 shift();
794 token = getUnicodeCharacter();
795 if (UNLIKELY(token == -1))
796 goto returnError;
797 if (UNLIKELY(!isIdentPart(token)))
798 goto returnError;
799inIdentifierAfterCharacterCheck:
800 record16(token);
801
802 while (isIdentPart(m_current)) {
803 record16(m_current);
804 shift();
805 }
806 } while (UNLIKELY(m_current == '\\'));
807 goto doneIdentifier;
808
809inSingleLineComment:
810 while (!isLineTerminator(m_current)) {
811 if (UNLIKELY(m_current == -1))
812 return 0;
813 shift();
814 }
815 shiftLineTerminator();
816 m_atLineStart = true;
817 m_terminator = true;
818 if (lastTokenWasRestrKeyword())
819 goto doneSemicolon;
820 goto start;
821
822inMultiLineComment:
823 while (true) {
824 if (UNLIKELY(m_current == '*')) {
825 shift();
826 if (m_current == '/')
827 break;
828 if (m_current == '*')
829 continue;
830 }
831
832 if (UNLIKELY(m_current == -1))
833 goto returnError;
834
835 if (isLineTerminator(m_current))
836 shiftLineTerminator();
837 else
838 shift();
839 }
840 shift();
841 m_atLineStart = false;
842 goto start;
843
844startNumberWithZeroDigit:
845 shift();
846 if ((m_current | 0x20) == 'x' && isASCIIHexDigit(peek(1))) {
847 shift();
848 goto inHex;
849 }
850 if (m_current == '.') {
851 record8('0');
852 record8('.');
853 shift();
854 goto inNumberAfterDecimalPoint;
855 }
856 if ((m_current | 0x20) == 'e') {
857 record8('0');
858 record8('e');
859 shift();
860 goto inExponentIndicator;
861 }
862 if (isASCIIOctalDigit(m_current))
863 goto inOctal;
864 if (isASCIIDigit(m_current))
865 goto startNumber;
866 lvalp->doubleValue = 0;
867 goto doneNumeric;
868
869inNumberAfterDecimalPoint:
870 while (isASCIIDigit(m_current)) {
871 record8(m_current);
872 shift();
873 }
874 if ((m_current | 0x20) == 'e') {
875 record8('e');
876 shift();
877 goto inExponentIndicator;
878 }
879 goto doneNumber;
880
881inExponentIndicator:
882 if (m_current == '+' || m_current == '-') {
883 record8(m_current);
884 shift();
885 }
886 if (!isASCIIDigit(m_current))
887 goto returnError;
888 do {
889 record8(m_current);
890 shift();
891 } while (isASCIIDigit(m_current));
892 goto doneNumber;
893
894inOctal: {
895 do {
896 record8(m_current);
897 shift();
898 } while (isASCIIOctalDigit(m_current));
899 if (isASCIIDigit(m_current))
900 goto startNumber;
901
902 double dval = 0;
903
904 const char* end = m_buffer8.end();
905 for (const char* p = m_buffer8.data(); p < end; ++p) {
906 dval *= 8;
907 dval += *p - '0';
908 }
909 if (dval >= mantissaOverflowLowerBound)
910 dval = parseIntOverflow(m_buffer8.data(), end - m_buffer8.data(), 8);
911
912 m_buffer8.resize(0);
913
914 lvalp->doubleValue = dval;
915 goto doneNumeric;
916}
917
918inHex: {
919 do {
920 record8(m_current);
921 shift();
922 } while (isASCIIHexDigit(m_current));
923
924 double dval = 0;
925
926 const char* end = m_buffer8.end();
927 for (const char* p = m_buffer8.data(); p < end; ++p) {
928 dval *= 16;
929 dval += toASCIIHexValue(*p);
930 }
931 if (dval >= mantissaOverflowLowerBound)
932 dval = parseIntOverflow(m_buffer8.data(), end - m_buffer8.data(), 16);
933
934 m_buffer8.resize(0);
935
936 lvalp->doubleValue = dval;
937 goto doneNumeric;
938}
939
940startNumber:
941 record8(m_current);
942 shift();
943 while (isASCIIDigit(m_current)) {
944 record8(m_current);
945 shift();
946 }
947 if (m_current == '.') {
948 record8('.');
949 shift();
950 goto inNumberAfterDecimalPoint;
951 }
952 if ((m_current | 0x20) == 'e') {
953 record8('e');
954 shift();
955 goto inExponentIndicator;
956 }
957
958 // Fall through into doneNumber.
959
960doneNumber:
961 // Null-terminate string for strtod.
962 m_buffer8.append('\0');
963 lvalp->doubleValue = WTF::strtod(m_buffer8.data(), 0);
964 m_buffer8.resize(0);
965
966 // Fall through into doneNumeric.
967
968doneNumeric:
969 // No identifiers allowed directly after numeric literal, e.g. "3in" is bad.
970 if (UNLIKELY(isIdentStart(m_current)))
971 goto returnError;
972
973 m_atLineStart = false;
974 m_delimited = false;
975 token = NUMBER;
976 goto returnToken;
977
978doneSemicolon:
979 token = ';';
980 m_delimited = true;
981 goto returnToken;
982
983doneIdentifier:
984 m_atLineStart = false;
985 m_delimited = false;
986 lvalp->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
987 m_buffer16.resize(0);
988 token = IDENT;
989 goto returnToken;
990
991doneIdentifierOrKeyword: {
992 m_atLineStart = false;
993 m_delimited = false;
994 m_buffer16.resize(0);
995 const HashEntry* entry = m_keywordTable.entry(m_globalData, *lvalp->ident);
996 token = entry ? entry->lexerValue() : static_cast<int>(IDENT);
997
998 // Fall through into returnToken.
999}
1000
1001returnToken: {
1002 int lineNumber = m_lineNumber;
1003 llocp->first_line = lineNumber;
1004 llocp->last_line = lineNumber;
1005 llocp->first_column = startOffset;
1006 llocp->last_column = currentOffset();
1007 m_lastToken = token;
1008 return token;
1009}
1010
1011returnError:
1012 m_error = true;
1013 return -1;
1014}
1015
1016bool Lexer::scanRegExp(const Identifier*& pattern, const Identifier*& flags, UChar patternPrefix)
1017{
1018 ASSERT(m_buffer16.isEmpty());
1019
1020 bool lastWasEscape = false;
1021 bool inBrackets = false;
1022
1023 if (patternPrefix) {
1024 ASSERT(!isLineTerminator(patternPrefix));
1025 ASSERT(patternPrefix != '/');
1026 ASSERT(patternPrefix != '[');
1027 record16(patternPrefix);
1028 }
1029
1030 while (true) {
1031 int current = m_current;
1032
1033 if (isLineTerminator(current) || current == -1) {
1034 m_buffer16.resize(0);
1035 return false;
1036 }
1037
1038 shift();
1039
1040 if (current == '/' && !lastWasEscape && !inBrackets)
1041 break;
1042
1043 record16(current);
1044
1045 if (lastWasEscape) {
1046 lastWasEscape = false;
1047 continue;
1048 }
1049
1050 switch (current) {
1051 case '[':
1052 inBrackets = true;
1053 break;
1054 case ']':
1055 inBrackets = false;
1056 break;
1057 case '\\':
1058 lastWasEscape = true;
1059 break;
1060 }
1061 }
1062
1063 pattern = makeIdentifier(m_buffer16.data(), m_buffer16.size());
1064 m_buffer16.resize(0);
1065
1066 while (isIdentPart(m_current)) {
1067 record16(m_current);
1068 shift();
1069 }
1070
1071 flags = makeIdentifier(m_buffer16.data(), m_buffer16.size());
1072 m_buffer16.resize(0);
1073
1074 return true;
1075}
1076
1077bool Lexer::skipRegExp()
1078{
1079 bool lastWasEscape = false;
1080 bool inBrackets = false;
1081
1082 while (true) {
1083 int current = m_current;
1084
1085 if (isLineTerminator(current) || current == -1)
1086 return false;
1087
1088 shift();
1089
1090 if (current == '/' && !lastWasEscape && !inBrackets)
1091 break;
1092
1093 if (lastWasEscape) {
1094 lastWasEscape = false;
1095 continue;
1096 }
1097
1098 switch (current) {
1099 case '[':
1100 inBrackets = true;
1101 break;
1102 case ']':
1103 inBrackets = false;
1104 break;
1105 case '\\':
1106 lastWasEscape = true;
1107 break;
1108 }
1109 }
1110
1111 while (isIdentPart(m_current))
1112 shift();
1113
1114 return true;
1115}
1116
1117void Lexer::clear()
1118{
1119 m_arena = 0;
1120 m_codeWithoutBOMs.clear();
1121
1122 Vector<char> newBuffer8;
1123 m_buffer8.swap(newBuffer8);
1124
1125 Vector<UChar> newBuffer16;
1126 m_buffer16.swap(newBuffer16);
1127
1128 m_isReparsing = false;
1129}
1130
1131SourceCode Lexer::sourceCode(int openBrace, int closeBrace, int firstLine)
1132{
1133 return SourceCode(m_source->provider(), openBrace, closeBrace + 1, firstLine);
1134}
1135
1136} // namespace JSC
Note: See TracBrowser for help on using the repository browser.