source: webkit/trunk/JavaScriptCore/parser/Lexer.cpp@ 62849

Last change on this file since 62849 was 62849, checked in by [email protected], 15 years ago

Tidy up the lexer

Reviewed by Anders Carlson.

Remove some of the old yacc/lex-isms still present in the lexer

  • parser/JSParser.h:

(JSC::):

  • parser/Lexer.cpp:

(JSC::Lexer::parseString):
(JSC::Lexer::lex):

  • parser/Lexer.h:
  • Property svn:eol-style set to native
File size: 31.2 KB
Line 
1/*
2 * Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
3 * Copyright (C) 2006, 2007, 2008, 2009 Apple Inc. All Rights Reserved.
4 * Copyright (C) 2007 Cameron Zwarich (cwzwarich@uwaterloo.ca)
5 * Copyright (C) 2010 Zoltan Herczeg (zherczeg@inf.u-szeged.hu)
6 *
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Library General Public
9 * License as published by the Free Software Foundation; either
10 * version 2 of the License, or (at your option) any later version.
11 *
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Library General Public License for more details.
16 *
17 * You should have received a copy of the GNU Library General Public License
18 * along with this library; see the file COPYING.LIB. If not, write to
19 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20 * Boston, MA 02110-1301, USA.
21 *
22 */
23
24#include "config.h"
25#include "Lexer.h"
26
27#include "JSFunction.h"
28
29#include "JSGlobalObjectFunctions.h"
30#include "Identifier.h"
31#include "NodeInfo.h"
32#include "Nodes.h"
33#include "dtoa.h"
34#include <ctype.h>
35#include <limits.h>
36#include <string.h>
37#include <wtf/Assertions.h>
38
39using namespace WTF;
40using namespace Unicode;
41
42#include "JSParser.h"
43#include "Lookup.h"
44#include "Lexer.lut.h"
45
46namespace JSC {
47
48
// Classification of the ASCII code points. Lexer::lex() switches on these
// values to choose the handler for the first character of a token.
enum CharacterTypes {
    // Types for the main switch
    CharacterInvalid,

    CharacterAlpha,
    CharacterZero,
    CharacterNumber,

    CharacterLineTerminator,
    CharacterExclamationMark,
    CharacterSimple,
    CharacterQuote,
    CharacterDot,
    CharacterSlash,
    CharacterBackSlash,
    CharacterSemicolon,
    CharacterOpenBrace,
    CharacterCloseBrace,

    CharacterAdd,
    CharacterSub,
    CharacterMultiply,
    CharacterModulo,
    CharacterAnd,
    CharacterXor,
    CharacterOr,
    CharacterLess,
    CharacterGreater,
    CharacterEqual,

    // Other types (only one so far)
    CharacterWhiteSpace,
};

// Maps each of the 128 ASCII codes to its CharacterTypes value.
// The table is only ever read, so it is const: this lets the compiler place
// it in read-only storage and prevents accidental modification.
static const unsigned char AsciiCharacters[128] = {
/*   0 - Null               */ CharacterInvalid,
/*   1 - Start of Heading   */ CharacterInvalid,
/*   2 - Start of Text      */ CharacterInvalid,
/*   3 - End of Text        */ CharacterInvalid,
/*   4 - End of Transm.     */ CharacterInvalid,
/*   5 - Enquiry            */ CharacterInvalid,
/*   6 - Acknowledgment     */ CharacterInvalid,
/*   7 - Bell               */ CharacterInvalid,
/*   8 - Back Space         */ CharacterInvalid,
/*   9 - Horizontal Tab     */ CharacterWhiteSpace,
/*  10 - Line Feed          */ CharacterLineTerminator,
/*  11 - Vertical Tab       */ CharacterWhiteSpace,
/*  12 - Form Feed          */ CharacterWhiteSpace,
/*  13 - Carriage Return    */ CharacterLineTerminator,
/*  14 - Shift Out          */ CharacterInvalid,
/*  15 - Shift In           */ CharacterInvalid,
/*  16 - Data Line Escape   */ CharacterInvalid,
/*  17 - Device Control 1   */ CharacterInvalid,
/*  18 - Device Control 2   */ CharacterInvalid,
/*  19 - Device Control 3   */ CharacterInvalid,
/*  20 - Device Control 4   */ CharacterInvalid,
/*  21 - Negative Ack.      */ CharacterInvalid,
/*  22 - Synchronous Idle   */ CharacterInvalid,
/*  23 - End of Transmit    */ CharacterInvalid,
/*  24 - Cancel             */ CharacterInvalid,
/*  25 - End of Medium      */ CharacterInvalid,
/*  26 - Substitute         */ CharacterInvalid,
/*  27 - Escape             */ CharacterInvalid,
/*  28 - File Separator     */ CharacterInvalid,
/*  29 - Group Separator    */ CharacterInvalid,
/*  30 - Record Separator   */ CharacterInvalid,
/*  31 - Unit Separator     */ CharacterInvalid,
/*  32 - Space              */ CharacterWhiteSpace,
/*  33 - !                  */ CharacterExclamationMark,
/*  34 - "                  */ CharacterQuote,
/*  35 - #                  */ CharacterInvalid,
/*  36 - $                  */ CharacterAlpha,
/*  37 - %                  */ CharacterModulo,
/*  38 - &                  */ CharacterAnd,
/*  39 - '                  */ CharacterQuote,
/*  40 - (                  */ CharacterSimple,
/*  41 - )                  */ CharacterSimple,
/*  42 - *                  */ CharacterMultiply,
/*  43 - +                  */ CharacterAdd,
/*  44 - ,                  */ CharacterSimple,
/*  45 - -                  */ CharacterSub,
/*  46 - .                  */ CharacterDot,
/*  47 - /                  */ CharacterSlash,
/*  48 - 0                  */ CharacterZero,
/*  49 - 1                  */ CharacterNumber,
/*  50 - 2                  */ CharacterNumber,
/*  51 - 3                  */ CharacterNumber,
/*  52 - 4                  */ CharacterNumber,
/*  53 - 5                  */ CharacterNumber,
/*  54 - 6                  */ CharacterNumber,
/*  55 - 7                  */ CharacterNumber,
/*  56 - 8                  */ CharacterNumber,
/*  57 - 9                  */ CharacterNumber,
/*  58 - :                  */ CharacterSimple,
/*  59 - ;                  */ CharacterSemicolon,
/*  60 - <                  */ CharacterLess,
/*  61 - =                  */ CharacterEqual,
/*  62 - >                  */ CharacterGreater,
/*  63 - ?                  */ CharacterSimple,
/*  64 - @                  */ CharacterInvalid,
/*  65 - A                  */ CharacterAlpha,
/*  66 - B                  */ CharacterAlpha,
/*  67 - C                  */ CharacterAlpha,
/*  68 - D                  */ CharacterAlpha,
/*  69 - E                  */ CharacterAlpha,
/*  70 - F                  */ CharacterAlpha,
/*  71 - G                  */ CharacterAlpha,
/*  72 - H                  */ CharacterAlpha,
/*  73 - I                  */ CharacterAlpha,
/*  74 - J                  */ CharacterAlpha,
/*  75 - K                  */ CharacterAlpha,
/*  76 - L                  */ CharacterAlpha,
/*  77 - M                  */ CharacterAlpha,
/*  78 - N                  */ CharacterAlpha,
/*  79 - O                  */ CharacterAlpha,
/*  80 - P                  */ CharacterAlpha,
/*  81 - Q                  */ CharacterAlpha,
/*  82 - R                  */ CharacterAlpha,
/*  83 - S                  */ CharacterAlpha,
/*  84 - T                  */ CharacterAlpha,
/*  85 - U                  */ CharacterAlpha,
/*  86 - V                  */ CharacterAlpha,
/*  87 - W                  */ CharacterAlpha,
/*  88 - X                  */ CharacterAlpha,
/*  89 - Y                  */ CharacterAlpha,
/*  90 - Z                  */ CharacterAlpha,
/*  91 - [                  */ CharacterSimple,
/*  92 - \                  */ CharacterBackSlash,
/*  93 - ]                  */ CharacterSimple,
/*  94 - ^                  */ CharacterXor,
/*  95 - _                  */ CharacterAlpha,
/*  96 - `                  */ CharacterInvalid,
/*  97 - a                  */ CharacterAlpha,
/*  98 - b                  */ CharacterAlpha,
/*  99 - c                  */ CharacterAlpha,
/* 100 - d                  */ CharacterAlpha,
/* 101 - e                  */ CharacterAlpha,
/* 102 - f                  */ CharacterAlpha,
/* 103 - g                  */ CharacterAlpha,
/* 104 - h                  */ CharacterAlpha,
/* 105 - i                  */ CharacterAlpha,
/* 106 - j                  */ CharacterAlpha,
/* 107 - k                  */ CharacterAlpha,
/* 108 - l                  */ CharacterAlpha,
/* 109 - m                  */ CharacterAlpha,
/* 110 - n                  */ CharacterAlpha,
/* 111 - o                  */ CharacterAlpha,
/* 112 - p                  */ CharacterAlpha,
/* 113 - q                  */ CharacterAlpha,
/* 114 - r                  */ CharacterAlpha,
/* 115 - s                  */ CharacterAlpha,
/* 116 - t                  */ CharacterAlpha,
/* 117 - u                  */ CharacterAlpha,
/* 118 - v                  */ CharacterAlpha,
/* 119 - w                  */ CharacterAlpha,
/* 120 - x                  */ CharacterAlpha,
/* 121 - y                  */ CharacterAlpha,
/* 122 - z                  */ CharacterAlpha,
/* 123 - {                  */ CharacterOpenBrace,
/* 124 - |                  */ CharacterOr,
/* 125 - }                  */ CharacterCloseBrace,
/* 126 - ~                  */ CharacterSimple,
/* 127 - Delete             */ CharacterInvalid,
};
214
// Constructs a lexer for the given global data. The lexer starts out not
// reparsing, and its keyword table is initialized from JSC::mainTable.
// No source is attached here; setCode() does that.
215Lexer::Lexer(JSGlobalData* globalData)
216 : m_isReparsing(false)
217 , m_globalData(globalData)
218 , m_keywordTable(JSC::mainTable)
219{
220}
221
// Destroys the lexer, calling deleteTable() on its keyword table.
222Lexer::~Lexer()
223{
224 m_keywordTable.deleteTable();
225}
226
// Returns the current read position (m_code) in the source buffer.
// Asserts that the position has not moved past m_codeEnd.
227ALWAYS_INLINE const UChar* Lexer::currentCharacter() const
228{
229 ASSERT(m_code <= m_codeEnd);
230 return m_code;
231}
232
233ALWAYS_INLINE int Lexer::currentOffset() const
234{
235 return currentCharacter() - m_codeStart;
236}
237
238void Lexer::setCode(const SourceCode& source, ParserArena& arena)
239{
240 m_arena = &arena.identifierArena();
241
242 m_lineNumber = source.firstLine();
243 m_delimited = false;
244 m_lastToken = -1;
245
246 const UChar* data = source.provider()->data();
247
248 m_source = &source;
249 m_codeStart = data;
250 m_code = data + source.startOffset();
251 m_codeEnd = data + source.endOffset();
252 m_error = false;
253 m_atLineStart = true;
254
255 m_buffer8.reserveInitialCapacity(initialReadBufferCapacity);
256 m_buffer16.reserveInitialCapacity((m_codeEnd - m_code) / 2);
257
258 if (LIKELY(m_code < m_codeEnd))
259 m_current = *m_code;
260 else
261 m_current = -1;
262 ASSERT(currentOffset() == source.startOffset());
263}
264
265ALWAYS_INLINE void Lexer::shift()
266{
267 // Faster than an if-else sequence
268 ASSERT(m_current != -1);
269 m_current = -1;
270 ++m_code;
271 if (LIKELY(m_code < m_codeEnd))
272 m_current = *m_code;
273}
274
275ALWAYS_INLINE int Lexer::peek(int offset)
276{
277 // Only use if necessary
278 ASSERT(offset > 0 && offset < 5);
279 const UChar* code = m_code + offset;
280 return (code < m_codeEnd) ? *code : -1;
281}
282
283int Lexer::getUnicodeCharacter()
284{
285 int char1 = peek(1);
286 int char2 = peek(2);
287 int char3 = peek(3);
288
289 if (UNLIKELY(!isASCIIHexDigit(m_current) || !isASCIIHexDigit(char1) || !isASCIIHexDigit(char2) || !isASCIIHexDigit(char3)))
290 return -1;
291
292 int result = convertUnicode(m_current, char1, char2, char3);
293 shift();
294 shift();
295 shift();
296 shift();
297 return result;
298}
299
300void Lexer::shiftLineTerminator()
301{
302 ASSERT(isLineTerminator(m_current));
303
304 int m_prev = m_current;
305 shift();
306
307 // Allow both CRLF and LFCR.
308 if (m_prev + m_current == '\n' + '\r')
309 shift();
310
311 ++m_lineNumber;
312}
313
// Creates an Identifier for the given run of `length` characters through the
// identifier arena installed by setCode(), and returns its address.
314ALWAYS_INLINE const Identifier* Lexer::makeIdentifier(const UChar* characters, size_t length)
315{
316 return &m_arena->makeIdentifier(m_globalData, characters, length);
317}
318
319ALWAYS_INLINE bool Lexer::lastTokenWasRestrKeyword() const
320{
321 return m_lastToken == CONTINUE || m_lastToken == BREAK || m_lastToken == RETURN || m_lastToken == THROW;
322}
323
324static NEVER_INLINE bool isNonASCIIIdentStart(int c)
325{
326 return category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other);
327}
328
329static inline bool isIdentStart(int c)
330{
331 return isASCII(c) ? isASCIIAlpha(c) || c == '$' || c == '_' : isNonASCIIIdentStart(c);
332}
333
334static NEVER_INLINE bool isNonASCIIIdentPart(int c)
335{
336 return category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other
337 | Mark_NonSpacing | Mark_SpacingCombining | Number_DecimalDigit | Punctuation_Connector);
338}
339
340static inline bool isIdentPart(int c)
341{
342 return isASCII(c) ? isASCIIAlphanumeric(c) || c == '$' || c == '_' : isNonASCIIIdentPart(c);
343}
344
// Translates the character following a backslash in a string literal into
// the character it denotes. Returns 0 when c is not one of the simple
// single-character escapes (b, t, n, v, f, r, backslash, single quote,
// double quote).
static inline int singleEscape(int c)
{
    switch (c) {
    // Characters that simply stand for themselves.
    case '\\':
    case '\'':
    case '"':
        return c;
    // Control-character escapes.
    case 'b':
        return '\b';
    case 't':
        return '\t';
    case 'n':
        return '\n';
    case 'v':
        return '\v';
    case 'f':
        return '\f';
    case 'r':
        return '\r';
    }
    return 0;
}
370
// Appends c, converted to char, to the 8-bit scratch buffer (m_buffer8).
// Asserts that c lies in the range 0..0xFF.
371inline void Lexer::record8(int c)
372{
373 ASSERT(c >= 0);
374 ASSERT(c <= 0xFF);
375 m_buffer8.append(static_cast<char>(c));
376}
377
// Appends the UChar c to the 16-bit scratch buffer (m_buffer16).
378inline void Lexer::record16(UChar c)
379{
380 m_buffer16.append(c);
381}
382
// Integer overload: asserts that c lies in 0..USHRT_MAX, then converts it to
// a UChar and forwards to record16(UChar).
383inline void Lexer::record16(int c)
384{
385 ASSERT(c >= 0);
386 ASSERT(c <= USHRT_MAX);
387 record16(UChar(static_cast<unsigned short>(c)));
388}
389
390ALWAYS_INLINE bool Lexer::parseString(JSTokenData* lvalp)
391{
392 int stringQuoteCharacter = m_current;
393 shift();
394
395 const UChar* stringStart = currentCharacter();
396
397 while (m_current != stringQuoteCharacter) {
398 if (UNLIKELY(m_current == '\\')) {
399 if (stringStart != currentCharacter())
400 m_buffer16.append(stringStart, currentCharacter() - stringStart);
401 shift();
402
403 int escape = singleEscape(m_current);
404
405 // Most common escape sequences first
406 if (escape) {
407 record16(escape);
408 shift();
409 } else if (UNLIKELY(isLineTerminator(m_current)))
410 shiftLineTerminator();
411 else if (m_current == 'x') {
412 shift();
413 if (isASCIIHexDigit(m_current) && isASCIIHexDigit(peek(1))) {
414 int prev = m_current;
415 shift();
416 record16(convertHex(prev, m_current));
417 shift();
418 } else
419 record16('x');
420 } else if (m_current == 'u') {
421 shift();
422 int character = getUnicodeCharacter();
423 if (character != -1)
424 record16(character);
425 else if (m_current == stringQuoteCharacter)
426 record16('u');
427 else // Only stringQuoteCharacter allowed after \u
428 return false;
429 } else if (isASCIIOctalDigit(m_current)) {
430 // Octal character sequences
431 int character1 = m_current;
432 shift();
433 if (isASCIIOctalDigit(m_current)) {
434 // Two octal characters
435 int character2 = m_current;
436 shift();
437 if (character1 >= '0' && character1 <= '3' && isASCIIOctalDigit(m_current)) {
438 record16((character1 - '0') * 64 + (character2 - '0') * 8 + m_current - '0');
439 shift();
440 } else
441 record16((character1 - '0') * 8 + character2 - '0');
442 } else
443 record16(character1 - '0');
444 } else if (m_current != -1) {
445 record16(m_current);
446 shift();
447 } else
448 return false;
449
450 stringStart = currentCharacter();
451 continue;
452 } else if (UNLIKELY(((static_cast<unsigned>(m_current) - 0xE) & 0x2000))) {
453 // New-line or end of input is not allowed
454 if (UNLIKELY(isLineTerminator(m_current)) || UNLIKELY(m_current == -1))
455 return false;
456 // Anything else is just a normal character
457 }
458 shift();
459 }
460
461 if (currentCharacter() != stringStart)
462 m_buffer16.append(stringStart, currentCharacter() - stringStart);
463 lvalp->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
464 m_buffer16.resize(0);
465 return true;
466}
467
468int Lexer::lex(JSTokenData* lvalp, JSTokenInfo* llocp)
469{
470 ASSERT(!m_error);
471 ASSERT(m_buffer8.isEmpty());
472 ASSERT(m_buffer16.isEmpty());
473
474 int token = 0;
475 m_terminator = false;
476
477start:
478 while (isWhiteSpace(m_current))
479 shift();
480
481 int startOffset = currentOffset();
482
483 if (UNLIKELY(m_current == -1)) {
484 if (!m_terminator && !m_delimited && !m_isReparsing) {
485 // automatic semicolon insertion if program incomplete
486 goto doneSemicolon;
487 }
488 return 0;
489 }
490
491 m_delimited = false;
492
493 if (isASCII(m_current)) {
494 ASSERT(m_current >= 0 && m_current < 128);
495
496 switch (AsciiCharacters[m_current]) {
497 case CharacterGreater:
498 shift();
499 if (m_current == '>') {
500 shift();
501 if (m_current == '>') {
502 shift();
503 if (m_current == '=') {
504 shift();
505 token = URSHIFTEQUAL;
506 break;
507 }
508 token = URSHIFT;
509 break;
510 }
511 if (m_current == '=') {
512 shift();
513 token = RSHIFTEQUAL;
514 break;
515 }
516 token = RSHIFT;
517 break;
518 }
519 if (m_current == '=') {
520 shift();
521 token = GE;
522 break;
523 }
524 token = '>';
525 break;
526 case CharacterEqual:
527 shift();
528 if (m_current == '=') {
529 shift();
530 if (m_current == '=') {
531 shift();
532 token = STREQ;
533 break;
534 }
535 token = EQEQ;
536 break;
537 }
538 token = '=';
539 break;
540 case CharacterLess:
541 shift();
542 if (m_current == '!' && peek(1) == '-' && peek(2) == '-') {
543 // <!-- marks the beginning of a line comment (for www usage)
544 goto inSingleLineComment;
545 }
546 if (m_current == '<') {
547 shift();
548 if (m_current == '=') {
549 shift();
550 token = LSHIFTEQUAL;
551 break;
552 }
553 token = LSHIFT;
554 break;
555 }
556 if (m_current == '=') {
557 shift();
558 token = LE;
559 break;
560 }
561 token = '<';
562 break;
563 case CharacterExclamationMark:
564 shift();
565 if (m_current == '=') {
566 shift();
567 if (m_current == '=') {
568 shift();
569 token = STRNEQ;
570 break;
571 }
572 token = NE;
573 break;
574 }
575 token = '!';
576 break;
577 case CharacterAdd:
578 shift();
579 if (m_current == '+') {
580 shift();
581 token = (!m_terminator) ? PLUSPLUS : AUTOPLUSPLUS;
582 break;
583 }
584 if (m_current == '=') {
585 shift();
586 token = PLUSEQUAL;
587 break;
588 }
589 token = '+';
590 break;
591 case CharacterSub:
592 shift();
593 if (m_current == '-') {
594 shift();
595 if (m_atLineStart && m_current == '>') {
596 shift();
597 goto inSingleLineComment;
598 }
599 token = (!m_terminator) ? MINUSMINUS : AUTOMINUSMINUS;
600 break;
601 }
602 if (m_current == '=') {
603 shift();
604 token = MINUSEQUAL;
605 break;
606 }
607 token = '-';
608 break;
609 case CharacterMultiply:
610 shift();
611 if (m_current == '=') {
612 shift();
613 token = MULTEQUAL;
614 break;
615 }
616 token = '*';
617 break;
618 case CharacterSlash:
619 shift();
620 if (m_current == '/') {
621 shift();
622 goto inSingleLineComment;
623 }
624 if (m_current == '*') {
625 shift();
626 goto inMultiLineComment;
627 }
628 if (m_current == '=') {
629 shift();
630 token = DIVEQUAL;
631 break;
632 }
633 token = '/';
634 break;
635 case CharacterAnd:
636 shift();
637 if (m_current == '&') {
638 shift();
639 token = AND;
640 break;
641 }
642 if (m_current == '=') {
643 shift();
644 token = ANDEQUAL;
645 break;
646 }
647 token = '&';
648 break;
649 case CharacterXor:
650 shift();
651 if (m_current == '=') {
652 shift();
653 token = XOREQUAL;
654 break;
655 }
656 token = '^';
657 break;
658 case CharacterModulo:
659 shift();
660 if (m_current == '=') {
661 shift();
662 token = MODEQUAL;
663 break;
664 }
665 token = '%';
666 break;
667 case CharacterOr:
668 shift();
669 if (m_current == '=') {
670 shift();
671 token = OREQUAL;
672 break;
673 }
674 if (m_current == '|') {
675 shift();
676 token = OR;
677 break;
678 }
679 token = '|';
680 break;
681 case CharacterDot:
682 shift();
683 if (isASCIIDigit(m_current)) {
684 record8('.');
685 goto inNumberAfterDecimalPoint;
686 }
687 token = '.';
688 break;
689 case CharacterSimple:
690 token = m_current;
691 shift();
692 break;
693 case CharacterSemicolon:
694 m_delimited = true;
695 shift();
696 token = ';';
697 break;
698 case CharacterOpenBrace:
699 lvalp->intValue = currentOffset();
700 shift();
701 token = OPENBRACE;
702 break;
703 case CharacterCloseBrace:
704 lvalp->intValue = currentOffset();
705 m_delimited = true;
706 shift();
707 token = CLOSEBRACE;
708 break;
709 case CharacterBackSlash:
710 goto startIdentifierWithBackslash;
711 case CharacterZero:
712 goto startNumberWithZeroDigit;
713 case CharacterNumber:
714 goto startNumber;
715 case CharacterQuote:
716 if (UNLIKELY(!parseString(lvalp)))
717 goto returnError;
718 shift();
719 m_delimited = false;
720 token = STRING;
721 break;
722 case CharacterAlpha:
723 ASSERT(isIdentStart(m_current));
724 goto startIdentifierOrKeyword;
725 case CharacterLineTerminator:
726 ASSERT(isLineTerminator(m_current));
727 shiftLineTerminator();
728 m_atLineStart = true;
729 m_terminator = true;
730 if (lastTokenWasRestrKeyword()) {
731 token = ';';
732 goto doneSemicolon;
733 }
734 goto start;
735 case CharacterInvalid:
736 goto returnError;
737 default:
738 ASSERT_NOT_REACHED();
739 goto returnError;
740 }
741 } else {
742 // Rare characters
743
744 if (isNonASCIIIdentStart(m_current))
745 goto startIdentifierOrKeyword;
746 if (isLineTerminator(m_current)) {
747 shiftLineTerminator();
748 m_atLineStart = true;
749 m_terminator = true;
750 if (lastTokenWasRestrKeyword())
751 goto doneSemicolon;
752 goto start;
753 }
754 goto returnError;
755 }
756
757 m_atLineStart = false;
758 goto returnToken;
759
760startIdentifierWithBackslash: {
761 shift();
762 if (UNLIKELY(m_current != 'u'))
763 goto returnError;
764 shift();
765
766 token = getUnicodeCharacter();
767 if (UNLIKELY(token == -1))
768 goto returnError;
769 if (UNLIKELY(!isIdentStart(token)))
770 goto returnError;
771 goto inIdentifierAfterCharacterCheck;
772}
773
774startIdentifierOrKeyword: {
775 const UChar* identifierStart = currentCharacter();
776 shift();
777 while (isIdentPart(m_current))
778 shift();
779 if (LIKELY(m_current != '\\')) {
780 // Fast case for idents which does not contain \uCCCC characters
781 lvalp->ident = makeIdentifier(identifierStart, currentCharacter() - identifierStart);
782 goto doneIdentifierOrKeyword;
783 }
784 m_buffer16.append(identifierStart, currentCharacter() - identifierStart);
785}
786
787 do {
788 shift();
789 if (UNLIKELY(m_current != 'u'))
790 goto returnError;
791 shift();
792 token = getUnicodeCharacter();
793 if (UNLIKELY(token == -1))
794 goto returnError;
795 if (UNLIKELY(!isIdentPart(token)))
796 goto returnError;
797inIdentifierAfterCharacterCheck:
798 record16(token);
799
800 while (isIdentPart(m_current)) {
801 record16(m_current);
802 shift();
803 }
804 } while (UNLIKELY(m_current == '\\'));
805 goto doneIdentifier;
806
807inSingleLineComment:
808 while (!isLineTerminator(m_current)) {
809 if (UNLIKELY(m_current == -1))
810 return 0;
811 shift();
812 }
813 shiftLineTerminator();
814 m_atLineStart = true;
815 m_terminator = true;
816 if (lastTokenWasRestrKeyword())
817 goto doneSemicolon;
818 goto start;
819
820inMultiLineComment:
821 while (true) {
822 if (UNLIKELY(m_current == '*')) {
823 shift();
824 if (m_current == '/')
825 break;
826 if (m_current == '*')
827 continue;
828 }
829
830 if (UNLIKELY(m_current == -1))
831 goto returnError;
832
833 if (isLineTerminator(m_current))
834 shiftLineTerminator();
835 else
836 shift();
837 }
838 shift();
839 m_atLineStart = false;
840 goto start;
841
842startNumberWithZeroDigit:
843 shift();
844 if ((m_current | 0x20) == 'x' && isASCIIHexDigit(peek(1))) {
845 shift();
846 goto inHex;
847 }
848 if (m_current == '.') {
849 record8('0');
850 record8('.');
851 shift();
852 goto inNumberAfterDecimalPoint;
853 }
854 if ((m_current | 0x20) == 'e') {
855 record8('0');
856 record8('e');
857 shift();
858 goto inExponentIndicator;
859 }
860 if (isASCIIOctalDigit(m_current))
861 goto inOctal;
862 if (isASCIIDigit(m_current))
863 goto startNumber;
864 lvalp->doubleValue = 0;
865 goto doneNumeric;
866
867inNumberAfterDecimalPoint:
868 while (isASCIIDigit(m_current)) {
869 record8(m_current);
870 shift();
871 }
872 if ((m_current | 0x20) == 'e') {
873 record8('e');
874 shift();
875 goto inExponentIndicator;
876 }
877 goto doneNumber;
878
879inExponentIndicator:
880 if (m_current == '+' || m_current == '-') {
881 record8(m_current);
882 shift();
883 }
884 if (!isASCIIDigit(m_current))
885 goto returnError;
886 do {
887 record8(m_current);
888 shift();
889 } while (isASCIIDigit(m_current));
890 goto doneNumber;
891
892inOctal: {
893 do {
894 record8(m_current);
895 shift();
896 } while (isASCIIOctalDigit(m_current));
897 if (isASCIIDigit(m_current))
898 goto startNumber;
899
900 double dval = 0;
901
902 const char* end = m_buffer8.end();
903 for (const char* p = m_buffer8.data(); p < end; ++p) {
904 dval *= 8;
905 dval += *p - '0';
906 }
907 if (dval >= mantissaOverflowLowerBound)
908 dval = parseIntOverflow(m_buffer8.data(), end - m_buffer8.data(), 8);
909
910 m_buffer8.resize(0);
911
912 lvalp->doubleValue = dval;
913 goto doneNumeric;
914}
915
916inHex: {
917 do {
918 record8(m_current);
919 shift();
920 } while (isASCIIHexDigit(m_current));
921
922 double dval = 0;
923
924 const char* end = m_buffer8.end();
925 for (const char* p = m_buffer8.data(); p < end; ++p) {
926 dval *= 16;
927 dval += toASCIIHexValue(*p);
928 }
929 if (dval >= mantissaOverflowLowerBound)
930 dval = parseIntOverflow(m_buffer8.data(), end - m_buffer8.data(), 16);
931
932 m_buffer8.resize(0);
933
934 lvalp->doubleValue = dval;
935 goto doneNumeric;
936}
937
938startNumber:
939 record8(m_current);
940 shift();
941 while (isASCIIDigit(m_current)) {
942 record8(m_current);
943 shift();
944 }
945 if (m_current == '.') {
946 record8('.');
947 shift();
948 goto inNumberAfterDecimalPoint;
949 }
950 if ((m_current | 0x20) == 'e') {
951 record8('e');
952 shift();
953 goto inExponentIndicator;
954 }
955
956 // Fall through into doneNumber.
957
958doneNumber:
959 // Null-terminate string for strtod.
960 m_buffer8.append('\0');
961 lvalp->doubleValue = WTF::strtod(m_buffer8.data(), 0);
962 m_buffer8.resize(0);
963
964 // Fall through into doneNumeric.
965
966doneNumeric:
967 // No identifiers allowed directly after numeric literal, e.g. "3in" is bad.
968 if (UNLIKELY(isIdentStart(m_current)))
969 goto returnError;
970
971 m_atLineStart = false;
972 m_delimited = false;
973 token = NUMBER;
974 goto returnToken;
975
976doneSemicolon:
977 token = ';';
978 m_delimited = true;
979 goto returnToken;
980
981doneIdentifier:
982 m_atLineStart = false;
983 m_delimited = false;
984 lvalp->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
985 m_buffer16.resize(0);
986 token = IDENT;
987 goto returnToken;
988
989doneIdentifierOrKeyword: {
990 m_atLineStart = false;
991 m_delimited = false;
992 m_buffer16.resize(0);
993 const HashEntry* entry = m_keywordTable.entry(m_globalData, *lvalp->ident);
994 token = entry ? entry->lexerValue() : static_cast<int>(IDENT);
995
996 // Fall through into returnToken.
997}
998
999returnToken: {
1000 int lineNumber = m_lineNumber;
1001 llocp->first_line = lineNumber;
1002 llocp->last_line = lineNumber;
1003 llocp->first_column = startOffset;
1004 llocp->last_column = currentOffset();
1005 m_lastToken = token;
1006 return token;
1007}
1008
1009returnError:
1010 m_error = true;
1011 return -1;
1012}
1013
1014bool Lexer::scanRegExp(const Identifier*& pattern, const Identifier*& flags, UChar patternPrefix)
1015{
1016 ASSERT(m_buffer16.isEmpty());
1017
1018 bool lastWasEscape = false;
1019 bool inBrackets = false;
1020
1021 if (patternPrefix) {
1022 ASSERT(!isLineTerminator(patternPrefix));
1023 ASSERT(patternPrefix != '/');
1024 ASSERT(patternPrefix != '[');
1025 record16(patternPrefix);
1026 }
1027
1028 while (true) {
1029 int current = m_current;
1030
1031 if (isLineTerminator(current) || current == -1) {
1032 m_buffer16.resize(0);
1033 return false;
1034 }
1035
1036 shift();
1037
1038 if (current == '/' && !lastWasEscape && !inBrackets)
1039 break;
1040
1041 record16(current);
1042
1043 if (lastWasEscape) {
1044 lastWasEscape = false;
1045 continue;
1046 }
1047
1048 switch (current) {
1049 case '[':
1050 inBrackets = true;
1051 break;
1052 case ']':
1053 inBrackets = false;
1054 break;
1055 case '\\':
1056 lastWasEscape = true;
1057 break;
1058 }
1059 }
1060
1061 pattern = makeIdentifier(m_buffer16.data(), m_buffer16.size());
1062 m_buffer16.resize(0);
1063
1064 while (isIdentPart(m_current)) {
1065 record16(m_current);
1066 shift();
1067 }
1068
1069 flags = makeIdentifier(m_buffer16.data(), m_buffer16.size());
1070 m_buffer16.resize(0);
1071
1072 return true;
1073}
1074
1075bool Lexer::skipRegExp()
1076{
1077 bool lastWasEscape = false;
1078 bool inBrackets = false;
1079
1080 while (true) {
1081 int current = m_current;
1082
1083 if (isLineTerminator(current) || current == -1)
1084 return false;
1085
1086 shift();
1087
1088 if (current == '/' && !lastWasEscape && !inBrackets)
1089 break;
1090
1091 if (lastWasEscape) {
1092 lastWasEscape = false;
1093 continue;
1094 }
1095
1096 switch (current) {
1097 case '[':
1098 inBrackets = true;
1099 break;
1100 case ']':
1101 inBrackets = false;
1102 break;
1103 case '\\':
1104 lastWasEscape = true;
1105 break;
1106 }
1107 }
1108
1109 while (isIdentPart(m_current))
1110 shift();
1111
1112 return true;
1113}
1114
1115void Lexer::clear()
1116{
1117 m_arena = 0;
1118
1119 Vector<char> newBuffer8;
1120 m_buffer8.swap(newBuffer8);
1121
1122 Vector<UChar> newBuffer16;
1123 m_buffer16.swap(newBuffer16);
1124
1125 m_isReparsing = false;
1126}
1127
1128SourceCode Lexer::sourceCode(int openBrace, int closeBrace, int firstLine)
1129{
1130 return SourceCode(m_source->provider(), openBrace, closeBrace + 1, firstLine);
1131}
1132
1133} // namespace JSC
Note: See TracBrowser for help on using the repository browser.