source: webkit/trunk/JavaScriptCore/parser/Lexer.cpp@ 61878

Last change on this file since 61878 was 61878, checked in by [email protected], 15 years ago

2010-06-25 Oliver Hunt <[email protected]>

Reviewed by Geoffrey Garen.

Remove old js parser
https://bugs.webkit.org/show_bug.cgi?id=41222

Remove the old yacc parser. This also solves the tiger problem, which
was a conflict between yacc-generated token values and those in the
custom parser.

  • Android.mk:
  • CMakeLists.txt:
  • DerivedSources.make:
  • DerivedSources.pro:
  • GNUmakefile.am:
  • JavaScriptCore.pro:
  • JavaScriptCore.vcproj/JavaScriptCore/JavaScriptCore.vcproj:
  • JavaScriptCore.xcodeproj/project.pbxproj:
  • parser/Grammar.y: Removed.
  • parser/JSParser.cpp:
  • parser/JSParser.h:
  • parser/Lexer.cpp:
  • parser/NodeConstructors.h: (JSC::Node::Node):
  • parser/Parser.cpp: (JSC::Parser::parse):
  • wtf/Platform.h:
  • Property svn:eol-style set to native
File size: 26.2 KB
Line 
1/*
2 * Copyright (C) 1999-2000 Harri Porten ([email protected])
3 * Copyright (C) 2006, 2007, 2008, 2009 Apple Inc. All Rights Reserved.
4 * Copyright (C) 2007 Cameron Zwarich ([email protected])
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Library General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Library General Public License for more details.
15 *
16 * You should have received a copy of the GNU Library General Public License
17 * along with this library; see the file COPYING.LIB. If not, write to
18 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19 * Boston, MA 02110-1301, USA.
20 *
21 */
22
23#include "config.h"
24#include "Lexer.h"
25
26#include "JSFunction.h"
27
28#include "JSGlobalObjectFunctions.h"
29#include "Identifier.h"
30#include "NodeInfo.h"
31#include "Nodes.h"
32#include "dtoa.h"
33#include <ctype.h>
34#include <limits.h>
35#include <string.h>
36#include <wtf/Assertions.h>
37
38using namespace WTF;
39using namespace Unicode;
40
41#include "JSParser.h"
42#include "Lookup.h"
43#include "Lexer.lut.h"
44
45namespace JSC {
46
47static const UChar byteOrderMark = 0xFEFF;
48
// Constructs a lexer bound to the given JSGlobalData. The lexer starts out
// not reparsing, and its keyword lookup table is initialized from JSC::mainTable.
// No source is attached here; see setCode().
Lexer::Lexer(JSGlobalData* globalData)
    : m_isReparsing(false)
    , m_globalData(globalData)
    , m_keywordTable(JSC::mainTable)
{
}
55
// Destroys the lexer, asking the keyword table to delete its table.
Lexer::~Lexer()
{
    m_keywordTable.deleteTable();
}
60
61inline const UChar* Lexer::currentCharacter() const
62{
63 return m_code - 4;
64}
65
66inline int Lexer::currentOffset() const
67{
68 return currentCharacter() - m_codeStart;
69}
70
71ALWAYS_INLINE void Lexer::shift1()
72{
73 m_current = m_next1;
74 m_next1 = m_next2;
75 m_next2 = m_next3;
76 if (LIKELY(m_code < m_codeEnd))
77 m_next3 = m_code[0];
78 else
79 m_next3 = -1;
80
81 ++m_code;
82}
83
84ALWAYS_INLINE void Lexer::shift2()
85{
86 m_current = m_next2;
87 m_next1 = m_next3;
88 if (LIKELY(m_code + 1 < m_codeEnd)) {
89 m_next2 = m_code[0];
90 m_next3 = m_code[1];
91 } else {
92 m_next2 = m_code < m_codeEnd ? m_code[0] : -1;
93 m_next3 = -1;
94 }
95
96 m_code += 2;
97}
98
99ALWAYS_INLINE void Lexer::shift3()
100{
101 m_current = m_next3;
102 if (LIKELY(m_code + 2 < m_codeEnd)) {
103 m_next1 = m_code[0];
104 m_next2 = m_code[1];
105 m_next3 = m_code[2];
106 } else {
107 m_next1 = m_code < m_codeEnd ? m_code[0] : -1;
108 m_next2 = m_code + 1 < m_codeEnd ? m_code[1] : -1;
109 m_next3 = -1;
110 }
111
112 m_code += 3;
113}
114
115ALWAYS_INLINE void Lexer::shift4()
116{
117 if (LIKELY(m_code + 3 < m_codeEnd)) {
118 m_current = m_code[0];
119 m_next1 = m_code[1];
120 m_next2 = m_code[2];
121 m_next3 = m_code[3];
122 } else {
123 m_current = m_code < m_codeEnd ? m_code[0] : -1;
124 m_next1 = m_code + 1 < m_codeEnd ? m_code[1] : -1;
125 m_next2 = m_code + 2 < m_codeEnd ? m_code[2] : -1;
126 m_next3 = -1;
127 }
128
129 m_code += 4;
130}
131
132void Lexer::setCode(const SourceCode& source, ParserArena& arena)
133{
134 m_arena = &arena.identifierArena();
135
136 m_lineNumber = source.firstLine();
137 m_delimited = false;
138 m_lastToken = -1;
139
140 const UChar* data = source.provider()->data();
141
142 m_source = &source;
143 m_codeStart = data;
144 m_code = data + source.startOffset();
145 m_codeEnd = data + source.endOffset();
146 m_error = false;
147 m_atLineStart = true;
148
149 m_buffer8.reserveInitialCapacity(initialReadBufferCapacity);
150 m_buffer16.reserveInitialCapacity((m_codeEnd - m_code) / 2);
151
152 // ECMA-262 calls for stripping all Cf characters, but we only strip BOM characters.
153 // See <https://bugs.webkit.org/show_bug.cgi?id=4931> for details.
154 if (source.provider()->hasBOMs()) {
155 for (const UChar* p = m_codeStart; p < m_codeEnd; ++p) {
156 if (UNLIKELY(*p == byteOrderMark)) {
157 copyCodeWithoutBOMs();
158 break;
159 }
160 }
161 }
162
163 // Read the first characters into the 4-character buffer.
164 shift4();
165 ASSERT(currentOffset() == source.startOffset());
166}
167
168void Lexer::copyCodeWithoutBOMs()
169{
170 // Note: In this case, the character offset data for debugging will be incorrect.
171 // If it's important to correctly debug code with extraneous BOMs, then the caller
172 // should strip the BOMs when creating the SourceProvider object and do its own
173 // mapping of offsets within the stripped text to original text offset.
174
175 m_codeWithoutBOMs.reserveCapacity(m_codeEnd - m_code);
176 for (const UChar* p = m_code; p < m_codeEnd; ++p) {
177 UChar c = *p;
178 if (c != byteOrderMark)
179 m_codeWithoutBOMs.append(c);
180 }
181 ptrdiff_t startDelta = m_codeStart - m_code;
182 m_code = m_codeWithoutBOMs.data();
183 m_codeStart = m_code + startDelta;
184 m_codeEnd = m_codeWithoutBOMs.data() + m_codeWithoutBOMs.size();
185}
186
187void Lexer::shiftLineTerminator()
188{
189 ASSERT(isLineTerminator(m_current));
190
191 // Allow both CRLF and LFCR.
192 if (m_current + m_next1 == '\n' + '\r')
193 shift2();
194 else
195 shift1();
196
197 ++m_lineNumber;
198}
199
200ALWAYS_INLINE const Identifier* Lexer::makeIdentifier(const UChar* characters, size_t length)
201{
202 return &m_arena->makeIdentifier(m_globalData, characters, length);
203}
204
205inline bool Lexer::lastTokenWasRestrKeyword() const
206{
207 return m_lastToken == CONTINUE || m_lastToken == BREAK || m_lastToken == RETURN || m_lastToken == THROW;
208}
209
210static NEVER_INLINE bool isNonASCIIIdentStart(int c)
211{
212 return category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other);
213}
214
215static inline bool isIdentStart(int c)
216{
217 return isASCII(c) ? isASCIIAlpha(c) || c == '$' || c == '_' : isNonASCIIIdentStart(c);
218}
219
220static NEVER_INLINE bool isNonASCIIIdentPart(int c)
221{
222 return category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other
223 | Mark_NonSpacing | Mark_SpacingCombining | Number_DecimalDigit | Punctuation_Connector);
224}
225
226static inline bool isIdentPart(int c)
227{
228 return isASCII(c) ? isASCIIAlphanumeric(c) || c == '$' || c == '_' : isNonASCIIIdentPart(c);
229}
230
// Translates the character following a backslash in a single-character escape
// sequence: b, t, n, v, f and r map to the control characters 0x08-0x0D they
// denote; every other character is returned unchanged.
static inline int singleEscape(int c)
{
    if (c == 'b')
        return 0x08;
    if (c == 't')
        return 0x09;
    if (c == 'n')
        return 0x0A;
    if (c == 'v')
        return 0x0B;
    if (c == 'f')
        return 0x0C;
    if (c == 'r')
        return 0x0D;
    return c;
}
250
251inline void Lexer::record8(int c)
252{
253 ASSERT(c >= 0);
254 ASSERT(c <= 0xFF);
255 m_buffer8.append(static_cast<char>(c));
256}
257
// Appends one UTF-16 code unit to the 16-bit buffer.
inline void Lexer::record16(UChar c)
{
    m_buffer16.append(c);
}
262
263inline void Lexer::record16(int c)
264{
265 ASSERT(c >= 0);
266 ASSERT(c <= USHRT_MAX);
267 record16(UChar(static_cast<unsigned short>(c)));
268}
269
270int Lexer::lex(void* p1, void* p2)
271{
272 ASSERT(!m_error);
273 ASSERT(m_buffer8.isEmpty());
274 ASSERT(m_buffer16.isEmpty());
275
276 YYSTYPE* lvalp = static_cast<YYSTYPE*>(p1);
277 YYLTYPE* llocp = static_cast<YYLTYPE*>(p2);
278 int token = 0;
279 m_terminator = false;
280
281start:
282 while (isWhiteSpace(m_current))
283 shift1();
284
285 int startOffset = currentOffset();
286
287 if (m_current == -1) {
288 if (!m_terminator && !m_delimited && !m_isReparsing) {
289 // automatic semicolon insertion if program incomplete
290 token = ';';
291 goto doneSemicolon;
292 }
293 return 0;
294 }
295
296 m_delimited = false;
297 switch (m_current) {
298 case '>':
299 if (m_next1 == '>' && m_next2 == '>') {
300 if (m_next3 == '=') {
301 shift4();
302 token = URSHIFTEQUAL;
303 break;
304 }
305 shift3();
306 token = URSHIFT;
307 break;
308 }
309 if (m_next1 == '>') {
310 if (m_next2 == '=') {
311 shift3();
312 token = RSHIFTEQUAL;
313 break;
314 }
315 shift2();
316 token = RSHIFT;
317 break;
318 }
319 if (m_next1 == '=') {
320 shift2();
321 token = GE;
322 break;
323 }
324 shift1();
325 token = '>';
326 break;
327 case '=':
328 if (m_next1 == '=') {
329 if (m_next2 == '=') {
330 shift3();
331 token = STREQ;
332 break;
333 }
334 shift2();
335 token = EQEQ;
336 break;
337 }
338 shift1();
339 token = '=';
340 break;
341 case '!':
342 if (m_next1 == '=') {
343 if (m_next2 == '=') {
344 shift3();
345 token = STRNEQ;
346 break;
347 }
348 shift2();
349 token = NE;
350 break;
351 }
352 shift1();
353 token = '!';
354 break;
355 case '<':
356 if (m_next1 == '!' && m_next2 == '-' && m_next3 == '-') {
357 // <!-- marks the beginning of a line comment (for www usage)
358 shift4();
359 goto inSingleLineComment;
360 }
361 if (m_next1 == '<') {
362 if (m_next2 == '=') {
363 shift3();
364 token = LSHIFTEQUAL;
365 break;
366 }
367 shift2();
368 token = LSHIFT;
369 break;
370 }
371 if (m_next1 == '=') {
372 shift2();
373 token = LE;
374 break;
375 }
376 shift1();
377 token = '<';
378 break;
379 case '+':
380 if (m_next1 == '+') {
381 shift2();
382 if (m_terminator) {
383 token = AUTOPLUSPLUS;
384 break;
385 }
386 token = PLUSPLUS;
387 break;
388 }
389 if (m_next1 == '=') {
390 shift2();
391 token = PLUSEQUAL;
392 break;
393 }
394 shift1();
395 token = '+';
396 break;
397 case '-':
398 if (m_next1 == '-') {
399 if (m_atLineStart && m_next2 == '>') {
400 shift3();
401 goto inSingleLineComment;
402 }
403 shift2();
404 if (m_terminator) {
405 token = AUTOMINUSMINUS;
406 break;
407 }
408 token = MINUSMINUS;
409 break;
410 }
411 if (m_next1 == '=') {
412 shift2();
413 token = MINUSEQUAL;
414 break;
415 }
416 shift1();
417 token = '-';
418 break;
419 case '*':
420 if (m_next1 == '=') {
421 shift2();
422 token = MULTEQUAL;
423 break;
424 }
425 shift1();
426 token = '*';
427 break;
428 case '/':
429 if (m_next1 == '/') {
430 shift2();
431 goto inSingleLineComment;
432 }
433 if (m_next1 == '*')
434 goto inMultiLineComment;
435 if (m_next1 == '=') {
436 shift2();
437 token = DIVEQUAL;
438 break;
439 }
440 shift1();
441 token = '/';
442 break;
443 case '&':
444 if (m_next1 == '&') {
445 shift2();
446 token = AND;
447 break;
448 }
449 if (m_next1 == '=') {
450 shift2();
451 token = ANDEQUAL;
452 break;
453 }
454 shift1();
455 token = '&';
456 break;
457 case '^':
458 if (m_next1 == '=') {
459 shift2();
460 token = XOREQUAL;
461 break;
462 }
463 shift1();
464 token = '^';
465 break;
466 case '%':
467 if (m_next1 == '=') {
468 shift2();
469 token = MODEQUAL;
470 break;
471 }
472 shift1();
473 token = '%';
474 break;
475 case '|':
476 if (m_next1 == '=') {
477 shift2();
478 token = OREQUAL;
479 break;
480 }
481 if (m_next1 == '|') {
482 shift2();
483 token = OR;
484 break;
485 }
486 shift1();
487 token = '|';
488 break;
489 case '.':
490 if (isASCIIDigit(m_next1)) {
491 record8('.');
492 shift1();
493 goto inNumberAfterDecimalPoint;
494 }
495 token = '.';
496 shift1();
497 break;
498 case ',':
499 case '~':
500 case '?':
501 case ':':
502 case '(':
503 case ')':
504 case '[':
505 case ']':
506 token = m_current;
507 shift1();
508 break;
509 case ';':
510 shift1();
511 m_delimited = true;
512 token = ';';
513 break;
514 case '{':
515 lvalp->intValue = currentOffset();
516 shift1();
517 token = OPENBRACE;
518 break;
519 case '}':
520 lvalp->intValue = currentOffset();
521 shift1();
522 m_delimited = true;
523 token = CLOSEBRACE;
524 break;
525 case '\\':
526 goto startIdentifierWithBackslash;
527 case '0':
528 goto startNumberWithZeroDigit;
529 case '1':
530 case '2':
531 case '3':
532 case '4':
533 case '5':
534 case '6':
535 case '7':
536 case '8':
537 case '9':
538 goto startNumber;
539 case '"':
540 case '\'':
541 goto startString;
542 default:
543 if (isIdentStart(m_current))
544 goto startIdentifierOrKeyword;
545 if (isLineTerminator(m_current)) {
546 shiftLineTerminator();
547 m_atLineStart = true;
548 m_terminator = true;
549 if (lastTokenWasRestrKeyword()) {
550 token = ';';
551 goto doneSemicolon;
552 }
553 goto start;
554 }
555 goto returnError;
556 }
557
558 m_atLineStart = false;
559 goto returnToken;
560
561startString: {
562 int stringQuoteCharacter = m_current;
563 shift1();
564
565 const UChar* stringStart = currentCharacter();
566 while (m_current != stringQuoteCharacter) {
567 // Fast check for characters that require special handling.
568 // Catches -1, \n, \r, \, 0x2028, and 0x2029 as efficiently
569 // as possible, and lets through all common ASCII characters.
570 if (UNLIKELY(m_current == '\\') || UNLIKELY(((static_cast<unsigned>(m_current) - 0xE) & 0x2000))) {
571 m_buffer16.append(stringStart, currentCharacter() - stringStart);
572 goto inString;
573 }
574 shift1();
575 }
576 lvalp->ident = makeIdentifier(stringStart, currentCharacter() - stringStart);
577 shift1();
578 m_atLineStart = false;
579 m_delimited = false;
580 token = STRING;
581 goto returnToken;
582
583inString:
584 while (m_current != stringQuoteCharacter) {
585 if (m_current == '\\')
586 goto inStringEscapeSequence;
587 if (UNLIKELY(isLineTerminator(m_current)))
588 goto returnError;
589 if (UNLIKELY(m_current == -1))
590 goto returnError;
591 record16(m_current);
592 shift1();
593 }
594 goto doneString;
595
596inStringEscapeSequence:
597 shift1();
598 if (m_current == 'x') {
599 shift1();
600 if (isASCIIHexDigit(m_current) && isASCIIHexDigit(m_next1)) {
601 record16(convertHex(m_current, m_next1));
602 shift2();
603 goto inString;
604 }
605 record16('x');
606 if (m_current == stringQuoteCharacter)
607 goto doneString;
608 goto inString;
609 }
610 if (m_current == 'u') {
611 shift1();
612 if (isASCIIHexDigit(m_current) && isASCIIHexDigit(m_next1) && isASCIIHexDigit(m_next2) && isASCIIHexDigit(m_next3)) {
613 record16(convertUnicode(m_current, m_next1, m_next2, m_next3));
614 shift4();
615 goto inString;
616 }
617 if (m_current == stringQuoteCharacter) {
618 record16('u');
619 goto doneString;
620 }
621 goto returnError;
622 }
623 if (isASCIIOctalDigit(m_current)) {
624 if (m_current >= '0' && m_current <= '3' && isASCIIOctalDigit(m_next1) && isASCIIOctalDigit(m_next2)) {
625 record16((m_current - '0') * 64 + (m_next1 - '0') * 8 + m_next2 - '0');
626 shift3();
627 goto inString;
628 }
629 if (isASCIIOctalDigit(m_next1)) {
630 record16((m_current - '0') * 8 + m_next1 - '0');
631 shift2();
632 goto inString;
633 }
634 record16(m_current - '0');
635 shift1();
636 goto inString;
637 }
638 if (isLineTerminator(m_current)) {
639 shiftLineTerminator();
640 goto inString;
641 }
642 if (m_current == -1)
643 goto returnError;
644 record16(singleEscape(m_current));
645 shift1();
646 goto inString;
647}
648
649startIdentifierWithBackslash:
650 shift1();
651 if (UNLIKELY(m_current != 'u'))
652 goto returnError;
653 shift1();
654 if (UNLIKELY(!isASCIIHexDigit(m_current) || !isASCIIHexDigit(m_next1) || !isASCIIHexDigit(m_next2) || !isASCIIHexDigit(m_next3)))
655 goto returnError;
656 token = convertUnicode(m_current, m_next1, m_next2, m_next3);
657 if (UNLIKELY(!isIdentStart(token)))
658 goto returnError;
659 goto inIdentifierAfterCharacterCheck;
660
661startIdentifierOrKeyword: {
662 const UChar* identifierStart = currentCharacter();
663 shift1();
664 while (isIdentPart(m_current))
665 shift1();
666 if (LIKELY(m_current != '\\')) {
667 lvalp->ident = makeIdentifier(identifierStart, currentCharacter() - identifierStart);
668 goto doneIdentifierOrKeyword;
669 }
670 m_buffer16.append(identifierStart, currentCharacter() - identifierStart);
671}
672
673 do {
674 shift1();
675 if (UNLIKELY(m_current != 'u'))
676 goto returnError;
677 shift1();
678 if (UNLIKELY(!isASCIIHexDigit(m_current) || !isASCIIHexDigit(m_next1) || !isASCIIHexDigit(m_next2) || !isASCIIHexDigit(m_next3)))
679 goto returnError;
680 token = convertUnicode(m_current, m_next1, m_next2, m_next3);
681 if (UNLIKELY(!isIdentPart(token)))
682 goto returnError;
683inIdentifierAfterCharacterCheck:
684 record16(token);
685 shift4();
686
687 while (isIdentPart(m_current)) {
688 record16(m_current);
689 shift1();
690 }
691 } while (UNLIKELY(m_current == '\\'));
692 goto doneIdentifier;
693
694inSingleLineComment:
695 while (!isLineTerminator(m_current)) {
696 if (UNLIKELY(m_current == -1))
697 return 0;
698 shift1();
699 }
700 shiftLineTerminator();
701 m_atLineStart = true;
702 m_terminator = true;
703 if (lastTokenWasRestrKeyword())
704 goto doneSemicolon;
705 goto start;
706
707inMultiLineComment:
708 shift2();
709 while (m_current != '*' || m_next1 != '/') {
710 if (isLineTerminator(m_current))
711 shiftLineTerminator();
712 else {
713 shift1();
714 if (UNLIKELY(m_current == -1))
715 goto returnError;
716 }
717 }
718 shift2();
719 m_atLineStart = false;
720 goto start;
721
722startNumberWithZeroDigit:
723 shift1();
724 if ((m_current | 0x20) == 'x' && isASCIIHexDigit(m_next1)) {
725 shift1();
726 goto inHex;
727 }
728 if (m_current == '.') {
729 record8('0');
730 record8('.');
731 shift1();
732 goto inNumberAfterDecimalPoint;
733 }
734 if ((m_current | 0x20) == 'e') {
735 record8('0');
736 record8('e');
737 shift1();
738 goto inExponentIndicator;
739 }
740 if (isASCIIOctalDigit(m_current))
741 goto inOctal;
742 if (isASCIIDigit(m_current))
743 goto startNumber;
744 lvalp->doubleValue = 0;
745 goto doneNumeric;
746
747inNumberAfterDecimalPoint:
748 while (isASCIIDigit(m_current)) {
749 record8(m_current);
750 shift1();
751 }
752 if ((m_current | 0x20) == 'e') {
753 record8('e');
754 shift1();
755 goto inExponentIndicator;
756 }
757 goto doneNumber;
758
759inExponentIndicator:
760 if (m_current == '+' || m_current == '-') {
761 record8(m_current);
762 shift1();
763 }
764 if (!isASCIIDigit(m_current))
765 goto returnError;
766 do {
767 record8(m_current);
768 shift1();
769 } while (isASCIIDigit(m_current));
770 goto doneNumber;
771
772inOctal: {
773 do {
774 record8(m_current);
775 shift1();
776 } while (isASCIIOctalDigit(m_current));
777 if (isASCIIDigit(m_current))
778 goto startNumber;
779
780 double dval = 0;
781
782 const char* end = m_buffer8.end();
783 for (const char* p = m_buffer8.data(); p < end; ++p) {
784 dval *= 8;
785 dval += *p - '0';
786 }
787 if (dval >= mantissaOverflowLowerBound)
788 dval = parseIntOverflow(m_buffer8.data(), end - m_buffer8.data(), 8);
789
790 m_buffer8.resize(0);
791
792 lvalp->doubleValue = dval;
793 goto doneNumeric;
794}
795
796inHex: {
797 do {
798 record8(m_current);
799 shift1();
800 } while (isASCIIHexDigit(m_current));
801
802 double dval = 0;
803
804 const char* end = m_buffer8.end();
805 for (const char* p = m_buffer8.data(); p < end; ++p) {
806 dval *= 16;
807 dval += toASCIIHexValue(*p);
808 }
809 if (dval >= mantissaOverflowLowerBound)
810 dval = parseIntOverflow(m_buffer8.data(), end - m_buffer8.data(), 16);
811
812 m_buffer8.resize(0);
813
814 lvalp->doubleValue = dval;
815 goto doneNumeric;
816}
817
818startNumber:
819 record8(m_current);
820 shift1();
821 while (isASCIIDigit(m_current)) {
822 record8(m_current);
823 shift1();
824 }
825 if (m_current == '.') {
826 record8('.');
827 shift1();
828 goto inNumberAfterDecimalPoint;
829 }
830 if ((m_current | 0x20) == 'e') {
831 record8('e');
832 shift1();
833 goto inExponentIndicator;
834 }
835
836 // Fall through into doneNumber.
837
838doneNumber:
839 // Null-terminate string for strtod.
840 m_buffer8.append('\0');
841 lvalp->doubleValue = WTF::strtod(m_buffer8.data(), 0);
842 m_buffer8.resize(0);
843
844 // Fall through into doneNumeric.
845
846doneNumeric:
847 // No identifiers allowed directly after numeric literal, e.g. "3in" is bad.
848 if (UNLIKELY(isIdentStart(m_current)))
849 goto returnError;
850
851 m_atLineStart = false;
852 m_delimited = false;
853 token = NUMBER;
854 goto returnToken;
855
856doneSemicolon:
857 token = ';';
858 m_delimited = true;
859 goto returnToken;
860
861doneIdentifier:
862 m_atLineStart = false;
863 m_delimited = false;
864 lvalp->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
865 m_buffer16.resize(0);
866 token = IDENT;
867 goto returnToken;
868
869doneIdentifierOrKeyword: {
870 m_atLineStart = false;
871 m_delimited = false;
872 m_buffer16.resize(0);
873 const HashEntry* entry = m_keywordTable.entry(m_globalData, *lvalp->ident);
874 token = entry ? entry->lexerValue() : static_cast<int>(IDENT);
875 goto returnToken;
876}
877
878doneString:
879 // Atomize constant strings in case they're later used in property lookup.
880 shift1();
881 m_atLineStart = false;
882 m_delimited = false;
883 lvalp->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
884 m_buffer16.resize(0);
885 token = STRING;
886
887 // Fall through into returnToken.
888
889returnToken: {
890 int lineNumber = m_lineNumber;
891 llocp->first_line = lineNumber;
892 llocp->last_line = lineNumber;
893 llocp->first_column = startOffset;
894 llocp->last_column = currentOffset();
895 m_lastToken = token;
896 return token;
897}
898
899returnError:
900 m_error = true;
901 return -1;
902}
903
904bool Lexer::scanRegExp(const Identifier*& pattern, const Identifier*& flags, UChar patternPrefix)
905{
906 ASSERT(m_buffer16.isEmpty());
907
908 bool lastWasEscape = false;
909 bool inBrackets = false;
910
911 if (patternPrefix) {
912 ASSERT(!isLineTerminator(patternPrefix));
913 ASSERT(patternPrefix != '/');
914 ASSERT(patternPrefix != '[');
915 record16(patternPrefix);
916 }
917
918 while (true) {
919 int current = m_current;
920
921 if (isLineTerminator(current) || current == -1) {
922 m_buffer16.resize(0);
923 return false;
924 }
925
926 shift1();
927
928 if (current == '/' && !lastWasEscape && !inBrackets)
929 break;
930
931 record16(current);
932
933 if (lastWasEscape) {
934 lastWasEscape = false;
935 continue;
936 }
937
938 switch (current) {
939 case '[':
940 inBrackets = true;
941 break;
942 case ']':
943 inBrackets = false;
944 break;
945 case '\\':
946 lastWasEscape = true;
947 break;
948 }
949 }
950
951 pattern = makeIdentifier(m_buffer16.data(), m_buffer16.size());
952 m_buffer16.resize(0);
953
954 while (isIdentPart(m_current)) {
955 record16(m_current);
956 shift1();
957 }
958
959 flags = makeIdentifier(m_buffer16.data(), m_buffer16.size());
960 m_buffer16.resize(0);
961
962 return true;
963}
964
965bool Lexer::skipRegExp()
966{
967 bool lastWasEscape = false;
968 bool inBrackets = false;
969
970 while (true) {
971 int current = m_current;
972
973 if (isLineTerminator(current) || current == -1)
974 return false;
975
976 shift1();
977
978 if (current == '/' && !lastWasEscape && !inBrackets)
979 break;
980
981 if (lastWasEscape) {
982 lastWasEscape = false;
983 continue;
984 }
985
986 switch (current) {
987 case '[':
988 inBrackets = true;
989 break;
990 case ']':
991 inBrackets = false;
992 break;
993 case '\\':
994 lastWasEscape = true;
995 break;
996 }
997 }
998
999 while (isIdentPart(m_current))
1000 shift1();
1001
1002 return true;
1003}
1004
1005void Lexer::clear()
1006{
1007 m_arena = 0;
1008 m_codeWithoutBOMs.clear();
1009
1010 Vector<char> newBuffer8;
1011 m_buffer8.swap(newBuffer8);
1012
1013 Vector<UChar> newBuffer16;
1014 m_buffer16.swap(newBuffer16);
1015
1016 m_isReparsing = false;
1017}
1018
1019SourceCode Lexer::sourceCode(int openBrace, int closeBrace, int firstLine)
1020{
1021 if (m_codeWithoutBOMs.isEmpty())
1022 return SourceCode(m_source->provider(), openBrace, closeBrace + 1, firstLine);
1023
1024 const UChar* data = m_source->provider()->data();
1025
1026 ASSERT(openBrace < closeBrace);
1027 int i;
1028 for (i = m_source->startOffset(); i < openBrace; ++i) {
1029 if (data[i] == byteOrderMark) {
1030 openBrace++;
1031 closeBrace++;
1032 }
1033 }
1034 for (; i < closeBrace; ++i) {
1035 if (data[i] == byteOrderMark)
1036 closeBrace++;
1037 }
1038
1039 ASSERT(openBrace < closeBrace);
1040
1041 return SourceCode(m_source->provider(), openBrace, closeBrace + 1, firstLine);
1042}
1043
1044} // namespace JSC
Note: See TracBrowser for help on using the repository browser.