source: webkit/trunk/JavaScriptCore/parser/Lexer.cpp@ 51505

Last change on this file since 51505 was 51505, checked in by [email protected], 15 years ago

2009-11-30 Laszlo Gombos <Laszlo Gombos>

Reviewed by Kenneth Rohde Christiansen.

[Qt] Remove obsolete PLATFORM(KDE) code
https://bugs.webkit.org/show_bug.cgi?id=31958

KDE is now using unpatched QtWebKit.

  • parser/Lexer.cpp: Remove obsolete KDE_USE_FINAL guard
  • wtf/Platform.h: Remove PLATFORM(KDE) definition and code section that is guarded with it.
  • Property svn:eol-style set to native
File size: 26.3 KB
Line 
1/*
2 * Copyright (C) 1999-2000 Harri Porten ([email protected])
3 * Copyright (C) 2006, 2007, 2008, 2009 Apple Inc. All Rights Reserved.
4 * Copyright (C) 2007 Cameron Zwarich ([email protected])
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Library General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Library General Public License for more details.
15 *
16 * You should have received a copy of the GNU Library General Public License
17 * along with this library; see the file COPYING.LIB. If not, write to
18 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19 * Boston, MA 02110-1301, USA.
20 *
21 */
22
23#include "config.h"
24#include "Lexer.h"
25
26#include "JSFunction.h"
27#include "JSGlobalObjectFunctions.h"
28#include "NodeInfo.h"
29#include "Nodes.h"
30#include "dtoa.h"
31#include <ctype.h>
32#include <limits.h>
33#include <string.h>
34#include <wtf/Assertions.h>
35
36using namespace WTF;
37using namespace Unicode;
38
39// We can't specify the namespace in yacc's C output, so do it here instead.
40using namespace JSC;
41
42#include "Grammar.h"
43#include "Lookup.h"
44#include "Lexer.lut.h"
45
46namespace JSC {
47
48static const UChar byteOrderMark = 0xFEFF;
49
50Lexer::Lexer(JSGlobalData* globalData)
51 : m_isReparsing(false)
52 , m_globalData(globalData)
53 , m_keywordTable(JSC::mainTable)
54{
55 m_buffer8.reserveInitialCapacity(initialReadBufferCapacity);
56 m_buffer16.reserveInitialCapacity(initialReadBufferCapacity);
57}
58
59Lexer::~Lexer()
60{
61 m_keywordTable.deleteTable();
62}
63
64inline const UChar* Lexer::currentCharacter() const
65{
66 return m_code - 4;
67}
68
69inline int Lexer::currentOffset() const
70{
71 return currentCharacter() - m_codeStart;
72}
73
74ALWAYS_INLINE void Lexer::shift1()
75{
76 m_current = m_next1;
77 m_next1 = m_next2;
78 m_next2 = m_next3;
79 if (LIKELY(m_code < m_codeEnd))
80 m_next3 = m_code[0];
81 else
82 m_next3 = -1;
83
84 ++m_code;
85}
86
87ALWAYS_INLINE void Lexer::shift2()
88{
89 m_current = m_next2;
90 m_next1 = m_next3;
91 if (LIKELY(m_code + 1 < m_codeEnd)) {
92 m_next2 = m_code[0];
93 m_next3 = m_code[1];
94 } else {
95 m_next2 = m_code < m_codeEnd ? m_code[0] : -1;
96 m_next3 = -1;
97 }
98
99 m_code += 2;
100}
101
102ALWAYS_INLINE void Lexer::shift3()
103{
104 m_current = m_next3;
105 if (LIKELY(m_code + 2 < m_codeEnd)) {
106 m_next1 = m_code[0];
107 m_next2 = m_code[1];
108 m_next3 = m_code[2];
109 } else {
110 m_next1 = m_code < m_codeEnd ? m_code[0] : -1;
111 m_next2 = m_code + 1 < m_codeEnd ? m_code[1] : -1;
112 m_next3 = -1;
113 }
114
115 m_code += 3;
116}
117
118ALWAYS_INLINE void Lexer::shift4()
119{
120 if (LIKELY(m_code + 3 < m_codeEnd)) {
121 m_current = m_code[0];
122 m_next1 = m_code[1];
123 m_next2 = m_code[2];
124 m_next3 = m_code[3];
125 } else {
126 m_current = m_code < m_codeEnd ? m_code[0] : -1;
127 m_next1 = m_code + 1 < m_codeEnd ? m_code[1] : -1;
128 m_next2 = m_code + 2 < m_codeEnd ? m_code[2] : -1;
129 m_next3 = -1;
130 }
131
132 m_code += 4;
133}
134
135void Lexer::setCode(const SourceCode& source, ParserArena& arena)
136{
137 m_arena = &arena.identifierArena();
138
139 m_lineNumber = source.firstLine();
140 m_delimited = false;
141 m_lastToken = -1;
142
143 const UChar* data = source.provider()->data();
144
145 m_source = &source;
146 m_codeStart = data;
147 m_code = data + source.startOffset();
148 m_codeEnd = data + source.endOffset();
149 m_error = false;
150 m_atLineStart = true;
151
152 // ECMA-262 calls for stripping all Cf characters, but we only strip BOM characters.
153 // See <https://bugs.webkit.org/show_bug.cgi?id=4931> for details.
154 if (source.provider()->hasBOMs()) {
155 for (const UChar* p = m_codeStart; p < m_codeEnd; ++p) {
156 if (UNLIKELY(*p == byteOrderMark)) {
157 copyCodeWithoutBOMs();
158 break;
159 }
160 }
161 }
162
163 // Read the first characters into the 4-character buffer.
164 shift4();
165 ASSERT(currentOffset() == source.startOffset());
166}
167
168void Lexer::copyCodeWithoutBOMs()
169{
170 // Note: In this case, the character offset data for debugging will be incorrect.
171 // If it's important to correctly debug code with extraneous BOMs, then the caller
172 // should strip the BOMs when creating the SourceProvider object and do its own
173 // mapping of offsets within the stripped text to original text offset.
174
175 m_codeWithoutBOMs.reserveCapacity(m_codeEnd - m_code);
176 for (const UChar* p = m_code; p < m_codeEnd; ++p) {
177 UChar c = *p;
178 if (c != byteOrderMark)
179 m_codeWithoutBOMs.append(c);
180 }
181 ptrdiff_t startDelta = m_codeStart - m_code;
182 m_code = m_codeWithoutBOMs.data();
183 m_codeStart = m_code + startDelta;
184 m_codeEnd = m_codeWithoutBOMs.data() + m_codeWithoutBOMs.size();
185}
186
187void Lexer::shiftLineTerminator()
188{
189 ASSERT(isLineTerminator(m_current));
190
191 // Allow both CRLF and LFCR.
192 if (m_current + m_next1 == '\n' + '\r')
193 shift2();
194 else
195 shift1();
196
197 ++m_lineNumber;
198}
199
200ALWAYS_INLINE const Identifier* Lexer::makeIdentifier(const UChar* characters, size_t length)
201{
202 return &m_arena->makeIdentifier(m_globalData, characters, length);
203}
204
205inline bool Lexer::lastTokenWasRestrKeyword() const
206{
207 return m_lastToken == CONTINUE || m_lastToken == BREAK || m_lastToken == RETURN || m_lastToken == THROW;
208}
209
210static NEVER_INLINE bool isNonASCIIIdentStart(int c)
211{
212 return category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other);
213}
214
215static inline bool isIdentStart(int c)
216{
217 return isASCII(c) ? isASCIIAlpha(c) || c == '$' || c == '_' : isNonASCIIIdentStart(c);
218}
219
220static NEVER_INLINE bool isNonASCIIIdentPart(int c)
221{
222 return category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other
223 | Mark_NonSpacing | Mark_SpacingCombining | Number_DecimalDigit | Punctuation_Connector);
224}
225
226static inline bool isIdentPart(int c)
227{
228 return isASCII(c) ? isASCIIAlphanumeric(c) || c == '$' || c == '_' : isNonASCIIIdentPart(c);
229}
230
// Maps the character following a backslash to the character it denotes.
// b, t, n, v, f and r become the matching control characters; any other
// value is returned unchanged.
static inline int singleEscape(int c)
{
    if (c == 'b')
        return 0x08;
    if (c == 't')
        return 0x09;
    if (c == 'n')
        return 0x0A;
    if (c == 'v')
        return 0x0B;
    if (c == 'f')
        return 0x0C;
    if (c == 'r')
        return 0x0D;
    return c;
}
250
251inline void Lexer::record8(int c)
252{
253 ASSERT(c >= 0);
254 ASSERT(c <= 0xFF);
255 m_buffer8.append(static_cast<char>(c));
256}
257
258inline void Lexer::record16(UChar c)
259{
260 m_buffer16.append(c);
261}
262
263inline void Lexer::record16(int c)
264{
265 ASSERT(c >= 0);
266 ASSERT(c <= USHRT_MAX);
267 record16(UChar(static_cast<unsigned short>(c)));
268}
269
270int Lexer::lex(void* p1, void* p2)
271{
272 ASSERT(!m_error);
273 ASSERT(m_buffer8.isEmpty());
274 ASSERT(m_buffer16.isEmpty());
275
276 YYSTYPE* lvalp = static_cast<YYSTYPE*>(p1);
277 YYLTYPE* llocp = static_cast<YYLTYPE*>(p2);
278 int token = 0;
279 m_terminator = false;
280
281start:
282 while (isWhiteSpace(m_current))
283 shift1();
284
285 int startOffset = currentOffset();
286
287 if (m_current == -1) {
288 if (!m_terminator && !m_delimited && !m_isReparsing) {
289 // automatic semicolon insertion if program incomplete
290 token = ';';
291 goto doneSemicolon;
292 }
293 return 0;
294 }
295
296 m_delimited = false;
297 switch (m_current) {
298 case '>':
299 if (m_next1 == '>' && m_next2 == '>') {
300 if (m_next3 == '=') {
301 shift4();
302 token = URSHIFTEQUAL;
303 break;
304 }
305 shift3();
306 token = URSHIFT;
307 break;
308 }
309 if (m_next1 == '>') {
310 if (m_next2 == '=') {
311 shift3();
312 token = RSHIFTEQUAL;
313 break;
314 }
315 shift2();
316 token = RSHIFT;
317 break;
318 }
319 if (m_next1 == '=') {
320 shift2();
321 token = GE;
322 break;
323 }
324 shift1();
325 token = '>';
326 break;
327 case '=':
328 if (m_next1 == '=') {
329 if (m_next2 == '=') {
330 shift3();
331 token = STREQ;
332 break;
333 }
334 shift2();
335 token = EQEQ;
336 break;
337 }
338 shift1();
339 token = '=';
340 break;
341 case '!':
342 if (m_next1 == '=') {
343 if (m_next2 == '=') {
344 shift3();
345 token = STRNEQ;
346 break;
347 }
348 shift2();
349 token = NE;
350 break;
351 }
352 shift1();
353 token = '!';
354 break;
355 case '<':
356 if (m_next1 == '!' && m_next2 == '-' && m_next3 == '-') {
357 // <!-- marks the beginning of a line comment (for www usage)
358 shift4();
359 goto inSingleLineComment;
360 }
361 if (m_next1 == '<') {
362 if (m_next2 == '=') {
363 shift3();
364 token = LSHIFTEQUAL;
365 break;
366 }
367 shift2();
368 token = LSHIFT;
369 break;
370 }
371 if (m_next1 == '=') {
372 shift2();
373 token = LE;
374 break;
375 }
376 shift1();
377 token = '<';
378 break;
379 case '+':
380 if (m_next1 == '+') {
381 shift2();
382 if (m_terminator) {
383 token = AUTOPLUSPLUS;
384 break;
385 }
386 token = PLUSPLUS;
387 break;
388 }
389 if (m_next1 == '=') {
390 shift2();
391 token = PLUSEQUAL;
392 break;
393 }
394 shift1();
395 token = '+';
396 break;
397 case '-':
398 if (m_next1 == '-') {
399 if (m_atLineStart && m_next2 == '>') {
400 shift3();
401 goto inSingleLineComment;
402 }
403 shift2();
404 if (m_terminator) {
405 token = AUTOMINUSMINUS;
406 break;
407 }
408 token = MINUSMINUS;
409 break;
410 }
411 if (m_next1 == '=') {
412 shift2();
413 token = MINUSEQUAL;
414 break;
415 }
416 shift1();
417 token = '-';
418 break;
419 case '*':
420 if (m_next1 == '=') {
421 shift2();
422 token = MULTEQUAL;
423 break;
424 }
425 shift1();
426 token = '*';
427 break;
428 case '/':
429 if (m_next1 == '/') {
430 shift2();
431 goto inSingleLineComment;
432 }
433 if (m_next1 == '*')
434 goto inMultiLineComment;
435 if (m_next1 == '=') {
436 shift2();
437 token = DIVEQUAL;
438 break;
439 }
440 shift1();
441 token = '/';
442 break;
443 case '&':
444 if (m_next1 == '&') {
445 shift2();
446 token = AND;
447 break;
448 }
449 if (m_next1 == '=') {
450 shift2();
451 token = ANDEQUAL;
452 break;
453 }
454 shift1();
455 token = '&';
456 break;
457 case '^':
458 if (m_next1 == '=') {
459 shift2();
460 token = XOREQUAL;
461 break;
462 }
463 shift1();
464 token = '^';
465 break;
466 case '%':
467 if (m_next1 == '=') {
468 shift2();
469 token = MODEQUAL;
470 break;
471 }
472 shift1();
473 token = '%';
474 break;
475 case '|':
476 if (m_next1 == '=') {
477 shift2();
478 token = OREQUAL;
479 break;
480 }
481 if (m_next1 == '|') {
482 shift2();
483 token = OR;
484 break;
485 }
486 shift1();
487 token = '|';
488 break;
489 case '.':
490 if (isASCIIDigit(m_next1)) {
491 record8('.');
492 shift1();
493 goto inNumberAfterDecimalPoint;
494 }
495 token = '.';
496 shift1();
497 break;
498 case ',':
499 case '~':
500 case '?':
501 case ':':
502 case '(':
503 case ')':
504 case '[':
505 case ']':
506 token = m_current;
507 shift1();
508 break;
509 case ';':
510 shift1();
511 m_delimited = true;
512 token = ';';
513 break;
514 case '{':
515 lvalp->intValue = currentOffset();
516 shift1();
517 token = OPENBRACE;
518 break;
519 case '}':
520 lvalp->intValue = currentOffset();
521 shift1();
522 m_delimited = true;
523 token = CLOSEBRACE;
524 break;
525 case '\\':
526 goto startIdentifierWithBackslash;
527 case '0':
528 goto startNumberWithZeroDigit;
529 case '1':
530 case '2':
531 case '3':
532 case '4':
533 case '5':
534 case '6':
535 case '7':
536 case '8':
537 case '9':
538 goto startNumber;
539 case '"':
540 case '\'':
541 goto startString;
542 default:
543 if (isIdentStart(m_current))
544 goto startIdentifierOrKeyword;
545 if (isLineTerminator(m_current)) {
546 shiftLineTerminator();
547 m_atLineStart = true;
548 m_terminator = true;
549 if (lastTokenWasRestrKeyword()) {
550 token = ';';
551 goto doneSemicolon;
552 }
553 goto start;
554 }
555 goto returnError;
556 }
557
558 m_atLineStart = false;
559 goto returnToken;
560
561startString: {
562 int stringQuoteCharacter = m_current;
563 shift1();
564
565 const UChar* stringStart = currentCharacter();
566 while (m_current != stringQuoteCharacter) {
567 // Fast check for characters that require special handling.
568 // Catches -1, \n, \r, \, 0x2028, and 0x2029 as efficiently
569 // as possible, and lets through all common ASCII characters.
570 if (UNLIKELY(m_current == '\\') || UNLIKELY(((static_cast<unsigned>(m_current) - 0xE) & 0x2000))) {
571 m_buffer16.append(stringStart, currentCharacter() - stringStart);
572 goto inString;
573 }
574 shift1();
575 }
576 lvalp->ident = makeIdentifier(stringStart, currentCharacter() - stringStart);
577 shift1();
578 m_atLineStart = false;
579 m_delimited = false;
580 token = STRING;
581 goto returnToken;
582
583inString:
584 while (m_current != stringQuoteCharacter) {
585 if (m_current == '\\')
586 goto inStringEscapeSequence;
587 if (UNLIKELY(isLineTerminator(m_current)))
588 goto returnError;
589 if (UNLIKELY(m_current == -1))
590 goto returnError;
591 record16(m_current);
592 shift1();
593 }
594 goto doneString;
595
596inStringEscapeSequence:
597 shift1();
598 if (m_current == 'x') {
599 shift1();
600 if (isASCIIHexDigit(m_current) && isASCIIHexDigit(m_next1)) {
601 record16(convertHex(m_current, m_next1));
602 shift2();
603 goto inString;
604 }
605 record16('x');
606 if (m_current == stringQuoteCharacter)
607 goto doneString;
608 goto inString;
609 }
610 if (m_current == 'u') {
611 shift1();
612 if (isASCIIHexDigit(m_current) && isASCIIHexDigit(m_next1) && isASCIIHexDigit(m_next2) && isASCIIHexDigit(m_next3)) {
613 record16(convertUnicode(m_current, m_next1, m_next2, m_next3));
614 shift4();
615 goto inString;
616 }
617 if (m_current == stringQuoteCharacter) {
618 record16('u');
619 goto doneString;
620 }
621 goto returnError;
622 }
623 if (isASCIIOctalDigit(m_current)) {
624 if (m_current >= '0' && m_current <= '3' && isASCIIOctalDigit(m_next1) && isASCIIOctalDigit(m_next2)) {
625 record16((m_current - '0') * 64 + (m_next1 - '0') * 8 + m_next2 - '0');
626 shift3();
627 goto inString;
628 }
629 if (isASCIIOctalDigit(m_next1)) {
630 record16((m_current - '0') * 8 + m_next1 - '0');
631 shift2();
632 goto inString;
633 }
634 record16(m_current - '0');
635 shift1();
636 goto inString;
637 }
638 if (isLineTerminator(m_current)) {
639 shiftLineTerminator();
640 goto inString;
641 }
642 record16(singleEscape(m_current));
643 shift1();
644 goto inString;
645}
646
647startIdentifierWithBackslash:
648 shift1();
649 if (UNLIKELY(m_current != 'u'))
650 goto returnError;
651 shift1();
652 if (UNLIKELY(!isASCIIHexDigit(m_current) || !isASCIIHexDigit(m_next1) || !isASCIIHexDigit(m_next2) || !isASCIIHexDigit(m_next3)))
653 goto returnError;
654 token = convertUnicode(m_current, m_next1, m_next2, m_next3);
655 if (UNLIKELY(!isIdentStart(token)))
656 goto returnError;
657 goto inIdentifierAfterCharacterCheck;
658
659startIdentifierOrKeyword: {
660 const UChar* identifierStart = currentCharacter();
661 shift1();
662 while (isIdentPart(m_current))
663 shift1();
664 if (LIKELY(m_current != '\\')) {
665 lvalp->ident = makeIdentifier(identifierStart, currentCharacter() - identifierStart);
666 goto doneIdentifierOrKeyword;
667 }
668 m_buffer16.append(identifierStart, currentCharacter() - identifierStart);
669}
670
671 do {
672 shift1();
673 if (UNLIKELY(m_current != 'u'))
674 goto returnError;
675 shift1();
676 if (UNLIKELY(!isASCIIHexDigit(m_current) || !isASCIIHexDigit(m_next1) || !isASCIIHexDigit(m_next2) || !isASCIIHexDigit(m_next3)))
677 goto returnError;
678 token = convertUnicode(m_current, m_next1, m_next2, m_next3);
679 if (UNLIKELY(!isIdentPart(token)))
680 goto returnError;
681inIdentifierAfterCharacterCheck:
682 record16(token);
683 shift4();
684
685 while (isIdentPart(m_current)) {
686 record16(m_current);
687 shift1();
688 }
689 } while (UNLIKELY(m_current == '\\'));
690 goto doneIdentifier;
691
692inSingleLineComment:
693 while (!isLineTerminator(m_current)) {
694 if (UNLIKELY(m_current == -1))
695 return 0;
696 shift1();
697 }
698 shiftLineTerminator();
699 m_atLineStart = true;
700 m_terminator = true;
701 if (lastTokenWasRestrKeyword())
702 goto doneSemicolon;
703 goto start;
704
705inMultiLineComment:
706 shift2();
707 while (m_current != '*' || m_next1 != '/') {
708 if (isLineTerminator(m_current))
709 shiftLineTerminator();
710 else {
711 shift1();
712 if (UNLIKELY(m_current == -1))
713 goto returnError;
714 }
715 }
716 shift2();
717 m_atLineStart = false;
718 goto start;
719
720startNumberWithZeroDigit:
721 shift1();
722 if ((m_current | 0x20) == 'x' && isASCIIHexDigit(m_next1)) {
723 shift1();
724 goto inHex;
725 }
726 if (m_current == '.') {
727 record8('0');
728 record8('.');
729 shift1();
730 goto inNumberAfterDecimalPoint;
731 }
732 if ((m_current | 0x20) == 'e') {
733 record8('0');
734 record8('e');
735 shift1();
736 goto inExponentIndicator;
737 }
738 if (isASCIIOctalDigit(m_current))
739 goto inOctal;
740 if (isASCIIDigit(m_current))
741 goto startNumber;
742 lvalp->doubleValue = 0;
743 goto doneNumeric;
744
745inNumberAfterDecimalPoint:
746 while (isASCIIDigit(m_current)) {
747 record8(m_current);
748 shift1();
749 }
750 if ((m_current | 0x20) == 'e') {
751 record8('e');
752 shift1();
753 goto inExponentIndicator;
754 }
755 goto doneNumber;
756
757inExponentIndicator:
758 if (m_current == '+' || m_current == '-') {
759 record8(m_current);
760 shift1();
761 }
762 if (!isASCIIDigit(m_current))
763 goto returnError;
764 do {
765 record8(m_current);
766 shift1();
767 } while (isASCIIDigit(m_current));
768 goto doneNumber;
769
770inOctal: {
771 do {
772 record8(m_current);
773 shift1();
774 } while (isASCIIOctalDigit(m_current));
775 if (isASCIIDigit(m_current))
776 goto startNumber;
777
778 double dval = 0;
779
780 const char* end = m_buffer8.end();
781 for (const char* p = m_buffer8.data(); p < end; ++p) {
782 dval *= 8;
783 dval += *p - '0';
784 }
785 if (dval >= mantissaOverflowLowerBound)
786 dval = parseIntOverflow(m_buffer8.data(), end - m_buffer8.data(), 8);
787
788 m_buffer8.resize(0);
789
790 lvalp->doubleValue = dval;
791 goto doneNumeric;
792}
793
794inHex: {
795 do {
796 record8(m_current);
797 shift1();
798 } while (isASCIIHexDigit(m_current));
799
800 double dval = 0;
801
802 const char* end = m_buffer8.end();
803 for (const char* p = m_buffer8.data(); p < end; ++p) {
804 dval *= 16;
805 dval += toASCIIHexValue(*p);
806 }
807 if (dval >= mantissaOverflowLowerBound)
808 dval = parseIntOverflow(m_buffer8.data(), end - m_buffer8.data(), 16);
809
810 m_buffer8.resize(0);
811
812 lvalp->doubleValue = dval;
813 goto doneNumeric;
814}
815
816startNumber:
817 record8(m_current);
818 shift1();
819 while (isASCIIDigit(m_current)) {
820 record8(m_current);
821 shift1();
822 }
823 if (m_current == '.') {
824 record8('.');
825 shift1();
826 goto inNumberAfterDecimalPoint;
827 }
828 if ((m_current | 0x20) == 'e') {
829 record8('e');
830 shift1();
831 goto inExponentIndicator;
832 }
833
834 // Fall through into doneNumber.
835
836doneNumber:
837 // Null-terminate string for strtod.
838 m_buffer8.append('\0');
839 lvalp->doubleValue = WTF::strtod(m_buffer8.data(), 0);
840 m_buffer8.resize(0);
841
842 // Fall through into doneNumeric.
843
844doneNumeric:
845 // No identifiers allowed directly after numeric literal, e.g. "3in" is bad.
846 if (UNLIKELY(isIdentStart(m_current)))
847 goto returnError;
848
849 m_atLineStart = false;
850 m_delimited = false;
851 token = NUMBER;
852 goto returnToken;
853
854doneSemicolon:
855 token = ';';
856 m_delimited = true;
857 goto returnToken;
858
859doneIdentifier:
860 m_atLineStart = false;
861 m_delimited = false;
862 lvalp->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
863 m_buffer16.resize(0);
864 token = IDENT;
865 goto returnToken;
866
867doneIdentifierOrKeyword: {
868 m_atLineStart = false;
869 m_delimited = false;
870 m_buffer16.resize(0);
871 const HashEntry* entry = m_keywordTable.entry(m_globalData, *lvalp->ident);
872 token = entry ? entry->lexerValue() : IDENT;
873 goto returnToken;
874}
875
876doneString:
877 // Atomize constant strings in case they're later used in property lookup.
878 shift1();
879 m_atLineStart = false;
880 m_delimited = false;
881 lvalp->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
882 m_buffer16.resize(0);
883 token = STRING;
884
885 // Fall through into returnToken.
886
887returnToken: {
888 int lineNumber = m_lineNumber;
889 llocp->first_line = lineNumber;
890 llocp->last_line = lineNumber;
891 llocp->first_column = startOffset;
892 llocp->last_column = currentOffset();
893
894 m_lastToken = token;
895 return token;
896}
897
898returnError:
899 m_error = true;
900 return -1;
901}
902
903bool Lexer::scanRegExp(const Identifier*& pattern, const Identifier*& flags, UChar patternPrefix)
904{
905 ASSERT(m_buffer16.isEmpty());
906
907 bool lastWasEscape = false;
908 bool inBrackets = false;
909
910 if (patternPrefix) {
911 ASSERT(!isLineTerminator(patternPrefix));
912 ASSERT(patternPrefix != '/');
913 ASSERT(patternPrefix != '[');
914 record16(patternPrefix);
915 }
916
917 while (true) {
918 int current = m_current;
919
920 if (isLineTerminator(current) || current == -1) {
921 m_buffer16.resize(0);
922 return false;
923 }
924
925 shift1();
926
927 if (current == '/' && !lastWasEscape && !inBrackets)
928 break;
929
930 record16(current);
931
932 if (lastWasEscape) {
933 lastWasEscape = false;
934 continue;
935 }
936
937 switch (current) {
938 case '[':
939 inBrackets = true;
940 break;
941 case ']':
942 inBrackets = false;
943 break;
944 case '\\':
945 lastWasEscape = true;
946 break;
947 }
948 }
949
950 pattern = makeIdentifier(m_buffer16.data(), m_buffer16.size());
951 m_buffer16.resize(0);
952
953 while (isIdentPart(m_current)) {
954 record16(m_current);
955 shift1();
956 }
957
958 flags = makeIdentifier(m_buffer16.data(), m_buffer16.size());
959 m_buffer16.resize(0);
960
961 return true;
962}
963
964bool Lexer::skipRegExp()
965{
966 bool lastWasEscape = false;
967 bool inBrackets = false;
968
969 while (true) {
970 int current = m_current;
971
972 if (isLineTerminator(current) || current == -1)
973 return false;
974
975 shift1();
976
977 if (current == '/' && !lastWasEscape && !inBrackets)
978 break;
979
980 if (lastWasEscape) {
981 lastWasEscape = false;
982 continue;
983 }
984
985 switch (current) {
986 case '[':
987 inBrackets = true;
988 break;
989 case ']':
990 inBrackets = false;
991 break;
992 case '\\':
993 lastWasEscape = true;
994 break;
995 }
996 }
997
998 while (isIdentPart(m_current))
999 shift1();
1000
1001 return true;
1002}
1003
1004void Lexer::clear()
1005{
1006 m_arena = 0;
1007 m_codeWithoutBOMs.clear();
1008
1009 Vector<char> newBuffer8;
1010 newBuffer8.reserveInitialCapacity(initialReadBufferCapacity);
1011 m_buffer8.swap(newBuffer8);
1012
1013 Vector<UChar> newBuffer16;
1014 newBuffer16.reserveInitialCapacity(initialReadBufferCapacity);
1015 m_buffer16.swap(newBuffer16);
1016
1017 m_isReparsing = false;
1018}
1019
1020SourceCode Lexer::sourceCode(int openBrace, int closeBrace, int firstLine)
1021{
1022 if (m_codeWithoutBOMs.isEmpty())
1023 return SourceCode(m_source->provider(), openBrace, closeBrace + 1, firstLine);
1024
1025 const UChar* data = m_source->provider()->data();
1026
1027 ASSERT(openBrace < closeBrace);
1028
1029 int numBOMsBeforeOpenBrace = 0;
1030 int numBOMsBetweenBraces = 0;
1031
1032 int i;
1033 for (i = m_source->startOffset(); i < openBrace; ++i)
1034 numBOMsBeforeOpenBrace += data[i] == byteOrderMark;
1035 for (; i < closeBrace; ++i)
1036 numBOMsBetweenBraces += data[i] == byteOrderMark;
1037
1038 return SourceCode(m_source->provider(), openBrace + numBOMsBeforeOpenBrace,
1039 closeBrace + numBOMsBeforeOpenBrace + numBOMsBetweenBraces + 1, firstLine);
1040}
1041
1042} // namespace JSC
Note: See TracBrowser for help on using the repository browser.