source: webkit/trunk/JavaScriptCore/parser/Lexer.cpp@ 41342

Last change on this file since 41342 was 41045, checked in by [email protected], 16 years ago

JavaScriptCore:

2009-02-17 Geoffrey Garen <[email protected]>

Reviewed by Sam Weinig.


Fixed <rdar://problem/6595040> REGRESSION: http://www.amnestyusa.org/
fails to load.


amnestyusa.org uses the Optimist JavaScript library, which adds event
listeners by concatenating string-ified functions. This is only sure to
be syntactically valid if the string-ified functions end in semicolons.

  • parser/Lexer.cpp: (JSC::Lexer::isWhiteSpace):
  • parser/Lexer.h: (JSC::Lexer::isWhiteSpace): (JSC::Lexer::isLineTerminator): Added some helper functions for examining whitespace.
  • runtime/FunctionPrototype.cpp: (JSC::appendSemicolonIfNeeded): (JSC::functionProtoFuncToString): When string-ifying a function, insert a semicolon in the last non-whitespace position, if one doesn't already exist.

LayoutTests:

2009-02-17 Geoffrey Garen <[email protected]>

Reviewed by Sam Weinig.


Test for <rdar://problem/6595040> REGRESSION: http://www.amnestyusa.org/
fails to load.

  • fast/js/function-toString-semicolon-insertion-expected.txt: Added.
  • fast/js/function-toString-semicolon-insertion.html: Added.
  • fast/js/resources/function-toString-semicolon-insertion.js: Added. (compileAndSerialize):
  • Property svn:eol-style set to native
File size: 26.7 KB
Line 
1/*
2 * Copyright (C) 1999-2000 Harri Porten ([email protected])
3 * Copyright (C) 2006, 2007, 2008, 2009 Apple Inc. All Rights Reserved.
4 * Copyright (C) 2007 Cameron Zwarich ([email protected])
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Library General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Library General Public License for more details.
15 *
16 * You should have received a copy of the GNU Library General Public License
17 * along with this library; see the file COPYING.LIB. If not, write to
18 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19 * Boston, MA 02110-1301, USA.
20 *
21 */
22
23#include "config.h"
24#include "Lexer.h"
25
26#include "JSFunction.h"
27#include "JSGlobalObjectFunctions.h"
28#include "NodeInfo.h"
29#include "Nodes.h"
30#include "dtoa.h"
31#include <ctype.h>
32#include <limits.h>
33#include <string.h>
34#include <wtf/ASCIICType.h>
35#include <wtf/Assertions.h>
36
37using namespace WTF;
38using namespace Unicode;
39
40// we can't specify the namespace in yacc's C output, so do it here
41using namespace JSC;
42
43#ifndef KDE_USE_FINAL
44#include "Grammar.h"
45#endif
46
47#include "Lookup.h"
48#include "Lexer.lut.h"
49
50// a bridge for yacc from the C world to C++
51int jscyylex(void* lvalp, void* llocp, void* globalData)
52{
53 return static_cast<JSGlobalData*>(globalData)->lexer->lex(lvalp, llocp);
54}
55
56namespace JSC {
57
58static bool isDecimalDigit(int);
59
60Lexer::Lexer(JSGlobalData* globalData)
61 : yylineno(1)
62 , m_restrKeyword(false)
63 , m_eatNextIdentifier(false)
64 , m_stackToken(-1)
65 , m_lastToken(-1)
66 , m_position(0)
67 , m_code(0)
68 , m_length(0)
69 , m_isReparsing(false)
70 , m_atLineStart(true)
71 , m_current(0)
72 , m_next1(0)
73 , m_next2(0)
74 , m_next3(0)
75 , m_currentOffset(0)
76 , m_nextOffset1(0)
77 , m_nextOffset2(0)
78 , m_nextOffset3(0)
79 , m_globalData(globalData)
80 , m_mainTable(JSC::mainTable)
81{
82 m_buffer8.reserveInitialCapacity(initialReadBufferCapacity);
83 m_buffer16.reserveInitialCapacity(initialReadBufferCapacity);
84}
85
86Lexer::~Lexer()
87{
88 m_mainTable.deleteTable();
89}
90
91void Lexer::setCode(const SourceCode& source)
92{
93 yylineno = source.firstLine();
94 m_restrKeyword = false;
95 m_delimited = false;
96 m_eatNextIdentifier = false;
97 m_stackToken = -1;
98 m_lastToken = -1;
99
100 m_position = source.startOffset();
101 m_source = &source;
102 m_code = source.provider()->data();
103 m_length = source.endOffset();
104 m_skipLF = false;
105 m_skipCR = false;
106 m_error = false;
107 m_atLineStart = true;
108
109 // read first characters
110 shift(4);
111}
112
113void Lexer::shift(unsigned p)
114{
115 // ECMA-262 calls for stripping Cf characters here, but we only do this for BOM,
116 // see <https://bugs.webkit.org/show_bug.cgi?id=4931>.
117
118 while (p--) {
119 m_current = m_next1;
120 m_next1 = m_next2;
121 m_next2 = m_next3;
122 m_currentOffset = m_nextOffset1;
123 m_nextOffset1 = m_nextOffset2;
124 m_nextOffset2 = m_nextOffset3;
125 do {
126 if (m_position >= m_length) {
127 m_nextOffset3 = m_position;
128 m_position++;
129 m_next3 = -1;
130 break;
131 }
132 m_nextOffset3 = m_position;
133 m_next3 = m_code[m_position++];
134 } while (m_next3 == 0xFEFF);
135 }
136}
137
138// called on each new line
139void Lexer::nextLine()
140{
141 yylineno++;
142 m_atLineStart = true;
143}
144
145void Lexer::setDone(State s)
146{
147 m_state = s;
148 m_done = true;
149}
150
151int Lexer::lex(void* p1, void* p2)
152{
153 YYSTYPE* lvalp = static_cast<YYSTYPE*>(p1);
154 YYLTYPE* llocp = static_cast<YYLTYPE*>(p2);
155 int token = 0;
156 m_state = Start;
157 unsigned short stringType = 0; // either single or double quotes
158 m_buffer8.clear();
159 m_buffer16.clear();
160 m_done = false;
161 m_terminator = false;
162 m_skipLF = false;
163 m_skipCR = false;
164
165 // did we push a token on the stack previously ?
166 // (after an automatic semicolon insertion)
167 if (m_stackToken >= 0) {
168 setDone(Other);
169 token = m_stackToken;
170 m_stackToken = 0;
171 }
172 int startOffset = m_currentOffset;
173 while (!m_done) {
174 if (m_skipLF && m_current != '\n') // found \r but not \n afterwards
175 m_skipLF = false;
176 if (m_skipCR && m_current != '\r') // found \n but not \r afterwards
177 m_skipCR = false;
178 if (m_skipLF || m_skipCR) { // found \r\n or \n\r -> eat the second one
179 m_skipLF = false;
180 m_skipCR = false;
181 shift(1);
182 }
183 switch (m_state) {
184 case Start:
185 startOffset = m_currentOffset;
186 if (isWhiteSpace()) {
187 // do nothing
188 } else if (m_current == '/' && m_next1 == '/') {
189 shift(1);
190 m_state = InSingleLineComment;
191 } else if (m_current == '/' && m_next1 == '*') {
192 shift(1);
193 m_state = InMultiLineComment;
194 } else if (m_current == -1) {
195 if (!m_terminator && !m_delimited && !m_isReparsing) {
196 // automatic semicolon insertion if program incomplete
197 token = ';';
198 m_stackToken = 0;
199 setDone(Other);
200 } else
201 setDone(Eof);
202 } else if (isLineTerminator()) {
203 nextLine();
204 m_terminator = true;
205 if (m_restrKeyword) {
206 token = ';';
207 setDone(Other);
208 }
209 } else if (m_current == '"' || m_current == '\'') {
210 m_state = InString;
211 stringType = static_cast<unsigned short>(m_current);
212 } else if (isIdentStart(m_current)) {
213 record16(m_current);
214 m_state = InIdentifierOrKeyword;
215 } else if (m_current == '\\')
216 m_state = InIdentifierStartUnicodeEscapeStart;
217 else if (m_current == '0') {
218 record8(m_current);
219 m_state = InNum0;
220 } else if (isDecimalDigit(m_current)) {
221 record8(m_current);
222 m_state = InNum;
223 } else if (m_current == '.' && isDecimalDigit(m_next1)) {
224 record8(m_current);
225 m_state = InDecimal;
226 // <!-- marks the beginning of a line comment (for www usage)
227 } else if (m_current == '<' && m_next1 == '!' && m_next2 == '-' && m_next3 == '-') {
228 shift(3);
229 m_state = InSingleLineComment;
230 // same for -->
231 } else if (m_atLineStart && m_current == '-' && m_next1 == '-' && m_next2 == '>') {
232 shift(2);
233 m_state = InSingleLineComment;
234 } else {
235 token = matchPunctuator(lvalp->intValue, m_current, m_next1, m_next2, m_next3);
236 if (token != -1)
237 setDone(Other);
238 else
239 setDone(Bad);
240 }
241 break;
242 case InString:
243 if (m_current == stringType) {
244 shift(1);
245 setDone(String);
246 } else if (isLineTerminator() || m_current == -1)
247 setDone(Bad);
248 else if (m_current == '\\')
249 m_state = InEscapeSequence;
250 else
251 record16(m_current);
252 break;
253 // Escape Sequences inside of strings
254 case InEscapeSequence:
255 if (isOctalDigit(m_current)) {
256 if (m_current >= '0' && m_current <= '3' &&
257 isOctalDigit(m_next1) && isOctalDigit(m_next2)) {
258 record16(convertOctal(m_current, m_next1, m_next2));
259 shift(2);
260 m_state = InString;
261 } else if (isOctalDigit(m_current) && isOctalDigit(m_next1)) {
262 record16(convertOctal('0', m_current, m_next1));
263 shift(1);
264 m_state = InString;
265 } else if (isOctalDigit(m_current)) {
266 record16(convertOctal('0', '0', m_current));
267 m_state = InString;
268 } else
269 setDone(Bad);
270 } else if (m_current == 'x')
271 m_state = InHexEscape;
272 else if (m_current == 'u')
273 m_state = InUnicodeEscape;
274 else if (isLineTerminator()) {
275 nextLine();
276 m_state = InString;
277 } else {
278 record16(singleEscape(static_cast<unsigned short>(m_current)));
279 m_state = InString;
280 }
281 break;
282 case InHexEscape:
283 if (isHexDigit(m_current) && isHexDigit(m_next1)) {
284 m_state = InString;
285 record16(convertHex(m_current, m_next1));
286 shift(1);
287 } else if (m_current == stringType) {
288 record16('x');
289 shift(1);
290 setDone(String);
291 } else {
292 record16('x');
293 record16(m_current);
294 m_state = InString;
295 }
296 break;
297 case InUnicodeEscape:
298 if (isHexDigit(m_current) && isHexDigit(m_next1) && isHexDigit(m_next2) && isHexDigit(m_next3)) {
299 record16(convertUnicode(m_current, m_next1, m_next2, m_next3));
300 shift(3);
301 m_state = InString;
302 } else if (m_current == stringType) {
303 record16('u');
304 shift(1);
305 setDone(String);
306 } else
307 setDone(Bad);
308 break;
309 case InSingleLineComment:
310 if (isLineTerminator()) {
311 nextLine();
312 m_terminator = true;
313 if (m_restrKeyword) {
314 token = ';';
315 setDone(Other);
316 } else
317 m_state = Start;
318 } else if (m_current == -1)
319 setDone(Eof);
320 break;
321 case InMultiLineComment:
322 if (m_current == -1)
323 setDone(Bad);
324 else if (isLineTerminator())
325 nextLine();
326 else if (m_current == '*' && m_next1 == '/') {
327 m_state = Start;
328 shift(1);
329 }
330 break;
331 case InIdentifierOrKeyword:
332 case InIdentifier:
333 if (isIdentPart(m_current))
334 record16(m_current);
335 else if (m_current == '\\')
336 m_state = InIdentifierPartUnicodeEscapeStart;
337 else
338 setDone(m_state == InIdentifierOrKeyword ? IdentifierOrKeyword : Identifier);
339 break;
340 case InNum0:
341 if (m_current == 'x' || m_current == 'X') {
342 record8(m_current);
343 m_state = InHex;
344 } else if (m_current == '.') {
345 record8(m_current);
346 m_state = InDecimal;
347 } else if (m_current == 'e' || m_current == 'E') {
348 record8(m_current);
349 m_state = InExponentIndicator;
350 } else if (isOctalDigit(m_current)) {
351 record8(m_current);
352 m_state = InOctal;
353 } else if (isDecimalDigit(m_current)) {
354 record8(m_current);
355 m_state = InDecimal;
356 } else
357 setDone(Number);
358 break;
359 case InHex:
360 if (isHexDigit(m_current))
361 record8(m_current);
362 else
363 setDone(Hex);
364 break;
365 case InOctal:
366 if (isOctalDigit(m_current))
367 record8(m_current);
368 else if (isDecimalDigit(m_current)) {
369 record8(m_current);
370 m_state = InDecimal;
371 } else
372 setDone(Octal);
373 break;
374 case InNum:
375 if (isDecimalDigit(m_current))
376 record8(m_current);
377 else if (m_current == '.') {
378 record8(m_current);
379 m_state = InDecimal;
380 } else if (m_current == 'e' || m_current == 'E') {
381 record8(m_current);
382 m_state = InExponentIndicator;
383 } else
384 setDone(Number);
385 break;
386 case InDecimal:
387 if (isDecimalDigit(m_current))
388 record8(m_current);
389 else if (m_current == 'e' || m_current == 'E') {
390 record8(m_current);
391 m_state = InExponentIndicator;
392 } else
393 setDone(Number);
394 break;
395 case InExponentIndicator:
396 if (m_current == '+' || m_current == '-')
397 record8(m_current);
398 else if (isDecimalDigit(m_current)) {
399 record8(m_current);
400 m_state = InExponent;
401 } else
402 setDone(Bad);
403 break;
404 case InExponent:
405 if (isDecimalDigit(m_current))
406 record8(m_current);
407 else
408 setDone(Number);
409 break;
410 case InIdentifierStartUnicodeEscapeStart:
411 if (m_current == 'u')
412 m_state = InIdentifierStartUnicodeEscape;
413 else
414 setDone(Bad);
415 break;
416 case InIdentifierPartUnicodeEscapeStart:
417 if (m_current == 'u')
418 m_state = InIdentifierPartUnicodeEscape;
419 else
420 setDone(Bad);
421 break;
422 case InIdentifierStartUnicodeEscape:
423 if (!isHexDigit(m_current) || !isHexDigit(m_next1) || !isHexDigit(m_next2) || !isHexDigit(m_next3)) {
424 setDone(Bad);
425 break;
426 }
427 token = convertUnicode(m_current, m_next1, m_next2, m_next3);
428 shift(3);
429 if (!isIdentStart(token)) {
430 setDone(Bad);
431 break;
432 }
433 record16(token);
434 m_state = InIdentifier;
435 break;
436 case InIdentifierPartUnicodeEscape:
437 if (!isHexDigit(m_current) || !isHexDigit(m_next1) || !isHexDigit(m_next2) || !isHexDigit(m_next3)) {
438 setDone(Bad);
439 break;
440 }
441 token = convertUnicode(m_current, m_next1, m_next2, m_next3);
442 shift(3);
443 if (!isIdentPart(token)) {
444 setDone(Bad);
445 break;
446 }
447 record16(token);
448 m_state = InIdentifier;
449 break;
450 default:
451 ASSERT(!"Unhandled state in switch statement");
452 }
453
454 // move on to the next character
455 if (!m_done)
456 shift(1);
457 if (m_state != Start && m_state != InSingleLineComment)
458 m_atLineStart = false;
459 }
460
461 // no identifiers allowed directly after numeric literal, e.g. "3in" is bad
462 if ((m_state == Number || m_state == Octal || m_state == Hex) && isIdentStart(m_current))
463 m_state = Bad;
464
465 // terminate string
466 m_buffer8.append('\0');
467
468#ifdef JSC_DEBUG_LEX
469 fprintf(stderr, "line: %d ", lineNo());
470 fprintf(stderr, "yytext (%x): ", m_buffer8[0]);
471 fprintf(stderr, "%s ", m_buffer8.data());
472#endif
473
474 double dval = 0;
475 if (m_state == Number)
476 dval = WTF::strtod(m_buffer8.data(), 0L);
477 else if (m_state == Hex) { // scan hex numbers
478 const char* p = m_buffer8.data() + 2;
479 while (char c = *p++) {
480 dval *= 16;
481 dval += convertHex(c);
482 }
483
484 if (dval >= mantissaOverflowLowerBound)
485 dval = parseIntOverflow(m_buffer8.data() + 2, p - (m_buffer8.data() + 3), 16);
486
487 m_state = Number;
488 } else if (m_state == Octal) { // scan octal number
489 const char* p = m_buffer8.data() + 1;
490 while (char c = *p++) {
491 dval *= 8;
492 dval += c - '0';
493 }
494
495 if (dval >= mantissaOverflowLowerBound)
496 dval = parseIntOverflow(m_buffer8.data() + 1, p - (m_buffer8.data() + 2), 8);
497
498 m_state = Number;
499 }
500
501#ifdef JSC_DEBUG_LEX
502 switch (m_state) {
503 case Eof:
504 printf("(EOF)\n");
505 break;
506 case Other:
507 printf("(Other)\n");
508 break;
509 case Identifier:
510 printf("(Identifier)/(Keyword)\n");
511 break;
512 case String:
513 printf("(String)\n");
514 break;
515 case Number:
516 printf("(Number)\n");
517 break;
518 default:
519 printf("(unknown)");
520 }
521#endif
522
523 if (m_state != Identifier)
524 m_eatNextIdentifier = false;
525
526 m_restrKeyword = false;
527 m_delimited = false;
528 llocp->first_line = yylineno;
529 llocp->last_line = yylineno;
530 llocp->first_column = startOffset;
531 llocp->last_column = m_currentOffset;
532 switch (m_state) {
533 case Eof:
534 token = 0;
535 break;
536 case Other:
537 if (token == '}' || token == ';')
538 m_delimited = true;
539 break;
540 case Identifier:
541 // Apply anonymous-function hack below (eat the identifier).
542 if (m_eatNextIdentifier) {
543 m_eatNextIdentifier = false;
544 token = lex(lvalp, llocp);
545 break;
546 }
547 lvalp->ident = makeIdentifier(m_buffer16);
548 token = IDENT;
549 break;
550 case IdentifierOrKeyword: {
551 lvalp->ident = makeIdentifier(m_buffer16);
552 const HashEntry* entry = m_mainTable.entry(m_globalData, *lvalp->ident);
553 if (!entry) {
554 // Lookup for keyword failed, means this is an identifier.
555 token = IDENT;
556 break;
557 }
558 token = entry->lexerValue();
559 // Hack for "f = function somename() { ... }"; too hard to get into the grammar.
560 m_eatNextIdentifier = token == FUNCTION && m_lastToken == '=';
561 if (token == CONTINUE || token == BREAK || token == RETURN || token == THROW)
562 m_restrKeyword = true;
563 break;
564 }
565 case String:
566 // Atomize constant strings in case they're later used in property lookup.
567 lvalp->ident = makeIdentifier(m_buffer16);
568 token = STRING;
569 break;
570 case Number:
571 lvalp->doubleValue = dval;
572 token = NUMBER;
573 break;
574 case Bad:
575#ifdef JSC_DEBUG_LEX
576 fprintf(stderr, "yylex: ERROR.\n");
577#endif
578 m_error = true;
579 return -1;
580 default:
581 ASSERT(!"unhandled numeration value in switch");
582 m_error = true;
583 return -1;
584 }
585 m_lastToken = token;
586 return token;
587}
588
589bool Lexer::isWhiteSpace() const
590{
591 return isWhiteSpace(m_current);
592}
593
594bool Lexer::isLineTerminator()
595{
596 bool cr = (m_current == '\r');
597 bool lf = (m_current == '\n');
598 if (cr)
599 m_skipLF = true;
600 else if (lf)
601 m_skipCR = true;
602 return cr || lf || m_current == 0x2028 || m_current == 0x2029;
603}
604
605bool Lexer::isIdentStart(int c)
606{
607 return isASCIIAlpha(c) || c == '$' || c == '_' || (!isASCII(c) && (category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other)));
608}
609
610bool Lexer::isIdentPart(int c)
611{
612 return isASCIIAlphanumeric(c) || c == '$' || c == '_' || (!isASCII(c) && (category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other
613 | Mark_NonSpacing | Mark_SpacingCombining | Number_DecimalDigit | Punctuation_Connector)));
614}
615
616static bool isDecimalDigit(int c)
617{
618 return isASCIIDigit(c);
619}
620
621bool Lexer::isHexDigit(int c)
622{
623 return isASCIIHexDigit(c);
624}
625
626bool Lexer::isOctalDigit(int c)
627{
628 return isASCIIOctalDigit(c);
629}
630
631int Lexer::matchPunctuator(int& charPos, int c1, int c2, int c3, int c4)
632{
633 if (c1 == '>' && c2 == '>' && c3 == '>' && c4 == '=') {
634 shift(4);
635 return URSHIFTEQUAL;
636 }
637 if (c1 == '=' && c2 == '=' && c3 == '=') {
638 shift(3);
639 return STREQ;
640 }
641 if (c1 == '!' && c2 == '=' && c3 == '=') {
642 shift(3);
643 return STRNEQ;
644 }
645 if (c1 == '>' && c2 == '>' && c3 == '>') {
646 shift(3);
647 return URSHIFT;
648 }
649 if (c1 == '<' && c2 == '<' && c3 == '=') {
650 shift(3);
651 return LSHIFTEQUAL;
652 }
653 if (c1 == '>' && c2 == '>' && c3 == '=') {
654 shift(3);
655 return RSHIFTEQUAL;
656 }
657 if (c1 == '<' && c2 == '=') {
658 shift(2);
659 return LE;
660 }
661 if (c1 == '>' && c2 == '=') {
662 shift(2);
663 return GE;
664 }
665 if (c1 == '!' && c2 == '=') {
666 shift(2);
667 return NE;
668 }
669 if (c1 == '+' && c2 == '+') {
670 shift(2);
671 if (m_terminator)
672 return AUTOPLUSPLUS;
673 return PLUSPLUS;
674 }
675 if (c1 == '-' && c2 == '-') {
676 shift(2);
677 if (m_terminator)
678 return AUTOMINUSMINUS;
679 return MINUSMINUS;
680 }
681 if (c1 == '=' && c2 == '=') {
682 shift(2);
683 return EQEQ;
684 }
685 if (c1 == '+' && c2 == '=') {
686 shift(2);
687 return PLUSEQUAL;
688 }
689 if (c1 == '-' && c2 == '=') {
690 shift(2);
691 return MINUSEQUAL;
692 }
693 if (c1 == '*' && c2 == '=') {
694 shift(2);
695 return MULTEQUAL;
696 }
697 if (c1 == '/' && c2 == '=') {
698 shift(2);
699 return DIVEQUAL;
700 }
701 if (c1 == '&' && c2 == '=') {
702 shift(2);
703 return ANDEQUAL;
704 }
705 if (c1 == '^' && c2 == '=') {
706 shift(2);
707 return XOREQUAL;
708 }
709 if (c1 == '%' && c2 == '=') {
710 shift(2);
711 return MODEQUAL;
712 }
713 if (c1 == '|' && c2 == '=') {
714 shift(2);
715 return OREQUAL;
716 }
717 if (c1 == '<' && c2 == '<') {
718 shift(2);
719 return LSHIFT;
720 }
721 if (c1 == '>' && c2 == '>') {
722 shift(2);
723 return RSHIFT;
724 }
725 if (c1 == '&' && c2 == '&') {
726 shift(2);
727 return AND;
728 }
729 if (c1 == '|' && c2 == '|') {
730 shift(2);
731 return OR;
732 }
733
734 switch (c1) {
735 case '=':
736 case '>':
737 case '<':
738 case ',':
739 case '!':
740 case '~':
741 case '?':
742 case ':':
743 case '.':
744 case '+':
745 case '-':
746 case '*':
747 case '/':
748 case '&':
749 case '|':
750 case '^':
751 case '%':
752 case '(':
753 case ')':
754 case '[':
755 case ']':
756 case ';':
757 shift(1);
758 return static_cast<int>(c1);
759 case '{':
760 charPos = m_currentOffset;
761 shift(1);
762 return OPENBRACE;
763 case '}':
764 charPos = m_currentOffset;
765 shift(1);
766 return CLOSEBRACE;
767 default:
768 return -1;
769 }
770}
771
772unsigned short Lexer::singleEscape(unsigned short c)
773{
774 switch (c) {
775 case 'b':
776 return 0x08;
777 case 't':
778 return 0x09;
779 case 'n':
780 return 0x0A;
781 case 'v':
782 return 0x0B;
783 case 'f':
784 return 0x0C;
785 case 'r':
786 return 0x0D;
787 case '"':
788 return 0x22;
789 case '\'':
790 return 0x27;
791 case '\\':
792 return 0x5C;
793 default:
794 return c;
795 }
796}
797
798unsigned short Lexer::convertOctal(int c1, int c2, int c3)
799{
800 return static_cast<unsigned short>((c1 - '0') * 64 + (c2 - '0') * 8 + c3 - '0');
801}
802
803unsigned char Lexer::convertHex(int c)
804{
805 if (c >= '0' && c <= '9')
806 return static_cast<unsigned char>(c - '0');
807 if (c >= 'a' && c <= 'f')
808 return static_cast<unsigned char>(c - 'a' + 10);
809 return static_cast<unsigned char>(c - 'A' + 10);
810}
811
812unsigned char Lexer::convertHex(int c1, int c2)
813{
814 return ((convertHex(c1) << 4) + convertHex(c2));
815}
816
817UChar Lexer::convertUnicode(int c1, int c2, int c3, int c4)
818{
819 unsigned char highByte = (convertHex(c1) << 4) + convertHex(c2);
820 unsigned char lowByte = (convertHex(c3) << 4) + convertHex(c4);
821 return (highByte << 8 | lowByte);
822}
823
824void Lexer::record8(int c)
825{
826 ASSERT(c >= 0);
827 ASSERT(c <= 0xff);
828 m_buffer8.append(static_cast<char>(c));
829}
830
831void Lexer::record16(int c)
832{
833 ASSERT(c >= 0);
834 ASSERT(c <= USHRT_MAX);
835 record16(UChar(static_cast<unsigned short>(c)));
836}
837
838void Lexer::record16(UChar c)
839{
840 m_buffer16.append(c);
841}
842
843bool Lexer::scanRegExp()
844{
845 m_buffer16.clear();
846 bool lastWasEscape = false;
847 bool inBrackets = false;
848
849 while (1) {
850 if (isLineTerminator() || m_current == -1)
851 return false;
852 else if (m_current != '/' || lastWasEscape == true || inBrackets == true) {
853 // keep track of '[' and ']'
854 if (!lastWasEscape) {
855 if ( m_current == '[' && !inBrackets )
856 inBrackets = true;
857 if ( m_current == ']' && inBrackets )
858 inBrackets = false;
859 }
860 record16(m_current);
861 lastWasEscape =
862 !lastWasEscape && (m_current == '\\');
863 } else { // end of regexp
864 m_pattern = UString(m_buffer16);
865 m_buffer16.clear();
866 shift(1);
867 break;
868 }
869 shift(1);
870 }
871
872 while (isIdentPart(m_current)) {
873 record16(m_current);
874 shift(1);
875 }
876 m_flags = UString(m_buffer16);
877
878 return true;
879}
880
881void Lexer::clear()
882{
883 m_identifiers.clear();
884
885 Vector<char> newBuffer8;
886 newBuffer8.reserveInitialCapacity(initialReadBufferCapacity);
887 m_buffer8.swap(newBuffer8);
888
889 Vector<UChar> newBuffer16;
890 newBuffer16.reserveInitialCapacity(initialReadBufferCapacity);
891 m_buffer16.swap(newBuffer16);
892
893 m_isReparsing = false;
894
895 m_pattern = 0;
896 m_flags = 0;
897}
898
899} // namespace JSC
Note: See TracBrowser for help on using the repository browser.