source: webkit/trunk/JavaScriptCore/kjs/lexer.cpp@ 31809

Last change on this file since 31809 was 31809, checked in by [email protected], 17 years ago

Reviewed by Geoff.

Generate a pure (re-entrant) parser with Bison.

No change on SunSpider.

  • kjs/Parser.cpp: (KJS::Parser::parse):
  • kjs/grammar.y:
  • kjs/lexer.cpp: (kjsyylex): (KJS::Lexer::lex):
  • kjs/lexer.h: Pass state as function arguments, instead of global data. Don't call lexer() as often as before, as this function is about to become slower due to thread-specific storage.
  • kjs/function.cpp: (KJS::isStrWhiteSpace): Don't call isSeparatorSpace() for 8-bit characters, as these are already taken care of. This is a small speedup, compensating for a small slowdown caused by switching Bison mode.
  • Property svn:eol-style set to native
File size: 22.1 KB
Line 
1// -*- c-basic-offset: 2 -*-
2/*
3 * Copyright (C) 1999-2000 Harri Porten ([email protected])
4 * Copyright (C) 2006, 2007 Apple Inc. All Rights Reserved.
5 * Copyright (C) 2007 Cameron Zwarich ([email protected])
6 *
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Library General Public
9 * License as published by the Free Software Foundation; either
10 * version 2 of the License, or (at your option) any later version.
11 *
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Library General Public License for more details.
16 *
17 * You should have received a copy of the GNU Library General Public License
18 * along with this library; see the file COPYING.LIB. If not, write to
19 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20 * Boston, MA 02110-1301, USA.
21 *
22 */
23
24#include "config.h"
25#include "lexer.h"
26
27#include "dtoa.h"
28#include "function.h"
29#include "nodes.h"
30#include "NodeInfo.h"
31#include <ctype.h>
32#include <limits.h>
33#include <string.h>
34#include <wtf/Assertions.h>
35#include <wtf/unicode/Unicode.h>
36
37using namespace WTF;
38using namespace Unicode;
39
40// we can't specify the namespace in yacc's C output, so do it here
41using namespace KJS;
42
43#ifndef KDE_USE_FINAL
44#include "grammar.h"
45#endif
46
47#include "lookup.h"
48#include "lexer.lut.h"
49
50// a bridge for yacc from the C world to C++
51int kjsyylex(YYSTYPE* lvalp, YYLTYPE* llocp, void* lexer)
52{
53 return static_cast<Lexer*>(lexer)->lex(lvalp, llocp);
54}
55
56namespace KJS {
57
// Forward declaration; the definition appears further down in this file,
// after the Lexer members that use it.
58static bool isDecimalDigit(int);
59
// Capacity reserved up front for the 8-bit and 16-bit token buffers
// (m_buffer8 / m_buffer16), both in the constructor and in clear().
60static const size_t initialReadBufferCapacity = 32;
// Capacity reserved up front for the tables of strings and identifiers
// created by makeUString() / makeIdentifier() (m_strings / m_identifiers).
61static const size_t initialStringTableCapacity = 64;
62
63Lexer& lexer()
64{
65 ASSERT(JSLock::currentThreadIsHoldingLock());
66
67 // FIXME: We'd like to avoid calling new here, but we don't currently
68 // support tearing down the Lexer at app quit time, since that would involve
69 // tearing down its UString data members without holding the JSLock.
70 static Lexer* staticLexer = new Lexer;
71 return *staticLexer;
72}
73
74Lexer::Lexer()
75 : yylineno(1)
76 , restrKeyword(false)
77 , eatNextIdentifier(false)
78 , stackToken(-1)
79 , lastToken(-1)
80 , pos(0)
81 , code(0)
82 , length(0)
83 , atLineStart(true)
84 , current(0)
85 , next1(0)
86 , next2(0)
87 , next3(0)
88{
89 m_buffer8.reserveCapacity(initialReadBufferCapacity);
90 m_buffer16.reserveCapacity(initialReadBufferCapacity);
91 m_strings.reserveCapacity(initialStringTableCapacity);
92 m_identifiers.reserveCapacity(initialStringTableCapacity);
93}
94
95void Lexer::setCode(int startingLineNumber, const UChar* c, unsigned int len)
96{
97 yylineno = 1 + startingLineNumber;
98 restrKeyword = false;
99 delimited = false;
100 eatNextIdentifier = false;
101 stackToken = -1;
102 lastToken = -1;
103 pos = 0;
104 code = c;
105 length = len;
106 skipLF = false;
107 skipCR = false;
108 error = false;
109 atLineStart = true;
110
111 // read first characters
112 current = (length > 0) ? code[0] : -1;
113 next1 = (length > 1) ? code[1] : -1;
114 next2 = (length > 2) ? code[2] : -1;
115 next3 = (length > 3) ? code[3] : -1;
116}
117
118void Lexer::shift(unsigned int p)
119{
120 // Here would be a good place to strip Cf characters, but that has caused compatibility problems:
121 // <http://bugs.webkit.org/show_bug.cgi?id=10183>.
122 while (p--) {
123 pos++;
124 current = next1;
125 next1 = next2;
126 next2 = next3;
127 next3 = (pos + 3 < length) ? code[pos + 3] : -1;
128 }
129}
130
131// called on each new line
132void Lexer::nextLine()
133{
134 yylineno++;
135 atLineStart = true;
136}
137
138void Lexer::setDone(State s)
139{
140 state = s;
141 done = true;
142}
143
// Scans the next token from the text supplied to setCode().
//
// The first half is a character-at-a-time state machine that runs until some
// branch calls setDone(); the second half turns the final state into a parser
// token and fills in its semantic value.
//
//   lvalp - receives the token's value: `ident` for identifiers, `string` for
//           string literals, `doubleValue` for numeric literals.
//   llocp - first_line and last_line are both set to the current line number.
//
// Returns the token code, 0 when the end of input is reached, or -1 for an
// unrecognised/malformed token (in which case `error` is also set to true).
144int Lexer::lex(YYSTYPE* lvalp, YYLTYPE* llocp)
145{
146 int token = 0;
147 state = Start;
148 unsigned short stringType = 0; // either single or double quotes
149 m_buffer8.clear();
150 m_buffer16.clear();
151 done = false;
152 terminator = false;
153 skipLF = false;
154 skipCR = false;
155
156 // did we push a token on the stack previously ?
157 // (after an automatic semicolon insertion)
 // Within this file stackToken is only ever assigned -1 or 0, so the only
 // token replayed here is 0 (end of input) after an inserted ';'. Because it
 // is left at 0 afterwards, later calls keep returning 0 until setCode()
 // resets it to -1.
158 if (stackToken >= 0) {
159 setDone(Other);
160 token = stackToken;
161 stackToken = 0;
162 }
163
 // Each iteration looks at `current` and either stays in / changes `state`
 // or finishes the token through setDone().
164 while (!done) {
165 if (skipLF && current != '\n') // found \r but not \n afterwards
166 skipLF = false;
167 if (skipCR && current != '\r') // found \n but not \r afterwards
168 skipCR = false;
169 if (skipLF || skipCR) // found \r\n or \n\r -> eat the second one
170 {
171 skipLF = false;
172 skipCR = false;
173 shift(1);
174 }
175 switch (state) {
176 case Start:
177 if (isWhiteSpace()) {
178 // do nothing
179 } else if (current == '/' && next1 == '/') {
180 shift(1);
181 state = InSingleLineComment;
182 } else if (current == '/' && next1 == '*') {
183 shift(1);
184 state = InMultiLineComment;
185 } else if (current == -1) {
186 if (!terminator && !delimited) {
187 // automatic semicolon insertion if program incomplete
188 token = ';';
189 stackToken = 0;
190 setDone(Other);
191 } else
192 setDone(Eof);
193 } else if (isLineTerminator()) {
194 nextLine();
195 terminator = true;
 // A line break directly after continue/break/return/throw
 // (restrKeyword is set for those below) yields a ';' token.
196 if (restrKeyword) {
197 token = ';';
198 setDone(Other);
199 }
200 } else if (current == '"' || current == '\'') {
201 state = InString;
202 stringType = static_cast<unsigned short>(current);
203 } else if (isIdentStart(current)) {
204 record16(current);
205 state = InIdentifierOrKeyword;
206 } else if (current == '\\') {
207 state = InIdentifierStartUnicodeEscapeStart;
208 } else if (current == '0') {
209 record8(current);
210 state = InNum0;
211 } else if (isDecimalDigit(current)) {
212 record8(current);
213 state = InNum;
214 } else if (current == '.' && isDecimalDigit(next1)) {
215 record8(current);
216 state = InDecimal;
217 // <!-- marks the beginning of a line comment (for www usage)
218 } else if (current == '<' && next1 == '!' &&
219 next2 == '-' && next3 == '-') {
220 shift(3);
221 state = InSingleLineComment;
222 // same for -->
223 } else if (atLineStart && current == '-' && next1 == '-' && next2 == '>') {
224 shift(2);
225 state = InSingleLineComment;
226 } else {
 // matchPunctuator() consumes the punctuator's characters itself.
227 token = matchPunctuator(current, next1, next2, next3);
228 if (token != -1) {
229 setDone(Other);
230 } else {
231 // cerr << "encountered unknown character" << endl;
232 setDone(Bad);
233 }
234 }
235 break;
236 case InString:
237 if (current == stringType) {
238 shift(1);
239 setDone(String);
240 } else if (isLineTerminator() || current == -1) {
241 setDone(Bad);
242 } else if (current == '\\') {
243 state = InEscapeSequence;
244 } else {
245 record16(current);
246 }
247 break;
248 // Escape Sequences inside of strings
249 case InEscapeSequence:
250 if (isOctalDigit(current)) {
 // Longest match first: three digits (first one 0-3), then two,
 // then a single octal digit.
251 if (current >= '0' && current <= '3' &&
252 isOctalDigit(next1) && isOctalDigit(next2)) {
253 record16(convertOctal(current, next1, next2));
254 shift(2);
255 state = InString;
256 } else if (isOctalDigit(current) && isOctalDigit(next1)) {
257 record16(convertOctal('0', current, next1));
258 shift(1);
259 state = InString;
260 } else if (isOctalDigit(current)) {
261 record16(convertOctal('0', '0', current));
262 state = InString;
263 } else {
264 setDone(Bad);
265 }
266 } else if (current == 'x')
267 state = InHexEscape;
268 else if (current == 'u')
269 state = InUnicodeEscape;
270 else if (isLineTerminator()) {
 // Backslash followed by a line terminator: nothing is recorded.
271 nextLine();
272 state = InString;
273 } else {
274 record16(singleEscape(static_cast<unsigned short>(current)));
275 state = InString;
276 }
277 break;
278 case InHexEscape:
279 if (isHexDigit(current) && isHexDigit(next1)) {
280 state = InString;
281 record16(convertHex(current, next1));
282 shift(1);
283 } else if (current == stringType) {
284 record16('x');
285 shift(1);
286 setDone(String);
287 } else {
 // Not two hex digits: keep a literal 'x' followed by the
 // current character.
288 record16('x');
289 record16(current);
290 state = InString;
291 }
292 break;
293 case InUnicodeEscape:
294 if (isHexDigit(current) && isHexDigit(next1) && isHexDigit(next2) && isHexDigit(next3)) {
295 record16(convertUnicode(current, next1, next2, next3));
296 shift(3);
297 state = InString;
298 } else if (current == stringType) {
299 record16('u');
300 shift(1);
301 setDone(String);
302 } else {
303 setDone(Bad);
304 }
305 break;
306 case InSingleLineComment:
307 if (isLineTerminator()) {
308 nextLine();
309 terminator = true;
310 if (restrKeyword) {
311 token = ';';
312 setDone(Other);
313 } else
314 state = Start;
315 } else if (current == -1) {
316 setDone(Eof);
317 }
318 break;
319 case InMultiLineComment:
320 if (current == -1) {
321 setDone(Bad);
322 } else if (isLineTerminator()) {
323 nextLine();
324 } else if (current == '*' && next1 == '/') {
325 state = Start;
326 shift(1);
327 }
328 break;
329 case InIdentifierOrKeyword:
330 case InIdentifier:
 // InIdentifier is only entered from the \u escape states below, so
 // a name that finishes in IdentifierOrKeyword contained no escape.
331 if (isIdentPart(current))
332 record16(current);
333 else if (current == '\\')
334 state = InIdentifierPartUnicodeEscapeStart;
335 else
336 setDone(state == InIdentifierOrKeyword ? IdentifierOrKeyword : Identifier);
337 break;
338 case InNum0:
339 if (current == 'x' || current == 'X') {
340 record8(current);
341 state = InHex;
342 } else if (current == '.') {
343 record8(current);
344 state = InDecimal;
345 } else if (current == 'e' || current == 'E') {
346 record8(current);
347 state = InExponentIndicator;
348 } else if (isOctalDigit(current)) {
349 record8(current);
350 state = InOctal;
351 } else if (isDecimalDigit(current)) {
352 record8(current);
353 state = InDecimal;
354 } else {
355 setDone(Number);
356 }
357 break;
358 case InHex:
359 if (isHexDigit(current)) {
360 record8(current);
361 } else {
362 setDone(Hex);
363 }
364 break;
365 case InOctal:
366 if (isOctalDigit(current)) {
367 record8(current);
368 }
369 else if (isDecimalDigit(current)) {
 // An 8 or 9 after a leading 0: continue in the decimal state.
370 record8(current);
371 state = InDecimal;
372 } else
373 setDone(Octal);
374 break;
375 case InNum:
376 if (isDecimalDigit(current)) {
377 record8(current);
378 } else if (current == '.') {
379 record8(current);
380 state = InDecimal;
381 } else if (current == 'e' || current == 'E') {
382 record8(current);
383 state = InExponentIndicator;
384 } else
385 setDone(Number);
386 break;
387 case InDecimal:
388 if (isDecimalDigit(current)) {
389 record8(current);
390 } else if (current == 'e' || current == 'E') {
391 record8(current);
392 state = InExponentIndicator;
393 } else
394 setDone(Number);
395 break;
396 case InExponentIndicator:
397 if (current == '+' || current == '-') {
398 record8(current);
399 } else if (isDecimalDigit(current)) {
400 record8(current);
401 state = InExponent;
402 } else
403 setDone(Bad);
404 break;
405 case InExponent:
406 if (isDecimalDigit(current)) {
407 record8(current);
408 } else
409 setDone(Number);
410 break;
411 case InIdentifierStartUnicodeEscapeStart:
412 if (current == 'u')
413 state = InIdentifierStartUnicodeEscape;
414 else
415 setDone(Bad);
416 break;
417 case InIdentifierPartUnicodeEscapeStart:
418 if (current == 'u')
419 state = InIdentifierPartUnicodeEscape;
420 else
421 setDone(Bad);
422 break;
423 case InIdentifierStartUnicodeEscape:
424 if (!isHexDigit(current) || !isHexDigit(next1) || !isHexDigit(next2) || !isHexDigit(next3)) {
425 setDone(Bad);
426 break;
427 }
 // `token` is borrowed here as scratch space for the decoded character.
428 token = convertUnicode(current, next1, next2, next3);
429 shift(3);
430 if (!isIdentStart(token)) {
431 setDone(Bad);
432 break;
433 }
434 record16(token);
435 state = InIdentifier;
436 break;
437 case InIdentifierPartUnicodeEscape:
438 if (!isHexDigit(current) || !isHexDigit(next1) || !isHexDigit(next2) || !isHexDigit(next3)) {
439 setDone(Bad);
440 break;
441 }
442 token = convertUnicode(current, next1, next2, next3);
443 shift(3);
444 if (!isIdentPart(token)) {
445 setDone(Bad);
446 break;
447 }
448 record16(token);
449 state = InIdentifier;
450 break;
451 default:
452 ASSERT(!"Unhandled state in switch statement");
453 }
454
455 // move on to the next character
456 if (!done)
457 shift(1);
 // atLineStart survives only while in Start or a single-line comment;
 // it gates the "-->" comment rule in the Start state.
458 if (state != Start && state != InSingleLineComment)
459 atLineStart = false;
460 }
461
462 // no identifiers allowed directly after numeric literal, e.g. "3in" is bad
463 if ((state == Number || state == Octal || state == Hex) && isIdentStart(current))
464 state = Bad;
465
466 // terminate string
467 m_buffer8.append('\0');
468
469#ifdef KJS_DEBUG_LEX
470 fprintf(stderr, "line: %d ", lineNo());
471 fprintf(stderr, "yytext (%x): ", m_buffer8[0]);
 // NOTE(review): every other reference in this file is spelled m_buffer8;
 // `buffer8` looks like a stale name that would only matter when
 // KJS_DEBUG_LEX is defined - confirm against lexer.h.
472 fprintf(stderr, "%s ", buffer8.data());
473#endif
474
 // Convert the recorded 8-bit text of a numeric literal into a double.
475 double dval = 0;
476 if (state == Number) {
477 dval = kjs_strtod(m_buffer8.data(), 0L);
478 } else if (state == Hex) { // scan hex numbers
 // Skip the two-character "0x"/"0X" prefix.
479 const char* p = m_buffer8.data() + 2;
480 while (char c = *p++) {
481 dval *= 16;
482 dval += convertHex(c);
483 }
484
 // The loop leaves p one past the terminating NUL, so
 // p - (data + 3) is the number of digits after the prefix.
485 if (dval >= mantissaOverflowLowerBound)
486 dval = parseIntOverflow(m_buffer8.data() + 2, p - (m_buffer8.data() + 3), 16);
487
488 state = Number;
489 } else if (state == Octal) { // scan octal number
 // Skip the leading '0'.
490 const char* p = m_buffer8.data() + 1;
491 while (char c = *p++) {
492 dval *= 8;
493 dval += c - '0';
494 }
495
 // As above: p is one past the NUL, so this is the digit count.
496 if (dval >= mantissaOverflowLowerBound)
497 dval = parseIntOverflow(m_buffer8.data() + 1, p - (m_buffer8.data() + 2), 8);
498
499 state = Number;
500 }
501
502#ifdef KJS_DEBUG_LEX
503 switch (state) {
504 case Eof:
505 printf("(EOF)\n");
506 break;
507 case Other:
508 printf("(Other)\n");
509 break;
510 case Identifier:
511 printf("(Identifier)/(Keyword)\n");
512 break;
513 case String:
514 printf("(String)\n");
515 break;
516 case Number:
517 printf("(Number)\n");
518 break;
519 default:
520 printf("(unknown)");
521 }
522#endif
523
 // NOTE(review): a name without \u escapes ends in IdentifierOrKeyword, not
 // Identifier, so this clears eatNextIdentifier for it; the "eat the
 // identifier" branch below then appears reachable only for names that
 // contain an escape. Confirm whether that is intended.
524 if (state != Identifier)
525 eatNextIdentifier = false;
526
527 restrKeyword = false;
528 delimited = false;
529 llocp->first_line = yylineno; // ???
530 llocp->last_line = yylineno;
531
 // Translate the final state into a token and its semantic value.
532 switch (state) {
533 case Eof:
534 token = 0;
535 break;
536 case Other:
 // Remember a '}' or ';' so that reaching end of input right after
 // it does not insert another ';' (see the Start state).
537 if (token == '}' || token == ';')
538 delimited = true;
539 break;
540 case Identifier:
541 // Apply anonymous-function hack below (eat the identifier).
542 if (eatNextIdentifier) {
543 eatNextIdentifier = false;
544 token = lex(lvalp, llocp);
545 break;
546 }
547 lvalp->ident = makeIdentifier(m_buffer16);
548 token = IDENT;
549 break;
550 case IdentifierOrKeyword:
551 lvalp->ident = makeIdentifier(m_buffer16);
552 if ((token = mainTable.value(*lvalp->ident)) < 0) {
553 // Lookup for keyword failed, means this is an identifier.
554 token = IDENT;
555 break;
556 }
557 // Hack for "f = function somename() { ... }"; too hard to get into the grammar.
558 eatNextIdentifier = token == FUNCTION && lastToken == '=';
559 if (token == CONTINUE || token == BREAK || token == RETURN || token == THROW)
560 restrKeyword = true;
561 break;
562 case String:
563 lvalp->string = makeUString(m_buffer16);
564 token = STRING;
565 break;
566 case Number:
567 lvalp->doubleValue = dval;
568 token = NUMBER;
569 break;
570 case Bad:
571#ifdef KJS_DEBUG_LEX
572 fprintf(stderr, "yylex: ERROR.\n");
573#endif
574 error = true;
575 return -1;
576 default:
577 ASSERT(!"unhandled numeration value in switch");
578 error = true;
579 return -1;
580 }
581 lastToken = token;
582 return token;
583}
584
585bool Lexer::isWhiteSpace() const
586{
587 return current == '\t' || current == 0x0b || current == 0x0c || isSeparatorSpace(current);
588}
589
590bool Lexer::isLineTerminator()
591{
592 bool cr = (current == '\r');
593 bool lf = (current == '\n');
594 if (cr)
595 skipLF = true;
596 else if (lf)
597 skipCR = true;
598 return cr || lf || current == 0x2028 || current == 0x2029;
599}
600
601bool Lexer::isIdentStart(int c)
602{
603 return (category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other))
604 || c == '$' || c == '_';
605}
606
607bool Lexer::isIdentPart(int c)
608{
609 return (category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other
610 | Mark_NonSpacing | Mark_SpacingCombining | Number_DecimalDigit | Punctuation_Connector))
611 || c == '$' || c == '_';
612}
613
// True when c is one of the ASCII digits '0' through '9'.
static bool isDecimalDigit(int c)
{
    if (c < '0')
        return false;
    return c <= '9';
}
618
619bool Lexer::isHexDigit(int c)
620{
621 return (c >= '0' && c <= '9' ||
622 c >= 'a' && c <= 'f' ||
623 c >= 'A' && c <= 'F');
624}
625
626bool Lexer::isOctalDigit(int c)
627{
628 return (c >= '0' && c <= '7');
629}
630
631int Lexer::matchPunctuator(int c1, int c2, int c3, int c4)
632{
633 if (c1 == '>' && c2 == '>' && c3 == '>' && c4 == '=') {
634 shift(4);
635 return URSHIFTEQUAL;
636 } else if (c1 == '=' && c2 == '=' && c3 == '=') {
637 shift(3);
638 return STREQ;
639 } else if (c1 == '!' && c2 == '=' && c3 == '=') {
640 shift(3);
641 return STRNEQ;
642 } else if (c1 == '>' && c2 == '>' && c3 == '>') {
643 shift(3);
644 return URSHIFT;
645 } else if (c1 == '<' && c2 == '<' && c3 == '=') {
646 shift(3);
647 return LSHIFTEQUAL;
648 } else if (c1 == '>' && c2 == '>' && c3 == '=') {
649 shift(3);
650 return RSHIFTEQUAL;
651 } else if (c1 == '<' && c2 == '=') {
652 shift(2);
653 return LE;
654 } else if (c1 == '>' && c2 == '=') {
655 shift(2);
656 return GE;
657 } else if (c1 == '!' && c2 == '=') {
658 shift(2);
659 return NE;
660 } else if (c1 == '+' && c2 == '+') {
661 shift(2);
662 if (terminator)
663 return AUTOPLUSPLUS;
664 else
665 return PLUSPLUS;
666 } else if (c1 == '-' && c2 == '-') {
667 shift(2);
668 if (terminator)
669 return AUTOMINUSMINUS;
670 else
671 return MINUSMINUS;
672 } else if (c1 == '=' && c2 == '=') {
673 shift(2);
674 return EQEQ;
675 } else if (c1 == '+' && c2 == '=') {
676 shift(2);
677 return PLUSEQUAL;
678 } else if (c1 == '-' && c2 == '=') {
679 shift(2);
680 return MINUSEQUAL;
681 } else if (c1 == '*' && c2 == '=') {
682 shift(2);
683 return MULTEQUAL;
684 } else if (c1 == '/' && c2 == '=') {
685 shift(2);
686 return DIVEQUAL;
687 } else if (c1 == '&' && c2 == '=') {
688 shift(2);
689 return ANDEQUAL;
690 } else if (c1 == '^' && c2 == '=') {
691 shift(2);
692 return XOREQUAL;
693 } else if (c1 == '%' && c2 == '=') {
694 shift(2);
695 return MODEQUAL;
696 } else if (c1 == '|' && c2 == '=') {
697 shift(2);
698 return OREQUAL;
699 } else if (c1 == '<' && c2 == '<') {
700 shift(2);
701 return LSHIFT;
702 } else if (c1 == '>' && c2 == '>') {
703 shift(2);
704 return RSHIFT;
705 } else if (c1 == '&' && c2 == '&') {
706 shift(2);
707 return AND;
708 } else if (c1 == '|' && c2 == '|') {
709 shift(2);
710 return OR;
711 }
712
713 switch(c1) {
714 case '=':
715 case '>':
716 case '<':
717 case ',':
718 case '!':
719 case '~':
720 case '?':
721 case ':':
722 case '.':
723 case '+':
724 case '-':
725 case '*':
726 case '/':
727 case '&':
728 case '|':
729 case '^':
730 case '%':
731 case '(':
732 case ')':
733 case '{':
734 case '}':
735 case '[':
736 case ']':
737 case ';':
738 shift(1);
739 return static_cast<int>(c1);
740 default:
741 return -1;
742 }
743}
744
745unsigned short Lexer::singleEscape(unsigned short c)
746{
747 switch(c) {
748 case 'b':
749 return 0x08;
750 case 't':
751 return 0x09;
752 case 'n':
753 return 0x0A;
754 case 'v':
755 return 0x0B;
756 case 'f':
757 return 0x0C;
758 case 'r':
759 return 0x0D;
760 case '"':
761 return 0x22;
762 case '\'':
763 return 0x27;
764 case '\\':
765 return 0x5C;
766 default:
767 return c;
768 }
769}
770
771unsigned short Lexer::convertOctal(int c1, int c2, int c3)
772{
773 return static_cast<unsigned short>((c1 - '0') * 64 + (c2 - '0') * 8 + c3 - '0');
774}
775
776unsigned char Lexer::convertHex(int c)
777{
778 if (c >= '0' && c <= '9')
779 return static_cast<unsigned char>(c - '0');
780 if (c >= 'a' && c <= 'f')
781 return static_cast<unsigned char>(c - 'a' + 10);
782 return static_cast<unsigned char>(c - 'A' + 10);
783}
784
785unsigned char Lexer::convertHex(int c1, int c2)
786{
787 return ((convertHex(c1) << 4) + convertHex(c2));
788}
789
790UChar Lexer::convertUnicode(int c1, int c2, int c3, int c4)
791{
792 unsigned char highByte = (convertHex(c1) << 4) + convertHex(c2);
793 unsigned char lowByte = (convertHex(c3) << 4) + convertHex(c4);
794 return (highByte << 8 | lowByte);
795}
796
797void Lexer::record8(int c)
798{
799 ASSERT(c >= 0);
800 ASSERT(c <= 0xff);
801 m_buffer8.append(static_cast<char>(c));
802}
803
804void Lexer::record16(int c)
805{
806 ASSERT(c >= 0);
807 ASSERT(c <= USHRT_MAX);
808 record16(UChar(static_cast<unsigned short>(c)));
809}
810
811void Lexer::record16(UChar c)
812{
813 m_buffer16.append(c);
814}
815
816bool Lexer::scanRegExp()
817{
818 m_buffer16.clear();
819 bool lastWasEscape = false;
820 bool inBrackets = false;
821
822 while (1) {
823 if (isLineTerminator() || current == -1)
824 return false;
825 else if (current != '/' || lastWasEscape == true || inBrackets == true)
826 {
827 // keep track of '[' and ']'
828 if (!lastWasEscape) {
829 if ( current == '[' && !inBrackets )
830 inBrackets = true;
831 if ( current == ']' && inBrackets )
832 inBrackets = false;
833 }
834 record16(current);
835 lastWasEscape =
836 !lastWasEscape && (current == '\\');
837 } else { // end of regexp
838 m_pattern = UString(m_buffer16);
839 m_buffer16.clear();
840 shift(1);
841 break;
842 }
843 shift(1);
844 }
845
846 while (isIdentPart(current)) {
847 record16(current);
848 shift(1);
849 }
850 m_flags = UString(m_buffer16);
851
852 return true;
853}
854
855void Lexer::clear()
856{
857 deleteAllValues(m_strings);
858 Vector<UString*> newStrings;
859 newStrings.reserveCapacity(initialStringTableCapacity);
860 m_strings.swap(newStrings);
861
862 deleteAllValues(m_identifiers);
863 Vector<KJS::Identifier*> newIdentifiers;
864 newIdentifiers.reserveCapacity(initialStringTableCapacity);
865 m_identifiers.swap(newIdentifiers);
866
867 Vector<char> newBuffer8;
868 newBuffer8.reserveCapacity(initialReadBufferCapacity);
869 m_buffer8.swap(newBuffer8);
870
871 Vector<UChar> newBuffer16;
872 newBuffer16.reserveCapacity(initialReadBufferCapacity);
873 m_buffer16.swap(newBuffer16);
874
875 m_pattern = 0;
876 m_flags = 0;
877}
878
879Identifier* Lexer::makeIdentifier(const Vector<UChar>& buffer)
880{
881 KJS::Identifier* identifier = new KJS::Identifier(buffer.data(), buffer.size());
882 m_identifiers.append(identifier);
883 return identifier;
884}
885
886UString* Lexer::makeUString(const Vector<UChar>& buffer)
887{
888 UString* string = new UString(buffer);
889 m_strings.append(string);
890 return string;
891}
892
893} // namespace KJS
Note: See TracBrowser for help on using the repository browser.