source: webkit/trunk/JavaScriptCore/kjs/lexer.cpp@ 31936

Last change on this file since 31936 was 31936, checked in by [email protected], 17 years ago

Reviewed by Darin.

Implement an abstraction for thread-specific storage, use it to get rid of some static objects.

SunSpider results were not conclusive, possibly up to 0.2% slowdown.

  • wtf/ThreadSpecific.h: Added. (WTF::::ThreadSpecific): (WTF::::~ThreadSpecific): (WTF::::get): (WTF::::set): (WTF::::destroy): (WTF::T): (WTF::::operator): Only implemented for platforms that use pthreads.
  • kjs/CommonIdentifiers.cpp: (KJS::CommonIdentifiers::shared):
  • kjs/CommonIdentifiers.h:
  • kjs/InitializeThreading.cpp: (KJS::initializeThreading):
  • kjs/Parser.cpp: (KJS::parser):
  • kjs/Parser.h:
  • kjs/identifier.cpp: (KJS::identifierTable): (KJS::literalIdentifierTable): (KJS::Identifier::initializeIdentifierThreading):
  • kjs/identifier.h:
  • kjs/lexer.cpp: (KJS::lexer):
  • kjs/lexer.h: Make static instances per-thread.
  • Property svn:eol-style set to native
File size: 22.1 KB
Line 
1// -*- c-basic-offset: 2 -*-
2/*
3 * Copyright (C) 1999-2000 Harri Porten ([email protected])
4 * Copyright (C) 2006, 2007 Apple Inc. All Rights Reserved.
5 * Copyright (C) 2007 Cameron Zwarich ([email protected])
6 *
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Library General Public
9 * License as published by the Free Software Foundation; either
10 * version 2 of the License, or (at your option) any later version.
11 *
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Library General Public License for more details.
16 *
17 * You should have received a copy of the GNU Library General Public License
18 * along with this library; see the file COPYING.LIB. If not, write to
19 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20 * Boston, MA 02110-1301, USA.
21 *
22 */
23
24#include "config.h"
25#include "lexer.h"
26
27#include "dtoa.h"
28#include "function.h"
29#include "nodes.h"
30#include "NodeInfo.h"
31#include <ctype.h>
32#include <limits.h>
33#include <string.h>
34#include <wtf/Assertions.h>
35#if USE(MULTIPLE_THREADS)
36#include <wtf/ThreadSpecific.h>
37#endif
38#include <wtf/unicode/Unicode.h>
39
40using namespace WTF;
41using namespace Unicode;
42
43// we can't specify the namespace in yacc's C output, so do it here
44using namespace KJS;
45
46#ifndef KDE_USE_FINAL
47#include "grammar.h"
48#endif
49
50#include "lookup.h"
51#include "lexer.lut.h"
52
53// a bridge for yacc from the C world to C++
54int kjsyylex(void* lvalp, void* llocp, void* lexer)
55{
56 return static_cast<Lexer*>(lexer)->lex(lvalp, llocp);
57}
58
59namespace KJS {
60
// Defined further down in this file; declared here because Lexer::lex() uses it.
static bool isDecimalDigit(int);

// Capacity reserved for the 8-bit and 16-bit read buffers (m_buffer8, m_buffer16).
static const size_t initialReadBufferCapacity = 32;
// Capacity reserved for the string and identifier tables (m_strings, m_identifiers).
static const size_t initialStringTableCapacity = 64;
65
66Lexer& lexer()
67{
68#if USE(MULTIPLE_THREADS)
69 static ThreadSpecific<Lexer> staticLexer;
70 return *staticLexer;
71#else
72 static Lexer staticLexer;
73 return staticLexer;
74#endif
75}
76
77Lexer::Lexer()
78 : yylineno(1)
79 , restrKeyword(false)
80 , eatNextIdentifier(false)
81 , stackToken(-1)
82 , lastToken(-1)
83 , pos(0)
84 , code(0)
85 , length(0)
86 , atLineStart(true)
87 , current(0)
88 , next1(0)
89 , next2(0)
90 , next3(0)
91{
92 m_buffer8.reserveCapacity(initialReadBufferCapacity);
93 m_buffer16.reserveCapacity(initialReadBufferCapacity);
94 m_strings.reserveCapacity(initialStringTableCapacity);
95 m_identifiers.reserveCapacity(initialStringTableCapacity);
96}
97
98void Lexer::setCode(int startingLineNumber, const UChar* c, unsigned int len)
99{
100 yylineno = 1 + startingLineNumber;
101 restrKeyword = false;
102 delimited = false;
103 eatNextIdentifier = false;
104 stackToken = -1;
105 lastToken = -1;
106 pos = 0;
107 code = c;
108 length = len;
109 skipLF = false;
110 skipCR = false;
111 error = false;
112 atLineStart = true;
113
114 // read first characters
115 current = (length > 0) ? code[0] : -1;
116 next1 = (length > 1) ? code[1] : -1;
117 next2 = (length > 2) ? code[2] : -1;
118 next3 = (length > 3) ? code[3] : -1;
119}
120
121void Lexer::shift(unsigned int p)
122{
123 // Here would be a good place to strip Cf characters, but that has caused compatibility problems:
124 // <http://bugs.webkit.org/show_bug.cgi?id=10183>.
125 while (p--) {
126 pos++;
127 current = next1;
128 next1 = next2;
129 next2 = next3;
130 next3 = (pos + 3 < length) ? code[pos + 3] : -1;
131 }
132}
133
134// called on each new line
135void Lexer::nextLine()
136{
137 yylineno++;
138 atLineStart = true;
139}
140
141void Lexer::setDone(State s)
142{
143 state = s;
144 done = true;
145}
146
147int Lexer::lex(void* p1, void* p2)
148{
149 YYSTYPE* lvalp = static_cast<YYSTYPE*>(p1);
150 YYLTYPE* llocp = static_cast<YYLTYPE*>(p2);
151 int token = 0;
152 state = Start;
153 unsigned short stringType = 0; // either single or double quotes
154 m_buffer8.clear();
155 m_buffer16.clear();
156 done = false;
157 terminator = false;
158 skipLF = false;
159 skipCR = false;
160
161 // did we push a token on the stack previously ?
162 // (after an automatic semicolon insertion)
163 if (stackToken >= 0) {
164 setDone(Other);
165 token = stackToken;
166 stackToken = 0;
167 }
168
169 while (!done) {
170 if (skipLF && current != '\n') // found \r but not \n afterwards
171 skipLF = false;
172 if (skipCR && current != '\r') // found \n but not \r afterwards
173 skipCR = false;
174 if (skipLF || skipCR) // found \r\n or \n\r -> eat the second one
175 {
176 skipLF = false;
177 skipCR = false;
178 shift(1);
179 }
180 switch (state) {
181 case Start:
182 if (isWhiteSpace()) {
183 // do nothing
184 } else if (current == '/' && next1 == '/') {
185 shift(1);
186 state = InSingleLineComment;
187 } else if (current == '/' && next1 == '*') {
188 shift(1);
189 state = InMultiLineComment;
190 } else if (current == -1) {
191 if (!terminator && !delimited) {
192 // automatic semicolon insertion if program incomplete
193 token = ';';
194 stackToken = 0;
195 setDone(Other);
196 } else
197 setDone(Eof);
198 } else if (isLineTerminator()) {
199 nextLine();
200 terminator = true;
201 if (restrKeyword) {
202 token = ';';
203 setDone(Other);
204 }
205 } else if (current == '"' || current == '\'') {
206 state = InString;
207 stringType = static_cast<unsigned short>(current);
208 } else if (isIdentStart(current)) {
209 record16(current);
210 state = InIdentifierOrKeyword;
211 } else if (current == '\\') {
212 state = InIdentifierStartUnicodeEscapeStart;
213 } else if (current == '0') {
214 record8(current);
215 state = InNum0;
216 } else if (isDecimalDigit(current)) {
217 record8(current);
218 state = InNum;
219 } else if (current == '.' && isDecimalDigit(next1)) {
220 record8(current);
221 state = InDecimal;
222 // <!-- marks the beginning of a line comment (for www usage)
223 } else if (current == '<' && next1 == '!' &&
224 next2 == '-' && next3 == '-') {
225 shift(3);
226 state = InSingleLineComment;
227 // same for -->
228 } else if (atLineStart && current == '-' && next1 == '-' && next2 == '>') {
229 shift(2);
230 state = InSingleLineComment;
231 } else {
232 token = matchPunctuator(current, next1, next2, next3);
233 if (token != -1) {
234 setDone(Other);
235 } else {
236 // cerr << "encountered unknown character" << endl;
237 setDone(Bad);
238 }
239 }
240 break;
241 case InString:
242 if (current == stringType) {
243 shift(1);
244 setDone(String);
245 } else if (isLineTerminator() || current == -1) {
246 setDone(Bad);
247 } else if (current == '\\') {
248 state = InEscapeSequence;
249 } else {
250 record16(current);
251 }
252 break;
253 // Escape Sequences inside of strings
254 case InEscapeSequence:
255 if (isOctalDigit(current)) {
256 if (current >= '0' && current <= '3' &&
257 isOctalDigit(next1) && isOctalDigit(next2)) {
258 record16(convertOctal(current, next1, next2));
259 shift(2);
260 state = InString;
261 } else if (isOctalDigit(current) && isOctalDigit(next1)) {
262 record16(convertOctal('0', current, next1));
263 shift(1);
264 state = InString;
265 } else if (isOctalDigit(current)) {
266 record16(convertOctal('0', '0', current));
267 state = InString;
268 } else {
269 setDone(Bad);
270 }
271 } else if (current == 'x')
272 state = InHexEscape;
273 else if (current == 'u')
274 state = InUnicodeEscape;
275 else if (isLineTerminator()) {
276 nextLine();
277 state = InString;
278 } else {
279 record16(singleEscape(static_cast<unsigned short>(current)));
280 state = InString;
281 }
282 break;
283 case InHexEscape:
284 if (isHexDigit(current) && isHexDigit(next1)) {
285 state = InString;
286 record16(convertHex(current, next1));
287 shift(1);
288 } else if (current == stringType) {
289 record16('x');
290 shift(1);
291 setDone(String);
292 } else {
293 record16('x');
294 record16(current);
295 state = InString;
296 }
297 break;
298 case InUnicodeEscape:
299 if (isHexDigit(current) && isHexDigit(next1) && isHexDigit(next2) && isHexDigit(next3)) {
300 record16(convertUnicode(current, next1, next2, next3));
301 shift(3);
302 state = InString;
303 } else if (current == stringType) {
304 record16('u');
305 shift(1);
306 setDone(String);
307 } else {
308 setDone(Bad);
309 }
310 break;
311 case InSingleLineComment:
312 if (isLineTerminator()) {
313 nextLine();
314 terminator = true;
315 if (restrKeyword) {
316 token = ';';
317 setDone(Other);
318 } else
319 state = Start;
320 } else if (current == -1) {
321 setDone(Eof);
322 }
323 break;
324 case InMultiLineComment:
325 if (current == -1) {
326 setDone(Bad);
327 } else if (isLineTerminator()) {
328 nextLine();
329 } else if (current == '*' && next1 == '/') {
330 state = Start;
331 shift(1);
332 }
333 break;
334 case InIdentifierOrKeyword:
335 case InIdentifier:
336 if (isIdentPart(current))
337 record16(current);
338 else if (current == '\\')
339 state = InIdentifierPartUnicodeEscapeStart;
340 else
341 setDone(state == InIdentifierOrKeyword ? IdentifierOrKeyword : Identifier);
342 break;
343 case InNum0:
344 if (current == 'x' || current == 'X') {
345 record8(current);
346 state = InHex;
347 } else if (current == '.') {
348 record8(current);
349 state = InDecimal;
350 } else if (current == 'e' || current == 'E') {
351 record8(current);
352 state = InExponentIndicator;
353 } else if (isOctalDigit(current)) {
354 record8(current);
355 state = InOctal;
356 } else if (isDecimalDigit(current)) {
357 record8(current);
358 state = InDecimal;
359 } else {
360 setDone(Number);
361 }
362 break;
363 case InHex:
364 if (isHexDigit(current)) {
365 record8(current);
366 } else {
367 setDone(Hex);
368 }
369 break;
370 case InOctal:
371 if (isOctalDigit(current)) {
372 record8(current);
373 }
374 else if (isDecimalDigit(current)) {
375 record8(current);
376 state = InDecimal;
377 } else
378 setDone(Octal);
379 break;
380 case InNum:
381 if (isDecimalDigit(current)) {
382 record8(current);
383 } else if (current == '.') {
384 record8(current);
385 state = InDecimal;
386 } else if (current == 'e' || current == 'E') {
387 record8(current);
388 state = InExponentIndicator;
389 } else
390 setDone(Number);
391 break;
392 case InDecimal:
393 if (isDecimalDigit(current)) {
394 record8(current);
395 } else if (current == 'e' || current == 'E') {
396 record8(current);
397 state = InExponentIndicator;
398 } else
399 setDone(Number);
400 break;
401 case InExponentIndicator:
402 if (current == '+' || current == '-') {
403 record8(current);
404 } else if (isDecimalDigit(current)) {
405 record8(current);
406 state = InExponent;
407 } else
408 setDone(Bad);
409 break;
410 case InExponent:
411 if (isDecimalDigit(current)) {
412 record8(current);
413 } else
414 setDone(Number);
415 break;
416 case InIdentifierStartUnicodeEscapeStart:
417 if (current == 'u')
418 state = InIdentifierStartUnicodeEscape;
419 else
420 setDone(Bad);
421 break;
422 case InIdentifierPartUnicodeEscapeStart:
423 if (current == 'u')
424 state = InIdentifierPartUnicodeEscape;
425 else
426 setDone(Bad);
427 break;
428 case InIdentifierStartUnicodeEscape:
429 if (!isHexDigit(current) || !isHexDigit(next1) || !isHexDigit(next2) || !isHexDigit(next3)) {
430 setDone(Bad);
431 break;
432 }
433 token = convertUnicode(current, next1, next2, next3);
434 shift(3);
435 if (!isIdentStart(token)) {
436 setDone(Bad);
437 break;
438 }
439 record16(token);
440 state = InIdentifier;
441 break;
442 case InIdentifierPartUnicodeEscape:
443 if (!isHexDigit(current) || !isHexDigit(next1) || !isHexDigit(next2) || !isHexDigit(next3)) {
444 setDone(Bad);
445 break;
446 }
447 token = convertUnicode(current, next1, next2, next3);
448 shift(3);
449 if (!isIdentPart(token)) {
450 setDone(Bad);
451 break;
452 }
453 record16(token);
454 state = InIdentifier;
455 break;
456 default:
457 ASSERT(!"Unhandled state in switch statement");
458 }
459
460 // move on to the next character
461 if (!done)
462 shift(1);
463 if (state != Start && state != InSingleLineComment)
464 atLineStart = false;
465 }
466
467 // no identifiers allowed directly after numeric literal, e.g. "3in" is bad
468 if ((state == Number || state == Octal || state == Hex) && isIdentStart(current))
469 state = Bad;
470
471 // terminate string
472 m_buffer8.append('\0');
473
474#ifdef KJS_DEBUG_LEX
475 fprintf(stderr, "line: %d ", lineNo());
476 fprintf(stderr, "yytext (%x): ", m_buffer8[0]);
477 fprintf(stderr, "%s ", buffer8.data());
478#endif
479
480 double dval = 0;
481 if (state == Number) {
482 dval = kjs_strtod(m_buffer8.data(), 0L);
483 } else if (state == Hex) { // scan hex numbers
484 const char* p = m_buffer8.data() + 2;
485 while (char c = *p++) {
486 dval *= 16;
487 dval += convertHex(c);
488 }
489
490 if (dval >= mantissaOverflowLowerBound)
491 dval = parseIntOverflow(m_buffer8.data() + 2, p - (m_buffer8.data() + 3), 16);
492
493 state = Number;
494 } else if (state == Octal) { // scan octal number
495 const char* p = m_buffer8.data() + 1;
496 while (char c = *p++) {
497 dval *= 8;
498 dval += c - '0';
499 }
500
501 if (dval >= mantissaOverflowLowerBound)
502 dval = parseIntOverflow(m_buffer8.data() + 1, p - (m_buffer8.data() + 2), 8);
503
504 state = Number;
505 }
506
507#ifdef KJS_DEBUG_LEX
508 switch (state) {
509 case Eof:
510 printf("(EOF)\n");
511 break;
512 case Other:
513 printf("(Other)\n");
514 break;
515 case Identifier:
516 printf("(Identifier)/(Keyword)\n");
517 break;
518 case String:
519 printf("(String)\n");
520 break;
521 case Number:
522 printf("(Number)\n");
523 break;
524 default:
525 printf("(unknown)");
526 }
527#endif
528
529 if (state != Identifier)
530 eatNextIdentifier = false;
531
532 restrKeyword = false;
533 delimited = false;
534 llocp->first_line = yylineno; // ???
535 llocp->last_line = yylineno;
536
537 switch (state) {
538 case Eof:
539 token = 0;
540 break;
541 case Other:
542 if (token == '}' || token == ';')
543 delimited = true;
544 break;
545 case Identifier:
546 // Apply anonymous-function hack below (eat the identifier).
547 if (eatNextIdentifier) {
548 eatNextIdentifier = false;
549 token = lex(lvalp, llocp);
550 break;
551 }
552 lvalp->ident = makeIdentifier(m_buffer16);
553 token = IDENT;
554 break;
555 case IdentifierOrKeyword:
556 lvalp->ident = makeIdentifier(m_buffer16);
557 if ((token = mainTable.value(*lvalp->ident)) < 0) {
558 // Lookup for keyword failed, means this is an identifier.
559 token = IDENT;
560 break;
561 }
562 // Hack for "f = function somename() { ... }"; too hard to get into the grammar.
563 eatNextIdentifier = token == FUNCTION && lastToken == '=';
564 if (token == CONTINUE || token == BREAK || token == RETURN || token == THROW)
565 restrKeyword = true;
566 break;
567 case String:
568 lvalp->string = makeUString(m_buffer16);
569 token = STRING;
570 break;
571 case Number:
572 lvalp->doubleValue = dval;
573 token = NUMBER;
574 break;
575 case Bad:
576#ifdef KJS_DEBUG_LEX
577 fprintf(stderr, "yylex: ERROR.\n");
578#endif
579 error = true;
580 return -1;
581 default:
582 ASSERT(!"unhandled numeration value in switch");
583 error = true;
584 return -1;
585 }
586 lastToken = token;
587 return token;
588}
589
590bool Lexer::isWhiteSpace() const
591{
592 return current == '\t' || current == 0x0b || current == 0x0c || isSeparatorSpace(current);
593}
594
595bool Lexer::isLineTerminator()
596{
597 bool cr = (current == '\r');
598 bool lf = (current == '\n');
599 if (cr)
600 skipLF = true;
601 else if (lf)
602 skipCR = true;
603 return cr || lf || current == 0x2028 || current == 0x2029;
604}
605
606bool Lexer::isIdentStart(int c)
607{
608 return (category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other))
609 || c == '$' || c == '_';
610}
611
612bool Lexer::isIdentPart(int c)
613{
614 return (category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other
615 | Mark_NonSpacing | Mark_SpacingCombining | Number_DecimalDigit | Punctuation_Connector))
616 || c == '$' || c == '_';
617}
618
// True when c is an ASCII decimal digit '0'..'9'. Any other value, including
// the -1 end-of-input marker, yields false.
static bool isDecimalDigit(int c)
{
    return (c >= '0' && c <= '9');
}
623
624bool Lexer::isHexDigit(int c)
625{
626 return (c >= '0' && c <= '9' ||
627 c >= 'a' && c <= 'f' ||
628 c >= 'A' && c <= 'F');
629}
630
631bool Lexer::isOctalDigit(int c)
632{
633 return (c >= '0' && c <= '7');
634}
635
636int Lexer::matchPunctuator(int c1, int c2, int c3, int c4)
637{
638 if (c1 == '>' && c2 == '>' && c3 == '>' && c4 == '=') {
639 shift(4);
640 return URSHIFTEQUAL;
641 } else if (c1 == '=' && c2 == '=' && c3 == '=') {
642 shift(3);
643 return STREQ;
644 } else if (c1 == '!' && c2 == '=' && c3 == '=') {
645 shift(3);
646 return STRNEQ;
647 } else if (c1 == '>' && c2 == '>' && c3 == '>') {
648 shift(3);
649 return URSHIFT;
650 } else if (c1 == '<' && c2 == '<' && c3 == '=') {
651 shift(3);
652 return LSHIFTEQUAL;
653 } else if (c1 == '>' && c2 == '>' && c3 == '=') {
654 shift(3);
655 return RSHIFTEQUAL;
656 } else if (c1 == '<' && c2 == '=') {
657 shift(2);
658 return LE;
659 } else if (c1 == '>' && c2 == '=') {
660 shift(2);
661 return GE;
662 } else if (c1 == '!' && c2 == '=') {
663 shift(2);
664 return NE;
665 } else if (c1 == '+' && c2 == '+') {
666 shift(2);
667 if (terminator)
668 return AUTOPLUSPLUS;
669 else
670 return PLUSPLUS;
671 } else if (c1 == '-' && c2 == '-') {
672 shift(2);
673 if (terminator)
674 return AUTOMINUSMINUS;
675 else
676 return MINUSMINUS;
677 } else if (c1 == '=' && c2 == '=') {
678 shift(2);
679 return EQEQ;
680 } else if (c1 == '+' && c2 == '=') {
681 shift(2);
682 return PLUSEQUAL;
683 } else if (c1 == '-' && c2 == '=') {
684 shift(2);
685 return MINUSEQUAL;
686 } else if (c1 == '*' && c2 == '=') {
687 shift(2);
688 return MULTEQUAL;
689 } else if (c1 == '/' && c2 == '=') {
690 shift(2);
691 return DIVEQUAL;
692 } else if (c1 == '&' && c2 == '=') {
693 shift(2);
694 return ANDEQUAL;
695 } else if (c1 == '^' && c2 == '=') {
696 shift(2);
697 return XOREQUAL;
698 } else if (c1 == '%' && c2 == '=') {
699 shift(2);
700 return MODEQUAL;
701 } else if (c1 == '|' && c2 == '=') {
702 shift(2);
703 return OREQUAL;
704 } else if (c1 == '<' && c2 == '<') {
705 shift(2);
706 return LSHIFT;
707 } else if (c1 == '>' && c2 == '>') {
708 shift(2);
709 return RSHIFT;
710 } else if (c1 == '&' && c2 == '&') {
711 shift(2);
712 return AND;
713 } else if (c1 == '|' && c2 == '|') {
714 shift(2);
715 return OR;
716 }
717
718 switch(c1) {
719 case '=':
720 case '>':
721 case '<':
722 case ',':
723 case '!':
724 case '~':
725 case '?':
726 case ':':
727 case '.':
728 case '+':
729 case '-':
730 case '*':
731 case '/':
732 case '&':
733 case '|':
734 case '^':
735 case '%':
736 case '(':
737 case ')':
738 case '{':
739 case '}':
740 case '[':
741 case ']':
742 case ';':
743 shift(1);
744 return static_cast<int>(c1);
745 default:
746 return -1;
747 }
748}
749
750unsigned short Lexer::singleEscape(unsigned short c)
751{
752 switch(c) {
753 case 'b':
754 return 0x08;
755 case 't':
756 return 0x09;
757 case 'n':
758 return 0x0A;
759 case 'v':
760 return 0x0B;
761 case 'f':
762 return 0x0C;
763 case 'r':
764 return 0x0D;
765 case '"':
766 return 0x22;
767 case '\'':
768 return 0x27;
769 case '\\':
770 return 0x5C;
771 default:
772 return c;
773 }
774}
775
776unsigned short Lexer::convertOctal(int c1, int c2, int c3)
777{
778 return static_cast<unsigned short>((c1 - '0') * 64 + (c2 - '0') * 8 + c3 - '0');
779}
780
781unsigned char Lexer::convertHex(int c)
782{
783 if (c >= '0' && c <= '9')
784 return static_cast<unsigned char>(c - '0');
785 if (c >= 'a' && c <= 'f')
786 return static_cast<unsigned char>(c - 'a' + 10);
787 return static_cast<unsigned char>(c - 'A' + 10);
788}
789
790unsigned char Lexer::convertHex(int c1, int c2)
791{
792 return ((convertHex(c1) << 4) + convertHex(c2));
793}
794
795UChar Lexer::convertUnicode(int c1, int c2, int c3, int c4)
796{
797 unsigned char highByte = (convertHex(c1) << 4) + convertHex(c2);
798 unsigned char lowByte = (convertHex(c3) << 4) + convertHex(c4);
799 return (highByte << 8 | lowByte);
800}
801
802void Lexer::record8(int c)
803{
804 ASSERT(c >= 0);
805 ASSERT(c <= 0xff);
806 m_buffer8.append(static_cast<char>(c));
807}
808
809void Lexer::record16(int c)
810{
811 ASSERT(c >= 0);
812 ASSERT(c <= USHRT_MAX);
813 record16(UChar(static_cast<unsigned short>(c)));
814}
815
816void Lexer::record16(UChar c)
817{
818 m_buffer16.append(c);
819}
820
821bool Lexer::scanRegExp()
822{
823 m_buffer16.clear();
824 bool lastWasEscape = false;
825 bool inBrackets = false;
826
827 while (1) {
828 if (isLineTerminator() || current == -1)
829 return false;
830 else if (current != '/' || lastWasEscape == true || inBrackets == true)
831 {
832 // keep track of '[' and ']'
833 if (!lastWasEscape) {
834 if ( current == '[' && !inBrackets )
835 inBrackets = true;
836 if ( current == ']' && inBrackets )
837 inBrackets = false;
838 }
839 record16(current);
840 lastWasEscape =
841 !lastWasEscape && (current == '\\');
842 } else { // end of regexp
843 m_pattern = UString(m_buffer16);
844 m_buffer16.clear();
845 shift(1);
846 break;
847 }
848 shift(1);
849 }
850
851 while (isIdentPart(current)) {
852 record16(current);
853 shift(1);
854 }
855 m_flags = UString(m_buffer16);
856
857 return true;
858}
859
860void Lexer::clear()
861{
862 deleteAllValues(m_strings);
863 Vector<UString*> newStrings;
864 newStrings.reserveCapacity(initialStringTableCapacity);
865 m_strings.swap(newStrings);
866
867 deleteAllValues(m_identifiers);
868 Vector<KJS::Identifier*> newIdentifiers;
869 newIdentifiers.reserveCapacity(initialStringTableCapacity);
870 m_identifiers.swap(newIdentifiers);
871
872 Vector<char> newBuffer8;
873 newBuffer8.reserveCapacity(initialReadBufferCapacity);
874 m_buffer8.swap(newBuffer8);
875
876 Vector<UChar> newBuffer16;
877 newBuffer16.reserveCapacity(initialReadBufferCapacity);
878 m_buffer16.swap(newBuffer16);
879
880 m_pattern = 0;
881 m_flags = 0;
882}
883
884Identifier* Lexer::makeIdentifier(const Vector<UChar>& buffer)
885{
886 KJS::Identifier* identifier = new KJS::Identifier(buffer.data(), buffer.size());
887 m_identifiers.append(identifier);
888 return identifier;
889}
890
891UString* Lexer::makeUString(const Vector<UChar>& buffer)
892{
893 UString* string = new UString(buffer);
894 m_strings.append(string);
895 return string;
896}
897
898} // namespace KJS
Note: See TracBrowser for help on using the repository browser.