source: webkit/trunk/JavaScriptCore/kjs/lexer.cpp@ 28545

Last change on this file since 28545 was 28468, checked in by [email protected], 17 years ago

JavaScriptCore:

Reviewed by Darin Adler.

Third step in refactoring JSGlobalObject: Moved data members and
functions accessing data members from Interpreter to JSGlobalObject.
Changed Interpreter member functions to static functions.


This resolves a bug in global object bootstrapping, where the global
ExecState could be used when uninitialized.


This is a big change, but it's mostly code motion and renaming.


Layout and JS tests, and testjsglue and testapi, pass. SunSpider reports
a .7% regression, but Shark sees no difference related to this patch,
and SunSpider reported a .7% speedup from an earlier step in this
refactoring, so I think it's fair to call that a wash.

JavaScriptGlue:

Reviewed by Darin Adler.

Third step in refactoring JSGlobalObject: Moved data members and data
member access from Interpreter to JSGlobalObject. Replaced JSInterpreter
subclass with JSGlobalObject subclass.


  • JSRun.cpp: (JSRun::JSRun): (JSRun::Evaluate): (JSRun::CheckSyntax):
  • JSRun.h: (JSGlueGlobalObject::JSGlueGlobalObject):
  • JSUtils.cpp: (KJSValueToCFTypeInternal):
  • JSValueWrapper.cpp: (getThreadGlobalExecState):

WebCore:

Reviewed by Darin Adler.

Third step in refactoring JSGlobalObject: Moved data members and data
member access from Interpreter to JSGlobalObject. Changed Interpreter
member functions to static functions. Same for the subclass,
ScriptInterpreter.


This is a big change, but it's mostly code motion and renaming.

WebKit/mac:

Reviewed by Darin Adler.

Third step in refactoring JSGlobalObject: Moved data members and data
member access from Interpreter to JSGlobalObject.


  • WebView/WebFrame.mm: (-[WebFrame _attachScriptDebugger]):

WebKit/win:

Reviewed by Darin Adler.

Third step in refactoring JSGlobalObject: Moved data members and data
member access from Interpreter to JSGlobalObject.


  • WebFrame.cpp: (WebFrame::globalContext): (WebFrame::attachScriptDebugger): (WebFrame::windowObjectCleared):
  • WebScriptDebugger.cpp: (WebScriptDebugger::WebScriptDebugger):
  • Property svn:eol-style set to native
File size: 21.6 KB
Line 
1// -*- c-basic-offset: 2 -*-
2/*
3 * Copyright (C) 1999-2000 Harri Porten ([email protected])
4 * Copyright (C) 2006, 2007 Apple Inc. All Rights Reserved.
5 * Copyright (C) 2007 Cameron Zwarich ([email protected])
6 *
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Library General Public
9 * License as published by the Free Software Foundation; either
10 * version 2 of the License, or (at your option) any later version.
11 *
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Library General Public License for more details.
16 *
17 * You should have received a copy of the GNU Library General Public License
18 * along with this library; see the file COPYING.LIB. If not, write to
19 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20 * Boston, MA 02110-1301, USA.
21 *
22 */
23
24#include "config.h"
25#include "lexer.h"
26
27#include "function.h"
28#include "nodes.h"
29#include <ctype.h>
30#include <limits.h>
31#include <string.h>
32#include <wtf/Assertions.h>
33#include <wtf/unicode/Unicode.h>
34
35using namespace WTF;
36using namespace Unicode;
37
38// we can't specify the namespace in yacc's C output, so do it here
39using namespace KJS;
40
41#ifndef KDE_USE_FINAL
42#include "grammar.h"
43#endif
44
45#include "lookup.h"
46#include "lexer.lut.h"
47
48extern YYLTYPE kjsyylloc; // global bison variable holding token info
49
50// a bridge for yacc from the C world to C++
51int kjsyylex()
52{
53 return lexer().lex();
54}
55
56namespace KJS {
57
58static bool isDecimalDigit(int);
59
// Capacity reserved up front for the identifier and string tables
// (m_identifiers / m_strings).
static const size_t initialStringTableCapacity = 64;
// Capacity reserved up front for the 8-bit and 16-bit token read buffers
// (m_buffer8 / m_buffer16).
static const size_t initialReadBufferCapacity = 32;
62
63Lexer& lexer()
64{
65 ASSERT(JSLock::currentThreadIsHoldingLock());
66
67 // FIXME: We'd like to avoid calling new here, but we don't currently
68 // support tearing down the Lexer at app quit time, since that would involve
69 // tearing down its UString data members without holding the JSLock.
70 static Lexer* staticLexer = new Lexer;
71 return *staticLexer;
72}
73
74Lexer::Lexer()
75 : yylineno(1)
76 , restrKeyword(false)
77 , eatNextIdentifier(false)
78 , stackToken(-1)
79 , lastToken(-1)
80 , pos(0)
81 , code(0)
82 , length(0)
83#ifndef KJS_PURE_ECMA
84 , bol(true)
85#endif
86 , current(0)
87 , next1(0)
88 , next2(0)
89 , next3(0)
90{
91 m_buffer8.reserveCapacity(initialReadBufferCapacity);
92 m_buffer16.reserveCapacity(initialReadBufferCapacity);
93 m_strings.reserveCapacity(initialStringTableCapacity);
94 m_identifiers.reserveCapacity(initialStringTableCapacity);
95}
96
97void Lexer::setCode(const UString &sourceURL, int startingLineNumber, const KJS::UChar *c, unsigned int len)
98{
99 yylineno = 1 + startingLineNumber;
100 m_sourceURL = sourceURL;
101 restrKeyword = false;
102 delimited = false;
103 eatNextIdentifier = false;
104 stackToken = -1;
105 lastToken = -1;
106 pos = 0;
107 code = c;
108 length = len;
109 skipLF = false;
110 skipCR = false;
111 error = false;
112#ifndef KJS_PURE_ECMA
113 bol = true;
114#endif
115
116 // read first characters
117 current = (length > 0) ? code[0].uc : -1;
118 next1 = (length > 1) ? code[1].uc : -1;
119 next2 = (length > 2) ? code[2].uc : -1;
120 next3 = (length > 3) ? code[3].uc : -1;
121}
122
123void Lexer::shift(unsigned int p)
124{
125 // Here would be a good place to strip Cf characters, but that has caused compatibility problems:
126 // <http://bugs.webkit.org/show_bug.cgi?id=10183>.
127 while (p--) {
128 pos++;
129 current = next1;
130 next1 = next2;
131 next2 = next3;
132 next3 = (pos + 3 < length) ? code[pos + 3].uc : -1;
133 }
134}
135
136// called on each new line
137void Lexer::nextLine()
138{
139 yylineno++;
140#ifndef KJS_PURE_ECMA
141 bol = true;
142#endif
143}
144
145void Lexer::setDone(State s)
146{
147 state = s;
148 done = true;
149}
150
151int Lexer::lex()
152{
153 int token = 0;
154 state = Start;
155 unsigned short stringType = 0; // either single or double quotes
156 m_buffer8.clear();
157 m_buffer16.clear();
158 done = false;
159 terminator = false;
160 skipLF = false;
161 skipCR = false;
162
163 // did we push a token on the stack previously ?
164 // (after an automatic semicolon insertion)
165 if (stackToken >= 0) {
166 setDone(Other);
167 token = stackToken;
168 stackToken = 0;
169 }
170
171 while (!done) {
172 if (skipLF && current != '\n') // found \r but not \n afterwards
173 skipLF = false;
174 if (skipCR && current != '\r') // found \n but not \r afterwards
175 skipCR = false;
176 if (skipLF || skipCR) // found \r\n or \n\r -> eat the second one
177 {
178 skipLF = false;
179 skipCR = false;
180 shift(1);
181 }
182 switch (state) {
183 case Start:
184 if (isWhiteSpace()) {
185 // do nothing
186 } else if (current == '/' && next1 == '/') {
187 shift(1);
188 state = InSingleLineComment;
189 } else if (current == '/' && next1 == '*') {
190 shift(1);
191 state = InMultiLineComment;
192 } else if (current == -1) {
193 if (!terminator && !delimited) {
194 // automatic semicolon insertion if program incomplete
195 token = ';';
196 stackToken = 0;
197 setDone(Other);
198 } else
199 setDone(Eof);
200 } else if (isLineTerminator()) {
201 nextLine();
202 terminator = true;
203 if (restrKeyword) {
204 token = ';';
205 setDone(Other);
206 }
207 } else if (current == '"' || current == '\'') {
208 state = InString;
209 stringType = static_cast<unsigned short>(current);
210 } else if (isIdentStart(current)) {
211 record16(current);
212 state = InIdentifierOrKeyword;
213 } else if (current == '\\') {
214 state = InIdentifierUnicodeEscapeStart;
215 } else if (current == '0') {
216 record8(current);
217 state = InNum0;
218 } else if (isDecimalDigit(current)) {
219 record8(current);
220 state = InNum;
221 } else if (current == '.' && isDecimalDigit(next1)) {
222 record8(current);
223 state = InDecimal;
224#ifndef KJS_PURE_ECMA
225 // <!-- marks the beginning of a line comment (for www usage)
226 } else if (current == '<' && next1 == '!' &&
227 next2 == '-' && next3 == '-') {
228 shift(3);
229 state = InSingleLineComment;
230 // same for -->
231 } else if (bol && current == '-' && next1 == '-' && next2 == '>') {
232 shift(2);
233 state = InSingleLineComment;
234#endif
235 } else {
236 token = matchPunctuator(current, next1, next2, next3);
237 if (token != -1) {
238 setDone(Other);
239 } else {
240 // cerr << "encountered unknown character" << endl;
241 setDone(Bad);
242 }
243 }
244 break;
245 case InString:
246 if (current == stringType) {
247 shift(1);
248 setDone(String);
249 } else if (isLineTerminator() || current == -1) {
250 setDone(Bad);
251 } else if (current == '\\') {
252 state = InEscapeSequence;
253 } else {
254 record16(current);
255 }
256 break;
257 // Escape Sequences inside of strings
258 case InEscapeSequence:
259 if (isOctalDigit(current)) {
260 if (current >= '0' && current <= '3' &&
261 isOctalDigit(next1) && isOctalDigit(next2)) {
262 record16(convertOctal(current, next1, next2));
263 shift(2);
264 state = InString;
265 } else if (isOctalDigit(current) && isOctalDigit(next1)) {
266 record16(convertOctal('0', current, next1));
267 shift(1);
268 state = InString;
269 } else if (isOctalDigit(current)) {
270 record16(convertOctal('0', '0', current));
271 state = InString;
272 } else {
273 setDone(Bad);
274 }
275 } else if (current == 'x')
276 state = InHexEscape;
277 else if (current == 'u')
278 state = InUnicodeEscape;
279 else if (isLineTerminator()) {
280 nextLine();
281 state = InString;
282 } else {
283 record16(singleEscape(static_cast<unsigned short>(current)));
284 state = InString;
285 }
286 break;
287 case InHexEscape:
288 if (isHexDigit(current) && isHexDigit(next1)) {
289 state = InString;
290 record16(convertHex(current, next1));
291 shift(1);
292 } else if (current == stringType) {
293 record16('x');
294 shift(1);
295 setDone(String);
296 } else {
297 record16('x');
298 record16(current);
299 state = InString;
300 }
301 break;
302 case InUnicodeEscape:
303 if (isHexDigit(current) && isHexDigit(next1) && isHexDigit(next2) && isHexDigit(next3)) {
304 record16(convertUnicode(current, next1, next2, next3));
305 shift(3);
306 state = InString;
307 } else if (current == stringType) {
308 record16('u');
309 shift(1);
310 setDone(String);
311 } else {
312 setDone(Bad);
313 }
314 break;
315 case InSingleLineComment:
316 if (isLineTerminator()) {
317 nextLine();
318 terminator = true;
319 if (restrKeyword) {
320 token = ';';
321 setDone(Other);
322 } else
323 state = Start;
324 } else if (current == -1) {
325 setDone(Eof);
326 }
327 break;
328 case InMultiLineComment:
329 if (current == -1) {
330 setDone(Bad);
331 } else if (isLineTerminator()) {
332 nextLine();
333 } else if (current == '*' && next1 == '/') {
334 state = Start;
335 shift(1);
336 }
337 break;
338 case InIdentifierOrKeyword:
339 case InIdentifier:
340 if (isIdentPart(current))
341 record16(current);
342 else if (current == '\\')
343 state = InIdentifierUnicodeEscapeStart;
344 else
345 setDone(state == InIdentifierOrKeyword ? IdentifierOrKeyword : Identifier);
346 break;
347 case InNum0:
348 if (current == 'x' || current == 'X') {
349 record8(current);
350 state = InHex;
351 } else if (current == '.') {
352 record8(current);
353 state = InDecimal;
354 } else if (current == 'e' || current == 'E') {
355 record8(current);
356 state = InExponentIndicator;
357 } else if (isOctalDigit(current)) {
358 record8(current);
359 state = InOctal;
360 } else if (isDecimalDigit(current)) {
361 record8(current);
362 state = InDecimal;
363 } else {
364 setDone(Number);
365 }
366 break;
367 case InHex:
368 if (isHexDigit(current)) {
369 record8(current);
370 } else {
371 setDone(Hex);
372 }
373 break;
374 case InOctal:
375 if (isOctalDigit(current)) {
376 record8(current);
377 }
378 else if (isDecimalDigit(current)) {
379 record8(current);
380 state = InDecimal;
381 } else
382 setDone(Octal);
383 break;
384 case InNum:
385 if (isDecimalDigit(current)) {
386 record8(current);
387 } else if (current == '.') {
388 record8(current);
389 state = InDecimal;
390 } else if (current == 'e' || current == 'E') {
391 record8(current);
392 state = InExponentIndicator;
393 } else
394 setDone(Number);
395 break;
396 case InDecimal:
397 if (isDecimalDigit(current)) {
398 record8(current);
399 } else if (current == 'e' || current == 'E') {
400 record8(current);
401 state = InExponentIndicator;
402 } else
403 setDone(Number);
404 break;
405 case InExponentIndicator:
406 if (current == '+' || current == '-') {
407 record8(current);
408 } else if (isDecimalDigit(current)) {
409 record8(current);
410 state = InExponent;
411 } else
412 setDone(Bad);
413 break;
414 case InExponent:
415 if (isDecimalDigit(current)) {
416 record8(current);
417 } else
418 setDone(Number);
419 break;
420 case InIdentifierUnicodeEscapeStart:
421 if (current == 'u')
422 state = InIdentifierUnicodeEscape;
423 else
424 setDone(Bad);
425 break;
426 case InIdentifierUnicodeEscape:
427 if (isHexDigit(current) && isHexDigit(next1) && isHexDigit(next2) && isHexDigit(next3)) {
428 record16(convertUnicode(current, next1, next2, next3));
429 shift(3);
430 state = InIdentifier;
431 } else {
432 setDone(Bad);
433 }
434 break;
435 default:
436 ASSERT(!"Unhandled state in switch statement");
437 }
438
439 // move on to the next character
440 if (!done)
441 shift(1);
442#ifndef KJS_PURE_ECMA
443 if (state != Start && state != InSingleLineComment)
444 bol = false;
445#endif
446 }
447
448 // no identifiers allowed directly after numeric literal, e.g. "3in" is bad
449 if ((state == Number || state == Octal || state == Hex) && isIdentStart(current))
450 state = Bad;
451
452 // terminate string
453 m_buffer8.append('\0');
454
455#ifdef KJS_DEBUG_LEX
456 fprintf(stderr, "line: %d ", lineNo());
457 fprintf(stderr, "yytext (%x): ", m_buffer8[0]);
458 fprintf(stderr, "%s ", buffer8.data());
459#endif
460
461 double dval = 0;
462 if (state == Number) {
463 dval = strtod(m_buffer8.data(), 0L);
464 } else if (state == Hex) { // scan hex numbers
465 const char* p = m_buffer8.data() + 2;
466 while (char c = *p++) {
467 dval *= 16;
468 dval += convertHex(c);
469 }
470
471 if (dval >= mantissaOverflowLowerBound)
472 dval = parseIntOverflow(m_buffer8.data() + 2, p - (m_buffer8.data() + 3), 16);
473
474 state = Number;
475 } else if (state == Octal) { // scan octal number
476 const char* p = m_buffer8.data() + 1;
477 while (char c = *p++) {
478 dval *= 8;
479 dval += c - '0';
480 }
481
482 if (dval >= mantissaOverflowLowerBound)
483 dval = parseIntOverflow(m_buffer8.data() + 1, p - (m_buffer8.data() + 2), 8);
484
485 state = Number;
486 }
487
488#ifdef KJS_DEBUG_LEX
489 switch (state) {
490 case Eof:
491 printf("(EOF)\n");
492 break;
493 case Other:
494 printf("(Other)\n");
495 break;
496 case Identifier:
497 printf("(Identifier)/(Keyword)\n");
498 break;
499 case String:
500 printf("(String)\n");
501 break;
502 case Number:
503 printf("(Number)\n");
504 break;
505 default:
506 printf("(unknown)");
507 }
508#endif
509
510 if (state != Identifier && eatNextIdentifier)
511 eatNextIdentifier = false;
512
513 restrKeyword = false;
514 delimited = false;
515 kjsyylloc.first_line = yylineno; // ???
516 kjsyylloc.last_line = yylineno;
517
518 switch (state) {
519 case Eof:
520 token = 0;
521 break;
522 case Other:
523 if(token == '}' || token == ';') {
524 delimited = true;
525 }
526 break;
527 case IdentifierOrKeyword:
528 if ((token = Lookup::find(&mainTable, m_buffer16.data(), m_buffer16.size())) < 0) {
529 case Identifier:
530 // Lookup for keyword failed, means this is an identifier
531 // Apply anonymous-function hack below (eat the identifier)
532 if (eatNextIdentifier) {
533 eatNextIdentifier = false;
534 token = lex();
535 break;
536 }
537 kjsyylval.ident = makeIdentifier(m_buffer16);
538 token = IDENT;
539 break;
540 }
541
542 eatNextIdentifier = false;
543 // Hack for "f = function somename() { ... }", too hard to get into the grammar
544 if (token == FUNCTION && lastToken == '=' )
545 eatNextIdentifier = true;
546
547 if (token == CONTINUE || token == BREAK ||
548 token == RETURN || token == THROW)
549 restrKeyword = true;
550 break;
551 case String:
552 kjsyylval.string = makeUString(m_buffer16);
553 token = STRING;
554 break;
555 case Number:
556 kjsyylval.doubleValue = dval;
557 token = NUMBER;
558 break;
559 case Bad:
560#ifdef KJS_DEBUG_LEX
561 fprintf(stderr, "yylex: ERROR.\n");
562#endif
563 error = true;
564 return -1;
565 default:
566 ASSERT(!"unhandled numeration value in switch");
567 error = true;
568 return -1;
569 }
570 lastToken = token;
571 return token;
572}
573
574bool Lexer::isWhiteSpace() const
575{
576 return current == '\t' || current == 0x0b || current == 0x0c || isSeparatorSpace(current);
577}
578
579bool Lexer::isLineTerminator()
580{
581 bool cr = (current == '\r');
582 bool lf = (current == '\n');
583 if (cr)
584 skipLF = true;
585 else if (lf)
586 skipCR = true;
587 return cr || lf || current == 0x2028 || current == 0x2029;
588}
589
590bool Lexer::isIdentStart(int c)
591{
592 return (category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other))
593 || c == '$' || c == '_';
594}
595
596bool Lexer::isIdentPart(int c)
597{
598 return (category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other
599 | Mark_NonSpacing | Mark_SpacingCombining | Number_DecimalDigit | Punctuation_Connector))
600 || c == '$' || c == '_';
601}
602
// True when c is one of the ASCII digits '0'..'9'.
static bool isDecimalDigit(int c)
{
  if (c < '0')
    return false;
  return c <= '9';
}
607
608bool Lexer::isHexDigit(int c)
609{
610 return (c >= '0' && c <= '9' ||
611 c >= 'a' && c <= 'f' ||
612 c >= 'A' && c <= 'F');
613}
614
615bool Lexer::isOctalDigit(int c)
616{
617 return (c >= '0' && c <= '7');
618}
619
620int Lexer::matchPunctuator(int c1, int c2, int c3, int c4)
621{
622 if (c1 == '>' && c2 == '>' && c3 == '>' && c4 == '=') {
623 shift(4);
624 return URSHIFTEQUAL;
625 } else if (c1 == '=' && c2 == '=' && c3 == '=') {
626 shift(3);
627 return STREQ;
628 } else if (c1 == '!' && c2 == '=' && c3 == '=') {
629 shift(3);
630 return STRNEQ;
631 } else if (c1 == '>' && c2 == '>' && c3 == '>') {
632 shift(3);
633 return URSHIFT;
634 } else if (c1 == '<' && c2 == '<' && c3 == '=') {
635 shift(3);
636 return LSHIFTEQUAL;
637 } else if (c1 == '>' && c2 == '>' && c3 == '=') {
638 shift(3);
639 return RSHIFTEQUAL;
640 } else if (c1 == '<' && c2 == '=') {
641 shift(2);
642 return LE;
643 } else if (c1 == '>' && c2 == '=') {
644 shift(2);
645 return GE;
646 } else if (c1 == '!' && c2 == '=') {
647 shift(2);
648 return NE;
649 } else if (c1 == '+' && c2 == '+') {
650 shift(2);
651 if (terminator)
652 return AUTOPLUSPLUS;
653 else
654 return PLUSPLUS;
655 } else if (c1 == '-' && c2 == '-') {
656 shift(2);
657 if (terminator)
658 return AUTOMINUSMINUS;
659 else
660 return MINUSMINUS;
661 } else if (c1 == '=' && c2 == '=') {
662 shift(2);
663 return EQEQ;
664 } else if (c1 == '+' && c2 == '=') {
665 shift(2);
666 return PLUSEQUAL;
667 } else if (c1 == '-' && c2 == '=') {
668 shift(2);
669 return MINUSEQUAL;
670 } else if (c1 == '*' && c2 == '=') {
671 shift(2);
672 return MULTEQUAL;
673 } else if (c1 == '/' && c2 == '=') {
674 shift(2);
675 return DIVEQUAL;
676 } else if (c1 == '&' && c2 == '=') {
677 shift(2);
678 return ANDEQUAL;
679 } else if (c1 == '^' && c2 == '=') {
680 shift(2);
681 return XOREQUAL;
682 } else if (c1 == '%' && c2 == '=') {
683 shift(2);
684 return MODEQUAL;
685 } else if (c1 == '|' && c2 == '=') {
686 shift(2);
687 return OREQUAL;
688 } else if (c1 == '<' && c2 == '<') {
689 shift(2);
690 return LSHIFT;
691 } else if (c1 == '>' && c2 == '>') {
692 shift(2);
693 return RSHIFT;
694 } else if (c1 == '&' && c2 == '&') {
695 shift(2);
696 return AND;
697 } else if (c1 == '|' && c2 == '|') {
698 shift(2);
699 return OR;
700 }
701
702 switch(c1) {
703 case '=':
704 case '>':
705 case '<':
706 case ',':
707 case '!':
708 case '~':
709 case '?':
710 case ':':
711 case '.':
712 case '+':
713 case '-':
714 case '*':
715 case '/':
716 case '&':
717 case '|':
718 case '^':
719 case '%':
720 case '(':
721 case ')':
722 case '{':
723 case '}':
724 case '[':
725 case ']':
726 case ';':
727 shift(1);
728 return static_cast<int>(c1);
729 default:
730 return -1;
731 }
732}
733
734unsigned short Lexer::singleEscape(unsigned short c)
735{
736 switch(c) {
737 case 'b':
738 return 0x08;
739 case 't':
740 return 0x09;
741 case 'n':
742 return 0x0A;
743 case 'v':
744 return 0x0B;
745 case 'f':
746 return 0x0C;
747 case 'r':
748 return 0x0D;
749 case '"':
750 return 0x22;
751 case '\'':
752 return 0x27;
753 case '\\':
754 return 0x5C;
755 default:
756 return c;
757 }
758}
759
760unsigned short Lexer::convertOctal(int c1, int c2, int c3)
761{
762 return static_cast<unsigned short>((c1 - '0') * 64 + (c2 - '0') * 8 + c3 - '0');
763}
764
765unsigned char Lexer::convertHex(int c)
766{
767 if (c >= '0' && c <= '9')
768 return static_cast<unsigned char>(c - '0');
769 if (c >= 'a' && c <= 'f')
770 return static_cast<unsigned char>(c - 'a' + 10);
771 return static_cast<unsigned char>(c - 'A' + 10);
772}
773
774unsigned char Lexer::convertHex(int c1, int c2)
775{
776 return ((convertHex(c1) << 4) + convertHex(c2));
777}
778
779KJS::UChar Lexer::convertUnicode(int c1, int c2, int c3, int c4)
780{
781 // FIXME: This conversion is lossy. See http://bugs.webkit.org/show_bug.cgi?id=4920.
782 return KJS::UChar((convertHex(c1) << 4) + convertHex(c2),
783 (convertHex(c3) << 4) + convertHex(c4));
784}
785
786void Lexer::record8(int c)
787{
788 ASSERT(c >= 0);
789 ASSERT(c <= 0xff);
790 m_buffer8.append(static_cast<char>(c));
791}
792
793void Lexer::record16(int c)
794{
795 ASSERT(c >= 0);
796 ASSERT(c <= USHRT_MAX);
797 record16(UChar(static_cast<unsigned short>(c)));
798}
799
800void Lexer::record16(KJS::UChar c)
801{
802 m_buffer16.append(c);
803}
804
805bool Lexer::scanRegExp()
806{
807 m_buffer16.clear();
808 bool lastWasEscape = false;
809 bool inBrackets = false;
810
811 while (1) {
812 if (isLineTerminator() || current == -1)
813 return false;
814 else if (current != '/' || lastWasEscape == true || inBrackets == true)
815 {
816 // keep track of '[' and ']'
817 if (!lastWasEscape) {
818 if ( current == '[' && !inBrackets )
819 inBrackets = true;
820 if ( current == ']' && inBrackets )
821 inBrackets = false;
822 }
823 record16(current);
824 lastWasEscape =
825 !lastWasEscape && (current == '\\');
826 } else { // end of regexp
827 m_pattern = UString(m_buffer16);
828 m_buffer16.clear();
829 shift(1);
830 break;
831 }
832 shift(1);
833 }
834
835 while (isIdentPart(current)) {
836 record16(current);
837 shift(1);
838 }
839 m_flags = UString(m_buffer16);
840
841 return true;
842}
843
844void Lexer::clear()
845{
846 deleteAllValues(m_strings);
847 Vector<UString*> newStrings;
848 newStrings.reserveCapacity(initialStringTableCapacity);
849 m_strings.swap(newStrings);
850
851 deleteAllValues(m_identifiers);
852 Vector<KJS::Identifier*> newIdentifiers;
853 newIdentifiers.reserveCapacity(initialStringTableCapacity);
854 m_identifiers.swap(newIdentifiers);
855
856 Vector<char> newBuffer8;
857 newBuffer8.reserveCapacity(initialReadBufferCapacity);
858 m_buffer8.swap(newBuffer8);
859
860 Vector<UChar> newBuffer16;
861 newBuffer16.reserveCapacity(initialReadBufferCapacity);
862 m_buffer16.swap(newBuffer16);
863
864 m_pattern = 0;
865 m_flags = 0;
866 m_sourceURL = 0;
867}
868
869Identifier* Lexer::makeIdentifier(const Vector<KJS::UChar>& buffer)
870{
871 KJS::Identifier* identifier = new KJS::Identifier(buffer.data(), buffer.size());
872 m_identifiers.append(identifier);
873 return identifier;
874}
875
876UString* Lexer::makeUString(const Vector<KJS::UChar>& buffer)
877{
878 UString* string = new UString(buffer);
879 m_strings.append(string);
880 return string;
881}
882
883} // namespace KJS
Note: See TracBrowser for help on using the repository browser.