source: webkit/trunk/JavaScriptCore/kjs/lexer.cpp@ 27695

Last change on this file since 27695 was 27695, checked in by eseidel, 18 years ago

2007-11-10 Eric Seidel <[email protected]>

Reviewed by darin.

Add simple type inferencing to the parser, and create custom
AddNode and LessNode subclasses based on inferred types.
http://bugs.webkit.org/show_bug.cgi?id=15884

SunSpider claims this is at least a 0.5% speedup.

  • JavaScriptCore.exp:
  • kjs/grammar.y:
  • kjs/internal.cpp: (KJS::NumberImp::getPrimitiveNumber): (KJS::GetterSetterImp::getPrimitiveNumber):
  • kjs/internal.h:
  • kjs/lexer.cpp: (KJS::Lexer::lex):
  • kjs/nodes.cpp: (KJS::Node::Node): (KJS::StringNode::evaluate): (KJS::StringNode::evaluateToNumber): (KJS::StringNode::evaluateToBoolean): (KJS::RegExpNode::evaluate): (KJS::UnaryPlusNode::optimizeVariableAccess): (KJS::AddNode::evaluate): (KJS::AddNode::evaluateToNumber): (KJS::AddNumbersNode::inlineEvaluateToNumber): (KJS::AddNumbersNode::evaluate): (KJS::AddNumbersNode::evaluateToNumber): (KJS::AddStringsNode::evaluate): (KJS::AddStringLeftNode::evaluate): (KJS::AddStringRightNode::evaluate): (KJS::lessThan): (KJS::lessThanEq): (KJS::LessNumbersNode::evaluate): (KJS::LessStringsNode::evaluate):
  • kjs/nodes.h: (KJS::ExpressionNode::): (KJS::RegExpNode::): (KJS::RegExpNode::precedence): (KJS::TypeOfResolveNode::): (KJS::LocalVarTypeOfNode::): (KJS::UnaryPlusNode::): (KJS::UnaryPlusNode::precedence): (KJS::AddNode::): (KJS::AddNode::precedence): (KJS::AddNumbersNode::): (KJS::AddStringLeftNode::): (KJS::AddStringRightNode::): (KJS::AddStringsNode::): (KJS::LessNode::): (KJS::LessNode::precedence): (KJS::LessNumbersNode::): (KJS::LessStringsNode::):
  • kjs/nodes2string.cpp: (KJS::StringNode::streamTo):
  • kjs/object.cpp:
  • kjs/object.h:
  • kjs/value.h: (KJS::JSValue::getPrimitiveNumber):
  • Property svn:eol-style set to native
File size: 22.1 KB
Line 
1// -*- c-basic-offset: 2 -*-
2/*
3 * Copyright (C) 1999-2000 Harri Porten ([email protected])
4 * Copyright (C) 2006, 2007 Apple Inc. All Rights Reserved.
5 * Copyright (C) 2007 Cameron Zwarich ([email protected])
6 *
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Library General Public
9 * License as published by the Free Software Foundation; either
10 * version 2 of the License, or (at your option) any later version.
11 *
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Library General Public License for more details.
16 *
17 * You should have received a copy of the GNU Library General Public License
18 * along with this library; see the file COPYING.LIB. If not, write to
19 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20 * Boston, MA 02110-1301, USA.
21 *
22 */
23
24#include "config.h"
25#include "lexer.h"
26
27#include "function.h"
28#include "interpreter.h"
29#include "nodes.h"
30#include <ctype.h>
31#include <limits.h>
32#include <string.h>
33#include <wtf/Assertions.h>
34#include <wtf/unicode/Unicode.h>
35
36using namespace WTF;
37using namespace Unicode;
38
39// we can't specify the namespace in yacc's C output, so do it here
40using namespace KJS;
41
42#ifndef KDE_USE_FINAL
43#include "grammar.h"
44#endif
45
46#include "lookup.h"
47#include "lexer.lut.h"
48
49extern YYLTYPE kjsyylloc; // global bison variable holding token info
50
51// a bridge for yacc from the C world to C++
52int kjsyylex()
53{
54 return Lexer::curr()->lex();
55}
56
57namespace KJS {
58
59static Lexer* currLexer = 0;
60
61static bool isDecimalDigit(int);
62
63Lexer::Lexer()
64 : yylineno(1),
65 size8(128), size16(128), restrKeyword(false),
66 eatNextIdentifier(false), stackToken(-1), lastToken(-1), pos(0),
67 code(0), length(0),
68#ifndef KJS_PURE_ECMA
69 bol(true),
70#endif
71 current(0), next1(0), next2(0), next3(0),
72 strings(0), numStrings(0), stringsCapacity(0),
73 identifiers(0), numIdentifiers(0), identifiersCapacity(0)
74{
75 // allocate space for read buffers
76 buffer8 = new char[size8];
77 buffer16 = new KJS::UChar[size16];
78 currLexer = this;
79}
80
81Lexer::~Lexer()
82{
83 doneParsing();
84 delete [] buffer8;
85 delete [] buffer16;
86}
87
88Lexer *Lexer::curr()
89{
90 if (!currLexer) {
91 // create singleton instance
92 currLexer = new Lexer();
93 }
94 return currLexer;
95}
96
97#ifdef KJS_DEBUG_MEM
98void Lexer::globalClear()
99{
100 delete currLexer;
101 currLexer = 0L;
102}
103#endif
104
105void Lexer::setCode(const UString &sourceURL, int startingLineNumber, const KJS::UChar *c, unsigned int len)
106{
107 yylineno = 1 + startingLineNumber;
108 m_sourceURL = sourceURL;
109 restrKeyword = false;
110 delimited = false;
111 eatNextIdentifier = false;
112 stackToken = -1;
113 lastToken = -1;
114 pos = 0;
115 code = c;
116 length = len;
117 skipLF = false;
118 skipCR = false;
119 error = false;
120#ifndef KJS_PURE_ECMA
121 bol = true;
122#endif
123
124 // read first characters
125 current = (length > 0) ? code[0].uc : -1;
126 next1 = (length > 1) ? code[1].uc : -1;
127 next2 = (length > 2) ? code[2].uc : -1;
128 next3 = (length > 3) ? code[3].uc : -1;
129}
130
131void Lexer::shift(unsigned int p)
132{
133 // Here would be a good place to strip Cf characters, but that has caused compatibility problems:
134 // <http://bugs.webkit.org/show_bug.cgi?id=10183>.
135 while (p--) {
136 pos++;
137 current = next1;
138 next1 = next2;
139 next2 = next3;
140 next3 = (pos + 3 < length) ? code[pos + 3].uc : -1;
141 }
142}
143
144// called on each new line
145void Lexer::nextLine()
146{
147 yylineno++;
148#ifndef KJS_PURE_ECMA
149 bol = true;
150#endif
151}
152
153void Lexer::setDone(State s)
154{
155 state = s;
156 done = true;
157}
158
159int Lexer::lex()
160{
161 int token = 0;
162 state = Start;
163 unsigned short stringType = 0; // either single or double quotes
164 pos8 = pos16 = 0;
165 done = false;
166 terminator = false;
167 skipLF = false;
168 skipCR = false;
169
170 // did we push a token on the stack previously ?
171 // (after an automatic semicolon insertion)
172 if (stackToken >= 0) {
173 setDone(Other);
174 token = stackToken;
175 stackToken = 0;
176 }
177
178 while (!done) {
179 if (skipLF && current != '\n') // found \r but not \n afterwards
180 skipLF = false;
181 if (skipCR && current != '\r') // found \n but not \r afterwards
182 skipCR = false;
183 if (skipLF || skipCR) // found \r\n or \n\r -> eat the second one
184 {
185 skipLF = false;
186 skipCR = false;
187 shift(1);
188 }
189 switch (state) {
190 case Start:
191 if (isWhiteSpace()) {
192 // do nothing
193 } else if (current == '/' && next1 == '/') {
194 shift(1);
195 state = InSingleLineComment;
196 } else if (current == '/' && next1 == '*') {
197 shift(1);
198 state = InMultiLineComment;
199 } else if (current == -1) {
200 if (!terminator && !delimited) {
201 // automatic semicolon insertion if program incomplete
202 token = ';';
203 stackToken = 0;
204 setDone(Other);
205 } else
206 setDone(Eof);
207 } else if (isLineTerminator()) {
208 nextLine();
209 terminator = true;
210 if (restrKeyword) {
211 token = ';';
212 setDone(Other);
213 }
214 } else if (current == '"' || current == '\'') {
215 state = InString;
216 stringType = static_cast<unsigned short>(current);
217 } else if (isIdentStart(current)) {
218 record16(current);
219 state = InIdentifierOrKeyword;
220 } else if (current == '\\') {
221 state = InIdentifierUnicodeEscapeStart;
222 } else if (current == '0') {
223 record8(current);
224 state = InNum0;
225 } else if (isDecimalDigit(current)) {
226 record8(current);
227 state = InNum;
228 } else if (current == '.' && isDecimalDigit(next1)) {
229 record8(current);
230 state = InDecimal;
231#ifndef KJS_PURE_ECMA
232 // <!-- marks the beginning of a line comment (for www usage)
233 } else if (current == '<' && next1 == '!' &&
234 next2 == '-' && next3 == '-') {
235 shift(3);
236 state = InSingleLineComment;
237 // same for -->
238 } else if (bol && current == '-' && next1 == '-' && next2 == '>') {
239 shift(2);
240 state = InSingleLineComment;
241#endif
242 } else {
243 token = matchPunctuator(current, next1, next2, next3);
244 if (token != -1) {
245 setDone(Other);
246 } else {
247 // cerr << "encountered unknown character" << endl;
248 setDone(Bad);
249 }
250 }
251 break;
252 case InString:
253 if (current == stringType) {
254 shift(1);
255 setDone(String);
256 } else if (isLineTerminator() || current == -1) {
257 setDone(Bad);
258 } else if (current == '\\') {
259 state = InEscapeSequence;
260 } else {
261 record16(current);
262 }
263 break;
264 // Escape Sequences inside of strings
265 case InEscapeSequence:
266 if (isOctalDigit(current)) {
267 if (current >= '0' && current <= '3' &&
268 isOctalDigit(next1) && isOctalDigit(next2)) {
269 record16(convertOctal(current, next1, next2));
270 shift(2);
271 state = InString;
272 } else if (isOctalDigit(current) && isOctalDigit(next1)) {
273 record16(convertOctal('0', current, next1));
274 shift(1);
275 state = InString;
276 } else if (isOctalDigit(current)) {
277 record16(convertOctal('0', '0', current));
278 state = InString;
279 } else {
280 setDone(Bad);
281 }
282 } else if (current == 'x')
283 state = InHexEscape;
284 else if (current == 'u')
285 state = InUnicodeEscape;
286 else if (isLineTerminator()) {
287 nextLine();
288 state = InString;
289 } else {
290 record16(singleEscape(static_cast<unsigned short>(current)));
291 state = InString;
292 }
293 break;
294 case InHexEscape:
295 if (isHexDigit(current) && isHexDigit(next1)) {
296 state = InString;
297 record16(convertHex(current, next1));
298 shift(1);
299 } else if (current == stringType) {
300 record16('x');
301 shift(1);
302 setDone(String);
303 } else {
304 record16('x');
305 record16(current);
306 state = InString;
307 }
308 break;
309 case InUnicodeEscape:
310 if (isHexDigit(current) && isHexDigit(next1) && isHexDigit(next2) && isHexDigit(next3)) {
311 record16(convertUnicode(current, next1, next2, next3));
312 shift(3);
313 state = InString;
314 } else if (current == stringType) {
315 record16('u');
316 shift(1);
317 setDone(String);
318 } else {
319 setDone(Bad);
320 }
321 break;
322 case InSingleLineComment:
323 if (isLineTerminator()) {
324 nextLine();
325 terminator = true;
326 if (restrKeyword) {
327 token = ';';
328 setDone(Other);
329 } else
330 state = Start;
331 } else if (current == -1) {
332 setDone(Eof);
333 }
334 break;
335 case InMultiLineComment:
336 if (current == -1) {
337 setDone(Bad);
338 } else if (isLineTerminator()) {
339 nextLine();
340 } else if (current == '*' && next1 == '/') {
341 state = Start;
342 shift(1);
343 }
344 break;
345 case InIdentifierOrKeyword:
346 case InIdentifier:
347 if (isIdentPart(current))
348 record16(current);
349 else if (current == '\\')
350 state = InIdentifierUnicodeEscapeStart;
351 else
352 setDone(state == InIdentifierOrKeyword ? IdentifierOrKeyword : Identifier);
353 break;
354 case InNum0:
355 if (current == 'x' || current == 'X') {
356 record8(current);
357 state = InHex;
358 } else if (current == '.') {
359 record8(current);
360 state = InDecimal;
361 } else if (current == 'e' || current == 'E') {
362 record8(current);
363 state = InExponentIndicator;
364 } else if (isOctalDigit(current)) {
365 record8(current);
366 state = InOctal;
367 } else if (isDecimalDigit(current)) {
368 record8(current);
369 state = InDecimal;
370 } else {
371 setDone(Number);
372 }
373 break;
374 case InHex:
375 if (isHexDigit(current)) {
376 record8(current);
377 } else {
378 setDone(Hex);
379 }
380 break;
381 case InOctal:
382 if (isOctalDigit(current)) {
383 record8(current);
384 }
385 else if (isDecimalDigit(current)) {
386 record8(current);
387 state = InDecimal;
388 } else
389 setDone(Octal);
390 break;
391 case InNum:
392 if (isDecimalDigit(current)) {
393 record8(current);
394 } else if (current == '.') {
395 record8(current);
396 state = InDecimal;
397 } else if (current == 'e' || current == 'E') {
398 record8(current);
399 state = InExponentIndicator;
400 } else
401 setDone(Number);
402 break;
403 case InDecimal:
404 if (isDecimalDigit(current)) {
405 record8(current);
406 } else if (current == 'e' || current == 'E') {
407 record8(current);
408 state = InExponentIndicator;
409 } else
410 setDone(Number);
411 break;
412 case InExponentIndicator:
413 if (current == '+' || current == '-') {
414 record8(current);
415 } else if (isDecimalDigit(current)) {
416 record8(current);
417 state = InExponent;
418 } else
419 setDone(Bad);
420 break;
421 case InExponent:
422 if (isDecimalDigit(current)) {
423 record8(current);
424 } else
425 setDone(Number);
426 break;
427 case InIdentifierUnicodeEscapeStart:
428 if (current == 'u')
429 state = InIdentifierUnicodeEscape;
430 else
431 setDone(Bad);
432 break;
433 case InIdentifierUnicodeEscape:
434 if (isHexDigit(current) && isHexDigit(next1) && isHexDigit(next2) && isHexDigit(next3)) {
435 record16(convertUnicode(current, next1, next2, next3));
436 shift(3);
437 state = InIdentifier;
438 } else {
439 setDone(Bad);
440 }
441 break;
442 default:
443 ASSERT(!"Unhandled state in switch statement");
444 }
445
446 // move on to the next character
447 if (!done)
448 shift(1);
449#ifndef KJS_PURE_ECMA
450 if (state != Start && state != InSingleLineComment)
451 bol = false;
452#endif
453 }
454
455 // no identifiers allowed directly after numeric literal, e.g. "3in" is bad
456 if ((state == Number || state == Octal || state == Hex) && isIdentStart(current))
457 state = Bad;
458
459 // terminate string
460 buffer8[pos8] = '\0';
461
462#ifdef KJS_DEBUG_LEX
463 fprintf(stderr, "line: %d ", lineNo());
464 fprintf(stderr, "yytext (%x): ", buffer8[0]);
465 fprintf(stderr, "%s ", buffer8);
466#endif
467
468 double dval = 0;
469 if (state == Number) {
470 dval = strtod(buffer8, 0L);
471 } else if (state == Hex) { // scan hex numbers
472 const char *p = buffer8 + 2;
473 while (char c = *p++) {
474 dval *= 16;
475 dval += convertHex(c);
476 }
477
478 if (dval >= mantissaOverflowLowerBound)
479 dval = parseIntOverflow(buffer8 + 2, p - (buffer8 + 3), 16);
480
481 state = Number;
482 } else if (state == Octal) { // scan octal number
483 const char *p = buffer8 + 1;
484 while (char c = *p++) {
485 dval *= 8;
486 dval += c - '0';
487 }
488
489 if (dval >= mantissaOverflowLowerBound)
490 dval = parseIntOverflow(buffer8 + 1, p - (buffer8 + 2), 8);
491
492 state = Number;
493 }
494
495#ifdef KJS_DEBUG_LEX
496 switch (state) {
497 case Eof:
498 printf("(EOF)\n");
499 break;
500 case Other:
501 printf("(Other)\n");
502 break;
503 case Identifier:
504 printf("(Identifier)/(Keyword)\n");
505 break;
506 case String:
507 printf("(String)\n");
508 break;
509 case Number:
510 printf("(Number)\n");
511 break;
512 default:
513 printf("(unknown)");
514 }
515#endif
516
517 if (state != Identifier && eatNextIdentifier)
518 eatNextIdentifier = false;
519
520 restrKeyword = false;
521 delimited = false;
522 kjsyylloc.first_line = yylineno; // ???
523 kjsyylloc.last_line = yylineno;
524
525 switch (state) {
526 case Eof:
527 token = 0;
528 break;
529 case Other:
530 if(token == '}' || token == ';') {
531 delimited = true;
532 }
533 break;
534 case IdentifierOrKeyword:
535 if ((token = Lookup::find(&mainTable, buffer16, pos16)) < 0) {
536 case Identifier:
537 // Lookup for keyword failed, means this is an identifier
538 // Apply anonymous-function hack below (eat the identifier)
539 if (eatNextIdentifier) {
540 eatNextIdentifier = false;
541 token = lex();
542 break;
543 }
544 kjsyylval.ident = makeIdentifier(buffer16, pos16);
545 token = IDENT;
546 break;
547 }
548
549 eatNextIdentifier = false;
550 // Hack for "f = function somename() { ... }", too hard to get into the grammar
551 if (token == FUNCTION && lastToken == '=' )
552 eatNextIdentifier = true;
553
554 if (token == CONTINUE || token == BREAK ||
555 token == RETURN || token == THROW)
556 restrKeyword = true;
557 break;
558 case String:
559 kjsyylval.string = makeUString(buffer16, pos16);
560 token = STRING;
561 break;
562 case Number:
563 kjsyylval.doubleValue = dval;
564 token = NUMBER;
565 break;
566 case Bad:
567#ifdef KJS_DEBUG_LEX
568 fprintf(stderr, "yylex: ERROR.\n");
569#endif
570 error = true;
571 return -1;
572 default:
573 ASSERT(!"unhandled numeration value in switch");
574 error = true;
575 return -1;
576 }
577 lastToken = token;
578 return token;
579}
580
581bool Lexer::isWhiteSpace() const
582{
583 return current == '\t' || current == 0x0b || current == 0x0c || isSeparatorSpace(current);
584}
585
586bool Lexer::isLineTerminator()
587{
588 bool cr = (current == '\r');
589 bool lf = (current == '\n');
590 if (cr)
591 skipLF = true;
592 else if (lf)
593 skipCR = true;
594 return cr || lf || current == 0x2028 || current == 0x2029;
595}
596
597bool Lexer::isIdentStart(int c)
598{
599 return (category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other))
600 || c == '$' || c == '_';
601}
602
603bool Lexer::isIdentPart(int c)
604{
605 return (category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other
606 | Mark_NonSpacing | Mark_SpacingCombining | Number_DecimalDigit | Punctuation_Connector))
607 || c == '$' || c == '_';
608}
609
// True for the ASCII digits '0' through '9'; false for everything else,
// including the -1 end-of-input marker.
static bool isDecimalDigit(int c)
{
  if (c < '0')
    return false;
  return c <= '9';
}
614
615bool Lexer::isHexDigit(int c)
616{
617 return (c >= '0' && c <= '9' ||
618 c >= 'a' && c <= 'f' ||
619 c >= 'A' && c <= 'F');
620}
621
622bool Lexer::isOctalDigit(int c)
623{
624 return (c >= '0' && c <= '7');
625}
626
627int Lexer::matchPunctuator(int c1, int c2, int c3, int c4)
628{
629 if (c1 == '>' && c2 == '>' && c3 == '>' && c4 == '=') {
630 shift(4);
631 return URSHIFTEQUAL;
632 } else if (c1 == '=' && c2 == '=' && c3 == '=') {
633 shift(3);
634 return STREQ;
635 } else if (c1 == '!' && c2 == '=' && c3 == '=') {
636 shift(3);
637 return STRNEQ;
638 } else if (c1 == '>' && c2 == '>' && c3 == '>') {
639 shift(3);
640 return URSHIFT;
641 } else if (c1 == '<' && c2 == '<' && c3 == '=') {
642 shift(3);
643 return LSHIFTEQUAL;
644 } else if (c1 == '>' && c2 == '>' && c3 == '=') {
645 shift(3);
646 return RSHIFTEQUAL;
647 } else if (c1 == '<' && c2 == '=') {
648 shift(2);
649 return LE;
650 } else if (c1 == '>' && c2 == '=') {
651 shift(2);
652 return GE;
653 } else if (c1 == '!' && c2 == '=') {
654 shift(2);
655 return NE;
656 } else if (c1 == '+' && c2 == '+') {
657 shift(2);
658 if (terminator)
659 return AUTOPLUSPLUS;
660 else
661 return PLUSPLUS;
662 } else if (c1 == '-' && c2 == '-') {
663 shift(2);
664 if (terminator)
665 return AUTOMINUSMINUS;
666 else
667 return MINUSMINUS;
668 } else if (c1 == '=' && c2 == '=') {
669 shift(2);
670 return EQEQ;
671 } else if (c1 == '+' && c2 == '=') {
672 shift(2);
673 return PLUSEQUAL;
674 } else if (c1 == '-' && c2 == '=') {
675 shift(2);
676 return MINUSEQUAL;
677 } else if (c1 == '*' && c2 == '=') {
678 shift(2);
679 return MULTEQUAL;
680 } else if (c1 == '/' && c2 == '=') {
681 shift(2);
682 return DIVEQUAL;
683 } else if (c1 == '&' && c2 == '=') {
684 shift(2);
685 return ANDEQUAL;
686 } else if (c1 == '^' && c2 == '=') {
687 shift(2);
688 return XOREQUAL;
689 } else if (c1 == '%' && c2 == '=') {
690 shift(2);
691 return MODEQUAL;
692 } else if (c1 == '|' && c2 == '=') {
693 shift(2);
694 return OREQUAL;
695 } else if (c1 == '<' && c2 == '<') {
696 shift(2);
697 return LSHIFT;
698 } else if (c1 == '>' && c2 == '>') {
699 shift(2);
700 return RSHIFT;
701 } else if (c1 == '&' && c2 == '&') {
702 shift(2);
703 return AND;
704 } else if (c1 == '|' && c2 == '|') {
705 shift(2);
706 return OR;
707 }
708
709 switch(c1) {
710 case '=':
711 case '>':
712 case '<':
713 case ',':
714 case '!':
715 case '~':
716 case '?':
717 case ':':
718 case '.':
719 case '+':
720 case '-':
721 case '*':
722 case '/':
723 case '&':
724 case '|':
725 case '^':
726 case '%':
727 case '(':
728 case ')':
729 case '{':
730 case '}':
731 case '[':
732 case ']':
733 case ';':
734 shift(1);
735 return static_cast<int>(c1);
736 default:
737 return -1;
738 }
739}
740
741unsigned short Lexer::singleEscape(unsigned short c)
742{
743 switch(c) {
744 case 'b':
745 return 0x08;
746 case 't':
747 return 0x09;
748 case 'n':
749 return 0x0A;
750 case 'v':
751 return 0x0B;
752 case 'f':
753 return 0x0C;
754 case 'r':
755 return 0x0D;
756 case '"':
757 return 0x22;
758 case '\'':
759 return 0x27;
760 case '\\':
761 return 0x5C;
762 default:
763 return c;
764 }
765}
766
767unsigned short Lexer::convertOctal(int c1, int c2, int c3)
768{
769 return static_cast<unsigned short>((c1 - '0') * 64 + (c2 - '0') * 8 + c3 - '0');
770}
771
772unsigned char Lexer::convertHex(int c)
773{
774 if (c >= '0' && c <= '9')
775 return static_cast<unsigned char>(c - '0');
776 if (c >= 'a' && c <= 'f')
777 return static_cast<unsigned char>(c - 'a' + 10);
778 return static_cast<unsigned char>(c - 'A' + 10);
779}
780
781unsigned char Lexer::convertHex(int c1, int c2)
782{
783 return ((convertHex(c1) << 4) + convertHex(c2));
784}
785
786KJS::UChar Lexer::convertUnicode(int c1, int c2, int c3, int c4)
787{
788 return KJS::UChar((convertHex(c1) << 4) + convertHex(c2),
789 (convertHex(c3) << 4) + convertHex(c4));
790}
791
792void Lexer::record8(int c)
793{
794 ASSERT(c >= 0);
795 ASSERT(c <= 0xff);
796
797 // enlarge buffer if full
798 if (pos8 >= size8 - 1) {
799 char *tmp = new char[2 * size8];
800 memcpy(tmp, buffer8, size8 * sizeof(char));
801 delete [] buffer8;
802 buffer8 = tmp;
803 size8 *= 2;
804 }
805
806 buffer8[pos8++] = (char) c;
807}
808
809void Lexer::record16(int c)
810{
811 ASSERT(c >= 0);
812 ASSERT(c <= USHRT_MAX);
813 record16(UChar(static_cast<unsigned short>(c)));
814}
815
816void Lexer::record16(KJS::UChar c)
817{
818 // enlarge buffer if full
819 if (pos16 >= size16 - 1) {
820 KJS::UChar *tmp = new KJS::UChar[2 * size16];
821 memcpy(tmp, buffer16, size16 * sizeof(KJS::UChar));
822 delete [] buffer16;
823 buffer16 = tmp;
824 size16 *= 2;
825 }
826
827 buffer16[pos16++] = c;
828}
829
830bool Lexer::scanRegExp()
831{
832 pos16 = 0;
833 bool lastWasEscape = false;
834 bool inBrackets = false;
835
836 while (1) {
837 if (isLineTerminator() || current == -1)
838 return false;
839 else if (current != '/' || lastWasEscape == true || inBrackets == true)
840 {
841 // keep track of '[' and ']'
842 if (!lastWasEscape) {
843 if ( current == '[' && !inBrackets )
844 inBrackets = true;
845 if ( current == ']' && inBrackets )
846 inBrackets = false;
847 }
848 record16(current);
849 lastWasEscape =
850 !lastWasEscape && (current == '\\');
851 }
852 else { // end of regexp
853 pattern = UString(buffer16, pos16);
854 pos16 = 0;
855 shift(1);
856 break;
857 }
858 shift(1);
859 }
860
861 while (isIdentPart(current)) {
862 record16(current);
863 shift(1);
864 }
865 flags = UString(buffer16, pos16);
866
867 return true;
868}
869
870
871void Lexer::doneParsing()
872{
873 for (unsigned i = 0; i < numIdentifiers; i++) {
874 delete identifiers[i];
875 }
876 fastFree(identifiers);
877 identifiers = 0;
878 numIdentifiers = 0;
879 identifiersCapacity = 0;
880
881 for (unsigned i = 0; i < numStrings; i++) {
882 delete strings[i];
883 }
884 fastFree(strings);
885 strings = 0;
886 numStrings = 0;
887 stringsCapacity = 0;
888}
889
// Sizing policy for the arrays that track allocated identifiers and strings:
// the first allocation has initialCapacity slots, and each time an array
// fills up its capacity is multiplied by growthFactor.
static const int initialCapacity = 64;
static const int growthFactor = 2;
892
893// FIXME: this completely ignores its parameters, instead using buffer16 and pos16 - wtf?
894Identifier *Lexer::makeIdentifier(KJS::UChar*, unsigned int)
895{
896 if (numIdentifiers == identifiersCapacity) {
897 identifiersCapacity = (identifiersCapacity == 0) ? initialCapacity : identifiersCapacity *growthFactor;
898 identifiers = (KJS::Identifier **)fastRealloc(identifiers, sizeof(KJS::Identifier *) * identifiersCapacity);
899 }
900
901 KJS::Identifier *identifier = new KJS::Identifier(buffer16, pos16);
902 identifiers[numIdentifiers++] = identifier;
903 return identifier;
904}
905
906// FIXME: this completely ignores its parameters, instead using buffer16 and pos16 - wtf?
907UString *Lexer::makeUString(KJS::UChar*, unsigned int)
908{
909 if (numStrings == stringsCapacity) {
910 stringsCapacity = (stringsCapacity == 0) ? initialCapacity : stringsCapacity *growthFactor;
911 strings = (UString **)fastRealloc(strings, sizeof(UString *) * stringsCapacity);
912 }
913
914 UString *string = new UString(buffer16, pos16);
915 strings[numStrings++] = string;
916 return string;
917}
918
919}
Note: See TracBrowser for help on using the repository browser.