source: webkit/trunk/JavaScriptCore/kjs/lexer.cpp@ 12949

Last change on this file since 12949 was 12317, checked in by mjs, 19 years ago

Reviewed by Tim Hatcher.


  • it's "Franklin Street", not "Franklin Steet"
  • kjs/array_instance.h:
  • kjs/array_object.cpp:
  • kjs/array_object.h:
  • kjs/bool_object.cpp:
  • kjs/bool_object.h:
  • kjs/collector.cpp:
  • kjs/collector.h:
  • kjs/completion.h:
  • kjs/context.h:
  • kjs/date_object.cpp:
  • kjs/date_object.h:
  • kjs/debugger.cpp:
  • kjs/debugger.h:
  • kjs/dtoa.h:
  • kjs/error_object.cpp:
  • kjs/error_object.h:
  • kjs/function.cpp:
  • kjs/function.h:
  • kjs/function_object.cpp:
  • kjs/function_object.h:
  • kjs/grammar.y:
  • kjs/identifier.cpp:
  • kjs/identifier.h:
  • kjs/internal.cpp:
  • kjs/internal.h:
  • kjs/interpreter.cpp:
  • kjs/interpreter.h:
  • kjs/lexer.cpp:
  • kjs/lexer.h:
  • kjs/list.cpp:
  • kjs/list.h:
  • kjs/lookup.cpp:
  • kjs/lookup.h:
  • kjs/math_object.cpp:
  • kjs/math_object.h:
  • kjs/nodes.cpp:
  • kjs/nodes.h:
  • kjs/nodes2string.cpp:
  • kjs/number_object.cpp:
  • kjs/number_object.h:
  • kjs/object.cpp:
  • kjs/object.h:
  • kjs/object_object.cpp:
  • kjs/object_object.h:
  • kjs/operations.cpp:
  • kjs/operations.h:
  • kjs/property_map.cpp:
  • kjs/property_map.h:
  • kjs/property_slot.cpp:
  • kjs/property_slot.h:
  • kjs/reference.cpp:
  • kjs/reference.h:
  • kjs/reference_list.cpp:
  • kjs/reference_list.h:
  • kjs/regexp.cpp:
  • kjs/regexp.h:
  • kjs/regexp_object.cpp:
  • kjs/regexp_object.h:
  • kjs/scope_chain.cpp:
  • kjs/scope_chain.h:
  • kjs/simple_number.h:
  • kjs/string_object.cpp:
  • kjs/string_object.h:
  • kjs/testkjs.cpp:
  • kjs/types.h:
  • kjs/ustring.cpp:
  • kjs/ustring.h:
  • kjs/value.cpp:
  • kjs/value.h:
  • kxmlcore/AlwaysInline.h:
  • kxmlcore/ListRefPtr.h:
  • kxmlcore/PassRefPtr.h:
  • kxmlcore/RefPtr.h:
  • Property svn:eol-style set to native
File size: 21.3 KB
Line 
1// -*- c-basic-offset: 2 -*-
2/*
3 * This file is part of the KDE libraries
4 * Copyright (C) 1999-2000 Harri Porten ([email protected])
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Library General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Library General Public License for more details.
15 *
16 * You should have received a copy of the GNU Library General Public License
17 * along with this library; see the file COPYING.LIB. If not, write to
18 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19 * Boston, MA 02110-1301, USA.
20 *
21 */
22
23#include "config.h"
24#include "lexer.h"
25
26#include <ctype.h>
27#include <stdlib.h>
28#include <stdio.h>
29#include <string.h>
30#include <assert.h>
31
32#include "value.h"
33#include "object.h"
34#include "types.h"
35#include "interpreter.h"
36#include "nodes.h"
37#include "identifier.h"
38#include "lookup.h"
39#include "internal.h"
40#include <unicode/uchar.h>
41
42static bool isDecimalDigit(unsigned short c);
43
44// we can't specify the namespace in yacc's C output, so do it here
45using namespace KJS;
46
47static Lexer *currLexer = 0;
48
49#ifndef KDE_USE_FINAL
50#include "grammar.h"
51#endif
52
53#include "lexer.lut.h"
54
55extern YYLTYPE kjsyylloc; // global bison variable holding token info
56
57// a bridge for yacc from the C world to C++
58int kjsyylex()
59{
60 return Lexer::curr()->lex();
61}
62
63Lexer::Lexer()
64 : yylineno(1),
65 size8(128), size16(128), restrKeyword(false),
66 eatNextIdentifier(false), stackToken(-1), lastToken(-1), pos(0),
67 code(0), length(0),
68#ifndef KJS_PURE_ECMA
69 bol(true),
70#endif
71 current(0), next1(0), next2(0), next3(0),
72 strings(0), numStrings(0), stringsCapacity(0),
73 identifiers(0), numIdentifiers(0), identifiersCapacity(0)
74{
75 // allocate space for read buffers
76 buffer8 = new char[size8];
77 buffer16 = new KJS::UChar[size16];
78 currLexer = this;
79}
80
81Lexer::~Lexer()
82{
83 doneParsing();
84 delete [] buffer8;
85 delete [] buffer16;
86}
87
88Lexer *Lexer::curr()
89{
90 if (!currLexer) {
91 // create singleton instance
92 currLexer = new Lexer();
93 }
94 return currLexer;
95}
96
#ifdef KJS_DEBUG_MEM
// Destroys the current lexer instance (if any) and clears the global pointer.
// Only compiled when KJS_DEBUG_MEM is defined.
void Lexer::globalClear()
{
  Lexer *doomed = currLexer;
  delete doomed;
  currLexer = 0L;
}
#endif
104
105void Lexer::setCode(const UString &sourceURL, int startingLineNumber, const KJS::UChar *c, unsigned int len)
106{
107 yylineno = 1 + startingLineNumber;
108 m_sourceURL = sourceURL;
109 restrKeyword = false;
110 delimited = false;
111 eatNextIdentifier = false;
112 stackToken = -1;
113 lastToken = -1;
114 pos = 0;
115 code = c;
116 length = len;
117 skipLF = false;
118 skipCR = false;
119 error = false;
120#ifndef KJS_PURE_ECMA
121 bol = true;
122#endif
123
124 // read first characters
125 shift(4);
126}
127
128void Lexer::shift(unsigned int p)
129{
130 while (p--) {
131 current = next1;
132 next1 = next2;
133 next2 = next3;
134 do {
135 if (pos >= length) {
136 next3 = 0;
137 break;
138 }
139 next3 = code[pos++].uc;
140 } while (u_charType(next3) == U_FORMAT_CHAR);
141 }
142}
143
144// called on each new line
145void Lexer::nextLine()
146{
147 yylineno++;
148#ifndef KJS_PURE_ECMA
149 bol = true;
150#endif
151}
152
153void Lexer::setDone(State s)
154{
155 state = s;
156 done = true;
157}
158
159int Lexer::lex()
160{
161 int token = 0;
162 state = Start;
163 unsigned short stringType = 0; // either single or double quotes
164 pos8 = pos16 = 0;
165 done = false;
166 terminator = false;
167 skipLF = false;
168 skipCR = false;
169
170 // did we push a token on the stack previously ?
171 // (after an automatic semicolon insertion)
172 if (stackToken >= 0) {
173 setDone(Other);
174 token = stackToken;
175 stackToken = 0;
176 }
177
178 while (!done) {
179 if (skipLF && current != '\n') // found \r but not \n afterwards
180 skipLF = false;
181 if (skipCR && current != '\r') // found \n but not \r afterwards
182 skipCR = false;
183 if (skipLF || skipCR) // found \r\n or \n\r -> eat the second one
184 {
185 skipLF = false;
186 skipCR = false;
187 shift(1);
188 }
189 switch (state) {
190 case Start:
191 if (isWhiteSpace()) {
192 // do nothing
193 } else if (current == '/' && next1 == '/') {
194 shift(1);
195 state = InSingleLineComment;
196 } else if (current == '/' && next1 == '*') {
197 shift(1);
198 state = InMultiLineComment;
199 } else if (current == 0) {
200 if (!terminator && !delimited) {
201 // automatic semicolon insertion if program incomplete
202 token = ';';
203 stackToken = 0;
204 setDone(Other);
205 } else
206 setDone(Eof);
207 } else if (isLineTerminator()) {
208 nextLine();
209 terminator = true;
210 if (restrKeyword) {
211 token = ';';
212 setDone(Other);
213 }
214 } else if (current == '"' || current == '\'') {
215 state = InString;
216 stringType = current;
217 } else if (isIdentStart(current)) {
218 record16(current);
219 state = InIdentifierOrKeyword;
220 } else if (current == '\\') {
221 state = InIdentifierUnicodeEscapeStart;
222 } else if (current == '0') {
223 record8(current);
224 state = InNum0;
225 } else if (isDecimalDigit(current)) {
226 record8(current);
227 state = InNum;
228 } else if (current == '.' && isDecimalDigit(next1)) {
229 record8(current);
230 state = InDecimal;
231#ifndef KJS_PURE_ECMA
232 // <!-- marks the beginning of a line comment (for www usage)
233 } else if (current == '<' && next1 == '!' &&
234 next2 == '-' && next3 == '-') {
235 shift(3);
236 state = InSingleLineComment;
237 // same for -->
238 } else if (bol && current == '-' && next1 == '-' && next2 == '>') {
239 shift(2);
240 state = InSingleLineComment;
241#endif
242 } else {
243 token = matchPunctuator(current, next1, next2, next3);
244 if (token != -1) {
245 setDone(Other);
246 } else {
247 // cerr << "encountered unknown character" << endl;
248 setDone(Bad);
249 }
250 }
251 break;
252 case InString:
253 if (current == stringType) {
254 shift(1);
255 setDone(String);
256 } else if (current == 0 || isLineTerminator()) {
257 setDone(Bad);
258 } else if (current == '\\') {
259 state = InEscapeSequence;
260 } else {
261 record16(current);
262 }
263 break;
264 // Escape Sequences inside of strings
265 case InEscapeSequence:
266 if (isOctalDigit(current)) {
267 if (current >= '0' && current <= '3' &&
268 isOctalDigit(next1) && isOctalDigit(next2)) {
269 record16(convertOctal(current, next1, next2));
270 shift(2);
271 state = InString;
272 } else if (isOctalDigit(current) && isOctalDigit(next1)) {
273 record16(convertOctal('0', current, next1));
274 shift(1);
275 state = InString;
276 } else if (isOctalDigit(current)) {
277 record16(convertOctal('0', '0', current));
278 state = InString;
279 } else {
280 setDone(Bad);
281 }
282 } else if (current == 'x')
283 state = InHexEscape;
284 else if (current == 'u')
285 state = InUnicodeEscape;
286 else if (isLineTerminator()) {
287 nextLine();
288 state = InString;
289 } else {
290 record16(singleEscape(current));
291 state = InString;
292 }
293 break;
294 case InHexEscape:
295 if (isHexDigit(current) && isHexDigit(next1)) {
296 state = InString;
297 record16(convertHex(current, next1));
298 shift(1);
299 } else if (current == stringType) {
300 record16('x');
301 shift(1);
302 setDone(String);
303 } else {
304 record16('x');
305 record16(current);
306 state = InString;
307 }
308 break;
309 case InUnicodeEscape:
310 if (isHexDigit(current) && isHexDigit(next1) && isHexDigit(next2) && isHexDigit(next3)) {
311 record16(convertUnicode(current, next1, next2, next3));
312 shift(3);
313 state = InString;
314 } else if (current == stringType) {
315 record16('u');
316 shift(1);
317 setDone(String);
318 } else {
319 setDone(Bad);
320 }
321 break;
322 case InSingleLineComment:
323 if (isLineTerminator()) {
324 nextLine();
325 terminator = true;
326 if (restrKeyword) {
327 token = ';';
328 setDone(Other);
329 } else
330 state = Start;
331 } else if (current == 0) {
332 setDone(Eof);
333 }
334 break;
335 case InMultiLineComment:
336 if (current == 0) {
337 setDone(Bad);
338 } else if (isLineTerminator()) {
339 nextLine();
340 } else if (current == '*' && next1 == '/') {
341 state = Start;
342 shift(1);
343 }
344 break;
345 case InIdentifierOrKeyword:
346 case InIdentifier:
347 if (isIdentPart(current))
348 record16(current);
349 else if (current == '\\')
350 state = InIdentifierUnicodeEscapeStart;
351 else
352 setDone(state == InIdentifierOrKeyword ? IdentifierOrKeyword : Identifier);
353 break;
354 case InNum0:
355 if (current == 'x' || current == 'X') {
356 record8(current);
357 state = InHex;
358 } else if (current == '.') {
359 record8(current);
360 state = InDecimal;
361 } else if (current == 'e' || current == 'E') {
362 record8(current);
363 state = InExponentIndicator;
364 } else if (isOctalDigit(current)) {
365 record8(current);
366 state = InOctal;
367 } else if (isDecimalDigit(current)) {
368 record8(current);
369 state = InDecimal;
370 } else {
371 setDone(Number);
372 }
373 break;
374 case InHex:
375 if (isHexDigit(current)) {
376 record8(current);
377 } else {
378 setDone(Hex);
379 }
380 break;
381 case InOctal:
382 if (isOctalDigit(current)) {
383 record8(current);
384 }
385 else if (isDecimalDigit(current)) {
386 record8(current);
387 state = InDecimal;
388 } else
389 setDone(Octal);
390 break;
391 case InNum:
392 if (isDecimalDigit(current)) {
393 record8(current);
394 } else if (current == '.') {
395 record8(current);
396 state = InDecimal;
397 } else if (current == 'e' || current == 'E') {
398 record8(current);
399 state = InExponentIndicator;
400 } else
401 setDone(Number);
402 break;
403 case InDecimal:
404 if (isDecimalDigit(current)) {
405 record8(current);
406 } else if (current == 'e' || current == 'E') {
407 record8(current);
408 state = InExponentIndicator;
409 } else
410 setDone(Number);
411 break;
412 case InExponentIndicator:
413 if (current == '+' || current == '-') {
414 record8(current);
415 } else if (isDecimalDigit(current)) {
416 record8(current);
417 state = InExponent;
418 } else
419 setDone(Bad);
420 break;
421 case InExponent:
422 if (isDecimalDigit(current)) {
423 record8(current);
424 } else
425 setDone(Number);
426 break;
427 case InIdentifierUnicodeEscapeStart:
428 if (current == 'u')
429 state = InIdentifierUnicodeEscape;
430 else
431 setDone(Bad);
432 break;
433 case InIdentifierUnicodeEscape:
434 if (isHexDigit(current) && isHexDigit(next1) && isHexDigit(next2) && isHexDigit(next3)) {
435 record16(convertUnicode(current, next1, next2, next3));
436 shift(3);
437 state = InIdentifier;
438 } else {
439 setDone(Bad);
440 }
441 break;
442 default:
443 assert(!"Unhandled state in switch statement");
444 }
445
446 // move on to the next character
447 if (!done)
448 shift(1);
449#ifndef KJS_PURE_ECMA
450 if (state != Start && state != InSingleLineComment)
451 bol = false;
452#endif
453 }
454
455 // no identifiers allowed directly after numeric literal, e.g. "3in" is bad
456 if ((state == Number || state == Octal || state == Hex) && isIdentStart(current))
457 state = Bad;
458
459 // terminate string
460 buffer8[pos8] = '\0';
461
462#ifdef KJS_DEBUG_LEX
463 fprintf(stderr, "line: %d ", lineNo());
464 fprintf(stderr, "yytext (%x): ", buffer8[0]);
465 fprintf(stderr, "%s ", buffer8);
466#endif
467
468 double dval = 0;
469 if (state == Number) {
470 dval = strtod(buffer8, 0L);
471 } else if (state == Hex) { // scan hex numbers
472 const char *p = buffer8 + 2;
473 while (char c = *p++) {
474 dval *= 16;
475 dval += convertHex(c);
476 }
477 state = Number;
478 } else if (state == Octal) { // scan octal number
479 const char *p = buffer8 + 1;
480 while (char c = *p++) {
481 dval *= 8;
482 dval += c - '0';
483 }
484 state = Number;
485 }
486
487#ifdef KJS_DEBUG_LEX
488 switch (state) {
489 case Eof:
490 printf("(EOF)\n");
491 break;
492 case Other:
493 printf("(Other)\n");
494 break;
495 case Identifier:
496 printf("(Identifier)/(Keyword)\n");
497 break;
498 case String:
499 printf("(String)\n");
500 break;
501 case Number:
502 printf("(Number)\n");
503 break;
504 default:
505 printf("(unknown)");
506 }
507#endif
508
509 if (state != Identifier && eatNextIdentifier)
510 eatNextIdentifier = false;
511
512 restrKeyword = false;
513 delimited = false;
514 kjsyylloc.first_line = yylineno; // ???
515 kjsyylloc.last_line = yylineno;
516
517 switch (state) {
518 case Eof:
519 token = 0;
520 break;
521 case Other:
522 if(token == '}' || token == ';') {
523 delimited = true;
524 }
525 break;
526 case IdentifierOrKeyword:
527 if ((token = Lookup::find(&mainTable, buffer16, pos16)) < 0) {
528 case Identifier:
529 // Lookup for keyword failed, means this is an identifier
530 // Apply anonymous-function hack below (eat the identifier)
531 if (eatNextIdentifier) {
532 eatNextIdentifier = false;
533 token = lex();
534 break;
535 }
536 kjsyylval.ident = makeIdentifier(buffer16, pos16);
537 token = IDENT;
538 break;
539 }
540
541 eatNextIdentifier = false;
542 // Hack for "f = function somename() { ... }", too hard to get into the grammar
543 if (token == FUNCTION && lastToken == '=' )
544 eatNextIdentifier = true;
545
546 if (token == CONTINUE || token == BREAK ||
547 token == RETURN || token == THROW)
548 restrKeyword = true;
549 break;
550 case String:
551 kjsyylval.ustr = makeUString(buffer16, pos16);
552 token = STRING;
553 break;
554 case Number:
555 kjsyylval.dval = dval;
556 token = NUMBER;
557 break;
558 case Bad:
559 fprintf(stderr, "yylex: ERROR.\n");
560 error = true;
561 return -1;
562 default:
563 assert(!"unhandled numeration value in switch");
564 error = true;
565 return -1;
566 }
567 lastToken = token;
568 return token;
569}
570
571bool Lexer::isWhiteSpace() const
572{
573 return (current == '\t' || current == 0x0b || current == 0x0c || u_charType(current) == U_SPACE_SEPARATOR);
574}
575
576bool Lexer::isLineTerminator()
577{
578 bool cr = (current == '\r');
579 bool lf = (current == '\n');
580 if (cr)
581 skipLF = true;
582 else if (lf)
583 skipCR = true;
584 return cr || lf || current == 0x2028 || current == 0x2029;
585}
586
587bool Lexer::isIdentStart(unsigned short c)
588{
589 return (U_GET_GC_MASK(c) & (U_GC_L_MASK | U_GC_NL_MASK)) || c == '$' || c == '_';
590}
591
592bool Lexer::isIdentPart(unsigned short c)
593{
594 return (U_GET_GC_MASK(c) & (U_GC_L_MASK | U_GC_NL_MASK | U_GC_MN_MASK | U_GC_MC_MASK | U_GC_ND_MASK | U_GC_PC_MASK)) || c == '$' || c == '_';
595}
596
// True only for the ASCII digits '0' through '9'.
static bool isDecimalDigit(unsigned short c)
{
  if (c < '0')
    return false;
  return c <= '9';
}
601
602bool Lexer::isHexDigit(unsigned short c)
603{
604 return (c >= '0' && c <= '9' ||
605 c >= 'a' && c <= 'f' ||
606 c >= 'A' && c <= 'F');
607}
608
609bool Lexer::isOctalDigit(unsigned short c) const
610{
611 return (c >= '0' && c <= '7');
612}
613
614int Lexer::matchPunctuator(unsigned short c1, unsigned short c2,
615 unsigned short c3, unsigned short c4)
616{
617 if (c1 == '>' && c2 == '>' && c3 == '>' && c4 == '=') {
618 shift(4);
619 return URSHIFTEQUAL;
620 } else if (c1 == '=' && c2 == '=' && c3 == '=') {
621 shift(3);
622 return STREQ;
623 } else if (c1 == '!' && c2 == '=' && c3 == '=') {
624 shift(3);
625 return STRNEQ;
626 } else if (c1 == '>' && c2 == '>' && c3 == '>') {
627 shift(3);
628 return URSHIFT;
629 } else if (c1 == '<' && c2 == '<' && c3 == '=') {
630 shift(3);
631 return LSHIFTEQUAL;
632 } else if (c1 == '>' && c2 == '>' && c3 == '=') {
633 shift(3);
634 return RSHIFTEQUAL;
635 } else if (c1 == '<' && c2 == '=') {
636 shift(2);
637 return LE;
638 } else if (c1 == '>' && c2 == '=') {
639 shift(2);
640 return GE;
641 } else if (c1 == '!' && c2 == '=') {
642 shift(2);
643 return NE;
644 } else if (c1 == '+' && c2 == '+') {
645 shift(2);
646 if (terminator)
647 return AUTOPLUSPLUS;
648 else
649 return PLUSPLUS;
650 } else if (c1 == '-' && c2 == '-') {
651 shift(2);
652 if (terminator)
653 return AUTOMINUSMINUS;
654 else
655 return MINUSMINUS;
656 } else if (c1 == '=' && c2 == '=') {
657 shift(2);
658 return EQEQ;
659 } else if (c1 == '+' && c2 == '=') {
660 shift(2);
661 return PLUSEQUAL;
662 } else if (c1 == '-' && c2 == '=') {
663 shift(2);
664 return MINUSEQUAL;
665 } else if (c1 == '*' && c2 == '=') {
666 shift(2);
667 return MULTEQUAL;
668 } else if (c1 == '/' && c2 == '=') {
669 shift(2);
670 return DIVEQUAL;
671 } else if (c1 == '&' && c2 == '=') {
672 shift(2);
673 return ANDEQUAL;
674 } else if (c1 == '^' && c2 == '=') {
675 shift(2);
676 return XOREQUAL;
677 } else if (c1 == '%' && c2 == '=') {
678 shift(2);
679 return MODEQUAL;
680 } else if (c1 == '|' && c2 == '=') {
681 shift(2);
682 return OREQUAL;
683 } else if (c1 == '<' && c2 == '<') {
684 shift(2);
685 return LSHIFT;
686 } else if (c1 == '>' && c2 == '>') {
687 shift(2);
688 return RSHIFT;
689 } else if (c1 == '&' && c2 == '&') {
690 shift(2);
691 return AND;
692 } else if (c1 == '|' && c2 == '|') {
693 shift(2);
694 return OR;
695 }
696
697 switch(c1) {
698 case '=':
699 case '>':
700 case '<':
701 case ',':
702 case '!':
703 case '~':
704 case '?':
705 case ':':
706 case '.':
707 case '+':
708 case '-':
709 case '*':
710 case '/':
711 case '&':
712 case '|':
713 case '^':
714 case '%':
715 case '(':
716 case ')':
717 case '{':
718 case '}':
719 case '[':
720 case ']':
721 case ';':
722 shift(1);
723 return static_cast<int>(c1);
724 default:
725 return -1;
726 }
727}
728
729unsigned short Lexer::singleEscape(unsigned short c) const
730{
731 switch(c) {
732 case 'b':
733 return 0x08;
734 case 't':
735 return 0x09;
736 case 'n':
737 return 0x0A;
738 case 'v':
739 return 0x0B;
740 case 'f':
741 return 0x0C;
742 case 'r':
743 return 0x0D;
744 case '"':
745 return 0x22;
746 case '\'':
747 return 0x27;
748 case '\\':
749 return 0x5C;
750 default:
751 return c;
752 }
753}
754
755unsigned short Lexer::convertOctal(unsigned short c1, unsigned short c2,
756 unsigned short c3) const
757{
758 return ((c1 - '0') * 64 + (c2 - '0') * 8 + c3 - '0');
759}
760
761unsigned char Lexer::convertHex(unsigned short c)
762{
763 if (c >= '0' && c <= '9')
764 return (c - '0');
765 else if (c >= 'a' && c <= 'f')
766 return (c - 'a' + 10);
767 else
768 return (c - 'A' + 10);
769}
770
771unsigned char Lexer::convertHex(unsigned short c1, unsigned short c2)
772{
773 return ((convertHex(c1) << 4) + convertHex(c2));
774}
775
776KJS::UChar Lexer::convertUnicode(unsigned short c1, unsigned short c2,
777 unsigned short c3, unsigned short c4)
778{
779 return KJS::UChar((convertHex(c1) << 4) + convertHex(c2),
780 (convertHex(c3) << 4) + convertHex(c4));
781}
782
783void Lexer::record8(unsigned short c)
784{
785 assert(c <= 0xff);
786
787 // enlarge buffer if full
788 if (pos8 >= size8 - 1) {
789 char *tmp = new char[2 * size8];
790 memcpy(tmp, buffer8, size8 * sizeof(char));
791 delete [] buffer8;
792 buffer8 = tmp;
793 size8 *= 2;
794 }
795
796 buffer8[pos8++] = (char) c;
797}
798
799void Lexer::record16(KJS::UChar c)
800{
801 // enlarge buffer if full
802 if (pos16 >= size16 - 1) {
803 KJS::UChar *tmp = new KJS::UChar[2 * size16];
804 memcpy(tmp, buffer16, size16 * sizeof(KJS::UChar));
805 delete [] buffer16;
806 buffer16 = tmp;
807 size16 *= 2;
808 }
809
810 buffer16[pos16++] = c;
811}
812
813bool Lexer::scanRegExp()
814{
815 pos16 = 0;
816 bool lastWasEscape = false;
817 bool inBrackets = false;
818
819 while (1) {
820 if (isLineTerminator() || current == 0)
821 return false;
822 else if (current != '/' || lastWasEscape == true || inBrackets == true)
823 {
824 // keep track of '[' and ']'
825 if ( !lastWasEscape ) {
826 if ( current == '[' && !inBrackets )
827 inBrackets = true;
828 if ( current == ']' && inBrackets )
829 inBrackets = false;
830 }
831 record16(current);
832 lastWasEscape =
833 !lastWasEscape && (current == '\\');
834 }
835 else { // end of regexp
836 pattern = UString(buffer16, pos16);
837 pos16 = 0;
838 shift(1);
839 break;
840 }
841 shift(1);
842 }
843
844 while (isIdentPart(current)) {
845 record16(current);
846 shift(1);
847 }
848 flags = UString(buffer16, pos16);
849
850 return true;
851}
852
853
854void Lexer::doneParsing()
855{
856 for (unsigned i = 0; i < numIdentifiers; i++) {
857 delete identifiers[i];
858 }
859 fastFree(identifiers);
860 identifiers = 0;
861 numIdentifiers = 0;
862 identifiersCapacity = 0;
863
864 for (unsigned i = 0; i < numStrings; i++) {
865 delete strings[i];
866 }
867 fastFree(strings);
868 strings = 0;
869 numStrings = 0;
870 stringsCapacity = 0;
871}
872
// Sizing policy for the identifier/string tracking arrays: the first
// allocation holds initialCapacity entries, and a full array grows by
// growthFactor.
static const int initialCapacity = 64;
static const int growthFactor = 2;
875
876Identifier *Lexer::makeIdentifier(KJS::UChar *buffer, unsigned int pos)
877{
878 if (numIdentifiers == identifiersCapacity) {
879 identifiersCapacity = (identifiersCapacity == 0) ? initialCapacity : identifiersCapacity *growthFactor;
880 identifiers = (KJS::Identifier **)fastRealloc(identifiers, sizeof(KJS::Identifier *) * identifiersCapacity);
881 }
882
883 KJS::Identifier *identifier = new KJS::Identifier(buffer16, pos16);
884 identifiers[numIdentifiers++] = identifier;
885 return identifier;
886}
887
888UString *Lexer::makeUString(KJS::UChar *buffer, unsigned int pos)
889{
890 if (numStrings == stringsCapacity) {
891 stringsCapacity = (stringsCapacity == 0) ? initialCapacity : stringsCapacity *growthFactor;
892 strings = (UString **)fastRealloc(strings, sizeof(UString *) * stringsCapacity);
893 }
894
895 UString *string = new UString(buffer16, pos16);
896 strings[numStrings++] = string;
897 return string;
898}
Note: See TracBrowser for help on using the repository browser.