source: webkit/trunk/JavaScriptCore/kjs/lexer.cpp@ 17862

Last change on this file since 17862 was 17862, checked in by ap, 19 years ago

2006-11-20 W. Andy Carrel <[email protected]>

Reviewed by Maciej.

http://bugs.webkit.org/show_bug.cgi?id=11501
REGRESSION: \u no longer escapes metacharacters in RegExps
http://bugs.webkit.org/show_bug.cgi?id=11502
Serializing RegExps doesn't preserve Unicode escapes

JavaScriptCore:

  • kjs/lexer.cpp: (Lexer::Lexer): (Lexer::setCode): (Lexer::shift): (Lexer::scanRegExp): Push \u parsing back down into the RegExp object rather than in the parser. This backs out r17354 in favor of a new fix that better matches the behavior of other browsers.
  • kjs/lexer.h:
  • kjs/regexp.cpp: (KJS::RegExp::RegExp): (KJS::sanitizePattern): (KJS::isHexDigit): (KJS::convertHex): (KJS::convertUnicode):
  • kjs/regexp.h: Translate \u escaped unicode characters for the benefit of pcre.
  • kjs/ustring.cpp: (KJS::UString::append): Fix failure to increment length on the first UChar appended to a UString that was copy-on-write.
  • tests/mozilla/ecma_2/RegExp/properties-001.js: Adjust tests back to the uniform standards.

LayoutTests:

  • fast/js/kde/RegExp-expected.txt:
  • fast/js/regexp-unicode-handling-expected.txt: Adjust these test results to passing as a result of other included changes in this revision.
  • Property svn:eol-style set to native
File size: 22.0 KB
Line 
1// -*- c-basic-offset: 2 -*-
2/*
3 * This file is part of the KDE libraries
4 * Copyright (C) 1999-2000 Harri Porten ([email protected])
5 * Copyright (C) 2006 Apple Computer, Inc.
6 *
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Library General Public
9 * License as published by the Free Software Foundation; either
10 * version 2 of the License, or (at your option) any later version.
11 *
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Library General Public License for more details.
16 *
17 * You should have received a copy of the GNU Library General Public License
18 * along with this library; see the file COPYING.LIB. If not, write to
19 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20 * Boston, MA 02110-1301, USA.
21 *
22 */
23
24#include "config.h"
25#include "lexer.h"
26
27#include <ctype.h>
28#include <string.h>
29
30#include "interpreter.h"
31#include "nodes.h"
32#include <wtf/unicode/Unicode.h>
33
34static bool isDecimalDigit(int);
35
36// we can't specify the namespace in yacc's C output, so do it here
37using namespace KJS;
38
// The lexer handed out by Lexer::curr(). Every Lexer constructor stores `this`
// here, and Lexer::curr() creates an instance on demand while it is still null.
39static Lexer *currLexer = 0;
40
41#ifndef KDE_USE_FINAL
42#include "grammar.h"
43#endif
44
45#include "lookup.h"
46#include "lexer.lut.h"
47
48extern YYLTYPE kjsyylloc; // global bison variable holding token info
49
50// a bridge for yacc from the C world to C++
51int kjsyylex()
52{
53 return Lexer::curr()->lex();
54}
55
56Lexer::Lexer()
57 : yylineno(1),
58 size8(128), size16(128), restrKeyword(false),
59 eatNextIdentifier(false), stackToken(-1), lastToken(-1), pos(0),
60 code(0), length(0),
61#ifndef KJS_PURE_ECMA
62 bol(true),
63#endif
64 current(0), next1(0), next2(0), next3(0),
65 strings(0), numStrings(0), stringsCapacity(0),
66 identifiers(0), numIdentifiers(0), identifiersCapacity(0)
67{
68 // allocate space for read buffers
69 buffer8 = new char[size8];
70 buffer16 = new KJS::UChar[size16];
71 currLexer = this;
72}
73
74Lexer::~Lexer()
75{
76 doneParsing();
77 delete [] buffer8;
78 delete [] buffer16;
79}
80
81Lexer *Lexer::curr()
82{
83 if (!currLexer) {
84 // create singleton instance
85 currLexer = new Lexer();
86 }
87 return currLexer;
88}
89
#ifdef KJS_DEBUG_MEM
// Memory-debugging aid: destroys the current lexer so that curr() starts over
// with a fresh instance.
void Lexer::globalClear()
{
  delete currLexer;
  currLexer = 0;
}
#endif
97
98void Lexer::setCode(const UString &sourceURL, int startingLineNumber, const KJS::UChar *c, unsigned int len)
99{
100 yylineno = 1 + startingLineNumber;
101 m_sourceURL = sourceURL;
102 restrKeyword = false;
103 delimited = false;
104 eatNextIdentifier = false;
105 stackToken = -1;
106 lastToken = -1;
107 pos = 0;
108 code = c;
109 length = len;
110 skipLF = false;
111 skipCR = false;
112 error = false;
113#ifndef KJS_PURE_ECMA
114 bol = true;
115#endif
116
117 // read first characters
118 current = (length > 0) ? code[0].uc : -1;
119 next1 = (length > 1) ? code[1].uc : -1;
120 next2 = (length > 2) ? code[2].uc : -1;
121 next3 = (length > 3) ? code[3].uc : -1;
122}
123
124void Lexer::shift(unsigned int p)
125{
126 // Here would be a good place to strip Cf characters, but that has caused compatibility problems:
127 // <http://bugzilla.opendarwin.org/show_bug.cgi?id=10183>.
128 while (p--) {
129 pos++;
130 current = next1;
131 next1 = next2;
132 next2 = next3;
133 next3 = (pos + 3 < length) ? code[pos + 3].uc : -1;
134 }
135}
136
137// called on each new line
138void Lexer::nextLine()
139{
140 yylineno++;
141#ifndef KJS_PURE_ECMA
142 bol = true;
143#endif
144}
145
146void Lexer::setDone(State s)
147{
148 state = s;
149 done = true;
150}
151
// Scans and returns the next token of the current source.
//
// Runs a character-at-a-time state machine over `current` (with next1..next3 as
// lookahead) until setDone() records a final state, then turns that final state
// into a token:
//   - Eof                 -> 0
//   - Other               -> the punctuator / inserted ';' already stored in `token`
//   - IdentifierOrKeyword -> the keyword token found in mainTable, else IDENT
//   - Identifier          -> IDENT (value in kjsyylval.ident)
//   - String              -> STRING (value in kjsyylval.ustr)
//   - Number, Hex, Octal  -> NUMBER (value in kjsyylval.dval)
//   - Bad / anything else -> sets `error` and returns -1
// Numeric literals are collected in buffer8 (record8), identifiers and string
// contents in buffer16 (record16). Before returning, kjsyylloc is set to the
// current line number and the token is remembered in lastToken.
152int Lexer::lex()
153{
154 int token = 0;
155 state = Start;
156 unsigned short stringType = 0; // either single or double quotes
157 pos8 = pos16 = 0;
158 done = false;
159 terminator = false;
160 skipLF = false;
161 skipCR = false;
162
163 // did we push a token on the stack previously ?
164 // (after an automatic semicolon insertion)
165 if (stackToken >= 0) {
166 setDone(Other);
167 token = stackToken;
168 stackToken = 0;
169 }
170
171 while (!done) {
172 if (skipLF && current != '\n') // found \r but not \n afterwards
173 skipLF = false;
174 if (skipCR && current != '\r') // found \n but not \r afterwards
175 skipCR = false;
176 if (skipLF || skipCR) // found \r\n or \n\r -> eat the second one
177 {
178 skipLF = false;
179 skipCR = false;
180 shift(1);
181 }
182 switch (state) {
183 case Start:
184 if (isWhiteSpace()) {
185 // do nothing
186 } else if (current == '/' && next1 == '/') {
187 shift(1);
188 state = InSingleLineComment;
189 } else if (current == '/' && next1 == '*') {
190 shift(1);
191 state = InMultiLineComment;
192 } else if (current == -1) {
// `delimited` is true when the previous token returned was '}' or ';'
// (it is set at the end of this function).
193 if (!terminator && !delimited) {
194 // automatic semicolon insertion if program incomplete
195 token = ';';
196 stackToken = 0;
197 setDone(Other);
198 } else
199 setDone(Eof);
200 } else if (isLineTerminator()) {
201 nextLine();
202 terminator = true;
// restrKeyword is set after continue/break/return/throw: a line break
// directly after one of them yields a ';' token.
203 if (restrKeyword) {
204 token = ';';
205 setDone(Other);
206 }
207 } else if (current == '"' || current == '\'') {
208 state = InString;
209 stringType = static_cast<unsigned short>(current);
210 } else if (isIdentStart(current)) {
211 record16(current);
212 state = InIdentifierOrKeyword;
213 } else if (current == '\\') {
214 state = InIdentifierUnicodeEscapeStart;
215 } else if (current == '0') {
216 record8(current);
217 state = InNum0;
218 } else if (isDecimalDigit(current)) {
219 record8(current);
220 state = InNum;
221 } else if (current == '.' && isDecimalDigit(next1)) {
222 record8(current);
223 state = InDecimal;
224#ifndef KJS_PURE_ECMA
225 // <!-- marks the beginning of a line comment (for www usage)
226 } else if (current == '<' && next1 == '!' &&
227 next2 == '-' && next3 == '-') {
228 shift(3);
229 state = InSingleLineComment;
230 // same for -->
231 } else if (bol && current == '-' && next1 == '-' && next2 == '>') {
232 shift(2);
233 state = InSingleLineComment;
234#endif
235 } else {
// matchPunctuator() consumes the characters of the punctuator itself
236 token = matchPunctuator(current, next1, next2, next3);
237 if (token != -1) {
238 setDone(Other);
239 } else {
240 // cerr << "encountered unknown character" << endl;
241 setDone(Bad);
242 }
243 }
244 break;
245 case InString:
246 if (current == stringType) {
247 shift(1);
248 setDone(String);
249 } else if (isLineTerminator() || current == -1) {
250 setDone(Bad);
251 } else if (current == '\\') {
252 state = InEscapeSequence;
253 } else {
254 record16(current);
255 }
256 break;
257 // Escape Sequences inside of strings
258 case InEscapeSequence:
259 if (isOctalDigit(current)) {
// octal escapes: three digits only when the first is 0-3, otherwise two, otherwise one
260 if (current >= '0' && current <= '3' &&
261 isOctalDigit(next1) && isOctalDigit(next2)) {
262 record16(convertOctal(current, next1, next2));
263 shift(2);
264 state = InString;
265 } else if (isOctalDigit(current) && isOctalDigit(next1)) {
266 record16(convertOctal('0', current, next1));
267 shift(1);
268 state = InString;
269 } else if (isOctalDigit(current)) {
270 record16(convertOctal('0', '0', current));
271 state = InString;
272 } else {
273 setDone(Bad);
274 }
275 } else if (current == 'x')
276 state = InHexEscape;
277 else if (current == 'u')
278 state = InUnicodeEscape;
279 else if (isLineTerminator()) {
// backslash followed by a line break: nothing is recorded, the string continues
280 nextLine();
281 state = InString;
282 } else {
283 record16(singleEscape(static_cast<unsigned short>(current)));
284 state = InString;
285 }
286 break;
287 case InHexEscape:
288 if (isHexDigit(current) && isHexDigit(next1)) {
289 state = InString;
290 record16(convertHex(current, next1));
291 shift(1);
292 } else if (current == stringType) {
// "\x" directly before the closing quote: keep a literal 'x' and end the string
293 record16('x');
294 shift(1);
295 setDone(String);
296 } else {
// "\x" not followed by two hex digits: keep the 'x' and the current character literally
297 record16('x');
298 record16(current);
299 state = InString;
300 }
301 break;
302 case InUnicodeEscape:
303 if (isHexDigit(current) && isHexDigit(next1) && isHexDigit(next2) && isHexDigit(next3)) {
304 record16(convertUnicode(current, next1, next2, next3));
305 shift(3);
306 state = InString;
307 } else if (current == stringType) {
// "\u" directly before the closing quote: keep a literal 'u' and end the string
308 record16('u');
309 shift(1);
310 setDone(String);
311 } else {
312 setDone(Bad);
313 }
314 break;
315 case InSingleLineComment:
316 if (isLineTerminator()) {
317 nextLine();
318 terminator = true;
319 if (restrKeyword) {
320 token = ';';
321 setDone(Other);
322 } else
323 state = Start;
324 } else if (current == -1) {
325 setDone(Eof);
326 }
327 break;
328 case InMultiLineComment:
329 if (current == -1) {
330 setDone(Bad);
331 } else if (isLineTerminator()) {
332 nextLine();
333 } else if (current == '*' && next1 == '/') {
334 state = Start;
335 shift(1);
336 }
337 break;
338 case InIdentifierOrKeyword:
339 case InIdentifier:
340 if (isIdentPart(current))
341 record16(current);
342 else if (current == '\\')
343 state = InIdentifierUnicodeEscapeStart;
344 else
345 setDone(state == InIdentifierOrKeyword ? IdentifierOrKeyword : Identifier);
346 break;
347 case InNum0:
348 if (current == 'x' || current == 'X') {
349 record8(current);
350 state = InHex;
351 } else if (current == '.') {
352 record8(current);
353 state = InDecimal;
354 } else if (current == 'e' || current == 'E') {
355 record8(current);
356 state = InExponentIndicator;
357 } else if (isOctalDigit(current)) {
358 record8(current);
359 state = InOctal;
360 } else if (isDecimalDigit(current)) {
361 record8(current);
362 state = InDecimal;
363 } else {
364 setDone(Number);
365 }
366 break;
367 case InHex:
368 if (isHexDigit(current)) {
369 record8(current);
370 } else {
371 setDone(Hex);
372 }
373 break;
374 case InOctal:
375 if (isOctalDigit(current)) {
376 record8(current);
377 }
378 else if (isDecimalDigit(current)) {
379 record8(current);
380 state = InDecimal;
381 } else
382 setDone(Octal);
383 break;
384 case InNum:
385 if (isDecimalDigit(current)) {
386 record8(current);
387 } else if (current == '.') {
388 record8(current);
389 state = InDecimal;
390 } else if (current == 'e' || current == 'E') {
391 record8(current);
392 state = InExponentIndicator;
393 } else
394 setDone(Number);
395 break;
396 case InDecimal:
397 if (isDecimalDigit(current)) {
398 record8(current);
399 } else if (current == 'e' || current == 'E') {
400 record8(current);
401 state = InExponentIndicator;
402 } else
403 setDone(Number);
404 break;
405 case InExponentIndicator:
// NOTE: a sign does not change the state, so more than one sign character is accepted here
406 if (current == '+' || current == '-') {
407 record8(current);
408 } else if (isDecimalDigit(current)) {
409 record8(current);
410 state = InExponent;
411 } else
412 setDone(Bad);
413 break;
414 case InExponent:
415 if (isDecimalDigit(current)) {
416 record8(current);
417 } else
418 setDone(Number);
419 break;
420 case InIdentifierUnicodeEscapeStart:
421 if (current == 'u')
422 state = InIdentifierUnicodeEscape;
423 else
424 setDone(Bad);
425 break;
426 case InIdentifierUnicodeEscape:
427 if (isHexDigit(current) && isHexDigit(next1) && isHexDigit(next2) && isHexDigit(next3)) {
428 record16(convertUnicode(current, next1, next2, next3));
429 shift(3);
430 state = InIdentifier;
431 } else {
432 setDone(Bad);
433 }
434 break;
435 default:
436 assert(!"Unhandled state in switch statement");
437 }
438
439 // move on to the next character
440 if (!done)
441 shift(1);
442#ifndef KJS_PURE_ECMA
443 if (state != Start && state != InSingleLineComment)
444 bol = false;
445#endif
446 }
447
448 // no identifiers allowed directly after numeric literal, e.g. "3in" is bad
449 if ((state == Number || state == Octal || state == Hex) && isIdentStart(current))
450 state = Bad;
451
452 // terminate string
453 buffer8[pos8] = '\0';
454
455#ifdef KJS_DEBUG_LEX
456 fprintf(stderr, "line: %d ", lineNo());
457 fprintf(stderr, "yytext (%x): ", buffer8[0]);
458 fprintf(stderr, "%s ", buffer8);
459#endif
460
// convert the text collected in buffer8 into a double; Hex and Octal become Number
461 double dval = 0;
462 if (state == Number) {
463 dval = strtod(buffer8, 0L);
464 } else if (state == Hex) { // scan hex numbers
// skip the two recorded prefix characters ('0' and 'x'/'X')
465 const char *p = buffer8 + 2;
466 while (char c = *p++) {
467 dval *= 16;
468 dval += convertHex(c);
469 }
470 state = Number;
471 } else if (state == Octal) { // scan octal number
// skip the leading '0'
472 const char *p = buffer8 + 1;
473 while (char c = *p++) {
474 dval *= 8;
475 dval += c - '0';
476 }
477 state = Number;
478 }
479
480#ifdef KJS_DEBUG_LEX
481 switch (state) {
482 case Eof:
483 printf("(EOF)\n");
484 break;
485 case Other:
486 printf("(Other)\n");
487 break;
488 case Identifier:
489 printf("(Identifier)/(Keyword)\n");
490 break;
491 case String:
492 printf("(String)\n");
493 break;
494 case Number:
495 printf("(Number)\n");
496 break;
497 default:
498 printf("(unknown)");
499 }
500#endif
501
502 if (state != Identifier && eatNextIdentifier)
503 eatNextIdentifier = false;
504
505 restrKeyword = false;
506 delimited = false;
507 kjsyylloc.first_line = yylineno; // ???
508 kjsyylloc.last_line = yylineno;
509
510 switch (state) {
511 case Eof:
512 token = 0;
513 break;
514 case Other:
515 if(token == '}' || token == ';') {
516 delimited = true;
517 }
518 break;
519 case IdentifierOrKeyword:
// NOTE: the `case Identifier:` label sits inside this if-body on purpose, so a
// plain identifier and a failed keyword lookup share the same code.
520 if ((token = Lookup::find(&mainTable, buffer16, pos16)) < 0) {
521 case Identifier:
522 // Lookup for keyword failed, means this is an identifier
523 // Apply anonymous-function hack below (eat the identifier)
524 if (eatNextIdentifier) {
525 eatNextIdentifier = false;
// drop this identifier and hand back the token that follows it
526 token = lex();
527 break;
528 }
529 kjsyylval.ident = makeIdentifier(buffer16, pos16);
530 token = IDENT;
531 break;
532 }
533
// only reached when the keyword lookup succeeded
534 eatNextIdentifier = false;
535 // Hack for "f = function somename() { ... }", too hard to get into the grammar
536 if (token == FUNCTION && lastToken == '=' )
537 eatNextIdentifier = true;
538
539 if (token == CONTINUE || token == BREAK ||
540 token == RETURN || token == THROW)
541 restrKeyword = true;
542 break;
543 case String:
544 kjsyylval.ustr = makeUString(buffer16, pos16);
545 token = STRING;
546 break;
547 case Number:
548 kjsyylval.dval = dval;
549 token = NUMBER;
550 break;
551 case Bad:
552#ifdef KJS_DEBUG_LEX
553 fprintf(stderr, "yylex: ERROR.\n");
554#endif
555 error = true;
556 return -1;
557 default:
558 assert(!"unhandled numeration value in switch");
559 error = true;
560 return -1;
561 }
562 lastToken = token;
563 return token;
564}
565
566bool Lexer::isWhiteSpace() const
567{
568 return current == '\t' || current == 0x0b || current == 0x0c || WTF::Unicode::isSeparatorSpace(current);
569}
570
571bool Lexer::isLineTerminator()
572{
573 bool cr = (current == '\r');
574 bool lf = (current == '\n');
575 if (cr)
576 skipLF = true;
577 else if (lf)
578 skipCR = true;
579 return cr || lf || current == 0x2028 || current == 0x2029;
580}
581
582bool Lexer::isIdentStart(int c)
583{
584 return (WTF::Unicode::category(c) & (WTF::Unicode::Letter_Uppercase
585 | WTF::Unicode::Letter_Lowercase
586 | WTF::Unicode::Letter_Titlecase
587 | WTF::Unicode::Letter_Modifier
588 | WTF::Unicode::Letter_Other))
589 || c == '$' || c == '_';
590}
591
592bool Lexer::isIdentPart(int c)
593{
594 return (WTF::Unicode::category(c) & (WTF::Unicode::Letter_Uppercase
595 | WTF::Unicode::Letter_Lowercase
596 | WTF::Unicode::Letter_Titlecase
597 | WTF::Unicode::Letter_Modifier
598 | WTF::Unicode::Letter_Other
599 | WTF::Unicode::Mark_NonSpacing
600 | WTF::Unicode::Mark_SpacingCombining
601 | WTF::Unicode::Number_DecimalDigit
602 | WTF::Unicode::Punctuation_Connector))
603 || c == '$' || c == '_';
604}
605
// True for the ASCII digits '0' through '9'; false for everything else,
// including the -1 end-of-input marker.
static bool isDecimalDigit(int c)
{
  if (c < '0')
    return false;
  return c <= '9';
}
610
611bool Lexer::isHexDigit(int c)
612{
613 return (c >= '0' && c <= '9' ||
614 c >= 'a' && c <= 'f' ||
615 c >= 'A' && c <= 'F');
616}
617
618bool Lexer::isOctalDigit(int c)
619{
620 return (c >= '0' && c <= '7');
621}
622
623int Lexer::matchPunctuator(int c1, int c2, int c3, int c4)
624{
625 if (c1 == '>' && c2 == '>' && c3 == '>' && c4 == '=') {
626 shift(4);
627 return URSHIFTEQUAL;
628 } else if (c1 == '=' && c2 == '=' && c3 == '=') {
629 shift(3);
630 return STREQ;
631 } else if (c1 == '!' && c2 == '=' && c3 == '=') {
632 shift(3);
633 return STRNEQ;
634 } else if (c1 == '>' && c2 == '>' && c3 == '>') {
635 shift(3);
636 return URSHIFT;
637 } else if (c1 == '<' && c2 == '<' && c3 == '=') {
638 shift(3);
639 return LSHIFTEQUAL;
640 } else if (c1 == '>' && c2 == '>' && c3 == '=') {
641 shift(3);
642 return RSHIFTEQUAL;
643 } else if (c1 == '<' && c2 == '=') {
644 shift(2);
645 return LE;
646 } else if (c1 == '>' && c2 == '=') {
647 shift(2);
648 return GE;
649 } else if (c1 == '!' && c2 == '=') {
650 shift(2);
651 return NE;
652 } else if (c1 == '+' && c2 == '+') {
653 shift(2);
654 if (terminator)
655 return AUTOPLUSPLUS;
656 else
657 return PLUSPLUS;
658 } else if (c1 == '-' && c2 == '-') {
659 shift(2);
660 if (terminator)
661 return AUTOMINUSMINUS;
662 else
663 return MINUSMINUS;
664 } else if (c1 == '=' && c2 == '=') {
665 shift(2);
666 return EQEQ;
667 } else if (c1 == '+' && c2 == '=') {
668 shift(2);
669 return PLUSEQUAL;
670 } else if (c1 == '-' && c2 == '=') {
671 shift(2);
672 return MINUSEQUAL;
673 } else if (c1 == '*' && c2 == '=') {
674 shift(2);
675 return MULTEQUAL;
676 } else if (c1 == '/' && c2 == '=') {
677 shift(2);
678 return DIVEQUAL;
679 } else if (c1 == '&' && c2 == '=') {
680 shift(2);
681 return ANDEQUAL;
682 } else if (c1 == '^' && c2 == '=') {
683 shift(2);
684 return XOREQUAL;
685 } else if (c1 == '%' && c2 == '=') {
686 shift(2);
687 return MODEQUAL;
688 } else if (c1 == '|' && c2 == '=') {
689 shift(2);
690 return OREQUAL;
691 } else if (c1 == '<' && c2 == '<') {
692 shift(2);
693 return LSHIFT;
694 } else if (c1 == '>' && c2 == '>') {
695 shift(2);
696 return RSHIFT;
697 } else if (c1 == '&' && c2 == '&') {
698 shift(2);
699 return AND;
700 } else if (c1 == '|' && c2 == '|') {
701 shift(2);
702 return OR;
703 }
704
705 switch(c1) {
706 case '=':
707 case '>':
708 case '<':
709 case ',':
710 case '!':
711 case '~':
712 case '?':
713 case ':':
714 case '.':
715 case '+':
716 case '-':
717 case '*':
718 case '/':
719 case '&':
720 case '|':
721 case '^':
722 case '%':
723 case '(':
724 case ')':
725 case '{':
726 case '}':
727 case '[':
728 case ']':
729 case ';':
730 shift(1);
731 return static_cast<int>(c1);
732 default:
733 return -1;
734 }
735}
736
737unsigned short Lexer::singleEscape(unsigned short c)
738{
739 switch(c) {
740 case 'b':
741 return 0x08;
742 case 't':
743 return 0x09;
744 case 'n':
745 return 0x0A;
746 case 'v':
747 return 0x0B;
748 case 'f':
749 return 0x0C;
750 case 'r':
751 return 0x0D;
752 case '"':
753 return 0x22;
754 case '\'':
755 return 0x27;
756 case '\\':
757 return 0x5C;
758 default:
759 return c;
760 }
761}
762
763unsigned short Lexer::convertOctal(int c1, int c2, int c3)
764{
765 return static_cast<unsigned short>((c1 - '0') * 64 + (c2 - '0') * 8 + c3 - '0');
766}
767
768unsigned char Lexer::convertHex(int c)
769{
770 if (c >= '0' && c <= '9')
771 return static_cast<unsigned char>(c - '0');
772 if (c >= 'a' && c <= 'f')
773 return static_cast<unsigned char>(c - 'a' + 10);
774 return static_cast<unsigned char>(c - 'A' + 10);
775}
776
777unsigned char Lexer::convertHex(int c1, int c2)
778{
779 return ((convertHex(c1) << 4) + convertHex(c2));
780}
781
782KJS::UChar Lexer::convertUnicode(int c1, int c2, int c3, int c4)
783{
784 return KJS::UChar((convertHex(c1) << 4) + convertHex(c2),
785 (convertHex(c3) << 4) + convertHex(c4));
786}
787
788void Lexer::record8(int c)
789{
790 ASSERT(c >= 0);
791 ASSERT(c <= 0xff);
792
793 // enlarge buffer if full
794 if (pos8 >= size8 - 1) {
795 char *tmp = new char[2 * size8];
796 memcpy(tmp, buffer8, size8 * sizeof(char));
797 delete [] buffer8;
798 buffer8 = tmp;
799 size8 *= 2;
800 }
801
802 buffer8[pos8++] = (char) c;
803}
804
805void Lexer::record16(int c)
806{
807 ASSERT(c >= 0);
808 ASSERT(c <= USHRT_MAX);
809 record16(UChar(static_cast<unsigned short>(c)));
810}
811
812void Lexer::record16(KJS::UChar c)
813{
814 // enlarge buffer if full
815 if (pos16 >= size16 - 1) {
816 KJS::UChar *tmp = new KJS::UChar[2 * size16];
817 memcpy(tmp, buffer16, size16 * sizeof(KJS::UChar));
818 delete [] buffer16;
819 buffer16 = tmp;
820 size16 *= 2;
821 }
822
823 buffer16[pos16++] = c;
824}
825
826bool Lexer::scanRegExp()
827{
828 pos16 = 0;
829 bool lastWasEscape = false;
830 bool inBrackets = false;
831
832 while (1) {
833 if (isLineTerminator() || current == -1)
834 return false;
835 else if (current != '/' || lastWasEscape == true || inBrackets == true)
836 {
837 // keep track of '[' and ']'
838 if (!lastWasEscape) {
839 if ( current == '[' && !inBrackets )
840 inBrackets = true;
841 if ( current == ']' && inBrackets )
842 inBrackets = false;
843 }
844 record16(current);
845 lastWasEscape =
846 !lastWasEscape && (current == '\\');
847 }
848 else { // end of regexp
849 pattern = UString(buffer16, pos16);
850 pos16 = 0;
851 shift(1);
852 break;
853 }
854 shift(1);
855 }
856
857 while (isIdentPart(current)) {
858 record16(current);
859 shift(1);
860 }
861 flags = UString(buffer16, pos16);
862
863 return true;
864}
865
866
867void Lexer::doneParsing()
868{
869 for (unsigned i = 0; i < numIdentifiers; i++) {
870 delete identifiers[i];
871 }
872 fastFree(identifiers);
873 identifiers = 0;
874 numIdentifiers = 0;
875 identifiersCapacity = 0;
876
877 for (unsigned i = 0; i < numStrings; i++) {
878 delete strings[i];
879 }
880 fastFree(strings);
881 strings = 0;
882 numStrings = 0;
883 stringsCapacity = 0;
884}
885
// Sizing policy for the `identifiers` and `strings` arrays: the first
// allocation holds initialCapacity entries, and a full array grows by
// growthFactor.
886const int initialCapacity = 64;
887const int growthFactor = 2;
888
889// FIXME: this completely ignores its parameters, instead using buffer16 and pos16 - wtf?
890Identifier *Lexer::makeIdentifier(KJS::UChar*, unsigned int)
891{
892 if (numIdentifiers == identifiersCapacity) {
893 identifiersCapacity = (identifiersCapacity == 0) ? initialCapacity : identifiersCapacity *growthFactor;
894 identifiers = (KJS::Identifier **)fastRealloc(identifiers, sizeof(KJS::Identifier *) * identifiersCapacity);
895 }
896
897 KJS::Identifier *identifier = new KJS::Identifier(buffer16, pos16);
898 identifiers[numIdentifiers++] = identifier;
899 return identifier;
900}
901
902// FIXME: this completely ignores its parameters, instead using buffer16 and pos16 - wtf?
903UString *Lexer::makeUString(KJS::UChar*, unsigned int)
904{
905 if (numStrings == stringsCapacity) {
906 stringsCapacity = (stringsCapacity == 0) ? initialCapacity : stringsCapacity *growthFactor;
907 strings = (UString **)fastRealloc(strings, sizeof(UString *) * stringsCapacity);
908 }
909
910 UString *string = new UString(buffer16, pos16);
911 strings[numStrings++] = string;
912 return string;
913}
Note: See TracBrowser for help on using the repository browser.