source: webkit/trunk/JavaScriptCore/kjs/lexer.cpp@ 20310

Last change on this file since 20310 was 20304, checked in by bdash, 18 years ago

2007-03-19 Mark Rowe <[email protected]>

Rubber-stamped by Brady.

Update references to bugzilla.opendarwin.org with bugs.webkit.org.

  • bindings/c/c_utility.cpp: (KJS::Bindings::convertUTF8ToUTF16):
  • kjs/function.cpp: (KJS::FunctionImp::callAsFunction):
  • kjs/grammar.y:
  • kjs/keywords.table:
  • kjs/lexer.cpp: (KJS::Lexer::shift):

2007-03-19 Mark Rowe <[email protected]>

Rubber-stamped by Brady.

Update references to bugzilla.opendarwin.org with bugs.webkit.org.

  • ChangeLog:
  • WebCore.vcproj/WebCore/build-generated-files.sh:
  • manual-tests/ATSU-bad-layout.html:
  • manual-tests/accidental-strict-mode.html:
  • manual-tests/applet-param-no-name.html:
  • manual-tests/bidi-parens.html:
  • manual-tests/bugzilla-3855.html:
  • manual-tests/bugzilla-4840.html:
  • manual-tests/bugzilla-6821.html:
  • manual-tests/containing-block-position-chage.html:
  • manual-tests/contenteditable-link.html:
  • manual-tests/css3-cursor-fallback-quirks.html:
  • manual-tests/css3-cursor-fallback-strict.html:
  • manual-tests/custom-cursors.html:
  • manual-tests/dictionary-scrolled-iframe.html:
  • manual-tests/dom-manipulation-on-resize.html:
  • manual-tests/drag-image-to-address-bar.html:
  • manual-tests/empty-link-target.html:
  • manual-tests/empty-title-popup.html:
  • manual-tests/first-line-style-crash.html:
  • manual-tests/invalid-mouse-event.html:
  • manual-tests/left-overflow-repaint.html:
  • manual-tests/linkjump-3.html:
  • manual-tests/log-keypress-events.html:
  • manual-tests/named-window-blank-target.html:
  • manual-tests/plain-text-paste.html:
  • manual-tests/plugin-controller-datasource.html:
  • manual-tests/pre-tab-selection-rect.html:
  • manual-tests/redirection-target.html:
  • manual-tests/redraw-page-cache-visited-links.html:
  • manual-tests/reset-initiatedDrag.html:
  • manual-tests/resources/named-window-blank-target-step2.html:
  • manual-tests/resources/named-window-blank-target-step3.html:
  • manual-tests/resources/named-window-blank-target-step4.html:
  • manual-tests/resources/redraw-page-cache-visited-links-2.html:
  • manual-tests/scrollbar-hittest.html:
  • manual-tests/scrollbar-hittest2.html:
  • manual-tests/subview-click-assertion.html:
  • manual-tests/tabbing-input-google.html:
  • manual-tests/text-field-autoscroll.html:
  • manual-tests/textarea-after-stylesheet-link.html:
  • manual-tests/textarea-focus.html:
  • manual-tests/whitespace-pre-affinity.html:

2007-03-19 Mark Rowe <[email protected]>

Rubber-stamped by Brady.

Update references to bugzilla.opendarwin.org with bugs.webkit.org.

  • WebInspector/webInspector/inspector.css:
  • WebView/WebHTMLView.mm: (-[WebHTMLView firstRectForCharacterRange:]):
  • WebView/WebView.mm: (-[WebView initWithFrame:frameName:groupName:]):
  • Property svn:eol-style set to native
File size: 21.8 KB
Line 
1// -*- c-basic-offset: 2 -*-
2/*
3 * This file is part of the KDE libraries
4 * Copyright (C) 1999-2000 Harri Porten ([email protected])
5 * Copyright (C) 2006 Apple Computer, Inc.
6 *
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Library General Public
9 * License as published by the Free Software Foundation; either
10 * version 2 of the License, or (at your option) any later version.
11 *
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Library General Public License for more details.
16 *
17 * You should have received a copy of the GNU Library General Public License
18 * along with this library; see the file COPYING.LIB. If not, write to
19 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20 * Boston, MA 02110-1301, USA.
21 *
22 */
23
24#include "config.h"
25#include "lexer.h"
26
27#include <ctype.h>
28#include <string.h>
29
30#include "interpreter.h"
31#include "nodes.h"
32#include <wtf/unicode/Unicode.h>
33
34using namespace WTF;
35using namespace Unicode;
36
37// we can't specify the namespace in yacc's C output, so do it here
38using namespace KJS;
39
40#ifndef KDE_USE_FINAL
41#include "grammar.h"
42#endif
43
44#include "lookup.h"
45#include "lexer.lut.h"
46
47extern YYLTYPE kjsyylloc; // global bison variable holding token info
48
49// a bridge for yacc from the C world to C++
50int kjsyylex()
51{
52 return Lexer::curr()->lex();
53}
54
55namespace KJS {
56
// The lexer handed out by Lexer::curr(). Initially 0; every Lexer constructor
// stores its own address here, and Lexer::curr() creates one on demand.
57static Lexer* currLexer = 0;
58
// Forward declaration; the definition appears further down in this file.
59static bool isDecimalDigit(int);
60
61Lexer::Lexer()
62 : yylineno(1),
63 size8(128), size16(128), restrKeyword(false),
64 eatNextIdentifier(false), stackToken(-1), lastToken(-1), pos(0),
65 code(0), length(0),
66#ifndef KJS_PURE_ECMA
67 bol(true),
68#endif
69 current(0), next1(0), next2(0), next3(0),
70 strings(0), numStrings(0), stringsCapacity(0),
71 identifiers(0), numIdentifiers(0), identifiersCapacity(0)
72{
73 // allocate space for read buffers
74 buffer8 = new char[size8];
75 buffer16 = new KJS::UChar[size16];
76 currLexer = this;
77}
78
79Lexer::~Lexer()
80{
81 doneParsing();
82 delete [] buffer8;
83 delete [] buffer16;
84}
85
86Lexer *Lexer::curr()
87{
88 if (!currLexer) {
89 // create singleton instance
90 currLexer = new Lexer();
91 }
92 return currLexer;
93}
94
#ifdef KJS_DEBUG_MEM
// Only built with KJS_DEBUG_MEM: destroys the current lexer (if any) and
// clears the pointer to it.
void Lexer::globalClear()
{
  Lexer* const doomed = currLexer;
  currLexer = 0;
  delete doomed;
}
#endif
102
103void Lexer::setCode(const UString &sourceURL, int startingLineNumber, const KJS::UChar *c, unsigned int len)
104{
105 yylineno = 1 + startingLineNumber;
106 m_sourceURL = sourceURL;
107 restrKeyword = false;
108 delimited = false;
109 eatNextIdentifier = false;
110 stackToken = -1;
111 lastToken = -1;
112 pos = 0;
113 code = c;
114 length = len;
115 skipLF = false;
116 skipCR = false;
117 error = false;
118#ifndef KJS_PURE_ECMA
119 bol = true;
120#endif
121
122 // read first characters
123 current = (length > 0) ? code[0].uc : -1;
124 next1 = (length > 1) ? code[1].uc : -1;
125 next2 = (length > 2) ? code[2].uc : -1;
126 next3 = (length > 3) ? code[3].uc : -1;
127}
128
129void Lexer::shift(unsigned int p)
130{
131 // Here would be a good place to strip Cf characters, but that has caused compatibility problems:
132 // <http://bugs.webkit.org/show_bug.cgi?id=10183>.
133 while (p--) {
134 pos++;
135 current = next1;
136 next1 = next2;
137 next2 = next3;
138 next3 = (pos + 3 < length) ? code[pos + 3].uc : -1;
139 }
140}
141
142// called on each new line
143void Lexer::nextLine()
144{
145 yylineno++;
146#ifndef KJS_PURE_ECMA
147 bol = true;
148#endif
149}
150
151void Lexer::setDone(State s)
152{
153 state = s;
154 done = true;
155}
156
// Scans the next token from the input set up by setCode() and returns its
// token code for the parser.
//
// Returns 0 at end of input and -1 (with `error` set to true) on a lexical
// error. For identifiers, strings and numbers the semantic value is stored in
// kjsyylval; the current line number is stored in kjsyylloc.
//
// The function runs a character-driven state machine (the `state` member)
// until setDone() is called, then converts the collected text (buffer8 for
// numbers, buffer16 for identifiers and strings) into the token.
157int Lexer::lex()
158{
 // reset the per-token scanning state
159 int token = 0;
160 state = Start;
161 unsigned short stringType = 0; // either single or double quotes
162 pos8 = pos16 = 0;
163 done = false;
164 terminator = false;
165 skipLF = false;
166 skipCR = false;
167
168 // did we push a token on the stack previously ?
169 // (after an automatic semicolon insertion)
 // Note: stackToken is left at 0 rather than -1 here, so this branch fires
 // again on every later call until setCode() resets stackToken. The only value
 // this function ever stacks is 0, i.e. end of input.
170 if (stackToken >= 0) {
171 setDone(Other);
172 token = stackToken;
173 stackToken = 0;
174 }
175
176 while (!done) {
 // isLineTerminator() arms skipLF after '\r' and skipCR after '\n'; here the
 // second character of a two-character line ending is swallowed.
177 if (skipLF && current != '\n') // found \r but not \n afterwards
178 skipLF = false;
179 if (skipCR && current != '\r') // found \n but not \r afterwards
180 skipCR = false;
181 if (skipLF || skipCR) // found \r\n or \n\r -> eat the second one
182 {
183 skipLF = false;
184 skipCR = false;
185 shift(1);
186 }
187 switch (state) {
 // Between tokens: skip white space and comments, or decide from the first
 // character which kind of token starts here.
188 case Start:
189 if (isWhiteSpace()) {
190 // do nothing
191 } else if (current == '/' && next1 == '/') {
192 shift(1);
193 state = InSingleLineComment;
194 } else if (current == '/' && next1 == '*') {
195 shift(1);
196 state = InMultiLineComment;
197 } else if (current == -1) {
 // End of input. Unless a line terminator was seen during this call or
 // the previous token was '}' or ';' (delimited), return ';' now and
 // stack 0 (end of input) for the next call.
198 if (!terminator && !delimited) {
199 // automatic semicolon insertion if program incomplete
200 token = ';';
201 stackToken = 0;
202 setDone(Other);
203 } else
204 setDone(Eof);
205 } else if (isLineTerminator()) {
206 nextLine();
207 terminator = true;
 // restrKeyword is set after continue/break/return/throw: a line
 // terminator directly after one of them yields a ';' token.
208 if (restrKeyword) {
209 token = ';';
210 setDone(Other);
211 }
212 } else if (current == '"' || current == '\'') {
213 state = InString;
214 stringType = static_cast<unsigned short>(current);
215 } else if (isIdentStart(current)) {
216 record16(current);
217 state = InIdentifierOrKeyword;
218 } else if (current == '\\') {
219 state = InIdentifierUnicodeEscapeStart;
220 } else if (current == '0') {
221 record8(current);
222 state = InNum0;
223 } else if (isDecimalDigit(current)) {
224 record8(current);
225 state = InNum;
226 } else if (current == '.' && isDecimalDigit(next1)) {
227 record8(current);
228 state = InDecimal;
229#ifndef KJS_PURE_ECMA
230 // <!-- marks the beginning of a line comment (for www usage)
231 } else if (current == '<' && next1 == '!' &&
232 next2 == '-' && next3 == '-') {
233 shift(3);
234 state = InSingleLineComment;
235 // same for -->
236 } else if (bol && current == '-' && next1 == '-' && next2 == '>') {
237 shift(2);
238 state = InSingleLineComment;
239#endif
240 } else {
 // matchPunctuator() consumes the characters it matches itself.
241 token = matchPunctuator(current, next1, next2, next3);
242 if (token != -1) {
243 setDone(Other);
244 } else {
245 // cerr << "encountered unknown character" << endl;
246 setDone(Bad);
247 }
248 }
249 break;
 // Inside a string literal; stringType holds the opening quote character.
250 case InString:
251 if (current == stringType) {
252 shift(1);
253 setDone(String);
254 } else if (isLineTerminator() || current == -1) {
255 setDone(Bad);
256 } else if (current == '\\') {
257 state = InEscapeSequence;
258 } else {
259 record16(current);
260 }
261 break;
262 // Escape Sequences inside of strings
263 case InEscapeSequence:
 // Octal escapes take three digits (first digit 0-3), else two, else one.
264 if (isOctalDigit(current)) {
265 if (current >= '0' && current <= '3' &&
266 isOctalDigit(next1) && isOctalDigit(next2)) {
267 record16(convertOctal(current, next1, next2));
268 shift(2);
269 state = InString;
270 } else if (isOctalDigit(current) && isOctalDigit(next1)) {
271 record16(convertOctal('0', current, next1));
272 shift(1);
273 state = InString;
274 } else if (isOctalDigit(current)) {
275 record16(convertOctal('0', '0', current));
276 state = InString;
277 } else {
278 setDone(Bad);
279 }
280 } else if (current == 'x')
281 state = InHexEscape;
282 else if (current == 'u')
283 state = InUnicodeEscape;
284 else if (isLineTerminator()) {
 // backslash followed by a line terminator: nothing is recorded
285 nextLine();
286 state = InString;
287 } else {
288 record16(singleEscape(static_cast<unsigned short>(current)));
289 state = InString;
290 }
291 break;
 // After "\x": two hex digits form one character; otherwise the 'x' is
 // recorded literally.
292 case InHexEscape:
293 if (isHexDigit(current) && isHexDigit(next1)) {
294 state = InString;
295 record16(convertHex(current, next1));
296 shift(1);
297 } else if (current == stringType) {
298 record16('x');
299 shift(1);
300 setDone(String);
301 } else {
302 record16('x');
303 record16(current);
304 state = InString;
305 }
306 break;
 // After "\u": four hex digits form one character. A "\u" directly before
 // the closing quote is recorded as a literal 'u'; anything else is an error.
307 case InUnicodeEscape:
308 if (isHexDigit(current) && isHexDigit(next1) && isHexDigit(next2) && isHexDigit(next3)) {
309 record16(convertUnicode(current, next1, next2, next3));
310 shift(3);
311 state = InString;
312 } else if (current == stringType) {
313 record16('u');
314 shift(1);
315 setDone(String);
316 } else {
317 setDone(Bad);
318 }
319 break;
 // Skip to the end of the line, applying the same restricted-keyword rule as
 // in Start.
320 case InSingleLineComment:
321 if (isLineTerminator()) {
322 nextLine();
323 terminator = true;
324 if (restrKeyword) {
325 token = ';';
326 setDone(Other);
327 } else
328 state = Start;
329 } else if (current == -1) {
330 setDone(Eof);
331 }
332 break;
 // Skip to the closing "*/"; an unterminated comment is an error. Line
 // terminators are counted but do not set `terminator`.
333 case InMultiLineComment:
334 if (current == -1) {
335 setDone(Bad);
336 } else if (isLineTerminator()) {
337 nextLine();
338 } else if (current == '*' && next1 == '/') {
339 state = Start;
340 shift(1);
341 }
342 break;
 // InIdentifier is entered after a \uXXXX escape (see
 // InIdentifierUnicodeEscape); such a name skips the keyword lookup below.
343 case InIdentifierOrKeyword:
344 case InIdentifier:
345 if (isIdentPart(current))
346 record16(current);
347 else if (current == '\\')
348 state = InIdentifierUnicodeEscapeStart;
349 else
350 setDone(state == InIdentifierOrKeyword ? IdentifierOrKeyword : Identifier);
351 break;
 // A literal starting with '0': hex, fraction, exponent, octal, or (when an
 // 8 or 9 follows) a decimal number.
352 case InNum0:
353 if (current == 'x' || current == 'X') {
354 record8(current);
355 state = InHex;
356 } else if (current == '.') {
357 record8(current);
358 state = InDecimal;
359 } else if (current == 'e' || current == 'E') {
360 record8(current);
361 state = InExponentIndicator;
362 } else if (isOctalDigit(current)) {
363 record8(current);
364 state = InOctal;
365 } else if (isDecimalDigit(current)) {
366 record8(current);
367 state = InDecimal;
368 } else {
369 setDone(Number);
370 }
371 break;
372 case InHex:
373 if (isHexDigit(current)) {
374 record8(current);
375 } else {
376 setDone(Hex);
377 }
378 break;
 // An 8 or 9 turns the literal into a decimal number.
379 case InOctal:
380 if (isOctalDigit(current)) {
381 record8(current);
382 }
383 else if (isDecimalDigit(current)) {
384 record8(current);
385 state = InDecimal;
386 } else
387 setDone(Octal);
388 break;
389 case InNum:
390 if (isDecimalDigit(current)) {
391 record8(current);
392 } else if (current == '.') {
393 record8(current);
394 state = InDecimal;
395 } else if (current == 'e' || current == 'E') {
396 record8(current);
397 state = InExponentIndicator;
398 } else
399 setDone(Number);
400 break;
401 case InDecimal:
402 if (isDecimalDigit(current)) {
403 record8(current);
404 } else if (current == 'e' || current == 'E') {
405 record8(current);
406 state = InExponentIndicator;
407 } else
408 setDone(Number);
409 break;
 // NOTE(review): the state does not change after a sign, so several signs in
 // a row (e.g. "1e+-5") are accepted here -- confirm whether that is intended.
410 case InExponentIndicator:
411 if (current == '+' || current == '-') {
412 record8(current);
413 } else if (isDecimalDigit(current)) {
414 record8(current);
415 state = InExponent;
416 } else
417 setDone(Bad);
418 break;
419 case InExponent:
420 if (isDecimalDigit(current)) {
421 record8(current);
422 } else
423 setDone(Number);
424 break;
 // A backslash in an identifier must be followed by 'u' and four hex digits.
425 case InIdentifierUnicodeEscapeStart:
426 if (current == 'u')
427 state = InIdentifierUnicodeEscape;
428 else
429 setDone(Bad);
430 break;
431 case InIdentifierUnicodeEscape:
432 if (isHexDigit(current) && isHexDigit(next1) && isHexDigit(next2) && isHexDigit(next3)) {
433 record16(convertUnicode(current, next1, next2, next3));
434 shift(3);
435 state = InIdentifier;
436 } else {
437 setDone(Bad);
438 }
439 break;
440 default:
441 assert(!"Unhandled state in switch statement");
442 }
443
444 // move on to the next character
445 if (!done)
446 shift(1);
447#ifndef KJS_PURE_ECMA
 // bol stays set only while nothing but white space and single-line comments
 // has been seen since the last line start.
448 if (state != Start && state != InSingleLineComment)
449 bol = false;
450#endif
451 }
452
453 // no identifiers allowed directly after numeric literal, e.g. "3in" is bad
454 if ((state == Number || state == Octal || state == Hex) && isIdentStart(current))
455 state = Bad;
456
457 // terminate string
458 buffer8[pos8] = '\0';
459
460#ifdef KJS_DEBUG_LEX
461 fprintf(stderr, "line: %d ", lineNo());
462 fprintf(stderr, "yytext (%x): ", buffer8[0]);
463 fprintf(stderr, "%s ", buffer8);
464#endif
465
 // Convert the collected numeric text; Hex and Octal both end up as Number.
466 double dval = 0;
467 if (state == Number) {
468 dval = strtod(buffer8, 0L);
469 } else if (state == Hex) { // scan hex numbers
 // skip the two characters "0x" / "0X"
470 const char *p = buffer8 + 2;
471 while (char c = *p++) {
472 dval *= 16;
473 dval += convertHex(c);
474 }
475 state = Number;
476 } else if (state == Octal) { // scan octal number
 // skip the leading '0'
477 const char *p = buffer8 + 1;
478 while (char c = *p++) {
479 dval *= 8;
480 dval += c - '0';
481 }
482 state = Number;
483 }
484
485#ifdef KJS_DEBUG_LEX
486 switch (state) {
487 case Eof:
488 printf("(EOF)\n");
489 break;
490 case Other:
491 printf("(Other)\n");
492 break;
493 case Identifier:
494 printf("(Identifier)/(Keyword)\n");
495 break;
496 case String:
497 printf("(String)\n");
498 break;
499 case Number:
500 printf("(Number)\n");
501 break;
502 default:
503 printf("(unknown)");
504 }
505#endif
506
507 if (state != Identifier && eatNextIdentifier)
508 eatNextIdentifier = false;
509
 // These two flags describe the previous token only; they are set again
 // below where they apply.
510 restrKeyword = false;
511 delimited = false;
512 kjsyylloc.first_line = yylineno; // ???
513 kjsyylloc.last_line = yylineno;
514
 // Turn the final state into the token code and semantic value.
515 switch (state) {
516 case Eof:
517 token = 0;
518 break;
519 case Other:
520 if(token == '}' || token == ';') {
521 delimited = true;
522 }
523 break;
 // The "case Identifier:" label sits inside the if-body: names that cannot be
 // keywords share the path taken when the keyword lookup fails.
524 case IdentifierOrKeyword:
525 if ((token = Lookup::find(&mainTable, buffer16, pos16)) < 0) {
526 case Identifier:
527 // Lookup for keyword failed, means this is an identifier
528 // Apply anonymous-function hack below (eat the identifier)
529 if (eatNextIdentifier) {
530 eatNextIdentifier = false;
 // drop this identifier and return the token that follows it
531 token = lex();
532 break;
533 }
534 kjsyylval.ident = makeIdentifier(buffer16, pos16);
535 token = IDENT;
536 break;
537 }
538
 // from here on: the lookup found a keyword
539 eatNextIdentifier = false;
540 // Hack for "f = function somename() { ... }", too hard to get into the grammar
541 if (token == FUNCTION && lastToken == '=' )
542 eatNextIdentifier = true;
543
544 if (token == CONTINUE || token == BREAK ||
545 token == RETURN || token == THROW)
546 restrKeyword = true;
547 break;
548 case String:
549 kjsyylval.ustr = makeUString(buffer16, pos16);
550 token = STRING;
551 break;
552 case Number:
553 kjsyylval.dval = dval;
554 token = NUMBER;
555 break;
556 case Bad:
557#ifdef KJS_DEBUG_LEX
558 fprintf(stderr, "yylex: ERROR.\n");
559#endif
560 error = true;
561 return -1;
562 default:
563 assert(!"unhandled numeration value in switch");
564 error = true;
565 return -1;
566 }
567 lastToken = token;
568 return token;
569}
570
571bool Lexer::isWhiteSpace() const
572{
573 return current == '\t' || current == 0x0b || current == 0x0c || isSeparatorSpace(current);
574}
575
576bool Lexer::isLineTerminator()
577{
578 bool cr = (current == '\r');
579 bool lf = (current == '\n');
580 if (cr)
581 skipLF = true;
582 else if (lf)
583 skipCR = true;
584 return cr || lf || current == 0x2028 || current == 0x2029;
585}
586
587bool Lexer::isIdentStart(int c)
588{
589 return (category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other))
590 || c == '$' || c == '_';
591}
592
593bool Lexer::isIdentPart(int c)
594{
595 return (category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other
596 | Mark_NonSpacing | Mark_SpacingCombining | Number_DecimalDigit | Punctuation_Connector))
597 || c == '$' || c == '_';
598}
599
// True if c is one of the ASCII digits '0' through '9'.
static bool isDecimalDigit(int c)
{
  if (c < '0')
    return false;
  return c <= '9';
}
604
605bool Lexer::isHexDigit(int c)
606{
607 return (c >= '0' && c <= '9' ||
608 c >= 'a' && c <= 'f' ||
609 c >= 'A' && c <= 'F');
610}
611
612bool Lexer::isOctalDigit(int c)
613{
614 return (c >= '0' && c <= '7');
615}
616
617int Lexer::matchPunctuator(int c1, int c2, int c3, int c4)
618{
619 if (c1 == '>' && c2 == '>' && c3 == '>' && c4 == '=') {
620 shift(4);
621 return URSHIFTEQUAL;
622 } else if (c1 == '=' && c2 == '=' && c3 == '=') {
623 shift(3);
624 return STREQ;
625 } else if (c1 == '!' && c2 == '=' && c3 == '=') {
626 shift(3);
627 return STRNEQ;
628 } else if (c1 == '>' && c2 == '>' && c3 == '>') {
629 shift(3);
630 return URSHIFT;
631 } else if (c1 == '<' && c2 == '<' && c3 == '=') {
632 shift(3);
633 return LSHIFTEQUAL;
634 } else if (c1 == '>' && c2 == '>' && c3 == '=') {
635 shift(3);
636 return RSHIFTEQUAL;
637 } else if (c1 == '<' && c2 == '=') {
638 shift(2);
639 return LE;
640 } else if (c1 == '>' && c2 == '=') {
641 shift(2);
642 return GE;
643 } else if (c1 == '!' && c2 == '=') {
644 shift(2);
645 return NE;
646 } else if (c1 == '+' && c2 == '+') {
647 shift(2);
648 if (terminator)
649 return AUTOPLUSPLUS;
650 else
651 return PLUSPLUS;
652 } else if (c1 == '-' && c2 == '-') {
653 shift(2);
654 if (terminator)
655 return AUTOMINUSMINUS;
656 else
657 return MINUSMINUS;
658 } else if (c1 == '=' && c2 == '=') {
659 shift(2);
660 return EQEQ;
661 } else if (c1 == '+' && c2 == '=') {
662 shift(2);
663 return PLUSEQUAL;
664 } else if (c1 == '-' && c2 == '=') {
665 shift(2);
666 return MINUSEQUAL;
667 } else if (c1 == '*' && c2 == '=') {
668 shift(2);
669 return MULTEQUAL;
670 } else if (c1 == '/' && c2 == '=') {
671 shift(2);
672 return DIVEQUAL;
673 } else if (c1 == '&' && c2 == '=') {
674 shift(2);
675 return ANDEQUAL;
676 } else if (c1 == '^' && c2 == '=') {
677 shift(2);
678 return XOREQUAL;
679 } else if (c1 == '%' && c2 == '=') {
680 shift(2);
681 return MODEQUAL;
682 } else if (c1 == '|' && c2 == '=') {
683 shift(2);
684 return OREQUAL;
685 } else if (c1 == '<' && c2 == '<') {
686 shift(2);
687 return LSHIFT;
688 } else if (c1 == '>' && c2 == '>') {
689 shift(2);
690 return RSHIFT;
691 } else if (c1 == '&' && c2 == '&') {
692 shift(2);
693 return AND;
694 } else if (c1 == '|' && c2 == '|') {
695 shift(2);
696 return OR;
697 }
698
699 switch(c1) {
700 case '=':
701 case '>':
702 case '<':
703 case ',':
704 case '!':
705 case '~':
706 case '?':
707 case ':':
708 case '.':
709 case '+':
710 case '-':
711 case '*':
712 case '/':
713 case '&':
714 case '|':
715 case '^':
716 case '%':
717 case '(':
718 case ')':
719 case '{':
720 case '}':
721 case '[':
722 case ']':
723 case ';':
724 shift(1);
725 return static_cast<int>(c1);
726 default:
727 return -1;
728 }
729}
730
731unsigned short Lexer::singleEscape(unsigned short c)
732{
733 switch(c) {
734 case 'b':
735 return 0x08;
736 case 't':
737 return 0x09;
738 case 'n':
739 return 0x0A;
740 case 'v':
741 return 0x0B;
742 case 'f':
743 return 0x0C;
744 case 'r':
745 return 0x0D;
746 case '"':
747 return 0x22;
748 case '\'':
749 return 0x27;
750 case '\\':
751 return 0x5C;
752 default:
753 return c;
754 }
755}
756
757unsigned short Lexer::convertOctal(int c1, int c2, int c3)
758{
759 return static_cast<unsigned short>((c1 - '0') * 64 + (c2 - '0') * 8 + c3 - '0');
760}
761
762unsigned char Lexer::convertHex(int c)
763{
764 if (c >= '0' && c <= '9')
765 return static_cast<unsigned char>(c - '0');
766 if (c >= 'a' && c <= 'f')
767 return static_cast<unsigned char>(c - 'a' + 10);
768 return static_cast<unsigned char>(c - 'A' + 10);
769}
770
771unsigned char Lexer::convertHex(int c1, int c2)
772{
773 return ((convertHex(c1) << 4) + convertHex(c2));
774}
775
776KJS::UChar Lexer::convertUnicode(int c1, int c2, int c3, int c4)
777{
778 return KJS::UChar((convertHex(c1) << 4) + convertHex(c2),
779 (convertHex(c3) << 4) + convertHex(c4));
780}
781
782void Lexer::record8(int c)
783{
784 ASSERT(c >= 0);
785 ASSERT(c <= 0xff);
786
787 // enlarge buffer if full
788 if (pos8 >= size8 - 1) {
789 char *tmp = new char[2 * size8];
790 memcpy(tmp, buffer8, size8 * sizeof(char));
791 delete [] buffer8;
792 buffer8 = tmp;
793 size8 *= 2;
794 }
795
796 buffer8[pos8++] = (char) c;
797}
798
799void Lexer::record16(int c)
800{
801 ASSERT(c >= 0);
802 ASSERT(c <= USHRT_MAX);
803 record16(UChar(static_cast<unsigned short>(c)));
804}
805
806void Lexer::record16(KJS::UChar c)
807{
808 // enlarge buffer if full
809 if (pos16 >= size16 - 1) {
810 KJS::UChar *tmp = new KJS::UChar[2 * size16];
811 memcpy(tmp, buffer16, size16 * sizeof(KJS::UChar));
812 delete [] buffer16;
813 buffer16 = tmp;
814 size16 *= 2;
815 }
816
817 buffer16[pos16++] = c;
818}
819
820bool Lexer::scanRegExp()
821{
822 pos16 = 0;
823 bool lastWasEscape = false;
824 bool inBrackets = false;
825
826 while (1) {
827 if (isLineTerminator() || current == -1)
828 return false;
829 else if (current != '/' || lastWasEscape == true || inBrackets == true)
830 {
831 // keep track of '[' and ']'
832 if (!lastWasEscape) {
833 if ( current == '[' && !inBrackets )
834 inBrackets = true;
835 if ( current == ']' && inBrackets )
836 inBrackets = false;
837 }
838 record16(current);
839 lastWasEscape =
840 !lastWasEscape && (current == '\\');
841 }
842 else { // end of regexp
843 pattern = UString(buffer16, pos16);
844 pos16 = 0;
845 shift(1);
846 break;
847 }
848 shift(1);
849 }
850
851 while (isIdentPart(current)) {
852 record16(current);
853 shift(1);
854 }
855 flags = UString(buffer16, pos16);
856
857 return true;
858}
859
860
861void Lexer::doneParsing()
862{
863 for (unsigned i = 0; i < numIdentifiers; i++) {
864 delete identifiers[i];
865 }
866 fastFree(identifiers);
867 identifiers = 0;
868 numIdentifiers = 0;
869 identifiersCapacity = 0;
870
871 for (unsigned i = 0; i < numStrings; i++) {
872 delete strings[i];
873 }
874 fastFree(strings);
875 strings = 0;
876 numStrings = 0;
877 stringsCapacity = 0;
878}
879
// Sizing policy for the bookkeeping arrays used by makeIdentifier() and
// makeUString(): start with 64 slots and double the capacity when full.
880const int initialCapacity = 64;
881const int growthFactor = 2;
882
883// FIXME: this completely ignores its parameters, instead using buffer16 and pos16 - wtf?
884Identifier *Lexer::makeIdentifier(KJS::UChar*, unsigned int)
885{
886 if (numIdentifiers == identifiersCapacity) {
887 identifiersCapacity = (identifiersCapacity == 0) ? initialCapacity : identifiersCapacity *growthFactor;
888 identifiers = (KJS::Identifier **)fastRealloc(identifiers, sizeof(KJS::Identifier *) * identifiersCapacity);
889 }
890
891 KJS::Identifier *identifier = new KJS::Identifier(buffer16, pos16);
892 identifiers[numIdentifiers++] = identifier;
893 return identifier;
894}
895
896// FIXME: this completely ignores its parameters, instead using buffer16 and pos16 - wtf?
897UString *Lexer::makeUString(KJS::UChar*, unsigned int)
898{
899 if (numStrings == stringsCapacity) {
900 stringsCapacity = (stringsCapacity == 0) ? initialCapacity : stringsCapacity *growthFactor;
901 strings = (UString **)fastRealloc(strings, sizeof(UString *) * stringsCapacity);
902 }
903
904 UString *string = new UString(buffer16, pos16);
905 strings[numStrings++] = string;
906 return string;
907}
908
909}
Note: See TracBrowser for help on using the repository browser.