source: webkit/trunk/JavaScriptCore/kjs/lexer.cpp@ 36100

Last change on this file since 36100 was 35245, checked in by [email protected], 17 years ago

Bug 18774: SQUIRRELFISH: print meaningful error messages <https://bugs.webkit.org/show_bug.cgi?id=18774>
<rdar://problem/5769353> SQUIRRELFISH: JavaScript error messages are missing informative text

Reviewed by Cameron Zwarich

Add support for decent error messages in JavaScript. This patch achieves this by
ensuring that the common errors and exceptions have messages that include the text of the
expression that triggered the exception. In addition, it attaches a number of properties to
the exception object detailing where in the source the expression came from.

  • Property svn:eol-style set to native
File size: 27.4 KB
Line 
1/*
2 * Copyright (C) 1999-2000 Harri Porten ([email protected])
3 * Copyright (C) 2006, 2007, 2008 Apple Inc. All Rights Reserved.
4 * Copyright (C) 2007 Cameron Zwarich ([email protected])
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Library General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Library General Public License for more details.
15 *
16 * You should have received a copy of the GNU Library General Public License
17 * along with this library; see the file COPYING.LIB. If not, write to
18 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19 * Boston, MA 02110-1301, USA.
20 *
21 */
22
23#include "config.h"
24#include "lexer.h"
25
26#include "dtoa.h"
27#include "JSFunction.h"
28#include "nodes.h"
29#include "NodeInfo.h"
30#include "JSGlobalObjectFunctions.h"
31#include <ctype.h>
32#include <limits.h>
33#include <string.h>
34#include <wtf/Assertions.h>
35#include <wtf/unicode/Unicode.h>
36
37using namespace WTF;
38using namespace Unicode;
39
40// we can't specify the namespace in yacc's C output, so do it here
41using namespace KJS;
42
43#ifndef KDE_USE_FINAL
44#include "grammar.h"
45#endif
46
47#include "lookup.h"
48#include "lexer.lut.h"
49
50// a bridge for yacc from the C world to C++
51int kjsyylex(void* lvalp, void* llocp, void* globalData)
52{
53 return static_cast<JSGlobalData*>(globalData)->lexer->lex(lvalp, llocp);
54}
55
56namespace KJS {
57
// Capacity reserved up front for the 8-bit and 16-bit token read buffers.
static const size_t initialReadBufferCapacity = 32;
// Capacity reserved up front for the string and identifier tables.
static const size_t initialStringTableCapacity = 64;

// Defined further down in this file; declared here so Lexer::lex() can use it.
static bool isDecimalDigit(int);
62
63Lexer::Lexer(JSGlobalData* globalData)
64 : yylineno(1)
65 , m_restrKeyword(false)
66 , m_eatNextIdentifier(false)
67 , m_stackToken(-1)
68 , m_lastToken(-1)
69 , m_position(0)
70 , m_code(0)
71 , m_length(0)
72 , m_atLineStart(true)
73 , m_current(0)
74 , m_next1(0)
75 , m_next2(0)
76 , m_next3(0)
77 , m_currentOffset(0)
78 , m_nextOffset1(0)
79 , m_nextOffset2(0)
80 , m_nextOffset3(0)
81 , m_globalData(globalData)
82 , m_mainTable(KJS::mainTable)
83{
84 m_buffer8.reserveCapacity(initialReadBufferCapacity);
85 m_buffer16.reserveCapacity(initialReadBufferCapacity);
86 m_strings.reserveCapacity(initialStringTableCapacity);
87 m_identifiers.reserveCapacity(initialStringTableCapacity);
88}
89
90Lexer::~Lexer()
91{
92 m_mainTable.deleteTable();
93}
94
95void Lexer::setCode(int startingLineNumber, PassRefPtr<SourceProvider> source)
96{
97 yylineno = startingLineNumber;
98 m_restrKeyword = false;
99 m_delimited = false;
100 m_eatNextIdentifier = false;
101 m_stackToken = -1;
102 m_lastToken = -1;
103
104 m_position = 0;
105 m_source = source;
106 m_code = m_source->data();
107 m_length = m_source->length();
108 m_skipLF = false;
109 m_skipCR = false;
110 m_error = false;
111 m_atLineStart = true;
112
113 // read first characters
114 shift(4);
115}
116
117void Lexer::shift(unsigned p)
118{
119 // ECMA-262 calls for stripping Cf characters here, but we only do this for BOM,
120 // see <https://bugs.webkit.org/show_bug.cgi?id=4931>.
121
122 while (p--) {
123 m_current = m_next1;
124 m_next1 = m_next2;
125 m_next2 = m_next3;
126 m_currentOffset = m_nextOffset1;
127 m_nextOffset1 = m_nextOffset2;
128 m_nextOffset2 = m_nextOffset3;
129 do {
130 if (m_position >= m_length) {
131 m_nextOffset3 = m_position;
132 m_position++;
133 m_next3 = -1;
134 break;
135 }
136 m_nextOffset3 = m_position;
137 m_next3 = m_code[m_position++];
138 } while (m_next3 == 0xFEFF);
139 }
140}
141
142// called on each new line
143void Lexer::nextLine()
144{
145 yylineno++;
146 m_atLineStart = true;
147}
148
149void Lexer::setDone(State s)
150{
151 m_state = s;
152 m_done = true;
153}
154
155int Lexer::lex(void* p1, void* p2)
156{
157 YYSTYPE* lvalp = static_cast<YYSTYPE*>(p1);
158 YYLTYPE* llocp = static_cast<YYLTYPE*>(p2);
159 int token = 0;
160 m_state = Start;
161 unsigned short stringType = 0; // either single or double quotes
162 m_buffer8.clear();
163 m_buffer16.clear();
164 m_done = false;
165 m_terminator = false;
166 m_skipLF = false;
167 m_skipCR = false;
168
169 // did we push a token on the stack previously ?
170 // (after an automatic semicolon insertion)
171 if (m_stackToken >= 0) {
172 setDone(Other);
173 token = m_stackToken;
174 m_stackToken = 0;
175 }
176 int startOffset = m_currentOffset;
177 while (!m_done) {
178 if (m_skipLF && m_current != '\n') // found \r but not \n afterwards
179 m_skipLF = false;
180 if (m_skipCR && m_current != '\r') // found \n but not \r afterwards
181 m_skipCR = false;
182 if (m_skipLF || m_skipCR) { // found \r\n or \n\r -> eat the second one
183 m_skipLF = false;
184 m_skipCR = false;
185 shift(1);
186 }
187 switch (m_state) {
188 case Start:
189 startOffset = m_currentOffset;
190 if (isWhiteSpace()) {
191 // do nothing
192 } else if (m_current == '/' && m_next1 == '/') {
193 shift(1);
194 m_state = InSingleLineComment;
195 } else if (m_current == '/' && m_next1 == '*') {
196 shift(1);
197 m_state = InMultiLineComment;
198 } else if (m_current == -1) {
199 if (!m_terminator && !m_delimited) {
200 // automatic semicolon insertion if program incomplete
201 token = ';';
202 m_stackToken = 0;
203 setDone(Other);
204 } else
205 setDone(Eof);
206 } else if (isLineTerminator()) {
207 nextLine();
208 m_terminator = true;
209 if (m_restrKeyword) {
210 token = ';';
211 setDone(Other);
212 }
213 } else if (m_current == '"' || m_current == '\'') {
214 m_state = InString;
215 stringType = static_cast<unsigned short>(m_current);
216 } else if (isIdentStart(m_current)) {
217 record16(m_current);
218 m_state = InIdentifierOrKeyword;
219 } else if (m_current == '\\')
220 m_state = InIdentifierStartUnicodeEscapeStart;
221 else if (m_current == '0') {
222 record8(m_current);
223 m_state = InNum0;
224 } else if (isDecimalDigit(m_current)) {
225 record8(m_current);
226 m_state = InNum;
227 } else if (m_current == '.' && isDecimalDigit(m_next1)) {
228 record8(m_current);
229 m_state = InDecimal;
230 // <!-- marks the beginning of a line comment (for www usage)
231 } else if (m_current == '<' && m_next1 == '!' && m_next2 == '-' && m_next3 == '-') {
232 shift(3);
233 m_state = InSingleLineComment;
234 // same for -->
235 } else if (m_atLineStart && m_current == '-' && m_next1 == '-' && m_next2 == '>') {
236 shift(2);
237 m_state = InSingleLineComment;
238 } else {
239 token = matchPunctuator(lvalp->intValue, m_current, m_next1, m_next2, m_next3);
240 if (token != -1)
241 setDone(Other);
242 else
243 setDone(Bad);
244 }
245 break;
246 case InString:
247 if (m_current == stringType) {
248 shift(1);
249 setDone(String);
250 } else if (isLineTerminator() || m_current == -1)
251 setDone(Bad);
252 else if (m_current == '\\')
253 m_state = InEscapeSequence;
254 else
255 record16(m_current);
256 break;
257 // Escape Sequences inside of strings
258 case InEscapeSequence:
259 if (isOctalDigit(m_current)) {
260 if (m_current >= '0' && m_current <= '3' &&
261 isOctalDigit(m_next1) && isOctalDigit(m_next2)) {
262 record16(convertOctal(m_current, m_next1, m_next2));
263 shift(2);
264 m_state = InString;
265 } else if (isOctalDigit(m_current) && isOctalDigit(m_next1)) {
266 record16(convertOctal('0', m_current, m_next1));
267 shift(1);
268 m_state = InString;
269 } else if (isOctalDigit(m_current)) {
270 record16(convertOctal('0', '0', m_current));
271 m_state = InString;
272 } else
273 setDone(Bad);
274 } else if (m_current == 'x')
275 m_state = InHexEscape;
276 else if (m_current == 'u')
277 m_state = InUnicodeEscape;
278 else if (isLineTerminator()) {
279 nextLine();
280 m_state = InString;
281 } else {
282 record16(singleEscape(static_cast<unsigned short>(m_current)));
283 m_state = InString;
284 }
285 break;
286 case InHexEscape:
287 if (isHexDigit(m_current) && isHexDigit(m_next1)) {
288 m_state = InString;
289 record16(convertHex(m_current, m_next1));
290 shift(1);
291 } else if (m_current == stringType) {
292 record16('x');
293 shift(1);
294 setDone(String);
295 } else {
296 record16('x');
297 record16(m_current);
298 m_state = InString;
299 }
300 break;
301 case InUnicodeEscape:
302 if (isHexDigit(m_current) && isHexDigit(m_next1) && isHexDigit(m_next2) && isHexDigit(m_next3)) {
303 record16(convertUnicode(m_current, m_next1, m_next2, m_next3));
304 shift(3);
305 m_state = InString;
306 } else if (m_current == stringType) {
307 record16('u');
308 shift(1);
309 setDone(String);
310 } else
311 setDone(Bad);
312 break;
313 case InSingleLineComment:
314 if (isLineTerminator()) {
315 nextLine();
316 m_terminator = true;
317 if (m_restrKeyword) {
318 token = ';';
319 setDone(Other);
320 } else
321 m_state = Start;
322 } else if (m_current == -1)
323 setDone(Eof);
324 break;
325 case InMultiLineComment:
326 if (m_current == -1)
327 setDone(Bad);
328 else if (isLineTerminator())
329 nextLine();
330 else if (m_current == '*' && m_next1 == '/') {
331 m_state = Start;
332 shift(1);
333 }
334 break;
335 case InIdentifierOrKeyword:
336 case InIdentifier:
337 if (isIdentPart(m_current))
338 record16(m_current);
339 else if (m_current == '\\')
340 m_state = InIdentifierPartUnicodeEscapeStart;
341 else
342 setDone(m_state == InIdentifierOrKeyword ? IdentifierOrKeyword : Identifier);
343 break;
344 case InNum0:
345 if (m_current == 'x' || m_current == 'X') {
346 record8(m_current);
347 m_state = InHex;
348 } else if (m_current == '.') {
349 record8(m_current);
350 m_state = InDecimal;
351 } else if (m_current == 'e' || m_current == 'E') {
352 record8(m_current);
353 m_state = InExponentIndicator;
354 } else if (isOctalDigit(m_current)) {
355 record8(m_current);
356 m_state = InOctal;
357 } else if (isDecimalDigit(m_current)) {
358 record8(m_current);
359 m_state = InDecimal;
360 } else
361 setDone(Number);
362 break;
363 case InHex:
364 if (isHexDigit(m_current))
365 record8(m_current);
366 else
367 setDone(Hex);
368 break;
369 case InOctal:
370 if (isOctalDigit(m_current))
371 record8(m_current);
372 else if (isDecimalDigit(m_current)) {
373 record8(m_current);
374 m_state = InDecimal;
375 } else
376 setDone(Octal);
377 break;
378 case InNum:
379 if (isDecimalDigit(m_current))
380 record8(m_current);
381 else if (m_current == '.') {
382 record8(m_current);
383 m_state = InDecimal;
384 } else if (m_current == 'e' || m_current == 'E') {
385 record8(m_current);
386 m_state = InExponentIndicator;
387 } else
388 setDone(Number);
389 break;
390 case InDecimal:
391 if (isDecimalDigit(m_current))
392 record8(m_current);
393 else if (m_current == 'e' || m_current == 'E') {
394 record8(m_current);
395 m_state = InExponentIndicator;
396 } else
397 setDone(Number);
398 break;
399 case InExponentIndicator:
400 if (m_current == '+' || m_current == '-')
401 record8(m_current);
402 else if (isDecimalDigit(m_current)) {
403 record8(m_current);
404 m_state = InExponent;
405 } else
406 setDone(Bad);
407 break;
408 case InExponent:
409 if (isDecimalDigit(m_current))
410 record8(m_current);
411 else
412 setDone(Number);
413 break;
414 case InIdentifierStartUnicodeEscapeStart:
415 if (m_current == 'u')
416 m_state = InIdentifierStartUnicodeEscape;
417 else
418 setDone(Bad);
419 break;
420 case InIdentifierPartUnicodeEscapeStart:
421 if (m_current == 'u')
422 m_state = InIdentifierPartUnicodeEscape;
423 else
424 setDone(Bad);
425 break;
426 case InIdentifierStartUnicodeEscape:
427 if (!isHexDigit(m_current) || !isHexDigit(m_next1) || !isHexDigit(m_next2) || !isHexDigit(m_next3)) {
428 setDone(Bad);
429 break;
430 }
431 token = convertUnicode(m_current, m_next1, m_next2, m_next3);
432 shift(3);
433 if (!isIdentStart(token)) {
434 setDone(Bad);
435 break;
436 }
437 record16(token);
438 m_state = InIdentifier;
439 break;
440 case InIdentifierPartUnicodeEscape:
441 if (!isHexDigit(m_current) || !isHexDigit(m_next1) || !isHexDigit(m_next2) || !isHexDigit(m_next3)) {
442 setDone(Bad);
443 break;
444 }
445 token = convertUnicode(m_current, m_next1, m_next2, m_next3);
446 shift(3);
447 if (!isIdentPart(token)) {
448 setDone(Bad);
449 break;
450 }
451 record16(token);
452 m_state = InIdentifier;
453 break;
454 default:
455 ASSERT(!"Unhandled state in switch statement");
456 }
457
458 // move on to the next character
459 if (!m_done)
460 shift(1);
461 if (m_state != Start && m_state != InSingleLineComment)
462 m_atLineStart = false;
463 }
464
465 // no identifiers allowed directly after numeric literal, e.g. "3in" is bad
466 if ((m_state == Number || m_state == Octal || m_state == Hex) && isIdentStart(m_current))
467 m_state = Bad;
468
469 // terminate string
470 m_buffer8.append('\0');
471
472#ifdef KJS_DEBUG_LEX
473 fprintf(stderr, "line: %d ", lineNo());
474 fprintf(stderr, "yytext (%x): ", m_buffer8[0]);
475 fprintf(stderr, "%s ", m_buffer8.data());
476#endif
477
478 double dval = 0;
479 if (m_state == Number)
480 dval = strtod(m_buffer8.data(), 0L);
481 else if (m_state == Hex) { // scan hex numbers
482 const char* p = m_buffer8.data() + 2;
483 while (char c = *p++) {
484 dval *= 16;
485 dval += convertHex(c);
486 }
487
488 if (dval >= mantissaOverflowLowerBound)
489 dval = parseIntOverflow(m_buffer8.data() + 2, p - (m_buffer8.data() + 3), 16);
490
491 m_state = Number;
492 } else if (m_state == Octal) { // scan octal number
493 const char* p = m_buffer8.data() + 1;
494 while (char c = *p++) {
495 dval *= 8;
496 dval += c - '0';
497 }
498
499 if (dval >= mantissaOverflowLowerBound)
500 dval = parseIntOverflow(m_buffer8.data() + 1, p - (m_buffer8.data() + 2), 8);
501
502 m_state = Number;
503 }
504
505#ifdef KJS_DEBUG_LEX
506 switch (m_state) {
507 case Eof:
508 printf("(EOF)\n");
509 break;
510 case Other:
511 printf("(Other)\n");
512 break;
513 case Identifier:
514 printf("(Identifier)/(Keyword)\n");
515 break;
516 case String:
517 printf("(String)\n");
518 break;
519 case Number:
520 printf("(Number)\n");
521 break;
522 default:
523 printf("(unknown)");
524 }
525#endif
526
527 if (m_state != Identifier)
528 m_eatNextIdentifier = false;
529
530 m_restrKeyword = false;
531 m_delimited = false;
532 llocp->first_line = yylineno;
533 llocp->last_line = yylineno;
534 llocp->first_column = startOffset;
535 llocp->last_column = m_currentOffset;
536 switch (m_state) {
537 case Eof:
538 token = 0;
539 break;
540 case Other:
541 if (token == '}' || token == ';')
542 m_delimited = true;
543 break;
544 case Identifier:
545 // Apply anonymous-function hack below (eat the identifier).
546 if (m_eatNextIdentifier) {
547 m_eatNextIdentifier = false;
548 token = lex(lvalp, llocp);
549 break;
550 }
551 lvalp->ident = makeIdentifier(m_buffer16);
552 token = IDENT;
553 break;
554 case IdentifierOrKeyword: {
555 lvalp->ident = makeIdentifier(m_buffer16);
556 const HashEntry* entry = m_mainTable.entry(m_globalData, *lvalp->ident);
557 if (!entry) {
558 // Lookup for keyword failed, means this is an identifier.
559 token = IDENT;
560 break;
561 }
562 token = entry->integerValue;
563 // Hack for "f = function somename() { ... }"; too hard to get into the grammar.
564 m_eatNextIdentifier = token == FUNCTION && m_lastToken == '=';
565 if (token == CONTINUE || token == BREAK || token == RETURN || token == THROW)
566 m_restrKeyword = true;
567 break;
568 }
569 case String:
570 lvalp->string = makeUString(m_buffer16);
571 token = STRING;
572 break;
573 case Number:
574 lvalp->doubleValue = dval;
575 token = NUMBER;
576 break;
577 case Bad:
578#ifdef KJS_DEBUG_LEX
579 fprintf(stderr, "yylex: ERROR.\n");
580#endif
581 m_error = true;
582 return -1;
583 default:
584 ASSERT(!"unhandled numeration value in switch");
585 m_error = true;
586 return -1;
587 }
588 m_lastToken = token;
589 return token;
590}
591
592bool Lexer::isWhiteSpace() const
593{
594 return m_current == '\t' || m_current == 0x0b || m_current == 0x0c || isSeparatorSpace(m_current);
595}
596
597bool Lexer::isLineTerminator()
598{
599 bool cr = (m_current == '\r');
600 bool lf = (m_current == '\n');
601 if (cr)
602 m_skipLF = true;
603 else if (lf)
604 m_skipCR = true;
605 return cr || lf || m_current == 0x2028 || m_current == 0x2029;
606}
607
608bool Lexer::isIdentStart(int c)
609{
610 return (category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other))
611 || c == '$' || c == '_';
612}
613
614bool Lexer::isIdentPart(int c)
615{
616 return (category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other
617 | Mark_NonSpacing | Mark_SpacingCombining | Number_DecimalDigit | Punctuation_Connector))
618 || c == '$' || c == '_';
619}
620
// True when c is one of the ASCII digits '0' through '9'.
static bool isDecimalDigit(int c)
{
    if (c < '0')
        return false;
    return c <= '9';
}
625
626bool Lexer::isHexDigit(int c)
627{
628 return (c >= '0' && c <= '9'
629 || c >= 'a' && c <= 'f'
630 || c >= 'A' && c <= 'F');
631}
632
633bool Lexer::isOctalDigit(int c)
634{
635 return (c >= '0' && c <= '7');
636}
637
638int Lexer::matchPunctuator(int& charPos, int c1, int c2, int c3, int c4)
639{
640 if (c1 == '>' && c2 == '>' && c3 == '>' && c4 == '=') {
641 shift(4);
642 return URSHIFTEQUAL;
643 }
644 if (c1 == '=' && c2 == '=' && c3 == '=') {
645 shift(3);
646 return STREQ;
647 }
648 if (c1 == '!' && c2 == '=' && c3 == '=') {
649 shift(3);
650 return STRNEQ;
651 }
652 if (c1 == '>' && c2 == '>' && c3 == '>') {
653 shift(3);
654 return URSHIFT;
655 }
656 if (c1 == '<' && c2 == '<' && c3 == '=') {
657 shift(3);
658 return LSHIFTEQUAL;
659 }
660 if (c1 == '>' && c2 == '>' && c3 == '=') {
661 shift(3);
662 return RSHIFTEQUAL;
663 }
664 if (c1 == '<' && c2 == '=') {
665 shift(2);
666 return LE;
667 }
668 if (c1 == '>' && c2 == '=') {
669 shift(2);
670 return GE;
671 }
672 if (c1 == '!' && c2 == '=') {
673 shift(2);
674 return NE;
675 }
676 if (c1 == '+' && c2 == '+') {
677 shift(2);
678 if (m_terminator)
679 return AUTOPLUSPLUS;
680 return PLUSPLUS;
681 }
682 if (c1 == '-' && c2 == '-') {
683 shift(2);
684 if (m_terminator)
685 return AUTOMINUSMINUS;
686 return MINUSMINUS;
687 }
688 if (c1 == '=' && c2 == '=') {
689 shift(2);
690 return EQEQ;
691 }
692 if (c1 == '+' && c2 == '=') {
693 shift(2);
694 return PLUSEQUAL;
695 }
696 if (c1 == '-' && c2 == '=') {
697 shift(2);
698 return MINUSEQUAL;
699 }
700 if (c1 == '*' && c2 == '=') {
701 shift(2);
702 return MULTEQUAL;
703 }
704 if (c1 == '/' && c2 == '=') {
705 shift(2);
706 return DIVEQUAL;
707 }
708 if (c1 == '&' && c2 == '=') {
709 shift(2);
710 return ANDEQUAL;
711 }
712 if (c1 == '^' && c2 == '=') {
713 shift(2);
714 return XOREQUAL;
715 }
716 if (c1 == '%' && c2 == '=') {
717 shift(2);
718 return MODEQUAL;
719 }
720 if (c1 == '|' && c2 == '=') {
721 shift(2);
722 return OREQUAL;
723 }
724 if (c1 == '<' && c2 == '<') {
725 shift(2);
726 return LSHIFT;
727 }
728 if (c1 == '>' && c2 == '>') {
729 shift(2);
730 return RSHIFT;
731 }
732 if (c1 == '&' && c2 == '&') {
733 shift(2);
734 return AND;
735 }
736 if (c1 == '|' && c2 == '|') {
737 shift(2);
738 return OR;
739 }
740
741 switch (c1) {
742 case '=':
743 case '>':
744 case '<':
745 case ',':
746 case '!':
747 case '~':
748 case '?':
749 case ':':
750 case '.':
751 case '+':
752 case '-':
753 case '*':
754 case '/':
755 case '&':
756 case '|':
757 case '^':
758 case '%':
759 case '(':
760 case ')':
761 case '[':
762 case ']':
763 case ';':
764 shift(1);
765 return static_cast<int>(c1);
766 case '{':
767 charPos = m_position - 4;
768 shift(1);
769 return OPENBRACE;
770 case '}':
771 charPos = m_position - 4;
772 shift(1);
773 return CLOSEBRACE;
774 default:
775 return -1;
776 }
777}
778
779unsigned short Lexer::singleEscape(unsigned short c)
780{
781 switch (c) {
782 case 'b':
783 return 0x08;
784 case 't':
785 return 0x09;
786 case 'n':
787 return 0x0A;
788 case 'v':
789 return 0x0B;
790 case 'f':
791 return 0x0C;
792 case 'r':
793 return 0x0D;
794 case '"':
795 return 0x22;
796 case '\'':
797 return 0x27;
798 case '\\':
799 return 0x5C;
800 default:
801 return c;
802 }
803}
804
805unsigned short Lexer::convertOctal(int c1, int c2, int c3)
806{
807 return static_cast<unsigned short>((c1 - '0') * 64 + (c2 - '0') * 8 + c3 - '0');
808}
809
810unsigned char Lexer::convertHex(int c)
811{
812 if (c >= '0' && c <= '9')
813 return static_cast<unsigned char>(c - '0');
814 if (c >= 'a' && c <= 'f')
815 return static_cast<unsigned char>(c - 'a' + 10);
816 return static_cast<unsigned char>(c - 'A' + 10);
817}
818
819unsigned char Lexer::convertHex(int c1, int c2)
820{
821 return ((convertHex(c1) << 4) + convertHex(c2));
822}
823
824UChar Lexer::convertUnicode(int c1, int c2, int c3, int c4)
825{
826 unsigned char highByte = (convertHex(c1) << 4) + convertHex(c2);
827 unsigned char lowByte = (convertHex(c3) << 4) + convertHex(c4);
828 return (highByte << 8 | lowByte);
829}
830
831void Lexer::record8(int c)
832{
833 ASSERT(c >= 0);
834 ASSERT(c <= 0xff);
835 m_buffer8.append(static_cast<char>(c));
836}
837
838void Lexer::record16(int c)
839{
840 ASSERT(c >= 0);
841 ASSERT(c <= USHRT_MAX);
842 record16(UChar(static_cast<unsigned short>(c)));
843}
844
845void Lexer::record16(UChar c)
846{
847 m_buffer16.append(c);
848}
849
850bool Lexer::scanRegExp()
851{
852 m_buffer16.clear();
853 bool lastWasEscape = false;
854 bool inBrackets = false;
855
856 while (1) {
857 if (isLineTerminator() || m_current == -1)
858 return false;
859 else if (m_current != '/' || lastWasEscape == true || inBrackets == true) {
860 // keep track of '[' and ']'
861 if (!lastWasEscape) {
862 if ( m_current == '[' && !inBrackets )
863 inBrackets = true;
864 if ( m_current == ']' && inBrackets )
865 inBrackets = false;
866 }
867 record16(m_current);
868 lastWasEscape =
869 !lastWasEscape && (m_current == '\\');
870 } else { // end of regexp
871 m_pattern = UString(m_buffer16);
872 m_buffer16.clear();
873 shift(1);
874 break;
875 }
876 shift(1);
877 }
878
879 while (isIdentPart(m_current)) {
880 record16(m_current);
881 shift(1);
882 }
883 m_flags = UString(m_buffer16);
884
885 return true;
886}
887
888void Lexer::clear()
889{
890 deleteAllValues(m_strings);
891 Vector<UString*> newStrings;
892 newStrings.reserveCapacity(initialStringTableCapacity);
893 m_strings.swap(newStrings);
894
895 deleteAllValues(m_identifiers);
896 Vector<KJS::Identifier*> newIdentifiers;
897 newIdentifiers.reserveCapacity(initialStringTableCapacity);
898 m_identifiers.swap(newIdentifiers);
899
900 Vector<char> newBuffer8;
901 newBuffer8.reserveCapacity(initialReadBufferCapacity);
902 m_buffer8.swap(newBuffer8);
903
904 Vector<UChar> newBuffer16;
905 newBuffer16.reserveCapacity(initialReadBufferCapacity);
906 m_buffer16.swap(newBuffer16);
907
908 m_pattern = 0;
909 m_flags = 0;
910}
911
912Identifier* Lexer::makeIdentifier(const Vector<UChar>& buffer)
913{
914 KJS::Identifier* identifier = new KJS::Identifier(m_globalData, buffer.data(), buffer.size());
915 m_identifiers.append(identifier);
916 return identifier;
917}
918
919UString* Lexer::makeUString(const Vector<UChar>& buffer)
920{
921 UString* string = new UString(buffer);
922 m_strings.append(string);
923 return string;
924}
925
926} // namespace KJS
Note: See TracBrowser for help on using the repository browser.