source: webkit/trunk/JavaScriptCore/kjs/lexer.cpp@ 34581

Last change on this file since 34581 was 34581, checked in by Darin Adler, 17 years ago

JavaScriptCore:

2008-06-15 Darin Adler <Darin Adler>

  • API/JSBase.cpp:
  • API/JSCallbackConstructor.h:
  • API/JSCallbackFunction.cpp:
  • API/JSCallbackFunction.h:
  • API/JSCallbackObject.h:
  • API/JSCallbackObjectFunctions.h:
  • API/JSClassRef.h:
  • API/JSContextRef.cpp:
  • API/JSObjectRef.cpp:
  • API/JSStringRef.cpp:
  • API/JSStringRefCF.cpp:
  • API/JSValueRef.cpp:
  • GNUmakefile.am:
  • JavaScriptCore.pri:
  • JavaScriptCore.vcproj/JavaScriptCore/JavaScriptCore.vcproj:
  • JavaScriptCore.xcodeproj/project.pbxproj:
  • JavaScriptCoreSources.bkl:
  • VM/CodeBlock.cpp:
  • VM/CodeGenerator.cpp:
  • VM/ExceptionHelpers.cpp:
  • VM/ExceptionHelpers.h:
  • VM/JSPropertyNameIterator.cpp:
  • VM/JSPropertyNameIterator.h:
  • VM/Machine.cpp:
  • kjs/AllInOneFile.cpp:
  • kjs/DateMath.cpp:
  • kjs/DebuggerCallFrame.cpp:
  • kjs/ExecState.cpp:
  • kjs/JSActivation.cpp:
  • kjs/JSFunction.cpp: Copied from JavaScriptCore/kjs/function.cpp.
  • kjs/JSFunction.h: Copied from JavaScriptCore/kjs/function.h.
  • kjs/JSImmediate.cpp:
  • kjs/JSNotAnObject.h:
  • kjs/JSObject.cpp: Copied from JavaScriptCore/kjs/object.cpp.
  • kjs/JSObject.h: Copied from JavaScriptCore/kjs/object.h.
  • kjs/JSString.h: Copied from JavaScriptCore/kjs/internal.h.
  • kjs/JSValue.cpp: Copied from JavaScriptCore/kjs/value.cpp.
  • kjs/JSValue.h: Copied from JavaScriptCore/kjs/value.h.
  • kjs/JSVariableObject.h:
  • kjs/JSWrapperObject.h:
  • kjs/Shell.cpp:
  • kjs/SymbolTable.h:
  • kjs/array_instance.h:
  • kjs/collector.cpp:
  • kjs/date_object.cpp:
  • kjs/date_object.h:
  • kjs/error_object.cpp:
  • kjs/function.cpp: Removed.
  • kjs/function.h: Removed.
  • kjs/function_object.cpp:
  • kjs/function_object.h:
  • kjs/grammar.y:
  • kjs/internal.cpp:
  • kjs/internal.h: Removed.
  • kjs/lexer.cpp:
  • kjs/list.h:
  • kjs/lookup.h:
  • kjs/nodes.h:
  • kjs/object.cpp: Removed.
  • kjs/object.h: Removed.
  • kjs/object_object.h:
  • kjs/operations.cpp:
  • kjs/property_map.cpp:
  • kjs/property_slot.cpp:
  • kjs/property_slot.h:
  • kjs/protect.h:
  • kjs/regexp_object.cpp:
  • kjs/scope_chain.cpp:
  • kjs/string_object.h:
  • kjs/ustring.cpp:
  • kjs/value.cpp: Removed.
  • kjs/value.h: Removed.
  • profiler/Profile.cpp:
  • profiler/Profiler.cpp:

JavaScriptGlue:

2008-06-15 Darin Adler <Darin Adler>

  • ForwardingHeaders/kjs/JSValue.h: Copied from JavaScriptGlue/ForwardingHeaders/kjs/value.h.
  • ForwardingHeaders/kjs/value.h: Removed.
  • JSUtils.h:

WebCore:

2008-06-15 Darin Adler <Darin Adler>

  • ForwardingHeaders/kjs/JSFunction.h: Copied from WebCore/ForwardingHeaders/kjs/function.h.
  • ForwardingHeaders/kjs/JSObject.h: Copied from WebCore/ForwardingHeaders/kjs/object.h.
  • ForwardingHeaders/kjs/JSString.h: Copied from WebCore/ForwardingHeaders/kjs/internal.h.
  • ForwardingHeaders/kjs/JSValue.h: Copied from WebCore/ForwardingHeaders/kjs/value.h.
  • ForwardingHeaders/kjs/function.h: Removed.
  • ForwardingHeaders/kjs/internal.h: Removed.
  • ForwardingHeaders/kjs/object.h: Removed.
  • ForwardingHeaders/kjs/value.h: Removed.
  • WebCore.vcproj/WebCore.vcproj:
  • bindings/js/JSCustomSQLStatementCallback.h:
  • bindings/js/JSCustomSQLStatementErrorCallback.h:
  • bindings/js/JSCustomSQLTransactionErrorCallback.h:
  • bindings/js/JSCustomVoidCallback.h:
  • bindings/js/JSDOMBinding.h:
  • bindings/js/JSDOMWindowCustom.cpp:
  • bindings/js/JSDOMWindowShell.cpp:
  • bindings/js/JSQuarantinedObjectWrapper.h:
  • bridge/objc/objc_runtime.h:
  • bridge/objc/objc_utility.h:
  • bridge/qt/qt_runtime.cpp:
  • bridge/runtime.h:
  • bridge/runtime_array.h:
  • bridge/runtime_method.h:
  • bridge/runtime_object.h:
  • bridge/testbindings.cpp:
  • bridge/testbindings.mm:
  • bridge/testqtbindings.cpp:
  • loader/FrameLoader.cpp:
  • page/JavaScriptCallFrame.cpp:
  • page/JavaScriptProfile.cpp:
  • page/JavaScriptProfileNode.cpp:
  • platform/graphics/gtk/VideoSinkGStreamer.h:
  • plugins/PluginView.cpp:
  • plugins/gtk/PluginViewGtk.cpp:
  • plugins/qt/PluginViewQt.cpp:
  • plugins/win/PluginViewWin.cpp:

WebKit/gtk:

2008-06-15 Darin Adler <Darin Adler>

  • WebCoreSupport/ContextMenuClientGtk.cpp:
  • webkit/webkitnetworkrequest.h:
  • webkit/webkitwebbackforwardlist.h:
  • webkit/webkitwebframe.h:
  • webkit/webkitwebhistoryitem.h:
  • webkit/webkitwebsettings.h:

WebKit/mac:

2008-06-15 Darin Adler <Darin Adler>

  • ForwardingHeaders/kjs/JSFunction.h: Copied from WebKit/mac/ForwardingHeaders/kjs/function.h.
  • ForwardingHeaders/kjs/JSObject.h: Copied from WebKit/mac/ForwardingHeaders/kjs/object.h.
  • ForwardingHeaders/kjs/JSString.h: Copied from WebKit/mac/ForwardingHeaders/kjs/internal.h.
  • ForwardingHeaders/kjs/JSValue.h: Copied from WebKit/mac/ForwardingHeaders/kjs/value.h.
  • ForwardingHeaders/kjs/function.h: Removed.
  • ForwardingHeaders/kjs/internal.h: Removed.
  • ForwardingHeaders/kjs/object.h: Removed.
  • ForwardingHeaders/kjs/value.h: Removed.
  • WebView/WebScriptDebugDelegate.mm:

WebKit/qt:

2008-06-15 Darin Adler <Darin Adler>

  • Api/qwebframe.cpp:

WebKit/win:

2008-06-15 Darin Adler <Darin Adler>

  • WebView.cpp:

WebKit/wx:

2008-06-15 Darin Adler <Darin Adler>

  • WebView.cpp:

WebKitTools:

2008-06-15 Darin Adler <Darin Adler>

  • Scripts/do-file-rename: Updated for the latest round of renaming.
  • Scripts/do-webcore-rename: Tweaked and reorganized a bit.
  • Property svn:eol-style set to native
File size: 22.1 KB
Line 
1/*
2 * Copyright (C) 1999-2000 Harri Porten ([email protected])
3 * Copyright (C) 2006, 2007, 2008 Apple Inc. All Rights Reserved.
4 * Copyright (C) 2007 Cameron Zwarich ([email protected])
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Library General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Library General Public License for more details.
15 *
16 * You should have received a copy of the GNU Library General Public License
17 * along with this library; see the file COPYING.LIB. If not, write to
18 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19 * Boston, MA 02110-1301, USA.
20 *
21 */
22
23#include "config.h"
24#include "lexer.h"
25
26#include "dtoa.h"
27#include "JSFunction.h"
28#include "nodes.h"
29#include "NodeInfo.h"
30#include <ctype.h>
31#include <limits.h>
32#include <string.h>
33#include <wtf/Assertions.h>
34#include <wtf/unicode/Unicode.h>
35
36using namespace WTF;
37using namespace Unicode;
38
39// we can't specify the namespace in yacc's C output, so do it here
40using namespace KJS;
41
42#ifndef KDE_USE_FINAL
43#include "grammar.h"
44#endif
45
46#include "lookup.h"
47#include "lexer.lut.h"
48
49// a bridge for yacc from the C world to C++
50int kjsyylex(void* lvalp, void* llocp, void* globalData)
51{
52 return static_cast<JSGlobalData*>(globalData)->lexer->lex(lvalp, llocp);
53}
54
55namespace KJS {
56
// Defined near the end of this file; declared here because lex() uses it first.
static bool isDecimalDigit(int c);

// Capacity reserved up front for the 8-bit and 16-bit token read buffers.
static const size_t initialReadBufferCapacity = 32;
// Capacity reserved up front for the tables of allocated strings and identifiers.
static const size_t initialStringTableCapacity = 64;
61
62Lexer::Lexer()
63 : yylineno(1)
64 , restrKeyword(false)
65 , eatNextIdentifier(false)
66 , stackToken(-1)
67 , lastToken(-1)
68 , pos(0)
69 , code(0)
70 , length(0)
71 , atLineStart(true)
72 , current(0)
73 , next1(0)
74 , next2(0)
75 , next3(0)
76 , mainTable(KJS::mainTable)
77{
78 m_buffer8.reserveCapacity(initialReadBufferCapacity);
79 m_buffer16.reserveCapacity(initialReadBufferCapacity);
80 m_strings.reserveCapacity(initialStringTableCapacity);
81 m_identifiers.reserveCapacity(initialStringTableCapacity);
82}
83
84Lexer::~Lexer()
85{
86 delete[] mainTable.table;
87}
88
89void Lexer::setCode(int startingLineNumber, PassRefPtr<SourceProvider> source)
90{
91 yylineno = startingLineNumber;
92 restrKeyword = false;
93 delimited = false;
94 eatNextIdentifier = false;
95 stackToken = -1;
96 lastToken = -1;
97
98 pos = 0;
99 m_source = source;
100 code = m_source->data();
101 length = m_source->length();
102 skipLF = false;
103 skipCR = false;
104 error = false;
105 atLineStart = true;
106
107 // read first characters
108 shift(4);
109}
110
111void Lexer::shift(unsigned p)
112{
113 // ECMA-262 calls for stripping Cf characters here, but we only do this for BOM,
114 // see <https://bugs.webkit.org/show_bug.cgi?id=4931>.
115
116 while (p--) {
117 current = next1;
118 next1 = next2;
119 next2 = next3;
120 do {
121 if (pos >= length) {
122 pos++;
123 next3 = -1;
124 break;
125 }
126 next3 = code[pos++];
127 } while (next3 == 0xFEFF);
128 }
129}
130
131// called on each new line
132void Lexer::nextLine()
133{
134 yylineno++;
135 atLineStart = true;
136}
137
138void Lexer::setDone(State s)
139{
140 state = s;
141 done = true;
142}
143
144int Lexer::lex(void* p1, void* p2)
145{
146 YYSTYPE* lvalp = static_cast<YYSTYPE*>(p1);
147 YYLTYPE* llocp = static_cast<YYLTYPE*>(p2);
148 int token = 0;
149 state = Start;
150 unsigned short stringType = 0; // either single or double quotes
151 m_buffer8.clear();
152 m_buffer16.clear();
153 done = false;
154 terminator = false;
155 skipLF = false;
156 skipCR = false;
157
158 // did we push a token on the stack previously ?
159 // (after an automatic semicolon insertion)
160 if (stackToken >= 0) {
161 setDone(Other);
162 token = stackToken;
163 stackToken = 0;
164 }
165
166 while (!done) {
167 if (skipLF && current != '\n') // found \r but not \n afterwards
168 skipLF = false;
169 if (skipCR && current != '\r') // found \n but not \r afterwards
170 skipCR = false;
171 if (skipLF || skipCR) // found \r\n or \n\r -> eat the second one
172 {
173 skipLF = false;
174 skipCR = false;
175 shift(1);
176 }
177 switch (state) {
178 case Start:
179 if (isWhiteSpace()) {
180 // do nothing
181 } else if (current == '/' && next1 == '/') {
182 shift(1);
183 state = InSingleLineComment;
184 } else if (current == '/' && next1 == '*') {
185 shift(1);
186 state = InMultiLineComment;
187 } else if (current == -1) {
188 if (!terminator && !delimited) {
189 // automatic semicolon insertion if program incomplete
190 token = ';';
191 stackToken = 0;
192 setDone(Other);
193 } else
194 setDone(Eof);
195 } else if (isLineTerminator()) {
196 nextLine();
197 terminator = true;
198 if (restrKeyword) {
199 token = ';';
200 setDone(Other);
201 }
202 } else if (current == '"' || current == '\'') {
203 state = InString;
204 stringType = static_cast<unsigned short>(current);
205 } else if (isIdentStart(current)) {
206 record16(current);
207 state = InIdentifierOrKeyword;
208 } else if (current == '\\') {
209 state = InIdentifierStartUnicodeEscapeStart;
210 } else if (current == '0') {
211 record8(current);
212 state = InNum0;
213 } else if (isDecimalDigit(current)) {
214 record8(current);
215 state = InNum;
216 } else if (current == '.' && isDecimalDigit(next1)) {
217 record8(current);
218 state = InDecimal;
219 // <!-- marks the beginning of a line comment (for www usage)
220 } else if (current == '<' && next1 == '!' &&
221 next2 == '-' && next3 == '-') {
222 shift(3);
223 state = InSingleLineComment;
224 // same for -->
225 } else if (atLineStart && current == '-' && next1 == '-' && next2 == '>') {
226 shift(2);
227 state = InSingleLineComment;
228 } else {
229 token = matchPunctuator(lvalp->intValue, current, next1, next2, next3);
230 if (token != -1) {
231 setDone(Other);
232 } else {
233 // cerr << "encountered unknown character" << endl;
234 setDone(Bad);
235 }
236 }
237 break;
238 case InString:
239 if (current == stringType) {
240 shift(1);
241 setDone(String);
242 } else if (isLineTerminator() || current == -1) {
243 setDone(Bad);
244 } else if (current == '\\') {
245 state = InEscapeSequence;
246 } else {
247 record16(current);
248 }
249 break;
250 // Escape Sequences inside of strings
251 case InEscapeSequence:
252 if (isOctalDigit(current)) {
253 if (current >= '0' && current <= '3' &&
254 isOctalDigit(next1) && isOctalDigit(next2)) {
255 record16(convertOctal(current, next1, next2));
256 shift(2);
257 state = InString;
258 } else if (isOctalDigit(current) && isOctalDigit(next1)) {
259 record16(convertOctal('0', current, next1));
260 shift(1);
261 state = InString;
262 } else if (isOctalDigit(current)) {
263 record16(convertOctal('0', '0', current));
264 state = InString;
265 } else {
266 setDone(Bad);
267 }
268 } else if (current == 'x')
269 state = InHexEscape;
270 else if (current == 'u')
271 state = InUnicodeEscape;
272 else if (isLineTerminator()) {
273 nextLine();
274 state = InString;
275 } else {
276 record16(singleEscape(static_cast<unsigned short>(current)));
277 state = InString;
278 }
279 break;
280 case InHexEscape:
281 if (isHexDigit(current) && isHexDigit(next1)) {
282 state = InString;
283 record16(convertHex(current, next1));
284 shift(1);
285 } else if (current == stringType) {
286 record16('x');
287 shift(1);
288 setDone(String);
289 } else {
290 record16('x');
291 record16(current);
292 state = InString;
293 }
294 break;
295 case InUnicodeEscape:
296 if (isHexDigit(current) && isHexDigit(next1) && isHexDigit(next2) && isHexDigit(next3)) {
297 record16(convertUnicode(current, next1, next2, next3));
298 shift(3);
299 state = InString;
300 } else if (current == stringType) {
301 record16('u');
302 shift(1);
303 setDone(String);
304 } else {
305 setDone(Bad);
306 }
307 break;
308 case InSingleLineComment:
309 if (isLineTerminator()) {
310 nextLine();
311 terminator = true;
312 if (restrKeyword) {
313 token = ';';
314 setDone(Other);
315 } else
316 state = Start;
317 } else if (current == -1) {
318 setDone(Eof);
319 }
320 break;
321 case InMultiLineComment:
322 if (current == -1) {
323 setDone(Bad);
324 } else if (isLineTerminator()) {
325 nextLine();
326 } else if (current == '*' && next1 == '/') {
327 state = Start;
328 shift(1);
329 }
330 break;
331 case InIdentifierOrKeyword:
332 case InIdentifier:
333 if (isIdentPart(current))
334 record16(current);
335 else if (current == '\\')
336 state = InIdentifierPartUnicodeEscapeStart;
337 else
338 setDone(state == InIdentifierOrKeyword ? IdentifierOrKeyword : Identifier);
339 break;
340 case InNum0:
341 if (current == 'x' || current == 'X') {
342 record8(current);
343 state = InHex;
344 } else if (current == '.') {
345 record8(current);
346 state = InDecimal;
347 } else if (current == 'e' || current == 'E') {
348 record8(current);
349 state = InExponentIndicator;
350 } else if (isOctalDigit(current)) {
351 record8(current);
352 state = InOctal;
353 } else if (isDecimalDigit(current)) {
354 record8(current);
355 state = InDecimal;
356 } else {
357 setDone(Number);
358 }
359 break;
360 case InHex:
361 if (isHexDigit(current)) {
362 record8(current);
363 } else {
364 setDone(Hex);
365 }
366 break;
367 case InOctal:
368 if (isOctalDigit(current)) {
369 record8(current);
370 }
371 else if (isDecimalDigit(current)) {
372 record8(current);
373 state = InDecimal;
374 } else
375 setDone(Octal);
376 break;
377 case InNum:
378 if (isDecimalDigit(current)) {
379 record8(current);
380 } else if (current == '.') {
381 record8(current);
382 state = InDecimal;
383 } else if (current == 'e' || current == 'E') {
384 record8(current);
385 state = InExponentIndicator;
386 } else
387 setDone(Number);
388 break;
389 case InDecimal:
390 if (isDecimalDigit(current)) {
391 record8(current);
392 } else if (current == 'e' || current == 'E') {
393 record8(current);
394 state = InExponentIndicator;
395 } else
396 setDone(Number);
397 break;
398 case InExponentIndicator:
399 if (current == '+' || current == '-') {
400 record8(current);
401 } else if (isDecimalDigit(current)) {
402 record8(current);
403 state = InExponent;
404 } else
405 setDone(Bad);
406 break;
407 case InExponent:
408 if (isDecimalDigit(current)) {
409 record8(current);
410 } else
411 setDone(Number);
412 break;
413 case InIdentifierStartUnicodeEscapeStart:
414 if (current == 'u')
415 state = InIdentifierStartUnicodeEscape;
416 else
417 setDone(Bad);
418 break;
419 case InIdentifierPartUnicodeEscapeStart:
420 if (current == 'u')
421 state = InIdentifierPartUnicodeEscape;
422 else
423 setDone(Bad);
424 break;
425 case InIdentifierStartUnicodeEscape:
426 if (!isHexDigit(current) || !isHexDigit(next1) || !isHexDigit(next2) || !isHexDigit(next3)) {
427 setDone(Bad);
428 break;
429 }
430 token = convertUnicode(current, next1, next2, next3);
431 shift(3);
432 if (!isIdentStart(token)) {
433 setDone(Bad);
434 break;
435 }
436 record16(token);
437 state = InIdentifier;
438 break;
439 case InIdentifierPartUnicodeEscape:
440 if (!isHexDigit(current) || !isHexDigit(next1) || !isHexDigit(next2) || !isHexDigit(next3)) {
441 setDone(Bad);
442 break;
443 }
444 token = convertUnicode(current, next1, next2, next3);
445 shift(3);
446 if (!isIdentPart(token)) {
447 setDone(Bad);
448 break;
449 }
450 record16(token);
451 state = InIdentifier;
452 break;
453 default:
454 ASSERT(!"Unhandled state in switch statement");
455 }
456
457 // move on to the next character
458 if (!done)
459 shift(1);
460 if (state != Start && state != InSingleLineComment)
461 atLineStart = false;
462 }
463
464 // no identifiers allowed directly after numeric literal, e.g. "3in" is bad
465 if ((state == Number || state == Octal || state == Hex) && isIdentStart(current))
466 state = Bad;
467
468 // terminate string
469 m_buffer8.append('\0');
470
471#ifdef KJS_DEBUG_LEX
472 fprintf(stderr, "line: %d ", lineNo());
473 fprintf(stderr, "yytext (%x): ", m_buffer8[0]);
474 fprintf(stderr, "%s ", buffer8.data());
475#endif
476
477 double dval = 0;
478 if (state == Number) {
479 dval = strtod(m_buffer8.data(), 0L);
480 } else if (state == Hex) { // scan hex numbers
481 const char* p = m_buffer8.data() + 2;
482 while (char c = *p++) {
483 dval *= 16;
484 dval += convertHex(c);
485 }
486
487 if (dval >= mantissaOverflowLowerBound)
488 dval = parseIntOverflow(m_buffer8.data() + 2, p - (m_buffer8.data() + 3), 16);
489
490 state = Number;
491 } else if (state == Octal) { // scan octal number
492 const char* p = m_buffer8.data() + 1;
493 while (char c = *p++) {
494 dval *= 8;
495 dval += c - '0';
496 }
497
498 if (dval >= mantissaOverflowLowerBound)
499 dval = parseIntOverflow(m_buffer8.data() + 1, p - (m_buffer8.data() + 2), 8);
500
501 state = Number;
502 }
503
504#ifdef KJS_DEBUG_LEX
505 switch (state) {
506 case Eof:
507 printf("(EOF)\n");
508 break;
509 case Other:
510 printf("(Other)\n");
511 break;
512 case Identifier:
513 printf("(Identifier)/(Keyword)\n");
514 break;
515 case String:
516 printf("(String)\n");
517 break;
518 case Number:
519 printf("(Number)\n");
520 break;
521 default:
522 printf("(unknown)");
523 }
524#endif
525
526 if (state != Identifier)
527 eatNextIdentifier = false;
528
529 restrKeyword = false;
530 delimited = false;
531 llocp->first_line = yylineno; // ???
532 llocp->last_line = yylineno;
533
534 switch (state) {
535 case Eof:
536 token = 0;
537 break;
538 case Other:
539 if (token == '}' || token == ';')
540 delimited = true;
541 break;
542 case Identifier:
543 // Apply anonymous-function hack below (eat the identifier).
544 if (eatNextIdentifier) {
545 eatNextIdentifier = false;
546 token = lex(lvalp, llocp);
547 break;
548 }
549 lvalp->ident = makeIdentifier(m_buffer16);
550 token = IDENT;
551 break;
552 case IdentifierOrKeyword:
553 lvalp->ident = makeIdentifier(m_buffer16);
554 if ((token = mainTable.value(*lvalp->ident)) < 0) {
555 // Lookup for keyword failed, means this is an identifier.
556 token = IDENT;
557 break;
558 }
559 // Hack for "f = function somename() { ... }"; too hard to get into the grammar.
560 eatNextIdentifier = token == FUNCTION && lastToken == '=';
561 if (token == CONTINUE || token == BREAK || token == RETURN || token == THROW)
562 restrKeyword = true;
563 break;
564 case String:
565 lvalp->string = makeUString(m_buffer16);
566 token = STRING;
567 break;
568 case Number:
569 lvalp->doubleValue = dval;
570 token = NUMBER;
571 break;
572 case Bad:
573#ifdef KJS_DEBUG_LEX
574 fprintf(stderr, "yylex: ERROR.\n");
575#endif
576 error = true;
577 return -1;
578 default:
579 ASSERT(!"unhandled numeration value in switch");
580 error = true;
581 return -1;
582 }
583 lastToken = token;
584 return token;
585}
586
587bool Lexer::isWhiteSpace() const
588{
589 return current == '\t' || current == 0x0b || current == 0x0c || isSeparatorSpace(current);
590}
591
592bool Lexer::isLineTerminator()
593{
594 bool cr = (current == '\r');
595 bool lf = (current == '\n');
596 if (cr)
597 skipLF = true;
598 else if (lf)
599 skipCR = true;
600 return cr || lf || current == 0x2028 || current == 0x2029;
601}
602
603bool Lexer::isIdentStart(int c)
604{
605 return (category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other))
606 || c == '$' || c == '_';
607}
608
609bool Lexer::isIdentPart(int c)
610{
611 return (category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other
612 | Mark_NonSpacing | Mark_SpacingCombining | Number_DecimalDigit | Punctuation_Connector))
613 || c == '$' || c == '_';
614}
615
// True when c is one of the ASCII digits '0' through '9'.
static bool isDecimalDigit(int c)
{
    if (c < '0')
        return false;
    return c <= '9';
}
620
621bool Lexer::isHexDigit(int c)
622{
623 return (c >= '0' && c <= '9' ||
624 c >= 'a' && c <= 'f' ||
625 c >= 'A' && c <= 'F');
626}
627
628bool Lexer::isOctalDigit(int c)
629{
630 return (c >= '0' && c <= '7');
631}
632
633int Lexer::matchPunctuator(int& charPos, int c1, int c2, int c3, int c4)
634{
635 if (c1 == '>' && c2 == '>' && c3 == '>' && c4 == '=') {
636 shift(4);
637 return URSHIFTEQUAL;
638 } else if (c1 == '=' && c2 == '=' && c3 == '=') {
639 shift(3);
640 return STREQ;
641 } else if (c1 == '!' && c2 == '=' && c3 == '=') {
642 shift(3);
643 return STRNEQ;
644 } else if (c1 == '>' && c2 == '>' && c3 == '>') {
645 shift(3);
646 return URSHIFT;
647 } else if (c1 == '<' && c2 == '<' && c3 == '=') {
648 shift(3);
649 return LSHIFTEQUAL;
650 } else if (c1 == '>' && c2 == '>' && c3 == '=') {
651 shift(3);
652 return RSHIFTEQUAL;
653 } else if (c1 == '<' && c2 == '=') {
654 shift(2);
655 return LE;
656 } else if (c1 == '>' && c2 == '=') {
657 shift(2);
658 return GE;
659 } else if (c1 == '!' && c2 == '=') {
660 shift(2);
661 return NE;
662 } else if (c1 == '+' && c2 == '+') {
663 shift(2);
664 if (terminator)
665 return AUTOPLUSPLUS;
666 else
667 return PLUSPLUS;
668 } else if (c1 == '-' && c2 == '-') {
669 shift(2);
670 if (terminator)
671 return AUTOMINUSMINUS;
672 else
673 return MINUSMINUS;
674 } else if (c1 == '=' && c2 == '=') {
675 shift(2);
676 return EQEQ;
677 } else if (c1 == '+' && c2 == '=') {
678 shift(2);
679 return PLUSEQUAL;
680 } else if (c1 == '-' && c2 == '=') {
681 shift(2);
682 return MINUSEQUAL;
683 } else if (c1 == '*' && c2 == '=') {
684 shift(2);
685 return MULTEQUAL;
686 } else if (c1 == '/' && c2 == '=') {
687 shift(2);
688 return DIVEQUAL;
689 } else if (c1 == '&' && c2 == '=') {
690 shift(2);
691 return ANDEQUAL;
692 } else if (c1 == '^' && c2 == '=') {
693 shift(2);
694 return XOREQUAL;
695 } else if (c1 == '%' && c2 == '=') {
696 shift(2);
697 return MODEQUAL;
698 } else if (c1 == '|' && c2 == '=') {
699 shift(2);
700 return OREQUAL;
701 } else if (c1 == '<' && c2 == '<') {
702 shift(2);
703 return LSHIFT;
704 } else if (c1 == '>' && c2 == '>') {
705 shift(2);
706 return RSHIFT;
707 } else if (c1 == '&' && c2 == '&') {
708 shift(2);
709 return AND;
710 } else if (c1 == '|' && c2 == '|') {
711 shift(2);
712 return OR;
713 }
714
715 switch(c1) {
716 case '=':
717 case '>':
718 case '<':
719 case ',':
720 case '!':
721 case '~':
722 case '?':
723 case ':':
724 case '.':
725 case '+':
726 case '-':
727 case '*':
728 case '/':
729 case '&':
730 case '|':
731 case '^':
732 case '%':
733 case '(':
734 case ')':
735 case '[':
736 case ']':
737 case ';':
738 shift(1);
739 return static_cast<int>(c1);
740 case '{':
741 charPos = pos - 4;
742 shift(1);
743 return OPENBRACE;
744 case '}':
745 charPos = pos - 4;
746 shift(1);
747 return CLOSEBRACE;
748 default:
749 return -1;
750 }
751}
752
753unsigned short Lexer::singleEscape(unsigned short c)
754{
755 switch(c) {
756 case 'b':
757 return 0x08;
758 case 't':
759 return 0x09;
760 case 'n':
761 return 0x0A;
762 case 'v':
763 return 0x0B;
764 case 'f':
765 return 0x0C;
766 case 'r':
767 return 0x0D;
768 case '"':
769 return 0x22;
770 case '\'':
771 return 0x27;
772 case '\\':
773 return 0x5C;
774 default:
775 return c;
776 }
777}
778
779unsigned short Lexer::convertOctal(int c1, int c2, int c3)
780{
781 return static_cast<unsigned short>((c1 - '0') * 64 + (c2 - '0') * 8 + c3 - '0');
782}
783
784unsigned char Lexer::convertHex(int c)
785{
786 if (c >= '0' && c <= '9')
787 return static_cast<unsigned char>(c - '0');
788 if (c >= 'a' && c <= 'f')
789 return static_cast<unsigned char>(c - 'a' + 10);
790 return static_cast<unsigned char>(c - 'A' + 10);
791}
792
793unsigned char Lexer::convertHex(int c1, int c2)
794{
795 return ((convertHex(c1) << 4) + convertHex(c2));
796}
797
798UChar Lexer::convertUnicode(int c1, int c2, int c3, int c4)
799{
800 unsigned char highByte = (convertHex(c1) << 4) + convertHex(c2);
801 unsigned char lowByte = (convertHex(c3) << 4) + convertHex(c4);
802 return (highByte << 8 | lowByte);
803}
804
805void Lexer::record8(int c)
806{
807 ASSERT(c >= 0);
808 ASSERT(c <= 0xff);
809 m_buffer8.append(static_cast<char>(c));
810}
811
812void Lexer::record16(int c)
813{
814 ASSERT(c >= 0);
815 ASSERT(c <= USHRT_MAX);
816 record16(UChar(static_cast<unsigned short>(c)));
817}
818
819void Lexer::record16(UChar c)
820{
821 m_buffer16.append(c);
822}
823
824bool Lexer::scanRegExp()
825{
826 m_buffer16.clear();
827 bool lastWasEscape = false;
828 bool inBrackets = false;
829
830 while (1) {
831 if (isLineTerminator() || current == -1)
832 return false;
833 else if (current != '/' || lastWasEscape == true || inBrackets == true)
834 {
835 // keep track of '[' and ']'
836 if (!lastWasEscape) {
837 if ( current == '[' && !inBrackets )
838 inBrackets = true;
839 if ( current == ']' && inBrackets )
840 inBrackets = false;
841 }
842 record16(current);
843 lastWasEscape =
844 !lastWasEscape && (current == '\\');
845 } else { // end of regexp
846 m_pattern = UString(m_buffer16);
847 m_buffer16.clear();
848 shift(1);
849 break;
850 }
851 shift(1);
852 }
853
854 while (isIdentPart(current)) {
855 record16(current);
856 shift(1);
857 }
858 m_flags = UString(m_buffer16);
859
860 return true;
861}
862
863void Lexer::clear()
864{
865 deleteAllValues(m_strings);
866 Vector<UString*> newStrings;
867 newStrings.reserveCapacity(initialStringTableCapacity);
868 m_strings.swap(newStrings);
869
870 deleteAllValues(m_identifiers);
871 Vector<KJS::Identifier*> newIdentifiers;
872 newIdentifiers.reserveCapacity(initialStringTableCapacity);
873 m_identifiers.swap(newIdentifiers);
874
875 Vector<char> newBuffer8;
876 newBuffer8.reserveCapacity(initialReadBufferCapacity);
877 m_buffer8.swap(newBuffer8);
878
879 Vector<UChar> newBuffer16;
880 newBuffer16.reserveCapacity(initialReadBufferCapacity);
881 m_buffer16.swap(newBuffer16);
882
883 m_pattern = 0;
884 m_flags = 0;
885}
886
887Identifier* Lexer::makeIdentifier(const Vector<UChar>& buffer)
888{
889 KJS::Identifier* identifier = new KJS::Identifier(buffer.data(), buffer.size());
890 m_identifiers.append(identifier);
891 return identifier;
892}
893
894UString* Lexer::makeUString(const Vector<UChar>& buffer)
895{
896 UString* string = new UString(buffer);
897 m_strings.append(string);
898 return string;
899}
900
901} // namespace KJS
Note: See TracBrowser for help on using the repository browser.