Context Navigation

Lexer.cpp@ 62849

Visit:

Last change on this file since 62849 was 62849, checked in by [email protected], 15 years ago

Tidy up the lexer

Reviewed by Anders Carlson.

Remove some of the old yacc/lex-isms still present in the lexer

parser/JSParser.h:

(JSC::):

parser/Lexer.cpp:

(JSC::Lexer::parseString):
(JSC::Lexer::lex):

parser/Lexer.h:

Property svn:eol-style set to native

File size: 31.2 KB

Line
1	/*
2	* Copyright (C) 1999-2000 Harri Porten ([email protected])
3	* Copyright (C) 2006, 2007, 2008, 2009 Apple Inc. All Rights Reserved.
4	* Copyright (C) 2007 Cameron Zwarich ([email protected])
5	* Copyright (C) 2010 Zoltan Herczeg ([email protected])
6	*
7	* This library is free software; you can redistribute it and/or
8	* modify it under the terms of the GNU Library General Public
9	* License as published by the Free Software Foundation; either
10	* version 2 of the License, or (at your option) any later version.
11	*
12	* This library is distributed in the hope that it will be useful,
13	* but WITHOUT ANY WARRANTY; without even the implied warranty of
14	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15	* Library General Public License for more details.
16	*
17	* You should have received a copy of the GNU Library General Public License
18	* along with this library; see the file COPYING.LIB. If not, write to
19	* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20	* Boston, MA 02110-1301, USA.
21	*
22	*/
23
24	#include "config.h"
25	#include "Lexer.h"
26
27	#include "JSFunction.h"
28
29	#include "JSGlobalObjectFunctions.h"
30	#include "Identifier.h"
31	#include "NodeInfo.h"
32	#include "Nodes.h"
33	#include "dtoa.h"
34	#include <ctype.h>
35	#include <limits.h>
36	#include <string.h>
37	#include <wtf/Assertions.h>
38
39	using namespace WTF;
40	using namespace Unicode;
41
42	#include "JSParser.h"
43	#include "Lookup.h"
44	#include "Lexer.lut.h"
45
46	namespace JSC {
47
48
// Coarse classes for the 128 ASCII code points. Lexer::lex() looks the current
// character up in the AsciiCharacters table and switches on the class found.
// The enumerator order is significant only in that the values must fit in the
// table's element type.
enum CharacterTypes {
    // Classes that have a case in the main switch of Lexer::lex().
    CharacterInvalid,

    // Starts of identifiers and numeric literals.
    CharacterAlpha,
    CharacterZero,
    CharacterNumber,

    // Line structure, punctuation and delimiters.
    CharacterLineTerminator,
    CharacterExclamationMark,
    CharacterSimple,
    CharacterQuote,
    CharacterDot,
    CharacterSlash,
    CharacterBackSlash,
    CharacterSemicolon,
    CharacterOpenBrace,
    CharacterCloseBrace,

    // Characters that may begin a multi-character operator.
    CharacterAdd,
    CharacterSub,
    CharacterMultiply,
    CharacterModulo,
    CharacterAnd,
    CharacterXor,
    CharacterOr,
    CharacterLess,
    CharacterGreater,
    CharacterEqual,

    // Remaining classes (only one so far).
    CharacterWhiteSpace,
};
82
83	// 128 ascii codes
84	static unsigned char AsciiCharacters[128] = {
85	/* 0 - Null */ CharacterInvalid,
86	/* 1 - Start of Heading */ CharacterInvalid,
87	/* 2 - Start of Text */ CharacterInvalid,
88	/* 3 - End of Text */ CharacterInvalid,
89	/* 4 - End of Transm. */ CharacterInvalid,
90	/* 5 - Enquiry */ CharacterInvalid,
91	/* 6 - Acknowledgment */ CharacterInvalid,
92	/* 7 - Bell */ CharacterInvalid,
93	/* 8 - Back Space */ CharacterInvalid,
94	/* 9 - Horizontal Tab */ CharacterWhiteSpace,
95	/* 10 - Line Feed */ CharacterLineTerminator,
96	/* 11 - Vertical Tab */ CharacterWhiteSpace,
97	/* 12 - Form Feed */ CharacterWhiteSpace,
98	/* 13 - Carriage Return */ CharacterLineTerminator,
99	/* 14 - Shift Out */ CharacterInvalid,
100	/* 15 - Shift In */ CharacterInvalid,
101	/* 16 - Data Line Escape */ CharacterInvalid,
102	/* 17 - Device Control 1 */ CharacterInvalid,
103	/* 18 - Device Control 2 */ CharacterInvalid,
104	/* 19 - Device Control 3 */ CharacterInvalid,
105	/* 20 - Device Control 4 */ CharacterInvalid,
106	/* 21 - Negative Ack. */ CharacterInvalid,
107	/* 22 - Synchronous Idle */ CharacterInvalid,
108	/* 23 - End of Transmit */ CharacterInvalid,
109	/* 24 - Cancel */ CharacterInvalid,
110	/* 25 - End of Medium */ CharacterInvalid,
111	/* 26 - Substitute */ CharacterInvalid,
112	/* 27 - Escape */ CharacterInvalid,
113	/* 28 - File Separator */ CharacterInvalid,
114	/* 29 - Group Separator */ CharacterInvalid,
115	/* 30 - Record Separator */ CharacterInvalid,
116	/* 31 - Unit Separator */ CharacterInvalid,
117	/* 32 - Space */ CharacterWhiteSpace,
118	/* 33 - ! */ CharacterExclamationMark,
119	/* 34 - " */ CharacterQuote,
120	/* 35 - # */ CharacterInvalid,
121	/* 36 - $ */ CharacterAlpha,
122	/* 37 - % */ CharacterModulo,
123	/* 38 - & */ CharacterAnd,
124	/* 39 - ' */ CharacterQuote,
125	/* 40 - ( */ CharacterSimple,
126	/* 41 - ) */ CharacterSimple,
127	/* 42 - * */ CharacterMultiply,
128	/* 43 - + */ CharacterAdd,
129	/* 44 - , */ CharacterSimple,
130	/* 45 - - */ CharacterSub,
131	/* 46 - . */ CharacterDot,
132	/* 47 - / */ CharacterSlash,
133	/* 48 - 0 */ CharacterZero,
134	/* 49 - 1 */ CharacterNumber,
135	/* 50 - 2 */ CharacterNumber,
136	/* 51 - 3 */ CharacterNumber,
137	/* 52 - 4 */ CharacterNumber,
138	/* 53 - 5 */ CharacterNumber,
139	/* 54 - 6 */ CharacterNumber,
140	/* 55 - 7 */ CharacterNumber,
141	/* 56 - 8 */ CharacterNumber,
142	/* 57 - 9 */ CharacterNumber,
143	/* 58 - : */ CharacterSimple,
144	/* 59 - ; */ CharacterSemicolon,
145	/* 60 - < */ CharacterLess,
146	/* 61 - = */ CharacterEqual,
147	/* 62 - > */ CharacterGreater,
148	/* 63 - ? */ CharacterSimple,
149	/* 64 - @ */ CharacterInvalid,
150	/* 65 - A */ CharacterAlpha,
151	/* 66 - B */ CharacterAlpha,
152	/* 67 - C */ CharacterAlpha,
153	/* 68 - D */ CharacterAlpha,
154	/* 69 - E */ CharacterAlpha,
155	/* 70 - F */ CharacterAlpha,
156	/* 71 - G */ CharacterAlpha,
157	/* 72 - H */ CharacterAlpha,
158	/* 73 - I */ CharacterAlpha,
159	/* 74 - J */ CharacterAlpha,
160	/* 75 - K */ CharacterAlpha,
161	/* 76 - L */ CharacterAlpha,
162	/* 77 - M */ CharacterAlpha,
163	/* 78 - N */ CharacterAlpha,
164	/* 79 - O */ CharacterAlpha,
165	/* 80 - P */ CharacterAlpha,
166	/* 81 - Q */ CharacterAlpha,
167	/* 82 - R */ CharacterAlpha,
168	/* 83 - S */ CharacterAlpha,
169	/* 84 - T */ CharacterAlpha,
170	/* 85 - U */ CharacterAlpha,
171	/* 86 - V */ CharacterAlpha,
172	/* 87 - W */ CharacterAlpha,
173	/* 88 - X */ CharacterAlpha,
174	/* 89 - Y */ CharacterAlpha,
175	/* 90 - Z */ CharacterAlpha,
176	/* 91 - [ */ CharacterSimple,
177	/* 92 - \ */ CharacterBackSlash,
178	/* 93 - ] */ CharacterSimple,
179	/* 94 - ^ */ CharacterXor,
180	/* 95 - _ */ CharacterAlpha,
181	/* 96 - ` */ CharacterInvalid,
182	/* 97 - a */ CharacterAlpha,
183	/* 98 - b */ CharacterAlpha,
184	/* 99 - c */ CharacterAlpha,
185	/* 100 - d */ CharacterAlpha,
186	/* 101 - e */ CharacterAlpha,
187	/* 102 - f */ CharacterAlpha,
188	/* 103 - g */ CharacterAlpha,
189	/* 104 - h */ CharacterAlpha,
190	/* 105 - i */ CharacterAlpha,
191	/* 106 - j */ CharacterAlpha,
192	/* 107 - k */ CharacterAlpha,
193	/* 108 - l */ CharacterAlpha,
194	/* 109 - m */ CharacterAlpha,
195	/* 110 - n */ CharacterAlpha,
196	/* 111 - o */ CharacterAlpha,
197	/* 112 - p */ CharacterAlpha,
198	/* 113 - q */ CharacterAlpha,
199	/* 114 - r */ CharacterAlpha,
200	/* 115 - s */ CharacterAlpha,
201	/* 116 - t */ CharacterAlpha,
202	/* 117 - u */ CharacterAlpha,
203	/* 118 - v */ CharacterAlpha,
204	/* 119 - w */ CharacterAlpha,
205	/* 120 - x */ CharacterAlpha,
206	/* 121 - y */ CharacterAlpha,
207	/* 122 - z */ CharacterAlpha,
208	/* 123 - { */ CharacterOpenBrace,
209	/* 124 - \| */ CharacterOr,
210	/* 125 - } */ CharacterCloseBrace,
211	/* 126 - ~ */ CharacterSimple,
212	/* 127 - Delete */ CharacterInvalid,
213	};
214
215	Lexer::Lexer(JSGlobalData* globalData)
216	: m_isReparsing(false)
217	, m_globalData(globalData)
218	, m_keywordTable(JSC::mainTable)
219	{
220	}
221
222	Lexer::~Lexer()
223	{
224	m_keywordTable.deleteTable();
225	}
226
227	ALWAYS_INLINE const UChar* Lexer::currentCharacter() const
228	{
229	ASSERT(m_code <= m_codeEnd);
230	return m_code;
231	}
232
233	ALWAYS_INLINE int Lexer::currentOffset() const
234	{
235	return currentCharacter() - m_codeStart;
236	}
237
238	void Lexer::setCode(const SourceCode& source, ParserArena& arena)
239	{
240	m_arena = &arena.identifierArena();
241
242	m_lineNumber = source.firstLine();
243	m_delimited = false;
244	m_lastToken = -1;
245
246	const UChar* data = source.provider()->data();
247
248	m_source = &source;
249	m_codeStart = data;
250	m_code = data + source.startOffset();
251	m_codeEnd = data + source.endOffset();
252	m_error = false;
253	m_atLineStart = true;
254
255	m_buffer8.reserveInitialCapacity(initialReadBufferCapacity);
256	m_buffer16.reserveInitialCapacity((m_codeEnd - m_code) / 2);
257
258	if (LIKELY(m_code < m_codeEnd))
259	m_current = *m_code;
260	else
261	m_current = -1;
262	ASSERT(currentOffset() == source.startOffset());
263	}
264
265	ALWAYS_INLINE void Lexer::shift()
266	{
267	// Faster than an if-else sequence
268	ASSERT(m_current != -1);
269	m_current = -1;
270	++m_code;
271	if (LIKELY(m_code < m_codeEnd))
272	m_current = *m_code;
273	}
274
275	ALWAYS_INLINE int Lexer::peek(int offset)
276	{
277	// Only use if necessary
278	ASSERT(offset > 0 && offset < 5);
279	const UChar* code = m_code + offset;
280	return (code < m_codeEnd) ? *code : -1;
281	}
282
283	int Lexer::getUnicodeCharacter()
284	{
285	int char1 = peek(1);
286	int char2 = peek(2);
287	int char3 = peek(3);
288
289	if (UNLIKELY(!isASCIIHexDigit(m_current) \|\| !isASCIIHexDigit(char1) \|\| !isASCIIHexDigit(char2) \|\| !isASCIIHexDigit(char3)))
290	return -1;
291
292	int result = convertUnicode(m_current, char1, char2, char3);
293	shift();
294	shift();
295	shift();
296	shift();
297	return result;
298	}
299
300	void Lexer::shiftLineTerminator()
301	{
302	ASSERT(isLineTerminator(m_current));
303
304	int m_prev = m_current;
305	shift();
306
307	// Allow both CRLF and LFCR.
308	if (m_prev + m_current == '\n' + '\r')
309	shift();
310
311	++m_lineNumber;
312	}
313
314	ALWAYS_INLINE const Identifier* Lexer::makeIdentifier(const UChar* characters, size_t length)
315	{
316	return &m_arena->makeIdentifier(m_globalData, characters, length);
317	}
318
319	ALWAYS_INLINE bool Lexer::lastTokenWasRestrKeyword() const
320	{
321	return m_lastToken == CONTINUE \|\| m_lastToken == BREAK \|\| m_lastToken == RETURN \|\| m_lastToken == THROW;
322	}
323
324	static NEVER_INLINE bool isNonASCIIIdentStart(int c)
325	{
326	return category(c) & (Letter_Uppercase \| Letter_Lowercase \| Letter_Titlecase \| Letter_Modifier \| Letter_Other);
327	}
328
329	static inline bool isIdentStart(int c)
330	{
331	return isASCII(c) ? isASCIIAlpha(c) \|\| c == '$' \|\| c == '_' : isNonASCIIIdentStart(c);
332	}
333
334	static NEVER_INLINE bool isNonASCIIIdentPart(int c)
335	{
336	return category(c) & (Letter_Uppercase \| Letter_Lowercase \| Letter_Titlecase \| Letter_Modifier \| Letter_Other
337	\| Mark_NonSpacing \| Mark_SpacingCombining \| Number_DecimalDigit \| Punctuation_Connector);
338	}
339
340	static inline bool isIdentPart(int c)
341	{
342	return isASCII(c) ? isASCIIAlphanumeric(c) \|\| c == '$' \|\| c == '_' : isNonASCIIIdentPart(c);
343	}
344
// Translates the character following a backslash in a string literal into the
// character it stands for. Returns 0 when |c| is not one of the
// single-character escapes, so the caller can try the other escape forms.
static inline int singleEscape(int c)
{
    switch (c) {
    // Control-character escapes.
    case 'b':
        return '\b';
    case 't':
        return '\t';
    case 'n':
        return '\n';
    case 'v':
        return '\v';
    case 'f':
        return '\f';
    case 'r':
        return '\r';
    // Escapes that stand for themselves.
    case '\\':
    case '\'':
    case '"':
        return c;
    default:
        return 0;
    }
}
370
371	inline void Lexer::record8(int c)
372	{
373	ASSERT(c >= 0);
374	ASSERT(c <= 0xFF);
375	m_buffer8.append(static_cast<char>(c));
376	}
377
378	inline void Lexer::record16(UChar c)
379	{
380	m_buffer16.append(c);
381	}
382
383	inline void Lexer::record16(int c)
384	{
385	ASSERT(c >= 0);
386	ASSERT(c <= USHRT_MAX);
387	record16(UChar(static_cast<unsigned short>(c)));
388	}
389
390	ALWAYS_INLINE bool Lexer::parseString(JSTokenData* lvalp)
391	{
392	int stringQuoteCharacter = m_current;
393	shift();
394
395	const UChar* stringStart = currentCharacter();
396
397	while (m_current != stringQuoteCharacter) {
398	if (UNLIKELY(m_current == '\\')) {
399	if (stringStart != currentCharacter())
400	m_buffer16.append(stringStart, currentCharacter() - stringStart);
401	shift();
402
403	int escape = singleEscape(m_current);
404
405	// Most common escape sequences first
406	if (escape) {
407	record16(escape);
408	shift();
409	} else if (UNLIKELY(isLineTerminator(m_current)))
410	shiftLineTerminator();
411	else if (m_current == 'x') {
412	shift();
413	if (isASCIIHexDigit(m_current) && isASCIIHexDigit(peek(1))) {
414	int prev = m_current;
415	shift();
416	record16(convertHex(prev, m_current));
417	shift();
418	} else
419	record16('x');
420	} else if (m_current == 'u') {
421	shift();
422	int character = getUnicodeCharacter();
423	if (character != -1)
424	record16(character);
425	else if (m_current == stringQuoteCharacter)
426	record16('u');
427	else // Only stringQuoteCharacter allowed after \u
428	return false;
429	} else if (isASCIIOctalDigit(m_current)) {
430	// Octal character sequences
431	int character1 = m_current;
432	shift();
433	if (isASCIIOctalDigit(m_current)) {
434	// Two octal characters
435	int character2 = m_current;
436	shift();
437	if (character1 >= '0' && character1 <= '3' && isASCIIOctalDigit(m_current)) {
438	record16((character1 - '0') * 64 + (character2 - '0') * 8 + m_current - '0');
439	shift();
440	} else
441	record16((character1 - '0') * 8 + character2 - '0');
442	} else
443	record16(character1 - '0');
444	} else if (m_current != -1) {
445	record16(m_current);
446	shift();
447	} else
448	return false;
449
450	stringStart = currentCharacter();
451	continue;
452	} else if (UNLIKELY(((static_cast<unsigned>(m_current) - 0xE) & 0x2000))) {
453	// New-line or end of input is not allowed
454	if (UNLIKELY(isLineTerminator(m_current)) \|\| UNLIKELY(m_current == -1))
455	return false;
456	// Anything else is just a normal character
457	}
458	shift();
459	}
460
461	if (currentCharacter() != stringStart)
462	m_buffer16.append(stringStart, currentCharacter() - stringStart);
463	lvalp->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
464	m_buffer16.resize(0);
465	return true;
466	}
467
468	int Lexer::lex(JSTokenData* lvalp, JSTokenInfo* llocp)
469	{
470	ASSERT(!m_error);
471	ASSERT(m_buffer8.isEmpty());
472	ASSERT(m_buffer16.isEmpty());
473
474	int token = 0;
475	m_terminator = false;
476
477	start:
478	while (isWhiteSpace(m_current))
479	shift();
480
481	int startOffset = currentOffset();
482
483	if (UNLIKELY(m_current == -1)) {
484	if (!m_terminator && !m_delimited && !m_isReparsing) {
485	// automatic semicolon insertion if program incomplete
486	goto doneSemicolon;
487	}
488	return 0;
489	}
490
491	m_delimited = false;
492
493	if (isASCII(m_current)) {
494	ASSERT(m_current >= 0 && m_current < 128);
495
496	switch (AsciiCharacters[m_current]) {
497	case CharacterGreater:
498	shift();
499	if (m_current == '>') {
500	shift();
501	if (m_current == '>') {
502	shift();
503	if (m_current == '=') {
504	shift();
505	token = URSHIFTEQUAL;
506	break;
507	}
508	token = URSHIFT;
509	break;
510	}
511	if (m_current == '=') {
512	shift();
513	token = RSHIFTEQUAL;
514	break;
515	}
516	token = RSHIFT;
517	break;
518	}
519	if (m_current == '=') {
520	shift();
521	token = GE;
522	break;
523	}
524	token = '>';
525	break;
526	case CharacterEqual:
527	shift();
528	if (m_current == '=') {
529	shift();
530	if (m_current == '=') {
531	shift();
532	token = STREQ;
533	break;
534	}
535	token = EQEQ;
536	break;
537	}
538	token = '=';
539	break;
540	case CharacterLess:
541	shift();
542	if (m_current == '!' && peek(1) == '-' && peek(2) == '-') {
543	// <!-- marks the beginning of a line comment (for www usage)
544	goto inSingleLineComment;
545	}
546	if (m_current == '<') {
547	shift();
548	if (m_current == '=') {
549	shift();
550	token = LSHIFTEQUAL;
551	break;
552	}
553	token = LSHIFT;
554	break;
555	}
556	if (m_current == '=') {
557	shift();
558	token = LE;
559	break;
560	}
561	token = '<';
562	break;
563	case CharacterExclamationMark:
564	shift();
565	if (m_current == '=') {
566	shift();
567	if (m_current == '=') {
568	shift();
569	token = STRNEQ;
570	break;
571	}
572	token = NE;
573	break;
574	}
575	token = '!';
576	break;
577	case CharacterAdd:
578	shift();
579	if (m_current == '+') {
580	shift();
581	token = (!m_terminator) ? PLUSPLUS : AUTOPLUSPLUS;
582	break;
583	}
584	if (m_current == '=') {
585	shift();
586	token = PLUSEQUAL;
587	break;
588	}
589	token = '+';
590	break;
591	case CharacterSub:
592	shift();
593	if (m_current == '-') {
594	shift();
595	if (m_atLineStart && m_current == '>') {
596	shift();
597	goto inSingleLineComment;
598	}
599	token = (!m_terminator) ? MINUSMINUS : AUTOMINUSMINUS;
600	break;
601	}
602	if (m_current == '=') {
603	shift();
604	token = MINUSEQUAL;
605	break;
606	}
607	token = '-';
608	break;
609	case CharacterMultiply:
610	shift();
611	if (m_current == '=') {
612	shift();
613	token = MULTEQUAL;
614	break;
615	}
616	token = '*';
617	break;
618	case CharacterSlash:
619	shift();
620	if (m_current == '/') {
621	shift();
622	goto inSingleLineComment;
623	}
624	if (m_current == '*') {
625	shift();
626	goto inMultiLineComment;
627	}
628	if (m_current == '=') {
629	shift();
630	token = DIVEQUAL;
631	break;
632	}
633	token = '/';
634	break;
635	case CharacterAnd:
636	shift();
637	if (m_current == '&') {
638	shift();
639	token = AND;
640	break;
641	}
642	if (m_current == '=') {
643	shift();
644	token = ANDEQUAL;
645	break;
646	}
647	token = '&';
648	break;
649	case CharacterXor:
650	shift();
651	if (m_current == '=') {
652	shift();
653	token = XOREQUAL;
654	break;
655	}
656	token = '^';
657	break;
658	case CharacterModulo:
659	shift();
660	if (m_current == '=') {
661	shift();
662	token = MODEQUAL;
663	break;
664	}
665	token = '%';
666	break;
667	case CharacterOr:
668	shift();
669	if (m_current == '=') {
670	shift();
671	token = OREQUAL;
672	break;
673	}
674	if (m_current == '\|') {
675	shift();
676	token = OR;
677	break;
678	}
679	token = '\|';
680	break;
681	case CharacterDot:
682	shift();
683	if (isASCIIDigit(m_current)) {
684	record8('.');
685	goto inNumberAfterDecimalPoint;
686	}
687	token = '.';
688	break;
689	case CharacterSimple:
690	token = m_current;
691	shift();
692	break;
693	case CharacterSemicolon:
694	m_delimited = true;
695	shift();
696	token = ';';
697	break;
698	case CharacterOpenBrace:
699	lvalp->intValue = currentOffset();
700	shift();
701	token = OPENBRACE;
702	break;
703	case CharacterCloseBrace:
704	lvalp->intValue = currentOffset();
705	m_delimited = true;
706	shift();
707	token = CLOSEBRACE;
708	break;
709	case CharacterBackSlash:
710	goto startIdentifierWithBackslash;
711	case CharacterZero:
712	goto startNumberWithZeroDigit;
713	case CharacterNumber:
714	goto startNumber;
715	case CharacterQuote:
716	if (UNLIKELY(!parseString(lvalp)))
717	goto returnError;
718	shift();
719	m_delimited = false;
720	token = STRING;
721	break;
722	case CharacterAlpha:
723	ASSERT(isIdentStart(m_current));
724	goto startIdentifierOrKeyword;
725	case CharacterLineTerminator:
726	ASSERT(isLineTerminator(m_current));
727	shiftLineTerminator();
728	m_atLineStart = true;
729	m_terminator = true;
730	if (lastTokenWasRestrKeyword()) {
731	token = ';';
732	goto doneSemicolon;
733	}
734	goto start;
735	case CharacterInvalid:
736	goto returnError;
737	default:
738	ASSERT_NOT_REACHED();
739	goto returnError;
740	}
741	} else {
742	// Rare characters
743
744	if (isNonASCIIIdentStart(m_current))
745	goto startIdentifierOrKeyword;
746	if (isLineTerminator(m_current)) {
747	shiftLineTerminator();
748	m_atLineStart = true;
749	m_terminator = true;
750	if (lastTokenWasRestrKeyword())
751	goto doneSemicolon;
752	goto start;
753	}
754	goto returnError;
755	}
756
757	m_atLineStart = false;
758	goto returnToken;
759
760	startIdentifierWithBackslash: {
761	shift();
762	if (UNLIKELY(m_current != 'u'))
763	goto returnError;
764	shift();
765
766	token = getUnicodeCharacter();
767	if (UNLIKELY(token == -1))
768	goto returnError;
769	if (UNLIKELY(!isIdentStart(token)))
770	goto returnError;
771	goto inIdentifierAfterCharacterCheck;
772	}
773
774	startIdentifierOrKeyword: {
775	const UChar* identifierStart = currentCharacter();
776	shift();
777	while (isIdentPart(m_current))
778	shift();
779	if (LIKELY(m_current != '\\')) {
780	// Fast case for idents which does not contain \uCCCC characters
781	lvalp->ident = makeIdentifier(identifierStart, currentCharacter() - identifierStart);
782	goto doneIdentifierOrKeyword;
783	}
784	m_buffer16.append(identifierStart, currentCharacter() - identifierStart);
785	}
786
787	do {
788	shift();
789	if (UNLIKELY(m_current != 'u'))
790	goto returnError;
791	shift();
792	token = getUnicodeCharacter();
793	if (UNLIKELY(token == -1))
794	goto returnError;
795	if (UNLIKELY(!isIdentPart(token)))
796	goto returnError;
797	inIdentifierAfterCharacterCheck:
798	record16(token);
799
800	while (isIdentPart(m_current)) {
801	record16(m_current);
802	shift();
803	}
804	} while (UNLIKELY(m_current == '\\'));
805	goto doneIdentifier;
806
807	inSingleLineComment:
808	while (!isLineTerminator(m_current)) {
809	if (UNLIKELY(m_current == -1))
810	return 0;
811	shift();
812	}
813	shiftLineTerminator();
814	m_atLineStart = true;
815	m_terminator = true;
816	if (lastTokenWasRestrKeyword())
817	goto doneSemicolon;
818	goto start;
819
820	inMultiLineComment:
821	while (true) {
822	if (UNLIKELY(m_current == '*')) {
823	shift();
824	if (m_current == '/')
825	break;
826	if (m_current == '*')
827	continue;
828	}
829
830	if (UNLIKELY(m_current == -1))
831	goto returnError;
832
833	if (isLineTerminator(m_current))
834	shiftLineTerminator();
835	else
836	shift();
837	}
838	shift();
839	m_atLineStart = false;
840	goto start;
841
842	startNumberWithZeroDigit:
843	shift();
844	if ((m_current \| 0x20) == 'x' && isASCIIHexDigit(peek(1))) {
845	shift();
846	goto inHex;
847	}
848	if (m_current == '.') {
849	record8('0');
850	record8('.');
851	shift();
852	goto inNumberAfterDecimalPoint;
853	}
854	if ((m_current \| 0x20) == 'e') {
855	record8('0');
856	record8('e');
857	shift();
858	goto inExponentIndicator;
859	}
860	if (isASCIIOctalDigit(m_current))
861	goto inOctal;
862	if (isASCIIDigit(m_current))
863	goto startNumber;
864	lvalp->doubleValue = 0;
865	goto doneNumeric;
866
867	inNumberAfterDecimalPoint:
868	while (isASCIIDigit(m_current)) {
869	record8(m_current);
870	shift();
871	}
872	if ((m_current \| 0x20) == 'e') {
873	record8('e');
874	shift();
875	goto inExponentIndicator;
876	}
877	goto doneNumber;
878
879	inExponentIndicator:
880	if (m_current == '+' \|\| m_current == '-') {
881	record8(m_current);
882	shift();
883	}
884	if (!isASCIIDigit(m_current))
885	goto returnError;
886	do {
887	record8(m_current);
888	shift();
889	} while (isASCIIDigit(m_current));
890	goto doneNumber;
891
892	inOctal: {
893	do {
894	record8(m_current);
895	shift();
896	} while (isASCIIOctalDigit(m_current));
897	if (isASCIIDigit(m_current))
898	goto startNumber;
899
900	double dval = 0;
901
902	const char* end = m_buffer8.end();
903	for (const char* p = m_buffer8.data(); p < end; ++p) {
904	dval *= 8;
905	dval += *p - '0';
906	}
907	if (dval >= mantissaOverflowLowerBound)
908	dval = parseIntOverflow(m_buffer8.data(), end - m_buffer8.data(), 8);
909
910	m_buffer8.resize(0);
911
912	lvalp->doubleValue = dval;
913	goto doneNumeric;
914	}
915
916	inHex: {
917	do {
918	record8(m_current);
919	shift();
920	} while (isASCIIHexDigit(m_current));
921
922	double dval = 0;
923
924	const char* end = m_buffer8.end();
925	for (const char* p = m_buffer8.data(); p < end; ++p) {
926	dval *= 16;
927	dval += toASCIIHexValue(*p);
928	}
929	if (dval >= mantissaOverflowLowerBound)
930	dval = parseIntOverflow(m_buffer8.data(), end - m_buffer8.data(), 16);
931
932	m_buffer8.resize(0);
933
934	lvalp->doubleValue = dval;
935	goto doneNumeric;
936	}
937
938	startNumber:
939	record8(m_current);
940	shift();
941	while (isASCIIDigit(m_current)) {
942	record8(m_current);
943	shift();
944	}
945	if (m_current == '.') {
946	record8('.');
947	shift();
948	goto inNumberAfterDecimalPoint;
949	}
950	if ((m_current \| 0x20) == 'e') {
951	record8('e');
952	shift();
953	goto inExponentIndicator;
954	}
955
956	// Fall through into doneNumber.
957
958	doneNumber:
959	// Null-terminate string for strtod.
960	m_buffer8.append('\0');
961	lvalp->doubleValue = WTF::strtod(m_buffer8.data(), 0);
962	m_buffer8.resize(0);
963
964	// Fall through into doneNumeric.
965
966	doneNumeric:
967	// No identifiers allowed directly after numeric literal, e.g. "3in" is bad.
968	if (UNLIKELY(isIdentStart(m_current)))
969	goto returnError;
970
971	m_atLineStart = false;
972	m_delimited = false;
973	token = NUMBER;
974	goto returnToken;
975
976	doneSemicolon:
977	token = ';';
978	m_delimited = true;
979	goto returnToken;
980
981	doneIdentifier:
982	m_atLineStart = false;
983	m_delimited = false;
984	lvalp->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
985	m_buffer16.resize(0);
986	token = IDENT;
987	goto returnToken;
988
989	doneIdentifierOrKeyword: {
990	m_atLineStart = false;
991	m_delimited = false;
992	m_buffer16.resize(0);
993	const HashEntry* entry = m_keywordTable.entry(m_globalData, *lvalp->ident);
994	token = entry ? entry->lexerValue() : static_cast<int>(IDENT);
995
996	// Fall through into returnToken.
997	}
998
999	returnToken: {
1000	int lineNumber = m_lineNumber;
1001	llocp->first_line = lineNumber;
1002	llocp->last_line = lineNumber;
1003	llocp->first_column = startOffset;
1004	llocp->last_column = currentOffset();
1005	m_lastToken = token;
1006	return token;
1007	}
1008
1009	returnError:
1010	m_error = true;
1011	return -1;
1012	}
1013
1014	bool Lexer::scanRegExp(const Identifier& pattern, const Identifier& flags, UChar patternPrefix)
1015	{
1016	ASSERT(m_buffer16.isEmpty());
1017
1018	bool lastWasEscape = false;
1019	bool inBrackets = false;
1020
1021	if (patternPrefix) {
1022	ASSERT(!isLineTerminator(patternPrefix));
1023	ASSERT(patternPrefix != '/');
1024	ASSERT(patternPrefix != '[');
1025	record16(patternPrefix);
1026	}
1027
1028	while (true) {
1029	int current = m_current;
1030
1031	if (isLineTerminator(current) \|\| current == -1) {
1032	m_buffer16.resize(0);
1033	return false;
1034	}
1035
1036	shift();
1037
1038	if (current == '/' && !lastWasEscape && !inBrackets)
1039	break;
1040
1041	record16(current);
1042
1043	if (lastWasEscape) {
1044	lastWasEscape = false;
1045	continue;
1046	}
1047
1048	switch (current) {
1049	case '[':
1050	inBrackets = true;
1051	break;
1052	case ']':
1053	inBrackets = false;
1054	break;
1055	case '\\':
1056	lastWasEscape = true;
1057	break;
1058	}
1059	}
1060
1061	pattern = makeIdentifier(m_buffer16.data(), m_buffer16.size());
1062	m_buffer16.resize(0);
1063
1064	while (isIdentPart(m_current)) {
1065	record16(m_current);
1066	shift();
1067	}
1068
1069	flags = makeIdentifier(m_buffer16.data(), m_buffer16.size());
1070	m_buffer16.resize(0);
1071
1072	return true;
1073	}
1074
1075	bool Lexer::skipRegExp()
1076	{
1077	bool lastWasEscape = false;
1078	bool inBrackets = false;
1079
1080	while (true) {
1081	int current = m_current;
1082
1083	if (isLineTerminator(current) \|\| current == -1)
1084	return false;
1085
1086	shift();
1087
1088	if (current == '/' && !lastWasEscape && !inBrackets)
1089	break;
1090
1091	if (lastWasEscape) {
1092	lastWasEscape = false;
1093	continue;
1094	}
1095
1096	switch (current) {
1097	case '[':
1098	inBrackets = true;
1099	break;
1100	case ']':
1101	inBrackets = false;
1102	break;
1103	case '\\':
1104	lastWasEscape = true;
1105	break;
1106	}
1107	}
1108
1109	while (isIdentPart(m_current))
1110	shift();
1111
1112	return true;
1113	}
1114
1115	void Lexer::clear()
1116	{
1117	m_arena = 0;
1118
1119	Vector<char> newBuffer8;
1120	m_buffer8.swap(newBuffer8);
1121
1122	Vector<UChar> newBuffer16;
1123	m_buffer16.swap(newBuffer16);
1124
1125	m_isReparsing = false;
1126	}
1127
1128	SourceCode Lexer::sourceCode(int openBrace, int closeBrace, int firstLine)
1129	{
1130	return SourceCode(m_source->provider(), openBrace, closeBrace + 1, firstLine);
1131	}
1132
1133	} // namespace JSC

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: webkit/trunk/JavaScriptCore/parser/Lexer.cpp@ 62849

Download in other formats: