Context Navigation

Lexer.cpp@ 62628

Visit:

Last change on this file since 62628 was 62628, checked in by [email protected], 15 years ago

Refactored string parsing inside the lexer
https://bugs.webkit.org/show_bug.cgi?id=41606

Reviewed by Oliver Hunt.

Does not use goto. Although the latest SunSpider
parse-only test yields a 1.044x speedup, I think the
patch provides only a slight improvement at most.

parser/Lexer.cpp:

(JSC::singleEscape):
(JSC::Lexer::parseString):
(JSC::Lexer::lex):

parser/Lexer.h:

Property svn:eol-style set to native

File size: 31.3 KB

Line
1	/*
2	* Copyright (C) 1999-2000 Harri Porten ([email protected])
3	* Copyright (C) 2006, 2007, 2008, 2009 Apple Inc. All Rights Reserved.
4	* Copyright (C) 2007 Cameron Zwarich ([email protected])
5	* Copyright (C) 2010 Zoltan Herczeg ([email protected])
6	*
7	* This library is free software; you can redistribute it and/or
8	* modify it under the terms of the GNU Library General Public
9	* License as published by the Free Software Foundation; either
10	* version 2 of the License, or (at your option) any later version.
11	*
12	* This library is distributed in the hope that it will be useful,
13	* but WITHOUT ANY WARRANTY; without even the implied warranty of
14	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15	* Library General Public License for more details.
16	*
17	* You should have received a copy of the GNU Library General Public License
18	* along with this library; see the file COPYING.LIB. If not, write to
19	* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20	* Boston, MA 02110-1301, USA.
21	*
22	*/
23
24	#include "config.h"
25	#include "Lexer.h"
26
27	#include "JSFunction.h"
28
29	#include "JSGlobalObjectFunctions.h"
30	#include "Identifier.h"
31	#include "NodeInfo.h"
32	#include "Nodes.h"
33	#include "dtoa.h"
34	#include <ctype.h>
35	#include <limits.h>
36	#include <string.h>
37	#include <wtf/Assertions.h>
38
39	using namespace WTF;
40	using namespace Unicode;
41
42	#include "JSParser.h"
43	#include "Lookup.h"
44	#include "Lexer.lut.h"
45
46	namespace JSC {
47
48
// Classification of the 128 ASCII codes. The values stored in AsciiCharacters
// come from this enum, and Lexer::lex() switches on them.
enum CharacterTypes {
    // Dispatch categories used by the main switch in Lexer::lex().
    CharacterInvalid = 0,

    // Characters that start identifiers and numeric literals.
    CharacterAlpha,
    CharacterZero,
    CharacterNumber,

    // Structural characters.
    CharacterLineTerminator,
    CharacterExclamationMark,
    CharacterSimple,
    CharacterQuote,
    CharacterDot,
    CharacterSlash,
    CharacterBackSlash,
    CharacterSemicolon,
    CharacterOpenBrace,
    CharacterCloseBrace,

    // Characters that start operators.
    CharacterAdd,
    CharacterSub,
    CharacterMultiply,
    CharacterModulo,
    CharacterAnd,
    CharacterXor,
    CharacterOr,
    CharacterLess,
    CharacterGreater,
    CharacterEqual,

    // Remaining categories (currently just this one).
    CharacterWhiteSpace,
};
82
83	// 128 ascii codes
84	static unsigned char AsciiCharacters[128] = {
85	/* 0 - Null */ CharacterInvalid,
86	/* 1 - Start of Heading */ CharacterInvalid,
87	/* 2 - Start of Text */ CharacterInvalid,
88	/* 3 - End of Text */ CharacterInvalid,
89	/* 4 - End of Transm. */ CharacterInvalid,
90	/* 5 - Enquiry */ CharacterInvalid,
91	/* 6 - Acknowledgment */ CharacterInvalid,
92	/* 7 - Bell */ CharacterInvalid,
93	/* 8 - Back Space */ CharacterInvalid,
94	/* 9 - Horizontal Tab */ CharacterWhiteSpace,
95	/* 10 - Line Feed */ CharacterLineTerminator,
96	/* 11 - Vertical Tab */ CharacterWhiteSpace,
97	/* 12 - Form Feed */ CharacterWhiteSpace,
98	/* 13 - Carriage Return */ CharacterLineTerminator,
99	/* 14 - Shift Out */ CharacterInvalid,
100	/* 15 - Shift In */ CharacterInvalid,
101	/* 16 - Data Line Escape */ CharacterInvalid,
102	/* 17 - Device Control 1 */ CharacterInvalid,
103	/* 18 - Device Control 2 */ CharacterInvalid,
104	/* 19 - Device Control 3 */ CharacterInvalid,
105	/* 20 - Device Control 4 */ CharacterInvalid,
106	/* 21 - Negative Ack. */ CharacterInvalid,
107	/* 22 - Synchronous Idle */ CharacterInvalid,
108	/* 23 - End of Transmit */ CharacterInvalid,
109	/* 24 - Cancel */ CharacterInvalid,
110	/* 25 - End of Medium */ CharacterInvalid,
111	/* 26 - Substitute */ CharacterInvalid,
112	/* 27 - Escape */ CharacterInvalid,
113	/* 28 - File Separator */ CharacterInvalid,
114	/* 29 - Group Separator */ CharacterInvalid,
115	/* 30 - Record Separator */ CharacterInvalid,
116	/* 31 - Unit Separator */ CharacterInvalid,
117	/* 32 - Space */ CharacterWhiteSpace,
118	/* 33 - ! */ CharacterExclamationMark,
119	/* 34 - " */ CharacterQuote,
120	/* 35 - # */ CharacterInvalid,
121	/* 36 - $ */ CharacterAlpha,
122	/* 37 - % */ CharacterModulo,
123	/* 38 - & */ CharacterAnd,
124	/* 39 - ' */ CharacterQuote,
125	/* 40 - ( */ CharacterSimple,
126	/* 41 - ) */ CharacterSimple,
127	/* 42 - * */ CharacterMultiply,
128	/* 43 - + */ CharacterAdd,
129	/* 44 - , */ CharacterSimple,
130	/* 45 - - */ CharacterSub,
131	/* 46 - . */ CharacterDot,
132	/* 47 - / */ CharacterSlash,
133	/* 48 - 0 */ CharacterZero,
134	/* 49 - 1 */ CharacterNumber,
135	/* 50 - 2 */ CharacterNumber,
136	/* 51 - 3 */ CharacterNumber,
137	/* 52 - 4 */ CharacterNumber,
138	/* 53 - 5 */ CharacterNumber,
139	/* 54 - 6 */ CharacterNumber,
140	/* 55 - 7 */ CharacterNumber,
141	/* 56 - 8 */ CharacterNumber,
142	/* 57 - 9 */ CharacterNumber,
143	/* 58 - : */ CharacterSimple,
144	/* 59 - ; */ CharacterSemicolon,
145	/* 60 - < */ CharacterLess,
146	/* 61 - = */ CharacterEqual,
147	/* 62 - > */ CharacterGreater,
148	/* 63 - ? */ CharacterSimple,
149	/* 64 - @ */ CharacterInvalid,
150	/* 65 - A */ CharacterAlpha,
151	/* 66 - B */ CharacterAlpha,
152	/* 67 - C */ CharacterAlpha,
153	/* 68 - D */ CharacterAlpha,
154	/* 69 - E */ CharacterAlpha,
155	/* 70 - F */ CharacterAlpha,
156	/* 71 - G */ CharacterAlpha,
157	/* 72 - H */ CharacterAlpha,
158	/* 73 - I */ CharacterAlpha,
159	/* 74 - J */ CharacterAlpha,
160	/* 75 - K */ CharacterAlpha,
161	/* 76 - L */ CharacterAlpha,
162	/* 77 - M */ CharacterAlpha,
163	/* 78 - N */ CharacterAlpha,
164	/* 79 - O */ CharacterAlpha,
165	/* 80 - P */ CharacterAlpha,
166	/* 81 - Q */ CharacterAlpha,
167	/* 82 - R */ CharacterAlpha,
168	/* 83 - S */ CharacterAlpha,
169	/* 84 - T */ CharacterAlpha,
170	/* 85 - U */ CharacterAlpha,
171	/* 86 - V */ CharacterAlpha,
172	/* 87 - W */ CharacterAlpha,
173	/* 88 - X */ CharacterAlpha,
174	/* 89 - Y */ CharacterAlpha,
175	/* 90 - Z */ CharacterAlpha,
176	/* 91 - [ */ CharacterSimple,
177	/* 92 - \ */ CharacterBackSlash,
178	/* 93 - ] */ CharacterSimple,
179	/* 94 - ^ */ CharacterXor,
180	/* 95 - _ */ CharacterAlpha,
181	/* 96 - ` */ CharacterInvalid,
182	/* 97 - a */ CharacterAlpha,
183	/* 98 - b */ CharacterAlpha,
184	/* 99 - c */ CharacterAlpha,
185	/* 100 - d */ CharacterAlpha,
186	/* 101 - e */ CharacterAlpha,
187	/* 102 - f */ CharacterAlpha,
188	/* 103 - g */ CharacterAlpha,
189	/* 104 - h */ CharacterAlpha,
190	/* 105 - i */ CharacterAlpha,
191	/* 106 - j */ CharacterAlpha,
192	/* 107 - k */ CharacterAlpha,
193	/* 108 - l */ CharacterAlpha,
194	/* 109 - m */ CharacterAlpha,
195	/* 110 - n */ CharacterAlpha,
196	/* 111 - o */ CharacterAlpha,
197	/* 112 - p */ CharacterAlpha,
198	/* 113 - q */ CharacterAlpha,
199	/* 114 - r */ CharacterAlpha,
200	/* 115 - s */ CharacterAlpha,
201	/* 116 - t */ CharacterAlpha,
202	/* 117 - u */ CharacterAlpha,
203	/* 118 - v */ CharacterAlpha,
204	/* 119 - w */ CharacterAlpha,
205	/* 120 - x */ CharacterAlpha,
206	/* 121 - y */ CharacterAlpha,
207	/* 122 - z */ CharacterAlpha,
208	/* 123 - { */ CharacterOpenBrace,
209	/* 124 - \| */ CharacterOr,
210	/* 125 - } */ CharacterCloseBrace,
211	/* 126 - ~ */ CharacterSimple,
212	/* 127 - Delete */ CharacterInvalid,
213	};
214
215	Lexer::Lexer(JSGlobalData* globalData)
216	: m_isReparsing(false)
217	, m_globalData(globalData)
218	, m_keywordTable(JSC::mainTable)
219	{
220	}
221
222	Lexer::~Lexer()
223	{
224	m_keywordTable.deleteTable();
225	}
226
227	ALWAYS_INLINE const UChar* Lexer::currentCharacter() const
228	{
229	ASSERT(m_code <= m_codeEnd);
230	return m_code;
231	}
232
233	ALWAYS_INLINE int Lexer::currentOffset() const
234	{
235	return currentCharacter() - m_codeStart;
236	}
237
238	void Lexer::setCode(const SourceCode& source, ParserArena& arena)
239	{
240	m_arena = &arena.identifierArena();
241
242	m_lineNumber = source.firstLine();
243	m_delimited = false;
244	m_lastToken = -1;
245
246	const UChar* data = source.provider()->data();
247
248	m_source = &source;
249	m_codeStart = data;
250	m_code = data + source.startOffset();
251	m_codeEnd = data + source.endOffset();
252	m_error = false;
253	m_atLineStart = true;
254
255	m_buffer8.reserveInitialCapacity(initialReadBufferCapacity);
256	m_buffer16.reserveInitialCapacity((m_codeEnd - m_code) / 2);
257
258	if (LIKELY(m_code < m_codeEnd))
259	m_current = *m_code;
260	else
261	m_current = -1;
262	ASSERT(currentOffset() == source.startOffset());
263	}
264
265	ALWAYS_INLINE void Lexer::shift()
266	{
267	// Faster than an if-else sequence
268	ASSERT(m_current != -1);
269	m_current = -1;
270	++m_code;
271	if (LIKELY(m_code < m_codeEnd))
272	m_current = *m_code;
273	}
274
275	ALWAYS_INLINE int Lexer::peek(int offset)
276	{
277	// Only use if necessary
278	ASSERT(offset > 0 && offset < 5);
279	const UChar* code = m_code + offset;
280	return (code < m_codeEnd) ? *code : -1;
281	}
282
283	int Lexer::getUnicodeCharacter()
284	{
285	int char1 = peek(1);
286	int char2 = peek(2);
287	int char3 = peek(3);
288
289	if (UNLIKELY(!isASCIIHexDigit(m_current) \|\| !isASCIIHexDigit(char1) \|\| !isASCIIHexDigit(char2) \|\| !isASCIIHexDigit(char3)))
290	return -1;
291
292	int result = convertUnicode(m_current, char1, char2, char3);
293	shift();
294	shift();
295	shift();
296	shift();
297	return result;
298	}
299
300	void Lexer::shiftLineTerminator()
301	{
302	ASSERT(isLineTerminator(m_current));
303
304	int m_prev = m_current;
305	shift();
306
307	// Allow both CRLF and LFCR.
308	if (m_prev + m_current == '\n' + '\r')
309	shift();
310
311	++m_lineNumber;
312	}
313
314	ALWAYS_INLINE const Identifier* Lexer::makeIdentifier(const UChar* characters, size_t length)
315	{
316	return &m_arena->makeIdentifier(m_globalData, characters, length);
317	}
318
319	ALWAYS_INLINE bool Lexer::lastTokenWasRestrKeyword() const
320	{
321	return m_lastToken == CONTINUE \|\| m_lastToken == BREAK \|\| m_lastToken == RETURN \|\| m_lastToken == THROW;
322	}
323
324	static NEVER_INLINE bool isNonASCIIIdentStart(int c)
325	{
326	return category(c) & (Letter_Uppercase \| Letter_Lowercase \| Letter_Titlecase \| Letter_Modifier \| Letter_Other);
327	}
328
329	static inline bool isIdentStart(int c)
330	{
331	return isASCII(c) ? isASCIIAlpha(c) \|\| c == '$' \|\| c == '_' : isNonASCIIIdentStart(c);
332	}
333
334	static NEVER_INLINE bool isNonASCIIIdentPart(int c)
335	{
336	return category(c) & (Letter_Uppercase \| Letter_Lowercase \| Letter_Titlecase \| Letter_Modifier \| Letter_Other
337	\| Mark_NonSpacing \| Mark_SpacingCombining \| Number_DecimalDigit \| Punctuation_Connector);
338	}
339
340	static inline bool isIdentPart(int c)
341	{
342	return isASCII(c) ? isASCIIAlphanumeric(c) \|\| c == '$' \|\| c == '_' : isNonASCIIIdentPart(c);
343	}
344
// Translates the character following a backslash into the character it stands
// for, when it is one of the single-character escapes (b, t, n, v, f, r,
// backslash, single quote, double quote). Returns 0 for any other character.
static inline int singleEscape(int c)
{
    switch (c) {
    // These escapes stand for themselves.
    case '\\':
    case '\'':
    case '"':
        return c;

    // Control-character escapes.
    case 'b':
        return '\b'; // 0x08
    case 't':
        return '\t'; // 0x09
    case 'n':
        return '\n'; // 0x0A
    case 'v':
        return '\v'; // 0x0B
    case 'f':
        return '\f'; // 0x0C
    case 'r':
        return '\r'; // 0x0D
    }
    return 0;
}
370
371	inline void Lexer::record8(int c)
372	{
373	ASSERT(c >= 0);
374	ASSERT(c <= 0xFF);
375	m_buffer8.append(static_cast<char>(c));
376	}
377
378	inline void Lexer::record16(UChar c)
379	{
380	m_buffer16.append(c);
381	}
382
383	inline void Lexer::record16(int c)
384	{
385	ASSERT(c >= 0);
386	ASSERT(c <= USHRT_MAX);
387	record16(UChar(static_cast<unsigned short>(c)));
388	}
389
390	ALWAYS_INLINE bool Lexer::parseString(void* lvalp)
391	{
392	int stringQuoteCharacter = m_current;
393	shift();
394
395	const UChar* stringStart = currentCharacter();
396
397	while (m_current != stringQuoteCharacter) {
398	if (UNLIKELY(m_current == '\\')) {
399	if (stringStart != currentCharacter())
400	m_buffer16.append(stringStart, currentCharacter() - stringStart);
401	shift();
402
403	int escape = singleEscape(m_current);
404
405	// Most common escape sequences first
406	if (escape) {
407	record16(escape);
408	shift();
409	} else if (UNLIKELY(isLineTerminator(m_current)))
410	shiftLineTerminator();
411	else if (m_current == 'x') {
412	shift();
413	if (isASCIIHexDigit(m_current) && isASCIIHexDigit(peek(1))) {
414	int prev = m_current;
415	shift();
416	record16(convertHex(prev, m_current));
417	shift();
418	} else
419	record16('x');
420	} else if (m_current == 'u') {
421	shift();
422	int character = getUnicodeCharacter();
423	if (character != -1)
424	record16(character);
425	else if (m_current == stringQuoteCharacter)
426	record16('u');
427	else // Only stringQuoteCharacter allowed after \u
428	return false;
429	} else if (isASCIIOctalDigit(m_current)) {
430	// Octal character sequences
431	int character1 = m_current;
432	shift();
433	if (isASCIIOctalDigit(m_current)) {
434	// Two octal characters
435	int character2 = m_current;
436	shift();
437	if (character1 >= '0' && character1 <= '3' && isASCIIOctalDigit(m_current)) {
438	record16((character1 - '0') * 64 + (character2 - '0') * 8 + m_current - '0');
439	shift();
440	} else
441	record16((character1 - '0') * 8 + character2 - '0');
442	} else
443	record16(character1 - '0');
444	} else if (m_current != -1) {
445	record16(m_current);
446	shift();
447	} else
448	return false;
449
450	stringStart = currentCharacter();
451	continue;
452	} else if (UNLIKELY(((static_cast<unsigned>(m_current) - 0xE) & 0x2000))) {
453	// New-line or end of input is not allowed
454	if (UNLIKELY(isLineTerminator(m_current)) \|\| UNLIKELY(m_current == -1))
455	return false;
456	// Anything else is just a normal character
457	}
458	shift();
459	}
460
461	if (currentCharacter() != stringStart)
462	m_buffer16.append(stringStart, currentCharacter() - stringStart);
463	reinterpret_cast<YYSTYPE*>(lvalp)->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
464	m_buffer16.resize(0);
465	return true;
466	}
467
468	int Lexer::lex(void* p1, void* p2)
469	{
470	ASSERT(!m_error);
471	ASSERT(m_buffer8.isEmpty());
472	ASSERT(m_buffer16.isEmpty());
473
474	YYSTYPE* lvalp = static_cast<YYSTYPE*>(p1);
475	YYLTYPE* llocp = static_cast<YYLTYPE*>(p2);
476	int token = 0;
477	m_terminator = false;
478
479	start:
480	while (isWhiteSpace(m_current))
481	shift();
482
483	int startOffset = currentOffset();
484
485	if (UNLIKELY(m_current == -1)) {
486	if (!m_terminator && !m_delimited && !m_isReparsing) {
487	// automatic semicolon insertion if program incomplete
488	goto doneSemicolon;
489	}
490	return 0;
491	}
492
493	m_delimited = false;
494
495	if (isASCII(m_current)) {
496	ASSERT(m_current >= 0 && m_current < 128);
497
498	switch (AsciiCharacters[m_current]) {
499	case CharacterGreater:
500	shift();
501	if (m_current == '>') {
502	shift();
503	if (m_current == '>') {
504	shift();
505	if (m_current == '=') {
506	shift();
507	token = URSHIFTEQUAL;
508	break;
509	}
510	token = URSHIFT;
511	break;
512	}
513	if (m_current == '=') {
514	shift();
515	token = RSHIFTEQUAL;
516	break;
517	}
518	token = RSHIFT;
519	break;
520	}
521	if (m_current == '=') {
522	shift();
523	token = GE;
524	break;
525	}
526	token = '>';
527	break;
528	case CharacterEqual:
529	shift();
530	if (m_current == '=') {
531	shift();
532	if (m_current == '=') {
533	shift();
534	token = STREQ;
535	break;
536	}
537	token = EQEQ;
538	break;
539	}
540	token = '=';
541	break;
542	case CharacterLess:
543	shift();
544	if (m_current == '!' && peek(1) == '-' && peek(2) == '-') {
545	// <!-- marks the beginning of a line comment (for www usage)
546	goto inSingleLineComment;
547	}
548	if (m_current == '<') {
549	shift();
550	if (m_current == '=') {
551	shift();
552	token = LSHIFTEQUAL;
553	break;
554	}
555	token = LSHIFT;
556	break;
557	}
558	if (m_current == '=') {
559	shift();
560	token = LE;
561	break;
562	}
563	token = '<';
564	break;
565	case CharacterExclamationMark:
566	shift();
567	if (m_current == '=') {
568	shift();
569	if (m_current == '=') {
570	shift();
571	token = STRNEQ;
572	break;
573	}
574	token = NE;
575	break;
576	}
577	token = '!';
578	break;
579	case CharacterAdd:
580	shift();
581	if (m_current == '+') {
582	shift();
583	token = (!m_terminator) ? PLUSPLUS : AUTOPLUSPLUS;
584	break;
585	}
586	if (m_current == '=') {
587	shift();
588	token = PLUSEQUAL;
589	break;
590	}
591	token = '+';
592	break;
593	case CharacterSub:
594	shift();
595	if (m_current == '-') {
596	shift();
597	if (m_atLineStart && m_current == '>') {
598	shift();
599	goto inSingleLineComment;
600	}
601	token = (!m_terminator) ? MINUSMINUS : AUTOMINUSMINUS;
602	break;
603	}
604	if (m_current == '=') {
605	shift();
606	token = MINUSEQUAL;
607	break;
608	}
609	token = '-';
610	break;
611	case CharacterMultiply:
612	shift();
613	if (m_current == '=') {
614	shift();
615	token = MULTEQUAL;
616	break;
617	}
618	token = '*';
619	break;
620	case CharacterSlash:
621	shift();
622	if (m_current == '/') {
623	shift();
624	goto inSingleLineComment;
625	}
626	if (m_current == '*') {
627	shift();
628	goto inMultiLineComment;
629	}
630	if (m_current == '=') {
631	shift();
632	token = DIVEQUAL;
633	break;
634	}
635	token = '/';
636	break;
637	case CharacterAnd:
638	shift();
639	if (m_current == '&') {
640	shift();
641	token = AND;
642	break;
643	}
644	if (m_current == '=') {
645	shift();
646	token = ANDEQUAL;
647	break;
648	}
649	token = '&';
650	break;
651	case CharacterXor:
652	shift();
653	if (m_current == '=') {
654	shift();
655	token = XOREQUAL;
656	break;
657	}
658	token = '^';
659	break;
660	case CharacterModulo:
661	shift();
662	if (m_current == '=') {
663	shift();
664	token = MODEQUAL;
665	break;
666	}
667	token = '%';
668	break;
669	case CharacterOr:
670	shift();
671	if (m_current == '=') {
672	shift();
673	token = OREQUAL;
674	break;
675	}
676	if (m_current == '\|') {
677	shift();
678	token = OR;
679	break;
680	}
681	token = '\|';
682	break;
683	case CharacterDot:
684	shift();
685	if (isASCIIDigit(m_current)) {
686	record8('.');
687	goto inNumberAfterDecimalPoint;
688	}
689	token = '.';
690	break;
691	case CharacterSimple:
692	token = m_current;
693	shift();
694	break;
695	case CharacterSemicolon:
696	m_delimited = true;
697	shift();
698	token = ';';
699	break;
700	case CharacterOpenBrace:
701	lvalp->intValue = currentOffset();
702	shift();
703	token = OPENBRACE;
704	break;
705	case CharacterCloseBrace:
706	lvalp->intValue = currentOffset();
707	m_delimited = true;
708	shift();
709	token = CLOSEBRACE;
710	break;
711	case CharacterBackSlash:
712	goto startIdentifierWithBackslash;
713	case CharacterZero:
714	goto startNumberWithZeroDigit;
715	case CharacterNumber:
716	goto startNumber;
717	case CharacterQuote:
718	if (UNLIKELY(!parseString(lvalp)))
719	goto returnError;
720	shift();
721	m_delimited = false;
722	token = STRING;
723	break;
724	case CharacterAlpha:
725	ASSERT(isIdentStart(m_current));
726	goto startIdentifierOrKeyword;
727	case CharacterLineTerminator:
728	ASSERT(isLineTerminator(m_current));
729	shiftLineTerminator();
730	m_atLineStart = true;
731	m_terminator = true;
732	if (lastTokenWasRestrKeyword()) {
733	token = ';';
734	goto doneSemicolon;
735	}
736	goto start;
737	case CharacterInvalid:
738	goto returnError;
739	default:
740	ASSERT_NOT_REACHED();
741	goto returnError;
742	}
743	} else {
744	// Rare characters
745
746	if (isNonASCIIIdentStart(m_current))
747	goto startIdentifierOrKeyword;
748	if (isLineTerminator(m_current)) {
749	shiftLineTerminator();
750	m_atLineStart = true;
751	m_terminator = true;
752	if (lastTokenWasRestrKeyword())
753	goto doneSemicolon;
754	goto start;
755	}
756	goto returnError;
757	}
758
759	m_atLineStart = false;
760	goto returnToken;
761
762	startIdentifierWithBackslash: {
763	shift();
764	if (UNLIKELY(m_current != 'u'))
765	goto returnError;
766	shift();
767
768	token = getUnicodeCharacter();
769	if (UNLIKELY(token == -1))
770	goto returnError;
771	if (UNLIKELY(!isIdentStart(token)))
772	goto returnError;
773	goto inIdentifierAfterCharacterCheck;
774	}
775
776	startIdentifierOrKeyword: {
777	const UChar* identifierStart = currentCharacter();
778	shift();
779	while (isIdentPart(m_current))
780	shift();
781	if (LIKELY(m_current != '\\')) {
782	// Fast case for idents which does not contain \uCCCC characters
783	lvalp->ident = makeIdentifier(identifierStart, currentCharacter() - identifierStart);
784	goto doneIdentifierOrKeyword;
785	}
786	m_buffer16.append(identifierStart, currentCharacter() - identifierStart);
787	}
788
789	do {
790	shift();
791	if (UNLIKELY(m_current != 'u'))
792	goto returnError;
793	shift();
794	token = getUnicodeCharacter();
795	if (UNLIKELY(token == -1))
796	goto returnError;
797	if (UNLIKELY(!isIdentPart(token)))
798	goto returnError;
799	inIdentifierAfterCharacterCheck:
800	record16(token);
801
802	while (isIdentPart(m_current)) {
803	record16(m_current);
804	shift();
805	}
806	} while (UNLIKELY(m_current == '\\'));
807	goto doneIdentifier;
808
809	inSingleLineComment:
810	while (!isLineTerminator(m_current)) {
811	if (UNLIKELY(m_current == -1))
812	return 0;
813	shift();
814	}
815	shiftLineTerminator();
816	m_atLineStart = true;
817	m_terminator = true;
818	if (lastTokenWasRestrKeyword())
819	goto doneSemicolon;
820	goto start;
821
822	inMultiLineComment:
823	while (true) {
824	if (UNLIKELY(m_current == '*')) {
825	shift();
826	if (m_current == '/')
827	break;
828	if (m_current == '*')
829	continue;
830	}
831
832	if (UNLIKELY(m_current == -1))
833	goto returnError;
834
835	if (isLineTerminator(m_current))
836	shiftLineTerminator();
837	else
838	shift();
839	}
840	shift();
841	m_atLineStart = false;
842	goto start;
843
844	startNumberWithZeroDigit:
845	shift();
846	if ((m_current \| 0x20) == 'x' && isASCIIHexDigit(peek(1))) {
847	shift();
848	goto inHex;
849	}
850	if (m_current == '.') {
851	record8('0');
852	record8('.');
853	shift();
854	goto inNumberAfterDecimalPoint;
855	}
856	if ((m_current \| 0x20) == 'e') {
857	record8('0');
858	record8('e');
859	shift();
860	goto inExponentIndicator;
861	}
862	if (isASCIIOctalDigit(m_current))
863	goto inOctal;
864	if (isASCIIDigit(m_current))
865	goto startNumber;
866	lvalp->doubleValue = 0;
867	goto doneNumeric;
868
869	inNumberAfterDecimalPoint:
870	while (isASCIIDigit(m_current)) {
871	record8(m_current);
872	shift();
873	}
874	if ((m_current \| 0x20) == 'e') {
875	record8('e');
876	shift();
877	goto inExponentIndicator;
878	}
879	goto doneNumber;
880
881	inExponentIndicator:
882	if (m_current == '+' \|\| m_current == '-') {
883	record8(m_current);
884	shift();
885	}
886	if (!isASCIIDigit(m_current))
887	goto returnError;
888	do {
889	record8(m_current);
890	shift();
891	} while (isASCIIDigit(m_current));
892	goto doneNumber;
893
894	inOctal: {
895	do {
896	record8(m_current);
897	shift();
898	} while (isASCIIOctalDigit(m_current));
899	if (isASCIIDigit(m_current))
900	goto startNumber;
901
902	double dval = 0;
903
904	const char* end = m_buffer8.end();
905	for (const char* p = m_buffer8.data(); p < end; ++p) {
906	dval *= 8;
907	dval += *p - '0';
908	}
909	if (dval >= mantissaOverflowLowerBound)
910	dval = parseIntOverflow(m_buffer8.data(), end - m_buffer8.data(), 8);
911
912	m_buffer8.resize(0);
913
914	lvalp->doubleValue = dval;
915	goto doneNumeric;
916	}
917
918	inHex: {
919	do {
920	record8(m_current);
921	shift();
922	} while (isASCIIHexDigit(m_current));
923
924	double dval = 0;
925
926	const char* end = m_buffer8.end();
927	for (const char* p = m_buffer8.data(); p < end; ++p) {
928	dval *= 16;
929	dval += toASCIIHexValue(*p);
930	}
931	if (dval >= mantissaOverflowLowerBound)
932	dval = parseIntOverflow(m_buffer8.data(), end - m_buffer8.data(), 16);
933
934	m_buffer8.resize(0);
935
936	lvalp->doubleValue = dval;
937	goto doneNumeric;
938	}
939
940	startNumber:
941	record8(m_current);
942	shift();
943	while (isASCIIDigit(m_current)) {
944	record8(m_current);
945	shift();
946	}
947	if (m_current == '.') {
948	record8('.');
949	shift();
950	goto inNumberAfterDecimalPoint;
951	}
952	if ((m_current \| 0x20) == 'e') {
953	record8('e');
954	shift();
955	goto inExponentIndicator;
956	}
957
958	// Fall through into doneNumber.
959
960	doneNumber:
961	// Null-terminate string for strtod.
962	m_buffer8.append('\0');
963	lvalp->doubleValue = WTF::strtod(m_buffer8.data(), 0);
964	m_buffer8.resize(0);
965
966	// Fall through into doneNumeric.
967
968	doneNumeric:
969	// No identifiers allowed directly after numeric literal, e.g. "3in" is bad.
970	if (UNLIKELY(isIdentStart(m_current)))
971	goto returnError;
972
973	m_atLineStart = false;
974	m_delimited = false;
975	token = NUMBER;
976	goto returnToken;
977
978	doneSemicolon:
979	token = ';';
980	m_delimited = true;
981	goto returnToken;
982
983	doneIdentifier:
984	m_atLineStart = false;
985	m_delimited = false;
986	lvalp->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
987	m_buffer16.resize(0);
988	token = IDENT;
989	goto returnToken;
990
991	doneIdentifierOrKeyword: {
992	m_atLineStart = false;
993	m_delimited = false;
994	m_buffer16.resize(0);
995	const HashEntry* entry = m_keywordTable.entry(m_globalData, *lvalp->ident);
996	token = entry ? entry->lexerValue() : static_cast<int>(IDENT);
997
998	// Fall through into returnToken.
999	}
1000
1001	returnToken: {
1002	int lineNumber = m_lineNumber;
1003	llocp->first_line = lineNumber;
1004	llocp->last_line = lineNumber;
1005	llocp->first_column = startOffset;
1006	llocp->last_column = currentOffset();
1007	m_lastToken = token;
1008	return token;
1009	}
1010
1011	returnError:
1012	m_error = true;
1013	return -1;
1014	}
1015
1016	bool Lexer::scanRegExp(const Identifier& pattern, const Identifier& flags, UChar patternPrefix)
1017	{
1018	ASSERT(m_buffer16.isEmpty());
1019
1020	bool lastWasEscape = false;
1021	bool inBrackets = false;
1022
1023	if (patternPrefix) {
1024	ASSERT(!isLineTerminator(patternPrefix));
1025	ASSERT(patternPrefix != '/');
1026	ASSERT(patternPrefix != '[');
1027	record16(patternPrefix);
1028	}
1029
1030	while (true) {
1031	int current = m_current;
1032
1033	if (isLineTerminator(current) \|\| current == -1) {
1034	m_buffer16.resize(0);
1035	return false;
1036	}
1037
1038	shift();
1039
1040	if (current == '/' && !lastWasEscape && !inBrackets)
1041	break;
1042
1043	record16(current);
1044
1045	if (lastWasEscape) {
1046	lastWasEscape = false;
1047	continue;
1048	}
1049
1050	switch (current) {
1051	case '[':
1052	inBrackets = true;
1053	break;
1054	case ']':
1055	inBrackets = false;
1056	break;
1057	case '\\':
1058	lastWasEscape = true;
1059	break;
1060	}
1061	}
1062
1063	pattern = makeIdentifier(m_buffer16.data(), m_buffer16.size());
1064	m_buffer16.resize(0);
1065
1066	while (isIdentPart(m_current)) {
1067	record16(m_current);
1068	shift();
1069	}
1070
1071	flags = makeIdentifier(m_buffer16.data(), m_buffer16.size());
1072	m_buffer16.resize(0);
1073
1074	return true;
1075	}
1076
1077	bool Lexer::skipRegExp()
1078	{
1079	bool lastWasEscape = false;
1080	bool inBrackets = false;
1081
1082	while (true) {
1083	int current = m_current;
1084
1085	if (isLineTerminator(current) \|\| current == -1)
1086	return false;
1087
1088	shift();
1089
1090	if (current == '/' && !lastWasEscape && !inBrackets)
1091	break;
1092
1093	if (lastWasEscape) {
1094	lastWasEscape = false;
1095	continue;
1096	}
1097
1098	switch (current) {
1099	case '[':
1100	inBrackets = true;
1101	break;
1102	case ']':
1103	inBrackets = false;
1104	break;
1105	case '\\':
1106	lastWasEscape = true;
1107	break;
1108	}
1109	}
1110
1111	while (isIdentPart(m_current))
1112	shift();
1113
1114	return true;
1115	}
1116
1117	void Lexer::clear()
1118	{
1119	m_arena = 0;
1120	m_codeWithoutBOMs.clear();
1121
1122	Vector<char> newBuffer8;
1123	m_buffer8.swap(newBuffer8);
1124
1125	Vector<UChar> newBuffer16;
1126	m_buffer16.swap(newBuffer16);
1127
1128	m_isReparsing = false;
1129	}
1130
1131	SourceCode Lexer::sourceCode(int openBrace, int closeBrace, int firstLine)
1132	{
1133	return SourceCode(m_source->provider(), openBrace, closeBrace + 1, firstLine);
1134	}
1135
1136	} // namespace JSC

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: webkit/trunk/JavaScriptCore/parser/Lexer.cpp@ 62628

Download in other formats: