Context Navigation

source: webkit/trunk/JavaScriptCore/parser/Lexer.cpp@ 61878

Visit:

Last change on this file since 61878 was 61878, checked in by [email protected], 15 years ago

2010-06-25 Oliver Hunt <[email protected]>

Reviewed by Geoffrey Garen.

Remove old js parser
https://bugs.webkit.org/show_bug.cgi?id=41222

Remove the old yacc parser; this also solves the tiger problem, which
was a conflict between yacc-generated token values and those in the
custom parser.

Android.mk:
CMakeLists.txt:
DerivedSources.make:
DerivedSources.pro:
GNUmakefile.am:
JavaScriptCore.pro:
JavaScriptCore.vcproj/JavaScriptCore/JavaScriptCore.vcproj:
JavaScriptCore.xcodeproj/project.pbxproj:
parser/Grammar.y: Removed.
parser/JSParser.cpp:
parser/JSParser.h:
parser/Lexer.cpp:
parser/NodeConstructors.h: (JSC::Node::Node):
parser/Parser.cpp: (JSC::Parser::parse):
wtf/Platform.h:

Property svn:eol-style set to native

File size: 26.2 KB

Line
1	/*
2	* Copyright (C) 1999-2000 Harri Porten ([email protected])
3	* Copyright (C) 2006, 2007, 2008, 2009 Apple Inc. All Rights Reserved.
4	* Copyright (C) 2007 Cameron Zwarich ([email protected])
5	*
6	* This library is free software; you can redistribute it and/or
7	* modify it under the terms of the GNU Library General Public
8	* License as published by the Free Software Foundation; either
9	* version 2 of the License, or (at your option) any later version.
10	*
11	* This library is distributed in the hope that it will be useful,
12	* but WITHOUT ANY WARRANTY; without even the implied warranty of
13	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14	* Library General Public License for more details.
15	*
16	* You should have received a copy of the GNU Library General Public License
17	* along with this library; see the file COPYING.LIB. If not, write to
18	* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19	* Boston, MA 02110-1301, USA.
20	*
21	*/
22
23	#include "config.h"
24	#include "Lexer.h"
25
26	#include "JSFunction.h"
27
28	#include "JSGlobalObjectFunctions.h"
29	#include "Identifier.h"
30	#include "NodeInfo.h"
31	#include "Nodes.h"
32	#include "dtoa.h"
33	#include <ctype.h>
34	#include <limits.h>
35	#include <string.h>
36	#include <wtf/Assertions.h>
37
38	using namespace WTF;
39	using namespace Unicode;
40
41	#include "JSParser.h"
42	#include "Lookup.h"
43	#include "Lexer.lut.h"
44
45	namespace JSC {
46
47	static const UChar byteOrderMark = 0xFEFF;
48
49	Lexer::Lexer(JSGlobalData* globalData)
50	: m_isReparsing(false)
51	, m_globalData(globalData)
52	, m_keywordTable(JSC::mainTable)
53	{
54	}
55
56	Lexer::~Lexer()
57	{
58	m_keywordTable.deleteTable();
59	}
60
61	inline const UChar* Lexer::currentCharacter() const
62	{
63	return m_code - 4;
64	}
65
66	inline int Lexer::currentOffset() const
67	{
68	return currentCharacter() - m_codeStart;
69	}
70
71	ALWAYS_INLINE void Lexer::shift1()
72	{
73	m_current = m_next1;
74	m_next1 = m_next2;
75	m_next2 = m_next3;
76	if (LIKELY(m_code < m_codeEnd))
77	m_next3 = m_code[0];
78	else
79	m_next3 = -1;
80
81	++m_code;
82	}
83
84	ALWAYS_INLINE void Lexer::shift2()
85	{
86	m_current = m_next2;
87	m_next1 = m_next3;
88	if (LIKELY(m_code + 1 < m_codeEnd)) {
89	m_next2 = m_code[0];
90	m_next3 = m_code[1];
91	} else {
92	m_next2 = m_code < m_codeEnd ? m_code[0] : -1;
93	m_next3 = -1;
94	}
95
96	m_code += 2;
97	}
98
99	ALWAYS_INLINE void Lexer::shift3()
100	{
101	m_current = m_next3;
102	if (LIKELY(m_code + 2 < m_codeEnd)) {
103	m_next1 = m_code[0];
104	m_next2 = m_code[1];
105	m_next3 = m_code[2];
106	} else {
107	m_next1 = m_code < m_codeEnd ? m_code[0] : -1;
108	m_next2 = m_code + 1 < m_codeEnd ? m_code[1] : -1;
109	m_next3 = -1;
110	}
111
112	m_code += 3;
113	}
114
115	ALWAYS_INLINE void Lexer::shift4()
116	{
117	if (LIKELY(m_code + 3 < m_codeEnd)) {
118	m_current = m_code[0];
119	m_next1 = m_code[1];
120	m_next2 = m_code[2];
121	m_next3 = m_code[3];
122	} else {
123	m_current = m_code < m_codeEnd ? m_code[0] : -1;
124	m_next1 = m_code + 1 < m_codeEnd ? m_code[1] : -1;
125	m_next2 = m_code + 2 < m_codeEnd ? m_code[2] : -1;
126	m_next3 = -1;
127	}
128
129	m_code += 4;
130	}
131
132	void Lexer::setCode(const SourceCode& source, ParserArena& arena)
133	{
134	m_arena = &arena.identifierArena();
135
136	m_lineNumber = source.firstLine();
137	m_delimited = false;
138	m_lastToken = -1;
139
140	const UChar* data = source.provider()->data();
141
142	m_source = &source;
143	m_codeStart = data;
144	m_code = data + source.startOffset();
145	m_codeEnd = data + source.endOffset();
146	m_error = false;
147	m_atLineStart = true;
148
149	m_buffer8.reserveInitialCapacity(initialReadBufferCapacity);
150	m_buffer16.reserveInitialCapacity((m_codeEnd - m_code) / 2);
151
152	// ECMA-262 calls for stripping all Cf characters, but we only strip BOM characters.
153	// See <https://bugs.webkit.org/show_bug.cgi?id=4931> for details.
154	if (source.provider()->hasBOMs()) {
155	for (const UChar* p = m_codeStart; p < m_codeEnd; ++p) {
156	if (UNLIKELY(*p == byteOrderMark)) {
157	copyCodeWithoutBOMs();
158	break;
159	}
160	}
161	}
162
163	// Read the first characters into the 4-character buffer.
164	shift4();
165	ASSERT(currentOffset() == source.startOffset());
166	}
167
168	void Lexer::copyCodeWithoutBOMs()
169	{
170	// Note: In this case, the character offset data for debugging will be incorrect.
171	// If it's important to correctly debug code with extraneous BOMs, then the caller
172	// should strip the BOMs when creating the SourceProvider object and do its own
173	// mapping of offsets within the stripped text to original text offset.
174
175	m_codeWithoutBOMs.reserveCapacity(m_codeEnd - m_code);
176	for (const UChar* p = m_code; p < m_codeEnd; ++p) {
177	UChar c = *p;
178	if (c != byteOrderMark)
179	m_codeWithoutBOMs.append(c);
180	}
181	ptrdiff_t startDelta = m_codeStart - m_code;
182	m_code = m_codeWithoutBOMs.data();
183	m_codeStart = m_code + startDelta;
184	m_codeEnd = m_codeWithoutBOMs.data() + m_codeWithoutBOMs.size();
185	}
186
187	void Lexer::shiftLineTerminator()
188	{
189	ASSERT(isLineTerminator(m_current));
190
191	// Allow both CRLF and LFCR.
192	if (m_current + m_next1 == '\n' + '\r')
193	shift2();
194	else
195	shift1();
196
197	++m_lineNumber;
198	}
199
200	ALWAYS_INLINE const Identifier* Lexer::makeIdentifier(const UChar* characters, size_t length)
201	{
202	return &m_arena->makeIdentifier(m_globalData, characters, length);
203	}
204
205	inline bool Lexer::lastTokenWasRestrKeyword() const
206	{
207	return m_lastToken == CONTINUE \|\| m_lastToken == BREAK \|\| m_lastToken == RETURN \|\| m_lastToken == THROW;
208	}
209
210	static NEVER_INLINE bool isNonASCIIIdentStart(int c)
211	{
212	return category(c) & (Letter_Uppercase \| Letter_Lowercase \| Letter_Titlecase \| Letter_Modifier \| Letter_Other);
213	}
214
215	static inline bool isIdentStart(int c)
216	{
217	return isASCII(c) ? isASCIIAlpha(c) \|\| c == '$' \|\| c == '_' : isNonASCIIIdentStart(c);
218	}
219
220	static NEVER_INLINE bool isNonASCIIIdentPart(int c)
221	{
222	return category(c) & (Letter_Uppercase \| Letter_Lowercase \| Letter_Titlecase \| Letter_Modifier \| Letter_Other
223	\| Mark_NonSpacing \| Mark_SpacingCombining \| Number_DecimalDigit \| Punctuation_Connector);
224	}
225
226	static inline bool isIdentPart(int c)
227	{
228	return isASCII(c) ? isASCIIAlphanumeric(c) \|\| c == '$' \|\| c == '_' : isNonASCIIIdentPart(c);
229	}
230
// Maps the character following a backslash in a string literal to the
// character it denotes. Only b, t, n, v, f and r are translated; any other
// character stands for itself.
static inline int singleEscape(int c)
{
    if (c == 'b')
        return 0x08;
    if (c == 't')
        return 0x09;
    if (c == 'n')
        return 0x0A;
    if (c == 'v')
        return 0x0B;
    if (c == 'f')
        return 0x0C;
    if (c == 'r')
        return 0x0D;
    return c;
}
250
251	inline void Lexer::record8(int c)
252	{
253	ASSERT(c >= 0);
254	ASSERT(c <= 0xFF);
255	m_buffer8.append(static_cast<char>(c));
256	}
257
258	inline void Lexer::record16(UChar c)
259	{
260	m_buffer16.append(c);
261	}
262
263	inline void Lexer::record16(int c)
264	{
265	ASSERT(c >= 0);
266	ASSERT(c <= USHRT_MAX);
267	record16(UChar(static_cast<unsigned short>(c)));
268	}
269
270	int Lexer::lex(void* p1, void* p2)
271	{
272	ASSERT(!m_error);
273	ASSERT(m_buffer8.isEmpty());
274	ASSERT(m_buffer16.isEmpty());
275
276	YYSTYPE* lvalp = static_cast<YYSTYPE*>(p1);
277	YYLTYPE* llocp = static_cast<YYLTYPE*>(p2);
278	int token = 0;
279	m_terminator = false;
280
281	start:
282	while (isWhiteSpace(m_current))
283	shift1();
284
285	int startOffset = currentOffset();
286
287	if (m_current == -1) {
288	if (!m_terminator && !m_delimited && !m_isReparsing) {
289	// automatic semicolon insertion if program incomplete
290	token = ';';
291	goto doneSemicolon;
292	}
293	return 0;
294	}
295
296	m_delimited = false;
297	switch (m_current) {
298	case '>':
299	if (m_next1 == '>' && m_next2 == '>') {
300	if (m_next3 == '=') {
301	shift4();
302	token = URSHIFTEQUAL;
303	break;
304	}
305	shift3();
306	token = URSHIFT;
307	break;
308	}
309	if (m_next1 == '>') {
310	if (m_next2 == '=') {
311	shift3();
312	token = RSHIFTEQUAL;
313	break;
314	}
315	shift2();
316	token = RSHIFT;
317	break;
318	}
319	if (m_next1 == '=') {
320	shift2();
321	token = GE;
322	break;
323	}
324	shift1();
325	token = '>';
326	break;
327	case '=':
328	if (m_next1 == '=') {
329	if (m_next2 == '=') {
330	shift3();
331	token = STREQ;
332	break;
333	}
334	shift2();
335	token = EQEQ;
336	break;
337	}
338	shift1();
339	token = '=';
340	break;
341	case '!':
342	if (m_next1 == '=') {
343	if (m_next2 == '=') {
344	shift3();
345	token = STRNEQ;
346	break;
347	}
348	shift2();
349	token = NE;
350	break;
351	}
352	shift1();
353	token = '!';
354	break;
355	case '<':
356	if (m_next1 == '!' && m_next2 == '-' && m_next3 == '-') {
357	// <!-- marks the beginning of a line comment (for www usage)
358	shift4();
359	goto inSingleLineComment;
360	}
361	if (m_next1 == '<') {
362	if (m_next2 == '=') {
363	shift3();
364	token = LSHIFTEQUAL;
365	break;
366	}
367	shift2();
368	token = LSHIFT;
369	break;
370	}
371	if (m_next1 == '=') {
372	shift2();
373	token = LE;
374	break;
375	}
376	shift1();
377	token = '<';
378	break;
379	case '+':
380	if (m_next1 == '+') {
381	shift2();
382	if (m_terminator) {
383	token = AUTOPLUSPLUS;
384	break;
385	}
386	token = PLUSPLUS;
387	break;
388	}
389	if (m_next1 == '=') {
390	shift2();
391	token = PLUSEQUAL;
392	break;
393	}
394	shift1();
395	token = '+';
396	break;
397	case '-':
398	if (m_next1 == '-') {
399	if (m_atLineStart && m_next2 == '>') {
400	shift3();
401	goto inSingleLineComment;
402	}
403	shift2();
404	if (m_terminator) {
405	token = AUTOMINUSMINUS;
406	break;
407	}
408	token = MINUSMINUS;
409	break;
410	}
411	if (m_next1 == '=') {
412	shift2();
413	token = MINUSEQUAL;
414	break;
415	}
416	shift1();
417	token = '-';
418	break;
419	case '*':
420	if (m_next1 == '=') {
421	shift2();
422	token = MULTEQUAL;
423	break;
424	}
425	shift1();
426	token = '*';
427	break;
428	case '/':
429	if (m_next1 == '/') {
430	shift2();
431	goto inSingleLineComment;
432	}
433	if (m_next1 == '*')
434	goto inMultiLineComment;
435	if (m_next1 == '=') {
436	shift2();
437	token = DIVEQUAL;
438	break;
439	}
440	shift1();
441	token = '/';
442	break;
443	case '&':
444	if (m_next1 == '&') {
445	shift2();
446	token = AND;
447	break;
448	}
449	if (m_next1 == '=') {
450	shift2();
451	token = ANDEQUAL;
452	break;
453	}
454	shift1();
455	token = '&';
456	break;
457	case '^':
458	if (m_next1 == '=') {
459	shift2();
460	token = XOREQUAL;
461	break;
462	}
463	shift1();
464	token = '^';
465	break;
466	case '%':
467	if (m_next1 == '=') {
468	shift2();
469	token = MODEQUAL;
470	break;
471	}
472	shift1();
473	token = '%';
474	break;
475	case '\|':
476	if (m_next1 == '=') {
477	shift2();
478	token = OREQUAL;
479	break;
480	}
481	if (m_next1 == '\|') {
482	shift2();
483	token = OR;
484	break;
485	}
486	shift1();
487	token = '\|';
488	break;
489	case '.':
490	if (isASCIIDigit(m_next1)) {
491	record8('.');
492	shift1();
493	goto inNumberAfterDecimalPoint;
494	}
495	token = '.';
496	shift1();
497	break;
498	case ',':
499	case '~':
500	case '?':
501	case ':':
502	case '(':
503	case ')':
504	case '[':
505	case ']':
506	token = m_current;
507	shift1();
508	break;
509	case ';':
510	shift1();
511	m_delimited = true;
512	token = ';';
513	break;
514	case '{':
515	lvalp->intValue = currentOffset();
516	shift1();
517	token = OPENBRACE;
518	break;
519	case '}':
520	lvalp->intValue = currentOffset();
521	shift1();
522	m_delimited = true;
523	token = CLOSEBRACE;
524	break;
525	case '\\':
526	goto startIdentifierWithBackslash;
527	case '0':
528	goto startNumberWithZeroDigit;
529	case '1':
530	case '2':
531	case '3':
532	case '4':
533	case '5':
534	case '6':
535	case '7':
536	case '8':
537	case '9':
538	goto startNumber;
539	case '"':
540	case '\'':
541	goto startString;
542	default:
543	if (isIdentStart(m_current))
544	goto startIdentifierOrKeyword;
545	if (isLineTerminator(m_current)) {
546	shiftLineTerminator();
547	m_atLineStart = true;
548	m_terminator = true;
549	if (lastTokenWasRestrKeyword()) {
550	token = ';';
551	goto doneSemicolon;
552	}
553	goto start;
554	}
555	goto returnError;
556	}
557
558	m_atLineStart = false;
559	goto returnToken;
560
561	startString: {
562	int stringQuoteCharacter = m_current;
563	shift1();
564
565	const UChar* stringStart = currentCharacter();
566	while (m_current != stringQuoteCharacter) {
567	// Fast check for characters that require special handling.
568	// Catches -1, \n, \r, \, 0x2028, and 0x2029 as efficiently
569	// as possible, and lets through all common ASCII characters.
570	if (UNLIKELY(m_current == '\\') \|\| UNLIKELY(((static_cast<unsigned>(m_current) - 0xE) & 0x2000))) {
571	m_buffer16.append(stringStart, currentCharacter() - stringStart);
572	goto inString;
573	}
574	shift1();
575	}
576	lvalp->ident = makeIdentifier(stringStart, currentCharacter() - stringStart);
577	shift1();
578	m_atLineStart = false;
579	m_delimited = false;
580	token = STRING;
581	goto returnToken;
582
583	inString:
584	while (m_current != stringQuoteCharacter) {
585	if (m_current == '\\')
586	goto inStringEscapeSequence;
587	if (UNLIKELY(isLineTerminator(m_current)))
588	goto returnError;
589	if (UNLIKELY(m_current == -1))
590	goto returnError;
591	record16(m_current);
592	shift1();
593	}
594	goto doneString;
595
596	inStringEscapeSequence:
597	shift1();
598	if (m_current == 'x') {
599	shift1();
600	if (isASCIIHexDigit(m_current) && isASCIIHexDigit(m_next1)) {
601	record16(convertHex(m_current, m_next1));
602	shift2();
603	goto inString;
604	}
605	record16('x');
606	if (m_current == stringQuoteCharacter)
607	goto doneString;
608	goto inString;
609	}
610	if (m_current == 'u') {
611	shift1();
612	if (isASCIIHexDigit(m_current) && isASCIIHexDigit(m_next1) && isASCIIHexDigit(m_next2) && isASCIIHexDigit(m_next3)) {
613	record16(convertUnicode(m_current, m_next1, m_next2, m_next3));
614	shift4();
615	goto inString;
616	}
617	if (m_current == stringQuoteCharacter) {
618	record16('u');
619	goto doneString;
620	}
621	goto returnError;
622	}
623	if (isASCIIOctalDigit(m_current)) {
624	if (m_current >= '0' && m_current <= '3' && isASCIIOctalDigit(m_next1) && isASCIIOctalDigit(m_next2)) {
625	record16((m_current - '0') * 64 + (m_next1 - '0') * 8 + m_next2 - '0');
626	shift3();
627	goto inString;
628	}
629	if (isASCIIOctalDigit(m_next1)) {
630	record16((m_current - '0') * 8 + m_next1 - '0');
631	shift2();
632	goto inString;
633	}
634	record16(m_current - '0');
635	shift1();
636	goto inString;
637	}
638	if (isLineTerminator(m_current)) {
639	shiftLineTerminator();
640	goto inString;
641	}
642	if (m_current == -1)
643	goto returnError;
644	record16(singleEscape(m_current));
645	shift1();
646	goto inString;
647	}
648
649	startIdentifierWithBackslash:
650	shift1();
651	if (UNLIKELY(m_current != 'u'))
652	goto returnError;
653	shift1();
654	if (UNLIKELY(!isASCIIHexDigit(m_current) \|\| !isASCIIHexDigit(m_next1) \|\| !isASCIIHexDigit(m_next2) \|\| !isASCIIHexDigit(m_next3)))
655	goto returnError;
656	token = convertUnicode(m_current, m_next1, m_next2, m_next3);
657	if (UNLIKELY(!isIdentStart(token)))
658	goto returnError;
659	goto inIdentifierAfterCharacterCheck;
660
661	startIdentifierOrKeyword: {
662	const UChar* identifierStart = currentCharacter();
663	shift1();
664	while (isIdentPart(m_current))
665	shift1();
666	if (LIKELY(m_current != '\\')) {
667	lvalp->ident = makeIdentifier(identifierStart, currentCharacter() - identifierStart);
668	goto doneIdentifierOrKeyword;
669	}
670	m_buffer16.append(identifierStart, currentCharacter() - identifierStart);
671	}
672
673	do {
674	shift1();
675	if (UNLIKELY(m_current != 'u'))
676	goto returnError;
677	shift1();
678	if (UNLIKELY(!isASCIIHexDigit(m_current) \|\| !isASCIIHexDigit(m_next1) \|\| !isASCIIHexDigit(m_next2) \|\| !isASCIIHexDigit(m_next3)))
679	goto returnError;
680	token = convertUnicode(m_current, m_next1, m_next2, m_next3);
681	if (UNLIKELY(!isIdentPart(token)))
682	goto returnError;
683	inIdentifierAfterCharacterCheck:
684	record16(token);
685	shift4();
686
687	while (isIdentPart(m_current)) {
688	record16(m_current);
689	shift1();
690	}
691	} while (UNLIKELY(m_current == '\\'));
692	goto doneIdentifier;
693
694	inSingleLineComment:
695	while (!isLineTerminator(m_current)) {
696	if (UNLIKELY(m_current == -1))
697	return 0;
698	shift1();
699	}
700	shiftLineTerminator();
701	m_atLineStart = true;
702	m_terminator = true;
703	if (lastTokenWasRestrKeyword())
704	goto doneSemicolon;
705	goto start;
706
707	inMultiLineComment:
708	shift2();
709	while (m_current != '*' \|\| m_next1 != '/') {
710	if (isLineTerminator(m_current))
711	shiftLineTerminator();
712	else {
713	shift1();
714	if (UNLIKELY(m_current == -1))
715	goto returnError;
716	}
717	}
718	shift2();
719	m_atLineStart = false;
720	goto start;
721
722	startNumberWithZeroDigit:
723	shift1();
724	if ((m_current \| 0x20) == 'x' && isASCIIHexDigit(m_next1)) {
725	shift1();
726	goto inHex;
727	}
728	if (m_current == '.') {
729	record8('0');
730	record8('.');
731	shift1();
732	goto inNumberAfterDecimalPoint;
733	}
734	if ((m_current \| 0x20) == 'e') {
735	record8('0');
736	record8('e');
737	shift1();
738	goto inExponentIndicator;
739	}
740	if (isASCIIOctalDigit(m_current))
741	goto inOctal;
742	if (isASCIIDigit(m_current))
743	goto startNumber;
744	lvalp->doubleValue = 0;
745	goto doneNumeric;
746
747	inNumberAfterDecimalPoint:
748	while (isASCIIDigit(m_current)) {
749	record8(m_current);
750	shift1();
751	}
752	if ((m_current \| 0x20) == 'e') {
753	record8('e');
754	shift1();
755	goto inExponentIndicator;
756	}
757	goto doneNumber;
758
759	inExponentIndicator:
760	if (m_current == '+' \|\| m_current == '-') {
761	record8(m_current);
762	shift1();
763	}
764	if (!isASCIIDigit(m_current))
765	goto returnError;
766	do {
767	record8(m_current);
768	shift1();
769	} while (isASCIIDigit(m_current));
770	goto doneNumber;
771
772	inOctal: {
773	do {
774	record8(m_current);
775	shift1();
776	} while (isASCIIOctalDigit(m_current));
777	if (isASCIIDigit(m_current))
778	goto startNumber;
779
780	double dval = 0;
781
782	const char* end = m_buffer8.end();
783	for (const char* p = m_buffer8.data(); p < end; ++p) {
784	dval *= 8;
785	dval += *p - '0';
786	}
787	if (dval >= mantissaOverflowLowerBound)
788	dval = parseIntOverflow(m_buffer8.data(), end - m_buffer8.data(), 8);
789
790	m_buffer8.resize(0);
791
792	lvalp->doubleValue = dval;
793	goto doneNumeric;
794	}
795
796	inHex: {
797	do {
798	record8(m_current);
799	shift1();
800	} while (isASCIIHexDigit(m_current));
801
802	double dval = 0;
803
804	const char* end = m_buffer8.end();
805	for (const char* p = m_buffer8.data(); p < end; ++p) {
806	dval *= 16;
807	dval += toASCIIHexValue(*p);
808	}
809	if (dval >= mantissaOverflowLowerBound)
810	dval = parseIntOverflow(m_buffer8.data(), end - m_buffer8.data(), 16);
811
812	m_buffer8.resize(0);
813
814	lvalp->doubleValue = dval;
815	goto doneNumeric;
816	}
817
818	startNumber:
819	record8(m_current);
820	shift1();
821	while (isASCIIDigit(m_current)) {
822	record8(m_current);
823	shift1();
824	}
825	if (m_current == '.') {
826	record8('.');
827	shift1();
828	goto inNumberAfterDecimalPoint;
829	}
830	if ((m_current \| 0x20) == 'e') {
831	record8('e');
832	shift1();
833	goto inExponentIndicator;
834	}
835
836	// Fall through into doneNumber.
837
838	doneNumber:
839	// Null-terminate string for strtod.
840	m_buffer8.append('\0');
841	lvalp->doubleValue = WTF::strtod(m_buffer8.data(), 0);
842	m_buffer8.resize(0);
843
844	// Fall through into doneNumeric.
845
846	doneNumeric:
847	// No identifiers allowed directly after numeric literal, e.g. "3in" is bad.
848	if (UNLIKELY(isIdentStart(m_current)))
849	goto returnError;
850
851	m_atLineStart = false;
852	m_delimited = false;
853	token = NUMBER;
854	goto returnToken;
855
856	doneSemicolon:
857	token = ';';
858	m_delimited = true;
859	goto returnToken;
860
861	doneIdentifier:
862	m_atLineStart = false;
863	m_delimited = false;
864	lvalp->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
865	m_buffer16.resize(0);
866	token = IDENT;
867	goto returnToken;
868
869	doneIdentifierOrKeyword: {
870	m_atLineStart = false;
871	m_delimited = false;
872	m_buffer16.resize(0);
873	const HashEntry* entry = m_keywordTable.entry(m_globalData, *lvalp->ident);
874	token = entry ? entry->lexerValue() : static_cast<int>(IDENT);
875	goto returnToken;
876	}
877
878	doneString:
879	// Atomize constant strings in case they're later used in property lookup.
880	shift1();
881	m_atLineStart = false;
882	m_delimited = false;
883	lvalp->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
884	m_buffer16.resize(0);
885	token = STRING;
886
887	// Fall through into returnToken.
888
889	returnToken: {
890	int lineNumber = m_lineNumber;
891	llocp->first_line = lineNumber;
892	llocp->last_line = lineNumber;
893	llocp->first_column = startOffset;
894	llocp->last_column = currentOffset();
895	m_lastToken = token;
896	return token;
897	}
898
899	returnError:
900	m_error = true;
901	return -1;
902	}
903
904	bool Lexer::scanRegExp(const Identifier& pattern, const Identifier& flags, UChar patternPrefix)
905	{
906	ASSERT(m_buffer16.isEmpty());
907
908	bool lastWasEscape = false;
909	bool inBrackets = false;
910
911	if (patternPrefix) {
912	ASSERT(!isLineTerminator(patternPrefix));
913	ASSERT(patternPrefix != '/');
914	ASSERT(patternPrefix != '[');
915	record16(patternPrefix);
916	}
917
918	while (true) {
919	int current = m_current;
920
921	if (isLineTerminator(current) \|\| current == -1) {
922	m_buffer16.resize(0);
923	return false;
924	}
925
926	shift1();
927
928	if (current == '/' && !lastWasEscape && !inBrackets)
929	break;
930
931	record16(current);
932
933	if (lastWasEscape) {
934	lastWasEscape = false;
935	continue;
936	}
937
938	switch (current) {
939	case '[':
940	inBrackets = true;
941	break;
942	case ']':
943	inBrackets = false;
944	break;
945	case '\\':
946	lastWasEscape = true;
947	break;
948	}
949	}
950
951	pattern = makeIdentifier(m_buffer16.data(), m_buffer16.size());
952	m_buffer16.resize(0);
953
954	while (isIdentPart(m_current)) {
955	record16(m_current);
956	shift1();
957	}
958
959	flags = makeIdentifier(m_buffer16.data(), m_buffer16.size());
960	m_buffer16.resize(0);
961
962	return true;
963	}
964
965	bool Lexer::skipRegExp()
966	{
967	bool lastWasEscape = false;
968	bool inBrackets = false;
969
970	while (true) {
971	int current = m_current;
972
973	if (isLineTerminator(current) \|\| current == -1)
974	return false;
975
976	shift1();
977
978	if (current == '/' && !lastWasEscape && !inBrackets)
979	break;
980
981	if (lastWasEscape) {
982	lastWasEscape = false;
983	continue;
984	}
985
986	switch (current) {
987	case '[':
988	inBrackets = true;
989	break;
990	case ']':
991	inBrackets = false;
992	break;
993	case '\\':
994	lastWasEscape = true;
995	break;
996	}
997	}
998
999	while (isIdentPart(m_current))
1000	shift1();
1001
1002	return true;
1003	}
1004
1005	void Lexer::clear()
1006	{
1007	m_arena = 0;
1008	m_codeWithoutBOMs.clear();
1009
1010	Vector<char> newBuffer8;
1011	m_buffer8.swap(newBuffer8);
1012
1013	Vector<UChar> newBuffer16;
1014	m_buffer16.swap(newBuffer16);
1015
1016	m_isReparsing = false;
1017	}
1018
1019	SourceCode Lexer::sourceCode(int openBrace, int closeBrace, int firstLine)
1020	{
1021	if (m_codeWithoutBOMs.isEmpty())
1022	return SourceCode(m_source->provider(), openBrace, closeBrace + 1, firstLine);
1023
1024	const UChar* data = m_source->provider()->data();
1025
1026	ASSERT(openBrace < closeBrace);
1027	int i;
1028	for (i = m_source->startOffset(); i < openBrace; ++i) {
1029	if (data[i] == byteOrderMark) {
1030	openBrace++;
1031	closeBrace++;
1032	}
1033	}
1034	for (; i < closeBrace; ++i) {
1035	if (data[i] == byteOrderMark)
1036	closeBrace++;
1037	}
1038
1039	ASSERT(openBrace < closeBrace);
1040
1041	return SourceCode(m_source->provider(), openBrace, closeBrace + 1, firstLine);
1042	}
1043
1044	} // namespace JSC

Note: See TracBrowser for help on using the repository browser.

Download in other formats: