Context Navigation

source: webkit/trunk/JavaScriptCore/parser/Lexer.cpp@ 51505

Visit:

Last change on this file since 51505 was 51505, checked in by [email protected], 15 years ago

2009-11-30 Laszlo Gombos <Laszlo Gombos>

Reviewed by Kenneth Rohde Christiansen.

[Qt] Remove obsolete PLATFORM(KDE) code
https://bugs.webkit.org/show_bug.cgi?id=31958

KDE is now using unpatched QtWebKit.

parser/Lexer.cpp: Remove obsolete KDE_USE_FINAL guard
wtf/Platform.h: Remove PLATFORM(KDE) definition and code section that is guarded with it.

Property svn:eol-style set to native

File size: 26.3 KB

Line
1	/*
2	* Copyright (C) 1999-2000 Harri Porten ([email protected])
3	* Copyright (C) 2006, 2007, 2008, 2009 Apple Inc. All Rights Reserved.
4	* Copyright (C) 2007 Cameron Zwarich ([email protected])
5	*
6	* This library is free software; you can redistribute it and/or
7	* modify it under the terms of the GNU Library General Public
8	* License as published by the Free Software Foundation; either
9	* version 2 of the License, or (at your option) any later version.
10	*
11	* This library is distributed in the hope that it will be useful,
12	* but WITHOUT ANY WARRANTY; without even the implied warranty of
13	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14	* Library General Public License for more details.
15	*
16	* You should have received a copy of the GNU Library General Public License
17	* along with this library; see the file COPYING.LIB. If not, write to
18	* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19	* Boston, MA 02110-1301, USA.
20	*
21	*/
22
23	#include "config.h"
24	#include "Lexer.h"
25
26	#include "JSFunction.h"
27	#include "JSGlobalObjectFunctions.h"
28	#include "NodeInfo.h"
29	#include "Nodes.h"
30	#include "dtoa.h"
31	#include <ctype.h>
32	#include <limits.h>
33	#include <string.h>
34	#include <wtf/Assertions.h>
35
36	using namespace WTF;
37	using namespace Unicode;
38
39	// We can't specify the namespace in yacc's C output, so do it here instead.
40	using namespace JSC;
41
42	#include "Grammar.h"
43	#include "Lookup.h"
44	#include "Lexer.lut.h"
45
46	namespace JSC {
47
48	static const UChar byteOrderMark = 0xFEFF;
49
50	Lexer::Lexer(JSGlobalData* globalData)
51	: m_isReparsing(false)
52	, m_globalData(globalData)
53	, m_keywordTable(JSC::mainTable)
54	{
55	m_buffer8.reserveInitialCapacity(initialReadBufferCapacity);
56	m_buffer16.reserveInitialCapacity(initialReadBufferCapacity);
57	}
58
59	Lexer::~Lexer()
60	{
61	m_keywordTable.deleteTable();
62	}
63
64	inline const UChar* Lexer::currentCharacter() const
65	{
66	return m_code - 4;
67	}
68
69	inline int Lexer::currentOffset() const
70	{
71	return currentCharacter() - m_codeStart;
72	}
73
74	ALWAYS_INLINE void Lexer::shift1()
75	{
76	m_current = m_next1;
77	m_next1 = m_next2;
78	m_next2 = m_next3;
79	if (LIKELY(m_code < m_codeEnd))
80	m_next3 = m_code[0];
81	else
82	m_next3 = -1;
83
84	++m_code;
85	}
86
87	ALWAYS_INLINE void Lexer::shift2()
88	{
89	m_current = m_next2;
90	m_next1 = m_next3;
91	if (LIKELY(m_code + 1 < m_codeEnd)) {
92	m_next2 = m_code[0];
93	m_next3 = m_code[1];
94	} else {
95	m_next2 = m_code < m_codeEnd ? m_code[0] : -1;
96	m_next3 = -1;
97	}
98
99	m_code += 2;
100	}
101
102	ALWAYS_INLINE void Lexer::shift3()
103	{
104	m_current = m_next3;
105	if (LIKELY(m_code + 2 < m_codeEnd)) {
106	m_next1 = m_code[0];
107	m_next2 = m_code[1];
108	m_next3 = m_code[2];
109	} else {
110	m_next1 = m_code < m_codeEnd ? m_code[0] : -1;
111	m_next2 = m_code + 1 < m_codeEnd ? m_code[1] : -1;
112	m_next3 = -1;
113	}
114
115	m_code += 3;
116	}
117
118	ALWAYS_INLINE void Lexer::shift4()
119	{
120	if (LIKELY(m_code + 3 < m_codeEnd)) {
121	m_current = m_code[0];
122	m_next1 = m_code[1];
123	m_next2 = m_code[2];
124	m_next3 = m_code[3];
125	} else {
126	m_current = m_code < m_codeEnd ? m_code[0] : -1;
127	m_next1 = m_code + 1 < m_codeEnd ? m_code[1] : -1;
128	m_next2 = m_code + 2 < m_codeEnd ? m_code[2] : -1;
129	m_next3 = -1;
130	}
131
132	m_code += 4;
133	}
134
135	void Lexer::setCode(const SourceCode& source, ParserArena& arena)
136	{
137	m_arena = &arena.identifierArena();
138
139	m_lineNumber = source.firstLine();
140	m_delimited = false;
141	m_lastToken = -1;
142
143	const UChar* data = source.provider()->data();
144
145	m_source = &source;
146	m_codeStart = data;
147	m_code = data + source.startOffset();
148	m_codeEnd = data + source.endOffset();
149	m_error = false;
150	m_atLineStart = true;
151
152	// ECMA-262 calls for stripping all Cf characters, but we only strip BOM characters.
153	// See <https://bugs.webkit.org/show_bug.cgi?id=4931> for details.
154	if (source.provider()->hasBOMs()) {
155	for (const UChar* p = m_codeStart; p < m_codeEnd; ++p) {
156	if (UNLIKELY(*p == byteOrderMark)) {
157	copyCodeWithoutBOMs();
158	break;
159	}
160	}
161	}
162
163	// Read the first characters into the 4-character buffer.
164	shift4();
165	ASSERT(currentOffset() == source.startOffset());
166	}
167
168	void Lexer::copyCodeWithoutBOMs()
169	{
170	// Note: In this case, the character offset data for debugging will be incorrect.
171	// If it's important to correctly debug code with extraneous BOMs, then the caller
172	// should strip the BOMs when creating the SourceProvider object and do its own
173	// mapping of offsets within the stripped text to original text offset.
174
175	m_codeWithoutBOMs.reserveCapacity(m_codeEnd - m_code);
176	for (const UChar* p = m_code; p < m_codeEnd; ++p) {
177	UChar c = *p;
178	if (c != byteOrderMark)
179	m_codeWithoutBOMs.append(c);
180	}
181	ptrdiff_t startDelta = m_codeStart - m_code;
182	m_code = m_codeWithoutBOMs.data();
183	m_codeStart = m_code + startDelta;
184	m_codeEnd = m_codeWithoutBOMs.data() + m_codeWithoutBOMs.size();
185	}
186
187	void Lexer::shiftLineTerminator()
188	{
189	ASSERT(isLineTerminator(m_current));
190
191	// Allow both CRLF and LFCR.
192	if (m_current + m_next1 == '\n' + '\r')
193	shift2();
194	else
195	shift1();
196
197	++m_lineNumber;
198	}
199
200	ALWAYS_INLINE const Identifier* Lexer::makeIdentifier(const UChar* characters, size_t length)
201	{
202	return &m_arena->makeIdentifier(m_globalData, characters, length);
203	}
204
205	inline bool Lexer::lastTokenWasRestrKeyword() const
206	{
207	return m_lastToken == CONTINUE \|\| m_lastToken == BREAK \|\| m_lastToken == RETURN \|\| m_lastToken == THROW;
208	}
209
210	static NEVER_INLINE bool isNonASCIIIdentStart(int c)
211	{
212	return category(c) & (Letter_Uppercase \| Letter_Lowercase \| Letter_Titlecase \| Letter_Modifier \| Letter_Other);
213	}
214
215	static inline bool isIdentStart(int c)
216	{
217	return isASCII(c) ? isASCIIAlpha(c) \|\| c == '$' \|\| c == '_' : isNonASCIIIdentStart(c);
218	}
219
220	static NEVER_INLINE bool isNonASCIIIdentPart(int c)
221	{
222	return category(c) & (Letter_Uppercase \| Letter_Lowercase \| Letter_Titlecase \| Letter_Modifier \| Letter_Other
223	\| Mark_NonSpacing \| Mark_SpacingCombining \| Number_DecimalDigit \| Punctuation_Connector);
224	}
225
226	static inline bool isIdentPart(int c)
227	{
228	return isASCII(c) ? isASCIIAlphanumeric(c) \|\| c == '$' \|\| c == '_' : isNonASCIIIdentPart(c);
229	}
230
// Maps the character that follows a backslash in a string literal to the character it
// denotes. Characters without a special meaning are returned unchanged.
static inline int singleEscape(int c)
{
    if (c == 'b')
        return '\b'; // 0x08, backspace
    if (c == 't')
        return '\t'; // 0x09, horizontal tab
    if (c == 'n')
        return '\n'; // 0x0A, line feed
    if (c == 'v')
        return '\v'; // 0x0B, vertical tab
    if (c == 'f')
        return '\f'; // 0x0C, form feed
    if (c == 'r')
        return '\r'; // 0x0D, carriage return
    return c;
}
250
251	inline void Lexer::record8(int c)
252	{
253	ASSERT(c >= 0);
254	ASSERT(c <= 0xFF);
255	m_buffer8.append(static_cast<char>(c));
256	}
257
258	inline void Lexer::record16(UChar c)
259	{
260	m_buffer16.append(c);
261	}
262
263	inline void Lexer::record16(int c)
264	{
265	ASSERT(c >= 0);
266	ASSERT(c <= USHRT_MAX);
267	record16(UChar(static_cast<unsigned short>(c)));
268	}
269
270	int Lexer::lex(void* p1, void* p2)
271	{
272	ASSERT(!m_error);
273	ASSERT(m_buffer8.isEmpty());
274	ASSERT(m_buffer16.isEmpty());
275
276	YYSTYPE* lvalp = static_cast<YYSTYPE*>(p1);
277	YYLTYPE* llocp = static_cast<YYLTYPE*>(p2);
278	int token = 0;
279	m_terminator = false;
280
281	start:
282	while (isWhiteSpace(m_current))
283	shift1();
284
285	int startOffset = currentOffset();
286
287	if (m_current == -1) {
288	if (!m_terminator && !m_delimited && !m_isReparsing) {
289	// automatic semicolon insertion if program incomplete
290	token = ';';
291	goto doneSemicolon;
292	}
293	return 0;
294	}
295
296	m_delimited = false;
297	switch (m_current) {
298	case '>':
299	if (m_next1 == '>' && m_next2 == '>') {
300	if (m_next3 == '=') {
301	shift4();
302	token = URSHIFTEQUAL;
303	break;
304	}
305	shift3();
306	token = URSHIFT;
307	break;
308	}
309	if (m_next1 == '>') {
310	if (m_next2 == '=') {
311	shift3();
312	token = RSHIFTEQUAL;
313	break;
314	}
315	shift2();
316	token = RSHIFT;
317	break;
318	}
319	if (m_next1 == '=') {
320	shift2();
321	token = GE;
322	break;
323	}
324	shift1();
325	token = '>';
326	break;
327	case '=':
328	if (m_next1 == '=') {
329	if (m_next2 == '=') {
330	shift3();
331	token = STREQ;
332	break;
333	}
334	shift2();
335	token = EQEQ;
336	break;
337	}
338	shift1();
339	token = '=';
340	break;
341	case '!':
342	if (m_next1 == '=') {
343	if (m_next2 == '=') {
344	shift3();
345	token = STRNEQ;
346	break;
347	}
348	shift2();
349	token = NE;
350	break;
351	}
352	shift1();
353	token = '!';
354	break;
355	case '<':
356	if (m_next1 == '!' && m_next2 == '-' && m_next3 == '-') {
357	// <!-- marks the beginning of a line comment (for www usage)
358	shift4();
359	goto inSingleLineComment;
360	}
361	if (m_next1 == '<') {
362	if (m_next2 == '=') {
363	shift3();
364	token = LSHIFTEQUAL;
365	break;
366	}
367	shift2();
368	token = LSHIFT;
369	break;
370	}
371	if (m_next1 == '=') {
372	shift2();
373	token = LE;
374	break;
375	}
376	shift1();
377	token = '<';
378	break;
379	case '+':
380	if (m_next1 == '+') {
381	shift2();
382	if (m_terminator) {
383	token = AUTOPLUSPLUS;
384	break;
385	}
386	token = PLUSPLUS;
387	break;
388	}
389	if (m_next1 == '=') {
390	shift2();
391	token = PLUSEQUAL;
392	break;
393	}
394	shift1();
395	token = '+';
396	break;
397	case '-':
398	if (m_next1 == '-') {
399	if (m_atLineStart && m_next2 == '>') {
400	shift3();
401	goto inSingleLineComment;
402	}
403	shift2();
404	if (m_terminator) {
405	token = AUTOMINUSMINUS;
406	break;
407	}
408	token = MINUSMINUS;
409	break;
410	}
411	if (m_next1 == '=') {
412	shift2();
413	token = MINUSEQUAL;
414	break;
415	}
416	shift1();
417	token = '-';
418	break;
419	case '*':
420	if (m_next1 == '=') {
421	shift2();
422	token = MULTEQUAL;
423	break;
424	}
425	shift1();
426	token = '*';
427	break;
428	case '/':
429	if (m_next1 == '/') {
430	shift2();
431	goto inSingleLineComment;
432	}
433	if (m_next1 == '*')
434	goto inMultiLineComment;
435	if (m_next1 == '=') {
436	shift2();
437	token = DIVEQUAL;
438	break;
439	}
440	shift1();
441	token = '/';
442	break;
443	case '&':
444	if (m_next1 == '&') {
445	shift2();
446	token = AND;
447	break;
448	}
449	if (m_next1 == '=') {
450	shift2();
451	token = ANDEQUAL;
452	break;
453	}
454	shift1();
455	token = '&';
456	break;
457	case '^':
458	if (m_next1 == '=') {
459	shift2();
460	token = XOREQUAL;
461	break;
462	}
463	shift1();
464	token = '^';
465	break;
466	case '%':
467	if (m_next1 == '=') {
468	shift2();
469	token = MODEQUAL;
470	break;
471	}
472	shift1();
473	token = '%';
474	break;
475	case '\|':
476	if (m_next1 == '=') {
477	shift2();
478	token = OREQUAL;
479	break;
480	}
481	if (m_next1 == '\|') {
482	shift2();
483	token = OR;
484	break;
485	}
486	shift1();
487	token = '\|';
488	break;
489	case '.':
490	if (isASCIIDigit(m_next1)) {
491	record8('.');
492	shift1();
493	goto inNumberAfterDecimalPoint;
494	}
495	token = '.';
496	shift1();
497	break;
498	case ',':
499	case '~':
500	case '?':
501	case ':':
502	case '(':
503	case ')':
504	case '[':
505	case ']':
506	token = m_current;
507	shift1();
508	break;
509	case ';':
510	shift1();
511	m_delimited = true;
512	token = ';';
513	break;
514	case '{':
515	lvalp->intValue = currentOffset();
516	shift1();
517	token = OPENBRACE;
518	break;
519	case '}':
520	lvalp->intValue = currentOffset();
521	shift1();
522	m_delimited = true;
523	token = CLOSEBRACE;
524	break;
525	case '\\':
526	goto startIdentifierWithBackslash;
527	case '0':
528	goto startNumberWithZeroDigit;
529	case '1':
530	case '2':
531	case '3':
532	case '4':
533	case '5':
534	case '6':
535	case '7':
536	case '8':
537	case '9':
538	goto startNumber;
539	case '"':
540	case '\'':
541	goto startString;
542	default:
543	if (isIdentStart(m_current))
544	goto startIdentifierOrKeyword;
545	if (isLineTerminator(m_current)) {
546	shiftLineTerminator();
547	m_atLineStart = true;
548	m_terminator = true;
549	if (lastTokenWasRestrKeyword()) {
550	token = ';';
551	goto doneSemicolon;
552	}
553	goto start;
554	}
555	goto returnError;
556	}
557
558	m_atLineStart = false;
559	goto returnToken;
560
561	startString: {
562	int stringQuoteCharacter = m_current;
563	shift1();
564
565	const UChar* stringStart = currentCharacter();
566	while (m_current != stringQuoteCharacter) {
567	// Fast check for characters that require special handling.
568	// Catches -1, \n, \r, \, 0x2028, and 0x2029 as efficiently
569	// as possible, and lets through all common ASCII characters.
570	if (UNLIKELY(m_current == '\\') \|\| UNLIKELY(((static_cast<unsigned>(m_current) - 0xE) & 0x2000))) {
571	m_buffer16.append(stringStart, currentCharacter() - stringStart);
572	goto inString;
573	}
574	shift1();
575	}
576	lvalp->ident = makeIdentifier(stringStart, currentCharacter() - stringStart);
577	shift1();
578	m_atLineStart = false;
579	m_delimited = false;
580	token = STRING;
581	goto returnToken;
582
583	inString:
584	while (m_current != stringQuoteCharacter) {
585	if (m_current == '\\')
586	goto inStringEscapeSequence;
587	if (UNLIKELY(isLineTerminator(m_current)))
588	goto returnError;
589	if (UNLIKELY(m_current == -1))
590	goto returnError;
591	record16(m_current);
592	shift1();
593	}
594	goto doneString;
595
596	inStringEscapeSequence:
597	shift1();
598	if (m_current == 'x') {
599	shift1();
600	if (isASCIIHexDigit(m_current) && isASCIIHexDigit(m_next1)) {
601	record16(convertHex(m_current, m_next1));
602	shift2();
603	goto inString;
604	}
605	record16('x');
606	if (m_current == stringQuoteCharacter)
607	goto doneString;
608	goto inString;
609	}
610	if (m_current == 'u') {
611	shift1();
612	if (isASCIIHexDigit(m_current) && isASCIIHexDigit(m_next1) && isASCIIHexDigit(m_next2) && isASCIIHexDigit(m_next3)) {
613	record16(convertUnicode(m_current, m_next1, m_next2, m_next3));
614	shift4();
615	goto inString;
616	}
617	if (m_current == stringQuoteCharacter) {
618	record16('u');
619	goto doneString;
620	}
621	goto returnError;
622	}
623	if (isASCIIOctalDigit(m_current)) {
624	if (m_current >= '0' && m_current <= '3' && isASCIIOctalDigit(m_next1) && isASCIIOctalDigit(m_next2)) {
625	record16((m_current - '0') * 64 + (m_next1 - '0') * 8 + m_next2 - '0');
626	shift3();
627	goto inString;
628	}
629	if (isASCIIOctalDigit(m_next1)) {
630	record16((m_current - '0') * 8 + m_next1 - '0');
631	shift2();
632	goto inString;
633	}
634	record16(m_current - '0');
635	shift1();
636	goto inString;
637	}
638	if (isLineTerminator(m_current)) {
639	shiftLineTerminator();
640	goto inString;
641	}
642	record16(singleEscape(m_current));
643	shift1();
644	goto inString;
645	}
646
647	startIdentifierWithBackslash:
648	shift1();
649	if (UNLIKELY(m_current != 'u'))
650	goto returnError;
651	shift1();
652	if (UNLIKELY(!isASCIIHexDigit(m_current) \|\| !isASCIIHexDigit(m_next1) \|\| !isASCIIHexDigit(m_next2) \|\| !isASCIIHexDigit(m_next3)))
653	goto returnError;
654	token = convertUnicode(m_current, m_next1, m_next2, m_next3);
655	if (UNLIKELY(!isIdentStart(token)))
656	goto returnError;
657	goto inIdentifierAfterCharacterCheck;
658
659	startIdentifierOrKeyword: {
660	const UChar* identifierStart = currentCharacter();
661	shift1();
662	while (isIdentPart(m_current))
663	shift1();
664	if (LIKELY(m_current != '\\')) {
665	lvalp->ident = makeIdentifier(identifierStart, currentCharacter() - identifierStart);
666	goto doneIdentifierOrKeyword;
667	}
668	m_buffer16.append(identifierStart, currentCharacter() - identifierStart);
669	}
670
671	do {
672	shift1();
673	if (UNLIKELY(m_current != 'u'))
674	goto returnError;
675	shift1();
676	if (UNLIKELY(!isASCIIHexDigit(m_current) \|\| !isASCIIHexDigit(m_next1) \|\| !isASCIIHexDigit(m_next2) \|\| !isASCIIHexDigit(m_next3)))
677	goto returnError;
678	token = convertUnicode(m_current, m_next1, m_next2, m_next3);
679	if (UNLIKELY(!isIdentPart(token)))
680	goto returnError;
681	inIdentifierAfterCharacterCheck:
682	record16(token);
683	shift4();
684
685	while (isIdentPart(m_current)) {
686	record16(m_current);
687	shift1();
688	}
689	} while (UNLIKELY(m_current == '\\'));
690	goto doneIdentifier;
691
692	inSingleLineComment:
693	while (!isLineTerminator(m_current)) {
694	if (UNLIKELY(m_current == -1))
695	return 0;
696	shift1();
697	}
698	shiftLineTerminator();
699	m_atLineStart = true;
700	m_terminator = true;
701	if (lastTokenWasRestrKeyword())
702	goto doneSemicolon;
703	goto start;
704
705	inMultiLineComment:
706	shift2();
707	while (m_current != '*' \|\| m_next1 != '/') {
708	if (isLineTerminator(m_current))
709	shiftLineTerminator();
710	else {
711	shift1();
712	if (UNLIKELY(m_current == -1))
713	goto returnError;
714	}
715	}
716	shift2();
717	m_atLineStart = false;
718	goto start;
719
720	startNumberWithZeroDigit:
721	shift1();
722	if ((m_current \| 0x20) == 'x' && isASCIIHexDigit(m_next1)) {
723	shift1();
724	goto inHex;
725	}
726	if (m_current == '.') {
727	record8('0');
728	record8('.');
729	shift1();
730	goto inNumberAfterDecimalPoint;
731	}
732	if ((m_current \| 0x20) == 'e') {
733	record8('0');
734	record8('e');
735	shift1();
736	goto inExponentIndicator;
737	}
738	if (isASCIIOctalDigit(m_current))
739	goto inOctal;
740	if (isASCIIDigit(m_current))
741	goto startNumber;
742	lvalp->doubleValue = 0;
743	goto doneNumeric;
744
745	inNumberAfterDecimalPoint:
746	while (isASCIIDigit(m_current)) {
747	record8(m_current);
748	shift1();
749	}
750	if ((m_current \| 0x20) == 'e') {
751	record8('e');
752	shift1();
753	goto inExponentIndicator;
754	}
755	goto doneNumber;
756
757	inExponentIndicator:
758	if (m_current == '+' \|\| m_current == '-') {
759	record8(m_current);
760	shift1();
761	}
762	if (!isASCIIDigit(m_current))
763	goto returnError;
764	do {
765	record8(m_current);
766	shift1();
767	} while (isASCIIDigit(m_current));
768	goto doneNumber;
769
770	inOctal: {
771	do {
772	record8(m_current);
773	shift1();
774	} while (isASCIIOctalDigit(m_current));
775	if (isASCIIDigit(m_current))
776	goto startNumber;
777
778	double dval = 0;
779
780	const char* end = m_buffer8.end();
781	for (const char* p = m_buffer8.data(); p < end; ++p) {
782	dval *= 8;
783	dval += *p - '0';
784	}
785	if (dval >= mantissaOverflowLowerBound)
786	dval = parseIntOverflow(m_buffer8.data(), end - m_buffer8.data(), 8);
787
788	m_buffer8.resize(0);
789
790	lvalp->doubleValue = dval;
791	goto doneNumeric;
792	}
793
794	inHex: {
795	do {
796	record8(m_current);
797	shift1();
798	} while (isASCIIHexDigit(m_current));
799
800	double dval = 0;
801
802	const char* end = m_buffer8.end();
803	for (const char* p = m_buffer8.data(); p < end; ++p) {
804	dval *= 16;
805	dval += toASCIIHexValue(*p);
806	}
807	if (dval >= mantissaOverflowLowerBound)
808	dval = parseIntOverflow(m_buffer8.data(), end - m_buffer8.data(), 16);
809
810	m_buffer8.resize(0);
811
812	lvalp->doubleValue = dval;
813	goto doneNumeric;
814	}
815
816	startNumber:
817	record8(m_current);
818	shift1();
819	while (isASCIIDigit(m_current)) {
820	record8(m_current);
821	shift1();
822	}
823	if (m_current == '.') {
824	record8('.');
825	shift1();
826	goto inNumberAfterDecimalPoint;
827	}
828	if ((m_current \| 0x20) == 'e') {
829	record8('e');
830	shift1();
831	goto inExponentIndicator;
832	}
833
834	// Fall through into doneNumber.
835
836	doneNumber:
837	// Null-terminate string for strtod.
838	m_buffer8.append('\0');
839	lvalp->doubleValue = WTF::strtod(m_buffer8.data(), 0);
840	m_buffer8.resize(0);
841
842	// Fall through into doneNumeric.
843
844	doneNumeric:
845	// No identifiers allowed directly after numeric literal, e.g. "3in" is bad.
846	if (UNLIKELY(isIdentStart(m_current)))
847	goto returnError;
848
849	m_atLineStart = false;
850	m_delimited = false;
851	token = NUMBER;
852	goto returnToken;
853
854	doneSemicolon:
855	token = ';';
856	m_delimited = true;
857	goto returnToken;
858
859	doneIdentifier:
860	m_atLineStart = false;
861	m_delimited = false;
862	lvalp->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
863	m_buffer16.resize(0);
864	token = IDENT;
865	goto returnToken;
866
867	doneIdentifierOrKeyword: {
868	m_atLineStart = false;
869	m_delimited = false;
870	m_buffer16.resize(0);
871	const HashEntry* entry = m_keywordTable.entry(m_globalData, *lvalp->ident);
872	token = entry ? entry->lexerValue() : IDENT;
873	goto returnToken;
874	}
875
876	doneString:
877	// Atomize constant strings in case they're later used in property lookup.
878	shift1();
879	m_atLineStart = false;
880	m_delimited = false;
881	lvalp->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
882	m_buffer16.resize(0);
883	token = STRING;
884
885	// Fall through into returnToken.
886
887	returnToken: {
888	int lineNumber = m_lineNumber;
889	llocp->first_line = lineNumber;
890	llocp->last_line = lineNumber;
891	llocp->first_column = startOffset;
892	llocp->last_column = currentOffset();
893
894	m_lastToken = token;
895	return token;
896	}
897
898	returnError:
899	m_error = true;
900	return -1;
901	}
902
903	bool Lexer::scanRegExp(const Identifier& pattern, const Identifier& flags, UChar patternPrefix)
904	{
905	ASSERT(m_buffer16.isEmpty());
906
907	bool lastWasEscape = false;
908	bool inBrackets = false;
909
910	if (patternPrefix) {
911	ASSERT(!isLineTerminator(patternPrefix));
912	ASSERT(patternPrefix != '/');
913	ASSERT(patternPrefix != '[');
914	record16(patternPrefix);
915	}
916
917	while (true) {
918	int current = m_current;
919
920	if (isLineTerminator(current) \|\| current == -1) {
921	m_buffer16.resize(0);
922	return false;
923	}
924
925	shift1();
926
927	if (current == '/' && !lastWasEscape && !inBrackets)
928	break;
929
930	record16(current);
931
932	if (lastWasEscape) {
933	lastWasEscape = false;
934	continue;
935	}
936
937	switch (current) {
938	case '[':
939	inBrackets = true;
940	break;
941	case ']':
942	inBrackets = false;
943	break;
944	case '\\':
945	lastWasEscape = true;
946	break;
947	}
948	}
949
950	pattern = makeIdentifier(m_buffer16.data(), m_buffer16.size());
951	m_buffer16.resize(0);
952
953	while (isIdentPart(m_current)) {
954	record16(m_current);
955	shift1();
956	}
957
958	flags = makeIdentifier(m_buffer16.data(), m_buffer16.size());
959	m_buffer16.resize(0);
960
961	return true;
962	}
963
964	bool Lexer::skipRegExp()
965	{
966	bool lastWasEscape = false;
967	bool inBrackets = false;
968
969	while (true) {
970	int current = m_current;
971
972	if (isLineTerminator(current) \|\| current == -1)
973	return false;
974
975	shift1();
976
977	if (current == '/' && !lastWasEscape && !inBrackets)
978	break;
979
980	if (lastWasEscape) {
981	lastWasEscape = false;
982	continue;
983	}
984
985	switch (current) {
986	case '[':
987	inBrackets = true;
988	break;
989	case ']':
990	inBrackets = false;
991	break;
992	case '\\':
993	lastWasEscape = true;
994	break;
995	}
996	}
997
998	while (isIdentPart(m_current))
999	shift1();
1000
1001	return true;
1002	}
1003
1004	void Lexer::clear()
1005	{
1006	m_arena = 0;
1007	m_codeWithoutBOMs.clear();
1008
1009	Vector<char> newBuffer8;
1010	newBuffer8.reserveInitialCapacity(initialReadBufferCapacity);
1011	m_buffer8.swap(newBuffer8);
1012
1013	Vector<UChar> newBuffer16;
1014	newBuffer16.reserveInitialCapacity(initialReadBufferCapacity);
1015	m_buffer16.swap(newBuffer16);
1016
1017	m_isReparsing = false;
1018	}
1019
1020	SourceCode Lexer::sourceCode(int openBrace, int closeBrace, int firstLine)
1021	{
1022	if (m_codeWithoutBOMs.isEmpty())
1023	return SourceCode(m_source->provider(), openBrace, closeBrace + 1, firstLine);
1024
1025	const UChar* data = m_source->provider()->data();
1026
1027	ASSERT(openBrace < closeBrace);
1028
1029	int numBOMsBeforeOpenBrace = 0;
1030	int numBOMsBetweenBraces = 0;
1031
1032	int i;
1033	for (i = m_source->startOffset(); i < openBrace; ++i)
1034	numBOMsBeforeOpenBrace += data[i] == byteOrderMark;
1035	for (; i < closeBrace; ++i)
1036	numBOMsBetweenBraces += data[i] == byteOrderMark;
1037
1038	return SourceCode(m_source->provider(), openBrace + numBOMsBeforeOpenBrace,
1039	closeBrace + numBOMsBeforeOpenBrace + numBOMsBetweenBraces + 1, firstLine);
1040	}
1041
1042	} // namespace JSC

Note: See TracBrowser for help on using the repository browser.

Download in other formats: