Context Navigation

source: webkit/trunk/JavaScriptCore/parser/Lexer.cpp@ 45609

Visit:

Last change on this file since 45609 was 44224, checked in by [email protected], 16 years ago
Revert 44221.
Property svn:eol-style set to `native`
File size: 25.5 KB

Line
1	/*
2	* Copyright (C) 1999-2000 Harri Porten ([email protected])
3	* Copyright (C) 2006, 2007, 2008, 2009 Apple Inc. All Rights Reserved.
4	* Copyright (C) 2007 Cameron Zwarich ([email protected])
5	*
6	* This library is free software; you can redistribute it and/or
7	* modify it under the terms of the GNU Library General Public
8	* License as published by the Free Software Foundation; either
9	* version 2 of the License, or (at your option) any later version.
10	*
11	* This library is distributed in the hope that it will be useful,
12	* but WITHOUT ANY WARRANTY; without even the implied warranty of
13	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14	* Library General Public License for more details.
15	*
16	* You should have received a copy of the GNU Library General Public License
17	* along with this library; see the file COPYING.LIB. If not, write to
18	* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19	* Boston, MA 02110-1301, USA.
20	*
21	*/
22
23	#include "config.h"
24	#include "Lexer.h"
25
26	#include "JSFunction.h"
27	#include "JSGlobalObjectFunctions.h"
28	#include "NodeInfo.h"
29	#include "Nodes.h"
30	#include "dtoa.h"
31	#include <ctype.h>
32	#include <limits.h>
33	#include <string.h>
34	#include <wtf/Assertions.h>
35
36	using namespace WTF;
37	using namespace Unicode;
38
39	// We can't specify the namespace in yacc's C output, so do it here instead.
40	using namespace JSC;
41
42	#ifndef KDE_USE_FINAL
43	#include "Grammar.h"
44	#endif
45
46	#include "Lookup.h"
47	#include "Lexer.lut.h"
48
49	// A bridge for yacc from the C world to the C++ world.
50	int jscyylex(void* lvalp, void* llocp, void* globalData)
51	{
52	return static_cast<JSGlobalData*>(globalData)->lexer->lex(lvalp, llocp);
53	}
54
55	namespace JSC {
56
57	static const UChar byteOrderMark = 0xFEFF;
58
59	Lexer::Lexer(JSGlobalData* globalData)
60	: m_isReparsing(false)
61	, m_globalData(globalData)
62	, m_keywordTable(JSC::mainTable)
63	{
64	m_buffer8.reserveInitialCapacity(initialReadBufferCapacity);
65	m_buffer16.reserveInitialCapacity(initialReadBufferCapacity);
66	}
67
68	Lexer::~Lexer()
69	{
70	m_keywordTable.deleteTable();
71	}
72
73	inline const UChar* Lexer::currentCharacter() const
74	{
75	return m_code - 4;
76	}
77
78	inline int Lexer::currentOffset() const
79	{
80	return currentCharacter() - m_codeStart;
81	}
82
83	ALWAYS_INLINE void Lexer::shift1()
84	{
85	m_current = m_next1;
86	m_next1 = m_next2;
87	m_next2 = m_next3;
88	if (LIKELY(m_code < m_codeEnd))
89	m_next3 = m_code[0];
90	else
91	m_next3 = -1;
92
93	++m_code;
94	}
95
96	ALWAYS_INLINE void Lexer::shift2()
97	{
98	m_current = m_next2;
99	m_next1 = m_next3;
100	if (LIKELY(m_code + 1 < m_codeEnd)) {
101	m_next2 = m_code[0];
102	m_next3 = m_code[1];
103	} else {
104	m_next2 = m_code < m_codeEnd ? m_code[0] : -1;
105	m_next3 = -1;
106	}
107
108	m_code += 2;
109	}
110
111	ALWAYS_INLINE void Lexer::shift3()
112	{
113	m_current = m_next3;
114	if (LIKELY(m_code + 2 < m_codeEnd)) {
115	m_next1 = m_code[0];
116	m_next2 = m_code[1];
117	m_next3 = m_code[2];
118	} else {
119	m_next1 = m_code < m_codeEnd ? m_code[0] : -1;
120	m_next2 = m_code + 1 < m_codeEnd ? m_code[1] : -1;
121	m_next3 = -1;
122	}
123
124	m_code += 3;
125	}
126
127	ALWAYS_INLINE void Lexer::shift4()
128	{
129	if (LIKELY(m_code + 3 < m_codeEnd)) {
130	m_current = m_code[0];
131	m_next1 = m_code[1];
132	m_next2 = m_code[2];
133	m_next3 = m_code[3];
134	} else {
135	m_current = m_code < m_codeEnd ? m_code[0] : -1;
136	m_next1 = m_code + 1 < m_codeEnd ? m_code[1] : -1;
137	m_next2 = m_code + 2 < m_codeEnd ? m_code[2] : -1;
138	m_next3 = -1;
139	}
140
141	m_code += 4;
142	}
143
144	void Lexer::setCode(const SourceCode& source)
145	{
146	m_lineNumber = source.firstLine();
147	m_delimited = false;
148	m_lastToken = -1;
149
150	const UChar* data = source.provider()->data();
151
152	m_source = &source;
153	m_codeStart = data;
154	m_code = data + source.startOffset();
155	m_codeEnd = data + source.endOffset();
156	m_error = false;
157	m_atLineStart = true;
158
159	// ECMA-262 calls for stripping all Cf characters, but we only strip BOM characters.
160	// See <https://bugs.webkit.org/show_bug.cgi?id=4931> for details.
161	if (source.provider()->hasBOMs()) {
162	for (const UChar* p = m_codeStart; p < m_codeEnd; ++p) {
163	if (UNLIKELY(*p == byteOrderMark)) {
164	copyCodeWithoutBOMs();
165	break;
166	}
167	}
168	}
169
170	// Read the first characters into the 4-character buffer.
171	shift4();
172	ASSERT(currentOffset() == source.startOffset());
173	}
174
175	void Lexer::copyCodeWithoutBOMs()
176	{
177	// Note: In this case, the character offset data for debugging will be incorrect.
178	// If it's important to correctly debug code with extraneous BOMs, then the caller
179	// should strip the BOMs when creating the SourceProvider object and do its own
180	// mapping of offsets within the stripped text to original text offset.
181
182	m_codeWithoutBOMs.reserveCapacity(m_codeEnd - m_code);
183	for (const UChar* p = m_code; p < m_codeEnd; ++p) {
184	UChar c = *p;
185	if (c != byteOrderMark)
186	m_codeWithoutBOMs.append(c);
187	}
188	ptrdiff_t startDelta = m_codeStart - m_code;
189	m_code = m_codeWithoutBOMs.data();
190	m_codeStart = m_code + startDelta;
191	m_codeEnd = m_codeWithoutBOMs.data() + m_codeWithoutBOMs.size();
192	}
193
194	void Lexer::shiftLineTerminator()
195	{
196	ASSERT(isLineTerminator(m_current));
197
198	// Allow both CRLF and LFCR.
199	if (m_current + m_next1 == '\n' + '\r')
200	shift2();
201	else
202	shift1();
203
204	++m_lineNumber;
205	}
206
207	ALWAYS_INLINE Identifier* Lexer::makeIdentifier(const UChar* characters, size_t length)
208	{
209	m_identifiers.append(Identifier(m_globalData, characters, length));
210	return &m_identifiers.last();
211	}
212
213	inline bool Lexer::lastTokenWasRestrKeyword() const
214	{
215	return m_lastToken == CONTINUE \|\| m_lastToken == BREAK \|\| m_lastToken == RETURN \|\| m_lastToken == THROW;
216	}
217
218	static NEVER_INLINE bool isNonASCIIIdentStart(int c)
219	{
220	return category(c) & (Letter_Uppercase \| Letter_Lowercase \| Letter_Titlecase \| Letter_Modifier \| Letter_Other);
221	}
222
223	static inline bool isIdentStart(int c)
224	{
225	return isASCII(c) ? isASCIIAlpha(c) \|\| c == '$' \|\| c == '_' : isNonASCIIIdentStart(c);
226	}
227
228	static NEVER_INLINE bool isNonASCIIIdentPart(int c)
229	{
230	return category(c) & (Letter_Uppercase \| Letter_Lowercase \| Letter_Titlecase \| Letter_Modifier \| Letter_Other
231	\| Mark_NonSpacing \| Mark_SpacingCombining \| Number_DecimalDigit \| Punctuation_Connector);
232	}
233
234	static inline bool isIdentPart(int c)
235	{
236	return isASCII(c) ? isASCIIAlphanumeric(c) \|\| c == '$' \|\| c == '_' : isNonASCIIIdentPart(c);
237	}
238
// Maps the character following a backslash in a string literal to the
// character it denotes. Characters with no special meaning map to themselves.
static inline int singleEscape(int c)
{
    if (c == 'b')
        return 0x08;
    if (c == 't')
        return 0x09;
    if (c == 'n')
        return 0x0A;
    if (c == 'v')
        return 0x0B;
    if (c == 'f')
        return 0x0C;
    if (c == 'r')
        return 0x0D;
    return c;
}
258
259	inline void Lexer::record8(int c)
260	{
261	ASSERT(c >= 0);
262	ASSERT(c <= 0xFF);
263	m_buffer8.append(static_cast<char>(c));
264	}
265
266	inline void Lexer::record16(UChar c)
267	{
268	m_buffer16.append(c);
269	}
270
271	inline void Lexer::record16(int c)
272	{
273	ASSERT(c >= 0);
274	ASSERT(c <= USHRT_MAX);
275	record16(UChar(static_cast<unsigned short>(c)));
276	}
277
278	int Lexer::lex(void* p1, void* p2)
279	{
280	ASSERT(!m_error);
281	ASSERT(m_buffer8.isEmpty());
282	ASSERT(m_buffer16.isEmpty());
283
284	YYSTYPE* lvalp = static_cast<YYSTYPE*>(p1);
285	YYLTYPE* llocp = static_cast<YYLTYPE*>(p2);
286	int token = 0;
287	m_terminator = false;
288
289	start:
290	while (isWhiteSpace(m_current))
291	shift1();
292
293	int startOffset = currentOffset();
294
295	if (m_current == -1) {
296	if (!m_terminator && !m_delimited && !m_isReparsing) {
297	// automatic semicolon insertion if program incomplete
298	token = ';';
299	goto doneSemicolon;
300	}
301	return 0;
302	}
303
304	m_delimited = false;
305	switch (m_current) {
306	case '>':
307	if (m_next1 == '>' && m_next2 == '>') {
308	if (m_next3 == '=') {
309	shift4();
310	token = URSHIFTEQUAL;
311	break;
312	}
313	shift3();
314	token = URSHIFT;
315	break;
316	}
317	if (m_next1 == '>') {
318	if (m_next2 == '=') {
319	shift3();
320	token = RSHIFTEQUAL;
321	break;
322	}
323	shift2();
324	token = RSHIFT;
325	break;
326	}
327	if (m_next1 == '=') {
328	shift2();
329	token = GE;
330	break;
331	}
332	shift1();
333	token = '>';
334	break;
335	case '=':
336	if (m_next1 == '=') {
337	if (m_next2 == '=') {
338	shift3();
339	token = STREQ;
340	break;
341	}
342	shift2();
343	token = EQEQ;
344	break;
345	}
346	shift1();
347	token = '=';
348	break;
349	case '!':
350	if (m_next1 == '=') {
351	if (m_next2 == '=') {
352	shift3();
353	token = STRNEQ;
354	break;
355	}
356	shift2();
357	token = NE;
358	break;
359	}
360	shift1();
361	token = '!';
362	break;
363	case '<':
364	if (m_next1 == '!' && m_next2 == '-' && m_next3 == '-') {
365	// <!-- marks the beginning of a line comment (for www usage)
366	shift4();
367	goto inSingleLineComment;
368	}
369	if (m_next1 == '<') {
370	if (m_next2 == '=') {
371	shift3();
372	token = LSHIFTEQUAL;
373	break;
374	}
375	shift2();
376	token = LSHIFT;
377	break;
378	}
379	if (m_next1 == '=') {
380	shift2();
381	token = LE;
382	break;
383	}
384	shift1();
385	token = '<';
386	break;
387	case '+':
388	if (m_next1 == '+') {
389	shift2();
390	if (m_terminator) {
391	token = AUTOPLUSPLUS;
392	break;
393	}
394	token = PLUSPLUS;
395	break;
396	}
397	if (m_next1 == '=') {
398	shift2();
399	token = PLUSEQUAL;
400	break;
401	}
402	shift1();
403	token = '+';
404	break;
405	case '-':
406	if (m_next1 == '-') {
407	if (m_atLineStart && m_next2 == '>') {
408	shift3();
409	goto inSingleLineComment;
410	}
411	shift2();
412	if (m_terminator) {
413	token = AUTOMINUSMINUS;
414	break;
415	}
416	token = MINUSMINUS;
417	break;
418	}
419	if (m_next1 == '=') {
420	shift2();
421	token = MINUSEQUAL;
422	break;
423	}
424	shift1();
425	token = '-';
426	break;
427	case '*':
428	if (m_next1 == '=') {
429	shift2();
430	token = MULTEQUAL;
431	break;
432	}
433	shift1();
434	token = '*';
435	break;
436	case '/':
437	if (m_next1 == '/') {
438	shift2();
439	goto inSingleLineComment;
440	}
441	if (m_next1 == '*')
442	goto inMultiLineComment;
443	if (m_next1 == '=') {
444	shift2();
445	token = DIVEQUAL;
446	break;
447	}
448	shift1();
449	token = '/';
450	break;
451	case '&':
452	if (m_next1 == '&') {
453	shift2();
454	token = AND;
455	break;
456	}
457	if (m_next1 == '=') {
458	shift2();
459	token = ANDEQUAL;
460	break;
461	}
462	shift1();
463	token = '&';
464	break;
465	case '^':
466	if (m_next1 == '=') {
467	shift2();
468	token = XOREQUAL;
469	break;
470	}
471	shift1();
472	token = '^';
473	break;
474	case '%':
475	if (m_next1 == '=') {
476	shift2();
477	token = MODEQUAL;
478	break;
479	}
480	shift1();
481	token = '%';
482	break;
483	case '\|':
484	if (m_next1 == '=') {
485	shift2();
486	token = OREQUAL;
487	break;
488	}
489	if (m_next1 == '\|') {
490	shift2();
491	token = OR;
492	break;
493	}
494	shift1();
495	token = '\|';
496	break;
497	case '.':
498	if (isASCIIDigit(m_next1)) {
499	record8('.');
500	shift1();
501	goto inNumberAfterDecimalPoint;
502	}
503	token = '.';
504	shift1();
505	break;
506	case ',':
507	case '~':
508	case '?':
509	case ':':
510	case '(':
511	case ')':
512	case '[':
513	case ']':
514	token = m_current;
515	shift1();
516	break;
517	case ';':
518	shift1();
519	m_delimited = true;
520	token = ';';
521	break;
522	case '{':
523	lvalp->intValue = currentOffset();
524	shift1();
525	token = OPENBRACE;
526	break;
527	case '}':
528	lvalp->intValue = currentOffset();
529	shift1();
530	m_delimited = true;
531	token = CLOSEBRACE;
532	break;
533	case '\\':
534	goto startIdentifierWithBackslash;
535	case '0':
536	goto startNumberWithZeroDigit;
537	case '1':
538	case '2':
539	case '3':
540	case '4':
541	case '5':
542	case '6':
543	case '7':
544	case '8':
545	case '9':
546	goto startNumber;
547	case '"':
548	case '\'':
549	goto startString;
550	default:
551	if (isIdentStart(m_current))
552	goto startIdentifierOrKeyword;
553	if (isLineTerminator(m_current)) {
554	shiftLineTerminator();
555	m_atLineStart = true;
556	m_terminator = true;
557	if (lastTokenWasRestrKeyword()) {
558	token = ';';
559	goto doneSemicolon;
560	}
561	goto start;
562	}
563	goto returnError;
564	}
565
566	m_atLineStart = false;
567	goto returnToken;
568
569	startString: {
570	int stringQuoteCharacter = m_current;
571	shift1();
572
573	const UChar* stringStart = currentCharacter();
574	while (m_current != stringQuoteCharacter) {
575	// Fast check for characters that require special handling.
576	// Catches -1, \n, \r, \, 0x2028, and 0x2029 as efficiently
577	// as possible, and lets through all common ASCII characters.
578	if (UNLIKELY(m_current == '\\') \|\| UNLIKELY(((static_cast<unsigned>(m_current) - 0xE) & 0x2000))) {
579	m_buffer16.append(stringStart, currentCharacter() - stringStart);
580	goto inString;
581	}
582	shift1();
583	}
584	lvalp->ident = makeIdentifier(stringStart, currentCharacter() - stringStart);
585	shift1();
586	m_atLineStart = false;
587	m_delimited = false;
588	token = STRING;
589	goto returnToken;
590
591	inString:
592	while (m_current != stringQuoteCharacter) {
593	if (m_current == '\\')
594	goto inStringEscapeSequence;
595	if (UNLIKELY(isLineTerminator(m_current)))
596	goto returnError;
597	if (UNLIKELY(m_current == -1))
598	goto returnError;
599	record16(m_current);
600	shift1();
601	}
602	goto doneString;
603
604	inStringEscapeSequence:
605	shift1();
606	if (m_current == 'x') {
607	shift1();
608	if (isASCIIHexDigit(m_current) && isASCIIHexDigit(m_next1)) {
609	record16(convertHex(m_current, m_next1));
610	shift2();
611	goto inString;
612	}
613	record16('x');
614	if (m_current == stringQuoteCharacter)
615	goto doneString;
616	goto inString;
617	}
618	if (m_current == 'u') {
619	shift1();
620	if (isASCIIHexDigit(m_current) && isASCIIHexDigit(m_next1) && isASCIIHexDigit(m_next2) && isASCIIHexDigit(m_next3)) {
621	record16(convertUnicode(m_current, m_next1, m_next2, m_next3));
622	shift4();
623	goto inString;
624	}
625	if (m_current == stringQuoteCharacter) {
626	record16('u');
627	goto doneString;
628	}
629	goto returnError;
630	}
631	if (isASCIIOctalDigit(m_current)) {
632	if (m_current >= '0' && m_current <= '3' && isASCIIOctalDigit(m_next1) && isASCIIOctalDigit(m_next2)) {
633	record16((m_current - '0') * 64 + (m_next1 - '0') * 8 + m_next2 - '0');
634	shift3();
635	goto inString;
636	}
637	if (isASCIIOctalDigit(m_next1)) {
638	record16((m_current - '0') * 8 + m_next1 - '0');
639	shift2();
640	goto inString;
641	}
642	record16(m_current - '0');
643	shift1();
644	goto inString;
645	}
646	if (isLineTerminator(m_current)) {
647	shiftLineTerminator();
648	goto inString;
649	}
650	record16(singleEscape(m_current));
651	shift1();
652	goto inString;
653	}
654
655	startIdentifierWithBackslash:
656	shift1();
657	if (UNLIKELY(m_current != 'u'))
658	goto returnError;
659	shift1();
660	if (UNLIKELY(!isASCIIHexDigit(m_current) \|\| !isASCIIHexDigit(m_next1) \|\| !isASCIIHexDigit(m_next2) \|\| !isASCIIHexDigit(m_next3)))
661	goto returnError;
662	token = convertUnicode(m_current, m_next1, m_next2, m_next3);
663	if (UNLIKELY(!isIdentStart(token)))
664	goto returnError;
665	goto inIdentifierAfterCharacterCheck;
666
667	startIdentifierOrKeyword: {
668	const UChar* identifierStart = currentCharacter();
669	shift1();
670	while (isIdentPart(m_current))
671	shift1();
672	if (LIKELY(m_current != '\\')) {
673	lvalp->ident = makeIdentifier(identifierStart, currentCharacter() - identifierStart);
674	goto doneIdentifierOrKeyword;
675	}
676	m_buffer16.append(identifierStart, currentCharacter() - identifierStart);
677	}
678
679	do {
680	shift1();
681	if (UNLIKELY(m_current != 'u'))
682	goto returnError;
683	shift1();
684	if (UNLIKELY(!isASCIIHexDigit(m_current) \|\| !isASCIIHexDigit(m_next1) \|\| !isASCIIHexDigit(m_next2) \|\| !isASCIIHexDigit(m_next3)))
685	goto returnError;
686	token = convertUnicode(m_current, m_next1, m_next2, m_next3);
687	if (UNLIKELY(!isIdentPart(token)))
688	goto returnError;
689	inIdentifierAfterCharacterCheck:
690	record16(token);
691	shift4();
692
693	while (isIdentPart(m_current)) {
694	record16(m_current);
695	shift1();
696	}
697	} while (UNLIKELY(m_current == '\\'));
698	goto doneIdentifier;
699
700	inSingleLineComment:
701	while (!isLineTerminator(m_current)) {
702	if (UNLIKELY(m_current == -1))
703	return 0;
704	shift1();
705	}
706	shiftLineTerminator();
707	m_atLineStart = true;
708	m_terminator = true;
709	if (lastTokenWasRestrKeyword())
710	goto doneSemicolon;
711	goto start;
712
713	inMultiLineComment:
714	shift2();
715	while (m_current != '*' \|\| m_next1 != '/') {
716	if (isLineTerminator(m_current))
717	shiftLineTerminator();
718	else {
719	shift1();
720	if (UNLIKELY(m_current == -1))
721	goto returnError;
722	}
723	}
724	shift2();
725	m_atLineStart = false;
726	goto start;
727
728	startNumberWithZeroDigit:
729	shift1();
730	if ((m_current \| 0x20) == 'x' && isASCIIHexDigit(m_next1)) {
731	shift1();
732	goto inHex;
733	}
734	if (m_current == '.') {
735	record8('0');
736	record8('.');
737	shift1();
738	goto inNumberAfterDecimalPoint;
739	}
740	if ((m_current \| 0x20) == 'e') {
741	record8('0');
742	record8('e');
743	shift1();
744	goto inExponentIndicator;
745	}
746	if (isASCIIOctalDigit(m_current))
747	goto inOctal;
748	if (isASCIIDigit(m_current))
749	goto startNumber;
750	lvalp->doubleValue = 0;
751	goto doneNumeric;
752
753	inNumberAfterDecimalPoint:
754	while (isASCIIDigit(m_current)) {
755	record8(m_current);
756	shift1();
757	}
758	if ((m_current \| 0x20) == 'e') {
759	record8('e');
760	shift1();
761	goto inExponentIndicator;
762	}
763	goto doneNumber;
764
765	inExponentIndicator:
766	if (m_current == '+' \|\| m_current == '-') {
767	record8(m_current);
768	shift1();
769	}
770	if (!isASCIIDigit(m_current))
771	goto returnError;
772	do {
773	record8(m_current);
774	shift1();
775	} while (isASCIIDigit(m_current));
776	goto doneNumber;
777
778	inOctal: {
779	do {
780	record8(m_current);
781	shift1();
782	} while (isASCIIOctalDigit(m_current));
783	if (isASCIIDigit(m_current))
784	goto startNumber;
785
786	double dval = 0;
787
788	const char* end = m_buffer8.end();
789	for (const char* p = m_buffer8.data(); p < end; ++p) {
790	dval *= 8;
791	dval += *p - '0';
792	}
793	if (dval >= mantissaOverflowLowerBound)
794	dval = parseIntOverflow(m_buffer8.data(), end - m_buffer8.data(), 8);
795
796	m_buffer8.resize(0);
797
798	lvalp->doubleValue = dval;
799	goto doneNumeric;
800	}
801
802	inHex: {
803	do {
804	record8(m_current);
805	shift1();
806	} while (isASCIIHexDigit(m_current));
807
808	double dval = 0;
809
810	const char* end = m_buffer8.end();
811	for (const char* p = m_buffer8.data(); p < end; ++p) {
812	dval *= 16;
813	dval += toASCIIHexValue(*p);
814	}
815	if (dval >= mantissaOverflowLowerBound)
816	dval = parseIntOverflow(m_buffer8.data(), end - m_buffer8.data(), 16);
817
818	m_buffer8.resize(0);
819
820	lvalp->doubleValue = dval;
821	goto doneNumeric;
822	}
823
824	startNumber:
825	record8(m_current);
826	shift1();
827	while (isASCIIDigit(m_current)) {
828	record8(m_current);
829	shift1();
830	}
831	if (m_current == '.') {
832	record8('.');
833	shift1();
834	goto inNumberAfterDecimalPoint;
835	}
836	if ((m_current \| 0x20) == 'e') {
837	record8('e');
838	shift1();
839	goto inExponentIndicator;
840	}
841
842	// Fall through into doneNumber.
843
844	doneNumber:
845	// Null-terminate string for strtod.
846	m_buffer8.append('\0');
847	lvalp->doubleValue = WTF::strtod(m_buffer8.data(), 0);
848	m_buffer8.resize(0);
849
850	// Fall through into doneNumeric.
851
852	doneNumeric:
853	// No identifiers allowed directly after numeric literal, e.g. "3in" is bad.
854	if (UNLIKELY(isIdentStart(m_current)))
855	goto returnError;
856
857	m_atLineStart = false;
858	m_delimited = false;
859	token = NUMBER;
860	goto returnToken;
861
862	doneSemicolon:
863	token = ';';
864	m_delimited = true;
865	goto returnToken;
866
867	doneIdentifier:
868	m_atLineStart = false;
869	m_delimited = false;
870	lvalp->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
871	m_buffer16.resize(0);
872	token = IDENT;
873	goto returnToken;
874
875	doneIdentifierOrKeyword: {
876	m_atLineStart = false;
877	m_delimited = false;
878	m_buffer16.resize(0);
879	const HashEntry* entry = m_keywordTable.entry(m_globalData, *lvalp->ident);
880	token = entry ? entry->lexerValue() : IDENT;
881	goto returnToken;
882	}
883
884	doneString:
885	// Atomize constant strings in case they're later used in property lookup.
886	shift1();
887	m_atLineStart = false;
888	m_delimited = false;
889	lvalp->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
890	m_buffer16.resize(0);
891	token = STRING;
892
893	// Fall through into returnToken.
894
895	returnToken: {
896	int lineNumber = m_lineNumber;
897	llocp->first_line = lineNumber;
898	llocp->last_line = lineNumber;
899	llocp->first_column = startOffset;
900	llocp->last_column = currentOffset();
901
902	m_lastToken = token;
903	return token;
904	}
905
906	returnError:
907	m_error = true;
908	return -1;
909	}
910
911	bool Lexer::scanRegExp()
912	{
913	ASSERT(m_buffer16.isEmpty());
914
915	bool lastWasEscape = false;
916	bool inBrackets = false;
917
918	while (true) {
919	if (isLineTerminator(m_current) \|\| m_current == -1)
920	return false;
921	if (m_current != '/' \|\| lastWasEscape \|\| inBrackets) {
922	// keep track of '[' and ']'
923	if (!lastWasEscape) {
924	if (m_current == '[' && !inBrackets)
925	inBrackets = true;
926	if (m_current == ']' && inBrackets)
927	inBrackets = false;
928	}
929	record16(m_current);
930	lastWasEscape = !lastWasEscape && m_current == '\\';
931	} else { // end of regexp
932	m_pattern = UString(m_buffer16);
933	m_buffer16.resize(0);
934	shift1();
935	break;
936	}
937	shift1();
938	}
939
940	while (isIdentPart(m_current)) {
941	record16(m_current);
942	shift1();
943	}
944	m_flags = UString(m_buffer16);
945	m_buffer16.resize(0);
946
947	return true;
948	}
949
950	void Lexer::clear()
951	{
952	m_identifiers.clear();
953	m_codeWithoutBOMs.clear();
954
955	Vector<char> newBuffer8;
956	newBuffer8.reserveInitialCapacity(initialReadBufferCapacity);
957	m_buffer8.swap(newBuffer8);
958
959	Vector<UChar> newBuffer16;
960	newBuffer16.reserveInitialCapacity(initialReadBufferCapacity);
961	m_buffer16.swap(newBuffer16);
962
963	m_isReparsing = false;
964
965	m_pattern = UString();
966	m_flags = UString();
967	}
968
969	SourceCode Lexer::sourceCode(int openBrace, int closeBrace, int firstLine)
970	{
971	if (m_codeWithoutBOMs.isEmpty())
972	return SourceCode(m_source->provider(), openBrace, closeBrace + 1, firstLine);
973
974	const UChar* data = m_source->provider()->data();
975
976	ASSERT(openBrace < closeBrace);
977
978	int numBOMsBeforeOpenBrace = 0;
979	int numBOMsBetweenBraces = 0;
980
981	int i;
982	for (i = m_source->startOffset(); i < openBrace; ++i)
983	numBOMsBeforeOpenBrace += data[i] == byteOrderMark;
984	for (; i < closeBrace; ++i)
985	numBOMsBetweenBraces += data[i] == byteOrderMark;
986
987	return SourceCode(m_source->provider(), openBrace + numBOMsBeforeOpenBrace,
988	closeBrace + numBOMsBeforeOpenBrace + numBOMsBetweenBraces + 1, firstLine);
989	}
990
991	} // namespace JSC

Note: See TracBrowser for help on using the repository browser.

Download in other formats: