Context Navigation

Lexer.cpp@ 61450

Visit:

Last change on this file since 61450 was 61450, checked in by [email protected], 15 years ago

2010-06-18 Oliver Hunt <[email protected]>

Reviewed by Geoffrey Garen.

Incorrect handling of multiple BOMs scattered through a file.
https://bugs.webkit.org/show_bug.cgi?id=40865

When determining the offset of open and close braces in a source
with BOMs we were finishing our count early as we failed to account
for BOMs prior to the open/close brace positions affecting those
positions.

parser/Lexer.cpp: (JSC::Lexer::sourceCode):

2010-06-18 Oliver Hunt <[email protected]>

Reviewed by Geoffrey Garen.

Incorrect handling of multiple BOMs scattered through a file.
https://bugs.webkit.org/show_bug.cgi?id=40865

Put a few more BOMs into this testcase so that it hits the other cases
that were missed before.

fast/js/resources/bom-in-file-retains-correct-offset.js: (g.f): (g):

Property svn:eol-style set to native

File size: 26.2 KB

Line
1	/*
2	* Copyright (C) 1999-2000 Harri Porten ([email protected])
3	* Copyright (C) 2006, 2007, 2008, 2009 Apple Inc. All Rights Reserved.
4	* Copyright (C) 2007 Cameron Zwarich ([email protected])
5	*
6	* This library is free software; you can redistribute it and/or
7	* modify it under the terms of the GNU Library General Public
8	* License as published by the Free Software Foundation; either
9	* version 2 of the License, or (at your option) any later version.
10	*
11	* This library is distributed in the hope that it will be useful,
12	* but WITHOUT ANY WARRANTY; without even the implied warranty of
13	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14	* Library General Public License for more details.
15	*
16	* You should have received a copy of the GNU Library General Public License
17	* along with this library; see the file COPYING.LIB. If not, write to
18	* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19	* Boston, MA 02110-1301, USA.
20	*
21	*/
22
23	#include "config.h"
24	#include "Lexer.h"
25
26	#include "JSFunction.h"
27	#include "JSGlobalObjectFunctions.h"
28	#include "NodeInfo.h"
29	#include "Nodes.h"
30	#include "dtoa.h"
31	#include <ctype.h>
32	#include <limits.h>
33	#include <string.h>
34	#include <wtf/Assertions.h>
35
36	using namespace WTF;
37	using namespace Unicode;
38
39	// We can't specify the namespace in yacc's C output, so do it here instead.
40	using namespace JSC;
41
42	#include "Grammar.h"
43	#include "Lookup.h"
44	#include "Lexer.lut.h"
45
46	namespace JSC {
47
// U+FEFF, the byte order mark. Occurrences of this code unit are removed from
// the source text before scanning (see Lexer::copyCodeWithoutBOMs).
48	static const UChar byteOrderMark = 0xFEFF;
49
50	Lexer::Lexer(JSGlobalData* globalData)
51	: m_isReparsing(false)
52	, m_globalData(globalData)
53	, m_keywordTable(JSC::mainTable)
54	{
55	}
56
57	Lexer::~Lexer()
58	{
59	m_keywordTable.deleteTable();
60	}
61
62	inline const UChar* Lexer::currentCharacter() const
63	{
64	return m_code - 4;
65	}
66
67	inline int Lexer::currentOffset() const
68	{
69	return currentCharacter() - m_codeStart;
70	}
71
72	ALWAYS_INLINE void Lexer::shift1()
73	{
74	m_current = m_next1;
75	m_next1 = m_next2;
76	m_next2 = m_next3;
77	if (LIKELY(m_code < m_codeEnd))
78	m_next3 = m_code[0];
79	else
80	m_next3 = -1;
81
82	++m_code;
83	}
84
85	ALWAYS_INLINE void Lexer::shift2()
86	{
87	m_current = m_next2;
88	m_next1 = m_next3;
89	if (LIKELY(m_code + 1 < m_codeEnd)) {
90	m_next2 = m_code[0];
91	m_next3 = m_code[1];
92	} else {
93	m_next2 = m_code < m_codeEnd ? m_code[0] : -1;
94	m_next3 = -1;
95	}
96
97	m_code += 2;
98	}
99
100	ALWAYS_INLINE void Lexer::shift3()
101	{
102	m_current = m_next3;
103	if (LIKELY(m_code + 2 < m_codeEnd)) {
104	m_next1 = m_code[0];
105	m_next2 = m_code[1];
106	m_next3 = m_code[2];
107	} else {
108	m_next1 = m_code < m_codeEnd ? m_code[0] : -1;
109	m_next2 = m_code + 1 < m_codeEnd ? m_code[1] : -1;
110	m_next3 = -1;
111	}
112
113	m_code += 3;
114	}
115
116	ALWAYS_INLINE void Lexer::shift4()
117	{
118	if (LIKELY(m_code + 3 < m_codeEnd)) {
119	m_current = m_code[0];
120	m_next1 = m_code[1];
121	m_next2 = m_code[2];
122	m_next3 = m_code[3];
123	} else {
124	m_current = m_code < m_codeEnd ? m_code[0] : -1;
125	m_next1 = m_code + 1 < m_codeEnd ? m_code[1] : -1;
126	m_next2 = m_code + 2 < m_codeEnd ? m_code[2] : -1;
127	m_next3 = -1;
128	}
129
130	m_code += 4;
131	}
132
133	void Lexer::setCode(const SourceCode& source, ParserArena& arena)
134	{
135	m_arena = &arena.identifierArena();
136
137	m_lineNumber = source.firstLine();
138	m_delimited = false;
139	m_lastToken = -1;
140
141	const UChar* data = source.provider()->data();
142
143	m_source = &source;
144	m_codeStart = data;
145	m_code = data + source.startOffset();
146	m_codeEnd = data + source.endOffset();
147	m_error = false;
148	m_atLineStart = true;
149
150	m_buffer8.reserveInitialCapacity(initialReadBufferCapacity);
151	m_buffer16.reserveInitialCapacity((m_codeEnd - m_code) / 2);
152
153	// ECMA-262 calls for stripping all Cf characters, but we only strip BOM characters.
154	// See <https://bugs.webkit.org/show_bug.cgi?id=4931> for details.
155	if (source.provider()->hasBOMs()) {
156	for (const UChar* p = m_codeStart; p < m_codeEnd; ++p) {
157	if (UNLIKELY(*p == byteOrderMark)) {
158	copyCodeWithoutBOMs();
159	break;
160	}
161	}
162	}
163
164	// Read the first characters into the 4-character buffer.
165	shift4();
166	ASSERT(currentOffset() == source.startOffset());
167	}
168
169	void Lexer::copyCodeWithoutBOMs()
170	{
171	// Note: In this case, the character offset data for debugging will be incorrect.
172	// If it's important to correctly debug code with extraneous BOMs, then the caller
173	// should strip the BOMs when creating the SourceProvider object and do its own
174	// mapping of offsets within the stripped text to original text offset.
175
176	m_codeWithoutBOMs.reserveCapacity(m_codeEnd - m_code);
177	for (const UChar* p = m_code; p < m_codeEnd; ++p) {
178	UChar c = *p;
179	if (c != byteOrderMark)
180	m_codeWithoutBOMs.append(c);
181	}
182	ptrdiff_t startDelta = m_codeStart - m_code;
183	m_code = m_codeWithoutBOMs.data();
184	m_codeStart = m_code + startDelta;
185	m_codeEnd = m_codeWithoutBOMs.data() + m_codeWithoutBOMs.size();
186	}
187
188	void Lexer::shiftLineTerminator()
189	{
190	ASSERT(isLineTerminator(m_current));
191
192	// Allow both CRLF and LFCR.
193	if (m_current + m_next1 == '\n' + '\r')
194	shift2();
195	else
196	shift1();
197
198	++m_lineNumber;
199	}
200
201	ALWAYS_INLINE const Identifier* Lexer::makeIdentifier(const UChar* characters, size_t length)
202	{
203	return &m_arena->makeIdentifier(m_globalData, characters, length);
204	}
205
206	inline bool Lexer::lastTokenWasRestrKeyword() const
207	{
208	return m_lastToken == CONTINUE \|\| m_lastToken == BREAK \|\| m_lastToken == RETURN \|\| m_lastToken == THROW;
209	}
210
211	static NEVER_INLINE bool isNonASCIIIdentStart(int c)
212	{
213	return category(c) & (Letter_Uppercase \| Letter_Lowercase \| Letter_Titlecase \| Letter_Modifier \| Letter_Other);
214	}
215
216	static inline bool isIdentStart(int c)
217	{
218	return isASCII(c) ? isASCIIAlpha(c) \|\| c == '$' \|\| c == '_' : isNonASCIIIdentStart(c);
219	}
220
221	static NEVER_INLINE bool isNonASCIIIdentPart(int c)
222	{
223	return category(c) & (Letter_Uppercase \| Letter_Lowercase \| Letter_Titlecase \| Letter_Modifier \| Letter_Other
224	\| Mark_NonSpacing \| Mark_SpacingCombining \| Number_DecimalDigit \| Punctuation_Connector);
225	}
226
227	static inline bool isIdentPart(int c)
228	{
229	return isASCII(c) ? isASCIIAlphanumeric(c) \|\| c == '$' \|\| c == '_' : isNonASCIIIdentPart(c);
230	}
231
// Maps the character following a backslash in a string literal to the
// character it denotes. Characters without a special meaning map to themselves.
static inline int singleEscape(int c)
{
    int result = c;
    switch (c) {
    case 'b':
        result = '\b'; // 0x08, backspace
        break;
    case 't':
        result = '\t'; // 0x09, horizontal tab
        break;
    case 'n':
        result = '\n'; // 0x0A, line feed
        break;
    case 'v':
        result = '\v'; // 0x0B, vertical tab
        break;
    case 'f':
        result = '\f'; // 0x0C, form feed
        break;
    case 'r':
        result = '\r'; // 0x0D, carriage return
        break;
    }
    return result;
}
251
252	inline void Lexer::record8(int c)
253	{
254	ASSERT(c >= 0);
255	ASSERT(c <= 0xFF);
256	m_buffer8.append(static_cast<char>(c));
257	}
258
259	inline void Lexer::record16(UChar c)
260	{
261	m_buffer16.append(c);
262	}
263
264	inline void Lexer::record16(int c)
265	{
266	ASSERT(c >= 0);
267	ASSERT(c <= USHRT_MAX);
268	record16(UChar(static_cast<unsigned short>(c)));
269	}
270
271	int Lexer::lex(void* p1, void* p2)
272	{
273	ASSERT(!m_error);
274	ASSERT(m_buffer8.isEmpty());
275	ASSERT(m_buffer16.isEmpty());
276
277	YYSTYPE* lvalp = static_cast<YYSTYPE*>(p1);
278	YYLTYPE* llocp = static_cast<YYLTYPE*>(p2);
279	int token = 0;
280	m_terminator = false;
281
282	start:
283	while (isWhiteSpace(m_current))
284	shift1();
285
286	int startOffset = currentOffset();
287
288	if (m_current == -1) {
289	if (!m_terminator && !m_delimited && !m_isReparsing) {
290	// automatic semicolon insertion if program incomplete
291	token = ';';
292	goto doneSemicolon;
293	}
294	return 0;
295	}
296
297	m_delimited = false;
298	switch (m_current) {
299	case '>':
300	if (m_next1 == '>' && m_next2 == '>') {
301	if (m_next3 == '=') {
302	shift4();
303	token = URSHIFTEQUAL;
304	break;
305	}
306	shift3();
307	token = URSHIFT;
308	break;
309	}
310	if (m_next1 == '>') {
311	if (m_next2 == '=') {
312	shift3();
313	token = RSHIFTEQUAL;
314	break;
315	}
316	shift2();
317	token = RSHIFT;
318	break;
319	}
320	if (m_next1 == '=') {
321	shift2();
322	token = GE;
323	break;
324	}
325	shift1();
326	token = '>';
327	break;
328	case '=':
329	if (m_next1 == '=') {
330	if (m_next2 == '=') {
331	shift3();
332	token = STREQ;
333	break;
334	}
335	shift2();
336	token = EQEQ;
337	break;
338	}
339	shift1();
340	token = '=';
341	break;
342	case '!':
343	if (m_next1 == '=') {
344	if (m_next2 == '=') {
345	shift3();
346	token = STRNEQ;
347	break;
348	}
349	shift2();
350	token = NE;
351	break;
352	}
353	shift1();
354	token = '!';
355	break;
356	case '<':
357	if (m_next1 == '!' && m_next2 == '-' && m_next3 == '-') {
358	// <!-- marks the beginning of a line comment (for www usage)
359	shift4();
360	goto inSingleLineComment;
361	}
362	if (m_next1 == '<') {
363	if (m_next2 == '=') {
364	shift3();
365	token = LSHIFTEQUAL;
366	break;
367	}
368	shift2();
369	token = LSHIFT;
370	break;
371	}
372	if (m_next1 == '=') {
373	shift2();
374	token = LE;
375	break;
376	}
377	shift1();
378	token = '<';
379	break;
380	case '+':
381	if (m_next1 == '+') {
382	shift2();
383	if (m_terminator) {
384	token = AUTOPLUSPLUS;
385	break;
386	}
387	token = PLUSPLUS;
388	break;
389	}
390	if (m_next1 == '=') {
391	shift2();
392	token = PLUSEQUAL;
393	break;
394	}
395	shift1();
396	token = '+';
397	break;
398	case '-':
399	if (m_next1 == '-') {
400	if (m_atLineStart && m_next2 == '>') {
401	shift3();
402	goto inSingleLineComment;
403	}
404	shift2();
405	if (m_terminator) {
406	token = AUTOMINUSMINUS;
407	break;
408	}
409	token = MINUSMINUS;
410	break;
411	}
412	if (m_next1 == '=') {
413	shift2();
414	token = MINUSEQUAL;
415	break;
416	}
417	shift1();
418	token = '-';
419	break;
420	case '*':
421	if (m_next1 == '=') {
422	shift2();
423	token = MULTEQUAL;
424	break;
425	}
426	shift1();
427	token = '*';
428	break;
429	case '/':
430	if (m_next1 == '/') {
431	shift2();
432	goto inSingleLineComment;
433	}
434	if (m_next1 == '*')
435	goto inMultiLineComment;
436	if (m_next1 == '=') {
437	shift2();
438	token = DIVEQUAL;
439	break;
440	}
441	shift1();
442	token = '/';
443	break;
444	case '&':
445	if (m_next1 == '&') {
446	shift2();
447	token = AND;
448	break;
449	}
450	if (m_next1 == '=') {
451	shift2();
452	token = ANDEQUAL;
453	break;
454	}
455	shift1();
456	token = '&';
457	break;
458	case '^':
459	if (m_next1 == '=') {
460	shift2();
461	token = XOREQUAL;
462	break;
463	}
464	shift1();
465	token = '^';
466	break;
467	case '%':
468	if (m_next1 == '=') {
469	shift2();
470	token = MODEQUAL;
471	break;
472	}
473	shift1();
474	token = '%';
475	break;
476	case '\|':
477	if (m_next1 == '=') {
478	shift2();
479	token = OREQUAL;
480	break;
481	}
482	if (m_next1 == '\|') {
483	shift2();
484	token = OR;
485	break;
486	}
487	shift1();
488	token = '\|';
489	break;
490	case '.':
491	if (isASCIIDigit(m_next1)) {
492	record8('.');
493	shift1();
494	goto inNumberAfterDecimalPoint;
495	}
496	token = '.';
497	shift1();
498	break;
499	case ',':
500	case '~':
501	case '?':
502	case ':':
503	case '(':
504	case ')':
505	case '[':
506	case ']':
507	token = m_current;
508	shift1();
509	break;
510	case ';':
511	shift1();
512	m_delimited = true;
513	token = ';';
514	break;
515	case '{':
516	lvalp->intValue = currentOffset();
517	shift1();
518	token = OPENBRACE;
519	break;
520	case '}':
521	lvalp->intValue = currentOffset();
522	shift1();
523	m_delimited = true;
524	token = CLOSEBRACE;
525	break;
526	case '\\':
527	goto startIdentifierWithBackslash;
528	case '0':
529	goto startNumberWithZeroDigit;
530	case '1':
531	case '2':
532	case '3':
533	case '4':
534	case '5':
535	case '6':
536	case '7':
537	case '8':
538	case '9':
539	goto startNumber;
540	case '"':
541	case '\'':
542	goto startString;
543	default:
544	if (isIdentStart(m_current))
545	goto startIdentifierOrKeyword;
546	if (isLineTerminator(m_current)) {
547	shiftLineTerminator();
548	m_atLineStart = true;
549	m_terminator = true;
550	if (lastTokenWasRestrKeyword()) {
551	token = ';';
552	goto doneSemicolon;
553	}
554	goto start;
555	}
556	goto returnError;
557	}
558
559	m_atLineStart = false;
560	goto returnToken;
561
562	startString: {
563	int stringQuoteCharacter = m_current;
564	shift1();
565
566	const UChar* stringStart = currentCharacter();
567	while (m_current != stringQuoteCharacter) {
568	// Fast check for characters that require special handling.
569	// Catches -1, \n, \r, \, 0x2028, and 0x2029 as efficiently
570	// as possible, and lets through all common ASCII characters.
571	if (UNLIKELY(m_current == '\\') \|\| UNLIKELY(((static_cast<unsigned>(m_current) - 0xE) & 0x2000))) {
572	m_buffer16.append(stringStart, currentCharacter() - stringStart);
573	goto inString;
574	}
575	shift1();
576	}
577	lvalp->ident = makeIdentifier(stringStart, currentCharacter() - stringStart);
578	shift1();
579	m_atLineStart = false;
580	m_delimited = false;
581	token = STRING;
582	goto returnToken;
583
584	inString:
585	while (m_current != stringQuoteCharacter) {
586	if (m_current == '\\')
587	goto inStringEscapeSequence;
588	if (UNLIKELY(isLineTerminator(m_current)))
589	goto returnError;
590	if (UNLIKELY(m_current == -1))
591	goto returnError;
592	record16(m_current);
593	shift1();
594	}
595	goto doneString;
596
597	inStringEscapeSequence:
598	shift1();
599	if (m_current == 'x') {
600	shift1();
601	if (isASCIIHexDigit(m_current) && isASCIIHexDigit(m_next1)) {
602	record16(convertHex(m_current, m_next1));
603	shift2();
604	goto inString;
605	}
606	record16('x');
607	if (m_current == stringQuoteCharacter)
608	goto doneString;
609	goto inString;
610	}
611	if (m_current == 'u') {
612	shift1();
613	if (isASCIIHexDigit(m_current) && isASCIIHexDigit(m_next1) && isASCIIHexDigit(m_next2) && isASCIIHexDigit(m_next3)) {
614	record16(convertUnicode(m_current, m_next1, m_next2, m_next3));
615	shift4();
616	goto inString;
617	}
618	if (m_current == stringQuoteCharacter) {
619	record16('u');
620	goto doneString;
621	}
622	goto returnError;
623	}
624	if (isASCIIOctalDigit(m_current)) {
625	if (m_current >= '0' && m_current <= '3' && isASCIIOctalDigit(m_next1) && isASCIIOctalDigit(m_next2)) {
626	record16((m_current - '0') * 64 + (m_next1 - '0') * 8 + m_next2 - '0');
627	shift3();
628	goto inString;
629	}
630	if (isASCIIOctalDigit(m_next1)) {
631	record16((m_current - '0') * 8 + m_next1 - '0');
632	shift2();
633	goto inString;
634	}
635	record16(m_current - '0');
636	shift1();
637	goto inString;
638	}
639	if (isLineTerminator(m_current)) {
640	shiftLineTerminator();
641	goto inString;
642	}
643	if (m_current == -1)
644	goto returnError;
645	record16(singleEscape(m_current));
646	shift1();
647	goto inString;
648	}
649
650	startIdentifierWithBackslash:
651	shift1();
652	if (UNLIKELY(m_current != 'u'))
653	goto returnError;
654	shift1();
655	if (UNLIKELY(!isASCIIHexDigit(m_current) \|\| !isASCIIHexDigit(m_next1) \|\| !isASCIIHexDigit(m_next2) \|\| !isASCIIHexDigit(m_next3)))
656	goto returnError;
657	token = convertUnicode(m_current, m_next1, m_next2, m_next3);
658	if (UNLIKELY(!isIdentStart(token)))
659	goto returnError;
660	goto inIdentifierAfterCharacterCheck;
661
662	startIdentifierOrKeyword: {
663	const UChar* identifierStart = currentCharacter();
664	shift1();
665	while (isIdentPart(m_current))
666	shift1();
667	if (LIKELY(m_current != '\\')) {
668	lvalp->ident = makeIdentifier(identifierStart, currentCharacter() - identifierStart);
669	goto doneIdentifierOrKeyword;
670	}
671	m_buffer16.append(identifierStart, currentCharacter() - identifierStart);
672	}
673
674	do {
675	shift1();
676	if (UNLIKELY(m_current != 'u'))
677	goto returnError;
678	shift1();
679	if (UNLIKELY(!isASCIIHexDigit(m_current) \|\| !isASCIIHexDigit(m_next1) \|\| !isASCIIHexDigit(m_next2) \|\| !isASCIIHexDigit(m_next3)))
680	goto returnError;
681	token = convertUnicode(m_current, m_next1, m_next2, m_next3);
682	if (UNLIKELY(!isIdentPart(token)))
683	goto returnError;
684	inIdentifierAfterCharacterCheck:
685	record16(token);
686	shift4();
687
688	while (isIdentPart(m_current)) {
689	record16(m_current);
690	shift1();
691	}
692	} while (UNLIKELY(m_current == '\\'));
693	goto doneIdentifier;
694
695	inSingleLineComment:
696	while (!isLineTerminator(m_current)) {
697	if (UNLIKELY(m_current == -1))
698	return 0;
699	shift1();
700	}
701	shiftLineTerminator();
702	m_atLineStart = true;
703	m_terminator = true;
704	if (lastTokenWasRestrKeyword())
705	goto doneSemicolon;
706	goto start;
707
708	inMultiLineComment:
709	shift2();
710	while (m_current != '*' \|\| m_next1 != '/') {
711	if (isLineTerminator(m_current))
712	shiftLineTerminator();
713	else {
714	shift1();
715	if (UNLIKELY(m_current == -1))
716	goto returnError;
717	}
718	}
719	shift2();
720	m_atLineStart = false;
721	goto start;
722
723	startNumberWithZeroDigit:
724	shift1();
725	if ((m_current \| 0x20) == 'x' && isASCIIHexDigit(m_next1)) {
726	shift1();
727	goto inHex;
728	}
729	if (m_current == '.') {
730	record8('0');
731	record8('.');
732	shift1();
733	goto inNumberAfterDecimalPoint;
734	}
735	if ((m_current \| 0x20) == 'e') {
736	record8('0');
737	record8('e');
738	shift1();
739	goto inExponentIndicator;
740	}
741	if (isASCIIOctalDigit(m_current))
742	goto inOctal;
743	if (isASCIIDigit(m_current))
744	goto startNumber;
745	lvalp->doubleValue = 0;
746	goto doneNumeric;
747
748	inNumberAfterDecimalPoint:
749	while (isASCIIDigit(m_current)) {
750	record8(m_current);
751	shift1();
752	}
753	if ((m_current \| 0x20) == 'e') {
754	record8('e');
755	shift1();
756	goto inExponentIndicator;
757	}
758	goto doneNumber;
759
760	inExponentIndicator:
761	if (m_current == '+' \|\| m_current == '-') {
762	record8(m_current);
763	shift1();
764	}
765	if (!isASCIIDigit(m_current))
766	goto returnError;
767	do {
768	record8(m_current);
769	shift1();
770	} while (isASCIIDigit(m_current));
771	goto doneNumber;
772
773	inOctal: {
774	do {
775	record8(m_current);
776	shift1();
777	} while (isASCIIOctalDigit(m_current));
778	if (isASCIIDigit(m_current))
779	goto startNumber;
780
781	double dval = 0;
782
783	const char* end = m_buffer8.end();
784	for (const char* p = m_buffer8.data(); p < end; ++p) {
785	dval *= 8;
786	dval += *p - '0';
787	}
788	if (dval >= mantissaOverflowLowerBound)
789	dval = parseIntOverflow(m_buffer8.data(), end - m_buffer8.data(), 8);
790
791	m_buffer8.resize(0);
792
793	lvalp->doubleValue = dval;
794	goto doneNumeric;
795	}
796
797	inHex: {
798	do {
799	record8(m_current);
800	shift1();
801	} while (isASCIIHexDigit(m_current));
802
803	double dval = 0;
804
805	const char* end = m_buffer8.end();
806	for (const char* p = m_buffer8.data(); p < end; ++p) {
807	dval *= 16;
808	dval += toASCIIHexValue(*p);
809	}
810	if (dval >= mantissaOverflowLowerBound)
811	dval = parseIntOverflow(m_buffer8.data(), end - m_buffer8.data(), 16);
812
813	m_buffer8.resize(0);
814
815	lvalp->doubleValue = dval;
816	goto doneNumeric;
817	}
818
819	startNumber:
820	record8(m_current);
821	shift1();
822	while (isASCIIDigit(m_current)) {
823	record8(m_current);
824	shift1();
825	}
826	if (m_current == '.') {
827	record8('.');
828	shift1();
829	goto inNumberAfterDecimalPoint;
830	}
831	if ((m_current \| 0x20) == 'e') {
832	record8('e');
833	shift1();
834	goto inExponentIndicator;
835	}
836
837	// Fall through into doneNumber.
838
839	doneNumber:
840	// Null-terminate string for strtod.
841	m_buffer8.append('\0');
842	lvalp->doubleValue = WTF::strtod(m_buffer8.data(), 0);
843	m_buffer8.resize(0);
844
845	// Fall through into doneNumeric.
846
847	doneNumeric:
848	// No identifiers allowed directly after numeric literal, e.g. "3in" is bad.
849	if (UNLIKELY(isIdentStart(m_current)))
850	goto returnError;
851
852	m_atLineStart = false;
853	m_delimited = false;
854	token = NUMBER;
855	goto returnToken;
856
857	doneSemicolon:
858	token = ';';
859	m_delimited = true;
860	goto returnToken;
861
862	doneIdentifier:
863	m_atLineStart = false;
864	m_delimited = false;
865	lvalp->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
866	m_buffer16.resize(0);
867	token = IDENT;
868	goto returnToken;
869
870	doneIdentifierOrKeyword: {
871	m_atLineStart = false;
872	m_delimited = false;
873	m_buffer16.resize(0);
874	const HashEntry* entry = m_keywordTable.entry(m_globalData, *lvalp->ident);
875	token = entry ? entry->lexerValue() : IDENT;
876	goto returnToken;
877	}
878
879	doneString:
880	// Atomize constant strings in case they're later used in property lookup.
881	shift1();
882	m_atLineStart = false;
883	m_delimited = false;
884	lvalp->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
885	m_buffer16.resize(0);
886	token = STRING;
887
888	// Fall through into returnToken.
889
890	returnToken: {
891	int lineNumber = m_lineNumber;
892	llocp->first_line = lineNumber;
893	llocp->last_line = lineNumber;
894	llocp->first_column = startOffset;
895	llocp->last_column = currentOffset();
896
897	m_lastToken = token;
898	return token;
899	}
900
901	returnError:
902	m_error = true;
903	return -1;
904	}
905
906	bool Lexer::scanRegExp(const Identifier& pattern, const Identifier& flags, UChar patternPrefix)
907	{
908	ASSERT(m_buffer16.isEmpty());
909
910	bool lastWasEscape = false;
911	bool inBrackets = false;
912
913	if (patternPrefix) {
914	ASSERT(!isLineTerminator(patternPrefix));
915	ASSERT(patternPrefix != '/');
916	ASSERT(patternPrefix != '[');
917	record16(patternPrefix);
918	}
919
920	while (true) {
921	int current = m_current;
922
923	if (isLineTerminator(current) \|\| current == -1) {
924	m_buffer16.resize(0);
925	return false;
926	}
927
928	shift1();
929
930	if (current == '/' && !lastWasEscape && !inBrackets)
931	break;
932
933	record16(current);
934
935	if (lastWasEscape) {
936	lastWasEscape = false;
937	continue;
938	}
939
940	switch (current) {
941	case '[':
942	inBrackets = true;
943	break;
944	case ']':
945	inBrackets = false;
946	break;
947	case '\\':
948	lastWasEscape = true;
949	break;
950	}
951	}
952
953	pattern = makeIdentifier(m_buffer16.data(), m_buffer16.size());
954	m_buffer16.resize(0);
955
956	while (isIdentPart(m_current)) {
957	record16(m_current);
958	shift1();
959	}
960
961	flags = makeIdentifier(m_buffer16.data(), m_buffer16.size());
962	m_buffer16.resize(0);
963
964	return true;
965	}
966
967	bool Lexer::skipRegExp()
968	{
969	bool lastWasEscape = false;
970	bool inBrackets = false;
971
972	while (true) {
973	int current = m_current;
974
975	if (isLineTerminator(current) \|\| current == -1)
976	return false;
977
978	shift1();
979
980	if (current == '/' && !lastWasEscape && !inBrackets)
981	break;
982
983	if (lastWasEscape) {
984	lastWasEscape = false;
985	continue;
986	}
987
988	switch (current) {
989	case '[':
990	inBrackets = true;
991	break;
992	case ']':
993	inBrackets = false;
994	break;
995	case '\\':
996	lastWasEscape = true;
997	break;
998	}
999	}
1000
1001	while (isIdentPart(m_current))
1002	shift1();
1003
1004	return true;
1005	}
1006
1007	void Lexer::clear()
1008	{
1009	m_arena = 0;
1010	m_codeWithoutBOMs.clear();
1011
1012	Vector<char> newBuffer8;
1013	m_buffer8.swap(newBuffer8);
1014
1015	Vector<UChar> newBuffer16;
1016	m_buffer16.swap(newBuffer16);
1017
1018	m_isReparsing = false;
1019	}
1020
1021	SourceCode Lexer::sourceCode(int openBrace, int closeBrace, int firstLine)
1022	{
1023	if (m_codeWithoutBOMs.isEmpty())
1024	return SourceCode(m_source->provider(), openBrace, closeBrace + 1, firstLine);
1025
1026	const UChar* data = m_source->provider()->data();
1027
1028	ASSERT(openBrace < closeBrace);
1029	int i;
1030	for (i = m_source->startOffset(); i < openBrace; ++i) {
1031	if (data[i] == byteOrderMark) {
1032	openBrace++;
1033	closeBrace++;
1034	}
1035	}
1036	for (; i < closeBrace; ++i) {
1037	if (data[i] == byteOrderMark)
1038	closeBrace++;
1039	}
1040
1041	ASSERT(openBrace < closeBrace);
1042
1043	return SourceCode(m_source->provider(), openBrace, closeBrace + 1, firstLine);
1044	}
1045
1046	} // namespace JSC

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: webkit/trunk/JavaScriptCore/parser/Lexer.cpp@ 61450

Download in other formats: