Context Navigation

Lexer.cpp@ 43144

Visit:

Last change on this file since 43144 was 43144, checked in by [email protected], 16 years ago

2009-05-02 Maciej Stachowiak <[email protected]>

Reviewed by Cameron Zwarich.

speed up the lexer in various ways

~2% command-line SunSpider speedup

parser/Lexer.cpp: (JSC::Lexer::setCode): Moved below shift() so it can inline. (JSC::Lexer::scanRegExp): Use resize(0) instead of clear() on Vectors, since the intent here is not to free the underlying buffer. (JSC::Lexer::lex): ditto; also, change the loop logic a bit for the main lexing loop to avoid branching on !m_done twice per iteration. Now we only check it once. (JSC::Lexer::shift): Make this ALWAYS_INLINE and tag an unusual branch as UNLIKELY
parser/Lexer.h: (JSC::Lexer::makeIdentifier): force to be ALWAYS_INLINE
wtf/Vector.h: (WTF::::append): force to be ALWAYS_INLINE (may have helped in ways other than parsing but it wasn't getting inlined in a hot code path in the lexer)

Property svn:eol-style set to native

File size: 26.7 KB

Line
1	/*
2	* Copyright (C) 1999-2000 Harri Porten ([email protected])
3	* Copyright (C) 2006, 2007, 2008, 2009 Apple Inc. All Rights Reserved.
4	* Copyright (C) 2007 Cameron Zwarich ([email protected])
5	*
6	* This library is free software; you can redistribute it and/or
7	* modify it under the terms of the GNU Library General Public
8	* License as published by the Free Software Foundation; either
9	* version 2 of the License, or (at your option) any later version.
10	*
11	* This library is distributed in the hope that it will be useful,
12	* but WITHOUT ANY WARRANTY; without even the implied warranty of
13	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14	* Library General Public License for more details.
15	*
16	* You should have received a copy of the GNU Library General Public License
17	* along with this library; see the file COPYING.LIB. If not, write to
18	* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19	* Boston, MA 02110-1301, USA.
20	*
21	*/
22
23	#include "config.h"
24	#include "Lexer.h"
25
26	#include "JSFunction.h"
27	#include "JSGlobalObjectFunctions.h"
28	#include "NodeInfo.h"
29	#include "Nodes.h"
30	#include "dtoa.h"
31	#include <ctype.h>
32	#include <limits.h>
33	#include <string.h>
34	#include <wtf/ASCIICType.h>
35	#include <wtf/Assertions.h>
36
37	using namespace WTF;
38	using namespace Unicode;
39
40	// we can't specify the namespace in yacc's C output, so do it here
41	using namespace JSC;
42
43	#ifndef KDE_USE_FINAL
44	#include "Grammar.h"
45	#endif
46
47	#include "Lookup.h"
48	#include "Lexer.lut.h"
49
50	// a bridge for yacc from the C world to C++
51	int jscyylex(void* lvalp, void* llocp, void* globalData)
52	{
53	return static_cast<JSGlobalData*>(globalData)->lexer->lex(lvalp, llocp);
54	}
55
56	namespace JSC {
57
58	static bool isDecimalDigit(int);
59
60	Lexer::Lexer(JSGlobalData* globalData)
61	: yylineno(1)
62	, m_restrKeyword(false)
63	, m_eatNextIdentifier(false)
64	, m_stackToken(-1)
65	, m_lastToken(-1)
66	, m_position(0)
67	, m_code(0)
68	, m_length(0)
69	, m_isReparsing(false)
70	, m_atLineStart(true)
71	, m_current(0)
72	, m_next1(0)
73	, m_next2(0)
74	, m_next3(0)
75	, m_currentOffset(0)
76	, m_nextOffset1(0)
77	, m_nextOffset2(0)
78	, m_nextOffset3(0)
79	, m_globalData(globalData)
80	, m_mainTable(JSC::mainTable)
81	{
82	m_buffer8.reserveInitialCapacity(initialReadBufferCapacity);
83	m_buffer16.reserveInitialCapacity(initialReadBufferCapacity);
84	}
85
86	Lexer::~Lexer()
87	{
88	m_mainTable.deleteTable();
89	}
90
91	ALWAYS_INLINE void Lexer::shift(unsigned p)
92	{
93	// ECMA-262 calls for stripping Cf characters here, but we only do this for BOM,
94	// see <https://bugs.webkit.org/show_bug.cgi?id=4931>.
95
96	while (p--) {
97	m_current = m_next1;
98	m_next1 = m_next2;
99	m_next2 = m_next3;
100	m_currentOffset = m_nextOffset1;
101	m_nextOffset1 = m_nextOffset2;
102	m_nextOffset2 = m_nextOffset3;
103	do {
104	if (m_position >= m_length) {
105	m_nextOffset3 = m_position;
106	m_position++;
107	m_next3 = -1;
108	break;
109	}
110	m_nextOffset3 = m_position;
111	m_next3 = m_code[m_position++];
112	} while (UNLIKELY(m_next3 == 0xFEFF));
113	}
114	}
115
116	void Lexer::setCode(const SourceCode& source)
117	{
118	yylineno = source.firstLine();
119	m_restrKeyword = false;
120	m_delimited = false;
121	m_eatNextIdentifier = false;
122	m_stackToken = -1;
123	m_lastToken = -1;
124
125	m_position = source.startOffset();
126	m_source = &source;
127	m_code = source.provider()->data();
128	m_length = source.endOffset();
129	m_skipLF = false;
130	m_skipCR = false;
131	m_error = false;
132	m_atLineStart = true;
133
134	// read first characters
135	shift(4);
136	}
137
138	// called on each new line
139	void Lexer::nextLine()
140	{
141	yylineno++;
142	m_atLineStart = true;
143	}
144
145	void Lexer::setDone(State s)
146	{
147	m_state = s;
148	m_done = true;
149	}
150
151	int Lexer::lex(void* p1, void* p2)
152	{
153	YYSTYPE* lvalp = static_cast<YYSTYPE*>(p1);
154	YYLTYPE* llocp = static_cast<YYLTYPE*>(p2);
155	int token = 0;
156	m_state = Start;
157	unsigned short stringType = 0; // either single or double quotes
158	m_buffer8.resize(0);
159	m_buffer16.resize(0);
160	m_done = false;
161	m_terminator = false;
162	m_skipLF = false;
163	m_skipCR = false;
164
165	// did we push a token on the stack previously ?
166	// (after an automatic semicolon insertion)
167	if (m_stackToken >= 0) {
168	setDone(Other);
169	token = m_stackToken;
170	m_stackToken = 0;
171	}
172	int startOffset = m_currentOffset;
173	if (!m_done) {
174	while (true) {
175	if (m_skipLF && m_current != '\n') // found \r but not \n afterwards
176	m_skipLF = false;
177	if (m_skipCR && m_current != '\r') // found \n but not \r afterwards
178	m_skipCR = false;
179	if (m_skipLF \|\| m_skipCR) { // found \r\n or \n\r -> eat the second one
180	m_skipLF = false;
181	m_skipCR = false;
182	shift(1);
183	}
184	switch (m_state) {
185	case Start:
186	startOffset = m_currentOffset;
187	if (isWhiteSpace()) {
188	// do nothing
189	} else if (m_current == '/' && m_next1 == '/') {
190	shift(1);
191	m_state = InSingleLineComment;
192	} else if (m_current == '/' && m_next1 == '*') {
193	shift(1);
194	m_state = InMultiLineComment;
195	} else if (m_current == -1) {
196	if (!m_terminator && !m_delimited && !m_isReparsing) {
197	// automatic semicolon insertion if program incomplete
198	token = ';';
199	m_stackToken = 0;
200	setDone(Other);
201	} else
202	setDone(Eof);
203	} else if (isLineTerminator()) {
204	nextLine();
205	m_terminator = true;
206	if (m_restrKeyword) {
207	token = ';';
208	setDone(Other);
209	}
210	} else if (m_current == '"' \|\| m_current == '\'') {
211	m_state = InString;
212	stringType = static_cast<unsigned short>(m_current);
213	} else if (isIdentStart(m_current)) {
214	record16(m_current);
215	m_state = InIdentifierOrKeyword;
216	} else if (m_current == '\\')
217	m_state = InIdentifierStartUnicodeEscapeStart;
218	else if (m_current == '0') {
219	record8(m_current);
220	m_state = InNum0;
221	} else if (isDecimalDigit(m_current)) {
222	record8(m_current);
223	m_state = InNum;
224	} else if (m_current == '.' && isDecimalDigit(m_next1)) {
225	record8(m_current);
226	m_state = InDecimal;
227	// <!-- marks the beginning of a line comment (for www usage)
228	} else if (m_current == '<' && m_next1 == '!' && m_next2 == '-' && m_next3 == '-') {
229	shift(3);
230	m_state = InSingleLineComment;
231	// same for -->
232	} else if (m_atLineStart && m_current == '-' && m_next1 == '-' && m_next2 == '>') {
233	shift(2);
234	m_state = InSingleLineComment;
235	} else {
236	token = matchPunctuator(lvalp->intValue, m_current, m_next1, m_next2, m_next3);
237	if (token != -1)
238	setDone(Other);
239	else
240	setDone(Bad);
241	}
242	break;
243	case InString:
244	if (m_current == stringType) {
245	shift(1);
246	setDone(String);
247	} else if (isLineTerminator() \|\| m_current == -1)
248	setDone(Bad);
249	else if (m_current == '\\')
250	m_state = InEscapeSequence;
251	else
252	record16(m_current);
253	break;
254	// Escape Sequences inside of strings
255	case InEscapeSequence:
256	if (isOctalDigit(m_current)) {
257	if (m_current >= '0' && m_current <= '3' &&
258	isOctalDigit(m_next1) && isOctalDigit(m_next2)) {
259	record16(convertOctal(m_current, m_next1, m_next2));
260	shift(2);
261	m_state = InString;
262	} else if (isOctalDigit(m_current) && isOctalDigit(m_next1)) {
263	record16(convertOctal('0', m_current, m_next1));
264	shift(1);
265	m_state = InString;
266	} else if (isOctalDigit(m_current)) {
267	record16(convertOctal('0', '0', m_current));
268	m_state = InString;
269	} else
270	setDone(Bad);
271	} else if (m_current == 'x')
272	m_state = InHexEscape;
273	else if (m_current == 'u')
274	m_state = InUnicodeEscape;
275	else if (isLineTerminator()) {
276	nextLine();
277	m_state = InString;
278	} else {
279	record16(singleEscape(static_cast<unsigned short>(m_current)));
280	m_state = InString;
281	}
282	break;
283	case InHexEscape:
284	if (isHexDigit(m_current) && isHexDigit(m_next1)) {
285	m_state = InString;
286	record16(convertHex(m_current, m_next1));
287	shift(1);
288	} else if (m_current == stringType) {
289	record16('x');
290	shift(1);
291	setDone(String);
292	} else {
293	record16('x');
294	record16(m_current);
295	m_state = InString;
296	}
297	break;
298	case InUnicodeEscape:
299	if (isHexDigit(m_current) && isHexDigit(m_next1) && isHexDigit(m_next2) && isHexDigit(m_next3)) {
300	record16(convertUnicode(m_current, m_next1, m_next2, m_next3));
301	shift(3);
302	m_state = InString;
303	} else if (m_current == stringType) {
304	record16('u');
305	shift(1);
306	setDone(String);
307	} else
308	setDone(Bad);
309	break;
310	case InSingleLineComment:
311	if (isLineTerminator()) {
312	nextLine();
313	m_terminator = true;
314	if (m_restrKeyword) {
315	token = ';';
316	setDone(Other);
317	} else
318	m_state = Start;
319	} else if (m_current == -1)
320	setDone(Eof);
321	break;
322	case InMultiLineComment:
323	if (m_current == -1)
324	setDone(Bad);
325	else if (isLineTerminator())
326	nextLine();
327	else if (m_current == '*' && m_next1 == '/') {
328	m_state = Start;
329	shift(1);
330	}
331	break;
332	case InIdentifierOrKeyword:
333	case InIdentifier:
334	if (isIdentPart(m_current))
335	record16(m_current);
336	else if (m_current == '\\')
337	m_state = InIdentifierPartUnicodeEscapeStart;
338	else
339	setDone(m_state == InIdentifierOrKeyword ? IdentifierOrKeyword : Identifier);
340	break;
341	case InNum0:
342	if (m_current == 'x' \|\| m_current == 'X') {
343	record8(m_current);
344	m_state = InHex;
345	} else if (m_current == '.') {
346	record8(m_current);
347	m_state = InDecimal;
348	} else if (m_current == 'e' \|\| m_current == 'E') {
349	record8(m_current);
350	m_state = InExponentIndicator;
351	} else if (isOctalDigit(m_current)) {
352	record8(m_current);
353	m_state = InOctal;
354	} else if (isDecimalDigit(m_current)) {
355	record8(m_current);
356	m_state = InDecimal;
357	} else
358	setDone(Number);
359	break;
360	case InHex:
361	if (isHexDigit(m_current))
362	record8(m_current);
363	else
364	setDone(Hex);
365	break;
366	case InOctal:
367	if (isOctalDigit(m_current))
368	record8(m_current);
369	else if (isDecimalDigit(m_current)) {
370	record8(m_current);
371	m_state = InDecimal;
372	} else
373	setDone(Octal);
374	break;
375	case InNum:
376	if (isDecimalDigit(m_current))
377	record8(m_current);
378	else if (m_current == '.') {
379	record8(m_current);
380	m_state = InDecimal;
381	} else if (m_current == 'e' \|\| m_current == 'E') {
382	record8(m_current);
383	m_state = InExponentIndicator;
384	} else
385	setDone(Number);
386	break;
387	case InDecimal:
388	if (isDecimalDigit(m_current))
389	record8(m_current);
390	else if (m_current == 'e' \|\| m_current == 'E') {
391	record8(m_current);
392	m_state = InExponentIndicator;
393	} else
394	setDone(Number);
395	break;
396	case InExponentIndicator:
397	if (m_current == '+' \|\| m_current == '-')
398	record8(m_current);
399	else if (isDecimalDigit(m_current)) {
400	record8(m_current);
401	m_state = InExponent;
402	} else
403	setDone(Bad);
404	break;
405	case InExponent:
406	if (isDecimalDigit(m_current))
407	record8(m_current);
408	else
409	setDone(Number);
410	break;
411	case InIdentifierStartUnicodeEscapeStart:
412	if (m_current == 'u')
413	m_state = InIdentifierStartUnicodeEscape;
414	else
415	setDone(Bad);
416	break;
417	case InIdentifierPartUnicodeEscapeStart:
418	if (m_current == 'u')
419	m_state = InIdentifierPartUnicodeEscape;
420	else
421	setDone(Bad);
422	break;
423	case InIdentifierStartUnicodeEscape:
424	if (!isHexDigit(m_current) \|\| !isHexDigit(m_next1) \|\| !isHexDigit(m_next2) \|\| !isHexDigit(m_next3)) {
425	setDone(Bad);
426	break;
427	}
428	token = convertUnicode(m_current, m_next1, m_next2, m_next3);
429	shift(3);
430	if (!isIdentStart(token)) {
431	setDone(Bad);
432	break;
433	}
434	record16(token);
435	m_state = InIdentifier;
436	break;
437	case InIdentifierPartUnicodeEscape:
438	if (!isHexDigit(m_current) \|\| !isHexDigit(m_next1) \|\| !isHexDigit(m_next2) \|\| !isHexDigit(m_next3)) {
439	setDone(Bad);
440	break;
441	}
442	token = convertUnicode(m_current, m_next1, m_next2, m_next3);
443	shift(3);
444	if (!isIdentPart(token)) {
445	setDone(Bad);
446	break;
447	}
448	record16(token);
449	m_state = InIdentifier;
450	break;
451	default:
452	ASSERT(!"Unhandled state in switch statement");
453	}
454
455	if (m_state != Start && m_state != InSingleLineComment)
456	m_atLineStart = false;
457	if (m_done)
458	break;
459
460	shift(1);
461	}
462	}
463
464	// no identifiers allowed directly after numeric literal, e.g. "3in" is bad
465	if ((m_state == Number \|\| m_state == Octal \|\| m_state == Hex) && isIdentStart(m_current))
466	m_state = Bad;
467
468	// terminate string
469	m_buffer8.append('\0');
470
471	#ifdef JSC_DEBUG_LEX
472	fprintf(stderr, "line: %d ", lineNo());
473	fprintf(stderr, "yytext (%x): ", m_buffer8[0]);
474	fprintf(stderr, "%s ", m_buffer8.data());
475	#endif
476
477	double dval = 0;
478	if (m_state == Number)
479	dval = WTF::strtod(m_buffer8.data(), 0L);
480	else if (m_state == Hex) { // scan hex numbers
481	const char* p = m_buffer8.data() + 2;
482	while (char c = *p++) {
483	dval *= 16;
484	dval += convertHex(c);
485	}
486
487	if (dval >= mantissaOverflowLowerBound)
488	dval = parseIntOverflow(m_buffer8.data() + 2, p - (m_buffer8.data() + 3), 16);
489
490	m_state = Number;
491	} else if (m_state == Octal) { // scan octal number
492	const char* p = m_buffer8.data() + 1;
493	while (char c = *p++) {
494	dval *= 8;
495	dval += c - '0';
496	}
497
498	if (dval >= mantissaOverflowLowerBound)
499	dval = parseIntOverflow(m_buffer8.data() + 1, p - (m_buffer8.data() + 2), 8);
500
501	m_state = Number;
502	}
503
504	#ifdef JSC_DEBUG_LEX
505	switch (m_state) {
506	case Eof:
507	printf("(EOF)\n");
508	break;
509	case Other:
510	printf("(Other)\n");
511	break;
512	case Identifier:
513	printf("(Identifier)/(Keyword)\n");
514	break;
515	case String:
516	printf("(String)\n");
517	break;
518	case Number:
519	printf("(Number)\n");
520	break;
521	default:
522	printf("(unknown)");
523	}
524	#endif
525
526	if (m_state != Identifier)
527	m_eatNextIdentifier = false;
528
529	m_restrKeyword = false;
530	m_delimited = false;
531	llocp->first_line = yylineno;
532	llocp->last_line = yylineno;
533	llocp->first_column = startOffset;
534	llocp->last_column = m_currentOffset;
535	switch (m_state) {
536	case Eof:
537	token = 0;
538	break;
539	case Other:
540	if (token == '}' \|\| token == ';')
541	m_delimited = true;
542	break;
543	case Identifier:
544	// Apply anonymous-function hack below (eat the identifier).
545	if (m_eatNextIdentifier) {
546	m_eatNextIdentifier = false;
547	token = lex(lvalp, llocp);
548	break;
549	}
550	lvalp->ident = makeIdentifier(m_buffer16);
551	token = IDENT;
552	break;
553	case IdentifierOrKeyword: {
554	lvalp->ident = makeIdentifier(m_buffer16);
555	const HashEntry* entry = m_mainTable.entry(m_globalData, *lvalp->ident);
556	if (!entry) {
557	// Lookup for keyword failed, means this is an identifier.
558	token = IDENT;
559	break;
560	}
561	token = entry->lexerValue();
562	// Hack for "f = function somename() { ... }"; too hard to get into the grammar.
563	m_eatNextIdentifier = token == FUNCTION && m_lastToken == '=';
564	if (token == CONTINUE \|\| token == BREAK \|\| token == RETURN \|\| token == THROW)
565	m_restrKeyword = true;
566	break;
567	}
568	case String:
569	// Atomize constant strings in case they're later used in property lookup.
570	lvalp->ident = makeIdentifier(m_buffer16);
571	token = STRING;
572	break;
573	case Number:
574	lvalp->doubleValue = dval;
575	token = NUMBER;
576	break;
577	case Bad:
578	#ifdef JSC_DEBUG_LEX
579	fprintf(stderr, "yylex: ERROR.\n");
580	#endif
581	m_error = true;
582	return -1;
583	default:
584	ASSERT(!"unhandled numeration value in switch");
585	m_error = true;
586	return -1;
587	}
588	m_lastToken = token;
589	return token;
590	}
591
592	bool Lexer::isWhiteSpace() const
593	{
594	return isWhiteSpace(m_current);
595	}
596
597	bool Lexer::isLineTerminator()
598	{
599	bool cr = (m_current == '\r');
600	bool lf = (m_current == '\n');
601	if (cr)
602	m_skipLF = true;
603	else if (lf)
604	m_skipCR = true;
605	return cr \|\| lf \|\| m_current == 0x2028 \|\| m_current == 0x2029;
606	}
607
608	bool Lexer::isIdentStart(int c)
609	{
610	return isASCIIAlpha(c) \|\| c == '$' \|\| c == '_' \|\| (!isASCII(c) && (category(c) & (Letter_Uppercase \| Letter_Lowercase \| Letter_Titlecase \| Letter_Modifier \| Letter_Other)));
611	}
612
613	bool Lexer::isIdentPart(int c)
614	{
615	return isASCIIAlphanumeric(c) \|\| c == '$' \|\| c == '_' \|\| (!isASCII(c) && (category(c) & (Letter_Uppercase \| Letter_Lowercase \| Letter_Titlecase \| Letter_Modifier \| Letter_Other
616	\| Mark_NonSpacing \| Mark_SpacingCombining \| Number_DecimalDigit \| Punctuation_Connector)));
617	}
618
619	static bool isDecimalDigit(int c)
620	{
621	return isASCIIDigit(c);
622	}
623
624	bool Lexer::isHexDigit(int c)
625	{
626	return isASCIIHexDigit(c);
627	}
628
629	bool Lexer::isOctalDigit(int c)
630	{
631	return isASCIIOctalDigit(c);
632	}
633
634	int Lexer::matchPunctuator(int& charPos, int c1, int c2, int c3, int c4)
635	{
636	if (c1 == '>' && c2 == '>' && c3 == '>' && c4 == '=') {
637	shift(4);
638	return URSHIFTEQUAL;
639	}
640	if (c1 == '=' && c2 == '=' && c3 == '=') {
641	shift(3);
642	return STREQ;
643	}
644	if (c1 == '!' && c2 == '=' && c3 == '=') {
645	shift(3);
646	return STRNEQ;
647	}
648	if (c1 == '>' && c2 == '>' && c3 == '>') {
649	shift(3);
650	return URSHIFT;
651	}
652	if (c1 == '<' && c2 == '<' && c3 == '=') {
653	shift(3);
654	return LSHIFTEQUAL;
655	}
656	if (c1 == '>' && c2 == '>' && c3 == '=') {
657	shift(3);
658	return RSHIFTEQUAL;
659	}
660	if (c1 == '<' && c2 == '=') {
661	shift(2);
662	return LE;
663	}
664	if (c1 == '>' && c2 == '=') {
665	shift(2);
666	return GE;
667	}
668	if (c1 == '!' && c2 == '=') {
669	shift(2);
670	return NE;
671	}
672	if (c1 == '+' && c2 == '+') {
673	shift(2);
674	if (m_terminator)
675	return AUTOPLUSPLUS;
676	return PLUSPLUS;
677	}
678	if (c1 == '-' && c2 == '-') {
679	shift(2);
680	if (m_terminator)
681	return AUTOMINUSMINUS;
682	return MINUSMINUS;
683	}
684	if (c1 == '=' && c2 == '=') {
685	shift(2);
686	return EQEQ;
687	}
688	if (c1 == '+' && c2 == '=') {
689	shift(2);
690	return PLUSEQUAL;
691	}
692	if (c1 == '-' && c2 == '=') {
693	shift(2);
694	return MINUSEQUAL;
695	}
696	if (c1 == '*' && c2 == '=') {
697	shift(2);
698	return MULTEQUAL;
699	}
700	if (c1 == '/' && c2 == '=') {
701	shift(2);
702	return DIVEQUAL;
703	}
704	if (c1 == '&' && c2 == '=') {
705	shift(2);
706	return ANDEQUAL;
707	}
708	if (c1 == '^' && c2 == '=') {
709	shift(2);
710	return XOREQUAL;
711	}
712	if (c1 == '%' && c2 == '=') {
713	shift(2);
714	return MODEQUAL;
715	}
716	if (c1 == '\|' && c2 == '=') {
717	shift(2);
718	return OREQUAL;
719	}
720	if (c1 == '<' && c2 == '<') {
721	shift(2);
722	return LSHIFT;
723	}
724	if (c1 == '>' && c2 == '>') {
725	shift(2);
726	return RSHIFT;
727	}
728	if (c1 == '&' && c2 == '&') {
729	shift(2);
730	return AND;
731	}
732	if (c1 == '\|' && c2 == '\|') {
733	shift(2);
734	return OR;
735	}
736
737	switch (c1) {
738	case '=':
739	case '>':
740	case '<':
741	case ',':
742	case '!':
743	case '~':
744	case '?':
745	case ':':
746	case '.':
747	case '+':
748	case '-':
749	case '*':
750	case '/':
751	case '&':
752	case '\|':
753	case '^':
754	case '%':
755	case '(':
756	case ')':
757	case '[':
758	case ']':
759	case ';':
760	shift(1);
761	return static_cast<int>(c1);
762	case '{':
763	charPos = m_currentOffset;
764	shift(1);
765	return OPENBRACE;
766	case '}':
767	charPos = m_currentOffset;
768	shift(1);
769	return CLOSEBRACE;
770	default:
771	return -1;
772	}
773	}
774
775	unsigned short Lexer::singleEscape(unsigned short c)
776	{
777	switch (c) {
778	case 'b':
779	return 0x08;
780	case 't':
781	return 0x09;
782	case 'n':
783	return 0x0A;
784	case 'v':
785	return 0x0B;
786	case 'f':
787	return 0x0C;
788	case 'r':
789	return 0x0D;
790	case '"':
791	return 0x22;
792	case '\'':
793	return 0x27;
794	case '\\':
795	return 0x5C;
796	default:
797	return c;
798	}
799	}
800
801	unsigned short Lexer::convertOctal(int c1, int c2, int c3)
802	{
803	return static_cast<unsigned short>((c1 - '0') * 64 + (c2 - '0') * 8 + c3 - '0');
804	}
805
806	unsigned char Lexer::convertHex(int c)
807	{
808	if (c >= '0' && c <= '9')
809	return static_cast<unsigned char>(c - '0');
810	if (c >= 'a' && c <= 'f')
811	return static_cast<unsigned char>(c - 'a' + 10);
812	return static_cast<unsigned char>(c - 'A' + 10);
813	}
814
815	unsigned char Lexer::convertHex(int c1, int c2)
816	{
817	return ((convertHex(c1) << 4) + convertHex(c2));
818	}
819
820	UChar Lexer::convertUnicode(int c1, int c2, int c3, int c4)
821	{
822	unsigned char highByte = (convertHex(c1) << 4) + convertHex(c2);
823	unsigned char lowByte = (convertHex(c3) << 4) + convertHex(c4);
824	return (highByte << 8 \| lowByte);
825	}
826
827	void Lexer::record8(int c)
828	{
829	ASSERT(c >= 0);
830	ASSERT(c <= 0xff);
831	m_buffer8.append(static_cast<char>(c));
832	}
833
834	void Lexer::record16(int c)
835	{
836	ASSERT(c >= 0);
837	ASSERT(c <= USHRT_MAX);
838	record16(UChar(static_cast<unsigned short>(c)));
839	}
840
841	void Lexer::record16(UChar c)
842	{
843	m_buffer16.append(c);
844	}
845
846	bool Lexer::scanRegExp()
847	{
848	m_buffer16.resize(0);
849	bool lastWasEscape = false;
850	bool inBrackets = false;
851
852	while (1) {
853	if (isLineTerminator() \|\| m_current == -1)
854	return false;
855	else if (m_current != '/' \|\| lastWasEscape == true \|\| inBrackets == true) {
856	// keep track of '[' and ']'
857	if (!lastWasEscape) {
858	if ( m_current == '[' && !inBrackets )
859	inBrackets = true;
860	if ( m_current == ']' && inBrackets )
861	inBrackets = false;
862	}
863	record16(m_current);
864	lastWasEscape =
865	!lastWasEscape && (m_current == '\\');
866	} else { // end of regexp
867	m_pattern = UString(m_buffer16);
868	m_buffer16.resize(0);
869	shift(1);
870	break;
871	}
872	shift(1);
873	}
874
875	while (isIdentPart(m_current)) {
876	record16(m_current);
877	shift(1);
878	}
879	m_flags = UString(m_buffer16);
880
881	return true;
882	}
883
884	void Lexer::clear()
885	{
886	m_identifiers.clear();
887
888	Vector<char> newBuffer8;
889	newBuffer8.reserveInitialCapacity(initialReadBufferCapacity);
890	m_buffer8.swap(newBuffer8);
891
892	Vector<UChar> newBuffer16;
893	newBuffer16.reserveInitialCapacity(initialReadBufferCapacity);
894	m_buffer16.swap(newBuffer16);
895
896	m_isReparsing = false;
897
898	m_pattern = 0;
899	m_flags = 0;
900	}
901
902	} // namespace JSC

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: webkit/trunk/JavaScriptCore/parser/Lexer.cpp@ 43144

Download in other formats: