Context Navigation

Lexer.cpp@ 41342

Visit:

Last change on this file since 41342 was 41045, checked in by [email protected], 16 years ago

2009-02-17 Geoffrey Garen <[email protected]>

Reviewed by Sam Weinig.

Fixed <rdar://problem/6595040> REGRESSION: http://www.amnestyusa.org/
fails to load.

amnestyusa.org uses the Optimist JavaScript library, which adds event
listeners by concatenating string-ified functions. This is only sure to
be syntactically valid if the string-ified functions end in semicolons.

parser/Lexer.cpp: (JSC::Lexer::isWhiteSpace):
parser/Lexer.h: (JSC::Lexer::isWhiteSpace): (JSC::Lexer::isLineTerminator): Added some helper functions for examining whitespace.

runtime/FunctionPrototype.cpp: (JSC::appendSemicolonIfNeeded): (JSC::functionProtoFuncToString): When string-ifying a function, insert a semicolon in the last non-whitespace position, if one doesn't already exist.

LayoutTests:

2009-02-17 Geoffrey Garen <[email protected]>

Reviewed by Sam Weinig.

Test for <rdar://problem/6595040> REGRESSION: http://www.amnestyusa.org/
fails to load.

fast/js/function-toString-semicolon-insertion-expected.txt: Added.
fast/js/function-toString-semicolon-insertion.html: Added.
fast/js/resources/function-toString-semicolon-insertion.js: Added. (compileAndSerialize):

Property svn:eol-style set to native

File size: 26.7 KB

Line
1	/*
2	* Copyright (C) 1999-2000 Harri Porten ([email protected])
3	* Copyright (C) 2006, 2007, 2008, 2009 Apple Inc. All Rights Reserved.
4	* Copyright (C) 2007 Cameron Zwarich ([email protected])
5	*
6	* This library is free software; you can redistribute it and/or
7	* modify it under the terms of the GNU Library General Public
8	* License as published by the Free Software Foundation; either
9	* version 2 of the License, or (at your option) any later version.
10	*
11	* This library is distributed in the hope that it will be useful,
12	* but WITHOUT ANY WARRANTY; without even the implied warranty of
13	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14	* Library General Public License for more details.
15	*
16	* You should have received a copy of the GNU Library General Public License
17	* along with this library; see the file COPYING.LIB. If not, write to
18	* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19	* Boston, MA 02110-1301, USA.
20	*
21	*/
22
23	#include "config.h"
24	#include "Lexer.h"
25
26	#include "JSFunction.h"
27	#include "JSGlobalObjectFunctions.h"
28	#include "NodeInfo.h"
29	#include "Nodes.h"
30	#include "dtoa.h"
31	#include <ctype.h>
32	#include <limits.h>
33	#include <string.h>
34	#include <wtf/ASCIICType.h>
35	#include <wtf/Assertions.h>
36
37	using namespace WTF;
38	using namespace Unicode;
39
40	// we can't specify the namespace in yacc's C output, so do it here
41	using namespace JSC;
42
43	#ifndef KDE_USE_FINAL
44	#include "Grammar.h"
45	#endif
46
47	#include "Lookup.h"
48	#include "Lexer.lut.h"
49
50	// a bridge for yacc from the C world to C++
51	int jscyylex(void* lvalp, void* llocp, void* globalData)
52	{
53	return static_cast<JSGlobalData*>(globalData)->lexer->lex(lvalp, llocp);
54	}
55
56	namespace JSC {
57
// Forward declaration of the file-local digit test; it is used by lex()
// before its definition further down in this file.
58	static bool isDecimalDigit(int);
59
60	Lexer::Lexer(JSGlobalData* globalData)
61	: yylineno(1)
62	, m_restrKeyword(false)
63	, m_eatNextIdentifier(false)
64	, m_stackToken(-1)
65	, m_lastToken(-1)
66	, m_position(0)
67	, m_code(0)
68	, m_length(0)
69	, m_isReparsing(false)
70	, m_atLineStart(true)
71	, m_current(0)
72	, m_next1(0)
73	, m_next2(0)
74	, m_next3(0)
75	, m_currentOffset(0)
76	, m_nextOffset1(0)
77	, m_nextOffset2(0)
78	, m_nextOffset3(0)
79	, m_globalData(globalData)
80	, m_mainTable(JSC::mainTable)
81	{
82	m_buffer8.reserveInitialCapacity(initialReadBufferCapacity);
83	m_buffer16.reserveInitialCapacity(initialReadBufferCapacity);
84	}
85
86	Lexer::~Lexer()
87	{
88	m_mainTable.deleteTable();
89	}
90
91	void Lexer::setCode(const SourceCode& source)
92	{
93	yylineno = source.firstLine();
94	m_restrKeyword = false;
95	m_delimited = false;
96	m_eatNextIdentifier = false;
97	m_stackToken = -1;
98	m_lastToken = -1;
99
100	m_position = source.startOffset();
101	m_source = &source;
102	m_code = source.provider()->data();
103	m_length = source.endOffset();
104	m_skipLF = false;
105	m_skipCR = false;
106	m_error = false;
107	m_atLineStart = true;
108
109	// read first characters
110	shift(4);
111	}
112
113	void Lexer::shift(unsigned p)
114	{
115	// ECMA-262 calls for stripping Cf characters here, but we only do this for BOM,
116	// see <https://bugs.webkit.org/show_bug.cgi?id=4931>.
117
118	while (p--) {
119	m_current = m_next1;
120	m_next1 = m_next2;
121	m_next2 = m_next3;
122	m_currentOffset = m_nextOffset1;
123	m_nextOffset1 = m_nextOffset2;
124	m_nextOffset2 = m_nextOffset3;
125	do {
126	if (m_position >= m_length) {
127	m_nextOffset3 = m_position;
128	m_position++;
129	m_next3 = -1;
130	break;
131	}
132	m_nextOffset3 = m_position;
133	m_next3 = m_code[m_position++];
134	} while (m_next3 == 0xFEFF);
135	}
136	}
137
138	// called on each new line
139	void Lexer::nextLine()
140	{
141	yylineno++;
142	m_atLineStart = true;
143	}
144
145	void Lexer::setDone(State s)
146	{
147	m_state = s;
148	m_done = true;
149	}
150
151	int Lexer::lex(void* p1, void* p2)
152	{
153	YYSTYPE* lvalp = static_cast<YYSTYPE*>(p1);
154	YYLTYPE* llocp = static_cast<YYLTYPE*>(p2);
155	int token = 0;
156	m_state = Start;
157	unsigned short stringType = 0; // either single or double quotes
158	m_buffer8.clear();
159	m_buffer16.clear();
160	m_done = false;
161	m_terminator = false;
162	m_skipLF = false;
163	m_skipCR = false;
164
165	// did we push a token on the stack previously ?
166	// (after an automatic semicolon insertion)
167	if (m_stackToken >= 0) {
168	setDone(Other);
169	token = m_stackToken;
170	m_stackToken = 0;
171	}
172	int startOffset = m_currentOffset;
173	while (!m_done) {
174	if (m_skipLF && m_current != '\n') // found \r but not \n afterwards
175	m_skipLF = false;
176	if (m_skipCR && m_current != '\r') // found \n but not \r afterwards
177	m_skipCR = false;
178	if (m_skipLF \|\| m_skipCR) { // found \r\n or \n\r -> eat the second one
179	m_skipLF = false;
180	m_skipCR = false;
181	shift(1);
182	}
183	switch (m_state) {
184	case Start:
185	startOffset = m_currentOffset;
186	if (isWhiteSpace()) {
187	// do nothing
188	} else if (m_current == '/' && m_next1 == '/') {
189	shift(1);
190	m_state = InSingleLineComment;
191	} else if (m_current == '/' && m_next1 == '*') {
192	shift(1);
193	m_state = InMultiLineComment;
194	} else if (m_current == -1) {
195	if (!m_terminator && !m_delimited && !m_isReparsing) {
196	// automatic semicolon insertion if program incomplete
197	token = ';';
198	m_stackToken = 0;
199	setDone(Other);
200	} else
201	setDone(Eof);
202	} else if (isLineTerminator()) {
203	nextLine();
204	m_terminator = true;
205	if (m_restrKeyword) {
206	token = ';';
207	setDone(Other);
208	}
209	} else if (m_current == '"' \|\| m_current == '\'') {
210	m_state = InString;
211	stringType = static_cast<unsigned short>(m_current);
212	} else if (isIdentStart(m_current)) {
213	record16(m_current);
214	m_state = InIdentifierOrKeyword;
215	} else if (m_current == '\\')
216	m_state = InIdentifierStartUnicodeEscapeStart;
217	else if (m_current == '0') {
218	record8(m_current);
219	m_state = InNum0;
220	} else if (isDecimalDigit(m_current)) {
221	record8(m_current);
222	m_state = InNum;
223	} else if (m_current == '.' && isDecimalDigit(m_next1)) {
224	record8(m_current);
225	m_state = InDecimal;
226	// <!-- marks the beginning of a line comment (for www usage)
227	} else if (m_current == '<' && m_next1 == '!' && m_next2 == '-' && m_next3 == '-') {
228	shift(3);
229	m_state = InSingleLineComment;
230	// same for -->
231	} else if (m_atLineStart && m_current == '-' && m_next1 == '-' && m_next2 == '>') {
232	shift(2);
233	m_state = InSingleLineComment;
234	} else {
235	token = matchPunctuator(lvalp->intValue, m_current, m_next1, m_next2, m_next3);
236	if (token != -1)
237	setDone(Other);
238	else
239	setDone(Bad);
240	}
241	break;
242	case InString:
243	if (m_current == stringType) {
244	shift(1);
245	setDone(String);
246	} else if (isLineTerminator() \|\| m_current == -1)
247	setDone(Bad);
248	else if (m_current == '\\')
249	m_state = InEscapeSequence;
250	else
251	record16(m_current);
252	break;
253	// Escape Sequences inside of strings
254	case InEscapeSequence:
255	if (isOctalDigit(m_current)) {
256	if (m_current >= '0' && m_current <= '3' &&
257	isOctalDigit(m_next1) && isOctalDigit(m_next2)) {
258	record16(convertOctal(m_current, m_next1, m_next2));
259	shift(2);
260	m_state = InString;
261	} else if (isOctalDigit(m_current) && isOctalDigit(m_next1)) {
262	record16(convertOctal('0', m_current, m_next1));
263	shift(1);
264	m_state = InString;
265	} else if (isOctalDigit(m_current)) {
266	record16(convertOctal('0', '0', m_current));
267	m_state = InString;
268	} else
269	setDone(Bad);
270	} else if (m_current == 'x')
271	m_state = InHexEscape;
272	else if (m_current == 'u')
273	m_state = InUnicodeEscape;
274	else if (isLineTerminator()) {
275	nextLine();
276	m_state = InString;
277	} else {
278	record16(singleEscape(static_cast<unsigned short>(m_current)));
279	m_state = InString;
280	}
281	break;
282	case InHexEscape:
283	if (isHexDigit(m_current) && isHexDigit(m_next1)) {
284	m_state = InString;
285	record16(convertHex(m_current, m_next1));
286	shift(1);
287	} else if (m_current == stringType) {
288	record16('x');
289	shift(1);
290	setDone(String);
291	} else {
292	record16('x');
293	record16(m_current);
294	m_state = InString;
295	}
296	break;
297	case InUnicodeEscape:
298	if (isHexDigit(m_current) && isHexDigit(m_next1) && isHexDigit(m_next2) && isHexDigit(m_next3)) {
299	record16(convertUnicode(m_current, m_next1, m_next2, m_next3));
300	shift(3);
301	m_state = InString;
302	} else if (m_current == stringType) {
303	record16('u');
304	shift(1);
305	setDone(String);
306	} else
307	setDone(Bad);
308	break;
309	case InSingleLineComment:
310	if (isLineTerminator()) {
311	nextLine();
312	m_terminator = true;
313	if (m_restrKeyword) {
314	token = ';';
315	setDone(Other);
316	} else
317	m_state = Start;
318	} else if (m_current == -1)
319	setDone(Eof);
320	break;
321	case InMultiLineComment:
322	if (m_current == -1)
323	setDone(Bad);
324	else if (isLineTerminator())
325	nextLine();
326	else if (m_current == '*' && m_next1 == '/') {
327	m_state = Start;
328	shift(1);
329	}
330	break;
331	case InIdentifierOrKeyword:
332	case InIdentifier:
333	if (isIdentPart(m_current))
334	record16(m_current);
335	else if (m_current == '\\')
336	m_state = InIdentifierPartUnicodeEscapeStart;
337	else
338	setDone(m_state == InIdentifierOrKeyword ? IdentifierOrKeyword : Identifier);
339	break;
340	case InNum0:
341	if (m_current == 'x' \|\| m_current == 'X') {
342	record8(m_current);
343	m_state = InHex;
344	} else if (m_current == '.') {
345	record8(m_current);
346	m_state = InDecimal;
347	} else if (m_current == 'e' \|\| m_current == 'E') {
348	record8(m_current);
349	m_state = InExponentIndicator;
350	} else if (isOctalDigit(m_current)) {
351	record8(m_current);
352	m_state = InOctal;
353	} else if (isDecimalDigit(m_current)) {
354	record8(m_current);
355	m_state = InDecimal;
356	} else
357	setDone(Number);
358	break;
359	case InHex:
360	if (isHexDigit(m_current))
361	record8(m_current);
362	else
363	setDone(Hex);
364	break;
365	case InOctal:
366	if (isOctalDigit(m_current))
367	record8(m_current);
368	else if (isDecimalDigit(m_current)) {
369	record8(m_current);
370	m_state = InDecimal;
371	} else
372	setDone(Octal);
373	break;
374	case InNum:
375	if (isDecimalDigit(m_current))
376	record8(m_current);
377	else if (m_current == '.') {
378	record8(m_current);
379	m_state = InDecimal;
380	} else if (m_current == 'e' \|\| m_current == 'E') {
381	record8(m_current);
382	m_state = InExponentIndicator;
383	} else
384	setDone(Number);
385	break;
386	case InDecimal:
387	if (isDecimalDigit(m_current))
388	record8(m_current);
389	else if (m_current == 'e' \|\| m_current == 'E') {
390	record8(m_current);
391	m_state = InExponentIndicator;
392	} else
393	setDone(Number);
394	break;
395	case InExponentIndicator:
396	if (m_current == '+' \|\| m_current == '-')
397	record8(m_current);
398	else if (isDecimalDigit(m_current)) {
399	record8(m_current);
400	m_state = InExponent;
401	} else
402	setDone(Bad);
403	break;
404	case InExponent:
405	if (isDecimalDigit(m_current))
406	record8(m_current);
407	else
408	setDone(Number);
409	break;
410	case InIdentifierStartUnicodeEscapeStart:
411	if (m_current == 'u')
412	m_state = InIdentifierStartUnicodeEscape;
413	else
414	setDone(Bad);
415	break;
416	case InIdentifierPartUnicodeEscapeStart:
417	if (m_current == 'u')
418	m_state = InIdentifierPartUnicodeEscape;
419	else
420	setDone(Bad);
421	break;
422	case InIdentifierStartUnicodeEscape:
423	if (!isHexDigit(m_current) \|\| !isHexDigit(m_next1) \|\| !isHexDigit(m_next2) \|\| !isHexDigit(m_next3)) {
424	setDone(Bad);
425	break;
426	}
427	token = convertUnicode(m_current, m_next1, m_next2, m_next3);
428	shift(3);
429	if (!isIdentStart(token)) {
430	setDone(Bad);
431	break;
432	}
433	record16(token);
434	m_state = InIdentifier;
435	break;
436	case InIdentifierPartUnicodeEscape:
437	if (!isHexDigit(m_current) \|\| !isHexDigit(m_next1) \|\| !isHexDigit(m_next2) \|\| !isHexDigit(m_next3)) {
438	setDone(Bad);
439	break;
440	}
441	token = convertUnicode(m_current, m_next1, m_next2, m_next3);
442	shift(3);
443	if (!isIdentPart(token)) {
444	setDone(Bad);
445	break;
446	}
447	record16(token);
448	m_state = InIdentifier;
449	break;
450	default:
451	ASSERT(!"Unhandled state in switch statement");
452	}
453
454	// move on to the next character
455	if (!m_done)
456	shift(1);
457	if (m_state != Start && m_state != InSingleLineComment)
458	m_atLineStart = false;
459	}
460
461	// no identifiers allowed directly after numeric literal, e.g. "3in" is bad
462	if ((m_state == Number \|\| m_state == Octal \|\| m_state == Hex) && isIdentStart(m_current))
463	m_state = Bad;
464
465	// terminate string
466	m_buffer8.append('\0');
467
468	#ifdef JSC_DEBUG_LEX
469	fprintf(stderr, "line: %d ", lineNo());
470	fprintf(stderr, "yytext (%x): ", m_buffer8[0]);
471	fprintf(stderr, "%s ", m_buffer8.data());
472	#endif
473
474	double dval = 0;
475	if (m_state == Number)
476	dval = WTF::strtod(m_buffer8.data(), 0L);
477	else if (m_state == Hex) { // scan hex numbers
478	const char* p = m_buffer8.data() + 2;
479	while (char c = *p++) {
480	dval *= 16;
481	dval += convertHex(c);
482	}
483
484	if (dval >= mantissaOverflowLowerBound)
485	dval = parseIntOverflow(m_buffer8.data() + 2, p - (m_buffer8.data() + 3), 16);
486
487	m_state = Number;
488	} else if (m_state == Octal) { // scan octal number
489	const char* p = m_buffer8.data() + 1;
490	while (char c = *p++) {
491	dval *= 8;
492	dval += c - '0';
493	}
494
495	if (dval >= mantissaOverflowLowerBound)
496	dval = parseIntOverflow(m_buffer8.data() + 1, p - (m_buffer8.data() + 2), 8);
497
498	m_state = Number;
499	}
500
501	#ifdef JSC_DEBUG_LEX
502	switch (m_state) {
503	case Eof:
504	printf("(EOF)\n");
505	break;
506	case Other:
507	printf("(Other)\n");
508	break;
509	case Identifier:
510	printf("(Identifier)/(Keyword)\n");
511	break;
512	case String:
513	printf("(String)\n");
514	break;
515	case Number:
516	printf("(Number)\n");
517	break;
518	default:
519	printf("(unknown)");
520	}
521	#endif
522
523	if (m_state != Identifier)
524	m_eatNextIdentifier = false;
525
526	m_restrKeyword = false;
527	m_delimited = false;
528	llocp->first_line = yylineno;
529	llocp->last_line = yylineno;
530	llocp->first_column = startOffset;
531	llocp->last_column = m_currentOffset;
532	switch (m_state) {
533	case Eof:
534	token = 0;
535	break;
536	case Other:
537	if (token == '}' \|\| token == ';')
538	m_delimited = true;
539	break;
540	case Identifier:
541	// Apply anonymous-function hack below (eat the identifier).
542	if (m_eatNextIdentifier) {
543	m_eatNextIdentifier = false;
544	token = lex(lvalp, llocp);
545	break;
546	}
547	lvalp->ident = makeIdentifier(m_buffer16);
548	token = IDENT;
549	break;
550	case IdentifierOrKeyword: {
551	lvalp->ident = makeIdentifier(m_buffer16);
552	const HashEntry* entry = m_mainTable.entry(m_globalData, *lvalp->ident);
553	if (!entry) {
554	// Lookup for keyword failed, means this is an identifier.
555	token = IDENT;
556	break;
557	}
558	token = entry->lexerValue();
559	// Hack for "f = function somename() { ... }"; too hard to get into the grammar.
560	m_eatNextIdentifier = token == FUNCTION && m_lastToken == '=';
561	if (token == CONTINUE \|\| token == BREAK \|\| token == RETURN \|\| token == THROW)
562	m_restrKeyword = true;
563	break;
564	}
565	case String:
566	// Atomize constant strings in case they're later used in property lookup.
567	lvalp->ident = makeIdentifier(m_buffer16);
568	token = STRING;
569	break;
570	case Number:
571	lvalp->doubleValue = dval;
572	token = NUMBER;
573	break;
574	case Bad:
575	#ifdef JSC_DEBUG_LEX
576	fprintf(stderr, "yylex: ERROR.\n");
577	#endif
578	m_error = true;
579	return -1;
580	default:
581	ASSERT(!"unhandled numeration value in switch");
582	m_error = true;
583	return -1;
584	}
585	m_lastToken = token;
586	return token;
587	}
588
589	bool Lexer::isWhiteSpace() const
590	{
591	return isWhiteSpace(m_current);
592	}
593
594	bool Lexer::isLineTerminator()
595	{
596	bool cr = (m_current == '\r');
597	bool lf = (m_current == '\n');
598	if (cr)
599	m_skipLF = true;
600	else if (lf)
601	m_skipCR = true;
602	return cr \|\| lf \|\| m_current == 0x2028 \|\| m_current == 0x2029;
603	}
604
605	bool Lexer::isIdentStart(int c)
606	{
607	return isASCIIAlpha(c) \|\| c == '$' \|\| c == '_' \|\| (!isASCII(c) && (category(c) & (Letter_Uppercase \| Letter_Lowercase \| Letter_Titlecase \| Letter_Modifier \| Letter_Other)));
608	}
609
610	bool Lexer::isIdentPart(int c)
611	{
612	return isASCIIAlphanumeric(c) \|\| c == '$' \|\| c == '_' \|\| (!isASCII(c) && (category(c) & (Letter_Uppercase \| Letter_Lowercase \| Letter_Titlecase \| Letter_Modifier \| Letter_Other
613	\| Mark_NonSpacing \| Mark_SpacingCombining \| Number_DecimalDigit \| Punctuation_Connector)));
614	}
615
616	static bool isDecimalDigit(int c)
617	{
618	return isASCIIDigit(c);
619	}
620
621	bool Lexer::isHexDigit(int c)
622	{
623	return isASCIIHexDigit(c);
624	}
625
626	bool Lexer::isOctalDigit(int c)
627	{
628	return isASCIIOctalDigit(c);
629	}
630
631	int Lexer::matchPunctuator(int& charPos, int c1, int c2, int c3, int c4)
632	{
633	if (c1 == '>' && c2 == '>' && c3 == '>' && c4 == '=') {
634	shift(4);
635	return URSHIFTEQUAL;
636	}
637	if (c1 == '=' && c2 == '=' && c3 == '=') {
638	shift(3);
639	return STREQ;
640	}
641	if (c1 == '!' && c2 == '=' && c3 == '=') {
642	shift(3);
643	return STRNEQ;
644	}
645	if (c1 == '>' && c2 == '>' && c3 == '>') {
646	shift(3);
647	return URSHIFT;
648	}
649	if (c1 == '<' && c2 == '<' && c3 == '=') {
650	shift(3);
651	return LSHIFTEQUAL;
652	}
653	if (c1 == '>' && c2 == '>' && c3 == '=') {
654	shift(3);
655	return RSHIFTEQUAL;
656	}
657	if (c1 == '<' && c2 == '=') {
658	shift(2);
659	return LE;
660	}
661	if (c1 == '>' && c2 == '=') {
662	shift(2);
663	return GE;
664	}
665	if (c1 == '!' && c2 == '=') {
666	shift(2);
667	return NE;
668	}
669	if (c1 == '+' && c2 == '+') {
670	shift(2);
671	if (m_terminator)
672	return AUTOPLUSPLUS;
673	return PLUSPLUS;
674	}
675	if (c1 == '-' && c2 == '-') {
676	shift(2);
677	if (m_terminator)
678	return AUTOMINUSMINUS;
679	return MINUSMINUS;
680	}
681	if (c1 == '=' && c2 == '=') {
682	shift(2);
683	return EQEQ;
684	}
685	if (c1 == '+' && c2 == '=') {
686	shift(2);
687	return PLUSEQUAL;
688	}
689	if (c1 == '-' && c2 == '=') {
690	shift(2);
691	return MINUSEQUAL;
692	}
693	if (c1 == '*' && c2 == '=') {
694	shift(2);
695	return MULTEQUAL;
696	}
697	if (c1 == '/' && c2 == '=') {
698	shift(2);
699	return DIVEQUAL;
700	}
701	if (c1 == '&' && c2 == '=') {
702	shift(2);
703	return ANDEQUAL;
704	}
705	if (c1 == '^' && c2 == '=') {
706	shift(2);
707	return XOREQUAL;
708	}
709	if (c1 == '%' && c2 == '=') {
710	shift(2);
711	return MODEQUAL;
712	}
713	if (c1 == '\|' && c2 == '=') {
714	shift(2);
715	return OREQUAL;
716	}
717	if (c1 == '<' && c2 == '<') {
718	shift(2);
719	return LSHIFT;
720	}
721	if (c1 == '>' && c2 == '>') {
722	shift(2);
723	return RSHIFT;
724	}
725	if (c1 == '&' && c2 == '&') {
726	shift(2);
727	return AND;
728	}
729	if (c1 == '\|' && c2 == '\|') {
730	shift(2);
731	return OR;
732	}
733
734	switch (c1) {
735	case '=':
736	case '>':
737	case '<':
738	case ',':
739	case '!':
740	case '~':
741	case '?':
742	case ':':
743	case '.':
744	case '+':
745	case '-':
746	case '*':
747	case '/':
748	case '&':
749	case '\|':
750	case '^':
751	case '%':
752	case '(':
753	case ')':
754	case '[':
755	case ']':
756	case ';':
757	shift(1);
758	return static_cast<int>(c1);
759	case '{':
760	charPos = m_currentOffset;
761	shift(1);
762	return OPENBRACE;
763	case '}':
764	charPos = m_currentOffset;
765	shift(1);
766	return CLOSEBRACE;
767	default:
768	return -1;
769	}
770	}
771
772	unsigned short Lexer::singleEscape(unsigned short c)
773	{
774	switch (c) {
775	case 'b':
776	return 0x08;
777	case 't':
778	return 0x09;
779	case 'n':
780	return 0x0A;
781	case 'v':
782	return 0x0B;
783	case 'f':
784	return 0x0C;
785	case 'r':
786	return 0x0D;
787	case '"':
788	return 0x22;
789	case '\'':
790	return 0x27;
791	case '\\':
792	return 0x5C;
793	default:
794	return c;
795	}
796	}
797
798	unsigned short Lexer::convertOctal(int c1, int c2, int c3)
799	{
800	return static_cast<unsigned short>((c1 - '0') * 64 + (c2 - '0') * 8 + c3 - '0');
801	}
802
803	unsigned char Lexer::convertHex(int c)
804	{
805	if (c >= '0' && c <= '9')
806	return static_cast<unsigned char>(c - '0');
807	if (c >= 'a' && c <= 'f')
808	return static_cast<unsigned char>(c - 'a' + 10);
809	return static_cast<unsigned char>(c - 'A' + 10);
810	}
811
812	unsigned char Lexer::convertHex(int c1, int c2)
813	{
814	return ((convertHex(c1) << 4) + convertHex(c2));
815	}
816
817	UChar Lexer::convertUnicode(int c1, int c2, int c3, int c4)
818	{
819	unsigned char highByte = (convertHex(c1) << 4) + convertHex(c2);
820	unsigned char lowByte = (convertHex(c3) << 4) + convertHex(c4);
821	return (highByte << 8 \| lowByte);
822	}
823
824	void Lexer::record8(int c)
825	{
826	ASSERT(c >= 0);
827	ASSERT(c <= 0xff);
828	m_buffer8.append(static_cast<char>(c));
829	}
830
831	void Lexer::record16(int c)
832	{
833	ASSERT(c >= 0);
834	ASSERT(c <= USHRT_MAX);
835	record16(UChar(static_cast<unsigned short>(c)));
836	}
837
838	void Lexer::record16(UChar c)
839	{
840	m_buffer16.append(c);
841	}
842
843	bool Lexer::scanRegExp()
844	{
845	m_buffer16.clear();
846	bool lastWasEscape = false;
847	bool inBrackets = false;
848
849	while (1) {
850	if (isLineTerminator() \|\| m_current == -1)
851	return false;
852	else if (m_current != '/' \|\| lastWasEscape == true \|\| inBrackets == true) {
853	// keep track of '[' and ']'
854	if (!lastWasEscape) {
855	if ( m_current == '[' && !inBrackets )
856	inBrackets = true;
857	if ( m_current == ']' && inBrackets )
858	inBrackets = false;
859	}
860	record16(m_current);
861	lastWasEscape =
862	!lastWasEscape && (m_current == '\\');
863	} else { // end of regexp
864	m_pattern = UString(m_buffer16);
865	m_buffer16.clear();
866	shift(1);
867	break;
868	}
869	shift(1);
870	}
871
872	while (isIdentPart(m_current)) {
873	record16(m_current);
874	shift(1);
875	}
876	m_flags = UString(m_buffer16);
877
878	return true;
879	}
880
881	void Lexer::clear()
882	{
883	m_identifiers.clear();
884
885	Vector<char> newBuffer8;
886	newBuffer8.reserveInitialCapacity(initialReadBufferCapacity);
887	m_buffer8.swap(newBuffer8);
888
889	Vector<UChar> newBuffer16;
890	newBuffer16.reserveInitialCapacity(initialReadBufferCapacity);
891	m_buffer16.swap(newBuffer16);
892
893	m_isReparsing = false;
894
895	m_pattern = 0;
896	m_flags = 0;
897	}
898
899	} // namespace JSC

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: webkit/trunk/JavaScriptCore/parser/Lexer.cpp@ 41342

Download in other formats: