Context Navigation

lexer.cpp@ 31809

Visit:

Last change on this file since 31809 was 31809, checked in by [email protected], 17 years ago

Reviewed by Geoff.

Generate a pure (re-entrant) parser with Bison.

No change on SunSpider.

kjs/Parser.cpp: (KJS::Parser::parse):
kjs/grammar.y:
kjs/lexer.cpp: (kjsyylex): (KJS::Lexer::lex):
kjs/lexer.h: Pass state as function arguments, instead of global data. Don't call lexer() as often as before, as this function is about to become slower due to thread-specific storage.

kjs/function.cpp: (KJS::isStrWhiteSpace): Don't call isSeparatorSpace() for 8-bit characters, as these are already taken care of. This is a small speedup, compensating for a small slowdown caused by switching Bison mode.

Property svn:eol-style set to native

File size: 22.1 KB

Line
1	// -*- c-basic-offset: 2 -*-
2	/*
3	* Copyright (C) 1999-2000 Harri Porten ([email protected])
4	* Copyright (C) 2006, 2007 Apple Inc. All Rights Reserved.
5	* Copyright (C) 2007 Cameron Zwarich ([email protected])
6	*
7	* This library is free software; you can redistribute it and/or
8	* modify it under the terms of the GNU Library General Public
9	* License as published by the Free Software Foundation; either
10	* version 2 of the License, or (at your option) any later version.
11	*
12	* This library is distributed in the hope that it will be useful,
13	* but WITHOUT ANY WARRANTY; without even the implied warranty of
14	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15	* Library General Public License for more details.
16	*
17	* You should have received a copy of the GNU Library General Public License
18	* along with this library; see the file COPYING.LIB. If not, write to
19	* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20	* Boston, MA 02110-1301, USA.
21	*
22	*/
23
24	#include "config.h"
25	#include "lexer.h"
26
27	#include "dtoa.h"
28	#include "function.h"
29	#include "nodes.h"
30	#include "NodeInfo.h"
31	#include <ctype.h>
32	#include <limits.h>
33	#include <string.h>
34	#include <wtf/Assertions.h>
35	#include <wtf/unicode/Unicode.h>
36
37	using namespace WTF;
38	using namespace Unicode;
39
40	// we can't specify the namespace in yacc's C output, so do it here
41	using namespace KJS;
42
43	#ifndef KDE_USE_FINAL
44	#include "grammar.h"
45	#endif
46
47	#include "lookup.h"
48	#include "lexer.lut.h"
49
50	// a bridge for yacc from the C world to C++
51	int kjsyylex(YYSTYPE* lvalp, YYLTYPE* llocp, void* lexer)
52	{
53	return static_cast<Lexer*>(lexer)->lex(lvalp, llocp);
54	}
55
56	namespace KJS {
57
58	static bool isDecimalDigit(int); // forward declaration; the definition appears later in this file
59
60	static const size_t initialReadBufferCapacity = 32; // capacity reserved up front for m_buffer8 and m_buffer16
61	static const size_t initialStringTableCapacity = 64; // capacity reserved up front for m_strings and m_identifiers
62
63	Lexer& lexer()
64	{
65	ASSERT(JSLock::currentThreadIsHoldingLock());
66
67	// FIXME: We'd like to avoid calling new here, but we don't currently
68	// support tearing down the Lexer at app quit time, since that would involve
69	// tearing down its UString data members without holding the JSLock.
70	static Lexer* staticLexer = new Lexer;
71	return *staticLexer;
72	}
73
74	Lexer::Lexer()
75	: yylineno(1)
76	, restrKeyword(false)
77	, eatNextIdentifier(false)
78	, stackToken(-1)
79	, lastToken(-1)
80	, pos(0)
81	, code(0)
82	, length(0)
83	, atLineStart(true)
84	, current(0)
85	, next1(0)
86	, next2(0)
87	, next3(0)
88	{
89	m_buffer8.reserveCapacity(initialReadBufferCapacity);
90	m_buffer16.reserveCapacity(initialReadBufferCapacity);
91	m_strings.reserveCapacity(initialStringTableCapacity);
92	m_identifiers.reserveCapacity(initialStringTableCapacity);
93	}
94
95	void Lexer::setCode(int startingLineNumber, const UChar* c, unsigned int len)
96	{
97	yylineno = 1 + startingLineNumber;
98	restrKeyword = false;
99	delimited = false;
100	eatNextIdentifier = false;
101	stackToken = -1;
102	lastToken = -1;
103	pos = 0;
104	code = c;
105	length = len;
106	skipLF = false;
107	skipCR = false;
108	error = false;
109	atLineStart = true;
110
111	// read first characters
112	current = (length > 0) ? code[0] : -1;
113	next1 = (length > 1) ? code[1] : -1;
114	next2 = (length > 2) ? code[2] : -1;
115	next3 = (length > 3) ? code[3] : -1;
116	}
117
118	void Lexer::shift(unsigned int p)
119	{
120	// Here would be a good place to strip Cf characters, but that has caused compatibility problems:
121	// <http://bugs.webkit.org/show_bug.cgi?id=10183>.
122	while (p--) {
123	pos++;
124	current = next1;
125	next1 = next2;
126	next2 = next3;
127	next3 = (pos + 3 < length) ? code[pos + 3] : -1;
128	}
129	}
130
131	// called on each new line
132	void Lexer::nextLine()
133	{
134	yylineno++;
135	atLineStart = true;
136	}
137
138	void Lexer::setDone(State s)
139	{
140	state = s;
141	done = true;
142	}
143
144	int Lexer::lex(YYSTYPE* lvalp, YYLTYPE* llocp)
145	{
146	int token = 0;
147	state = Start;
148	unsigned short stringType = 0; // either single or double quotes
149	m_buffer8.clear();
150	m_buffer16.clear();
151	done = false;
152	terminator = false;
153	skipLF = false;
154	skipCR = false;
155
156	// did we push a token on the stack previously ?
157	// (after an automatic semicolon insertion)
158	if (stackToken >= 0) {
159	setDone(Other);
160	token = stackToken;
161	stackToken = 0;
162	}
163
164	while (!done) {
165	if (skipLF && current != '\n') // found \r but not \n afterwards
166	skipLF = false;
167	if (skipCR && current != '\r') // found \n but not \r afterwards
168	skipCR = false;
169	if (skipLF \|\| skipCR) // found \r\n or \n\r -> eat the second one
170	{
171	skipLF = false;
172	skipCR = false;
173	shift(1);
174	}
175	switch (state) {
176	case Start:
177	if (isWhiteSpace()) {
178	// do nothing
179	} else if (current == '/' && next1 == '/') {
180	shift(1);
181	state = InSingleLineComment;
182	} else if (current == '/' && next1 == '*') {
183	shift(1);
184	state = InMultiLineComment;
185	} else if (current == -1) {
186	if (!terminator && !delimited) {
187	// automatic semicolon insertion if program incomplete
188	token = ';';
189	stackToken = 0;
190	setDone(Other);
191	} else
192	setDone(Eof);
193	} else if (isLineTerminator()) {
194	nextLine();
195	terminator = true;
196	if (restrKeyword) {
197	token = ';';
198	setDone(Other);
199	}
200	} else if (current == '"' \|\| current == '\'') {
201	state = InString;
202	stringType = static_cast<unsigned short>(current);
203	} else if (isIdentStart(current)) {
204	record16(current);
205	state = InIdentifierOrKeyword;
206	} else if (current == '\\') {
207	state = InIdentifierStartUnicodeEscapeStart;
208	} else if (current == '0') {
209	record8(current);
210	state = InNum0;
211	} else if (isDecimalDigit(current)) {
212	record8(current);
213	state = InNum;
214	} else if (current == '.' && isDecimalDigit(next1)) {
215	record8(current);
216	state = InDecimal;
217	// <!-- marks the beginning of a line comment (for www usage)
218	} else if (current == '<' && next1 == '!' &&
219	next2 == '-' && next3 == '-') {
220	shift(3);
221	state = InSingleLineComment;
222	// same for -->
223	} else if (atLineStart && current == '-' && next1 == '-' && next2 == '>') {
224	shift(2);
225	state = InSingleLineComment;
226	} else {
227	token = matchPunctuator(current, next1, next2, next3);
228	if (token != -1) {
229	setDone(Other);
230	} else {
231	// cerr << "encountered unknown character" << endl;
232	setDone(Bad);
233	}
234	}
235	break;
236	case InString:
237	if (current == stringType) {
238	shift(1);
239	setDone(String);
240	} else if (isLineTerminator() \|\| current == -1) {
241	setDone(Bad);
242	} else if (current == '\\') {
243	state = InEscapeSequence;
244	} else {
245	record16(current);
246	}
247	break;
248	// Escape Sequences inside of strings
249	case InEscapeSequence:
250	if (isOctalDigit(current)) {
251	if (current >= '0' && current <= '3' &&
252	isOctalDigit(next1) && isOctalDigit(next2)) {
253	record16(convertOctal(current, next1, next2));
254	shift(2);
255	state = InString;
256	} else if (isOctalDigit(current) && isOctalDigit(next1)) {
257	record16(convertOctal('0', current, next1));
258	shift(1);
259	state = InString;
260	} else if (isOctalDigit(current)) {
261	record16(convertOctal('0', '0', current));
262	state = InString;
263	} else {
264	setDone(Bad);
265	}
266	} else if (current == 'x')
267	state = InHexEscape;
268	else if (current == 'u')
269	state = InUnicodeEscape;
270	else if (isLineTerminator()) {
271	nextLine();
272	state = InString;
273	} else {
274	record16(singleEscape(static_cast<unsigned short>(current)));
275	state = InString;
276	}
277	break;
278	case InHexEscape:
279	if (isHexDigit(current) && isHexDigit(next1)) {
280	state = InString;
281	record16(convertHex(current, next1));
282	shift(1);
283	} else if (current == stringType) {
284	record16('x');
285	shift(1);
286	setDone(String);
287	} else {
288	record16('x');
289	record16(current);
290	state = InString;
291	}
292	break;
293	case InUnicodeEscape:
294	if (isHexDigit(current) && isHexDigit(next1) && isHexDigit(next2) && isHexDigit(next3)) {
295	record16(convertUnicode(current, next1, next2, next3));
296	shift(3);
297	state = InString;
298	} else if (current == stringType) {
299	record16('u');
300	shift(1);
301	setDone(String);
302	} else {
303	setDone(Bad);
304	}
305	break;
306	case InSingleLineComment:
307	if (isLineTerminator()) {
308	nextLine();
309	terminator = true;
310	if (restrKeyword) {
311	token = ';';
312	setDone(Other);
313	} else
314	state = Start;
315	} else if (current == -1) {
316	setDone(Eof);
317	}
318	break;
319	case InMultiLineComment:
320	if (current == -1) {
321	setDone(Bad);
322	} else if (isLineTerminator()) {
323	nextLine();
324	} else if (current == '*' && next1 == '/') {
325	state = Start;
326	shift(1);
327	}
328	break;
329	case InIdentifierOrKeyword:
330	case InIdentifier:
331	if (isIdentPart(current))
332	record16(current);
333	else if (current == '\\')
334	state = InIdentifierPartUnicodeEscapeStart;
335	else
336	setDone(state == InIdentifierOrKeyword ? IdentifierOrKeyword : Identifier);
337	break;
338	case InNum0:
339	if (current == 'x' \|\| current == 'X') {
340	record8(current);
341	state = InHex;
342	} else if (current == '.') {
343	record8(current);
344	state = InDecimal;
345	} else if (current == 'e' \|\| current == 'E') {
346	record8(current);
347	state = InExponentIndicator;
348	} else if (isOctalDigit(current)) {
349	record8(current);
350	state = InOctal;
351	} else if (isDecimalDigit(current)) {
352	record8(current);
353	state = InDecimal;
354	} else {
355	setDone(Number);
356	}
357	break;
358	case InHex:
359	if (isHexDigit(current)) {
360	record8(current);
361	} else {
362	setDone(Hex);
363	}
364	break;
365	case InOctal:
366	if (isOctalDigit(current)) {
367	record8(current);
368	}
369	else if (isDecimalDigit(current)) {
370	record8(current);
371	state = InDecimal;
372	} else
373	setDone(Octal);
374	break;
375	case InNum:
376	if (isDecimalDigit(current)) {
377	record8(current);
378	} else if (current == '.') {
379	record8(current);
380	state = InDecimal;
381	} else if (current == 'e' \|\| current == 'E') {
382	record8(current);
383	state = InExponentIndicator;
384	} else
385	setDone(Number);
386	break;
387	case InDecimal:
388	if (isDecimalDigit(current)) {
389	record8(current);
390	} else if (current == 'e' \|\| current == 'E') {
391	record8(current);
392	state = InExponentIndicator;
393	} else
394	setDone(Number);
395	break;
396	case InExponentIndicator:
397	if (current == '+' \|\| current == '-') {
398	record8(current);
399	} else if (isDecimalDigit(current)) {
400	record8(current);
401	state = InExponent;
402	} else
403	setDone(Bad);
404	break;
405	case InExponent:
406	if (isDecimalDigit(current)) {
407	record8(current);
408	} else
409	setDone(Number);
410	break;
411	case InIdentifierStartUnicodeEscapeStart:
412	if (current == 'u')
413	state = InIdentifierStartUnicodeEscape;
414	else
415	setDone(Bad);
416	break;
417	case InIdentifierPartUnicodeEscapeStart:
418	if (current == 'u')
419	state = InIdentifierPartUnicodeEscape;
420	else
421	setDone(Bad);
422	break;
423	case InIdentifierStartUnicodeEscape:
424	if (!isHexDigit(current) \|\| !isHexDigit(next1) \|\| !isHexDigit(next2) \|\| !isHexDigit(next3)) {
425	setDone(Bad);
426	break;
427	}
428	token = convertUnicode(current, next1, next2, next3);
429	shift(3);
430	if (!isIdentStart(token)) {
431	setDone(Bad);
432	break;
433	}
434	record16(token);
435	state = InIdentifier;
436	break;
437	case InIdentifierPartUnicodeEscape:
438	if (!isHexDigit(current) \|\| !isHexDigit(next1) \|\| !isHexDigit(next2) \|\| !isHexDigit(next3)) {
439	setDone(Bad);
440	break;
441	}
442	token = convertUnicode(current, next1, next2, next3);
443	shift(3);
444	if (!isIdentPart(token)) {
445	setDone(Bad);
446	break;
447	}
448	record16(token);
449	state = InIdentifier;
450	break;
451	default:
452	ASSERT(!"Unhandled state in switch statement");
453	}
454
455	// move on to the next character
456	if (!done)
457	shift(1);
458	if (state != Start && state != InSingleLineComment)
459	atLineStart = false;
460	}
461
462	// no identifiers allowed directly after numeric literal, e.g. "3in" is bad
463	if ((state == Number \|\| state == Octal \|\| state == Hex) && isIdentStart(current))
464	state = Bad;
465
466	// terminate string
467	m_buffer8.append('\0');
468
469	#ifdef KJS_DEBUG_LEX
470	fprintf(stderr, "line: %d ", lineNo());
471	fprintf(stderr, "yytext (%x): ", m_buffer8[0]);
472	fprintf(stderr, "%s ", buffer8.data());
473	#endif
474
475	double dval = 0;
476	if (state == Number) {
477	dval = kjs_strtod(m_buffer8.data(), 0L);
478	} else if (state == Hex) { // scan hex numbers
479	const char* p = m_buffer8.data() + 2;
480	while (char c = *p++) {
481	dval *= 16;
482	dval += convertHex(c);
483	}
484
485	if (dval >= mantissaOverflowLowerBound)
486	dval = parseIntOverflow(m_buffer8.data() + 2, p - (m_buffer8.data() + 3), 16);
487
488	state = Number;
489	} else if (state == Octal) { // scan octal number
490	const char* p = m_buffer8.data() + 1;
491	while (char c = *p++) {
492	dval *= 8;
493	dval += c - '0';
494	}
495
496	if (dval >= mantissaOverflowLowerBound)
497	dval = parseIntOverflow(m_buffer8.data() + 1, p - (m_buffer8.data() + 2), 8);
498
499	state = Number;
500	}
501
502	#ifdef KJS_DEBUG_LEX
503	switch (state) {
504	case Eof:
505	printf("(EOF)\n");
506	break;
507	case Other:
508	printf("(Other)\n");
509	break;
510	case Identifier:
511	printf("(Identifier)/(Keyword)\n");
512	break;
513	case String:
514	printf("(String)\n");
515	break;
516	case Number:
517	printf("(Number)\n");
518	break;
519	default:
520	printf("(unknown)");
521	}
522	#endif
523
524	if (state != Identifier)
525	eatNextIdentifier = false;
526
527	restrKeyword = false;
528	delimited = false;
529	llocp->first_line = yylineno; // ???
530	llocp->last_line = yylineno;
531
532	switch (state) {
533	case Eof:
534	token = 0;
535	break;
536	case Other:
537	if (token == '}' \|\| token == ';')
538	delimited = true;
539	break;
540	case Identifier:
541	// Apply anonymous-function hack below (eat the identifier).
542	if (eatNextIdentifier) {
543	eatNextIdentifier = false;
544	token = lex(lvalp, llocp);
545	break;
546	}
547	lvalp->ident = makeIdentifier(m_buffer16);
548	token = IDENT;
549	break;
550	case IdentifierOrKeyword:
551	lvalp->ident = makeIdentifier(m_buffer16);
552	if ((token = mainTable.value(*lvalp->ident)) < 0) {
553	// Lookup for keyword failed, means this is an identifier.
554	token = IDENT;
555	break;
556	}
557	// Hack for "f = function somename() { ... }"; too hard to get into the grammar.
558	eatNextIdentifier = token == FUNCTION && lastToken == '=';
559	if (token == CONTINUE \|\| token == BREAK \|\| token == RETURN \|\| token == THROW)
560	restrKeyword = true;
561	break;
562	case String:
563	lvalp->string = makeUString(m_buffer16);
564	token = STRING;
565	break;
566	case Number:
567	lvalp->doubleValue = dval;
568	token = NUMBER;
569	break;
570	case Bad:
571	#ifdef KJS_DEBUG_LEX
572	fprintf(stderr, "yylex: ERROR.\n");
573	#endif
574	error = true;
575	return -1;
576	default:
577	ASSERT(!"unhandled numeration value in switch");
578	error = true;
579	return -1;
580	}
581	lastToken = token;
582	return token;
583	}
584
585	bool Lexer::isWhiteSpace() const
586	{
587	return current == '\t' \|\| current == 0x0b \|\| current == 0x0c \|\| isSeparatorSpace(current);
588	}
589
590	bool Lexer::isLineTerminator()
591	{
592	bool cr = (current == '\r');
593	bool lf = (current == '\n');
594	if (cr)
595	skipLF = true;
596	else if (lf)
597	skipCR = true;
598	return cr \|\| lf \|\| current == 0x2028 \|\| current == 0x2029;
599	}
600
601	bool Lexer::isIdentStart(int c)
602	{
603	return (category(c) & (Letter_Uppercase \| Letter_Lowercase \| Letter_Titlecase \| Letter_Modifier \| Letter_Other))
604	\|\| c == '$' \|\| c == '_';
605	}
606
607	bool Lexer::isIdentPart(int c)
608	{
609	return (category(c) & (Letter_Uppercase \| Letter_Lowercase \| Letter_Titlecase \| Letter_Modifier \| Letter_Other
610	\| Mark_NonSpacing \| Mark_SpacingCombining \| Number_DecimalDigit \| Punctuation_Connector))
611	\|\| c == '$' \|\| c == '_';
612	}
613
// True when `c` is an ASCII decimal digit ('0'..'9').
static bool isDecimalDigit(int c)
{
    if (c < '0')
        return false;
    return c <= '9';
}
618
619	bool Lexer::isHexDigit(int c)
620	{
621	return (c >= '0' && c <= '9' \|\|
622	c >= 'a' && c <= 'f' \|\|
623	c >= 'A' && c <= 'F');
624	}
625
626	bool Lexer::isOctalDigit(int c)
627	{
628	return (c >= '0' && c <= '7');
629	}
630
631	int Lexer::matchPunctuator(int c1, int c2, int c3, int c4)
632	{
633	if (c1 == '>' && c2 == '>' && c3 == '>' && c4 == '=') {
634	shift(4);
635	return URSHIFTEQUAL;
636	} else if (c1 == '=' && c2 == '=' && c3 == '=') {
637	shift(3);
638	return STREQ;
639	} else if (c1 == '!' && c2 == '=' && c3 == '=') {
640	shift(3);
641	return STRNEQ;
642	} else if (c1 == '>' && c2 == '>' && c3 == '>') {
643	shift(3);
644	return URSHIFT;
645	} else if (c1 == '<' && c2 == '<' && c3 == '=') {
646	shift(3);
647	return LSHIFTEQUAL;
648	} else if (c1 == '>' && c2 == '>' && c3 == '=') {
649	shift(3);
650	return RSHIFTEQUAL;
651	} else if (c1 == '<' && c2 == '=') {
652	shift(2);
653	return LE;
654	} else if (c1 == '>' && c2 == '=') {
655	shift(2);
656	return GE;
657	} else if (c1 == '!' && c2 == '=') {
658	shift(2);
659	return NE;
660	} else if (c1 == '+' && c2 == '+') {
661	shift(2);
662	if (terminator)
663	return AUTOPLUSPLUS;
664	else
665	return PLUSPLUS;
666	} else if (c1 == '-' && c2 == '-') {
667	shift(2);
668	if (terminator)
669	return AUTOMINUSMINUS;
670	else
671	return MINUSMINUS;
672	} else if (c1 == '=' && c2 == '=') {
673	shift(2);
674	return EQEQ;
675	} else if (c1 == '+' && c2 == '=') {
676	shift(2);
677	return PLUSEQUAL;
678	} else if (c1 == '-' && c2 == '=') {
679	shift(2);
680	return MINUSEQUAL;
681	} else if (c1 == '*' && c2 == '=') {
682	shift(2);
683	return MULTEQUAL;
684	} else if (c1 == '/' && c2 == '=') {
685	shift(2);
686	return DIVEQUAL;
687	} else if (c1 == '&' && c2 == '=') {
688	shift(2);
689	return ANDEQUAL;
690	} else if (c1 == '^' && c2 == '=') {
691	shift(2);
692	return XOREQUAL;
693	} else if (c1 == '%' && c2 == '=') {
694	shift(2);
695	return MODEQUAL;
696	} else if (c1 == '\|' && c2 == '=') {
697	shift(2);
698	return OREQUAL;
699	} else if (c1 == '<' && c2 == '<') {
700	shift(2);
701	return LSHIFT;
702	} else if (c1 == '>' && c2 == '>') {
703	shift(2);
704	return RSHIFT;
705	} else if (c1 == '&' && c2 == '&') {
706	shift(2);
707	return AND;
708	} else if (c1 == '\|' && c2 == '\|') {
709	shift(2);
710	return OR;
711	}
712
713	switch(c1) {
714	case '=':
715	case '>':
716	case '<':
717	case ',':
718	case '!':
719	case '~':
720	case '?':
721	case ':':
722	case '.':
723	case '+':
724	case '-':
725	case '*':
726	case '/':
727	case '&':
728	case '\|':
729	case '^':
730	case '%':
731	case '(':
732	case ')':
733	case '{':
734	case '}':
735	case '[':
736	case ']':
737	case ';':
738	shift(1);
739	return static_cast<int>(c1);
740	default:
741	return -1;
742	}
743	}
744
745	unsigned short Lexer::singleEscape(unsigned short c)
746	{
747	switch(c) {
748	case 'b':
749	return 0x08;
750	case 't':
751	return 0x09;
752	case 'n':
753	return 0x0A;
754	case 'v':
755	return 0x0B;
756	case 'f':
757	return 0x0C;
758	case 'r':
759	return 0x0D;
760	case '"':
761	return 0x22;
762	case '\'':
763	return 0x27;
764	case '\\':
765	return 0x5C;
766	default:
767	return c;
768	}
769	}
770
771	unsigned short Lexer::convertOctal(int c1, int c2, int c3)
772	{
773	return static_cast<unsigned short>((c1 - '0') * 64 + (c2 - '0') * 8 + c3 - '0');
774	}
775
776	unsigned char Lexer::convertHex(int c)
777	{
778	if (c >= '0' && c <= '9')
779	return static_cast<unsigned char>(c - '0');
780	if (c >= 'a' && c <= 'f')
781	return static_cast<unsigned char>(c - 'a' + 10);
782	return static_cast<unsigned char>(c - 'A' + 10);
783	}
784
785	unsigned char Lexer::convertHex(int c1, int c2)
786	{
787	return ((convertHex(c1) << 4) + convertHex(c2));
788	}
789
790	UChar Lexer::convertUnicode(int c1, int c2, int c3, int c4)
791	{
792	unsigned char highByte = (convertHex(c1) << 4) + convertHex(c2);
793	unsigned char lowByte = (convertHex(c3) << 4) + convertHex(c4);
794	return (highByte << 8 \| lowByte);
795	}
796
797	void Lexer::record8(int c)
798	{
799	ASSERT(c >= 0);
800	ASSERT(c <= 0xff);
801	m_buffer8.append(static_cast<char>(c));
802	}
803
804	void Lexer::record16(int c)
805	{
806	ASSERT(c >= 0);
807	ASSERT(c <= USHRT_MAX);
808	record16(UChar(static_cast<unsigned short>(c)));
809	}
810
811	void Lexer::record16(UChar c)
812	{
813	m_buffer16.append(c);
814	}
815
816	bool Lexer::scanRegExp()
817	{
818	m_buffer16.clear();
819	bool lastWasEscape = false;
820	bool inBrackets = false;
821
822	while (1) {
823	if (isLineTerminator() \|\| current == -1)
824	return false;
825	else if (current != '/' \|\| lastWasEscape == true \|\| inBrackets == true)
826	{
827	// keep track of '[' and ']'
828	if (!lastWasEscape) {
829	if ( current == '[' && !inBrackets )
830	inBrackets = true;
831	if ( current == ']' && inBrackets )
832	inBrackets = false;
833	}
834	record16(current);
835	lastWasEscape =
836	!lastWasEscape && (current == '\\');
837	} else { // end of regexp
838	m_pattern = UString(m_buffer16);
839	m_buffer16.clear();
840	shift(1);
841	break;
842	}
843	shift(1);
844	}
845
846	while (isIdentPart(current)) {
847	record16(current);
848	shift(1);
849	}
850	m_flags = UString(m_buffer16);
851
852	return true;
853	}
854
855	void Lexer::clear()
856	{
857	deleteAllValues(m_strings);
858	Vector<UString*> newStrings;
859	newStrings.reserveCapacity(initialStringTableCapacity);
860	m_strings.swap(newStrings);
861
862	deleteAllValues(m_identifiers);
863	Vector<KJS::Identifier*> newIdentifiers;
864	newIdentifiers.reserveCapacity(initialStringTableCapacity);
865	m_identifiers.swap(newIdentifiers);
866
867	Vector<char> newBuffer8;
868	newBuffer8.reserveCapacity(initialReadBufferCapacity);
869	m_buffer8.swap(newBuffer8);
870
871	Vector<UChar> newBuffer16;
872	newBuffer16.reserveCapacity(initialReadBufferCapacity);
873	m_buffer16.swap(newBuffer16);
874
875	m_pattern = 0;
876	m_flags = 0;
877	}
878
879	Identifier* Lexer::makeIdentifier(const Vector<UChar>& buffer)
880	{
881	KJS::Identifier* identifier = new KJS::Identifier(buffer.data(), buffer.size());
882	m_identifiers.append(identifier);
883	return identifier;
884	}
885
886	UString* Lexer::makeUString(const Vector<UChar>& buffer)
887	{
888	UString* string = new UString(buffer);
889	m_strings.append(string);
890	return string;
891	}
892
893	} // namespace KJS

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: webkit/trunk/JavaScriptCore/kjs/lexer.cpp@ 31809

Download in other formats: