Context Navigation

source: webkit/trunk/JavaScriptCore/kjs/lexer.cpp@ 34979

Visit:

Last change on this file since 34979 was 34607, checked in by [email protected], 17 years ago

Reviewed by Geoff Garen.

Make Identifier construction use an explicitly passed IdentifierTable.

No change on SunSpider total.

Property svn:eol-style set to native

File size: 22.3 KB

Line
1	/*
2	* Copyright (C) 1999-2000 Harri Porten ([email protected])
3	* Copyright (C) 2006, 2007, 2008 Apple Inc. All Rights Reserved.
4	* Copyright (C) 2007 Cameron Zwarich ([email protected])
5	*
6	* This library is free software; you can redistribute it and/or
7	* modify it under the terms of the GNU Library General Public
8	* License as published by the Free Software Foundation; either
9	* version 2 of the License, or (at your option) any later version.
10	*
11	* This library is distributed in the hope that it will be useful,
12	* but WITHOUT ANY WARRANTY; without even the implied warranty of
13	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14	* Library General Public License for more details.
15	*
16	* You should have received a copy of the GNU Library General Public License
17	* along with this library; see the file COPYING.LIB. If not, write to
18	* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19	* Boston, MA 02110-1301, USA.
20	*
21	*/
22
23	#include "config.h"
24	#include "lexer.h"
25
26	#include "dtoa.h"
27	#include "JSFunction.h"
28	#include "nodes.h"
29	#include "NodeInfo.h"
30	#include <ctype.h>
31	#include <limits.h>
32	#include <string.h>
33	#include <wtf/Assertions.h>
34	#include <wtf/unicode/Unicode.h>
35
36	using namespace WTF;
37	using namespace Unicode;
38
39	// we can't specify the namespace in yacc's C output, so do it here
40	using namespace KJS;
41
42	#ifndef KDE_USE_FINAL
43	#include "grammar.h"
44	#endif
45
46	#include "lookup.h"
47	#include "lexer.lut.h"
48
49	// a bridge for yacc from the C world to C++
50	int kjsyylex(void* lvalp, void* llocp, void* globalData)
51	{
52	return static_cast<JSGlobalData*>(globalData)->lexer->lex(lvalp, llocp);
53	}
54
55	namespace KJS {
56
// Capacity reserved up front for the string and identifier tables
// (m_strings / m_identifiers).
static const size_t initialStringTableCapacity = 64;

// Capacity reserved up front for the per-token scratch buffers
// (m_buffer8 / m_buffer16).
static const size_t initialReadBufferCapacity = 32;

// Defined further down in this file; declared here because Lexer::lex() uses it first.
static bool isDecimalDigit(int character);
61
62	Lexer::Lexer(JSGlobalData* globalData)
63	: yylineno(1)
64	, restrKeyword(false)
65	, eatNextIdentifier(false)
66	, stackToken(-1)
67	, lastToken(-1)
68	, pos(0)
69	, code(0)
70	, length(0)
71	, atLineStart(true)
72	, current(0)
73	, next1(0)
74	, next2(0)
75	, next3(0)
76	, m_globalData(globalData)
77	, mainTable(KJS::mainTable)
78	{
79	m_buffer8.reserveCapacity(initialReadBufferCapacity);
80	m_buffer16.reserveCapacity(initialReadBufferCapacity);
81	m_strings.reserveCapacity(initialStringTableCapacity);
82	m_identifiers.reserveCapacity(initialStringTableCapacity);
83	}
84
85	Lexer::~Lexer()
86	{
87	delete[] mainTable.table;
88	}
89
90	void Lexer::setCode(int startingLineNumber, PassRefPtr<SourceProvider> source)
91	{
92	yylineno = startingLineNumber;
93	restrKeyword = false;
94	delimited = false;
95	eatNextIdentifier = false;
96	stackToken = -1;
97	lastToken = -1;
98
99	pos = 0;
100	m_source = source;
101	code = m_source->data();
102	length = m_source->length();
103	skipLF = false;
104	skipCR = false;
105	error = false;
106	atLineStart = true;
107
108	// read first characters
109	shift(4);
110	}
111
112	void Lexer::shift(unsigned p)
113	{
114	// ECMA-262 calls for stripping Cf characters here, but we only do this for BOM,
115	// see <https://bugs.webkit.org/show_bug.cgi?id=4931>.
116
117	while (p--) {
118	current = next1;
119	next1 = next2;
120	next2 = next3;
121	do {
122	if (pos >= length) {
123	pos++;
124	next3 = -1;
125	break;
126	}
127	next3 = code[pos++];
128	} while (next3 == 0xFEFF);
129	}
130	}
131
132	// called on each new line
133	void Lexer::nextLine()
134	{
135	yylineno++;
136	atLineStart = true;
137	}
138
139	void Lexer::setDone(State s)
140	{
141	state = s;
142	done = true;
143	}
144
145	int Lexer::lex(void* p1, void* p2)
146	{
147	YYSTYPE* lvalp = static_cast<YYSTYPE*>(p1);
148	YYLTYPE* llocp = static_cast<YYLTYPE*>(p2);
149	int token = 0;
150	state = Start;
151	unsigned short stringType = 0; // either single or double quotes
152	m_buffer8.clear();
153	m_buffer16.clear();
154	done = false;
155	terminator = false;
156	skipLF = false;
157	skipCR = false;
158
159	// did we push a token on the stack previously ?
160	// (after an automatic semicolon insertion)
161	if (stackToken >= 0) {
162	setDone(Other);
163	token = stackToken;
164	stackToken = 0;
165	}
166
167	while (!done) {
168	if (skipLF && current != '\n') // found \r but not \n afterwards
169	skipLF = false;
170	if (skipCR && current != '\r') // found \n but not \r afterwards
171	skipCR = false;
172	if (skipLF \|\| skipCR) // found \r\n or \n\r -> eat the second one
173	{
174	skipLF = false;
175	skipCR = false;
176	shift(1);
177	}
178	switch (state) {
179	case Start:
180	if (isWhiteSpace()) {
181	// do nothing
182	} else if (current == '/' && next1 == '/') {
183	shift(1);
184	state = InSingleLineComment;
185	} else if (current == '/' && next1 == '*') {
186	shift(1);
187	state = InMultiLineComment;
188	} else if (current == -1) {
189	if (!terminator && !delimited) {
190	// automatic semicolon insertion if program incomplete
191	token = ';';
192	stackToken = 0;
193	setDone(Other);
194	} else
195	setDone(Eof);
196	} else if (isLineTerminator()) {
197	nextLine();
198	terminator = true;
199	if (restrKeyword) {
200	token = ';';
201	setDone(Other);
202	}
203	} else if (current == '"' \|\| current == '\'') {
204	state = InString;
205	stringType = static_cast<unsigned short>(current);
206	} else if (isIdentStart(current)) {
207	record16(current);
208	state = InIdentifierOrKeyword;
209	} else if (current == '\\') {
210	state = InIdentifierStartUnicodeEscapeStart;
211	} else if (current == '0') {
212	record8(current);
213	state = InNum0;
214	} else if (isDecimalDigit(current)) {
215	record8(current);
216	state = InNum;
217	} else if (current == '.' && isDecimalDigit(next1)) {
218	record8(current);
219	state = InDecimal;
220	// <!-- marks the beginning of a line comment (for www usage)
221	} else if (current == '<' && next1 == '!' &&
222	next2 == '-' && next3 == '-') {
223	shift(3);
224	state = InSingleLineComment;
225	// same for -->
226	} else if (atLineStart && current == '-' && next1 == '-' && next2 == '>') {
227	shift(2);
228	state = InSingleLineComment;
229	} else {
230	token = matchPunctuator(lvalp->intValue, current, next1, next2, next3);
231	if (token != -1) {
232	setDone(Other);
233	} else {
234	// cerr << "encountered unknown character" << endl;
235	setDone(Bad);
236	}
237	}
238	break;
239	case InString:
240	if (current == stringType) {
241	shift(1);
242	setDone(String);
243	} else if (isLineTerminator() \|\| current == -1) {
244	setDone(Bad);
245	} else if (current == '\\') {
246	state = InEscapeSequence;
247	} else {
248	record16(current);
249	}
250	break;
251	// Escape Sequences inside of strings
252	case InEscapeSequence:
253	if (isOctalDigit(current)) {
254	if (current >= '0' && current <= '3' &&
255	isOctalDigit(next1) && isOctalDigit(next2)) {
256	record16(convertOctal(current, next1, next2));
257	shift(2);
258	state = InString;
259	} else if (isOctalDigit(current) && isOctalDigit(next1)) {
260	record16(convertOctal('0', current, next1));
261	shift(1);
262	state = InString;
263	} else if (isOctalDigit(current)) {
264	record16(convertOctal('0', '0', current));
265	state = InString;
266	} else {
267	setDone(Bad);
268	}
269	} else if (current == 'x')
270	state = InHexEscape;
271	else if (current == 'u')
272	state = InUnicodeEscape;
273	else if (isLineTerminator()) {
274	nextLine();
275	state = InString;
276	} else {
277	record16(singleEscape(static_cast<unsigned short>(current)));
278	state = InString;
279	}
280	break;
281	case InHexEscape:
282	if (isHexDigit(current) && isHexDigit(next1)) {
283	state = InString;
284	record16(convertHex(current, next1));
285	shift(1);
286	} else if (current == stringType) {
287	record16('x');
288	shift(1);
289	setDone(String);
290	} else {
291	record16('x');
292	record16(current);
293	state = InString;
294	}
295	break;
296	case InUnicodeEscape:
297	if (isHexDigit(current) && isHexDigit(next1) && isHexDigit(next2) && isHexDigit(next3)) {
298	record16(convertUnicode(current, next1, next2, next3));
299	shift(3);
300	state = InString;
301	} else if (current == stringType) {
302	record16('u');
303	shift(1);
304	setDone(String);
305	} else {
306	setDone(Bad);
307	}
308	break;
309	case InSingleLineComment:
310	if (isLineTerminator()) {
311	nextLine();
312	terminator = true;
313	if (restrKeyword) {
314	token = ';';
315	setDone(Other);
316	} else
317	state = Start;
318	} else if (current == -1) {
319	setDone(Eof);
320	}
321	break;
322	case InMultiLineComment:
323	if (current == -1) {
324	setDone(Bad);
325	} else if (isLineTerminator()) {
326	nextLine();
327	} else if (current == '*' && next1 == '/') {
328	state = Start;
329	shift(1);
330	}
331	break;
332	case InIdentifierOrKeyword:
333	case InIdentifier:
334	if (isIdentPart(current))
335	record16(current);
336	else if (current == '\\')
337	state = InIdentifierPartUnicodeEscapeStart;
338	else
339	setDone(state == InIdentifierOrKeyword ? IdentifierOrKeyword : Identifier);
340	break;
341	case InNum0:
342	if (current == 'x' \|\| current == 'X') {
343	record8(current);
344	state = InHex;
345	} else if (current == '.') {
346	record8(current);
347	state = InDecimal;
348	} else if (current == 'e' \|\| current == 'E') {
349	record8(current);
350	state = InExponentIndicator;
351	} else if (isOctalDigit(current)) {
352	record8(current);
353	state = InOctal;
354	} else if (isDecimalDigit(current)) {
355	record8(current);
356	state = InDecimal;
357	} else {
358	setDone(Number);
359	}
360	break;
361	case InHex:
362	if (isHexDigit(current)) {
363	record8(current);
364	} else {
365	setDone(Hex);
366	}
367	break;
368	case InOctal:
369	if (isOctalDigit(current)) {
370	record8(current);
371	}
372	else if (isDecimalDigit(current)) {
373	record8(current);
374	state = InDecimal;
375	} else
376	setDone(Octal);
377	break;
378	case InNum:
379	if (isDecimalDigit(current)) {
380	record8(current);
381	} else if (current == '.') {
382	record8(current);
383	state = InDecimal;
384	} else if (current == 'e' \|\| current == 'E') {
385	record8(current);
386	state = InExponentIndicator;
387	} else
388	setDone(Number);
389	break;
390	case InDecimal:
391	if (isDecimalDigit(current)) {
392	record8(current);
393	} else if (current == 'e' \|\| current == 'E') {
394	record8(current);
395	state = InExponentIndicator;
396	} else
397	setDone(Number);
398	break;
399	case InExponentIndicator:
400	if (current == '+' \|\| current == '-') {
401	record8(current);
402	} else if (isDecimalDigit(current)) {
403	record8(current);
404	state = InExponent;
405	} else
406	setDone(Bad);
407	break;
408	case InExponent:
409	if (isDecimalDigit(current)) {
410	record8(current);
411	} else
412	setDone(Number);
413	break;
414	case InIdentifierStartUnicodeEscapeStart:
415	if (current == 'u')
416	state = InIdentifierStartUnicodeEscape;
417	else
418	setDone(Bad);
419	break;
420	case InIdentifierPartUnicodeEscapeStart:
421	if (current == 'u')
422	state = InIdentifierPartUnicodeEscape;
423	else
424	setDone(Bad);
425	break;
426	case InIdentifierStartUnicodeEscape:
427	if (!isHexDigit(current) \|\| !isHexDigit(next1) \|\| !isHexDigit(next2) \|\| !isHexDigit(next3)) {
428	setDone(Bad);
429	break;
430	}
431	token = convertUnicode(current, next1, next2, next3);
432	shift(3);
433	if (!isIdentStart(token)) {
434	setDone(Bad);
435	break;
436	}
437	record16(token);
438	state = InIdentifier;
439	break;
440	case InIdentifierPartUnicodeEscape:
441	if (!isHexDigit(current) \|\| !isHexDigit(next1) \|\| !isHexDigit(next2) \|\| !isHexDigit(next3)) {
442	setDone(Bad);
443	break;
444	}
445	token = convertUnicode(current, next1, next2, next3);
446	shift(3);
447	if (!isIdentPart(token)) {
448	setDone(Bad);
449	break;
450	}
451	record16(token);
452	state = InIdentifier;
453	break;
454	default:
455	ASSERT(!"Unhandled state in switch statement");
456	}
457
458	// move on to the next character
459	if (!done)
460	shift(1);
461	if (state != Start && state != InSingleLineComment)
462	atLineStart = false;
463	}
464
465	// no identifiers allowed directly after numeric literal, e.g. "3in" is bad
466	if ((state == Number \|\| state == Octal \|\| state == Hex) && isIdentStart(current))
467	state = Bad;
468
469	// terminate string
470	m_buffer8.append('\0');
471
472	#ifdef KJS_DEBUG_LEX
473	fprintf(stderr, "line: %d ", lineNo());
474	fprintf(stderr, "yytext (%x): ", m_buffer8[0]);
475	fprintf(stderr, "%s ", buffer8.data());
476	#endif
477
478	double dval = 0;
479	if (state == Number) {
480	dval = strtod(m_buffer8.data(), 0L);
481	} else if (state == Hex) { // scan hex numbers
482	const char* p = m_buffer8.data() + 2;
483	while (char c = *p++) {
484	dval *= 16;
485	dval += convertHex(c);
486	}
487
488	if (dval >= mantissaOverflowLowerBound)
489	dval = parseIntOverflow(m_buffer8.data() + 2, p - (m_buffer8.data() + 3), 16);
490
491	state = Number;
492	} else if (state == Octal) { // scan octal number
493	const char* p = m_buffer8.data() + 1;
494	while (char c = *p++) {
495	dval *= 8;
496	dval += c - '0';
497	}
498
499	if (dval >= mantissaOverflowLowerBound)
500	dval = parseIntOverflow(m_buffer8.data() + 1, p - (m_buffer8.data() + 2), 8);
501
502	state = Number;
503	}
504
505	#ifdef KJS_DEBUG_LEX
506	switch (state) {
507	case Eof:
508	printf("(EOF)\n");
509	break;
510	case Other:
511	printf("(Other)\n");
512	break;
513	case Identifier:
514	printf("(Identifier)/(Keyword)\n");
515	break;
516	case String:
517	printf("(String)\n");
518	break;
519	case Number:
520	printf("(Number)\n");
521	break;
522	default:
523	printf("(unknown)");
524	}
525	#endif
526
527	if (state != Identifier)
528	eatNextIdentifier = false;
529
530	restrKeyword = false;
531	delimited = false;
532	llocp->first_line = yylineno; // ???
533	llocp->last_line = yylineno;
534
535	switch (state) {
536	case Eof:
537	token = 0;
538	break;
539	case Other:
540	if (token == '}' \|\| token == ';')
541	delimited = true;
542	break;
543	case Identifier:
544	// Apply anonymous-function hack below (eat the identifier).
545	if (eatNextIdentifier) {
546	eatNextIdentifier = false;
547	token = lex(lvalp, llocp);
548	break;
549	}
550	lvalp->ident = makeIdentifier(m_buffer16);
551	token = IDENT;
552	break;
553	case IdentifierOrKeyword: {
554	lvalp->ident = makeIdentifier(m_buffer16);
555	const HashEntry* entry = mainTable.entry(m_globalData, *lvalp->ident);
556	if (!entry) {
557	// Lookup for keyword failed, means this is an identifier.
558	token = IDENT;
559	break;
560	}
561	token = entry->integerValue;
562	// Hack for "f = function somename() { ... }"; too hard to get into the grammar.
563	eatNextIdentifier = token == FUNCTION && lastToken == '=';
564	if (token == CONTINUE \|\| token == BREAK \|\| token == RETURN \|\| token == THROW)
565	restrKeyword = true;
566	break;
567	}
568	case String:
569	lvalp->string = makeUString(m_buffer16);
570	token = STRING;
571	break;
572	case Number:
573	lvalp->doubleValue = dval;
574	token = NUMBER;
575	break;
576	case Bad:
577	#ifdef KJS_DEBUG_LEX
578	fprintf(stderr, "yylex: ERROR.\n");
579	#endif
580	error = true;
581	return -1;
582	default:
583	ASSERT(!"unhandled numeration value in switch");
584	error = true;
585	return -1;
586	}
587	lastToken = token;
588	return token;
589	}
590
591	bool Lexer::isWhiteSpace() const
592	{
593	return current == '\t' \|\| current == 0x0b \|\| current == 0x0c \|\| isSeparatorSpace(current);
594	}
595
596	bool Lexer::isLineTerminator()
597	{
598	bool cr = (current == '\r');
599	bool lf = (current == '\n');
600	if (cr)
601	skipLF = true;
602	else if (lf)
603	skipCR = true;
604	return cr \|\| lf \|\| current == 0x2028 \|\| current == 0x2029;
605	}
606
607	bool Lexer::isIdentStart(int c)
608	{
609	return (category(c) & (Letter_Uppercase \| Letter_Lowercase \| Letter_Titlecase \| Letter_Modifier \| Letter_Other))
610	\|\| c == '$' \|\| c == '_';
611	}
612
613	bool Lexer::isIdentPart(int c)
614	{
615	return (category(c) & (Letter_Uppercase \| Letter_Lowercase \| Letter_Titlecase \| Letter_Modifier \| Letter_Other
616	\| Mark_NonSpacing \| Mark_SpacingCombining \| Number_DecimalDigit \| Punctuation_Connector))
617	\|\| c == '$' \|\| c == '_';
618	}
619
// True when c is an ASCII decimal digit ('0' through '9').
static bool isDecimalDigit(int c)
{
    return '0' <= c && c <= '9';
}
624
625	bool Lexer::isHexDigit(int c)
626	{
627	return (c >= '0' && c <= '9' \|\|
628	c >= 'a' && c <= 'f' \|\|
629	c >= 'A' && c <= 'F');
630	}
631
632	bool Lexer::isOctalDigit(int c)
633	{
634	return (c >= '0' && c <= '7');
635	}
636
637	int Lexer::matchPunctuator(int& charPos, int c1, int c2, int c3, int c4)
638	{
639	if (c1 == '>' && c2 == '>' && c3 == '>' && c4 == '=') {
640	shift(4);
641	return URSHIFTEQUAL;
642	} else if (c1 == '=' && c2 == '=' && c3 == '=') {
643	shift(3);
644	return STREQ;
645	} else if (c1 == '!' && c2 == '=' && c3 == '=') {
646	shift(3);
647	return STRNEQ;
648	} else if (c1 == '>' && c2 == '>' && c3 == '>') {
649	shift(3);
650	return URSHIFT;
651	} else if (c1 == '<' && c2 == '<' && c3 == '=') {
652	shift(3);
653	return LSHIFTEQUAL;
654	} else if (c1 == '>' && c2 == '>' && c3 == '=') {
655	shift(3);
656	return RSHIFTEQUAL;
657	} else if (c1 == '<' && c2 == '=') {
658	shift(2);
659	return LE;
660	} else if (c1 == '>' && c2 == '=') {
661	shift(2);
662	return GE;
663	} else if (c1 == '!' && c2 == '=') {
664	shift(2);
665	return NE;
666	} else if (c1 == '+' && c2 == '+') {
667	shift(2);
668	if (terminator)
669	return AUTOPLUSPLUS;
670	else
671	return PLUSPLUS;
672	} else if (c1 == '-' && c2 == '-') {
673	shift(2);
674	if (terminator)
675	return AUTOMINUSMINUS;
676	else
677	return MINUSMINUS;
678	} else if (c1 == '=' && c2 == '=') {
679	shift(2);
680	return EQEQ;
681	} else if (c1 == '+' && c2 == '=') {
682	shift(2);
683	return PLUSEQUAL;
684	} else if (c1 == '-' && c2 == '=') {
685	shift(2);
686	return MINUSEQUAL;
687	} else if (c1 == '*' && c2 == '=') {
688	shift(2);
689	return MULTEQUAL;
690	} else if (c1 == '/' && c2 == '=') {
691	shift(2);
692	return DIVEQUAL;
693	} else if (c1 == '&' && c2 == '=') {
694	shift(2);
695	return ANDEQUAL;
696	} else if (c1 == '^' && c2 == '=') {
697	shift(2);
698	return XOREQUAL;
699	} else if (c1 == '%' && c2 == '=') {
700	shift(2);
701	return MODEQUAL;
702	} else if (c1 == '\|' && c2 == '=') {
703	shift(2);
704	return OREQUAL;
705	} else if (c1 == '<' && c2 == '<') {
706	shift(2);
707	return LSHIFT;
708	} else if (c1 == '>' && c2 == '>') {
709	shift(2);
710	return RSHIFT;
711	} else if (c1 == '&' && c2 == '&') {
712	shift(2);
713	return AND;
714	} else if (c1 == '\|' && c2 == '\|') {
715	shift(2);
716	return OR;
717	}
718
719	switch(c1) {
720	case '=':
721	case '>':
722	case '<':
723	case ',':
724	case '!':
725	case '~':
726	case '?':
727	case ':':
728	case '.':
729	case '+':
730	case '-':
731	case '*':
732	case '/':
733	case '&':
734	case '\|':
735	case '^':
736	case '%':
737	case '(':
738	case ')':
739	case '[':
740	case ']':
741	case ';':
742	shift(1);
743	return static_cast<int>(c1);
744	case '{':
745	charPos = pos - 4;
746	shift(1);
747	return OPENBRACE;
748	case '}':
749	charPos = pos - 4;
750	shift(1);
751	return CLOSEBRACE;
752	default:
753	return -1;
754	}
755	}
756
757	unsigned short Lexer::singleEscape(unsigned short c)
758	{
759	switch(c) {
760	case 'b':
761	return 0x08;
762	case 't':
763	return 0x09;
764	case 'n':
765	return 0x0A;
766	case 'v':
767	return 0x0B;
768	case 'f':
769	return 0x0C;
770	case 'r':
771	return 0x0D;
772	case '"':
773	return 0x22;
774	case '\'':
775	return 0x27;
776	case '\\':
777	return 0x5C;
778	default:
779	return c;
780	}
781	}
782
783	unsigned short Lexer::convertOctal(int c1, int c2, int c3)
784	{
785	return static_cast<unsigned short>((c1 - '0') * 64 + (c2 - '0') * 8 + c3 - '0');
786	}
787
788	unsigned char Lexer::convertHex(int c)
789	{
790	if (c >= '0' && c <= '9')
791	return static_cast<unsigned char>(c - '0');
792	if (c >= 'a' && c <= 'f')
793	return static_cast<unsigned char>(c - 'a' + 10);
794	return static_cast<unsigned char>(c - 'A' + 10);
795	}
796
797	unsigned char Lexer::convertHex(int c1, int c2)
798	{
799	return ((convertHex(c1) << 4) + convertHex(c2));
800	}
801
802	UChar Lexer::convertUnicode(int c1, int c2, int c3, int c4)
803	{
804	unsigned char highByte = (convertHex(c1) << 4) + convertHex(c2);
805	unsigned char lowByte = (convertHex(c3) << 4) + convertHex(c4);
806	return (highByte << 8 \| lowByte);
807	}
808
809	void Lexer::record8(int c)
810	{
811	ASSERT(c >= 0);
812	ASSERT(c <= 0xff);
813	m_buffer8.append(static_cast<char>(c));
814	}
815
816	void Lexer::record16(int c)
817	{
818	ASSERT(c >= 0);
819	ASSERT(c <= USHRT_MAX);
820	record16(UChar(static_cast<unsigned short>(c)));
821	}
822
823	void Lexer::record16(UChar c)
824	{
825	m_buffer16.append(c);
826	}
827
828	bool Lexer::scanRegExp()
829	{
830	m_buffer16.clear();
831	bool lastWasEscape = false;
832	bool inBrackets = false;
833
834	while (1) {
835	if (isLineTerminator() \|\| current == -1)
836	return false;
837	else if (current != '/' \|\| lastWasEscape == true \|\| inBrackets == true)
838	{
839	// keep track of '[' and ']'
840	if (!lastWasEscape) {
841	if ( current == '[' && !inBrackets )
842	inBrackets = true;
843	if ( current == ']' && inBrackets )
844	inBrackets = false;
845	}
846	record16(current);
847	lastWasEscape =
848	!lastWasEscape && (current == '\\');
849	} else { // end of regexp
850	m_pattern = UString(m_buffer16);
851	m_buffer16.clear();
852	shift(1);
853	break;
854	}
855	shift(1);
856	}
857
858	while (isIdentPart(current)) {
859	record16(current);
860	shift(1);
861	}
862	m_flags = UString(m_buffer16);
863
864	return true;
865	}
866
867	void Lexer::clear()
868	{
869	deleteAllValues(m_strings);
870	Vector<UString*> newStrings;
871	newStrings.reserveCapacity(initialStringTableCapacity);
872	m_strings.swap(newStrings);
873
874	deleteAllValues(m_identifiers);
875	Vector<KJS::Identifier*> newIdentifiers;
876	newIdentifiers.reserveCapacity(initialStringTableCapacity);
877	m_identifiers.swap(newIdentifiers);
878
879	Vector<char> newBuffer8;
880	newBuffer8.reserveCapacity(initialReadBufferCapacity);
881	m_buffer8.swap(newBuffer8);
882
883	Vector<UChar> newBuffer16;
884	newBuffer16.reserveCapacity(initialReadBufferCapacity);
885	m_buffer16.swap(newBuffer16);
886
887	m_pattern = 0;
888	m_flags = 0;
889	}
890
891	Identifier* Lexer::makeIdentifier(const Vector<UChar>& buffer)
892	{
893	KJS::Identifier* identifier = new KJS::Identifier(m_globalData, buffer.data(), buffer.size());
894	m_identifiers.append(identifier);
895	return identifier;
896	}
897
898	UString* Lexer::makeUString(const Vector<UChar>& buffer)
899	{
900	UString* string = new UString(buffer);
901	m_strings.append(string);
902	return string;
903	}
904
905	} // namespace KJS

Note: See TracBrowser for help on using the repository browser.

Download in other formats: