Context Navigation

lexer.cpp@ 17862

Visit:

Last change on this file since 17862 was 17862, checked in by ap, 19 years ago

2006-11-20 W. Andy Carrel <[email protected]>

Reviewed by Maciej.

http://bugs.webkit.org/show_bug.cgi?id=11501
REGRESSION: \u no longer escapes metacharacters in RegExps
http://bugs.webkit.org/show_bug.cgi?id=11502
Serializing RegExps doesn't preserve Unicode escapes

JavaScriptCore:

kjs/lexer.cpp: (Lexer::Lexer): (Lexer::setCode): (Lexer::shift): (Lexer::scanRegExp): Push \u parsing back down into the RegExp object rather than in the parser. This backs out r17354 in favor of a new fix that better matches the behavior of other browsers.

kjs/lexer.h:
kjs/regexp.cpp: (KJS::RegExp::RegExp): (KJS::sanitizePattern): (KJS::isHexDigit): (KJS::convertHex): (KJS::convertUnicode):
kjs/regexp.h: Translate \u escaped unicode characters for the benefit of pcre.

kjs/ustring.cpp: (KJS::UString::append): Fix failure to increment length on the first UChar appended to a UString that was copy-on-write.

tests/mozilla/ecma_2/RegExp/properties-001.js: Adjust tests back to the uniform standards.

LayoutTests:

fast/js/kde/RegExp-expected.txt:
fast/js/regexp-unicode-handling-expected.txt: Adjust these test results to passing as a result of other included changes in this revision.

Property svn:eol-style set to native

File size: 22.0 KB

Line
1	// -*- c-basic-offset: 2 -*-
2	/*
3	* This file is part of the KDE libraries
4	* Copyright (C) 1999-2000 Harri Porten ([email protected])
5	* Copyright (C) 2006 Apple Computer, Inc.
6	*
7	* This library is free software; you can redistribute it and/or
8	* modify it under the terms of the GNU Library General Public
9	* License as published by the Free Software Foundation; either
10	* version 2 of the License, or (at your option) any later version.
11	*
12	* This library is distributed in the hope that it will be useful,
13	* but WITHOUT ANY WARRANTY; without even the implied warranty of
14	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15	* Library General Public License for more details.
16	*
17	* You should have received a copy of the GNU Library General Public License
18	* along with this library; see the file COPYING.LIB. If not, write to
19	* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20	* Boston, MA 02110-1301, USA.
21	*
22	*/
23
24	#include "config.h"
25	#include "lexer.h"
26
27	#include <ctype.h>
28	#include <string.h>
29
30	#include "interpreter.h"
31	#include "nodes.h"
32	#include <wtf/unicode/Unicode.h>
33
34	static bool isDecimalDigit(int);
35
36	// we can't specify the namespace in yacc's C output, so do it here
37	using namespace KJS;
38
39	static Lexer *currLexer = 0;
40
41	#ifndef KDE_USE_FINAL
42	#include "grammar.h"
43	#endif
44
45	#include "lookup.h"
46	#include "lexer.lut.h"
47
48	extern YYLTYPE kjsyylloc; // global bison variable holding token info
49
50	// a bridge for yacc from the C world to C++
51	int kjsyylex()
52	{
53	return Lexer::curr()->lex();
54	}
55
56	Lexer::Lexer()
57	: yylineno(1),
58	size8(128), size16(128), restrKeyword(false),
59	eatNextIdentifier(false), stackToken(-1), lastToken(-1), pos(0),
60	code(0), length(0),
61	#ifndef KJS_PURE_ECMA
62	bol(true),
63	#endif
64	current(0), next1(0), next2(0), next3(0),
65	strings(0), numStrings(0), stringsCapacity(0),
66	identifiers(0), numIdentifiers(0), identifiersCapacity(0)
67	{
68	// allocate space for read buffers
69	buffer8 = new char[size8];
70	buffer16 = new KJS::UChar[size16];
71	currLexer = this;
72	}
73
74	Lexer::~Lexer()
75	{
76	doneParsing();
77	delete [] buffer8;
78	delete [] buffer16;
79	}
80
81	Lexer *Lexer::curr()
82	{
83	if (!currLexer) {
84	// create singleton instance
85	currLexer = new Lexer();
86	}
87	return currLexer;
88	}
89
#ifdef KJS_DEBUG_MEM
// Memory-debugging helper: destroys the current lexer and clears the pointer
// so that the next Lexer::curr() call creates a fresh one.
void Lexer::globalClear()
{
  delete currLexer;
  currLexer = 0L;
}
#endif
97
98	void Lexer::setCode(const UString &sourceURL, int startingLineNumber, const KJS::UChar *c, unsigned int len)
99	{
100	yylineno = 1 + startingLineNumber;
101	m_sourceURL = sourceURL;
102	restrKeyword = false;
103	delimited = false;
104	eatNextIdentifier = false;
105	stackToken = -1;
106	lastToken = -1;
107	pos = 0;
108	code = c;
109	length = len;
110	skipLF = false;
111	skipCR = false;
112	error = false;
113	#ifndef KJS_PURE_ECMA
114	bol = true;
115	#endif
116
117	// read first characters
118	current = (length > 0) ? code[0].uc : -1;
119	next1 = (length > 1) ? code[1].uc : -1;
120	next2 = (length > 2) ? code[2].uc : -1;
121	next3 = (length > 3) ? code[3].uc : -1;
122	}
123
124	void Lexer::shift(unsigned int p)
125	{
126	// Here would be a good place to strip Cf characters, but that has caused compatibility problems:
127	// <http://bugzilla.opendarwin.org/show_bug.cgi?id=10183>.
128	while (p--) {
129	pos++;
130	current = next1;
131	next1 = next2;
132	next2 = next3;
133	next3 = (pos + 3 < length) ? code[pos + 3].uc : -1;
134	}
135	}
136
137	// called on each new line
138	void Lexer::nextLine()
139	{
140	yylineno++;
141	#ifndef KJS_PURE_ECMA
142	bol = true;
143	#endif
144	}
145
146	void Lexer::setDone(State s)
147	{
148	state = s;
149	done = true;
150	}
151
152	int Lexer::lex()
153	{
154	int token = 0;
155	state = Start;
156	unsigned short stringType = 0; // either single or double quotes
157	pos8 = pos16 = 0;
158	done = false;
159	terminator = false;
160	skipLF = false;
161	skipCR = false;
162
163	// did we push a token on the stack previously ?
164	// (after an automatic semicolon insertion)
165	if (stackToken >= 0) {
166	setDone(Other);
167	token = stackToken;
168	stackToken = 0;
169	}
170
171	while (!done) {
172	if (skipLF && current != '\n') // found \r but not \n afterwards
173	skipLF = false;
174	if (skipCR && current != '\r') // found \n but not \r afterwards
175	skipCR = false;
176	if (skipLF \|\| skipCR) // found \r\n or \n\r -> eat the second one
177	{
178	skipLF = false;
179	skipCR = false;
180	shift(1);
181	}
182	switch (state) {
183	case Start:
184	if (isWhiteSpace()) {
185	// do nothing
186	} else if (current == '/' && next1 == '/') {
187	shift(1);
188	state = InSingleLineComment;
189	} else if (current == '/' && next1 == '*') {
190	shift(1);
191	state = InMultiLineComment;
192	} else if (current == -1) {
193	if (!terminator && !delimited) {
194	// automatic semicolon insertion if program incomplete
195	token = ';';
196	stackToken = 0;
197	setDone(Other);
198	} else
199	setDone(Eof);
200	} else if (isLineTerminator()) {
201	nextLine();
202	terminator = true;
203	if (restrKeyword) {
204	token = ';';
205	setDone(Other);
206	}
207	} else if (current == '"' \|\| current == '\'') {
208	state = InString;
209	stringType = static_cast<unsigned short>(current);
210	} else if (isIdentStart(current)) {
211	record16(current);
212	state = InIdentifierOrKeyword;
213	} else if (current == '\\') {
214	state = InIdentifierUnicodeEscapeStart;
215	} else if (current == '0') {
216	record8(current);
217	state = InNum0;
218	} else if (isDecimalDigit(current)) {
219	record8(current);
220	state = InNum;
221	} else if (current == '.' && isDecimalDigit(next1)) {
222	record8(current);
223	state = InDecimal;
224	#ifndef KJS_PURE_ECMA
225	// <!-- marks the beginning of a line comment (for www usage)
226	} else if (current == '<' && next1 == '!' &&
227	next2 == '-' && next3 == '-') {
228	shift(3);
229	state = InSingleLineComment;
230	// same for -->
231	} else if (bol && current == '-' && next1 == '-' && next2 == '>') {
232	shift(2);
233	state = InSingleLineComment;
234	#endif
235	} else {
236	token = matchPunctuator(current, next1, next2, next3);
237	if (token != -1) {
238	setDone(Other);
239	} else {
240	// cerr << "encountered unknown character" << endl;
241	setDone(Bad);
242	}
243	}
244	break;
245	case InString:
246	if (current == stringType) {
247	shift(1);
248	setDone(String);
249	} else if (isLineTerminator() \|\| current == -1) {
250	setDone(Bad);
251	} else if (current == '\\') {
252	state = InEscapeSequence;
253	} else {
254	record16(current);
255	}
256	break;
257	// Escape Sequences inside of strings
258	case InEscapeSequence:
259	if (isOctalDigit(current)) {
260	if (current >= '0' && current <= '3' &&
261	isOctalDigit(next1) && isOctalDigit(next2)) {
262	record16(convertOctal(current, next1, next2));
263	shift(2);
264	state = InString;
265	} else if (isOctalDigit(current) && isOctalDigit(next1)) {
266	record16(convertOctal('0', current, next1));
267	shift(1);
268	state = InString;
269	} else if (isOctalDigit(current)) {
270	record16(convertOctal('0', '0', current));
271	state = InString;
272	} else {
273	setDone(Bad);
274	}
275	} else if (current == 'x')
276	state = InHexEscape;
277	else if (current == 'u')
278	state = InUnicodeEscape;
279	else if (isLineTerminator()) {
280	nextLine();
281	state = InString;
282	} else {
283	record16(singleEscape(static_cast<unsigned short>(current)));
284	state = InString;
285	}
286	break;
287	case InHexEscape:
288	if (isHexDigit(current) && isHexDigit(next1)) {
289	state = InString;
290	record16(convertHex(current, next1));
291	shift(1);
292	} else if (current == stringType) {
293	record16('x');
294	shift(1);
295	setDone(String);
296	} else {
297	record16('x');
298	record16(current);
299	state = InString;
300	}
301	break;
302	case InUnicodeEscape:
303	if (isHexDigit(current) && isHexDigit(next1) && isHexDigit(next2) && isHexDigit(next3)) {
304	record16(convertUnicode(current, next1, next2, next3));
305	shift(3);
306	state = InString;
307	} else if (current == stringType) {
308	record16('u');
309	shift(1);
310	setDone(String);
311	} else {
312	setDone(Bad);
313	}
314	break;
315	case InSingleLineComment:
316	if (isLineTerminator()) {
317	nextLine();
318	terminator = true;
319	if (restrKeyword) {
320	token = ';';
321	setDone(Other);
322	} else
323	state = Start;
324	} else if (current == -1) {
325	setDone(Eof);
326	}
327	break;
328	case InMultiLineComment:
329	if (current == -1) {
330	setDone(Bad);
331	} else if (isLineTerminator()) {
332	nextLine();
333	} else if (current == '*' && next1 == '/') {
334	state = Start;
335	shift(1);
336	}
337	break;
338	case InIdentifierOrKeyword:
339	case InIdentifier:
340	if (isIdentPart(current))
341	record16(current);
342	else if (current == '\\')
343	state = InIdentifierUnicodeEscapeStart;
344	else
345	setDone(state == InIdentifierOrKeyword ? IdentifierOrKeyword : Identifier);
346	break;
347	case InNum0:
348	if (current == 'x' \|\| current == 'X') {
349	record8(current);
350	state = InHex;
351	} else if (current == '.') {
352	record8(current);
353	state = InDecimal;
354	} else if (current == 'e' \|\| current == 'E') {
355	record8(current);
356	state = InExponentIndicator;
357	} else if (isOctalDigit(current)) {
358	record8(current);
359	state = InOctal;
360	} else if (isDecimalDigit(current)) {
361	record8(current);
362	state = InDecimal;
363	} else {
364	setDone(Number);
365	}
366	break;
367	case InHex:
368	if (isHexDigit(current)) {
369	record8(current);
370	} else {
371	setDone(Hex);
372	}
373	break;
374	case InOctal:
375	if (isOctalDigit(current)) {
376	record8(current);
377	}
378	else if (isDecimalDigit(current)) {
379	record8(current);
380	state = InDecimal;
381	} else
382	setDone(Octal);
383	break;
384	case InNum:
385	if (isDecimalDigit(current)) {
386	record8(current);
387	} else if (current == '.') {
388	record8(current);
389	state = InDecimal;
390	} else if (current == 'e' \|\| current == 'E') {
391	record8(current);
392	state = InExponentIndicator;
393	} else
394	setDone(Number);
395	break;
396	case InDecimal:
397	if (isDecimalDigit(current)) {
398	record8(current);
399	} else if (current == 'e' \|\| current == 'E') {
400	record8(current);
401	state = InExponentIndicator;
402	} else
403	setDone(Number);
404	break;
405	case InExponentIndicator:
406	if (current == '+' \|\| current == '-') {
407	record8(current);
408	} else if (isDecimalDigit(current)) {
409	record8(current);
410	state = InExponent;
411	} else
412	setDone(Bad);
413	break;
414	case InExponent:
415	if (isDecimalDigit(current)) {
416	record8(current);
417	} else
418	setDone(Number);
419	break;
420	case InIdentifierUnicodeEscapeStart:
421	if (current == 'u')
422	state = InIdentifierUnicodeEscape;
423	else
424	setDone(Bad);
425	break;
426	case InIdentifierUnicodeEscape:
427	if (isHexDigit(current) && isHexDigit(next1) && isHexDigit(next2) && isHexDigit(next3)) {
428	record16(convertUnicode(current, next1, next2, next3));
429	shift(3);
430	state = InIdentifier;
431	} else {
432	setDone(Bad);
433	}
434	break;
435	default:
436	assert(!"Unhandled state in switch statement");
437	}
438
439	// move on to the next character
440	if (!done)
441	shift(1);
442	#ifndef KJS_PURE_ECMA
443	if (state != Start && state != InSingleLineComment)
444	bol = false;
445	#endif
446	}
447
448	// no identifiers allowed directly after numeric literal, e.g. "3in" is bad
449	if ((state == Number \|\| state == Octal \|\| state == Hex) && isIdentStart(current))
450	state = Bad;
451
452	// terminate string
453	buffer8[pos8] = '\0';
454
455	#ifdef KJS_DEBUG_LEX
456	fprintf(stderr, "line: %d ", lineNo());
457	fprintf(stderr, "yytext (%x): ", buffer8[0]);
458	fprintf(stderr, "%s ", buffer8);
459	#endif
460
461	double dval = 0;
462	if (state == Number) {
463	dval = strtod(buffer8, 0L);
464	} else if (state == Hex) { // scan hex numbers
465	const char *p = buffer8 + 2;
466	while (char c = *p++) {
467	dval *= 16;
468	dval += convertHex(c);
469	}
470	state = Number;
471	} else if (state == Octal) { // scan octal number
472	const char *p = buffer8 + 1;
473	while (char c = *p++) {
474	dval *= 8;
475	dval += c - '0';
476	}
477	state = Number;
478	}
479
480	#ifdef KJS_DEBUG_LEX
481	switch (state) {
482	case Eof:
483	printf("(EOF)\n");
484	break;
485	case Other:
486	printf("(Other)\n");
487	break;
488	case Identifier:
489	printf("(Identifier)/(Keyword)\n");
490	break;
491	case String:
492	printf("(String)\n");
493	break;
494	case Number:
495	printf("(Number)\n");
496	break;
497	default:
498	printf("(unknown)");
499	}
500	#endif
501
502	if (state != Identifier && eatNextIdentifier)
503	eatNextIdentifier = false;
504
505	restrKeyword = false;
506	delimited = false;
507	kjsyylloc.first_line = yylineno; // ???
508	kjsyylloc.last_line = yylineno;
509
510	switch (state) {
511	case Eof:
512	token = 0;
513	break;
514	case Other:
515	if(token == '}' \|\| token == ';') {
516	delimited = true;
517	}
518	break;
519	case IdentifierOrKeyword:
520	if ((token = Lookup::find(&mainTable, buffer16, pos16)) < 0) {
521	case Identifier:
522	// Lookup for keyword failed, means this is an identifier
523	// Apply anonymous-function hack below (eat the identifier)
524	if (eatNextIdentifier) {
525	eatNextIdentifier = false;
526	token = lex();
527	break;
528	}
529	kjsyylval.ident = makeIdentifier(buffer16, pos16);
530	token = IDENT;
531	break;
532	}
533
534	eatNextIdentifier = false;
535	// Hack for "f = function somename() { ... }", too hard to get into the grammar
536	if (token == FUNCTION && lastToken == '=' )
537	eatNextIdentifier = true;
538
539	if (token == CONTINUE \|\| token == BREAK \|\|
540	token == RETURN \|\| token == THROW)
541	restrKeyword = true;
542	break;
543	case String:
544	kjsyylval.ustr = makeUString(buffer16, pos16);
545	token = STRING;
546	break;
547	case Number:
548	kjsyylval.dval = dval;
549	token = NUMBER;
550	break;
551	case Bad:
552	#ifdef KJS_DEBUG_LEX
553	fprintf(stderr, "yylex: ERROR.\n");
554	#endif
555	error = true;
556	return -1;
557	default:
558	assert(!"unhandled numeration value in switch");
559	error = true;
560	return -1;
561	}
562	lastToken = token;
563	return token;
564	}
565
566	bool Lexer::isWhiteSpace() const
567	{
568	return current == '\t' \|\| current == 0x0b \|\| current == 0x0c \|\| WTF::Unicode::isSeparatorSpace(current);
569	}
570
571	bool Lexer::isLineTerminator()
572	{
573	bool cr = (current == '\r');
574	bool lf = (current == '\n');
575	if (cr)
576	skipLF = true;
577	else if (lf)
578	skipCR = true;
579	return cr \|\| lf \|\| current == 0x2028 \|\| current == 0x2029;
580	}
581
582	bool Lexer::isIdentStart(int c)
583	{
584	return (WTF::Unicode::category(c) & (WTF::Unicode::Letter_Uppercase
585	\| WTF::Unicode::Letter_Lowercase
586	\| WTF::Unicode::Letter_Titlecase
587	\| WTF::Unicode::Letter_Modifier
588	\| WTF::Unicode::Letter_Other))
589	\|\| c == '$' \|\| c == '_';
590	}
591
592	bool Lexer::isIdentPart(int c)
593	{
594	return (WTF::Unicode::category(c) & (WTF::Unicode::Letter_Uppercase
595	\| WTF::Unicode::Letter_Lowercase
596	\| WTF::Unicode::Letter_Titlecase
597	\| WTF::Unicode::Letter_Modifier
598	\| WTF::Unicode::Letter_Other
599	\| WTF::Unicode::Mark_NonSpacing
600	\| WTF::Unicode::Mark_SpacingCombining
601	\| WTF::Unicode::Number_DecimalDigit
602	\| WTF::Unicode::Punctuation_Connector))
603	\|\| c == '$' \|\| c == '_';
604	}
605
// Returns true if c is an ASCII decimal digit ('0' through '9').
static bool isDecimalDigit(int c)
{
  return (c >= '0' && c <= '9');
}
610
611	bool Lexer::isHexDigit(int c)
612	{
613	return (c >= '0' && c <= '9' \|\|
614	c >= 'a' && c <= 'f' \|\|
615	c >= 'A' && c <= 'F');
616	}
617
618	bool Lexer::isOctalDigit(int c)
619	{
620	return (c >= '0' && c <= '7');
621	}
622
623	int Lexer::matchPunctuator(int c1, int c2, int c3, int c4)
624	{
625	if (c1 == '>' && c2 == '>' && c3 == '>' && c4 == '=') {
626	shift(4);
627	return URSHIFTEQUAL;
628	} else if (c1 == '=' && c2 == '=' && c3 == '=') {
629	shift(3);
630	return STREQ;
631	} else if (c1 == '!' && c2 == '=' && c3 == '=') {
632	shift(3);
633	return STRNEQ;
634	} else if (c1 == '>' && c2 == '>' && c3 == '>') {
635	shift(3);
636	return URSHIFT;
637	} else if (c1 == '<' && c2 == '<' && c3 == '=') {
638	shift(3);
639	return LSHIFTEQUAL;
640	} else if (c1 == '>' && c2 == '>' && c3 == '=') {
641	shift(3);
642	return RSHIFTEQUAL;
643	} else if (c1 == '<' && c2 == '=') {
644	shift(2);
645	return LE;
646	} else if (c1 == '>' && c2 == '=') {
647	shift(2);
648	return GE;
649	} else if (c1 == '!' && c2 == '=') {
650	shift(2);
651	return NE;
652	} else if (c1 == '+' && c2 == '+') {
653	shift(2);
654	if (terminator)
655	return AUTOPLUSPLUS;
656	else
657	return PLUSPLUS;
658	} else if (c1 == '-' && c2 == '-') {
659	shift(2);
660	if (terminator)
661	return AUTOMINUSMINUS;
662	else
663	return MINUSMINUS;
664	} else if (c1 == '=' && c2 == '=') {
665	shift(2);
666	return EQEQ;
667	} else if (c1 == '+' && c2 == '=') {
668	shift(2);
669	return PLUSEQUAL;
670	} else if (c1 == '-' && c2 == '=') {
671	shift(2);
672	return MINUSEQUAL;
673	} else if (c1 == '*' && c2 == '=') {
674	shift(2);
675	return MULTEQUAL;
676	} else if (c1 == '/' && c2 == '=') {
677	shift(2);
678	return DIVEQUAL;
679	} else if (c1 == '&' && c2 == '=') {
680	shift(2);
681	return ANDEQUAL;
682	} else if (c1 == '^' && c2 == '=') {
683	shift(2);
684	return XOREQUAL;
685	} else if (c1 == '%' && c2 == '=') {
686	shift(2);
687	return MODEQUAL;
688	} else if (c1 == '\|' && c2 == '=') {
689	shift(2);
690	return OREQUAL;
691	} else if (c1 == '<' && c2 == '<') {
692	shift(2);
693	return LSHIFT;
694	} else if (c1 == '>' && c2 == '>') {
695	shift(2);
696	return RSHIFT;
697	} else if (c1 == '&' && c2 == '&') {
698	shift(2);
699	return AND;
700	} else if (c1 == '\|' && c2 == '\|') {
701	shift(2);
702	return OR;
703	}
704
705	switch(c1) {
706	case '=':
707	case '>':
708	case '<':
709	case ',':
710	case '!':
711	case '~':
712	case '?':
713	case ':':
714	case '.':
715	case '+':
716	case '-':
717	case '*':
718	case '/':
719	case '&':
720	case '\|':
721	case '^':
722	case '%':
723	case '(':
724	case ')':
725	case '{':
726	case '}':
727	case '[':
728	case ']':
729	case ';':
730	shift(1);
731	return static_cast<int>(c1);
732	default:
733	return -1;
734	}
735	}
736
737	unsigned short Lexer::singleEscape(unsigned short c)
738	{
739	switch(c) {
740	case 'b':
741	return 0x08;
742	case 't':
743	return 0x09;
744	case 'n':
745	return 0x0A;
746	case 'v':
747	return 0x0B;
748	case 'f':
749	return 0x0C;
750	case 'r':
751	return 0x0D;
752	case '"':
753	return 0x22;
754	case '\'':
755	return 0x27;
756	case '\\':
757	return 0x5C;
758	default:
759	return c;
760	}
761	}
762
763	unsigned short Lexer::convertOctal(int c1, int c2, int c3)
764	{
765	return static_cast<unsigned short>((c1 - '0') * 64 + (c2 - '0') * 8 + c3 - '0');
766	}
767
768	unsigned char Lexer::convertHex(int c)
769	{
770	if (c >= '0' && c <= '9')
771	return static_cast<unsigned char>(c - '0');
772	if (c >= 'a' && c <= 'f')
773	return static_cast<unsigned char>(c - 'a' + 10);
774	return static_cast<unsigned char>(c - 'A' + 10);
775	}
776
777	unsigned char Lexer::convertHex(int c1, int c2)
778	{
779	return ((convertHex(c1) << 4) + convertHex(c2));
780	}
781
782	KJS::UChar Lexer::convertUnicode(int c1, int c2, int c3, int c4)
783	{
784	return KJS::UChar((convertHex(c1) << 4) + convertHex(c2),
785	(convertHex(c3) << 4) + convertHex(c4));
786	}
787
788	void Lexer::record8(int c)
789	{
790	ASSERT(c >= 0);
791	ASSERT(c <= 0xff);
792
793	// enlarge buffer if full
794	if (pos8 >= size8 - 1) {
795	char tmp = new char[2 size8];
796	memcpy(tmp, buffer8, size8 * sizeof(char));
797	delete [] buffer8;
798	buffer8 = tmp;
799	size8 *= 2;
800	}
801
802	buffer8[pos8++] = (char) c;
803	}
804
805	void Lexer::record16(int c)
806	{
807	ASSERT(c >= 0);
808	ASSERT(c <= USHRT_MAX);
809	record16(UChar(static_cast<unsigned short>(c)));
810	}
811
812	void Lexer::record16(KJS::UChar c)
813	{
814	// enlarge buffer if full
815	if (pos16 >= size16 - 1) {
816	KJS::UChar tmp = new KJS::UChar[2 size16];
817	memcpy(tmp, buffer16, size16 * sizeof(KJS::UChar));
818	delete [] buffer16;
819	buffer16 = tmp;
820	size16 *= 2;
821	}
822
823	buffer16[pos16++] = c;
824	}
825
826	bool Lexer::scanRegExp()
827	{
828	pos16 = 0;
829	bool lastWasEscape = false;
830	bool inBrackets = false;
831
832	while (1) {
833	if (isLineTerminator() \|\| current == -1)
834	return false;
835	else if (current != '/' \|\| lastWasEscape == true \|\| inBrackets == true)
836	{
837	// keep track of '[' and ']'
838	if (!lastWasEscape) {
839	if ( current == '[' && !inBrackets )
840	inBrackets = true;
841	if ( current == ']' && inBrackets )
842	inBrackets = false;
843	}
844	record16(current);
845	lastWasEscape =
846	!lastWasEscape && (current == '\\');
847	}
848	else { // end of regexp
849	pattern = UString(buffer16, pos16);
850	pos16 = 0;
851	shift(1);
852	break;
853	}
854	shift(1);
855	}
856
857	while (isIdentPart(current)) {
858	record16(current);
859	shift(1);
860	}
861	flags = UString(buffer16, pos16);
862
863	return true;
864	}
865
866
867	void Lexer::doneParsing()
868	{
869	for (unsigned i = 0; i < numIdentifiers; i++) {
870	delete identifiers[i];
871	}
872	fastFree(identifiers);
873	identifiers = 0;
874	numIdentifiers = 0;
875	identifiersCapacity = 0;
876
877	for (unsigned i = 0; i < numStrings; i++) {
878	delete strings[i];
879	}
880	fastFree(strings);
881	strings = 0;
882	numStrings = 0;
883	stringsCapacity = 0;
884	}
885
886	const int initialCapacity = 64;
887	const int growthFactor = 2;
888
889	// FIXME: this completely ignores its parameters, instead using buffer16 and pos16 - wtf?
890	Identifier Lexer::makeIdentifier(KJS::UChar, unsigned int)
891	{
892	if (numIdentifiers == identifiersCapacity) {
893	identifiersCapacity = (identifiersCapacity == 0) ? initialCapacity : identifiersCapacity *growthFactor;
894	identifiers = (KJS::Identifier *)fastRealloc(identifiers, sizeof(KJS::Identifier ) * identifiersCapacity);
895	}
896
897	KJS::Identifier *identifier = new KJS::Identifier(buffer16, pos16);
898	identifiers[numIdentifiers++] = identifier;
899	return identifier;
900	}
901
902	// FIXME: this completely ignores its parameters, instead using buffer16 and pos16 - wtf?
903	UString Lexer::makeUString(KJS::UChar, unsigned int)
904	{
905	if (numStrings == stringsCapacity) {
906	stringsCapacity = (stringsCapacity == 0) ? initialCapacity : stringsCapacity *growthFactor;
907	strings = (UString *)fastRealloc(strings, sizeof(UString ) * stringsCapacity);
908	}
909
910	UString *string = new UString(buffer16, pos16);
911	strings[numStrings++] = string;
912	return string;
913	}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: webkit/trunk/JavaScriptCore/kjs/lexer.cpp@ 17862

Download in other formats: