Context Navigation

lexer.cpp@ 12949

Visit:

Last change on this file since 12949 was 12317, checked in by mjs, 19 years ago

Reviewed by Tim Hatcher.

it's "Franklin Street", not "Franklin Steet"

kjs/array_instance.h:
kjs/array_object.cpp:
kjs/array_object.h:
kjs/bool_object.cpp:
kjs/bool_object.h:
kjs/collector.cpp:
kjs/collector.h:
kjs/completion.h:
kjs/context.h:
kjs/date_object.cpp:
kjs/date_object.h:
kjs/debugger.cpp:
kjs/debugger.h:
kjs/dtoa.h:
kjs/error_object.cpp:
kjs/error_object.h:
kjs/function.cpp:
kjs/function.h:
kjs/function_object.cpp:
kjs/function_object.h:
kjs/grammar.y:
kjs/identifier.cpp:
kjs/identifier.h:
kjs/internal.cpp:
kjs/internal.h:
kjs/interpreter.cpp:
kjs/interpreter.h:
kjs/lexer.cpp:
kjs/lexer.h:
kjs/list.cpp:
kjs/list.h:
kjs/lookup.cpp:
kjs/lookup.h:
kjs/math_object.cpp:
kjs/math_object.h:
kjs/nodes.cpp:
kjs/nodes.h:
kjs/nodes2string.cpp:
kjs/number_object.cpp:
kjs/number_object.h:
kjs/object.cpp:
kjs/object.h:
kjs/object_object.cpp:
kjs/object_object.h:
kjs/operations.cpp:
kjs/operations.h:
kjs/property_map.cpp:
kjs/property_map.h:
kjs/property_slot.cpp:
kjs/property_slot.h:
kjs/reference.cpp:
kjs/reference.h:
kjs/reference_list.cpp:
kjs/reference_list.h:
kjs/regexp.cpp:
kjs/regexp.h:
kjs/regexp_object.cpp:
kjs/regexp_object.h:
kjs/scope_chain.cpp:
kjs/scope_chain.h:
kjs/simple_number.h:
kjs/string_object.cpp:
kjs/string_object.h:
kjs/testkjs.cpp:
kjs/types.h:
kjs/ustring.cpp:
kjs/ustring.h:
kjs/value.cpp:
kjs/value.h:
kxmlcore/AlwaysInline.h:
kxmlcore/ListRefPtr.h:
kxmlcore/PassRefPtr.h:
kxmlcore/RefPtr.h:

Property svn:eol-style set to native

File size: 21.3 KB

Line
1	// -*- c-basic-offset: 2 -*-
2	/*
3	* This file is part of the KDE libraries
4	* Copyright (C) 1999-2000 Harri Porten ([email protected])
5	*
6	* This library is free software; you can redistribute it and/or
7	* modify it under the terms of the GNU Library General Public
8	* License as published by the Free Software Foundation; either
9	* version 2 of the License, or (at your option) any later version.
10	*
11	* This library is distributed in the hope that it will be useful,
12	* but WITHOUT ANY WARRANTY; without even the implied warranty of
13	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14	* Library General Public License for more details.
15	*
16	* You should have received a copy of the GNU Library General Public License
17	* along with this library; see the file COPYING.LIB. If not, write to
18	* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19	* Boston, MA 02110-1301, USA.
20	*
21	*/
22
23	#include "config.h"
24	#include "lexer.h"
25
26	#include <ctype.h>
27	#include <stdlib.h>
28	#include <stdio.h>
29	#include <string.h>
30	#include <assert.h>
31
32	#include "value.h"
33	#include "object.h"
34	#include "types.h"
35	#include "interpreter.h"
36	#include "nodes.h"
37	#include "identifier.h"
38	#include "lookup.h"
39	#include "internal.h"
40	#include <unicode/uchar.h>
41
42	static bool isDecimalDigit(unsigned short c);
43
44	// we can't specify the namespace in yacc's C output, so do it here
45	using namespace KJS;
46
47	static Lexer *currLexer = 0;
48
49	#ifndef KDE_USE_FINAL
50	#include "grammar.h"
51	#endif
52
53	#include "lexer.lut.h"
54
55	extern YYLTYPE kjsyylloc; // global bison variable holding token info
56
57	// a bridge for yacc from the C world to C++
58	int kjsyylex()
59	{
60	return Lexer::curr()->lex();
61	}
62
63	Lexer::Lexer()
64	: yylineno(1),
65	size8(128), size16(128), restrKeyword(false),
66	eatNextIdentifier(false), stackToken(-1), lastToken(-1), pos(0),
67	code(0), length(0),
68	#ifndef KJS_PURE_ECMA
69	bol(true),
70	#endif
71	current(0), next1(0), next2(0), next3(0),
72	strings(0), numStrings(0), stringsCapacity(0),
73	identifiers(0), numIdentifiers(0), identifiersCapacity(0)
74	{
75	// allocate space for read buffers
76	buffer8 = new char[size8];
77	buffer16 = new KJS::UChar[size16];
78	currLexer = this;
79	}
80
81	Lexer::~Lexer()
82	{
83	doneParsing();
84	delete [] buffer8;
85	delete [] buffer16;
86	}
87
88	Lexer *Lexer::curr()
89	{
90	if (!currLexer) {
91	// create singleton instance
92	currLexer = new Lexer();
93	}
94	return currLexer;
95	}
96
#ifdef KJS_DEBUG_MEM
// Destroys the current lexer (if any) and clears the global pointer so
// that the next Lexer::curr() call creates a fresh instance.
void Lexer::globalClear()
{
  delete currLexer;
  currLexer = 0;
}
#endif
104
105	void Lexer::setCode(const UString &sourceURL, int startingLineNumber, const KJS::UChar *c, unsigned int len)
106	{
107	yylineno = 1 + startingLineNumber;
108	m_sourceURL = sourceURL;
109	restrKeyword = false;
110	delimited = false;
111	eatNextIdentifier = false;
112	stackToken = -1;
113	lastToken = -1;
114	pos = 0;
115	code = c;
116	length = len;
117	skipLF = false;
118	skipCR = false;
119	error = false;
120	#ifndef KJS_PURE_ECMA
121	bol = true;
122	#endif
123
124	// read first characters
125	shift(4);
126	}
127
128	void Lexer::shift(unsigned int p)
129	{
130	while (p--) {
131	current = next1;
132	next1 = next2;
133	next2 = next3;
134	do {
135	if (pos >= length) {
136	next3 = 0;
137	break;
138	}
139	next3 = code[pos++].uc;
140	} while (u_charType(next3) == U_FORMAT_CHAR);
141	}
142	}
143
144	// called on each new line
145	void Lexer::nextLine()
146	{
147	yylineno++;
148	#ifndef KJS_PURE_ECMA
149	bol = true;
150	#endif
151	}
152
153	void Lexer::setDone(State s)
154	{
155	state = s;
156	done = true;
157	}
158
159	int Lexer::lex()
160	{
161	int token = 0;
162	state = Start;
163	unsigned short stringType = 0; // either single or double quotes
164	pos8 = pos16 = 0;
165	done = false;
166	terminator = false;
167	skipLF = false;
168	skipCR = false;
169
170	// did we push a token on the stack previously ?
171	// (after an automatic semicolon insertion)
172	if (stackToken >= 0) {
173	setDone(Other);
174	token = stackToken;
175	stackToken = 0;
176	}
177
178	while (!done) {
179	if (skipLF && current != '\n') // found \r but not \n afterwards
180	skipLF = false;
181	if (skipCR && current != '\r') // found \n but not \r afterwards
182	skipCR = false;
183	if (skipLF \|\| skipCR) // found \r\n or \n\r -> eat the second one
184	{
185	skipLF = false;
186	skipCR = false;
187	shift(1);
188	}
189	switch (state) {
190	case Start:
191	if (isWhiteSpace()) {
192	// do nothing
193	} else if (current == '/' && next1 == '/') {
194	shift(1);
195	state = InSingleLineComment;
196	} else if (current == '/' && next1 == '*') {
197	shift(1);
198	state = InMultiLineComment;
199	} else if (current == 0) {
200	if (!terminator && !delimited) {
201	// automatic semicolon insertion if program incomplete
202	token = ';';
203	stackToken = 0;
204	setDone(Other);
205	} else
206	setDone(Eof);
207	} else if (isLineTerminator()) {
208	nextLine();
209	terminator = true;
210	if (restrKeyword) {
211	token = ';';
212	setDone(Other);
213	}
214	} else if (current == '"' \|\| current == '\'') {
215	state = InString;
216	stringType = current;
217	} else if (isIdentStart(current)) {
218	record16(current);
219	state = InIdentifierOrKeyword;
220	} else if (current == '\\') {
221	state = InIdentifierUnicodeEscapeStart;
222	} else if (current == '0') {
223	record8(current);
224	state = InNum0;
225	} else if (isDecimalDigit(current)) {
226	record8(current);
227	state = InNum;
228	} else if (current == '.' && isDecimalDigit(next1)) {
229	record8(current);
230	state = InDecimal;
231	#ifndef KJS_PURE_ECMA
232	// <!-- marks the beginning of a line comment (for www usage)
233	} else if (current == '<' && next1 == '!' &&
234	next2 == '-' && next3 == '-') {
235	shift(3);
236	state = InSingleLineComment;
237	// same for -->
238	} else if (bol && current == '-' && next1 == '-' && next2 == '>') {
239	shift(2);
240	state = InSingleLineComment;
241	#endif
242	} else {
243	token = matchPunctuator(current, next1, next2, next3);
244	if (token != -1) {
245	setDone(Other);
246	} else {
247	// cerr << "encountered unknown character" << endl;
248	setDone(Bad);
249	}
250	}
251	break;
252	case InString:
253	if (current == stringType) {
254	shift(1);
255	setDone(String);
256	} else if (current == 0 \|\| isLineTerminator()) {
257	setDone(Bad);
258	} else if (current == '\\') {
259	state = InEscapeSequence;
260	} else {
261	record16(current);
262	}
263	break;
264	// Escape Sequences inside of strings
265	case InEscapeSequence:
266	if (isOctalDigit(current)) {
267	if (current >= '0' && current <= '3' &&
268	isOctalDigit(next1) && isOctalDigit(next2)) {
269	record16(convertOctal(current, next1, next2));
270	shift(2);
271	state = InString;
272	} else if (isOctalDigit(current) && isOctalDigit(next1)) {
273	record16(convertOctal('0', current, next1));
274	shift(1);
275	state = InString;
276	} else if (isOctalDigit(current)) {
277	record16(convertOctal('0', '0', current));
278	state = InString;
279	} else {
280	setDone(Bad);
281	}
282	} else if (current == 'x')
283	state = InHexEscape;
284	else if (current == 'u')
285	state = InUnicodeEscape;
286	else if (isLineTerminator()) {
287	nextLine();
288	state = InString;
289	} else {
290	record16(singleEscape(current));
291	state = InString;
292	}
293	break;
294	case InHexEscape:
295	if (isHexDigit(current) && isHexDigit(next1)) {
296	state = InString;
297	record16(convertHex(current, next1));
298	shift(1);
299	} else if (current == stringType) {
300	record16('x');
301	shift(1);
302	setDone(String);
303	} else {
304	record16('x');
305	record16(current);
306	state = InString;
307	}
308	break;
309	case InUnicodeEscape:
310	if (isHexDigit(current) && isHexDigit(next1) && isHexDigit(next2) && isHexDigit(next3)) {
311	record16(convertUnicode(current, next1, next2, next3));
312	shift(3);
313	state = InString;
314	} else if (current == stringType) {
315	record16('u');
316	shift(1);
317	setDone(String);
318	} else {
319	setDone(Bad);
320	}
321	break;
322	case InSingleLineComment:
323	if (isLineTerminator()) {
324	nextLine();
325	terminator = true;
326	if (restrKeyword) {
327	token = ';';
328	setDone(Other);
329	} else
330	state = Start;
331	} else if (current == 0) {
332	setDone(Eof);
333	}
334	break;
335	case InMultiLineComment:
336	if (current == 0) {
337	setDone(Bad);
338	} else if (isLineTerminator()) {
339	nextLine();
340	} else if (current == '*' && next1 == '/') {
341	state = Start;
342	shift(1);
343	}
344	break;
345	case InIdentifierOrKeyword:
346	case InIdentifier:
347	if (isIdentPart(current))
348	record16(current);
349	else if (current == '\\')
350	state = InIdentifierUnicodeEscapeStart;
351	else
352	setDone(state == InIdentifierOrKeyword ? IdentifierOrKeyword : Identifier);
353	break;
354	case InNum0:
355	if (current == 'x' \|\| current == 'X') {
356	record8(current);
357	state = InHex;
358	} else if (current == '.') {
359	record8(current);
360	state = InDecimal;
361	} else if (current == 'e' \|\| current == 'E') {
362	record8(current);
363	state = InExponentIndicator;
364	} else if (isOctalDigit(current)) {
365	record8(current);
366	state = InOctal;
367	} else if (isDecimalDigit(current)) {
368	record8(current);
369	state = InDecimal;
370	} else {
371	setDone(Number);
372	}
373	break;
374	case InHex:
375	if (isHexDigit(current)) {
376	record8(current);
377	} else {
378	setDone(Hex);
379	}
380	break;
381	case InOctal:
382	if (isOctalDigit(current)) {
383	record8(current);
384	}
385	else if (isDecimalDigit(current)) {
386	record8(current);
387	state = InDecimal;
388	} else
389	setDone(Octal);
390	break;
391	case InNum:
392	if (isDecimalDigit(current)) {
393	record8(current);
394	} else if (current == '.') {
395	record8(current);
396	state = InDecimal;
397	} else if (current == 'e' \|\| current == 'E') {
398	record8(current);
399	state = InExponentIndicator;
400	} else
401	setDone(Number);
402	break;
403	case InDecimal:
404	if (isDecimalDigit(current)) {
405	record8(current);
406	} else if (current == 'e' \|\| current == 'E') {
407	record8(current);
408	state = InExponentIndicator;
409	} else
410	setDone(Number);
411	break;
412	case InExponentIndicator:
413	if (current == '+' \|\| current == '-') {
414	record8(current);
415	} else if (isDecimalDigit(current)) {
416	record8(current);
417	state = InExponent;
418	} else
419	setDone(Bad);
420	break;
421	case InExponent:
422	if (isDecimalDigit(current)) {
423	record8(current);
424	} else
425	setDone(Number);
426	break;
427	case InIdentifierUnicodeEscapeStart:
428	if (current == 'u')
429	state = InIdentifierUnicodeEscape;
430	else
431	setDone(Bad);
432	break;
433	case InIdentifierUnicodeEscape:
434	if (isHexDigit(current) && isHexDigit(next1) && isHexDigit(next2) && isHexDigit(next3)) {
435	record16(convertUnicode(current, next1, next2, next3));
436	shift(3);
437	state = InIdentifier;
438	} else {
439	setDone(Bad);
440	}
441	break;
442	default:
443	assert(!"Unhandled state in switch statement");
444	}
445
446	// move on to the next character
447	if (!done)
448	shift(1);
449	#ifndef KJS_PURE_ECMA
450	if (state != Start && state != InSingleLineComment)
451	bol = false;
452	#endif
453	}
454
455	// no identifiers allowed directly after numeric literal, e.g. "3in" is bad
456	if ((state == Number \|\| state == Octal \|\| state == Hex) && isIdentStart(current))
457	state = Bad;
458
459	// terminate string
460	buffer8[pos8] = '\0';
461
462	#ifdef KJS_DEBUG_LEX
463	fprintf(stderr, "line: %d ", lineNo());
464	fprintf(stderr, "yytext (%x): ", buffer8[0]);
465	fprintf(stderr, "%s ", buffer8);
466	#endif
467
468	double dval = 0;
469	if (state == Number) {
470	dval = strtod(buffer8, 0L);
471	} else if (state == Hex) { // scan hex numbers
472	const char *p = buffer8 + 2;
473	while (char c = *p++) {
474	dval *= 16;
475	dval += convertHex(c);
476	}
477	state = Number;
478	} else if (state == Octal) { // scan octal number
479	const char *p = buffer8 + 1;
480	while (char c = *p++) {
481	dval *= 8;
482	dval += c - '0';
483	}
484	state = Number;
485	}
486
487	#ifdef KJS_DEBUG_LEX
488	switch (state) {
489	case Eof:
490	printf("(EOF)\n");
491	break;
492	case Other:
493	printf("(Other)\n");
494	break;
495	case Identifier:
496	printf("(Identifier)/(Keyword)\n");
497	break;
498	case String:
499	printf("(String)\n");
500	break;
501	case Number:
502	printf("(Number)\n");
503	break;
504	default:
505	printf("(unknown)");
506	}
507	#endif
508
509	if (state != Identifier && eatNextIdentifier)
510	eatNextIdentifier = false;
511
512	restrKeyword = false;
513	delimited = false;
514	kjsyylloc.first_line = yylineno; // ???
515	kjsyylloc.last_line = yylineno;
516
517	switch (state) {
518	case Eof:
519	token = 0;
520	break;
521	case Other:
522	if(token == '}' \|\| token == ';') {
523	delimited = true;
524	}
525	break;
526	case IdentifierOrKeyword:
527	if ((token = Lookup::find(&mainTable, buffer16, pos16)) < 0) {
528	case Identifier:
529	// Lookup for keyword failed, means this is an identifier
530	// Apply anonymous-function hack below (eat the identifier)
531	if (eatNextIdentifier) {
532	eatNextIdentifier = false;
533	token = lex();
534	break;
535	}
536	kjsyylval.ident = makeIdentifier(buffer16, pos16);
537	token = IDENT;
538	break;
539	}
540
541	eatNextIdentifier = false;
542	// Hack for "f = function somename() { ... }", too hard to get into the grammar
543	if (token == FUNCTION && lastToken == '=' )
544	eatNextIdentifier = true;
545
546	if (token == CONTINUE \|\| token == BREAK \|\|
547	token == RETURN \|\| token == THROW)
548	restrKeyword = true;
549	break;
550	case String:
551	kjsyylval.ustr = makeUString(buffer16, pos16);
552	token = STRING;
553	break;
554	case Number:
555	kjsyylval.dval = dval;
556	token = NUMBER;
557	break;
558	case Bad:
559	fprintf(stderr, "yylex: ERROR.\n");
560	error = true;
561	return -1;
562	default:
563	assert(!"unhandled numeration value in switch");
564	error = true;
565	return -1;
566	}
567	lastToken = token;
568	return token;
569	}
570
571	bool Lexer::isWhiteSpace() const
572	{
573	return (current == '\t' \|\| current == 0x0b \|\| current == 0x0c \|\| u_charType(current) == U_SPACE_SEPARATOR);
574	}
575
576	bool Lexer::isLineTerminator()
577	{
578	bool cr = (current == '\r');
579	bool lf = (current == '\n');
580	if (cr)
581	skipLF = true;
582	else if (lf)
583	skipCR = true;
584	return cr \|\| lf \|\| current == 0x2028 \|\| current == 0x2029;
585	}
586
587	bool Lexer::isIdentStart(unsigned short c)
588	{
589	return (U_GET_GC_MASK(c) & (U_GC_L_MASK \| U_GC_NL_MASK)) \|\| c == '$' \|\| c == '_';
590	}
591
592	bool Lexer::isIdentPart(unsigned short c)
593	{
594	return (U_GET_GC_MASK(c) & (U_GC_L_MASK \| U_GC_NL_MASK \| U_GC_MN_MASK \| U_GC_MC_MASK \| U_GC_ND_MASK \| U_GC_PC_MASK)) \|\| c == '$' \|\| c == '_';
595	}
596
// True if c is one of the ASCII digits '0'..'9'.
static bool isDecimalDigit(unsigned short c)
{
  return (c >= '0' && c <= '9');
}
601
602	bool Lexer::isHexDigit(unsigned short c)
603	{
604	return (c >= '0' && c <= '9' \|\|
605	c >= 'a' && c <= 'f' \|\|
606	c >= 'A' && c <= 'F');
607	}
608
609	bool Lexer::isOctalDigit(unsigned short c) const
610	{
611	return (c >= '0' && c <= '7');
612	}
613
614	int Lexer::matchPunctuator(unsigned short c1, unsigned short c2,
615	unsigned short c3, unsigned short c4)
616	{
617	if (c1 == '>' && c2 == '>' && c3 == '>' && c4 == '=') {
618	shift(4);
619	return URSHIFTEQUAL;
620	} else if (c1 == '=' && c2 == '=' && c3 == '=') {
621	shift(3);
622	return STREQ;
623	} else if (c1 == '!' && c2 == '=' && c3 == '=') {
624	shift(3);
625	return STRNEQ;
626	} else if (c1 == '>' && c2 == '>' && c3 == '>') {
627	shift(3);
628	return URSHIFT;
629	} else if (c1 == '<' && c2 == '<' && c3 == '=') {
630	shift(3);
631	return LSHIFTEQUAL;
632	} else if (c1 == '>' && c2 == '>' && c3 == '=') {
633	shift(3);
634	return RSHIFTEQUAL;
635	} else if (c1 == '<' && c2 == '=') {
636	shift(2);
637	return LE;
638	} else if (c1 == '>' && c2 == '=') {
639	shift(2);
640	return GE;
641	} else if (c1 == '!' && c2 == '=') {
642	shift(2);
643	return NE;
644	} else if (c1 == '+' && c2 == '+') {
645	shift(2);
646	if (terminator)
647	return AUTOPLUSPLUS;
648	else
649	return PLUSPLUS;
650	} else if (c1 == '-' && c2 == '-') {
651	shift(2);
652	if (terminator)
653	return AUTOMINUSMINUS;
654	else
655	return MINUSMINUS;
656	} else if (c1 == '=' && c2 == '=') {
657	shift(2);
658	return EQEQ;
659	} else if (c1 == '+' && c2 == '=') {
660	shift(2);
661	return PLUSEQUAL;
662	} else if (c1 == '-' && c2 == '=') {
663	shift(2);
664	return MINUSEQUAL;
665	} else if (c1 == '*' && c2 == '=') {
666	shift(2);
667	return MULTEQUAL;
668	} else if (c1 == '/' && c2 == '=') {
669	shift(2);
670	return DIVEQUAL;
671	} else if (c1 == '&' && c2 == '=') {
672	shift(2);
673	return ANDEQUAL;
674	} else if (c1 == '^' && c2 == '=') {
675	shift(2);
676	return XOREQUAL;
677	} else if (c1 == '%' && c2 == '=') {
678	shift(2);
679	return MODEQUAL;
680	} else if (c1 == '\|' && c2 == '=') {
681	shift(2);
682	return OREQUAL;
683	} else if (c1 == '<' && c2 == '<') {
684	shift(2);
685	return LSHIFT;
686	} else if (c1 == '>' && c2 == '>') {
687	shift(2);
688	return RSHIFT;
689	} else if (c1 == '&' && c2 == '&') {
690	shift(2);
691	return AND;
692	} else if (c1 == '\|' && c2 == '\|') {
693	shift(2);
694	return OR;
695	}
696
697	switch(c1) {
698	case '=':
699	case '>':
700	case '<':
701	case ',':
702	case '!':
703	case '~':
704	case '?':
705	case ':':
706	case '.':
707	case '+':
708	case '-':
709	case '*':
710	case '/':
711	case '&':
712	case '\|':
713	case '^':
714	case '%':
715	case '(':
716	case ')':
717	case '{':
718	case '}':
719	case '[':
720	case ']':
721	case ';':
722	shift(1);
723	return static_cast<int>(c1);
724	default:
725	return -1;
726	}
727	}
728
729	unsigned short Lexer::singleEscape(unsigned short c) const
730	{
731	switch(c) {
732	case 'b':
733	return 0x08;
734	case 't':
735	return 0x09;
736	case 'n':
737	return 0x0A;
738	case 'v':
739	return 0x0B;
740	case 'f':
741	return 0x0C;
742	case 'r':
743	return 0x0D;
744	case '"':
745	return 0x22;
746	case '\'':
747	return 0x27;
748	case '\\':
749	return 0x5C;
750	default:
751	return c;
752	}
753	}
754
755	unsigned short Lexer::convertOctal(unsigned short c1, unsigned short c2,
756	unsigned short c3) const
757	{
758	return ((c1 - '0') * 64 + (c2 - '0') * 8 + c3 - '0');
759	}
760
761	unsigned char Lexer::convertHex(unsigned short c)
762	{
763	if (c >= '0' && c <= '9')
764	return (c - '0');
765	else if (c >= 'a' && c <= 'f')
766	return (c - 'a' + 10);
767	else
768	return (c - 'A' + 10);
769	}
770
771	unsigned char Lexer::convertHex(unsigned short c1, unsigned short c2)
772	{
773	return ((convertHex(c1) << 4) + convertHex(c2));
774	}
775
776	KJS::UChar Lexer::convertUnicode(unsigned short c1, unsigned short c2,
777	unsigned short c3, unsigned short c4)
778	{
779	return KJS::UChar((convertHex(c1) << 4) + convertHex(c2),
780	(convertHex(c3) << 4) + convertHex(c4));
781	}
782
783	void Lexer::record8(unsigned short c)
784	{
785	assert(c <= 0xff);
786
787	// enlarge buffer if full
788	if (pos8 >= size8 - 1) {
789	char tmp = new char[2 size8];
790	memcpy(tmp, buffer8, size8 * sizeof(char));
791	delete [] buffer8;
792	buffer8 = tmp;
793	size8 *= 2;
794	}
795
796	buffer8[pos8++] = (char) c;
797	}
798
799	void Lexer::record16(KJS::UChar c)
800	{
801	// enlarge buffer if full
802	if (pos16 >= size16 - 1) {
803	KJS::UChar tmp = new KJS::UChar[2 size16];
804	memcpy(tmp, buffer16, size16 * sizeof(KJS::UChar));
805	delete [] buffer16;
806	buffer16 = tmp;
807	size16 *= 2;
808	}
809
810	buffer16[pos16++] = c;
811	}
812
813	bool Lexer::scanRegExp()
814	{
815	pos16 = 0;
816	bool lastWasEscape = false;
817	bool inBrackets = false;
818
819	while (1) {
820	if (isLineTerminator() \|\| current == 0)
821	return false;
822	else if (current != '/' \|\| lastWasEscape == true \|\| inBrackets == true)
823	{
824	// keep track of '[' and ']'
825	if ( !lastWasEscape ) {
826	if ( current == '[' && !inBrackets )
827	inBrackets = true;
828	if ( current == ']' && inBrackets )
829	inBrackets = false;
830	}
831	record16(current);
832	lastWasEscape =
833	!lastWasEscape && (current == '\\');
834	}
835	else { // end of regexp
836	pattern = UString(buffer16, pos16);
837	pos16 = 0;
838	shift(1);
839	break;
840	}
841	shift(1);
842	}
843
844	while (isIdentPart(current)) {
845	record16(current);
846	shift(1);
847	}
848	flags = UString(buffer16, pos16);
849
850	return true;
851	}
852
853
854	void Lexer::doneParsing()
855	{
856	for (unsigned i = 0; i < numIdentifiers; i++) {
857	delete identifiers[i];
858	}
859	fastFree(identifiers);
860	identifiers = 0;
861	numIdentifiers = 0;
862	identifiersCapacity = 0;
863
864	for (unsigned i = 0; i < numStrings; i++) {
865	delete strings[i];
866	}
867	fastFree(strings);
868	strings = 0;
869	numStrings = 0;
870	stringsCapacity = 0;
871	}
872
// Sizing policy for the identifier/string tracking arrays: the first
// allocation holds initialCapacity pointers, and every later growth
// multiplies the capacity by growthFactor.
const int initialCapacity = 64;
const int growthFactor = 2;
875
876	Identifier Lexer::makeIdentifier(KJS::UChar buffer, unsigned int pos)
877	{
878	if (numIdentifiers == identifiersCapacity) {
879	identifiersCapacity = (identifiersCapacity == 0) ? initialCapacity : identifiersCapacity *growthFactor;
880	identifiers = (KJS::Identifier *)fastRealloc(identifiers, sizeof(KJS::Identifier ) * identifiersCapacity);
881	}
882
883	KJS::Identifier *identifier = new KJS::Identifier(buffer16, pos16);
884	identifiers[numIdentifiers++] = identifier;
885	return identifier;
886	}
887
888	UString Lexer::makeUString(KJS::UChar buffer, unsigned int pos)
889	{
890	if (numStrings == stringsCapacity) {
891	stringsCapacity = (stringsCapacity == 0) ? initialCapacity : stringsCapacity *growthFactor;
892	strings = (UString *)fastRealloc(strings, sizeof(UString ) * stringsCapacity);
893	}
894
895	UString *string = new UString(buffer16, pos16);
896	strings[numStrings++] = string;
897	return string;
898	}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: webkit/trunk/JavaScriptCore/kjs/lexer.cpp@ 12949

Download in other formats: