Context Navigation

lexer.cpp@ 27695

Visit:

Last change on this file since 27695 was 27695, checked in by eseidel, 18 years ago

2007-11-10 Eric Seidel <[email protected]>

Reviewed by darin.

Add simple type inferencing to the parser, and create custom
AddNode and LessNode subclasses based on inferred types.
http://bugs.webkit.org/show_bug.cgi?id=15884

SunSpider claims this is at least a 0.5% speedup.

JavaScriptCore.exp:
kjs/grammar.y:
kjs/internal.cpp: (KJS::NumberImp::getPrimitiveNumber): (KJS::GetterSetterImp::getPrimitiveNumber):
kjs/internal.h:
kjs/lexer.cpp: (KJS::Lexer::lex):
kjs/nodes.cpp: (KJS::Node::Node): (KJS::StringNode::evaluate): (KJS::StringNode::evaluateToNumber): (KJS::StringNode::evaluateToBoolean): (KJS::RegExpNode::evaluate): (KJS::UnaryPlusNode::optimizeVariableAccess): (KJS::AddNode::evaluate): (KJS::AddNode::evaluateToNumber): (KJS::AddNumbersNode::inlineEvaluateToNumber): (KJS::AddNumbersNode::evaluate): (KJS::AddNumbersNode::evaluateToNumber): (KJS::AddStringsNode::evaluate): (KJS::AddStringLeftNode::evaluate): (KJS::AddStringRightNode::evaluate): (KJS::lessThan): (KJS::lessThanEq): (KJS::LessNumbersNode::evaluate): (KJS::LessStringsNode::evaluate):
kjs/nodes.h: (KJS::ExpressionNode::): (KJS::RegExpNode::): (KJS::RegExpNode::precedence): (KJS::TypeOfResolveNode::): (KJS::LocalVarTypeOfNode::): (KJS::UnaryPlusNode::): (KJS::UnaryPlusNode::precedence): (KJS::AddNode::): (KJS::AddNode::precedence): (KJS::AddNumbersNode::): (KJS::AddStringLeftNode::): (KJS::AddStringRightNode::): (KJS::AddStringsNode::): (KJS::LessNode::): (KJS::LessNode::precedence): (KJS::LessNumbersNode::): (KJS::LessStringsNode::):
kjs/nodes2string.cpp: (KJS::StringNode::streamTo):
kjs/object.cpp:
kjs/object.h:
kjs/value.h: (KJS::JSValue::getPrimitiveNumber):

Property svn:eol-style set to native

File size: 22.1 KB

Line
1	// -*- c-basic-offset: 2 -*-
2	/*
3	* Copyright (C) 1999-2000 Harri Porten ([email protected])
4	* Copyright (C) 2006, 2007 Apple Inc. All Rights Reserved.
5	* Copyright (C) 2007 Cameron Zwarich ([email protected])
6	*
7	* This library is free software; you can redistribute it and/or
8	* modify it under the terms of the GNU Library General Public
9	* License as published by the Free Software Foundation; either
10	* version 2 of the License, or (at your option) any later version.
11	*
12	* This library is distributed in the hope that it will be useful,
13	* but WITHOUT ANY WARRANTY; without even the implied warranty of
14	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15	* Library General Public License for more details.
16	*
17	* You should have received a copy of the GNU Library General Public License
18	* along with this library; see the file COPYING.LIB. If not, write to
19	* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20	* Boston, MA 02110-1301, USA.
21	*
22	*/
23
24	#include "config.h"
25	#include "lexer.h"
26
27	#include "function.h"
28	#include "interpreter.h"
29	#include "nodes.h"
30	#include <ctype.h>
31	#include <limits.h>
32	#include <string.h>
33	#include <wtf/Assertions.h>
34	#include <wtf/unicode/Unicode.h>
35
36	using namespace WTF;
37	using namespace Unicode;
38
39	// we can't specify the namespace in yacc's C output, so do it here
40	using namespace KJS;
41
42	#ifndef KDE_USE_FINAL
43	#include "grammar.h"
44	#endif
45
46	#include "lookup.h"
47	#include "lexer.lut.h"
48
49	extern YYLTYPE kjsyylloc; // global bison variable holding token info
50
51	// a bridge for yacc from the C world to C++
52	int kjsyylex()
53	{
54	return Lexer::curr()->lex();
55	}
56
57	namespace KJS {
58
59	static Lexer* currLexer = 0;
60
61	static bool isDecimalDigit(int);
62
63	Lexer::Lexer()
64	: yylineno(1),
65	size8(128), size16(128), restrKeyword(false),
66	eatNextIdentifier(false), stackToken(-1), lastToken(-1), pos(0),
67	code(0), length(0),
68	#ifndef KJS_PURE_ECMA
69	bol(true),
70	#endif
71	current(0), next1(0), next2(0), next3(0),
72	strings(0), numStrings(0), stringsCapacity(0),
73	identifiers(0), numIdentifiers(0), identifiersCapacity(0)
74	{
75	// allocate space for read buffers
76	buffer8 = new char[size8];
77	buffer16 = new KJS::UChar[size16];
78	currLexer = this;
79	}
80
81	Lexer::~Lexer()
82	{
83	doneParsing();
84	delete [] buffer8;
85	delete [] buffer16;
86	}
87
88	Lexer *Lexer::curr()
89	{
90	if (!currLexer) {
91	// create singleton instance
92	currLexer = new Lexer();
93	}
94	return currLexer;
95	}
96
#ifdef KJS_DEBUG_MEM
// Memory-debugging builds only: destroys the current lexer and clears the
// pointer so curr() will build a fresh one.
void Lexer::globalClear()
{
  delete currLexer;
  currLexer = 0;
}
#endif
104
105	void Lexer::setCode(const UString &sourceURL, int startingLineNumber, const KJS::UChar *c, unsigned int len)
106	{
107	yylineno = 1 + startingLineNumber;
108	m_sourceURL = sourceURL;
109	restrKeyword = false;
110	delimited = false;
111	eatNextIdentifier = false;
112	stackToken = -1;
113	lastToken = -1;
114	pos = 0;
115	code = c;
116	length = len;
117	skipLF = false;
118	skipCR = false;
119	error = false;
120	#ifndef KJS_PURE_ECMA
121	bol = true;
122	#endif
123
124	// read first characters
125	current = (length > 0) ? code[0].uc : -1;
126	next1 = (length > 1) ? code[1].uc : -1;
127	next2 = (length > 2) ? code[2].uc : -1;
128	next3 = (length > 3) ? code[3].uc : -1;
129	}
130
131	void Lexer::shift(unsigned int p)
132	{
133	// Here would be a good place to strip Cf characters, but that has caused compatibility problems:
134	// <http://bugs.webkit.org/show_bug.cgi?id=10183>.
135	while (p--) {
136	pos++;
137	current = next1;
138	next1 = next2;
139	next2 = next3;
140	next3 = (pos + 3 < length) ? code[pos + 3].uc : -1;
141	}
142	}
143
144	// called on each new line
145	void Lexer::nextLine()
146	{
147	yylineno++;
148	#ifndef KJS_PURE_ECMA
149	bol = true;
150	#endif
151	}
152
153	void Lexer::setDone(State s)
154	{
155	state = s;
156	done = true;
157	}
158
159	int Lexer::lex()
160	{
161	int token = 0;
162	state = Start;
163	unsigned short stringType = 0; // either single or double quotes
164	pos8 = pos16 = 0;
165	done = false;
166	terminator = false;
167	skipLF = false;
168	skipCR = false;
169
170	// did we push a token on the stack previously ?
171	// (after an automatic semicolon insertion)
172	if (stackToken >= 0) {
173	setDone(Other);
174	token = stackToken;
175	stackToken = 0;
176	}
177
178	while (!done) {
179	if (skipLF && current != '\n') // found \r but not \n afterwards
180	skipLF = false;
181	if (skipCR && current != '\r') // found \n but not \r afterwards
182	skipCR = false;
183	if (skipLF \|\| skipCR) // found \r\n or \n\r -> eat the second one
184	{
185	skipLF = false;
186	skipCR = false;
187	shift(1);
188	}
189	switch (state) {
190	case Start:
191	if (isWhiteSpace()) {
192	// do nothing
193	} else if (current == '/' && next1 == '/') {
194	shift(1);
195	state = InSingleLineComment;
196	} else if (current == '/' && next1 == '*') {
197	shift(1);
198	state = InMultiLineComment;
199	} else if (current == -1) {
200	if (!terminator && !delimited) {
201	// automatic semicolon insertion if program incomplete
202	token = ';';
203	stackToken = 0;
204	setDone(Other);
205	} else
206	setDone(Eof);
207	} else if (isLineTerminator()) {
208	nextLine();
209	terminator = true;
210	if (restrKeyword) {
211	token = ';';
212	setDone(Other);
213	}
214	} else if (current == '"' \|\| current == '\'') {
215	state = InString;
216	stringType = static_cast<unsigned short>(current);
217	} else if (isIdentStart(current)) {
218	record16(current);
219	state = InIdentifierOrKeyword;
220	} else if (current == '\\') {
221	state = InIdentifierUnicodeEscapeStart;
222	} else if (current == '0') {
223	record8(current);
224	state = InNum0;
225	} else if (isDecimalDigit(current)) {
226	record8(current);
227	state = InNum;
228	} else if (current == '.' && isDecimalDigit(next1)) {
229	record8(current);
230	state = InDecimal;
231	#ifndef KJS_PURE_ECMA
232	// <!-- marks the beginning of a line comment (for www usage)
233	} else if (current == '<' && next1 == '!' &&
234	next2 == '-' && next3 == '-') {
235	shift(3);
236	state = InSingleLineComment;
237	// same for -->
238	} else if (bol && current == '-' && next1 == '-' && next2 == '>') {
239	shift(2);
240	state = InSingleLineComment;
241	#endif
242	} else {
243	token = matchPunctuator(current, next1, next2, next3);
244	if (token != -1) {
245	setDone(Other);
246	} else {
247	// cerr << "encountered unknown character" << endl;
248	setDone(Bad);
249	}
250	}
251	break;
252	case InString:
253	if (current == stringType) {
254	shift(1);
255	setDone(String);
256	} else if (isLineTerminator() \|\| current == -1) {
257	setDone(Bad);
258	} else if (current == '\\') {
259	state = InEscapeSequence;
260	} else {
261	record16(current);
262	}
263	break;
264	// Escape Sequences inside of strings
265	case InEscapeSequence:
266	if (isOctalDigit(current)) {
267	if (current >= '0' && current <= '3' &&
268	isOctalDigit(next1) && isOctalDigit(next2)) {
269	record16(convertOctal(current, next1, next2));
270	shift(2);
271	state = InString;
272	} else if (isOctalDigit(current) && isOctalDigit(next1)) {
273	record16(convertOctal('0', current, next1));
274	shift(1);
275	state = InString;
276	} else if (isOctalDigit(current)) {
277	record16(convertOctal('0', '0', current));
278	state = InString;
279	} else {
280	setDone(Bad);
281	}
282	} else if (current == 'x')
283	state = InHexEscape;
284	else if (current == 'u')
285	state = InUnicodeEscape;
286	else if (isLineTerminator()) {
287	nextLine();
288	state = InString;
289	} else {
290	record16(singleEscape(static_cast<unsigned short>(current)));
291	state = InString;
292	}
293	break;
294	case InHexEscape:
295	if (isHexDigit(current) && isHexDigit(next1)) {
296	state = InString;
297	record16(convertHex(current, next1));
298	shift(1);
299	} else if (current == stringType) {
300	record16('x');
301	shift(1);
302	setDone(String);
303	} else {
304	record16('x');
305	record16(current);
306	state = InString;
307	}
308	break;
309	case InUnicodeEscape:
310	if (isHexDigit(current) && isHexDigit(next1) && isHexDigit(next2) && isHexDigit(next3)) {
311	record16(convertUnicode(current, next1, next2, next3));
312	shift(3);
313	state = InString;
314	} else if (current == stringType) {
315	record16('u');
316	shift(1);
317	setDone(String);
318	} else {
319	setDone(Bad);
320	}
321	break;
322	case InSingleLineComment:
323	if (isLineTerminator()) {
324	nextLine();
325	terminator = true;
326	if (restrKeyword) {
327	token = ';';
328	setDone(Other);
329	} else
330	state = Start;
331	} else if (current == -1) {
332	setDone(Eof);
333	}
334	break;
335	case InMultiLineComment:
336	if (current == -1) {
337	setDone(Bad);
338	} else if (isLineTerminator()) {
339	nextLine();
340	} else if (current == '*' && next1 == '/') {
341	state = Start;
342	shift(1);
343	}
344	break;
345	case InIdentifierOrKeyword:
346	case InIdentifier:
347	if (isIdentPart(current))
348	record16(current);
349	else if (current == '\\')
350	state = InIdentifierUnicodeEscapeStart;
351	else
352	setDone(state == InIdentifierOrKeyword ? IdentifierOrKeyword : Identifier);
353	break;
354	case InNum0:
355	if (current == 'x' \|\| current == 'X') {
356	record8(current);
357	state = InHex;
358	} else if (current == '.') {
359	record8(current);
360	state = InDecimal;
361	} else if (current == 'e' \|\| current == 'E') {
362	record8(current);
363	state = InExponentIndicator;
364	} else if (isOctalDigit(current)) {
365	record8(current);
366	state = InOctal;
367	} else if (isDecimalDigit(current)) {
368	record8(current);
369	state = InDecimal;
370	} else {
371	setDone(Number);
372	}
373	break;
374	case InHex:
375	if (isHexDigit(current)) {
376	record8(current);
377	} else {
378	setDone(Hex);
379	}
380	break;
381	case InOctal:
382	if (isOctalDigit(current)) {
383	record8(current);
384	}
385	else if (isDecimalDigit(current)) {
386	record8(current);
387	state = InDecimal;
388	} else
389	setDone(Octal);
390	break;
391	case InNum:
392	if (isDecimalDigit(current)) {
393	record8(current);
394	} else if (current == '.') {
395	record8(current);
396	state = InDecimal;
397	} else if (current == 'e' \|\| current == 'E') {
398	record8(current);
399	state = InExponentIndicator;
400	} else
401	setDone(Number);
402	break;
403	case InDecimal:
404	if (isDecimalDigit(current)) {
405	record8(current);
406	} else if (current == 'e' \|\| current == 'E') {
407	record8(current);
408	state = InExponentIndicator;
409	} else
410	setDone(Number);
411	break;
412	case InExponentIndicator:
413	if (current == '+' \|\| current == '-') {
414	record8(current);
415	} else if (isDecimalDigit(current)) {
416	record8(current);
417	state = InExponent;
418	} else
419	setDone(Bad);
420	break;
421	case InExponent:
422	if (isDecimalDigit(current)) {
423	record8(current);
424	} else
425	setDone(Number);
426	break;
427	case InIdentifierUnicodeEscapeStart:
428	if (current == 'u')
429	state = InIdentifierUnicodeEscape;
430	else
431	setDone(Bad);
432	break;
433	case InIdentifierUnicodeEscape:
434	if (isHexDigit(current) && isHexDigit(next1) && isHexDigit(next2) && isHexDigit(next3)) {
435	record16(convertUnicode(current, next1, next2, next3));
436	shift(3);
437	state = InIdentifier;
438	} else {
439	setDone(Bad);
440	}
441	break;
442	default:
443	ASSERT(!"Unhandled state in switch statement");
444	}
445
446	// move on to the next character
447	if (!done)
448	shift(1);
449	#ifndef KJS_PURE_ECMA
450	if (state != Start && state != InSingleLineComment)
451	bol = false;
452	#endif
453	}
454
455	// no identifiers allowed directly after numeric literal, e.g. "3in" is bad
456	if ((state == Number \|\| state == Octal \|\| state == Hex) && isIdentStart(current))
457	state = Bad;
458
459	// terminate string
460	buffer8[pos8] = '\0';
461
462	#ifdef KJS_DEBUG_LEX
463	fprintf(stderr, "line: %d ", lineNo());
464	fprintf(stderr, "yytext (%x): ", buffer8[0]);
465	fprintf(stderr, "%s ", buffer8);
466	#endif
467
468	double dval = 0;
469	if (state == Number) {
470	dval = strtod(buffer8, 0L);
471	} else if (state == Hex) { // scan hex numbers
472	const char *p = buffer8 + 2;
473	while (char c = *p++) {
474	dval *= 16;
475	dval += convertHex(c);
476	}
477
478	if (dval >= mantissaOverflowLowerBound)
479	dval = parseIntOverflow(buffer8 + 2, p - (buffer8 + 3), 16);
480
481	state = Number;
482	} else if (state == Octal) { // scan octal number
483	const char *p = buffer8 + 1;
484	while (char c = *p++) {
485	dval *= 8;
486	dval += c - '0';
487	}
488
489	if (dval >= mantissaOverflowLowerBound)
490	dval = parseIntOverflow(buffer8 + 1, p - (buffer8 + 2), 8);
491
492	state = Number;
493	}
494
495	#ifdef KJS_DEBUG_LEX
496	switch (state) {
497	case Eof:
498	printf("(EOF)\n");
499	break;
500	case Other:
501	printf("(Other)\n");
502	break;
503	case Identifier:
504	printf("(Identifier)/(Keyword)\n");
505	break;
506	case String:
507	printf("(String)\n");
508	break;
509	case Number:
510	printf("(Number)\n");
511	break;
512	default:
513	printf("(unknown)");
514	}
515	#endif
516
517	if (state != Identifier && eatNextIdentifier)
518	eatNextIdentifier = false;
519
520	restrKeyword = false;
521	delimited = false;
522	kjsyylloc.first_line = yylineno; // ???
523	kjsyylloc.last_line = yylineno;
524
525	switch (state) {
526	case Eof:
527	token = 0;
528	break;
529	case Other:
530	if(token == '}' \|\| token == ';') {
531	delimited = true;
532	}
533	break;
534	case IdentifierOrKeyword:
535	if ((token = Lookup::find(&mainTable, buffer16, pos16)) < 0) {
536	case Identifier:
537	// Lookup for keyword failed, means this is an identifier
538	// Apply anonymous-function hack below (eat the identifier)
539	if (eatNextIdentifier) {
540	eatNextIdentifier = false;
541	token = lex();
542	break;
543	}
544	kjsyylval.ident = makeIdentifier(buffer16, pos16);
545	token = IDENT;
546	break;
547	}
548
549	eatNextIdentifier = false;
550	// Hack for "f = function somename() { ... }", too hard to get into the grammar
551	if (token == FUNCTION && lastToken == '=' )
552	eatNextIdentifier = true;
553
554	if (token == CONTINUE \|\| token == BREAK \|\|
555	token == RETURN \|\| token == THROW)
556	restrKeyword = true;
557	break;
558	case String:
559	kjsyylval.string = makeUString(buffer16, pos16);
560	token = STRING;
561	break;
562	case Number:
563	kjsyylval.doubleValue = dval;
564	token = NUMBER;
565	break;
566	case Bad:
567	#ifdef KJS_DEBUG_LEX
568	fprintf(stderr, "yylex: ERROR.\n");
569	#endif
570	error = true;
571	return -1;
572	default:
573	ASSERT(!"unhandled numeration value in switch");
574	error = true;
575	return -1;
576	}
577	lastToken = token;
578	return token;
579	}
580
581	bool Lexer::isWhiteSpace() const
582	{
583	return current == '\t' \|\| current == 0x0b \|\| current == 0x0c \|\| isSeparatorSpace(current);
584	}
585
586	bool Lexer::isLineTerminator()
587	{
588	bool cr = (current == '\r');
589	bool lf = (current == '\n');
590	if (cr)
591	skipLF = true;
592	else if (lf)
593	skipCR = true;
594	return cr \|\| lf \|\| current == 0x2028 \|\| current == 0x2029;
595	}
596
597	bool Lexer::isIdentStart(int c)
598	{
599	return (category(c) & (Letter_Uppercase \| Letter_Lowercase \| Letter_Titlecase \| Letter_Modifier \| Letter_Other))
600	\|\| c == '$' \|\| c == '_';
601	}
602
603	bool Lexer::isIdentPart(int c)
604	{
605	return (category(c) & (Letter_Uppercase \| Letter_Lowercase \| Letter_Titlecase \| Letter_Modifier \| Letter_Other
606	\| Mark_NonSpacing \| Mark_SpacingCombining \| Number_DecimalDigit \| Punctuation_Connector))
607	\|\| c == '$' \|\| c == '_';
608	}
609
// True for the ASCII digits '0' through '9'; false for anything else,
// including the -1 end-of-input marker.
static bool isDecimalDigit(int c)
{
  if (c < '0')
    return false;
  return c <= '9';
}
614
615	bool Lexer::isHexDigit(int c)
616	{
617	return (c >= '0' && c <= '9' \|\|
618	c >= 'a' && c <= 'f' \|\|
619	c >= 'A' && c <= 'F');
620	}
621
622	bool Lexer::isOctalDigit(int c)
623	{
624	return (c >= '0' && c <= '7');
625	}
626
627	int Lexer::matchPunctuator(int c1, int c2, int c3, int c4)
628	{
629	if (c1 == '>' && c2 == '>' && c3 == '>' && c4 == '=') {
630	shift(4);
631	return URSHIFTEQUAL;
632	} else if (c1 == '=' && c2 == '=' && c3 == '=') {
633	shift(3);
634	return STREQ;
635	} else if (c1 == '!' && c2 == '=' && c3 == '=') {
636	shift(3);
637	return STRNEQ;
638	} else if (c1 == '>' && c2 == '>' && c3 == '>') {
639	shift(3);
640	return URSHIFT;
641	} else if (c1 == '<' && c2 == '<' && c3 == '=') {
642	shift(3);
643	return LSHIFTEQUAL;
644	} else if (c1 == '>' && c2 == '>' && c3 == '=') {
645	shift(3);
646	return RSHIFTEQUAL;
647	} else if (c1 == '<' && c2 == '=') {
648	shift(2);
649	return LE;
650	} else if (c1 == '>' && c2 == '=') {
651	shift(2);
652	return GE;
653	} else if (c1 == '!' && c2 == '=') {
654	shift(2);
655	return NE;
656	} else if (c1 == '+' && c2 == '+') {
657	shift(2);
658	if (terminator)
659	return AUTOPLUSPLUS;
660	else
661	return PLUSPLUS;
662	} else if (c1 == '-' && c2 == '-') {
663	shift(2);
664	if (terminator)
665	return AUTOMINUSMINUS;
666	else
667	return MINUSMINUS;
668	} else if (c1 == '=' && c2 == '=') {
669	shift(2);
670	return EQEQ;
671	} else if (c1 == '+' && c2 == '=') {
672	shift(2);
673	return PLUSEQUAL;
674	} else if (c1 == '-' && c2 == '=') {
675	shift(2);
676	return MINUSEQUAL;
677	} else if (c1 == '*' && c2 == '=') {
678	shift(2);
679	return MULTEQUAL;
680	} else if (c1 == '/' && c2 == '=') {
681	shift(2);
682	return DIVEQUAL;
683	} else if (c1 == '&' && c2 == '=') {
684	shift(2);
685	return ANDEQUAL;
686	} else if (c1 == '^' && c2 == '=') {
687	shift(2);
688	return XOREQUAL;
689	} else if (c1 == '%' && c2 == '=') {
690	shift(2);
691	return MODEQUAL;
692	} else if (c1 == '\|' && c2 == '=') {
693	shift(2);
694	return OREQUAL;
695	} else if (c1 == '<' && c2 == '<') {
696	shift(2);
697	return LSHIFT;
698	} else if (c1 == '>' && c2 == '>') {
699	shift(2);
700	return RSHIFT;
701	} else if (c1 == '&' && c2 == '&') {
702	shift(2);
703	return AND;
704	} else if (c1 == '\|' && c2 == '\|') {
705	shift(2);
706	return OR;
707	}
708
709	switch(c1) {
710	case '=':
711	case '>':
712	case '<':
713	case ',':
714	case '!':
715	case '~':
716	case '?':
717	case ':':
718	case '.':
719	case '+':
720	case '-':
721	case '*':
722	case '/':
723	case '&':
724	case '\|':
725	case '^':
726	case '%':
727	case '(':
728	case ')':
729	case '{':
730	case '}':
731	case '[':
732	case ']':
733	case ';':
734	shift(1);
735	return static_cast<int>(c1);
736	default:
737	return -1;
738	}
739	}
740
741	unsigned short Lexer::singleEscape(unsigned short c)
742	{
743	switch(c) {
744	case 'b':
745	return 0x08;
746	case 't':
747	return 0x09;
748	case 'n':
749	return 0x0A;
750	case 'v':
751	return 0x0B;
752	case 'f':
753	return 0x0C;
754	case 'r':
755	return 0x0D;
756	case '"':
757	return 0x22;
758	case '\'':
759	return 0x27;
760	case '\\':
761	return 0x5C;
762	default:
763	return c;
764	}
765	}
766
767	unsigned short Lexer::convertOctal(int c1, int c2, int c3)
768	{
769	return static_cast<unsigned short>((c1 - '0') * 64 + (c2 - '0') * 8 + c3 - '0');
770	}
771
772	unsigned char Lexer::convertHex(int c)
773	{
774	if (c >= '0' && c <= '9')
775	return static_cast<unsigned char>(c - '0');
776	if (c >= 'a' && c <= 'f')
777	return static_cast<unsigned char>(c - 'a' + 10);
778	return static_cast<unsigned char>(c - 'A' + 10);
779	}
780
781	unsigned char Lexer::convertHex(int c1, int c2)
782	{
783	return ((convertHex(c1) << 4) + convertHex(c2));
784	}
785
786	KJS::UChar Lexer::convertUnicode(int c1, int c2, int c3, int c4)
787	{
788	return KJS::UChar((convertHex(c1) << 4) + convertHex(c2),
789	(convertHex(c3) << 4) + convertHex(c4));
790	}
791
792	void Lexer::record8(int c)
793	{
794	ASSERT(c >= 0);
795	ASSERT(c <= 0xff);
796
797	// enlarge buffer if full
798	if (pos8 >= size8 - 1) {
799	char tmp = new char[2 size8];
800	memcpy(tmp, buffer8, size8 * sizeof(char));
801	delete [] buffer8;
802	buffer8 = tmp;
803	size8 *= 2;
804	}
805
806	buffer8[pos8++] = (char) c;
807	}
808
809	void Lexer::record16(int c)
810	{
811	ASSERT(c >= 0);
812	ASSERT(c <= USHRT_MAX);
813	record16(UChar(static_cast<unsigned short>(c)));
814	}
815
816	void Lexer::record16(KJS::UChar c)
817	{
818	// enlarge buffer if full
819	if (pos16 >= size16 - 1) {
820	KJS::UChar tmp = new KJS::UChar[2 size16];
821	memcpy(tmp, buffer16, size16 * sizeof(KJS::UChar));
822	delete [] buffer16;
823	buffer16 = tmp;
824	size16 *= 2;
825	}
826
827	buffer16[pos16++] = c;
828	}
829
830	bool Lexer::scanRegExp()
831	{
832	pos16 = 0;
833	bool lastWasEscape = false;
834	bool inBrackets = false;
835
836	while (1) {
837	if (isLineTerminator() \|\| current == -1)
838	return false;
839	else if (current != '/' \|\| lastWasEscape == true \|\| inBrackets == true)
840	{
841	// keep track of '[' and ']'
842	if (!lastWasEscape) {
843	if ( current == '[' && !inBrackets )
844	inBrackets = true;
845	if ( current == ']' && inBrackets )
846	inBrackets = false;
847	}
848	record16(current);
849	lastWasEscape =
850	!lastWasEscape && (current == '\\');
851	}
852	else { // end of regexp
853	pattern = UString(buffer16, pos16);
854	pos16 = 0;
855	shift(1);
856	break;
857	}
858	shift(1);
859	}
860
861	while (isIdentPart(current)) {
862	record16(current);
863	shift(1);
864	}
865	flags = UString(buffer16, pos16);
866
867	return true;
868	}
869
870
871	void Lexer::doneParsing()
872	{
873	for (unsigned i = 0; i < numIdentifiers; i++) {
874	delete identifiers[i];
875	}
876	fastFree(identifiers);
877	identifiers = 0;
878	numIdentifiers = 0;
879	identifiersCapacity = 0;
880
881	for (unsigned i = 0; i < numStrings; i++) {
882	delete strings[i];
883	}
884	fastFree(strings);
885	strings = 0;
886	numStrings = 0;
887	stringsCapacity = 0;
888	}
889
890	const int initialCapacity = 64;
891	const int growthFactor = 2;
892
893	// FIXME: this completely ignores its parameters, instead using buffer16 and pos16 - wtf?
894	Identifier Lexer::makeIdentifier(KJS::UChar, unsigned int)
895	{
896	if (numIdentifiers == identifiersCapacity) {
897	identifiersCapacity = (identifiersCapacity == 0) ? initialCapacity : identifiersCapacity *growthFactor;
898	identifiers = (KJS::Identifier *)fastRealloc(identifiers, sizeof(KJS::Identifier ) * identifiersCapacity);
899	}
900
901	KJS::Identifier *identifier = new KJS::Identifier(buffer16, pos16);
902	identifiers[numIdentifiers++] = identifier;
903	return identifier;
904	}
905
906	// FIXME: this completely ignores its parameters, instead using buffer16 and pos16 - wtf?
907	UString Lexer::makeUString(KJS::UChar, unsigned int)
908	{
909	if (numStrings == stringsCapacity) {
910	stringsCapacity = (stringsCapacity == 0) ? initialCapacity : stringsCapacity *growthFactor;
911	strings = (UString *)fastRealloc(strings, sizeof(UString ) * stringsCapacity);
912	}
913
914	UString *string = new UString(buffer16, pos16);
915	strings[numStrings++] = string;
916	return string;
917	}
918
919	}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: webkit/trunk/JavaScriptCore/kjs/lexer.cpp@ 27695

Download in other formats: