Context Navigation

lexer.cpp@ 28545

Visit:

Last change on this file since 28545 was 28468, checked in by [email protected], 17 years ago

Reviewed by Darin Adler.

Third step in refactoring JSGlobalObject: Moved data members and
functions accessing data members from Interpreter to JSGlobalObject.
Changed Interpreter member functions to static functions.

This resolves a bug in global object bootstrapping, where the global
ExecState could be used when uninitialized.

This is a big change, but it's mostly code motion and renaming.

Layout and JS tests, and testjsglue and testapi, pass. SunSpider reports
a .7% regression, but Shark sees no difference related to this patch,
and SunSpider reported a .7% speedup from an earlier step in this
refactoring, so I think it's fair to call that a wash.

JavaScriptGlue:

Reviewed by Darin Adler.

Third step in refactoring JSGlobalObject: Moved data members and data
member access from Interpreter to JSGlobalObject. Replaced JSInterpreter
subclass with JSGlobalObject subclass.

JSRun.cpp: (JSRun::JSRun): (JSRun::Evaluate): (JSRun::CheckSyntax):
JSRun.h: (JSGlueGlobalObject::JSGlueGlobalObject):
JSUtils.cpp: (KJSValueToCFTypeInternal):
JSValueWrapper.cpp: (getThreadGlobalExecState):

WebCore:

Reviewed by Darin Adler.

Third step in refactoring JSGlobalObject: Moved data members and data
member access from Interpreter to JSGlobalObject. Changed Interpreter
member functions to static functions. Same for the subclass,
ScriptInterpreter.

This is a big change, but it's mostly code motion and renaming.

WebKit/mac:

Reviewed by Darin Adler.

Third step in refactoring JSGlobalObject: Moved data members and data
member access from Interpreter to JSGlobalObject.

WebView/WebFrame.mm: (-[WebFrame _attachScriptDebugger]):

WebKit/win:

Reviewed by Darin Adler.

Third step in refactoring JSGlobalObject: Moved data members and data
member access from Interpreter to JSGlobalObject.

WebFrame.cpp: (WebFrame::globalContext): (WebFrame::attachScriptDebugger): (WebFrame::windowObjectCleared):
WebScriptDebugger.cpp: (WebScriptDebugger::WebScriptDebugger):

Property svn:eol-style set to native

File size: 21.6 KB

Line
1	// -*- c-basic-offset: 2 -*-
2	/*
3	* Copyright (C) 1999-2000 Harri Porten ([email protected])
4	* Copyright (C) 2006, 2007 Apple Inc. All Rights Reserved.
5	* Copyright (C) 2007 Cameron Zwarich ([email protected])
6	*
7	* This library is free software; you can redistribute it and/or
8	* modify it under the terms of the GNU Library General Public
9	* License as published by the Free Software Foundation; either
10	* version 2 of the License, or (at your option) any later version.
11	*
12	* This library is distributed in the hope that it will be useful,
13	* but WITHOUT ANY WARRANTY; without even the implied warranty of
14	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15	* Library General Public License for more details.
16	*
17	* You should have received a copy of the GNU Library General Public License
18	* along with this library; see the file COPYING.LIB. If not, write to
19	* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20	* Boston, MA 02110-1301, USA.
21	*
22	*/
23
24	#include "config.h"
25	#include "lexer.h"
26
27	#include "function.h"
28	#include "nodes.h"
29	#include <ctype.h>
30	#include <limits.h>
31	#include <string.h>
32	#include <wtf/Assertions.h>
33	#include <wtf/unicode/Unicode.h>
34
35	using namespace WTF;
36	using namespace Unicode;
37
38	// we can't specify the namespace in yacc's C output, so do it here
39	using namespace KJS;
40
41	#ifndef KDE_USE_FINAL
42	#include "grammar.h"
43	#endif
44
45	#include "lookup.h"
46	#include "lexer.lut.h"
47
48	extern YYLTYPE kjsyylloc; // global bison variable holding token info
49
50	// a bridge for yacc from the C world to C++
51	int kjsyylex()
52	{
53	return lexer().lex();
54	}
55
56	namespace KJS {
57
// Defined further down, next to the other character classifiers.
static bool isDecimalDigit(int);

// Capacity reserved up front for the 8-bit and 16-bit scratch buffers.
static const size_t initialReadBufferCapacity = 32;
// Capacity reserved up front for the string and identifier tables.
static const size_t initialStringTableCapacity = 64;
62
63	Lexer& lexer()
64	{
65	ASSERT(JSLock::currentThreadIsHoldingLock());
66
67	// FIXME: We'd like to avoid calling new here, but we don't currently
68	// support tearing down the Lexer at app quit time, since that would involve
69	// tearing down its UString data members without holding the JSLock.
70	static Lexer* staticLexer = new Lexer;
71	return *staticLexer;
72	}
73
74	Lexer::Lexer()
75	: yylineno(1)
76	, restrKeyword(false)
77	, eatNextIdentifier(false)
78	, stackToken(-1)
79	, lastToken(-1)
80	, pos(0)
81	, code(0)
82	, length(0)
83	#ifndef KJS_PURE_ECMA
84	, bol(true)
85	#endif
86	, current(0)
87	, next1(0)
88	, next2(0)
89	, next3(0)
90	{
91	m_buffer8.reserveCapacity(initialReadBufferCapacity);
92	m_buffer16.reserveCapacity(initialReadBufferCapacity);
93	m_strings.reserveCapacity(initialStringTableCapacity);
94	m_identifiers.reserveCapacity(initialStringTableCapacity);
95	}
96
97	void Lexer::setCode(const UString &sourceURL, int startingLineNumber, const KJS::UChar *c, unsigned int len)
98	{
99	yylineno = 1 + startingLineNumber;
100	m_sourceURL = sourceURL;
101	restrKeyword = false;
102	delimited = false;
103	eatNextIdentifier = false;
104	stackToken = -1;
105	lastToken = -1;
106	pos = 0;
107	code = c;
108	length = len;
109	skipLF = false;
110	skipCR = false;
111	error = false;
112	#ifndef KJS_PURE_ECMA
113	bol = true;
114	#endif
115
116	// read first characters
117	current = (length > 0) ? code[0].uc : -1;
118	next1 = (length > 1) ? code[1].uc : -1;
119	next2 = (length > 2) ? code[2].uc : -1;
120	next3 = (length > 3) ? code[3].uc : -1;
121	}
122
123	void Lexer::shift(unsigned int p)
124	{
125	// Here would be a good place to strip Cf characters, but that has caused compatibility problems:
126	// <http://bugs.webkit.org/show_bug.cgi?id=10183>.
127	while (p--) {
128	pos++;
129	current = next1;
130	next1 = next2;
131	next2 = next3;
132	next3 = (pos + 3 < length) ? code[pos + 3].uc : -1;
133	}
134	}
135
136	// called on each new line
137	void Lexer::nextLine()
138	{
139	yylineno++;
140	#ifndef KJS_PURE_ECMA
141	bol = true;
142	#endif
143	}
144
145	void Lexer::setDone(State s)
146	{
147	state = s;
148	done = true;
149	}
150
151	int Lexer::lex()
152	{
153	int token = 0;
154	state = Start;
155	unsigned short stringType = 0; // either single or double quotes
156	m_buffer8.clear();
157	m_buffer16.clear();
158	done = false;
159	terminator = false;
160	skipLF = false;
161	skipCR = false;
162
163	// did we push a token on the stack previously ?
164	// (after an automatic semicolon insertion)
165	if (stackToken >= 0) {
166	setDone(Other);
167	token = stackToken;
168	stackToken = 0;
169	}
170
171	while (!done) {
172	if (skipLF && current != '\n') // found \r but not \n afterwards
173	skipLF = false;
174	if (skipCR && current != '\r') // found \n but not \r afterwards
175	skipCR = false;
176	if (skipLF \|\| skipCR) // found \r\n or \n\r -> eat the second one
177	{
178	skipLF = false;
179	skipCR = false;
180	shift(1);
181	}
182	switch (state) {
183	case Start:
184	if (isWhiteSpace()) {
185	// do nothing
186	} else if (current == '/' && next1 == '/') {
187	shift(1);
188	state = InSingleLineComment;
189	} else if (current == '/' && next1 == '*') {
190	shift(1);
191	state = InMultiLineComment;
192	} else if (current == -1) {
193	if (!terminator && !delimited) {
194	// automatic semicolon insertion if program incomplete
195	token = ';';
196	stackToken = 0;
197	setDone(Other);
198	} else
199	setDone(Eof);
200	} else if (isLineTerminator()) {
201	nextLine();
202	terminator = true;
203	if (restrKeyword) {
204	token = ';';
205	setDone(Other);
206	}
207	} else if (current == '"' \|\| current == '\'') {
208	state = InString;
209	stringType = static_cast<unsigned short>(current);
210	} else if (isIdentStart(current)) {
211	record16(current);
212	state = InIdentifierOrKeyword;
213	} else if (current == '\\') {
214	state = InIdentifierUnicodeEscapeStart;
215	} else if (current == '0') {
216	record8(current);
217	state = InNum0;
218	} else if (isDecimalDigit(current)) {
219	record8(current);
220	state = InNum;
221	} else if (current == '.' && isDecimalDigit(next1)) {
222	record8(current);
223	state = InDecimal;
224	#ifndef KJS_PURE_ECMA
225	// <!-- marks the beginning of a line comment (for www usage)
226	} else if (current == '<' && next1 == '!' &&
227	next2 == '-' && next3 == '-') {
228	shift(3);
229	state = InSingleLineComment;
230	// same for -->
231	} else if (bol && current == '-' && next1 == '-' && next2 == '>') {
232	shift(2);
233	state = InSingleLineComment;
234	#endif
235	} else {
236	token = matchPunctuator(current, next1, next2, next3);
237	if (token != -1) {
238	setDone(Other);
239	} else {
240	// cerr << "encountered unknown character" << endl;
241	setDone(Bad);
242	}
243	}
244	break;
245	case InString:
246	if (current == stringType) {
247	shift(1);
248	setDone(String);
249	} else if (isLineTerminator() \|\| current == -1) {
250	setDone(Bad);
251	} else if (current == '\\') {
252	state = InEscapeSequence;
253	} else {
254	record16(current);
255	}
256	break;
257	// Escape Sequences inside of strings
258	case InEscapeSequence:
259	if (isOctalDigit(current)) {
260	if (current >= '0' && current <= '3' &&
261	isOctalDigit(next1) && isOctalDigit(next2)) {
262	record16(convertOctal(current, next1, next2));
263	shift(2);
264	state = InString;
265	} else if (isOctalDigit(current) && isOctalDigit(next1)) {
266	record16(convertOctal('0', current, next1));
267	shift(1);
268	state = InString;
269	} else if (isOctalDigit(current)) {
270	record16(convertOctal('0', '0', current));
271	state = InString;
272	} else {
273	setDone(Bad);
274	}
275	} else if (current == 'x')
276	state = InHexEscape;
277	else if (current == 'u')
278	state = InUnicodeEscape;
279	else if (isLineTerminator()) {
280	nextLine();
281	state = InString;
282	} else {
283	record16(singleEscape(static_cast<unsigned short>(current)));
284	state = InString;
285	}
286	break;
287	case InHexEscape:
288	if (isHexDigit(current) && isHexDigit(next1)) {
289	state = InString;
290	record16(convertHex(current, next1));
291	shift(1);
292	} else if (current == stringType) {
293	record16('x');
294	shift(1);
295	setDone(String);
296	} else {
297	record16('x');
298	record16(current);
299	state = InString;
300	}
301	break;
302	case InUnicodeEscape:
303	if (isHexDigit(current) && isHexDigit(next1) && isHexDigit(next2) && isHexDigit(next3)) {
304	record16(convertUnicode(current, next1, next2, next3));
305	shift(3);
306	state = InString;
307	} else if (current == stringType) {
308	record16('u');
309	shift(1);
310	setDone(String);
311	} else {
312	setDone(Bad);
313	}
314	break;
315	case InSingleLineComment:
316	if (isLineTerminator()) {
317	nextLine();
318	terminator = true;
319	if (restrKeyword) {
320	token = ';';
321	setDone(Other);
322	} else
323	state = Start;
324	} else if (current == -1) {
325	setDone(Eof);
326	}
327	break;
328	case InMultiLineComment:
329	if (current == -1) {
330	setDone(Bad);
331	} else if (isLineTerminator()) {
332	nextLine();
333	} else if (current == '*' && next1 == '/') {
334	state = Start;
335	shift(1);
336	}
337	break;
338	case InIdentifierOrKeyword:
339	case InIdentifier:
340	if (isIdentPart(current))
341	record16(current);
342	else if (current == '\\')
343	state = InIdentifierUnicodeEscapeStart;
344	else
345	setDone(state == InIdentifierOrKeyword ? IdentifierOrKeyword : Identifier);
346	break;
347	case InNum0:
348	if (current == 'x' \|\| current == 'X') {
349	record8(current);
350	state = InHex;
351	} else if (current == '.') {
352	record8(current);
353	state = InDecimal;
354	} else if (current == 'e' \|\| current == 'E') {
355	record8(current);
356	state = InExponentIndicator;
357	} else if (isOctalDigit(current)) {
358	record8(current);
359	state = InOctal;
360	} else if (isDecimalDigit(current)) {
361	record8(current);
362	state = InDecimal;
363	} else {
364	setDone(Number);
365	}
366	break;
367	case InHex:
368	if (isHexDigit(current)) {
369	record8(current);
370	} else {
371	setDone(Hex);
372	}
373	break;
374	case InOctal:
375	if (isOctalDigit(current)) {
376	record8(current);
377	}
378	else if (isDecimalDigit(current)) {
379	record8(current);
380	state = InDecimal;
381	} else
382	setDone(Octal);
383	break;
384	case InNum:
385	if (isDecimalDigit(current)) {
386	record8(current);
387	} else if (current == '.') {
388	record8(current);
389	state = InDecimal;
390	} else if (current == 'e' \|\| current == 'E') {
391	record8(current);
392	state = InExponentIndicator;
393	} else
394	setDone(Number);
395	break;
396	case InDecimal:
397	if (isDecimalDigit(current)) {
398	record8(current);
399	} else if (current == 'e' \|\| current == 'E') {
400	record8(current);
401	state = InExponentIndicator;
402	} else
403	setDone(Number);
404	break;
405	case InExponentIndicator:
406	if (current == '+' \|\| current == '-') {
407	record8(current);
408	} else if (isDecimalDigit(current)) {
409	record8(current);
410	state = InExponent;
411	} else
412	setDone(Bad);
413	break;
414	case InExponent:
415	if (isDecimalDigit(current)) {
416	record8(current);
417	} else
418	setDone(Number);
419	break;
420	case InIdentifierUnicodeEscapeStart:
421	if (current == 'u')
422	state = InIdentifierUnicodeEscape;
423	else
424	setDone(Bad);
425	break;
426	case InIdentifierUnicodeEscape:
427	if (isHexDigit(current) && isHexDigit(next1) && isHexDigit(next2) && isHexDigit(next3)) {
428	record16(convertUnicode(current, next1, next2, next3));
429	shift(3);
430	state = InIdentifier;
431	} else {
432	setDone(Bad);
433	}
434	break;
435	default:
436	ASSERT(!"Unhandled state in switch statement");
437	}
438
439	// move on to the next character
440	if (!done)
441	shift(1);
442	#ifndef KJS_PURE_ECMA
443	if (state != Start && state != InSingleLineComment)
444	bol = false;
445	#endif
446	}
447
448	// no identifiers allowed directly after numeric literal, e.g. "3in" is bad
449	if ((state == Number \|\| state == Octal \|\| state == Hex) && isIdentStart(current))
450	state = Bad;
451
452	// terminate string
453	m_buffer8.append('\0');
454
455	#ifdef KJS_DEBUG_LEX
456	fprintf(stderr, "line: %d ", lineNo());
457	fprintf(stderr, "yytext (%x): ", m_buffer8[0]);
458	fprintf(stderr, "%s ", buffer8.data());
459	#endif
460
461	double dval = 0;
462	if (state == Number) {
463	dval = strtod(m_buffer8.data(), 0L);
464	} else if (state == Hex) { // scan hex numbers
465	const char* p = m_buffer8.data() + 2;
466	while (char c = *p++) {
467	dval *= 16;
468	dval += convertHex(c);
469	}
470
471	if (dval >= mantissaOverflowLowerBound)
472	dval = parseIntOverflow(m_buffer8.data() + 2, p - (m_buffer8.data() + 3), 16);
473
474	state = Number;
475	} else if (state == Octal) { // scan octal number
476	const char* p = m_buffer8.data() + 1;
477	while (char c = *p++) {
478	dval *= 8;
479	dval += c - '0';
480	}
481
482	if (dval >= mantissaOverflowLowerBound)
483	dval = parseIntOverflow(m_buffer8.data() + 1, p - (m_buffer8.data() + 2), 8);
484
485	state = Number;
486	}
487
488	#ifdef KJS_DEBUG_LEX
489	switch (state) {
490	case Eof:
491	printf("(EOF)\n");
492	break;
493	case Other:
494	printf("(Other)\n");
495	break;
496	case Identifier:
497	printf("(Identifier)/(Keyword)\n");
498	break;
499	case String:
500	printf("(String)\n");
501	break;
502	case Number:
503	printf("(Number)\n");
504	break;
505	default:
506	printf("(unknown)");
507	}
508	#endif
509
510	if (state != Identifier && eatNextIdentifier)
511	eatNextIdentifier = false;
512
513	restrKeyword = false;
514	delimited = false;
515	kjsyylloc.first_line = yylineno; // ???
516	kjsyylloc.last_line = yylineno;
517
518	switch (state) {
519	case Eof:
520	token = 0;
521	break;
522	case Other:
523	if(token == '}' \|\| token == ';') {
524	delimited = true;
525	}
526	break;
527	case IdentifierOrKeyword:
528	if ((token = Lookup::find(&mainTable, m_buffer16.data(), m_buffer16.size())) < 0) {
529	case Identifier:
530	// Lookup for keyword failed, means this is an identifier
531	// Apply anonymous-function hack below (eat the identifier)
532	if (eatNextIdentifier) {
533	eatNextIdentifier = false;
534	token = lex();
535	break;
536	}
537	kjsyylval.ident = makeIdentifier(m_buffer16);
538	token = IDENT;
539	break;
540	}
541
542	eatNextIdentifier = false;
543	// Hack for "f = function somename() { ... }", too hard to get into the grammar
544	if (token == FUNCTION && lastToken == '=' )
545	eatNextIdentifier = true;
546
547	if (token == CONTINUE \|\| token == BREAK \|\|
548	token == RETURN \|\| token == THROW)
549	restrKeyword = true;
550	break;
551	case String:
552	kjsyylval.string = makeUString(m_buffer16);
553	token = STRING;
554	break;
555	case Number:
556	kjsyylval.doubleValue = dval;
557	token = NUMBER;
558	break;
559	case Bad:
560	#ifdef KJS_DEBUG_LEX
561	fprintf(stderr, "yylex: ERROR.\n");
562	#endif
563	error = true;
564	return -1;
565	default:
566	ASSERT(!"unhandled numeration value in switch");
567	error = true;
568	return -1;
569	}
570	lastToken = token;
571	return token;
572	}
573
574	bool Lexer::isWhiteSpace() const
575	{
576	return current == '\t' \|\| current == 0x0b \|\| current == 0x0c \|\| isSeparatorSpace(current);
577	}
578
579	bool Lexer::isLineTerminator()
580	{
581	bool cr = (current == '\r');
582	bool lf = (current == '\n');
583	if (cr)
584	skipLF = true;
585	else if (lf)
586	skipCR = true;
587	return cr \|\| lf \|\| current == 0x2028 \|\| current == 0x2029;
588	}
589
590	bool Lexer::isIdentStart(int c)
591	{
592	return (category(c) & (Letter_Uppercase \| Letter_Lowercase \| Letter_Titlecase \| Letter_Modifier \| Letter_Other))
593	\|\| c == '$' \|\| c == '_';
594	}
595
596	bool Lexer::isIdentPart(int c)
597	{
598	return (category(c) & (Letter_Uppercase \| Letter_Lowercase \| Letter_Titlecase \| Letter_Modifier \| Letter_Other
599	\| Mark_NonSpacing \| Mark_SpacingCombining \| Number_DecimalDigit \| Punctuation_Connector))
600	\|\| c == '$' \|\| c == '_';
601	}
602
// True when c is one of the ASCII digits '0' through '9'.
static bool isDecimalDigit(int c)
{
  if (c < '0')
    return false;
  return c <= '9';
}
607
608	bool Lexer::isHexDigit(int c)
609	{
610	return (c >= '0' && c <= '9' \|\|
611	c >= 'a' && c <= 'f' \|\|
612	c >= 'A' && c <= 'F');
613	}
614
615	bool Lexer::isOctalDigit(int c)
616	{
617	return (c >= '0' && c <= '7');
618	}
619
620	int Lexer::matchPunctuator(int c1, int c2, int c3, int c4)
621	{
622	if (c1 == '>' && c2 == '>' && c3 == '>' && c4 == '=') {
623	shift(4);
624	return URSHIFTEQUAL;
625	} else if (c1 == '=' && c2 == '=' && c3 == '=') {
626	shift(3);
627	return STREQ;
628	} else if (c1 == '!' && c2 == '=' && c3 == '=') {
629	shift(3);
630	return STRNEQ;
631	} else if (c1 == '>' && c2 == '>' && c3 == '>') {
632	shift(3);
633	return URSHIFT;
634	} else if (c1 == '<' && c2 == '<' && c3 == '=') {
635	shift(3);
636	return LSHIFTEQUAL;
637	} else if (c1 == '>' && c2 == '>' && c3 == '=') {
638	shift(3);
639	return RSHIFTEQUAL;
640	} else if (c1 == '<' && c2 == '=') {
641	shift(2);
642	return LE;
643	} else if (c1 == '>' && c2 == '=') {
644	shift(2);
645	return GE;
646	} else if (c1 == '!' && c2 == '=') {
647	shift(2);
648	return NE;
649	} else if (c1 == '+' && c2 == '+') {
650	shift(2);
651	if (terminator)
652	return AUTOPLUSPLUS;
653	else
654	return PLUSPLUS;
655	} else if (c1 == '-' && c2 == '-') {
656	shift(2);
657	if (terminator)
658	return AUTOMINUSMINUS;
659	else
660	return MINUSMINUS;
661	} else if (c1 == '=' && c2 == '=') {
662	shift(2);
663	return EQEQ;
664	} else if (c1 == '+' && c2 == '=') {
665	shift(2);
666	return PLUSEQUAL;
667	} else if (c1 == '-' && c2 == '=') {
668	shift(2);
669	return MINUSEQUAL;
670	} else if (c1 == '*' && c2 == '=') {
671	shift(2);
672	return MULTEQUAL;
673	} else if (c1 == '/' && c2 == '=') {
674	shift(2);
675	return DIVEQUAL;
676	} else if (c1 == '&' && c2 == '=') {
677	shift(2);
678	return ANDEQUAL;
679	} else if (c1 == '^' && c2 == '=') {
680	shift(2);
681	return XOREQUAL;
682	} else if (c1 == '%' && c2 == '=') {
683	shift(2);
684	return MODEQUAL;
685	} else if (c1 == '\|' && c2 == '=') {
686	shift(2);
687	return OREQUAL;
688	} else if (c1 == '<' && c2 == '<') {
689	shift(2);
690	return LSHIFT;
691	} else if (c1 == '>' && c2 == '>') {
692	shift(2);
693	return RSHIFT;
694	} else if (c1 == '&' && c2 == '&') {
695	shift(2);
696	return AND;
697	} else if (c1 == '\|' && c2 == '\|') {
698	shift(2);
699	return OR;
700	}
701
702	switch(c1) {
703	case '=':
704	case '>':
705	case '<':
706	case ',':
707	case '!':
708	case '~':
709	case '?':
710	case ':':
711	case '.':
712	case '+':
713	case '-':
714	case '*':
715	case '/':
716	case '&':
717	case '\|':
718	case '^':
719	case '%':
720	case '(':
721	case ')':
722	case '{':
723	case '}':
724	case '[':
725	case ']':
726	case ';':
727	shift(1);
728	return static_cast<int>(c1);
729	default:
730	return -1;
731	}
732	}
733
734	unsigned short Lexer::singleEscape(unsigned short c)
735	{
736	switch(c) {
737	case 'b':
738	return 0x08;
739	case 't':
740	return 0x09;
741	case 'n':
742	return 0x0A;
743	case 'v':
744	return 0x0B;
745	case 'f':
746	return 0x0C;
747	case 'r':
748	return 0x0D;
749	case '"':
750	return 0x22;
751	case '\'':
752	return 0x27;
753	case '\\':
754	return 0x5C;
755	default:
756	return c;
757	}
758	}
759
760	unsigned short Lexer::convertOctal(int c1, int c2, int c3)
761	{
762	return static_cast<unsigned short>((c1 - '0') * 64 + (c2 - '0') * 8 + c3 - '0');
763	}
764
765	unsigned char Lexer::convertHex(int c)
766	{
767	if (c >= '0' && c <= '9')
768	return static_cast<unsigned char>(c - '0');
769	if (c >= 'a' && c <= 'f')
770	return static_cast<unsigned char>(c - 'a' + 10);
771	return static_cast<unsigned char>(c - 'A' + 10);
772	}
773
774	unsigned char Lexer::convertHex(int c1, int c2)
775	{
776	return ((convertHex(c1) << 4) + convertHex(c2));
777	}
778
779	KJS::UChar Lexer::convertUnicode(int c1, int c2, int c3, int c4)
780	{
781	// FIXME: This conversion is lossy. See http://bugs.webkit.org/show_bug.cgi?id=4920.
782	return KJS::UChar((convertHex(c1) << 4) + convertHex(c2),
783	(convertHex(c3) << 4) + convertHex(c4));
784	}
785
786	void Lexer::record8(int c)
787	{
788	ASSERT(c >= 0);
789	ASSERT(c <= 0xff);
790	m_buffer8.append(static_cast<char>(c));
791	}
792
793	void Lexer::record16(int c)
794	{
795	ASSERT(c >= 0);
796	ASSERT(c <= USHRT_MAX);
797	record16(UChar(static_cast<unsigned short>(c)));
798	}
799
800	void Lexer::record16(KJS::UChar c)
801	{
802	m_buffer16.append(c);
803	}
804
805	bool Lexer::scanRegExp()
806	{
807	m_buffer16.clear();
808	bool lastWasEscape = false;
809	bool inBrackets = false;
810
811	while (1) {
812	if (isLineTerminator() \|\| current == -1)
813	return false;
814	else if (current != '/' \|\| lastWasEscape == true \|\| inBrackets == true)
815	{
816	// keep track of '[' and ']'
817	if (!lastWasEscape) {
818	if ( current == '[' && !inBrackets )
819	inBrackets = true;
820	if ( current == ']' && inBrackets )
821	inBrackets = false;
822	}
823	record16(current);
824	lastWasEscape =
825	!lastWasEscape && (current == '\\');
826	} else { // end of regexp
827	m_pattern = UString(m_buffer16);
828	m_buffer16.clear();
829	shift(1);
830	break;
831	}
832	shift(1);
833	}
834
835	while (isIdentPart(current)) {
836	record16(current);
837	shift(1);
838	}
839	m_flags = UString(m_buffer16);
840
841	return true;
842	}
843
844	void Lexer::clear()
845	{
846	deleteAllValues(m_strings);
847	Vector<UString*> newStrings;
848	newStrings.reserveCapacity(initialStringTableCapacity);
849	m_strings.swap(newStrings);
850
851	deleteAllValues(m_identifiers);
852	Vector<KJS::Identifier*> newIdentifiers;
853	newIdentifiers.reserveCapacity(initialStringTableCapacity);
854	m_identifiers.swap(newIdentifiers);
855
856	Vector<char> newBuffer8;
857	newBuffer8.reserveCapacity(initialReadBufferCapacity);
858	m_buffer8.swap(newBuffer8);
859
860	Vector<UChar> newBuffer16;
861	newBuffer16.reserveCapacity(initialReadBufferCapacity);
862	m_buffer16.swap(newBuffer16);
863
864	m_pattern = 0;
865	m_flags = 0;
866	m_sourceURL = 0;
867	}
868
869	Identifier* Lexer::makeIdentifier(const Vector<KJS::UChar>& buffer)
870	{
871	KJS::Identifier* identifier = new KJS::Identifier(buffer.data(), buffer.size());
872	m_identifiers.append(identifier);
873	return identifier;
874	}
875
876	UString* Lexer::makeUString(const Vector<KJS::UChar>& buffer)
877	{
878	UString* string = new UString(buffer);
879	m_strings.append(string);
880	return string;
881	}
882
883	} // namespace KJS

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: webkit/trunk/JavaScriptCore/kjs/lexer.cpp@ 28545

Download in other formats: