Context Navigation

lexer.cpp@ 31936

Visit:

Last change on this file since 31936 was 31936, checked in by [email protected], 17 years ago

Reviewed by Darin.

Implement an abstraction for thread-specific storage, use it to get rid of some static objects.

SunSpider results were not conclusive, possibly up to 0.2% slowdown.

JavaScriptCore.xcodeproj/project.pbxproj:
JavaScriptCore.vcproj/WTF/WTF.vcproj: Added ThreadSpecific.h

wtf/ThreadSpecific.h: Added. (WTF::::ThreadSpecific): (WTF::::~ThreadSpecific): (WTF::::get): (WTF::::set): (WTF::::destroy): (WTF::T): (WTF::::operator): Only implemented for platforms that use pthreads.

kjs/CommonIdentifiers.cpp: (KJS::CommonIdentifiers::shared):
kjs/CommonIdentifiers.h:
kjs/InitializeThreading.cpp: (KJS::initializeThreading):
kjs/Parser.cpp: (KJS::parser):
kjs/Parser.h:
kjs/identifier.cpp: (KJS::identifierTable): (KJS::literalIdentifierTable): (KJS::Identifier::initializeIdentifierThreading):
kjs/identifier.h:
kjs/lexer.cpp: (KJS::lexer):
kjs/lexer.h: Make static instances per-thread.

Property svn:eol-style set to native

File size: 22.1 KB

Line
1	// -*- c-basic-offset: 2 -*-
2	/*
3	* Copyright (C) 1999-2000 Harri Porten ([email protected])
4	* Copyright (C) 2006, 2007 Apple Inc. All Rights Reserved.
5	* Copyright (C) 2007 Cameron Zwarich ([email protected])
6	*
7	* This library is free software; you can redistribute it and/or
8	* modify it under the terms of the GNU Library General Public
9	* License as published by the Free Software Foundation; either
10	* version 2 of the License, or (at your option) any later version.
11	*
12	* This library is distributed in the hope that it will be useful,
13	* but WITHOUT ANY WARRANTY; without even the implied warranty of
14	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15	* Library General Public License for more details.
16	*
17	* You should have received a copy of the GNU Library General Public License
18	* along with this library; see the file COPYING.LIB. If not, write to
19	* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20	* Boston, MA 02110-1301, USA.
21	*
22	*/
23
24	#include "config.h"
25	#include "lexer.h"
26
27	#include "dtoa.h"
28	#include "function.h"
29	#include "nodes.h"
30	#include "NodeInfo.h"
31	#include <ctype.h>
32	#include <limits.h>
33	#include <string.h>
34	#include <wtf/Assertions.h>
35	#if USE(MULTIPLE_THREADS)
36	#include <wtf/ThreadSpecific.h>
37	#endif
38	#include <wtf/unicode/Unicode.h>
39
40	using namespace WTF;
41	using namespace Unicode;
42
43	// we can't specify the namespace in yacc's C output, so do it here
44	using namespace KJS;
45
46	#ifndef KDE_USE_FINAL
47	#include "grammar.h"
48	#endif
49
50	#include "lookup.h"
51	#include "lexer.lut.h"
52
53	// a bridge for yacc from the C world to C++
54	int kjsyylex(void* lvalp, void* llocp, void* lexer)
55	{
56	return static_cast<Lexer*>(lexer)->lex(lvalp, llocp);
57	}
58
59	namespace KJS {
60
// Capacity reserved up front for the string and identifier tables
// (m_strings / m_identifiers).
static const size_t initialStringTableCapacity = 64;

// Capacity reserved up front for the 8-bit and 16-bit scratch buffers
// (m_buffer8 / m_buffer16).
static const size_t initialReadBufferCapacity = 32;

// Defined near the character-class helpers further down in this file; declared
// here so Lexer::lex() can call it.
static bool isDecimalDigit(int);
65
66	Lexer& lexer()
67	{
68	#if USE(MULTIPLE_THREADS)
69	static ThreadSpecific<Lexer> staticLexer;
70	return *staticLexer;
71	#else
72	static Lexer staticLexer;
73	return staticLexer;
74	#endif
75	}
76
77	Lexer::Lexer()
78	: yylineno(1)
79	, restrKeyword(false)
80	, eatNextIdentifier(false)
81	, stackToken(-1)
82	, lastToken(-1)
83	, pos(0)
84	, code(0)
85	, length(0)
86	, atLineStart(true)
87	, current(0)
88	, next1(0)
89	, next2(0)
90	, next3(0)
91	{
92	m_buffer8.reserveCapacity(initialReadBufferCapacity);
93	m_buffer16.reserveCapacity(initialReadBufferCapacity);
94	m_strings.reserveCapacity(initialStringTableCapacity);
95	m_identifiers.reserveCapacity(initialStringTableCapacity);
96	}
97
98	void Lexer::setCode(int startingLineNumber, const UChar* c, unsigned int len)
99	{
100	yylineno = 1 + startingLineNumber;
101	restrKeyword = false;
102	delimited = false;
103	eatNextIdentifier = false;
104	stackToken = -1;
105	lastToken = -1;
106	pos = 0;
107	code = c;
108	length = len;
109	skipLF = false;
110	skipCR = false;
111	error = false;
112	atLineStart = true;
113
114	// read first characters
115	current = (length > 0) ? code[0] : -1;
116	next1 = (length > 1) ? code[1] : -1;
117	next2 = (length > 2) ? code[2] : -1;
118	next3 = (length > 3) ? code[3] : -1;
119	}
120
121	void Lexer::shift(unsigned int p)
122	{
123	// Here would be a good place to strip Cf characters, but that has caused compatibility problems:
124	// <http://bugs.webkit.org/show_bug.cgi?id=10183>.
125	while (p--) {
126	pos++;
127	current = next1;
128	next1 = next2;
129	next2 = next3;
130	next3 = (pos + 3 < length) ? code[pos + 3] : -1;
131	}
132	}
133
134	// called on each new line
135	void Lexer::nextLine()
136	{
137	yylineno++;
138	atLineStart = true;
139	}
140
141	void Lexer::setDone(State s)
142	{
143	state = s;
144	done = true;
145	}
146
147	int Lexer::lex(void* p1, void* p2)
148	{
149	YYSTYPE* lvalp = static_cast<YYSTYPE*>(p1);
150	YYLTYPE* llocp = static_cast<YYLTYPE*>(p2);
151	int token = 0;
152	state = Start;
153	unsigned short stringType = 0; // either single or double quotes
154	m_buffer8.clear();
155	m_buffer16.clear();
156	done = false;
157	terminator = false;
158	skipLF = false;
159	skipCR = false;
160
161	// did we push a token on the stack previously ?
162	// (after an automatic semicolon insertion)
163	if (stackToken >= 0) {
164	setDone(Other);
165	token = stackToken;
166	stackToken = 0;
167	}
168
169	while (!done) {
170	if (skipLF && current != '\n') // found \r but not \n afterwards
171	skipLF = false;
172	if (skipCR && current != '\r') // found \n but not \r afterwards
173	skipCR = false;
174	if (skipLF \|\| skipCR) // found \r\n or \n\r -> eat the second one
175	{
176	skipLF = false;
177	skipCR = false;
178	shift(1);
179	}
180	switch (state) {
181	case Start:
182	if (isWhiteSpace()) {
183	// do nothing
184	} else if (current == '/' && next1 == '/') {
185	shift(1);
186	state = InSingleLineComment;
187	} else if (current == '/' && next1 == '*') {
188	shift(1);
189	state = InMultiLineComment;
190	} else if (current == -1) {
191	if (!terminator && !delimited) {
192	// automatic semicolon insertion if program incomplete
193	token = ';';
194	stackToken = 0;
195	setDone(Other);
196	} else
197	setDone(Eof);
198	} else if (isLineTerminator()) {
199	nextLine();
200	terminator = true;
201	if (restrKeyword) {
202	token = ';';
203	setDone(Other);
204	}
205	} else if (current == '"' \|\| current == '\'') {
206	state = InString;
207	stringType = static_cast<unsigned short>(current);
208	} else if (isIdentStart(current)) {
209	record16(current);
210	state = InIdentifierOrKeyword;
211	} else if (current == '\\') {
212	state = InIdentifierStartUnicodeEscapeStart;
213	} else if (current == '0') {
214	record8(current);
215	state = InNum0;
216	} else if (isDecimalDigit(current)) {
217	record8(current);
218	state = InNum;
219	} else if (current == '.' && isDecimalDigit(next1)) {
220	record8(current);
221	state = InDecimal;
222	// <!-- marks the beginning of a line comment (for www usage)
223	} else if (current == '<' && next1 == '!' &&
224	next2 == '-' && next3 == '-') {
225	shift(3);
226	state = InSingleLineComment;
227	// same for -->
228	} else if (atLineStart && current == '-' && next1 == '-' && next2 == '>') {
229	shift(2);
230	state = InSingleLineComment;
231	} else {
232	token = matchPunctuator(current, next1, next2, next3);
233	if (token != -1) {
234	setDone(Other);
235	} else {
236	// cerr << "encountered unknown character" << endl;
237	setDone(Bad);
238	}
239	}
240	break;
241	case InString:
242	if (current == stringType) {
243	shift(1);
244	setDone(String);
245	} else if (isLineTerminator() \|\| current == -1) {
246	setDone(Bad);
247	} else if (current == '\\') {
248	state = InEscapeSequence;
249	} else {
250	record16(current);
251	}
252	break;
253	// Escape Sequences inside of strings
254	case InEscapeSequence:
255	if (isOctalDigit(current)) {
256	if (current >= '0' && current <= '3' &&
257	isOctalDigit(next1) && isOctalDigit(next2)) {
258	record16(convertOctal(current, next1, next2));
259	shift(2);
260	state = InString;
261	} else if (isOctalDigit(current) && isOctalDigit(next1)) {
262	record16(convertOctal('0', current, next1));
263	shift(1);
264	state = InString;
265	} else if (isOctalDigit(current)) {
266	record16(convertOctal('0', '0', current));
267	state = InString;
268	} else {
269	setDone(Bad);
270	}
271	} else if (current == 'x')
272	state = InHexEscape;
273	else if (current == 'u')
274	state = InUnicodeEscape;
275	else if (isLineTerminator()) {
276	nextLine();
277	state = InString;
278	} else {
279	record16(singleEscape(static_cast<unsigned short>(current)));
280	state = InString;
281	}
282	break;
283	case InHexEscape:
284	if (isHexDigit(current) && isHexDigit(next1)) {
285	state = InString;
286	record16(convertHex(current, next1));
287	shift(1);
288	} else if (current == stringType) {
289	record16('x');
290	shift(1);
291	setDone(String);
292	} else {
293	record16('x');
294	record16(current);
295	state = InString;
296	}
297	break;
298	case InUnicodeEscape:
299	if (isHexDigit(current) && isHexDigit(next1) && isHexDigit(next2) && isHexDigit(next3)) {
300	record16(convertUnicode(current, next1, next2, next3));
301	shift(3);
302	state = InString;
303	} else if (current == stringType) {
304	record16('u');
305	shift(1);
306	setDone(String);
307	} else {
308	setDone(Bad);
309	}
310	break;
311	case InSingleLineComment:
312	if (isLineTerminator()) {
313	nextLine();
314	terminator = true;
315	if (restrKeyword) {
316	token = ';';
317	setDone(Other);
318	} else
319	state = Start;
320	} else if (current == -1) {
321	setDone(Eof);
322	}
323	break;
324	case InMultiLineComment:
325	if (current == -1) {
326	setDone(Bad);
327	} else if (isLineTerminator()) {
328	nextLine();
329	} else if (current == '*' && next1 == '/') {
330	state = Start;
331	shift(1);
332	}
333	break;
334	case InIdentifierOrKeyword:
335	case InIdentifier:
336	if (isIdentPart(current))
337	record16(current);
338	else if (current == '\\')
339	state = InIdentifierPartUnicodeEscapeStart;
340	else
341	setDone(state == InIdentifierOrKeyword ? IdentifierOrKeyword : Identifier);
342	break;
343	case InNum0:
344	if (current == 'x' \|\| current == 'X') {
345	record8(current);
346	state = InHex;
347	} else if (current == '.') {
348	record8(current);
349	state = InDecimal;
350	} else if (current == 'e' \|\| current == 'E') {
351	record8(current);
352	state = InExponentIndicator;
353	} else if (isOctalDigit(current)) {
354	record8(current);
355	state = InOctal;
356	} else if (isDecimalDigit(current)) {
357	record8(current);
358	state = InDecimal;
359	} else {
360	setDone(Number);
361	}
362	break;
363	case InHex:
364	if (isHexDigit(current)) {
365	record8(current);
366	} else {
367	setDone(Hex);
368	}
369	break;
370	case InOctal:
371	if (isOctalDigit(current)) {
372	record8(current);
373	}
374	else if (isDecimalDigit(current)) {
375	record8(current);
376	state = InDecimal;
377	} else
378	setDone(Octal);
379	break;
380	case InNum:
381	if (isDecimalDigit(current)) {
382	record8(current);
383	} else if (current == '.') {
384	record8(current);
385	state = InDecimal;
386	} else if (current == 'e' \|\| current == 'E') {
387	record8(current);
388	state = InExponentIndicator;
389	} else
390	setDone(Number);
391	break;
392	case InDecimal:
393	if (isDecimalDigit(current)) {
394	record8(current);
395	} else if (current == 'e' \|\| current == 'E') {
396	record8(current);
397	state = InExponentIndicator;
398	} else
399	setDone(Number);
400	break;
401	case InExponentIndicator:
402	if (current == '+' \|\| current == '-') {
403	record8(current);
404	} else if (isDecimalDigit(current)) {
405	record8(current);
406	state = InExponent;
407	} else
408	setDone(Bad);
409	break;
410	case InExponent:
411	if (isDecimalDigit(current)) {
412	record8(current);
413	} else
414	setDone(Number);
415	break;
416	case InIdentifierStartUnicodeEscapeStart:
417	if (current == 'u')
418	state = InIdentifierStartUnicodeEscape;
419	else
420	setDone(Bad);
421	break;
422	case InIdentifierPartUnicodeEscapeStart:
423	if (current == 'u')
424	state = InIdentifierPartUnicodeEscape;
425	else
426	setDone(Bad);
427	break;
428	case InIdentifierStartUnicodeEscape:
429	if (!isHexDigit(current) \|\| !isHexDigit(next1) \|\| !isHexDigit(next2) \|\| !isHexDigit(next3)) {
430	setDone(Bad);
431	break;
432	}
433	token = convertUnicode(current, next1, next2, next3);
434	shift(3);
435	if (!isIdentStart(token)) {
436	setDone(Bad);
437	break;
438	}
439	record16(token);
440	state = InIdentifier;
441	break;
442	case InIdentifierPartUnicodeEscape:
443	if (!isHexDigit(current) \|\| !isHexDigit(next1) \|\| !isHexDigit(next2) \|\| !isHexDigit(next3)) {
444	setDone(Bad);
445	break;
446	}
447	token = convertUnicode(current, next1, next2, next3);
448	shift(3);
449	if (!isIdentPart(token)) {
450	setDone(Bad);
451	break;
452	}
453	record16(token);
454	state = InIdentifier;
455	break;
456	default:
457	ASSERT(!"Unhandled state in switch statement");
458	}
459
460	// move on to the next character
461	if (!done)
462	shift(1);
463	if (state != Start && state != InSingleLineComment)
464	atLineStart = false;
465	}
466
467	// no identifiers allowed directly after numeric literal, e.g. "3in" is bad
468	if ((state == Number \|\| state == Octal \|\| state == Hex) && isIdentStart(current))
469	state = Bad;
470
471	// terminate string
472	m_buffer8.append('\0');
473
474	#ifdef KJS_DEBUG_LEX
475	fprintf(stderr, "line: %d ", lineNo());
476	fprintf(stderr, "yytext (%x): ", m_buffer8[0]);
477	fprintf(stderr, "%s ", buffer8.data());
478	#endif
479
480	double dval = 0;
481	if (state == Number) {
482	dval = kjs_strtod(m_buffer8.data(), 0L);
483	} else if (state == Hex) { // scan hex numbers
484	const char* p = m_buffer8.data() + 2;
485	while (char c = *p++) {
486	dval *= 16;
487	dval += convertHex(c);
488	}
489
490	if (dval >= mantissaOverflowLowerBound)
491	dval = parseIntOverflow(m_buffer8.data() + 2, p - (m_buffer8.data() + 3), 16);
492
493	state = Number;
494	} else if (state == Octal) { // scan octal number
495	const char* p = m_buffer8.data() + 1;
496	while (char c = *p++) {
497	dval *= 8;
498	dval += c - '0';
499	}
500
501	if (dval >= mantissaOverflowLowerBound)
502	dval = parseIntOverflow(m_buffer8.data() + 1, p - (m_buffer8.data() + 2), 8);
503
504	state = Number;
505	}
506
507	#ifdef KJS_DEBUG_LEX
508	switch (state) {
509	case Eof:
510	printf("(EOF)\n");
511	break;
512	case Other:
513	printf("(Other)\n");
514	break;
515	case Identifier:
516	printf("(Identifier)/(Keyword)\n");
517	break;
518	case String:
519	printf("(String)\n");
520	break;
521	case Number:
522	printf("(Number)\n");
523	break;
524	default:
525	printf("(unknown)");
526	}
527	#endif
528
529	if (state != Identifier)
530	eatNextIdentifier = false;
531
532	restrKeyword = false;
533	delimited = false;
534	llocp->first_line = yylineno; // ???
535	llocp->last_line = yylineno;
536
537	switch (state) {
538	case Eof:
539	token = 0;
540	break;
541	case Other:
542	if (token == '}' \|\| token == ';')
543	delimited = true;
544	break;
545	case Identifier:
546	// Apply anonymous-function hack below (eat the identifier).
547	if (eatNextIdentifier) {
548	eatNextIdentifier = false;
549	token = lex(lvalp, llocp);
550	break;
551	}
552	lvalp->ident = makeIdentifier(m_buffer16);
553	token = IDENT;
554	break;
555	case IdentifierOrKeyword:
556	lvalp->ident = makeIdentifier(m_buffer16);
557	if ((token = mainTable.value(*lvalp->ident)) < 0) {
558	// Lookup for keyword failed, means this is an identifier.
559	token = IDENT;
560	break;
561	}
562	// Hack for "f = function somename() { ... }"; too hard to get into the grammar.
563	eatNextIdentifier = token == FUNCTION && lastToken == '=';
564	if (token == CONTINUE \|\| token == BREAK \|\| token == RETURN \|\| token == THROW)
565	restrKeyword = true;
566	break;
567	case String:
568	lvalp->string = makeUString(m_buffer16);
569	token = STRING;
570	break;
571	case Number:
572	lvalp->doubleValue = dval;
573	token = NUMBER;
574	break;
575	case Bad:
576	#ifdef KJS_DEBUG_LEX
577	fprintf(stderr, "yylex: ERROR.\n");
578	#endif
579	error = true;
580	return -1;
581	default:
582	ASSERT(!"unhandled numeration value in switch");
583	error = true;
584	return -1;
585	}
586	lastToken = token;
587	return token;
588	}
589
590	bool Lexer::isWhiteSpace() const
591	{
592	return current == '\t' \|\| current == 0x0b \|\| current == 0x0c \|\| isSeparatorSpace(current);
593	}
594
595	bool Lexer::isLineTerminator()
596	{
597	bool cr = (current == '\r');
598	bool lf = (current == '\n');
599	if (cr)
600	skipLF = true;
601	else if (lf)
602	skipCR = true;
603	return cr \|\| lf \|\| current == 0x2028 \|\| current == 0x2029;
604	}
605
606	bool Lexer::isIdentStart(int c)
607	{
608	return (category(c) & (Letter_Uppercase \| Letter_Lowercase \| Letter_Titlecase \| Letter_Modifier \| Letter_Other))
609	\|\| c == '$' \|\| c == '_';
610	}
611
612	bool Lexer::isIdentPart(int c)
613	{
614	return (category(c) & (Letter_Uppercase \| Letter_Lowercase \| Letter_Titlecase \| Letter_Modifier \| Letter_Other
615	\| Mark_NonSpacing \| Mark_SpacingCombining \| Number_DecimalDigit \| Punctuation_Connector))
616	\|\| c == '$' \|\| c == '_';
617	}
618
// True when c is one of the ASCII digits '0' through '9'.
static bool isDecimalDigit(int c)
{
  if (c < '0')
    return false;
  return c <= '9';
}
623
624	bool Lexer::isHexDigit(int c)
625	{
626	return (c >= '0' && c <= '9' \|\|
627	c >= 'a' && c <= 'f' \|\|
628	c >= 'A' && c <= 'F');
629	}
630
631	bool Lexer::isOctalDigit(int c)
632	{
633	return (c >= '0' && c <= '7');
634	}
635
636	int Lexer::matchPunctuator(int c1, int c2, int c3, int c4)
637	{
638	if (c1 == '>' && c2 == '>' && c3 == '>' && c4 == '=') {
639	shift(4);
640	return URSHIFTEQUAL;
641	} else if (c1 == '=' && c2 == '=' && c3 == '=') {
642	shift(3);
643	return STREQ;
644	} else if (c1 == '!' && c2 == '=' && c3 == '=') {
645	shift(3);
646	return STRNEQ;
647	} else if (c1 == '>' && c2 == '>' && c3 == '>') {
648	shift(3);
649	return URSHIFT;
650	} else if (c1 == '<' && c2 == '<' && c3 == '=') {
651	shift(3);
652	return LSHIFTEQUAL;
653	} else if (c1 == '>' && c2 == '>' && c3 == '=') {
654	shift(3);
655	return RSHIFTEQUAL;
656	} else if (c1 == '<' && c2 == '=') {
657	shift(2);
658	return LE;
659	} else if (c1 == '>' && c2 == '=') {
660	shift(2);
661	return GE;
662	} else if (c1 == '!' && c2 == '=') {
663	shift(2);
664	return NE;
665	} else if (c1 == '+' && c2 == '+') {
666	shift(2);
667	if (terminator)
668	return AUTOPLUSPLUS;
669	else
670	return PLUSPLUS;
671	} else if (c1 == '-' && c2 == '-') {
672	shift(2);
673	if (terminator)
674	return AUTOMINUSMINUS;
675	else
676	return MINUSMINUS;
677	} else if (c1 == '=' && c2 == '=') {
678	shift(2);
679	return EQEQ;
680	} else if (c1 == '+' && c2 == '=') {
681	shift(2);
682	return PLUSEQUAL;
683	} else if (c1 == '-' && c2 == '=') {
684	shift(2);
685	return MINUSEQUAL;
686	} else if (c1 == '*' && c2 == '=') {
687	shift(2);
688	return MULTEQUAL;
689	} else if (c1 == '/' && c2 == '=') {
690	shift(2);
691	return DIVEQUAL;
692	} else if (c1 == '&' && c2 == '=') {
693	shift(2);
694	return ANDEQUAL;
695	} else if (c1 == '^' && c2 == '=') {
696	shift(2);
697	return XOREQUAL;
698	} else if (c1 == '%' && c2 == '=') {
699	shift(2);
700	return MODEQUAL;
701	} else if (c1 == '\|' && c2 == '=') {
702	shift(2);
703	return OREQUAL;
704	} else if (c1 == '<' && c2 == '<') {
705	shift(2);
706	return LSHIFT;
707	} else if (c1 == '>' && c2 == '>') {
708	shift(2);
709	return RSHIFT;
710	} else if (c1 == '&' && c2 == '&') {
711	shift(2);
712	return AND;
713	} else if (c1 == '\|' && c2 == '\|') {
714	shift(2);
715	return OR;
716	}
717
718	switch(c1) {
719	case '=':
720	case '>':
721	case '<':
722	case ',':
723	case '!':
724	case '~':
725	case '?':
726	case ':':
727	case '.':
728	case '+':
729	case '-':
730	case '*':
731	case '/':
732	case '&':
733	case '\|':
734	case '^':
735	case '%':
736	case '(':
737	case ')':
738	case '{':
739	case '}':
740	case '[':
741	case ']':
742	case ';':
743	shift(1);
744	return static_cast<int>(c1);
745	default:
746	return -1;
747	}
748	}
749
750	unsigned short Lexer::singleEscape(unsigned short c)
751	{
752	switch(c) {
753	case 'b':
754	return 0x08;
755	case 't':
756	return 0x09;
757	case 'n':
758	return 0x0A;
759	case 'v':
760	return 0x0B;
761	case 'f':
762	return 0x0C;
763	case 'r':
764	return 0x0D;
765	case '"':
766	return 0x22;
767	case '\'':
768	return 0x27;
769	case '\\':
770	return 0x5C;
771	default:
772	return c;
773	}
774	}
775
776	unsigned short Lexer::convertOctal(int c1, int c2, int c3)
777	{
778	return static_cast<unsigned short>((c1 - '0') * 64 + (c2 - '0') * 8 + c3 - '0');
779	}
780
781	unsigned char Lexer::convertHex(int c)
782	{
783	if (c >= '0' && c <= '9')
784	return static_cast<unsigned char>(c - '0');
785	if (c >= 'a' && c <= 'f')
786	return static_cast<unsigned char>(c - 'a' + 10);
787	return static_cast<unsigned char>(c - 'A' + 10);
788	}
789
790	unsigned char Lexer::convertHex(int c1, int c2)
791	{
792	return ((convertHex(c1) << 4) + convertHex(c2));
793	}
794
795	UChar Lexer::convertUnicode(int c1, int c2, int c3, int c4)
796	{
797	unsigned char highByte = (convertHex(c1) << 4) + convertHex(c2);
798	unsigned char lowByte = (convertHex(c3) << 4) + convertHex(c4);
799	return (highByte << 8 \| lowByte);
800	}
801
802	void Lexer::record8(int c)
803	{
804	ASSERT(c >= 0);
805	ASSERT(c <= 0xff);
806	m_buffer8.append(static_cast<char>(c));
807	}
808
809	void Lexer::record16(int c)
810	{
811	ASSERT(c >= 0);
812	ASSERT(c <= USHRT_MAX);
813	record16(UChar(static_cast<unsigned short>(c)));
814	}
815
816	void Lexer::record16(UChar c)
817	{
818	m_buffer16.append(c);
819	}
820
821	bool Lexer::scanRegExp()
822	{
823	m_buffer16.clear();
824	bool lastWasEscape = false;
825	bool inBrackets = false;
826
827	while (1) {
828	if (isLineTerminator() \|\| current == -1)
829	return false;
830	else if (current != '/' \|\| lastWasEscape == true \|\| inBrackets == true)
831	{
832	// keep track of '[' and ']'
833	if (!lastWasEscape) {
834	if ( current == '[' && !inBrackets )
835	inBrackets = true;
836	if ( current == ']' && inBrackets )
837	inBrackets = false;
838	}
839	record16(current);
840	lastWasEscape =
841	!lastWasEscape && (current == '\\');
842	} else { // end of regexp
843	m_pattern = UString(m_buffer16);
844	m_buffer16.clear();
845	shift(1);
846	break;
847	}
848	shift(1);
849	}
850
851	while (isIdentPart(current)) {
852	record16(current);
853	shift(1);
854	}
855	m_flags = UString(m_buffer16);
856
857	return true;
858	}
859
860	void Lexer::clear()
861	{
862	deleteAllValues(m_strings);
863	Vector<UString*> newStrings;
864	newStrings.reserveCapacity(initialStringTableCapacity);
865	m_strings.swap(newStrings);
866
867	deleteAllValues(m_identifiers);
868	Vector<KJS::Identifier*> newIdentifiers;
869	newIdentifiers.reserveCapacity(initialStringTableCapacity);
870	m_identifiers.swap(newIdentifiers);
871
872	Vector<char> newBuffer8;
873	newBuffer8.reserveCapacity(initialReadBufferCapacity);
874	m_buffer8.swap(newBuffer8);
875
876	Vector<UChar> newBuffer16;
877	newBuffer16.reserveCapacity(initialReadBufferCapacity);
878	m_buffer16.swap(newBuffer16);
879
880	m_pattern = 0;
881	m_flags = 0;
882	}
883
884	Identifier* Lexer::makeIdentifier(const Vector<UChar>& buffer)
885	{
886	KJS::Identifier* identifier = new KJS::Identifier(buffer.data(), buffer.size());
887	m_identifiers.append(identifier);
888	return identifier;
889	}
890
891	UString* Lexer::makeUString(const Vector<UChar>& buffer)
892	{
893	UString* string = new UString(buffer);
894	m_strings.append(string);
895	return string;
896	}
897
898	} // namespace KJS

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: webkit/trunk/JavaScriptCore/kjs/lexer.cpp@ 31936

Download in other formats: