Context Navigation

lexer.cpp@ 36100

Visit:

Last change on this file since 36100 was 35245, checked in by [email protected], 17 years ago

Bug 18774: SQUIRRELFISH: print meaningful error messages <https://bugs.webkit.org/show_bug.cgi?id=18774>
<rdar://problem/5769353> SQUIRRELFISH: JavaScript error messages are missing informative text

Reviewed by Cameron Zwarich

Add support for decent error messages in JavaScript. This patch achieves this by
ensuring that the common errors and exceptions have messages that include the text of the
expression that triggered the exception. In addition, it attaches a number of properties to the
exception object detailing where in the source the expression came from.

Property svn:eol-style set to native

File size: 27.4 KB

Line
1	/*
2	* Copyright (C) 1999-2000 Harri Porten ([email protected])
3	* Copyright (C) 2006, 2007, 2008 Apple Inc. All Rights Reserved.
4	* Copyright (C) 2007 Cameron Zwarich ([email protected])
5	*
6	* This library is free software; you can redistribute it and/or
7	* modify it under the terms of the GNU Library General Public
8	* License as published by the Free Software Foundation; either
9	* version 2 of the License, or (at your option) any later version.
10	*
11	* This library is distributed in the hope that it will be useful,
12	* but WITHOUT ANY WARRANTY; without even the implied warranty of
13	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14	* Library General Public License for more details.
15	*
16	* You should have received a copy of the GNU Library General Public License
17	* along with this library; see the file COPYING.LIB. If not, write to
18	* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19	* Boston, MA 02110-1301, USA.
20	*
21	*/
22
23	#include "config.h"
24	#include "lexer.h"
25
26	#include "dtoa.h"
27	#include "JSFunction.h"
28	#include "nodes.h"
29	#include "NodeInfo.h"
30	#include "JSGlobalObjectFunctions.h"
31	#include <ctype.h>
32	#include <limits.h>
33	#include <string.h>
34	#include <wtf/Assertions.h>
35	#include <wtf/unicode/Unicode.h>
36
37	using namespace WTF;
38	using namespace Unicode;
39
40	// we can't specify the namespace in yacc's C output, so do it here
41	using namespace KJS;
42
43	#ifndef KDE_USE_FINAL
44	#include "grammar.h"
45	#endif
46
47	#include "lookup.h"
48	#include "lexer.lut.h"
49
50	// a bridge for yacc from the C world to C++
51	int kjsyylex(void* lvalp, void* llocp, void* globalData)
52	{
53	return static_cast<JSGlobalData*>(globalData)->lexer->lex(lvalp, llocp);
54	}
55
56	namespace KJS {
57
// Forward declaration; the definition appears after the Lexer character-class helpers.
58	static bool isDecimalDigit(int);
59
// Capacities reserved up front: the first for the m_buffer8/m_buffer16 scratch
// buffers, the second for the m_strings/m_identifiers tables.
60	static const size_t initialReadBufferCapacity = 32;
61	static const size_t initialStringTableCapacity = 64;
62
63	Lexer::Lexer(JSGlobalData* globalData)
64	: yylineno(1)
65	, m_restrKeyword(false)
66	, m_eatNextIdentifier(false)
67	, m_stackToken(-1)
68	, m_lastToken(-1)
69	, m_position(0)
70	, m_code(0)
71	, m_length(0)
72	, m_atLineStart(true)
73	, m_current(0)
74	, m_next1(0)
75	, m_next2(0)
76	, m_next3(0)
77	, m_currentOffset(0)
78	, m_nextOffset1(0)
79	, m_nextOffset2(0)
80	, m_nextOffset3(0)
81	, m_globalData(globalData)
82	, m_mainTable(KJS::mainTable)
83	{
84	m_buffer8.reserveCapacity(initialReadBufferCapacity);
85	m_buffer16.reserveCapacity(initialReadBufferCapacity);
86	m_strings.reserveCapacity(initialStringTableCapacity);
87	m_identifiers.reserveCapacity(initialStringTableCapacity);
88	}
89
90	Lexer::~Lexer()
91	{
92	m_mainTable.deleteTable();
93	}
94
95	void Lexer::setCode(int startingLineNumber, PassRefPtr<SourceProvider> source)
96	{
97	yylineno = startingLineNumber;
98	m_restrKeyword = false;
99	m_delimited = false;
100	m_eatNextIdentifier = false;
101	m_stackToken = -1;
102	m_lastToken = -1;
103
104	m_position = 0;
105	m_source = source;
106	m_code = m_source->data();
107	m_length = m_source->length();
108	m_skipLF = false;
109	m_skipCR = false;
110	m_error = false;
111	m_atLineStart = true;
112
113	// read first characters
114	shift(4);
115	}
116
117	void Lexer::shift(unsigned p)
118	{
119	// ECMA-262 calls for stripping Cf characters here, but we only do this for BOM,
120	// see <https://bugs.webkit.org/show_bug.cgi?id=4931>.
121
122	while (p--) {
123	m_current = m_next1;
124	m_next1 = m_next2;
125	m_next2 = m_next3;
126	m_currentOffset = m_nextOffset1;
127	m_nextOffset1 = m_nextOffset2;
128	m_nextOffset2 = m_nextOffset3;
129	do {
130	if (m_position >= m_length) {
131	m_nextOffset3 = m_position;
132	m_position++;
133	m_next3 = -1;
134	break;
135	}
136	m_nextOffset3 = m_position;
137	m_next3 = m_code[m_position++];
138	} while (m_next3 == 0xFEFF);
139	}
140	}
141
142	// called on each new line
143	void Lexer::nextLine()
144	{
145	yylineno++;
146	m_atLineStart = true;
147	}
148
149	void Lexer::setDone(State s)
150	{
151	m_state = s;
152	m_done = true;
153	}
154
// Scans and returns the next token from the source installed by setCode().
//
// p1 is treated as a YYSTYPE* that receives the token's semantic value (ident,
// string, doubleValue, or intValue when matchPunctuator() handles a brace).
// p2 is treated as a YYLTYPE* that receives the line number and the start/end
// source offsets of the token.
//
// Returns the token code; 0 when the scan finishes in the Eof state; -1 (with
// m_error set) when the scan finishes in the Bad state or an unexpected state.
155	int Lexer::lex(void* p1, void* p2)
156	{
157	YYSTYPE* lvalp = static_cast<YYSTYPE*>(p1);
158	YYLTYPE* llocp = static_cast<YYLTYPE*>(p2);
159	int token = 0;
160	m_state = Start;
161	unsigned short stringType = 0; // either single or double quotes
162	m_buffer8.clear();
163	m_buffer16.clear();
164	m_done = false;
165	m_terminator = false;
166	m_skipLF = false;
167	m_skipCR = false;
168
169	// did we push a token on the stack previously ?
170	// (after an automatic semicolon insertion)
171	if (m_stackToken >= 0) {
172	setDone(Other);
173	token = m_stackToken;
174	m_stackToken = 0;
175	}
176	int startOffset = m_currentOffset;
// State machine: each iteration examines m_current in the current state and then
// advances by one character, unless the state handler has finished the token.
177	while (!m_done) {
178	if (m_skipLF && m_current != '\n') // found \r but not \n afterwards
179	m_skipLF = false;
180	if (m_skipCR && m_current != '\r') // found \n but not \r afterwards
181	m_skipCR = false;
182	if (m_skipLF \|\| m_skipCR) { // found \r\n or \n\r -> eat the second one
183	m_skipLF = false;
184	m_skipCR = false;
185	shift(1);
186	}
187	switch (m_state) {
188	case Start:
189	startOffset = m_currentOffset;
190	if (isWhiteSpace()) {
191	// do nothing
192	} else if (m_current == '/' && m_next1 == '/') {
193	shift(1);
194	m_state = InSingleLineComment;
195	} else if (m_current == '/' && m_next1 == '*') {
196	shift(1);
197	m_state = InMultiLineComment;
198	} else if (m_current == -1) {
199	if (!m_terminator && !m_delimited) {
200	// automatic semicolon insertion if program incomplete
201	token = ';';
202	m_stackToken = 0;
203	setDone(Other);
204	} else
205	setDone(Eof);
206	} else if (isLineTerminator()) {
207	nextLine();
208	m_terminator = true;
209	if (m_restrKeyword) {
210	token = ';';
211	setDone(Other);
212	}
213	} else if (m_current == '"' \|\| m_current == '\'') {
214	m_state = InString;
215	stringType = static_cast<unsigned short>(m_current);
216	} else if (isIdentStart(m_current)) {
217	record16(m_current);
218	m_state = InIdentifierOrKeyword;
219	} else if (m_current == '\\')
220	m_state = InIdentifierStartUnicodeEscapeStart;
221	else if (m_current == '0') {
222	record8(m_current);
223	m_state = InNum0;
224	} else if (isDecimalDigit(m_current)) {
225	record8(m_current);
226	m_state = InNum;
227	} else if (m_current == '.' && isDecimalDigit(m_next1)) {
228	record8(m_current);
229	m_state = InDecimal;
230	// <!-- marks the beginning of a line comment (for www usage)
231	} else if (m_current == '<' && m_next1 == '!' && m_next2 == '-' && m_next3 == '-') {
232	shift(3);
233	m_state = InSingleLineComment;
234	// same for -->
235	} else if (m_atLineStart && m_current == '-' && m_next1 == '-' && m_next2 == '>') {
236	shift(2);
237	m_state = InSingleLineComment;
238	} else {
239	token = matchPunctuator(lvalp->intValue, m_current, m_next1, m_next2, m_next3);
240	if (token != -1)
241	setDone(Other);
242	else
243	setDone(Bad);
244	}
245	break;
246	case InString:
247	if (m_current == stringType) {
248	shift(1);
249	setDone(String);
250	} else if (isLineTerminator() \|\| m_current == -1)
251	setDone(Bad);
252	else if (m_current == '\\')
253	m_state = InEscapeSequence;
254	else
255	record16(m_current);
256	break;
257	// Escape Sequences inside of strings
258	case InEscapeSequence:
259	if (isOctalDigit(m_current)) {
260	if (m_current >= '0' && m_current <= '3' &&
261	isOctalDigit(m_next1) && isOctalDigit(m_next2)) {
262	record16(convertOctal(m_current, m_next1, m_next2));
263	shift(2);
264	m_state = InString;
265	} else if (isOctalDigit(m_current) && isOctalDigit(m_next1)) {
266	record16(convertOctal('0', m_current, m_next1));
267	shift(1);
268	m_state = InString;
269	} else if (isOctalDigit(m_current)) {
270	record16(convertOctal('0', '0', m_current));
271	m_state = InString;
272	} else
273	setDone(Bad);
274	} else if (m_current == 'x')
275	m_state = InHexEscape;
276	else if (m_current == 'u')
277	m_state = InUnicodeEscape;
278	else if (isLineTerminator()) {
279	nextLine();
280	m_state = InString;
281	} else {
282	record16(singleEscape(static_cast<unsigned short>(m_current)));
283	m_state = InString;
284	}
285	break;
286	case InHexEscape:
287	if (isHexDigit(m_current) && isHexDigit(m_next1)) {
288	m_state = InString;
289	record16(convertHex(m_current, m_next1));
290	shift(1);
291	} else if (m_current == stringType) {
292	record16('x');
293	shift(1);
294	setDone(String);
295	} else {
296	record16('x');
297	record16(m_current);
298	m_state = InString;
299	}
300	break;
301	case InUnicodeEscape:
302	if (isHexDigit(m_current) && isHexDigit(m_next1) && isHexDigit(m_next2) && isHexDigit(m_next3)) {
303	record16(convertUnicode(m_current, m_next1, m_next2, m_next3));
304	shift(3);
305	m_state = InString;
306	} else if (m_current == stringType) {
307	record16('u');
308	shift(1);
309	setDone(String);
310	} else
311	setDone(Bad);
312	break;
313	case InSingleLineComment:
314	if (isLineTerminator()) {
315	nextLine();
316	m_terminator = true;
317	if (m_restrKeyword) {
318	token = ';';
319	setDone(Other);
320	} else
321	m_state = Start;
322	} else if (m_current == -1)
323	setDone(Eof);
324	break;
325	case InMultiLineComment:
326	if (m_current == -1)
327	setDone(Bad);
328	else if (isLineTerminator())
329	nextLine();
330	else if (m_current == '*' && m_next1 == '/') {
331	m_state = Start;
332	shift(1);
333	}
334	break;
335	case InIdentifierOrKeyword:
336	case InIdentifier:
337	if (isIdentPart(m_current))
338	record16(m_current);
339	else if (m_current == '\\')
340	m_state = InIdentifierPartUnicodeEscapeStart;
341	else
342	setDone(m_state == InIdentifierOrKeyword ? IdentifierOrKeyword : Identifier);
343	break;
344	case InNum0:
345	if (m_current == 'x' \|\| m_current == 'X') {
346	record8(m_current);
347	m_state = InHex;
348	} else if (m_current == '.') {
349	record8(m_current);
350	m_state = InDecimal;
351	} else if (m_current == 'e' \|\| m_current == 'E') {
352	record8(m_current);
353	m_state = InExponentIndicator;
354	} else if (isOctalDigit(m_current)) {
355	record8(m_current);
356	m_state = InOctal;
357	} else if (isDecimalDigit(m_current)) {
358	record8(m_current);
359	m_state = InDecimal;
360	} else
361	setDone(Number);
362	break;
363	case InHex:
364	if (isHexDigit(m_current))
365	record8(m_current);
366	else
367	setDone(Hex);
368	break;
369	case InOctal:
370	if (isOctalDigit(m_current))
371	record8(m_current);
372	else if (isDecimalDigit(m_current)) {
373	record8(m_current);
374	m_state = InDecimal;
375	} else
376	setDone(Octal);
377	break;
378	case InNum:
379	if (isDecimalDigit(m_current))
380	record8(m_current);
381	else if (m_current == '.') {
382	record8(m_current);
383	m_state = InDecimal;
384	} else if (m_current == 'e' \|\| m_current == 'E') {
385	record8(m_current);
386	m_state = InExponentIndicator;
387	} else
388	setDone(Number);
389	break;
390	case InDecimal:
391	if (isDecimalDigit(m_current))
392	record8(m_current);
393	else if (m_current == 'e' \|\| m_current == 'E') {
394	record8(m_current);
395	m_state = InExponentIndicator;
396	} else
397	setDone(Number);
398	break;
399	case InExponentIndicator:
400	if (m_current == '+' \|\| m_current == '-')
401	record8(m_current);
402	else if (isDecimalDigit(m_current)) {
403	record8(m_current);
404	m_state = InExponent;
405	} else
406	setDone(Bad);
407	break;
408	case InExponent:
409	if (isDecimalDigit(m_current))
410	record8(m_current);
411	else
412	setDone(Number);
413	break;
414	case InIdentifierStartUnicodeEscapeStart:
415	if (m_current == 'u')
416	m_state = InIdentifierStartUnicodeEscape;
417	else
418	setDone(Bad);
419	break;
420	case InIdentifierPartUnicodeEscapeStart:
421	if (m_current == 'u')
422	m_state = InIdentifierPartUnicodeEscape;
423	else
424	setDone(Bad);
425	break;
426	case InIdentifierStartUnicodeEscape:
427	if (!isHexDigit(m_current) \|\| !isHexDigit(m_next1) \|\| !isHexDigit(m_next2) \|\| !isHexDigit(m_next3)) {
428	setDone(Bad);
429	break;
430	}
431	token = convertUnicode(m_current, m_next1, m_next2, m_next3);
432	shift(3);
433	if (!isIdentStart(token)) {
434	setDone(Bad);
435	break;
436	}
437	record16(token);
438	m_state = InIdentifier;
439	break;
440	case InIdentifierPartUnicodeEscape:
441	if (!isHexDigit(m_current) \|\| !isHexDigit(m_next1) \|\| !isHexDigit(m_next2) \|\| !isHexDigit(m_next3)) {
442	setDone(Bad);
443	break;
444	}
445	token = convertUnicode(m_current, m_next1, m_next2, m_next3);
446	shift(3);
447	if (!isIdentPart(token)) {
448	setDone(Bad);
449	break;
450	}
451	record16(token);
452	m_state = InIdentifier;
453	break;
454	default:
455	ASSERT(!"Unhandled state in switch statement");
456	}
457
458	// move on to the next character
459	if (!m_done)
460	shift(1);
461	if (m_state != Start && m_state != InSingleLineComment)
462	m_atLineStart = false;
463	}
464
465	// no identifiers allowed directly after numeric literal, e.g. "3in" is bad
466	if ((m_state == Number \|\| m_state == Octal \|\| m_state == Hex) && isIdentStart(m_current))
467	m_state = Bad;
468
469	// terminate string
470	m_buffer8.append('\0');
471
472	#ifdef KJS_DEBUG_LEX
473	fprintf(stderr, "line: %d ", lineNo());
474	fprintf(stderr, "yytext (%x): ", m_buffer8[0]);
475	fprintf(stderr, "%s ", m_buffer8.data());
476	#endif
477
// Convert the characters collected in m_buffer8 to a double. Hex and octal
// literals are accumulated digit by digit, starting past their 2- or 1-character
// prefix, and are re-parsed with parseIntOverflow() once the running value
// reaches mantissaOverflowLowerBound. Both then continue as state Number.
478	double dval = 0;
479	if (m_state == Number)
480	dval = strtod(m_buffer8.data(), 0L);
481	else if (m_state == Hex) { // scan hex numbers
482	const char* p = m_buffer8.data() + 2;
483	while (char c = *p++) {
484	dval *= 16;
485	dval += convertHex(c);
486	}
487
488	if (dval >= mantissaOverflowLowerBound)
489	dval = parseIntOverflow(m_buffer8.data() + 2, p - (m_buffer8.data() + 3), 16);
490
491	m_state = Number;
492	} else if (m_state == Octal) { // scan octal number
493	const char* p = m_buffer8.data() + 1;
494	while (char c = *p++) {
495	dval *= 8;
496	dval += c - '0';
497	}
498
499	if (dval >= mantissaOverflowLowerBound)
500	dval = parseIntOverflow(m_buffer8.data() + 1, p - (m_buffer8.data() + 2), 8);
501
502	m_state = Number;
503	}
504
505	#ifdef KJS_DEBUG_LEX
506	switch (m_state) {
507	case Eof:
508	printf("(EOF)\n");
509	break;
510	case Other:
511	printf("(Other)\n");
512	break;
513	case Identifier:
514	printf("(Identifier)/(Keyword)\n");
515	break;
516	case String:
517	printf("(String)\n");
518	break;
519	case Number:
520	printf("(Number)\n");
521	break;
522	default:
523	printf("(unknown)");
524	}
525	#endif
526
527	if (m_state != Identifier)
528	m_eatNextIdentifier = false;
529
530	m_restrKeyword = false;
531	m_delimited = false;
// Report the token's location: the current line number for both line fields, and
// the start/end source offsets in the column fields.
532	llocp->first_line = yylineno;
533	llocp->last_line = yylineno;
534	llocp->first_column = startOffset;
535	llocp->last_column = m_currentOffset;
// Map the final lexer state to the token code (and semantic value) handed back.
536	switch (m_state) {
537	case Eof:
538	token = 0;
539	break;
540	case Other:
541	if (token == '}' \|\| token == ';')
542	m_delimited = true;
543	break;
544	case Identifier:
545	// Apply anonymous-function hack below (eat the identifier).
546	if (m_eatNextIdentifier) {
547	m_eatNextIdentifier = false;
548	token = lex(lvalp, llocp);
549	break;
550	}
551	lvalp->ident = makeIdentifier(m_buffer16);
552	token = IDENT;
553	break;
554	case IdentifierOrKeyword: {
555	lvalp->ident = makeIdentifier(m_buffer16);
556	const HashEntry* entry = m_mainTable.entry(m_globalData, *lvalp->ident);
557	if (!entry) {
558	// Lookup for keyword failed, means this is an identifier.
559	token = IDENT;
560	break;
561	}
562	token = entry->integerValue;
563	// Hack for "f = function somename() { ... }"; too hard to get into the grammar.
564	m_eatNextIdentifier = token == FUNCTION && m_lastToken == '=';
565	if (token == CONTINUE \|\| token == BREAK \|\| token == RETURN \|\| token == THROW)
566	m_restrKeyword = true;
567	break;
568	}
569	case String:
570	lvalp->string = makeUString(m_buffer16);
571	token = STRING;
572	break;
573	case Number:
574	lvalp->doubleValue = dval;
575	token = NUMBER;
576	break;
577	case Bad:
578	#ifdef KJS_DEBUG_LEX
579	fprintf(stderr, "yylex: ERROR.\n");
580	#endif
581	m_error = true;
582	return -1;
583	default:
584	ASSERT(!"unhandled numeration value in switch");
585	m_error = true;
586	return -1;
587	}
588	m_lastToken = token;
589	return token;
590	}
591
592	bool Lexer::isWhiteSpace() const
593	{
594	return m_current == '\t' \|\| m_current == 0x0b \|\| m_current == 0x0c \|\| isSeparatorSpace(m_current);
595	}
596
597	bool Lexer::isLineTerminator()
598	{
599	bool cr = (m_current == '\r');
600	bool lf = (m_current == '\n');
601	if (cr)
602	m_skipLF = true;
603	else if (lf)
604	m_skipCR = true;
605	return cr \|\| lf \|\| m_current == 0x2028 \|\| m_current == 0x2029;
606	}
607
608	bool Lexer::isIdentStart(int c)
609	{
610	return (category(c) & (Letter_Uppercase \| Letter_Lowercase \| Letter_Titlecase \| Letter_Modifier \| Letter_Other))
611	\|\| c == '$' \|\| c == '_';
612	}
613
614	bool Lexer::isIdentPart(int c)
615	{
616	return (category(c) & (Letter_Uppercase \| Letter_Lowercase \| Letter_Titlecase \| Letter_Modifier \| Letter_Other
617	\| Mark_NonSpacing \| Mark_SpacingCombining \| Number_DecimalDigit \| Punctuation_Connector))
618	\|\| c == '$' \|\| c == '_';
619	}
620
// True when c is one of the ASCII digits '0'..'9'.
static bool isDecimalDigit(int c)
{
    if (c < '0')
        return false;
    return c <= '9';
}
625
626	bool Lexer::isHexDigit(int c)
627	{
628	return (c >= '0' && c <= '9'
629	\|\| c >= 'a' && c <= 'f'
630	\|\| c >= 'A' && c <= 'F');
631	}
632
633	bool Lexer::isOctalDigit(int c)
634	{
635	return (c >= '0' && c <= '7');
636	}
637
638	int Lexer::matchPunctuator(int& charPos, int c1, int c2, int c3, int c4)
639	{
640	if (c1 == '>' && c2 == '>' && c3 == '>' && c4 == '=') {
641	shift(4);
642	return URSHIFTEQUAL;
643	}
644	if (c1 == '=' && c2 == '=' && c3 == '=') {
645	shift(3);
646	return STREQ;
647	}
648	if (c1 == '!' && c2 == '=' && c3 == '=') {
649	shift(3);
650	return STRNEQ;
651	}
652	if (c1 == '>' && c2 == '>' && c3 == '>') {
653	shift(3);
654	return URSHIFT;
655	}
656	if (c1 == '<' && c2 == '<' && c3 == '=') {
657	shift(3);
658	return LSHIFTEQUAL;
659	}
660	if (c1 == '>' && c2 == '>' && c3 == '=') {
661	shift(3);
662	return RSHIFTEQUAL;
663	}
664	if (c1 == '<' && c2 == '=') {
665	shift(2);
666	return LE;
667	}
668	if (c1 == '>' && c2 == '=') {
669	shift(2);
670	return GE;
671	}
672	if (c1 == '!' && c2 == '=') {
673	shift(2);
674	return NE;
675	}
676	if (c1 == '+' && c2 == '+') {
677	shift(2);
678	if (m_terminator)
679	return AUTOPLUSPLUS;
680	return PLUSPLUS;
681	}
682	if (c1 == '-' && c2 == '-') {
683	shift(2);
684	if (m_terminator)
685	return AUTOMINUSMINUS;
686	return MINUSMINUS;
687	}
688	if (c1 == '=' && c2 == '=') {
689	shift(2);
690	return EQEQ;
691	}
692	if (c1 == '+' && c2 == '=') {
693	shift(2);
694	return PLUSEQUAL;
695	}
696	if (c1 == '-' && c2 == '=') {
697	shift(2);
698	return MINUSEQUAL;
699	}
700	if (c1 == '*' && c2 == '=') {
701	shift(2);
702	return MULTEQUAL;
703	}
704	if (c1 == '/' && c2 == '=') {
705	shift(2);
706	return DIVEQUAL;
707	}
708	if (c1 == '&' && c2 == '=') {
709	shift(2);
710	return ANDEQUAL;
711	}
712	if (c1 == '^' && c2 == '=') {
713	shift(2);
714	return XOREQUAL;
715	}
716	if (c1 == '%' && c2 == '=') {
717	shift(2);
718	return MODEQUAL;
719	}
720	if (c1 == '\|' && c2 == '=') {
721	shift(2);
722	return OREQUAL;
723	}
724	if (c1 == '<' && c2 == '<') {
725	shift(2);
726	return LSHIFT;
727	}
728	if (c1 == '>' && c2 == '>') {
729	shift(2);
730	return RSHIFT;
731	}
732	if (c1 == '&' && c2 == '&') {
733	shift(2);
734	return AND;
735	}
736	if (c1 == '\|' && c2 == '\|') {
737	shift(2);
738	return OR;
739	}
740
741	switch (c1) {
742	case '=':
743	case '>':
744	case '<':
745	case ',':
746	case '!':
747	case '~':
748	case '?':
749	case ':':
750	case '.':
751	case '+':
752	case '-':
753	case '*':
754	case '/':
755	case '&':
756	case '\|':
757	case '^':
758	case '%':
759	case '(':
760	case ')':
761	case '[':
762	case ']':
763	case ';':
764	shift(1);
765	return static_cast<int>(c1);
766	case '{':
767	charPos = m_position - 4;
768	shift(1);
769	return OPENBRACE;
770	case '}':
771	charPos = m_position - 4;
772	shift(1);
773	return CLOSEBRACE;
774	default:
775	return -1;
776	}
777	}
778
779	unsigned short Lexer::singleEscape(unsigned short c)
780	{
781	switch (c) {
782	case 'b':
783	return 0x08;
784	case 't':
785	return 0x09;
786	case 'n':
787	return 0x0A;
788	case 'v':
789	return 0x0B;
790	case 'f':
791	return 0x0C;
792	case 'r':
793	return 0x0D;
794	case '"':
795	return 0x22;
796	case '\'':
797	return 0x27;
798	case '\\':
799	return 0x5C;
800	default:
801	return c;
802	}
803	}
804
805	unsigned short Lexer::convertOctal(int c1, int c2, int c3)
806	{
807	return static_cast<unsigned short>((c1 - '0') * 64 + (c2 - '0') * 8 + c3 - '0');
808	}
809
810	unsigned char Lexer::convertHex(int c)
811	{
812	if (c >= '0' && c <= '9')
813	return static_cast<unsigned char>(c - '0');
814	if (c >= 'a' && c <= 'f')
815	return static_cast<unsigned char>(c - 'a' + 10);
816	return static_cast<unsigned char>(c - 'A' + 10);
817	}
818
819	unsigned char Lexer::convertHex(int c1, int c2)
820	{
821	return ((convertHex(c1) << 4) + convertHex(c2));
822	}
823
824	UChar Lexer::convertUnicode(int c1, int c2, int c3, int c4)
825	{
826	unsigned char highByte = (convertHex(c1) << 4) + convertHex(c2);
827	unsigned char lowByte = (convertHex(c3) << 4) + convertHex(c4);
828	return (highByte << 8 \| lowByte);
829	}
830
831	void Lexer::record8(int c)
832	{
833	ASSERT(c >= 0);
834	ASSERT(c <= 0xff);
835	m_buffer8.append(static_cast<char>(c));
836	}
837
838	void Lexer::record16(int c)
839	{
840	ASSERT(c >= 0);
841	ASSERT(c <= USHRT_MAX);
842	record16(UChar(static_cast<unsigned short>(c)));
843	}
844
845	void Lexer::record16(UChar c)
846	{
847	m_buffer16.append(c);
848	}
849
850	bool Lexer::scanRegExp()
851	{
852	m_buffer16.clear();
853	bool lastWasEscape = false;
854	bool inBrackets = false;
855
856	while (1) {
857	if (isLineTerminator() \|\| m_current == -1)
858	return false;
859	else if (m_current != '/' \|\| lastWasEscape == true \|\| inBrackets == true) {
860	// keep track of '[' and ']'
861	if (!lastWasEscape) {
862	if ( m_current == '[' && !inBrackets )
863	inBrackets = true;
864	if ( m_current == ']' && inBrackets )
865	inBrackets = false;
866	}
867	record16(m_current);
868	lastWasEscape =
869	!lastWasEscape && (m_current == '\\');
870	} else { // end of regexp
871	m_pattern = UString(m_buffer16);
872	m_buffer16.clear();
873	shift(1);
874	break;
875	}
876	shift(1);
877	}
878
879	while (isIdentPart(m_current)) {
880	record16(m_current);
881	shift(1);
882	}
883	m_flags = UString(m_buffer16);
884
885	return true;
886	}
887
888	void Lexer::clear()
889	{
890	deleteAllValues(m_strings);
891	Vector<UString*> newStrings;
892	newStrings.reserveCapacity(initialStringTableCapacity);
893	m_strings.swap(newStrings);
894
895	deleteAllValues(m_identifiers);
896	Vector<KJS::Identifier*> newIdentifiers;
897	newIdentifiers.reserveCapacity(initialStringTableCapacity);
898	m_identifiers.swap(newIdentifiers);
899
900	Vector<char> newBuffer8;
901	newBuffer8.reserveCapacity(initialReadBufferCapacity);
902	m_buffer8.swap(newBuffer8);
903
904	Vector<UChar> newBuffer16;
905	newBuffer16.reserveCapacity(initialReadBufferCapacity);
906	m_buffer16.swap(newBuffer16);
907
908	m_pattern = 0;
909	m_flags = 0;
910	}
911
912	Identifier* Lexer::makeIdentifier(const Vector<UChar>& buffer)
913	{
914	KJS::Identifier* identifier = new KJS::Identifier(m_globalData, buffer.data(), buffer.size());
915	m_identifiers.append(identifier);
916	return identifier;
917	}
918
919	UString* Lexer::makeUString(const Vector<UChar>& buffer)
920	{
921	UString* string = new UString(buffer);
922	m_strings.append(string);
923	return string;
924	}
925
926	} // namespace KJS

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: webkit/trunk/JavaScriptCore/kjs/lexer.cpp@ 36100

Download in other formats: