Context Navigation

lexer.cpp@ 20310

Visit:

Last change on this file since 20310 was 20304, checked in by bdash, 18 years ago

2007-03-19 Mark Rowe <[email protected]>

Rubber-stamped by Brady.

Update references to bugzilla.opendarwin.org with bugs.webkit.org.

bindings/c/c_utility.cpp: (KJS::Bindings::convertUTF8ToUTF16):
kjs/function.cpp: (KJS::FunctionImp::callAsFunction):
kjs/grammar.y:
kjs/keywords.table:
kjs/lexer.cpp: (KJS::Lexer::shift):

2007-03-19 Mark Rowe <[email protected]>

Rubber-stamped by Brady.

Update references to bugzilla.opendarwin.org with bugs.webkit.org.

ChangeLog:
WebCore.vcproj/WebCore/build-generated-files.sh:
manual-tests/ATSU-bad-layout.html:
manual-tests/accidental-strict-mode.html:
manual-tests/applet-param-no-name.html:
manual-tests/bidi-parens.html:
manual-tests/bugzilla-3855.html:
manual-tests/bugzilla-4840.html:
manual-tests/bugzilla-6821.html:
manual-tests/containing-block-position-chage.html:
manual-tests/contenteditable-link.html:
manual-tests/css3-cursor-fallback-quirks.html:
manual-tests/css3-cursor-fallback-strict.html:
manual-tests/custom-cursors.html:
manual-tests/dictionary-scrolled-iframe.html:
manual-tests/dom-manipulation-on-resize.html:
manual-tests/drag-image-to-address-bar.html:
manual-tests/empty-link-target.html:
manual-tests/empty-title-popup.html:
manual-tests/first-line-style-crash.html:
manual-tests/invalid-mouse-event.html:
manual-tests/left-overflow-repaint.html:
manual-tests/linkjump-3.html:
manual-tests/log-keypress-events.html:
manual-tests/named-window-blank-target.html:
manual-tests/plain-text-paste.html:
manual-tests/plugin-controller-datasource.html:
manual-tests/pre-tab-selection-rect.html:
manual-tests/redirection-target.html:
manual-tests/redraw-page-cache-visited-links.html:
manual-tests/reset-initiatedDrag.html:
manual-tests/resources/named-window-blank-target-step2.html:
manual-tests/resources/named-window-blank-target-step3.html:
manual-tests/resources/named-window-blank-target-step4.html:
manual-tests/resources/redraw-page-cache-visited-links-2.html:
manual-tests/scrollbar-hittest.html:
manual-tests/scrollbar-hittest2.html:
manual-tests/subview-click-assertion.html:
manual-tests/tabbing-input-google.html:
manual-tests/text-field-autoscroll.html:
manual-tests/textarea-after-stylesheet-link.html:
manual-tests/textarea-focus.html:
manual-tests/whitespace-pre-affinity.html:

2007-03-19 Mark Rowe <[email protected]>

Rubber-stamped by Brady.

Update references to bugzilla.opendarwin.org with bugs.webkit.org.

WebInspector/webInspector/inspector.css:
WebView/WebHTMLView.mm: (-[WebHTMLView firstRectForCharacterRange:]):
WebView/WebView.mm: (-[WebView initWithFrame:frameName:groupName:]):

Property svn:eol-style set to native

File size: 21.8 KB

Line
1	// -*- c-basic-offset: 2 -*-
2	/*
3	* This file is part of the KDE libraries
4	* Copyright (C) 1999-2000 Harri Porten ([email protected])
5	* Copyright (C) 2006 Apple Computer, Inc.
6	*
7	* This library is free software; you can redistribute it and/or
8	* modify it under the terms of the GNU Library General Public
9	* License as published by the Free Software Foundation; either
10	* version 2 of the License, or (at your option) any later version.
11	*
12	* This library is distributed in the hope that it will be useful,
13	* but WITHOUT ANY WARRANTY; without even the implied warranty of
14	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15	* Library General Public License for more details.
16	*
17	* You should have received a copy of the GNU Library General Public License
18	* along with this library; see the file COPYING.LIB. If not, write to
19	* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20	* Boston, MA 02110-1301, USA.
21	*
22	*/
23
24	#include "config.h"
25	#include "lexer.h"
26
27	#include <ctype.h>
28	#include <string.h>
29
30	#include "interpreter.h"
31	#include "nodes.h"
32	#include <wtf/unicode/Unicode.h>
33
34	using namespace WTF;
35	using namespace Unicode;
36
37	// we can't specify the namespace in yacc's C output, so do it here
38	using namespace KJS;
39
40	#ifndef KDE_USE_FINAL
41	#include "grammar.h"
42	#endif
43
44	#include "lookup.h"
45	#include "lexer.lut.h"
46
47	extern YYLTYPE kjsyylloc; // global bison variable holding token info
48
49	// a bridge for yacc from the C world to C++
50	int kjsyylex()
51	{
52	return Lexer::curr()->lex();
53	}
54
55	namespace KJS {
56
57	static Lexer* currLexer = 0;
58
59	static bool isDecimalDigit(int);
60
61	Lexer::Lexer()
62	: yylineno(1),
63	size8(128), size16(128), restrKeyword(false),
64	eatNextIdentifier(false), stackToken(-1), lastToken(-1), pos(0),
65	code(0), length(0),
66	#ifndef KJS_PURE_ECMA
67	bol(true),
68	#endif
69	current(0), next1(0), next2(0), next3(0),
70	strings(0), numStrings(0), stringsCapacity(0),
71	identifiers(0), numIdentifiers(0), identifiersCapacity(0)
72	{
73	// allocate space for read buffers
74	buffer8 = new char[size8];
75	buffer16 = new KJS::UChar[size16];
76	currLexer = this;
77	}
78
79	Lexer::~Lexer()
80	{
81	doneParsing();
82	delete [] buffer8;
83	delete [] buffer16;
84	}
85
86	Lexer *Lexer::curr()
87	{
88	if (!currLexer) {
89	// create singleton instance
90	currLexer = new Lexer();
91	}
92	return currLexer;
93	}
94
95	#ifdef KJS_DEBUG_MEM
96	void Lexer::globalClear()
97	{
98	delete currLexer;
99	currLexer = 0L;
100	}
101	#endif
102
103	void Lexer::setCode(const UString &sourceURL, int startingLineNumber, const KJS::UChar *c, unsigned int len)
104	{
105	yylineno = 1 + startingLineNumber;
106	m_sourceURL = sourceURL;
107	restrKeyword = false;
108	delimited = false;
109	eatNextIdentifier = false;
110	stackToken = -1;
111	lastToken = -1;
112	pos = 0;
113	code = c;
114	length = len;
115	skipLF = false;
116	skipCR = false;
117	error = false;
118	#ifndef KJS_PURE_ECMA
119	bol = true;
120	#endif
121
122	// read first characters
123	current = (length > 0) ? code[0].uc : -1;
124	next1 = (length > 1) ? code[1].uc : -1;
125	next2 = (length > 2) ? code[2].uc : -1;
126	next3 = (length > 3) ? code[3].uc : -1;
127	}
128
129	void Lexer::shift(unsigned int p)
130	{
131	// Here would be a good place to strip Cf characters, but that has caused compatibility problems:
132	// <http://bugs.webkit.org/show_bug.cgi?id=10183>.
133	while (p--) {
134	pos++;
135	current = next1;
136	next1 = next2;
137	next2 = next3;
138	next3 = (pos + 3 < length) ? code[pos + 3].uc : -1;
139	}
140	}
141
142	// called on each new line
143	void Lexer::nextLine()
144	{
145	yylineno++;
146	#ifndef KJS_PURE_ECMA
147	bol = true;
148	#endif
149	}
150
151	void Lexer::setDone(State s)
152	{
153	state = s;
154	done = true;
155	}
156
157	int Lexer::lex()
158	{
159	int token = 0;
160	state = Start;
161	unsigned short stringType = 0; // either single or double quotes
162	pos8 = pos16 = 0;
163	done = false;
164	terminator = false;
165	skipLF = false;
166	skipCR = false;
167
168	// did we push a token on the stack previously ?
169	// (after an automatic semicolon insertion)
170	if (stackToken >= 0) {
171	setDone(Other);
172	token = stackToken;
173	stackToken = 0;
174	}
175
176	while (!done) {
177	if (skipLF && current != '\n') // found \r but not \n afterwards
178	skipLF = false;
179	if (skipCR && current != '\r') // found \n but not \r afterwards
180	skipCR = false;
181	if (skipLF \|\| skipCR) // found \r\n or \n\r -> eat the second one
182	{
183	skipLF = false;
184	skipCR = false;
185	shift(1);
186	}
187	switch (state) {
188	case Start:
189	if (isWhiteSpace()) {
190	// do nothing
191	} else if (current == '/' && next1 == '/') {
192	shift(1);
193	state = InSingleLineComment;
194	} else if (current == '/' && next1 == '*') {
195	shift(1);
196	state = InMultiLineComment;
197	} else if (current == -1) {
198	if (!terminator && !delimited) {
199	// automatic semicolon insertion if program incomplete
200	token = ';';
201	stackToken = 0;
202	setDone(Other);
203	} else
204	setDone(Eof);
205	} else if (isLineTerminator()) {
206	nextLine();
207	terminator = true;
208	if (restrKeyword) {
209	token = ';';
210	setDone(Other);
211	}
212	} else if (current == '"' \|\| current == '\'') {
213	state = InString;
214	stringType = static_cast<unsigned short>(current);
215	} else if (isIdentStart(current)) {
216	record16(current);
217	state = InIdentifierOrKeyword;
218	} else if (current == '\\') {
219	state = InIdentifierUnicodeEscapeStart;
220	} else if (current == '0') {
221	record8(current);
222	state = InNum0;
223	} else if (isDecimalDigit(current)) {
224	record8(current);
225	state = InNum;
226	} else if (current == '.' && isDecimalDigit(next1)) {
227	record8(current);
228	state = InDecimal;
229	#ifndef KJS_PURE_ECMA
230	// <!-- marks the beginning of a line comment (for www usage)
231	} else if (current == '<' && next1 == '!' &&
232	next2 == '-' && next3 == '-') {
233	shift(3);
234	state = InSingleLineComment;
235	// same for -->
236	} else if (bol && current == '-' && next1 == '-' && next2 == '>') {
237	shift(2);
238	state = InSingleLineComment;
239	#endif
240	} else {
241	token = matchPunctuator(current, next1, next2, next3);
242	if (token != -1) {
243	setDone(Other);
244	} else {
245	// cerr << "encountered unknown character" << endl;
246	setDone(Bad);
247	}
248	}
249	break;
250	case InString:
251	if (current == stringType) {
252	shift(1);
253	setDone(String);
254	} else if (isLineTerminator() \|\| current == -1) {
255	setDone(Bad);
256	} else if (current == '\\') {
257	state = InEscapeSequence;
258	} else {
259	record16(current);
260	}
261	break;
262	// Escape Sequences inside of strings
263	case InEscapeSequence:
264	if (isOctalDigit(current)) {
265	if (current >= '0' && current <= '3' &&
266	isOctalDigit(next1) && isOctalDigit(next2)) {
267	record16(convertOctal(current, next1, next2));
268	shift(2);
269	state = InString;
270	} else if (isOctalDigit(current) && isOctalDigit(next1)) {
271	record16(convertOctal('0', current, next1));
272	shift(1);
273	state = InString;
274	} else if (isOctalDigit(current)) {
275	record16(convertOctal('0', '0', current));
276	state = InString;
277	} else {
278	setDone(Bad);
279	}
280	} else if (current == 'x')
281	state = InHexEscape;
282	else if (current == 'u')
283	state = InUnicodeEscape;
284	else if (isLineTerminator()) {
285	nextLine();
286	state = InString;
287	} else {
288	record16(singleEscape(static_cast<unsigned short>(current)));
289	state = InString;
290	}
291	break;
292	case InHexEscape:
293	if (isHexDigit(current) && isHexDigit(next1)) {
294	state = InString;
295	record16(convertHex(current, next1));
296	shift(1);
297	} else if (current == stringType) {
298	record16('x');
299	shift(1);
300	setDone(String);
301	} else {
302	record16('x');
303	record16(current);
304	state = InString;
305	}
306	break;
307	case InUnicodeEscape:
308	if (isHexDigit(current) && isHexDigit(next1) && isHexDigit(next2) && isHexDigit(next3)) {
309	record16(convertUnicode(current, next1, next2, next3));
310	shift(3);
311	state = InString;
312	} else if (current == stringType) {
313	record16('u');
314	shift(1);
315	setDone(String);
316	} else {
317	setDone(Bad);
318	}
319	break;
320	case InSingleLineComment:
321	if (isLineTerminator()) {
322	nextLine();
323	terminator = true;
324	if (restrKeyword) {
325	token = ';';
326	setDone(Other);
327	} else
328	state = Start;
329	} else if (current == -1) {
330	setDone(Eof);
331	}
332	break;
333	case InMultiLineComment:
334	if (current == -1) {
335	setDone(Bad);
336	} else if (isLineTerminator()) {
337	nextLine();
338	} else if (current == '*' && next1 == '/') {
339	state = Start;
340	shift(1);
341	}
342	break;
343	case InIdentifierOrKeyword:
344	case InIdentifier:
345	if (isIdentPart(current))
346	record16(current);
347	else if (current == '\\')
348	state = InIdentifierUnicodeEscapeStart;
349	else
350	setDone(state == InIdentifierOrKeyword ? IdentifierOrKeyword : Identifier);
351	break;
352	case InNum0:
353	if (current == 'x' \|\| current == 'X') {
354	record8(current);
355	state = InHex;
356	} else if (current == '.') {
357	record8(current);
358	state = InDecimal;
359	} else if (current == 'e' \|\| current == 'E') {
360	record8(current);
361	state = InExponentIndicator;
362	} else if (isOctalDigit(current)) {
363	record8(current);
364	state = InOctal;
365	} else if (isDecimalDigit(current)) {
366	record8(current);
367	state = InDecimal;
368	} else {
369	setDone(Number);
370	}
371	break;
372	case InHex:
373	if (isHexDigit(current)) {
374	record8(current);
375	} else {
376	setDone(Hex);
377	}
378	break;
379	case InOctal:
380	if (isOctalDigit(current)) {
381	record8(current);
382	}
383	else if (isDecimalDigit(current)) {
384	record8(current);
385	state = InDecimal;
386	} else
387	setDone(Octal);
388	break;
389	case InNum:
390	if (isDecimalDigit(current)) {
391	record8(current);
392	} else if (current == '.') {
393	record8(current);
394	state = InDecimal;
395	} else if (current == 'e' \|\| current == 'E') {
396	record8(current);
397	state = InExponentIndicator;
398	} else
399	setDone(Number);
400	break;
401	case InDecimal:
402	if (isDecimalDigit(current)) {
403	record8(current);
404	} else if (current == 'e' \|\| current == 'E') {
405	record8(current);
406	state = InExponentIndicator;
407	} else
408	setDone(Number);
409	break;
410	case InExponentIndicator:
411	if (current == '+' \|\| current == '-') {
412	record8(current);
413	} else if (isDecimalDigit(current)) {
414	record8(current);
415	state = InExponent;
416	} else
417	setDone(Bad);
418	break;
419	case InExponent:
420	if (isDecimalDigit(current)) {
421	record8(current);
422	} else
423	setDone(Number);
424	break;
425	case InIdentifierUnicodeEscapeStart:
426	if (current == 'u')
427	state = InIdentifierUnicodeEscape;
428	else
429	setDone(Bad);
430	break;
431	case InIdentifierUnicodeEscape:
432	if (isHexDigit(current) && isHexDigit(next1) && isHexDigit(next2) && isHexDigit(next3)) {
433	record16(convertUnicode(current, next1, next2, next3));
434	shift(3);
435	state = InIdentifier;
436	} else {
437	setDone(Bad);
438	}
439	break;
440	default:
441	assert(!"Unhandled state in switch statement");
442	}
443
444	// move on to the next character
445	if (!done)
446	shift(1);
447	#ifndef KJS_PURE_ECMA
448	if (state != Start && state != InSingleLineComment)
449	bol = false;
450	#endif
451	}
452
453	// no identifiers allowed directly after numeric literal, e.g. "3in" is bad
454	if ((state == Number \|\| state == Octal \|\| state == Hex) && isIdentStart(current))
455	state = Bad;
456
457	// terminate string
458	buffer8[pos8] = '\0';
459
460	#ifdef KJS_DEBUG_LEX
461	fprintf(stderr, "line: %d ", lineNo());
462	fprintf(stderr, "yytext (%x): ", buffer8[0]);
463	fprintf(stderr, "%s ", buffer8);
464	#endif
465
466	double dval = 0;
467	if (state == Number) {
468	dval = strtod(buffer8, 0L);
469	} else if (state == Hex) { // scan hex numbers
470	const char *p = buffer8 + 2;
471	while (char c = *p++) {
472	dval *= 16;
473	dval += convertHex(c);
474	}
475	state = Number;
476	} else if (state == Octal) { // scan octal number
477	const char *p = buffer8 + 1;
478	while (char c = *p++) {
479	dval *= 8;
480	dval += c - '0';
481	}
482	state = Number;
483	}
484
485	#ifdef KJS_DEBUG_LEX
486	switch (state) {
487	case Eof:
488	printf("(EOF)\n");
489	break;
490	case Other:
491	printf("(Other)\n");
492	break;
493	case Identifier:
494	printf("(Identifier)/(Keyword)\n");
495	break;
496	case String:
497	printf("(String)\n");
498	break;
499	case Number:
500	printf("(Number)\n");
501	break;
502	default:
503	printf("(unknown)");
504	}
505	#endif
506
507	if (state != Identifier && eatNextIdentifier)
508	eatNextIdentifier = false;
509
510	restrKeyword = false;
511	delimited = false;
512	kjsyylloc.first_line = yylineno; // ???
513	kjsyylloc.last_line = yylineno;
514
515	switch (state) {
516	case Eof:
517	token = 0;
518	break;
519	case Other:
520	if(token == '}' \|\| token == ';') {
521	delimited = true;
522	}
523	break;
524	case IdentifierOrKeyword:
525	if ((token = Lookup::find(&mainTable, buffer16, pos16)) < 0) {
526	case Identifier:
527	// Lookup for keyword failed, means this is an identifier
528	// Apply anonymous-function hack below (eat the identifier)
529	if (eatNextIdentifier) {
530	eatNextIdentifier = false;
531	token = lex();
532	break;
533	}
534	kjsyylval.ident = makeIdentifier(buffer16, pos16);
535	token = IDENT;
536	break;
537	}
538
539	eatNextIdentifier = false;
540	// Hack for "f = function somename() { ... }", too hard to get into the grammar
541	if (token == FUNCTION && lastToken == '=' )
542	eatNextIdentifier = true;
543
544	if (token == CONTINUE \|\| token == BREAK \|\|
545	token == RETURN \|\| token == THROW)
546	restrKeyword = true;
547	break;
548	case String:
549	kjsyylval.ustr = makeUString(buffer16, pos16);
550	token = STRING;
551	break;
552	case Number:
553	kjsyylval.dval = dval;
554	token = NUMBER;
555	break;
556	case Bad:
557	#ifdef KJS_DEBUG_LEX
558	fprintf(stderr, "yylex: ERROR.\n");
559	#endif
560	error = true;
561	return -1;
562	default:
563	assert(!"unhandled numeration value in switch");
564	error = true;
565	return -1;
566	}
567	lastToken = token;
568	return token;
569	}
570
571	bool Lexer::isWhiteSpace() const
572	{
573	return current == '\t' \|\| current == 0x0b \|\| current == 0x0c \|\| isSeparatorSpace(current);
574	}
575
576	bool Lexer::isLineTerminator()
577	{
578	bool cr = (current == '\r');
579	bool lf = (current == '\n');
580	if (cr)
581	skipLF = true;
582	else if (lf)
583	skipCR = true;
584	return cr \|\| lf \|\| current == 0x2028 \|\| current == 0x2029;
585	}
586
587	bool Lexer::isIdentStart(int c)
588	{
589	return (category(c) & (Letter_Uppercase \| Letter_Lowercase \| Letter_Titlecase \| Letter_Modifier \| Letter_Other))
590	\|\| c == '$' \|\| c == '_';
591	}
592
593	bool Lexer::isIdentPart(int c)
594	{
595	return (category(c) & (Letter_Uppercase \| Letter_Lowercase \| Letter_Titlecase \| Letter_Modifier \| Letter_Other
596	\| Mark_NonSpacing \| Mark_SpacingCombining \| Number_DecimalDigit \| Punctuation_Connector))
597	\|\| c == '$' \|\| c == '_';
598	}
599
// True if c is one of the ASCII digits '0' through '9'.
static bool isDecimalDigit(int c)
{
  if (c < '0')
    return false;
  return c <= '9';
}
604
605	bool Lexer::isHexDigit(int c)
606	{
607	return (c >= '0' && c <= '9' \|\|
608	c >= 'a' && c <= 'f' \|\|
609	c >= 'A' && c <= 'F');
610	}
611
612	bool Lexer::isOctalDigit(int c)
613	{
614	return (c >= '0' && c <= '7');
615	}
616
617	int Lexer::matchPunctuator(int c1, int c2, int c3, int c4)
618	{
619	if (c1 == '>' && c2 == '>' && c3 == '>' && c4 == '=') {
620	shift(4);
621	return URSHIFTEQUAL;
622	} else if (c1 == '=' && c2 == '=' && c3 == '=') {
623	shift(3);
624	return STREQ;
625	} else if (c1 == '!' && c2 == '=' && c3 == '=') {
626	shift(3);
627	return STRNEQ;
628	} else if (c1 == '>' && c2 == '>' && c3 == '>') {
629	shift(3);
630	return URSHIFT;
631	} else if (c1 == '<' && c2 == '<' && c3 == '=') {
632	shift(3);
633	return LSHIFTEQUAL;
634	} else if (c1 == '>' && c2 == '>' && c3 == '=') {
635	shift(3);
636	return RSHIFTEQUAL;
637	} else if (c1 == '<' && c2 == '=') {
638	shift(2);
639	return LE;
640	} else if (c1 == '>' && c2 == '=') {
641	shift(2);
642	return GE;
643	} else if (c1 == '!' && c2 == '=') {
644	shift(2);
645	return NE;
646	} else if (c1 == '+' && c2 == '+') {
647	shift(2);
648	if (terminator)
649	return AUTOPLUSPLUS;
650	else
651	return PLUSPLUS;
652	} else if (c1 == '-' && c2 == '-') {
653	shift(2);
654	if (terminator)
655	return AUTOMINUSMINUS;
656	else
657	return MINUSMINUS;
658	} else if (c1 == '=' && c2 == '=') {
659	shift(2);
660	return EQEQ;
661	} else if (c1 == '+' && c2 == '=') {
662	shift(2);
663	return PLUSEQUAL;
664	} else if (c1 == '-' && c2 == '=') {
665	shift(2);
666	return MINUSEQUAL;
667	} else if (c1 == '*' && c2 == '=') {
668	shift(2);
669	return MULTEQUAL;
670	} else if (c1 == '/' && c2 == '=') {
671	shift(2);
672	return DIVEQUAL;
673	} else if (c1 == '&' && c2 == '=') {
674	shift(2);
675	return ANDEQUAL;
676	} else if (c1 == '^' && c2 == '=') {
677	shift(2);
678	return XOREQUAL;
679	} else if (c1 == '%' && c2 == '=') {
680	shift(2);
681	return MODEQUAL;
682	} else if (c1 == '\|' && c2 == '=') {
683	shift(2);
684	return OREQUAL;
685	} else if (c1 == '<' && c2 == '<') {
686	shift(2);
687	return LSHIFT;
688	} else if (c1 == '>' && c2 == '>') {
689	shift(2);
690	return RSHIFT;
691	} else if (c1 == '&' && c2 == '&') {
692	shift(2);
693	return AND;
694	} else if (c1 == '\|' && c2 == '\|') {
695	shift(2);
696	return OR;
697	}
698
699	switch(c1) {
700	case '=':
701	case '>':
702	case '<':
703	case ',':
704	case '!':
705	case '~':
706	case '?':
707	case ':':
708	case '.':
709	case '+':
710	case '-':
711	case '*':
712	case '/':
713	case '&':
714	case '\|':
715	case '^':
716	case '%':
717	case '(':
718	case ')':
719	case '{':
720	case '}':
721	case '[':
722	case ']':
723	case ';':
724	shift(1);
725	return static_cast<int>(c1);
726	default:
727	return -1;
728	}
729	}
730
731	unsigned short Lexer::singleEscape(unsigned short c)
732	{
733	switch(c) {
734	case 'b':
735	return 0x08;
736	case 't':
737	return 0x09;
738	case 'n':
739	return 0x0A;
740	case 'v':
741	return 0x0B;
742	case 'f':
743	return 0x0C;
744	case 'r':
745	return 0x0D;
746	case '"':
747	return 0x22;
748	case '\'':
749	return 0x27;
750	case '\\':
751	return 0x5C;
752	default:
753	return c;
754	}
755	}
756
757	unsigned short Lexer::convertOctal(int c1, int c2, int c3)
758	{
759	return static_cast<unsigned short>((c1 - '0') * 64 + (c2 - '0') * 8 + c3 - '0');
760	}
761
762	unsigned char Lexer::convertHex(int c)
763	{
764	if (c >= '0' && c <= '9')
765	return static_cast<unsigned char>(c - '0');
766	if (c >= 'a' && c <= 'f')
767	return static_cast<unsigned char>(c - 'a' + 10);
768	return static_cast<unsigned char>(c - 'A' + 10);
769	}
770
771	unsigned char Lexer::convertHex(int c1, int c2)
772	{
773	return ((convertHex(c1) << 4) + convertHex(c2));
774	}
775
776	KJS::UChar Lexer::convertUnicode(int c1, int c2, int c3, int c4)
777	{
778	return KJS::UChar((convertHex(c1) << 4) + convertHex(c2),
779	(convertHex(c3) << 4) + convertHex(c4));
780	}
781
782	void Lexer::record8(int c)
783	{
784	ASSERT(c >= 0);
785	ASSERT(c <= 0xff);
786
787	// enlarge buffer if full
788	if (pos8 >= size8 - 1) {
789	char tmp = new char[2 size8];
790	memcpy(tmp, buffer8, size8 * sizeof(char));
791	delete [] buffer8;
792	buffer8 = tmp;
793	size8 *= 2;
794	}
795
796	buffer8[pos8++] = (char) c;
797	}
798
799	void Lexer::record16(int c)
800	{
801	ASSERT(c >= 0);
802	ASSERT(c <= USHRT_MAX);
803	record16(UChar(static_cast<unsigned short>(c)));
804	}
805
806	void Lexer::record16(KJS::UChar c)
807	{
808	// enlarge buffer if full
809	if (pos16 >= size16 - 1) {
810	KJS::UChar tmp = new KJS::UChar[2 size16];
811	memcpy(tmp, buffer16, size16 * sizeof(KJS::UChar));
812	delete [] buffer16;
813	buffer16 = tmp;
814	size16 *= 2;
815	}
816
817	buffer16[pos16++] = c;
818	}
819
820	bool Lexer::scanRegExp()
821	{
822	pos16 = 0;
823	bool lastWasEscape = false;
824	bool inBrackets = false;
825
826	while (1) {
827	if (isLineTerminator() \|\| current == -1)
828	return false;
829	else if (current != '/' \|\| lastWasEscape == true \|\| inBrackets == true)
830	{
831	// keep track of '[' and ']'
832	if (!lastWasEscape) {
833	if ( current == '[' && !inBrackets )
834	inBrackets = true;
835	if ( current == ']' && inBrackets )
836	inBrackets = false;
837	}
838	record16(current);
839	lastWasEscape =
840	!lastWasEscape && (current == '\\');
841	}
842	else { // end of regexp
843	pattern = UString(buffer16, pos16);
844	pos16 = 0;
845	shift(1);
846	break;
847	}
848	shift(1);
849	}
850
851	while (isIdentPart(current)) {
852	record16(current);
853	shift(1);
854	}
855	flags = UString(buffer16, pos16);
856
857	return true;
858	}
859
860
861	void Lexer::doneParsing()
862	{
863	for (unsigned i = 0; i < numIdentifiers; i++) {
864	delete identifiers[i];
865	}
866	fastFree(identifiers);
867	identifiers = 0;
868	numIdentifiers = 0;
869	identifiersCapacity = 0;
870
871	for (unsigned i = 0; i < numStrings; i++) {
872	delete strings[i];
873	}
874	fastFree(strings);
875	strings = 0;
876	numStrings = 0;
877	stringsCapacity = 0;
878	}
879
880	const int initialCapacity = 64;
881	const int growthFactor = 2;
882
883	// FIXME: this completely ignores its parameters, instead using buffer16 and pos16 - wtf?
884	Identifier Lexer::makeIdentifier(KJS::UChar, unsigned int)
885	{
886	if (numIdentifiers == identifiersCapacity) {
887	identifiersCapacity = (identifiersCapacity == 0) ? initialCapacity : identifiersCapacity *growthFactor;
888	identifiers = (KJS::Identifier *)fastRealloc(identifiers, sizeof(KJS::Identifier ) * identifiersCapacity);
889	}
890
891	KJS::Identifier *identifier = new KJS::Identifier(buffer16, pos16);
892	identifiers[numIdentifiers++] = identifier;
893	return identifier;
894	}
895
896	// FIXME: this completely ignores its parameters, instead using buffer16 and pos16 - wtf?
897	UString Lexer::makeUString(KJS::UChar, unsigned int)
898	{
899	if (numStrings == stringsCapacity) {
900	stringsCapacity = (stringsCapacity == 0) ? initialCapacity : stringsCapacity *growthFactor;
901	strings = (UString *)fastRealloc(strings, sizeof(UString ) * stringsCapacity);
902	}
903
904	UString *string = new UString(buffer16, pos16);
905	strings[numStrings++] = string;
906	return string;
907	}
908
909	}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: webkit/trunk/JavaScriptCore/kjs/lexer.cpp@ 20310

Download in other formats: