Context Navigation

LiteralParser.cpp@ 45011

Visit:

Last change on this file since 45011 was 44984, checked in by [email protected], 16 years ago

Fix stupid performance problem in the LiteralParser

Reviewed by Alexey Proskuryakov.

The LiteralParser was making a new UString in order to use
toDouble, however UString's toDouble allows a much wider range
of numberic strings than the LiteralParser accepts, and requires
an additional heap allocation or two for the construciton of the
UString. To rectify this we just call WTF::dtoa directly using
a stack allocated buffer to hold the validated numeric literal.

File size: 14.7 KB

Line
1	/*
2	* Copyright (C) 2009 Apple Inc. All rights reserved.
3	*
4	* Redistribution and use in source and binary forms, with or without
5	* modification, are permitted provided that the following conditions
6	* are met:
7	* 1. Redistributions of source code must retain the above copyright
8	* notice, this list of conditions and the following disclaimer.
9	* 2. Redistributions in binary form must reproduce the above copyright
10	* notice, this list of conditions and the following disclaimer in the
11	* documentation and/or other materials provided with the distribution.
12	*
13	* THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14	* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16	* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
17	* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18	* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19	* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20	* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21	* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23	* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24	*/
25
26	#include "config.h"
27	#include "LiteralParser.h"
28
29	#include "JSArray.h"
30	#include "JSString.h"
31	#include "Lexer.h"
32	#include <wtf/ASCIICType.h>
33	#include <wtf/dtoa.h>
34
35	namespace JSC {
36
37	LiteralParser::TokenType LiteralParser::Lexer::lex(LiteralParserToken& token)
38	{
39	while (m_ptr < m_end && isASCIISpace(*m_ptr))
40	++m_ptr;
41
42	ASSERT(m_ptr <= m_end);
43	if (m_ptr >= m_end) {
44	token.type = TokEnd;
45	token.start = token.end = m_ptr;
46	return TokEnd;
47	}
48	token.type = TokError;
49	token.start = m_ptr;
50	switch (*m_ptr) {
51	case '[':
52	token.type = TokLBracket;
53	token.end = ++m_ptr;
54	return TokLBracket;
55	case ']':
56	token.type = TokRBracket;
57	token.end = ++m_ptr;
58	return TokRBracket;
59	case '(':
60	token.type = TokLParen;
61	token.end = ++m_ptr;
62	return TokLBracket;
63	case ')':
64	token.type = TokRParen;
65	token.end = ++m_ptr;
66	return TokRBracket;
67	case '{':
68	token.type = TokLBrace;
69	token.end = ++m_ptr;
70	return TokLBrace;
71	case '}':
72	token.type = TokRBrace;
73	token.end = ++m_ptr;
74	return TokRBrace;
75	case ',':
76	token.type = TokComma;
77	token.end = ++m_ptr;
78	return TokComma;
79	case ':':
80	token.type = TokColon;
81	token.end = ++m_ptr;
82	return TokColon;
83	case '"':
84	if (m_mode == StrictJSON)
85	return lexString<StrictJSON>(token);
86	return lexString<NonStrictJSON>(token);
87	case 't':
88	if (m_end - m_ptr >= 4 && m_ptr[1] == 'r' && m_ptr[2] == 'u' && m_ptr[3] == 'e') {
89	m_ptr += 4;
90	token.type = TokTrue;
91	token.end = m_ptr;
92	return TokTrue;
93	}
94	break;
95	case 'f':
96	if (m_end - m_ptr >= 5 && m_ptr[1] == 'a' && m_ptr[2] == 'l' && m_ptr[3] == 's' && m_ptr[4] == 'e') {
97	m_ptr += 5;
98	token.type = TokFalse;
99	token.end = m_ptr;
100	return TokFalse;
101	}
102	break;
103	case 'n':
104	if (m_end - m_ptr >= 4 && m_ptr[1] == 'u' && m_ptr[2] == 'l' && m_ptr[3] == 'l') {
105	m_ptr += 4;
106	token.type = TokNull;
107	token.end = m_ptr;
108	return TokNull;
109	}
110	break;
111	case '-':
112	case '0':
113	case '1':
114	case '2':
115	case '3':
116	case '4':
117	case '5':
118	case '6':
119	case '7':
120	case '8':
121	case '9':
122	return lexNumber(token);
123	}
124	return TokError;
125	}
126
127	static inline bool isSafeStringCharacter(UChar c)
128	{
129	return (c >= ' ' && c <= 0xff && c != '\\' && c != '"') \|\| c == '\t';
130	}
131
132	template <LiteralParser::ParserMode mode> LiteralParser::TokenType LiteralParser::Lexer::lexString(LiteralParserToken& token)
133	{
134	++m_ptr;
135	const UChar* runStart;
136	token.stringToken = UString();
137	do {
138	runStart = m_ptr;
139	while (m_ptr < m_end && isSafeStringCharacter(*m_ptr))
140	++m_ptr;
141	if (runStart < m_ptr)
142	token.stringToken.append(runStart, m_ptr - runStart);
143	if ((mode == StrictJSON) && m_ptr < m_end && *m_ptr == '\\') {
144	++m_ptr;
145	if (m_ptr >= m_end)
146	return TokError;
147	switch (*m_ptr) {
148	case '"':
149	token.stringToken.append('"');
150	m_ptr++;
151	break;
152	case '\\':
153	token.stringToken.append('\\');
154	m_ptr++;
155	break;
156	case '/':
157	token.stringToken.append('/');
158	m_ptr++;
159	break;
160	case 'b':
161	token.stringToken.append('\b');
162	m_ptr++;
163	break;
164	case 'f':
165	token.stringToken.append('\f');
166	m_ptr++;
167	break;
168	case 'n':
169	token.stringToken.append('\n');
170	m_ptr++;
171	break;
172	case 'r':
173	token.stringToken.append('\r');
174	m_ptr++;
175	break;
176	case 't':
177	token.stringToken.append('\t');
178	m_ptr++;
179	break;
180
181	case 'u':
182	if ((m_end - m_ptr) < 5) // uNNNN == 5 characters
183	return TokError;
184	for (int i = 1; i < 5; i++) {
185	if (!isASCIIHexDigit(m_ptr[i]))
186	return TokError;
187	}
188	token.stringToken.append(JSC::Lexer::convertUnicode(m_ptr[1], m_ptr[2], m_ptr[3], m_ptr[4]));
189	m_ptr += 5;
190	break;
191
192	default:
193	return TokError;
194	}
195	}
196	} while ((mode == StrictJSON) && m_ptr != runStart && (m_ptr < m_end) && *m_ptr != '"');
197
198	if (m_ptr >= m_end \|\| *m_ptr != '"')
199	return TokError;
200
201	token.type = TokString;
202	token.end = ++m_ptr;
203	return TokString;
204	}
205
206	LiteralParser::TokenType LiteralParser::Lexer::lexNumber(LiteralParserToken& token)
207	{
208	// ES5 and json.org define numbers as
209	// number
210	// int
211	// int frac? exp?
212	//
213	// int
214	// -? 0
215	// -? digit1-9 digits?
216	//
217	// digits
218	// digit digits?
219	//
220	// -?(0 \| [1-9][0-9]*) ('.' [0-9]+)? ([eE][+-]? [0-9]+)?
221
222	if (m_ptr < m_end && *m_ptr == '-') // -?
223	++m_ptr;
224
225	// (0 \| [1-9][0-9]*)
226	if (m_ptr < m_end && *m_ptr == '0') // 0
227	++m_ptr;
228	else if (m_ptr < m_end && m_ptr >= '1' && m_ptr <= '9') { // [1-9]
229	++m_ptr;
230	// [0-9]*
231	while (m_ptr < m_end && isASCIIDigit(*m_ptr))
232	++m_ptr;
233	} else
234	return TokError;
235
236	// ('.' [0-9]+)?
237	if (m_ptr < m_end && *m_ptr == '.') {
238	++m_ptr;
239	// [0-9]+
240	if (m_ptr >= m_end \|\| !isASCIIDigit(*m_ptr))
241	return TokError;
242
243	++m_ptr;
244	while (m_ptr < m_end && isASCIIDigit(*m_ptr))
245	++m_ptr;
246	}
247
248	// ([eE][+-]? [0-9]+)?
249	if (m_ptr < m_end && (m_ptr == 'e' \|\| m_ptr == 'E')) { // [eE]
250	++m_ptr;
251
252	// [-+]?
253	if (m_ptr < m_end && (m_ptr == '-' \|\| m_ptr == '+'))
254	++m_ptr;
255
256	// [0-9]+
257	if (m_ptr >= m_end \|\| !isASCIIDigit(*m_ptr))
258	return TokError;
259
260	++m_ptr;
261	while (m_ptr < m_end && isASCIIDigit(*m_ptr))
262	++m_ptr;
263	}
264
265	token.type = TokNumber;
266	token.end = m_ptr;
267	Vector<char, 64> buffer(token.end - token.start + 1);
268	int i;
269	for (i = 0; i < token.end - token.start; i++) {
270	ASSERT(static_cast<char>(token.start[i]) == token.start[i]);
271	buffer[i] = static_cast<char>(token.start[i]);
272	}
273	buffer[i] = 0;
274	char* end;
275	token.numberToken = WTF::strtod(buffer.data(), &end);
276	ASSERT(buffer.data() + (token.end - token.start) == end);
277	return TokNumber;
278	}
279
280	JSValue LiteralParser::parse(ParserState initialState)
281	{
282	ParserState state = initialState;
283	MarkedArgumentBuffer objectStack;
284	JSValue lastValue;
285	Vector<ParserState, 16> stateStack;
286	Vector<Identifier, 16> identifierStack;
287	while (1) {
288	switch(state) {
289	startParseArray:
290	case StartParseArray: {
291	JSArray* array = constructEmptyArray(m_exec);
292	objectStack.append(array);
293	// fallthrough
294	}
295	doParseArrayStartExpression:
296	case DoParseArrayStartExpression: {
297	if (m_lexer.next() == TokRBracket) {
298	m_lexer.next();
299	lastValue = objectStack.last();
300	objectStack.removeLast();
301	break;
302	}
303
304	stateStack.append(DoParseArrayEndExpression);
305	goto startParseExpression;
306	}
307	case DoParseArrayEndExpression: {
308	asArray(objectStack.last())->push(m_exec, lastValue);
309
310	if (m_lexer.currentToken().type == TokComma)
311	goto doParseArrayStartExpression;
312
313	if (m_lexer.currentToken().type != TokRBracket)
314	return JSValue();
315
316	m_lexer.next();
317	lastValue = objectStack.last();
318	objectStack.removeLast();
319	break;
320	}
321	startParseObject:
322	case StartParseObject: {
323	JSObject* object = constructEmptyObject(m_exec);
324	objectStack.append(object);
325
326	TokenType type = m_lexer.next();
327	if (type == TokString) {
328	Lexer::LiteralParserToken identifierToken = m_lexer.currentToken();
329
330	// Check for colon
331	if (m_lexer.next() != TokColon)
332	return JSValue();
333
334	m_lexer.next();
335	identifierStack.append(Identifier(m_exec, identifierToken.stringToken));
336	stateStack.append(DoParseObjectEndExpression);
337	goto startParseExpression;
338	} else if (type != TokRBrace)
339	return JSValue();
340	m_lexer.next();
341	lastValue = objectStack.last();
342	objectStack.removeLast();
343	break;
344	}
345	doParseObjectStartExpression:
346	case DoParseObjectStartExpression: {
347	TokenType type = m_lexer.next();
348	if (type != TokString)
349	return JSValue();
350	Lexer::LiteralParserToken identifierToken = m_lexer.currentToken();
351
352	// Check for colon
353	if (m_lexer.next() != TokColon)
354	return JSValue();
355
356	m_lexer.next();
357	identifierStack.append(Identifier(m_exec, identifierToken.stringToken));
358	stateStack.append(DoParseObjectEndExpression);
359	goto startParseExpression;
360	}
361	case DoParseObjectEndExpression:
362	{
363	asObject(objectStack.last())->putDirect(identifierStack.last(), lastValue);
364	identifierStack.removeLast();
365	if (m_lexer.currentToken().type == TokComma)
366	goto doParseObjectStartExpression;
367	if (m_lexer.currentToken().type != TokRBrace)
368	return JSValue();
369	m_lexer.next();
370	lastValue = objectStack.last();
371	objectStack.removeLast();
372	break;
373	}
374	startParseExpression:
375	case StartParseExpression: {
376	switch (m_lexer.currentToken().type) {
377	case TokLBracket:
378	goto startParseArray;
379	case TokLBrace:
380	goto startParseObject;
381	case TokString: {
382	Lexer::LiteralParserToken stringToken = m_lexer.currentToken();
383	m_lexer.next();
384	lastValue = jsString(m_exec, stringToken.stringToken);
385	break;
386	}
387	case TokNumber: {
388	Lexer::LiteralParserToken numberToken = m_lexer.currentToken();
389	m_lexer.next();
390	lastValue = jsNumber(m_exec, numberToken.numberToken);
391	break;
392	}
393	case TokNull:
394	m_lexer.next();
395	lastValue = jsNull();
396	break;
397
398	case TokTrue:
399	m_lexer.next();
400	lastValue = jsBoolean(true);
401	break;
402
403	case TokFalse:
404	m_lexer.next();
405	lastValue = jsBoolean(false);
406	break;
407
408	default:
409	// Error
410	return JSValue();
411	}
412	break;
413	}
414	case StartParseStatement: {
415	switch (m_lexer.currentToken().type) {
416	case TokLBracket:
417	case TokNumber:
418	case TokString:
419	goto startParseExpression;
420
421	case TokLParen: {
422	m_lexer.next();
423	stateStack.append(StartParseStatementEndStatement);
424	goto startParseExpression;
425	}
426	default:
427	return JSValue();
428	}
429	}
430	case StartParseStatementEndStatement: {
431	ASSERT(stateStack.isEmpty());
432	if (m_lexer.currentToken().type != TokRParen)
433	return JSValue();
434	if (m_lexer.next() == TokEnd)
435	return lastValue;
436	return JSValue();
437	}
438	default:
439	ASSERT_NOT_REACHED();
440	}
441	if (stateStack.isEmpty())
442	return lastValue;
443	state = stateStack.last();
444	stateStack.removeLast();
445	continue;
446	}
447	}
448
449	}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: webkit/trunk/JavaScriptCore/runtime/LiteralParser.cpp@ 45011

Download in other formats: