source: webkit/trunk/JavaScriptCore/runtime/LiteralParser.cpp@ 45011

Last change on this file since 45011 was 44984, checked in by [email protected], 16 years ago

Fix stupid performance problem in the LiteralParser

Reviewed by Alexey Proskuryakov.

The LiteralParser was making a new UString in order to use
toDouble, however UString's toDouble allows a much wider range
of numberic strings than the LiteralParser accepts, and requires
an additional heap allocation or two for the construciton of the
UString. To rectify this we just call WTF::dtoa directly using
a stack allocated buffer to hold the validated numeric literal.

File size: 14.7 KB
Line 
1/*
2 * Copyright (C) 2009 Apple Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26#include "config.h"
27#include "LiteralParser.h"
28
29#include "JSArray.h"
30#include "JSString.h"
31#include "Lexer.h"
32#include <wtf/ASCIICType.h>
33#include <wtf/dtoa.h>
34
35namespace JSC {
36
37LiteralParser::TokenType LiteralParser::Lexer::lex(LiteralParserToken& token)
38{
39 while (m_ptr < m_end && isASCIISpace(*m_ptr))
40 ++m_ptr;
41
42 ASSERT(m_ptr <= m_end);
43 if (m_ptr >= m_end) {
44 token.type = TokEnd;
45 token.start = token.end = m_ptr;
46 return TokEnd;
47 }
48 token.type = TokError;
49 token.start = m_ptr;
50 switch (*m_ptr) {
51 case '[':
52 token.type = TokLBracket;
53 token.end = ++m_ptr;
54 return TokLBracket;
55 case ']':
56 token.type = TokRBracket;
57 token.end = ++m_ptr;
58 return TokRBracket;
59 case '(':
60 token.type = TokLParen;
61 token.end = ++m_ptr;
62 return TokLBracket;
63 case ')':
64 token.type = TokRParen;
65 token.end = ++m_ptr;
66 return TokRBracket;
67 case '{':
68 token.type = TokLBrace;
69 token.end = ++m_ptr;
70 return TokLBrace;
71 case '}':
72 token.type = TokRBrace;
73 token.end = ++m_ptr;
74 return TokRBrace;
75 case ',':
76 token.type = TokComma;
77 token.end = ++m_ptr;
78 return TokComma;
79 case ':':
80 token.type = TokColon;
81 token.end = ++m_ptr;
82 return TokColon;
83 case '"':
84 if (m_mode == StrictJSON)
85 return lexString<StrictJSON>(token);
86 return lexString<NonStrictJSON>(token);
87 case 't':
88 if (m_end - m_ptr >= 4 && m_ptr[1] == 'r' && m_ptr[2] == 'u' && m_ptr[3] == 'e') {
89 m_ptr += 4;
90 token.type = TokTrue;
91 token.end = m_ptr;
92 return TokTrue;
93 }
94 break;
95 case 'f':
96 if (m_end - m_ptr >= 5 && m_ptr[1] == 'a' && m_ptr[2] == 'l' && m_ptr[3] == 's' && m_ptr[4] == 'e') {
97 m_ptr += 5;
98 token.type = TokFalse;
99 token.end = m_ptr;
100 return TokFalse;
101 }
102 break;
103 case 'n':
104 if (m_end - m_ptr >= 4 && m_ptr[1] == 'u' && m_ptr[2] == 'l' && m_ptr[3] == 'l') {
105 m_ptr += 4;
106 token.type = TokNull;
107 token.end = m_ptr;
108 return TokNull;
109 }
110 break;
111 case '-':
112 case '0':
113 case '1':
114 case '2':
115 case '3':
116 case '4':
117 case '5':
118 case '6':
119 case '7':
120 case '8':
121 case '9':
122 return lexNumber(token);
123 }
124 return TokError;
125}
126
127static inline bool isSafeStringCharacter(UChar c)
128{
129 return (c >= ' ' && c <= 0xff && c != '\\' && c != '"') || c == '\t';
130}
131
132template <LiteralParser::ParserMode mode> LiteralParser::TokenType LiteralParser::Lexer::lexString(LiteralParserToken& token)
133{
134 ++m_ptr;
135 const UChar* runStart;
136 token.stringToken = UString();
137 do {
138 runStart = m_ptr;
139 while (m_ptr < m_end && isSafeStringCharacter(*m_ptr))
140 ++m_ptr;
141 if (runStart < m_ptr)
142 token.stringToken.append(runStart, m_ptr - runStart);
143 if ((mode == StrictJSON) && m_ptr < m_end && *m_ptr == '\\') {
144 ++m_ptr;
145 if (m_ptr >= m_end)
146 return TokError;
147 switch (*m_ptr) {
148 case '"':
149 token.stringToken.append('"');
150 m_ptr++;
151 break;
152 case '\\':
153 token.stringToken.append('\\');
154 m_ptr++;
155 break;
156 case '/':
157 token.stringToken.append('/');
158 m_ptr++;
159 break;
160 case 'b':
161 token.stringToken.append('\b');
162 m_ptr++;
163 break;
164 case 'f':
165 token.stringToken.append('\f');
166 m_ptr++;
167 break;
168 case 'n':
169 token.stringToken.append('\n');
170 m_ptr++;
171 break;
172 case 'r':
173 token.stringToken.append('\r');
174 m_ptr++;
175 break;
176 case 't':
177 token.stringToken.append('\t');
178 m_ptr++;
179 break;
180
181 case 'u':
182 if ((m_end - m_ptr) < 5) // uNNNN == 5 characters
183 return TokError;
184 for (int i = 1; i < 5; i++) {
185 if (!isASCIIHexDigit(m_ptr[i]))
186 return TokError;
187 }
188 token.stringToken.append(JSC::Lexer::convertUnicode(m_ptr[1], m_ptr[2], m_ptr[3], m_ptr[4]));
189 m_ptr += 5;
190 break;
191
192 default:
193 return TokError;
194 }
195 }
196 } while ((mode == StrictJSON) && m_ptr != runStart && (m_ptr < m_end) && *m_ptr != '"');
197
198 if (m_ptr >= m_end || *m_ptr != '"')
199 return TokError;
200
201 token.type = TokString;
202 token.end = ++m_ptr;
203 return TokString;
204}
205
206LiteralParser::TokenType LiteralParser::Lexer::lexNumber(LiteralParserToken& token)
207{
208 // ES5 and json.org define numbers as
209 // number
210 // int
211 // int frac? exp?
212 //
213 // int
214 // -? 0
215 // -? digit1-9 digits?
216 //
217 // digits
218 // digit digits?
219 //
220 // -?(0 | [1-9][0-9]*) ('.' [0-9]+)? ([eE][+-]? [0-9]+)?
221
222 if (m_ptr < m_end && *m_ptr == '-') // -?
223 ++m_ptr;
224
225 // (0 | [1-9][0-9]*)
226 if (m_ptr < m_end && *m_ptr == '0') // 0
227 ++m_ptr;
228 else if (m_ptr < m_end && *m_ptr >= '1' && *m_ptr <= '9') { // [1-9]
229 ++m_ptr;
230 // [0-9]*
231 while (m_ptr < m_end && isASCIIDigit(*m_ptr))
232 ++m_ptr;
233 } else
234 return TokError;
235
236 // ('.' [0-9]+)?
237 if (m_ptr < m_end && *m_ptr == '.') {
238 ++m_ptr;
239 // [0-9]+
240 if (m_ptr >= m_end || !isASCIIDigit(*m_ptr))
241 return TokError;
242
243 ++m_ptr;
244 while (m_ptr < m_end && isASCIIDigit(*m_ptr))
245 ++m_ptr;
246 }
247
248 // ([eE][+-]? [0-9]+)?
249 if (m_ptr < m_end && (*m_ptr == 'e' || *m_ptr == 'E')) { // [eE]
250 ++m_ptr;
251
252 // [-+]?
253 if (m_ptr < m_end && (*m_ptr == '-' || *m_ptr == '+'))
254 ++m_ptr;
255
256 // [0-9]+
257 if (m_ptr >= m_end || !isASCIIDigit(*m_ptr))
258 return TokError;
259
260 ++m_ptr;
261 while (m_ptr < m_end && isASCIIDigit(*m_ptr))
262 ++m_ptr;
263 }
264
265 token.type = TokNumber;
266 token.end = m_ptr;
267 Vector<char, 64> buffer(token.end - token.start + 1);
268 int i;
269 for (i = 0; i < token.end - token.start; i++) {
270 ASSERT(static_cast<char>(token.start[i]) == token.start[i]);
271 buffer[i] = static_cast<char>(token.start[i]);
272 }
273 buffer[i] = 0;
274 char* end;
275 token.numberToken = WTF::strtod(buffer.data(), &end);
276 ASSERT(buffer.data() + (token.end - token.start) == end);
277 return TokNumber;
278}
279
280JSValue LiteralParser::parse(ParserState initialState)
281{
282 ParserState state = initialState;
283 MarkedArgumentBuffer objectStack;
284 JSValue lastValue;
285 Vector<ParserState, 16> stateStack;
286 Vector<Identifier, 16> identifierStack;
287 while (1) {
288 switch(state) {
289 startParseArray:
290 case StartParseArray: {
291 JSArray* array = constructEmptyArray(m_exec);
292 objectStack.append(array);
293 // fallthrough
294 }
295 doParseArrayStartExpression:
296 case DoParseArrayStartExpression: {
297 if (m_lexer.next() == TokRBracket) {
298 m_lexer.next();
299 lastValue = objectStack.last();
300 objectStack.removeLast();
301 break;
302 }
303
304 stateStack.append(DoParseArrayEndExpression);
305 goto startParseExpression;
306 }
307 case DoParseArrayEndExpression: {
308 asArray(objectStack.last())->push(m_exec, lastValue);
309
310 if (m_lexer.currentToken().type == TokComma)
311 goto doParseArrayStartExpression;
312
313 if (m_lexer.currentToken().type != TokRBracket)
314 return JSValue();
315
316 m_lexer.next();
317 lastValue = objectStack.last();
318 objectStack.removeLast();
319 break;
320 }
321 startParseObject:
322 case StartParseObject: {
323 JSObject* object = constructEmptyObject(m_exec);
324 objectStack.append(object);
325
326 TokenType type = m_lexer.next();
327 if (type == TokString) {
328 Lexer::LiteralParserToken identifierToken = m_lexer.currentToken();
329
330 // Check for colon
331 if (m_lexer.next() != TokColon)
332 return JSValue();
333
334 m_lexer.next();
335 identifierStack.append(Identifier(m_exec, identifierToken.stringToken));
336 stateStack.append(DoParseObjectEndExpression);
337 goto startParseExpression;
338 } else if (type != TokRBrace)
339 return JSValue();
340 m_lexer.next();
341 lastValue = objectStack.last();
342 objectStack.removeLast();
343 break;
344 }
345 doParseObjectStartExpression:
346 case DoParseObjectStartExpression: {
347 TokenType type = m_lexer.next();
348 if (type != TokString)
349 return JSValue();
350 Lexer::LiteralParserToken identifierToken = m_lexer.currentToken();
351
352 // Check for colon
353 if (m_lexer.next() != TokColon)
354 return JSValue();
355
356 m_lexer.next();
357 identifierStack.append(Identifier(m_exec, identifierToken.stringToken));
358 stateStack.append(DoParseObjectEndExpression);
359 goto startParseExpression;
360 }
361 case DoParseObjectEndExpression:
362 {
363 asObject(objectStack.last())->putDirect(identifierStack.last(), lastValue);
364 identifierStack.removeLast();
365 if (m_lexer.currentToken().type == TokComma)
366 goto doParseObjectStartExpression;
367 if (m_lexer.currentToken().type != TokRBrace)
368 return JSValue();
369 m_lexer.next();
370 lastValue = objectStack.last();
371 objectStack.removeLast();
372 break;
373 }
374 startParseExpression:
375 case StartParseExpression: {
376 switch (m_lexer.currentToken().type) {
377 case TokLBracket:
378 goto startParseArray;
379 case TokLBrace:
380 goto startParseObject;
381 case TokString: {
382 Lexer::LiteralParserToken stringToken = m_lexer.currentToken();
383 m_lexer.next();
384 lastValue = jsString(m_exec, stringToken.stringToken);
385 break;
386 }
387 case TokNumber: {
388 Lexer::LiteralParserToken numberToken = m_lexer.currentToken();
389 m_lexer.next();
390 lastValue = jsNumber(m_exec, numberToken.numberToken);
391 break;
392 }
393 case TokNull:
394 m_lexer.next();
395 lastValue = jsNull();
396 break;
397
398 case TokTrue:
399 m_lexer.next();
400 lastValue = jsBoolean(true);
401 break;
402
403 case TokFalse:
404 m_lexer.next();
405 lastValue = jsBoolean(false);
406 break;
407
408 default:
409 // Error
410 return JSValue();
411 }
412 break;
413 }
414 case StartParseStatement: {
415 switch (m_lexer.currentToken().type) {
416 case TokLBracket:
417 case TokNumber:
418 case TokString:
419 goto startParseExpression;
420
421 case TokLParen: {
422 m_lexer.next();
423 stateStack.append(StartParseStatementEndStatement);
424 goto startParseExpression;
425 }
426 default:
427 return JSValue();
428 }
429 }
430 case StartParseStatementEndStatement: {
431 ASSERT(stateStack.isEmpty());
432 if (m_lexer.currentToken().type != TokRParen)
433 return JSValue();
434 if (m_lexer.next() == TokEnd)
435 return lastValue;
436 return JSValue();
437 }
438 default:
439 ASSERT_NOT_REACHED();
440 }
441 if (stateStack.isEmpty())
442 return lastValue;
443 state = stateStack.last();
444 stateStack.removeLast();
445 continue;
446 }
447}
448
449}
Note: See TracBrowser for help on using the repository browser.