source: webkit/trunk/JavaScriptCore/runtime/LiteralParser.cpp@ 48788

Last change on this file since 48788 was 47828, checked in by [email protected], 16 years ago

JSON parser accepts trailing comma in array literals
https://bugs.webkit.org/show_bug.cgi?id=28779

Reviewed by Alexey Proskuryakov.

Update parser to correctly fail if there's a trailing comma.

File size: 15.0 KB
Line 
1/*
2 * Copyright (C) 2009 Apple Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26#include "config.h"
27#include "LiteralParser.h"
28
29#include "JSArray.h"
30#include "JSString.h"
31#include "Lexer.h"
32#include <wtf/ASCIICType.h>
33#include <wtf/dtoa.h>
34
35namespace JSC {
36
37LiteralParser::TokenType LiteralParser::Lexer::lex(LiteralParserToken& token)
38{
39 while (m_ptr < m_end && isASCIISpace(*m_ptr))
40 ++m_ptr;
41
42 ASSERT(m_ptr <= m_end);
43 if (m_ptr >= m_end) {
44 token.type = TokEnd;
45 token.start = token.end = m_ptr;
46 return TokEnd;
47 }
48 token.type = TokError;
49 token.start = m_ptr;
50 switch (*m_ptr) {
51 case '[':
52 token.type = TokLBracket;
53 token.end = ++m_ptr;
54 return TokLBracket;
55 case ']':
56 token.type = TokRBracket;
57 token.end = ++m_ptr;
58 return TokRBracket;
59 case '(':
60 token.type = TokLParen;
61 token.end = ++m_ptr;
62 return TokLBracket;
63 case ')':
64 token.type = TokRParen;
65 token.end = ++m_ptr;
66 return TokRBracket;
67 case '{':
68 token.type = TokLBrace;
69 token.end = ++m_ptr;
70 return TokLBrace;
71 case '}':
72 token.type = TokRBrace;
73 token.end = ++m_ptr;
74 return TokRBrace;
75 case ',':
76 token.type = TokComma;
77 token.end = ++m_ptr;
78 return TokComma;
79 case ':':
80 token.type = TokColon;
81 token.end = ++m_ptr;
82 return TokColon;
83 case '"':
84 if (m_mode == StrictJSON)
85 return lexString<StrictJSON>(token);
86 return lexString<NonStrictJSON>(token);
87 case 't':
88 if (m_end - m_ptr >= 4 && m_ptr[1] == 'r' && m_ptr[2] == 'u' && m_ptr[3] == 'e') {
89 m_ptr += 4;
90 token.type = TokTrue;
91 token.end = m_ptr;
92 return TokTrue;
93 }
94 break;
95 case 'f':
96 if (m_end - m_ptr >= 5 && m_ptr[1] == 'a' && m_ptr[2] == 'l' && m_ptr[3] == 's' && m_ptr[4] == 'e') {
97 m_ptr += 5;
98 token.type = TokFalse;
99 token.end = m_ptr;
100 return TokFalse;
101 }
102 break;
103 case 'n':
104 if (m_end - m_ptr >= 4 && m_ptr[1] == 'u' && m_ptr[2] == 'l' && m_ptr[3] == 'l') {
105 m_ptr += 4;
106 token.type = TokNull;
107 token.end = m_ptr;
108 return TokNull;
109 }
110 break;
111 case '-':
112 case '0':
113 case '1':
114 case '2':
115 case '3':
116 case '4':
117 case '5':
118 case '6':
119 case '7':
120 case '8':
121 case '9':
122 return lexNumber(token);
123 }
124 return TokError;
125}
126
127template <LiteralParser::ParserMode mode> static inline bool isSafeStringCharacter(UChar c)
128{
129 return (c >= ' ' && (mode == LiteralParser::StrictJSON || c <= 0xff) && c != '\\' && c != '"') || c == '\t';
130}
131
132// "inline" is required here to help WINSCW compiler resolve specialized argument in templated functions.
133template <LiteralParser::ParserMode mode> inline LiteralParser::TokenType LiteralParser::Lexer::lexString(LiteralParserToken& token)
134{
135 ++m_ptr;
136 const UChar* runStart;
137 token.stringToken = UString();
138 do {
139 runStart = m_ptr;
140 while (m_ptr < m_end && isSafeStringCharacter<mode>(*m_ptr))
141 ++m_ptr;
142 if (runStart < m_ptr)
143 token.stringToken.append(runStart, m_ptr - runStart);
144 if ((mode == StrictJSON) && m_ptr < m_end && *m_ptr == '\\') {
145 ++m_ptr;
146 if (m_ptr >= m_end)
147 return TokError;
148 switch (*m_ptr) {
149 case '"':
150 token.stringToken.append('"');
151 m_ptr++;
152 break;
153 case '\\':
154 token.stringToken.append('\\');
155 m_ptr++;
156 break;
157 case '/':
158 token.stringToken.append('/');
159 m_ptr++;
160 break;
161 case 'b':
162 token.stringToken.append('\b');
163 m_ptr++;
164 break;
165 case 'f':
166 token.stringToken.append('\f');
167 m_ptr++;
168 break;
169 case 'n':
170 token.stringToken.append('\n');
171 m_ptr++;
172 break;
173 case 'r':
174 token.stringToken.append('\r');
175 m_ptr++;
176 break;
177 case 't':
178 token.stringToken.append('\t');
179 m_ptr++;
180 break;
181
182 case 'u':
183 if ((m_end - m_ptr) < 5) // uNNNN == 5 characters
184 return TokError;
185 for (int i = 1; i < 5; i++) {
186 if (!isASCIIHexDigit(m_ptr[i]))
187 return TokError;
188 }
189 token.stringToken.append(JSC::Lexer::convertUnicode(m_ptr[1], m_ptr[2], m_ptr[3], m_ptr[4]));
190 m_ptr += 5;
191 break;
192
193 default:
194 return TokError;
195 }
196 }
197 } while ((mode == StrictJSON) && m_ptr != runStart && (m_ptr < m_end) && *m_ptr != '"');
198
199 if (m_ptr >= m_end || *m_ptr != '"')
200 return TokError;
201
202 token.type = TokString;
203 token.end = ++m_ptr;
204 return TokString;
205}
206
207LiteralParser::TokenType LiteralParser::Lexer::lexNumber(LiteralParserToken& token)
208{
209 // ES5 and json.org define numbers as
210 // number
211 // int
212 // int frac? exp?
213 //
214 // int
215 // -? 0
216 // -? digit1-9 digits?
217 //
218 // digits
219 // digit digits?
220 //
221 // -?(0 | [1-9][0-9]*) ('.' [0-9]+)? ([eE][+-]? [0-9]+)?
222
223 if (m_ptr < m_end && *m_ptr == '-') // -?
224 ++m_ptr;
225
226 // (0 | [1-9][0-9]*)
227 if (m_ptr < m_end && *m_ptr == '0') // 0
228 ++m_ptr;
229 else if (m_ptr < m_end && *m_ptr >= '1' && *m_ptr <= '9') { // [1-9]
230 ++m_ptr;
231 // [0-9]*
232 while (m_ptr < m_end && isASCIIDigit(*m_ptr))
233 ++m_ptr;
234 } else
235 return TokError;
236
237 // ('.' [0-9]+)?
238 if (m_ptr < m_end && *m_ptr == '.') {
239 ++m_ptr;
240 // [0-9]+
241 if (m_ptr >= m_end || !isASCIIDigit(*m_ptr))
242 return TokError;
243
244 ++m_ptr;
245 while (m_ptr < m_end && isASCIIDigit(*m_ptr))
246 ++m_ptr;
247 }
248
249 // ([eE][+-]? [0-9]+)?
250 if (m_ptr < m_end && (*m_ptr == 'e' || *m_ptr == 'E')) { // [eE]
251 ++m_ptr;
252
253 // [-+]?
254 if (m_ptr < m_end && (*m_ptr == '-' || *m_ptr == '+'))
255 ++m_ptr;
256
257 // [0-9]+
258 if (m_ptr >= m_end || !isASCIIDigit(*m_ptr))
259 return TokError;
260
261 ++m_ptr;
262 while (m_ptr < m_end && isASCIIDigit(*m_ptr))
263 ++m_ptr;
264 }
265
266 token.type = TokNumber;
267 token.end = m_ptr;
268 Vector<char, 64> buffer(token.end - token.start + 1);
269 int i;
270 for (i = 0; i < token.end - token.start; i++) {
271 ASSERT(static_cast<char>(token.start[i]) == token.start[i]);
272 buffer[i] = static_cast<char>(token.start[i]);
273 }
274 buffer[i] = 0;
275 char* end;
276 token.numberToken = WTF::strtod(buffer.data(), &end);
277 ASSERT(buffer.data() + (token.end - token.start) == end);
278 return TokNumber;
279}
280
281JSValue LiteralParser::parse(ParserState initialState)
282{
283 ParserState state = initialState;
284 MarkedArgumentBuffer objectStack;
285 JSValue lastValue;
286 Vector<ParserState, 16> stateStack;
287 Vector<Identifier, 16> identifierStack;
288 while (1) {
289 switch(state) {
290 startParseArray:
291 case StartParseArray: {
292 JSArray* array = constructEmptyArray(m_exec);
293 objectStack.append(array);
294 // fallthrough
295 }
296 doParseArrayStartExpression:
297 case DoParseArrayStartExpression: {
298 TokenType lastToken = m_lexer.currentToken().type;
299 if (m_lexer.next() == TokRBracket) {
300 if (lastToken == TokComma)
301 return JSValue();
302 m_lexer.next();
303 lastValue = objectStack.last();
304 objectStack.removeLast();
305 break;
306 }
307
308 stateStack.append(DoParseArrayEndExpression);
309 goto startParseExpression;
310 }
311 case DoParseArrayEndExpression: {
312 asArray(objectStack.last())->push(m_exec, lastValue);
313
314 if (m_lexer.currentToken().type == TokComma)
315 goto doParseArrayStartExpression;
316
317 if (m_lexer.currentToken().type != TokRBracket)
318 return JSValue();
319
320 m_lexer.next();
321 lastValue = objectStack.last();
322 objectStack.removeLast();
323 break;
324 }
325 startParseObject:
326 case StartParseObject: {
327 JSObject* object = constructEmptyObject(m_exec);
328 objectStack.append(object);
329
330 TokenType type = m_lexer.next();
331 if (type == TokString) {
332 Lexer::LiteralParserToken identifierToken = m_lexer.currentToken();
333
334 // Check for colon
335 if (m_lexer.next() != TokColon)
336 return JSValue();
337
338 m_lexer.next();
339 identifierStack.append(Identifier(m_exec, identifierToken.stringToken));
340 stateStack.append(DoParseObjectEndExpression);
341 goto startParseExpression;
342 } else if (type != TokRBrace)
343 return JSValue();
344 m_lexer.next();
345 lastValue = objectStack.last();
346 objectStack.removeLast();
347 break;
348 }
349 doParseObjectStartExpression:
350 case DoParseObjectStartExpression: {
351 TokenType type = m_lexer.next();
352 if (type != TokString)
353 return JSValue();
354 Lexer::LiteralParserToken identifierToken = m_lexer.currentToken();
355
356 // Check for colon
357 if (m_lexer.next() != TokColon)
358 return JSValue();
359
360 m_lexer.next();
361 identifierStack.append(Identifier(m_exec, identifierToken.stringToken));
362 stateStack.append(DoParseObjectEndExpression);
363 goto startParseExpression;
364 }
365 case DoParseObjectEndExpression:
366 {
367 asObject(objectStack.last())->putDirect(identifierStack.last(), lastValue);
368 identifierStack.removeLast();
369 if (m_lexer.currentToken().type == TokComma)
370 goto doParseObjectStartExpression;
371 if (m_lexer.currentToken().type != TokRBrace)
372 return JSValue();
373 m_lexer.next();
374 lastValue = objectStack.last();
375 objectStack.removeLast();
376 break;
377 }
378 startParseExpression:
379 case StartParseExpression: {
380 switch (m_lexer.currentToken().type) {
381 case TokLBracket:
382 goto startParseArray;
383 case TokLBrace:
384 goto startParseObject;
385 case TokString: {
386 Lexer::LiteralParserToken stringToken = m_lexer.currentToken();
387 m_lexer.next();
388 lastValue = jsString(m_exec, stringToken.stringToken);
389 break;
390 }
391 case TokNumber: {
392 Lexer::LiteralParserToken numberToken = m_lexer.currentToken();
393 m_lexer.next();
394 lastValue = jsNumber(m_exec, numberToken.numberToken);
395 break;
396 }
397 case TokNull:
398 m_lexer.next();
399 lastValue = jsNull();
400 break;
401
402 case TokTrue:
403 m_lexer.next();
404 lastValue = jsBoolean(true);
405 break;
406
407 case TokFalse:
408 m_lexer.next();
409 lastValue = jsBoolean(false);
410 break;
411
412 default:
413 // Error
414 return JSValue();
415 }
416 break;
417 }
418 case StartParseStatement: {
419 switch (m_lexer.currentToken().type) {
420 case TokLBracket:
421 case TokNumber:
422 case TokString:
423 goto startParseExpression;
424
425 case TokLParen: {
426 m_lexer.next();
427 stateStack.append(StartParseStatementEndStatement);
428 goto startParseExpression;
429 }
430 default:
431 return JSValue();
432 }
433 }
434 case StartParseStatementEndStatement: {
435 ASSERT(stateStack.isEmpty());
436 if (m_lexer.currentToken().type != TokRParen)
437 return JSValue();
438 if (m_lexer.next() == TokEnd)
439 return lastValue;
440 return JSValue();
441 }
442 default:
443 ASSERT_NOT_REACHED();
444 }
445 if (stateStack.isEmpty())
446 return lastValue;
447 state = stateStack.last();
448 stateStack.removeLast();
449 continue;
450 }
451}
452
453}
Note: See TracBrowser for help on using the repository browser.