source: webkit/trunk/JavaScriptCore/runtime/LiteralParser.cpp@ 45683

Last change on this file since 45683 was 45356, checked in by [email protected], 16 years ago

<rdar://problem/7016214> JSON.parse fails to parse valid JSON with most Unicode characters
<https://bugs.webkit.org/show_bug.cgi?id=26802>

Reviewed by Gavin Barraclough.

In the original JSON.parse patch unicode was handled correctly, however in some last
minute "clean up" I oversimplified isSafeStringCharacter. This patch corrects this bug.

File size: 14.8 KB
Line 
1/*
2 * Copyright (C) 2009 Apple Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26#include "config.h"
27#include "LiteralParser.h"
28
29#include "JSArray.h"
30#include "JSString.h"
31#include "Lexer.h"
32#include <wtf/ASCIICType.h>
33#include <wtf/dtoa.h>
34
35namespace JSC {
36
37LiteralParser::TokenType LiteralParser::Lexer::lex(LiteralParserToken& token)
38{
39 while (m_ptr < m_end && isASCIISpace(*m_ptr))
40 ++m_ptr;
41
42 ASSERT(m_ptr <= m_end);
43 if (m_ptr >= m_end) {
44 token.type = TokEnd;
45 token.start = token.end = m_ptr;
46 return TokEnd;
47 }
48 token.type = TokError;
49 token.start = m_ptr;
50 switch (*m_ptr) {
51 case '[':
52 token.type = TokLBracket;
53 token.end = ++m_ptr;
54 return TokLBracket;
55 case ']':
56 token.type = TokRBracket;
57 token.end = ++m_ptr;
58 return TokRBracket;
59 case '(':
60 token.type = TokLParen;
61 token.end = ++m_ptr;
62 return TokLBracket;
63 case ')':
64 token.type = TokRParen;
65 token.end = ++m_ptr;
66 return TokRBracket;
67 case '{':
68 token.type = TokLBrace;
69 token.end = ++m_ptr;
70 return TokLBrace;
71 case '}':
72 token.type = TokRBrace;
73 token.end = ++m_ptr;
74 return TokRBrace;
75 case ',':
76 token.type = TokComma;
77 token.end = ++m_ptr;
78 return TokComma;
79 case ':':
80 token.type = TokColon;
81 token.end = ++m_ptr;
82 return TokColon;
83 case '"':
84 if (m_mode == StrictJSON)
85 return lexString<StrictJSON>(token);
86 return lexString<NonStrictJSON>(token);
87 case 't':
88 if (m_end - m_ptr >= 4 && m_ptr[1] == 'r' && m_ptr[2] == 'u' && m_ptr[3] == 'e') {
89 m_ptr += 4;
90 token.type = TokTrue;
91 token.end = m_ptr;
92 return TokTrue;
93 }
94 break;
95 case 'f':
96 if (m_end - m_ptr >= 5 && m_ptr[1] == 'a' && m_ptr[2] == 'l' && m_ptr[3] == 's' && m_ptr[4] == 'e') {
97 m_ptr += 5;
98 token.type = TokFalse;
99 token.end = m_ptr;
100 return TokFalse;
101 }
102 break;
103 case 'n':
104 if (m_end - m_ptr >= 4 && m_ptr[1] == 'u' && m_ptr[2] == 'l' && m_ptr[3] == 'l') {
105 m_ptr += 4;
106 token.type = TokNull;
107 token.end = m_ptr;
108 return TokNull;
109 }
110 break;
111 case '-':
112 case '0':
113 case '1':
114 case '2':
115 case '3':
116 case '4':
117 case '5':
118 case '6':
119 case '7':
120 case '8':
121 case '9':
122 return lexNumber(token);
123 }
124 return TokError;
125}
126
127template <LiteralParser::ParserMode mode> static inline bool isSafeStringCharacter(UChar c)
128{
129 return (c >= ' ' && (mode == LiteralParser::StrictJSON || c <= 0xff) && c != '\\' && c != '"') || c == '\t';
130}
131
132template <LiteralParser::ParserMode mode> LiteralParser::TokenType LiteralParser::Lexer::lexString(LiteralParserToken& token)
133{
134 ++m_ptr;
135 const UChar* runStart;
136 token.stringToken = UString();
137 do {
138 runStart = m_ptr;
139 while (m_ptr < m_end && isSafeStringCharacter<mode>(*m_ptr))
140 ++m_ptr;
141 if (runStart < m_ptr)
142 token.stringToken.append(runStart, m_ptr - runStart);
143 if ((mode == StrictJSON) && m_ptr < m_end && *m_ptr == '\\') {
144 ++m_ptr;
145 if (m_ptr >= m_end)
146 return TokError;
147 switch (*m_ptr) {
148 case '"':
149 token.stringToken.append('"');
150 m_ptr++;
151 break;
152 case '\\':
153 token.stringToken.append('\\');
154 m_ptr++;
155 break;
156 case '/':
157 token.stringToken.append('/');
158 m_ptr++;
159 break;
160 case 'b':
161 token.stringToken.append('\b');
162 m_ptr++;
163 break;
164 case 'f':
165 token.stringToken.append('\f');
166 m_ptr++;
167 break;
168 case 'n':
169 token.stringToken.append('\n');
170 m_ptr++;
171 break;
172 case 'r':
173 token.stringToken.append('\r');
174 m_ptr++;
175 break;
176 case 't':
177 token.stringToken.append('\t');
178 m_ptr++;
179 break;
180
181 case 'u':
182 if ((m_end - m_ptr) < 5) // uNNNN == 5 characters
183 return TokError;
184 for (int i = 1; i < 5; i++) {
185 if (!isASCIIHexDigit(m_ptr[i]))
186 return TokError;
187 }
188 token.stringToken.append(JSC::Lexer::convertUnicode(m_ptr[1], m_ptr[2], m_ptr[3], m_ptr[4]));
189 m_ptr += 5;
190 break;
191
192 default:
193 return TokError;
194 }
195 }
196 } while ((mode == StrictJSON) && m_ptr != runStart && (m_ptr < m_end) && *m_ptr != '"');
197
198 if (m_ptr >= m_end || *m_ptr != '"')
199 return TokError;
200
201 token.type = TokString;
202 token.end = ++m_ptr;
203 return TokString;
204}
205
206LiteralParser::TokenType LiteralParser::Lexer::lexNumber(LiteralParserToken& token)
207{
208 // ES5 and json.org define numbers as
209 // number
210 // int
211 // int frac? exp?
212 //
213 // int
214 // -? 0
215 // -? digit1-9 digits?
216 //
217 // digits
218 // digit digits?
219 //
220 // -?(0 | [1-9][0-9]*) ('.' [0-9]+)? ([eE][+-]? [0-9]+)?
221
222 if (m_ptr < m_end && *m_ptr == '-') // -?
223 ++m_ptr;
224
225 // (0 | [1-9][0-9]*)
226 if (m_ptr < m_end && *m_ptr == '0') // 0
227 ++m_ptr;
228 else if (m_ptr < m_end && *m_ptr >= '1' && *m_ptr <= '9') { // [1-9]
229 ++m_ptr;
230 // [0-9]*
231 while (m_ptr < m_end && isASCIIDigit(*m_ptr))
232 ++m_ptr;
233 } else
234 return TokError;
235
236 // ('.' [0-9]+)?
237 if (m_ptr < m_end && *m_ptr == '.') {
238 ++m_ptr;
239 // [0-9]+
240 if (m_ptr >= m_end || !isASCIIDigit(*m_ptr))
241 return TokError;
242
243 ++m_ptr;
244 while (m_ptr < m_end && isASCIIDigit(*m_ptr))
245 ++m_ptr;
246 }
247
248 // ([eE][+-]? [0-9]+)?
249 if (m_ptr < m_end && (*m_ptr == 'e' || *m_ptr == 'E')) { // [eE]
250 ++m_ptr;
251
252 // [-+]?
253 if (m_ptr < m_end && (*m_ptr == '-' || *m_ptr == '+'))
254 ++m_ptr;
255
256 // [0-9]+
257 if (m_ptr >= m_end || !isASCIIDigit(*m_ptr))
258 return TokError;
259
260 ++m_ptr;
261 while (m_ptr < m_end && isASCIIDigit(*m_ptr))
262 ++m_ptr;
263 }
264
265 token.type = TokNumber;
266 token.end = m_ptr;
267 Vector<char, 64> buffer(token.end - token.start + 1);
268 int i;
269 for (i = 0; i < token.end - token.start; i++) {
270 ASSERT(static_cast<char>(token.start[i]) == token.start[i]);
271 buffer[i] = static_cast<char>(token.start[i]);
272 }
273 buffer[i] = 0;
274 char* end;
275 token.numberToken = WTF::strtod(buffer.data(), &end);
276 ASSERT(buffer.data() + (token.end - token.start) == end);
277 return TokNumber;
278}
279
280JSValue LiteralParser::parse(ParserState initialState)
281{
282 ParserState state = initialState;
283 MarkedArgumentBuffer objectStack;
284 JSValue lastValue;
285 Vector<ParserState, 16> stateStack;
286 Vector<Identifier, 16> identifierStack;
287 while (1) {
288 switch(state) {
289 startParseArray:
290 case StartParseArray: {
291 JSArray* array = constructEmptyArray(m_exec);
292 objectStack.append(array);
293 // fallthrough
294 }
295 doParseArrayStartExpression:
296 case DoParseArrayStartExpression: {
297 if (m_lexer.next() == TokRBracket) {
298 m_lexer.next();
299 lastValue = objectStack.last();
300 objectStack.removeLast();
301 break;
302 }
303
304 stateStack.append(DoParseArrayEndExpression);
305 goto startParseExpression;
306 }
307 case DoParseArrayEndExpression: {
308 asArray(objectStack.last())->push(m_exec, lastValue);
309
310 if (m_lexer.currentToken().type == TokComma)
311 goto doParseArrayStartExpression;
312
313 if (m_lexer.currentToken().type != TokRBracket)
314 return JSValue();
315
316 m_lexer.next();
317 lastValue = objectStack.last();
318 objectStack.removeLast();
319 break;
320 }
321 startParseObject:
322 case StartParseObject: {
323 JSObject* object = constructEmptyObject(m_exec);
324 objectStack.append(object);
325
326 TokenType type = m_lexer.next();
327 if (type == TokString) {
328 Lexer::LiteralParserToken identifierToken = m_lexer.currentToken();
329
330 // Check for colon
331 if (m_lexer.next() != TokColon)
332 return JSValue();
333
334 m_lexer.next();
335 identifierStack.append(Identifier(m_exec, identifierToken.stringToken));
336 stateStack.append(DoParseObjectEndExpression);
337 goto startParseExpression;
338 } else if (type != TokRBrace)
339 return JSValue();
340 m_lexer.next();
341 lastValue = objectStack.last();
342 objectStack.removeLast();
343 break;
344 }
345 doParseObjectStartExpression:
346 case DoParseObjectStartExpression: {
347 TokenType type = m_lexer.next();
348 if (type != TokString)
349 return JSValue();
350 Lexer::LiteralParserToken identifierToken = m_lexer.currentToken();
351
352 // Check for colon
353 if (m_lexer.next() != TokColon)
354 return JSValue();
355
356 m_lexer.next();
357 identifierStack.append(Identifier(m_exec, identifierToken.stringToken));
358 stateStack.append(DoParseObjectEndExpression);
359 goto startParseExpression;
360 }
361 case DoParseObjectEndExpression:
362 {
363 asObject(objectStack.last())->putDirect(identifierStack.last(), lastValue);
364 identifierStack.removeLast();
365 if (m_lexer.currentToken().type == TokComma)
366 goto doParseObjectStartExpression;
367 if (m_lexer.currentToken().type != TokRBrace)
368 return JSValue();
369 m_lexer.next();
370 lastValue = objectStack.last();
371 objectStack.removeLast();
372 break;
373 }
374 startParseExpression:
375 case StartParseExpression: {
376 switch (m_lexer.currentToken().type) {
377 case TokLBracket:
378 goto startParseArray;
379 case TokLBrace:
380 goto startParseObject;
381 case TokString: {
382 Lexer::LiteralParserToken stringToken = m_lexer.currentToken();
383 m_lexer.next();
384 lastValue = jsString(m_exec, stringToken.stringToken);
385 break;
386 }
387 case TokNumber: {
388 Lexer::LiteralParserToken numberToken = m_lexer.currentToken();
389 m_lexer.next();
390 lastValue = jsNumber(m_exec, numberToken.numberToken);
391 break;
392 }
393 case TokNull:
394 m_lexer.next();
395 lastValue = jsNull();
396 break;
397
398 case TokTrue:
399 m_lexer.next();
400 lastValue = jsBoolean(true);
401 break;
402
403 case TokFalse:
404 m_lexer.next();
405 lastValue = jsBoolean(false);
406 break;
407
408 default:
409 // Error
410 return JSValue();
411 }
412 break;
413 }
414 case StartParseStatement: {
415 switch (m_lexer.currentToken().type) {
416 case TokLBracket:
417 case TokNumber:
418 case TokString:
419 goto startParseExpression;
420
421 case TokLParen: {
422 m_lexer.next();
423 stateStack.append(StartParseStatementEndStatement);
424 goto startParseExpression;
425 }
426 default:
427 return JSValue();
428 }
429 }
430 case StartParseStatementEndStatement: {
431 ASSERT(stateStack.isEmpty());
432 if (m_lexer.currentToken().type != TokRParen)
433 return JSValue();
434 if (m_lexer.next() == TokEnd)
435 return lastValue;
436 return JSValue();
437 }
438 default:
439 ASSERT_NOT_REACHED();
440 }
441 if (stateStack.isEmpty())
442 return lastValue;
443 state = stateStack.last();
444 stateStack.removeLast();
445 continue;
446 }
447}
448
449}
Note: See TracBrowser for help on using the repository browser.