Context Navigation

LiteralParser.cpp@ 45683

Visit:

Last change on this file since 45683 was 45356, checked in by [email protected], 16 years ago

<rdar://problem/7016214> JSON.parse fails to parse valid JSON with most Unicode characters
<https://bugs.webkit.org/show_bug.cgi?id=26802>

Reviewed by Gavin Barraclough.

In the original JSON.parse patch unicode was handled correctly, however in some last
minute "clean up" I oversimplified isSafeStringCharacter. This patch corrects this bug.

File size: 14.8 KB

Line
1	/*
2	* Copyright (C) 2009 Apple Inc. All rights reserved.
3	*
4	* Redistribution and use in source and binary forms, with or without
5	* modification, are permitted provided that the following conditions
6	* are met:
7	* 1. Redistributions of source code must retain the above copyright
8	* notice, this list of conditions and the following disclaimer.
9	* 2. Redistributions in binary form must reproduce the above copyright
10	* notice, this list of conditions and the following disclaimer in the
11	* documentation and/or other materials provided with the distribution.
12	*
13	* THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14	* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16	* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
17	* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18	* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19	* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20	* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21	* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23	* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24	*/
25
26	#include "config.h"
27	#include "LiteralParser.h"
28
29	#include "JSArray.h"
30	#include "JSString.h"
31	#include "Lexer.h"
32	#include <wtf/ASCIICType.h>
33	#include <wtf/dtoa.h>
34
35	namespace JSC {
36
37	LiteralParser::TokenType LiteralParser::Lexer::lex(LiteralParserToken& token)
38	{
39	while (m_ptr < m_end && isASCIISpace(*m_ptr))
40	++m_ptr;
41
42	ASSERT(m_ptr <= m_end);
43	if (m_ptr >= m_end) {
44	token.type = TokEnd;
45	token.start = token.end = m_ptr;
46	return TokEnd;
47	}
48	token.type = TokError;
49	token.start = m_ptr;
50	switch (*m_ptr) {
51	case '[':
52	token.type = TokLBracket;
53	token.end = ++m_ptr;
54	return TokLBracket;
55	case ']':
56	token.type = TokRBracket;
57	token.end = ++m_ptr;
58	return TokRBracket;
59	case '(':
60	token.type = TokLParen;
61	token.end = ++m_ptr;
62	return TokLBracket;
63	case ')':
64	token.type = TokRParen;
65	token.end = ++m_ptr;
66	return TokRBracket;
67	case '{':
68	token.type = TokLBrace;
69	token.end = ++m_ptr;
70	return TokLBrace;
71	case '}':
72	token.type = TokRBrace;
73	token.end = ++m_ptr;
74	return TokRBrace;
75	case ',':
76	token.type = TokComma;
77	token.end = ++m_ptr;
78	return TokComma;
79	case ':':
80	token.type = TokColon;
81	token.end = ++m_ptr;
82	return TokColon;
83	case '"':
84	if (m_mode == StrictJSON)
85	return lexString<StrictJSON>(token);
86	return lexString<NonStrictJSON>(token);
87	case 't':
88	if (m_end - m_ptr >= 4 && m_ptr[1] == 'r' && m_ptr[2] == 'u' && m_ptr[3] == 'e') {
89	m_ptr += 4;
90	token.type = TokTrue;
91	token.end = m_ptr;
92	return TokTrue;
93	}
94	break;
95	case 'f':
96	if (m_end - m_ptr >= 5 && m_ptr[1] == 'a' && m_ptr[2] == 'l' && m_ptr[3] == 's' && m_ptr[4] == 'e') {
97	m_ptr += 5;
98	token.type = TokFalse;
99	token.end = m_ptr;
100	return TokFalse;
101	}
102	break;
103	case 'n':
104	if (m_end - m_ptr >= 4 && m_ptr[1] == 'u' && m_ptr[2] == 'l' && m_ptr[3] == 'l') {
105	m_ptr += 4;
106	token.type = TokNull;
107	token.end = m_ptr;
108	return TokNull;
109	}
110	break;
111	case '-':
112	case '0':
113	case '1':
114	case '2':
115	case '3':
116	case '4':
117	case '5':
118	case '6':
119	case '7':
120	case '8':
121	case '9':
122	return lexNumber(token);
123	}
124	return TokError;
125	}
126
127	template <LiteralParser::ParserMode mode> static inline bool isSafeStringCharacter(UChar c)
128	{
129	return (c >= ' ' && (mode == LiteralParser::StrictJSON \|\| c <= 0xff) && c != '\\' && c != '"') \|\| c == '\t';
130	}
131
132	template <LiteralParser::ParserMode mode> LiteralParser::TokenType LiteralParser::Lexer::lexString(LiteralParserToken& token)
133	{
134	++m_ptr;
135	const UChar* runStart;
136	token.stringToken = UString();
137	do {
138	runStart = m_ptr;
139	while (m_ptr < m_end && isSafeStringCharacter<mode>(*m_ptr))
140	++m_ptr;
141	if (runStart < m_ptr)
142	token.stringToken.append(runStart, m_ptr - runStart);
143	if ((mode == StrictJSON) && m_ptr < m_end && *m_ptr == '\\') {
144	++m_ptr;
145	if (m_ptr >= m_end)
146	return TokError;
147	switch (*m_ptr) {
148	case '"':
149	token.stringToken.append('"');
150	m_ptr++;
151	break;
152	case '\\':
153	token.stringToken.append('\\');
154	m_ptr++;
155	break;
156	case '/':
157	token.stringToken.append('/');
158	m_ptr++;
159	break;
160	case 'b':
161	token.stringToken.append('\b');
162	m_ptr++;
163	break;
164	case 'f':
165	token.stringToken.append('\f');
166	m_ptr++;
167	break;
168	case 'n':
169	token.stringToken.append('\n');
170	m_ptr++;
171	break;
172	case 'r':
173	token.stringToken.append('\r');
174	m_ptr++;
175	break;
176	case 't':
177	token.stringToken.append('\t');
178	m_ptr++;
179	break;
180
181	case 'u':
182	if ((m_end - m_ptr) < 5) // uNNNN == 5 characters
183	return TokError;
184	for (int i = 1; i < 5; i++) {
185	if (!isASCIIHexDigit(m_ptr[i]))
186	return TokError;
187	}
188	token.stringToken.append(JSC::Lexer::convertUnicode(m_ptr[1], m_ptr[2], m_ptr[3], m_ptr[4]));
189	m_ptr += 5;
190	break;
191
192	default:
193	return TokError;
194	}
195	}
196	} while ((mode == StrictJSON) && m_ptr != runStart && (m_ptr < m_end) && *m_ptr != '"');
197
198	if (m_ptr >= m_end \|\| *m_ptr != '"')
199	return TokError;
200
201	token.type = TokString;
202	token.end = ++m_ptr;
203	return TokString;
204	}
205
206	LiteralParser::TokenType LiteralParser::Lexer::lexNumber(LiteralParserToken& token)
207	{
208	// ES5 and json.org define numbers as
209	// number
210	// int
211	// int frac? exp?
212	//
213	// int
214	// -? 0
215	// -? digit1-9 digits?
216	//
217	// digits
218	// digit digits?
219	//
220	// -?(0 \| [1-9][0-9]*) ('.' [0-9]+)? ([eE][+-]? [0-9]+)?
221
222	if (m_ptr < m_end && *m_ptr == '-') // -?
223	++m_ptr;
224
225	// (0 \| [1-9][0-9]*)
226	if (m_ptr < m_end && *m_ptr == '0') // 0
227	++m_ptr;
228	else if (m_ptr < m_end && m_ptr >= '1' && m_ptr <= '9') { // [1-9]
229	++m_ptr;
230	// [0-9]*
231	while (m_ptr < m_end && isASCIIDigit(*m_ptr))
232	++m_ptr;
233	} else
234	return TokError;
235
236	// ('.' [0-9]+)?
237	if (m_ptr < m_end && *m_ptr == '.') {
238	++m_ptr;
239	// [0-9]+
240	if (m_ptr >= m_end \|\| !isASCIIDigit(*m_ptr))
241	return TokError;
242
243	++m_ptr;
244	while (m_ptr < m_end && isASCIIDigit(*m_ptr))
245	++m_ptr;
246	}
247
248	// ([eE][+-]? [0-9]+)?
249	if (m_ptr < m_end && (m_ptr == 'e' \|\| m_ptr == 'E')) { // [eE]
250	++m_ptr;
251
252	// [-+]?
253	if (m_ptr < m_end && (m_ptr == '-' \|\| m_ptr == '+'))
254	++m_ptr;
255
256	// [0-9]+
257	if (m_ptr >= m_end \|\| !isASCIIDigit(*m_ptr))
258	return TokError;
259
260	++m_ptr;
261	while (m_ptr < m_end && isASCIIDigit(*m_ptr))
262	++m_ptr;
263	}
264
265	token.type = TokNumber;
266	token.end = m_ptr;
267	Vector<char, 64> buffer(token.end - token.start + 1);
268	int i;
269	for (i = 0; i < token.end - token.start; i++) {
270	ASSERT(static_cast<char>(token.start[i]) == token.start[i]);
271	buffer[i] = static_cast<char>(token.start[i]);
272	}
273	buffer[i] = 0;
274	char* end;
275	token.numberToken = WTF::strtod(buffer.data(), &end);
276	ASSERT(buffer.data() + (token.end - token.start) == end);
277	return TokNumber;
278	}
279
280	JSValue LiteralParser::parse(ParserState initialState)
281	{
282	ParserState state = initialState;
283	MarkedArgumentBuffer objectStack;
284	JSValue lastValue;
285	Vector<ParserState, 16> stateStack;
286	Vector<Identifier, 16> identifierStack;
287	while (1) {
288	switch(state) {
289	startParseArray:
290	case StartParseArray: {
291	JSArray* array = constructEmptyArray(m_exec);
292	objectStack.append(array);
293	// fallthrough
294	}
295	doParseArrayStartExpression:
296	case DoParseArrayStartExpression: {
297	if (m_lexer.next() == TokRBracket) {
298	m_lexer.next();
299	lastValue = objectStack.last();
300	objectStack.removeLast();
301	break;
302	}
303
304	stateStack.append(DoParseArrayEndExpression);
305	goto startParseExpression;
306	}
307	case DoParseArrayEndExpression: {
308	asArray(objectStack.last())->push(m_exec, lastValue);
309
310	if (m_lexer.currentToken().type == TokComma)
311	goto doParseArrayStartExpression;
312
313	if (m_lexer.currentToken().type != TokRBracket)
314	return JSValue();
315
316	m_lexer.next();
317	lastValue = objectStack.last();
318	objectStack.removeLast();
319	break;
320	}
321	startParseObject:
322	case StartParseObject: {
323	JSObject* object = constructEmptyObject(m_exec);
324	objectStack.append(object);
325
326	TokenType type = m_lexer.next();
327	if (type == TokString) {
328	Lexer::LiteralParserToken identifierToken = m_lexer.currentToken();
329
330	// Check for colon
331	if (m_lexer.next() != TokColon)
332	return JSValue();
333
334	m_lexer.next();
335	identifierStack.append(Identifier(m_exec, identifierToken.stringToken));
336	stateStack.append(DoParseObjectEndExpression);
337	goto startParseExpression;
338	} else if (type != TokRBrace)
339	return JSValue();
340	m_lexer.next();
341	lastValue = objectStack.last();
342	objectStack.removeLast();
343	break;
344	}
345	doParseObjectStartExpression:
346	case DoParseObjectStartExpression: {
347	TokenType type = m_lexer.next();
348	if (type != TokString)
349	return JSValue();
350	Lexer::LiteralParserToken identifierToken = m_lexer.currentToken();
351
352	// Check for colon
353	if (m_lexer.next() != TokColon)
354	return JSValue();
355
356	m_lexer.next();
357	identifierStack.append(Identifier(m_exec, identifierToken.stringToken));
358	stateStack.append(DoParseObjectEndExpression);
359	goto startParseExpression;
360	}
361	case DoParseObjectEndExpression:
362	{
363	asObject(objectStack.last())->putDirect(identifierStack.last(), lastValue);
364	identifierStack.removeLast();
365	if (m_lexer.currentToken().type == TokComma)
366	goto doParseObjectStartExpression;
367	if (m_lexer.currentToken().type != TokRBrace)
368	return JSValue();
369	m_lexer.next();
370	lastValue = objectStack.last();
371	objectStack.removeLast();
372	break;
373	}
374	startParseExpression:
375	case StartParseExpression: {
376	switch (m_lexer.currentToken().type) {
377	case TokLBracket:
378	goto startParseArray;
379	case TokLBrace:
380	goto startParseObject;
381	case TokString: {
382	Lexer::LiteralParserToken stringToken = m_lexer.currentToken();
383	m_lexer.next();
384	lastValue = jsString(m_exec, stringToken.stringToken);
385	break;
386	}
387	case TokNumber: {
388	Lexer::LiteralParserToken numberToken = m_lexer.currentToken();
389	m_lexer.next();
390	lastValue = jsNumber(m_exec, numberToken.numberToken);
391	break;
392	}
393	case TokNull:
394	m_lexer.next();
395	lastValue = jsNull();
396	break;
397
398	case TokTrue:
399	m_lexer.next();
400	lastValue = jsBoolean(true);
401	break;
402
403	case TokFalse:
404	m_lexer.next();
405	lastValue = jsBoolean(false);
406	break;
407
408	default:
409	// Error
410	return JSValue();
411	}
412	break;
413	}
414	case StartParseStatement: {
415	switch (m_lexer.currentToken().type) {
416	case TokLBracket:
417	case TokNumber:
418	case TokString:
419	goto startParseExpression;
420
421	case TokLParen: {
422	m_lexer.next();
423	stateStack.append(StartParseStatementEndStatement);
424	goto startParseExpression;
425	}
426	default:
427	return JSValue();
428	}
429	}
430	case StartParseStatementEndStatement: {
431	ASSERT(stateStack.isEmpty());
432	if (m_lexer.currentToken().type != TokRParen)
433	return JSValue();
434	if (m_lexer.next() == TokEnd)
435	return lastValue;
436	return JSValue();
437	}
438	default:
439	ASSERT_NOT_REACHED();
440	}
441	if (stateStack.isEmpty())
442	return lastValue;
443	state = stateStack.last();
444	stateStack.removeLast();
445	continue;
446	}
447	}
448
449	}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: webkit/trunk/JavaScriptCore/runtime/LiteralParser.cpp@ 45683

Download in other formats: