Context Navigation

LiteralParser.cpp@ 48788

Visit:

Last change on this file since 48788 was 47828, checked in by [email protected], 16 years ago

JSON parser accepts trailing comma in array literals
https://bugs.webkit.org/show_bug.cgi?id=28779

Reviewed by Alexey Proskuryakov.

Update parser to correctly fail if there's a trailing comma.

File size: 15.0 KB

Line
1	/*
2	* Copyright (C) 2009 Apple Inc. All rights reserved.
3	*
4	* Redistribution and use in source and binary forms, with or without
5	* modification, are permitted provided that the following conditions
6	* are met:
7	* 1. Redistributions of source code must retain the above copyright
8	* notice, this list of conditions and the following disclaimer.
9	* 2. Redistributions in binary form must reproduce the above copyright
10	* notice, this list of conditions and the following disclaimer in the
11	* documentation and/or other materials provided with the distribution.
12	*
13	* THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14	* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16	* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
17	* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18	* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19	* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20	* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21	* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23	* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24	*/
25
26	#include "config.h"
27	#include "LiteralParser.h"
28
29	#include "JSArray.h"
30	#include "JSString.h"
31	#include "Lexer.h"
32	#include <wtf/ASCIICType.h>
33	#include <wtf/dtoa.h>
34
35	namespace JSC {
36
37	LiteralParser::TokenType LiteralParser::Lexer::lex(LiteralParserToken& token)
38	{
39	while (m_ptr < m_end && isASCIISpace(*m_ptr))
40	++m_ptr;
41
42	ASSERT(m_ptr <= m_end);
43	if (m_ptr >= m_end) {
44	token.type = TokEnd;
45	token.start = token.end = m_ptr;
46	return TokEnd;
47	}
48	token.type = TokError;
49	token.start = m_ptr;
50	switch (*m_ptr) {
51	case '[':
52	token.type = TokLBracket;
53	token.end = ++m_ptr;
54	return TokLBracket;
55	case ']':
56	token.type = TokRBracket;
57	token.end = ++m_ptr;
58	return TokRBracket;
59	case '(':
60	token.type = TokLParen;
61	token.end = ++m_ptr;
62	return TokLBracket;
63	case ')':
64	token.type = TokRParen;
65	token.end = ++m_ptr;
66	return TokRBracket;
67	case '{':
68	token.type = TokLBrace;
69	token.end = ++m_ptr;
70	return TokLBrace;
71	case '}':
72	token.type = TokRBrace;
73	token.end = ++m_ptr;
74	return TokRBrace;
75	case ',':
76	token.type = TokComma;
77	token.end = ++m_ptr;
78	return TokComma;
79	case ':':
80	token.type = TokColon;
81	token.end = ++m_ptr;
82	return TokColon;
83	case '"':
84	if (m_mode == StrictJSON)
85	return lexString<StrictJSON>(token);
86	return lexString<NonStrictJSON>(token);
87	case 't':
88	if (m_end - m_ptr >= 4 && m_ptr[1] == 'r' && m_ptr[2] == 'u' && m_ptr[3] == 'e') {
89	m_ptr += 4;
90	token.type = TokTrue;
91	token.end = m_ptr;
92	return TokTrue;
93	}
94	break;
95	case 'f':
96	if (m_end - m_ptr >= 5 && m_ptr[1] == 'a' && m_ptr[2] == 'l' && m_ptr[3] == 's' && m_ptr[4] == 'e') {
97	m_ptr += 5;
98	token.type = TokFalse;
99	token.end = m_ptr;
100	return TokFalse;
101	}
102	break;
103	case 'n':
104	if (m_end - m_ptr >= 4 && m_ptr[1] == 'u' && m_ptr[2] == 'l' && m_ptr[3] == 'l') {
105	m_ptr += 4;
106	token.type = TokNull;
107	token.end = m_ptr;
108	return TokNull;
109	}
110	break;
111	case '-':
112	case '0':
113	case '1':
114	case '2':
115	case '3':
116	case '4':
117	case '5':
118	case '6':
119	case '7':
120	case '8':
121	case '9':
122	return lexNumber(token);
123	}
124	return TokError;
125	}
126
127	template <LiteralParser::ParserMode mode> static inline bool isSafeStringCharacter(UChar c)
128	{
129	return (c >= ' ' && (mode == LiteralParser::StrictJSON \|\| c <= 0xff) && c != '\\' && c != '"') \|\| c == '\t';
130	}
131
132	// "inline" is required here to help WINSCW compiler resolve specialized argument in templated functions.
133	template <LiteralParser::ParserMode mode> inline LiteralParser::TokenType LiteralParser::Lexer::lexString(LiteralParserToken& token)
134	{
135	++m_ptr;
136	const UChar* runStart;
137	token.stringToken = UString();
138	do {
139	runStart = m_ptr;
140	while (m_ptr < m_end && isSafeStringCharacter<mode>(*m_ptr))
141	++m_ptr;
142	if (runStart < m_ptr)
143	token.stringToken.append(runStart, m_ptr - runStart);
144	if ((mode == StrictJSON) && m_ptr < m_end && *m_ptr == '\\') {
145	++m_ptr;
146	if (m_ptr >= m_end)
147	return TokError;
148	switch (*m_ptr) {
149	case '"':
150	token.stringToken.append('"');
151	m_ptr++;
152	break;
153	case '\\':
154	token.stringToken.append('\\');
155	m_ptr++;
156	break;
157	case '/':
158	token.stringToken.append('/');
159	m_ptr++;
160	break;
161	case 'b':
162	token.stringToken.append('\b');
163	m_ptr++;
164	break;
165	case 'f':
166	token.stringToken.append('\f');
167	m_ptr++;
168	break;
169	case 'n':
170	token.stringToken.append('\n');
171	m_ptr++;
172	break;
173	case 'r':
174	token.stringToken.append('\r');
175	m_ptr++;
176	break;
177	case 't':
178	token.stringToken.append('\t');
179	m_ptr++;
180	break;
181
182	case 'u':
183	if ((m_end - m_ptr) < 5) // uNNNN == 5 characters
184	return TokError;
185	for (int i = 1; i < 5; i++) {
186	if (!isASCIIHexDigit(m_ptr[i]))
187	return TokError;
188	}
189	token.stringToken.append(JSC::Lexer::convertUnicode(m_ptr[1], m_ptr[2], m_ptr[3], m_ptr[4]));
190	m_ptr += 5;
191	break;
192
193	default:
194	return TokError;
195	}
196	}
197	} while ((mode == StrictJSON) && m_ptr != runStart && (m_ptr < m_end) && *m_ptr != '"');
198
199	if (m_ptr >= m_end \|\| *m_ptr != '"')
200	return TokError;
201
202	token.type = TokString;
203	token.end = ++m_ptr;
204	return TokString;
205	}
206
207	LiteralParser::TokenType LiteralParser::Lexer::lexNumber(LiteralParserToken& token)
208	{
209	// ES5 and json.org define numbers as
210	// number
211	// int
212	// int frac? exp?
213	//
214	// int
215	// -? 0
216	// -? digit1-9 digits?
217	//
218	// digits
219	// digit digits?
220	//
221	// -?(0 \| [1-9][0-9]*) ('.' [0-9]+)? ([eE][+-]? [0-9]+)?
222
223	if (m_ptr < m_end && *m_ptr == '-') // -?
224	++m_ptr;
225
226	// (0 \| [1-9][0-9]*)
227	if (m_ptr < m_end && *m_ptr == '0') // 0
228	++m_ptr;
229	else if (m_ptr < m_end && m_ptr >= '1' && m_ptr <= '9') { // [1-9]
230	++m_ptr;
231	// [0-9]*
232	while (m_ptr < m_end && isASCIIDigit(*m_ptr))
233	++m_ptr;
234	} else
235	return TokError;
236
237	// ('.' [0-9]+)?
238	if (m_ptr < m_end && *m_ptr == '.') {
239	++m_ptr;
240	// [0-9]+
241	if (m_ptr >= m_end \|\| !isASCIIDigit(*m_ptr))
242	return TokError;
243
244	++m_ptr;
245	while (m_ptr < m_end && isASCIIDigit(*m_ptr))
246	++m_ptr;
247	}
248
249	// ([eE][+-]? [0-9]+)?
250	if (m_ptr < m_end && (m_ptr == 'e' \|\| m_ptr == 'E')) { // [eE]
251	++m_ptr;
252
253	// [-+]?
254	if (m_ptr < m_end && (m_ptr == '-' \|\| m_ptr == '+'))
255	++m_ptr;
256
257	// [0-9]+
258	if (m_ptr >= m_end \|\| !isASCIIDigit(*m_ptr))
259	return TokError;
260
261	++m_ptr;
262	while (m_ptr < m_end && isASCIIDigit(*m_ptr))
263	++m_ptr;
264	}
265
266	token.type = TokNumber;
267	token.end = m_ptr;
268	Vector<char, 64> buffer(token.end - token.start + 1);
269	int i;
270	for (i = 0; i < token.end - token.start; i++) {
271	ASSERT(static_cast<char>(token.start[i]) == token.start[i]);
272	buffer[i] = static_cast<char>(token.start[i]);
273	}
274	buffer[i] = 0;
275	char* end;
276	token.numberToken = WTF::strtod(buffer.data(), &end);
277	ASSERT(buffer.data() + (token.end - token.start) == end);
278	return TokNumber;
279	}
280
// Builds a JSValue from the tokens produced by m_lexer, starting in
// |initialState|. Returns the parsed value, or a default-constructed JSValue()
// on any syntax error.
//
// Nesting is handled without recursion: the function is a state machine whose
// states are both switch cases and goto labels. When a sub-expression has to be
// parsed, the state to resume afterwards is pushed on stateStack and control
// jumps to startParseExpression; once a value is complete (the `break` paths)
// the bottom of the loop pops the next state, or returns lastValue when
// nothing is pending.
//
// NOTE(review): the expression/statement states read m_lexer.currentToken()
// without first calling next(), so the lexer is presumably primed with its
// first token before parse() is called - confirm against the caller.
281	JSValue LiteralParser::parse(ParserState initialState)
282	{
283	ParserState state = initialState;
	// Arrays/objects that are still being filled; the innermost one is last.
284	MarkedArgumentBuffer objectStack;
	// The most recently completed value; consumed by the *EndExpression states.
285	JSValue lastValue;
	// States to resume, innermost last, once the current sub-expression is done.
286	Vector<ParserState, 16> stateStack;
	// Property names whose values are still being parsed, innermost last.
287	Vector<Identifier, 16> identifierStack;
288	while (1) {
289	switch(state) {
	// '[' seen: create the array and start on its first element.
290	startParseArray:
291	case StartParseArray: {
292	JSArray* array = constructEmptyArray(m_exec);
293	objectStack.append(array);
294	// fallthrough
295	}
	// Positioned on '[' or ','; the token that follows is either ']' or the
	// start of an element.
296	doParseArrayStartExpression:
297	case DoParseArrayStartExpression: {
	// Remember what preceded this position so that "x,]" can be rejected.
298	TokenType lastToken = m_lexer.currentToken().type;
299	if (m_lexer.next() == TokRBracket) {
	// A ']' directly after a comma is a trailing comma: fail.
300	if (lastToken == TokComma)
301	return JSValue();
	// Otherwise the array is complete: step past ']' and yield it.
302	m_lexer.next();
303	lastValue = objectStack.last();
304	objectStack.removeLast();
305	break;
306	}
307
	// Parse one element, then resume in DoParseArrayEndExpression.
308	stateStack.append(DoParseArrayEndExpression);
309	goto startParseExpression;
310	}
	// An element has just been parsed into lastValue.
311	case DoParseArrayEndExpression: {
312	asArray(objectStack.last())->push(m_exec, lastValue);
313
314	if (m_lexer.currentToken().type == TokComma)
315	goto doParseArrayStartExpression;
316
	// Anything other than ',' or ']' after an element is an error.
317	if (m_lexer.currentToken().type != TokRBracket)
318	return JSValue();
319
320	m_lexer.next();
321	lastValue = objectStack.last();
322	objectStack.removeLast();
323	break;
324	}
	// '{' seen: create the object, then expect either a first "key": value
	// pair or an immediate '}'.
325	startParseObject:
326	case StartParseObject: {
327	JSObject* object = constructEmptyObject(m_exec);
328	objectStack.append(object);
329
330	TokenType type = m_lexer.next();
331	if (type == TokString) {
	// Copy the key token now; the next() calls below replace the current token.
332	Lexer::LiteralParserToken identifierToken = m_lexer.currentToken();
333
334	// Check for colon
335	if (m_lexer.next() != TokColon)
336	return JSValue();
337
	// Step onto the first token of the value, then parse it.
338	m_lexer.next();
339	identifierStack.append(Identifier(m_exec, identifierToken.stringToken));
340	stateStack.append(DoParseObjectEndExpression);
341	goto startParseExpression;
342	} else if (type != TokRBrace)
343	return JSValue();
	// Empty object: step past '}' and yield it.
344	m_lexer.next();
345	lastValue = objectStack.last();
346	objectStack.removeLast();
347	break;
348	}
	// Positioned on ',' inside an object: a string key must follow, so a
	// trailing comma before '}' is rejected here.
349	doParseObjectStartExpression:
350	case DoParseObjectStartExpression: {
351	TokenType type = m_lexer.next();
352	if (type != TokString)
353	return JSValue();
354	Lexer::LiteralParserToken identifierToken = m_lexer.currentToken();
355
356	// Check for colon
357	if (m_lexer.next() != TokColon)
358	return JSValue();
359
360	m_lexer.next();
361	identifierStack.append(Identifier(m_exec, identifierToken.stringToken));
362	stateStack.append(DoParseObjectEndExpression);
363	goto startParseExpression;
364	}
	// A property value has just been parsed into lastValue: store it under
	// the pending key, then expect ',' or '}'.
365	case DoParseObjectEndExpression:
366	{
367	asObject(objectStack.last())->putDirect(identifierStack.last(), lastValue);
368	identifierStack.removeLast();
369	if (m_lexer.currentToken().type == TokComma)
370	goto doParseObjectStartExpression;
371	if (m_lexer.currentToken().type != TokRBrace)
372	return JSValue();
373	m_lexer.next();
374	lastValue = objectStack.last();
375	objectStack.removeLast();
376	break;
377	}
	// Parse one value starting at the current token. Containers jump to their
	// own states; scalars are consumed here and stored in lastValue.
378	startParseExpression:
379	case StartParseExpression: {
380	switch (m_lexer.currentToken().type) {
381	case TokLBracket:
382	goto startParseArray;
383	case TokLBrace:
384	goto startParseObject;
385	case TokString: {
	// Copy the token before advancing; next() replaces the current token.
386	Lexer::LiteralParserToken stringToken = m_lexer.currentToken();
387	m_lexer.next();
388	lastValue = jsString(m_exec, stringToken.stringToken);
389	break;
390	}
391	case TokNumber: {
392	Lexer::LiteralParserToken numberToken = m_lexer.currentToken();
393	m_lexer.next();
394	lastValue = jsNumber(m_exec, numberToken.numberToken);
395	break;
396	}
397	case TokNull:
398	m_lexer.next();
399	lastValue = jsNull();
400	break;
401
402	case TokTrue:
403	m_lexer.next();
404	lastValue = jsBoolean(true);
405	break;
406
407	case TokFalse:
408	m_lexer.next();
409	lastValue = jsBoolean(false);
410	break;
411
412	default:
413	// Error
414	return JSValue();
415	}
	// A scalar was produced; leave the outer switch so the pending state
	// (if any) is resumed at the bottom of the loop.
416	break;
417	}
	// Statement-level entry point. Only an array, a number, a string or a
	// parenthesised expression is accepted; any other leading token, including
	// '{', fails here.
418	case StartParseStatement: {
419	switch (m_lexer.currentToken().type) {
420	case TokLBracket:
421	case TokNumber:
422	case TokString:
423	goto startParseExpression;
424
425	case TokLParen: {
	// Step past '(' and parse the inner expression; the closing ')' is
	// checked in StartParseStatementEndStatement.
426	m_lexer.next();
427	stateStack.append(StartParseStatementEndStatement);
428	goto startParseExpression;
429	}
430	default:
431	return JSValue();
432	}
433	}
	// The parenthesised expression is complete: require ')' followed by the
	// end of input.
434	case StartParseStatementEndStatement: {
435	ASSERT(stateStack.isEmpty());
436	if (m_lexer.currentToken().type != TokRParen)
437	return JSValue();
438	if (m_lexer.next() == TokEnd)
439	return lastValue;
440	return JSValue();
441	}
442	default:
443	ASSERT_NOT_REACHED();
444	}
	// A value has been completed. With nothing left to resume it is the
	// result; otherwise pop the state that was waiting for it.
445	if (stateStack.isEmpty())
446	return lastValue;
447	state = stateStack.last();
448	stateStack.removeLast();
449	continue;
450	}
451	}
452
453	}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: webkit/trunk/JavaScriptCore/runtime/LiteralParser.cpp@ 48788

Download in other formats: