Context Navigation

LiteralParser.cpp@ 44076

Visit:

Last change on this file since 44076 was 43424, checked in by [email protected], 16 years ago

Add a limited literal parser for eval to handle object and array literals fired at eval

Reviewed by Gavin Barraclough and Darin Adler.

This is a simplified parser and lexer that we can throw at strings passed to eval
in case a site is using eval to parse JSON (eg. json2.js). The lexer is intentionally
limited (in effect it's whitelisting a limited "common" subset of the JSON grammar)
as this decreases the likelihood of us wasting time attempting to parse any significant
amount of non-JSON content.

File size: 8.0 KB

Line
1	/*
2	* Copyright (C) 2009 Apple Inc. All rights reserved.
3	*
4	* Redistribution and use in source and binary forms, with or without
5	* modification, are permitted provided that the following conditions
6	* are met:
7	* 1. Redistributions of source code must retain the above copyright
8	* notice, this list of conditions and the following disclaimer.
9	* 2. Redistributions in binary form must reproduce the above copyright
10	* notice, this list of conditions and the following disclaimer in the
11	* documentation and/or other materials provided with the distribution.
12	*
13	* THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14	* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16	* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
17	* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18	* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19	* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20	* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21	* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23	* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24	*/
25
26	#include "config.h"
27	#include "LiteralParser.h"
28
29	#include "JSArray.h"
30	#include "JSString.h"
31	#include <wtf/ASCIICType.h>
32
33	namespace JSC {
34
35	class LiteralParser::StackGuard {
36	public:
37	StackGuard(LiteralParser* parser)
38	: m_parser(parser)
39	{
40	m_parser->m_depth++;
41	}
42	~StackGuard()
43	{
44	m_parser->m_depth--;
45	}
46	bool isSafe() { return m_parser->m_depth < 10; }
47	private:
48	LiteralParser* m_parser;
49	};
50
51	static bool isSafeStringCharacter(UChar c)
52	{
53	return (c >= ' ' && c <= 0xff && c != '\\') \|\| c == '\t';
54	}
55
56	LiteralParser::TokenType LiteralParser::Lexer::lex(LiteralParserToken& token)
57	{
58	while (m_ptr < m_end && isASCIISpace(*m_ptr))
59	++m_ptr;
60
61	ASSERT(m_ptr <= m_end);
62	if (m_ptr >= m_end) {
63	token.type = TokEnd;
64	token.start = token.end = m_ptr;
65	return TokEnd;
66	}
67	token.type = TokError;
68	token.start = m_ptr;
69	switch (*m_ptr) {
70	case '[':
71	token.type = TokLBracket;
72	token.end = ++m_ptr;
73	return TokLBracket;
74	case ']':
75	token.type = TokRBracket;
76	token.end = ++m_ptr;
77	return TokRBracket;
78	case '(':
79	token.type = TokLParen;
80	token.end = ++m_ptr;
81	return TokLBracket;
82	case ')':
83	token.type = TokRParen;
84	token.end = ++m_ptr;
85	return TokRBracket;
86	case '{':
87	token.type = TokLBrace;
88	token.end = ++m_ptr;
89	return TokLBrace;
90	case '}':
91	token.type = TokRBrace;
92	token.end = ++m_ptr;
93	return TokRBrace;
94	case ',':
95	token.type = TokComma;
96	token.end = ++m_ptr;
97	return TokComma;
98	case ':':
99	token.type = TokColon;
100	token.end = ++m_ptr;
101	return TokColon;
102	case '"':
103	case '\'':
104	return lexString(token);
105
106	// Numbers are trickier so we only allow the most basic form, basically
107	// * [1-9][0-9](\.[0-9])?
108	// * \.[0-9]*
109	// * 0(\.[0-9]*)?
110	case '0':
111	// If a number starts with 0 it's expected to be octal. It seems silly
112	// to attempt to handle this case, so we abort
113	if (m_ptr < m_end - 1 && isASCIIDigit(m_ptr[1]))
114	return TokError;
115	return lexNumber(token);
116	case '.':
117	// If a number starts with a '.' it must be followed by a digit
118	if (!(m_ptr < m_end - 1 && isASCIIDigit(m_ptr[1])))
119	return TokError;
120	return lexNumber(token);
121	case '1':
122	case '2':
123	case '3':
124	case '4':
125	case '5':
126	case '6':
127	case '7':
128	case '8':
129	case '9':
130	return lexNumber(token);
131	}
132	return TokError;
133	}
134
135	LiteralParser::TokenType LiteralParser::Lexer::lexString(LiteralParserToken& token)
136	{
137	UChar terminator = *m_ptr;
138	++m_ptr;
139	while (m_ptr < m_end && isSafeStringCharacter(m_ptr) && m_ptr != terminator)
140	++m_ptr;
141	if (m_ptr >= m_end \|\| *m_ptr != terminator) {
142	token.type = TokError;
143	token.end = ++m_ptr;
144	return TokError;
145	}
146	token.type = TokString;
147	token.end = ++m_ptr;
148	return TokString;
149	}
150
151	LiteralParser::TokenType LiteralParser::Lexer::lexNumber(LiteralParserToken& token)
152	{
153	bool beginsWithDot = *m_ptr == '.';
154	++m_ptr;
155	while (m_ptr < m_end && isASCIIDigit(*m_ptr))
156	++m_ptr;
157
158	if (!beginsWithDot && m_ptr < m_end - 1 && *m_ptr == '.') {
159	++m_ptr;
160	while (m_ptr < m_end && isASCIIDigit(*m_ptr))
161	++m_ptr;
162	}
163
164	if (m_ptr < m_end) {
165	if (m_ptr == 'x' \|\| m_ptr == 'X' \|\| m_ptr == 'e' \|\| m_ptr == 'E') {
166	token.type = TokError;
167	return TokError;
168	}
169	}
170	token.type = TokNumber;
171	token.end = m_ptr;
172	return TokNumber;
173	}
174
175	JSValue LiteralParser::parseStatement()
176	{
177	StackGuard guard(this);
178	if (!guard.isSafe())
179	return abortParse();
180
181	switch (m_lexer.currentToken().type) {
182	case TokLBracket:
183	case TokNumber:
184	case TokString:
185	return parseExpression();
186	case TokLParen: {
187	m_lexer.next();
188	JSValue result = parseExpression();
189	if (m_aborted \|\| m_lexer.currentToken().type != TokRParen)
190	return abortParse();
191	m_lexer.next();
192	return result;
193	}
194	default:
195	return abortParse();
196	}
197	}
198
199	JSValue LiteralParser::parseExpression()
200	{
201	StackGuard guard(this);
202	if (!guard.isSafe())
203	return abortParse();
204	switch (m_lexer.currentToken().type) {
205	case TokLBracket:
206	return parseArray();
207	case TokLBrace:
208	return parseObject();
209	case TokString: {
210	Lexer::LiteralParserToken stringToken = m_lexer.currentToken();
211	m_lexer.next();
212	return jsString(m_exec, UString(stringToken.start + 1, stringToken.end - stringToken.start - 2));
213	}
214	case TokNumber: {
215	Lexer::LiteralParserToken numberToken = m_lexer.currentToken();
216	m_lexer.next();
217	return jsNumber(m_exec, UString(numberToken.start, numberToken.end - numberToken.start).toDouble());
218	}
219	default:
220	return JSValue();
221	}
222	}
223
224	JSValue LiteralParser::parseArray()
225	{
226	StackGuard guard(this);
227	if (!guard.isSafe())
228	return abortParse();
229	JSArray* array = constructEmptyArray(m_exec);
230	while (true) {
231	m_lexer.next();
232	JSValue value = parseExpression();
233	if (m_aborted)
234	return JSValue();
235	if (!value)
236	break;
237	array->push(m_exec, value);
238
239	if (m_lexer.currentToken().type != TokComma)
240	break;
241	}
242	if (m_lexer.currentToken().type != TokRBracket)
243	return abortParse();
244
245	m_lexer.next();
246	return array;
247	}
248
249	JSValue LiteralParser::parseObject()
250	{
251	StackGuard guard(this);
252	if (!guard.isSafe())
253	return abortParse();
254	JSObject* object = constructEmptyObject(m_exec);
255
256	while (m_lexer.next() == TokString) {
257	Lexer::LiteralParserToken identifierToken = m_lexer.currentToken();
258
259	// Check for colon
260	if (m_lexer.next() != TokColon)
261	return abortParse();
262	m_lexer.next();
263
264	JSValue value = parseExpression();
265	if (!value \|\| m_aborted)
266	return abortParse();
267
268	Identifier ident(m_exec, identifierToken.start + 1, identifierToken.end - identifierToken.start - 2);
269	object->putDirect(ident, value);
270
271	if (m_lexer.currentToken().type != TokComma)
272	break;
273	}
274
275	if (m_lexer.currentToken().type != TokRBrace)
276	return abortParse();
277	m_lexer.next();
278	return object;
279	}
280
281	}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: webkit/trunk/JavaScriptCore/runtime/LiteralParser.cpp@ 44076

Download in other formats: