source: webkit/trunk/JavaScriptCore/runtime/LiteralParser.cpp@ 44076

Last change on this file since 44076 was 43424, checked in by [email protected], 16 years ago

Add a limited literal parser for eval to handle object and array literals fired at eval

Reviewed by Gavin Barraclough and Darin Adler.

This is a simplified parser and lexer that we can throw at strings passed to eval
in case a site is using eval to parse JSON (e.g. json2.js). The lexer is intentionally
limited (in effect it's whitelisting a limited "common" subset of the JSON grammar)
as this decreases the likelihood of us wasting time attempting to parse any significant
amount of non-JSON content.

File size: 8.0 KB
Line 
1/*
2 * Copyright (C) 2009 Apple Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26#include "config.h"
27#include "LiteralParser.h"
28
29#include "JSArray.h"
30#include "JSString.h"
31#include <wtf/ASCIICType.h>
32
33namespace JSC {
34
35class LiteralParser::StackGuard {
36public:
37 StackGuard(LiteralParser* parser)
38 : m_parser(parser)
39 {
40 m_parser->m_depth++;
41 }
42 ~StackGuard()
43 {
44 m_parser->m_depth--;
45 }
46 bool isSafe() { return m_parser->m_depth < 10; }
47private:
48 LiteralParser* m_parser;
49};
50
51static bool isSafeStringCharacter(UChar c)
52{
53 return (c >= ' ' && c <= 0xff && c != '\\') || c == '\t';
54}
55
56LiteralParser::TokenType LiteralParser::Lexer::lex(LiteralParserToken& token)
57{
58 while (m_ptr < m_end && isASCIISpace(*m_ptr))
59 ++m_ptr;
60
61 ASSERT(m_ptr <= m_end);
62 if (m_ptr >= m_end) {
63 token.type = TokEnd;
64 token.start = token.end = m_ptr;
65 return TokEnd;
66 }
67 token.type = TokError;
68 token.start = m_ptr;
69 switch (*m_ptr) {
70 case '[':
71 token.type = TokLBracket;
72 token.end = ++m_ptr;
73 return TokLBracket;
74 case ']':
75 token.type = TokRBracket;
76 token.end = ++m_ptr;
77 return TokRBracket;
78 case '(':
79 token.type = TokLParen;
80 token.end = ++m_ptr;
81 return TokLBracket;
82 case ')':
83 token.type = TokRParen;
84 token.end = ++m_ptr;
85 return TokRBracket;
86 case '{':
87 token.type = TokLBrace;
88 token.end = ++m_ptr;
89 return TokLBrace;
90 case '}':
91 token.type = TokRBrace;
92 token.end = ++m_ptr;
93 return TokRBrace;
94 case ',':
95 token.type = TokComma;
96 token.end = ++m_ptr;
97 return TokComma;
98 case ':':
99 token.type = TokColon;
100 token.end = ++m_ptr;
101 return TokColon;
102 case '"':
103 case '\'':
104 return lexString(token);
105
106 // Numbers are trickier so we only allow the most basic form, basically
107 // * [1-9][0-9]*(\.[0-9]*)?
108 // * \.[0-9]*
109 // * 0(\.[0-9]*)?
110 case '0':
111 // If a number starts with 0 it's expected to be octal. It seems silly
112 // to attempt to handle this case, so we abort
113 if (m_ptr < m_end - 1 && isASCIIDigit(m_ptr[1]))
114 return TokError;
115 return lexNumber(token);
116 case '.':
117 // If a number starts with a '.' it must be followed by a digit
118 if (!(m_ptr < m_end - 1 && isASCIIDigit(m_ptr[1])))
119 return TokError;
120 return lexNumber(token);
121 case '1':
122 case '2':
123 case '3':
124 case '4':
125 case '5':
126 case '6':
127 case '7':
128 case '8':
129 case '9':
130 return lexNumber(token);
131 }
132 return TokError;
133}
134
135LiteralParser::TokenType LiteralParser::Lexer::lexString(LiteralParserToken& token)
136{
137 UChar terminator = *m_ptr;
138 ++m_ptr;
139 while (m_ptr < m_end && isSafeStringCharacter(*m_ptr) && *m_ptr != terminator)
140 ++m_ptr;
141 if (m_ptr >= m_end || *m_ptr != terminator) {
142 token.type = TokError;
143 token.end = ++m_ptr;
144 return TokError;
145 }
146 token.type = TokString;
147 token.end = ++m_ptr;
148 return TokString;
149}
150
151LiteralParser::TokenType LiteralParser::Lexer::lexNumber(LiteralParserToken& token)
152{
153 bool beginsWithDot = *m_ptr == '.';
154 ++m_ptr;
155 while (m_ptr < m_end && isASCIIDigit(*m_ptr))
156 ++m_ptr;
157
158 if (!beginsWithDot && m_ptr < m_end - 1 && *m_ptr == '.') {
159 ++m_ptr;
160 while (m_ptr < m_end && isASCIIDigit(*m_ptr))
161 ++m_ptr;
162 }
163
164 if (m_ptr < m_end) {
165 if (*m_ptr == 'x' || *m_ptr == 'X' || *m_ptr == 'e' || *m_ptr == 'E') {
166 token.type = TokError;
167 return TokError;
168 }
169 }
170 token.type = TokNumber;
171 token.end = m_ptr;
172 return TokNumber;
173}
174
175JSValue LiteralParser::parseStatement()
176{
177 StackGuard guard(this);
178 if (!guard.isSafe())
179 return abortParse();
180
181 switch (m_lexer.currentToken().type) {
182 case TokLBracket:
183 case TokNumber:
184 case TokString:
185 return parseExpression();
186 case TokLParen: {
187 m_lexer.next();
188 JSValue result = parseExpression();
189 if (m_aborted || m_lexer.currentToken().type != TokRParen)
190 return abortParse();
191 m_lexer.next();
192 return result;
193 }
194 default:
195 return abortParse();
196 }
197}
198
199JSValue LiteralParser::parseExpression()
200{
201 StackGuard guard(this);
202 if (!guard.isSafe())
203 return abortParse();
204 switch (m_lexer.currentToken().type) {
205 case TokLBracket:
206 return parseArray();
207 case TokLBrace:
208 return parseObject();
209 case TokString: {
210 Lexer::LiteralParserToken stringToken = m_lexer.currentToken();
211 m_lexer.next();
212 return jsString(m_exec, UString(stringToken.start + 1, stringToken.end - stringToken.start - 2));
213 }
214 case TokNumber: {
215 Lexer::LiteralParserToken numberToken = m_lexer.currentToken();
216 m_lexer.next();
217 return jsNumber(m_exec, UString(numberToken.start, numberToken.end - numberToken.start).toDouble());
218 }
219 default:
220 return JSValue();
221 }
222}
223
224JSValue LiteralParser::parseArray()
225{
226 StackGuard guard(this);
227 if (!guard.isSafe())
228 return abortParse();
229 JSArray* array = constructEmptyArray(m_exec);
230 while (true) {
231 m_lexer.next();
232 JSValue value = parseExpression();
233 if (m_aborted)
234 return JSValue();
235 if (!value)
236 break;
237 array->push(m_exec, value);
238
239 if (m_lexer.currentToken().type != TokComma)
240 break;
241 }
242 if (m_lexer.currentToken().type != TokRBracket)
243 return abortParse();
244
245 m_lexer.next();
246 return array;
247}
248
249JSValue LiteralParser::parseObject()
250{
251 StackGuard guard(this);
252 if (!guard.isSafe())
253 return abortParse();
254 JSObject* object = constructEmptyObject(m_exec);
255
256 while (m_lexer.next() == TokString) {
257 Lexer::LiteralParserToken identifierToken = m_lexer.currentToken();
258
259 // Check for colon
260 if (m_lexer.next() != TokColon)
261 return abortParse();
262 m_lexer.next();
263
264 JSValue value = parseExpression();
265 if (!value || m_aborted)
266 return abortParse();
267
268 Identifier ident(m_exec, identifierToken.start + 1, identifierToken.end - identifierToken.start - 2);
269 object->putDirect(ident, value);
270
271 if (m_lexer.currentToken().type != TokComma)
272 break;
273 }
274
275 if (m_lexer.currentToken().type != TokRBrace)
276 return abortParse();
277 m_lexer.next();
278 return object;
279}
280
281}
Note: See TracBrowser for help on using the repository browser.