Context Navigation

UString.cpp@ 58990

Visit:

Last change on this file since 58990 was 58224, checked in by Darin Adler, 15 years ago

2010-04-24 Darin Adler <Darin Adler>

Reviewed by Dan Bernstein.

REGRESSION (r56560): Crash in parseFloat if passed invalid UTF-16 data
https://bugs.webkit.org/show_bug.cgi?id=38083
rdar://problem/7901044

Tests: fast/js/ToNumber.html

fast/js/parseFloat.html

runtime/JSGlobalObjectFunctions.cpp: (JSC::parseInt): Added a FIXME comment about a problem I noticed. (JSC::parseFloat): Added a FIXME comment about a problem I noticed; covered by test cases in the test I added.
runtime/UString.cpp: (JSC::UString::toDouble): Added FIXME comments about two problems I noticed; covered by test cases in the tests I added. Added a return statement so we don't crash when illegal UTF-16 sequences are present.

2010-04-24 Darin Adler <Darin Adler>

Reviewed by Dan Bernstein.

REGRESSION (r56560): Crash in parseFloat if passed invalid UTF-16 data
https://bugs.webkit.org/show_bug.cgi?id=38083
rdar://problem/7901044

fast/js/parseFloat-expected.txt: Added.
fast/js/parseFloat.html: Added.
fast/js/script-tests/parseFloat.js: Added.

fast/js/ToNumber-expected.txt: Added.
fast/js/ToNumber.html: Added.
fast/js/script-tests/ToNumber.js: Added.

Property svn:eol-style set to native

File size: 14.7 KB

Line
1	/*
2	* Copyright (C) 1999-2000 Harri Porten ([email protected])
3	* Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
4	* Copyright (C) 2007 Cameron Zwarich ([email protected])
5	* Copyright (C) 2009 Google Inc. All rights reserved.
6	*
7	* This library is free software; you can redistribute it and/or
8	* modify it under the terms of the GNU Library General Public
9	* License as published by the Free Software Foundation; either
10	* version 2 of the License, or (at your option) any later version.
11	*
12	* This library is distributed in the hope that it will be useful,
13	* but WITHOUT ANY WARRANTY; without even the implied warranty of
14	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15	* Library General Public License for more details.
16	*
17	* You should have received a copy of the GNU Library General Public License
18	* along with this library; see the file COPYING.LIB. If not, write to
19	* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20	* Boston, MA 02110-1301, USA.
21	*
22	*/
23
24	#include "config.h"
25	#include "UString.h"
26
27	#include "JSGlobalObjectFunctions.h"
28	#include "Collector.h"
29	#include "dtoa.h"
30	#include "Identifier.h"
31	#include "Operations.h"
32	#include <ctype.h>
33	#include <limits.h>
34	#include <limits>
35	#include <math.h>
36	#include <stdio.h>
37	#include <stdlib.h>
38	#include <string.h>
39	#include <wtf/ASCIICType.h>
40	#include <wtf/Assertions.h>
41	#include <wtf/MathExtras.h>
42	#include <wtf/StringExtras.h>
43	#include <wtf/Vector.h>
44	#include <wtf/unicode/UTF8.h>
45	#include <wtf/StringExtras.h>
46
47	#if HAVE(STRINGS_H)
48	#include <strings.h>
49	#endif
50
51	using namespace WTF;
52	using namespace WTF::Unicode;
53	using namespace std;
54
55	namespace JSC {
56
57	extern const double NaN;
58	extern const double Inf;
59
60	// The null string is immutable, except for refCount.
61	UString* UString::s_nullUString;
62
63	void initializeUString()
64	{
65	// UStringImpl::empty() does not construct its static string in a threadsafe fashion,
66	// so ensure it has been initialized from here.
67	UStringImpl::empty();
68
69	UString::s_nullUString = new UString;
70	}
71
72	UString::UString(const char* c)
73	: m_rep(Rep::create(c))
74	{
75	}
76
77	UString::UString(const char* c, unsigned length)
78	: m_rep(Rep::create(c, length))
79	{
80	}
81
82	UString::UString(const UChar* c, unsigned length)
83	: m_rep(Rep::create(c, length))
84	{
85	}
86
87	UString UString::from(int i)
88	{
89	UChar buf[1 + sizeof(i) * 3];
90	UChar* end = buf + sizeof(buf) / sizeof(UChar);
91	UChar* p = end;
92
93	if (i == 0)
94	*--p = '0';
95	else if (i == INT_MIN) {
96	char minBuf[1 + sizeof(i) * 3];
97	sprintf(minBuf, "%d", INT_MIN);
98	return UString(minBuf);
99	} else {
100	bool negative = false;
101	if (i < 0) {
102	negative = true;
103	i = -i;
104	}
105	while (i) {
106	*--p = static_cast<unsigned short>((i % 10) + '0');
107	i /= 10;
108	}
109	if (negative)
110	*--p = '-';
111	}
112
113	return UString(p, static_cast<unsigned>(end - p));
114	}
115
116	UString UString::from(long long i)
117	{
118	UChar buf[1 + sizeof(i) * 3];
119	UChar* end = buf + sizeof(buf) / sizeof(UChar);
120	UChar* p = end;
121
122	if (i == 0)
123	*--p = '0';
124	else if (i == std::numeric_limits<long long>::min()) {
125	char minBuf[1 + sizeof(i) * 3];
126	#if OS(WINDOWS)
127	snprintf(minBuf, sizeof(minBuf) - 1, "%I64d", std::numeric_limits<long long>::min());
128	#else
129	snprintf(minBuf, sizeof(minBuf) - 1, "%lld", std::numeric_limits<long long>::min());
130	#endif
131	return UString(minBuf);
132	} else {
133	bool negative = false;
134	if (i < 0) {
135	negative = true;
136	i = -i;
137	}
138	while (i) {
139	*--p = static_cast<unsigned short>((i % 10) + '0');
140	i /= 10;
141	}
142	if (negative)
143	*--p = '-';
144	}
145
146	return UString(p, static_cast<unsigned>(end - p));
147	}
148
149	UString UString::from(unsigned u)
150	{
151	UChar buf[sizeof(u) * 3];
152	UChar* end = buf + sizeof(buf) / sizeof(UChar);
153	UChar* p = end;
154
155	if (u == 0)
156	*--p = '0';
157	else {
158	while (u) {
159	*--p = static_cast<unsigned short>((u % 10) + '0');
160	u /= 10;
161	}
162	}
163
164	return UString(p, static_cast<unsigned>(end - p));
165	}
166
167	UString UString::from(long l)
168	{
169	UChar buf[1 + sizeof(l) * 3];
170	UChar* end = buf + sizeof(buf) / sizeof(UChar);
171	UChar* p = end;
172
173	if (l == 0)
174	*--p = '0';
175	else if (l == LONG_MIN) {
176	char minBuf[1 + sizeof(l) * 3];
177	sprintf(minBuf, "%ld", LONG_MIN);
178	return UString(minBuf);
179	} else {
180	bool negative = false;
181	if (l < 0) {
182	negative = true;
183	l = -l;
184	}
185	while (l) {
186	*--p = static_cast<unsigned short>((l % 10) + '0');
187	l /= 10;
188	}
189	if (negative)
190	*--p = '-';
191	}
192
193	return UString(p, end - p);
194	}
195
196	UString UString::from(double d)
197	{
198	DtoaBuffer buffer;
199	unsigned length;
200	doubleToStringInJavaScriptFormat(d, buffer, &length);
201	return UString(buffer, length);
202	}
203
204	char* UString::ascii() const
205	{
206	static char* asciiBuffer = 0;
207
208	unsigned length = size();
209	unsigned neededSize = length + 1;
210	delete[] asciiBuffer;
211	asciiBuffer = new char[neededSize];
212
213	const UChar* p = data();
214	char* q = asciiBuffer;
215	const UChar* limit = p + length;
216	while (p != limit) {
217	*q = static_cast<char>(p[0]);
218	++p;
219	++q;
220	}
221	*q = '\0';
222
223	return asciiBuffer;
224	}
225
226	bool UString::is8Bit() const
227	{
228	const UChar* u = data();
229	const UChar* limit = u + size();
230	while (u < limit) {
231	if (u[0] > 0xFF)
232	return false;
233	++u;
234	}
235
236	return true;
237	}
238
239	UChar UString::operator[](unsigned pos) const
240	{
241	if (pos >= size())
242	return '\0';
243	return data()[pos];
244	}
245
246	double UString::toDouble(bool tolerateTrailingJunk, bool tolerateEmptyString) const
247	{
248	if (size() == 1) {
249	UChar c = data()[0];
250	if (isASCIIDigit(c))
251	return c - '0';
252	if (isASCIISpace(c) && tolerateEmptyString)
253	return 0;
254	return NaN;
255	}
256
257	// FIXME: If tolerateTrailingJunk is true, then we want to tolerate junk
258	// after the number, even if it contains invalid UTF-16 sequences. So we
259	// shouldn't use the UTF8String function, which returns null when it
260	// encounters invalid UTF-16. Further, we have no need to convert the
261	// non-ASCII characters to UTF-8, so the UTF8String does quite a bit of
262	// unnecessary work.
263	CString s = UTF8String();
264	if (s.isNull())
265	return NaN;
266	const char* c = s.data();
267
268	// skip leading white space
269	while (isASCIISpace(*c))
270	c++;
271
272	// empty string ?
273	if (*c == '\0')
274	return tolerateEmptyString ? 0.0 : NaN;
275
276	double d;
277
278	// hex number ?
279	if (c == '0' && ((c + 1) == 'x' \|\| *(c + 1) == 'X')) {
280	const char* firstDigitPosition = c + 2;
281	c++;
282	d = 0.0;
283	while (*(++c)) {
284	if (c >= '0' && c <= '9')
285	d = d * 16.0 + *c - '0';
286	else if ((c >= 'A' && c <= 'F') \|\| (c >= 'a' && c <= 'f'))
287	d = d * 16.0 + (*c & 0xdf) - 'A' + 10.0;
288	else
289	break;
290	}
291
292	if (d >= mantissaOverflowLowerBound)
293	d = parseIntOverflow(firstDigitPosition, c - firstDigitPosition, 16);
294	} else {
295	// regular number ?
296	char* end;
297	d = WTF::strtod(c, &end);
298	if ((d != 0.0 \|\| end != c) && d != Inf && d != -Inf) {
299	c = end;
300	} else {
301	double sign = 1.0;
302
303	if (*c == '+')
304	c++;
305	else if (*c == '-') {
306	sign = -1.0;
307	c++;
308	}
309
310	// We used strtod() to do the conversion. However, strtod() handles
311	// infinite values slightly differently than JavaScript in that it
312	// converts the string "inf" with any capitalization to infinity,
313	// whereas the ECMA spec requires that it be converted to NaN.
314
315	if (c[0] == 'I' && c[1] == 'n' && c[2] == 'f' && c[3] == 'i' && c[4] == 'n' && c[5] == 'i' && c[6] == 't' && c[7] == 'y') {
316	d = sign * Inf;
317	c += 8;
318	} else if ((d == Inf \|\| d == -Inf) && c != 'I' && c != 'i')
319	c = end;
320	else
321	return NaN;
322	}
323	}
324
325	// allow trailing white space
326	while (isASCIISpace(*c))
327	c++;
328	// don't allow anything after - unless tolerant=true
329	// FIXME: If string contains a U+0000 character, then this check is incorrect.
330	if (!tolerateTrailingJunk && *c != '\0')
331	d = NaN;
332
333	return d;
334	}
335
336	double UString::toDouble(bool tolerateTrailingJunk) const
337	{
338	return toDouble(tolerateTrailingJunk, true);
339	}
340
341	double UString::toDouble() const
342	{
343	return toDouble(false, true);
344	}
345
346	uint32_t UString::toUInt32(bool* ok) const
347	{
348	double d = toDouble();
349	bool b = true;
350
351	if (d != static_cast<uint32_t>(d)) {
352	b = false;
353	d = 0;
354	}
355
356	if (ok)
357	*ok = b;
358
359	return static_cast<uint32_t>(d);
360	}
361
362	uint32_t UString::toUInt32(bool* ok, bool tolerateEmptyString) const
363	{
364	double d = toDouble(false, tolerateEmptyString);
365	bool b = true;
366
367	if (d != static_cast<uint32_t>(d)) {
368	b = false;
369	d = 0;
370	}
371
372	if (ok)
373	*ok = b;
374
375	return static_cast<uint32_t>(d);
376	}
377
378	uint32_t UString::toStrictUInt32(bool* ok) const
379	{
380	if (ok)
381	*ok = false;
382
383	// Empty string is not OK.
384	unsigned len = m_rep->length();
385	if (len == 0)
386	return 0;
387	const UChar* p = m_rep->characters();
388	unsigned short c = p[0];
389
390	// If the first digit is 0, only 0 itself is OK.
391	if (c == '0') {
392	if (len == 1 && ok)
393	*ok = true;
394	return 0;
395	}
396
397	// Convert to UInt32, checking for overflow.
398	uint32_t i = 0;
399	while (1) {
400	// Process character, turning it into a digit.
401	if (c < '0' \|\| c > '9')
402	return 0;
403	const unsigned d = c - '0';
404
405	// Multiply by 10, checking for overflow out of 32 bits.
406	if (i > 0xFFFFFFFFU / 10)
407	return 0;
408	i *= 10;
409
410	// Add in the digit, checking for overflow out of 32 bits.
411	const unsigned max = 0xFFFFFFFFU - d;
412	if (i > max)
413	return 0;
414	i += d;
415
416	// Handle end of string.
417	if (--len == 0) {
418	if (ok)
419	*ok = true;
420	return i;
421	}
422
423	// Get next character.
424	c = *(++p);
425	}
426	}
427
428	unsigned UString::find(const UString& f, unsigned pos) const
429	{
430	unsigned fsz = f.size();
431
432	if (fsz == 1) {
433	UChar ch = f[0];
434	const UChar* end = data() + size();
435	for (const UChar* c = data() + pos; c < end; c++) {
436	if (*c == ch)
437	return static_cast<unsigned>(c - data());
438	}
439	return NotFound;
440	}
441
442	unsigned sz = size();
443	if (sz < fsz)
444	return NotFound;
445	if (fsz == 0)
446	return pos;
447	const UChar* end = data() + sz - fsz;
448	unsigned fsizeminusone = (fsz - 1) * sizeof(UChar);
449	const UChar* fdata = f.data();
450	unsigned short fchar = fdata[0];
451	++fdata;
452	for (const UChar* c = data() + pos; c <= end; c++) {
453	if (c[0] == fchar && !memcmp(c + 1, fdata, fsizeminusone))
454	return static_cast<unsigned>(c - data());
455	}
456
457	return NotFound;
458	}
459
460	unsigned UString::find(UChar ch, unsigned pos) const
461	{
462	const UChar* end = data() + size();
463	for (const UChar* c = data() + pos; c < end; c++) {
464	if (*c == ch)
465	return static_cast<unsigned>(c - data());
466	}
467
468	return NotFound;
469	}
470
471	unsigned UString::rfind(const UString& f, unsigned pos) const
472	{
473	unsigned sz = size();
474	unsigned fsz = f.size();
475	if (sz < fsz)
476	return NotFound;
477	if (pos > sz - fsz)
478	pos = sz - fsz;
479	if (fsz == 0)
480	return pos;
481	unsigned fsizeminusone = (fsz - 1) * sizeof(UChar);
482	const UChar* fdata = f.data();
483	for (const UChar* c = data() + pos; c >= data(); c--) {
484	if (c == fdata && !memcmp(c + 1, fdata + 1, fsizeminusone))
485	return static_cast<unsigned>(c - data());
486	}
487
488	return NotFound;
489	}
490
491	unsigned UString::rfind(UChar ch, unsigned pos) const
492	{
493	if (isEmpty())
494	return NotFound;
495	if (pos + 1 >= size())
496	pos = size() - 1;
497	for (const UChar* c = data() + pos; c >= data(); c--) {
498	if (*c == ch)
499	return static_cast<unsigned>(c - data());
500	}
501
502	return NotFound;
503	}
504
505	UString UString::substr(unsigned pos, unsigned len) const
506	{
507	unsigned s = size();
508
509	if (pos >= s)
510	pos = s;
511	unsigned limit = s - pos;
512	if (len > limit)
513	len = limit;
514
515	if (pos == 0 && len == s)
516	return *this;
517
518	return UString(Rep::create(m_rep, pos, len));
519	}
520
521	bool operator==(const UString& s1, const char *s2)
522	{
523	if (s2 == 0)
524	return s1.isEmpty();
525
526	const UChar* u = s1.data();
527	const UChar* uend = u + s1.size();
528	while (u != uend && *s2) {
529	if (u[0] != (unsigned char)*s2)
530	return false;
531	s2++;
532	u++;
533	}
534
535	return u == uend && *s2 == 0;
536	}
537
538	bool operator<(const UString& s1, const UString& s2)
539	{
540	const unsigned l1 = s1.size();
541	const unsigned l2 = s2.size();
542	const unsigned lmin = l1 < l2 ? l1 : l2;
543	const UChar* c1 = s1.data();
544	const UChar* c2 = s2.data();
545	unsigned l = 0;
546	while (l < lmin && c1 == c2) {
547	c1++;
548	c2++;
549	l++;
550	}
551	if (l < lmin)
552	return (c1[0] < c2[0]);
553
554	return (l1 < l2);
555	}
556
557	bool operator>(const UString& s1, const UString& s2)
558	{
559	const unsigned l1 = s1.size();
560	const unsigned l2 = s2.size();
561	const unsigned lmin = l1 < l2 ? l1 : l2;
562	const UChar* c1 = s1.data();
563	const UChar* c2 = s2.data();
564	unsigned l = 0;
565	while (l < lmin && c1 == c2) {
566	c1++;
567	c2++;
568	l++;
569	}
570	if (l < lmin)
571	return (c1[0] > c2[0]);
572
573	return (l1 > l2);
574	}
575
576	int compare(const UString& s1, const UString& s2)
577	{
578	const unsigned l1 = s1.size();
579	const unsigned l2 = s2.size();
580	const unsigned lmin = l1 < l2 ? l1 : l2;
581	const UChar* c1 = s1.data();
582	const UChar* c2 = s2.data();
583	unsigned l = 0;
584	while (l < lmin && c1 == c2) {
585	c1++;
586	c2++;
587	l++;
588	}
589
590	if (l < lmin)
591	return (c1[0] > c2[0]) ? 1 : -1;
592
593	if (l1 == l2)
594	return 0;
595
596	return (l1 > l2) ? 1 : -1;
597	}
598
599	CString UString::UTF8String(bool strict) const
600	{
601	// Allocate a buffer big enough to hold all the characters.
602	const unsigned length = size();
603	Vector<char, 1024> buffer(length * 3);
604
605	// Convert to runs of 8-bit characters.
606	char* p = buffer.data();
607	const UChar* d = reinterpret_cast<const UChar*>(&data()[0]);
608	ConversionResult result = convertUTF16ToUTF8(&d, d + length, &p, p + buffer.size(), strict);
609	if (result != conversionOK)
610	return CString();
611
612	return CString(buffer.data(), p - buffer.data());
613	}
614
615	} // namespace JSC

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: webkit/trunk/JavaScriptCore/runtime/UString.cpp@ 58990

Download in other formats: