source: webkit/trunk/JavaScriptCore/runtime/UString.cpp@ 58990

Last change on this file since 58990 was 58224, checked in by Darin Adler, 15 years ago

2010-04-24 Darin Adler <Darin Adler>

Reviewed by Dan Bernstein.

REGRESSION (r56560): Crash in parseFloat if passed invalid UTF-16 data
https://bugs.webkit.org/show_bug.cgi?id=38083
rdar://problem/7901044

Tests: fast/js/ToNumber.html

fast/js/parseFloat.html

  • runtime/JSGlobalObjectFunctions.cpp: (JSC::parseInt): Added a FIXME comment about a problem I noticed. (JSC::parseFloat): Added a FIXME comment about a problem I noticed; covered by test cases in the test I added.
  • runtime/UString.cpp: (JSC::UString::toDouble): Added FIXME comments about two problems I noticed; covered by test cases in the tests I added. Added a return statement so we don't crash when illegal UTF-16 sequences are present.

2010-04-24 Darin Adler <Darin Adler>

Reviewed by Dan Bernstein.

REGRESSION (r56560): Crash in parseFloat if passed invalid UTF-16 data
https://bugs.webkit.org/show_bug.cgi?id=38083
rdar://problem/7901044

  • fast/js/parseFloat-expected.txt: Added.
  • fast/js/parseFloat.html: Added.
  • fast/js/script-tests/parseFloat.js: Added.
  • fast/js/ToNumber-expected.txt: Added.
  • fast/js/ToNumber.html: Added.
  • fast/js/script-tests/ToNumber.js: Added.
  • Property svn:eol-style set to native
File size: 14.7 KB
Line 
1/*
2 * Copyright (C) 1999-2000 Harri Porten ([email protected])
3 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
4 * Copyright (C) 2007 Cameron Zwarich ([email protected])
5 * Copyright (C) 2009 Google Inc. All rights reserved.
6 *
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Library General Public
9 * License as published by the Free Software Foundation; either
10 * version 2 of the License, or (at your option) any later version.
11 *
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Library General Public License for more details.
16 *
17 * You should have received a copy of the GNU Library General Public License
18 * along with this library; see the file COPYING.LIB. If not, write to
19 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20 * Boston, MA 02110-1301, USA.
21 *
22 */
23
24#include "config.h"
25#include "UString.h"
26
27#include "JSGlobalObjectFunctions.h"
28#include "Collector.h"
29#include "dtoa.h"
30#include "Identifier.h"
31#include "Operations.h"
32#include <ctype.h>
33#include <limits.h>
34#include <limits>
35#include <math.h>
36#include <stdio.h>
37#include <stdlib.h>
38#include <string.h>
39#include <wtf/ASCIICType.h>
40#include <wtf/Assertions.h>
41#include <wtf/MathExtras.h>
42#include <wtf/StringExtras.h>
43#include <wtf/Vector.h>
44#include <wtf/unicode/UTF8.h>
45#include <wtf/StringExtras.h>
46
47#if HAVE(STRINGS_H)
48#include <strings.h>
49#endif
50
51using namespace WTF;
52using namespace WTF::Unicode;
53using namespace std;
54
55namespace JSC {
56
57extern const double NaN;
58extern const double Inf;
59
60// The null string is immutable, except for refCount.
61UString* UString::s_nullUString;
62
63void initializeUString()
64{
65 // UStringImpl::empty() does not construct its static string in a threadsafe fashion,
66 // so ensure it has been initialized from here.
67 UStringImpl::empty();
68
69 UString::s_nullUString = new UString;
70}
71
72UString::UString(const char* c)
73 : m_rep(Rep::create(c))
74{
75}
76
77UString::UString(const char* c, unsigned length)
78 : m_rep(Rep::create(c, length))
79{
80}
81
82UString::UString(const UChar* c, unsigned length)
83 : m_rep(Rep::create(c, length))
84{
85}
86
87UString UString::from(int i)
88{
89 UChar buf[1 + sizeof(i) * 3];
90 UChar* end = buf + sizeof(buf) / sizeof(UChar);
91 UChar* p = end;
92
93 if (i == 0)
94 *--p = '0';
95 else if (i == INT_MIN) {
96 char minBuf[1 + sizeof(i) * 3];
97 sprintf(minBuf, "%d", INT_MIN);
98 return UString(minBuf);
99 } else {
100 bool negative = false;
101 if (i < 0) {
102 negative = true;
103 i = -i;
104 }
105 while (i) {
106 *--p = static_cast<unsigned short>((i % 10) + '0');
107 i /= 10;
108 }
109 if (negative)
110 *--p = '-';
111 }
112
113 return UString(p, static_cast<unsigned>(end - p));
114}
115
116UString UString::from(long long i)
117{
118 UChar buf[1 + sizeof(i) * 3];
119 UChar* end = buf + sizeof(buf) / sizeof(UChar);
120 UChar* p = end;
121
122 if (i == 0)
123 *--p = '0';
124 else if (i == std::numeric_limits<long long>::min()) {
125 char minBuf[1 + sizeof(i) * 3];
126#if OS(WINDOWS)
127 snprintf(minBuf, sizeof(minBuf) - 1, "%I64d", std::numeric_limits<long long>::min());
128#else
129 snprintf(minBuf, sizeof(minBuf) - 1, "%lld", std::numeric_limits<long long>::min());
130#endif
131 return UString(minBuf);
132 } else {
133 bool negative = false;
134 if (i < 0) {
135 negative = true;
136 i = -i;
137 }
138 while (i) {
139 *--p = static_cast<unsigned short>((i % 10) + '0');
140 i /= 10;
141 }
142 if (negative)
143 *--p = '-';
144 }
145
146 return UString(p, static_cast<unsigned>(end - p));
147}
148
149UString UString::from(unsigned u)
150{
151 UChar buf[sizeof(u) * 3];
152 UChar* end = buf + sizeof(buf) / sizeof(UChar);
153 UChar* p = end;
154
155 if (u == 0)
156 *--p = '0';
157 else {
158 while (u) {
159 *--p = static_cast<unsigned short>((u % 10) + '0');
160 u /= 10;
161 }
162 }
163
164 return UString(p, static_cast<unsigned>(end - p));
165}
166
167UString UString::from(long l)
168{
169 UChar buf[1 + sizeof(l) * 3];
170 UChar* end = buf + sizeof(buf) / sizeof(UChar);
171 UChar* p = end;
172
173 if (l == 0)
174 *--p = '0';
175 else if (l == LONG_MIN) {
176 char minBuf[1 + sizeof(l) * 3];
177 sprintf(minBuf, "%ld", LONG_MIN);
178 return UString(minBuf);
179 } else {
180 bool negative = false;
181 if (l < 0) {
182 negative = true;
183 l = -l;
184 }
185 while (l) {
186 *--p = static_cast<unsigned short>((l % 10) + '0');
187 l /= 10;
188 }
189 if (negative)
190 *--p = '-';
191 }
192
193 return UString(p, end - p);
194}
195
196UString UString::from(double d)
197{
198 DtoaBuffer buffer;
199 unsigned length;
200 doubleToStringInJavaScriptFormat(d, buffer, &length);
201 return UString(buffer, length);
202}
203
204char* UString::ascii() const
205{
206 static char* asciiBuffer = 0;
207
208 unsigned length = size();
209 unsigned neededSize = length + 1;
210 delete[] asciiBuffer;
211 asciiBuffer = new char[neededSize];
212
213 const UChar* p = data();
214 char* q = asciiBuffer;
215 const UChar* limit = p + length;
216 while (p != limit) {
217 *q = static_cast<char>(p[0]);
218 ++p;
219 ++q;
220 }
221 *q = '\0';
222
223 return asciiBuffer;
224}
225
226bool UString::is8Bit() const
227{
228 const UChar* u = data();
229 const UChar* limit = u + size();
230 while (u < limit) {
231 if (u[0] > 0xFF)
232 return false;
233 ++u;
234 }
235
236 return true;
237}
238
239UChar UString::operator[](unsigned pos) const
240{
241 if (pos >= size())
242 return '\0';
243 return data()[pos];
244}
245
246double UString::toDouble(bool tolerateTrailingJunk, bool tolerateEmptyString) const
247{
248 if (size() == 1) {
249 UChar c = data()[0];
250 if (isASCIIDigit(c))
251 return c - '0';
252 if (isASCIISpace(c) && tolerateEmptyString)
253 return 0;
254 return NaN;
255 }
256
257 // FIXME: If tolerateTrailingJunk is true, then we want to tolerate junk
258 // after the number, even if it contains invalid UTF-16 sequences. So we
259 // shouldn't use the UTF8String function, which returns null when it
260 // encounters invalid UTF-16. Further, we have no need to convert the
261 // non-ASCII characters to UTF-8, so the UTF8String does quite a bit of
262 // unnecessary work.
263 CString s = UTF8String();
264 if (s.isNull())
265 return NaN;
266 const char* c = s.data();
267
268 // skip leading white space
269 while (isASCIISpace(*c))
270 c++;
271
272 // empty string ?
273 if (*c == '\0')
274 return tolerateEmptyString ? 0.0 : NaN;
275
276 double d;
277
278 // hex number ?
279 if (*c == '0' && (*(c + 1) == 'x' || *(c + 1) == 'X')) {
280 const char* firstDigitPosition = c + 2;
281 c++;
282 d = 0.0;
283 while (*(++c)) {
284 if (*c >= '0' && *c <= '9')
285 d = d * 16.0 + *c - '0';
286 else if ((*c >= 'A' && *c <= 'F') || (*c >= 'a' && *c <= 'f'))
287 d = d * 16.0 + (*c & 0xdf) - 'A' + 10.0;
288 else
289 break;
290 }
291
292 if (d >= mantissaOverflowLowerBound)
293 d = parseIntOverflow(firstDigitPosition, c - firstDigitPosition, 16);
294 } else {
295 // regular number ?
296 char* end;
297 d = WTF::strtod(c, &end);
298 if ((d != 0.0 || end != c) && d != Inf && d != -Inf) {
299 c = end;
300 } else {
301 double sign = 1.0;
302
303 if (*c == '+')
304 c++;
305 else if (*c == '-') {
306 sign = -1.0;
307 c++;
308 }
309
310 // We used strtod() to do the conversion. However, strtod() handles
311 // infinite values slightly differently than JavaScript in that it
312 // converts the string "inf" with any capitalization to infinity,
313 // whereas the ECMA spec requires that it be converted to NaN.
314
315 if (c[0] == 'I' && c[1] == 'n' && c[2] == 'f' && c[3] == 'i' && c[4] == 'n' && c[5] == 'i' && c[6] == 't' && c[7] == 'y') {
316 d = sign * Inf;
317 c += 8;
318 } else if ((d == Inf || d == -Inf) && *c != 'I' && *c != 'i')
319 c = end;
320 else
321 return NaN;
322 }
323 }
324
325 // allow trailing white space
326 while (isASCIISpace(*c))
327 c++;
328 // don't allow anything after - unless tolerant=true
329 // FIXME: If string contains a U+0000 character, then this check is incorrect.
330 if (!tolerateTrailingJunk && *c != '\0')
331 d = NaN;
332
333 return d;
334}
335
336double UString::toDouble(bool tolerateTrailingJunk) const
337{
338 return toDouble(tolerateTrailingJunk, true);
339}
340
341double UString::toDouble() const
342{
343 return toDouble(false, true);
344}
345
346uint32_t UString::toUInt32(bool* ok) const
347{
348 double d = toDouble();
349 bool b = true;
350
351 if (d != static_cast<uint32_t>(d)) {
352 b = false;
353 d = 0;
354 }
355
356 if (ok)
357 *ok = b;
358
359 return static_cast<uint32_t>(d);
360}
361
362uint32_t UString::toUInt32(bool* ok, bool tolerateEmptyString) const
363{
364 double d = toDouble(false, tolerateEmptyString);
365 bool b = true;
366
367 if (d != static_cast<uint32_t>(d)) {
368 b = false;
369 d = 0;
370 }
371
372 if (ok)
373 *ok = b;
374
375 return static_cast<uint32_t>(d);
376}
377
378uint32_t UString::toStrictUInt32(bool* ok) const
379{
380 if (ok)
381 *ok = false;
382
383 // Empty string is not OK.
384 unsigned len = m_rep->length();
385 if (len == 0)
386 return 0;
387 const UChar* p = m_rep->characters();
388 unsigned short c = p[0];
389
390 // If the first digit is 0, only 0 itself is OK.
391 if (c == '0') {
392 if (len == 1 && ok)
393 *ok = true;
394 return 0;
395 }
396
397 // Convert to UInt32, checking for overflow.
398 uint32_t i = 0;
399 while (1) {
400 // Process character, turning it into a digit.
401 if (c < '0' || c > '9')
402 return 0;
403 const unsigned d = c - '0';
404
405 // Multiply by 10, checking for overflow out of 32 bits.
406 if (i > 0xFFFFFFFFU / 10)
407 return 0;
408 i *= 10;
409
410 // Add in the digit, checking for overflow out of 32 bits.
411 const unsigned max = 0xFFFFFFFFU - d;
412 if (i > max)
413 return 0;
414 i += d;
415
416 // Handle end of string.
417 if (--len == 0) {
418 if (ok)
419 *ok = true;
420 return i;
421 }
422
423 // Get next character.
424 c = *(++p);
425 }
426}
427
428unsigned UString::find(const UString& f, unsigned pos) const
429{
430 unsigned fsz = f.size();
431
432 if (fsz == 1) {
433 UChar ch = f[0];
434 const UChar* end = data() + size();
435 for (const UChar* c = data() + pos; c < end; c++) {
436 if (*c == ch)
437 return static_cast<unsigned>(c - data());
438 }
439 return NotFound;
440 }
441
442 unsigned sz = size();
443 if (sz < fsz)
444 return NotFound;
445 if (fsz == 0)
446 return pos;
447 const UChar* end = data() + sz - fsz;
448 unsigned fsizeminusone = (fsz - 1) * sizeof(UChar);
449 const UChar* fdata = f.data();
450 unsigned short fchar = fdata[0];
451 ++fdata;
452 for (const UChar* c = data() + pos; c <= end; c++) {
453 if (c[0] == fchar && !memcmp(c + 1, fdata, fsizeminusone))
454 return static_cast<unsigned>(c - data());
455 }
456
457 return NotFound;
458}
459
460unsigned UString::find(UChar ch, unsigned pos) const
461{
462 const UChar* end = data() + size();
463 for (const UChar* c = data() + pos; c < end; c++) {
464 if (*c == ch)
465 return static_cast<unsigned>(c - data());
466 }
467
468 return NotFound;
469}
470
471unsigned UString::rfind(const UString& f, unsigned pos) const
472{
473 unsigned sz = size();
474 unsigned fsz = f.size();
475 if (sz < fsz)
476 return NotFound;
477 if (pos > sz - fsz)
478 pos = sz - fsz;
479 if (fsz == 0)
480 return pos;
481 unsigned fsizeminusone = (fsz - 1) * sizeof(UChar);
482 const UChar* fdata = f.data();
483 for (const UChar* c = data() + pos; c >= data(); c--) {
484 if (*c == *fdata && !memcmp(c + 1, fdata + 1, fsizeminusone))
485 return static_cast<unsigned>(c - data());
486 }
487
488 return NotFound;
489}
490
491unsigned UString::rfind(UChar ch, unsigned pos) const
492{
493 if (isEmpty())
494 return NotFound;
495 if (pos + 1 >= size())
496 pos = size() - 1;
497 for (const UChar* c = data() + pos; c >= data(); c--) {
498 if (*c == ch)
499 return static_cast<unsigned>(c - data());
500 }
501
502 return NotFound;
503}
504
505UString UString::substr(unsigned pos, unsigned len) const
506{
507 unsigned s = size();
508
509 if (pos >= s)
510 pos = s;
511 unsigned limit = s - pos;
512 if (len > limit)
513 len = limit;
514
515 if (pos == 0 && len == s)
516 return *this;
517
518 return UString(Rep::create(m_rep, pos, len));
519}
520
521bool operator==(const UString& s1, const char *s2)
522{
523 if (s2 == 0)
524 return s1.isEmpty();
525
526 const UChar* u = s1.data();
527 const UChar* uend = u + s1.size();
528 while (u != uend && *s2) {
529 if (u[0] != (unsigned char)*s2)
530 return false;
531 s2++;
532 u++;
533 }
534
535 return u == uend && *s2 == 0;
536}
537
538bool operator<(const UString& s1, const UString& s2)
539{
540 const unsigned l1 = s1.size();
541 const unsigned l2 = s2.size();
542 const unsigned lmin = l1 < l2 ? l1 : l2;
543 const UChar* c1 = s1.data();
544 const UChar* c2 = s2.data();
545 unsigned l = 0;
546 while (l < lmin && *c1 == *c2) {
547 c1++;
548 c2++;
549 l++;
550 }
551 if (l < lmin)
552 return (c1[0] < c2[0]);
553
554 return (l1 < l2);
555}
556
557bool operator>(const UString& s1, const UString& s2)
558{
559 const unsigned l1 = s1.size();
560 const unsigned l2 = s2.size();
561 const unsigned lmin = l1 < l2 ? l1 : l2;
562 const UChar* c1 = s1.data();
563 const UChar* c2 = s2.data();
564 unsigned l = 0;
565 while (l < lmin && *c1 == *c2) {
566 c1++;
567 c2++;
568 l++;
569 }
570 if (l < lmin)
571 return (c1[0] > c2[0]);
572
573 return (l1 > l2);
574}
575
576int compare(const UString& s1, const UString& s2)
577{
578 const unsigned l1 = s1.size();
579 const unsigned l2 = s2.size();
580 const unsigned lmin = l1 < l2 ? l1 : l2;
581 const UChar* c1 = s1.data();
582 const UChar* c2 = s2.data();
583 unsigned l = 0;
584 while (l < lmin && *c1 == *c2) {
585 c1++;
586 c2++;
587 l++;
588 }
589
590 if (l < lmin)
591 return (c1[0] > c2[0]) ? 1 : -1;
592
593 if (l1 == l2)
594 return 0;
595
596 return (l1 > l2) ? 1 : -1;
597}
598
599CString UString::UTF8String(bool strict) const
600{
601 // Allocate a buffer big enough to hold all the characters.
602 const unsigned length = size();
603 Vector<char, 1024> buffer(length * 3);
604
605 // Convert to runs of 8-bit characters.
606 char* p = buffer.data();
607 const UChar* d = reinterpret_cast<const UChar*>(&data()[0]);
608 ConversionResult result = convertUTF16ToUTF8(&d, d + length, &p, p + buffer.size(), strict);
609 if (result != conversionOK)
610 return CString();
611
612 return CString(buffer.data(), p - buffer.data());
613}
614
615} // namespace JSC
Note: See TracBrowser for help on using the repository browser.