source: webkit/trunk/JavaScriptCore/runtime/UString.cpp@ 59969

Last change on this file since 59969 was 59969, checked in by [email protected], 15 years ago

Enforce size constraints on various data structures in JavaScriptCore/wtf.
https://bugs.webkit.org/show_bug.cgi?id=39327

Reviewed by Darin Adler.

I only modified the default build for OSX and Chromium's build file to include WTFCompileAsserts.cpp
as those should be sufficient to catch regressions on the size of the data structures.

  • JavaScriptCore.gypi: Added the WTFCompileAsserts.cpp file.
  • JavaScriptCore.xcodeproj/project.pbxproj: Added the WTFCompileAsserts.cpp file.
  • runtime/UString.cpp: Added a compile assert for UString size.
  • wtf/SizeLimits.cpp: Added compile asserts for data structures that didn't have cpp files.
  • wtf/text/StringImpl.cpp: Added a compile assert for StringImpl size.
  • Property svn:eol-style set to native
File size: 14.8 KB
Line 
1/*
2 * Copyright (C) 1999-2000 Harri Porten ([email protected])
3 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
4 * Copyright (C) 2007 Cameron Zwarich ([email protected])
5 * Copyright (C) 2009 Google Inc. All rights reserved.
6 *
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Library General Public
9 * License as published by the Free Software Foundation; either
10 * version 2 of the License, or (at your option) any later version.
11 *
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Library General Public License for more details.
16 *
17 * You should have received a copy of the GNU Library General Public License
18 * along with this library; see the file COPYING.LIB. If not, write to
19 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20 * Boston, MA 02110-1301, USA.
21 *
22 */
23
24#include "config.h"
25#include "UString.h"
26
27#include "JSGlobalObjectFunctions.h"
28#include "Collector.h"
29#include "dtoa.h"
30#include "Identifier.h"
31#include "Operations.h"
32#include <ctype.h>
33#include <limits.h>
34#include <limits>
35#include <math.h>
36#include <stdio.h>
37#include <stdlib.h>
38#include <string.h>
39#include <wtf/ASCIICType.h>
40#include <wtf/Assertions.h>
41#include <wtf/MathExtras.h>
42#include <wtf/StringExtras.h>
43#include <wtf/Vector.h>
44#include <wtf/unicode/UTF8.h>
45#include <wtf/StringExtras.h>
46
47#if HAVE(STRINGS_H)
48#include <strings.h>
49#endif
50
51using namespace WTF;
52using namespace WTF::Unicode;
53using namespace std;
54
55namespace JSC {
56
57extern const double NaN;
58extern const double Inf;
59
60// The null string is immutable, except for refCount.
61UString* UString::s_nullUString;
62
63COMPILE_ASSERT(sizeof(UString) == sizeof(void*), UString_should_stay_small);
64
65void initializeUString()
66{
67 // UStringImpl::empty() does not construct its static string in a threadsafe fashion,
68 // so ensure it has been initialized from here.
69 UStringImpl::empty();
70
71 UString::s_nullUString = new UString;
72}
73
74UString::UString(const char* c)
75 : m_rep(Rep::create(c))
76{
77}
78
79UString::UString(const char* c, unsigned length)
80 : m_rep(Rep::create(c, length))
81{
82}
83
84UString::UString(const UChar* c, unsigned length)
85 : m_rep(Rep::create(c, length))
86{
87}
88
89UString UString::from(int i)
90{
91 UChar buf[1 + sizeof(i) * 3];
92 UChar* end = buf + sizeof(buf) / sizeof(UChar);
93 UChar* p = end;
94
95 if (i == 0)
96 *--p = '0';
97 else if (i == INT_MIN) {
98 char minBuf[1 + sizeof(i) * 3];
99 sprintf(minBuf, "%d", INT_MIN);
100 return UString(minBuf);
101 } else {
102 bool negative = false;
103 if (i < 0) {
104 negative = true;
105 i = -i;
106 }
107 while (i) {
108 *--p = static_cast<unsigned short>((i % 10) + '0');
109 i /= 10;
110 }
111 if (negative)
112 *--p = '-';
113 }
114
115 return UString(p, static_cast<unsigned>(end - p));
116}
117
118UString UString::from(long long i)
119{
120 UChar buf[1 + sizeof(i) * 3];
121 UChar* end = buf + sizeof(buf) / sizeof(UChar);
122 UChar* p = end;
123
124 if (i == 0)
125 *--p = '0';
126 else if (i == std::numeric_limits<long long>::min()) {
127 char minBuf[1 + sizeof(i) * 3];
128#if OS(WINDOWS)
129 snprintf(minBuf, sizeof(minBuf) - 1, "%I64d", std::numeric_limits<long long>::min());
130#else
131 snprintf(minBuf, sizeof(minBuf) - 1, "%lld", std::numeric_limits<long long>::min());
132#endif
133 return UString(minBuf);
134 } else {
135 bool negative = false;
136 if (i < 0) {
137 negative = true;
138 i = -i;
139 }
140 while (i) {
141 *--p = static_cast<unsigned short>((i % 10) + '0');
142 i /= 10;
143 }
144 if (negative)
145 *--p = '-';
146 }
147
148 return UString(p, static_cast<unsigned>(end - p));
149}
150
151UString UString::from(unsigned u)
152{
153 UChar buf[sizeof(u) * 3];
154 UChar* end = buf + sizeof(buf) / sizeof(UChar);
155 UChar* p = end;
156
157 if (u == 0)
158 *--p = '0';
159 else {
160 while (u) {
161 *--p = static_cast<unsigned short>((u % 10) + '0');
162 u /= 10;
163 }
164 }
165
166 return UString(p, static_cast<unsigned>(end - p));
167}
168
169UString UString::from(long l)
170{
171 UChar buf[1 + sizeof(l) * 3];
172 UChar* end = buf + sizeof(buf) / sizeof(UChar);
173 UChar* p = end;
174
175 if (l == 0)
176 *--p = '0';
177 else if (l == LONG_MIN) {
178 char minBuf[1 + sizeof(l) * 3];
179 sprintf(minBuf, "%ld", LONG_MIN);
180 return UString(minBuf);
181 } else {
182 bool negative = false;
183 if (l < 0) {
184 negative = true;
185 l = -l;
186 }
187 while (l) {
188 *--p = static_cast<unsigned short>((l % 10) + '0');
189 l /= 10;
190 }
191 if (negative)
192 *--p = '-';
193 }
194
195 return UString(p, end - p);
196}
197
198UString UString::from(double d)
199{
200 DtoaBuffer buffer;
201 unsigned length;
202 doubleToStringInJavaScriptFormat(d, buffer, &length);
203 return UString(buffer, length);
204}
205
206char* UString::ascii() const
207{
208 static char* asciiBuffer = 0;
209
210 unsigned length = size();
211 unsigned neededSize = length + 1;
212 delete[] asciiBuffer;
213 asciiBuffer = new char[neededSize];
214
215 const UChar* p = data();
216 char* q = asciiBuffer;
217 const UChar* limit = p + length;
218 while (p != limit) {
219 *q = static_cast<char>(p[0]);
220 ++p;
221 ++q;
222 }
223 *q = '\0';
224
225 return asciiBuffer;
226}
227
228bool UString::is8Bit() const
229{
230 const UChar* u = data();
231 const UChar* limit = u + size();
232 while (u < limit) {
233 if (u[0] > 0xFF)
234 return false;
235 ++u;
236 }
237
238 return true;
239}
240
241UChar UString::operator[](unsigned pos) const
242{
243 if (pos >= size())
244 return '\0';
245 return data()[pos];
246}
247
248double UString::toDouble(bool tolerateTrailingJunk, bool tolerateEmptyString) const
249{
250 if (size() == 1) {
251 UChar c = data()[0];
252 if (isASCIIDigit(c))
253 return c - '0';
254 if (isASCIISpace(c) && tolerateEmptyString)
255 return 0;
256 return NaN;
257 }
258
259 // FIXME: If tolerateTrailingJunk is true, then we want to tolerate junk
260 // after the number, even if it contains invalid UTF-16 sequences. So we
261 // shouldn't use the UTF8String function, which returns null when it
262 // encounters invalid UTF-16. Further, we have no need to convert the
263 // non-ASCII characters to UTF-8, so the UTF8String does quite a bit of
264 // unnecessary work.
265 CString s = UTF8String();
266 if (s.isNull())
267 return NaN;
268 const char* c = s.data();
269
270 // skip leading white space
271 while (isASCIISpace(*c))
272 c++;
273
274 // empty string ?
275 if (*c == '\0')
276 return tolerateEmptyString ? 0.0 : NaN;
277
278 double d;
279
280 // hex number ?
281 if (*c == '0' && (*(c + 1) == 'x' || *(c + 1) == 'X')) {
282 const char* firstDigitPosition = c + 2;
283 c++;
284 d = 0.0;
285 while (*(++c)) {
286 if (*c >= '0' && *c <= '9')
287 d = d * 16.0 + *c - '0';
288 else if ((*c >= 'A' && *c <= 'F') || (*c >= 'a' && *c <= 'f'))
289 d = d * 16.0 + (*c & 0xdf) - 'A' + 10.0;
290 else
291 break;
292 }
293
294 if (d >= mantissaOverflowLowerBound)
295 d = parseIntOverflow(firstDigitPosition, c - firstDigitPosition, 16);
296 } else {
297 // regular number ?
298 char* end;
299 d = WTF::strtod(c, &end);
300 if ((d != 0.0 || end != c) && d != Inf && d != -Inf) {
301 c = end;
302 } else {
303 double sign = 1.0;
304
305 if (*c == '+')
306 c++;
307 else if (*c == '-') {
308 sign = -1.0;
309 c++;
310 }
311
312 // We used strtod() to do the conversion. However, strtod() handles
313 // infinite values slightly differently than JavaScript in that it
314 // converts the string "inf" with any capitalization to infinity,
315 // whereas the ECMA spec requires that it be converted to NaN.
316
317 if (c[0] == 'I' && c[1] == 'n' && c[2] == 'f' && c[3] == 'i' && c[4] == 'n' && c[5] == 'i' && c[6] == 't' && c[7] == 'y') {
318 d = sign * Inf;
319 c += 8;
320 } else if ((d == Inf || d == -Inf) && *c != 'I' && *c != 'i')
321 c = end;
322 else
323 return NaN;
324 }
325 }
326
327 // allow trailing white space
328 while (isASCIISpace(*c))
329 c++;
330 // don't allow anything after - unless tolerant=true
331 // FIXME: If string contains a U+0000 character, then this check is incorrect.
332 if (!tolerateTrailingJunk && *c != '\0')
333 d = NaN;
334
335 return d;
336}
337
338double UString::toDouble(bool tolerateTrailingJunk) const
339{
340 return toDouble(tolerateTrailingJunk, true);
341}
342
343double UString::toDouble() const
344{
345 return toDouble(false, true);
346}
347
348uint32_t UString::toUInt32(bool* ok) const
349{
350 double d = toDouble();
351 bool b = true;
352
353 if (d != static_cast<uint32_t>(d)) {
354 b = false;
355 d = 0;
356 }
357
358 if (ok)
359 *ok = b;
360
361 return static_cast<uint32_t>(d);
362}
363
364uint32_t UString::toUInt32(bool* ok, bool tolerateEmptyString) const
365{
366 double d = toDouble(false, tolerateEmptyString);
367 bool b = true;
368
369 if (d != static_cast<uint32_t>(d)) {
370 b = false;
371 d = 0;
372 }
373
374 if (ok)
375 *ok = b;
376
377 return static_cast<uint32_t>(d);
378}
379
380uint32_t UString::toStrictUInt32(bool* ok) const
381{
382 if (ok)
383 *ok = false;
384
385 // Empty string is not OK.
386 unsigned len = m_rep->length();
387 if (len == 0)
388 return 0;
389 const UChar* p = m_rep->characters();
390 unsigned short c = p[0];
391
392 // If the first digit is 0, only 0 itself is OK.
393 if (c == '0') {
394 if (len == 1 && ok)
395 *ok = true;
396 return 0;
397 }
398
399 // Convert to UInt32, checking for overflow.
400 uint32_t i = 0;
401 while (1) {
402 // Process character, turning it into a digit.
403 if (c < '0' || c > '9')
404 return 0;
405 const unsigned d = c - '0';
406
407 // Multiply by 10, checking for overflow out of 32 bits.
408 if (i > 0xFFFFFFFFU / 10)
409 return 0;
410 i *= 10;
411
412 // Add in the digit, checking for overflow out of 32 bits.
413 const unsigned max = 0xFFFFFFFFU - d;
414 if (i > max)
415 return 0;
416 i += d;
417
418 // Handle end of string.
419 if (--len == 0) {
420 if (ok)
421 *ok = true;
422 return i;
423 }
424
425 // Get next character.
426 c = *(++p);
427 }
428}
429
430unsigned UString::find(const UString& f, unsigned pos) const
431{
432 unsigned fsz = f.size();
433
434 if (fsz == 1) {
435 UChar ch = f[0];
436 const UChar* end = data() + size();
437 for (const UChar* c = data() + pos; c < end; c++) {
438 if (*c == ch)
439 return static_cast<unsigned>(c - data());
440 }
441 return NotFound;
442 }
443
444 unsigned sz = size();
445 if (sz < fsz)
446 return NotFound;
447 if (fsz == 0)
448 return pos;
449 const UChar* end = data() + sz - fsz;
450 unsigned fsizeminusone = (fsz - 1) * sizeof(UChar);
451 const UChar* fdata = f.data();
452 unsigned short fchar = fdata[0];
453 ++fdata;
454 for (const UChar* c = data() + pos; c <= end; c++) {
455 if (c[0] == fchar && !memcmp(c + 1, fdata, fsizeminusone))
456 return static_cast<unsigned>(c - data());
457 }
458
459 return NotFound;
460}
461
462unsigned UString::find(UChar ch, unsigned pos) const
463{
464 const UChar* end = data() + size();
465 for (const UChar* c = data() + pos; c < end; c++) {
466 if (*c == ch)
467 return static_cast<unsigned>(c - data());
468 }
469
470 return NotFound;
471}
472
473unsigned UString::rfind(const UString& f, unsigned pos) const
474{
475 unsigned sz = size();
476 unsigned fsz = f.size();
477 if (sz < fsz)
478 return NotFound;
479 if (pos > sz - fsz)
480 pos = sz - fsz;
481 if (fsz == 0)
482 return pos;
483 unsigned fsizeminusone = (fsz - 1) * sizeof(UChar);
484 const UChar* fdata = f.data();
485 for (const UChar* c = data() + pos; c >= data(); c--) {
486 if (*c == *fdata && !memcmp(c + 1, fdata + 1, fsizeminusone))
487 return static_cast<unsigned>(c - data());
488 }
489
490 return NotFound;
491}
492
493unsigned UString::rfind(UChar ch, unsigned pos) const
494{
495 if (isEmpty())
496 return NotFound;
497 if (pos + 1 >= size())
498 pos = size() - 1;
499 for (const UChar* c = data() + pos; c >= data(); c--) {
500 if (*c == ch)
501 return static_cast<unsigned>(c - data());
502 }
503
504 return NotFound;
505}
506
507UString UString::substr(unsigned pos, unsigned len) const
508{
509 unsigned s = size();
510
511 if (pos >= s)
512 pos = s;
513 unsigned limit = s - pos;
514 if (len > limit)
515 len = limit;
516
517 if (pos == 0 && len == s)
518 return *this;
519
520 return UString(Rep::create(m_rep, pos, len));
521}
522
523bool operator==(const UString& s1, const char *s2)
524{
525 if (s2 == 0)
526 return s1.isEmpty();
527
528 const UChar* u = s1.data();
529 const UChar* uend = u + s1.size();
530 while (u != uend && *s2) {
531 if (u[0] != (unsigned char)*s2)
532 return false;
533 s2++;
534 u++;
535 }
536
537 return u == uend && *s2 == 0;
538}
539
540bool operator<(const UString& s1, const UString& s2)
541{
542 const unsigned l1 = s1.size();
543 const unsigned l2 = s2.size();
544 const unsigned lmin = l1 < l2 ? l1 : l2;
545 const UChar* c1 = s1.data();
546 const UChar* c2 = s2.data();
547 unsigned l = 0;
548 while (l < lmin && *c1 == *c2) {
549 c1++;
550 c2++;
551 l++;
552 }
553 if (l < lmin)
554 return (c1[0] < c2[0]);
555
556 return (l1 < l2);
557}
558
559bool operator>(const UString& s1, const UString& s2)
560{
561 const unsigned l1 = s1.size();
562 const unsigned l2 = s2.size();
563 const unsigned lmin = l1 < l2 ? l1 : l2;
564 const UChar* c1 = s1.data();
565 const UChar* c2 = s2.data();
566 unsigned l = 0;
567 while (l < lmin && *c1 == *c2) {
568 c1++;
569 c2++;
570 l++;
571 }
572 if (l < lmin)
573 return (c1[0] > c2[0]);
574
575 return (l1 > l2);
576}
577
578int compare(const UString& s1, const UString& s2)
579{
580 const unsigned l1 = s1.size();
581 const unsigned l2 = s2.size();
582 const unsigned lmin = l1 < l2 ? l1 : l2;
583 const UChar* c1 = s1.data();
584 const UChar* c2 = s2.data();
585 unsigned l = 0;
586 while (l < lmin && *c1 == *c2) {
587 c1++;
588 c2++;
589 l++;
590 }
591
592 if (l < lmin)
593 return (c1[0] > c2[0]) ? 1 : -1;
594
595 if (l1 == l2)
596 return 0;
597
598 return (l1 > l2) ? 1 : -1;
599}
600
601CString UString::UTF8String(bool strict) const
602{
603 // Allocate a buffer big enough to hold all the characters.
604 const unsigned length = size();
605 Vector<char, 1024> buffer(length * 3);
606
607 // Convert to runs of 8-bit characters.
608 char* p = buffer.data();
609 const UChar* d = reinterpret_cast<const UChar*>(&data()[0]);
610 ConversionResult result = convertUTF16ToUTF8(&d, d + length, &p, p + buffer.size(), strict);
611 if (result != conversionOK)
612 return CString();
613
614 return CString(buffer.data(), p - buffer.data());
615}
616
617} // namespace JSC
Note: See TracBrowser for help on using the repository browser.