source: webkit/trunk/JavaScriptCore/runtime/UString.cpp@ 61623

Last change on this file since 61623 was 60332, checked in by [email protected], 15 years ago

2010-05-27 Luiz Agostini <[email protected]>

Reviewed by Darin Adler.

UTF-16 code points compare() for String objects
https://bugs.webkit.org/show_bug.cgi?id=39701

Moving compare() implementation from UString to StringImpl for it to be shared
with String. Adding overloaded free functions codePointCompare() in StringImpl
and WTFString. Renaming function compare in UString to codePointCompare to be
consistent.

  • runtime/JSArray.cpp: (JSC::compareByStringPairForQSort):
  • runtime/UString.cpp:
  • runtime/UString.h: (JSC::codePointCompare):
  • wtf/text/StringImpl.cpp: (WebCore::codePointCompare):
  • wtf/text/StringImpl.h:
  • wtf/text/WTFString.cpp: (WebCore::codePointCompare):
  • wtf/text/WTFString.h:
  • Property svn:eol-style set to native
File size: 14.5 KB
Line 
1/*
2 * Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
3 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
4 * Copyright (C) 2007 Cameron Zwarich (cwzwarich@uwaterloo.ca)
5 * Copyright (C) 2009 Google Inc. All rights reserved.
6 *
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Library General Public
9 * License as published by the Free Software Foundation; either
10 * version 2 of the License, or (at your option) any later version.
11 *
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Library General Public License for more details.
16 *
17 * You should have received a copy of the GNU Library General Public License
18 * along with this library; see the file COPYING.LIB. If not, write to
19 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20 * Boston, MA 02110-1301, USA.
21 *
22 */
23
24#include "config.h"
25#include "UString.h"
26
27#include "JSGlobalObjectFunctions.h"
28#include "Collector.h"
29#include "dtoa.h"
30#include "Identifier.h"
31#include "Operations.h"
32#include <ctype.h>
33#include <limits.h>
34#include <limits>
35#include <math.h>
36#include <stdio.h>
37#include <stdlib.h>
38#include <string.h>
39#include <wtf/ASCIICType.h>
40#include <wtf/Assertions.h>
41#include <wtf/MathExtras.h>
42#include <wtf/StringExtras.h>
43#include <wtf/Vector.h>
44#include <wtf/unicode/UTF8.h>
45#include <wtf/StringExtras.h>
46
47#if HAVE(STRINGS_H)
48#include <strings.h>
49#endif
50
51using namespace WTF;
52using namespace WTF::Unicode;
53using namespace std;
54
55namespace JSC {
56
57extern const double NaN; // Declaration only; the definition lives in another translation unit.
58extern const double Inf; // Declaration only; the definition lives in another translation unit.
59
60// The null string is immutable, except for refCount.
// Storage for the static member; it is assigned in initializeUString().
61UString* UString::s_nullUString;
62
// Compile-time guard: a UString must stay exactly one pointer wide.
63COMPILE_ASSERT(sizeof(UString) == sizeof(void*), UString_should_stay_small);
64
65void initializeUString()
66{
67 // UStringImpl::empty() does not construct its static string in a threadsafe fashion,
68 // so ensure it has been initialized from here.
69 UStringImpl::empty();
70
71 UString::s_nullUString = new UString;
72}
73
74UString::UString(const char* c)
75 : m_rep(Rep::create(c))
76{
77}
78
79UString::UString(const char* c, unsigned length)
80 : m_rep(Rep::create(c, length))
81{
82}
83
84UString::UString(const UChar* c, unsigned length)
85 : m_rep(Rep::create(c, length))
86{
87}
88
89UString UString::from(int i)
90{
91 UChar buf[1 + sizeof(i) * 3];
92 UChar* end = buf + sizeof(buf) / sizeof(UChar);
93 UChar* p = end;
94
95 if (i == 0)
96 *--p = '0';
97 else if (i == INT_MIN) {
98 char minBuf[1 + sizeof(i) * 3];
99 sprintf(minBuf, "%d", INT_MIN);
100 return UString(minBuf);
101 } else {
102 bool negative = false;
103 if (i < 0) {
104 negative = true;
105 i = -i;
106 }
107 while (i) {
108 *--p = static_cast<unsigned short>((i % 10) + '0');
109 i /= 10;
110 }
111 if (negative)
112 *--p = '-';
113 }
114
115 return UString(p, static_cast<unsigned>(end - p));
116}
117
118UString UString::from(long long i)
119{
120 UChar buf[1 + sizeof(i) * 3];
121 UChar* end = buf + sizeof(buf) / sizeof(UChar);
122 UChar* p = end;
123
124 if (i == 0)
125 *--p = '0';
126 else if (i == std::numeric_limits<long long>::min()) {
127 char minBuf[1 + sizeof(i) * 3];
128#if OS(WINDOWS)
129 snprintf(minBuf, sizeof(minBuf) - 1, "%I64d", std::numeric_limits<long long>::min());
130#else
131 snprintf(minBuf, sizeof(minBuf) - 1, "%lld", std::numeric_limits<long long>::min());
132#endif
133 return UString(minBuf);
134 } else {
135 bool negative = false;
136 if (i < 0) {
137 negative = true;
138 i = -i;
139 }
140 while (i) {
141 *--p = static_cast<unsigned short>((i % 10) + '0');
142 i /= 10;
143 }
144 if (negative)
145 *--p = '-';
146 }
147
148 return UString(p, static_cast<unsigned>(end - p));
149}
150
151UString UString::from(unsigned u)
152{
153 UChar buf[sizeof(u) * 3];
154 UChar* end = buf + sizeof(buf) / sizeof(UChar);
155 UChar* p = end;
156
157 if (u == 0)
158 *--p = '0';
159 else {
160 while (u) {
161 *--p = static_cast<unsigned short>((u % 10) + '0');
162 u /= 10;
163 }
164 }
165
166 return UString(p, static_cast<unsigned>(end - p));
167}
168
169UString UString::from(long l)
170{
171 UChar buf[1 + sizeof(l) * 3];
172 UChar* end = buf + sizeof(buf) / sizeof(UChar);
173 UChar* p = end;
174
175 if (l == 0)
176 *--p = '0';
177 else if (l == LONG_MIN) {
178 char minBuf[1 + sizeof(l) * 3];
179 sprintf(minBuf, "%ld", LONG_MIN);
180 return UString(minBuf);
181 } else {
182 bool negative = false;
183 if (l < 0) {
184 negative = true;
185 l = -l;
186 }
187 while (l) {
188 *--p = static_cast<unsigned short>((l % 10) + '0');
189 l /= 10;
190 }
191 if (negative)
192 *--p = '-';
193 }
194
195 return UString(p, end - p);
196}
197
198UString UString::from(double d)
199{
200 DtoaBuffer buffer;
201 unsigned length;
202 doubleToStringInJavaScriptFormat(d, buffer, &length);
203 return UString(buffer, length);
204}
205
206char* UString::ascii() const
207{
208 static char* asciiBuffer = 0;
209
210 unsigned length = size();
211 unsigned neededSize = length + 1;
212 delete[] asciiBuffer;
213 asciiBuffer = new char[neededSize];
214
215 const UChar* p = data();
216 char* q = asciiBuffer;
217 const UChar* limit = p + length;
218 while (p != limit) {
219 *q = static_cast<char>(p[0]);
220 ++p;
221 ++q;
222 }
223 *q = '\0';
224
225 return asciiBuffer;
226}
227
228bool UString::is8Bit() const
229{
230 const UChar* u = data();
231 const UChar* limit = u + size();
232 while (u < limit) {
233 if (u[0] > 0xFF)
234 return false;
235 ++u;
236 }
237
238 return true;
239}
240
241UChar UString::operator[](unsigned pos) const
242{
243 if (pos >= size())
244 return '\0';
245 return data()[pos];
246}
247
248double UString::toDouble(bool tolerateTrailingJunk, bool tolerateEmptyString) const
249{
250 if (size() == 1) {
251 UChar c = data()[0];
252 if (isASCIIDigit(c))
253 return c - '0';
254 if (isASCIISpace(c) && tolerateEmptyString)
255 return 0;
256 return NaN;
257 }
258
259 // FIXME: If tolerateTrailingJunk is true, then we want to tolerate junk
260 // after the number, even if it contains invalid UTF-16 sequences. So we
261 // shouldn't use the UTF8String function, which returns null when it
262 // encounters invalid UTF-16. Further, we have no need to convert the
263 // non-ASCII characters to UTF-8, so the UTF8String does quite a bit of
264 // unnecessary work.
265
266 // FIXME: The space skipping code below skips only ASCII spaces, but callers
267 // need to skip all StrWhiteSpace. The isStrWhiteSpace function does the
268 // right thing but requires UChar, not char, for its argument.
269
270 CString s = UTF8String();
271 if (s.isNull())
272 return NaN;
273 const char* c = s.data();
274
275 // skip leading white space
276 while (isASCIISpace(*c))
277 c++;
278
279 // empty string ?
280 if (*c == '\0')
281 return tolerateEmptyString ? 0.0 : NaN;
282
283 double d;
284
285 // hex number ?
286 if (*c == '0' && (*(c + 1) == 'x' || *(c + 1) == 'X')) {
287 const char* firstDigitPosition = c + 2;
288 c++;
289 d = 0.0;
290 while (*(++c)) {
291 if (*c >= '0' && *c <= '9')
292 d = d * 16.0 + *c - '0';
293 else if ((*c >= 'A' && *c <= 'F') || (*c >= 'a' && *c <= 'f'))
294 d = d * 16.0 + (*c & 0xdf) - 'A' + 10.0;
295 else
296 break;
297 }
298
299 if (d >= mantissaOverflowLowerBound)
300 d = parseIntOverflow(firstDigitPosition, c - firstDigitPosition, 16);
301 } else {
302 // regular number ?
303 char* end;
304 d = WTF::strtod(c, &end);
305 if ((d != 0.0 || end != c) && d != Inf && d != -Inf) {
306 c = end;
307 } else {
308 double sign = 1.0;
309
310 if (*c == '+')
311 c++;
312 else if (*c == '-') {
313 sign = -1.0;
314 c++;
315 }
316
317 // We used strtod() to do the conversion. However, strtod() handles
318 // infinite values slightly differently than JavaScript in that it
319 // converts the string "inf" with any capitalization to infinity,
320 // whereas the ECMA spec requires that it be converted to NaN.
321
322 if (c[0] == 'I' && c[1] == 'n' && c[2] == 'f' && c[3] == 'i' && c[4] == 'n' && c[5] == 'i' && c[6] == 't' && c[7] == 'y') {
323 d = sign * Inf;
324 c += 8;
325 } else if ((d == Inf || d == -Inf) && *c != 'I' && *c != 'i')
326 c = end;
327 else
328 return NaN;
329 }
330 }
331
332 if (!tolerateTrailingJunk) {
333 // allow trailing white space
334 while (isASCIISpace(*c))
335 c++;
336 if (c != s.data() + s.length())
337 d = NaN;
338 }
339
340 return d;
341}
342
343double UString::toDouble(bool tolerateTrailingJunk) const
344{
345 return toDouble(tolerateTrailingJunk, true);
346}
347
348double UString::toDouble() const
349{
350 return toDouble(false, true);
351}
352
353uint32_t UString::toUInt32(bool* ok) const
354{
355 double d = toDouble();
356 bool b = true;
357
358 if (d != static_cast<uint32_t>(d)) {
359 b = false;
360 d = 0;
361 }
362
363 if (ok)
364 *ok = b;
365
366 return static_cast<uint32_t>(d);
367}
368
369uint32_t UString::toUInt32(bool* ok, bool tolerateEmptyString) const
370{
371 double d = toDouble(false, tolerateEmptyString);
372 bool b = true;
373
374 if (d != static_cast<uint32_t>(d)) {
375 b = false;
376 d = 0;
377 }
378
379 if (ok)
380 *ok = b;
381
382 return static_cast<uint32_t>(d);
383}
384
385uint32_t UString::toStrictUInt32(bool* ok) const
386{
387 if (ok)
388 *ok = false;
389
390 // Empty string is not OK.
391 unsigned len = m_rep->length();
392 if (len == 0)
393 return 0;
394 const UChar* p = m_rep->characters();
395 unsigned short c = p[0];
396
397 // If the first digit is 0, only 0 itself is OK.
398 if (c == '0') {
399 if (len == 1 && ok)
400 *ok = true;
401 return 0;
402 }
403
404 // Convert to UInt32, checking for overflow.
405 uint32_t i = 0;
406 while (1) {
407 // Process character, turning it into a digit.
408 if (c < '0' || c > '9')
409 return 0;
410 const unsigned d = c - '0';
411
412 // Multiply by 10, checking for overflow out of 32 bits.
413 if (i > 0xFFFFFFFFU / 10)
414 return 0;
415 i *= 10;
416
417 // Add in the digit, checking for overflow out of 32 bits.
418 const unsigned max = 0xFFFFFFFFU - d;
419 if (i > max)
420 return 0;
421 i += d;
422
423 // Handle end of string.
424 if (--len == 0) {
425 if (ok)
426 *ok = true;
427 return i;
428 }
429
430 // Get next character.
431 c = *(++p);
432 }
433}
434
435unsigned UString::find(const UString& f, unsigned pos) const
436{
437 unsigned fsz = f.size();
438
439 if (fsz == 1) {
440 UChar ch = f[0];
441 const UChar* end = data() + size();
442 for (const UChar* c = data() + pos; c < end; c++) {
443 if (*c == ch)
444 return static_cast<unsigned>(c - data());
445 }
446 return NotFound;
447 }
448
449 unsigned sz = size();
450 if (sz < fsz)
451 return NotFound;
452 if (fsz == 0)
453 return pos;
454 const UChar* end = data() + sz - fsz;
455 unsigned fsizeminusone = (fsz - 1) * sizeof(UChar);
456 const UChar* fdata = f.data();
457 unsigned short fchar = fdata[0];
458 ++fdata;
459 for (const UChar* c = data() + pos; c <= end; c++) {
460 if (c[0] == fchar && !memcmp(c + 1, fdata, fsizeminusone))
461 return static_cast<unsigned>(c - data());
462 }
463
464 return NotFound;
465}
466
467unsigned UString::find(UChar ch, unsigned pos) const
468{
469 const UChar* end = data() + size();
470 for (const UChar* c = data() + pos; c < end; c++) {
471 if (*c == ch)
472 return static_cast<unsigned>(c - data());
473 }
474
475 return NotFound;
476}
477
478unsigned UString::rfind(const UString& f, unsigned pos) const
479{
480 unsigned sz = size();
481 unsigned fsz = f.size();
482 if (sz < fsz)
483 return NotFound;
484 if (pos > sz - fsz)
485 pos = sz - fsz;
486 if (fsz == 0)
487 return pos;
488 unsigned fsizeminusone = (fsz - 1) * sizeof(UChar);
489 const UChar* fdata = f.data();
490 for (const UChar* c = data() + pos; c >= data(); c--) {
491 if (*c == *fdata && !memcmp(c + 1, fdata + 1, fsizeminusone))
492 return static_cast<unsigned>(c - data());
493 }
494
495 return NotFound;
496}
497
498unsigned UString::rfind(UChar ch, unsigned pos) const
499{
500 if (isEmpty())
501 return NotFound;
502 if (pos + 1 >= size())
503 pos = size() - 1;
504 for (const UChar* c = data() + pos; c >= data(); c--) {
505 if (*c == ch)
506 return static_cast<unsigned>(c - data());
507 }
508
509 return NotFound;
510}
511
512UString UString::substr(unsigned pos, unsigned len) const
513{
514 unsigned s = size();
515
516 if (pos >= s)
517 pos = s;
518 unsigned limit = s - pos;
519 if (len > limit)
520 len = limit;
521
522 if (pos == 0 && len == s)
523 return *this;
524
525 return UString(Rep::create(m_rep, pos, len));
526}
527
528bool operator==(const UString& s1, const char *s2)
529{
530 if (s2 == 0)
531 return s1.isEmpty();
532
533 const UChar* u = s1.data();
534 const UChar* uend = u + s1.size();
535 while (u != uend && *s2) {
536 if (u[0] != (unsigned char)*s2)
537 return false;
538 s2++;
539 u++;
540 }
541
542 return u == uend && *s2 == 0;
543}
544
545bool operator<(const UString& s1, const UString& s2)
546{
547 const unsigned l1 = s1.size();
548 const unsigned l2 = s2.size();
549 const unsigned lmin = l1 < l2 ? l1 : l2;
550 const UChar* c1 = s1.data();
551 const UChar* c2 = s2.data();
552 unsigned l = 0;
553 while (l < lmin && *c1 == *c2) {
554 c1++;
555 c2++;
556 l++;
557 }
558 if (l < lmin)
559 return (c1[0] < c2[0]);
560
561 return (l1 < l2);
562}
563
564bool operator>(const UString& s1, const UString& s2)
565{
566 const unsigned l1 = s1.size();
567 const unsigned l2 = s2.size();
568 const unsigned lmin = l1 < l2 ? l1 : l2;
569 const UChar* c1 = s1.data();
570 const UChar* c2 = s2.data();
571 unsigned l = 0;
572 while (l < lmin && *c1 == *c2) {
573 c1++;
574 c2++;
575 l++;
576 }
577 if (l < lmin)
578 return (c1[0] > c2[0]);
579
580 return (l1 > l2);
581}
582
583CString UString::UTF8String(bool strict) const
584{
585 // Allocate a buffer big enough to hold all the characters.
586 const unsigned length = size();
587 Vector<char, 1024> buffer(length * 3);
588
589 // Convert to runs of 8-bit characters.
590 char* p = buffer.data();
591 const UChar* d = reinterpret_cast<const UChar*>(&data()[0]);
592 ConversionResult result = convertUTF16ToUTF8(&d, d + length, &p, p + buffer.size(), strict);
593 if (result != conversionOK)
594 return CString();
595
596 return CString(buffer.data(), p - buffer.data());
597}
598
599} // namespace JSC
Note: See TracBrowser for help on using the repository browser.