source: webkit/trunk/JavaScriptCore/kjs/ustring.cpp@ 31813

Last change on this file since 31813 was 31677, checked in by [email protected], 17 years ago

Reviewed by Darin.

Make UString thread-safe.

No change on SunSpider total, although individual tests have changed a lot, up to 3%.

  • kjs/InitializeThreading.cpp: (KJS::initializeThreading): Call UString::null() to initialize a static.


  • kjs/identifier.cpp: (KJS::CStringTranslator::translate): (KJS::UCharBufferTranslator::translate): Use "true" for a boolean value instead of 1, because it's C++.
  • kjs/ustring.h: (KJS::CString::adopt): Added a method to create from a char* buffer without copying. (KJS::UString::Rep::ref): Removed an assertion for JSLock::lockCount, as it's no longer necessary to hold JSLock when working with strings. (KJS::UString::Rep::deref): Ditto. (KJS::UString::Rep::isStatic): Added a field to quickly determine that this is an empty or null static string.
  • kjs/ustring.cpp: (KJS::): Removed normalStatBufferSize and statBufferSize, as there is no reason to have such an advanced implementation of a debug-only ascii() method. Removed a long-obsolete comment about UChar. (KJS::UString::Rep::createCopying): Removed an assertion for JSLock::lockCount. (KJS::UString::Rep::create): Ditto. (KJS::UString::Rep::destroy): Ditto. Do not do anything for static null and empty strings, as refcounting is not reliable for those. Reordered branches for a noticeable speed gain - apparently this function is hot enough for SunSpider to see an effect from this! (KJS::UString::null): Moved a star, added a comment. (KJS::UString::cstring): Reimplemented to not call ascii(), which is not thread-safe. (KJS::UString::ascii): Simplified statBuffer handling logic. (KJS::UString::toDouble): Use cstring() instead of ascii().
  • Property svn:eol-style set to native
File size: 31.2 KB
Line 
1// -*- c-basic-offset: 2 -*-
2/*
3 * Copyright (C) 1999-2000 Harri Porten ([email protected])
4 * Copyright (C) 2004, 2005, 2006, 2007 Apple Inc. All rights reserved.
5 * Copyright (C) 2007 Cameron Zwarich ([email protected])
6 *
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Library General Public
9 * License as published by the Free Software Foundation; either
10 * version 2 of the License, or (at your option) any later version.
11 *
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Library General Public License for more details.
16 *
17 * You should have received a copy of the GNU Library General Public License
18 * along with this library; see the file COPYING.LIB. If not, write to
19 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20 * Boston, MA 02110-1301, USA.
21 *
22 */
23
24#include "config.h"
25#include "ustring.h"
26
27#include "JSLock.h"
28#include "collector.h"
29#include "dtoa.h"
30#include "function.h"
31#include "identifier.h"
32#include "operations.h"
33#include <ctype.h>
34#include <float.h>
35#include <limits.h>
36#include <math.h>
37#include <stdio.h>
38#include <stdlib.h>
39#include <wtf/Assertions.h>
40#include <wtf/ASCIICType.h>
41#include <wtf/MathExtras.h>
42#include <wtf/Vector.h>
43#include <wtf/unicode/UTF8.h>
44
45#if HAVE(STRING_H)
46#include <string.h>
47#endif
48#if HAVE(STRINGS_H)
49#include <strings.h>
50#endif
51
52using namespace WTF;
53using namespace WTF::Unicode;
54using namespace std;
55
56namespace KJS {
57
58extern const double NaN;
59extern const double Inf;
60
// Sentinel size value: UString::expandedSize() returns it when a requested
// capacity cannot be represented. It is the largest size_t value.
static inline size_t overflowIndicator()
{
    return std::numeric_limits<size_t>::max();
}
62static inline const size_t maxUChars() { return std::numeric_limits<size_t>::max() / sizeof(UChar); }
63
64static inline UChar* allocChars(size_t length)
65{
66 ASSERT(length);
67 if (length > maxUChars())
68 return 0;
69 return static_cast<UChar*>(fastMalloc(sizeof(UChar) * length));
70}
71
72static inline UChar* reallocChars(UChar* buffer, size_t length)
73{
74 ASSERT(length);
75 if (length > maxUChars())
76 return 0;
77 return static_cast<UChar*>(fastRealloc(buffer, sizeof(UChar) * length));
78}
79
80COMPILE_ASSERT(sizeof(UChar) == 2, uchar_is_2_bytes)
81
82CString::CString(const char *c)
83{
84 length = strlen(c);
85 data = new char[length+1];
86 memcpy(data, c, length + 1);
87}
88
89CString::CString(const char *c, size_t len)
90{
91 length = len;
92 data = new char[len+1];
93 memcpy(data, c, len);
94 data[len] = 0;
95}
96
97CString::CString(const CString &b)
98{
99 length = b.length;
100 if (b.data) {
101 data = new char[length+1];
102 memcpy(data, b.data, length + 1);
103 }
104 else
105 data = 0;
106}
107
108CString::~CString()
109{
110 delete [] data;
111}
112
113CString CString::adopt(char* c, size_t len)
114{
115 CString s;
116 s.data = c;
117 s.length = len;
118
119 return s;
120}
121
122CString &CString::append(const CString &t)
123{
124 char *n;
125 n = new char[length+t.length+1];
126 if (length)
127 memcpy(n, data, length);
128 if (t.length)
129 memcpy(n+length, t.data, t.length);
130 length += t.length;
131 n[length] = 0;
132
133 delete [] data;
134 data = n;
135
136 return *this;
137}
138
139CString &CString::operator=(const char *c)
140{
141 if (data)
142 delete [] data;
143 length = strlen(c);
144 data = new char[length+1];
145 memcpy(data, c, length + 1);
146
147 return *this;
148}
149
150CString &CString::operator=(const CString &str)
151{
152 if (this == &str)
153 return *this;
154
155 if (data)
156 delete [] data;
157 length = str.length;
158 if (str.data) {
159 data = new char[length + 1];
160 memcpy(data, str.data, length + 1);
161 }
162 else
163 data = 0;
164
165 return *this;
166}
167
168bool operator==(const CString& c1, const CString& c2)
169{
170 size_t len = c1.size();
171 return len == c2.size() && (len == 0 || memcmp(c1.c_str(), c2.c_str(), len) == 0);
172}
173
174// These static strings are immutable, except for rc, whose initial value is chosen to reduce the possibility of it becoming zero due to ref/deref not being thread-safe.
175static UChar sharedEmptyChar;
176UString::Rep UString::Rep::null = { 0, 0, INT_MAX / 2, 0, false, true, &UString::Rep::null, 0, 0, 0, 0, 0, 0 };
177UString::Rep UString::Rep::empty = { 0, 0, INT_MAX / 2, 0, false, true, &UString::Rep::empty, 0, &sharedEmptyChar, 0, 0, 0, 0 };
178
179static char* statBuffer = 0; // Only used for debugging via UString::ascii().
180
181PassRefPtr<UString::Rep> UString::Rep::createCopying(const UChar *d, int l)
182{
183 int sizeInBytes = l * sizeof(UChar);
184 UChar *copyD = static_cast<UChar *>(fastMalloc(sizeInBytes));
185 memcpy(copyD, d, sizeInBytes);
186
187 return create(copyD, l);
188}
189
190PassRefPtr<UString::Rep> UString::Rep::create(UChar *d, int l)
191{
192 Rep* r = new Rep;
193 r->offset = 0;
194 r->len = l;
195 r->rc = 1;
196 r->_hash = 0;
197 r->isIdentifier = false;
198 r->isStatic = false;
199 r->baseString = r;
200 r->reportedCost = 0;
201 r->buf = d;
202 r->usedCapacity = l;
203 r->capacity = l;
204 r->usedPreCapacity = 0;
205 r->preCapacity = 0;
206
207 // steal the single reference this Rep was created with
208 return adoptRef(r);
209}
210
211PassRefPtr<UString::Rep> UString::Rep::create(PassRefPtr<Rep> base, int offset, int length)
212{
213 ASSERT(base);
214
215 int baseOffset = base->offset;
216
217 base = base->baseString;
218
219 ASSERT(-(offset + baseOffset) <= base->usedPreCapacity);
220 ASSERT(offset + baseOffset + length <= base->usedCapacity);
221
222 Rep *r = new Rep;
223 r->offset = baseOffset + offset;
224 r->len = length;
225 r->rc = 1;
226 r->_hash = 0;
227 r->isIdentifier = false;
228 r->isStatic = false;
229 r->baseString = base.releaseRef();
230 r->reportedCost = 0;
231 r->buf = 0;
232 r->usedCapacity = 0;
233 r->capacity = 0;
234 r->usedPreCapacity = 0;
235 r->preCapacity = 0;
236
237 // steal the single reference this Rep was created with
238 return adoptRef(r);
239}
240
241void UString::Rep::destroy()
242{
243 // Static null and empty strings can never be destroyed, but we cannot rely on reference counting, because ref/deref are not thread-safe.
244 if (!isStatic) {
245 if (isIdentifier)
246 Identifier::remove(this);
247 if (baseString == this)
248 fastFree(buf);
249 else
250 baseString->deref();
251
252 delete this;
253 }
254}
255
256// Golden ratio - arbitrary start value to avoid mapping all 0's to all 0's
257// or anything like that.
258const unsigned PHI = 0x9e3779b9U;
259
260// Paul Hsieh's SuperFastHash
261// http://www.azillionmonkeys.com/qed/hash.html
262unsigned UString::Rep::computeHash(const UChar *s, int len)
263{
264 unsigned l = len;
265 uint32_t hash = PHI;
266 uint32_t tmp;
267
268 int rem = l & 1;
269 l >>= 1;
270
271 // Main loop
272 for (; l > 0; l--) {
273 hash += s[0];
274 tmp = (s[1] << 11) ^ hash;
275 hash = (hash << 16) ^ tmp;
276 s += 2;
277 hash += hash >> 11;
278 }
279
280 // Handle end case
281 if (rem) {
282 hash += s[0];
283 hash ^= hash << 11;
284 hash += hash >> 17;
285 }
286
287 // Force "avalanching" of final 127 bits
288 hash ^= hash << 3;
289 hash += hash >> 5;
290 hash ^= hash << 2;
291 hash += hash >> 15;
292 hash ^= hash << 10;
293
294 // this avoids ever returning a hash code of 0, since that is used to
295 // signal "hash not computed yet", using a value that is likely to be
296 // effectively the same as 0 when the low bits are masked
297 if (hash == 0)
298 hash = 0x80000000;
299
300 return hash;
301}
302
303// Paul Hsieh's SuperFastHash
304// http://www.azillionmonkeys.com/qed/hash.html
305unsigned UString::Rep::computeHash(const char *s)
306{
307 // This hash is designed to work on 16-bit chunks at a time. But since the normal case
308 // (above) is to hash UTF-16 characters, we just treat the 8-bit chars as if they
309 // were 16-bit chunks, which should give matching results
310
311 uint32_t hash = PHI;
312 uint32_t tmp;
313 size_t l = strlen(s);
314
315 size_t rem = l & 1;
316 l >>= 1;
317
318 // Main loop
319 for (; l > 0; l--) {
320 hash += (unsigned char)s[0];
321 tmp = ((unsigned char)s[1] << 11) ^ hash;
322 hash = (hash << 16) ^ tmp;
323 s += 2;
324 hash += hash >> 11;
325 }
326
327 // Handle end case
328 if (rem) {
329 hash += (unsigned char)s[0];
330 hash ^= hash << 11;
331 hash += hash >> 17;
332 }
333
334 // Force "avalanching" of final 127 bits
335 hash ^= hash << 3;
336 hash += hash >> 5;
337 hash ^= hash << 2;
338 hash += hash >> 15;
339 hash ^= hash << 10;
340
341 // this avoids ever returning a hash code of 0, since that is used to
342 // signal "hash not computed yet", using a value that is likely to be
343 // effectively the same as 0 when the low bits are masked
344 if (hash == 0)
345 hash = 0x80000000;
346
347 return hash;
348}
349
350// put these early so they can be inlined
351inline size_t UString::expandedSize(size_t size, size_t otherSize) const
352{
353 // Do the size calculation in two parts, returning overflowIndicator if
354 // we overflow the maximum value that we can handle.
355
356 if (size > maxUChars())
357 return overflowIndicator();
358
359 size_t expandedSize = ((size + 10) / 10 * 11) + 1;
360 if (maxUChars() - expandedSize < otherSize)
361 return overflowIndicator();
362
363 return expandedSize + otherSize;
364}
365
366inline int UString::usedCapacity() const
367{
368 return m_rep->baseString->usedCapacity;
369}
370
371inline int UString::usedPreCapacity() const
372{
373 return m_rep->baseString->usedPreCapacity;
374}
375
376void UString::expandCapacity(int requiredLength)
377{
378 Rep* r = m_rep->baseString;
379
380 if (requiredLength > r->capacity) {
381 size_t newCapacity = expandedSize(requiredLength, r->preCapacity);
382 UChar* oldBuf = r->buf;
383 r->buf = reallocChars(r->buf, newCapacity);
384 if (!r->buf) {
385 r->buf = oldBuf;
386 m_rep = &Rep::null;
387 return;
388 }
389 r->capacity = newCapacity - r->preCapacity;
390 }
391 if (requiredLength > r->usedCapacity) {
392 r->usedCapacity = requiredLength;
393 }
394}
395
396void UString::expandPreCapacity(int requiredPreCap)
397{
398 Rep* r = m_rep->baseString;
399
400 if (requiredPreCap > r->preCapacity) {
401 size_t newCapacity = expandedSize(requiredPreCap, r->capacity);
402 int delta = newCapacity - r->capacity - r->preCapacity;
403
404 UChar* newBuf = allocChars(newCapacity);
405 if (!newBuf) {
406 m_rep = &Rep::null;
407 return;
408 }
409 memcpy(newBuf + delta, r->buf, (r->capacity + r->preCapacity) * sizeof(UChar));
410 fastFree(r->buf);
411 r->buf = newBuf;
412
413 r->preCapacity = newCapacity - r->capacity;
414 }
415 if (requiredPreCap > r->usedPreCapacity) {
416 r->usedPreCapacity = requiredPreCap;
417 }
418}
419
420UString::UString(const char *c)
421{
422 if (!c) {
423 m_rep = &Rep::null;
424 return;
425 }
426
427 if (!c[0]) {
428 m_rep = &Rep::empty;
429 return;
430 }
431
432 size_t length = strlen(c);
433 UChar *d = allocChars(length);
434 if (!d)
435 m_rep = &Rep::null;
436 else {
437 for (size_t i = 0; i < length; i++)
438 d[i] = static_cast<unsigned char>(c[i]); // use unsigned char to zero-extend instead of sign-extend
439 m_rep = Rep::create(d, static_cast<int>(length));
440 }
441}
442
443UString::UString(const UChar *c, int length)
444{
445 if (length == 0)
446 m_rep = &Rep::empty;
447 else
448 m_rep = Rep::createCopying(c, length);
449}
450
451UString::UString(UChar *c, int length, bool copy)
452{
453 if (length == 0)
454 m_rep = &Rep::empty;
455 else if (copy)
456 m_rep = Rep::createCopying(c, length);
457 else
458 m_rep = Rep::create(c, length);
459}
460
461UString::UString(const Vector<UChar>& buffer)
462{
463 if (!buffer.size())
464 m_rep = &Rep::empty;
465 else
466 m_rep = Rep::createCopying(buffer.data(), buffer.size());
467}
468
469
470UString::UString(const UString &a, const UString &b)
471{
472 int aSize = a.size();
473 int aOffset = a.m_rep->offset;
474 int bSize = b.size();
475 int bOffset = b.m_rep->offset;
476 int length = aSize + bSize;
477
478 // possible cases:
479
480 if (aSize == 0) {
481 // a is empty
482 m_rep = b.m_rep;
483 } else if (bSize == 0) {
484 // b is empty
485 m_rep = a.m_rep;
486 } else if (aOffset + aSize == a.usedCapacity() && aSize >= minShareSize && 4 * aSize >= bSize &&
487 (-bOffset != b.usedPreCapacity() || aSize >= bSize)) {
488 // - a reaches the end of its buffer so it qualifies for shared append
489 // - also, it's at least a quarter the length of b - appending to a much shorter
490 // string does more harm than good
491 // - however, if b qualifies for prepend and is longer than a, we'd rather prepend
492 UString x(a);
493 x.expandCapacity(aOffset + length);
494 if (a.data() && x.data()) {
495 memcpy(const_cast<UChar *>(a.data() + aSize), b.data(), bSize * sizeof(UChar));
496 m_rep = Rep::create(a.m_rep, 0, length);
497 } else
498 m_rep = &Rep::null;
499 } else if (-bOffset == b.usedPreCapacity() && bSize >= minShareSize && 4 * bSize >= aSize) {
500 // - b reaches the beginning of its buffer so it qualifies for shared prepend
501 // - also, it's at least a quarter the length of a - prepending to a much shorter
502 // string does more harm than good
503 UString y(b);
504 y.expandPreCapacity(-bOffset + aSize);
505 if (b.data() && y.data()) {
506 memcpy(const_cast<UChar *>(b.data() - aSize), a.data(), aSize * sizeof(UChar));
507 m_rep = Rep::create(b.m_rep, -aSize, length);
508 } else
509 m_rep = &Rep::null;
510 } else {
511 // a does not qualify for append, and b does not qualify for prepend, gotta make a whole new string
512 size_t newCapacity = expandedSize(length, 0);
513 UChar* d = allocChars(newCapacity);
514 if (!d)
515 m_rep = &Rep::null;
516 else {
517 memcpy(d, a.data(), aSize * sizeof(UChar));
518 memcpy(d + aSize, b.data(), bSize * sizeof(UChar));
519 m_rep = Rep::create(d, length);
520 m_rep->capacity = newCapacity;
521 }
522 }
523}
524
525const UString& UString::null()
526{
527 static UString* n = new UString; // Should be called from main thread at least once to be safely initialized.
528 return *n;
529}
530
531UString UString::from(int i)
532{
533 UChar buf[1 + sizeof(i) * 3];
534 UChar *end = buf + sizeof(buf) / sizeof(UChar);
535 UChar *p = end;
536
537 if (i == 0) {
538 *--p = '0';
539 } else if (i == INT_MIN) {
540 char minBuf[1 + sizeof(i) * 3];
541 sprintf(minBuf, "%d", INT_MIN);
542 return UString(minBuf);
543 } else {
544 bool negative = false;
545 if (i < 0) {
546 negative = true;
547 i = -i;
548 }
549 while (i) {
550 *--p = (unsigned short)((i % 10) + '0');
551 i /= 10;
552 }
553 if (negative) {
554 *--p = '-';
555 }
556 }
557
558 return UString(p, static_cast<int>(end - p));
559}
560
561UString UString::from(unsigned int u)
562{
563 UChar buf[sizeof(u) * 3];
564 UChar *end = buf + sizeof(buf) / sizeof(UChar);
565 UChar *p = end;
566
567 if (u == 0) {
568 *--p = '0';
569 } else {
570 while (u) {
571 *--p = (unsigned short)((u % 10) + '0');
572 u /= 10;
573 }
574 }
575
576 return UString(p, static_cast<int>(end - p));
577}
578
579UString UString::from(long l)
580{
581 UChar buf[1 + sizeof(l) * 3];
582 UChar *end = buf + sizeof(buf) / sizeof(UChar);
583 UChar *p = end;
584
585 if (l == 0) {
586 *--p = '0';
587 } else if (l == LONG_MIN) {
588 char minBuf[1 + sizeof(l) * 3];
589 sprintf(minBuf, "%ld", LONG_MIN);
590 return UString(minBuf);
591 } else {
592 bool negative = false;
593 if (l < 0) {
594 negative = true;
595 l = -l;
596 }
597 while (l) {
598 *--p = (unsigned short)((l % 10) + '0');
599 l /= 10;
600 }
601 if (negative) {
602 *--p = '-';
603 }
604 }
605
606 return UString(p, static_cast<int>(end - p));
607}
608
609UString UString::from(double d)
610{
611 // avoid ever printing -NaN, in JS conceptually there is only one NaN value
612 if (isnan(d))
613 return "NaN";
614
615 char buf[80];
616 int decimalPoint;
617 int sign;
618
619 char *result = kjs_dtoa(d, 0, 0, &decimalPoint, &sign, NULL);
620 int length = static_cast<int>(strlen(result));
621
622 int i = 0;
623 if (sign) {
624 buf[i++] = '-';
625 }
626
627 if (decimalPoint <= 0 && decimalPoint > -6) {
628 buf[i++] = '0';
629 buf[i++] = '.';
630 for (int j = decimalPoint; j < 0; j++) {
631 buf[i++] = '0';
632 }
633 strcpy(buf + i, result);
634 } else if (decimalPoint <= 21 && decimalPoint > 0) {
635 if (length <= decimalPoint) {
636 strcpy(buf + i, result);
637 i += length;
638 for (int j = 0; j < decimalPoint - length; j++) {
639 buf[i++] = '0';
640 }
641 buf[i] = '\0';
642 } else {
643 strncpy(buf + i, result, decimalPoint);
644 i += decimalPoint;
645 buf[i++] = '.';
646 strcpy(buf + i, result + decimalPoint);
647 }
648 } else if (result[0] < '0' || result[0] > '9') {
649 strcpy(buf + i, result);
650 } else {
651 buf[i++] = result[0];
652 if (length > 1) {
653 buf[i++] = '.';
654 strcpy(buf + i, result + 1);
655 i += length - 1;
656 }
657
658 buf[i++] = 'e';
659 buf[i++] = (decimalPoint >= 0) ? '+' : '-';
660 // decimalPoint can't be more than 3 digits decimal given the
661 // nature of float representation
662 int exponential = decimalPoint - 1;
663 if (exponential < 0)
664 exponential = -exponential;
665 if (exponential >= 100)
666 buf[i++] = static_cast<char>('0' + exponential / 100);
667 if (exponential >= 10)
668 buf[i++] = static_cast<char>('0' + (exponential % 100) / 10);
669 buf[i++] = static_cast<char>('0' + exponential % 10);
670 buf[i++] = '\0';
671 }
672
673 kjs_freedtoa(result);
674
675 return UString(buf);
676}
677
678UString UString::spliceSubstringsWithSeparators(const Range* substringRanges, int rangeCount, const UString* separators, int separatorCount) const
679{
680 if (rangeCount == 1 && separatorCount == 0) {
681 int thisSize = size();
682 int position = substringRanges[0].position;
683 int length = substringRanges[0].length;
684 if (position <= 0 && length >= thisSize)
685 return *this;
686 return UString::Rep::create(m_rep, max(0, position), min(thisSize, length));
687 }
688
689 int totalLength = 0;
690 for (int i = 0; i < rangeCount; i++)
691 totalLength += substringRanges[i].length;
692 for (int i = 0; i < separatorCount; i++)
693 totalLength += separators[i].size();
694
695 if (totalLength == 0)
696 return "";
697
698 UChar* buffer = allocChars(totalLength);
699 if (!buffer)
700 return null();
701
702 int maxCount = max(rangeCount, separatorCount);
703 int bufferPos = 0;
704 for (int i = 0; i < maxCount; i++) {
705 if (i < rangeCount) {
706 memcpy(buffer + bufferPos, data() + substringRanges[i].position, substringRanges[i].length * sizeof(UChar));
707 bufferPos += substringRanges[i].length;
708 }
709 if (i < separatorCount) {
710 memcpy(buffer + bufferPos, separators[i].data(), separators[i].size() * sizeof(UChar));
711 bufferPos += separators[i].size();
712 }
713 }
714
715 return UString::Rep::create(buffer, totalLength);
716}
717
718UString& UString::append(const UString &t)
719{
720 int thisSize = size();
721 int thisOffset = m_rep->offset;
722 int tSize = t.size();
723 int length = thisSize + tSize;
724
725 // possible cases:
726 if (thisSize == 0) {
727 // this is empty
728 *this = t;
729 } else if (tSize == 0) {
730 // t is empty
731 } else if (m_rep->baseIsSelf() && m_rep->rc == 1) {
732 // this is direct and has refcount of 1 (so we can just alter it directly)
733 expandCapacity(thisOffset + length);
734 if (data()) {
735 memcpy(const_cast<UChar*>(data() + thisSize), t.data(), tSize * sizeof(UChar));
736 m_rep->len = length;
737 m_rep->_hash = 0;
738 }
739 } else if (thisOffset + thisSize == usedCapacity() && thisSize >= minShareSize) {
740 // this reaches the end of the buffer - extend it if it's long enough to append to
741 expandCapacity(thisOffset + length);
742 if (data()) {
743 memcpy(const_cast<UChar*>(data() + thisSize), t.data(), tSize * sizeof(UChar));
744 m_rep = Rep::create(m_rep, 0, length);
745 }
746 } else {
747 // this is shared with someone using more capacity, gotta make a whole new string
748 size_t newCapacity = expandedSize(length, 0);
749 UChar* d = allocChars(newCapacity);
750 if (!d)
751 m_rep = &Rep::null;
752 else {
753 memcpy(d, data(), thisSize * sizeof(UChar));
754 memcpy(const_cast<UChar*>(d + thisSize), t.data(), tSize * sizeof(UChar));
755 m_rep = Rep::create(d, length);
756 m_rep->capacity = newCapacity;
757 }
758 }
759
760 return *this;
761}
762
763UString& UString::append(const char *t)
764{
765 int thisSize = size();
766 int thisOffset = m_rep->offset;
767 int tSize = static_cast<int>(strlen(t));
768 int length = thisSize + tSize;
769
770 // possible cases:
771 if (thisSize == 0) {
772 // this is empty
773 *this = t;
774 } else if (tSize == 0) {
775 // t is empty, we'll just return *this below.
776 } else if (m_rep->baseIsSelf() && m_rep->rc == 1) {
777 // this is direct and has refcount of 1 (so we can just alter it directly)
778 expandCapacity(thisOffset + length);
779 UChar *d = const_cast<UChar *>(data());
780 if (d) {
781 for (int i = 0; i < tSize; ++i)
782 d[thisSize + i] = static_cast<unsigned char>(t[i]); // use unsigned char to zero-extend instead of sign-extend
783 m_rep->len = length;
784 m_rep->_hash = 0;
785 }
786 } else if (thisOffset + thisSize == usedCapacity() && thisSize >= minShareSize) {
787 // this string reaches the end of the buffer - extend it
788 expandCapacity(thisOffset + length);
789 UChar *d = const_cast<UChar *>(data());
790 if (d) {
791 for (int i = 0; i < tSize; ++i)
792 d[thisSize + i] = static_cast<unsigned char>(t[i]); // use unsigned char to zero-extend instead of sign-extend
793 m_rep = Rep::create(m_rep, 0, length);
794 }
795 } else {
796 // this is shared with someone using more capacity, gotta make a whole new string
797 size_t newCapacity = expandedSize(length, 0);
798 UChar* d = allocChars(newCapacity);
799 if (!d)
800 m_rep = &Rep::null;
801 else {
802 memcpy(d, data(), thisSize * sizeof(UChar));
803 for (int i = 0; i < tSize; ++i)
804 d[thisSize + i] = static_cast<unsigned char>(t[i]); // use unsigned char to zero-extend instead of sign-extend
805 m_rep = Rep::create(d, length);
806 m_rep->capacity = newCapacity;
807 }
808 }
809
810 return *this;
811}
812
813UString& UString::append(UChar c)
814{
815 int thisOffset = m_rep->offset;
816 int length = size();
817
818 // possible cases:
819 if (length == 0) {
820 // this is empty - must make a new m_rep because we don't want to pollute the shared empty one
821 size_t newCapacity = expandedSize(1, 0);
822 UChar* d = allocChars(newCapacity);
823 if (!d)
824 m_rep = &Rep::null;
825 else {
826 d[0] = c;
827 m_rep = Rep::create(d, 1);
828 m_rep->capacity = newCapacity;
829 }
830 } else if (m_rep->baseIsSelf() && m_rep->rc == 1) {
831 // this is direct and has refcount of 1 (so we can just alter it directly)
832 expandCapacity(thisOffset + length + 1);
833 UChar *d = const_cast<UChar *>(data());
834 if (d) {
835 d[length] = c;
836 m_rep->len = length + 1;
837 m_rep->_hash = 0;
838 }
839 } else if (thisOffset + length == usedCapacity() && length >= minShareSize) {
840 // this reaches the end of the string - extend it and share
841 expandCapacity(thisOffset + length + 1);
842 UChar *d = const_cast<UChar *>(data());
843 if (d) {
844 d[length] = c;
845 m_rep = Rep::create(m_rep, 0, length + 1);
846 }
847 } else {
848 // this is shared with someone using more capacity, gotta make a whole new string
849 size_t newCapacity = expandedSize(length + 1, 0);
850 UChar* d = allocChars(newCapacity);
851 if (!d)
852 m_rep = &Rep::null;
853 else {
854 memcpy(d, data(), length * sizeof(UChar));
855 d[length] = c;
856 m_rep = Rep::create(d, length + 1);
857 m_rep->capacity = newCapacity;
858 }
859 }
860
861 return *this;
862}
863
864CString UString::cstring() const
865{
866 int length = size();
867 int neededSize = length + 1;
868 char* buf = new char[neededSize];
869
870 const UChar* p = data();
871 char* q = buf;
872 const UChar* limit = p + length;
873 while (p != limit) {
874 *q = static_cast<char>(p[0]);
875 ++p;
876 ++q;
877 }
878 *q = '\0';
879
880 return CString::adopt(buf, length);
881}
882
883char *UString::ascii() const
884{
885 int length = size();
886 int neededSize = length + 1;
887 delete[] statBuffer;
888 statBuffer = new char[neededSize];
889
890 const UChar *p = data();
891 char *q = statBuffer;
892 const UChar *limit = p + length;
893 while (p != limit) {
894 *q = static_cast<char>(p[0]);
895 ++p;
896 ++q;
897 }
898 *q = '\0';
899
900 return statBuffer;
901}
902
903UString& UString::operator=(const char *c)
904{
905 if (!c) {
906 m_rep = &Rep::null;
907 return *this;
908 }
909
910 if (!c[0]) {
911 m_rep = &Rep::empty;
912 return *this;
913 }
914
915 int l = static_cast<int>(strlen(c));
916 UChar *d;
917 if (m_rep->rc == 1 && l <= m_rep->capacity && m_rep->baseIsSelf() && m_rep->offset == 0 && m_rep->preCapacity == 0) {
918 d = m_rep->buf;
919 m_rep->_hash = 0;
920 m_rep->len = l;
921 } else {
922 d = allocChars(l);
923 if (!d) {
924 m_rep = &Rep::null;
925 return *this;
926 }
927 m_rep = Rep::create(d, l);
928 }
929 for (int i = 0; i < l; i++)
930 d[i] = static_cast<unsigned char>(c[i]); // use unsigned char to zero-extend instead of sign-extend
931
932 return *this;
933}
934
935bool UString::is8Bit() const
936{
937 const UChar *u = data();
938 const UChar *limit = u + size();
939 while (u < limit) {
940 if (u[0] > 0xFF)
941 return false;
942 ++u;
943 }
944
945 return true;
946}
947
948const UChar UString::operator[](int pos) const
949{
950 if (pos >= size())
951 return '\0';
952 return data()[pos];
953}
954
955double UString::toDouble(bool tolerateTrailingJunk, bool tolerateEmptyString) const
956{
957 double d;
958
959 // FIXME: If tolerateTrailingJunk is true, then we want to tolerate non-8-bit junk
960 // after the number, so is8Bit is too strict a check.
961 if (!is8Bit())
962 return NaN;
963
964 CString s = cstring();
965 const char* c = s.c_str();
966
967 // skip leading white space
968 while (isASCIISpace(*c))
969 c++;
970
971 // empty string ?
972 if (*c == '\0')
973 return tolerateEmptyString ? 0.0 : NaN;
974
975 // hex number ?
976 if (*c == '0' && (*(c+1) == 'x' || *(c+1) == 'X')) {
977 const char* firstDigitPosition = c + 2;
978 c++;
979 d = 0.0;
980 while (*(++c)) {
981 if (*c >= '0' && *c <= '9')
982 d = d * 16.0 + *c - '0';
983 else if ((*c >= 'A' && *c <= 'F') || (*c >= 'a' && *c <= 'f'))
984 d = d * 16.0 + (*c & 0xdf) - 'A' + 10.0;
985 else
986 break;
987 }
988
989 if (d >= mantissaOverflowLowerBound)
990 d = parseIntOverflow(firstDigitPosition, c - firstDigitPosition, 16);
991 } else {
992 // regular number ?
993 char *end;
994 d = kjs_strtod(c, &end);
995 if ((d != 0.0 || end != c) && d != Inf && d != -Inf) {
996 c = end;
997 } else {
998 double sign = 1.0;
999
1000 if (*c == '+')
1001 c++;
1002 else if (*c == '-') {
1003 sign = -1.0;
1004 c++;
1005 }
1006
1007 // We used strtod() to do the conversion. However, strtod() handles
1008 // infinite values slightly differently than JavaScript in that it
1009 // converts the string "inf" with any capitalization to infinity,
1010 // whereas the ECMA spec requires that it be converted to NaN.
1011
1012 if (c[0] == 'I' && c[1] == 'n' && c[2] == 'f' && c[3] == 'i' && c[4] == 'n' && c[5] == 'i' && c[6] == 't' && c[7] == 'y') {
1013 d = sign * Inf;
1014 c += 8;
1015 } else if ((d == Inf || d == -Inf) && *c != 'I' && *c != 'i')
1016 c = end;
1017 else
1018 return NaN;
1019 }
1020 }
1021
1022 // allow trailing white space
1023 while (isASCIISpace(*c))
1024 c++;
1025 // don't allow anything after - unless tolerant=true
1026 if (!tolerateTrailingJunk && *c != '\0')
1027 d = NaN;
1028
1029 return d;
1030}
1031
1032double UString::toDouble(bool tolerateTrailingJunk) const
1033{
1034 return toDouble(tolerateTrailingJunk, true);
1035}
1036
1037double UString::toDouble() const
1038{
1039 return toDouble(false, true);
1040}
1041
1042uint32_t UString::toUInt32(bool *ok) const
1043{
1044 double d = toDouble();
1045 bool b = true;
1046
1047 if (d != static_cast<uint32_t>(d)) {
1048 b = false;
1049 d = 0;
1050 }
1051
1052 if (ok)
1053 *ok = b;
1054
1055 return static_cast<uint32_t>(d);
1056}
1057
1058uint32_t UString::toUInt32(bool *ok, bool tolerateEmptyString) const
1059{
1060 double d = toDouble(false, tolerateEmptyString);
1061 bool b = true;
1062
1063 if (d != static_cast<uint32_t>(d)) {
1064 b = false;
1065 d = 0;
1066 }
1067
1068 if (ok)
1069 *ok = b;
1070
1071 return static_cast<uint32_t>(d);
1072}
1073
1074uint32_t UString::toStrictUInt32(bool *ok) const
1075{
1076 if (ok)
1077 *ok = false;
1078
1079 // Empty string is not OK.
1080 int len = m_rep->len;
1081 if (len == 0)
1082 return 0;
1083 const UChar *p = m_rep->data();
1084 unsigned short c = p[0];
1085
1086 // If the first digit is 0, only 0 itself is OK.
1087 if (c == '0') {
1088 if (len == 1 && ok)
1089 *ok = true;
1090 return 0;
1091 }
1092
1093 // Convert to UInt32, checking for overflow.
1094 uint32_t i = 0;
1095 while (1) {
1096 // Process character, turning it into a digit.
1097 if (c < '0' || c > '9')
1098 return 0;
1099 const unsigned d = c - '0';
1100
1101 // Multiply by 10, checking for overflow out of 32 bits.
1102 if (i > 0xFFFFFFFFU / 10)
1103 return 0;
1104 i *= 10;
1105
1106 // Add in the digit, checking for overflow out of 32 bits.
1107 const unsigned max = 0xFFFFFFFFU - d;
1108 if (i > max)
1109 return 0;
1110 i += d;
1111
1112 // Handle end of string.
1113 if (--len == 0) {
1114 if (ok)
1115 *ok = true;
1116 return i;
1117 }
1118
1119 // Get next character.
1120 c = *(++p);
1121 }
1122}
1123
1124int UString::find(const UString &f, int pos) const
1125{
1126 int sz = size();
1127 int fsz = f.size();
1128 if (sz < fsz)
1129 return -1;
1130 if (pos < 0)
1131 pos = 0;
1132 if (fsz == 0)
1133 return pos;
1134 const UChar *end = data() + sz - fsz;
1135 int fsizeminusone = (fsz - 1) * sizeof(UChar);
1136 const UChar *fdata = f.data();
1137 unsigned short fchar = fdata[0];
1138 ++fdata;
1139 for (const UChar *c = data() + pos; c <= end; c++)
1140 if (c[0] == fchar && !memcmp(c + 1, fdata, fsizeminusone))
1141 return static_cast<int>(c - data());
1142
1143 return -1;
1144}
1145
1146int UString::find(UChar ch, int pos) const
1147{
1148 if (pos < 0)
1149 pos = 0;
1150 const UChar *end = data() + size();
1151 for (const UChar *c = data() + pos; c < end; c++)
1152 if (*c == ch)
1153 return static_cast<int>(c - data());
1154
1155 return -1;
1156}
1157
1158int UString::rfind(const UString &f, int pos) const
1159{
1160 int sz = size();
1161 int fsz = f.size();
1162 if (sz < fsz)
1163 return -1;
1164 if (pos < 0)
1165 pos = 0;
1166 if (pos > sz - fsz)
1167 pos = sz - fsz;
1168 if (fsz == 0)
1169 return pos;
1170 int fsizeminusone = (fsz - 1) * sizeof(UChar);
1171 const UChar *fdata = f.data();
1172 for (const UChar *c = data() + pos; c >= data(); c--) {
1173 if (*c == *fdata && !memcmp(c + 1, fdata + 1, fsizeminusone))
1174 return static_cast<int>(c - data());
1175 }
1176
1177 return -1;
1178}
1179
1180int UString::rfind(UChar ch, int pos) const
1181{
1182 if (isEmpty())
1183 return -1;
1184 if (pos + 1 >= size())
1185 pos = size() - 1;
1186 for (const UChar *c = data() + pos; c >= data(); c--) {
1187 if (*c == ch)
1188 return static_cast<int>(c-data());
1189 }
1190
1191 return -1;
1192}
1193
1194UString UString::substr(int pos, int len) const
1195{
1196 int s = size();
1197
1198 if (pos < 0)
1199 pos = 0;
1200 else if (pos >= s)
1201 pos = s;
1202 if (len < 0)
1203 len = s;
1204 if (pos + len >= s)
1205 len = s - pos;
1206
1207 if (pos == 0 && len == s)
1208 return *this;
1209
1210 return UString(Rep::create(m_rep, pos, len));
1211}
1212
1213bool operator==(const UString& s1, const UString& s2)
1214{
1215 if (s1.m_rep->len != s2.m_rep->len)
1216 return false;
1217
1218 return (memcmp(s1.m_rep->data(), s2.m_rep->data(),
1219 s1.m_rep->len * sizeof(UChar)) == 0);
1220}
1221
1222bool operator==(const UString& s1, const char *s2)
1223{
1224 if (s2 == 0) {
1225 return s1.isEmpty();
1226 }
1227
1228 const UChar *u = s1.data();
1229 const UChar *uend = u + s1.size();
1230 while (u != uend && *s2) {
1231 if (u[0] != (unsigned char)*s2)
1232 return false;
1233 s2++;
1234 u++;
1235 }
1236
1237 return u == uend && *s2 == 0;
1238}
1239
1240bool operator<(const UString& s1, const UString& s2)
1241{
1242 const int l1 = s1.size();
1243 const int l2 = s2.size();
1244 const int lmin = l1 < l2 ? l1 : l2;
1245 const UChar *c1 = s1.data();
1246 const UChar *c2 = s2.data();
1247 int l = 0;
1248 while (l < lmin && *c1 == *c2) {
1249 c1++;
1250 c2++;
1251 l++;
1252 }
1253 if (l < lmin)
1254 return (c1[0] < c2[0]);
1255
1256 return (l1 < l2);
1257}
1258
1259int compare(const UString& s1, const UString& s2)
1260{
1261 const int l1 = s1.size();
1262 const int l2 = s2.size();
1263 const int lmin = l1 < l2 ? l1 : l2;
1264 const UChar *c1 = s1.data();
1265 const UChar *c2 = s2.data();
1266 int l = 0;
1267 while (l < lmin && *c1 == *c2) {
1268 c1++;
1269 c2++;
1270 l++;
1271 }
1272
1273 if (l < lmin)
1274 return (c1[0] > c2[0]) ? 1 : -1;
1275
1276 if (l1 == l2)
1277 return 0;
1278
1279 return (l1 > l2) ? 1 : -1;
1280}
1281
1282CString UString::UTF8String(bool strict) const
1283{
1284 // Allocate a buffer big enough to hold all the characters.
1285 const int length = size();
1286 Vector<char, 1024> buffer(length * 3);
1287
1288 // Convert to runs of 8-bit characters.
1289 char* p = buffer.data();
1290 const UChar* d = reinterpret_cast<const UChar*>(&data()[0]);
1291 ConversionResult result = convertUTF16ToUTF8(&d, d + length, &p, p + buffer.size(), strict);
1292 if (result != conversionOK)
1293 return CString();
1294
1295 return CString(buffer.data(), p - buffer.data());
1296}
1297
1298} // namespace KJS
Note: See TracBrowser for help on using the repository browser.