source: webkit/trunk/JavaScriptCore/kjs/ustring.cpp@ 34424

Last change on this file since 34424 was 34361, checked in by [email protected], 17 years ago

Reviewed by Darin.

Fix JSClassCreate to work with old JSCore API threading model.

No change on SunSpider.

  • API/JSClassRef.cpp: (OpaqueJSClass::OpaqueJSClass): Since JSClass is constructed without a context, there is no way for it to create Identifiers. Also, added initializeThreading(), just for good measure.
  • API/JSCallbackObjectFunctions.h: (KJS::::getPropertyNames): Make an Identifier out of the string here, because propertyNames.add() needs that.
  • kjs/identifier.cpp:
  • kjs/identifier.h: (KJS::Identifier::equal):
  • kjs/ustring.cpp: (KJS::equal): Moved equal() from identifier.h to ustring.h, because it's not really about Identifiers, and to make it possible to use it from StrHash. Include StrHash.h from ustring.h to avoid having the behavior depend on headers that happen to be included.
  • wtf/StrHash.h: Removed.
  • kjs/ustring.h: Made RefPtr<UString::Rep> use the same default hash as UString::Rep* (it used to default to pointer equality). Moved the whole StrHash header into ustring.h.
  • JavaScriptCore.exp: Export equal() for WebCore use (this StrHash is used in c_class.cpp, jni_class.cpp, and npruntime.cpp).
  • Property svn:eol-style set to native
File size: 32.4 KB
Line 
1// -*- c-basic-offset: 2 -*-
2/*
3 * Copyright (C) 1999-2000 Harri Porten ([email protected])
4 * Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved.
5 * Copyright (C) 2007 Cameron Zwarich ([email protected])
6 *
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Library General Public
9 * License as published by the Free Software Foundation; either
10 * version 2 of the License, or (at your option) any later version.
11 *
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Library General Public License for more details.
16 *
17 * You should have received a copy of the GNU Library General Public License
18 * along with this library; see the file COPYING.LIB. If not, write to
19 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20 * Boston, MA 02110-1301, USA.
21 *
22 */
23
24#include "config.h"
25#include "ustring.h"
26
27#include "JSLock.h"
28#include "collector.h"
29#include "dtoa.h"
30#include "function.h"
31#include "identifier.h"
32#include "operations.h"
33#include <ctype.h>
34#include <float.h>
35#include <limits.h>
36#include <math.h>
37#include <stdio.h>
38#include <stdlib.h>
39#include <wtf/Assertions.h>
40#include <wtf/ASCIICType.h>
41#include <wtf/MathExtras.h>
42#include <wtf/Vector.h>
43#include <wtf/unicode/UTF8.h>
44
45#if HAVE(STRING_H)
46#include <string.h>
47#endif
48#if HAVE(STRINGS_H)
49#include <strings.h>
50#endif
51
52using namespace WTF;
53using namespace WTF::Unicode;
54using namespace std;
55
56namespace KJS {
57
58extern const double NaN;
59extern const double Inf;
60
// Sentinel returned by size computations to report that the requested size
// cannot be represented: the largest value a size_t can hold.
static inline size_t overflowIndicator()
{
    return std::numeric_limits<size_t>::max();
}
62static inline const size_t maxUChars() { return std::numeric_limits<size_t>::max() / sizeof(UChar); }
63
64static inline UChar* allocChars(size_t length)
65{
66 ASSERT(length);
67 if (length > maxUChars())
68 return 0;
69 return static_cast<UChar*>(fastMalloc(sizeof(UChar) * length));
70}
71
72static inline UChar* reallocChars(UChar* buffer, size_t length)
73{
74 ASSERT(length);
75 if (length > maxUChars())
76 return 0;
77 return static_cast<UChar*>(fastRealloc(buffer, sizeof(UChar) * length));
78}
79
80COMPILE_ASSERT(sizeof(UChar) == 2, uchar_is_2_bytes)
81
82CString::CString(const char *c)
83{
84 length = strlen(c);
85 data = new char[length+1];
86 memcpy(data, c, length + 1);
87}
88
89CString::CString(const char *c, size_t len)
90{
91 length = len;
92 data = new char[len+1];
93 memcpy(data, c, len);
94 data[len] = 0;
95}
96
97CString::CString(const CString &b)
98{
99 length = b.length;
100 if (b.data) {
101 data = new char[length+1];
102 memcpy(data, b.data, length + 1);
103 }
104 else
105 data = 0;
106}
107
108CString::~CString()
109{
110 delete [] data;
111}
112
113CString CString::adopt(char* c, size_t len)
114{
115 CString s;
116 s.data = c;
117 s.length = len;
118
119 return s;
120}
121
122CString &CString::append(const CString &t)
123{
124 char *n;
125 n = new char[length+t.length+1];
126 if (length)
127 memcpy(n, data, length);
128 if (t.length)
129 memcpy(n+length, t.data, t.length);
130 length += t.length;
131 n[length] = 0;
132
133 delete [] data;
134 data = n;
135
136 return *this;
137}
138
139CString &CString::operator=(const char *c)
140{
141 if (data)
142 delete [] data;
143 length = strlen(c);
144 data = new char[length+1];
145 memcpy(data, c, length + 1);
146
147 return *this;
148}
149
150CString &CString::operator=(const CString &str)
151{
152 if (this == &str)
153 return *this;
154
155 if (data)
156 delete [] data;
157 length = str.length;
158 if (str.data) {
159 data = new char[length + 1];
160 memcpy(data, str.data, length + 1);
161 }
162 else
163 data = 0;
164
165 return *this;
166}
167
168bool operator==(const CString& c1, const CString& c2)
169{
170 size_t len = c1.size();
171 return len == c2.size() && (len == 0 || memcmp(c1.c_str(), c2.c_str(), len) == 0);
172}
173
174// These static strings are immutable, except for rc, whose initial value is chosen to reduce the possibility of it becoming zero due to ref/deref not being thread-safe.
175static UChar sharedEmptyChar;
176UString::Rep UString::Rep::null = { 0, 0, INT_MAX / 2, 0, 0, &UString::Rep::null, true, 0, 0, 0, 0, 0, 0 };
177UString::Rep UString::Rep::empty = { 0, 0, INT_MAX / 2, 0, 0, &UString::Rep::empty, true, 0, &sharedEmptyChar, 0, 0, 0, 0 };
178
179static char* statBuffer = 0; // Only used for debugging via UString::ascii().
180
181PassRefPtr<UString::Rep> UString::Rep::createCopying(const UChar *d, int l)
182{
183 int sizeInBytes = l * sizeof(UChar);
184 UChar *copyD = static_cast<UChar *>(fastMalloc(sizeInBytes));
185 memcpy(copyD, d, sizeInBytes);
186
187 return create(copyD, l);
188}
189
190PassRefPtr<UString::Rep> UString::Rep::create(UChar *d, int l)
191{
192 Rep* r = new Rep;
193 r->offset = 0;
194 r->len = l;
195 r->rc = 1;
196 r->_hash = 0;
197 r->identifierTable = 0;
198 r->baseString = r;
199 r->isStatic = false;
200 r->reportedCost = 0;
201 r->buf = d;
202 r->usedCapacity = l;
203 r->capacity = l;
204 r->usedPreCapacity = 0;
205 r->preCapacity = 0;
206
207 // steal the single reference this Rep was created with
208 return adoptRef(r);
209}
210
211PassRefPtr<UString::Rep> UString::Rep::create(PassRefPtr<Rep> base, int offset, int length)
212{
213 ASSERT(base);
214
215 int baseOffset = base->offset;
216
217 base = base->baseString;
218
219 ASSERT(-(offset + baseOffset) <= base->usedPreCapacity);
220 ASSERT(offset + baseOffset + length <= base->usedCapacity);
221
222 Rep *r = new Rep;
223 r->offset = baseOffset + offset;
224 r->len = length;
225 r->rc = 1;
226 r->_hash = 0;
227 r->identifierTable = 0;
228 r->baseString = base.releaseRef();
229 r->isStatic = false;
230 r->reportedCost = 0;
231 r->buf = 0;
232 r->usedCapacity = 0;
233 r->capacity = 0;
234 r->usedPreCapacity = 0;
235 r->preCapacity = 0;
236
237 // steal the single reference this Rep was created with
238 return adoptRef(r);
239}
240
241PassRefPtr<UString::Rep> UString::Rep::createFromUTF8(const char* string)
242{
243 if (!string)
244 return &UString::Rep::null;
245
246 size_t length = strlen(string);
247 Vector<UChar, 1024> buffer(length);
248 UChar* p = buffer.data();
249 if (conversionOK != convertUTF8ToUTF16(&string, string + length, &p, p + length))
250 return &UString::Rep::null;
251
252 return UString::Rep::createCopying(buffer.data(), p - buffer.data());
253}
254
255void UString::Rep::destroy()
256{
257 // Static null and empty strings can never be destroyed, but we cannot rely on reference counting, because ref/deref are not thread-safe.
258 if (!isStatic) {
259 if (identifierTable)
260 Identifier::remove(this);
261 if (baseString == this)
262 fastFree(buf);
263 else
264 baseString->deref();
265
266 delete this;
267 }
268}
269
// Golden ratio: an arbitrary start value for the hash functions below, so
// that all-zero input does not map to an all-zero hash (or anything like that).
const unsigned PHI = 0x9e3779b9U;
273
274// Paul Hsieh's SuperFastHash
275// http://www.azillionmonkeys.com/qed/hash.html
276unsigned UString::Rep::computeHash(const UChar *s, int len)
277{
278 unsigned l = len;
279 uint32_t hash = PHI;
280 uint32_t tmp;
281
282 int rem = l & 1;
283 l >>= 1;
284
285 // Main loop
286 for (; l > 0; l--) {
287 hash += s[0];
288 tmp = (s[1] << 11) ^ hash;
289 hash = (hash << 16) ^ tmp;
290 s += 2;
291 hash += hash >> 11;
292 }
293
294 // Handle end case
295 if (rem) {
296 hash += s[0];
297 hash ^= hash << 11;
298 hash += hash >> 17;
299 }
300
301 // Force "avalanching" of final 127 bits
302 hash ^= hash << 3;
303 hash += hash >> 5;
304 hash ^= hash << 2;
305 hash += hash >> 15;
306 hash ^= hash << 10;
307
308 // this avoids ever returning a hash code of 0, since that is used to
309 // signal "hash not computed yet", using a value that is likely to be
310 // effectively the same as 0 when the low bits are masked
311 if (hash == 0)
312 hash = 0x80000000;
313
314 return hash;
315}
316
317// Paul Hsieh's SuperFastHash
318// http://www.azillionmonkeys.com/qed/hash.html
319unsigned UString::Rep::computeHash(const char *s)
320{
321 // This hash is designed to work on 16-bit chunks at a time. But since the normal case
322 // (above) is to hash UTF-16 characters, we just treat the 8-bit chars as if they
323 // were 16-bit chunks, which should give matching results
324
325 uint32_t hash = PHI;
326 uint32_t tmp;
327 size_t l = strlen(s);
328
329 size_t rem = l & 1;
330 l >>= 1;
331
332 // Main loop
333 for (; l > 0; l--) {
334 hash += (unsigned char)s[0];
335 tmp = ((unsigned char)s[1] << 11) ^ hash;
336 hash = (hash << 16) ^ tmp;
337 s += 2;
338 hash += hash >> 11;
339 }
340
341 // Handle end case
342 if (rem) {
343 hash += (unsigned char)s[0];
344 hash ^= hash << 11;
345 hash += hash >> 17;
346 }
347
348 // Force "avalanching" of final 127 bits
349 hash ^= hash << 3;
350 hash += hash >> 5;
351 hash ^= hash << 2;
352 hash += hash >> 15;
353 hash ^= hash << 10;
354
355 // this avoids ever returning a hash code of 0, since that is used to
356 // signal "hash not computed yet", using a value that is likely to be
357 // effectively the same as 0 when the low bits are masked
358 if (hash == 0)
359 hash = 0x80000000;
360
361 return hash;
362}
363
364// put these early so they can be inlined
365inline size_t UString::expandedSize(size_t size, size_t otherSize) const
366{
367 // Do the size calculation in two parts, returning overflowIndicator if
368 // we overflow the maximum value that we can handle.
369
370 if (size > maxUChars())
371 return overflowIndicator();
372
373 size_t expandedSize = ((size + 10) / 10 * 11) + 1;
374 if (maxUChars() - expandedSize < otherSize)
375 return overflowIndicator();
376
377 return expandedSize + otherSize;
378}
379
380inline int UString::usedCapacity() const
381{
382 return m_rep->baseString->usedCapacity;
383}
384
385inline int UString::usedPreCapacity() const
386{
387 return m_rep->baseString->usedPreCapacity;
388}
389
390void UString::expandCapacity(int requiredLength)
391{
392 Rep* r = m_rep->baseString;
393
394 if (requiredLength > r->capacity) {
395 size_t newCapacity = expandedSize(requiredLength, r->preCapacity);
396 UChar* oldBuf = r->buf;
397 r->buf = reallocChars(r->buf, newCapacity);
398 if (!r->buf) {
399 r->buf = oldBuf;
400 m_rep = &Rep::null;
401 return;
402 }
403 r->capacity = newCapacity - r->preCapacity;
404 }
405 if (requiredLength > r->usedCapacity) {
406 r->usedCapacity = requiredLength;
407 }
408}
409
410void UString::expandPreCapacity(int requiredPreCap)
411{
412 Rep* r = m_rep->baseString;
413
414 if (requiredPreCap > r->preCapacity) {
415 size_t newCapacity = expandedSize(requiredPreCap, r->capacity);
416 int delta = newCapacity - r->capacity - r->preCapacity;
417
418 UChar* newBuf = allocChars(newCapacity);
419 if (!newBuf) {
420 m_rep = &Rep::null;
421 return;
422 }
423 memcpy(newBuf + delta, r->buf, (r->capacity + r->preCapacity) * sizeof(UChar));
424 fastFree(r->buf);
425 r->buf = newBuf;
426
427 r->preCapacity = newCapacity - r->capacity;
428 }
429 if (requiredPreCap > r->usedPreCapacity) {
430 r->usedPreCapacity = requiredPreCap;
431 }
432}
433
434UString::UString(const char *c)
435{
436 if (!c) {
437 m_rep = &Rep::null;
438 return;
439 }
440
441 if (!c[0]) {
442 m_rep = &Rep::empty;
443 return;
444 }
445
446 size_t length = strlen(c);
447 UChar *d = allocChars(length);
448 if (!d)
449 m_rep = &Rep::null;
450 else {
451 for (size_t i = 0; i < length; i++)
452 d[i] = static_cast<unsigned char>(c[i]); // use unsigned char to zero-extend instead of sign-extend
453 m_rep = Rep::create(d, static_cast<int>(length));
454 }
455}
456
457UString::UString(const UChar *c, int length)
458{
459 if (length == 0)
460 m_rep = &Rep::empty;
461 else
462 m_rep = Rep::createCopying(c, length);
463}
464
465UString::UString(UChar *c, int length, bool copy)
466{
467 if (length == 0)
468 m_rep = &Rep::empty;
469 else if (copy)
470 m_rep = Rep::createCopying(c, length);
471 else
472 m_rep = Rep::create(c, length);
473}
474
475UString::UString(const Vector<UChar>& buffer)
476{
477 if (!buffer.size())
478 m_rep = &Rep::empty;
479 else
480 m_rep = Rep::createCopying(buffer.data(), buffer.size());
481}
482
483
484UString::UString(const UString &a, const UString &b)
485{
486 int aSize = a.size();
487 int aOffset = a.m_rep->offset;
488 int bSize = b.size();
489 int bOffset = b.m_rep->offset;
490 int length = aSize + bSize;
491
492 // possible cases:
493
494 if (aSize == 0) {
495 // a is empty
496 m_rep = b.m_rep;
497 } else if (bSize == 0) {
498 // b is empty
499 m_rep = a.m_rep;
500 } else if (aOffset + aSize == a.usedCapacity() && aSize >= minShareSize && 4 * aSize >= bSize &&
501 (-bOffset != b.usedPreCapacity() || aSize >= bSize)) {
502 // - a reaches the end of its buffer so it qualifies for shared append
503 // - also, it's at least a quarter the length of b - appending to a much shorter
504 // string does more harm than good
505 // - however, if b qualifies for prepend and is longer than a, we'd rather prepend
506 UString x(a);
507 x.expandCapacity(aOffset + length);
508 if (a.data() && x.data()) {
509 memcpy(const_cast<UChar *>(a.data() + aSize), b.data(), bSize * sizeof(UChar));
510 m_rep = Rep::create(a.m_rep, 0, length);
511 } else
512 m_rep = &Rep::null;
513 } else if (-bOffset == b.usedPreCapacity() && bSize >= minShareSize && 4 * bSize >= aSize) {
514 // - b reaches the beginning of its buffer so it qualifies for shared prepend
515 // - also, it's at least a quarter the length of a - prepending to a much shorter
516 // string does more harm than good
517 UString y(b);
518 y.expandPreCapacity(-bOffset + aSize);
519 if (b.data() && y.data()) {
520 memcpy(const_cast<UChar *>(b.data() - aSize), a.data(), aSize * sizeof(UChar));
521 m_rep = Rep::create(b.m_rep, -aSize, length);
522 } else
523 m_rep = &Rep::null;
524 } else {
525 // a does not qualify for append, and b does not qualify for prepend, gotta make a whole new string
526 size_t newCapacity = expandedSize(length, 0);
527 UChar* d = allocChars(newCapacity);
528 if (!d)
529 m_rep = &Rep::null;
530 else {
531 memcpy(d, a.data(), aSize * sizeof(UChar));
532 memcpy(d + aSize, b.data(), bSize * sizeof(UChar));
533 m_rep = Rep::create(d, length);
534 m_rep->capacity = newCapacity;
535 }
536 }
537}
538
539const UString& UString::null()
540{
541 static UString* n = new UString; // Should be called from main thread at least once to be safely initialized.
542 return *n;
543}
544
545UString UString::from(int i)
546{
547 UChar buf[1 + sizeof(i) * 3];
548 UChar *end = buf + sizeof(buf) / sizeof(UChar);
549 UChar *p = end;
550
551 if (i == 0) {
552 *--p = '0';
553 } else if (i == INT_MIN) {
554 char minBuf[1 + sizeof(i) * 3];
555 sprintf(minBuf, "%d", INT_MIN);
556 return UString(minBuf);
557 } else {
558 bool negative = false;
559 if (i < 0) {
560 negative = true;
561 i = -i;
562 }
563 while (i) {
564 *--p = (unsigned short)((i % 10) + '0');
565 i /= 10;
566 }
567 if (negative) {
568 *--p = '-';
569 }
570 }
571
572 return UString(p, static_cast<int>(end - p));
573}
574
575UString UString::from(unsigned int u)
576{
577 UChar buf[sizeof(u) * 3];
578 UChar *end = buf + sizeof(buf) / sizeof(UChar);
579 UChar *p = end;
580
581 if (u == 0) {
582 *--p = '0';
583 } else {
584 while (u) {
585 *--p = (unsigned short)((u % 10) + '0');
586 u /= 10;
587 }
588 }
589
590 return UString(p, static_cast<int>(end - p));
591}
592
593UString UString::from(long l)
594{
595 UChar buf[1 + sizeof(l) * 3];
596 UChar *end = buf + sizeof(buf) / sizeof(UChar);
597 UChar *p = end;
598
599 if (l == 0) {
600 *--p = '0';
601 } else if (l == LONG_MIN) {
602 char minBuf[1 + sizeof(l) * 3];
603 sprintf(minBuf, "%ld", LONG_MIN);
604 return UString(minBuf);
605 } else {
606 bool negative = false;
607 if (l < 0) {
608 negative = true;
609 l = -l;
610 }
611 while (l) {
612 *--p = (unsigned short)((l % 10) + '0');
613 l /= 10;
614 }
615 if (negative) {
616 *--p = '-';
617 }
618 }
619
620 return UString(p, static_cast<int>(end - p));
621}
622
623UString UString::from(double d)
624{
625 // avoid ever printing -NaN, in JS conceptually there is only one NaN value
626 if (isnan(d))
627 return "NaN";
628
629 char buf[80];
630 int decimalPoint;
631 int sign;
632
633 char *result = dtoa(d, 0, &decimalPoint, &sign, NULL);
634 int length = static_cast<int>(strlen(result));
635
636 int i = 0;
637 if (sign) {
638 buf[i++] = '-';
639 }
640
641 if (decimalPoint <= 0 && decimalPoint > -6) {
642 buf[i++] = '0';
643 buf[i++] = '.';
644 for (int j = decimalPoint; j < 0; j++) {
645 buf[i++] = '0';
646 }
647 strcpy(buf + i, result);
648 } else if (decimalPoint <= 21 && decimalPoint > 0) {
649 if (length <= decimalPoint) {
650 strcpy(buf + i, result);
651 i += length;
652 for (int j = 0; j < decimalPoint - length; j++) {
653 buf[i++] = '0';
654 }
655 buf[i] = '\0';
656 } else {
657 strncpy(buf + i, result, decimalPoint);
658 i += decimalPoint;
659 buf[i++] = '.';
660 strcpy(buf + i, result + decimalPoint);
661 }
662 } else if (result[0] < '0' || result[0] > '9') {
663 strcpy(buf + i, result);
664 } else {
665 buf[i++] = result[0];
666 if (length > 1) {
667 buf[i++] = '.';
668 strcpy(buf + i, result + 1);
669 i += length - 1;
670 }
671
672 buf[i++] = 'e';
673 buf[i++] = (decimalPoint >= 0) ? '+' : '-';
674 // decimalPoint can't be more than 3 digits decimal given the
675 // nature of float representation
676 int exponential = decimalPoint - 1;
677 if (exponential < 0)
678 exponential = -exponential;
679 if (exponential >= 100)
680 buf[i++] = static_cast<char>('0' + exponential / 100);
681 if (exponential >= 10)
682 buf[i++] = static_cast<char>('0' + (exponential % 100) / 10);
683 buf[i++] = static_cast<char>('0' + exponential % 10);
684 buf[i++] = '\0';
685 }
686
687 freedtoa(result);
688
689 return UString(buf);
690}
691
692UString UString::spliceSubstringsWithSeparators(const Range* substringRanges, int rangeCount, const UString* separators, int separatorCount) const
693{
694 if (rangeCount == 1 && separatorCount == 0) {
695 int thisSize = size();
696 int position = substringRanges[0].position;
697 int length = substringRanges[0].length;
698 if (position <= 0 && length >= thisSize)
699 return *this;
700 return UString::Rep::create(m_rep, max(0, position), min(thisSize, length));
701 }
702
703 int totalLength = 0;
704 for (int i = 0; i < rangeCount; i++)
705 totalLength += substringRanges[i].length;
706 for (int i = 0; i < separatorCount; i++)
707 totalLength += separators[i].size();
708
709 if (totalLength == 0)
710 return "";
711
712 UChar* buffer = allocChars(totalLength);
713 if (!buffer)
714 return null();
715
716 int maxCount = max(rangeCount, separatorCount);
717 int bufferPos = 0;
718 for (int i = 0; i < maxCount; i++) {
719 if (i < rangeCount) {
720 memcpy(buffer + bufferPos, data() + substringRanges[i].position, substringRanges[i].length * sizeof(UChar));
721 bufferPos += substringRanges[i].length;
722 }
723 if (i < separatorCount) {
724 memcpy(buffer + bufferPos, separators[i].data(), separators[i].size() * sizeof(UChar));
725 bufferPos += separators[i].size();
726 }
727 }
728
729 return UString::Rep::create(buffer, totalLength);
730}
731
732UString& UString::append(const UString &t)
733{
734 int thisSize = size();
735 int thisOffset = m_rep->offset;
736 int tSize = t.size();
737 int length = thisSize + tSize;
738
739 // possible cases:
740 if (thisSize == 0) {
741 // this is empty
742 *this = t;
743 } else if (tSize == 0) {
744 // t is empty
745 } else if (m_rep->baseIsSelf() && m_rep->rc == 1) {
746 // this is direct and has refcount of 1 (so we can just alter it directly)
747 expandCapacity(thisOffset + length);
748 if (data()) {
749 memcpy(const_cast<UChar*>(data() + thisSize), t.data(), tSize * sizeof(UChar));
750 m_rep->len = length;
751 m_rep->_hash = 0;
752 }
753 } else if (thisOffset + thisSize == usedCapacity() && thisSize >= minShareSize) {
754 // this reaches the end of the buffer - extend it if it's long enough to append to
755 expandCapacity(thisOffset + length);
756 if (data()) {
757 memcpy(const_cast<UChar*>(data() + thisSize), t.data(), tSize * sizeof(UChar));
758 m_rep = Rep::create(m_rep, 0, length);
759 }
760 } else {
761 // this is shared with someone using more capacity, gotta make a whole new string
762 size_t newCapacity = expandedSize(length, 0);
763 UChar* d = allocChars(newCapacity);
764 if (!d)
765 m_rep = &Rep::null;
766 else {
767 memcpy(d, data(), thisSize * sizeof(UChar));
768 memcpy(const_cast<UChar*>(d + thisSize), t.data(), tSize * sizeof(UChar));
769 m_rep = Rep::create(d, length);
770 m_rep->capacity = newCapacity;
771 }
772 }
773
774 return *this;
775}
776
777UString& UString::append(const char *t)
778{
779 int thisSize = size();
780 int thisOffset = m_rep->offset;
781 int tSize = static_cast<int>(strlen(t));
782 int length = thisSize + tSize;
783
784 // possible cases:
785 if (thisSize == 0) {
786 // this is empty
787 *this = t;
788 } else if (tSize == 0) {
789 // t is empty, we'll just return *this below.
790 } else if (m_rep->baseIsSelf() && m_rep->rc == 1) {
791 // this is direct and has refcount of 1 (so we can just alter it directly)
792 expandCapacity(thisOffset + length);
793 UChar *d = const_cast<UChar *>(data());
794 if (d) {
795 for (int i = 0; i < tSize; ++i)
796 d[thisSize + i] = static_cast<unsigned char>(t[i]); // use unsigned char to zero-extend instead of sign-extend
797 m_rep->len = length;
798 m_rep->_hash = 0;
799 }
800 } else if (thisOffset + thisSize == usedCapacity() && thisSize >= minShareSize) {
801 // this string reaches the end of the buffer - extend it
802 expandCapacity(thisOffset + length);
803 UChar *d = const_cast<UChar *>(data());
804 if (d) {
805 for (int i = 0; i < tSize; ++i)
806 d[thisSize + i] = static_cast<unsigned char>(t[i]); // use unsigned char to zero-extend instead of sign-extend
807 m_rep = Rep::create(m_rep, 0, length);
808 }
809 } else {
810 // this is shared with someone using more capacity, gotta make a whole new string
811 size_t newCapacity = expandedSize(length, 0);
812 UChar* d = allocChars(newCapacity);
813 if (!d)
814 m_rep = &Rep::null;
815 else {
816 memcpy(d, data(), thisSize * sizeof(UChar));
817 for (int i = 0; i < tSize; ++i)
818 d[thisSize + i] = static_cast<unsigned char>(t[i]); // use unsigned char to zero-extend instead of sign-extend
819 m_rep = Rep::create(d, length);
820 m_rep->capacity = newCapacity;
821 }
822 }
823
824 return *this;
825}
826
827UString& UString::append(UChar c)
828{
829 int thisOffset = m_rep->offset;
830 int length = size();
831
832 // possible cases:
833 if (length == 0) {
834 // this is empty - must make a new m_rep because we don't want to pollute the shared empty one
835 size_t newCapacity = expandedSize(1, 0);
836 UChar* d = allocChars(newCapacity);
837 if (!d)
838 m_rep = &Rep::null;
839 else {
840 d[0] = c;
841 m_rep = Rep::create(d, 1);
842 m_rep->capacity = newCapacity;
843 }
844 } else if (m_rep->baseIsSelf() && m_rep->rc == 1) {
845 // this is direct and has refcount of 1 (so we can just alter it directly)
846 expandCapacity(thisOffset + length + 1);
847 UChar *d = const_cast<UChar *>(data());
848 if (d) {
849 d[length] = c;
850 m_rep->len = length + 1;
851 m_rep->_hash = 0;
852 }
853 } else if (thisOffset + length == usedCapacity() && length >= minShareSize) {
854 // this reaches the end of the string - extend it and share
855 expandCapacity(thisOffset + length + 1);
856 UChar *d = const_cast<UChar *>(data());
857 if (d) {
858 d[length] = c;
859 m_rep = Rep::create(m_rep, 0, length + 1);
860 }
861 } else {
862 // this is shared with someone using more capacity, gotta make a whole new string
863 size_t newCapacity = expandedSize(length + 1, 0);
864 UChar* d = allocChars(newCapacity);
865 if (!d)
866 m_rep = &Rep::null;
867 else {
868 memcpy(d, data(), length * sizeof(UChar));
869 d[length] = c;
870 m_rep = Rep::create(d, length + 1);
871 m_rep->capacity = newCapacity;
872 }
873 }
874
875 return *this;
876}
877
878bool UString::getCString(CStringBuffer& buffer) const
879{
880 int length = size();
881 int neededSize = length + 1;
882 buffer.resize(neededSize);
883 char* buf = buffer.data();
884
885 UChar ored = 0;
886 const UChar* p = data();
887 char* q = buf;
888 const UChar* limit = p + length;
889 while (p != limit) {
890 UChar c = p[0];
891 ored |= c;
892 *q = static_cast<char>(c);
893 ++p;
894 ++q;
895 }
896 *q = '\0';
897
898 return !(ored & 0xFF00);
899}
900
901char *UString::ascii() const
902{
903 int length = size();
904 int neededSize = length + 1;
905 delete[] statBuffer;
906 statBuffer = new char[neededSize];
907
908 const UChar *p = data();
909 char *q = statBuffer;
910 const UChar *limit = p + length;
911 while (p != limit) {
912 *q = static_cast<char>(p[0]);
913 ++p;
914 ++q;
915 }
916 *q = '\0';
917
918 return statBuffer;
919}
920
921UString& UString::operator=(const char *c)
922{
923 if (!c) {
924 m_rep = &Rep::null;
925 return *this;
926 }
927
928 if (!c[0]) {
929 m_rep = &Rep::empty;
930 return *this;
931 }
932
933 int l = static_cast<int>(strlen(c));
934 UChar *d;
935 if (m_rep->rc == 1 && l <= m_rep->capacity && m_rep->baseIsSelf() && m_rep->offset == 0 && m_rep->preCapacity == 0) {
936 d = m_rep->buf;
937 m_rep->_hash = 0;
938 m_rep->len = l;
939 } else {
940 d = allocChars(l);
941 if (!d) {
942 m_rep = &Rep::null;
943 return *this;
944 }
945 m_rep = Rep::create(d, l);
946 }
947 for (int i = 0; i < l; i++)
948 d[i] = static_cast<unsigned char>(c[i]); // use unsigned char to zero-extend instead of sign-extend
949
950 return *this;
951}
952
953bool UString::is8Bit() const
954{
955 const UChar *u = data();
956 const UChar *limit = u + size();
957 while (u < limit) {
958 if (u[0] > 0xFF)
959 return false;
960 ++u;
961 }
962
963 return true;
964}
965
966UChar UString::operator[](int pos) const
967{
968 if (pos >= size())
969 return '\0';
970 return data()[pos];
971}
972
973double UString::toDouble(bool tolerateTrailingJunk, bool tolerateEmptyString) const
974{
975 double d;
976
977 // FIXME: If tolerateTrailingJunk is true, then we want to tolerate non-8-bit junk
978 // after the number, so this is too strict a check.
979 CStringBuffer s;
980 if (!getCString(s))
981 return NaN;
982 const char* c = s.data();
983
984 // skip leading white space
985 while (isASCIISpace(*c))
986 c++;
987
988 // empty string ?
989 if (*c == '\0')
990 return tolerateEmptyString ? 0.0 : NaN;
991
992 // hex number ?
993 if (*c == '0' && (*(c+1) == 'x' || *(c+1) == 'X')) {
994 const char* firstDigitPosition = c + 2;
995 c++;
996 d = 0.0;
997 while (*(++c)) {
998 if (*c >= '0' && *c <= '9')
999 d = d * 16.0 + *c - '0';
1000 else if ((*c >= 'A' && *c <= 'F') || (*c >= 'a' && *c <= 'f'))
1001 d = d * 16.0 + (*c & 0xdf) - 'A' + 10.0;
1002 else
1003 break;
1004 }
1005
1006 if (d >= mantissaOverflowLowerBound)
1007 d = parseIntOverflow(firstDigitPosition, c - firstDigitPosition, 16);
1008 } else {
1009 // regular number ?
1010 char *end;
1011 d = strtod(c, &end);
1012 if ((d != 0.0 || end != c) && d != Inf && d != -Inf) {
1013 c = end;
1014 } else {
1015 double sign = 1.0;
1016
1017 if (*c == '+')
1018 c++;
1019 else if (*c == '-') {
1020 sign = -1.0;
1021 c++;
1022 }
1023
1024 // We used strtod() to do the conversion. However, strtod() handles
1025 // infinite values slightly differently than JavaScript in that it
1026 // converts the string "inf" with any capitalization to infinity,
1027 // whereas the ECMA spec requires that it be converted to NaN.
1028
1029 if (c[0] == 'I' && c[1] == 'n' && c[2] == 'f' && c[3] == 'i' && c[4] == 'n' && c[5] == 'i' && c[6] == 't' && c[7] == 'y') {
1030 d = sign * Inf;
1031 c += 8;
1032 } else if ((d == Inf || d == -Inf) && *c != 'I' && *c != 'i')
1033 c = end;
1034 else
1035 return NaN;
1036 }
1037 }
1038
1039 // allow trailing white space
1040 while (isASCIISpace(*c))
1041 c++;
1042 // don't allow anything after - unless tolerant=true
1043 if (!tolerateTrailingJunk && *c != '\0')
1044 d = NaN;
1045
1046 return d;
1047}
1048
1049double UString::toDouble(bool tolerateTrailingJunk) const
1050{
1051 return toDouble(tolerateTrailingJunk, true);
1052}
1053
1054double UString::toDouble() const
1055{
1056 return toDouble(false, true);
1057}
1058
1059uint32_t UString::toUInt32(bool *ok) const
1060{
1061 double d = toDouble();
1062 bool b = true;
1063
1064 if (d != static_cast<uint32_t>(d)) {
1065 b = false;
1066 d = 0;
1067 }
1068
1069 if (ok)
1070 *ok = b;
1071
1072 return static_cast<uint32_t>(d);
1073}
1074
1075uint32_t UString::toUInt32(bool *ok, bool tolerateEmptyString) const
1076{
1077 double d = toDouble(false, tolerateEmptyString);
1078 bool b = true;
1079
1080 if (d != static_cast<uint32_t>(d)) {
1081 b = false;
1082 d = 0;
1083 }
1084
1085 if (ok)
1086 *ok = b;
1087
1088 return static_cast<uint32_t>(d);
1089}
1090
1091uint32_t UString::toStrictUInt32(bool *ok) const
1092{
1093 if (ok)
1094 *ok = false;
1095
1096 // Empty string is not OK.
1097 int len = m_rep->len;
1098 if (len == 0)
1099 return 0;
1100 const UChar *p = m_rep->data();
1101 unsigned short c = p[0];
1102
1103 // If the first digit is 0, only 0 itself is OK.
1104 if (c == '0') {
1105 if (len == 1 && ok)
1106 *ok = true;
1107 return 0;
1108 }
1109
1110 // Convert to UInt32, checking for overflow.
1111 uint32_t i = 0;
1112 while (1) {
1113 // Process character, turning it into a digit.
1114 if (c < '0' || c > '9')
1115 return 0;
1116 const unsigned d = c - '0';
1117
1118 // Multiply by 10, checking for overflow out of 32 bits.
1119 if (i > 0xFFFFFFFFU / 10)
1120 return 0;
1121 i *= 10;
1122
1123 // Add in the digit, checking for overflow out of 32 bits.
1124 const unsigned max = 0xFFFFFFFFU - d;
1125 if (i > max)
1126 return 0;
1127 i += d;
1128
1129 // Handle end of string.
1130 if (--len == 0) {
1131 if (ok)
1132 *ok = true;
1133 return i;
1134 }
1135
1136 // Get next character.
1137 c = *(++p);
1138 }
1139}
1140
1141int UString::find(const UString &f, int pos) const
1142{
1143 int sz = size();
1144 int fsz = f.size();
1145 if (sz < fsz)
1146 return -1;
1147 if (pos < 0)
1148 pos = 0;
1149 if (fsz == 0)
1150 return pos;
1151 const UChar *end = data() + sz - fsz;
1152 int fsizeminusone = (fsz - 1) * sizeof(UChar);
1153 const UChar *fdata = f.data();
1154 unsigned short fchar = fdata[0];
1155 ++fdata;
1156 for (const UChar *c = data() + pos; c <= end; c++)
1157 if (c[0] == fchar && !memcmp(c + 1, fdata, fsizeminusone))
1158 return static_cast<int>(c - data());
1159
1160 return -1;
1161}
1162
1163int UString::find(UChar ch, int pos) const
1164{
1165 if (pos < 0)
1166 pos = 0;
1167 const UChar *end = data() + size();
1168 for (const UChar *c = data() + pos; c < end; c++)
1169 if (*c == ch)
1170 return static_cast<int>(c - data());
1171
1172 return -1;
1173}
1174
1175int UString::rfind(const UString &f, int pos) const
1176{
1177 int sz = size();
1178 int fsz = f.size();
1179 if (sz < fsz)
1180 return -1;
1181 if (pos < 0)
1182 pos = 0;
1183 if (pos > sz - fsz)
1184 pos = sz - fsz;
1185 if (fsz == 0)
1186 return pos;
1187 int fsizeminusone = (fsz - 1) * sizeof(UChar);
1188 const UChar *fdata = f.data();
1189 for (const UChar *c = data() + pos; c >= data(); c--) {
1190 if (*c == *fdata && !memcmp(c + 1, fdata + 1, fsizeminusone))
1191 return static_cast<int>(c - data());
1192 }
1193
1194 return -1;
1195}
1196
1197int UString::rfind(UChar ch, int pos) const
1198{
1199 if (isEmpty())
1200 return -1;
1201 if (pos + 1 >= size())
1202 pos = size() - 1;
1203 for (const UChar *c = data() + pos; c >= data(); c--) {
1204 if (*c == ch)
1205 return static_cast<int>(c-data());
1206 }
1207
1208 return -1;
1209}
1210
1211UString UString::substr(int pos, int len) const
1212{
1213 int s = size();
1214
1215 if (pos < 0)
1216 pos = 0;
1217 else if (pos >= s)
1218 pos = s;
1219 if (len < 0)
1220 len = s;
1221 if (pos + len >= s)
1222 len = s - pos;
1223
1224 if (pos == 0 && len == s)
1225 return *this;
1226
1227 return UString(Rep::create(m_rep, pos, len));
1228}
1229
1230bool operator==(const UString& s1, const UString& s2)
1231{
1232 if (s1.m_rep->len != s2.m_rep->len)
1233 return false;
1234
1235 return (memcmp(s1.m_rep->data(), s2.m_rep->data(),
1236 s1.m_rep->len * sizeof(UChar)) == 0);
1237}
1238
1239bool operator==(const UString& s1, const char *s2)
1240{
1241 if (s2 == 0) {
1242 return s1.isEmpty();
1243 }
1244
1245 const UChar *u = s1.data();
1246 const UChar *uend = u + s1.size();
1247 while (u != uend && *s2) {
1248 if (u[0] != (unsigned char)*s2)
1249 return false;
1250 s2++;
1251 u++;
1252 }
1253
1254 return u == uend && *s2 == 0;
1255}
1256
1257bool operator<(const UString& s1, const UString& s2)
1258{
1259 const int l1 = s1.size();
1260 const int l2 = s2.size();
1261 const int lmin = l1 < l2 ? l1 : l2;
1262 const UChar *c1 = s1.data();
1263 const UChar *c2 = s2.data();
1264 int l = 0;
1265 while (l < lmin && *c1 == *c2) {
1266 c1++;
1267 c2++;
1268 l++;
1269 }
1270 if (l < lmin)
1271 return (c1[0] < c2[0]);
1272
1273 return (l1 < l2);
1274}
1275
1276bool operator>(const UString& s1, const UString& s2)
1277{
1278 const int l1 = s1.size();
1279 const int l2 = s2.size();
1280 const int lmin = l1 < l2 ? l1 : l2;
1281 const UChar *c1 = s1.data();
1282 const UChar *c2 = s2.data();
1283 int l = 0;
1284 while (l < lmin && *c1 == *c2) {
1285 c1++;
1286 c2++;
1287 l++;
1288 }
1289 if (l < lmin)
1290 return (c1[0] > c2[0]);
1291
1292 return (l1 > l2);
1293}
1294
1295int compare(const UString& s1, const UString& s2)
1296{
1297 const int l1 = s1.size();
1298 const int l2 = s2.size();
1299 const int lmin = l1 < l2 ? l1 : l2;
1300 const UChar *c1 = s1.data();
1301 const UChar *c2 = s2.data();
1302 int l = 0;
1303 while (l < lmin && *c1 == *c2) {
1304 c1++;
1305 c2++;
1306 l++;
1307 }
1308
1309 if (l < lmin)
1310 return (c1[0] > c2[0]) ? 1 : -1;
1311
1312 if (l1 == l2)
1313 return 0;
1314
1315 return (l1 > l2) ? 1 : -1;
1316}
1317
1318bool equal(const UString::Rep* r, const UString::Rep* b)
1319{
1320 int length = r->len;
1321 if (length != b->len)
1322 return false;
1323 const UChar* d = r->data();
1324 const UChar* s = b->data();
1325 for (int i = 0; i != length; ++i)
1326 if (d[i] != s[i])
1327 return false;
1328 return true;
1329}
1330
1331CString UString::UTF8String(bool strict) const
1332{
1333 // Allocate a buffer big enough to hold all the characters.
1334 const int length = size();
1335 Vector<char, 1024> buffer(length * 3);
1336
1337 // Convert to runs of 8-bit characters.
1338 char* p = buffer.data();
1339 const UChar* d = reinterpret_cast<const UChar*>(&data()[0]);
1340 ConversionResult result = convertUTF16ToUTF8(&d, d + length, &p, p + buffer.size(), strict);
1341 if (result != conversionOK)
1342 return CString();
1343
1344 return CString(buffer.data(), p - buffer.data());
1345}
1346
1347} // namespace KJS
Note: See TracBrowser for help on using the repository browser.