source: webkit/trunk/JavaScriptCore/kjs/ustring.cpp@ 27748

Last change on this file since 27748 was 27748, checked in by [email protected], 18 years ago

Reviewed by Sam Weinig.


Fixed http://bugs.webkit.org/show_bug.cgi?id=15958
base64 spends 1.1% of total time checking for special Infinity case


Use a fast character test instead of calling strncmp.


1.1% speedup on string-base64. SunSpider reports a .4% speedup overall;
Sharks reports only .1%. Who are you going to believe? Huh?

  • kjs/ustring.cpp: (KJS::UString::toDouble):
  • Property svn:eol-style set to native
File size: 30.7 KB
Line 
1// -*- c-basic-offset: 2 -*-
2/*
3 * Copyright (C) 1999-2000 Harri Porten ([email protected])
4 * Copyright (C) 2004, 2005, 2006, 2007 Apple Inc. All rights reserved.
5 * Copyright (C) 2007 Cameron Zwarich ([email protected])
6 *
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Library General Public
9 * License as published by the Free Software Foundation; either
10 * version 2 of the License, or (at your option) any later version.
11 *
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Library General Public License for more details.
16 *
17 * You should have received a copy of the GNU Library General Public License
18 * along with this library; see the file COPYING.LIB. If not, write to
19 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20 * Boston, MA 02110-1301, USA.
21 *
22 */
23
24#include "config.h"
25#include "ustring.h"
26
27#include "JSLock.h"
28#include "collector.h"
29#include "dtoa.h"
30#include "function.h"
31#include "identifier.h"
32#include "operations.h"
33#include <ctype.h>
34#include <float.h>
35#include <limits.h>
36#include <math.h>
37#include <stdio.h>
38#include <stdlib.h>
39#include <wtf/Assertions.h>
40#include <wtf/ASCIICType.h>
41#include <wtf/MathExtras.h>
42#include <wtf/Vector.h>
43
44#if HAVE(STRING_H)
45#include <string.h>
46#endif
47#if HAVE(STRINGS_H)
48#include <strings.h>
49#endif
50
51using namespace WTF;
52using namespace std;
53
54namespace KJS {
55
56extern const double NaN;
57extern const double Inf;
58
// Sentinel value used by the size computations in this file to signal that a
// requested size cannot be represented: the largest possible size_t.
static inline const size_t overflowIndicator()
{
  const size_t largest = std::numeric_limits<size_t>::max();
  return largest;
}
60static inline const size_t maxUChars() { return std::numeric_limits<size_t>::max() / sizeof(UChar); }
61
62static inline UChar* allocChars(size_t length)
63{
64 ASSERT(length);
65 if (length > maxUChars())
66 return 0;
67 return static_cast<UChar*>(fastMalloc(sizeof(UChar) * length));
68}
69
70static inline UChar* reallocChars(UChar* buffer, size_t length)
71{
72 ASSERT(length);
73 if (length > maxUChars())
74 return 0;
75 return static_cast<UChar*>(fastRealloc(buffer, sizeof(UChar) * length));
76}
77
// We'd rather not do shared substring append for small strings, since
// this runs too much risk of a tiny initial string holding down a
// huge buffer. This is also tuned to match the extra cost size, so we
// don't ever share a buffer that wouldn't be over the extra cost
// threshold already.
// The value is expressed in UChars (the byte threshold divided by sizeof(UChar)).
// FIXME: this should be size_t but that would cause warnings until we
// fix UString sizes to be size_t instead of int.
static const int minShareSize = Collector::minExtraCostSize / sizeof(UChar);

// Compile-time check that UChar is exactly two bytes wide.
COMPILE_ASSERT(sizeof(UChar) == 2, uchar_is_2_bytes)
88
89CString::CString(const char *c)
90{
91 length = strlen(c);
92 data = new char[length+1];
93 memcpy(data, c, length + 1);
94}
95
96CString::CString(const char *c, size_t len)
97{
98 length = len;
99 data = new char[len+1];
100 memcpy(data, c, len);
101 data[len] = 0;
102}
103
104CString::CString(const CString &b)
105{
106 length = b.length;
107 if (b.data) {
108 data = new char[length+1];
109 memcpy(data, b.data, length + 1);
110 }
111 else
112 data = 0;
113}
114
// Releases the character buffer. data may be null (see the copy constructor),
// which delete [] accepts.
CString::~CString()
{
  delete [] data;
}
119
120CString &CString::append(const CString &t)
121{
122 char *n;
123 n = new char[length+t.length+1];
124 if (length)
125 memcpy(n, data, length);
126 if (t.length)
127 memcpy(n+length, t.data, t.length);
128 length += t.length;
129 n[length] = 0;
130
131 delete [] data;
132 data = n;
133
134 return *this;
135}
136
137CString &CString::operator=(const char *c)
138{
139 if (data)
140 delete [] data;
141 length = strlen(c);
142 data = new char[length+1];
143 memcpy(data, c, length + 1);
144
145 return *this;
146}
147
148CString &CString::operator=(const CString &str)
149{
150 if (this == &str)
151 return *this;
152
153 if (data)
154 delete [] data;
155 length = str.length;
156 if (str.data) {
157 data = new char[length + 1];
158 memcpy(data, str.data, length + 1);
159 }
160 else
161 data = 0;
162
163 return *this;
164}
165
166bool operator==(const CString& c1, const CString& c2)
167{
168 size_t len = c1.size();
169 return len == c2.size() && (len == 0 || memcmp(c1.c_str(), c2.c_str(), len) == 0);
170}
171
// Hack here to avoid a global with a constructor; point to an unsigned short instead of a UChar.
static unsigned short almostUChar;
// Statically initialized shared Reps for the null and the empty string. Each
// is its own baseString; the empty one points its buffer at almostUChar while
// the null one has no buffer.
// NOTE(review): the initializer order presumably follows Rep's member
// declaration order (offset, len, rc, _hash, isIdentifier, baseString, buf,
// usedCapacity, capacity, usedPreCapacity, preCapacity) — confirm against ustring.h.
UString::Rep UString::Rep::null = { 0, 0, 1, 0, 0, &UString::Rep::null, 0, 0, 0, 0, 0 };
UString::Rep UString::Rep::empty = { 0, 0, 1, 0, 0, &UString::Rep::empty, reinterpret_cast<UChar*>(&almostUChar), 0, 0, 0, 0 };
// Minimum size of the shared buffer that UString::ascii() hands out.
const int normalStatBufferSize = 4096;
// Shared scratch buffer used by UString::ascii(), and its current size in bytes.
static char *statBuffer = 0;
static int statBufferSize = 0;
179
180PassRefPtr<UString::Rep> UString::Rep::createCopying(const UChar *d, int l)
181{
182 ASSERT(JSLock::lockCount() > 0);
183
184 int sizeInBytes = l * sizeof(UChar);
185 UChar *copyD = static_cast<UChar *>(fastMalloc(sizeInBytes));
186 memcpy(copyD, d, sizeInBytes);
187
188 return create(copyD, l);
189}
190
191PassRefPtr<UString::Rep> UString::Rep::create(UChar *d, int l)
192{
193 ASSERT(JSLock::lockCount() > 0);
194
195 Rep* r = new Rep;
196 r->offset = 0;
197 r->len = l;
198 r->rc = 1;
199 r->_hash = 0;
200 r->isIdentifier = 0;
201 r->baseString = r;
202 r->buf = d;
203 r->usedCapacity = l;
204 r->capacity = l;
205 r->usedPreCapacity = 0;
206 r->preCapacity = 0;
207
208 // steal the single reference this Rep was created with
209 return adoptRef(r);
210}
211
212PassRefPtr<UString::Rep> UString::Rep::create(PassRefPtr<Rep> base, int offset, int length)
213{
214 ASSERT(JSLock::lockCount() > 0);
215 ASSERT(base);
216
217 int baseOffset = base->offset;
218
219 base = base->baseString;
220
221 ASSERT(-(offset + baseOffset) <= base->usedPreCapacity);
222 ASSERT(offset + baseOffset + length <= base->usedCapacity);
223
224 Rep *r = new Rep;
225 r->offset = baseOffset + offset;
226 r->len = length;
227 r->rc = 1;
228 r->_hash = 0;
229 r->isIdentifier = 0;
230 r->baseString = base.releaseRef();
231 r->buf = 0;
232 r->usedCapacity = 0;
233 r->capacity = 0;
234 r->usedPreCapacity = 0;
235 r->preCapacity = 0;
236
237 // steal the single reference this Rep was created with
238 return adoptRef(r);
239}
240
241void UString::Rep::destroy()
242{
243 ASSERT(JSLock::lockCount() > 0);
244
245 if (isIdentifier)
246 Identifier::remove(this);
247 if (baseString != this) {
248 baseString->deref();
249 } else {
250 fastFree(buf);
251 }
252 delete this;
253}
254
// Golden ratio - arbitrary start value to avoid mapping all 0's to all 0's
// or anything like that. Used as the initial hash value by both
// UString::Rep::computeHash overloads.
const unsigned PHI = 0x9e3779b9U;
258
259// Paul Hsieh's SuperFastHash
260// http://www.azillionmonkeys.com/qed/hash.html
261unsigned UString::Rep::computeHash(const UChar *s, int len)
262{
263 unsigned l = len;
264 uint32_t hash = PHI;
265 uint32_t tmp;
266
267 int rem = l & 1;
268 l >>= 1;
269
270 // Main loop
271 for (; l > 0; l--) {
272 hash += s[0].uc;
273 tmp = (s[1].uc << 11) ^ hash;
274 hash = (hash << 16) ^ tmp;
275 s += 2;
276 hash += hash >> 11;
277 }
278
279 // Handle end case
280 if (rem) {
281 hash += s[0].uc;
282 hash ^= hash << 11;
283 hash += hash >> 17;
284 }
285
286 // Force "avalanching" of final 127 bits
287 hash ^= hash << 3;
288 hash += hash >> 5;
289 hash ^= hash << 2;
290 hash += hash >> 15;
291 hash ^= hash << 10;
292
293 // this avoids ever returning a hash code of 0, since that is used to
294 // signal "hash not computed yet", using a value that is likely to be
295 // effectively the same as 0 when the low bits are masked
296 if (hash == 0)
297 hash = 0x80000000;
298
299 return hash;
300}
301
302// Paul Hsieh's SuperFastHash
303// http://www.azillionmonkeys.com/qed/hash.html
304unsigned UString::Rep::computeHash(const char *s)
305{
306 // This hash is designed to work on 16-bit chunks at a time. But since the normal case
307 // (above) is to hash UTF-16 characters, we just treat the 8-bit chars as if they
308 // were 16-bit chunks, which should give matching results
309
310 uint32_t hash = PHI;
311 uint32_t tmp;
312 size_t l = strlen(s);
313
314 size_t rem = l & 1;
315 l >>= 1;
316
317 // Main loop
318 for (; l > 0; l--) {
319 hash += (unsigned char)s[0];
320 tmp = ((unsigned char)s[1] << 11) ^ hash;
321 hash = (hash << 16) ^ tmp;
322 s += 2;
323 hash += hash >> 11;
324 }
325
326 // Handle end case
327 if (rem) {
328 hash += (unsigned char)s[0];
329 hash ^= hash << 11;
330 hash += hash >> 17;
331 }
332
333 // Force "avalanching" of final 127 bits
334 hash ^= hash << 3;
335 hash += hash >> 5;
336 hash ^= hash << 2;
337 hash += hash >> 15;
338 hash ^= hash << 10;
339
340 // this avoids ever returning a hash code of 0, since that is used to
341 // signal "hash not computed yet", using a value that is likely to be
342 // effectively the same as 0 when the low bits are masked
343 if (hash == 0)
344 hash = 0x80000000;
345
346 return hash;
347}
348
349// put these early so they can be inlined
350inline size_t UString::expandedSize(size_t size, size_t otherSize) const
351{
352 // Do the size calculation in two parts, returning overflowIndicator if
353 // we overflow the maximum value that we can handle.
354
355 if (size > maxUChars())
356 return overflowIndicator();
357
358 size_t expandedSize = ((size + 10) / 10 * 11) + 1;
359 if (maxUChars() - expandedSize < otherSize)
360 return overflowIndicator();
361
362 return expandedSize + otherSize;
363}
364
365inline int UString::usedCapacity() const
366{
367 return m_rep->baseString->usedCapacity;
368}
369
370inline int UString::usedPreCapacity() const
371{
372 return m_rep->baseString->usedPreCapacity;
373}
374
375void UString::expandCapacity(int requiredLength)
376{
377 Rep* r = m_rep->baseString;
378
379 if (requiredLength > r->capacity) {
380 size_t newCapacity = expandedSize(requiredLength, r->preCapacity);
381 UChar* oldBuf = r->buf;
382 r->buf = reallocChars(r->buf, newCapacity);
383 if (!r->buf) {
384 r->buf = oldBuf;
385 m_rep = &Rep::null;
386 return;
387 }
388 r->capacity = newCapacity - r->preCapacity;
389 }
390 if (requiredLength > r->usedCapacity) {
391 r->usedCapacity = requiredLength;
392 }
393}
394
395void UString::expandPreCapacity(int requiredPreCap)
396{
397 Rep* r = m_rep->baseString;
398
399 if (requiredPreCap > r->preCapacity) {
400 size_t newCapacity = expandedSize(requiredPreCap, r->capacity);
401 int delta = newCapacity - r->capacity - r->preCapacity;
402
403 UChar* newBuf = allocChars(newCapacity);
404 if (!newBuf) {
405 m_rep = &Rep::null;
406 return;
407 }
408 memcpy(newBuf + delta, r->buf, (r->capacity + r->preCapacity) * sizeof(UChar));
409 fastFree(r->buf);
410 r->buf = newBuf;
411
412 r->preCapacity = newCapacity - r->capacity;
413 }
414 if (requiredPreCap > r->usedPreCapacity) {
415 r->usedPreCapacity = requiredPreCap;
416 }
417}
418
419UString::UString(const char *c)
420{
421 if (!c) {
422 m_rep = &Rep::null;
423 return;
424 }
425
426 if (!c[0]) {
427 m_rep = &Rep::empty;
428 return;
429 }
430
431 size_t length = strlen(c);
432 UChar *d = allocChars(length);
433 if (!d)
434 m_rep = &Rep::null;
435 else {
436 for (size_t i = 0; i < length; i++)
437 d[i].uc = c[i];
438 m_rep = Rep::create(d, static_cast<int>(length));
439 }
440}
441
442UString::UString(const UChar *c, int length)
443{
444 if (length == 0)
445 m_rep = &Rep::empty;
446 else
447 m_rep = Rep::createCopying(c, length);
448}
449
450UString::UString(UChar *c, int length, bool copy)
451{
452 if (length == 0)
453 m_rep = &Rep::empty;
454 else if (copy)
455 m_rep = Rep::createCopying(c, length);
456 else
457 m_rep = Rep::create(c, length);
458}
459
// Concatenating constructor: builds the string a + b.
// Where possible it avoids copying both operands by extending a's buffer in
// place (shared append) or growing b's buffer at the front (shared prepend);
// otherwise it allocates a new buffer. On allocation failure the result is
// the null string.
UString::UString(const UString &a, const UString &b)
{
  int aSize = a.size();
  int aOffset = a.m_rep->offset;
  int bSize = b.size();
  int bOffset = b.m_rep->offset;
  int length = aSize + bSize;

  // possible cases:

  if (aSize == 0) {
    // a is empty
    m_rep = b.m_rep;
  } else if (bSize == 0) {
    // b is empty
    m_rep = a.m_rep;
  } else if (aOffset + aSize == a.usedCapacity() && aSize >= minShareSize && 4 * aSize >= bSize &&
             (-bOffset != b.usedPreCapacity() || aSize >= bSize)) {
    // - a reaches the end of its buffer so it qualifies for shared append
    // - also, it's at least a quarter the length of b - appending to a much shorter
    //   string does more harm than good
    // - however, if b qualifies for prepend and is longer than a, we'd rather prepend
    UString x(a);
    // x shares a's Rep, so growing through x grows the buffer a reads from;
    // if growing fails, x becomes null and x.data() is expected to be 0.
    x.expandCapacity(aOffset + length);
    if (a.data() && x.data()) {
      // Write b directly after a's characters in the shared buffer.
      memcpy(const_cast<UChar *>(a.data() + aSize), b.data(), bSize * sizeof(UChar));
      m_rep = Rep::create(a.m_rep, 0, length);
    } else
      m_rep = &Rep::null;
  } else if (-bOffset == b.usedPreCapacity() && bSize >= minShareSize && 4 * bSize >= aSize) {
    // - b reaches the beginning of its buffer so it qualifies for shared prepend
    // - also, it's at least a quarter the length of a - prepending to a much shorter
    //   string does more harm than good
    UString y(b);
    // y shares b's Rep, so growing through y grows the buffer b reads from.
    y.expandPreCapacity(-bOffset + aSize);
    if (b.data() && y.data()) {
      // Write a directly in front of b's characters in the shared buffer.
      memcpy(const_cast<UChar *>(b.data() - aSize), a.data(), aSize * sizeof(UChar));
      m_rep = Rep::create(b.m_rep, -aSize, length);
    } else
      m_rep = &Rep::null;
  } else {
    // a does not qualify for append, and b does not qualify for prepend, gotta make a whole new string
    size_t newCapacity = expandedSize(length, 0);
    UChar* d = allocChars(newCapacity);
    if (!d)
      m_rep = &Rep::null;
    else {
      memcpy(d, a.data(), aSize * sizeof(UChar));
      memcpy(d + aSize, b.data(), bSize * sizeof(UChar));
      m_rep = Rep::create(d, length);
      // Rep::create sets capacity to length; record the larger real allocation.
      m_rep->capacity = newCapacity;
    }
  }
}
514
515const UString& UString::null()
516{
517 static UString* n = new UString;
518 return *n;
519}
520
521UString UString::from(int i)
522{
523 UChar buf[1 + sizeof(i) * 3];
524 UChar *end = buf + sizeof(buf) / sizeof(UChar);
525 UChar *p = end;
526
527 if (i == 0) {
528 *--p = '0';
529 } else if (i == INT_MIN) {
530 char minBuf[1 + sizeof(i) * 3];
531 sprintf(minBuf, "%d", INT_MIN);
532 return UString(minBuf);
533 } else {
534 bool negative = false;
535 if (i < 0) {
536 negative = true;
537 i = -i;
538 }
539 while (i) {
540 *--p = (unsigned short)((i % 10) + '0');
541 i /= 10;
542 }
543 if (negative) {
544 *--p = '-';
545 }
546 }
547
548 return UString(p, static_cast<int>(end - p));
549}
550
551UString UString::from(unsigned int u)
552{
553 UChar buf[sizeof(u) * 3];
554 UChar *end = buf + sizeof(buf) / sizeof(UChar);
555 UChar *p = end;
556
557 if (u == 0) {
558 *--p = '0';
559 } else {
560 while (u) {
561 *--p = (unsigned short)((u % 10) + '0');
562 u /= 10;
563 }
564 }
565
566 return UString(p, static_cast<int>(end - p));
567}
568
569UString UString::from(long l)
570{
571 UChar buf[1 + sizeof(l) * 3];
572 UChar *end = buf + sizeof(buf) / sizeof(UChar);
573 UChar *p = end;
574
575 if (l == 0) {
576 *--p = '0';
577 } else if (l == LONG_MIN) {
578 char minBuf[1 + sizeof(l) * 3];
579 sprintf(minBuf, "%ld", LONG_MIN);
580 return UString(minBuf);
581 } else {
582 bool negative = false;
583 if (l < 0) {
584 negative = true;
585 l = -l;
586 }
587 while (l) {
588 *--p = (unsigned short)((l % 10) + '0');
589 l /= 10;
590 }
591 if (negative) {
592 *--p = '-';
593 }
594 }
595
596 return UString(p, static_cast<int>(end - p));
597}
598
// Returns the string form of the double d.
// The digits come from kjs_dtoa; this function only lays them out: plain
// decimal notation when the decimal point position is in (-6, 21], and
// exponent notation ("d.ddde+N" / "d.ddde-N") otherwise. NaN always prints
// as "NaN".
UString UString::from(double d)
{
  // avoid ever printing -NaN, in JS conceptually there is only one NaN value
  if (isnan(d))
    return "NaN";

  char buf[80];
  int decimalPoint;
  int sign;

  // result holds the digit string; decimalPoint and sign are filled in by
  // kjs_dtoa. NOTE(review): decimalPoint is presumably the position of the
  // decimal point relative to the start of the digits — confirm in dtoa.h.
  char *result = kjs_dtoa(d, 0, 0, &decimalPoint, &sign, NULL);
  int length = static_cast<int>(strlen(result));

  // i is the write position in buf.
  int i = 0;
  if (sign) {
    buf[i++] = '-';
  }

  if (decimalPoint <= 0 && decimalPoint > -6) {
    // Small magnitude: "0." followed by -decimalPoint zeros, then the digits.
    buf[i++] = '0';
    buf[i++] = '.';
    for (int j = decimalPoint; j < 0; j++) {
      buf[i++] = '0';
    }
    strcpy(buf + i, result);
  } else if (decimalPoint <= 21 && decimalPoint > 0) {
    if (length <= decimalPoint) {
      // Integer value: the digits, padded with trailing zeros up to the
      // decimal point.
      strcpy(buf + i, result);
      i += length;
      for (int j = 0; j < decimalPoint - length; j++) {
        buf[i++] = '0';
      }
      buf[i] = '\0';
    } else {
      // Decimal point falls inside the digits: split them around a '.'.
      // strncpy does not terminate here; the strcpy below does.
      strncpy(buf + i, result, decimalPoint);
      i += decimalPoint;
      buf[i++] = '.';
      strcpy(buf + i, result + decimalPoint);
    }
  } else if (result[0] < '0' || result[0] > '9') {
    // result does not start with a digit (a non-numeric result from
    // kjs_dtoa): copy it through unchanged.
    strcpy(buf + i, result);
  } else {
    // Exponent notation: first digit, optional fraction, then the exponent.
    buf[i++] = result[0];
    if (length > 1) {
      buf[i++] = '.';
      strcpy(buf + i, result + 1);
      i += length - 1;
    }

    buf[i++] = 'e';
    buf[i++] = (decimalPoint >= 0) ? '+' : '-';
    // decimalPoint can't be more than 3 digits decimal given the
    // nature of float representation
    int exponential = decimalPoint - 1;
    if (exponential < 0)
      exponential = -exponential;
    // Emit the exponent magnitude without leading zeros.
    if (exponential >= 100)
      buf[i++] = static_cast<char>('0' + exponential / 100);
    if (exponential >= 10)
      buf[i++] = static_cast<char>('0' + (exponential % 100) / 10);
    buf[i++] = static_cast<char>('0' + exponential % 10);
    buf[i++] = '\0';
  }

  kjs_freedtoa(result);

  return UString(buf);
}
667
668UString UString::spliceSubstringsWithSeparators(const Range* substringRanges, int rangeCount, const UString* separators, int separatorCount) const
669{
670 if (rangeCount == 1 && separatorCount == 0) {
671 int thisSize = size();
672 int position = substringRanges[0].position;
673 int length = substringRanges[0].length;
674 if (position <= 0 && length >= thisSize)
675 return *this;
676 return UString::Rep::create(m_rep, max(0, position), min(thisSize, length));
677 }
678
679 int totalLength = 0;
680 for (int i = 0; i < rangeCount; i++)
681 totalLength += substringRanges[i].length;
682 for (int i = 0; i < separatorCount; i++)
683 totalLength += separators[i].size();
684
685 if (totalLength == 0)
686 return "";
687
688 UChar* buffer = allocChars(totalLength);
689 if (!buffer)
690 return null();
691
692 int maxCount = max(rangeCount, separatorCount);
693 int bufferPos = 0;
694 for (int i = 0; i < maxCount; i++) {
695 if (i < rangeCount) {
696 memcpy(buffer + bufferPos, data() + substringRanges[i].position, substringRanges[i].length * sizeof(UChar));
697 bufferPos += substringRanges[i].length;
698 }
699 if (i < separatorCount) {
700 memcpy(buffer + bufferPos, separators[i].data(), separators[i].size() * sizeof(UChar));
701 bufferPos += separators[i].size();
702 }
703 }
704
705 return UString::Rep::create(buffer, totalLength);
706}
707
// Appends t to this string and returns *this.
// Depending on how this string's buffer is shared, the characters are added
// in place, added to the shared buffer with a new Rep covering the longer
// range, or copied into a newly allocated buffer. On allocation failure this
// string becomes the null string.
UString &UString::append(const UString &t)
{
  int thisSize = size();
  int thisOffset = m_rep->offset;
  int tSize = t.size();
  int length = thisSize + tSize;

  // possible cases:
  if (thisSize == 0) {
    // this is empty
    *this = t;
  } else if (tSize == 0) {
    // t is empty
  } else if (m_rep->baseIsSelf() && m_rep->rc == 1) {
    // this is direct and has refcount of 1 (so we can just alter it directly)
    expandCapacity(thisOffset + length);
    // expandCapacity switches m_rep to the null Rep on failure, in which
    // case data() is expected to be 0 and the copy is skipped.
    if (data()) {
      memcpy(const_cast<UChar*>(data() + thisSize), t.data(), tSize * sizeof(UChar));
      m_rep->len = length;
      // Contents changed, so any cached hash is stale.
      m_rep->_hash = 0;
    }
  } else if (thisOffset + thisSize == usedCapacity() && thisSize >= minShareSize) {
    // this reaches the end of the buffer - extend it if it's long enough to append to
    expandCapacity(thisOffset + length);
    if (data()) {
      memcpy(const_cast<UChar*>(data() + thisSize), t.data(), tSize * sizeof(UChar));
      // Other strings may still refer to the old range, so describe the
      // longer range with a new Rep instead of changing len in place.
      m_rep = Rep::create(m_rep, 0, length);
    }
  } else {
    // this is shared with someone using more capacity, gotta make a whole new string
    size_t newCapacity = expandedSize(length, 0);
    UChar* d = allocChars(newCapacity);
    if (!d)
      m_rep = &Rep::null;
    else {
      memcpy(d, data(), thisSize * sizeof(UChar));
      memcpy(const_cast<UChar*>(d + thisSize), t.data(), tSize * sizeof(UChar));
      m_rep = Rep::create(d, length);
      // Rep::create sets capacity to length; record the larger real allocation.
      m_rep->capacity = newCapacity;
    }
  }

  return *this;
}
752
// Appends the NUL-terminated 8-bit string t to this string, one UChar per
// byte, and returns *this. t must not be null (it is passed to strlen).
// The buffer handling mirrors append(const UString&): in place, via the
// shared buffer with a new Rep, or into a newly allocated buffer. On
// allocation failure this string becomes the null string.
UString &UString::append(const char *t)
{
  int thisSize = size();
  int thisOffset = m_rep->offset;
  int tSize = static_cast<int>(strlen(t));
  int length = thisSize + tSize;

  // possible cases:
  if (thisSize == 0) {
    // this is empty
    *this = t;
  } else if (tSize == 0) {
    // t is empty, we'll just return *this below.
  } else if (m_rep->baseIsSelf() && m_rep->rc == 1) {
    // this is direct and has refcount of 1 (so we can just alter it directly)
    expandCapacity(thisOffset + length);
    // d is expected to be 0 if expandCapacity failed and made this string null.
    UChar *d = const_cast<UChar *>(data());
    if (d) {
      for (int i = 0; i < tSize; ++i)
        d[thisSize + i] = t[i];
      m_rep->len = length;
      // Contents changed, so any cached hash is stale.
      m_rep->_hash = 0;
    }
  } else if (thisOffset + thisSize == usedCapacity() && thisSize >= minShareSize) {
    // this string reaches the end of the buffer - extend it
    expandCapacity(thisOffset + length);
    UChar *d = const_cast<UChar *>(data());
    if (d) {
      for (int i = 0; i < tSize; ++i)
        d[thisSize + i] = t[i];
      // Describe the longer range with a new Rep rather than changing len
      // on a Rep that others may share.
      m_rep = Rep::create(m_rep, 0, length);
    }
  } else {
    // this is shared with someone using more capacity, gotta make a whole new string
    size_t newCapacity = expandedSize(length, 0);
    UChar* d = allocChars(newCapacity);
    if (!d)
      m_rep = &Rep::null;
    else {
      memcpy(d, data(), thisSize * sizeof(UChar));
      for (int i = 0; i < tSize; ++i)
        d[thisSize + i] = t[i];
      m_rep = Rep::create(d, length);
      // Rep::create sets capacity to length; record the larger real allocation.
      m_rep->capacity = newCapacity;
    }
  }

  return *this;
}
802
// Appends the single code unit c to this string and returns *this.
// The buffer handling mirrors append(const UString&), except that an empty
// string always gets a fresh buffer. On allocation failure this string
// becomes the null string.
UString &UString::append(unsigned short c)
{
  int thisOffset = m_rep->offset;
  int length = size();

  // possible cases:
  if (length == 0) {
    // this is empty - must make a new m_rep because we don't want to pollute the shared empty one
    size_t newCapacity = expandedSize(1, 0);
    UChar* d = allocChars(newCapacity);
    if (!d)
      m_rep = &Rep::null;
    else {
      d[0] = c;
      m_rep = Rep::create(d, 1);
      // Rep::create sets capacity to 1; record the larger real allocation.
      m_rep->capacity = newCapacity;
    }
  } else if (m_rep->baseIsSelf() && m_rep->rc == 1) {
    // this is direct and has refcount of 1 (so we can just alter it directly)
    expandCapacity(thisOffset + length + 1);
    // d is expected to be 0 if expandCapacity failed and made this string null.
    UChar *d = const_cast<UChar *>(data());
    if (d) {
      d[length] = c;
      m_rep->len = length + 1;
      // Contents changed, so any cached hash is stale.
      m_rep->_hash = 0;
    }
  } else if (thisOffset + length == usedCapacity() && length >= minShareSize) {
    // this reaches the end of the string - extend it and share
    expandCapacity(thisOffset + length + 1);
    UChar *d = const_cast<UChar *>(data());
    if (d) {
      d[length] = c;
      m_rep = Rep::create(m_rep, 0, length + 1);
    }
  } else {
    // this is shared with someone using more capacity, gotta make a whole new string
    size_t newCapacity = expandedSize(length + 1, 0);
    UChar* d = allocChars(newCapacity);
    if (!d)
      m_rep = &Rep::null;
    else {
      memcpy(d, data(), length * sizeof(UChar));
      d[length] = c;
      m_rep = Rep::create(d, length + 1);
      m_rep->capacity = newCapacity;
    }
  }

  return *this;
}
853
854CString UString::cstring() const
855{
856 return ascii();
857}
858
859char *UString::ascii() const
860{
861 // Never make the buffer smaller than normalStatBufferSize.
862 // Thus we almost never need to reallocate.
863 int length = size();
864 int neededSize = length + 1;
865 if (neededSize < normalStatBufferSize) {
866 neededSize = normalStatBufferSize;
867 }
868 if (neededSize != statBufferSize) {
869 delete [] statBuffer;
870 statBuffer = new char [neededSize];
871 statBufferSize = neededSize;
872 }
873
874 const UChar *p = data();
875 char *q = statBuffer;
876 const UChar *limit = p + length;
877 while (p != limit) {
878 *q = static_cast<char>(p->uc);
879 ++p;
880 ++q;
881 }
882 *q = '\0';
883
884 return statBuffer;
885}
886
#ifdef KJS_DEBUG_MEM
// Debug-only helper: frees the shared buffer used by ascii() and resets the
// bookkeeping so the next ascii() call allocates a new one.
void UString::globalClear()
{
  char *retired = statBuffer;
  statBuffer = 0;
  statBufferSize = 0;
  delete [] retired;
}
#endif
895
896UString &UString::operator=(const char *c)
897{
898 if (!c) {
899 m_rep = &Rep::null;
900 return *this;
901 }
902
903 if (!c[0]) {
904 m_rep = &Rep::empty;
905 return *this;
906 }
907
908 int l = static_cast<int>(strlen(c));
909 UChar *d;
910 if (m_rep->rc == 1 && l <= m_rep->capacity && m_rep->baseIsSelf() && m_rep->offset == 0 && m_rep->preCapacity == 0) {
911 d = m_rep->buf;
912 m_rep->_hash = 0;
913 m_rep->len = l;
914 } else {
915 d = allocChars(l);
916 if (!d) {
917 m_rep = &Rep::null;
918 return *this;
919 }
920 m_rep = Rep::create(d, l);
921 }
922 for (int i = 0; i < l; i++)
923 d[i].uc = c[i];
924
925 return *this;
926}
927
928bool UString::is8Bit() const
929{
930 const UChar *u = data();
931 const UChar *limit = u + size();
932 while (u < limit) {
933 if (u->uc > 0xFF)
934 return false;
935 ++u;
936 }
937
938 return true;
939}
940
941const UChar UString::operator[](int pos) const
942{
943 if (pos >= size())
944 return '\0';
945 return data()[pos];
946}
947
// Parses the string as a number and returns it as a double.
//
// Leading and trailing ASCII white space is skipped. Accepts "0x"/"0X" hex
// literals, anything kjs_strtod accepts, and an optionally signed
// "Infinity". Returns NaN for strings that are not 8-bit or that cannot be
// parsed.
//
// tolerateTrailingJunk: when false, any non-space characters after the
//   number make the result NaN.
// tolerateEmptyString: when true, an empty or all-white-space string yields
//   0; otherwise NaN.
double UString::toDouble(bool tolerateTrailingJunk, bool tolerateEmptyString) const
{
  double d;

  // FIXME: If tolerateTrailingJunk is true, then we want to tolerate non-8-bit junk
  // after the number, so is8Bit is too strict a check.
  if (!is8Bit())
    return NaN;

  // c points into the shared buffer returned by ascii().
  const char *c = ascii();

  // skip leading white space
  while (isASCIISpace(*c))
    c++;

  // empty string ?
  if (*c == '\0')
    return tolerateEmptyString ? 0.0 : NaN;

  // hex number ?
  if (*c == '0' && (*(c+1) == 'x' || *(c+1) == 'X')) {
    const char* firstDigitPosition = c + 2;
    // Step onto the 'x'; the pre-increment in the loop then moves to the
    // first hex digit.
    c++;
    d = 0.0;
    while (*(++c)) {
      if (*c >= '0' && *c <= '9')
        d = d * 16.0 + *c - '0';
      else if ((*c >= 'A' && *c <= 'F') || (*c >= 'a' && *c <= 'f'))
        // Masking with 0xdf folds lower-case letters to upper case.
        d = d * 16.0 + (*c & 0xdf) - 'A' + 10.0;
      else
        break;
    }

    // NOTE(review): presumably the simple accumulation above loses precision
    // past this bound and parseIntOverflow recomputes the value more
    // carefully — both names are defined elsewhere; confirm there.
    if (d >= mantissaOverflowLowerBound)
      d = parseIntOverflow(firstDigitPosition, c - firstDigitPosition, 16);
  } else {
    // regular number ?
    char *end;
    d = kjs_strtod(c, &end);
    // Accept the result directly when something was consumed (or the value
    // is non-zero) and it is finite.
    if ((d != 0.0 || end != c) && d != Inf && d != -Inf) {
      c = end;
    } else {
      double sign = 1.0;

      if (*c == '+')
        c++;
      else if (*c == '-') {
        sign = -1.0;
        c++;
      }

      // We used strtod() to do the conversion. However, strtod() handles
      // infinite values slightly differently than JavaScript in that it
      // converts the string "inf" with any capitalization to infinity,
      // whereas the ECMA spec requires that it be converted to NaN.

      // The && chain short-circuits, so it never reads past the terminator.
      if (c[0] == 'I' && c[1] == 'n' && c[2] == 'f' && c[3] == 'i' && c[4] == 'n' && c[5] == 'i' && c[6] == 't' && c[7] == 'y') {
        d = sign * Inf;
        c += 8;
      } else if ((d == Inf || d == -Inf) && *c != 'I' && *c != 'i')
        // An infinite result that did not come from an "inf"-style word
        // (i.e. the text does not start with 'I' or 'i') is kept.
        c = end;
      else
        return NaN;
    }
  }

  // allow trailing white space
  while (isASCIISpace(*c))
    c++;
  // don't allow anything after - unless tolerant=true
  if (!tolerateTrailingJunk && *c != '\0')
    d = NaN;

  return d;
}
1023
1024double UString::toDouble(bool tolerateTrailingJunk) const
1025{
1026 return toDouble(tolerateTrailingJunk, true);
1027}
1028
1029double UString::toDouble() const
1030{
1031 return toDouble(false, true);
1032}
1033
1034uint32_t UString::toUInt32(bool *ok) const
1035{
1036 double d = toDouble();
1037 bool b = true;
1038
1039 if (d != static_cast<uint32_t>(d)) {
1040 b = false;
1041 d = 0;
1042 }
1043
1044 if (ok)
1045 *ok = b;
1046
1047 return static_cast<uint32_t>(d);
1048}
1049
1050uint32_t UString::toUInt32(bool *ok, bool tolerateEmptyString) const
1051{
1052 double d = toDouble(false, tolerateEmptyString);
1053 bool b = true;
1054
1055 if (d != static_cast<uint32_t>(d)) {
1056 b = false;
1057 d = 0;
1058 }
1059
1060 if (ok)
1061 *ok = b;
1062
1063 return static_cast<uint32_t>(d);
1064}
1065
1066uint32_t UString::toStrictUInt32(bool *ok) const
1067{
1068 if (ok)
1069 *ok = false;
1070
1071 // Empty string is not OK.
1072 int len = m_rep->len;
1073 if (len == 0)
1074 return 0;
1075 const UChar *p = m_rep->data();
1076 unsigned short c = p->unicode();
1077
1078 // If the first digit is 0, only 0 itself is OK.
1079 if (c == '0') {
1080 if (len == 1 && ok)
1081 *ok = true;
1082 return 0;
1083 }
1084
1085 // Convert to UInt32, checking for overflow.
1086 uint32_t i = 0;
1087 while (1) {
1088 // Process character, turning it into a digit.
1089 if (c < '0' || c > '9')
1090 return 0;
1091 const unsigned d = c - '0';
1092
1093 // Multiply by 10, checking for overflow out of 32 bits.
1094 if (i > 0xFFFFFFFFU / 10)
1095 return 0;
1096 i *= 10;
1097
1098 // Add in the digit, checking for overflow out of 32 bits.
1099 const unsigned max = 0xFFFFFFFFU - d;
1100 if (i > max)
1101 return 0;
1102 i += d;
1103
1104 // Handle end of string.
1105 if (--len == 0) {
1106 if (ok)
1107 *ok = true;
1108 return i;
1109 }
1110
1111 // Get next character.
1112 c = (++p)->unicode();
1113 }
1114}
1115
1116int UString::find(const UString &f, int pos) const
1117{
1118 int sz = size();
1119 int fsz = f.size();
1120 if (sz < fsz)
1121 return -1;
1122 if (pos < 0)
1123 pos = 0;
1124 if (fsz == 0)
1125 return pos;
1126 const UChar *end = data() + sz - fsz;
1127 int fsizeminusone = (fsz - 1) * sizeof(UChar);
1128 const UChar *fdata = f.data();
1129 unsigned short fchar = fdata->uc;
1130 ++fdata;
1131 for (const UChar *c = data() + pos; c <= end; c++)
1132 if (c->uc == fchar && !memcmp(c + 1, fdata, fsizeminusone))
1133 return static_cast<int>(c - data());
1134
1135 return -1;
1136}
1137
1138int UString::find(UChar ch, int pos) const
1139{
1140 if (pos < 0)
1141 pos = 0;
1142 const UChar *end = data() + size();
1143 for (const UChar *c = data() + pos; c < end; c++)
1144 if (*c == ch)
1145 return static_cast<int>(c - data());
1146
1147 return -1;
1148}
1149
1150int UString::rfind(const UString &f, int pos) const
1151{
1152 int sz = size();
1153 int fsz = f.size();
1154 if (sz < fsz)
1155 return -1;
1156 if (pos < 0)
1157 pos = 0;
1158 if (pos > sz - fsz)
1159 pos = sz - fsz;
1160 if (fsz == 0)
1161 return pos;
1162 int fsizeminusone = (fsz - 1) * sizeof(UChar);
1163 const UChar *fdata = f.data();
1164 for (const UChar *c = data() + pos; c >= data(); c--) {
1165 if (*c == *fdata && !memcmp(c + 1, fdata + 1, fsizeminusone))
1166 return static_cast<int>(c - data());
1167 }
1168
1169 return -1;
1170}
1171
1172int UString::rfind(UChar ch, int pos) const
1173{
1174 if (isEmpty())
1175 return -1;
1176 if (pos + 1 >= size())
1177 pos = size() - 1;
1178 for (const UChar *c = data() + pos; c >= data(); c--) {
1179 if (*c == ch)
1180 return static_cast<int>(c-data());
1181 }
1182
1183 return -1;
1184}
1185
1186UString UString::substr(int pos, int len) const
1187{
1188 int s = size();
1189
1190 if (pos < 0)
1191 pos = 0;
1192 else if (pos >= s)
1193 pos = s;
1194 if (len < 0)
1195 len = s;
1196 if (pos + len >= s)
1197 len = s - pos;
1198
1199 if (pos == 0 && len == s)
1200 return *this;
1201
1202 return UString(Rep::create(m_rep, pos, len));
1203}
1204
1205bool operator==(const UString& s1, const UString& s2)
1206{
1207 if (s1.m_rep->len != s2.m_rep->len)
1208 return false;
1209
1210 return (memcmp(s1.m_rep->data(), s2.m_rep->data(),
1211 s1.m_rep->len * sizeof(UChar)) == 0);
1212}
1213
1214bool operator==(const UString& s1, const char *s2)
1215{
1216 if (s2 == 0) {
1217 return s1.isEmpty();
1218 }
1219
1220 const UChar *u = s1.data();
1221 const UChar *uend = u + s1.size();
1222 while (u != uend && *s2) {
1223 if (u->uc != (unsigned char)*s2)
1224 return false;
1225 s2++;
1226 u++;
1227 }
1228
1229 return u == uend && *s2 == 0;
1230}
1231
1232bool operator<(const UString& s1, const UString& s2)
1233{
1234 const int l1 = s1.size();
1235 const int l2 = s2.size();
1236 const int lmin = l1 < l2 ? l1 : l2;
1237 const UChar *c1 = s1.data();
1238 const UChar *c2 = s2.data();
1239 int l = 0;
1240 while (l < lmin && *c1 == *c2) {
1241 c1++;
1242 c2++;
1243 l++;
1244 }
1245 if (l < lmin)
1246 return (c1->uc < c2->uc);
1247
1248 return (l1 < l2);
1249}
1250
1251int compare(const UString& s1, const UString& s2)
1252{
1253 const int l1 = s1.size();
1254 const int l2 = s2.size();
1255 const int lmin = l1 < l2 ? l1 : l2;
1256 const UChar *c1 = s1.data();
1257 const UChar *c2 = s2.data();
1258 int l = 0;
1259 while (l < lmin && *c1 == *c2) {
1260 c1++;
1261 c2++;
1262 l++;
1263 }
1264
1265 if (l < lmin)
1266 return (c1->uc > c2->uc) ? 1 : -1;
1267
1268 if (l1 == l2)
1269 return 0;
1270
1271 return (l1 > l2) ? 1 : -1;
1272}
1273
1274CString UString::UTF8String(bool strict) const
1275{
1276 // Allocate a buffer big enough to hold all the characters.
1277 const int length = size();
1278 Vector<char, 1024> buffer(length * 3);
1279
1280 // Convert to runs of 8-bit characters.
1281 char* p = buffer.data();
1282 const ::UChar* d = &data()->uc;
1283 ConversionResult result = ConvertUTF16ToUTF8(&d, d + length, &p, p + buffer.size(), strict);
1284 if (result != conversionOK)
1285 return CString();
1286
1287 return CString(buffer.data(), p - buffer.data());
1288}
1289
1290
1291} // namespace KJS
Note: See TracBrowser for help on using the repository browser.