source: webkit/trunk/JavaScriptCore/runtime/UString.cpp@ 43383

Last change on this file since 43383 was 43383, checked in by [email protected], 16 years ago

2009-05-07 Gavin Barraclough <[email protected]>

Reviewed by Maciej Stachowiak.

Previously, when appending to an existing string and growing the underlying buffer,
we would actually allocate 110% of the required size in order to give us some space
to expand into. Now we treat strings differently based on their size:

Small Strings (up to 4 pages):
Expand the allocation size to 112.5% of the amount requested. This is largely sticking
to our previous policy, however 112.5% is cheaper to calculate.

Medium Strings (up to 128 pages):
For strings covering multiple pages over-allocation is less of a concern - any unused
space will not be paged in if it is not used, so this is purely a VM overhead. For
these strings allocate 2x the requested size.

Large Strings (to infinity and beyond!):
Revert to our 112.5% policy - probably best to limit the amount of unused VM we allow
any individual string to be responsible for.

Additionally, round small allocations up to a multiple of 16 bytes, and medium and
large allocations up to a multiple of page size.

~1.5% progression on Sunspider, due to 5% improvement on tagcloud & 15% on validate.

  • runtime/UString.cpp: (JSC::expandedSize):
  • Property svn:eol-style set to native
File size: 46.7 KB
Line 
1/*
2 * Copyright (C) 1999-2000 Harri Porten ([email protected])
3 * Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved.
4 * Copyright (C) 2007 Cameron Zwarich ([email protected])
5 * Copyright (c) 2009, Google Inc. All rights reserved.
6 *
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Library General Public
9 * License as published by the Free Software Foundation; either
10 * version 2 of the License, or (at your option) any later version.
11 *
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Library General Public License for more details.
16 *
17 * You should have received a copy of the GNU Library General Public License
18 * along with this library; see the file COPYING.LIB. If not, write to
19 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20 * Boston, MA 02110-1301, USA.
21 *
22 */
23
24#include "config.h"
25#include "UString.h"
26
27#include "JSGlobalObjectFunctions.h"
28#include "Collector.h"
29#include "dtoa.h"
30#include "Identifier.h"
31#include "Operations.h"
32#include <ctype.h>
33#include <float.h>
34#include <limits.h>
35#include <math.h>
36#include <stdio.h>
37#include <stdlib.h>
38#include <wtf/ASCIICType.h>
39#include <wtf/Assertions.h>
40#include <wtf/MathExtras.h>
41#include <wtf/Vector.h>
42#include <wtf/unicode/UTF8.h>
43
44#if HAVE(STRING_H)
45#include <string.h>
46#endif
47#if HAVE(STRINGS_H)
48#include <strings.h>
49#endif
50
51using namespace WTF;
52using namespace WTF::Unicode;
53using namespace std;
54
55// This can be tuned differently per platform by putting platform #ifs right here.
56// If you don't define this macro at all, then copyChars will just call directly
57// to memcpy.
58#define USTRING_COPY_CHARS_INLINE_CUTOFF 20
59
60namespace JSC {
61
62extern const double NaN;
63extern const double Inf;
64
// Sentinel size that the capacity calculations return when a requested size
// cannot be represented.
static inline size_t overflowIndicator()
{
    return std::numeric_limits<size_t>::max();
}
66static inline size_t maxUChars() { return std::numeric_limits<size_t>::max() / sizeof(UChar); }
67
68static inline UChar* allocChars(size_t length)
69{
70 ASSERT(length);
71 if (length > maxUChars())
72 return 0;
73 return static_cast<UChar*>(tryFastMalloc(sizeof(UChar) * length));
74}
75
76static inline UChar* reallocChars(UChar* buffer, size_t length)
77{
78 ASSERT(length);
79 if (length > maxUChars())
80 return 0;
81 return static_cast<UChar*>(tryFastRealloc(buffer, sizeof(UChar) * length));
82}
83
84static inline void copyChars(UChar* destination, const UChar* source, unsigned numCharacters)
85{
86#ifdef USTRING_COPY_CHARS_INLINE_CUTOFF
87 if (numCharacters <= USTRING_COPY_CHARS_INLINE_CUTOFF) {
88 for (unsigned i = 0; i < numCharacters; ++i)
89 destination[i] = source[i];
90 return;
91 }
92#endif
93 memcpy(destination, source, numCharacters * sizeof(UChar));
94}
95
96COMPILE_ASSERT(sizeof(UChar) == 2, uchar_is_2_bytes);
97
98CString::CString(const char* c)
99 : m_length(strlen(c))
100 , m_data(new char[m_length + 1])
101{
102 memcpy(m_data, c, m_length + 1);
103}
104
105CString::CString(const char* c, size_t length)
106 : m_length(length)
107 , m_data(new char[length + 1])
108{
109 memcpy(m_data, c, m_length);
110 m_data[m_length] = 0;
111}
112
113CString::CString(const CString& b)
114{
115 m_length = b.m_length;
116 if (b.m_data) {
117 m_data = new char[m_length + 1];
118 memcpy(m_data, b.m_data, m_length + 1);
119 } else
120 m_data = 0;
121}
122
// Releases the character buffer with delete[]. Buffers handed over through
// adopt() are released here as well, so they must have been allocated with
// new char[].
CString::~CString()
{
    delete [] m_data;
}
127
128CString CString::adopt(char* c, size_t length)
129{
130 CString s;
131 s.m_data = c;
132 s.m_length = length;
133 return s;
134}
135
136CString& CString::append(const CString& t)
137{
138 char* n;
139 n = new char[m_length + t.m_length + 1];
140 if (m_length)
141 memcpy(n, m_data, m_length);
142 if (t.m_length)
143 memcpy(n + m_length, t.m_data, t.m_length);
144 m_length += t.m_length;
145 n[m_length] = 0;
146
147 delete [] m_data;
148 m_data = n;
149
150 return *this;
151}
152
153CString& CString::operator=(const char* c)
154{
155 if (m_data)
156 delete [] m_data;
157 m_length = strlen(c);
158 m_data = new char[m_length + 1];
159 memcpy(m_data, c, m_length + 1);
160
161 return *this;
162}
163
164CString& CString::operator=(const CString& str)
165{
166 if (this == &str)
167 return *this;
168
169 if (m_data)
170 delete [] m_data;
171 m_length = str.m_length;
172 if (str.m_data) {
173 m_data = new char[m_length + 1];
174 memcpy(m_data, str.m_data, m_length + 1);
175 } else
176 m_data = 0;
177
178 return *this;
179}
180
181bool operator==(const CString& c1, const CString& c2)
182{
183 size_t len = c1.size();
184 return len == c2.size() && (len == 0 || memcmp(c1.c_str(), c2.c_str(), len) == 0);
185}
186
// These static strings are immutable, except for rc, whose initial value is chosen to
// reduce the possibility of it becoming zero due to ref/deref not being thread-safe.
// All three pointers below are assigned in initializeUString().

// Storage handed to the shared empty BaseString as its character buffer.
static UChar sharedEmptyChar;
// Shared BaseString created with a null buffer and zero length.
UString::BaseString* UString::Rep::nullBaseString;
// Shared BaseString created over sharedEmptyChar with zero length.
UString::BaseString* UString::Rep::emptyBaseString;
// Shared default-constructed UString.
UString* UString::nullUString;
193
// Prepares |base| for use as one of the shared static strings: seeds its
// reference count, marks it with StaticFlag and checks its invariants.
static void initializeStaticBaseString(UString::BaseString& base)
{
    // Start far from zero so that unsynchronized ref/deref is unlikely to
    // ever bring the count down to zero.
    base.rc = INT_MAX / 2;
    base.m_identifierTableAndFlags.setFlag(UString::Rep::StaticFlag);
    base.checkConsistency();
}
200
201void initializeUString()
202{
203 UString::Rep::nullBaseString = new UString::BaseString(0, 0);
204 initializeStaticBaseString(*UString::Rep::nullBaseString);
205
206 UString::Rep::emptyBaseString = new UString::BaseString(&sharedEmptyChar, 0);
207 initializeStaticBaseString(*UString::Rep::emptyBaseString);
208
209 UString::nullUString = new UString;
210}
211
212static char* statBuffer = 0; // Only used for debugging via UString::ascii().
213
214PassRefPtr<UString::Rep> UString::Rep::createCopying(const UChar* d, int l)
215{
216 UChar* copyD = static_cast<UChar*>(fastMalloc(l * sizeof(UChar)));
217 copyChars(copyD, d, l);
218 return create(copyD, l);
219}
220
221PassRefPtr<UString::Rep> UString::Rep::createFromUTF8(const char* string)
222{
223 if (!string)
224 return &UString::Rep::null();
225
226 size_t length = strlen(string);
227 Vector<UChar, 1024> buffer(length);
228 UChar* p = buffer.data();
229 if (conversionOK != convertUTF8ToUTF16(&string, string + length, &p, p + length))
230 return &UString::Rep::null();
231
232 return UString::Rep::createCopying(buffer.data(), p - buffer.data());
233}
234
235void UString::Rep::destroy()
236{
237 checkConsistency();
238
239 // Static null and empty strings can never be destroyed, but we cannot rely on
240 // reference counting, because ref/deref are not thread-safe.
241 if (!isStatic()) {
242 if (identifierTable())
243 Identifier::remove(this);
244 UString::BaseString* base = baseString();
245 if (base == this)
246 fastFree(base->buf);
247 else
248 base->deref();
249
250 delete this;
251 }
252}
253
// Golden ratio - arbitrary start value to avoid mapping all 0's to all 0's
// or anything like that. Used as the initial hash value by both computeHash
// overloads below.
const unsigned PHI = 0x9e3779b9U;
257
258// Paul Hsieh's SuperFastHash
259// http://www.azillionmonkeys.com/qed/hash.html
260unsigned UString::Rep::computeHash(const UChar* s, int len)
261{
262 unsigned l = len;
263 uint32_t hash = PHI;
264 uint32_t tmp;
265
266 int rem = l & 1;
267 l >>= 1;
268
269 // Main loop
270 for (; l > 0; l--) {
271 hash += s[0];
272 tmp = (s[1] << 11) ^ hash;
273 hash = (hash << 16) ^ tmp;
274 s += 2;
275 hash += hash >> 11;
276 }
277
278 // Handle end case
279 if (rem) {
280 hash += s[0];
281 hash ^= hash << 11;
282 hash += hash >> 17;
283 }
284
285 // Force "avalanching" of final 127 bits
286 hash ^= hash << 3;
287 hash += hash >> 5;
288 hash ^= hash << 2;
289 hash += hash >> 15;
290 hash ^= hash << 10;
291
292 // this avoids ever returning a hash code of 0, since that is used to
293 // signal "hash not computed yet", using a value that is likely to be
294 // effectively the same as 0 when the low bits are masked
295 if (hash == 0)
296 hash = 0x80000000;
297
298 return hash;
299}
300
301// Paul Hsieh's SuperFastHash
302// http://www.azillionmonkeys.com/qed/hash.html
303unsigned UString::Rep::computeHash(const char* s, int l)
304{
305 // This hash is designed to work on 16-bit chunks at a time. But since the normal case
306 // (above) is to hash UTF-16 characters, we just treat the 8-bit chars as if they
307 // were 16-bit chunks, which should give matching results
308
309 uint32_t hash = PHI;
310 uint32_t tmp;
311
312 size_t rem = l & 1;
313 l >>= 1;
314
315 // Main loop
316 for (; l > 0; l--) {
317 hash += static_cast<unsigned char>(s[0]);
318 tmp = (static_cast<unsigned char>(s[1]) << 11) ^ hash;
319 hash = (hash << 16) ^ tmp;
320 s += 2;
321 hash += hash >> 11;
322 }
323
324 // Handle end case
325 if (rem) {
326 hash += static_cast<unsigned char>(s[0]);
327 hash ^= hash << 11;
328 hash += hash >> 17;
329 }
330
331 // Force "avalanching" of final 127 bits
332 hash ^= hash << 3;
333 hash += hash >> 5;
334 hash ^= hash << 2;
335 hash += hash >> 15;
336 hash ^= hash << 10;
337
338 // this avoids ever returning a hash code of 0, since that is used to
339 // signal "hash not computed yet", using a value that is likely to be
340 // effectively the same as 0 when the low bits are masked
341 if (hash == 0)
342 hash = 0x80000000;
343
344 return hash;
345}
346
347#ifndef NDEBUG
348void UString::Rep::checkConsistency() const
349{
350 const UString::BaseString* base = baseString();
351
352 // There is no recursion for base strings.
353 ASSERT(base == base->baseString());
354
355 if (isStatic()) {
356 // There are only two static strings: null and empty.
357 ASSERT(!len);
358
359 // Static strings cannot get in identifier tables, because they are globally shared.
360 ASSERT(!identifierTable());
361 }
362
363 // The string fits in buffer.
364 ASSERT(base->usedPreCapacity <= base->preCapacity);
365 ASSERT(base->usedCapacity <= base->capacity);
366 ASSERT(-offset <= base->usedPreCapacity);
367 ASSERT(offset + len <= base->usedCapacity);
368}
369#endif
370
371// Put these early so they can be inlined.
372static inline size_t expandedSize(size_t capacitySize, size_t precapacitySize)
373{
374 // Combine capacitySize & precapacitySize to produce a single size to allocate,
375 // check that doing so does not result in overflow.
376 size_t size = capacitySize + precapacitySize;
377 if (size < capacitySize)
378 return overflowIndicator();
379
380 // Small Strings (up to 4 pages):
381 // Expand the allocation size to 112.5% of the amount requested. This is largely sicking
382 // to our previous policy, however 112.5% is cheaper to calculate.
383 if (size < 0x4000) {
384 size_t expandedSize = ((size + (size >> 3)) | 15) + 1;
385 // Given the limited range within which we calculate the expansion in this
386 // fashion the above calculation should never overflow.
387 ASSERT(expandedSize >= size);
388 ASSERT(expandedSize < maxUChars());
389 return expandedSize;
390 }
391
392 // Medium Strings (up to 128 pages):
393 // For pages covering multiple pages over-allocation is less of a concern - any unused
394 // space will not be paged in if it is not used, so this is purely a VM overhead. For
395 // these strings allocate 2x the requested size.
396 if (size < 0x80000) {
397 size_t expandedSize = ((size + size) | 0xfff) + 1;
398 // Given the limited range within which we calculate the expansion in this
399 // fashion the above calculation should never overflow.
400 ASSERT(expandedSize >= size);
401 ASSERT(expandedSize < maxUChars());
402 return expandedSize;
403 }
404
405 // Large Strings (to infinity and beyond!):
406 // Revert to our 112.5% policy - probably best to limit the amount of unused VM we allow
407 // any individual string be responsible for.
408 size_t expandedSize = ((size + (size >> 3)) | 0xfff) + 1;
409
410 // Check for overflow - any result that is at least as large as requested (but
411 // still below the limit) is okay.
412 if ((expandedSize >= size) && (expandedSize < maxUChars()))
413 return expandedSize;
414 return overflowIndicator();
415}
416
417static inline bool expandCapacity(UString::Rep* rep, int requiredLength)
418{
419 rep->checkConsistency();
420
421 UString::BaseString* base = rep->baseString();
422
423 if (requiredLength > base->capacity) {
424 size_t newCapacity = expandedSize(requiredLength, base->preCapacity);
425 UChar* oldBuf = base->buf;
426 base->buf = reallocChars(base->buf, newCapacity);
427 if (!base->buf) {
428 base->buf = oldBuf;
429 return false;
430 }
431 base->capacity = newCapacity - base->preCapacity;
432 }
433 if (requiredLength > base->usedCapacity)
434 base->usedCapacity = requiredLength;
435
436 rep->checkConsistency();
437 return true;
438}
439
440bool UString::Rep::reserveCapacity(int capacity)
441{
442 // If this is an empty string there is no point 'growing' it - just allocate a new one.
443 // If the BaseString is shared with another string that is using more capacity than this
444 // string is, then growing the buffer won't help.
445 if (!m_baseString->buf || !m_baseString->capacity || (offset + len) != m_baseString->usedCapacity)
446 return false;
447
448 // If there is already sufficient capacity, no need to grow!
449 if (capacity <= m_baseString->capacity)
450 return true;
451
452 checkConsistency();
453
454 size_t newCapacity = expandedSize(capacity, m_baseString->preCapacity);
455 UChar* oldBuf = m_baseString->buf;
456 m_baseString->buf = reallocChars(m_baseString->buf, newCapacity);
457 if (!m_baseString->buf) {
458 m_baseString->buf = oldBuf;
459 return false;
460 }
461 m_baseString->capacity = newCapacity - m_baseString->preCapacity;
462
463 checkConsistency();
464 return true;
465}
466
467void UString::expandCapacity(int requiredLength)
468{
469 if (!JSC::expandCapacity(m_rep.get(), requiredLength))
470 makeNull();
471}
472
473void UString::expandPreCapacity(int requiredPreCap)
474{
475 m_rep->checkConsistency();
476
477 BaseString* base = m_rep->baseString();
478
479 if (requiredPreCap > base->preCapacity) {
480 size_t newCapacity = expandedSize(requiredPreCap, base->capacity);
481 int delta = newCapacity - base->capacity - base->preCapacity;
482
483 UChar* newBuf = allocChars(newCapacity);
484 if (!newBuf) {
485 makeNull();
486 return;
487 }
488 copyChars(newBuf + delta, base->buf, base->capacity + base->preCapacity);
489 fastFree(base->buf);
490 base->buf = newBuf;
491
492 base->preCapacity = newCapacity - base->capacity;
493 }
494 if (requiredPreCap > base->usedPreCapacity)
495 base->usedPreCapacity = requiredPreCap;
496
497 m_rep->checkConsistency();
498}
499
500static PassRefPtr<UString::Rep> createRep(const char* c)
501{
502 if (!c)
503 return &UString::Rep::null();
504
505 if (!c[0])
506 return &UString::Rep::empty();
507
508 size_t length = strlen(c);
509 UChar* d = allocChars(length);
510 if (!d)
511 return &UString::Rep::null();
512 else {
513 for (size_t i = 0; i < length; i++)
514 d[i] = static_cast<unsigned char>(c[i]); // use unsigned char to zero-extend instead of sign-extend
515 return UString::Rep::create(d, static_cast<int>(length));
516 }
517
518}
519
// Constructs a UString from the NUL-terminated 8-bit string |c| via
// createRep(): a null pointer gives the null string, "" the empty string.
UString::UString(const char* c)
    : m_rep(createRep(c))
{
}
524
525UString::UString(const UChar* c, int length)
526{
527 if (length == 0)
528 m_rep = &Rep::empty();
529 else
530 m_rep = Rep::createCopying(c, length);
531}
532
533UString::UString(UChar* c, int length, bool copy)
534{
535 if (length == 0)
536 m_rep = &Rep::empty();
537 else if (copy)
538 m_rep = Rep::createCopying(c, length);
539 else
540 m_rep = Rep::create(c, length);
541}
542
543UString::UString(const Vector<UChar>& buffer)
544{
545 if (!buffer.size())
546 m_rep = &Rep::empty();
547 else
548 m_rep = Rep::createCopying(buffer.data(), buffer.size());
549}
550
551static ALWAYS_INLINE int newCapacityWithOverflowCheck(const int currentCapacity, const int extendLength, const bool plusOne = false)
552{
553 ASSERT_WITH_MESSAGE(extendLength >= 0, "extendedLength = %d", extendLength);
554
555 const int plusLength = plusOne ? 1 : 0;
556 if (currentCapacity > std::numeric_limits<int>::max() - extendLength - plusLength)
557 CRASH();
558
559 return currentCapacity + extendLength + plusLength;
560}
561
562static ALWAYS_INLINE PassRefPtr<UString::Rep> concatenate(PassRefPtr<UString::Rep> r, const UChar* tData, int tSize)
563{
564 RefPtr<UString::Rep> rep = r;
565
566 rep->checkConsistency();
567
568 int thisSize = rep->size();
569 int thisOffset = rep->offset;
570 int length = thisSize + tSize;
571 UString::BaseString* base = rep->baseString();
572
573 // possible cases:
574 if (tSize == 0) {
575 // t is empty
576 } else if (thisSize == 0) {
577 // this is empty
578 rep = UString::Rep::createCopying(tData, tSize);
579 } else if (rep == base && !base->isShared()) {
580 // this is direct and has refcount of 1 (so we can just alter it directly)
581 if (!expandCapacity(rep.get(), newCapacityWithOverflowCheck(thisOffset, length)))
582 rep = &UString::Rep::null();
583 if (rep->data()) {
584 copyChars(rep->data() + thisSize, tData, tSize);
585 rep->len = length;
586 rep->_hash = 0;
587 }
588 } else if (thisOffset + thisSize == base->usedCapacity && thisSize >= minShareSize) {
589 // this reaches the end of the buffer - extend it if it's long enough to append to
590 if (!expandCapacity(rep.get(), newCapacityWithOverflowCheck(thisOffset, length)))
591 rep = &UString::Rep::null();
592 if (rep->data()) {
593 copyChars(rep->data() + thisSize, tData, tSize);
594 rep = UString::Rep::create(rep, 0, length);
595 }
596 } else {
597 // this is shared with someone using more capacity, gotta make a whole new string
598 size_t newCapacity = expandedSize(length, 0);
599 UChar* d = allocChars(newCapacity);
600 if (!d)
601 rep = &UString::Rep::null();
602 else {
603 copyChars(d, rep->data(), thisSize);
604 copyChars(d + thisSize, tData, tSize);
605 rep = UString::Rep::create(d, length);
606 rep->baseString()->capacity = newCapacity;
607 }
608 }
609
610 rep->checkConsistency();
611
612 return rep.release();
613}
614
615static ALWAYS_INLINE PassRefPtr<UString::Rep> concatenate(PassRefPtr<UString::Rep> r, const char* t)
616{
617 RefPtr<UString::Rep> rep = r;
618
619 rep->checkConsistency();
620
621 int thisSize = rep->size();
622 int thisOffset = rep->offset;
623 int tSize = static_cast<int>(strlen(t));
624 int length = thisSize + tSize;
625 UString::BaseString* base = rep->baseString();
626
627 // possible cases:
628 if (thisSize == 0) {
629 // this is empty
630 rep = createRep(t);
631 } else if (tSize == 0) {
632 // t is empty, we'll just return *this below.
633 } else if (rep == base && !base->isShared()) {
634 // this is direct and has refcount of 1 (so we can just alter it directly)
635 expandCapacity(rep.get(), newCapacityWithOverflowCheck(thisOffset, length));
636 UChar* d = rep->data();
637 if (d) {
638 for (int i = 0; i < tSize; ++i)
639 d[thisSize + i] = static_cast<unsigned char>(t[i]); // use unsigned char to zero-extend instead of sign-extend
640 rep->len = length;
641 rep->_hash = 0;
642 }
643 } else if (thisOffset + thisSize == base->usedCapacity && thisSize >= minShareSize) {
644 // this string reaches the end of the buffer - extend it
645 expandCapacity(rep.get(), newCapacityWithOverflowCheck(thisOffset, length));
646 UChar* d = rep->data();
647 if (d) {
648 for (int i = 0; i < tSize; ++i)
649 d[thisSize + i] = static_cast<unsigned char>(t[i]); // use unsigned char to zero-extend instead of sign-extend
650 rep = UString::Rep::create(rep, 0, length);
651 }
652 } else {
653 // this is shared with someone using more capacity, gotta make a whole new string
654 size_t newCapacity = expandedSize(length, 0);
655 UChar* d = allocChars(newCapacity);
656 if (!d)
657 rep = &UString::Rep::null();
658 else {
659 copyChars(d, rep->data(), thisSize);
660 for (int i = 0; i < tSize; ++i)
661 d[thisSize + i] = static_cast<unsigned char>(t[i]); // use unsigned char to zero-extend instead of sign-extend
662 rep = UString::Rep::create(d, length);
663 rep->baseString()->capacity = newCapacity;
664 }
665 }
666
667 rep->checkConsistency();
668
669 return rep.release();
670}
671
672PassRefPtr<UString::Rep> concatenate(UString::Rep* a, UString::Rep* b)
673{
674 a->checkConsistency();
675 b->checkConsistency();
676
677 int aSize = a->size();
678 int bSize = b->size();
679 int aOffset = a->offset;
680
681 // possible cases:
682
683 UString::BaseString* aBase = a->baseString();
684 if (bSize == 1 && aOffset + aSize == aBase->usedCapacity && aOffset + aSize < aBase->capacity) {
685 // b is a single character (common fast case)
686 ++aBase->usedCapacity;
687 a->data()[aSize] = b->data()[0];
688 return UString::Rep::create(a, 0, aSize + 1);
689 }
690
691 // a is empty
692 if (aSize == 0)
693 return b;
694 // b is empty
695 if (bSize == 0)
696 return a;
697
698 int bOffset = b->offset;
699 int length = aSize + bSize;
700
701 UString::BaseString* bBase = b->baseString();
702 if (aOffset + aSize == aBase->usedCapacity && aSize >= minShareSize && 4 * aSize >= bSize
703 && (-bOffset != bBase->usedPreCapacity || aSize >= bSize)) {
704 // - a reaches the end of its buffer so it qualifies for shared append
705 // - also, it's at least a quarter the length of b - appending to a much shorter
706 // string does more harm than good
707 // - however, if b qualifies for prepend and is longer than a, we'd rather prepend
708
709 UString x(a);
710 x.expandCapacity(newCapacityWithOverflowCheck(aOffset, length));
711 if (!a->data() || !x.data())
712 return 0;
713 copyChars(a->data() + aSize, b->data(), bSize);
714 PassRefPtr<UString::Rep> result = UString::Rep::create(a, 0, length);
715
716 a->checkConsistency();
717 b->checkConsistency();
718 result->checkConsistency();
719
720 return result;
721 }
722
723 if (-bOffset == bBase->usedPreCapacity && bSize >= minShareSize && 4 * bSize >= aSize) {
724 // - b reaches the beginning of its buffer so it qualifies for shared prepend
725 // - also, it's at least a quarter the length of a - prepending to a much shorter
726 // string does more harm than good
727 UString y(b);
728 y.expandPreCapacity(-bOffset + aSize);
729 if (!b->data() || !y.data())
730 return 0;
731 copyChars(b->data() - aSize, a->data(), aSize);
732 PassRefPtr<UString::Rep> result = UString::Rep::create(b, -aSize, length);
733
734 a->checkConsistency();
735 b->checkConsistency();
736 result->checkConsistency();
737
738 return result;
739 }
740
741 // a does not qualify for append, and b does not qualify for prepend, gotta make a whole new string
742 size_t newCapacity = expandedSize(length, 0);
743 UChar* d = allocChars(newCapacity);
744 if (!d)
745 return 0;
746 copyChars(d, a->data(), aSize);
747 copyChars(d + aSize, b->data(), bSize);
748 PassRefPtr<UString::Rep> result = UString::Rep::create(d, length);
749 result->baseString()->capacity = newCapacity;
750
751 a->checkConsistency();
752 b->checkConsistency();
753 result->checkConsistency();
754
755 return result;
756}
757
758PassRefPtr<UString::Rep> concatenate(UString::Rep* rep, int i)
759{
760 UChar buf[1 + sizeof(i) * 3];
761 UChar* end = buf + sizeof(buf) / sizeof(UChar);
762 UChar* p = end;
763
764 if (i == 0)
765 *--p = '0';
766 else if (i == INT_MIN) {
767 char minBuf[1 + sizeof(i) * 3];
768 sprintf(minBuf, "%d", INT_MIN);
769 return concatenate(rep, minBuf);
770 } else {
771 bool negative = false;
772 if (i < 0) {
773 negative = true;
774 i = -i;
775 }
776 while (i) {
777 *--p = static_cast<unsigned short>((i % 10) + '0');
778 i /= 10;
779 }
780 if (negative)
781 *--p = '-';
782 }
783
784 return concatenate(rep, p, static_cast<int>(end - p));
785
786}
787
788PassRefPtr<UString::Rep> concatenate(UString::Rep* rep, double d)
789{
790 // avoid ever printing -NaN, in JS conceptually there is only one NaN value
791 if (isnan(d))
792 return concatenate(rep, "NaN");
793
794 if (d == 0.0) // stringify -0 as 0
795 d = 0.0;
796
797 char buf[80];
798 int decimalPoint;
799 int sign;
800
801 char* result = WTF::dtoa(d, 0, &decimalPoint, &sign, NULL);
802 int length = static_cast<int>(strlen(result));
803
804 int i = 0;
805 if (sign)
806 buf[i++] = '-';
807
808 if (decimalPoint <= 0 && decimalPoint > -6) {
809 buf[i++] = '0';
810 buf[i++] = '.';
811 for (int j = decimalPoint; j < 0; j++)
812 buf[i++] = '0';
813 strcpy(buf + i, result);
814 } else if (decimalPoint <= 21 && decimalPoint > 0) {
815 if (length <= decimalPoint) {
816 strcpy(buf + i, result);
817 i += length;
818 for (int j = 0; j < decimalPoint - length; j++)
819 buf[i++] = '0';
820 buf[i] = '\0';
821 } else {
822 strncpy(buf + i, result, decimalPoint);
823 i += decimalPoint;
824 buf[i++] = '.';
825 strcpy(buf + i, result + decimalPoint);
826 }
827 } else if (result[0] < '0' || result[0] > '9')
828 strcpy(buf + i, result);
829 else {
830 buf[i++] = result[0];
831 if (length > 1) {
832 buf[i++] = '.';
833 strcpy(buf + i, result + 1);
834 i += length - 1;
835 }
836
837 buf[i++] = 'e';
838 buf[i++] = (decimalPoint >= 0) ? '+' : '-';
839 // decimalPoint can't be more than 3 digits decimal given the
840 // nature of float representation
841 int exponential = decimalPoint - 1;
842 if (exponential < 0)
843 exponential = -exponential;
844 if (exponential >= 100)
845 buf[i++] = static_cast<char>('0' + exponential / 100);
846 if (exponential >= 10)
847 buf[i++] = static_cast<char>('0' + (exponential % 100) / 10);
848 buf[i++] = static_cast<char>('0' + exponential % 10);
849 buf[i++] = '\0';
850 }
851
852 WTF::freedtoa(result);
853
854 return concatenate(rep, buf);
855}
856
857UString UString::from(int i)
858{
859 UChar buf[1 + sizeof(i) * 3];
860 UChar* end = buf + sizeof(buf) / sizeof(UChar);
861 UChar* p = end;
862
863 if (i == 0)
864 *--p = '0';
865 else if (i == INT_MIN) {
866 char minBuf[1 + sizeof(i) * 3];
867 sprintf(minBuf, "%d", INT_MIN);
868 return UString(minBuf);
869 } else {
870 bool negative = false;
871 if (i < 0) {
872 negative = true;
873 i = -i;
874 }
875 while (i) {
876 *--p = static_cast<unsigned short>((i % 10) + '0');
877 i /= 10;
878 }
879 if (negative)
880 *--p = '-';
881 }
882
883 return UString(p, static_cast<int>(end - p));
884}
885
886UString UString::from(unsigned int u)
887{
888 UChar buf[sizeof(u) * 3];
889 UChar* end = buf + sizeof(buf) / sizeof(UChar);
890 UChar* p = end;
891
892 if (u == 0)
893 *--p = '0';
894 else {
895 while (u) {
896 *--p = static_cast<unsigned short>((u % 10) + '0');
897 u /= 10;
898 }
899 }
900
901 return UString(p, static_cast<int>(end - p));
902}
903
904UString UString::from(long l)
905{
906 UChar buf[1 + sizeof(l) * 3];
907 UChar* end = buf + sizeof(buf) / sizeof(UChar);
908 UChar* p = end;
909
910 if (l == 0)
911 *--p = '0';
912 else if (l == LONG_MIN) {
913 char minBuf[1 + sizeof(l) * 3];
914 sprintf(minBuf, "%ld", LONG_MIN);
915 return UString(minBuf);
916 } else {
917 bool negative = false;
918 if (l < 0) {
919 negative = true;
920 l = -l;
921 }
922 while (l) {
923 *--p = static_cast<unsigned short>((l % 10) + '0');
924 l /= 10;
925 }
926 if (negative)
927 *--p = '-';
928 }
929
930 return UString(p, static_cast<int>(end - p));
931}
932
933UString UString::from(double d)
934{
935 // avoid ever printing -NaN, in JS conceptually there is only one NaN value
936 if (isnan(d))
937 return "NaN";
938
939 char buf[80];
940 int decimalPoint;
941 int sign;
942
943 char* result = WTF::dtoa(d, 0, &decimalPoint, &sign, NULL);
944 int length = static_cast<int>(strlen(result));
945
946 int i = 0;
947 if (sign)
948 buf[i++] = '-';
949
950 if (decimalPoint <= 0 && decimalPoint > -6) {
951 buf[i++] = '0';
952 buf[i++] = '.';
953 for (int j = decimalPoint; j < 0; j++)
954 buf[i++] = '0';
955 strcpy(buf + i, result);
956 } else if (decimalPoint <= 21 && decimalPoint > 0) {
957 if (length <= decimalPoint) {
958 strcpy(buf + i, result);
959 i += length;
960 for (int j = 0; j < decimalPoint - length; j++)
961 buf[i++] = '0';
962 buf[i] = '\0';
963 } else {
964 strncpy(buf + i, result, decimalPoint);
965 i += decimalPoint;
966 buf[i++] = '.';
967 strcpy(buf + i, result + decimalPoint);
968 }
969 } else if (result[0] < '0' || result[0] > '9')
970 strcpy(buf + i, result);
971 else {
972 buf[i++] = result[0];
973 if (length > 1) {
974 buf[i++] = '.';
975 strcpy(buf + i, result + 1);
976 i += length - 1;
977 }
978
979 buf[i++] = 'e';
980 buf[i++] = (decimalPoint >= 0) ? '+' : '-';
981 // decimalPoint can't be more than 3 digits decimal given the
982 // nature of float representation
983 int exponential = decimalPoint - 1;
984 if (exponential < 0)
985 exponential = -exponential;
986 if (exponential >= 100)
987 buf[i++] = static_cast<char>('0' + exponential / 100);
988 if (exponential >= 10)
989 buf[i++] = static_cast<char>('0' + (exponential % 100) / 10);
990 buf[i++] = static_cast<char>('0' + exponential % 10);
991 buf[i++] = '\0';
992 }
993
994 WTF::freedtoa(result);
995
996 return UString(buf);
997}
998
999UString UString::spliceSubstringsWithSeparators(const Range* substringRanges, int rangeCount, const UString* separators, int separatorCount) const
1000{
1001 m_rep->checkConsistency();
1002
1003 if (rangeCount == 1 && separatorCount == 0) {
1004 int thisSize = size();
1005 int position = substringRanges[0].position;
1006 int length = substringRanges[0].length;
1007 if (position <= 0 && length >= thisSize)
1008 return *this;
1009 return UString::Rep::create(m_rep, max(0, position), min(thisSize, length));
1010 }
1011
1012 int totalLength = 0;
1013 for (int i = 0; i < rangeCount; i++)
1014 totalLength += substringRanges[i].length;
1015 for (int i = 0; i < separatorCount; i++)
1016 totalLength += separators[i].size();
1017
1018 if (totalLength == 0)
1019 return "";
1020
1021 UChar* buffer = allocChars(totalLength);
1022 if (!buffer)
1023 return null();
1024
1025 int maxCount = max(rangeCount, separatorCount);
1026 int bufferPos = 0;
1027 for (int i = 0; i < maxCount; i++) {
1028 if (i < rangeCount) {
1029 copyChars(buffer + bufferPos, data() + substringRanges[i].position, substringRanges[i].length);
1030 bufferPos += substringRanges[i].length;
1031 }
1032 if (i < separatorCount) {
1033 copyChars(buffer + bufferPos, separators[i].data(), separators[i].size());
1034 bufferPos += separators[i].size();
1035 }
1036 }
1037
1038 return UString::Rep::create(buffer, totalLength);
1039}
1040
1041UString UString::replaceRange(int rangeStart, int rangeLength, const UString& replacement) const
1042{
1043 m_rep->checkConsistency();
1044
1045 int replacementLength = replacement.size();
1046 int totalLength = size() - rangeLength + replacementLength;
1047 if (totalLength == 0)
1048 return "";
1049
1050 UChar* buffer = allocChars(totalLength);
1051 if (!buffer)
1052 return null();
1053
1054 copyChars(buffer, data(), rangeStart);
1055 copyChars(buffer + rangeStart, replacement.data(), replacementLength);
1056 int rangeEnd = rangeStart + rangeLength;
1057 copyChars(buffer + rangeStart + replacementLength, data() + rangeEnd, size() - rangeEnd);
1058
1059 return UString::Rep::create(buffer, totalLength);
1060}
1061
1062
1063UString& UString::append(const UString &t)
1064{
1065 m_rep->checkConsistency();
1066 t.rep()->checkConsistency();
1067
1068 int thisSize = size();
1069 int thisOffset = m_rep->offset;
1070 int tSize = t.size();
1071 int length = thisSize + tSize;
1072 BaseString* base = m_rep->baseString();
1073
1074 // possible cases:
1075 if (thisSize == 0) {
1076 // this is empty
1077 *this = t;
1078 } else if (tSize == 0) {
1079 // t is empty
1080 } else if (m_rep == base && !base->isShared()) {
1081 // this is direct and has refcount of 1 (so we can just alter it directly)
1082 expandCapacity(newCapacityWithOverflowCheck(thisOffset, length));
1083 if (data()) {
1084 copyChars(m_rep->data() + thisSize, t.data(), tSize);
1085 m_rep->len = length;
1086 m_rep->_hash = 0;
1087 }
1088 } else if (thisOffset + thisSize == base->usedCapacity && thisSize >= minShareSize) {
1089 // this reaches the end of the buffer - extend it if it's long enough to append to
1090 expandCapacity(newCapacityWithOverflowCheck(thisOffset, length));
1091 if (data()) {
1092 copyChars(m_rep->data() + thisSize, t.data(), tSize);
1093 m_rep = Rep::create(m_rep, 0, length);
1094 }
1095 } else {
1096 // this is shared with someone using more capacity, gotta make a whole new string
1097 size_t newCapacity = expandedSize(length, 0);
1098 UChar* d = allocChars(newCapacity);
1099 if (!d)
1100 makeNull();
1101 else {
1102 copyChars(d, data(), thisSize);
1103 copyChars(d + thisSize, t.data(), tSize);
1104 m_rep = Rep::create(d, length);
1105 m_rep->baseString()->capacity = newCapacity;
1106 }
1107 }
1108
1109 m_rep->checkConsistency();
1110 t.rep()->checkConsistency();
1111
1112 return *this;
1113}
1114
1115UString& UString::append(const UChar* tData, int tSize)
1116{
1117 m_rep = concatenate(m_rep.release(), tData, tSize);
1118 return *this;
1119}
1120
1121UString& UString::appendNumeric(int i)
1122{
1123 m_rep = concatenate(rep(), i);
1124 return *this;
1125}
1126
1127UString& UString::appendNumeric(double d)
1128{
1129 m_rep = concatenate(rep(), d);
1130 return *this;
1131}
1132
1133UString& UString::append(const char* t)
1134{
1135 m_rep = concatenate(m_rep.release(), t);
1136 return *this;
1137}
1138
1139UString& UString::append(UChar c)
1140{
1141 m_rep->checkConsistency();
1142
1143 int thisOffset = m_rep->offset;
1144 int length = size();
1145 BaseString* base = m_rep->baseString();
1146
1147 // possible cases:
1148 if (length == 0) {
1149 // this is empty - must make a new m_rep because we don't want to pollute the shared empty one
1150 size_t newCapacity = expandedSize(1, 0);
1151 UChar* d = allocChars(newCapacity);
1152 if (!d)
1153 makeNull();
1154 else {
1155 d[0] = c;
1156 m_rep = Rep::create(d, 1);
1157 m_rep->baseString()->capacity = newCapacity;
1158 }
1159 } else if (m_rep == base && !base->isShared()) {
1160 // this is direct and has refcount of 1 (so we can just alter it directly)
1161 expandCapacity(newCapacityWithOverflowCheck(thisOffset, length, true));
1162 UChar* d = m_rep->data();
1163 if (d) {
1164 d[length] = c;
1165 m_rep->len = length + 1;
1166 m_rep->_hash = 0;
1167 }
1168 } else if (thisOffset + length == base->usedCapacity && length >= minShareSize) {
1169 // this reaches the end of the string - extend it and share
1170 expandCapacity(newCapacityWithOverflowCheck(thisOffset, length, true));
1171 UChar* d = m_rep->data();
1172 if (d) {
1173 d[length] = c;
1174 m_rep = Rep::create(m_rep, 0, length + 1);
1175 }
1176 } else {
1177 // this is shared with someone using more capacity, gotta make a whole new string
1178 size_t newCapacity = expandedSize(length + 1, 0);
1179 UChar* d = allocChars(newCapacity);
1180 if (!d)
1181 makeNull();
1182 else {
1183 copyChars(d, data(), length);
1184 d[length] = c;
1185 m_rep = Rep::create(d, length + 1);
1186 m_rep->baseString()->capacity = newCapacity;
1187 }
1188 }
1189
1190 m_rep->checkConsistency();
1191
1192 return *this;
1193}
1194
1195bool UString::getCString(CStringBuffer& buffer) const
1196{
1197 int length = size();
1198 int neededSize = length + 1;
1199 buffer.resize(neededSize);
1200 char* buf = buffer.data();
1201
1202 UChar ored = 0;
1203 const UChar* p = data();
1204 char* q = buf;
1205 const UChar* limit = p + length;
1206 while (p != limit) {
1207 UChar c = p[0];
1208 ored |= c;
1209 *q = static_cast<char>(c);
1210 ++p;
1211 ++q;
1212 }
1213 *q = '\0';
1214
1215 return !(ored & 0xFF00);
1216}
1217
1218char* UString::ascii() const
1219{
1220 int length = size();
1221 int neededSize = length + 1;
1222 delete[] statBuffer;
1223 statBuffer = new char[neededSize];
1224
1225 const UChar* p = data();
1226 char* q = statBuffer;
1227 const UChar* limit = p + length;
1228 while (p != limit) {
1229 *q = static_cast<char>(p[0]);
1230 ++p;
1231 ++q;
1232 }
1233 *q = '\0';
1234
1235 return statBuffer;
1236}
1237
1238UString& UString::operator=(const char* c)
1239{
1240 if (!c) {
1241 m_rep = &Rep::null();
1242 return *this;
1243 }
1244
1245 if (!c[0]) {
1246 m_rep = &Rep::empty();
1247 return *this;
1248 }
1249
1250 int l = static_cast<int>(strlen(c));
1251 UChar* d;
1252 BaseString* base = m_rep->baseString();
1253 if (!base->isShared() && l <= base->capacity && m_rep == base && m_rep->offset == 0 && base->preCapacity == 0) {
1254 d = base->buf;
1255 m_rep->_hash = 0;
1256 m_rep->len = l;
1257 } else {
1258 d = allocChars(l);
1259 if (!d) {
1260 makeNull();
1261 return *this;
1262 }
1263 m_rep = Rep::create(d, l);
1264 }
1265 for (int i = 0; i < l; i++)
1266 d[i] = static_cast<unsigned char>(c[i]); // use unsigned char to zero-extend instead of sign-extend
1267
1268 return *this;
1269}
1270
1271bool UString::is8Bit() const
1272{
1273 const UChar* u = data();
1274 const UChar* limit = u + size();
1275 while (u < limit) {
1276 if (u[0] > 0xFF)
1277 return false;
1278 ++u;
1279 }
1280
1281 return true;
1282}
1283
1284UChar UString::operator[](int pos) const
1285{
1286 if (pos >= size())
1287 return '\0';
1288 return data()[pos];
1289}
1290
1291double UString::toDouble(bool tolerateTrailingJunk, bool tolerateEmptyString) const
1292{
1293 if (size() == 1) {
1294 UChar c = data()[0];
1295 if (isASCIIDigit(c))
1296 return c - '0';
1297 if (isASCIISpace(c) && tolerateEmptyString)
1298 return 0;
1299 return NaN;
1300 }
1301
1302 // FIXME: If tolerateTrailingJunk is true, then we want to tolerate non-8-bit junk
1303 // after the number, so this is too strict a check.
1304 CStringBuffer s;
1305 if (!getCString(s))
1306 return NaN;
1307 const char* c = s.data();
1308
1309 // skip leading white space
1310 while (isASCIISpace(*c))
1311 c++;
1312
1313 // empty string ?
1314 if (*c == '\0')
1315 return tolerateEmptyString ? 0.0 : NaN;
1316
1317 double d;
1318
1319 // hex number ?
1320 if (*c == '0' && (*(c + 1) == 'x' || *(c + 1) == 'X')) {
1321 const char* firstDigitPosition = c + 2;
1322 c++;
1323 d = 0.0;
1324 while (*(++c)) {
1325 if (*c >= '0' && *c <= '9')
1326 d = d * 16.0 + *c - '0';
1327 else if ((*c >= 'A' && *c <= 'F') || (*c >= 'a' && *c <= 'f'))
1328 d = d * 16.0 + (*c & 0xdf) - 'A' + 10.0;
1329 else
1330 break;
1331 }
1332
1333 if (d >= mantissaOverflowLowerBound)
1334 d = parseIntOverflow(firstDigitPosition, c - firstDigitPosition, 16);
1335 } else {
1336 // regular number ?
1337 char* end;
1338 d = WTF::strtod(c, &end);
1339 if ((d != 0.0 || end != c) && d != Inf && d != -Inf) {
1340 c = end;
1341 } else {
1342 double sign = 1.0;
1343
1344 if (*c == '+')
1345 c++;
1346 else if (*c == '-') {
1347 sign = -1.0;
1348 c++;
1349 }
1350
1351 // We used strtod() to do the conversion. However, strtod() handles
1352 // infinite values slightly differently than JavaScript in that it
1353 // converts the string "inf" with any capitalization to infinity,
1354 // whereas the ECMA spec requires that it be converted to NaN.
1355
1356 if (c[0] == 'I' && c[1] == 'n' && c[2] == 'f' && c[3] == 'i' && c[4] == 'n' && c[5] == 'i' && c[6] == 't' && c[7] == 'y') {
1357 d = sign * Inf;
1358 c += 8;
1359 } else if ((d == Inf || d == -Inf) && *c != 'I' && *c != 'i')
1360 c = end;
1361 else
1362 return NaN;
1363 }
1364 }
1365
1366 // allow trailing white space
1367 while (isASCIISpace(*c))
1368 c++;
1369 // don't allow anything after - unless tolerant=true
1370 if (!tolerateTrailingJunk && *c != '\0')
1371 d = NaN;
1372
1373 return d;
1374}
1375
1376double UString::toDouble(bool tolerateTrailingJunk) const
1377{
1378 return toDouble(tolerateTrailingJunk, true);
1379}
1380
1381double UString::toDouble() const
1382{
1383 return toDouble(false, true);
1384}
1385
1386uint32_t UString::toUInt32(bool* ok) const
1387{
1388 double d = toDouble();
1389 bool b = true;
1390
1391 if (d != static_cast<uint32_t>(d)) {
1392 b = false;
1393 d = 0;
1394 }
1395
1396 if (ok)
1397 *ok = b;
1398
1399 return static_cast<uint32_t>(d);
1400}
1401
1402uint32_t UString::toUInt32(bool* ok, bool tolerateEmptyString) const
1403{
1404 double d = toDouble(false, tolerateEmptyString);
1405 bool b = true;
1406
1407 if (d != static_cast<uint32_t>(d)) {
1408 b = false;
1409 d = 0;
1410 }
1411
1412 if (ok)
1413 *ok = b;
1414
1415 return static_cast<uint32_t>(d);
1416}
1417
1418uint32_t UString::toStrictUInt32(bool* ok) const
1419{
1420 if (ok)
1421 *ok = false;
1422
1423 // Empty string is not OK.
1424 int len = m_rep->len;
1425 if (len == 0)
1426 return 0;
1427 const UChar* p = m_rep->data();
1428 unsigned short c = p[0];
1429
1430 // If the first digit is 0, only 0 itself is OK.
1431 if (c == '0') {
1432 if (len == 1 && ok)
1433 *ok = true;
1434 return 0;
1435 }
1436
1437 // Convert to UInt32, checking for overflow.
1438 uint32_t i = 0;
1439 while (1) {
1440 // Process character, turning it into a digit.
1441 if (c < '0' || c > '9')
1442 return 0;
1443 const unsigned d = c - '0';
1444
1445 // Multiply by 10, checking for overflow out of 32 bits.
1446 if (i > 0xFFFFFFFFU / 10)
1447 return 0;
1448 i *= 10;
1449
1450 // Add in the digit, checking for overflow out of 32 bits.
1451 const unsigned max = 0xFFFFFFFFU - d;
1452 if (i > max)
1453 return 0;
1454 i += d;
1455
1456 // Handle end of string.
1457 if (--len == 0) {
1458 if (ok)
1459 *ok = true;
1460 return i;
1461 }
1462
1463 // Get next character.
1464 c = *(++p);
1465 }
1466}
1467
1468int UString::find(const UString& f, int pos) const
1469{
1470 int fsz = f.size();
1471
1472 if (pos < 0)
1473 pos = 0;
1474
1475 if (fsz == 1) {
1476 UChar ch = f[0];
1477 const UChar* end = data() + size();
1478 for (const UChar* c = data() + pos; c < end; c++) {
1479 if (*c == ch)
1480 return static_cast<int>(c - data());
1481 }
1482 return -1;
1483 }
1484
1485 int sz = size();
1486 if (sz < fsz)
1487 return -1;
1488 if (fsz == 0)
1489 return pos;
1490 const UChar* end = data() + sz - fsz;
1491 int fsizeminusone = (fsz - 1) * sizeof(UChar);
1492 const UChar* fdata = f.data();
1493 unsigned short fchar = fdata[0];
1494 ++fdata;
1495 for (const UChar* c = data() + pos; c <= end; c++) {
1496 if (c[0] == fchar && !memcmp(c + 1, fdata, fsizeminusone))
1497 return static_cast<int>(c - data());
1498 }
1499
1500 return -1;
1501}
1502
1503int UString::find(UChar ch, int pos) const
1504{
1505 if (pos < 0)
1506 pos = 0;
1507 const UChar* end = data() + size();
1508 for (const UChar* c = data() + pos; c < end; c++) {
1509 if (*c == ch)
1510 return static_cast<int>(c - data());
1511 }
1512
1513 return -1;
1514}
1515
1516int UString::rfind(const UString& f, int pos) const
1517{
1518 int sz = size();
1519 int fsz = f.size();
1520 if (sz < fsz)
1521 return -1;
1522 if (pos < 0)
1523 pos = 0;
1524 if (pos > sz - fsz)
1525 pos = sz - fsz;
1526 if (fsz == 0)
1527 return pos;
1528 int fsizeminusone = (fsz - 1) * sizeof(UChar);
1529 const UChar* fdata = f.data();
1530 for (const UChar* c = data() + pos; c >= data(); c--) {
1531 if (*c == *fdata && !memcmp(c + 1, fdata + 1, fsizeminusone))
1532 return static_cast<int>(c - data());
1533 }
1534
1535 return -1;
1536}
1537
1538int UString::rfind(UChar ch, int pos) const
1539{
1540 if (isEmpty())
1541 return -1;
1542 if (pos + 1 >= size())
1543 pos = size() - 1;
1544 for (const UChar* c = data() + pos; c >= data(); c--) {
1545 if (*c == ch)
1546 return static_cast<int>(c - data());
1547 }
1548
1549 return -1;
1550}
1551
1552UString UString::substr(int pos, int len) const
1553{
1554 int s = size();
1555
1556 if (pos < 0)
1557 pos = 0;
1558 else if (pos >= s)
1559 pos = s;
1560 if (len < 0)
1561 len = s;
1562 if (pos + len >= s)
1563 len = s - pos;
1564
1565 if (pos == 0 && len == s)
1566 return *this;
1567
1568 return UString(Rep::create(m_rep, pos, len));
1569}
1570
1571bool operator==(const UString& s1, const UString& s2)
1572{
1573 int size = s1.size();
1574 switch (size) {
1575 case 0:
1576 return !s2.size();
1577 case 1:
1578 return s2.size() == 1 && s1.data()[0] == s2.data()[0];
1579 default:
1580 return s2.size() == size && memcmp(s1.data(), s2.data(), size * sizeof(UChar)) == 0;
1581 }
1582}
1583
1584bool operator==(const UString& s1, const char *s2)
1585{
1586 if (s2 == 0)
1587 return s1.isEmpty();
1588
1589 const UChar* u = s1.data();
1590 const UChar* uend = u + s1.size();
1591 while (u != uend && *s2) {
1592 if (u[0] != (unsigned char)*s2)
1593 return false;
1594 s2++;
1595 u++;
1596 }
1597
1598 return u == uend && *s2 == 0;
1599}
1600
1601bool operator<(const UString& s1, const UString& s2)
1602{
1603 const int l1 = s1.size();
1604 const int l2 = s2.size();
1605 const int lmin = l1 < l2 ? l1 : l2;
1606 const UChar* c1 = s1.data();
1607 const UChar* c2 = s2.data();
1608 int l = 0;
1609 while (l < lmin && *c1 == *c2) {
1610 c1++;
1611 c2++;
1612 l++;
1613 }
1614 if (l < lmin)
1615 return (c1[0] < c2[0]);
1616
1617 return (l1 < l2);
1618}
1619
1620bool operator>(const UString& s1, const UString& s2)
1621{
1622 const int l1 = s1.size();
1623 const int l2 = s2.size();
1624 const int lmin = l1 < l2 ? l1 : l2;
1625 const UChar* c1 = s1.data();
1626 const UChar* c2 = s2.data();
1627 int l = 0;
1628 while (l < lmin && *c1 == *c2) {
1629 c1++;
1630 c2++;
1631 l++;
1632 }
1633 if (l < lmin)
1634 return (c1[0] > c2[0]);
1635
1636 return (l1 > l2);
1637}
1638
1639int compare(const UString& s1, const UString& s2)
1640{
1641 const int l1 = s1.size();
1642 const int l2 = s2.size();
1643 const int lmin = l1 < l2 ? l1 : l2;
1644 const UChar* c1 = s1.data();
1645 const UChar* c2 = s2.data();
1646 int l = 0;
1647 while (l < lmin && *c1 == *c2) {
1648 c1++;
1649 c2++;
1650 l++;
1651 }
1652
1653 if (l < lmin)
1654 return (c1[0] > c2[0]) ? 1 : -1;
1655
1656 if (l1 == l2)
1657 return 0;
1658
1659 return (l1 > l2) ? 1 : -1;
1660}
1661
1662bool equal(const UString::Rep* r, const UString::Rep* b)
1663{
1664 int length = r->len;
1665 if (length != b->len)
1666 return false;
1667 const UChar* d = r->data();
1668 const UChar* s = b->data();
1669 for (int i = 0; i != length; ++i) {
1670 if (d[i] != s[i])
1671 return false;
1672 }
1673 return true;
1674}
1675
1676CString UString::UTF8String(bool strict) const
1677{
1678 // Allocate a buffer big enough to hold all the characters.
1679 const int length = size();
1680 Vector<char, 1024> buffer(length * 3);
1681
1682 // Convert to runs of 8-bit characters.
1683 char* p = buffer.data();
1684 const UChar* d = reinterpret_cast<const UChar*>(&data()[0]);
1685 ConversionResult result = convertUTF16ToUTF8(&d, d + length, &p, p + buffer.size(), strict);
1686 if (result != conversionOK)
1687 return CString();
1688
1689 return CString(buffer.data(), p - buffer.data());
1690}
1691
1692// For use in error handling code paths -- having this not be inlined helps avoid PIC branches to fetch the global on Mac OS X.
1693NEVER_INLINE void UString::makeNull()
1694{
1695 m_rep = &Rep::null();
1696}
1697
1698// For use in error handling code paths -- having this not be inlined helps avoid PIC branches to fetch the global on Mac OS X.
1699NEVER_INLINE UString::Rep* UString::nullRep()
1700{
1701 return &Rep::null();
1702}
1703
1704} // namespace JSC
Note: See TracBrowser for help on using the repository browser.