Context Navigation

source: webkit/trunk/JavaScriptCore/runtime/UString.cpp@ 47620

Visit:

Last change on this file since 47620 was 47102, checked in by [email protected], 16 years ago

2009-08-12 Balazs Kelemen <[email protected]>

Reviewed by Ariya Hidayat.

Build fix: use std::numeric_limits<long long>::min() instead of LLONG_MIN
since LLONG_MIN is not defined in standard c++.

runtime/UString.cpp: (JSC::UString::from):

Property svn:eol-style set to native

File size: 49.6 KB

Line
1	/*
2	* Copyright (C) 1999-2000 Harri Porten ([email protected])
3	* Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
4	* Copyright (C) 2007 Cameron Zwarich ([email protected])
5	* Copyright (C) 2009 Google Inc. All rights reserved.
6	*
7	* This library is free software; you can redistribute it and/or
8	* modify it under the terms of the GNU Library General Public
9	* License as published by the Free Software Foundation; either
10	* version 2 of the License, or (at your option) any later version.
11	*
12	* This library is distributed in the hope that it will be useful,
13	* but WITHOUT ANY WARRANTY; without even the implied warranty of
14	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15	* Library General Public License for more details.
16	*
17	* You should have received a copy of the GNU Library General Public License
18	* along with this library; see the file COPYING.LIB. If not, write to
19	* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20	* Boston, MA 02110-1301, USA.
21	*
22	*/
23
24	#include "config.h"
25	#include "UString.h"
26
27	#include "JSGlobalObjectFunctions.h"
28	#include "Collector.h"
29	#include "dtoa.h"
30	#include "Identifier.h"
31	#include "Operations.h"
32	#include <ctype.h>
33	#include <float.h>
34	#include <limits.h>
35	#include <limits>
36	#include <math.h>
37	#include <stdio.h>
38	#include <stdlib.h>
39	#include <wtf/ASCIICType.h>
40	#include <wtf/Assertions.h>
41	#include <wtf/MathExtras.h>
42	#include <wtf/StringExtras.h>
43	#include <wtf/Vector.h>
44	#include <wtf/unicode/UTF8.h>
45
46	#if HAVE(STRING_H)
47	#include <string.h>
48	#endif
49	#if HAVE(STRINGS_H)
50	#include <strings.h>
51	#endif
52
53	using namespace WTF;
54	using namespace WTF::Unicode;
55	using namespace std;
56
57	// This can be tuned differently per platform by putting platform #ifs right here.
58	// If you don't define this macro at all, then copyChars will just call directly
59	// to memcpy.
60	#define USTRING_COPY_CHARS_INLINE_CUTOFF 20
61
62	namespace JSC {
63
64	extern const double NaN;
65	extern const double Inf;
66
67	// This number must be at least 2 to avoid sharing empty, null as well as 1 character strings from SmallStrings.
68	static const int minLengthToShare = 10;
69
// Largest representable size_t, used as the "size computation overflowed" marker.
static inline size_t overflowIndicator()
{
    const size_t largest = std::numeric_limits<size_t>::max();
    return largest;
}
71	static inline size_t maxUChars() { return std::numeric_limits<size_t>::max() / sizeof(UChar); }
72
73	static inline PossiblyNull<UChar*> allocChars(size_t length)
74	{
75	ASSERT(length);
76	if (length > maxUChars())
77	return 0;
78	return tryFastMalloc(sizeof(UChar) * length);
79	}
80
81	static inline PossiblyNull<UChar> reallocChars(UChar buffer, size_t length)
82	{
83	ASSERT(length);
84	if (length > maxUChars())
85	return 0;
86	return tryFastRealloc(buffer, sizeof(UChar) * length);
87	}
88
89	static inline void copyChars(UChar* destination, const UChar* source, unsigned numCharacters)
90	{
91	#ifdef USTRING_COPY_CHARS_INLINE_CUTOFF
92	if (numCharacters <= USTRING_COPY_CHARS_INLINE_CUTOFF) {
93	for (unsigned i = 0; i < numCharacters; ++i)
94	destination[i] = source[i];
95	return;
96	}
97	#endif
98	memcpy(destination, source, numCharacters * sizeof(UChar));
99	}
100
101	COMPILE_ASSERT(sizeof(UChar) == 2, uchar_is_2_bytes);
102
103	CString::CString(const char* c)
104	: m_length(strlen(c))
105	, m_data(new char[m_length + 1])
106	{
107	memcpy(m_data, c, m_length + 1);
108	}
109
110	CString::CString(const char* c, size_t length)
111	: m_length(length)
112	, m_data(new char[length + 1])
113	{
114	memcpy(m_data, c, m_length);
115	m_data[m_length] = 0;
116	}
117
118	CString::CString(const CString& b)
119	{
120	m_length = b.m_length;
121	if (b.m_data) {
122	m_data = new char[m_length + 1];
123	memcpy(m_data, b.m_data, m_length + 1);
124	} else
125	m_data = 0;
126	}
127
128	CString::~CString()
129	{
130	delete [] m_data;
131	}
132
133	CString CString::adopt(char* c, size_t length)
134	{
135	CString s;
136	s.m_data = c;
137	s.m_length = length;
138	return s;
139	}
140
141	CString& CString::append(const CString& t)
142	{
143	char* n;
144	n = new char[m_length + t.m_length + 1];
145	if (m_length)
146	memcpy(n, m_data, m_length);
147	if (t.m_length)
148	memcpy(n + m_length, t.m_data, t.m_length);
149	m_length += t.m_length;
150	n[m_length] = 0;
151
152	delete [] m_data;
153	m_data = n;
154
155	return *this;
156	}
157
158	CString& CString::operator=(const char* c)
159	{
160	if (m_data)
161	delete [] m_data;
162	m_length = strlen(c);
163	m_data = new char[m_length + 1];
164	memcpy(m_data, c, m_length + 1);
165
166	return *this;
167	}
168
169	CString& CString::operator=(const CString& str)
170	{
171	if (this == &str)
172	return *this;
173
174	if (m_data)
175	delete [] m_data;
176	m_length = str.m_length;
177	if (str.m_data) {
178	m_data = new char[m_length + 1];
179	memcpy(m_data, str.m_data, m_length + 1);
180	} else
181	m_data = 0;
182
183	return *this;
184	}
185
186	bool operator==(const CString& c1, const CString& c2)
187	{
188	size_t len = c1.size();
189	return len == c2.size() && (len == 0 \|\| memcmp(c1.c_str(), c2.c_str(), len) == 0);
190	}
191
192	// These static strings are immutable, except for rc, whose initial value is chosen to
193	// reduce the possibility of it becoming zero due to ref/deref not being thread-safe.
194	static UChar sharedEmptyChar;
195	UString::BaseString* UString::Rep::nullBaseString;
196	UString::BaseString* UString::Rep::emptyBaseString;
197	UString* UString::nullUString;
198
199	static void initializeStaticBaseString(UString::BaseString& base)
200	{
201	base.rc = INT_MAX / 2;
202	base.m_identifierTableAndFlags.setFlag(UString::Rep::StaticFlag);
203	base.checkConsistency();
204	}
205
206	void initializeUString()
207	{
208	UString::Rep::nullBaseString = new UString::BaseString(0, 0);
209	initializeStaticBaseString(*UString::Rep::nullBaseString);
210
211	UString::Rep::emptyBaseString = new UString::BaseString(&sharedEmptyChar, 0);
212	initializeStaticBaseString(*UString::Rep::emptyBaseString);
213
214	UString::nullUString = new UString;
215	}
216
217	static char* statBuffer = 0; // Only used for debugging via UString::ascii().
218
219	PassRefPtr<UString::Rep> UString::Rep::createCopying(const UChar* d, int l)
220	{
221	UChar* copyD = static_cast<UChar>(fastMalloc(l sizeof(UChar)));
222	copyChars(copyD, d, l);
223	return create(copyD, l);
224	}
225
226	PassRefPtr<UString::Rep> UString::Rep::createFromUTF8(const char* string)
227	{
228	if (!string)
229	return &UString::Rep::null();
230
231	size_t length = strlen(string);
232	Vector<UChar, 1024> buffer(length);
233	UChar* p = buffer.data();
234	if (conversionOK != convertUTF8ToUTF16(&string, string + length, &p, p + length))
235	return &UString::Rep::null();
236
237	return UString::Rep::createCopying(buffer.data(), p - buffer.data());
238	}
239
240	PassRefPtr<UString::Rep> UString::Rep::create(UChar* string, int length, PassRefPtr<UString::SharedUChar> sharedBuffer)
241	{
242	PassRefPtr<UString::Rep> rep = create(string, length);
243	rep->baseString()->setSharedBuffer(sharedBuffer);
244	rep->checkConsistency();
245	return rep;
246	}
247
248	UString::SharedUChar* UString::Rep::sharedBuffer()
249	{
250	UString::BaseString* base = baseString();
251	if (len < minLengthToShare)
252	return 0;
253
254	return base->sharedBuffer();
255	}
256
257	void UString::Rep::destroy()
258	{
259	checkConsistency();
260
261	// Static null and empty strings can never be destroyed, but we cannot rely on
262	// reference counting, because ref/deref are not thread-safe.
263	if (!isStatic()) {
264	if (identifierTable())
265	Identifier::remove(this);
266
267	UString::BaseString* base = baseString();
268	if (base == this) {
269	if (m_sharedBuffer)
270	m_sharedBuffer->deref();
271	else
272	fastFree(base->buf);
273	} else
274	base->deref();
275
276	delete this;
277	}
278	}
279
280	// Golden ratio - arbitrary start value to avoid mapping all 0's to all 0's
281	// or anything like that.
282	const unsigned PHI = 0x9e3779b9U;
283
284	// Paul Hsieh's SuperFastHash
285	// http://www.azillionmonkeys.com/qed/hash.html
286	unsigned UString::Rep::computeHash(const UChar* s, int len)
287	{
288	unsigned l = len;
289	uint32_t hash = PHI;
290	uint32_t tmp;
291
292	int rem = l & 1;
293	l >>= 1;
294
295	// Main loop
296	for (; l > 0; l--) {
297	hash += s[0];
298	tmp = (s[1] << 11) ^ hash;
299	hash = (hash << 16) ^ tmp;
300	s += 2;
301	hash += hash >> 11;
302	}
303
304	// Handle end case
305	if (rem) {
306	hash += s[0];
307	hash ^= hash << 11;
308	hash += hash >> 17;
309	}
310
311	// Force "avalanching" of final 127 bits
312	hash ^= hash << 3;
313	hash += hash >> 5;
314	hash ^= hash << 2;
315	hash += hash >> 15;
316	hash ^= hash << 10;
317
318	// this avoids ever returning a hash code of 0, since that is used to
319	// signal "hash not computed yet", using a value that is likely to be
320	// effectively the same as 0 when the low bits are masked
321	if (hash == 0)
322	hash = 0x80000000;
323
324	return hash;
325	}
326
327	// Paul Hsieh's SuperFastHash
328	// http://www.azillionmonkeys.com/qed/hash.html
329	unsigned UString::Rep::computeHash(const char* s, int l)
330	{
331	// This hash is designed to work on 16-bit chunks at a time. But since the normal case
332	// (above) is to hash UTF-16 characters, we just treat the 8-bit chars as if they
333	// were 16-bit chunks, which should give matching results
334
335	uint32_t hash = PHI;
336	uint32_t tmp;
337
338	size_t rem = l & 1;
339	l >>= 1;
340
341	// Main loop
342	for (; l > 0; l--) {
343	hash += static_cast<unsigned char>(s[0]);
344	tmp = (static_cast<unsigned char>(s[1]) << 11) ^ hash;
345	hash = (hash << 16) ^ tmp;
346	s += 2;
347	hash += hash >> 11;
348	}
349
350	// Handle end case
351	if (rem) {
352	hash += static_cast<unsigned char>(s[0]);
353	hash ^= hash << 11;
354	hash += hash >> 17;
355	}
356
357	// Force "avalanching" of final 127 bits
358	hash ^= hash << 3;
359	hash += hash >> 5;
360	hash ^= hash << 2;
361	hash += hash >> 15;
362	hash ^= hash << 10;
363
364	// this avoids ever returning a hash code of 0, since that is used to
365	// signal "hash not computed yet", using a value that is likely to be
366	// effectively the same as 0 when the low bits are masked
367	if (hash == 0)
368	hash = 0x80000000;
369
370	return hash;
371	}
372
373	#ifndef NDEBUG
374	void UString::Rep::checkConsistency() const
375	{
376	const UString::BaseString* base = baseString();
377
378	// There is no recursion for base strings.
379	ASSERT(base == base->baseString());
380
381	if (isStatic()) {
382	// There are only two static strings: null and empty.
383	ASSERT(!len);
384
385	// Static strings cannot get in identifier tables, because they are globally shared.
386	ASSERT(!identifierTable());
387	}
388
389	// The string fits in buffer.
390	ASSERT(base->usedPreCapacity <= base->preCapacity);
391	ASSERT(base->usedCapacity <= base->capacity);
392	ASSERT(-offset <= base->usedPreCapacity);
393	ASSERT(offset + len <= base->usedCapacity);
394	}
395	#endif
396
397	UString::SharedUChar* UString::BaseString::sharedBuffer()
398	{
399	if (!m_sharedBuffer)
400	setSharedBuffer(SharedUChar::create(new OwnFastMallocPtr<UChar>(buf)));
401	return m_sharedBuffer;
402	}
403
404	void UString::BaseString::setSharedBuffer(PassRefPtr<UString::SharedUChar> sharedBuffer)
405	{
406	// The manual steps below are because m_sharedBuffer can't be a RefPtr. m_sharedBuffer
407	// is in a union with another variable to avoid making BaseString any larger.
408	if (m_sharedBuffer)
409	m_sharedBuffer->deref();
410	m_sharedBuffer = sharedBuffer.releaseRef();
411	}
412
413	bool UString::BaseString::slowIsBufferReadOnly()
414	{
415	// The buffer may not be modified as soon as the underlying data has been shared with another class.
416	if (m_sharedBuffer->isShared())
417	return true;
418
419	// At this point, we know it that the underlying buffer isn't shared outside of this base class,
420	// so get rid of m_sharedBuffer.
421	OwnPtr<OwnFastMallocPtr<UChar> > mallocPtr(m_sharedBuffer->release());
422	UChar* unsharedBuf = const_cast<UChar*>(mallocPtr->release());
423	setSharedBuffer(0);
424	preCapacity += (buf - unsharedBuf);
425	buf = unsharedBuf;
426	return false;
427	}
428
429	// Put these early so they can be inlined.
430	static inline size_t expandedSize(size_t capacitySize, size_t precapacitySize)
431	{
432	// Combine capacitySize & precapacitySize to produce a single size to allocate,
433	// check that doing so does not result in overflow.
434	size_t size = capacitySize + precapacitySize;
435	if (size < capacitySize)
436	return overflowIndicator();
437
438	// Small Strings (up to 4 pages):
439	// Expand the allocation size to 112.5% of the amount requested. This is largely sicking
440	// to our previous policy, however 112.5% is cheaper to calculate.
441	if (size < 0x4000) {
442	size_t expandedSize = ((size + (size >> 3)) \| 15) + 1;
443	// Given the limited range within which we calculate the expansion in this
444	// fashion the above calculation should never overflow.
445	ASSERT(expandedSize >= size);
446	ASSERT(expandedSize < maxUChars());
447	return expandedSize;
448	}
449
450	// Medium Strings (up to 128 pages):
451	// For pages covering multiple pages over-allocation is less of a concern - any unused
452	// space will not be paged in if it is not used, so this is purely a VM overhead. For
453	// these strings allocate 2x the requested size.
454	if (size < 0x80000) {
455	size_t expandedSize = ((size + size) \| 0xfff) + 1;
456	// Given the limited range within which we calculate the expansion in this
457	// fashion the above calculation should never overflow.
458	ASSERT(expandedSize >= size);
459	ASSERT(expandedSize < maxUChars());
460	return expandedSize;
461	}
462
463	// Large Strings (to infinity and beyond!):
464	// Revert to our 112.5% policy - probably best to limit the amount of unused VM we allow
465	// any individual string be responsible for.
466	size_t expandedSize = ((size + (size >> 3)) \| 0xfff) + 1;
467
468	// Check for overflow - any result that is at least as large as requested (but
469	// still below the limit) is okay.
470	if ((expandedSize >= size) && (expandedSize < maxUChars()))
471	return expandedSize;
472	return overflowIndicator();
473	}
474
475	static inline bool expandCapacity(UString::Rep* rep, int requiredLength)
476	{
477	rep->checkConsistency();
478	ASSERT(!rep->baseString()->isBufferReadOnly());
479
480	UString::BaseString* base = rep->baseString();
481
482	if (requiredLength > base->capacity) {
483	size_t newCapacity = expandedSize(requiredLength, base->preCapacity);
484	UChar* oldBuf = base->buf;
485	if (!reallocChars(base->buf, newCapacity).getValue(base->buf)) {
486	base->buf = oldBuf;
487	return false;
488	}
489	base->capacity = newCapacity - base->preCapacity;
490	}
491	if (requiredLength > base->usedCapacity)
492	base->usedCapacity = requiredLength;
493
494	rep->checkConsistency();
495	return true;
496	}
497
498	bool UString::Rep::reserveCapacity(int capacity)
499	{
500	// If this is an empty string there is no point 'growing' it - just allocate a new one.
501	// If the BaseString is shared with another string that is using more capacity than this
502	// string is, then growing the buffer won't help.
503	// If the BaseString's buffer is readonly, then it isn't allowed to grow.
504	UString::BaseString* base = baseString();
505	if (!base->buf \|\| !base->capacity \|\| (offset + len) != base->usedCapacity \|\| base->isBufferReadOnly())
506	return false;
507
508	// If there is already sufficient capacity, no need to grow!
509	if (capacity <= base->capacity)
510	return true;
511
512	checkConsistency();
513
514	size_t newCapacity = expandedSize(capacity, base->preCapacity);
515	UChar* oldBuf = base->buf;
516	if (!reallocChars(base->buf, newCapacity).getValue(base->buf)) {
517	base->buf = oldBuf;
518	return false;
519	}
520	base->capacity = newCapacity - base->preCapacity;
521
522	checkConsistency();
523	return true;
524	}
525
526	void UString::expandCapacity(int requiredLength)
527	{
528	if (!JSC::expandCapacity(m_rep.get(), requiredLength))
529	makeNull();
530	}
531
532	void UString::expandPreCapacity(int requiredPreCap)
533	{
534	m_rep->checkConsistency();
535	ASSERT(!m_rep->baseString()->isBufferReadOnly());
536
537	BaseString* base = m_rep->baseString();
538
539	if (requiredPreCap > base->preCapacity) {
540	size_t newCapacity = expandedSize(requiredPreCap, base->capacity);
541	int delta = newCapacity - base->capacity - base->preCapacity;
542
543	UChar* newBuf;
544	if (!allocChars(newCapacity).getValue(newBuf)) {
545	makeNull();
546	return;
547	}
548	copyChars(newBuf + delta, base->buf, base->capacity + base->preCapacity);
549	fastFree(base->buf);
550	base->buf = newBuf;
551
552	base->preCapacity = newCapacity - base->capacity;
553	}
554	if (requiredPreCap > base->usedPreCapacity)
555	base->usedPreCapacity = requiredPreCap;
556
557	m_rep->checkConsistency();
558	}
559
560	static PassRefPtr<UString::Rep> createRep(const char* c)
561	{
562	if (!c)
563	return &UString::Rep::null();
564
565	if (!c[0])
566	return &UString::Rep::empty();
567
568	size_t length = strlen(c);
569	UChar* d;
570	if (!allocChars(length).getValue(d))
571	return &UString::Rep::null();
572	else {
573	for (size_t i = 0; i < length; i++)
574	d[i] = static_cast<unsigned char>(c[i]); // use unsigned char to zero-extend instead of sign-extend
575	return UString::Rep::create(d, static_cast<int>(length));
576	}
577
578	}
579
580	UString::UString(const char* c)
581	: m_rep(createRep(c))
582	{
583	}
584
585	UString::UString(const UChar* c, int length)
586	{
587	if (length == 0)
588	m_rep = &Rep::empty();
589	else
590	m_rep = Rep::createCopying(c, length);
591	}
592
593	UString::UString(UChar* c, int length, bool copy)
594	{
595	if (length == 0)
596	m_rep = &Rep::empty();
597	else if (copy)
598	m_rep = Rep::createCopying(c, length);
599	else
600	m_rep = Rep::create(c, length);
601	}
602
603	UString::UString(const Vector<UChar>& buffer)
604	{
605	if (!buffer.size())
606	m_rep = &Rep::empty();
607	else
608	m_rep = Rep::createCopying(buffer.data(), buffer.size());
609	}
610
611	static ALWAYS_INLINE int newCapacityWithOverflowCheck(const int currentCapacity, const int extendLength, const bool plusOne = false)
612	{
613	ASSERT_WITH_MESSAGE(extendLength >= 0, "extendedLength = %d", extendLength);
614
615	const int plusLength = plusOne ? 1 : 0;
616	if (currentCapacity > std::numeric_limits<int>::max() - extendLength - plusLength)
617	CRASH();
618
619	return currentCapacity + extendLength + plusLength;
620	}
621
622	static ALWAYS_INLINE PassRefPtr<UString::Rep> concatenate(PassRefPtr<UString::Rep> r, const UChar* tData, int tSize)
623	{
624	RefPtr<UString::Rep> rep = r;
625
626	rep->checkConsistency();
627
628	int thisSize = rep->size();
629	int thisOffset = rep->offset;
630	int length = thisSize + tSize;
631	UString::BaseString* base = rep->baseString();
632
633	// possible cases:
634	if (tSize == 0) {
635	// t is empty
636	} else if (thisSize == 0) {
637	// this is empty
638	rep = UString::Rep::createCopying(tData, tSize);
639	} else if (rep == base && !base->isShared()) {
640	// this is direct and has refcount of 1 (so we can just alter it directly)
641	if (!expandCapacity(rep.get(), newCapacityWithOverflowCheck(thisOffset, length)))
642	rep = &UString::Rep::null();
643	if (rep->data()) {
644	copyChars(rep->data() + thisSize, tData, tSize);
645	rep->len = length;
646	rep->_hash = 0;
647	}
648	} else if (thisOffset + thisSize == base->usedCapacity && thisSize >= minShareSize && !base->isBufferReadOnly()) {
649	// this reaches the end of the buffer - extend it if it's long enough to append to
650	if (!expandCapacity(rep.get(), newCapacityWithOverflowCheck(thisOffset, length)))
651	rep = &UString::Rep::null();
652	if (rep->data()) {
653	copyChars(rep->data() + thisSize, tData, tSize);
654	rep = UString::Rep::create(rep, 0, length);
655	}
656	} else {
657	// This is shared in some way that prevents us from modifying base, so we must make a whole new string.
658	size_t newCapacity = expandedSize(length, 0);
659	UChar* d;
660	if (!allocChars(newCapacity).getValue(d))
661	rep = &UString::Rep::null();
662	else {
663	copyChars(d, rep->data(), thisSize);
664	copyChars(d + thisSize, tData, tSize);
665	rep = UString::Rep::create(d, length);
666	rep->baseString()->capacity = newCapacity;
667	}
668	}
669
670	rep->checkConsistency();
671
672	return rep.release();
673	}
674
675	static ALWAYS_INLINE PassRefPtr<UString::Rep> concatenate(PassRefPtr<UString::Rep> r, const char* t)
676	{
677	RefPtr<UString::Rep> rep = r;
678
679	rep->checkConsistency();
680
681	int thisSize = rep->size();
682	int thisOffset = rep->offset;
683	int tSize = static_cast<int>(strlen(t));
684	int length = thisSize + tSize;
685	UString::BaseString* base = rep->baseString();
686
687	// possible cases:
688	if (thisSize == 0) {
689	// this is empty
690	rep = createRep(t);
691	} else if (tSize == 0) {
692	// t is empty, we'll just return *this below.
693	} else if (rep == base && !base->isShared()) {
694	// this is direct and has refcount of 1 (so we can just alter it directly)
695	expandCapacity(rep.get(), newCapacityWithOverflowCheck(thisOffset, length));
696	UChar* d = rep->data();
697	if (d) {
698	for (int i = 0; i < tSize; ++i)
699	d[thisSize + i] = static_cast<unsigned char>(t[i]); // use unsigned char to zero-extend instead of sign-extend
700	rep->len = length;
701	rep->_hash = 0;
702	}
703	} else if (thisOffset + thisSize == base->usedCapacity && thisSize >= minShareSize && !base->isBufferReadOnly()) {
704	// this string reaches the end of the buffer - extend it
705	expandCapacity(rep.get(), newCapacityWithOverflowCheck(thisOffset, length));
706	UChar* d = rep->data();
707	if (d) {
708	for (int i = 0; i < tSize; ++i)
709	d[thisSize + i] = static_cast<unsigned char>(t[i]); // use unsigned char to zero-extend instead of sign-extend
710	rep = UString::Rep::create(rep, 0, length);
711	}
712	} else {
713	// This is shared in some way that prevents us from modifying base, so we must make a whole new string.
714	size_t newCapacity = expandedSize(length, 0);
715	UChar* d;
716	if (!allocChars(newCapacity).getValue(d))
717	rep = &UString::Rep::null();
718	else {
719	copyChars(d, rep->data(), thisSize);
720	for (int i = 0; i < tSize; ++i)
721	d[thisSize + i] = static_cast<unsigned char>(t[i]); // use unsigned char to zero-extend instead of sign-extend
722	rep = UString::Rep::create(d, length);
723	rep->baseString()->capacity = newCapacity;
724	}
725	}
726
727	rep->checkConsistency();
728
729	return rep.release();
730	}
731
732	PassRefPtr<UString::Rep> concatenate(UString::Rep* a, UString::Rep* b)
733	{
734	a->checkConsistency();
735	b->checkConsistency();
736
737	int aSize = a->size();
738	int bSize = b->size();
739	int aOffset = a->offset;
740
741	// possible cases:
742
743	UString::BaseString* aBase = a->baseString();
744	if (bSize == 1 && aOffset + aSize == aBase->usedCapacity && aOffset + aSize < aBase->capacity && !aBase->isBufferReadOnly()) {
745	// b is a single character (common fast case)
746	++aBase->usedCapacity;
747	a->data()[aSize] = b->data()[0];
748	return UString::Rep::create(a, 0, aSize + 1);
749	}
750
751	// a is empty
752	if (aSize == 0)
753	return b;
754	// b is empty
755	if (bSize == 0)
756	return a;
757
758	int bOffset = b->offset;
759	int length = aSize + bSize;
760
761	UString::BaseString* bBase = b->baseString();
762	if (aOffset + aSize == aBase->usedCapacity && aSize >= minShareSize && 4 * aSize >= bSize
763	&& (-bOffset != bBase->usedPreCapacity \|\| aSize >= bSize) && !aBase->isBufferReadOnly()) {
764	// - a reaches the end of its buffer so it qualifies for shared append
765	// - also, it's at least a quarter the length of b - appending to a much shorter
766	// string does more harm than good
767	// - however, if b qualifies for prepend and is longer than a, we'd rather prepend
768
769	UString x(a);
770	x.expandCapacity(newCapacityWithOverflowCheck(aOffset, length));
771	if (!a->data() \|\| !x.data())
772	return 0;
773	copyChars(a->data() + aSize, b->data(), bSize);
774	PassRefPtr<UString::Rep> result = UString::Rep::create(a, 0, length);
775
776	a->checkConsistency();
777	b->checkConsistency();
778	result->checkConsistency();
779
780	return result;
781	}
782
783	if (-bOffset == bBase->usedPreCapacity && bSize >= minShareSize && 4 * bSize >= aSize && !bBase->isBufferReadOnly()) {
784	// - b reaches the beginning of its buffer so it qualifies for shared prepend
785	// - also, it's at least a quarter the length of a - prepending to a much shorter
786	// string does more harm than good
787	UString y(b);
788	y.expandPreCapacity(-bOffset + aSize);
789	if (!b->data() \|\| !y.data())
790	return 0;
791	copyChars(b->data() - aSize, a->data(), aSize);
792	PassRefPtr<UString::Rep> result = UString::Rep::create(b, -aSize, length);
793
794	a->checkConsistency();
795	b->checkConsistency();
796	result->checkConsistency();
797
798	return result;
799	}
800
801	// a does not qualify for append, and b does not qualify for prepend, gotta make a whole new string
802	size_t newCapacity = expandedSize(length, 0);
803	UChar* d;
804	if (!allocChars(newCapacity).getValue(d))
805	return 0;
806	copyChars(d, a->data(), aSize);
807	copyChars(d + aSize, b->data(), bSize);
808	PassRefPtr<UString::Rep> result = UString::Rep::create(d, length);
809	result->baseString()->capacity = newCapacity;
810
811	a->checkConsistency();
812	b->checkConsistency();
813	result->checkConsistency();
814
815	return result;
816	}
817
818	PassRefPtr<UString::Rep> concatenate(UString::Rep* rep, int i)
819	{
820	UChar buf[1 + sizeof(i) * 3];
821	UChar* end = buf + sizeof(buf) / sizeof(UChar);
822	UChar* p = end;
823
824	if (i == 0)
825	*--p = '0';
826	else if (i == INT_MIN) {
827	char minBuf[1 + sizeof(i) * 3];
828	sprintf(minBuf, "%d", INT_MIN);
829	return concatenate(rep, minBuf);
830	} else {
831	bool negative = false;
832	if (i < 0) {
833	negative = true;
834	i = -i;
835	}
836	while (i) {
837	*--p = static_cast<unsigned short>((i % 10) + '0');
838	i /= 10;
839	}
840	if (negative)
841	*--p = '-';
842	}
843
844	return concatenate(rep, p, static_cast<int>(end - p));
845
846	}
847
848	PassRefPtr<UString::Rep> concatenate(UString::Rep* rep, double d)
849	{
850	// avoid ever printing -NaN, in JS conceptually there is only one NaN value
851	if (isnan(d))
852	return concatenate(rep, "NaN");
853
854	if (d == 0.0) // stringify -0 as 0
855	d = 0.0;
856
857	char buf[80];
858	int decimalPoint;
859	int sign;
860
861	char result[80];
862	WTF::dtoa(result, d, 0, &decimalPoint, &sign, NULL);
863	int length = static_cast<int>(strlen(result));
864
865	int i = 0;
866	if (sign)
867	buf[i++] = '-';
868
869	if (decimalPoint <= 0 && decimalPoint > -6) {
870	buf[i++] = '0';
871	buf[i++] = '.';
872	for (int j = decimalPoint; j < 0; j++)
873	buf[i++] = '0';
874	strcpy(buf + i, result);
875	} else if (decimalPoint <= 21 && decimalPoint > 0) {
876	if (length <= decimalPoint) {
877	strcpy(buf + i, result);
878	i += length;
879	for (int j = 0; j < decimalPoint - length; j++)
880	buf[i++] = '0';
881	buf[i] = '\0';
882	} else {
883	strncpy(buf + i, result, decimalPoint);
884	i += decimalPoint;
885	buf[i++] = '.';
886	strcpy(buf + i, result + decimalPoint);
887	}
888	} else if (result[0] < '0' \|\| result[0] > '9')
889	strcpy(buf + i, result);
890	else {
891	buf[i++] = result[0];
892	if (length > 1) {
893	buf[i++] = '.';
894	strcpy(buf + i, result + 1);
895	i += length - 1;
896	}
897
898	buf[i++] = 'e';
899	buf[i++] = (decimalPoint >= 0) ? '+' : '-';
900	// decimalPoint can't be more than 3 digits decimal given the
901	// nature of float representation
902	int exponential = decimalPoint - 1;
903	if (exponential < 0)
904	exponential = -exponential;
905	if (exponential >= 100)
906	buf[i++] = static_cast<char>('0' + exponential / 100);
907	if (exponential >= 10)
908	buf[i++] = static_cast<char>('0' + (exponential % 100) / 10);
909	buf[i++] = static_cast<char>('0' + exponential % 10);
910	buf[i++] = '\0';
911	}
912
913	return concatenate(rep, buf);
914	}
915
916	UString UString::from(int i)
917	{
918	UChar buf[1 + sizeof(i) * 3];
919	UChar* end = buf + sizeof(buf) / sizeof(UChar);
920	UChar* p = end;
921
922	if (i == 0)
923	*--p = '0';
924	else if (i == INT_MIN) {
925	char minBuf[1 + sizeof(i) * 3];
926	sprintf(minBuf, "%d", INT_MIN);
927	return UString(minBuf);
928	} else {
929	bool negative = false;
930	if (i < 0) {
931	negative = true;
932	i = -i;
933	}
934	while (i) {
935	*--p = static_cast<unsigned short>((i % 10) + '0');
936	i /= 10;
937	}
938	if (negative)
939	*--p = '-';
940	}
941
942	return UString(p, static_cast<int>(end - p));
943	}
944
945	UString UString::from(long long i)
946	{
947	UChar buf[1 + sizeof(i) * 3];
948	UChar* end = buf + sizeof(buf) / sizeof(UChar);
949	UChar* p = end;
950
951	if (i == 0)
952	*--p = '0';
953	else if (i == std::numeric_limits<long long>::min()) {
954	char minBuf[1 + sizeof(i) * 3];
955	#if PLATFORM(WIN_OS)
956	snprintf(minBuf, sizeof(minBuf) - 1, "%I64d", std::numeric_limits<long long>::min());
957	#else
958	snprintf(minBuf, sizeof(minBuf) - 1, "%lld", std::numeric_limits<long long>::min());
959	#endif
960	return UString(minBuf);
961	} else {
962	bool negative = false;
963	if (i < 0) {
964	negative = true;
965	i = -i;
966	}
967	while (i) {
968	*--p = static_cast<unsigned short>((i % 10) + '0');
969	i /= 10;
970	}
971	if (negative)
972	*--p = '-';
973	}
974
975	return UString(p, static_cast<int>(end - p));
976	}
977
978	UString UString::from(unsigned int u)
979	{
980	UChar buf[sizeof(u) * 3];
981	UChar* end = buf + sizeof(buf) / sizeof(UChar);
982	UChar* p = end;
983
984	if (u == 0)
985	*--p = '0';
986	else {
987	while (u) {
988	*--p = static_cast<unsigned short>((u % 10) + '0');
989	u /= 10;
990	}
991	}
992
993	return UString(p, static_cast<int>(end - p));
994	}
995
996	UString UString::from(long l)
997	{
998	UChar buf[1 + sizeof(l) * 3];
999	UChar* end = buf + sizeof(buf) / sizeof(UChar);
1000	UChar* p = end;
1001
1002	if (l == 0)
1003	*--p = '0';
1004	else if (l == LONG_MIN) {
1005	char minBuf[1 + sizeof(l) * 3];
1006	sprintf(minBuf, "%ld", LONG_MIN);
1007	return UString(minBuf);
1008	} else {
1009	bool negative = false;
1010	if (l < 0) {
1011	negative = true;
1012	l = -l;
1013	}
1014	while (l) {
1015	*--p = static_cast<unsigned short>((l % 10) + '0');
1016	l /= 10;
1017	}
1018	if (negative)
1019	*--p = '-';
1020	}
1021
1022	return UString(p, static_cast<int>(end - p));
1023	}
1024
1025	UString UString::from(double d)
1026	{
1027	// avoid ever printing -NaN, in JS conceptually there is only one NaN value
1028	if (isnan(d))
1029	return "NaN";
1030
1031	char buf[80];
1032	int decimalPoint;
1033	int sign;
1034
1035	char result[80];
1036	WTF::dtoa(result, d, 0, &decimalPoint, &sign, NULL);
1037	int length = static_cast<int>(strlen(result));
1038
1039	int i = 0;
1040	if (sign)
1041	buf[i++] = '-';
1042
1043	if (decimalPoint <= 0 && decimalPoint > -6) {
1044	buf[i++] = '0';
1045	buf[i++] = '.';
1046	for (int j = decimalPoint; j < 0; j++)
1047	buf[i++] = '0';
1048	strcpy(buf + i, result);
1049	} else if (decimalPoint <= 21 && decimalPoint > 0) {
1050	if (length <= decimalPoint) {
1051	strcpy(buf + i, result);
1052	i += length;
1053	for (int j = 0; j < decimalPoint - length; j++)
1054	buf[i++] = '0';
1055	buf[i] = '\0';
1056	} else {
1057	strncpy(buf + i, result, decimalPoint);
1058	i += decimalPoint;
1059	buf[i++] = '.';
1060	strcpy(buf + i, result + decimalPoint);
1061	}
1062	} else if (result[0] < '0' \|\| result[0] > '9')
1063	strcpy(buf + i, result);
1064	else {
1065	buf[i++] = result[0];
1066	if (length > 1) {
1067	buf[i++] = '.';
1068	strcpy(buf + i, result + 1);
1069	i += length - 1;
1070	}
1071
1072	buf[i++] = 'e';
1073	buf[i++] = (decimalPoint >= 0) ? '+' : '-';
1074	// decimalPoint can't be more than 3 digits decimal given the
1075	// nature of float representation
1076	int exponential = decimalPoint - 1;
1077	if (exponential < 0)
1078	exponential = -exponential;
1079	if (exponential >= 100)
1080	buf[i++] = static_cast<char>('0' + exponential / 100);
1081	if (exponential >= 10)
1082	buf[i++] = static_cast<char>('0' + (exponential % 100) / 10);
1083	buf[i++] = static_cast<char>('0' + exponential % 10);
1084	buf[i++] = '\0';
1085	}
1086
1087	return UString(buf);
1088	}
1089
1090	UString UString::spliceSubstringsWithSeparators(const Range* substringRanges, int rangeCount, const UString* separators, int separatorCount) const
1091	{
1092	m_rep->checkConsistency();
1093
1094	if (rangeCount == 1 && separatorCount == 0) {
1095	int thisSize = size();
1096	int position = substringRanges[0].position;
1097	int length = substringRanges[0].length;
1098	if (position <= 0 && length >= thisSize)
1099	return *this;
1100	return UString::Rep::create(m_rep, max(0, position), min(thisSize, length));
1101	}
1102
1103	int totalLength = 0;
1104	for (int i = 0; i < rangeCount; i++)
1105	totalLength += substringRanges[i].length;
1106	for (int i = 0; i < separatorCount; i++)
1107	totalLength += separators[i].size();
1108
1109	if (totalLength == 0)
1110	return "";
1111
1112	UChar* buffer;
1113	if (!allocChars(totalLength).getValue(buffer))
1114	return null();
1115
1116	int maxCount = max(rangeCount, separatorCount);
1117	int bufferPos = 0;
1118	for (int i = 0; i < maxCount; i++) {
1119	if (i < rangeCount) {
1120	copyChars(buffer + bufferPos, data() + substringRanges[i].position, substringRanges[i].length);
1121	bufferPos += substringRanges[i].length;
1122	}
1123	if (i < separatorCount) {
1124	copyChars(buffer + bufferPos, separators[i].data(), separators[i].size());
1125	bufferPos += separators[i].size();
1126	}
1127	}
1128
1129	return UString::Rep::create(buffer, totalLength);
1130	}
1131
1132	UString UString::replaceRange(int rangeStart, int rangeLength, const UString& replacement) const
1133	{
1134	m_rep->checkConsistency();
1135
1136	int replacementLength = replacement.size();
1137	int totalLength = size() - rangeLength + replacementLength;
1138	if (totalLength == 0)
1139	return "";
1140
1141	UChar* buffer;
1142	if (!allocChars(totalLength).getValue(buffer))
1143	return null();
1144
1145	copyChars(buffer, data(), rangeStart);
1146	copyChars(buffer + rangeStart, replacement.data(), replacementLength);
1147	int rangeEnd = rangeStart + rangeLength;
1148	copyChars(buffer + rangeStart + replacementLength, data() + rangeEnd, size() - rangeEnd);
1149
1150	return UString::Rep::create(buffer, totalLength);
1151	}
1152
1153
1154	UString& UString::append(const UString &t)
1155	{
1156	m_rep->checkConsistency();
1157	t.rep()->checkConsistency();
1158
1159	int thisSize = size();
1160	int thisOffset = m_rep->offset;
1161	int tSize = t.size();
1162	int length = thisSize + tSize;
1163	BaseString* base = m_rep->baseString();
1164
1165	// possible cases:
1166	if (thisSize == 0) {
1167	// this is empty
1168	*this = t;
1169	} else if (tSize == 0) {
1170	// t is empty
1171	} else if (m_rep == base && !base->isShared()) {
1172	// this is direct and has refcount of 1 (so we can just alter it directly)
1173	expandCapacity(newCapacityWithOverflowCheck(thisOffset, length));
1174	if (data()) {
1175	copyChars(m_rep->data() + thisSize, t.data(), tSize);
1176	m_rep->len = length;
1177	m_rep->_hash = 0;
1178	}
1179	} else if (thisOffset + thisSize == base->usedCapacity && thisSize >= minShareSize && !base->isBufferReadOnly()) {
1180	// this reaches the end of the buffer - extend it if it's long enough to append to
1181	expandCapacity(newCapacityWithOverflowCheck(thisOffset, length));
1182	if (data()) {
1183	copyChars(m_rep->data() + thisSize, t.data(), tSize);
1184	m_rep = Rep::create(m_rep, 0, length);
1185	}
1186	} else {
1187	// This is shared in some way that prevents us from modifying base, so we must make a whole new string.
1188	size_t newCapacity = expandedSize(length, 0);
1189	UChar* d;
1190	if (!allocChars(newCapacity).getValue(d))
1191	makeNull();
1192	else {
1193	copyChars(d, data(), thisSize);
1194	copyChars(d + thisSize, t.data(), tSize);
1195	m_rep = Rep::create(d, length);
1196	m_rep->baseString()->capacity = newCapacity;
1197	}
1198	}
1199
1200	m_rep->checkConsistency();
1201	t.rep()->checkConsistency();
1202
1203	return *this;
1204	}
1205
1206	UString& UString::append(const UChar* tData, int tSize)
1207	{
1208	m_rep = concatenate(m_rep.release(), tData, tSize);
1209	return *this;
1210	}
1211
1212	UString& UString::appendNumeric(int i)
1213	{
1214	m_rep = concatenate(rep(), i);
1215	return *this;
1216	}
1217
1218	UString& UString::appendNumeric(double d)
1219	{
1220	m_rep = concatenate(rep(), d);
1221	return *this;
1222	}
1223
1224	UString& UString::append(const char* t)
1225	{
1226	m_rep = concatenate(m_rep.release(), t);
1227	return *this;
1228	}
1229
1230	UString& UString::append(UChar c)
1231	{
1232	m_rep->checkConsistency();
1233
1234	int thisOffset = m_rep->offset;
1235	int length = size();
1236	BaseString* base = m_rep->baseString();
1237
1238	// possible cases:
1239	if (length == 0) {
1240	// this is empty - must make a new m_rep because we don't want to pollute the shared empty one
1241	size_t newCapacity = expandedSize(1, 0);
1242	UChar* d;
1243	if (!allocChars(newCapacity).getValue(d))
1244	makeNull();
1245	else {
1246	d[0] = c;
1247	m_rep = Rep::create(d, 1);
1248	m_rep->baseString()->capacity = newCapacity;
1249	}
1250	} else if (m_rep == base && !base->isShared()) {
1251	// this is direct and has refcount of 1 (so we can just alter it directly)
1252	expandCapacity(newCapacityWithOverflowCheck(thisOffset, length, true));
1253	UChar* d = m_rep->data();
1254	if (d) {
1255	d[length] = c;
1256	m_rep->len = length + 1;
1257	m_rep->_hash = 0;
1258	}
1259	} else if (thisOffset + length == base->usedCapacity && length >= minShareSize && !base->isBufferReadOnly()) {
1260	// this reaches the end of the string - extend it and share
1261	expandCapacity(newCapacityWithOverflowCheck(thisOffset, length, true));
1262	UChar* d = m_rep->data();
1263	if (d) {
1264	d[length] = c;
1265	m_rep = Rep::create(m_rep, 0, length + 1);
1266	}
1267	} else {
1268	// This is shared in some way that prevents us from modifying base, so we must make a whole new string.
1269	size_t newCapacity = expandedSize(length + 1, 0);
1270	UChar* d;
1271	if (!allocChars(newCapacity).getValue(d))
1272	makeNull();
1273	else {
1274	copyChars(d, data(), length);
1275	d[length] = c;
1276	m_rep = Rep::create(d, length + 1);
1277	m_rep->baseString()->capacity = newCapacity;
1278	}
1279	}
1280
1281	m_rep->checkConsistency();
1282
1283	return *this;
1284	}
1285
1286	bool UString::getCString(CStringBuffer& buffer) const
1287	{
1288	int length = size();
1289	int neededSize = length + 1;
1290	buffer.resize(neededSize);
1291	char* buf = buffer.data();
1292
1293	UChar ored = 0;
1294	const UChar* p = data();
1295	char* q = buf;
1296	const UChar* limit = p + length;
1297	while (p != limit) {
1298	UChar c = p[0];
1299	ored \|= c;
1300	*q = static_cast<char>(c);
1301	++p;
1302	++q;
1303	}
1304	*q = '\0';
1305
1306	return !(ored & 0xFF00);
1307	}
1308
1309	char* UString::ascii() const
1310	{
1311	int length = size();
1312	int neededSize = length + 1;
1313	delete[] statBuffer;
1314	statBuffer = new char[neededSize];
1315
1316	const UChar* p = data();
1317	char* q = statBuffer;
1318	const UChar* limit = p + length;
1319	while (p != limit) {
1320	*q = static_cast<char>(p[0]);
1321	++p;
1322	++q;
1323	}
1324	*q = '\0';
1325
1326	return statBuffer;
1327	}
1328
1329	UString& UString::operator=(const char* c)
1330	{
1331	if (!c) {
1332	m_rep = &Rep::null();
1333	return *this;
1334	}
1335
1336	if (!c[0]) {
1337	m_rep = &Rep::empty();
1338	return *this;
1339	}
1340
1341	int l = static_cast<int>(strlen(c));
1342	UChar* d;
1343	BaseString* base = m_rep->baseString();
1344	if (!base->isShared() && l <= base->capacity && m_rep == base && m_rep->offset == 0 && base->preCapacity == 0) {
1345	d = base->buf;
1346	m_rep->_hash = 0;
1347	m_rep->len = l;
1348	} else {
1349	if (!allocChars(l).getValue(d)) {
1350	makeNull();
1351	return *this;
1352	}
1353	m_rep = Rep::create(d, l);
1354	}
1355	for (int i = 0; i < l; i++)
1356	d[i] = static_cast<unsigned char>(c[i]); // use unsigned char to zero-extend instead of sign-extend
1357
1358	return *this;
1359	}
1360
1361	bool UString::is8Bit() const
1362	{
1363	const UChar* u = data();
1364	const UChar* limit = u + size();
1365	while (u < limit) {
1366	if (u[0] > 0xFF)
1367	return false;
1368	++u;
1369	}
1370
1371	return true;
1372	}
1373
1374	UChar UString::operator[](int pos) const
1375	{
1376	if (pos >= size())
1377	return '\0';
1378	return data()[pos];
1379	}
1380
1381	double UString::toDouble(bool tolerateTrailingJunk, bool tolerateEmptyString) const
1382	{
1383	if (size() == 1) {
1384	UChar c = data()[0];
1385	if (isASCIIDigit(c))
1386	return c - '0';
1387	if (isASCIISpace(c) && tolerateEmptyString)
1388	return 0;
1389	return NaN;
1390	}
1391
1392	// FIXME: If tolerateTrailingJunk is true, then we want to tolerate non-8-bit junk
1393	// after the number, so this is too strict a check.
1394	CStringBuffer s;
1395	if (!getCString(s))
1396	return NaN;
1397	const char* c = s.data();
1398
1399	// skip leading white space
1400	while (isASCIISpace(*c))
1401	c++;
1402
1403	// empty string ?
1404	if (*c == '\0')
1405	return tolerateEmptyString ? 0.0 : NaN;
1406
1407	double d;
1408
1409	// hex number ?
1410	if (c == '0' && ((c + 1) == 'x' \|\| *(c + 1) == 'X')) {
1411	const char* firstDigitPosition = c + 2;
1412	c++;
1413	d = 0.0;
1414	while (*(++c)) {
1415	if (c >= '0' && c <= '9')
1416	d = d * 16.0 + *c - '0';
1417	else if ((c >= 'A' && c <= 'F') \|\| (c >= 'a' && c <= 'f'))
1418	d = d * 16.0 + (*c & 0xdf) - 'A' + 10.0;
1419	else
1420	break;
1421	}
1422
1423	if (d >= mantissaOverflowLowerBound)
1424	d = parseIntOverflow(firstDigitPosition, c - firstDigitPosition, 16);
1425	} else {
1426	// regular number ?
1427	char* end;
1428	d = WTF::strtod(c, &end);
1429	if ((d != 0.0 \|\| end != c) && d != Inf && d != -Inf) {
1430	c = end;
1431	} else {
1432	double sign = 1.0;
1433
1434	if (*c == '+')
1435	c++;
1436	else if (*c == '-') {
1437	sign = -1.0;
1438	c++;
1439	}
1440
1441	// We used strtod() to do the conversion. However, strtod() handles
1442	// infinite values slightly differently than JavaScript in that it
1443	// converts the string "inf" with any capitalization to infinity,
1444	// whereas the ECMA spec requires that it be converted to NaN.
1445
1446	if (c[0] == 'I' && c[1] == 'n' && c[2] == 'f' && c[3] == 'i' && c[4] == 'n' && c[5] == 'i' && c[6] == 't' && c[7] == 'y') {
1447	d = sign * Inf;
1448	c += 8;
1449	} else if ((d == Inf \|\| d == -Inf) && c != 'I' && c != 'i')
1450	c = end;
1451	else
1452	return NaN;
1453	}
1454	}
1455
1456	// allow trailing white space
1457	while (isASCIISpace(*c))
1458	c++;
1459	// don't allow anything after - unless tolerant=true
1460	if (!tolerateTrailingJunk && *c != '\0')
1461	d = NaN;
1462
1463	return d;
1464	}
1465
1466	double UString::toDouble(bool tolerateTrailingJunk) const
1467	{
1468	return toDouble(tolerateTrailingJunk, true);
1469	}
1470
1471	double UString::toDouble() const
1472	{
1473	return toDouble(false, true);
1474	}
1475
1476	uint32_t UString::toUInt32(bool* ok) const
1477	{
1478	double d = toDouble();
1479	bool b = true;
1480
1481	if (d != static_cast<uint32_t>(d)) {
1482	b = false;
1483	d = 0;
1484	}
1485
1486	if (ok)
1487	*ok = b;
1488
1489	return static_cast<uint32_t>(d);
1490	}
1491
1492	uint32_t UString::toUInt32(bool* ok, bool tolerateEmptyString) const
1493	{
1494	double d = toDouble(false, tolerateEmptyString);
1495	bool b = true;
1496
1497	if (d != static_cast<uint32_t>(d)) {
1498	b = false;
1499	d = 0;
1500	}
1501
1502	if (ok)
1503	*ok = b;
1504
1505	return static_cast<uint32_t>(d);
1506	}
1507
1508	uint32_t UString::toStrictUInt32(bool* ok) const
1509	{
1510	if (ok)
1511	*ok = false;
1512
1513	// Empty string is not OK.
1514	int len = m_rep->len;
1515	if (len == 0)
1516	return 0;
1517	const UChar* p = m_rep->data();
1518	unsigned short c = p[0];
1519
1520	// If the first digit is 0, only 0 itself is OK.
1521	if (c == '0') {
1522	if (len == 1 && ok)
1523	*ok = true;
1524	return 0;
1525	}
1526
1527	// Convert to UInt32, checking for overflow.
1528	uint32_t i = 0;
1529	while (1) {
1530	// Process character, turning it into a digit.
1531	if (c < '0' \|\| c > '9')
1532	return 0;
1533	const unsigned d = c - '0';
1534
1535	// Multiply by 10, checking for overflow out of 32 bits.
1536	if (i > 0xFFFFFFFFU / 10)
1537	return 0;
1538	i *= 10;
1539
1540	// Add in the digit, checking for overflow out of 32 bits.
1541	const unsigned max = 0xFFFFFFFFU - d;
1542	if (i > max)
1543	return 0;
1544	i += d;
1545
1546	// Handle end of string.
1547	if (--len == 0) {
1548	if (ok)
1549	*ok = true;
1550	return i;
1551	}
1552
1553	// Get next character.
1554	c = *(++p);
1555	}
1556	}
1557
1558	int UString::find(const UString& f, int pos) const
1559	{
1560	int fsz = f.size();
1561
1562	if (pos < 0)
1563	pos = 0;
1564
1565	if (fsz == 1) {
1566	UChar ch = f[0];
1567	const UChar* end = data() + size();
1568	for (const UChar* c = data() + pos; c < end; c++) {
1569	if (*c == ch)
1570	return static_cast<int>(c - data());
1571	}
1572	return -1;
1573	}
1574
1575	int sz = size();
1576	if (sz < fsz)
1577	return -1;
1578	if (fsz == 0)
1579	return pos;
1580	const UChar* end = data() + sz - fsz;
1581	int fsizeminusone = (fsz - 1) * sizeof(UChar);
1582	const UChar* fdata = f.data();
1583	unsigned short fchar = fdata[0];
1584	++fdata;
1585	for (const UChar* c = data() + pos; c <= end; c++) {
1586	if (c[0] == fchar && !memcmp(c + 1, fdata, fsizeminusone))
1587	return static_cast<int>(c - data());
1588	}
1589
1590	return -1;
1591	}
1592
1593	int UString::find(UChar ch, int pos) const
1594	{
1595	if (pos < 0)
1596	pos = 0;
1597	const UChar* end = data() + size();
1598	for (const UChar* c = data() + pos; c < end; c++) {
1599	if (*c == ch)
1600	return static_cast<int>(c - data());
1601	}
1602
1603	return -1;
1604	}
1605
1606	int UString::rfind(const UString& f, int pos) const
1607	{
1608	int sz = size();
1609	int fsz = f.size();
1610	if (sz < fsz)
1611	return -1;
1612	if (pos < 0)
1613	pos = 0;
1614	if (pos > sz - fsz)
1615	pos = sz - fsz;
1616	if (fsz == 0)
1617	return pos;
1618	int fsizeminusone = (fsz - 1) * sizeof(UChar);
1619	const UChar* fdata = f.data();
1620	for (const UChar* c = data() + pos; c >= data(); c--) {
1621	if (c == fdata && !memcmp(c + 1, fdata + 1, fsizeminusone))
1622	return static_cast<int>(c - data());
1623	}
1624
1625	return -1;
1626	}
1627
1628	int UString::rfind(UChar ch, int pos) const
1629	{
1630	if (isEmpty())
1631	return -1;
1632	if (pos + 1 >= size())
1633	pos = size() - 1;
1634	for (const UChar* c = data() + pos; c >= data(); c--) {
1635	if (*c == ch)
1636	return static_cast<int>(c - data());
1637	}
1638
1639	return -1;
1640	}
1641
1642	UString UString::substr(int pos, int len) const
1643	{
1644	int s = size();
1645
1646	if (pos < 0)
1647	pos = 0;
1648	else if (pos >= s)
1649	pos = s;
1650	if (len < 0)
1651	len = s;
1652	if (pos + len >= s)
1653	len = s - pos;
1654
1655	if (pos == 0 && len == s)
1656	return *this;
1657
1658	return UString(Rep::create(m_rep, pos, len));
1659	}
1660
1661	bool operator==(const UString& s1, const char *s2)
1662	{
1663	if (s2 == 0)
1664	return s1.isEmpty();
1665
1666	const UChar* u = s1.data();
1667	const UChar* uend = u + s1.size();
1668	while (u != uend && *s2) {
1669	if (u[0] != (unsigned char)*s2)
1670	return false;
1671	s2++;
1672	u++;
1673	}
1674
1675	return u == uend && *s2 == 0;
1676	}
1677
1678	bool operator<(const UString& s1, const UString& s2)
1679	{
1680	const int l1 = s1.size();
1681	const int l2 = s2.size();
1682	const int lmin = l1 < l2 ? l1 : l2;
1683	const UChar* c1 = s1.data();
1684	const UChar* c2 = s2.data();
1685	int l = 0;
1686	while (l < lmin && c1 == c2) {
1687	c1++;
1688	c2++;
1689	l++;
1690	}
1691	if (l < lmin)
1692	return (c1[0] < c2[0]);
1693
1694	return (l1 < l2);
1695	}
1696
1697	bool operator>(const UString& s1, const UString& s2)
1698	{
1699	const int l1 = s1.size();
1700	const int l2 = s2.size();
1701	const int lmin = l1 < l2 ? l1 : l2;
1702	const UChar* c1 = s1.data();
1703	const UChar* c2 = s2.data();
1704	int l = 0;
1705	while (l < lmin && c1 == c2) {
1706	c1++;
1707	c2++;
1708	l++;
1709	}
1710	if (l < lmin)
1711	return (c1[0] > c2[0]);
1712
1713	return (l1 > l2);
1714	}
1715
1716	int compare(const UString& s1, const UString& s2)
1717	{
1718	const int l1 = s1.size();
1719	const int l2 = s2.size();
1720	const int lmin = l1 < l2 ? l1 : l2;
1721	const UChar* c1 = s1.data();
1722	const UChar* c2 = s2.data();
1723	int l = 0;
1724	while (l < lmin && c1 == c2) {
1725	c1++;
1726	c2++;
1727	l++;
1728	}
1729
1730	if (l < lmin)
1731	return (c1[0] > c2[0]) ? 1 : -1;
1732
1733	if (l1 == l2)
1734	return 0;
1735
1736	return (l1 > l2) ? 1 : -1;
1737	}
1738
1739	bool equal(const UString::Rep* r, const UString::Rep* b)
1740	{
1741	int length = r->len;
1742	if (length != b->len)
1743	return false;
1744	const UChar* d = r->data();
1745	const UChar* s = b->data();
1746	for (int i = 0; i != length; ++i) {
1747	if (d[i] != s[i])
1748	return false;
1749	}
1750	return true;
1751	}
1752
1753	CString UString::UTF8String(bool strict) const
1754	{
1755	// Allocate a buffer big enough to hold all the characters.
1756	const int length = size();
1757	Vector<char, 1024> buffer(length * 3);
1758
1759	// Convert to runs of 8-bit characters.
1760	char* p = buffer.data();
1761	const UChar* d = reinterpret_cast<const UChar*>(&data()[0]);
1762	ConversionResult result = convertUTF16ToUTF8(&d, d + length, &p, p + buffer.size(), strict);
1763	if (result != conversionOK)
1764	return CString();
1765
1766	return CString(buffer.data(), p - buffer.data());
1767	}
1768
1769	// For use in error handling code paths -- having this not be inlined helps avoid PIC branches to fetch the global on Mac OS X.
1770	NEVER_INLINE void UString::makeNull()
1771	{
1772	m_rep = &Rep::null();
1773	}
1774
1775	// For use in error handling code paths -- having this not be inlined helps avoid PIC branches to fetch the global on Mac OS X.
1776	NEVER_INLINE UString::Rep* UString::nullRep()
1777	{
1778	return &Rep::null();
1779	}
1780
1781	} // namespace JSC

Note: See TracBrowser for help on using the repository browser.

Download in other formats: