Context Navigation

source: webkit/trunk/JavaScriptCore/kjs/ustring.cpp@ 27748

Visit:

Last change on this file since 27748 was 27748, checked in by [email protected], 18 years ago

Reviewed by Sam Weinig.

Fixed http://bugs.webkit.org/show_bug.cgi?id=15958
base64 spends 1.1% of total time checking for special Infinity case

Use a fast character test instead of calling strncmp.

1.1% speedup on string-base64. SunSpider reports a .4% speedup overall;
Sharks reports only .1%. Who are you going to believe? Huh?

kjs/ustring.cpp: (KJS::UString::toDouble):

Property svn:eol-style set to native

File size: 30.7 KB

Line
1	// -*- c-basic-offset: 2 -*-
2	/*
3	* Copyright (C) 1999-2000 Harri Porten ([email protected])
4	* Copyright (C) 2004, 2005, 2006, 2007 Apple Inc. All rights reserved.
5	* Copyright (C) 2007 Cameron Zwarich ([email protected])
6	*
7	* This library is free software; you can redistribute it and/or
8	* modify it under the terms of the GNU Library General Public
9	* License as published by the Free Software Foundation; either
10	* version 2 of the License, or (at your option) any later version.
11	*
12	* This library is distributed in the hope that it will be useful,
13	* but WITHOUT ANY WARRANTY; without even the implied warranty of
14	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15	* Library General Public License for more details.
16	*
17	* You should have received a copy of the GNU Library General Public License
18	* along with this library; see the file COPYING.LIB. If not, write to
19	* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20	* Boston, MA 02110-1301, USA.
21	*
22	*/
23
24	#include "config.h"
25	#include "ustring.h"
26
27	#include "JSLock.h"
28	#include "collector.h"
29	#include "dtoa.h"
30	#include "function.h"
31	#include "identifier.h"
32	#include "operations.h"
33	#include <ctype.h>
34	#include <float.h>
35	#include <limits.h>
36	#include <math.h>
37	#include <stdio.h>
38	#include <stdlib.h>
39	#include <wtf/Assertions.h>
40	#include <wtf/ASCIICType.h>
41	#include <wtf/MathExtras.h>
42	#include <wtf/Vector.h>
43
44	#if HAVE(STRING_H)
45	#include <string.h>
46	#endif
47	#if HAVE(STRINGS_H)
48	#include <strings.h>
49	#endif
50
51	using namespace WTF;
52	using namespace std;
53
54	namespace KJS {
55
56	extern const double NaN;
57	extern const double Inf;
58
// Sentinel returned by size computations to signal overflow: the largest
// value a size_t can hold. (Returned by value, so no const qualifier.)
static inline size_t overflowIndicator()
{
    return std::numeric_limits<size_t>::max();
}
60	static inline const size_t maxUChars() { return std::numeric_limits<size_t>::max() / sizeof(UChar); }
61
62	static inline UChar* allocChars(size_t length)
63	{
64	ASSERT(length);
65	if (length > maxUChars())
66	return 0;
67	return static_cast<UChar>(fastMalloc(sizeof(UChar) length));
68	}
69
70	static inline UChar* reallocChars(UChar* buffer, size_t length)
71	{
72	ASSERT(length);
73	if (length > maxUChars())
74	return 0;
75	return static_cast<UChar>(fastRealloc(buffer, sizeof(UChar) length));
76	}
77
78	// we'd rather not do shared substring append for small strings, since
79	// this runs too much risk of a tiny initial string holding down a
80	// huge buffer. This is also tuned to match the extra cost size, so we
81	// don't ever share a buffer that wouldn't be over the extra cost
82	// threshold already.
83	// FIXME: this should be size_t but that would cause warnings until we
84	// fix UString sizes to be size_t instead of int
85	static const int minShareSize = Collector::minExtraCostSize / sizeof(UChar);
86
87	COMPILE_ASSERT(sizeof(UChar) == 2, uchar_is_2_bytes)
88
89	CString::CString(const char *c)
90	{
91	length = strlen(c);
92	data = new char[length+1];
93	memcpy(data, c, length + 1);
94	}
95
96	CString::CString(const char *c, size_t len)
97	{
98	length = len;
99	data = new char[len+1];
100	memcpy(data, c, len);
101	data[len] = 0;
102	}
103
104	CString::CString(const CString &b)
105	{
106	length = b.length;
107	if (b.data) {
108	data = new char[length+1];
109	memcpy(data, b.data, length + 1);
110	}
111	else
112	data = 0;
113	}
114
115	CString::~CString()
116	{
117	delete [] data;
118	}
119
120	CString &CString::append(const CString &t)
121	{
122	char *n;
123	n = new char[length+t.length+1];
124	if (length)
125	memcpy(n, data, length);
126	if (t.length)
127	memcpy(n+length, t.data, t.length);
128	length += t.length;
129	n[length] = 0;
130
131	delete [] data;
132	data = n;
133
134	return *this;
135	}
136
137	CString &CString::operator=(const char *c)
138	{
139	if (data)
140	delete [] data;
141	length = strlen(c);
142	data = new char[length+1];
143	memcpy(data, c, length + 1);
144
145	return *this;
146	}
147
148	CString &CString::operator=(const CString &str)
149	{
150	if (this == &str)
151	return *this;
152
153	if (data)
154	delete [] data;
155	length = str.length;
156	if (str.data) {
157	data = new char[length + 1];
158	memcpy(data, str.data, length + 1);
159	}
160	else
161	data = 0;
162
163	return *this;
164	}
165
166	bool operator==(const CString& c1, const CString& c2)
167	{
168	size_t len = c1.size();
169	return len == c2.size() && (len == 0 \|\| memcmp(c1.c_str(), c2.c_str(), len) == 0);
170	}
171
172	// Hack here to avoid a global with a constructor; point to an unsigned short instead of a UChar.
173	static unsigned short almostUChar;
174	UString::Rep UString::Rep::null = { 0, 0, 1, 0, 0, &UString::Rep::null, 0, 0, 0, 0, 0 };
175	UString::Rep UString::Rep::empty = { 0, 0, 1, 0, 0, &UString::Rep::empty, reinterpret_cast<UChar*>(&almostUChar), 0, 0, 0, 0 };
176	const int normalStatBufferSize = 4096;
177	static char *statBuffer = 0;
178	static int statBufferSize = 0;
179
180	PassRefPtr<UString::Rep> UString::Rep::createCopying(const UChar *d, int l)
181	{
182	ASSERT(JSLock::lockCount() > 0);
183
184	int sizeInBytes = l * sizeof(UChar);
185	UChar copyD = static_cast<UChar >(fastMalloc(sizeInBytes));
186	memcpy(copyD, d, sizeInBytes);
187
188	return create(copyD, l);
189	}
190
191	PassRefPtr<UString::Rep> UString::Rep::create(UChar *d, int l)
192	{
193	ASSERT(JSLock::lockCount() > 0);
194
195	Rep* r = new Rep;
196	r->offset = 0;
197	r->len = l;
198	r->rc = 1;
199	r->_hash = 0;
200	r->isIdentifier = 0;
201	r->baseString = r;
202	r->buf = d;
203	r->usedCapacity = l;
204	r->capacity = l;
205	r->usedPreCapacity = 0;
206	r->preCapacity = 0;
207
208	// steal the single reference this Rep was created with
209	return adoptRef(r);
210	}
211
212	PassRefPtr<UString::Rep> UString::Rep::create(PassRefPtr<Rep> base, int offset, int length)
213	{
214	ASSERT(JSLock::lockCount() > 0);
215	ASSERT(base);
216
217	int baseOffset = base->offset;
218
219	base = base->baseString;
220
221	ASSERT(-(offset + baseOffset) <= base->usedPreCapacity);
222	ASSERT(offset + baseOffset + length <= base->usedCapacity);
223
224	Rep *r = new Rep;
225	r->offset = baseOffset + offset;
226	r->len = length;
227	r->rc = 1;
228	r->_hash = 0;
229	r->isIdentifier = 0;
230	r->baseString = base.releaseRef();
231	r->buf = 0;
232	r->usedCapacity = 0;
233	r->capacity = 0;
234	r->usedPreCapacity = 0;
235	r->preCapacity = 0;
236
237	// steal the single reference this Rep was created with
238	return adoptRef(r);
239	}
240
241	void UString::Rep::destroy()
242	{
243	ASSERT(JSLock::lockCount() > 0);
244
245	if (isIdentifier)
246	Identifier::remove(this);
247	if (baseString != this) {
248	baseString->deref();
249	} else {
250	fastFree(buf);
251	}
252	delete this;
253	}
254
255	// Golden ratio - arbitrary start value to avoid mapping all 0's to all 0's
256	// or anything like that.
257	const unsigned PHI = 0x9e3779b9U;
258
259	// Paul Hsieh's SuperFastHash
260	// http://www.azillionmonkeys.com/qed/hash.html
261	unsigned UString::Rep::computeHash(const UChar *s, int len)
262	{
263	unsigned l = len;
264	uint32_t hash = PHI;
265	uint32_t tmp;
266
267	int rem = l & 1;
268	l >>= 1;
269
270	// Main loop
271	for (; l > 0; l--) {
272	hash += s[0].uc;
273	tmp = (s[1].uc << 11) ^ hash;
274	hash = (hash << 16) ^ tmp;
275	s += 2;
276	hash += hash >> 11;
277	}
278
279	// Handle end case
280	if (rem) {
281	hash += s[0].uc;
282	hash ^= hash << 11;
283	hash += hash >> 17;
284	}
285
286	// Force "avalanching" of final 127 bits
287	hash ^= hash << 3;
288	hash += hash >> 5;
289	hash ^= hash << 2;
290	hash += hash >> 15;
291	hash ^= hash << 10;
292
293	// this avoids ever returning a hash code of 0, since that is used to
294	// signal "hash not computed yet", using a value that is likely to be
295	// effectively the same as 0 when the low bits are masked
296	if (hash == 0)
297	hash = 0x80000000;
298
299	return hash;
300	}
301
302	// Paul Hsieh's SuperFastHash
303	// http://www.azillionmonkeys.com/qed/hash.html
304	unsigned UString::Rep::computeHash(const char *s)
305	{
306	// This hash is designed to work on 16-bit chunks at a time. But since the normal case
307	// (above) is to hash UTF-16 characters, we just treat the 8-bit chars as if they
308	// were 16-bit chunks, which should give matching results
309
310	uint32_t hash = PHI;
311	uint32_t tmp;
312	size_t l = strlen(s);
313
314	size_t rem = l & 1;
315	l >>= 1;
316
317	// Main loop
318	for (; l > 0; l--) {
319	hash += (unsigned char)s[0];
320	tmp = ((unsigned char)s[1] << 11) ^ hash;
321	hash = (hash << 16) ^ tmp;
322	s += 2;
323	hash += hash >> 11;
324	}
325
326	// Handle end case
327	if (rem) {
328	hash += (unsigned char)s[0];
329	hash ^= hash << 11;
330	hash += hash >> 17;
331	}
332
333	// Force "avalanching" of final 127 bits
334	hash ^= hash << 3;
335	hash += hash >> 5;
336	hash ^= hash << 2;
337	hash += hash >> 15;
338	hash ^= hash << 10;
339
340	// this avoids ever returning a hash code of 0, since that is used to
341	// signal "hash not computed yet", using a value that is likely to be
342	// effectively the same as 0 when the low bits are masked
343	if (hash == 0)
344	hash = 0x80000000;
345
346	return hash;
347	}
348
349	// put these early so they can be inlined
350	inline size_t UString::expandedSize(size_t size, size_t otherSize) const
351	{
352	// Do the size calculation in two parts, returning overflowIndicator if
353	// we overflow the maximum value that we can handle.
354
355	if (size > maxUChars())
356	return overflowIndicator();
357
358	size_t expandedSize = ((size + 10) / 10 * 11) + 1;
359	if (maxUChars() - expandedSize < otherSize)
360	return overflowIndicator();
361
362	return expandedSize + otherSize;
363	}
364
365	inline int UString::usedCapacity() const
366	{
367	return m_rep->baseString->usedCapacity;
368	}
369
370	inline int UString::usedPreCapacity() const
371	{
372	return m_rep->baseString->usedPreCapacity;
373	}
374
375	void UString::expandCapacity(int requiredLength)
376	{
377	Rep* r = m_rep->baseString;
378
379	if (requiredLength > r->capacity) {
380	size_t newCapacity = expandedSize(requiredLength, r->preCapacity);
381	UChar* oldBuf = r->buf;
382	r->buf = reallocChars(r->buf, newCapacity);
383	if (!r->buf) {
384	r->buf = oldBuf;
385	m_rep = &Rep::null;
386	return;
387	}
388	r->capacity = newCapacity - r->preCapacity;
389	}
390	if (requiredLength > r->usedCapacity) {
391	r->usedCapacity = requiredLength;
392	}
393	}
394
395	void UString::expandPreCapacity(int requiredPreCap)
396	{
397	Rep* r = m_rep->baseString;
398
399	if (requiredPreCap > r->preCapacity) {
400	size_t newCapacity = expandedSize(requiredPreCap, r->capacity);
401	int delta = newCapacity - r->capacity - r->preCapacity;
402
403	UChar* newBuf = allocChars(newCapacity);
404	if (!newBuf) {
405	m_rep = &Rep::null;
406	return;
407	}
408	memcpy(newBuf + delta, r->buf, (r->capacity + r->preCapacity) * sizeof(UChar));
409	fastFree(r->buf);
410	r->buf = newBuf;
411
412	r->preCapacity = newCapacity - r->capacity;
413	}
414	if (requiredPreCap > r->usedPreCapacity) {
415	r->usedPreCapacity = requiredPreCap;
416	}
417	}
418
419	UString::UString(const char *c)
420	{
421	if (!c) {
422	m_rep = &Rep::null;
423	return;
424	}
425
426	if (!c[0]) {
427	m_rep = &Rep::empty;
428	return;
429	}
430
431	size_t length = strlen(c);
432	UChar *d = allocChars(length);
433	if (!d)
434	m_rep = &Rep::null;
435	else {
436	for (size_t i = 0; i < length; i++)
437	d[i].uc = c[i];
438	m_rep = Rep::create(d, static_cast<int>(length));
439	}
440	}
441
442	UString::UString(const UChar *c, int length)
443	{
444	if (length == 0)
445	m_rep = &Rep::empty;
446	else
447	m_rep = Rep::createCopying(c, length);
448	}
449
450	UString::UString(UChar *c, int length, bool copy)
451	{
452	if (length == 0)
453	m_rep = &Rep::empty;
454	else if (copy)
455	m_rep = Rep::createCopying(c, length);
456	else
457	m_rep = Rep::create(c, length);
458	}
459
460	UString::UString(const UString &a, const UString &b)
461	{
462	int aSize = a.size();
463	int aOffset = a.m_rep->offset;
464	int bSize = b.size();
465	int bOffset = b.m_rep->offset;
466	int length = aSize + bSize;
467
468	// possible cases:
469
470	if (aSize == 0) {
471	// a is empty
472	m_rep = b.m_rep;
473	} else if (bSize == 0) {
474	// b is empty
475	m_rep = a.m_rep;
476	} else if (aOffset + aSize == a.usedCapacity() && aSize >= minShareSize && 4 * aSize >= bSize &&
477	(-bOffset != b.usedPreCapacity() \|\| aSize >= bSize)) {
478	// - a reaches the end of its buffer so it qualifies for shared append
479	// - also, it's at least a quarter the length of b - appending to a much shorter
480	// string does more harm than good
481	// - however, if b qualifies for prepend and is longer than a, we'd rather prepend
482	UString x(a);
483	x.expandCapacity(aOffset + length);
484	if (a.data() && x.data()) {
485	memcpy(const_cast<UChar >(a.data() + aSize), b.data(), bSize sizeof(UChar));
486	m_rep = Rep::create(a.m_rep, 0, length);
487	} else
488	m_rep = &Rep::null;
489	} else if (-bOffset == b.usedPreCapacity() && bSize >= minShareSize && 4 * bSize >= aSize) {
490	// - b reaches the beginning of its buffer so it qualifies for shared prepend
491	// - also, it's at least a quarter the length of a - prepending to a much shorter
492	// string does more harm than good
493	UString y(b);
494	y.expandPreCapacity(-bOffset + aSize);
495	if (b.data() && y.data()) {
496	memcpy(const_cast<UChar >(b.data() - aSize), a.data(), aSize sizeof(UChar));
497	m_rep = Rep::create(b.m_rep, -aSize, length);
498	} else
499	m_rep = &Rep::null;
500	} else {
501	// a does not qualify for append, and b does not qualify for prepend, gotta make a whole new string
502	size_t newCapacity = expandedSize(length, 0);
503	UChar* d = allocChars(newCapacity);
504	if (!d)
505	m_rep = &Rep::null;
506	else {
507	memcpy(d, a.data(), aSize * sizeof(UChar));
508	memcpy(d + aSize, b.data(), bSize * sizeof(UChar));
509	m_rep = Rep::create(d, length);
510	m_rep->capacity = newCapacity;
511	}
512	}
513	}
514
515	const UString& UString::null()
516	{
517	static UString* n = new UString;
518	return *n;
519	}
520
521	UString UString::from(int i)
522	{
523	UChar buf[1 + sizeof(i) * 3];
524	UChar *end = buf + sizeof(buf) / sizeof(UChar);
525	UChar *p = end;
526
527	if (i == 0) {
528	*--p = '0';
529	} else if (i == INT_MIN) {
530	char minBuf[1 + sizeof(i) * 3];
531	sprintf(minBuf, "%d", INT_MIN);
532	return UString(minBuf);
533	} else {
534	bool negative = false;
535	if (i < 0) {
536	negative = true;
537	i = -i;
538	}
539	while (i) {
540	*--p = (unsigned short)((i % 10) + '0');
541	i /= 10;
542	}
543	if (negative) {
544	*--p = '-';
545	}
546	}
547
548	return UString(p, static_cast<int>(end - p));
549	}
550
551	UString UString::from(unsigned int u)
552	{
553	UChar buf[sizeof(u) * 3];
554	UChar *end = buf + sizeof(buf) / sizeof(UChar);
555	UChar *p = end;
556
557	if (u == 0) {
558	*--p = '0';
559	} else {
560	while (u) {
561	*--p = (unsigned short)((u % 10) + '0');
562	u /= 10;
563	}
564	}
565
566	return UString(p, static_cast<int>(end - p));
567	}
568
569	UString UString::from(long l)
570	{
571	UChar buf[1 + sizeof(l) * 3];
572	UChar *end = buf + sizeof(buf) / sizeof(UChar);
573	UChar *p = end;
574
575	if (l == 0) {
576	*--p = '0';
577	} else if (l == LONG_MIN) {
578	char minBuf[1 + sizeof(l) * 3];
579	sprintf(minBuf, "%ld", LONG_MIN);
580	return UString(minBuf);
581	} else {
582	bool negative = false;
583	if (l < 0) {
584	negative = true;
585	l = -l;
586	}
587	while (l) {
588	*--p = (unsigned short)((l % 10) + '0');
589	l /= 10;
590	}
591	if (negative) {
592	*--p = '-';
593	}
594	}
595
596	return UString(p, static_cast<int>(end - p));
597	}
598
599	UString UString::from(double d)
600	{
601	// avoid ever printing -NaN, in JS conceptually there is only one NaN value
602	if (isnan(d))
603	return "NaN";
604
605	char buf[80];
606	int decimalPoint;
607	int sign;
608
609	char *result = kjs_dtoa(d, 0, 0, &decimalPoint, &sign, NULL);
610	int length = static_cast<int>(strlen(result));
611
612	int i = 0;
613	if (sign) {
614	buf[i++] = '-';
615	}
616
617	if (decimalPoint <= 0 && decimalPoint > -6) {
618	buf[i++] = '0';
619	buf[i++] = '.';
620	for (int j = decimalPoint; j < 0; j++) {
621	buf[i++] = '0';
622	}
623	strcpy(buf + i, result);
624	} else if (decimalPoint <= 21 && decimalPoint > 0) {
625	if (length <= decimalPoint) {
626	strcpy(buf + i, result);
627	i += length;
628	for (int j = 0; j < decimalPoint - length; j++) {
629	buf[i++] = '0';
630	}
631	buf[i] = '\0';
632	} else {
633	strncpy(buf + i, result, decimalPoint);
634	i += decimalPoint;
635	buf[i++] = '.';
636	strcpy(buf + i, result + decimalPoint);
637	}
638	} else if (result[0] < '0' \|\| result[0] > '9') {
639	strcpy(buf + i, result);
640	} else {
641	buf[i++] = result[0];
642	if (length > 1) {
643	buf[i++] = '.';
644	strcpy(buf + i, result + 1);
645	i += length - 1;
646	}
647
648	buf[i++] = 'e';
649	buf[i++] = (decimalPoint >= 0) ? '+' : '-';
650	// decimalPoint can't be more than 3 digits decimal given the
651	// nature of float representation
652	int exponential = decimalPoint - 1;
653	if (exponential < 0)
654	exponential = -exponential;
655	if (exponential >= 100)
656	buf[i++] = static_cast<char>('0' + exponential / 100);
657	if (exponential >= 10)
658	buf[i++] = static_cast<char>('0' + (exponential % 100) / 10);
659	buf[i++] = static_cast<char>('0' + exponential % 10);
660	buf[i++] = '\0';
661	}
662
663	kjs_freedtoa(result);
664
665	return UString(buf);
666	}
667
668	UString UString::spliceSubstringsWithSeparators(const Range* substringRanges, int rangeCount, const UString* separators, int separatorCount) const
669	{
670	if (rangeCount == 1 && separatorCount == 0) {
671	int thisSize = size();
672	int position = substringRanges[0].position;
673	int length = substringRanges[0].length;
674	if (position <= 0 && length >= thisSize)
675	return *this;
676	return UString::Rep::create(m_rep, max(0, position), min(thisSize, length));
677	}
678
679	int totalLength = 0;
680	for (int i = 0; i < rangeCount; i++)
681	totalLength += substringRanges[i].length;
682	for (int i = 0; i < separatorCount; i++)
683	totalLength += separators[i].size();
684
685	if (totalLength == 0)
686	return "";
687
688	UChar* buffer = allocChars(totalLength);
689	if (!buffer)
690	return null();
691
692	int maxCount = max(rangeCount, separatorCount);
693	int bufferPos = 0;
694	for (int i = 0; i < maxCount; i++) {
695	if (i < rangeCount) {
696	memcpy(buffer + bufferPos, data() + substringRanges[i].position, substringRanges[i].length * sizeof(UChar));
697	bufferPos += substringRanges[i].length;
698	}
699	if (i < separatorCount) {
700	memcpy(buffer + bufferPos, separators[i].data(), separators[i].size() * sizeof(UChar));
701	bufferPos += separators[i].size();
702	}
703	}
704
705	return UString::Rep::create(buffer, totalLength);
706	}
707
708	UString &UString::append(const UString &t)
709	{
710	int thisSize = size();
711	int thisOffset = m_rep->offset;
712	int tSize = t.size();
713	int length = thisSize + tSize;
714
715	// possible cases:
716	if (thisSize == 0) {
717	// this is empty
718	*this = t;
719	} else if (tSize == 0) {
720	// t is empty
721	} else if (m_rep->baseIsSelf() && m_rep->rc == 1) {
722	// this is direct and has refcount of 1 (so we can just alter it directly)
723	expandCapacity(thisOffset + length);
724	if (data()) {
725	memcpy(const_cast<UChar>(data() + thisSize), t.data(), tSize sizeof(UChar));
726	m_rep->len = length;
727	m_rep->_hash = 0;
728	}
729	} else if (thisOffset + thisSize == usedCapacity() && thisSize >= minShareSize) {
730	// this reaches the end of the buffer - extend it if it's long enough to append to
731	expandCapacity(thisOffset + length);
732	if (data()) {
733	memcpy(const_cast<UChar>(data() + thisSize), t.data(), tSize sizeof(UChar));
734	m_rep = Rep::create(m_rep, 0, length);
735	}
736	} else {
737	// this is shared with someone using more capacity, gotta make a whole new string
738	size_t newCapacity = expandedSize(length, 0);
739	UChar* d = allocChars(newCapacity);
740	if (!d)
741	m_rep = &Rep::null;
742	else {
743	memcpy(d, data(), thisSize * sizeof(UChar));
744	memcpy(const_cast<UChar>(d + thisSize), t.data(), tSize sizeof(UChar));
745	m_rep = Rep::create(d, length);
746	m_rep->capacity = newCapacity;
747	}
748	}
749
750	return *this;
751	}
752
753	UString &UString::append(const char *t)
754	{
755	int thisSize = size();
756	int thisOffset = m_rep->offset;
757	int tSize = static_cast<int>(strlen(t));
758	int length = thisSize + tSize;
759
760	// possible cases:
761	if (thisSize == 0) {
762	// this is empty
763	*this = t;
764	} else if (tSize == 0) {
765	// t is empty, we'll just return *this below.
766	} else if (m_rep->baseIsSelf() && m_rep->rc == 1) {
767	// this is direct and has refcount of 1 (so we can just alter it directly)
768	expandCapacity(thisOffset + length);
769	UChar d = const_cast<UChar >(data());
770	if (d) {
771	for (int i = 0; i < tSize; ++i)
772	d[thisSize + i] = t[i];
773	m_rep->len = length;
774	m_rep->_hash = 0;
775	}
776	} else if (thisOffset + thisSize == usedCapacity() && thisSize >= minShareSize) {
777	// this string reaches the end of the buffer - extend it
778	expandCapacity(thisOffset + length);
779	UChar d = const_cast<UChar >(data());
780	if (d) {
781	for (int i = 0; i < tSize; ++i)
782	d[thisSize + i] = t[i];
783	m_rep = Rep::create(m_rep, 0, length);
784	}
785	} else {
786	// this is shared with someone using more capacity, gotta make a whole new string
787	size_t newCapacity = expandedSize(length, 0);
788	UChar* d = allocChars(newCapacity);
789	if (!d)
790	m_rep = &Rep::null;
791	else {
792	memcpy(d, data(), thisSize * sizeof(UChar));
793	for (int i = 0; i < tSize; ++i)
794	d[thisSize + i] = t[i];
795	m_rep = Rep::create(d, length);
796	m_rep->capacity = newCapacity;
797	}
798	}
799
800	return *this;
801	}
802
803	UString &UString::append(unsigned short c)
804	{
805	int thisOffset = m_rep->offset;
806	int length = size();
807
808	// possible cases:
809	if (length == 0) {
810	// this is empty - must make a new m_rep because we don't want to pollute the shared empty one
811	size_t newCapacity = expandedSize(1, 0);
812	UChar* d = allocChars(newCapacity);
813	if (!d)
814	m_rep = &Rep::null;
815	else {
816	d[0] = c;
817	m_rep = Rep::create(d, 1);
818	m_rep->capacity = newCapacity;
819	}
820	} else if (m_rep->baseIsSelf() && m_rep->rc == 1) {
821	// this is direct and has refcount of 1 (so we can just alter it directly)
822	expandCapacity(thisOffset + length + 1);
823	UChar d = const_cast<UChar >(data());
824	if (d) {
825	d[length] = c;
826	m_rep->len = length + 1;
827	m_rep->_hash = 0;
828	}
829	} else if (thisOffset + length == usedCapacity() && length >= minShareSize) {
830	// this reaches the end of the string - extend it and share
831	expandCapacity(thisOffset + length + 1);
832	UChar d = const_cast<UChar >(data());
833	if (d) {
834	d[length] = c;
835	m_rep = Rep::create(m_rep, 0, length + 1);
836	}
837	} else {
838	// this is shared with someone using more capacity, gotta make a whole new string
839	size_t newCapacity = expandedSize(length + 1, 0);
840	UChar* d = allocChars(newCapacity);
841	if (!d)
842	m_rep = &Rep::null;
843	else {
844	memcpy(d, data(), length * sizeof(UChar));
845	d[length] = c;
846	m_rep = Rep::create(d, length + 1);
847	m_rep->capacity = newCapacity;
848	}
849	}
850
851	return *this;
852	}
853
854	CString UString::cstring() const
855	{
856	return ascii();
857	}
858
859	char *UString::ascii() const
860	{
861	// Never make the buffer smaller than normalStatBufferSize.
862	// Thus we almost never need to reallocate.
863	int length = size();
864	int neededSize = length + 1;
865	if (neededSize < normalStatBufferSize) {
866	neededSize = normalStatBufferSize;
867	}
868	if (neededSize != statBufferSize) {
869	delete [] statBuffer;
870	statBuffer = new char [neededSize];
871	statBufferSize = neededSize;
872	}
873
874	const UChar *p = data();
875	char *q = statBuffer;
876	const UChar *limit = p + length;
877	while (p != limit) {
878	*q = static_cast<char>(p->uc);
879	++p;
880	++q;
881	}
882	*q = '\0';
883
884	return statBuffer;
885	}
886
887	#ifdef KJS_DEBUG_MEM
888	void UString::globalClear()
889	{
890	delete [] statBuffer;
891	statBuffer = 0;
892	statBufferSize = 0;
893	}
894	#endif
895
896	UString &UString::operator=(const char *c)
897	{
898	if (!c) {
899	m_rep = &Rep::null;
900	return *this;
901	}
902
903	if (!c[0]) {
904	m_rep = &Rep::empty;
905	return *this;
906	}
907
908	int l = static_cast<int>(strlen(c));
909	UChar *d;
910	if (m_rep->rc == 1 && l <= m_rep->capacity && m_rep->baseIsSelf() && m_rep->offset == 0 && m_rep->preCapacity == 0) {
911	d = m_rep->buf;
912	m_rep->_hash = 0;
913	m_rep->len = l;
914	} else {
915	d = allocChars(l);
916	if (!d) {
917	m_rep = &Rep::null;
918	return *this;
919	}
920	m_rep = Rep::create(d, l);
921	}
922	for (int i = 0; i < l; i++)
923	d[i].uc = c[i];
924
925	return *this;
926	}
927
928	bool UString::is8Bit() const
929	{
930	const UChar *u = data();
931	const UChar *limit = u + size();
932	while (u < limit) {
933	if (u->uc > 0xFF)
934	return false;
935	++u;
936	}
937
938	return true;
939	}
940
941	const UChar UString::operator[](int pos) const
942	{
943	if (pos >= size())
944	return '\0';
945	return data()[pos];
946	}
947
948	double UString::toDouble(bool tolerateTrailingJunk, bool tolerateEmptyString) const
949	{
950	double d;
951
952	// FIXME: If tolerateTrailingJunk is true, then we want to tolerate non-8-bit junk
953	// after the number, so is8Bit is too strict a check.
954	if (!is8Bit())
955	return NaN;
956
957	const char *c = ascii();
958
959	// skip leading white space
960	while (isASCIISpace(*c))
961	c++;
962
963	// empty string ?
964	if (*c == '\0')
965	return tolerateEmptyString ? 0.0 : NaN;
966
967	// hex number ?
968	if (c == '0' && ((c+1) == 'x' \|\| *(c+1) == 'X')) {
969	const char* firstDigitPosition = c + 2;
970	c++;
971	d = 0.0;
972	while (*(++c)) {
973	if (c >= '0' && c <= '9')
974	d = d * 16.0 + *c - '0';
975	else if ((c >= 'A' && c <= 'F') \|\| (c >= 'a' && c <= 'f'))
976	d = d * 16.0 + (*c & 0xdf) - 'A' + 10.0;
977	else
978	break;
979	}
980
981	if (d >= mantissaOverflowLowerBound)
982	d = parseIntOverflow(firstDigitPosition, c - firstDigitPosition, 16);
983	} else {
984	// regular number ?
985	char *end;
986	d = kjs_strtod(c, &end);
987	if ((d != 0.0 \|\| end != c) && d != Inf && d != -Inf) {
988	c = end;
989	} else {
990	double sign = 1.0;
991
992	if (*c == '+')
993	c++;
994	else if (*c == '-') {
995	sign = -1.0;
996	c++;
997	}
998
999	// We used strtod() to do the conversion. However, strtod() handles
1000	// infinite values slightly differently than JavaScript in that it
1001	// converts the string "inf" with any capitalization to infinity,
1002	// whereas the ECMA spec requires that it be converted to NaN.
1003
1004	if (c[0] == 'I' && c[1] == 'n' && c[2] == 'f' && c[3] == 'i' && c[4] == 'n' && c[5] == 'i' && c[6] == 't' && c[7] == 'y') {
1005	d = sign * Inf;
1006	c += 8;
1007	} else if ((d == Inf \|\| d == -Inf) && c != 'I' && c != 'i')
1008	c = end;
1009	else
1010	return NaN;
1011	}
1012	}
1013
1014	// allow trailing white space
1015	while (isASCIISpace(*c))
1016	c++;
1017	// don't allow anything after - unless tolerant=true
1018	if (!tolerateTrailingJunk && *c != '\0')
1019	d = NaN;
1020
1021	return d;
1022	}
1023
1024	double UString::toDouble(bool tolerateTrailingJunk) const
1025	{
1026	return toDouble(tolerateTrailingJunk, true);
1027	}
1028
1029	double UString::toDouble() const
1030	{
1031	return toDouble(false, true);
1032	}
1033
1034	uint32_t UString::toUInt32(bool *ok) const
1035	{
1036	double d = toDouble();
1037	bool b = true;
1038
1039	if (d != static_cast<uint32_t>(d)) {
1040	b = false;
1041	d = 0;
1042	}
1043
1044	if (ok)
1045	*ok = b;
1046
1047	return static_cast<uint32_t>(d);
1048	}
1049
1050	uint32_t UString::toUInt32(bool *ok, bool tolerateEmptyString) const
1051	{
1052	double d = toDouble(false, tolerateEmptyString);
1053	bool b = true;
1054
1055	if (d != static_cast<uint32_t>(d)) {
1056	b = false;
1057	d = 0;
1058	}
1059
1060	if (ok)
1061	*ok = b;
1062
1063	return static_cast<uint32_t>(d);
1064	}
1065
1066	uint32_t UString::toStrictUInt32(bool *ok) const
1067	{
1068	if (ok)
1069	*ok = false;
1070
1071	// Empty string is not OK.
1072	int len = m_rep->len;
1073	if (len == 0)
1074	return 0;
1075	const UChar *p = m_rep->data();
1076	unsigned short c = p->unicode();
1077
1078	// If the first digit is 0, only 0 itself is OK.
1079	if (c == '0') {
1080	if (len == 1 && ok)
1081	*ok = true;
1082	return 0;
1083	}
1084
1085	// Convert to UInt32, checking for overflow.
1086	uint32_t i = 0;
1087	while (1) {
1088	// Process character, turning it into a digit.
1089	if (c < '0' \|\| c > '9')
1090	return 0;
1091	const unsigned d = c - '0';
1092
1093	// Multiply by 10, checking for overflow out of 32 bits.
1094	if (i > 0xFFFFFFFFU / 10)
1095	return 0;
1096	i *= 10;
1097
1098	// Add in the digit, checking for overflow out of 32 bits.
1099	const unsigned max = 0xFFFFFFFFU - d;
1100	if (i > max)
1101	return 0;
1102	i += d;
1103
1104	// Handle end of string.
1105	if (--len == 0) {
1106	if (ok)
1107	*ok = true;
1108	return i;
1109	}
1110
1111	// Get next character.
1112	c = (++p)->unicode();
1113	}
1114	}
1115
1116	int UString::find(const UString &f, int pos) const
1117	{
1118	int sz = size();
1119	int fsz = f.size();
1120	if (sz < fsz)
1121	return -1;
1122	if (pos < 0)
1123	pos = 0;
1124	if (fsz == 0)
1125	return pos;
1126	const UChar *end = data() + sz - fsz;
1127	int fsizeminusone = (fsz - 1) * sizeof(UChar);
1128	const UChar *fdata = f.data();
1129	unsigned short fchar = fdata->uc;
1130	++fdata;
1131	for (const UChar *c = data() + pos; c <= end; c++)
1132	if (c->uc == fchar && !memcmp(c + 1, fdata, fsizeminusone))
1133	return static_cast<int>(c - data());
1134
1135	return -1;
1136	}
1137
1138	int UString::find(UChar ch, int pos) const
1139	{
1140	if (pos < 0)
1141	pos = 0;
1142	const UChar *end = data() + size();
1143	for (const UChar *c = data() + pos; c < end; c++)
1144	if (*c == ch)
1145	return static_cast<int>(c - data());
1146
1147	return -1;
1148	}
1149
1150	int UString::rfind(const UString &f, int pos) const
1151	{
1152	int sz = size();
1153	int fsz = f.size();
1154	if (sz < fsz)
1155	return -1;
1156	if (pos < 0)
1157	pos = 0;
1158	if (pos > sz - fsz)
1159	pos = sz - fsz;
1160	if (fsz == 0)
1161	return pos;
1162	int fsizeminusone = (fsz - 1) * sizeof(UChar);
1163	const UChar *fdata = f.data();
1164	for (const UChar *c = data() + pos; c >= data(); c--) {
1165	if (c == fdata && !memcmp(c + 1, fdata + 1, fsizeminusone))
1166	return static_cast<int>(c - data());
1167	}
1168
1169	return -1;
1170	}
1171
1172	int UString::rfind(UChar ch, int pos) const
1173	{
1174	if (isEmpty())
1175	return -1;
1176	if (pos + 1 >= size())
1177	pos = size() - 1;
1178	for (const UChar *c = data() + pos; c >= data(); c--) {
1179	if (*c == ch)
1180	return static_cast<int>(c-data());
1181	}
1182
1183	return -1;
1184	}
1185
1186	UString UString::substr(int pos, int len) const
1187	{
1188	int s = size();
1189
1190	if (pos < 0)
1191	pos = 0;
1192	else if (pos >= s)
1193	pos = s;
1194	if (len < 0)
1195	len = s;
1196	if (pos + len >= s)
1197	len = s - pos;
1198
1199	if (pos == 0 && len == s)
1200	return *this;
1201
1202	return UString(Rep::create(m_rep, pos, len));
1203	}
1204
1205	bool operator==(const UString& s1, const UString& s2)
1206	{
1207	if (s1.m_rep->len != s2.m_rep->len)
1208	return false;
1209
1210	return (memcmp(s1.m_rep->data(), s2.m_rep->data(),
1211	s1.m_rep->len * sizeof(UChar)) == 0);
1212	}
1213
1214	bool operator==(const UString& s1, const char *s2)
1215	{
1216	if (s2 == 0) {
1217	return s1.isEmpty();
1218	}
1219
1220	const UChar *u = s1.data();
1221	const UChar *uend = u + s1.size();
1222	while (u != uend && *s2) {
1223	if (u->uc != (unsigned char)*s2)
1224	return false;
1225	s2++;
1226	u++;
1227	}
1228
1229	return u == uend && *s2 == 0;
1230	}
1231
1232	bool operator<(const UString& s1, const UString& s2)
1233	{
1234	const int l1 = s1.size();
1235	const int l2 = s2.size();
1236	const int lmin = l1 < l2 ? l1 : l2;
1237	const UChar *c1 = s1.data();
1238	const UChar *c2 = s2.data();
1239	int l = 0;
1240	while (l < lmin && c1 == c2) {
1241	c1++;
1242	c2++;
1243	l++;
1244	}
1245	if (l < lmin)
1246	return (c1->uc < c2->uc);
1247
1248	return (l1 < l2);
1249	}
1250
1251	int compare(const UString& s1, const UString& s2)
1252	{
1253	const int l1 = s1.size();
1254	const int l2 = s2.size();
1255	const int lmin = l1 < l2 ? l1 : l2;
1256	const UChar *c1 = s1.data();
1257	const UChar *c2 = s2.data();
1258	int l = 0;
1259	while (l < lmin && c1 == c2) {
1260	c1++;
1261	c2++;
1262	l++;
1263	}
1264
1265	if (l < lmin)
1266	return (c1->uc > c2->uc) ? 1 : -1;
1267
1268	if (l1 == l2)
1269	return 0;
1270
1271	return (l1 > l2) ? 1 : -1;
1272	}
1273
1274	CString UString::UTF8String(bool strict) const
1275	{
1276	// Allocate a buffer big enough to hold all the characters.
1277	const int length = size();
1278	Vector<char, 1024> buffer(length * 3);
1279
1280	// Convert to runs of 8-bit characters.
1281	char* p = buffer.data();
1282	const ::UChar* d = &data()->uc;
1283	ConversionResult result = ConvertUTF16ToUTF8(&d, d + length, &p, p + buffer.size(), strict);
1284	if (result != conversionOK)
1285	return CString();
1286
1287	return CString(buffer.data(), p - buffer.data());
1288	}
1289
1290
1291	} // namespace KJS

Note: See TracBrowser for help on using the repository browser.

Download in other formats: