Context Navigation

source: webkit/trunk/JavaScriptCore/kjs/ustring.cpp@ 30679

Visit:

Last change on this file since 30679 was 29653, checked in by [email protected], 17 years ago

Build fix.

kjs/ustring.cpp:
kjs/ustring.h: (KJS::UString::cost):

Property svn:eol-style set to native

File size: 30.5 KB

Line
1	// -*- c-basic-offset: 2 -*-
2	/*
3	* Copyright (C) 1999-2000 Harri Porten ([email protected])
4	* Copyright (C) 2004, 2005, 2006, 2007 Apple Inc. All rights reserved.
5	* Copyright (C) 2007 Cameron Zwarich ([email protected])
6	*
7	* This library is free software; you can redistribute it and/or
8	* modify it under the terms of the GNU Library General Public
9	* License as published by the Free Software Foundation; either
10	* version 2 of the License, or (at your option) any later version.
11	*
12	* This library is distributed in the hope that it will be useful,
13	* but WITHOUT ANY WARRANTY; without even the implied warranty of
14	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15	* Library General Public License for more details.
16	*
17	* You should have received a copy of the GNU Library General Public License
18	* along with this library; see the file COPYING.LIB. If not, write to
19	* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20	* Boston, MA 02110-1301, USA.
21	*
22	*/
23
24	#include "config.h"
25	#include "ustring.h"
26
27	#include "JSLock.h"
28	#include "collector.h"
29	#include "dtoa.h"
30	#include "function.h"
31	#include "identifier.h"
32	#include "operations.h"
33	#include <ctype.h>
34	#include <float.h>
35	#include <limits.h>
36	#include <math.h>
37	#include <stdio.h>
38	#include <stdlib.h>
39	#include <wtf/Assertions.h>
40	#include <wtf/ASCIICType.h>
41	#include <wtf/MathExtras.h>
42	#include <wtf/Vector.h>
43	#include <wtf/unicode/UTF8.h>
44
45	#if HAVE(STRING_H)
46	#include <string.h>
47	#endif
48	#if HAVE(STRINGS_H)
49	#include <strings.h>
50	#endif
51
52	using namespace WTF;
53	using namespace WTF::Unicode;
54	using namespace std;
55
56	namespace KJS {
57
58	extern const double NaN;
59	extern const double Inf;
60
// Sentinel returned by size computations whose result cannot be represented.
// (A top-level const on a by-value return type is ignored, so it is omitted.)
static inline size_t overflowIndicator() { return std::numeric_limits<size_t>::max(); }
62	static inline const size_t maxUChars() { return std::numeric_limits<size_t>::max() / sizeof(UChar); }
63
64	static inline UChar* allocChars(size_t length)
65	{
66	ASSERT(length);
67	if (length > maxUChars())
68	return 0;
69	return static_cast<UChar>(fastMalloc(sizeof(UChar) length));
70	}
71
72	static inline UChar* reallocChars(UChar* buffer, size_t length)
73	{
74	ASSERT(length);
75	if (length > maxUChars())
76	return 0;
77	return static_cast<UChar>(fastRealloc(buffer, sizeof(UChar) length));
78	}
79
80	COMPILE_ASSERT(sizeof(UChar) == 2, uchar_is_2_bytes)
81
82	CString::CString(const char *c)
83	{
84	length = strlen(c);
85	data = new char[length+1];
86	memcpy(data, c, length + 1);
87	}
88
89	CString::CString(const char *c, size_t len)
90	{
91	length = len;
92	data = new char[len+1];
93	memcpy(data, c, len);
94	data[len] = 0;
95	}
96
97	CString::CString(const CString &b)
98	{
99	length = b.length;
100	if (b.data) {
101	data = new char[length+1];
102	memcpy(data, b.data, length + 1);
103	}
104	else
105	data = 0;
106	}
107
108	CString::~CString()
109	{
110	delete [] data;
111	}
112
113	CString &CString::append(const CString &t)
114	{
115	char *n;
116	n = new char[length+t.length+1];
117	if (length)
118	memcpy(n, data, length);
119	if (t.length)
120	memcpy(n+length, t.data, t.length);
121	length += t.length;
122	n[length] = 0;
123
124	delete [] data;
125	data = n;
126
127	return *this;
128	}
129
130	CString &CString::operator=(const char *c)
131	{
132	if (data)
133	delete [] data;
134	length = strlen(c);
135	data = new char[length+1];
136	memcpy(data, c, length + 1);
137
138	return *this;
139	}
140
141	CString &CString::operator=(const CString &str)
142	{
143	if (this == &str)
144	return *this;
145
146	if (data)
147	delete [] data;
148	length = str.length;
149	if (str.data) {
150	data = new char[length + 1];
151	memcpy(data, str.data, length + 1);
152	}
153	else
154	data = 0;
155
156	return *this;
157	}
158
159	bool operator==(const CString& c1, const CString& c2)
160	{
161	size_t len = c1.size();
162	return len == c2.size() && (len == 0 \|\| memcmp(c1.c_str(), c2.c_str(), len) == 0);
163	}
164
165	// Hack here to avoid a global with a constructor; point to an unsigned short instead of a UChar.
166	static unsigned short almostUChar;
167	UString::Rep UString::Rep::null = { 0, 0, 1, 0, 0, &UString::Rep::null, 0, 0, 0, 0, 0, 0 };
168	UString::Rep UString::Rep::empty = { 0, 0, 1, 0, 0, &UString::Rep::empty, 0, reinterpret_cast<UChar*>(&almostUChar), 0, 0, 0, 0 };
169	const int normalStatBufferSize = 4096;
170	static char *statBuffer = 0; // FIXME: This buffer is never deallocated.
171	static int statBufferSize = 0;
172
173	PassRefPtr<UString::Rep> UString::Rep::createCopying(const UChar *d, int l)
174	{
175	ASSERT(JSLock::lockCount() > 0);
176
177	int sizeInBytes = l * sizeof(UChar);
178	UChar copyD = static_cast<UChar >(fastMalloc(sizeInBytes));
179	memcpy(copyD, d, sizeInBytes);
180
181	return create(copyD, l);
182	}
183
184	PassRefPtr<UString::Rep> UString::Rep::create(UChar *d, int l)
185	{
186	ASSERT(JSLock::lockCount() > 0);
187
188	Rep* r = new Rep;
189	r->offset = 0;
190	r->len = l;
191	r->rc = 1;
192	r->_hash = 0;
193	r->isIdentifier = 0;
194	r->baseString = r;
195	r->reportedCost = 0;
196	r->buf = d;
197	r->usedCapacity = l;
198	r->capacity = l;
199	r->usedPreCapacity = 0;
200	r->preCapacity = 0;
201
202	// steal the single reference this Rep was created with
203	return adoptRef(r);
204	}
205
206	PassRefPtr<UString::Rep> UString::Rep::create(PassRefPtr<Rep> base, int offset, int length)
207	{
208	ASSERT(JSLock::lockCount() > 0);
209	ASSERT(base);
210
211	int baseOffset = base->offset;
212
213	base = base->baseString;
214
215	ASSERT(-(offset + baseOffset) <= base->usedPreCapacity);
216	ASSERT(offset + baseOffset + length <= base->usedCapacity);
217
218	Rep *r = new Rep;
219	r->offset = baseOffset + offset;
220	r->len = length;
221	r->rc = 1;
222	r->_hash = 0;
223	r->isIdentifier = 0;
224	r->baseString = base.releaseRef();
225	r->reportedCost = 0;
226	r->buf = 0;
227	r->usedCapacity = 0;
228	r->capacity = 0;
229	r->usedPreCapacity = 0;
230	r->preCapacity = 0;
231
232	// steal the single reference this Rep was created with
233	return adoptRef(r);
234	}
235
236	void UString::Rep::destroy()
237	{
238	ASSERT(JSLock::lockCount() > 0);
239
240	if (isIdentifier)
241	Identifier::remove(this);
242	if (baseString != this) {
243	baseString->deref();
244	} else {
245	fastFree(buf);
246	}
247	delete this;
248	}
249
// Hash seed derived from the golden ratio: an arbitrary non-zero start value
// so that all-zero input does not hash to all zeros (or anything like that).
const unsigned PHI = 0x9e3779b9U;
253
254	// Paul Hsieh's SuperFastHash
255	// http://www.azillionmonkeys.com/qed/hash.html
256	unsigned UString::Rep::computeHash(const UChar *s, int len)
257	{
258	unsigned l = len;
259	uint32_t hash = PHI;
260	uint32_t tmp;
261
262	int rem = l & 1;
263	l >>= 1;
264
265	// Main loop
266	for (; l > 0; l--) {
267	hash += s[0].uc;
268	tmp = (s[1].uc << 11) ^ hash;
269	hash = (hash << 16) ^ tmp;
270	s += 2;
271	hash += hash >> 11;
272	}
273
274	// Handle end case
275	if (rem) {
276	hash += s[0].uc;
277	hash ^= hash << 11;
278	hash += hash >> 17;
279	}
280
281	// Force "avalanching" of final 127 bits
282	hash ^= hash << 3;
283	hash += hash >> 5;
284	hash ^= hash << 2;
285	hash += hash >> 15;
286	hash ^= hash << 10;
287
288	// this avoids ever returning a hash code of 0, since that is used to
289	// signal "hash not computed yet", using a value that is likely to be
290	// effectively the same as 0 when the low bits are masked
291	if (hash == 0)
292	hash = 0x80000000;
293
294	return hash;
295	}
296
297	// Paul Hsieh's SuperFastHash
298	// http://www.azillionmonkeys.com/qed/hash.html
299	unsigned UString::Rep::computeHash(const char *s)
300	{
301	// This hash is designed to work on 16-bit chunks at a time. But since the normal case
302	// (above) is to hash UTF-16 characters, we just treat the 8-bit chars as if they
303	// were 16-bit chunks, which should give matching results
304
305	uint32_t hash = PHI;
306	uint32_t tmp;
307	size_t l = strlen(s);
308
309	size_t rem = l & 1;
310	l >>= 1;
311
312	// Main loop
313	for (; l > 0; l--) {
314	hash += (unsigned char)s[0];
315	tmp = ((unsigned char)s[1] << 11) ^ hash;
316	hash = (hash << 16) ^ tmp;
317	s += 2;
318	hash += hash >> 11;
319	}
320
321	// Handle end case
322	if (rem) {
323	hash += (unsigned char)s[0];
324	hash ^= hash << 11;
325	hash += hash >> 17;
326	}
327
328	// Force "avalanching" of final 127 bits
329	hash ^= hash << 3;
330	hash += hash >> 5;
331	hash ^= hash << 2;
332	hash += hash >> 15;
333	hash ^= hash << 10;
334
335	// this avoids ever returning a hash code of 0, since that is used to
336	// signal "hash not computed yet", using a value that is likely to be
337	// effectively the same as 0 when the low bits are masked
338	if (hash == 0)
339	hash = 0x80000000;
340
341	return hash;
342	}
343
344	// put these early so they can be inlined
345	inline size_t UString::expandedSize(size_t size, size_t otherSize) const
346	{
347	// Do the size calculation in two parts, returning overflowIndicator if
348	// we overflow the maximum value that we can handle.
349
350	if (size > maxUChars())
351	return overflowIndicator();
352
353	size_t expandedSize = ((size + 10) / 10 * 11) + 1;
354	if (maxUChars() - expandedSize < otherSize)
355	return overflowIndicator();
356
357	return expandedSize + otherSize;
358	}
359
360	inline int UString::usedCapacity() const
361	{
362	return m_rep->baseString->usedCapacity;
363	}
364
365	inline int UString::usedPreCapacity() const
366	{
367	return m_rep->baseString->usedPreCapacity;
368	}
369
370	void UString::expandCapacity(int requiredLength)
371	{
372	Rep* r = m_rep->baseString;
373
374	if (requiredLength > r->capacity) {
375	size_t newCapacity = expandedSize(requiredLength, r->preCapacity);
376	UChar* oldBuf = r->buf;
377	r->buf = reallocChars(r->buf, newCapacity);
378	if (!r->buf) {
379	r->buf = oldBuf;
380	m_rep = &Rep::null;
381	return;
382	}
383	r->capacity = newCapacity - r->preCapacity;
384	}
385	if (requiredLength > r->usedCapacity) {
386	r->usedCapacity = requiredLength;
387	}
388	}
389
390	void UString::expandPreCapacity(int requiredPreCap)
391	{
392	Rep* r = m_rep->baseString;
393
394	if (requiredPreCap > r->preCapacity) {
395	size_t newCapacity = expandedSize(requiredPreCap, r->capacity);
396	int delta = newCapacity - r->capacity - r->preCapacity;
397
398	UChar* newBuf = allocChars(newCapacity);
399	if (!newBuf) {
400	m_rep = &Rep::null;
401	return;
402	}
403	memcpy(newBuf + delta, r->buf, (r->capacity + r->preCapacity) * sizeof(UChar));
404	fastFree(r->buf);
405	r->buf = newBuf;
406
407	r->preCapacity = newCapacity - r->capacity;
408	}
409	if (requiredPreCap > r->usedPreCapacity) {
410	r->usedPreCapacity = requiredPreCap;
411	}
412	}
413
414	UString::UString(const char *c)
415	{
416	if (!c) {
417	m_rep = &Rep::null;
418	return;
419	}
420
421	if (!c[0]) {
422	m_rep = &Rep::empty;
423	return;
424	}
425
426	size_t length = strlen(c);
427	UChar *d = allocChars(length);
428	if (!d)
429	m_rep = &Rep::null;
430	else {
431	for (size_t i = 0; i < length; i++)
432	d[i].uc = c[i];
433	m_rep = Rep::create(d, static_cast<int>(length));
434	}
435	}
436
437	UString::UString(const UChar *c, int length)
438	{
439	if (length == 0)
440	m_rep = &Rep::empty;
441	else
442	m_rep = Rep::createCopying(c, length);
443	}
444
445	UString::UString(UChar *c, int length, bool copy)
446	{
447	if (length == 0)
448	m_rep = &Rep::empty;
449	else if (copy)
450	m_rep = Rep::createCopying(c, length);
451	else
452	m_rep = Rep::create(c, length);
453	}
454
455	UString::UString(const Vector<UChar>& buffer)
456	{
457	if (!buffer.size())
458	m_rep = &Rep::empty;
459	else
460	m_rep = Rep::createCopying(buffer.data(), buffer.size());
461	}
462
463
464	UString::UString(const UString &a, const UString &b)
465	{
466	int aSize = a.size();
467	int aOffset = a.m_rep->offset;
468	int bSize = b.size();
469	int bOffset = b.m_rep->offset;
470	int length = aSize + bSize;
471
472	// possible cases:
473
474	if (aSize == 0) {
475	// a is empty
476	m_rep = b.m_rep;
477	} else if (bSize == 0) {
478	// b is empty
479	m_rep = a.m_rep;
480	} else if (aOffset + aSize == a.usedCapacity() && aSize >= minShareSize && 4 * aSize >= bSize &&
481	(-bOffset != b.usedPreCapacity() \|\| aSize >= bSize)) {
482	// - a reaches the end of its buffer so it qualifies for shared append
483	// - also, it's at least a quarter the length of b - appending to a much shorter
484	// string does more harm than good
485	// - however, if b qualifies for prepend and is longer than a, we'd rather prepend
486	UString x(a);
487	x.expandCapacity(aOffset + length);
488	if (a.data() && x.data()) {
489	memcpy(const_cast<UChar >(a.data() + aSize), b.data(), bSize sizeof(UChar));
490	m_rep = Rep::create(a.m_rep, 0, length);
491	} else
492	m_rep = &Rep::null;
493	} else if (-bOffset == b.usedPreCapacity() && bSize >= minShareSize && 4 * bSize >= aSize) {
494	// - b reaches the beginning of its buffer so it qualifies for shared prepend
495	// - also, it's at least a quarter the length of a - prepending to a much shorter
496	// string does more harm than good
497	UString y(b);
498	y.expandPreCapacity(-bOffset + aSize);
499	if (b.data() && y.data()) {
500	memcpy(const_cast<UChar >(b.data() - aSize), a.data(), aSize sizeof(UChar));
501	m_rep = Rep::create(b.m_rep, -aSize, length);
502	} else
503	m_rep = &Rep::null;
504	} else {
505	// a does not qualify for append, and b does not qualify for prepend, gotta make a whole new string
506	size_t newCapacity = expandedSize(length, 0);
507	UChar* d = allocChars(newCapacity);
508	if (!d)
509	m_rep = &Rep::null;
510	else {
511	memcpy(d, a.data(), aSize * sizeof(UChar));
512	memcpy(d + aSize, b.data(), bSize * sizeof(UChar));
513	m_rep = Rep::create(d, length);
514	m_rep->capacity = newCapacity;
515	}
516	}
517	}
518
519	const UString& UString::null()
520	{
521	static UString* n = new UString;
522	return *n;
523	}
524
525	UString UString::from(int i)
526	{
527	UChar buf[1 + sizeof(i) * 3];
528	UChar *end = buf + sizeof(buf) / sizeof(UChar);
529	UChar *p = end;
530
531	if (i == 0) {
532	*--p = '0';
533	} else if (i == INT_MIN) {
534	char minBuf[1 + sizeof(i) * 3];
535	sprintf(minBuf, "%d", INT_MIN);
536	return UString(minBuf);
537	} else {
538	bool negative = false;
539	if (i < 0) {
540	negative = true;
541	i = -i;
542	}
543	while (i) {
544	*--p = (unsigned short)((i % 10) + '0');
545	i /= 10;
546	}
547	if (negative) {
548	*--p = '-';
549	}
550	}
551
552	return UString(p, static_cast<int>(end - p));
553	}
554
555	UString UString::from(unsigned int u)
556	{
557	UChar buf[sizeof(u) * 3];
558	UChar *end = buf + sizeof(buf) / sizeof(UChar);
559	UChar *p = end;
560
561	if (u == 0) {
562	*--p = '0';
563	} else {
564	while (u) {
565	*--p = (unsigned short)((u % 10) + '0');
566	u /= 10;
567	}
568	}
569
570	return UString(p, static_cast<int>(end - p));
571	}
572
573	UString UString::from(long l)
574	{
575	UChar buf[1 + sizeof(l) * 3];
576	UChar *end = buf + sizeof(buf) / sizeof(UChar);
577	UChar *p = end;
578
579	if (l == 0) {
580	*--p = '0';
581	} else if (l == LONG_MIN) {
582	char minBuf[1 + sizeof(l) * 3];
583	sprintf(minBuf, "%ld", LONG_MIN);
584	return UString(minBuf);
585	} else {
586	bool negative = false;
587	if (l < 0) {
588	negative = true;
589	l = -l;
590	}
591	while (l) {
592	*--p = (unsigned short)((l % 10) + '0');
593	l /= 10;
594	}
595	if (negative) {
596	*--p = '-';
597	}
598	}
599
600	return UString(p, static_cast<int>(end - p));
601	}
602
603	UString UString::from(double d)
604	{
605	// avoid ever printing -NaN, in JS conceptually there is only one NaN value
606	if (isnan(d))
607	return "NaN";
608
609	char buf[80];
610	int decimalPoint;
611	int sign;
612
613	char *result = kjs_dtoa(d, 0, 0, &decimalPoint, &sign, NULL);
614	int length = static_cast<int>(strlen(result));
615
616	int i = 0;
617	if (sign) {
618	buf[i++] = '-';
619	}
620
621	if (decimalPoint <= 0 && decimalPoint > -6) {
622	buf[i++] = '0';
623	buf[i++] = '.';
624	for (int j = decimalPoint; j < 0; j++) {
625	buf[i++] = '0';
626	}
627	strcpy(buf + i, result);
628	} else if (decimalPoint <= 21 && decimalPoint > 0) {
629	if (length <= decimalPoint) {
630	strcpy(buf + i, result);
631	i += length;
632	for (int j = 0; j < decimalPoint - length; j++) {
633	buf[i++] = '0';
634	}
635	buf[i] = '\0';
636	} else {
637	strncpy(buf + i, result, decimalPoint);
638	i += decimalPoint;
639	buf[i++] = '.';
640	strcpy(buf + i, result + decimalPoint);
641	}
642	} else if (result[0] < '0' \|\| result[0] > '9') {
643	strcpy(buf + i, result);
644	} else {
645	buf[i++] = result[0];
646	if (length > 1) {
647	buf[i++] = '.';
648	strcpy(buf + i, result + 1);
649	i += length - 1;
650	}
651
652	buf[i++] = 'e';
653	buf[i++] = (decimalPoint >= 0) ? '+' : '-';
654	// decimalPoint can't be more than 3 digits decimal given the
655	// nature of float representation
656	int exponential = decimalPoint - 1;
657	if (exponential < 0)
658	exponential = -exponential;
659	if (exponential >= 100)
660	buf[i++] = static_cast<char>('0' + exponential / 100);
661	if (exponential >= 10)
662	buf[i++] = static_cast<char>('0' + (exponential % 100) / 10);
663	buf[i++] = static_cast<char>('0' + exponential % 10);
664	buf[i++] = '\0';
665	}
666
667	kjs_freedtoa(result);
668
669	return UString(buf);
670	}
671
672	UString UString::spliceSubstringsWithSeparators(const Range* substringRanges, int rangeCount, const UString* separators, int separatorCount) const
673	{
674	if (rangeCount == 1 && separatorCount == 0) {
675	int thisSize = size();
676	int position = substringRanges[0].position;
677	int length = substringRanges[0].length;
678	if (position <= 0 && length >= thisSize)
679	return *this;
680	return UString::Rep::create(m_rep, max(0, position), min(thisSize, length));
681	}
682
683	int totalLength = 0;
684	for (int i = 0; i < rangeCount; i++)
685	totalLength += substringRanges[i].length;
686	for (int i = 0; i < separatorCount; i++)
687	totalLength += separators[i].size();
688
689	if (totalLength == 0)
690	return "";
691
692	UChar* buffer = allocChars(totalLength);
693	if (!buffer)
694	return null();
695
696	int maxCount = max(rangeCount, separatorCount);
697	int bufferPos = 0;
698	for (int i = 0; i < maxCount; i++) {
699	if (i < rangeCount) {
700	memcpy(buffer + bufferPos, data() + substringRanges[i].position, substringRanges[i].length * sizeof(UChar));
701	bufferPos += substringRanges[i].length;
702	}
703	if (i < separatorCount) {
704	memcpy(buffer + bufferPos, separators[i].data(), separators[i].size() * sizeof(UChar));
705	bufferPos += separators[i].size();
706	}
707	}
708
709	return UString::Rep::create(buffer, totalLength);
710	}
711
712	UString &UString::append(const UString &t)
713	{
714	int thisSize = size();
715	int thisOffset = m_rep->offset;
716	int tSize = t.size();
717	int length = thisSize + tSize;
718
719	// possible cases:
720	if (thisSize == 0) {
721	// this is empty
722	*this = t;
723	} else if (tSize == 0) {
724	// t is empty
725	} else if (m_rep->baseIsSelf() && m_rep->rc == 1) {
726	// this is direct and has refcount of 1 (so we can just alter it directly)
727	expandCapacity(thisOffset + length);
728	if (data()) {
729	memcpy(const_cast<UChar>(data() + thisSize), t.data(), tSize sizeof(UChar));
730	m_rep->len = length;
731	m_rep->_hash = 0;
732	}
733	} else if (thisOffset + thisSize == usedCapacity() && thisSize >= minShareSize) {
734	// this reaches the end of the buffer - extend it if it's long enough to append to
735	expandCapacity(thisOffset + length);
736	if (data()) {
737	memcpy(const_cast<UChar>(data() + thisSize), t.data(), tSize sizeof(UChar));
738	m_rep = Rep::create(m_rep, 0, length);
739	}
740	} else {
741	// this is shared with someone using more capacity, gotta make a whole new string
742	size_t newCapacity = expandedSize(length, 0);
743	UChar* d = allocChars(newCapacity);
744	if (!d)
745	m_rep = &Rep::null;
746	else {
747	memcpy(d, data(), thisSize * sizeof(UChar));
748	memcpy(const_cast<UChar>(d + thisSize), t.data(), tSize sizeof(UChar));
749	m_rep = Rep::create(d, length);
750	m_rep->capacity = newCapacity;
751	}
752	}
753
754	return *this;
755	}
756
757	UString &UString::append(const char *t)
758	{
759	int thisSize = size();
760	int thisOffset = m_rep->offset;
761	int tSize = static_cast<int>(strlen(t));
762	int length = thisSize + tSize;
763
764	// possible cases:
765	if (thisSize == 0) {
766	// this is empty
767	*this = t;
768	} else if (tSize == 0) {
769	// t is empty, we'll just return *this below.
770	} else if (m_rep->baseIsSelf() && m_rep->rc == 1) {
771	// this is direct and has refcount of 1 (so we can just alter it directly)
772	expandCapacity(thisOffset + length);
773	UChar d = const_cast<UChar >(data());
774	if (d) {
775	for (int i = 0; i < tSize; ++i)
776	d[thisSize + i] = t[i];
777	m_rep->len = length;
778	m_rep->_hash = 0;
779	}
780	} else if (thisOffset + thisSize == usedCapacity() && thisSize >= minShareSize) {
781	// this string reaches the end of the buffer - extend it
782	expandCapacity(thisOffset + length);
783	UChar d = const_cast<UChar >(data());
784	if (d) {
785	for (int i = 0; i < tSize; ++i)
786	d[thisSize + i] = t[i];
787	m_rep = Rep::create(m_rep, 0, length);
788	}
789	} else {
790	// this is shared with someone using more capacity, gotta make a whole new string
791	size_t newCapacity = expandedSize(length, 0);
792	UChar* d = allocChars(newCapacity);
793	if (!d)
794	m_rep = &Rep::null;
795	else {
796	memcpy(d, data(), thisSize * sizeof(UChar));
797	for (int i = 0; i < tSize; ++i)
798	d[thisSize + i] = t[i];
799	m_rep = Rep::create(d, length);
800	m_rep->capacity = newCapacity;
801	}
802	}
803
804	return *this;
805	}
806
807	UString &UString::append(unsigned short c)
808	{
809	int thisOffset = m_rep->offset;
810	int length = size();
811
812	// possible cases:
813	if (length == 0) {
814	// this is empty - must make a new m_rep because we don't want to pollute the shared empty one
815	size_t newCapacity = expandedSize(1, 0);
816	UChar* d = allocChars(newCapacity);
817	if (!d)
818	m_rep = &Rep::null;
819	else {
820	d[0] = c;
821	m_rep = Rep::create(d, 1);
822	m_rep->capacity = newCapacity;
823	}
824	} else if (m_rep->baseIsSelf() && m_rep->rc == 1) {
825	// this is direct and has refcount of 1 (so we can just alter it directly)
826	expandCapacity(thisOffset + length + 1);
827	UChar d = const_cast<UChar >(data());
828	if (d) {
829	d[length] = c;
830	m_rep->len = length + 1;
831	m_rep->_hash = 0;
832	}
833	} else if (thisOffset + length == usedCapacity() && length >= minShareSize) {
834	// this reaches the end of the string - extend it and share
835	expandCapacity(thisOffset + length + 1);
836	UChar d = const_cast<UChar >(data());
837	if (d) {
838	d[length] = c;
839	m_rep = Rep::create(m_rep, 0, length + 1);
840	}
841	} else {
842	// this is shared with someone using more capacity, gotta make a whole new string
843	size_t newCapacity = expandedSize(length + 1, 0);
844	UChar* d = allocChars(newCapacity);
845	if (!d)
846	m_rep = &Rep::null;
847	else {
848	memcpy(d, data(), length * sizeof(UChar));
849	d[length] = c;
850	m_rep = Rep::create(d, length + 1);
851	m_rep->capacity = newCapacity;
852	}
853	}
854
855	return *this;
856	}
857
858	CString UString::cstring() const
859	{
860	return ascii();
861	}
862
863	char *UString::ascii() const
864	{
865	// Never make the buffer smaller than normalStatBufferSize.
866	// Thus we almost never need to reallocate.
867	int length = size();
868	int neededSize = length + 1;
869	if (neededSize < normalStatBufferSize) {
870	neededSize = normalStatBufferSize;
871	}
872	if (neededSize != statBufferSize) {
873	delete [] statBuffer;
874	statBuffer = new char [neededSize];
875	statBufferSize = neededSize;
876	}
877
878	const UChar *p = data();
879	char *q = statBuffer;
880	const UChar *limit = p + length;
881	while (p != limit) {
882	*q = static_cast<char>(p->uc);
883	++p;
884	++q;
885	}
886	*q = '\0';
887
888	return statBuffer;
889	}
890
891	UString &UString::operator=(const char *c)
892	{
893	if (!c) {
894	m_rep = &Rep::null;
895	return *this;
896	}
897
898	if (!c[0]) {
899	m_rep = &Rep::empty;
900	return *this;
901	}
902
903	int l = static_cast<int>(strlen(c));
904	UChar *d;
905	if (m_rep->rc == 1 && l <= m_rep->capacity && m_rep->baseIsSelf() && m_rep->offset == 0 && m_rep->preCapacity == 0) {
906	d = m_rep->buf;
907	m_rep->_hash = 0;
908	m_rep->len = l;
909	} else {
910	d = allocChars(l);
911	if (!d) {
912	m_rep = &Rep::null;
913	return *this;
914	}
915	m_rep = Rep::create(d, l);
916	}
917	for (int i = 0; i < l; i++)
918	d[i].uc = c[i];
919
920	return *this;
921	}
922
923	bool UString::is8Bit() const
924	{
925	const UChar *u = data();
926	const UChar *limit = u + size();
927	while (u < limit) {
928	if (u->uc > 0xFF)
929	return false;
930	++u;
931	}
932
933	return true;
934	}
935
936	const UChar UString::operator[](int pos) const
937	{
938	if (pos >= size())
939	return '\0';
940	return data()[pos];
941	}
942
943	double UString::toDouble(bool tolerateTrailingJunk, bool tolerateEmptyString) const
944	{
945	double d;
946
947	// FIXME: If tolerateTrailingJunk is true, then we want to tolerate non-8-bit junk
948	// after the number, so is8Bit is too strict a check.
949	if (!is8Bit())
950	return NaN;
951
952	const char *c = ascii();
953
954	// skip leading white space
955	while (isASCIISpace(*c))
956	c++;
957
958	// empty string ?
959	if (*c == '\0')
960	return tolerateEmptyString ? 0.0 : NaN;
961
962	// hex number ?
963	if (c == '0' && ((c+1) == 'x' \|\| *(c+1) == 'X')) {
964	const char* firstDigitPosition = c + 2;
965	c++;
966	d = 0.0;
967	while (*(++c)) {
968	if (c >= '0' && c <= '9')
969	d = d * 16.0 + *c - '0';
970	else if ((c >= 'A' && c <= 'F') \|\| (c >= 'a' && c <= 'f'))
971	d = d * 16.0 + (*c & 0xdf) - 'A' + 10.0;
972	else
973	break;
974	}
975
976	if (d >= mantissaOverflowLowerBound)
977	d = parseIntOverflow(firstDigitPosition, c - firstDigitPosition, 16);
978	} else {
979	// regular number ?
980	char *end;
981	d = kjs_strtod(c, &end);
982	if ((d != 0.0 \|\| end != c) && d != Inf && d != -Inf) {
983	c = end;
984	} else {
985	double sign = 1.0;
986
987	if (*c == '+')
988	c++;
989	else if (*c == '-') {
990	sign = -1.0;
991	c++;
992	}
993
994	// We used strtod() to do the conversion. However, strtod() handles
995	// infinite values slightly differently than JavaScript in that it
996	// converts the string "inf" with any capitalization to infinity,
997	// whereas the ECMA spec requires that it be converted to NaN.
998
999	if (c[0] == 'I' && c[1] == 'n' && c[2] == 'f' && c[3] == 'i' && c[4] == 'n' && c[5] == 'i' && c[6] == 't' && c[7] == 'y') {
1000	d = sign * Inf;
1001	c += 8;
1002	} else if ((d == Inf \|\| d == -Inf) && c != 'I' && c != 'i')
1003	c = end;
1004	else
1005	return NaN;
1006	}
1007	}
1008
1009	// allow trailing white space
1010	while (isASCIISpace(*c))
1011	c++;
1012	// don't allow anything after - unless tolerant=true
1013	if (!tolerateTrailingJunk && *c != '\0')
1014	d = NaN;
1015
1016	return d;
1017	}
1018
1019	double UString::toDouble(bool tolerateTrailingJunk) const
1020	{
1021	return toDouble(tolerateTrailingJunk, true);
1022	}
1023
1024	double UString::toDouble() const
1025	{
1026	return toDouble(false, true);
1027	}
1028
1029	uint32_t UString::toUInt32(bool *ok) const
1030	{
1031	double d = toDouble();
1032	bool b = true;
1033
1034	if (d != static_cast<uint32_t>(d)) {
1035	b = false;
1036	d = 0;
1037	}
1038
1039	if (ok)
1040	*ok = b;
1041
1042	return static_cast<uint32_t>(d);
1043	}
1044
1045	uint32_t UString::toUInt32(bool *ok, bool tolerateEmptyString) const
1046	{
1047	double d = toDouble(false, tolerateEmptyString);
1048	bool b = true;
1049
1050	if (d != static_cast<uint32_t>(d)) {
1051	b = false;
1052	d = 0;
1053	}
1054
1055	if (ok)
1056	*ok = b;
1057
1058	return static_cast<uint32_t>(d);
1059	}
1060
1061	uint32_t UString::toStrictUInt32(bool *ok) const
1062	{
1063	if (ok)
1064	*ok = false;
1065
1066	// Empty string is not OK.
1067	int len = m_rep->len;
1068	if (len == 0)
1069	return 0;
1070	const UChar *p = m_rep->data();
1071	unsigned short c = p->unicode();
1072
1073	// If the first digit is 0, only 0 itself is OK.
1074	if (c == '0') {
1075	if (len == 1 && ok)
1076	*ok = true;
1077	return 0;
1078	}
1079
1080	// Convert to UInt32, checking for overflow.
1081	uint32_t i = 0;
1082	while (1) {
1083	// Process character, turning it into a digit.
1084	if (c < '0' \|\| c > '9')
1085	return 0;
1086	const unsigned d = c - '0';
1087
1088	// Multiply by 10, checking for overflow out of 32 bits.
1089	if (i > 0xFFFFFFFFU / 10)
1090	return 0;
1091	i *= 10;
1092
1093	// Add in the digit, checking for overflow out of 32 bits.
1094	const unsigned max = 0xFFFFFFFFU - d;
1095	if (i > max)
1096	return 0;
1097	i += d;
1098
1099	// Handle end of string.
1100	if (--len == 0) {
1101	if (ok)
1102	*ok = true;
1103	return i;
1104	}
1105
1106	// Get next character.
1107	c = (++p)->unicode();
1108	}
1109	}
1110
1111	int UString::find(const UString &f, int pos) const
1112	{
1113	int sz = size();
1114	int fsz = f.size();
1115	if (sz < fsz)
1116	return -1;
1117	if (pos < 0)
1118	pos = 0;
1119	if (fsz == 0)
1120	return pos;
1121	const UChar *end = data() + sz - fsz;
1122	int fsizeminusone = (fsz - 1) * sizeof(UChar);
1123	const UChar *fdata = f.data();
1124	unsigned short fchar = fdata->uc;
1125	++fdata;
1126	for (const UChar *c = data() + pos; c <= end; c++)
1127	if (c->uc == fchar && !memcmp(c + 1, fdata, fsizeminusone))
1128	return static_cast<int>(c - data());
1129
1130	return -1;
1131	}
1132
1133	int UString::find(UChar ch, int pos) const
1134	{
1135	if (pos < 0)
1136	pos = 0;
1137	const UChar *end = data() + size();
1138	for (const UChar *c = data() + pos; c < end; c++)
1139	if (*c == ch)
1140	return static_cast<int>(c - data());
1141
1142	return -1;
1143	}
1144
1145	int UString::rfind(const UString &f, int pos) const
1146	{
1147	int sz = size();
1148	int fsz = f.size();
1149	if (sz < fsz)
1150	return -1;
1151	if (pos < 0)
1152	pos = 0;
1153	if (pos > sz - fsz)
1154	pos = sz - fsz;
1155	if (fsz == 0)
1156	return pos;
1157	int fsizeminusone = (fsz - 1) * sizeof(UChar);
1158	const UChar *fdata = f.data();
1159	for (const UChar *c = data() + pos; c >= data(); c--) {
1160	if (c == fdata && !memcmp(c + 1, fdata + 1, fsizeminusone))
1161	return static_cast<int>(c - data());
1162	}
1163
1164	return -1;
1165	}
1166
1167	int UString::rfind(UChar ch, int pos) const
1168	{
1169	if (isEmpty())
1170	return -1;
1171	if (pos + 1 >= size())
1172	pos = size() - 1;
1173	for (const UChar *c = data() + pos; c >= data(); c--) {
1174	if (*c == ch)
1175	return static_cast<int>(c-data());
1176	}
1177
1178	return -1;
1179	}
1180
1181	UString UString::substr(int pos, int len) const
1182	{
1183	int s = size();
1184
1185	if (pos < 0)
1186	pos = 0;
1187	else if (pos >= s)
1188	pos = s;
1189	if (len < 0)
1190	len = s;
1191	if (pos + len >= s)
1192	len = s - pos;
1193
1194	if (pos == 0 && len == s)
1195	return *this;
1196
1197	return UString(Rep::create(m_rep, pos, len));
1198	}
1199
1200	bool operator==(const UString& s1, const UString& s2)
1201	{
1202	if (s1.m_rep->len != s2.m_rep->len)
1203	return false;
1204
1205	return (memcmp(s1.m_rep->data(), s2.m_rep->data(),
1206	s1.m_rep->len * sizeof(UChar)) == 0);
1207	}
1208
1209	bool operator==(const UString& s1, const char *s2)
1210	{
1211	if (s2 == 0) {
1212	return s1.isEmpty();
1213	}
1214
1215	const UChar *u = s1.data();
1216	const UChar *uend = u + s1.size();
1217	while (u != uend && *s2) {
1218	if (u->uc != (unsigned char)*s2)
1219	return false;
1220	s2++;
1221	u++;
1222	}
1223
1224	return u == uend && *s2 == 0;
1225	}
1226
1227	bool operator<(const UString& s1, const UString& s2)
1228	{
1229	const int l1 = s1.size();
1230	const int l2 = s2.size();
1231	const int lmin = l1 < l2 ? l1 : l2;
1232	const UChar *c1 = s1.data();
1233	const UChar *c2 = s2.data();
1234	int l = 0;
1235	while (l < lmin && c1 == c2) {
1236	c1++;
1237	c2++;
1238	l++;
1239	}
1240	if (l < lmin)
1241	return (c1->uc < c2->uc);
1242
1243	return (l1 < l2);
1244	}
1245
1246	int compare(const UString& s1, const UString& s2)
1247	{
1248	const int l1 = s1.size();
1249	const int l2 = s2.size();
1250	const int lmin = l1 < l2 ? l1 : l2;
1251	const UChar *c1 = s1.data();
1252	const UChar *c2 = s2.data();
1253	int l = 0;
1254	while (l < lmin && c1 == c2) {
1255	c1++;
1256	c2++;
1257	l++;
1258	}
1259
1260	if (l < lmin)
1261	return (c1->uc > c2->uc) ? 1 : -1;
1262
1263	if (l1 == l2)
1264	return 0;
1265
1266	return (l1 > l2) ? 1 : -1;
1267	}
1268
1269	CString UString::UTF8String(bool strict) const
1270	{
1271	// Allocate a buffer big enough to hold all the characters.
1272	const int length = size();
1273	Vector<char, 1024> buffer(length * 3);
1274
1275	// Convert to runs of 8-bit characters.
1276	char* p = buffer.data();
1277	const ::UChar* d = reinterpret_cast<const ::UChar*>(&data()->uc);
1278	ConversionResult result = convertUTF16ToUTF8(&d, d + length, &p, p + buffer.size(), strict);
1279	if (result != conversionOK)
1280	return CString();
1281
1282	return CString(buffer.data(), p - buffer.data());
1283	}
1284
1285	} // namespace KJS

Note: See TracBrowser for help on using the repository browser.

Download in other formats: