Context Navigation

ustring.cpp@ 34424

Visit:

Last change on this file since 34424 was 34361, checked in by [email protected], 17 years ago

Reviewed by Darin.

Fix JSClassCreate to work with old JSCore API threading model.

No change on SunSpider.

API/JSClassRef.cpp: (OpaqueJSClass::OpaqueJSClass): Since JSClass is constructed without a context, there is no way for it to create Identifiers. Also, added initializeThreading(), just for good measure.

API/JSCallbackObjectFunctions.h: (KJS::::getPropertyNames): Make an Identifier out of the string here, because propertyNames.add() needs that.

kjs/identifier.cpp:
kjs/identifier.h: (KJS::Identifier::equal):
kjs/ustring.cpp: (KJS::equal): Moved equal() from identifier.h to ustring.h, because it's not really about Identifiers, and to make it possible to use it from StrHash. Include StrHash.h from ustring.h to avoid having the behavior depend on headers that happen to be included.

wtf/StrHash.h: Removed.
kjs/ustring.h: Made RefPtr<UString::Rep> use the same default hash as UString::Rep* (it used to default to pointer equality). Moved the whole StrHash header into ustring.h.

JavaScriptCore.exp: Export equal() for WebCore use (this StrHash is used in c_class.cpp, jni_class.cpp, and npruntime.cpp).

Property svn:eol-style set to native

File size: 32.4 KB

Line
1	// -*- c-basic-offset: 2 -*-
2	/*
3	* Copyright (C) 1999-2000 Harri Porten ([email protected])
4	* Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved.
5	* Copyright (C) 2007 Cameron Zwarich ([email protected])
6	*
7	* This library is free software; you can redistribute it and/or
8	* modify it under the terms of the GNU Library General Public
9	* License as published by the Free Software Foundation; either
10	* version 2 of the License, or (at your option) any later version.
11	*
12	* This library is distributed in the hope that it will be useful,
13	* but WITHOUT ANY WARRANTY; without even the implied warranty of
14	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15	* Library General Public License for more details.
16	*
17	* You should have received a copy of the GNU Library General Public License
18	* along with this library; see the file COPYING.LIB. If not, write to
19	* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20	* Boston, MA 02110-1301, USA.
21	*
22	*/
23
24	#include "config.h"
25	#include "ustring.h"
26
27	#include "JSLock.h"
28	#include "collector.h"
29	#include "dtoa.h"
30	#include "function.h"
31	#include "identifier.h"
32	#include "operations.h"
33	#include <ctype.h>
34	#include <float.h>
35	#include <limits.h>
36	#include <math.h>
37	#include <stdio.h>
38	#include <stdlib.h>
39	#include <wtf/Assertions.h>
40	#include <wtf/ASCIICType.h>
41	#include <wtf/MathExtras.h>
42	#include <wtf/Vector.h>
43	#include <wtf/unicode/UTF8.h>
44
45	#if HAVE(STRING_H)
46	#include <string.h>
47	#endif
48	#if HAVE(STRINGS_H)
49	#include <strings.h>
50	#endif
51
52	using namespace WTF;
53	using namespace WTF::Unicode;
54	using namespace std;
55
56	namespace KJS {
57
58	extern const double NaN;
59	extern const double Inf;
60
// Sentinel returned by size computations that cannot be represented:
// the largest value a size_t can hold.
static inline const size_t overflowIndicator()
{
    const size_t largestSize = std::numeric_limits<size_t>::max();
    return largestSize;
}
62	static inline const size_t maxUChars() { return std::numeric_limits<size_t>::max() / sizeof(UChar); }
63
64	static inline UChar* allocChars(size_t length)
65	{
66	ASSERT(length);
67	if (length > maxUChars())
68	return 0;
69	return static_cast<UChar>(fastMalloc(sizeof(UChar) length));
70	}
71
72	static inline UChar* reallocChars(UChar* buffer, size_t length)
73	{
74	ASSERT(length);
75	if (length > maxUChars())
76	return 0;
77	return static_cast<UChar>(fastRealloc(buffer, sizeof(UChar) length));
78	}
79
// Build-time check that UChar is exactly two bytes wide.
80	COMPILE_ASSERT(sizeof(UChar) == 2, uchar_is_2_bytes)
81
82	CString::CString(const char *c)
83	{
84	length = strlen(c);
85	data = new char[length+1];
86	memcpy(data, c, length + 1);
87	}
88
89	CString::CString(const char *c, size_t len)
90	{
91	length = len;
92	data = new char[len+1];
93	memcpy(data, c, len);
94	data[len] = 0;
95	}
96
97	CString::CString(const CString &b)
98	{
99	length = b.length;
100	if (b.data) {
101	data = new char[length+1];
102	memcpy(data, b.data, length + 1);
103	}
104	else
105	data = 0;
106	}
107
108	CString::~CString()
109	{
110	delete [] data;
111	}
112
113	CString CString::adopt(char* c, size_t len)
114	{
115	CString s;
116	s.data = c;
117	s.length = len;
118
119	return s;
120	}
121
122	CString &CString::append(const CString &t)
123	{
124	char *n;
125	n = new char[length+t.length+1];
126	if (length)
127	memcpy(n, data, length);
128	if (t.length)
129	memcpy(n+length, t.data, t.length);
130	length += t.length;
131	n[length] = 0;
132
133	delete [] data;
134	data = n;
135
136	return *this;
137	}
138
139	CString &CString::operator=(const char *c)
140	{
141	if (data)
142	delete [] data;
143	length = strlen(c);
144	data = new char[length+1];
145	memcpy(data, c, length + 1);
146
147	return *this;
148	}
149
150	CString &CString::operator=(const CString &str)
151	{
152	if (this == &str)
153	return *this;
154
155	if (data)
156	delete [] data;
157	length = str.length;
158	if (str.data) {
159	data = new char[length + 1];
160	memcpy(data, str.data, length + 1);
161	}
162	else
163	data = 0;
164
165	return *this;
166	}
167
168	bool operator==(const CString& c1, const CString& c2)
169	{
170	size_t len = c1.size();
171	return len == c2.size() && (len == 0 \|\| memcmp(c1.c_str(), c2.c_str(), len) == 0);
172	}
173
174	// These static strings are immutable, except for rc, whose initial value is chosen to reduce the possibility of it becoming zero due to ref/deref not being thread-safe.
175	static UChar sharedEmptyChar;
176	UString::Rep UString::Rep::null = { 0, 0, INT_MAX / 2, 0, 0, &UString::Rep::null, true, 0, 0, 0, 0, 0, 0 };
177	UString::Rep UString::Rep::empty = { 0, 0, INT_MAX / 2, 0, 0, &UString::Rep::empty, true, 0, &sharedEmptyChar, 0, 0, 0, 0 };
178
179	static char* statBuffer = 0; // Only used for debugging via UString::ascii().
180
181	PassRefPtr<UString::Rep> UString::Rep::createCopying(const UChar *d, int l)
182	{
183	int sizeInBytes = l * sizeof(UChar);
184	UChar copyD = static_cast<UChar >(fastMalloc(sizeInBytes));
185	memcpy(copyD, d, sizeInBytes);
186
187	return create(copyD, l);
188	}
189
190	PassRefPtr<UString::Rep> UString::Rep::create(UChar *d, int l)
191	{
192	Rep* r = new Rep;
193	r->offset = 0;
194	r->len = l;
195	r->rc = 1;
196	r->_hash = 0;
197	r->identifierTable = 0;
198	r->baseString = r;
199	r->isStatic = false;
200	r->reportedCost = 0;
201	r->buf = d;
202	r->usedCapacity = l;
203	r->capacity = l;
204	r->usedPreCapacity = 0;
205	r->preCapacity = 0;
206
207	// steal the single reference this Rep was created with
208	return adoptRef(r);
209	}
210
211	PassRefPtr<UString::Rep> UString::Rep::create(PassRefPtr<Rep> base, int offset, int length)
212	{
213	ASSERT(base);
214
215	int baseOffset = base->offset;
216
217	base = base->baseString;
218
219	ASSERT(-(offset + baseOffset) <= base->usedPreCapacity);
220	ASSERT(offset + baseOffset + length <= base->usedCapacity);
221
222	Rep *r = new Rep;
223	r->offset = baseOffset + offset;
224	r->len = length;
225	r->rc = 1;
226	r->_hash = 0;
227	r->identifierTable = 0;
228	r->baseString = base.releaseRef();
229	r->isStatic = false;
230	r->reportedCost = 0;
231	r->buf = 0;
232	r->usedCapacity = 0;
233	r->capacity = 0;
234	r->usedPreCapacity = 0;
235	r->preCapacity = 0;
236
237	// steal the single reference this Rep was created with
238	return adoptRef(r);
239	}
240
241	PassRefPtr<UString::Rep> UString::Rep::createFromUTF8(const char* string)
242	{
243	if (!string)
244	return &UString::Rep::null;
245
246	size_t length = strlen(string);
247	Vector<UChar, 1024> buffer(length);
248	UChar* p = buffer.data();
249	if (conversionOK != convertUTF8ToUTF16(&string, string + length, &p, p + length))
250	return &UString::Rep::null;
251
252	return UString::Rep::createCopying(buffer.data(), p - buffer.data());
253	}
254
255	void UString::Rep::destroy()
256	{
257	// Static null and empty strings can never be destroyed, but we cannot rely on reference counting, because ref/deref are not thread-safe.
258	if (!isStatic) {
259	if (identifierTable)
260	Identifier::remove(this);
261	if (baseString == this)
262	fastFree(buf);
263	else
264	baseString->deref();
265
266	delete this;
267	}
268	}
269
270	// Golden ratio - arbitrary start value to avoid mapping all 0's to all 0's
271	// or anything like that.
272	const unsigned PHI = 0x9e3779b9U;
273
274	// Paul Hsieh's SuperFastHash
275	// http://www.azillionmonkeys.com/qed/hash.html
276	unsigned UString::Rep::computeHash(const UChar *s, int len)
277	{
278	unsigned l = len;
279	uint32_t hash = PHI;
280	uint32_t tmp;
281
282	int rem = l & 1;
283	l >>= 1;
284
285	// Main loop
286	for (; l > 0; l--) {
287	hash += s[0];
288	tmp = (s[1] << 11) ^ hash;
289	hash = (hash << 16) ^ tmp;
290	s += 2;
291	hash += hash >> 11;
292	}
293
294	// Handle end case
295	if (rem) {
296	hash += s[0];
297	hash ^= hash << 11;
298	hash += hash >> 17;
299	}
300
301	// Force "avalanching" of final 127 bits
302	hash ^= hash << 3;
303	hash += hash >> 5;
304	hash ^= hash << 2;
305	hash += hash >> 15;
306	hash ^= hash << 10;
307
308	// this avoids ever returning a hash code of 0, since that is used to
309	// signal "hash not computed yet", using a value that is likely to be
310	// effectively the same as 0 when the low bits are masked
311	if (hash == 0)
312	hash = 0x80000000;
313
314	return hash;
315	}
316
317	// Paul Hsieh's SuperFastHash
318	// http://www.azillionmonkeys.com/qed/hash.html
319	unsigned UString::Rep::computeHash(const char *s)
320	{
321	// This hash is designed to work on 16-bit chunks at a time. But since the normal case
322	// (above) is to hash UTF-16 characters, we just treat the 8-bit chars as if they
323	// were 16-bit chunks, which should give matching results
324
325	uint32_t hash = PHI;
326	uint32_t tmp;
327	size_t l = strlen(s);
328
329	size_t rem = l & 1;
330	l >>= 1;
331
332	// Main loop
333	for (; l > 0; l--) {
334	hash += (unsigned char)s[0];
335	tmp = ((unsigned char)s[1] << 11) ^ hash;
336	hash = (hash << 16) ^ tmp;
337	s += 2;
338	hash += hash >> 11;
339	}
340
341	// Handle end case
342	if (rem) {
343	hash += (unsigned char)s[0];
344	hash ^= hash << 11;
345	hash += hash >> 17;
346	}
347
348	// Force "avalanching" of final 127 bits
349	hash ^= hash << 3;
350	hash += hash >> 5;
351	hash ^= hash << 2;
352	hash += hash >> 15;
353	hash ^= hash << 10;
354
355	// this avoids ever returning a hash code of 0, since that is used to
356	// signal "hash not computed yet", using a value that is likely to be
357	// effectively the same as 0 when the low bits are masked
358	if (hash == 0)
359	hash = 0x80000000;
360
361	return hash;
362	}
363
364	// put these early so they can be inlined
365	inline size_t UString::expandedSize(size_t size, size_t otherSize) const
366	{
367	// Do the size calculation in two parts, returning overflowIndicator if
368	// we overflow the maximum value that we can handle.
369
370	if (size > maxUChars())
371	return overflowIndicator();
372
373	size_t expandedSize = ((size + 10) / 10 * 11) + 1;
374	if (maxUChars() - expandedSize < otherSize)
375	return overflowIndicator();
376
377	return expandedSize + otherSize;
378	}
379
380	inline int UString::usedCapacity() const
381	{
382	return m_rep->baseString->usedCapacity;
383	}
384
385	inline int UString::usedPreCapacity() const
386	{
387	return m_rep->baseString->usedPreCapacity;
388	}
389
390	void UString::expandCapacity(int requiredLength)
391	{
392	Rep* r = m_rep->baseString;
393
394	if (requiredLength > r->capacity) {
395	size_t newCapacity = expandedSize(requiredLength, r->preCapacity);
396	UChar* oldBuf = r->buf;
397	r->buf = reallocChars(r->buf, newCapacity);
398	if (!r->buf) {
399	r->buf = oldBuf;
400	m_rep = &Rep::null;
401	return;
402	}
403	r->capacity = newCapacity - r->preCapacity;
404	}
405	if (requiredLength > r->usedCapacity) {
406	r->usedCapacity = requiredLength;
407	}
408	}
409
410	void UString::expandPreCapacity(int requiredPreCap)
411	{
412	Rep* r = m_rep->baseString;
413
414	if (requiredPreCap > r->preCapacity) {
415	size_t newCapacity = expandedSize(requiredPreCap, r->capacity);
416	int delta = newCapacity - r->capacity - r->preCapacity;
417
418	UChar* newBuf = allocChars(newCapacity);
419	if (!newBuf) {
420	m_rep = &Rep::null;
421	return;
422	}
423	memcpy(newBuf + delta, r->buf, (r->capacity + r->preCapacity) * sizeof(UChar));
424	fastFree(r->buf);
425	r->buf = newBuf;
426
427	r->preCapacity = newCapacity - r->capacity;
428	}
429	if (requiredPreCap > r->usedPreCapacity) {
430	r->usedPreCapacity = requiredPreCap;
431	}
432	}
433
434	UString::UString(const char *c)
435	{
436	if (!c) {
437	m_rep = &Rep::null;
438	return;
439	}
440
441	if (!c[0]) {
442	m_rep = &Rep::empty;
443	return;
444	}
445
446	size_t length = strlen(c);
447	UChar *d = allocChars(length);
448	if (!d)
449	m_rep = &Rep::null;
450	else {
451	for (size_t i = 0; i < length; i++)
452	d[i] = static_cast<unsigned char>(c[i]); // use unsigned char to zero-extend instead of sign-extend
453	m_rep = Rep::create(d, static_cast<int>(length));
454	}
455	}
456
457	UString::UString(const UChar *c, int length)
458	{
459	if (length == 0)
460	m_rep = &Rep::empty;
461	else
462	m_rep = Rep::createCopying(c, length);
463	}
464
465	UString::UString(UChar *c, int length, bool copy)
466	{
467	if (length == 0)
468	m_rep = &Rep::empty;
469	else if (copy)
470	m_rep = Rep::createCopying(c, length);
471	else
472	m_rep = Rep::create(c, length);
473	}
474
475	UString::UString(const Vector<UChar>& buffer)
476	{
477	if (!buffer.size())
478	m_rep = &Rep::empty;
479	else
480	m_rep = Rep::createCopying(buffer.data(), buffer.size());
481	}
482
483
484	UString::UString(const UString &a, const UString &b)
485	{
486	int aSize = a.size();
487	int aOffset = a.m_rep->offset;
488	int bSize = b.size();
489	int bOffset = b.m_rep->offset;
490	int length = aSize + bSize;
491
492	// possible cases:
493
494	if (aSize == 0) {
495	// a is empty
496	m_rep = b.m_rep;
497	} else if (bSize == 0) {
498	// b is empty
499	m_rep = a.m_rep;
500	} else if (aOffset + aSize == a.usedCapacity() && aSize >= minShareSize && 4 * aSize >= bSize &&
501	(-bOffset != b.usedPreCapacity() \|\| aSize >= bSize)) {
502	// - a reaches the end of its buffer so it qualifies for shared append
503	// - also, it's at least a quarter the length of b - appending to a much shorter
504	// string does more harm than good
505	// - however, if b qualifies for prepend and is longer than a, we'd rather prepend
506	UString x(a);
507	x.expandCapacity(aOffset + length);
508	if (a.data() && x.data()) {
509	memcpy(const_cast<UChar >(a.data() + aSize), b.data(), bSize sizeof(UChar));
510	m_rep = Rep::create(a.m_rep, 0, length);
511	} else
512	m_rep = &Rep::null;
513	} else if (-bOffset == b.usedPreCapacity() && bSize >= minShareSize && 4 * bSize >= aSize) {
514	// - b reaches the beginning of its buffer so it qualifies for shared prepend
515	// - also, it's at least a quarter the length of a - prepending to a much shorter
516	// string does more harm than good
517	UString y(b);
518	y.expandPreCapacity(-bOffset + aSize);
519	if (b.data() && y.data()) {
520	memcpy(const_cast<UChar >(b.data() - aSize), a.data(), aSize sizeof(UChar));
521	m_rep = Rep::create(b.m_rep, -aSize, length);
522	} else
523	m_rep = &Rep::null;
524	} else {
525	// a does not qualify for append, and b does not qualify for prepend, gotta make a whole new string
526	size_t newCapacity = expandedSize(length, 0);
527	UChar* d = allocChars(newCapacity);
528	if (!d)
529	m_rep = &Rep::null;
530	else {
531	memcpy(d, a.data(), aSize * sizeof(UChar));
532	memcpy(d + aSize, b.data(), bSize * sizeof(UChar));
533	m_rep = Rep::create(d, length);
534	m_rep->capacity = newCapacity;
535	}
536	}
537	}
538
539	const UString& UString::null()
540	{
541	static UString* n = new UString; // Should be called from main thread at least once to be safely initialized.
542	return *n;
543	}
544
545	UString UString::from(int i)
546	{
547	UChar buf[1 + sizeof(i) * 3];
548	UChar *end = buf + sizeof(buf) / sizeof(UChar);
549	UChar *p = end;
550
551	if (i == 0) {
552	*--p = '0';
553	} else if (i == INT_MIN) {
554	char minBuf[1 + sizeof(i) * 3];
555	sprintf(minBuf, "%d", INT_MIN);
556	return UString(minBuf);
557	} else {
558	bool negative = false;
559	if (i < 0) {
560	negative = true;
561	i = -i;
562	}
563	while (i) {
564	*--p = (unsigned short)((i % 10) + '0');
565	i /= 10;
566	}
567	if (negative) {
568	*--p = '-';
569	}
570	}
571
572	return UString(p, static_cast<int>(end - p));
573	}
574
575	UString UString::from(unsigned int u)
576	{
577	UChar buf[sizeof(u) * 3];
578	UChar *end = buf + sizeof(buf) / sizeof(UChar);
579	UChar *p = end;
580
581	if (u == 0) {
582	*--p = '0';
583	} else {
584	while (u) {
585	*--p = (unsigned short)((u % 10) + '0');
586	u /= 10;
587	}
588	}
589
590	return UString(p, static_cast<int>(end - p));
591	}
592
593	UString UString::from(long l)
594	{
595	UChar buf[1 + sizeof(l) * 3];
596	UChar *end = buf + sizeof(buf) / sizeof(UChar);
597	UChar *p = end;
598
599	if (l == 0) {
600	*--p = '0';
601	} else if (l == LONG_MIN) {
602	char minBuf[1 + sizeof(l) * 3];
603	sprintf(minBuf, "%ld", LONG_MIN);
604	return UString(minBuf);
605	} else {
606	bool negative = false;
607	if (l < 0) {
608	negative = true;
609	l = -l;
610	}
611	while (l) {
612	*--p = (unsigned short)((l % 10) + '0');
613	l /= 10;
614	}
615	if (negative) {
616	*--p = '-';
617	}
618	}
619
620	return UString(p, static_cast<int>(end - p));
621	}
622
623	UString UString::from(double d)
624	{
625	// avoid ever printing -NaN, in JS conceptually there is only one NaN value
626	if (isnan(d))
627	return "NaN";
628
629	char buf[80];
630	int decimalPoint;
631	int sign;
632
633	char *result = dtoa(d, 0, &decimalPoint, &sign, NULL);
634	int length = static_cast<int>(strlen(result));
635
636	int i = 0;
637	if (sign) {
638	buf[i++] = '-';
639	}
640
641	if (decimalPoint <= 0 && decimalPoint > -6) {
642	buf[i++] = '0';
643	buf[i++] = '.';
644	for (int j = decimalPoint; j < 0; j++) {
645	buf[i++] = '0';
646	}
647	strcpy(buf + i, result);
648	} else if (decimalPoint <= 21 && decimalPoint > 0) {
649	if (length <= decimalPoint) {
650	strcpy(buf + i, result);
651	i += length;
652	for (int j = 0; j < decimalPoint - length; j++) {
653	buf[i++] = '0';
654	}
655	buf[i] = '\0';
656	} else {
657	strncpy(buf + i, result, decimalPoint);
658	i += decimalPoint;
659	buf[i++] = '.';
660	strcpy(buf + i, result + decimalPoint);
661	}
662	} else if (result[0] < '0' \|\| result[0] > '9') {
663	strcpy(buf + i, result);
664	} else {
665	buf[i++] = result[0];
666	if (length > 1) {
667	buf[i++] = '.';
668	strcpy(buf + i, result + 1);
669	i += length - 1;
670	}
671
672	buf[i++] = 'e';
673	buf[i++] = (decimalPoint >= 0) ? '+' : '-';
674	// decimalPoint can't be more than 3 digits decimal given the
675	// nature of float representation
676	int exponential = decimalPoint - 1;
677	if (exponential < 0)
678	exponential = -exponential;
679	if (exponential >= 100)
680	buf[i++] = static_cast<char>('0' + exponential / 100);
681	if (exponential >= 10)
682	buf[i++] = static_cast<char>('0' + (exponential % 100) / 10);
683	buf[i++] = static_cast<char>('0' + exponential % 10);
684	buf[i++] = '\0';
685	}
686
687	freedtoa(result);
688
689	return UString(buf);
690	}
691
692	UString UString::spliceSubstringsWithSeparators(const Range* substringRanges, int rangeCount, const UString* separators, int separatorCount) const
693	{
694	if (rangeCount == 1 && separatorCount == 0) {
695	int thisSize = size();
696	int position = substringRanges[0].position;
697	int length = substringRanges[0].length;
698	if (position <= 0 && length >= thisSize)
699	return *this;
700	return UString::Rep::create(m_rep, max(0, position), min(thisSize, length));
701	}
702
703	int totalLength = 0;
704	for (int i = 0; i < rangeCount; i++)
705	totalLength += substringRanges[i].length;
706	for (int i = 0; i < separatorCount; i++)
707	totalLength += separators[i].size();
708
709	if (totalLength == 0)
710	return "";
711
712	UChar* buffer = allocChars(totalLength);
713	if (!buffer)
714	return null();
715
716	int maxCount = max(rangeCount, separatorCount);
717	int bufferPos = 0;
718	for (int i = 0; i < maxCount; i++) {
719	if (i < rangeCount) {
720	memcpy(buffer + bufferPos, data() + substringRanges[i].position, substringRanges[i].length * sizeof(UChar));
721	bufferPos += substringRanges[i].length;
722	}
723	if (i < separatorCount) {
724	memcpy(buffer + bufferPos, separators[i].data(), separators[i].size() * sizeof(UChar));
725	bufferPos += separators[i].size();
726	}
727	}
728
729	return UString::Rep::create(buffer, totalLength);
730	}
731
732	UString& UString::append(const UString &t)
733	{
734	int thisSize = size();
735	int thisOffset = m_rep->offset;
736	int tSize = t.size();
737	int length = thisSize + tSize;
738
739	// possible cases:
740	if (thisSize == 0) {
741	// this is empty
742	*this = t;
743	} else if (tSize == 0) {
744	// t is empty
745	} else if (m_rep->baseIsSelf() && m_rep->rc == 1) {
746	// this is direct and has refcount of 1 (so we can just alter it directly)
747	expandCapacity(thisOffset + length);
748	if (data()) {
749	memcpy(const_cast<UChar>(data() + thisSize), t.data(), tSize sizeof(UChar));
750	m_rep->len = length;
751	m_rep->_hash = 0;
752	}
753	} else if (thisOffset + thisSize == usedCapacity() && thisSize >= minShareSize) {
754	// this reaches the end of the buffer - extend it if it's long enough to append to
755	expandCapacity(thisOffset + length);
756	if (data()) {
757	memcpy(const_cast<UChar>(data() + thisSize), t.data(), tSize sizeof(UChar));
758	m_rep = Rep::create(m_rep, 0, length);
759	}
760	} else {
761	// this is shared with someone using more capacity, gotta make a whole new string
762	size_t newCapacity = expandedSize(length, 0);
763	UChar* d = allocChars(newCapacity);
764	if (!d)
765	m_rep = &Rep::null;
766	else {
767	memcpy(d, data(), thisSize * sizeof(UChar));
768	memcpy(const_cast<UChar>(d + thisSize), t.data(), tSize sizeof(UChar));
769	m_rep = Rep::create(d, length);
770	m_rep->capacity = newCapacity;
771	}
772	}
773
774	return *this;
775	}
776
777	UString& UString::append(const char *t)
778	{
779	int thisSize = size();
780	int thisOffset = m_rep->offset;
781	int tSize = static_cast<int>(strlen(t));
782	int length = thisSize + tSize;
783
784	// possible cases:
785	if (thisSize == 0) {
786	// this is empty
787	*this = t;
788	} else if (tSize == 0) {
789	// t is empty, we'll just return *this below.
790	} else if (m_rep->baseIsSelf() && m_rep->rc == 1) {
791	// this is direct and has refcount of 1 (so we can just alter it directly)
792	expandCapacity(thisOffset + length);
793	UChar d = const_cast<UChar >(data());
794	if (d) {
795	for (int i = 0; i < tSize; ++i)
796	d[thisSize + i] = static_cast<unsigned char>(t[i]); // use unsigned char to zero-extend instead of sign-extend
797	m_rep->len = length;
798	m_rep->_hash = 0;
799	}
800	} else if (thisOffset + thisSize == usedCapacity() && thisSize >= minShareSize) {
801	// this string reaches the end of the buffer - extend it
802	expandCapacity(thisOffset + length);
803	UChar d = const_cast<UChar >(data());
804	if (d) {
805	for (int i = 0; i < tSize; ++i)
806	d[thisSize + i] = static_cast<unsigned char>(t[i]); // use unsigned char to zero-extend instead of sign-extend
807	m_rep = Rep::create(m_rep, 0, length);
808	}
809	} else {
810	// this is shared with someone using more capacity, gotta make a whole new string
811	size_t newCapacity = expandedSize(length, 0);
812	UChar* d = allocChars(newCapacity);
813	if (!d)
814	m_rep = &Rep::null;
815	else {
816	memcpy(d, data(), thisSize * sizeof(UChar));
817	for (int i = 0; i < tSize; ++i)
818	d[thisSize + i] = static_cast<unsigned char>(t[i]); // use unsigned char to zero-extend instead of sign-extend
819	m_rep = Rep::create(d, length);
820	m_rep->capacity = newCapacity;
821	}
822	}
823
824	return *this;
825	}
826
827	UString& UString::append(UChar c)
828	{
829	int thisOffset = m_rep->offset;
830	int length = size();
831
832	// possible cases:
833	if (length == 0) {
834	// this is empty - must make a new m_rep because we don't want to pollute the shared empty one
835	size_t newCapacity = expandedSize(1, 0);
836	UChar* d = allocChars(newCapacity);
837	if (!d)
838	m_rep = &Rep::null;
839	else {
840	d[0] = c;
841	m_rep = Rep::create(d, 1);
842	m_rep->capacity = newCapacity;
843	}
844	} else if (m_rep->baseIsSelf() && m_rep->rc == 1) {
845	// this is direct and has refcount of 1 (so we can just alter it directly)
846	expandCapacity(thisOffset + length + 1);
847	UChar d = const_cast<UChar >(data());
848	if (d) {
849	d[length] = c;
850	m_rep->len = length + 1;
851	m_rep->_hash = 0;
852	}
853	} else if (thisOffset + length == usedCapacity() && length >= minShareSize) {
854	// this reaches the end of the string - extend it and share
855	expandCapacity(thisOffset + length + 1);
856	UChar d = const_cast<UChar >(data());
857	if (d) {
858	d[length] = c;
859	m_rep = Rep::create(m_rep, 0, length + 1);
860	}
861	} else {
862	// this is shared with someone using more capacity, gotta make a whole new string
863	size_t newCapacity = expandedSize(length + 1, 0);
864	UChar* d = allocChars(newCapacity);
865	if (!d)
866	m_rep = &Rep::null;
867	else {
868	memcpy(d, data(), length * sizeof(UChar));
869	d[length] = c;
870	m_rep = Rep::create(d, length + 1);
871	m_rep->capacity = newCapacity;
872	}
873	}
874
875	return *this;
876	}
877
878	bool UString::getCString(CStringBuffer& buffer) const
879	{
880	int length = size();
881	int neededSize = length + 1;
882	buffer.resize(neededSize);
883	char* buf = buffer.data();
884
885	UChar ored = 0;
886	const UChar* p = data();
887	char* q = buf;
888	const UChar* limit = p + length;
889	while (p != limit) {
890	UChar c = p[0];
891	ored \|= c;
892	*q = static_cast<char>(c);
893	++p;
894	++q;
895	}
896	*q = '\0';
897
898	return !(ored & 0xFF00);
899	}
900
901	char *UString::ascii() const
902	{
903	int length = size();
904	int neededSize = length + 1;
905	delete[] statBuffer;
906	statBuffer = new char[neededSize];
907
908	const UChar *p = data();
909	char *q = statBuffer;
910	const UChar *limit = p + length;
911	while (p != limit) {
912	*q = static_cast<char>(p[0]);
913	++p;
914	++q;
915	}
916	*q = '\0';
917
918	return statBuffer;
919	}
920
921	UString& UString::operator=(const char *c)
922	{
923	if (!c) {
924	m_rep = &Rep::null;
925	return *this;
926	}
927
928	if (!c[0]) {
929	m_rep = &Rep::empty;
930	return *this;
931	}
932
933	int l = static_cast<int>(strlen(c));
934	UChar *d;
935	if (m_rep->rc == 1 && l <= m_rep->capacity && m_rep->baseIsSelf() && m_rep->offset == 0 && m_rep->preCapacity == 0) {
936	d = m_rep->buf;
937	m_rep->_hash = 0;
938	m_rep->len = l;
939	} else {
940	d = allocChars(l);
941	if (!d) {
942	m_rep = &Rep::null;
943	return *this;
944	}
945	m_rep = Rep::create(d, l);
946	}
947	for (int i = 0; i < l; i++)
948	d[i] = static_cast<unsigned char>(c[i]); // use unsigned char to zero-extend instead of sign-extend
949
950	return *this;
951	}
952
953	bool UString::is8Bit() const
954	{
955	const UChar *u = data();
956	const UChar *limit = u + size();
957	while (u < limit) {
958	if (u[0] > 0xFF)
959	return false;
960	++u;
961	}
962
963	return true;
964	}
965
966	UChar UString::operator[](int pos) const
967	{
968	if (pos >= size())
969	return '\0';
970	return data()[pos];
971	}
972
973	double UString::toDouble(bool tolerateTrailingJunk, bool tolerateEmptyString) const
974	{
975	double d;
976
977	// FIXME: If tolerateTrailingJunk is true, then we want to tolerate non-8-bit junk
978	// after the number, so this is too strict a check.
979	CStringBuffer s;
980	if (!getCString(s))
981	return NaN;
982	const char* c = s.data();
983
984	// skip leading white space
985	while (isASCIISpace(*c))
986	c++;
987
988	// empty string ?
989	if (*c == '\0')
990	return tolerateEmptyString ? 0.0 : NaN;
991
992	// hex number ?
993	if (c == '0' && ((c+1) == 'x' \|\| *(c+1) == 'X')) {
994	const char* firstDigitPosition = c + 2;
995	c++;
996	d = 0.0;
997	while (*(++c)) {
998	if (c >= '0' && c <= '9')
999	d = d * 16.0 + *c - '0';
1000	else if ((c >= 'A' && c <= 'F') \|\| (c >= 'a' && c <= 'f'))
1001	d = d * 16.0 + (*c & 0xdf) - 'A' + 10.0;
1002	else
1003	break;
1004	}
1005
1006	if (d >= mantissaOverflowLowerBound)
1007	d = parseIntOverflow(firstDigitPosition, c - firstDigitPosition, 16);
1008	} else {
1009	// regular number ?
1010	char *end;
1011	d = strtod(c, &end);
1012	if ((d != 0.0 \|\| end != c) && d != Inf && d != -Inf) {
1013	c = end;
1014	} else {
1015	double sign = 1.0;
1016
1017	if (*c == '+')
1018	c++;
1019	else if (*c == '-') {
1020	sign = -1.0;
1021	c++;
1022	}
1023
1024	// We used strtod() to do the conversion. However, strtod() handles
1025	// infinite values slightly differently than JavaScript in that it
1026	// converts the string "inf" with any capitalization to infinity,
1027	// whereas the ECMA spec requires that it be converted to NaN.
1028
1029	if (c[0] == 'I' && c[1] == 'n' && c[2] == 'f' && c[3] == 'i' && c[4] == 'n' && c[5] == 'i' && c[6] == 't' && c[7] == 'y') {
1030	d = sign * Inf;
1031	c += 8;
1032	} else if ((d == Inf \|\| d == -Inf) && c != 'I' && c != 'i')
1033	c = end;
1034	else
1035	return NaN;
1036	}
1037	}
1038
1039	// allow trailing white space
1040	while (isASCIISpace(*c))
1041	c++;
1042	// don't allow anything after - unless tolerant=true
1043	if (!tolerateTrailingJunk && *c != '\0')
1044	d = NaN;
1045
1046	return d;
1047	}
1048
1049	double UString::toDouble(bool tolerateTrailingJunk) const
1050	{
1051	return toDouble(tolerateTrailingJunk, true);
1052	}
1053
1054	double UString::toDouble() const
1055	{
1056	return toDouble(false, true);
1057	}
1058
1059	uint32_t UString::toUInt32(bool *ok) const
1060	{
1061	double d = toDouble();
1062	bool b = true;
1063
1064	if (d != static_cast<uint32_t>(d)) {
1065	b = false;
1066	d = 0;
1067	}
1068
1069	if (ok)
1070	*ok = b;
1071
1072	return static_cast<uint32_t>(d);
1073	}
1074
1075	uint32_t UString::toUInt32(bool *ok, bool tolerateEmptyString) const
1076	{
1077	double d = toDouble(false, tolerateEmptyString);
1078	bool b = true;
1079
1080	if (d != static_cast<uint32_t>(d)) {
1081	b = false;
1082	d = 0;
1083	}
1084
1085	if (ok)
1086	*ok = b;
1087
1088	return static_cast<uint32_t>(d);
1089	}
1090
1091	uint32_t UString::toStrictUInt32(bool *ok) const
1092	{
1093	if (ok)
1094	*ok = false;
1095
1096	// Empty string is not OK.
1097	int len = m_rep->len;
1098	if (len == 0)
1099	return 0;
1100	const UChar *p = m_rep->data();
1101	unsigned short c = p[0];
1102
1103	// If the first digit is 0, only 0 itself is OK.
1104	if (c == '0') {
1105	if (len == 1 && ok)
1106	*ok = true;
1107	return 0;
1108	}
1109
1110	// Convert to UInt32, checking for overflow.
1111	uint32_t i = 0;
1112	while (1) {
1113	// Process character, turning it into a digit.
1114	if (c < '0' \|\| c > '9')
1115	return 0;
1116	const unsigned d = c - '0';
1117
1118	// Multiply by 10, checking for overflow out of 32 bits.
1119	if (i > 0xFFFFFFFFU / 10)
1120	return 0;
1121	i *= 10;
1122
1123	// Add in the digit, checking for overflow out of 32 bits.
1124	const unsigned max = 0xFFFFFFFFU - d;
1125	if (i > max)
1126	return 0;
1127	i += d;
1128
1129	// Handle end of string.
1130	if (--len == 0) {
1131	if (ok)
1132	*ok = true;
1133	return i;
1134	}
1135
1136	// Get next character.
1137	c = *(++p);
1138	}
1139	}
1140
1141	int UString::find(const UString &f, int pos) const
1142	{
1143	int sz = size();
1144	int fsz = f.size();
1145	if (sz < fsz)
1146	return -1;
1147	if (pos < 0)
1148	pos = 0;
1149	if (fsz == 0)
1150	return pos;
1151	const UChar *end = data() + sz - fsz;
1152	int fsizeminusone = (fsz - 1) * sizeof(UChar);
1153	const UChar *fdata = f.data();
1154	unsigned short fchar = fdata[0];
1155	++fdata;
1156	for (const UChar *c = data() + pos; c <= end; c++)
1157	if (c[0] == fchar && !memcmp(c + 1, fdata, fsizeminusone))
1158	return static_cast<int>(c - data());
1159
1160	return -1;
1161	}
1162
1163	int UString::find(UChar ch, int pos) const
1164	{
1165	if (pos < 0)
1166	pos = 0;
1167	const UChar *end = data() + size();
1168	for (const UChar *c = data() + pos; c < end; c++)
1169	if (*c == ch)
1170	return static_cast<int>(c - data());
1171
1172	return -1;
1173	}
1174
1175	int UString::rfind(const UString &f, int pos) const
1176	{
1177	int sz = size();
1178	int fsz = f.size();
1179	if (sz < fsz)
1180	return -1;
1181	if (pos < 0)
1182	pos = 0;
1183	if (pos > sz - fsz)
1184	pos = sz - fsz;
1185	if (fsz == 0)
1186	return pos;
1187	int fsizeminusone = (fsz - 1) * sizeof(UChar);
1188	const UChar *fdata = f.data();
1189	for (const UChar *c = data() + pos; c >= data(); c--) {
1190	if (c == fdata && !memcmp(c + 1, fdata + 1, fsizeminusone))
1191	return static_cast<int>(c - data());
1192	}
1193
1194	return -1;
1195	}
1196
1197	int UString::rfind(UChar ch, int pos) const
1198	{
1199	if (isEmpty())
1200	return -1;
1201	if (pos + 1 >= size())
1202	pos = size() - 1;
1203	for (const UChar *c = data() + pos; c >= data(); c--) {
1204	if (*c == ch)
1205	return static_cast<int>(c-data());
1206	}
1207
1208	return -1;
1209	}
1210
1211	UString UString::substr(int pos, int len) const
1212	{
1213	int s = size();
1214
1215	if (pos < 0)
1216	pos = 0;
1217	else if (pos >= s)
1218	pos = s;
1219	if (len < 0)
1220	len = s;
1221	if (pos + len >= s)
1222	len = s - pos;
1223
1224	if (pos == 0 && len == s)
1225	return *this;
1226
1227	return UString(Rep::create(m_rep, pos, len));
1228	}
1229
1230	bool operator==(const UString& s1, const UString& s2)
1231	{
1232	if (s1.m_rep->len != s2.m_rep->len)
1233	return false;
1234
1235	return (memcmp(s1.m_rep->data(), s2.m_rep->data(),
1236	s1.m_rep->len * sizeof(UChar)) == 0);
1237	}
1238
1239	bool operator==(const UString& s1, const char *s2)
1240	{
1241	if (s2 == 0) {
1242	return s1.isEmpty();
1243	}
1244
1245	const UChar *u = s1.data();
1246	const UChar *uend = u + s1.size();
1247	while (u != uend && *s2) {
1248	if (u[0] != (unsigned char)*s2)
1249	return false;
1250	s2++;
1251	u++;
1252	}
1253
1254	return u == uend && *s2 == 0;
1255	}
1256
1257	bool operator<(const UString& s1, const UString& s2)
1258	{
1259	const int l1 = s1.size();
1260	const int l2 = s2.size();
1261	const int lmin = l1 < l2 ? l1 : l2;
1262	const UChar *c1 = s1.data();
1263	const UChar *c2 = s2.data();
1264	int l = 0;
1265	while (l < lmin && c1 == c2) {
1266	c1++;
1267	c2++;
1268	l++;
1269	}
1270	if (l < lmin)
1271	return (c1[0] < c2[0]);
1272
1273	return (l1 < l2);
1274	}
1275
1276	bool operator>(const UString& s1, const UString& s2)
1277	{
1278	const int l1 = s1.size();
1279	const int l2 = s2.size();
1280	const int lmin = l1 < l2 ? l1 : l2;
1281	const UChar *c1 = s1.data();
1282	const UChar *c2 = s2.data();
1283	int l = 0;
1284	while (l < lmin && c1 == c2) {
1285	c1++;
1286	c2++;
1287	l++;
1288	}
1289	if (l < lmin)
1290	return (c1[0] > c2[0]);
1291
1292	return (l1 > l2);
1293	}
1294
1295	int compare(const UString& s1, const UString& s2)
1296	{
1297	const int l1 = s1.size();
1298	const int l2 = s2.size();
1299	const int lmin = l1 < l2 ? l1 : l2;
1300	const UChar *c1 = s1.data();
1301	const UChar *c2 = s2.data();
1302	int l = 0;
1303	while (l < lmin && c1 == c2) {
1304	c1++;
1305	c2++;
1306	l++;
1307	}
1308
1309	if (l < lmin)
1310	return (c1[0] > c2[0]) ? 1 : -1;
1311
1312	if (l1 == l2)
1313	return 0;
1314
1315	return (l1 > l2) ? 1 : -1;
1316	}
1317
1318	bool equal(const UString::Rep* r, const UString::Rep* b)
1319	{
1320	int length = r->len;
1321	if (length != b->len)
1322	return false;
1323	const UChar* d = r->data();
1324	const UChar* s = b->data();
1325	for (int i = 0; i != length; ++i)
1326	if (d[i] != s[i])
1327	return false;
1328	return true;
1329	}
1330
1331	CString UString::UTF8String(bool strict) const
1332	{
1333	// Allocate a buffer big enough to hold all the characters.
1334	const int length = size();
1335	Vector<char, 1024> buffer(length * 3);
1336
1337	// Convert to runs of 8-bit characters.
1338	char* p = buffer.data();
1339	const UChar* d = reinterpret_cast<const UChar*>(&data()[0]);
1340	ConversionResult result = convertUTF16ToUTF8(&d, d + length, &p, p + buffer.size(), strict);
1341	if (result != conversionOK)
1342	return CString();
1343
1344	return CString(buffer.data(), p - buffer.data());
1345	}
1346
1347	} // namespace KJS

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: webkit/trunk/JavaScriptCore/kjs/ustring.cpp@ 34424

Download in other formats: