Context Navigation

source: webkit/trunk/JavaScriptCore/runtime/UString.cpp@ 61623

Visit:

Last change on this file since 61623 was 60332, checked in by [email protected], 15 years ago

2010-05-27 Luiz Agostini <[email protected]>

Reviewed by Darin Adler.

UTF-16 code points compare() for String objects
https://bugs.webkit.org/show_bug.cgi?id=39701

Moving compare() implementation from UString to StringImpl for it to be shared
with String. Adding overloaded free functions codePointCompare() in StringImpl
and WTFString. Renaming function compare in UString to codePointCompare to be
consistent.

runtime/JSArray.cpp: (JSC::compareByStringPairForQSort):
runtime/UString.cpp:
runtime/UString.h: (JSC::codePointCompare):
wtf/text/StringImpl.cpp: (WebCore::codePointCompare):
wtf/text/StringImpl.h:
wtf/text/WTFString.cpp: (WebCore::codePointCompare):
wtf/text/WTFString.h:

Property svn:eol-style set to native

File size: 14.5 KB

Line
1	/*
2	* Copyright (C) 1999-2000 Harri Porten ([email protected])
3	* Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
4	* Copyright (C) 2007 Cameron Zwarich ([email protected])
5	* Copyright (C) 2009 Google Inc. All rights reserved.
6	*
7	* This library is free software; you can redistribute it and/or
8	* modify it under the terms of the GNU Library General Public
9	* License as published by the Free Software Foundation; either
10	* version 2 of the License, or (at your option) any later version.
11	*
12	* This library is distributed in the hope that it will be useful,
13	* but WITHOUT ANY WARRANTY; without even the implied warranty of
14	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15	* Library General Public License for more details.
16	*
17	* You should have received a copy of the GNU Library General Public License
18	* along with this library; see the file COPYING.LIB. If not, write to
19	* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20	* Boston, MA 02110-1301, USA.
21	*
22	*/
23
24	#include "config.h"
25	#include "UString.h"
26
27	#include "JSGlobalObjectFunctions.h"
28	#include "Collector.h"
29	#include "dtoa.h"
30	#include "Identifier.h"
31	#include "Operations.h"
32	#include <ctype.h>
33	#include <limits.h>
34	#include <limits>
35	#include <math.h>
36	#include <stdio.h>
37	#include <stdlib.h>
38	#include <string.h>
39	#include <wtf/ASCIICType.h>
40	#include <wtf/Assertions.h>
41	#include <wtf/MathExtras.h>
42	#include <wtf/StringExtras.h>
43	#include <wtf/Vector.h>
44	#include <wtf/unicode/UTF8.h>
45	#include <wtf/StringExtras.h>
46
47	#if HAVE(STRINGS_H)
48	#include <strings.h>
49	#endif
50
51	using namespace WTF;
52	using namespace WTF::Unicode;
53	using namespace std;
54
55	namespace JSC {
56
// NaN and Inf are only declared here; their definitions live outside this file.
extern const double NaN;
extern const double Inf;

// The null string is immutable, except for refCount.
// The pointer is filled in by initializeUString().
UString* UString::s_nullUString;

// Compile-time check that a UString occupies exactly one pointer.
COMPILE_ASSERT(sizeof(UString) == sizeof(void*), UString_should_stay_small);
64
65	void initializeUString()
66	{
67	// UStringImpl::empty() does not construct its static string in a threadsafe fashion,
68	// so ensure it has been initialized from here.
69	UStringImpl::empty();
70
71	UString::s_nullUString = new UString;
72	}
73
74	UString::UString(const char* c)
75	: m_rep(Rep::create(c))
76	{
77	}
78
79	UString::UString(const char* c, unsigned length)
80	: m_rep(Rep::create(c, length))
81	{
82	}
83
84	UString::UString(const UChar* c, unsigned length)
85	: m_rep(Rep::create(c, length))
86	{
87	}
88
89	UString UString::from(int i)
90	{
91	UChar buf[1 + sizeof(i) * 3];
92	UChar* end = buf + sizeof(buf) / sizeof(UChar);
93	UChar* p = end;
94
95	if (i == 0)
96	*--p = '0';
97	else if (i == INT_MIN) {
98	char minBuf[1 + sizeof(i) * 3];
99	sprintf(minBuf, "%d", INT_MIN);
100	return UString(minBuf);
101	} else {
102	bool negative = false;
103	if (i < 0) {
104	negative = true;
105	i = -i;
106	}
107	while (i) {
108	*--p = static_cast<unsigned short>((i % 10) + '0');
109	i /= 10;
110	}
111	if (negative)
112	*--p = '-';
113	}
114
115	return UString(p, static_cast<unsigned>(end - p));
116	}
117
118	UString UString::from(long long i)
119	{
120	UChar buf[1 + sizeof(i) * 3];
121	UChar* end = buf + sizeof(buf) / sizeof(UChar);
122	UChar* p = end;
123
124	if (i == 0)
125	*--p = '0';
126	else if (i == std::numeric_limits<long long>::min()) {
127	char minBuf[1 + sizeof(i) * 3];
128	#if OS(WINDOWS)
129	snprintf(minBuf, sizeof(minBuf) - 1, "%I64d", std::numeric_limits<long long>::min());
130	#else
131	snprintf(minBuf, sizeof(minBuf) - 1, "%lld", std::numeric_limits<long long>::min());
132	#endif
133	return UString(minBuf);
134	} else {
135	bool negative = false;
136	if (i < 0) {
137	negative = true;
138	i = -i;
139	}
140	while (i) {
141	*--p = static_cast<unsigned short>((i % 10) + '0');
142	i /= 10;
143	}
144	if (negative)
145	*--p = '-';
146	}
147
148	return UString(p, static_cast<unsigned>(end - p));
149	}
150
151	UString UString::from(unsigned u)
152	{
153	UChar buf[sizeof(u) * 3];
154	UChar* end = buf + sizeof(buf) / sizeof(UChar);
155	UChar* p = end;
156
157	if (u == 0)
158	*--p = '0';
159	else {
160	while (u) {
161	*--p = static_cast<unsigned short>((u % 10) + '0');
162	u /= 10;
163	}
164	}
165
166	return UString(p, static_cast<unsigned>(end - p));
167	}
168
169	UString UString::from(long l)
170	{
171	UChar buf[1 + sizeof(l) * 3];
172	UChar* end = buf + sizeof(buf) / sizeof(UChar);
173	UChar* p = end;
174
175	if (l == 0)
176	*--p = '0';
177	else if (l == LONG_MIN) {
178	char minBuf[1 + sizeof(l) * 3];
179	sprintf(minBuf, "%ld", LONG_MIN);
180	return UString(minBuf);
181	} else {
182	bool negative = false;
183	if (l < 0) {
184	negative = true;
185	l = -l;
186	}
187	while (l) {
188	*--p = static_cast<unsigned short>((l % 10) + '0');
189	l /= 10;
190	}
191	if (negative)
192	*--p = '-';
193	}
194
195	return UString(p, end - p);
196	}
197
198	UString UString::from(double d)
199	{
200	DtoaBuffer buffer;
201	unsigned length;
202	doubleToStringInJavaScriptFormat(d, buffer, &length);
203	return UString(buffer, length);
204	}
205
206	char* UString::ascii() const
207	{
208	static char* asciiBuffer = 0;
209
210	unsigned length = size();
211	unsigned neededSize = length + 1;
212	delete[] asciiBuffer;
213	asciiBuffer = new char[neededSize];
214
215	const UChar* p = data();
216	char* q = asciiBuffer;
217	const UChar* limit = p + length;
218	while (p != limit) {
219	*q = static_cast<char>(p[0]);
220	++p;
221	++q;
222	}
223	*q = '\0';
224
225	return asciiBuffer;
226	}
227
228	bool UString::is8Bit() const
229	{
230	const UChar* u = data();
231	const UChar* limit = u + size();
232	while (u < limit) {
233	if (u[0] > 0xFF)
234	return false;
235	++u;
236	}
237
238	return true;
239	}
240
241	UChar UString::operator[](unsigned pos) const
242	{
243	if (pos >= size())
244	return '\0';
245	return data()[pos];
246	}
247
248	double UString::toDouble(bool tolerateTrailingJunk, bool tolerateEmptyString) const
249	{
250	if (size() == 1) {
251	UChar c = data()[0];
252	if (isASCIIDigit(c))
253	return c - '0';
254	if (isASCIISpace(c) && tolerateEmptyString)
255	return 0;
256	return NaN;
257	}
258
259	// FIXME: If tolerateTrailingJunk is true, then we want to tolerate junk
260	// after the number, even if it contains invalid UTF-16 sequences. So we
261	// shouldn't use the UTF8String function, which returns null when it
262	// encounters invalid UTF-16. Further, we have no need to convert the
263	// non-ASCII characters to UTF-8, so the UTF8String does quite a bit of
264	// unnecessary work.
265
266	// FIXME: The space skipping code below skips only ASCII spaces, but callers
267	// need to skip all StrWhiteSpace. The isStrWhiteSpace function does the
268	// right thing but requires UChar, not char, for its argument.
269
270	CString s = UTF8String();
271	if (s.isNull())
272	return NaN;
273	const char* c = s.data();
274
275	// skip leading white space
276	while (isASCIISpace(*c))
277	c++;
278
279	// empty string ?
280	if (*c == '\0')
281	return tolerateEmptyString ? 0.0 : NaN;
282
283	double d;
284
285	// hex number ?
286	if (c == '0' && ((c + 1) == 'x' \|\| *(c + 1) == 'X')) {
287	const char* firstDigitPosition = c + 2;
288	c++;
289	d = 0.0;
290	while (*(++c)) {
291	if (c >= '0' && c <= '9')
292	d = d * 16.0 + *c - '0';
293	else if ((c >= 'A' && c <= 'F') \|\| (c >= 'a' && c <= 'f'))
294	d = d * 16.0 + (*c & 0xdf) - 'A' + 10.0;
295	else
296	break;
297	}
298
299	if (d >= mantissaOverflowLowerBound)
300	d = parseIntOverflow(firstDigitPosition, c - firstDigitPosition, 16);
301	} else {
302	// regular number ?
303	char* end;
304	d = WTF::strtod(c, &end);
305	if ((d != 0.0 \|\| end != c) && d != Inf && d != -Inf) {
306	c = end;
307	} else {
308	double sign = 1.0;
309
310	if (*c == '+')
311	c++;
312	else if (*c == '-') {
313	sign = -1.0;
314	c++;
315	}
316
317	// We used strtod() to do the conversion. However, strtod() handles
318	// infinite values slightly differently than JavaScript in that it
319	// converts the string "inf" with any capitalization to infinity,
320	// whereas the ECMA spec requires that it be converted to NaN.
321
322	if (c[0] == 'I' && c[1] == 'n' && c[2] == 'f' && c[3] == 'i' && c[4] == 'n' && c[5] == 'i' && c[6] == 't' && c[7] == 'y') {
323	d = sign * Inf;
324	c += 8;
325	} else if ((d == Inf \|\| d == -Inf) && c != 'I' && c != 'i')
326	c = end;
327	else
328	return NaN;
329	}
330	}
331
332	if (!tolerateTrailingJunk) {
333	// allow trailing white space
334	while (isASCIISpace(*c))
335	c++;
336	if (c != s.data() + s.length())
337	d = NaN;
338	}
339
340	return d;
341	}
342
343	double UString::toDouble(bool tolerateTrailingJunk) const
344	{
345	return toDouble(tolerateTrailingJunk, true);
346	}
347
348	double UString::toDouble() const
349	{
350	return toDouble(false, true);
351	}
352
353	uint32_t UString::toUInt32(bool* ok) const
354	{
355	double d = toDouble();
356	bool b = true;
357
358	if (d != static_cast<uint32_t>(d)) {
359	b = false;
360	d = 0;
361	}
362
363	if (ok)
364	*ok = b;
365
366	return static_cast<uint32_t>(d);
367	}
368
369	uint32_t UString::toUInt32(bool* ok, bool tolerateEmptyString) const
370	{
371	double d = toDouble(false, tolerateEmptyString);
372	bool b = true;
373
374	if (d != static_cast<uint32_t>(d)) {
375	b = false;
376	d = 0;
377	}
378
379	if (ok)
380	*ok = b;
381
382	return static_cast<uint32_t>(d);
383	}
384
385	uint32_t UString::toStrictUInt32(bool* ok) const
386	{
387	if (ok)
388	*ok = false;
389
390	// Empty string is not OK.
391	unsigned len = m_rep->length();
392	if (len == 0)
393	return 0;
394	const UChar* p = m_rep->characters();
395	unsigned short c = p[0];
396
397	// If the first digit is 0, only 0 itself is OK.
398	if (c == '0') {
399	if (len == 1 && ok)
400	*ok = true;
401	return 0;
402	}
403
404	// Convert to UInt32, checking for overflow.
405	uint32_t i = 0;
406	while (1) {
407	// Process character, turning it into a digit.
408	if (c < '0' \|\| c > '9')
409	return 0;
410	const unsigned d = c - '0';
411
412	// Multiply by 10, checking for overflow out of 32 bits.
413	if (i > 0xFFFFFFFFU / 10)
414	return 0;
415	i *= 10;
416
417	// Add in the digit, checking for overflow out of 32 bits.
418	const unsigned max = 0xFFFFFFFFU - d;
419	if (i > max)
420	return 0;
421	i += d;
422
423	// Handle end of string.
424	if (--len == 0) {
425	if (ok)
426	*ok = true;
427	return i;
428	}
429
430	// Get next character.
431	c = *(++p);
432	}
433	}
434
435	unsigned UString::find(const UString& f, unsigned pos) const
436	{
437	unsigned fsz = f.size();
438
439	if (fsz == 1) {
440	UChar ch = f[0];
441	const UChar* end = data() + size();
442	for (const UChar* c = data() + pos; c < end; c++) {
443	if (*c == ch)
444	return static_cast<unsigned>(c - data());
445	}
446	return NotFound;
447	}
448
449	unsigned sz = size();
450	if (sz < fsz)
451	return NotFound;
452	if (fsz == 0)
453	return pos;
454	const UChar* end = data() + sz - fsz;
455	unsigned fsizeminusone = (fsz - 1) * sizeof(UChar);
456	const UChar* fdata = f.data();
457	unsigned short fchar = fdata[0];
458	++fdata;
459	for (const UChar* c = data() + pos; c <= end; c++) {
460	if (c[0] == fchar && !memcmp(c + 1, fdata, fsizeminusone))
461	return static_cast<unsigned>(c - data());
462	}
463
464	return NotFound;
465	}
466
467	unsigned UString::find(UChar ch, unsigned pos) const
468	{
469	const UChar* end = data() + size();
470	for (const UChar* c = data() + pos; c < end; c++) {
471	if (*c == ch)
472	return static_cast<unsigned>(c - data());
473	}
474
475	return NotFound;
476	}
477
478	unsigned UString::rfind(const UString& f, unsigned pos) const
479	{
480	unsigned sz = size();
481	unsigned fsz = f.size();
482	if (sz < fsz)
483	return NotFound;
484	if (pos > sz - fsz)
485	pos = sz - fsz;
486	if (fsz == 0)
487	return pos;
488	unsigned fsizeminusone = (fsz - 1) * sizeof(UChar);
489	const UChar* fdata = f.data();
490	for (const UChar* c = data() + pos; c >= data(); c--) {
491	if (c == fdata && !memcmp(c + 1, fdata + 1, fsizeminusone))
492	return static_cast<unsigned>(c - data());
493	}
494
495	return NotFound;
496	}
497
498	unsigned UString::rfind(UChar ch, unsigned pos) const
499	{
500	if (isEmpty())
501	return NotFound;
502	if (pos + 1 >= size())
503	pos = size() - 1;
504	for (const UChar* c = data() + pos; c >= data(); c--) {
505	if (*c == ch)
506	return static_cast<unsigned>(c - data());
507	}
508
509	return NotFound;
510	}
511
512	UString UString::substr(unsigned pos, unsigned len) const
513	{
514	unsigned s = size();
515
516	if (pos >= s)
517	pos = s;
518	unsigned limit = s - pos;
519	if (len > limit)
520	len = limit;
521
522	if (pos == 0 && len == s)
523	return *this;
524
525	return UString(Rep::create(m_rep, pos, len));
526	}
527
528	bool operator==(const UString& s1, const char *s2)
529	{
530	if (s2 == 0)
531	return s1.isEmpty();
532
533	const UChar* u = s1.data();
534	const UChar* uend = u + s1.size();
535	while (u != uend && *s2) {
536	if (u[0] != (unsigned char)*s2)
537	return false;
538	s2++;
539	u++;
540	}
541
542	return u == uend && *s2 == 0;
543	}
544
545	bool operator<(const UString& s1, const UString& s2)
546	{
547	const unsigned l1 = s1.size();
548	const unsigned l2 = s2.size();
549	const unsigned lmin = l1 < l2 ? l1 : l2;
550	const UChar* c1 = s1.data();
551	const UChar* c2 = s2.data();
552	unsigned l = 0;
553	while (l < lmin && c1 == c2) {
554	c1++;
555	c2++;
556	l++;
557	}
558	if (l < lmin)
559	return (c1[0] < c2[0]);
560
561	return (l1 < l2);
562	}
563
564	bool operator>(const UString& s1, const UString& s2)
565	{
566	const unsigned l1 = s1.size();
567	const unsigned l2 = s2.size();
568	const unsigned lmin = l1 < l2 ? l1 : l2;
569	const UChar* c1 = s1.data();
570	const UChar* c2 = s2.data();
571	unsigned l = 0;
572	while (l < lmin && c1 == c2) {
573	c1++;
574	c2++;
575	l++;
576	}
577	if (l < lmin)
578	return (c1[0] > c2[0]);
579
580	return (l1 > l2);
581	}
582
583	CString UString::UTF8String(bool strict) const
584	{
585	// Allocate a buffer big enough to hold all the characters.
586	const unsigned length = size();
587	Vector<char, 1024> buffer(length * 3);
588
589	// Convert to runs of 8-bit characters.
590	char* p = buffer.data();
591	const UChar* d = reinterpret_cast<const UChar*>(&data()[0]);
592	ConversionResult result = convertUTF16ToUTF8(&d, d + length, &p, p + buffer.size(), strict);
593	if (result != conversionOK)
594	return CString();
595
596	return CString(buffer.data(), p - buffer.data());
597	}
598
599	} // namespace JSC

Note: See TracBrowser for help on using the repository browser.

Download in other formats: