Context Navigation

UString.cpp@ 59969

Visit:

Last change on this file since 59969 was 59969, checked in by [email protected], 15 years ago

Enforce size constraints on various data structures in JavaScriptCore/wtf.
https://bugs.webkit.org/show_bug.cgi?id=39327

Reviewed by Darin Adler.

I only modified the default build for OSX and Chromium's build file to include WTFCompileAsserts.cpp
as those should be sufficient to catch regressions on the size of the data structures.

JavaScriptCore.gypi: Added the WTFCompileAsserts.cpp file.
JavaScriptCore.xcodeproj/project.pbxproj: Added the WTFCompileAsserts.cpp file.
runtime/UString.cpp: Added a compile assert for UString size.
wtf/SizeLimits.cpp: Added compile asserts for data structures that didn't have cpp files.
wtf/text/StringImpl.cpp: Added a compile assert for StringImpl size.

Property svn:eol-style set to native

File size: 14.8 KB

Line
1	/*
2	* Copyright (C) 1999-2000 Harri Porten ([email protected])
3	* Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
4	* Copyright (C) 2007 Cameron Zwarich ([email protected])
5	* Copyright (C) 2009 Google Inc. All rights reserved.
6	*
7	* This library is free software; you can redistribute it and/or
8	* modify it under the terms of the GNU Library General Public
9	* License as published by the Free Software Foundation; either
10	* version 2 of the License, or (at your option) any later version.
11	*
12	* This library is distributed in the hope that it will be useful,
13	* but WITHOUT ANY WARRANTY; without even the implied warranty of
14	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15	* Library General Public License for more details.
16	*
17	* You should have received a copy of the GNU Library General Public License
18	* along with this library; see the file COPYING.LIB. If not, write to
19	* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20	* Boston, MA 02110-1301, USA.
21	*
22	*/
23
24	#include "config.h"
25	#include "UString.h"
26
27	#include "JSGlobalObjectFunctions.h"
28	#include "Collector.h"
29	#include "dtoa.h"
30	#include "Identifier.h"
31	#include "Operations.h"
32	#include <ctype.h>
33	#include <limits.h>
34	#include <limits>
35	#include <math.h>
36	#include <stdio.h>
37	#include <stdlib.h>
38	#include <string.h>
39	#include <wtf/ASCIICType.h>
40	#include <wtf/Assertions.h>
41	#include <wtf/MathExtras.h>
42	#include <wtf/StringExtras.h>
43	#include <wtf/Vector.h>
44	#include <wtf/unicode/UTF8.h>
45	#include <wtf/StringExtras.h>
46
47	#if HAVE(STRINGS_H)
48	#include <strings.h>
49	#endif
50
51	using namespace WTF;
52	using namespace WTF::Unicode;
53	using namespace std;
54
55	namespace JSC {
56
57	extern const double NaN;
58	extern const double Inf;
59
60	// The null string is immutable, except for refCount.
61	UString* UString::s_nullUString;
62
63	COMPILE_ASSERT(sizeof(UString) == sizeof(void*), UString_should_stay_small);
64
65	void initializeUString()
66	{
67	// UStringImpl::empty() does not construct its static string in a threadsafe fashion,
68	// so ensure it has been initialized from here.
69	UStringImpl::empty();
70
71	UString::s_nullUString = new UString;
72	}
73
74	UString::UString(const char* c)
75	: m_rep(Rep::create(c))
76	{
77	}
78
79	UString::UString(const char* c, unsigned length)
80	: m_rep(Rep::create(c, length))
81	{
82	}
83
84	UString::UString(const UChar* c, unsigned length)
85	: m_rep(Rep::create(c, length))
86	{
87	}
88
89	UString UString::from(int i)
90	{
91	UChar buf[1 + sizeof(i) * 3];
92	UChar* end = buf + sizeof(buf) / sizeof(UChar);
93	UChar* p = end;
94
95	if (i == 0)
96	*--p = '0';
97	else if (i == INT_MIN) {
98	char minBuf[1 + sizeof(i) * 3];
99	sprintf(minBuf, "%d", INT_MIN);
100	return UString(minBuf);
101	} else {
102	bool negative = false;
103	if (i < 0) {
104	negative = true;
105	i = -i;
106	}
107	while (i) {
108	*--p = static_cast<unsigned short>((i % 10) + '0');
109	i /= 10;
110	}
111	if (negative)
112	*--p = '-';
113	}
114
115	return UString(p, static_cast<unsigned>(end - p));
116	}
117
118	UString UString::from(long long i)
119	{
120	UChar buf[1 + sizeof(i) * 3];
121	UChar* end = buf + sizeof(buf) / sizeof(UChar);
122	UChar* p = end;
123
124	if (i == 0)
125	*--p = '0';
126	else if (i == std::numeric_limits<long long>::min()) {
127	char minBuf[1 + sizeof(i) * 3];
128	#if OS(WINDOWS)
129	snprintf(minBuf, sizeof(minBuf) - 1, "%I64d", std::numeric_limits<long long>::min());
130	#else
131	snprintf(minBuf, sizeof(minBuf) - 1, "%lld", std::numeric_limits<long long>::min());
132	#endif
133	return UString(minBuf);
134	} else {
135	bool negative = false;
136	if (i < 0) {
137	negative = true;
138	i = -i;
139	}
140	while (i) {
141	*--p = static_cast<unsigned short>((i % 10) + '0');
142	i /= 10;
143	}
144	if (negative)
145	*--p = '-';
146	}
147
148	return UString(p, static_cast<unsigned>(end - p));
149	}
150
151	UString UString::from(unsigned u)
152	{
153	UChar buf[sizeof(u) * 3];
154	UChar* end = buf + sizeof(buf) / sizeof(UChar);
155	UChar* p = end;
156
157	if (u == 0)
158	*--p = '0';
159	else {
160	while (u) {
161	*--p = static_cast<unsigned short>((u % 10) + '0');
162	u /= 10;
163	}
164	}
165
166	return UString(p, static_cast<unsigned>(end - p));
167	}
168
169	UString UString::from(long l)
170	{
171	UChar buf[1 + sizeof(l) * 3];
172	UChar* end = buf + sizeof(buf) / sizeof(UChar);
173	UChar* p = end;
174
175	if (l == 0)
176	*--p = '0';
177	else if (l == LONG_MIN) {
178	char minBuf[1 + sizeof(l) * 3];
179	sprintf(minBuf, "%ld", LONG_MIN);
180	return UString(minBuf);
181	} else {
182	bool negative = false;
183	if (l < 0) {
184	negative = true;
185	l = -l;
186	}
187	while (l) {
188	*--p = static_cast<unsigned short>((l % 10) + '0');
189	l /= 10;
190	}
191	if (negative)
192	*--p = '-';
193	}
194
195	return UString(p, end - p);
196	}
197
198	UString UString::from(double d)
199	{
200	DtoaBuffer buffer;
201	unsigned length;
202	doubleToStringInJavaScriptFormat(d, buffer, &length);
203	return UString(buffer, length);
204	}
205
206	char* UString::ascii() const
207	{
208	static char* asciiBuffer = 0;
209
210	unsigned length = size();
211	unsigned neededSize = length + 1;
212	delete[] asciiBuffer;
213	asciiBuffer = new char[neededSize];
214
215	const UChar* p = data();
216	char* q = asciiBuffer;
217	const UChar* limit = p + length;
218	while (p != limit) {
219	*q = static_cast<char>(p[0]);
220	++p;
221	++q;
222	}
223	*q = '\0';
224
225	return asciiBuffer;
226	}
227
228	bool UString::is8Bit() const
229	{
230	const UChar* u = data();
231	const UChar* limit = u + size();
232	while (u < limit) {
233	if (u[0] > 0xFF)
234	return false;
235	++u;
236	}
237
238	return true;
239	}
240
241	UChar UString::operator[](unsigned pos) const
242	{
243	if (pos >= size())
244	return '\0';
245	return data()[pos];
246	}
247
248	double UString::toDouble(bool tolerateTrailingJunk, bool tolerateEmptyString) const
249	{
250	if (size() == 1) {
251	UChar c = data()[0];
252	if (isASCIIDigit(c))
253	return c - '0';
254	if (isASCIISpace(c) && tolerateEmptyString)
255	return 0;
256	return NaN;
257	}
258
259	// FIXME: If tolerateTrailingJunk is true, then we want to tolerate junk
260	// after the number, even if it contains invalid UTF-16 sequences. So we
261	// shouldn't use the UTF8String function, which returns null when it
262	// encounters invalid UTF-16. Further, we have no need to convert the
263	// non-ASCII characters to UTF-8, so the UTF8String does quite a bit of
264	// unnecessary work.
265	CString s = UTF8String();
266	if (s.isNull())
267	return NaN;
268	const char* c = s.data();
269
270	// skip leading white space
271	while (isASCIISpace(*c))
272	c++;
273
274	// empty string ?
275	if (*c == '\0')
276	return tolerateEmptyString ? 0.0 : NaN;
277
278	double d;
279
280	// hex number ?
281	if (c == '0' && ((c + 1) == 'x' \|\| *(c + 1) == 'X')) {
282	const char* firstDigitPosition = c + 2;
283	c++;
284	d = 0.0;
285	while (*(++c)) {
286	if (c >= '0' && c <= '9')
287	d = d * 16.0 + *c - '0';
288	else if ((c >= 'A' && c <= 'F') \|\| (c >= 'a' && c <= 'f'))
289	d = d * 16.0 + (*c & 0xdf) - 'A' + 10.0;
290	else
291	break;
292	}
293
294	if (d >= mantissaOverflowLowerBound)
295	d = parseIntOverflow(firstDigitPosition, c - firstDigitPosition, 16);
296	} else {
297	// regular number ?
298	char* end;
299	d = WTF::strtod(c, &end);
300	if ((d != 0.0 \|\| end != c) && d != Inf && d != -Inf) {
301	c = end;
302	} else {
303	double sign = 1.0;
304
305	if (*c == '+')
306	c++;
307	else if (*c == '-') {
308	sign = -1.0;
309	c++;
310	}
311
312	// We used strtod() to do the conversion. However, strtod() handles
313	// infinite values slightly differently than JavaScript in that it
314	// converts the string "inf" with any capitalization to infinity,
315	// whereas the ECMA spec requires that it be converted to NaN.
316
317	if (c[0] == 'I' && c[1] == 'n' && c[2] == 'f' && c[3] == 'i' && c[4] == 'n' && c[5] == 'i' && c[6] == 't' && c[7] == 'y') {
318	d = sign * Inf;
319	c += 8;
320	} else if ((d == Inf \|\| d == -Inf) && c != 'I' && c != 'i')
321	c = end;
322	else
323	return NaN;
324	}
325	}
326
327	// allow trailing white space
328	while (isASCIISpace(*c))
329	c++;
330	// don't allow anything after - unless tolerant=true
331	// FIXME: If string contains a U+0000 character, then this check is incorrect.
332	if (!tolerateTrailingJunk && *c != '\0')
333	d = NaN;
334
335	return d;
336	}
337
338	double UString::toDouble(bool tolerateTrailingJunk) const
339	{
340	return toDouble(tolerateTrailingJunk, true);
341	}
342
343	double UString::toDouble() const
344	{
345	return toDouble(false, true);
346	}
347
348	uint32_t UString::toUInt32(bool* ok) const
349	{
350	double d = toDouble();
351	bool b = true;
352
353	if (d != static_cast<uint32_t>(d)) {
354	b = false;
355	d = 0;
356	}
357
358	if (ok)
359	*ok = b;
360
361	return static_cast<uint32_t>(d);
362	}
363
364	uint32_t UString::toUInt32(bool* ok, bool tolerateEmptyString) const
365	{
366	double d = toDouble(false, tolerateEmptyString);
367	bool b = true;
368
369	if (d != static_cast<uint32_t>(d)) {
370	b = false;
371	d = 0;
372	}
373
374	if (ok)
375	*ok = b;
376
377	return static_cast<uint32_t>(d);
378	}
379
380	uint32_t UString::toStrictUInt32(bool* ok) const
381	{
382	if (ok)
383	*ok = false;
384
385	// Empty string is not OK.
386	unsigned len = m_rep->length();
387	if (len == 0)
388	return 0;
389	const UChar* p = m_rep->characters();
390	unsigned short c = p[0];
391
392	// If the first digit is 0, only 0 itself is OK.
393	if (c == '0') {
394	if (len == 1 && ok)
395	*ok = true;
396	return 0;
397	}
398
399	// Convert to UInt32, checking for overflow.
400	uint32_t i = 0;
401	while (1) {
402	// Process character, turning it into a digit.
403	if (c < '0' \|\| c > '9')
404	return 0;
405	const unsigned d = c - '0';
406
407	// Multiply by 10, checking for overflow out of 32 bits.
408	if (i > 0xFFFFFFFFU / 10)
409	return 0;
410	i *= 10;
411
412	// Add in the digit, checking for overflow out of 32 bits.
413	const unsigned max = 0xFFFFFFFFU - d;
414	if (i > max)
415	return 0;
416	i += d;
417
418	// Handle end of string.
419	if (--len == 0) {
420	if (ok)
421	*ok = true;
422	return i;
423	}
424
425	// Get next character.
426	c = *(++p);
427	}
428	}
429
430	unsigned UString::find(const UString& f, unsigned pos) const
431	{
432	unsigned fsz = f.size();
433
434	if (fsz == 1) {
435	UChar ch = f[0];
436	const UChar* end = data() + size();
437	for (const UChar* c = data() + pos; c < end; c++) {
438	if (*c == ch)
439	return static_cast<unsigned>(c - data());
440	}
441	return NotFound;
442	}
443
444	unsigned sz = size();
445	if (sz < fsz)
446	return NotFound;
447	if (fsz == 0)
448	return pos;
449	const UChar* end = data() + sz - fsz;
450	unsigned fsizeminusone = (fsz - 1) * sizeof(UChar);
451	const UChar* fdata = f.data();
452	unsigned short fchar = fdata[0];
453	++fdata;
454	for (const UChar* c = data() + pos; c <= end; c++) {
455	if (c[0] == fchar && !memcmp(c + 1, fdata, fsizeminusone))
456	return static_cast<unsigned>(c - data());
457	}
458
459	return NotFound;
460	}
461
462	unsigned UString::find(UChar ch, unsigned pos) const
463	{
464	const UChar* end = data() + size();
465	for (const UChar* c = data() + pos; c < end; c++) {
466	if (*c == ch)
467	return static_cast<unsigned>(c - data());
468	}
469
470	return NotFound;
471	}
472
473	unsigned UString::rfind(const UString& f, unsigned pos) const
474	{
475	unsigned sz = size();
476	unsigned fsz = f.size();
477	if (sz < fsz)
478	return NotFound;
479	if (pos > sz - fsz)
480	pos = sz - fsz;
481	if (fsz == 0)
482	return pos;
483	unsigned fsizeminusone = (fsz - 1) * sizeof(UChar);
484	const UChar* fdata = f.data();
485	for (const UChar* c = data() + pos; c >= data(); c--) {
486	if (c == fdata && !memcmp(c + 1, fdata + 1, fsizeminusone))
487	return static_cast<unsigned>(c - data());
488	}
489
490	return NotFound;
491	}
492
493	unsigned UString::rfind(UChar ch, unsigned pos) const
494	{
495	if (isEmpty())
496	return NotFound;
497	if (pos + 1 >= size())
498	pos = size() - 1;
499	for (const UChar* c = data() + pos; c >= data(); c--) {
500	if (*c == ch)
501	return static_cast<unsigned>(c - data());
502	}
503
504	return NotFound;
505	}
506
507	UString UString::substr(unsigned pos, unsigned len) const
508	{
509	unsigned s = size();
510
511	if (pos >= s)
512	pos = s;
513	unsigned limit = s - pos;
514	if (len > limit)
515	len = limit;
516
517	if (pos == 0 && len == s)
518	return *this;
519
520	return UString(Rep::create(m_rep, pos, len));
521	}
522
523	bool operator==(const UString& s1, const char *s2)
524	{
525	if (s2 == 0)
526	return s1.isEmpty();
527
528	const UChar* u = s1.data();
529	const UChar* uend = u + s1.size();
530	while (u != uend && *s2) {
531	if (u[0] != (unsigned char)*s2)
532	return false;
533	s2++;
534	u++;
535	}
536
537	return u == uend && *s2 == 0;
538	}
539
540	bool operator<(const UString& s1, const UString& s2)
541	{
542	const unsigned l1 = s1.size();
543	const unsigned l2 = s2.size();
544	const unsigned lmin = l1 < l2 ? l1 : l2;
545	const UChar* c1 = s1.data();
546	const UChar* c2 = s2.data();
547	unsigned l = 0;
548	while (l < lmin && c1 == c2) {
549	c1++;
550	c2++;
551	l++;
552	}
553	if (l < lmin)
554	return (c1[0] < c2[0]);
555
556	return (l1 < l2);
557	}
558
559	bool operator>(const UString& s1, const UString& s2)
560	{
561	const unsigned l1 = s1.size();
562	const unsigned l2 = s2.size();
563	const unsigned lmin = l1 < l2 ? l1 : l2;
564	const UChar* c1 = s1.data();
565	const UChar* c2 = s2.data();
566	unsigned l = 0;
567	while (l < lmin && c1 == c2) {
568	c1++;
569	c2++;
570	l++;
571	}
572	if (l < lmin)
573	return (c1[0] > c2[0]);
574
575	return (l1 > l2);
576	}
577
578	int compare(const UString& s1, const UString& s2)
579	{
580	const unsigned l1 = s1.size();
581	const unsigned l2 = s2.size();
582	const unsigned lmin = l1 < l2 ? l1 : l2;
583	const UChar* c1 = s1.data();
584	const UChar* c2 = s2.data();
585	unsigned l = 0;
586	while (l < lmin && c1 == c2) {
587	c1++;
588	c2++;
589	l++;
590	}
591
592	if (l < lmin)
593	return (c1[0] > c2[0]) ? 1 : -1;
594
595	if (l1 == l2)
596	return 0;
597
598	return (l1 > l2) ? 1 : -1;
599	}
600
601	CString UString::UTF8String(bool strict) const
602	{
603	// Allocate a buffer big enough to hold all the characters.
604	const unsigned length = size();
605	Vector<char, 1024> buffer(length * 3);
606
607	// Convert to runs of 8-bit characters.
608	char* p = buffer.data();
609	const UChar* d = reinterpret_cast<const UChar*>(&data()[0]);
610	ConversionResult result = convertUTF16ToUTF8(&d, d + length, &p, p + buffer.size(), strict);
611	if (result != conversionOK)
612	return CString();
613
614	return CString(buffer.data(), p - buffer.data());
615	}
616
617	} // namespace JSC

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: webkit/trunk/JavaScriptCore/runtime/UString.cpp@ 59969

Download in other formats: