Context Navigation

source: webkit/trunk/JavaScriptCore/runtime/UString.cpp@ 65920

Visit:

Last change on this file since 65920 was 65920, checked in by [email protected], 15 years ago

2010-08-24 Oliver Hunt <[email protected]>

Reviewed by Beth Dakin.

Make overflow guards in UString::utf8 explicit
https://bugs.webkit.org/show_bug.cgi?id=44540

Add an explicit overflow check prior to allocating our buffer,
rather than implicitly relying on the guard in convertUTF16ToUTF8.

runtime/UString.cpp: (JSC::UString::utf8):

Property svn:eol-style set to native

File size: 10.3 KB

Line
1	/*
2	* Copyright (C) 1999-2000 Harri Porten ([email protected])
3	* Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
4	* Copyright (C) 2007 Cameron Zwarich ([email protected])
5	* Copyright (C) 2009 Google Inc. All rights reserved.
6	*
7	* This library is free software; you can redistribute it and/or
8	* modify it under the terms of the GNU Library General Public
9	* License as published by the Free Software Foundation; either
10	* version 2 of the License, or (at your option) any later version.
11	*
12	* This library is distributed in the hope that it will be useful,
13	* but WITHOUT ANY WARRANTY; without even the implied warranty of
14	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15	* Library General Public License for more details.
16	*
17	* You should have received a copy of the GNU Library General Public License
18	* along with this library; see the file COPYING.LIB. If not, write to
19	* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20	* Boston, MA 02110-1301, USA.
21	*
22	*/
23
24	#include "config.h"
25	#include "UString.h"
26
27	#include "JSGlobalObjectFunctions.h"
28	#include "Collector.h"
29	#include "dtoa.h"
30	#include "Identifier.h"
31	#include "Operations.h"
32	#include <ctype.h>
33	#include <limits.h>
34	#include <limits>
35	#include <stdio.h>
36	#include <stdlib.h>
37	#include <wtf/ASCIICType.h>
38	#include <wtf/Assertions.h>
39	#include <wtf/MathExtras.h>
40	#include <wtf/StringExtras.h>
41	#include <wtf/Vector.h>
42	#include <wtf/unicode/UTF8.h>
43
44	#if HAVE(STRINGS_H)
45	#include <strings.h>
46	#endif
47
48	using namespace WTF;
49	using namespace WTF::Unicode;
50	using namespace std;
51
52	namespace JSC {
53
54	extern const double NaN;
55	extern const double Inf;
56
57	COMPILE_ASSERT(sizeof(UString) == sizeof(void*), UString_should_stay_small);
58
59	// Construct a string with UTF-16 data.
60	UString::UString(const UChar* characters, unsigned length)
61	: m_impl(characters ? StringImpl::create(characters, length) : 0)
62	{
63	}
64
65	// Construct a string with UTF-16 data, from a null-terminated source.
66	UString::UString(const UChar* characters)
67	{
68	if (!characters)
69	return;
70
71	int length = 0;
72	while (characters[length] != UChar(0))
73	++length;
74
75	m_impl = StringImpl::create(characters, length);
76	}
77
78	// Construct a string with latin1 data.
79	UString::UString(const char* characters, unsigned length)
80	: m_impl(characters ? StringImpl::create(characters, length) : 0)
81	{
82	}
83
84	// Construct a string with latin1 data, from a null-terminated source.
85	UString::UString(const char* characters)
86	: m_impl(characters ? StringImpl::create(characters) : 0)
87	{
88	}
89
90	UString UString::number(int i)
91	{
92	UChar buf[1 + sizeof(i) * 3];
93	UChar* end = buf + sizeof(buf) / sizeof(UChar);
94	UChar* p = end;
95
96	if (i == 0)
97	*--p = '0';
98	else if (i == INT_MIN) {
99	char minBuf[1 + sizeof(i) * 3];
100	snprintf(minBuf, sizeof(minBuf), "%d", INT_MIN);
101	return UString(minBuf);
102	} else {
103	bool negative = false;
104	if (i < 0) {
105	negative = true;
106	i = -i;
107	}
108	while (i) {
109	*--p = static_cast<unsigned short>((i % 10) + '0');
110	i /= 10;
111	}
112	if (negative)
113	*--p = '-';
114	}
115
116	return UString(p, static_cast<unsigned>(end - p));
117	}
118
119	UString UString::number(long long i)
120	{
121	UChar buf[1 + sizeof(i) * 3];
122	UChar* end = buf + sizeof(buf) / sizeof(UChar);
123	UChar* p = end;
124
125	if (i == 0)
126	*--p = '0';
127	else if (i == std::numeric_limits<long long>::min()) {
128	char minBuf[1 + sizeof(i) * 3];
129	#if OS(WINDOWS)
130	snprintf(minBuf, sizeof(minBuf), "%I64d", std::numeric_limits<long long>::min());
131	#else
132	snprintf(minBuf, sizeof(minBuf), "%lld", std::numeric_limits<long long>::min());
133	#endif
134	return UString(minBuf);
135	} else {
136	bool negative = false;
137	if (i < 0) {
138	negative = true;
139	i = -i;
140	}
141	while (i) {
142	*--p = static_cast<unsigned short>((i % 10) + '0');
143	i /= 10;
144	}
145	if (negative)
146	*--p = '-';
147	}
148
149	return UString(p, static_cast<unsigned>(end - p));
150	}
151
152	UString UString::number(unsigned u)
153	{
154	UChar buf[sizeof(u) * 3];
155	UChar* end = buf + sizeof(buf) / sizeof(UChar);
156	UChar* p = end;
157
158	if (u == 0)
159	*--p = '0';
160	else {
161	while (u) {
162	*--p = static_cast<unsigned short>((u % 10) + '0');
163	u /= 10;
164	}
165	}
166
167	return UString(p, static_cast<unsigned>(end - p));
168	}
169
170	UString UString::number(long l)
171	{
172	UChar buf[1 + sizeof(l) * 3];
173	UChar* end = buf + sizeof(buf) / sizeof(UChar);
174	UChar* p = end;
175
176	if (l == 0)
177	*--p = '0';
178	else if (l == LONG_MIN) {
179	char minBuf[1 + sizeof(l) * 3];
180	snprintf(minBuf, sizeof(minBuf), "%ld", LONG_MIN);
181	return UString(minBuf);
182	} else {
183	bool negative = false;
184	if (l < 0) {
185	negative = true;
186	l = -l;
187	}
188	while (l) {
189	*--p = static_cast<unsigned short>((l % 10) + '0');
190	l /= 10;
191	}
192	if (negative)
193	*--p = '-';
194	}
195
196	return UString(p, end - p);
197	}
198
199	UString UString::number(double d)
200	{
201	DtoaBuffer buffer;
202	unsigned length;
203	doubleToStringInJavaScriptFormat(d, buffer, &length);
204	return UString(buffer, length);
205	}
206
207	UString UString::substringSharingImpl(unsigned offset, unsigned length) const
208	{
209	// FIXME: We used to check against a limit of Heap::minExtraCost / sizeof(UChar).
210
211	unsigned stringLength = this->length();
212	offset = min(offset, stringLength);
213	length = min(length, stringLength - offset);
214
215	if (!offset && length == stringLength)
216	return *this;
217	return UString(StringImpl::create(m_impl, offset, length));
218	}
219
220	bool operator==(const UString& s1, const char *s2)
221	{
222	if (s2 == 0)
223	return s1.isEmpty();
224
225	const UChar* u = s1.characters();
226	const UChar* uend = u + s1.length();
227	while (u != uend && *s2) {
228	if (u[0] != (unsigned char)*s2)
229	return false;
230	s2++;
231	u++;
232	}
233
234	return u == uend && *s2 == 0;
235	}
236
237	bool operator<(const UString& s1, const UString& s2)
238	{
239	const unsigned l1 = s1.length();
240	const unsigned l2 = s2.length();
241	const unsigned lmin = l1 < l2 ? l1 : l2;
242	const UChar* c1 = s1.characters();
243	const UChar* c2 = s2.characters();
244	unsigned l = 0;
245	while (l < lmin && c1 == c2) {
246	c1++;
247	c2++;
248	l++;
249	}
250	if (l < lmin)
251	return (c1[0] < c2[0]);
252
253	return (l1 < l2);
254	}
255
256	bool operator>(const UString& s1, const UString& s2)
257	{
258	const unsigned l1 = s1.length();
259	const unsigned l2 = s2.length();
260	const unsigned lmin = l1 < l2 ? l1 : l2;
261	const UChar* c1 = s1.characters();
262	const UChar* c2 = s2.characters();
263	unsigned l = 0;
264	while (l < lmin && c1 == c2) {
265	c1++;
266	c2++;
267	l++;
268	}
269	if (l < lmin)
270	return (c1[0] > c2[0]);
271
272	return (l1 > l2);
273	}
274
275	CString UString::ascii() const
276	{
277	// Basic Latin1 (ISO) encoding - Unicode characters 0..255 are
278	// preserved, characters outside of this range are converted to '?'.
279
280	unsigned length = this->length();
281	const UChar* characters = this->characters();
282
283	char* characterBuffer;
284	CString result = CString::newUninitialized(length, characterBuffer);
285
286	for (unsigned i = 0; i < length; ++i) {
287	UChar ch = characters[i];
288	characterBuffer[i] = ch && (ch < 0x20 \|\| ch >= 0x7f) ? '?' : ch;
289	}
290
291	return result;
292	}
293
294	CString UString::latin1() const
295	{
296	// Basic Latin1 (ISO) encoding - Unicode characters 0..255 are
297	// preserved, characters outside of this range are converted to '?'.
298
299	unsigned length = this->length();
300	const UChar* characters = this->characters();
301
302	char* characterBuffer;
303	CString result = CString::newUninitialized(length, characterBuffer);
304
305	for (unsigned i = 0; i < length; ++i) {
306	UChar ch = characters[i];
307	characterBuffer[i] = ch > 0xff ? '?' : ch;
308	}
309
310	return result;
311	}
312
313	// Helper to write a three-byte UTF-8 code point to the buffer, caller must check room is available.
314	static inline void putUTF8Triple(char*& buffer, UChar ch)
315	{
316	ASSERT(ch >= 0x0800);
317	*buffer++ = static_cast<char>(((ch >> 12) & 0x0F) \| 0xE0);
318	*buffer++ = static_cast<char>(((ch >> 6) & 0x3F) \| 0x80);
319	*buffer++ = static_cast<char>((ch & 0x3F) \| 0x80);
320	}
321
322	CString UString::utf8(bool strict) const
323	{
324	unsigned length = this->length();
325	const UChar* characters = this->characters();
326
327	// Allocate a buffer big enough to hold all the characters
328	// (an individual UTF-16 UChar can only expand to 3 UTF-8 bytes).
329	// Optimization ideas, if we find this function is hot:
330	// * We could speculatively create a CStringBuffer to contain 'length'
331	// characters, and resize if necessary (i.e. if the buffer contains
332	// non-ascii characters). (Alternatively, scan the buffer first for
333	// ascii characters, so we know this will be sufficient).
334	// * We could allocate a CStringBuffer with an appropriate size to
335	// have a good chance of being able to write the string into the
336	// buffer without reallocing (say, 1.5 x length).
337	if (length > numeric_limits<unsigned>::max() / 3)
338	return CString();
339	Vector<char, 1024> bufferVector(length * 3);
340
341	char* buffer = bufferVector.data();
342	ConversionResult result = convertUTF16ToUTF8(&characters, characters + length, &buffer, buffer + bufferVector.size(), strict);
343	ASSERT(result != targetExhausted); // (length * 3) should be sufficient for any conversion
344
345	// Only produced from strict conversion.
346	if (result == sourceIllegal)
347	return CString();
348
349	// Check for an unconverted high surrogate.
350	if (result == sourceExhausted) {
351	if (strict)
352	return CString();
353	// This should be one unpaired high surrogate. Treat it the same
354	// was as an unpaired high surrogate would have been handled in
355	// the middle of a string with non-strict conversion - which is
356	// to say, simply encode it to UTF-8.
357	ASSERT((characters + 1) == (this->characters() + length));
358	ASSERT((characters >= 0xD800) && (characters <= 0xDBFF));
359	// There should be room left, since one UChar hasn't been converted.
360	ASSERT((buffer + 3) <= (buffer + bufferVector.size()));
361	putUTF8Triple(buffer, *characters);
362	}
363
364	return CString(bufferVector.data(), buffer - bufferVector.data());
365	}
366
367	} // namespace JSC

Note: See TracBrowser for help on using the repository browser.

Download in other formats: