Context Navigation

ustring.cpp@ 14256

Visit:

Last change on this file since 14256 was 14256, checked in by mjs, 19 years ago

JavaScriptCore:

Rubber stamped by Anders.

renamed kxmlcore to wtf

kxmlcore --> wtf
KXMLCore --> WTF
WKC --> WTF

JavaScriptCore.xcodeproj/project.pbxproj:
bindings/c/c_instance.cpp:
bindings/objc/WebScriptObject.mm:
kjs/JSImmediate.h:
kjs/Parser.cpp:
kjs/Parser.h:
kjs/array_object.cpp:
kjs/collector.cpp: (KJS::Collector::registerThread):
kjs/collector.h:
kjs/config.h:
kjs/function.cpp: (KJS::isStrWhiteSpace):
kjs/function.h:
kjs/identifier.cpp:
kjs/internal.cpp:
kjs/internal.h:
kjs/lexer.cpp: (Lexer::shift): (Lexer::isWhiteSpace): (Lexer::isIdentStart): (Lexer::isIdentPart):
kjs/lookup.cpp:
kjs/nodes.cpp:
kjs/nodes.h:
kjs/number_object.cpp:
kjs/object.h:
kjs/property_map.cpp:
kjs/property_map.h:
kjs/string_object.cpp: (StringProtoFunc::callAsFunction):
kjs/testkjs.cpp: (testIsInteger):
kjs/ustring.cpp:
kjs/ustring.h:
kxmlcore: Removed.
kxmlcore/AlwaysInline.h: Removed.
kxmlcore/Assertions.cpp: Removed.
kxmlcore/Assertions.h: Removed.
kxmlcore/FastMalloc.cpp: Removed.
kxmlcore/FastMalloc.h: Removed.
kxmlcore/FastMallocInternal.h: Removed.
kxmlcore/Forward.h: Removed.
kxmlcore/HashCountedSet.h: Removed.
kxmlcore/HashFunctions.h: Removed.
kxmlcore/HashMap.h: Removed.
kxmlcore/HashSet.h: Removed.
kxmlcore/HashTable.cpp: Removed.
kxmlcore/HashTable.h: Removed.
kxmlcore/HashTraits.h: Removed.
kxmlcore/ListRefPtr.h: Removed.
kxmlcore/Noncopyable.h: Removed.
kxmlcore/OwnArrayPtr.h: Removed.
kxmlcore/OwnPtr.h: Removed.
kxmlcore/PassRefPtr.h: Removed.
kxmlcore/Platform.h: Removed.
kxmlcore/RefPtr.h: Removed.
kxmlcore/TCPageMap.h: Removed.
kxmlcore/TCSpinLock.h: Removed.
kxmlcore/TCSystemAlloc.cpp: Removed.
kxmlcore/TCSystemAlloc.h: Removed.
kxmlcore/UnusedParam.h: Removed.
kxmlcore/Vector.h: Removed.
kxmlcore/VectorTraits.h: Removed.
kxmlcore/unicode: Removed.
kxmlcore/unicode/Unicode.h: Removed.
kxmlcore/unicode/UnicodeCategory.h: Removed.
kxmlcore/unicode/icu: Removed.
kxmlcore/unicode/icu/UnicodeIcu.h: Removed.
kxmlcore/unicode/posix: Removed.
kxmlcore/unicode/qt3: Removed.
kxmlcore/unicode/qt4: Removed.
kxmlcore/unicode/qt4/UnicodeQt4.h: Removed.
pcre/pcre_get.c:
wtf: Added.
wtf/Assertions.cpp:
wtf/Assertions.h:
wtf/FastMalloc.cpp: (WTF::TCMalloc_ThreadCache::Scavenge): (WTF::do_malloc): (WTF::do_free): (WTF::TCMallocGuard::TCMallocGuard): (WTF::malloc): (WTF::free): (WTF::calloc): (WTF::cfree): (WTF::realloc):
wtf/FastMalloc.h:
wtf/FastMallocInternal.h:
wtf/Forward.h:
wtf/HashCountedSet.h:
wtf/HashFunctions.h:
wtf/HashMap.h:
wtf/HashSet.h:
wtf/HashTable.cpp:
wtf/HashTable.h:
wtf/HashTraits.h:
wtf/ListRefPtr.h:
wtf/Noncopyable.h:
wtf/OwnArrayPtr.h:
wtf/OwnPtr.h:
wtf/PassRefPtr.h:
wtf/RefPtr.h:
wtf/TCSystemAlloc.cpp: (TCMalloc_SystemAlloc):
wtf/Vector.h:
wtf/VectorTraits.h:
wtf/unicode/UnicodeCategory.h:
wtf/unicode/icu/UnicodeIcu.h:

JavaScriptGlue:

Rubber stamped by Anders.

renamed kxmlcore to wtf

kxmlcore --> wtf
KXMLCore --> WTF
WKC --> WTF

config.h:
kxmlcore: Removed.
kxmlcore/AlwaysInline.h: Removed.
kxmlcore/Assertions.h: Removed.
kxmlcore/FastMalloc.h: Removed.
kxmlcore/Forward.h: Removed.
kxmlcore/HashCountedSet.h: Removed.
kxmlcore/HashSet.h: Removed.
kxmlcore/Noncopyable.h: Removed.
kxmlcore/OwnArrayPtr.h: Removed.
kxmlcore/OwnPtr.h: Removed.
kxmlcore/PassRefPtr.h: Removed.
kxmlcore/Platform.h: Removed.
kxmlcore/RefPtr.h: Removed.
kxmlcore/Vector.h: Removed.
wtf: Added.

WebCore:

Rubber stamped by Anders.

renamed kxmlcore to wtf

kxmlcore --> wtf
KXMLCore --> WTF
WKC --> WTF

ForwardingHeaders/kxmlcore: Removed.
ForwardingHeaders/kxmlcore/AlwaysInline.h: Removed.
ForwardingHeaders/kxmlcore/Assertions.h: Removed.
ForwardingHeaders/kxmlcore/FastMalloc.h: Removed.
ForwardingHeaders/kxmlcore/Forward.h: Removed.
ForwardingHeaders/kxmlcore/HashCountedSet.h: Removed.
ForwardingHeaders/kxmlcore/HashMap.h: Removed.
ForwardingHeaders/kxmlcore/HashSet.h: Removed.
ForwardingHeaders/kxmlcore/HashTraits.h: Removed.
ForwardingHeaders/kxmlcore/Noncopyable.h: Removed.
ForwardingHeaders/kxmlcore/OwnArrayPtr.h: Removed.
ForwardingHeaders/kxmlcore/OwnPtr.h: Removed.
ForwardingHeaders/kxmlcore/PassRefPtr.h: Removed.
ForwardingHeaders/kxmlcore/Platform.h: Removed.
ForwardingHeaders/kxmlcore/RefPtr.h: Removed.
ForwardingHeaders/kxmlcore/Vector.h: Removed.
ForwardingHeaders/wtf: Added.
bindings/js/JSHTMLElementWrapperFactory.h:
bindings/js/kjs_binding.cpp:
bindings/js/kjs_window.h:
bindings/objc/DOMImplementationFront.h:
bridge/JavaAppletWidget.h:
bridge/mac/WebCoreFrameNamespaces.mm:
bridge/mac/WebCorePageBridge.mm: (initializeLogChannel):
bridge/mac/WebCoreStringTruncator.mm:
bridge/mac/WebCoreViewFactory.m:
config.h:
css/css_base.h:
css/css_valueimpl.h:
css/csshelper.cpp:
css/cssparser.h:
dom/DOMImplementation.h:
dom/Document.h:
dom/NamedNodeMap.h:
dom/Node.h:
dom/NodeList.h:
dom/QualifiedName.cpp:
dom/Range.h:
dom/StyledElement.cpp:
dom/dom2_traversalimpl.h:
dom/xml_tokenizer.h:
editing/RebalanceWhitespaceCommand.cpp:
editing/RemoveCSSPropertyCommand.cpp:
editing/RemoveNodeAttributeCommand.cpp:
editing/RemoveNodeCommand.cpp:
editing/RemoveNodePreservingChildrenCommand.cpp:
editing/ReplaceSelectionCommand.h:
editing/Selection.cpp:
editing/SetNodeAttributeCommand.cpp:
editing/SplitElementCommand.cpp:
editing/SplitTextNodeCommand.cpp:
editing/SplitTextNodeContainingElementCommand.cpp:
editing/TextIterator.h:
editing/htmlediting.h:
editing/markup.h:
html/CanvasGradient.h:
html/CanvasRenderingContext2D.h:
html/CanvasStyle.cpp:
html/HTMLCollection.h:
html/HTMLElementFactory.h:
kcanvas/KCanvasFilters.cpp:
kcanvas/KCanvasPath.h:
kcanvas/RenderPath.cpp:
kcanvas/RenderSVGImage.cpp:
kcanvas/RenderSVGText.cpp:
kcanvas/device/quartz/KCanvasItemQuartz.mm:
kcanvas/device/quartz/KRenderingPaintServerGradientQuartz.mm:
kcanvas/device/quartz/QuartzSupport.mm:
ksvg2/misc/KSVGTimeScheduler.h:
ksvg2/misc/SVGDocumentExtensions.h:
ksvg2/scripts/make_names.pl:
ksvg2/svg/SVGDOMImplementation.cpp:
ksvg2/svg/SVGExternalResourcesRequired.h:
ksvg2/svg/SVGFilterPrimitiveStandardAttributes.cpp:
ksvg2/svg/SVGForeignObjectElement.cpp:
ksvg2/svg/SVGImageElement.cpp:
ksvg2/svg/SVGMaskElement.cpp:
ksvg2/svg/SVGStyledElement.cpp:
ksvg2/svg/SVGTests.h:
ksvg2/svg/SVGTransform.h:
ksvg2/svg/SVGTransformable.cpp:
kwq/AccessibilityObjectCache.h:
kwq/KWQCString.cpp:
kwq/KWQFormData.mm:
kwq/KWQListBox.mm:
kwq/KWQResourceLoader.mm:
kwq/KWQTextEdit.mm:
loader/Cache.h:
loader/CachedObject.h:
loader/CachedObjectClientWalker.h:
loader/Decoder.h:
loader/DocLoader.h:
loader/loader.cpp:
loader/loader.h:
page/DOMWindow.h:
page/Frame.h:
page/FramePrivate.h:
page/FrameTree.cpp:
page/Page.cpp:
page/Page.h:
page/Plugin.h:
platform/Arena.cpp:
platform/ArrayImpl.h:
platform/AtomicString.cpp:
platform/CharsetNames.cpp:
platform/Color.cpp:
platform/DeprecatedPtrListImpl.cpp:
platform/DeprecatedValueListImpl.h:
platform/FontFallbackList.h:
platform/GraphicsContext.h:
platform/GraphicsTypes.cpp:
platform/Image.h:
platform/KURL.cpp:
platform/Logging.cpp:
platform/Logging.h:
platform/PlatformString.h:
platform/PlugInInfoStore.h:
platform/StreamingTextDecoder.cpp:
platform/StreamingTextDecoder.h:
platform/String.cpp:
platform/StringHash.h:
platform/StringImpl.cpp:
platform/StringImpl.h:
platform/TextEncoding.cpp:
platform/Timer.cpp:
platform/Timer.h:
platform/TransferJob.h:
platform/TransferJobInternal.h:
platform/mac/BlockExceptions.mm:
platform/mac/ColorMac.mm:
platform/mac/FontData.mm:
platform/mac/KURLMac.mm:
platform/mac/QStringMac.mm:
platform/mac/SharedTimerMac.cpp:
platform/mac/TextEncodingMac.cpp:
platform/mac/WebCoreImageRendererFactory.m:
platform/mac/WebCoreKeyGenerator.m:
platform/mac/WebCoreTextArea.mm:
platform/mac/WebCoreTextField.mm:
platform/mac/WebTextRendererFactory.h:
platform/mac/WebTextRendererFactory.mm:
platform/win/TemporaryLinkStubs.cpp: (JavaAppletWidget::JavaAppletWidget):
rendering/InlineTextBox.cpp:
rendering/RenderText.cpp:
rendering/RenderTreeAsText.cpp:
rendering/bidi.cpp:
xml/XSLTProcessor.h:
xpath/impl/XPathExpressionNode.h:
xpath/impl/XPathParser.h:
xpath/impl/XPathPath.h:
xpath/impl/XPathUtil.h:

WebKit:

Rubber stamped by Anders.

renamed kxmlcore to wtf

kxmlcore --> wtf
KXMLCore --> WTF
WKC --> WTF

Misc/WebKitLogging.h:
Misc/WebKitLogging.m: (initializeLogChannel):

Property allow-tabs set to x
Property svn:eol-style set to native

File size: 31.0 KB

Line
1	// -*- c-basic-offset: 2 -*-
2	/*
3	* This file is part of the KDE libraries
4	* Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
5	* Copyright (C) 2004 Apple Computer, Inc.
6	*
7	* This library is free software; you can redistribute it and/or
8	* modify it under the terms of the GNU Library General Public
9	* License as published by the Free Software Foundation; either
10	* version 2 of the License, or (at your option) any later version.
11	*
12	* This library is distributed in the hope that it will be useful,
13	* but WITHOUT ANY WARRANTY; without even the implied warranty of
14	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15	* Library General Public License for more details.
16	*
17	* You should have received a copy of the GNU Library General Public License
18	* along with this library; see the file COPYING.LIB. If not, write to
19	* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20	* Boston, MA 02110-1301, USA.
21	*
22	*/
23
24	#include "config.h"
25	#include "ustring.h"
26
27	#include <assert.h>
28	#include <stdlib.h>
29	#include <stdio.h>
30	#include <ctype.h>
31	#if HAVE(STRING_H)
32	#include <string.h>
33	#endif
34	#if HAVE(STRINGS_H)
35	#include <strings.h>
36	#endif
37
38	#include "operations.h"
39	#include "identifier.h"
40	#include <math.h>
41	#include "dtoa.h"
42
43	#include <wtf/Vector.h>
44
45	using std::max;
46
47	namespace KJS {
48
49	extern const double NaN;
50	extern const double Inf;
51
52	CString::CString(const char *c)
53	{
54	length = strlen(c);
55	data = new char[length+1];
56	memcpy(data, c, length + 1);
57	}
58
59	CString::CString(const char *c, int len)
60	{
61	length = len;
62	data = new char[len+1];
63	memcpy(data, c, len);
64	data[len] = 0;
65	}
66
67	CString::CString(const CString &b)
68	{
69	length = b.length;
70	if (b.data) {
71	data = new char[length+1];
72	memcpy(data, b.data, length + 1);
73	}
74	else
75	data = 0;
76	}
77
78	CString::~CString()
79	{
80	delete [] data;
81	}
82
83	CString &CString::append(const CString &t)
84	{
85	char *n;
86	n = new char[length+t.length+1];
87	if (length)
88	memcpy(n, data, length);
89	if (t.length)
90	memcpy(n+length, t.data, t.length);
91	length += t.length;
92	n[length] = 0;
93
94	delete [] data;
95	data = n;
96
97	return *this;
98	}
99
100	CString &CString::operator=(const char *c)
101	{
102	if (data)
103	delete [] data;
104	length = strlen(c);
105	data = new char[length+1];
106	memcpy(data, c, length + 1);
107
108	return *this;
109	}
110
111	CString &CString::operator=(const CString &str)
112	{
113	if (this == &str)
114	return *this;
115
116	if (data)
117	delete [] data;
118	length = str.length;
119	if (str.data) {
120	data = new char[length + 1];
121	memcpy(data, str.data, length + 1);
122	}
123	else
124	data = 0;
125
126	return *this;
127	}
128
129	bool operator==(const CString& c1, const CString& c2)
130	{
131	int len = c1.size();
132	return len == c2.size() && (len == 0 \|\| memcmp(c1.c_str(), c2.c_str(), len) == 0);
133	}
134
135	// Hack here to avoid a global with a constructor; point to an unsigned short instead of a UChar.
136	static unsigned short almostUChar;
137	UString::Rep UString::Rep::null = { 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0 };
138	UString::Rep UString::Rep::empty = { 0, 0, 1, 0, 0, 0, reinterpret_cast<UChar*>(&almostUChar), 0, 0, 0, 0 };
139	const int normalStatBufferSize = 4096;
140	static char *statBuffer = 0;
141	static int statBufferSize = 0;
142
143	UCharReference& UCharReference::operator=(UChar c)
144	{
145	str->copyForWriting();
146	if (offset < str->rep()->len)
147	*(str->rep()->data() + offset) = c;
148	/* TODO: lengthen string ? */
149	return *this;
150	}
151
152	UChar& UCharReference::ref() const
153	{
154	if (offset < str->rep()->len)
155	return *(str->rep()->data() + offset);
156	else {
157	static UChar callerBetterNotModifyThis('\0');
158	return callerBetterNotModifyThis;
159	}
160	}
161
162	PassRefPtr<UString::Rep> UString::Rep::createCopying(const UChar *d, int l)
163	{
164	int sizeInBytes = l * sizeof(UChar);
165	UChar copyD = static_cast<UChar >(fastMalloc(sizeInBytes));
166	memcpy(copyD, d, sizeInBytes);
167
168	return create(copyD, l);
169	}
170
171	PassRefPtr<UString::Rep> UString::Rep::create(UChar *d, int l)
172	{
173	Rep *r = new Rep;
174	r->offset = 0;
175	r->len = l;
176	r->rc = 1;
177	r->_hash = 0;
178	r->isIdentifier = 0;
179	r->baseString = 0;
180	r->buf = d;
181	r->usedCapacity = l;
182	r->capacity = l;
183	r->usedPreCapacity = 0;
184	r->preCapacity = 0;
185
186	// steal the single reference this Rep was created with
187	return adoptRef(r);
188	}
189
190	PassRefPtr<UString::Rep> UString::Rep::create(PassRefPtr<Rep> base, int offset, int length)
191	{
192	assert(base);
193
194	int baseOffset = base->offset;
195
196	if (base->baseString) {
197	base = base->baseString;
198	}
199
200	assert(-(offset + baseOffset) <= base->usedPreCapacity);
201	assert(offset + baseOffset + length <= base->usedCapacity);
202
203	Rep *r = new Rep;
204	r->offset = baseOffset + offset;
205	r->len = length;
206	r->rc = 1;
207	r->_hash = 0;
208	r->isIdentifier = 0;
209	r->baseString = base.release();
210	r->buf = 0;
211	r->usedCapacity = 0;
212	r->capacity = 0;
213	r->usedPreCapacity = 0;
214	r->preCapacity = 0;
215
216	// steal the single reference this Rep was created with
217	return adoptRef(r);
218	}
219
220	void UString::Rep::destroy()
221	{
222	if (isIdentifier)
223	Identifier::remove(this);
224	if (baseString) {
225	baseString->deref();
226	} else {
227	fastFree(buf);
228	}
229	delete this;
230	}
231
232	// Golden ratio - arbitrary start value to avoid mapping all 0's to all 0's
233	// or anything like that.
234	const unsigned PHI = 0x9e3779b9U;
235
236	// Paul Hsieh's SuperFastHash
237	// http://www.azillionmonkeys.com/qed/hash.html
238	unsigned UString::Rep::computeHash(const UChar *s, int len)
239	{
240	unsigned l = len;
241	uint32_t hash = PHI;
242	uint32_t tmp;
243
244	int rem = l & 1;
245	l >>= 1;
246
247	// Main loop
248	for (; l > 0; l--) {
249	hash += s[0].uc;
250	tmp = (s[1].uc << 11) ^ hash;
251	hash = (hash << 16) ^ tmp;
252	s += 2;
253	hash += hash >> 11;
254	}
255
256	// Handle end case
257	if (rem) {
258	hash += s[0].uc;
259	hash ^= hash << 11;
260	hash += hash >> 17;
261	}
262
263	// Force "avalanching" of final 127 bits
264	hash ^= hash << 3;
265	hash += hash >> 5;
266	hash ^= hash << 2;
267	hash += hash >> 15;
268	hash ^= hash << 10;
269
270	// this avoids ever returning a hash code of 0, since that is used to
271	// signal "hash not computed yet", using a value that is likely to be
272	// effectively the same as 0 when the low bits are masked
273	if (hash == 0)
274	hash = 0x80000000;
275
276	return hash;
277	}
278
279	// Paul Hsieh's SuperFastHash
280	// http://www.azillionmonkeys.com/qed/hash.html
281	unsigned UString::Rep::computeHash(const char *s)
282	{
283	// This hash is designed to work on 16-bit chunks at a time. But since the normal case
284	// (above) is to hash UTF-16 characters, we just treat the 8-bit chars as if they
285	// were 16-bit chunks, which should give matching results
286
287	uint32_t hash = PHI;
288	uint32_t tmp;
289	unsigned l = strlen(s);
290
291	int rem = l & 1;
292	l >>= 1;
293
294	// Main loop
295	for (; l > 0; l--) {
296	hash += (unsigned char)s[0];
297	tmp = ((unsigned char)s[1] << 11) ^ hash;
298	hash = (hash << 16) ^ tmp;
299	s += 2;
300	hash += hash >> 11;
301	}
302
303	// Handle end case
304	if (rem) {
305	hash += (unsigned char)s[0];
306	hash ^= hash << 11;
307	hash += hash >> 17;
308	}
309
310	// Force "avalanching" of final 127 bits
311	hash ^= hash << 3;
312	hash += hash >> 5;
313	hash ^= hash << 2;
314	hash += hash >> 15;
315	hash ^= hash << 10;
316
317	// this avoids ever returning a hash code of 0, since that is used to
318	// signal "hash not computed yet", using a value that is likely to be
319	// effectively the same as 0 when the low bits are masked
320	if (hash == 0)
321	hash = 0x80000000;
322
323	return hash;
324	}
325
326	// put these early so they can be inlined
327	inline int UString::expandedSize(int size, int otherSize) const
328	{
329	int s = (size * 11 / 10) + 1 + otherSize;
330	return s;
331	}
332
333	inline int UString::usedCapacity() const
334	{
335	return m_rep->baseString ? m_rep->baseString->usedCapacity : m_rep->usedCapacity;
336	}
337
338	inline int UString::usedPreCapacity() const
339	{
340	return m_rep->baseString ? m_rep->baseString->usedPreCapacity : m_rep->usedPreCapacity;
341	}
342
343	void UString::expandCapacity(int requiredLength)
344	{
345	Rep *r = m_rep->baseString ? m_rep->baseString : rep();
346
347	if (requiredLength > r->capacity) {
348	int newCapacity = expandedSize(requiredLength, r->preCapacity);
349	r->buf = static_cast<UChar >(fastRealloc(r->buf, newCapacity sizeof(UChar)));
350	r->capacity = newCapacity - r->preCapacity;
351	}
352	if (requiredLength > r->usedCapacity) {
353	r->usedCapacity = requiredLength;
354	}
355	}
356
357	void UString::expandPreCapacity(int requiredPreCap)
358	{
359	Rep *r = m_rep->baseString ? m_rep->baseString : rep();
360
361	if (requiredPreCap > r->preCapacity) {
362	int newCapacity = expandedSize(requiredPreCap, r->capacity);
363	int delta = newCapacity - r->capacity - r->preCapacity;
364
365	UChar newBuf = static_cast<UChar >(fastMalloc(newCapacity * sizeof(UChar)));
366	memcpy(newBuf + delta, r->buf, (r->capacity + r->preCapacity) * sizeof(UChar));
367	fastFree(r->buf);
368	r->buf = newBuf;
369
370	r->preCapacity = newCapacity - r->capacity;
371	}
372	if (requiredPreCap > r->usedPreCapacity) {
373	r->usedPreCapacity = requiredPreCap;
374	}
375	}
376
377
378	UString::UString(char c)
379	{
380	UChar d = static_cast<UChar >(fastMalloc(sizeof(UChar)));
381	d[0] = c;
382	m_rep = Rep::create(d, 1);
383	}
384
385	UString::UString(const char *c)
386	{
387	if (!c) {
388	m_rep = &Rep::null;
389	return;
390	}
391	int length = strlen(c);
392	if (length == 0) {
393	m_rep = &Rep::empty;
394	return;
395	}
396	UChar d = static_cast<UChar >(fastMalloc(sizeof(UChar) * length));
397	for (int i = 0; i < length; i++)
398	d[i].uc = c[i];
399	m_rep = Rep::create(d, length);
400	}
401
402	UString::UString(const UChar *c, int length)
403	{
404	if (length == 0)
405	m_rep = &Rep::empty;
406	else
407	m_rep = Rep::createCopying(c, length);
408	}
409
410	UString::UString(UChar *c, int length, bool copy)
411	{
412	if (length == 0)
413	m_rep = &Rep::empty;
414	else if (copy)
415	m_rep = Rep::createCopying(c, length);
416	else
417	m_rep = Rep::create(c, length);
418	}
419
420	UString::UString(const UString &a, const UString &b)
421	{
422	int aSize = a.size();
423	int aOffset = a.m_rep->offset;
424	int bSize = b.size();
425	int bOffset = b.m_rep->offset;
426	int length = aSize + bSize;
427
428	// possible cases:
429
430	if (aSize == 0) {
431	// a is empty
432	m_rep = b.m_rep;
433	} else if (bSize == 0) {
434	// b is empty
435	m_rep = a.m_rep;
436	} else if (aOffset + aSize == a.usedCapacity() && 4 * aSize >= bSize &&
437	(-bOffset != b.usedPreCapacity() \|\| aSize >= bSize)) {
438	// - a reaches the end of its buffer so it qualifies for shared append
439	// - also, it's at least a quarter the length of b - appending to a much shorter
440	// string does more harm than good
441	// - however, if b qualifies for prepend and is longer than a, we'd rather prepend
442	UString x(a);
443	x.expandCapacity(aOffset + length);
444	memcpy(const_cast<UChar >(a.data() + aSize), b.data(), bSize sizeof(UChar));
445	m_rep = Rep::create(a.m_rep, 0, length);
446	} else if (-bOffset == b.usedPreCapacity() && 4 * bSize >= aSize) {
447	// - b reaches the beginning of its buffer so it qualifies for shared prepend
448	// - also, it's at least a quarter the length of a - prepending to a much shorter
449	// string does more harm than good
450	UString y(b);
451	y.expandPreCapacity(-bOffset + aSize);
452	memcpy(const_cast<UChar >(b.data() - aSize), a.data(), aSize sizeof(UChar));
453	m_rep = Rep::create(b.m_rep, -aSize, length);
454	} else {
455	// a does not qualify for append, and b does not qualify for prepend, gotta make a whole new string
456	int newCapacity = expandedSize(length, 0);
457	UChar d = static_cast<UChar >(fastMalloc(sizeof(UChar) * newCapacity));
458	memcpy(d, a.data(), aSize * sizeof(UChar));
459	memcpy(d + aSize, b.data(), bSize * sizeof(UChar));
460	m_rep = Rep::create(d, length);
461	m_rep->capacity = newCapacity;
462	}
463	}
464
465	const UString &UString::null()
466	{
467	static UString n;
468	return n;
469	}
470
471	UString UString::from(int i)
472	{
473	UChar buf[1 + sizeof(i) * 3];
474	UChar *end = buf + sizeof(buf) / sizeof(UChar);
475	UChar *p = end;
476
477	if (i == 0) {
478	*--p = '0';
479	} else if (i == INT_MIN) {
480	char minBuf[1 + sizeof(i) * 3];
481	sprintf(minBuf, "%d", INT_MIN);
482	return UString(minBuf);
483	} else {
484	bool negative = false;
485	if (i < 0) {
486	negative = true;
487	i = -i;
488	}
489	while (i) {
490	*--p = (unsigned short)((i % 10) + '0');
491	i /= 10;
492	}
493	if (negative) {
494	*--p = '-';
495	}
496	}
497
498	return UString(p, end - p);
499	}
500
501	UString UString::from(unsigned int u)
502	{
503	UChar buf[sizeof(u) * 3];
504	UChar *end = buf + sizeof(buf) / sizeof(UChar);
505	UChar *p = end;
506
507	if (u == 0) {
508	*--p = '0';
509	} else {
510	while (u) {
511	*--p = (unsigned short)((u % 10) + '0');
512	u /= 10;
513	}
514	}
515
516	return UString(p, end - p);
517	}
518
519	UString UString::from(long l)
520	{
521	UChar buf[1 + sizeof(l) * 3];
522	UChar *end = buf + sizeof(buf) / sizeof(UChar);
523	UChar *p = end;
524
525	if (l == 0) {
526	*--p = '0';
527	} else if (l == LONG_MIN) {
528	char minBuf[1 + sizeof(l) * 3];
529	sprintf(minBuf, "%ld", LONG_MIN);
530	return UString(minBuf);
531	} else {
532	bool negative = false;
533	if (l < 0) {
534	negative = true;
535	l = -l;
536	}
537	while (l) {
538	*--p = (unsigned short)((l % 10) + '0');
539	l /= 10;
540	}
541	if (negative) {
542	*--p = '-';
543	}
544	}
545
546	return UString(p, end - p);
547	}
548
549	UString UString::from(double d)
550	{
551	// avoid ever printing -NaN, in JS conceptually there is only one NaN value
552	if (isNaN(d))
553	return "NaN";
554
555	char buf[80];
556	int decimalPoint;
557	int sign;
558
559	char *result = kjs_dtoa(d, 0, 0, &decimalPoint, &sign, NULL);
560	int length = strlen(result);
561
562	int i = 0;
563	if (sign) {
564	buf[i++] = '-';
565	}
566
567	if (decimalPoint <= 0 && decimalPoint > -6) {
568	buf[i++] = '0';
569	buf[i++] = '.';
570	for (int j = decimalPoint; j < 0; j++) {
571	buf[i++] = '0';
572	}
573	strcpy(buf + i, result);
574	} else if (decimalPoint <= 21 && decimalPoint > 0) {
575	if (length <= decimalPoint) {
576	strcpy(buf + i, result);
577	i += length;
578	for (int j = 0; j < decimalPoint - length; j++) {
579	buf[i++] = '0';
580	}
581	buf[i] = '\0';
582	} else {
583	strncpy(buf + i, result, decimalPoint);
584	i += decimalPoint;
585	buf[i++] = '.';
586	strcpy(buf + i, result + decimalPoint);
587	}
588	} else if (result[0] < '0' \|\| result[0] > '9') {
589	strcpy(buf + i, result);
590	} else {
591	buf[i++] = result[0];
592	if (length > 1) {
593	buf[i++] = '.';
594	strcpy(buf + i, result + 1);
595	i += length - 1;
596	}
597
598	buf[i++] = 'e';
599	buf[i++] = (decimalPoint >= 0) ? '+' : '-';
600	// decimalPoint can't be more than 3 digits decimal given the
601	// nature of float representation
602	int exponential = decimalPoint - 1;
603	if (exponential < 0) {
604	exponential = exponential * -1;
605	}
606	if (exponential >= 100) {
607	buf[i++] = '0' + exponential / 100;
608	}
609	if (exponential >= 10) {
610	buf[i++] = '0' + (exponential % 100) / 10;
611	}
612	buf[i++] = '0' + exponential % 10;
613	buf[i++] = '\0';
614	}
615
616	kjs_freedtoa(result);
617
618	return UString(buf);
619	}
620
621	UString UString::spliceSubstringsWithSeparators(const Range substringRanges, int rangeCount, const UString separators, int separatorCount) const
622	{
623	int totalLength = 0;
624
625	for (int i = 0; i < rangeCount; i++) {
626	totalLength += substringRanges[i].length;
627	}
628	for (int i = 0; i < separatorCount; i++) {
629	totalLength += separators[i].size();
630	}
631
632	UChar buffer = static_cast<UChar >(fastMalloc(totalLength * sizeof(UChar)));
633
634	int maxCount = max(rangeCount, separatorCount);
635	int bufferPos = 0;
636	for (int i = 0; i < maxCount; i++) {
637	if (i < rangeCount) {
638	memcpy(buffer + bufferPos, data() + substringRanges[i].position, substringRanges[i].length * sizeof(UChar));
639	bufferPos += substringRanges[i].length;
640	}
641	if (i < separatorCount) {
642	memcpy(buffer + bufferPos, separators[i].data(), separators[i].size() * sizeof(UChar));
643	bufferPos += separators[i].size();
644	}
645	}
646
647	return UString(UString::Rep::create(buffer, totalLength));
648	}
649
650
651
652	UString &UString::append(const UString &t)
653	{
654	int thisSize = size();
655	int thisOffset = m_rep->offset;
656	int tSize = t.size();
657	int length = thisSize + tSize;
658
659	// possible cases:
660	if (thisSize == 0) {
661	// this is empty
662	*this = t;
663	} else if (tSize == 0) {
664	// t is empty
665	} else if (!m_rep->baseString && m_rep->rc == 1) {
666	// this is direct and has refcount of 1 (so we can just alter it directly)
667	expandCapacity(thisOffset + length);
668	memcpy(const_cast<UChar >(data() + thisSize), t.data(), tSize sizeof(UChar));
669	m_rep->len = length;
670	m_rep->_hash = 0;
671	} else if (thisOffset + thisSize == usedCapacity()) {
672	// this reaches the end of the buffer - extend it
673	expandCapacity(thisOffset + length);
674	memcpy(const_cast<UChar >(data() + thisSize), t.data(), tSize sizeof(UChar));
675	m_rep = Rep::create(m_rep, 0, length);
676	} else {
677	// this is shared with someone using more capacity, gotta make a whole new string
678	int newCapacity = expandedSize(length, 0);
679	UChar d = static_cast<UChar >(fastMalloc(sizeof(UChar) * newCapacity));
680	memcpy(d, data(), thisSize * sizeof(UChar));
681	memcpy(const_cast<UChar >(d + thisSize), t.data(), tSize sizeof(UChar));
682	m_rep = Rep::create(d, length);
683	m_rep->capacity = newCapacity;
684	}
685
686	return *this;
687	}
688
689	UString &UString::append(const char *t)
690	{
691	int thisSize = size();
692	int thisOffset = m_rep->offset;
693	int tSize = strlen(t);
694	int length = thisSize + tSize;
695
696	// possible cases:
697	if (thisSize == 0) {
698	// this is empty
699	*this = t;
700	} else if (tSize == 0) {
701	// t is empty, we'll just return *this below.
702	} else if (!m_rep->baseString && m_rep->rc == 1) {
703	// this is direct and has refcount of 1 (so we can just alter it directly)
704	expandCapacity(thisOffset + length);
705	UChar d = const_cast<UChar >(data());
706	for (int i = 0; i < tSize; ++i)
707	d[thisSize+i] = t[i];
708	m_rep->len = length;
709	m_rep->_hash = 0;
710	} else if (thisOffset + thisSize == usedCapacity()) {
711	// this string reaches the end of the buffer - extend it
712	expandCapacity(thisOffset + length);
713	UChar d = const_cast<UChar >(data());
714	for (int i = 0; i < tSize; ++i)
715	d[thisSize+i] = t[i];
716	m_rep = Rep::create(m_rep, 0, length);
717	} else {
718	// this is shared with someone using more capacity, gotta make a whole new string
719	int newCapacity = expandedSize(length, 0);
720	UChar d = static_cast<UChar >(fastMalloc(sizeof(UChar) * newCapacity));
721	memcpy(d, data(), thisSize * sizeof(UChar));
722	for (int i = 0; i < tSize; ++i)
723	d[thisSize+i] = t[i];
724	m_rep = Rep::create(d, length);
725	m_rep->capacity = newCapacity;
726	}
727
728	return *this;
729	}
730
731	UString &UString::append(unsigned short c)
732	{
733	int thisOffset = m_rep->offset;
734	int length = size();
735
736	// possible cases:
737	if (length == 0) {
738	// this is empty - must make a new m_rep because we don't want to pollute the shared empty one
739	int newCapacity = expandedSize(1, 0);
740	UChar d = static_cast<UChar >(fastMalloc(sizeof(UChar) * newCapacity));
741	d[0] = c;
742	m_rep = Rep::create(d, 1);
743	m_rep->capacity = newCapacity;
744	} else if (!m_rep->baseString && m_rep->rc == 1) {
745	// this is direct and has refcount of 1 (so we can just alter it directly)
746	expandCapacity(thisOffset + length + 1);
747	UChar d = const_cast<UChar >(data());
748	d[length] = c;
749	m_rep->len = length + 1;
750	m_rep->_hash = 0;
751	} else if (thisOffset + length == usedCapacity()) {
752	// this reaches the end of the string - extend it and share
753	expandCapacity(thisOffset + length + 1);
754	UChar d = const_cast<UChar >(data());
755	d[length] = c;
756	m_rep = Rep::create(m_rep, 0, length + 1);
757	} else {
758	// this is shared with someone using more capacity, gotta make a whole new string
759	int newCapacity = expandedSize((length + 1), 0);
760	UChar d = static_cast<UChar >(fastMalloc(sizeof(UChar) * newCapacity));
761	memcpy(d, data(), length * sizeof(UChar));
762	d[length] = c;
763	m_rep = Rep::create(d, length);
764	m_rep->capacity = newCapacity;
765	}
766
767	return *this;
768	}
769
770	CString UString::cstring() const
771	{
772	return ascii();
773	}
774
775	char *UString::ascii() const
776	{
777	// Never make the buffer smaller than normalStatBufferSize.
778	// Thus we almost never need to reallocate.
779	int length = size();
780	int neededSize = length + 1;
781	if (neededSize < normalStatBufferSize) {
782	neededSize = normalStatBufferSize;
783	}
784	if (neededSize != statBufferSize) {
785	delete [] statBuffer;
786	statBuffer = new char [neededSize];
787	statBufferSize = neededSize;
788	}
789
790	const UChar *p = data();
791	char *q = statBuffer;
792	const UChar *limit = p + length;
793	while (p != limit) {
794	*q = p->uc;
795	++p;
796	++q;
797	}
798	*q = '\0';
799
800	return statBuffer;
801	}
802
#ifdef KJS_DEBUG_MEM
// Frees the static buffer used by ascii() and resets its bookkeeping, so the
// next ascii() call allocates a fresh one.
void UString::globalClear()
{
  delete [] statBuffer;
  statBufferSize = 0;
  statBuffer = 0;
}
#endif
811
812	UString &UString::operator=(const char *c)
813	{
814	int l = c ? strlen(c) : 0;
815	UChar *d;
816	if (m_rep->rc == 1 && l <= m_rep->capacity && !m_rep->baseString && m_rep->offset == 0 && m_rep->preCapacity == 0) {
817	d = m_rep->buf;
818	m_rep->_hash = 0;
819	} else {
820	d = static_cast<UChar >(fastMalloc(sizeof(UChar) l));
821	m_rep = Rep::create(d, l);
822	}
823	for (int i = 0; i < l; i++)
824	d[i].uc = c[i];
825
826	return *this;
827	}
828
829	bool UString::is8Bit() const
830	{
831	const UChar *u = data();
832	const UChar *limit = u + size();
833	while (u < limit) {
834	if (u->uc > 0xFF)
835	return false;
836	++u;
837	}
838
839	return true;
840	}
841
842	UChar UString::operator[](int pos) const
843	{
844	if (pos >= size())
845	return '\0';
846	return data()[pos];
847	}
848
849	UCharReference UString::operator[](int pos)
850	{
851	/* TODO: boundary check */
852	return UCharReference(this, pos);
853	}
854
855	double UString::toDouble(bool tolerateTrailingJunk, bool tolerateEmptyString) const
856	{
857	double d;
858
859	// FIXME: If tolerateTrailingJunk is true, then we want to tolerate non-8-bit junk
860	// after the number, so is8Bit is too strict a check.
861	if (!is8Bit())
862	return NaN;
863
864	const char *c = ascii();
865
866	// skip leading white space
867	while (isspace(*c))
868	c++;
869
870	// empty string ?
871	if (*c == '\0')
872	return tolerateEmptyString ? 0.0 : NaN;
873
874	// hex number ?
875	if (c == '0' && ((c+1) == 'x' \|\| *(c+1) == 'X')) {
876	c++;
877	d = 0.0;
878	while (*(++c)) {
879	if (c >= '0' && c <= '9')
880	d = d * 16.0 + *c - '0';
881	else if ((c >= 'A' && c <= 'F') \|\| (c >= 'a' && c <= 'f'))
882	d = d * 16.0 + (*c & 0xdf) - 'A' + 10.0;
883	else
884	break;
885	}
886	} else {
887	// regular number ?
888	char *end;
889	d = kjs_strtod(c, &end);
890	if ((d != 0.0 \|\| end != c) && d != HUGE_VAL && d != -HUGE_VAL) {
891	c = end;
892	} else {
893	// infinity ?
894	d = 1.0;
895	if (*c == '+')
896	c++;
897	else if (*c == '-') {
898	d = -1.0;
899	c++;
900	}
901	if (strncmp(c, "Infinity", 8) != 0)
902	return NaN;
903	d = d * Inf;
904	c += 8;
905	}
906	}
907
908	// allow trailing white space
909	while (isspace(*c))
910	c++;
911	// don't allow anything after - unless tolerant=true
912	if (!tolerateTrailingJunk && *c != '\0')
913	d = NaN;
914
915	return d;
916	}
917
918	double UString::toDouble(bool tolerateTrailingJunk) const
919	{
920	return toDouble(tolerateTrailingJunk, true);
921	}
922
923	double UString::toDouble() const
924	{
925	return toDouble(false, true);
926	}
927
928	uint32_t UString::toUInt32(bool *ok) const
929	{
930	double d = toDouble();
931	bool b = true;
932
933	if (d != static_cast<uint32_t>(d)) {
934	b = false;
935	d = 0;
936	}
937
938	if (ok)
939	*ok = b;
940
941	return static_cast<uint32_t>(d);
942	}
943
944	uint32_t UString::toUInt32(bool *ok, bool tolerateEmptyString) const
945	{
946	double d = toDouble(false, tolerateEmptyString);
947	bool b = true;
948
949	if (d != static_cast<uint32_t>(d)) {
950	b = false;
951	d = 0;
952	}
953
954	if (ok)
955	*ok = b;
956
957	return static_cast<uint32_t>(d);
958	}
959
960	uint32_t UString::toStrictUInt32(bool *ok) const
961	{
962	if (ok)
963	*ok = false;
964
965	// Empty string is not OK.
966	int len = m_rep->len;
967	if (len == 0)
968	return 0;
969	const UChar *p = m_rep->data();
970	unsigned short c = p->unicode();
971
972	// If the first digit is 0, only 0 itself is OK.
973	if (c == '0') {
974	if (len == 1 && ok)
975	*ok = true;
976	return 0;
977	}
978
979	// Convert to UInt32, checking for overflow.
980	uint32_t i = 0;
981	while (1) {
982	// Process character, turning it into a digit.
983	if (c < '0' \|\| c > '9')
984	return 0;
985	const unsigned d = c - '0';
986
987	// Multiply by 10, checking for overflow out of 32 bits.
988	if (i > 0xFFFFFFFFU / 10)
989	return 0;
990	i *= 10;
991
992	// Add in the digit, checking for overflow out of 32 bits.
993	const unsigned max = 0xFFFFFFFFU - d;
994	if (i > max)
995	return 0;
996	i += d;
997
998	// Handle end of string.
999	if (--len == 0) {
1000	if (ok)
1001	*ok = true;
1002	return i;
1003	}
1004
1005	// Get next character.
1006	c = (++p)->unicode();
1007	}
1008	}
1009
1010	int UString::find(const UString &f, int pos) const
1011	{
1012	int sz = size();
1013	int fsz = f.size();
1014	if (sz < fsz)
1015	return -1;
1016	if (pos < 0)
1017	pos = 0;
1018	if (fsz == 0)
1019	return pos;
1020	const UChar *end = data() + sz - fsz;
1021	int fsizeminusone = (fsz - 1) * sizeof(UChar);
1022	const UChar *fdata = f.data();
1023	unsigned short fchar = fdata->uc;
1024	++fdata;
1025	for (const UChar *c = data() + pos; c <= end; c++)
1026	if (c->uc == fchar && !memcmp(c + 1, fdata, fsizeminusone))
1027	return (c-data());
1028
1029	return -1;
1030	}
1031
1032	int UString::find(UChar ch, int pos) const
1033	{
1034	if (pos < 0)
1035	pos = 0;
1036	const UChar *end = data() + size();
1037	for (const UChar *c = data() + pos; c < end; c++)
1038	if (*c == ch)
1039	return (c-data());
1040
1041	return -1;
1042	}
1043
1044	int UString::rfind(const UString &f, int pos) const
1045	{
1046	int sz = size();
1047	int fsz = f.size();
1048	if (sz < fsz)
1049	return -1;
1050	if (pos < 0)
1051	pos = 0;
1052	if (pos > sz - fsz)
1053	pos = sz - fsz;
1054	if (fsz == 0)
1055	return pos;
1056	int fsizeminusone = (fsz - 1) * sizeof(UChar);
1057	const UChar *fdata = f.data();
1058	for (const UChar *c = data() + pos; c >= data(); c--) {
1059	if (c == fdata && !memcmp(c + 1, fdata + 1, fsizeminusone))
1060	return (c-data());
1061	}
1062
1063	return -1;
1064	}
1065
1066	int UString::rfind(UChar ch, int pos) const
1067	{
1068	if (isEmpty())
1069	return -1;
1070	if (pos + 1 >= size())
1071	pos = size() - 1;
1072	for (const UChar *c = data() + pos; c >= data(); c--) {
1073	if (*c == ch)
1074	return (c-data());
1075	}
1076
1077	return -1;
1078	}
1079
1080	UString UString::substr(int pos, int len) const
1081	{
1082	int s = size();
1083
1084	if (pos < 0)
1085	pos = 0;
1086	else if (pos >= s)
1087	pos = s;
1088	if (len < 0)
1089	len = s;
1090	if (pos + len >= s)
1091	len = s - pos;
1092
1093	if (pos == 0 && len == s)
1094	return *this;
1095
1096	return UString(Rep::create(m_rep, pos, len));
1097	}
1098
1099	void UString::copyForWriting()
1100	{
1101	if (m_rep->rc > 1 \|\| m_rep->baseString) {
1102	int l = size();
1103	UChar n = static_cast<UChar >(fastMalloc(sizeof(UChar) * l));
1104	memcpy(n, data(), l * sizeof(UChar));
1105	m_rep = Rep::create(n, l);
1106	}
1107	}
1108
1109	bool operator==(const UString& s1, const UString& s2)
1110	{
1111	if (s1.m_rep->len != s2.m_rep->len)
1112	return false;
1113
1114	return (memcmp(s1.m_rep->data(), s2.m_rep->data(),
1115	s1.m_rep->len * sizeof(UChar)) == 0);
1116	}
1117
1118	bool operator==(const UString& s1, const char *s2)
1119	{
1120	if (s2 == 0) {
1121	return s1.isEmpty();
1122	}
1123
1124	const UChar *u = s1.data();
1125	const UChar *uend = u + s1.size();
1126	while (u != uend && *s2) {
1127	if (u->uc != (unsigned char)*s2)
1128	return false;
1129	s2++;
1130	u++;
1131	}
1132
1133	return u == uend && *s2 == 0;
1134	}
1135
1136	bool operator<(const UString& s1, const UString& s2)
1137	{
1138	const int l1 = s1.size();
1139	const int l2 = s2.size();
1140	const int lmin = l1 < l2 ? l1 : l2;
1141	const UChar *c1 = s1.data();
1142	const UChar *c2 = s2.data();
1143	int l = 0;
1144	while (l < lmin && c1 == c2) {
1145	c1++;
1146	c2++;
1147	l++;
1148	}
1149	if (l < lmin)
1150	return (c1->uc < c2->uc);
1151
1152	return (l1 < l2);
1153	}
1154
1155	int compare(const UString& s1, const UString& s2)
1156	{
1157	const int l1 = s1.size();
1158	const int l2 = s2.size();
1159	const int lmin = l1 < l2 ? l1 : l2;
1160	const UChar *c1 = s1.data();
1161	const UChar *c2 = s2.data();
1162	int l = 0;
1163	while (l < lmin && c1 == c2) {
1164	c1++;
1165	c2++;
1166	l++;
1167	}
1168
1169	if (l < lmin)
1170	return (c1->uc > c2->uc) ? 1 : -1;
1171
1172	if (l1 == l2)
1173	return 0;
1174
1175	return (l1 > l2) ? 1 : -1;
1176	}
1177
// Length of the UTF-8 sequence introduced by the non-ASCII lead byte b0:
// 2 for 110xxxxx, 3 for 1110xxxx, 4 for 11110xxx, and 0 for anything else
// (bytes below 0xC0 and bytes from 0xF8 upward).
inline int inlineUTF8SequenceLengthNonASCII(char b0)
{
  const int lead = b0 & 0xFF;
  if (lead < 0xC0)
    return 0;
  if (lead < 0xE0)
    return 2;
  if (lead < 0xF0)
    return 3;
  if (lead < 0xF8)
    return 4;
  return 0;
}
1190
1191	int UTF8SequenceLengthNonASCII(char b0)
1192	{
1193	return inlineUTF8SequenceLengthNonASCII(b0);
1194	}
1195
1196	inline int inlineUTF8SequenceLength(char b0)
1197	{
1198	return (b0 & 0x80) == 0 ? 1 : UTF8SequenceLengthNonASCII(b0);
1199	}
1200
1201	// Given a first byte, gives the length of the UTF-8 sequence it begins.
1202	// Returns 0 for bytes that are not legal starts of UTF-8 sequences.
1203	// Only allows sequences of up to 4 bytes, since that works for all Unicode characters (U-00000000 to U-0010FFFF).
1204	int UTF8SequenceLength(char b0)
1205	{
1206	return (b0 & 0x80) == 0 ? 1 : inlineUTF8SequenceLengthNonASCII(b0);
1207	}
1208
1209	// Takes a null-terminated C-style string with a UTF-8 sequence in it and converts it to a character.
1210	// Only allows Unicode characters (U-00000000 to U-0010FFFF).
1211	// Returns -1 if the sequence is not valid (including presence of extra bytes).
1212	int decodeUTF8Sequence(const char *sequence)
1213	{
1214	// Handle 0-byte sequences (never valid).
1215	const unsigned char b0 = sequence[0];
1216	const int length = inlineUTF8SequenceLength(b0);
1217	if (length == 0)
1218	return -1;
1219
1220	// Handle 1-byte sequences (plain ASCII).
1221	const unsigned char b1 = sequence[1];
1222	if (length == 1) {
1223	if (b1)
1224	return -1;
1225	return b0;
1226	}
1227
1228	// Handle 2-byte sequences.
1229	if ((b1 & 0xC0) != 0x80)
1230	return -1;
1231	const unsigned char b2 = sequence[2];
1232	if (length == 2) {
1233	if (b2)
1234	return -1;
1235	const int c = ((b0 & 0x1F) << 6) \| (b1 & 0x3F);
1236	if (c < 0x80)
1237	return -1;
1238	return c;
1239	}
1240
1241	// Handle 3-byte sequences.
1242	if ((b2 & 0xC0) != 0x80)
1243	return -1;
1244	const unsigned char b3 = sequence[3];
1245	if (length == 3) {
1246	if (b3)
1247	return -1;
1248	const int c = ((b0 & 0xF) << 12) \| ((b1 & 0x3F) << 6) \| (b2 & 0x3F);
1249	if (c < 0x800)
1250	return -1;
1251	// UTF-16 surrogates should never appear in UTF-8 data.
1252	if (c >= 0xD800 && c <= 0xDFFF)
1253	return -1;
1254	// Backwards BOM and U+FFFF should never appear in UTF-8 data.
1255	if (c == 0xFFFE \|\| c == 0xFFFF)
1256	return -1;
1257	return c;
1258	}
1259
1260	// Handle 4-byte sequences.
1261	if ((b3 & 0xC0) != 0x80)
1262	return -1;
1263	const unsigned char b4 = sequence[4];
1264	if (length == 4) {
1265	if (b4)
1266	return -1;
1267	const int c = ((b0 & 0x7) << 18) \| ((b1 & 0x3F) << 12) \| ((b2 & 0x3F) << 6) \| (b3 & 0x3F);
1268	if (c < 0x10000 \|\| c > 0x10FFFF)
1269	return -1;
1270	return c;
1271	}
1272
1273	return -1;
1274	}
1275
1276	CString UString::UTF8String() const
1277	{
1278	// Allocate a buffer big enough to hold all the characters.
1279	const int length = size();
1280	Vector<char, 1024> buffer(length * 3);
1281
1282	// Convert to runs of 8-bit characters.
1283	char *p = buffer.begin();
1284	const UChar *d = data();
1285	for (int i = 0; i != length; ++i) {
1286	unsigned short c = d[i].unicode();
1287	if (c < 0x80) {
1288	*p++ = (char)c;
1289	} else if (c < 0x800) {
1290	*p++ = (char)((c >> 6) \| 0xC0); // C0 is the 2-byte flag for UTF-8
1291	*p++ = (char)((c \| 0x80) & 0xBF); // next 6 bits, with high bit set
1292	} else if (c >= 0xD800 && c <= 0xDBFF && i < length && d[i+1].uc >= 0xDC00 && d[i+1].uc <= 0xDFFF) {
1293	unsigned sc = 0x10000 + (((c & 0x3FF) << 10) \| (d[i+1].uc & 0x3FF));
1294	*p++ = (char)((sc >> 18) \| 0xF0); // F0 is the 4-byte flag for UTF-8
1295	*p++ = (char)(((sc >> 12) \| 0x80) & 0xBF); // next 6 bits, with high bit set
1296	*p++ = (char)(((sc >> 6) \| 0x80) & 0xBF); // next 6 bits, with high bit set
1297	*p++ = (char)((sc \| 0x80) & 0xBF); // next 6 bits, with high bit set
1298	++i;
1299	} else {
1300	*p++ = (char)((c >> 12) \| 0xE0); // E0 is the 3-byte flag for UTF-8
1301	*p++ = (char)(((c >> 6) \| 0x80) & 0xBF); // next 6 bits, with high bit set
1302	*p++ = (char)((c \| 0x80) & 0xBF); // next 6 bits, with high bit set
1303	}
1304	}
1305
1306	// Return the result as a C string.
1307	CString result(buffer, p - buffer);
1308
1309	return result;
1310	}
1311
1312	} // namespace KJS

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: webkit/trunk/JavaScriptCore/kjs/ustring.cpp@ 14256

Download in other formats: