Context Navigation

source: webkit/trunk/JavaScriptCore/kjs/ustring.h@ 14902

Visit:

Last change on this file since 14902 was 14734, checked in by ggaren, 19 years ago

Reviewed by aliu.

Changed CString length from int to size_t. We should probably do this for UString, too. (Darin, if you're reading this: Maciej said so.)

kjs/function.cpp: (KJS::encode):
kjs/ustring.cpp: (KJS::CString::CString): (KJS::operator==):
kjs/ustring.h: (KJS::CString::size):

Property svn:eol-style set to native

File size: 14.9 KB

Line
1	// -*- c-basic-offset: 2 -*-
2	/*
3	* This file is part of the KDE libraries
4	* Copyright (C) 1999-2000 Harri Porten ([email protected])
5	* Copyright (C) 2004 Apple Computer, Inc.
6	*
7	* This library is free software; you can redistribute it and/or
8	* modify it under the terms of the GNU Library General Public
9	* License as published by the Free Software Foundation; either
10	* version 2 of the License, or (at your option) any later version.
11	*
12	* This library is distributed in the hope that it will be useful,
13	* but WITHOUT ANY WARRANTY; without even the implied warranty of
14	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15	* Library General Public License for more details.
16	*
17	* You should have received a copy of the GNU Library General Public License
18	* along with this library; see the file COPYING.LIB. If not, write to
19	* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20	* Boston, MA 02110-1301, USA.
21	*
22	*/
23
24	#ifndef _KJS_USTRING_H_
25	#define _KJS_USTRING_H_
26
27	#include <wtf/FastMalloc.h>
28	#include <wtf/RefPtr.h>
29	#include <wtf/PassRefPtr.h>
30
31	#include <stdint.h>
32
33	/**
34	* @internal
35	*/
36	namespace DOM {
37	class DOMString;
38	class AtomicString;
39	}
40	class KJScript;
41	class QString;
42	class QConstString;
43
44	namespace KJS {
45
46	class UCharReference;
47	class UString;
48
49	/**
50	* @short Unicode character.
51	*
52	* UChar represents a 16 bit Unicode character. It's internal data
53	* representation is compatible to XChar2b and QChar. It's therefore
54	* possible to exchange data with X and Qt with shallow copies.
55	*/
56	struct UChar {
57	/**
58	* Construct a character with uninitialized value.
59	*/
60	UChar();
61	/**
62	* Construct a character with the value denoted by the arguments.
63	* @param h higher byte
64	* @param l lower byte
65	*/
66	UChar(unsigned char h , unsigned char l);
67	/**
68	* Construct a character with the given value.
69	* @param u 16 bit Unicode value
70	*/
71	UChar(char u);
72	UChar(unsigned char u);
73	UChar(unsigned short u);
74	UChar(const UCharReference &c);
75	/**
76	* @return The higher byte of the character.
77	*/
78	unsigned char high() const { return uc >> 8; }
79	/**
80	* @return The lower byte of the character.
81	*/
82	unsigned char low() const { return uc; }
83	/**
84	* @return the 16 bit Unicode value of the character
85	*/
86	unsigned short unicode() const { return uc; }
87
88	unsigned short uc;
89	};
90
91	inline UChar::UChar() { }
92	inline UChar::UChar(unsigned char h , unsigned char l) : uc(h << 8 \| l) { }
93	inline UChar::UChar(char u) : uc((unsigned char)u) { }
94	inline UChar::UChar(unsigned char u) : uc(u) { }
95	inline UChar::UChar(unsigned short u) : uc(u) { }
96
97	/**
98	* @short Dynamic reference to a string character.
99	*
100	* UCharReference is the dynamic counterpart of UChar. It's used when
101	* characters retrieved via index from a UString are used in an
102	* assignment expression (and therefore can't be treated as being const):
103	* \code
104	* UString s("hello world");
105	* s[0] = 'H';
106	* \endcode
107	*
108	* If that sounds confusing your best bet is to simply forget about the
109	* existence of this class and treat is as being identical to UChar.
110	*/
111	class UCharReference {
112	friend class UString;
113	UCharReference(UString *s, unsigned int off) : str(s), offset(off) { }
114	public:
115	/**
116	* Set the referenced character to c.
117	*/
118	UCharReference& operator=(UChar c);
119	/**
120	* Same operator as above except the argument that it takes.
121	*/
122	UCharReference& operator=(char c) { return operator=(UChar(c)); }
123	/**
124	* @return Unicode value.
125	*/
126	unsigned short unicode() const { return ref().uc; }
127	/**
128	* @return Lower byte.
129	*/
130	unsigned char low() const { return ref().uc; }
131	/**
132	* @return Higher byte.
133	*/
134	unsigned char high() const { return ref().uc >> 8; }
135	private:
136	// not implemented, can only be constructed from UString
137	UCharReference();
138
139	UChar& ref() const;
140	UString *str;
141	int offset;
142	};
143
144	inline UChar::UChar(const UCharReference &c) : uc(c.unicode()) { }
145
/**
 * @short String class based on 8 bit chars.
 */
class CString {
public:
    /** Creates a string with a null data pointer and a length of zero. */
    CString()
        : data(0)
        , length(0)
    {
    }
    CString(const char *c);
    CString(const char *c, size_t len);
    CString(const CString &);

    ~CString();

    CString &append(const CString &);
    CString &operator=(const char *c);
    CString &operator=(const CString &);

    /** Appends c by forwarding to append(). */
    CString &operator+=(const CString &c)
    {
        return append(c);
    }

    /** @return The stored length. */
    size_t size() const
    {
        return length;
    }

    /** @return The stored data pointer; null for a default-constructed string. */
    const char *c_str() const
    {
        return data;
    }

private:
    char *data;
    size_t length;
};
169
170	/**
171	* @short Unicode string class
172	*/
173	class UString {
174	friend bool operator==(const UString&, const UString&);
175
176	public:
177	/**
178	* @internal
179	*/
180	struct Rep {
181
182	static PassRefPtr<Rep> create(UChar *d, int l);
183	static PassRefPtr<Rep> createCopying(const UChar *d, int l);
184	static PassRefPtr<Rep> create(PassRefPtr<Rep> base, int offset, int length);
185
186	void destroy();
187
188	UChar *data() const { return baseString ? (baseString->buf + baseString->preCapacity + offset) : (buf + preCapacity + offset); }
189	int size() const { return len; }
190
191	unsigned hash() const { if (_hash == 0) _hash = computeHash(data(), len); return _hash; }
192	static unsigned computeHash(const UChar *, int length);
193	static unsigned computeHash(const char *);
194
195	void ref() { ++rc; }
196	void deref() { if (--rc == 0) destroy(); }
197
198	// unshared data
199	int offset;
200	int len;
201	int rc;
202	mutable unsigned _hash;
203	bool isIdentifier;
204	UString::Rep *baseString;
205
206	// potentially shared data
207	UChar *buf;
208	int usedCapacity;
209	int capacity;
210	int usedPreCapacity;
211	int preCapacity;
212
213	static Rep null;
214	static Rep empty;
215	};
216
217	public:
218	/**
219	* Constructs a null string.
220	*/
221	UString();
222	/**
223	* Constructs a string from the single character c.
224	*/
225	explicit UString(char c);
226	/**
227	* Constructs a string from a classical zero determined char string.
228	*/
229	UString(const char *c);
230	/**
231	* Constructs a string from an array of Unicode characters of the specified
232	* length.
233	*/
234	UString(const UChar *c, int length);
235	/**
236	* If copy is false the string data will be adopted.
237	* That means that the data will NOT be copied and the pointer will
238	* be deleted when the UString object is modified or destroyed.
239	* Behaviour defaults to a deep copy if copy is true.
240	*/
241	UString(UChar *c, int length, bool copy);
242	/**
243	* Copy constructor. Makes a shallow copy only.
244	*/
245	UString(const UString &s) : m_rep(s.m_rep) {}
246	/**
247	* Convenience declaration only ! You'll be on your own to write the
248	* implementation for a construction from QString.
249	*
250	* Note: feel free to contact me if you want to see a dummy header for
251	* your favorite FooString class here !
252	*/
253	UString(const QString&);
254	/**
255	* Convenience declaration only ! See UString(const QString&).
256	*/
257	UString(const DOM::DOMString&);
258	/**
259	* Convenience declaration only ! See UString(const QString&).
260	*/
261	UString(const DOM::AtomicString&);
262
263	/**
264	* Concatenation constructor. Makes operator+ more efficient.
265	*/
266	UString(const UString &, const UString &);
267	/**
268	* Destructor.
269	*/
270	~UString() {}
271
272	/**
273	* Constructs a string from an int.
274	*/
275	static UString from(int i);
276	/**
277	* Constructs a string from an unsigned int.
278	*/
279	static UString from(unsigned int u);
280	/**
281	* Constructs a string from a long int.
282	*/
283	static UString from(long u);
284	/**
285	* Constructs a string from a double.
286	*/
287	static UString from(double d);
288
289	struct Range {
290	public:
291	Range(int pos, int len) : position(pos), length(len) {}
292	Range() {}
293	int position;
294	int length;
295	};
296
297	UString spliceSubstringsWithSeparators(const Range substringRanges, int rangeCount, const UString separators, int separatorCount) const;
298
299	/**
300	* Append another string.
301	*/
302	UString &append(const UString &);
303	UString &append(const char *);
304	UString &append(unsigned short);
305	UString &append(char c) { return append(static_cast<unsigned short>(static_cast<unsigned char>(c))); }
306	UString &append(UChar c) { return append(c.uc); }
307
308	/**
309	* @return The string converted to the 8-bit string type CString().
310	*/
311	CString cstring() const;
312	/**
313	* Convert the Unicode string to plain ASCII chars chopping of any higher
314	* bytes. This method should only be used for debugging purposes as it
315	* is neither Unicode safe nor free from side effects. In order not to
316	* waste any memory the char buffer is static and shared by all UString
317	* instances.
318	*/
319	char *ascii() const;
320
321	/**
322	* Convert the string to UTF-8, assuming it is UTF-16 encoded.
323	* Since this function is tolerant of badly formed UTF-16, it can create UTF-8
324	* strings that are invalid because they have characters in the range
325	* U+D800-U+DDFF, U+FFFE, or U+FFFF, but the UTF-8 string is guaranteed to
326	* be otherwise valid.
327	*/
328	CString UTF8String() const;
329
330	/**
331	* @see UString(const QString&).
332	*/
333	DOM::DOMString domString() const;
334	/**
335	* @see UString(const QString&).
336	*/
337	QString qstring() const;
338	/**
339	* @see UString(const QString&).
340	*/
341	QConstString qconststring() const;
342
343	/**
344	* Assignment operator.
345	*/
346	UString &operator=(const char *c);
347	/**
348	* Appends the specified string.
349	*/
350	UString &operator+=(const UString &s) { return append(s); }
351	UString &operator+=(const char *s) { return append(s); }
352
353	/**
354	* @return A pointer to the internal Unicode data.
355	*/
356	const UChar* data() const { return m_rep->data(); }
357	/**
358	* @return True if null.
359	*/
360	bool isNull() const { return (m_rep == &Rep::null); }
361	/**
362	* @return True if null or zero length.
363	*/
364	bool isEmpty() const { return (!m_rep->len); }
365	/**
366	* Use this if you want to make sure that this string is a plain ASCII
367	* string. For example, if you don't want to lose any information when
368	* using cstring() or ascii().
369	*
370	* @return True if the string doesn't contain any non-ASCII characters.
371	*/
372	bool is8Bit() const;
373	/**
374	* @return The length of the string.
375	*/
376	int size() const { return m_rep->size(); }
377	/**
378	* Const character at specified position.
379	*/
380	UChar operator[](int pos) const;
381	/**
382	* Writable reference to character at specified position.
383	*/
384	UCharReference operator[](int pos);
385
386	/**
387	* Attempts an conversion to a number. Apart from floating point numbers,
388	* the algorithm will recognize hexadecimal representations (as
389	* indicated by a 0x or 0X prefix) and +/- Infinity.
390	* Returns NaN if the conversion failed.
391	* @param tolerateTrailingJunk if true, toDouble can tolerate garbage after the number.
392	* @param tolerateEmptyString if false, toDouble will turn an empty string into NaN rather than 0.
393	*/
394	double toDouble(bool tolerateTrailingJunk, bool tolerateEmptyString) const;
395	double toDouble(bool tolerateTrailingJunk) const;
396	double toDouble() const;
397
398	/**
399	* Attempts an conversion to a 32-bit integer. ok will be set
400	* according to the success.
401	* @param tolerateEmptyString if false, toUInt32 will return false for *ok for an empty string.
402	*/
403	uint32_t toUInt32(bool *ok = 0) const;
404	uint32_t toUInt32(bool *ok, bool tolerateEmptyString) const;
405	uint32_t toStrictUInt32(bool *ok = 0) const;
406
407	/**
408	* Attempts an conversion to an array index. The "ok" boolean will be set
409	* to true if it is a valid array index according to the rule from
410	* ECMA 15.2 about what an array index is. It must exactly match the string
411	* form of an unsigned integer, and be less than 2^32 - 1.
412	*/
413	unsigned toArrayIndex(bool *ok = 0) const;
414
415	/**
416	* @return Position of first occurrence of f starting at position pos.
417	* -1 if the search was not successful.
418	*/
419	int find(const UString &f, int pos = 0) const;
420	int find(UChar, int pos = 0) const;
421	/**
422	* @return Position of first occurrence of f searching backwards from
423	* position pos.
424	* -1 if the search was not successful.
425	*/
426	int rfind(const UString &f, int pos) const;
427	int rfind(UChar, int pos) const;
428	/**
429	* @return The sub string starting at position pos and length len.
430	*/
431	UString substr(int pos = 0, int len = -1) const;
432	/**
433	* Static instance of a null string.
434	*/
435	static const UString &null();
436	#ifdef KJS_DEBUG_MEM
437	/**
438	* Clear statically allocated resources.
439	*/
440	static void globalClear();
441	#endif
442
443	Rep *rep() const { return m_rep.get(); }
444	UString(PassRefPtr<Rep> r) : m_rep(r) { }
445
446	void copyForWriting();
447
448	private:
449	int expandedSize(int size, int otherSize) const;
450	int usedCapacity() const;
451	int usedPreCapacity() const;
452	void expandCapacity(int requiredLength);
453	void expandPreCapacity(int requiredPreCap);
454
455	RefPtr<Rep> m_rep;
456	};
457
458	inline bool operator==(const UChar &c1, const UChar &c2) {
459	return (c1.uc == c2.uc);
460	}
461	bool operator==(const UString& s1, const UString& s2);
462	inline bool operator!=(const UString& s1, const UString& s2) {
463	return !KJS::operator==(s1, s2);
464	}
465	bool operator<(const UString& s1, const UString& s2);
466	bool operator==(const UString& s1, const char *s2);
467	inline bool operator!=(const UString& s1, const char *s2) {
468	return !KJS::operator==(s1, s2);
469	}
470	inline bool operator==(const char *s1, const UString& s2) {
471	return operator==(s2, s1);
472	}
473	inline bool operator!=(const char *s1, const UString& s2) {
474	return !KJS::operator==(s1, s2);
475	}
476	bool operator==(const CString& s1, const CString& s2);
477	inline UString operator+(const UString& s1, const UString& s2) {
478	return UString(s1, s2);
479	}
480
481	int compare(const UString &, const UString &);
482
/**
 * Given the first byte of a UTF-8 sequence, gives the length of the
 * sequence that it begins.
 *
 * Only sequences of up to 4 bytes are allowed, since that is enough for all
 * Unicode characters (U-00000000 to U-0010FFFF).
 *
 * @return The sequence length, or 0 if the byte is not a legal start of a
 * UTF-8 sequence.
 */
int UTF8SequenceLength(char firstByte);
487
/**
 * Converts a null-terminated C-style string that contains one UTF-8
 * sequence to the character it encodes.
 *
 * Only Unicode characters (U-00000000 to U-0010FFFF) are allowed.
 *
 * @return The decoded character, or -1 if the sequence is not valid
 * (which includes the presence of extra bytes).
 */
int decodeUTF8Sequence(const char *sequence);
492
493	inline UString::UString()
494	: m_rep(&Rep::null)
495	{
496	}
497
498	// Rule from ECMA 15.2 about what an array index is.
499	// Must exactly match string form of an unsigned integer, and be less than 2^32 - 1.
500	inline unsigned UString::toArrayIndex(bool *ok) const
501	{
502	unsigned i = toStrictUInt32(ok);
503	if (ok && i >= 0xFFFFFFFFU)
504	*ok = false;
505	return i;
506	}
507
508	} // namespace
509
510	#endif

Note: See TracBrowser for help on using the repository browser.

Download in other formats: