source: webkit/trunk/JavaScriptCore/runtime/UString.h@ 43104

Last change on this file since 43104 was 43104, checked in by [email protected], 16 years ago

2009-04-30 Maciej Stachowiak <[email protected]>

Reviewed by Gavin Barraclough.

  • Concatenate final three strings in simple replace case at one go

~0.2% SunSpider speedup

  • runtime/StringPrototype.cpp: (JSC::stringProtoFuncReplace): Use new replaceRange helper instead of taking substrings and concatenating three strings.
  • runtime/UString.cpp: (JSC::UString::replaceRange): New helper function.
  • runtime/UString.h:
  • Property svn:eol-style set to native
File size: 16.1 KB
Line 
/*
 * Copyright (C) 1999-2000 Harri Porten ([email protected])
 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
 * Copyright (c) 2009, Google Inc. All rights reserved.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public License
 * along with this library; see the file COPYING.LIB. If not, write to
 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 * Boston, MA 02110-1301, USA.
 *
 */
22
23#ifndef UString_h
24#define UString_h
25
26#include "Collector.h"
27#include <stdint.h>
28#include <string.h>
29#include <wtf/Assertions.h>
30#include <wtf/PassRefPtr.h>
31#include <wtf/PtrAndFlags.h>
32#include <wtf/RefPtr.h>
33#include <wtf/Vector.h>
34#include <wtf/unicode/Unicode.h>
35
36namespace JSC {
37
38 using WTF::PlacementNewAdoptType;
39 using WTF::PlacementNewAdopt;
40
41 class IdentifierTable;
42
// Byte string with an explicit length. A default-constructed CString has size() == 0 and
// c_str() == 0. The out-of-line members are only declared in this header.
class CString {
public:
    CString()
        : m_length(0)
        , m_data(0)
    {
    }

    CString(const char*);
    CString(const char*, size_t);
    CString(const CString&);

    ~CString();

    static CString adopt(char*, size_t); // buffer should be allocated with new[].

    CString& append(const CString&);
    CString& operator=(const char* c);
    CString& operator=(const CString&);
    // Same as append().
    CString& operator+=(const CString& c) { return append(c); }

    // Stored length, as kept in m_length.
    size_t size() const { return m_length; }
    // Stored data pointer; 0 for a default-constructed CString.
    const char* c_str() const { return m_data; }

private:
    size_t m_length;
    char* m_data;
};
71
72 typedef Vector<char, 32> CStringBuffer;
73
74 class UString {
75 friend class JIT;
76
77 public:
78 struct BaseString;
79 struct Rep : Noncopyable {
80 friend class JIT;
81
82 static PassRefPtr<Rep> create(UChar* buffer, int length)
83 {
84 return adoptRef(new BaseString(buffer, length));
85 }
86
87 static PassRefPtr<Rep> createCopying(const UChar*, int);
88 static PassRefPtr<Rep> create(PassRefPtr<Rep> base, int offset, int length);
89
90 // Constructs a string from a UTF-8 string, using strict conversion (see comments in UTF8.h).
91 // Returns UString::Rep::null for null input or conversion failure.
92 static PassRefPtr<Rep> createFromUTF8(const char*);
93
94 void destroy();
95
96 bool baseIsSelf() const { return m_identifierTableAndFlags.isFlagSet(BaseStringFlag); }
97 UChar* data() const;
98 int size() const { return len; }
99
100 unsigned hash() const { if (_hash == 0) _hash = computeHash(data(), len); return _hash; }
101 unsigned computedHash() const { ASSERT(_hash); return _hash; } // fast path for Identifiers
102
103 static unsigned computeHash(const UChar*, int length);
104 static unsigned computeHash(const char*, int length);
105 static unsigned computeHash(const char* s) { return computeHash(s, strlen(s)); }
106
107 IdentifierTable* identifierTable() const { return m_identifierTableAndFlags.get(); }
108 void setIdentifierTable(IdentifierTable* table) { ASSERT(!isStatic()); m_identifierTableAndFlags.set(table); }
109
110 bool isStatic() const { return m_identifierTableAndFlags.isFlagSet(StaticFlag); }
111 void setStatic(bool);
112 void setBaseString(PassRefPtr<BaseString>);
113 BaseString* baseString();
114 const BaseString* baseString() const;
115
116 Rep* ref() { ++rc; return this; }
117 ALWAYS_INLINE void deref() { if (--rc == 0) destroy(); }
118
119 void checkConsistency() const;
120 enum UStringFlags {
121 StaticFlag,
122 BaseStringFlag
123 };
124
125 // unshared data
126 int offset;
127 int len;
128 int rc; // For null and empty static strings, this field does not reflect a correct count, because ref/deref are not thread-safe. A special case in destroy() guarantees that these do not get deleted.
129 mutable unsigned _hash;
130 PtrAndFlags<IdentifierTable, UStringFlags> m_identifierTableAndFlags;
131
132 static BaseString& null() { return *nullBaseString; }
133 static BaseString& empty() { return *emptyBaseString; }
134
135 protected:
136 // constructor for use by BaseString subclass; they are their own bases
137 Rep(int length)
138 : offset(0)
139 , len(length)
140 , rc(1)
141 , _hash(0)
142 , m_baseString(static_cast<BaseString*>(this))
143 {
144 }
145
146 Rep(PassRefPtr<BaseString> base, int offsetInBase, int length)
147 : offset(offsetInBase)
148 , len(length)
149 , rc(1)
150 , _hash(0)
151 , m_baseString(base.releaseRef())
152 {
153 checkConsistency();
154 }
155
156
157 BaseString* m_baseString;
158
159 private:
160 // For SmallStringStorage which allocates an array and does initialization manually.
161 Rep() { }
162
163 friend class SmallStringsStorage;
164 friend void initializeUString();
165 JS_EXPORTDATA static BaseString* nullBaseString;
166 JS_EXPORTDATA static BaseString* emptyBaseString;
167 };
168
169
170 struct BaseString : public Rep {
171 bool isShared() { return rc != 1; }
172
173 // potentially shared data.
174 UChar* buf;
175 int preCapacity;
176 int usedPreCapacity;
177 int capacity;
178 int usedCapacity;
179
180 size_t reportedCost;
181
182 private:
183 BaseString(UChar* buffer, int length)
184 : Rep(length)
185 , buf(buffer)
186 , preCapacity(0)
187 , usedPreCapacity(0)
188 , capacity(length)
189 , usedCapacity(length)
190 , reportedCost(0)
191 {
192 m_identifierTableAndFlags.setFlag(BaseStringFlag);
193 checkConsistency();
194 }
195
196 friend struct Rep;
197 friend class SmallStringsStorage;
198 friend void initializeUString();
199 };
200
201 public:
202 UString();
203 UString(const char*);
204 UString(const UChar*, int length);
205 UString(UChar*, int length, bool copy);
206
207 UString(const UString& s)
208 : m_rep(s.m_rep)
209 {
210 }
211
212 UString(const Vector<UChar>& buffer);
213
214 ~UString()
215 {
216 }
217
218 // Special constructor for cases where we overwrite an object in place.
219 UString(PlacementNewAdoptType)
220 : m_rep(PlacementNewAdopt)
221 {
222 }
223
224 static UString from(int);
225 static UString from(unsigned int);
226 static UString from(long);
227 static UString from(double);
228
229 struct Range {
230 public:
231 Range(int pos, int len)
232 : position(pos)
233 , length(len)
234 {
235 }
236
237 Range()
238 {
239 }
240
241 int position;
242 int length;
243 };
244
245 UString spliceSubstringsWithSeparators(const Range* substringRanges, int rangeCount, const UString* separators, int separatorCount) const;
246
247 UString replaceRange(int rangeStart, int RangeEnd, const UString& replacement) const;
248
249 UString& append(const UString&);
250 UString& append(const char*);
251 UString& append(UChar);
252 UString& append(char c) { return append(static_cast<UChar>(static_cast<unsigned char>(c))); }
253 UString& append(const UChar*, int size);
254
255 bool getCString(CStringBuffer&) const;
256
257 // NOTE: This method should only be used for *debugging* purposes as it
258 // is neither Unicode safe nor free from side effects nor thread-safe.
259 char* ascii() const;
260
261 /**
262 * Convert the string to UTF-8, assuming it is UTF-16 encoded.
263 * In non-strict mode, this function is tolerant of badly formed UTF-16, it
264 * can create UTF-8 strings that are invalid because they have characters in
265 * the range U+D800-U+DDFF, U+FFFE, or U+FFFF, but the UTF-8 string is
266 * guaranteed to be otherwise valid.
267 * In strict mode, error is returned as null CString.
268 */
269 CString UTF8String(bool strict = false) const;
270
271 UString& operator=(const char*c);
272
273 UString& operator+=(const UString& s) { return append(s); }
274 UString& operator+=(const char* s) { return append(s); }
275
276 const UChar* data() const { return m_rep->data(); }
277
278 bool isNull() const { return (m_rep == &Rep::null()); }
279 bool isEmpty() const { return (!m_rep->len); }
280
281 bool is8Bit() const;
282
283 int size() const { return m_rep->size(); }
284
285 UChar operator[](int pos) const;
286
287 double toDouble(bool tolerateTrailingJunk, bool tolerateEmptyString) const;
288 double toDouble(bool tolerateTrailingJunk) const;
289 double toDouble() const;
290
291 uint32_t toUInt32(bool* ok = 0) const;
292 uint32_t toUInt32(bool* ok, bool tolerateEmptyString) const;
293 uint32_t toStrictUInt32(bool* ok = 0) const;
294
295 unsigned toArrayIndex(bool* ok = 0) const;
296
297 int find(const UString& f, int pos = 0) const;
298 int find(UChar, int pos = 0) const;
299 int rfind(const UString& f, int pos) const;
300 int rfind(UChar, int pos) const;
301
302 UString substr(int pos = 0, int len = -1) const;
303
304 static const UString& null() { return *nullUString; }
305
306 Rep* rep() const { return m_rep.get(); }
307 static Rep* nullRep();
308
309 UString(PassRefPtr<Rep> r)
310 : m_rep(r)
311 {
312 ASSERT(m_rep);
313 }
314
315 size_t cost() const;
316
317 private:
318 void expandCapacity(int requiredLength);
319 void expandPreCapacity(int requiredPreCap);
320 void makeNull();
321
322 RefPtr<Rep> m_rep;
323 static UString* nullUString;
324
325 friend void initializeUString();
326 friend bool operator==(const UString&, const UString&);
327 friend PassRefPtr<Rep> concatenate(Rep*, Rep*); // returns 0 if out of memory
328 };
329 PassRefPtr<UString::Rep> concatenate(UString::Rep*, UString::Rep*);
330 PassRefPtr<UString::Rep> concatenate(UString::Rep*, int);
331 PassRefPtr<UString::Rep> concatenate(UString::Rep*, double);
332
333 bool operator==(const UString&, const UString&);
334
335 inline bool operator!=(const UString& s1, const UString& s2)
336 {
337 return !JSC::operator==(s1, s2);
338 }
339
340 bool operator<(const UString& s1, const UString& s2);
341 bool operator>(const UString& s1, const UString& s2);
342
343 bool operator==(const UString& s1, const char* s2);
344
345 inline bool operator!=(const UString& s1, const char* s2)
346 {
347 return !JSC::operator==(s1, s2);
348 }
349
350 inline bool operator==(const char *s1, const UString& s2)
351 {
352 return operator==(s2, s1);
353 }
354
355 inline bool operator!=(const char *s1, const UString& s2)
356 {
357 return !JSC::operator==(s1, s2);
358 }
359
360 bool operator==(const CString&, const CString&);
361
362 inline UString operator+(const UString& s1, const UString& s2)
363 {
364 RefPtr<UString::Rep> result = concatenate(s1.rep(), s2.rep());
365 return UString(result ? result.release() : UString::nullRep());
366 }
367
368 int compare(const UString&, const UString&);
369
370 bool equal(const UString::Rep*, const UString::Rep*);
371
372 inline PassRefPtr<UString::Rep> UString::Rep::create(PassRefPtr<UString::Rep> rep, int offset, int length)
373 {
374 ASSERT(rep);
375 rep->checkConsistency();
376
377 int repOffset = rep->offset;
378
379 PassRefPtr<BaseString> base = rep->baseString();
380
381 ASSERT(-(offset + repOffset) <= base->usedPreCapacity);
382 ASSERT(offset + repOffset + length <= base->usedCapacity);
383
384 // Steal the single reference this Rep was created with.
385 return adoptRef(new Rep(base, repOffset + offset, length));
386 }
387
388 inline UChar* UString::Rep::data() const
389 {
390 const BaseString* base = baseString();
391 return base->buf + base->preCapacity + offset;
392 }
393
394 inline void UString::Rep::setStatic(bool v)
395 {
396 ASSERT(!identifierTable());
397 if (v)
398 m_identifierTableAndFlags.setFlag(StaticFlag);
399 else
400 m_identifierTableAndFlags.clearFlag(StaticFlag);
401 }
402
403 inline void UString::Rep::setBaseString(PassRefPtr<BaseString> base)
404 {
405 ASSERT(base != this);
406 ASSERT(!baseIsSelf());
407 m_baseString = base.releaseRef();
408 }
409
410 inline UString::BaseString* UString::Rep::baseString()
411 {
412 return m_baseString;
413 }
414
415 inline const UString::BaseString* UString::Rep::baseString() const
416 {
417 return m_baseString;
418 }
419
#ifdef NDEBUG
// With NDEBUG defined, consistency checking is an empty inline function. This header
// provides no definition for builds without NDEBUG.
inline void UString::Rep::checkConsistency() const
{
}
#endif
425
426 inline UString::UString()
427 : m_rep(&Rep::null())
428 {
429 }
430
431 // Rule from ECMA 15.2 about what an array index is.
432 // Must exactly match string form of an unsigned integer, and be less than 2^32 - 1.
433 inline unsigned UString::toArrayIndex(bool* ok) const
434 {
435 unsigned i = toStrictUInt32(ok);
436 if (ok && i >= 0xFFFFFFFFU)
437 *ok = false;
438 return i;
439 }
440
441 // We'd rather not do shared substring append for small strings, since
442 // this runs too much risk of a tiny initial string holding down a
443 // huge buffer.
444 // FIXME: this should be size_t but that would cause warnings until we
445 // fix UString sizes to be size_t instead of int
446 static const int minShareSize = Heap::minExtraCostSize / sizeof(UChar);
447
448 inline size_t UString::cost() const
449 {
450 BaseString* base = m_rep->baseString();
451 size_t capacity = (base->capacity + base->preCapacity) * sizeof(UChar);
452 size_t reportedCost = base->reportedCost;
453 ASSERT(capacity >= reportedCost);
454
455 size_t capacityDelta = capacity - reportedCost;
456
457 if (capacityDelta < static_cast<size_t>(minShareSize))
458 return 0;
459
460 base->reportedCost = capacity;
461
462 return capacityDelta;
463 }
464
465 struct IdentifierRepHash : PtrHash<RefPtr<JSC::UString::Rep> > {
466 static unsigned hash(const RefPtr<JSC::UString::Rep>& key) { return key->computedHash(); }
467 static unsigned hash(JSC::UString::Rep* key) { return key->computedHash(); }
468 };
469
// Declared a friend of UString, UString::Rep and UString::BaseString; defined outside this header.
void initializeUString();
471} // namespace JSC
472
473namespace WTF {
474
475 template<typename T> struct DefaultHash;
476 template<typename T> struct StrHash;
477
478 template<> struct StrHash<JSC::UString::Rep*> {
479 static unsigned hash(const JSC::UString::Rep* key) { return key->hash(); }
480 static bool equal(const JSC::UString::Rep* a, const JSC::UString::Rep* b) { return JSC::equal(a, b); }
481 static const bool safeToCompareToEmptyOrDeleted = false;
482 };
483
484 template<> struct StrHash<RefPtr<JSC::UString::Rep> > : public StrHash<JSC::UString::Rep*> {
485 using StrHash<JSC::UString::Rep*>::hash;
486 static unsigned hash(const RefPtr<JSC::UString::Rep>& key) { return key->hash(); }
487 using StrHash<JSC::UString::Rep*>::equal;
488 static bool equal(const RefPtr<JSC::UString::Rep>& a, const RefPtr<JSC::UString::Rep>& b) { return JSC::equal(a.get(), b.get()); }
489 static bool equal(const JSC::UString::Rep* a, const RefPtr<JSC::UString::Rep>& b) { return JSC::equal(a, b.get()); }
490 static bool equal(const RefPtr<JSC::UString::Rep>& a, const JSC::UString::Rep* b) { return JSC::equal(a.get(), b); }
491
492 static const bool safeToCompareToEmptyOrDeleted = false;
493 };
494
495 template<> struct DefaultHash<JSC::UString::Rep*> {
496 typedef StrHash<JSC::UString::Rep*> Hash;
497 };
498
499 template<> struct DefaultHash<RefPtr<JSC::UString::Rep> > {
500 typedef StrHash<RefPtr<JSC::UString::Rep> > Hash;
501
502 };
503
504} // namespace WTF
505
506#endif
Note: See TracBrowser for help on using the repository browser.