source: webkit/trunk/JavaScriptCore/kjs/ustring.cpp@ 29059

Last change on this file since 29059 was 27859, checked in by [email protected], 18 years ago

2007-11-16 Mark Rowe <[email protected]>

Reviewed by Eric.

Replace strings, identifier, buffer8 and buffer16 members of Lexer with vectors.
SunSpider claims this is a 0.7% speedup.

  • kjs/lexer.cpp: (KJS::Lexer::Lexer): (KJS::Lexer::lex): (KJS::Lexer::record8): (KJS::Lexer::record16): (KJS::Lexer::scanRegExp): (KJS::Lexer::clear): (KJS::Lexer::makeIdentifier): (KJS::Lexer::makeUString):
  • kjs/lexer.h:
  • kjs/ustring.cpp: (KJS::UString::UString): Add a convenience constructor that takes a const Vector<UChar>&.
  • kjs/ustring.h:
  • Property svn:eol-style set to native
File size: 30.9 KB
Line 
1// -*- c-basic-offset: 2 -*-
2/*
3 * Copyright (C) 1999-2000 Harri Porten ([email protected])
4 * Copyright (C) 2004, 2005, 2006, 2007 Apple Inc. All rights reserved.
5 * Copyright (C) 2007 Cameron Zwarich ([email protected])
6 *
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Library General Public
9 * License as published by the Free Software Foundation; either
10 * version 2 of the License, or (at your option) any later version.
11 *
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Library General Public License for more details.
16 *
17 * You should have received a copy of the GNU Library General Public License
18 * along with this library; see the file COPYING.LIB. If not, write to
19 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20 * Boston, MA 02110-1301, USA.
21 *
22 */
23
24#include "config.h"
25#include "ustring.h"
26
27#include "JSLock.h"
28#include "collector.h"
29#include "dtoa.h"
30#include "function.h"
31#include "identifier.h"
32#include "operations.h"
33#include <ctype.h>
34#include <float.h>
35#include <limits.h>
36#include <math.h>
37#include <stdio.h>
38#include <stdlib.h>
39#include <wtf/Assertions.h>
40#include <wtf/ASCIICType.h>
41#include <wtf/MathExtras.h>
42#include <wtf/Vector.h>
43#include <wtf/unicode/UTF8.h>
44
45#if HAVE(STRING_H)
46#include <string.h>
47#endif
48#if HAVE(STRINGS_H)
49#include <strings.h>
50#endif
51
52using namespace WTF;
53using namespace WTF::Unicode;
54using namespace std;
55
56namespace KJS {
57
58extern const double NaN;
59extern const double Inf;
60
61static inline const size_t overflowIndicator() { return std::numeric_limits<size_t>::max(); }
62static inline const size_t maxUChars() { return std::numeric_limits<size_t>::max() / sizeof(UChar); }
63
64static inline UChar* allocChars(size_t length)
65{
66 ASSERT(length);
67 if (length > maxUChars())
68 return 0;
69 return static_cast<UChar*>(fastMalloc(sizeof(UChar) * length));
70}
71
72static inline UChar* reallocChars(UChar* buffer, size_t length)
73{
74 ASSERT(length);
75 if (length > maxUChars())
76 return 0;
77 return static_cast<UChar*>(fastRealloc(buffer, sizeof(UChar) * length));
78}
79
80// we'd rather not do shared substring append for small strings, since
81// this runs too much risk of a tiny initial string holding down a
82// huge buffer. This is also tuned to match the extra cost size, so we
83// don't ever share a buffer that wouldn't be over the extra cost
84// threshold already.
85// FIXME: this should be size_t but that would cause warnings until we
86// fix UString sizes to be size_t instad of int
87static const int minShareSize = Collector::minExtraCostSize / sizeof(UChar);
88
89COMPILE_ASSERT(sizeof(UChar) == 2, uchar_is_2_bytes)
90
91CString::CString(const char *c)
92{
93 length = strlen(c);
94 data = new char[length+1];
95 memcpy(data, c, length + 1);
96}
97
98CString::CString(const char *c, size_t len)
99{
100 length = len;
101 data = new char[len+1];
102 memcpy(data, c, len);
103 data[len] = 0;
104}
105
106CString::CString(const CString &b)
107{
108 length = b.length;
109 if (b.data) {
110 data = new char[length+1];
111 memcpy(data, b.data, length + 1);
112 }
113 else
114 data = 0;
115}
116
117CString::~CString()
118{
119 delete [] data;
120}
121
122CString &CString::append(const CString &t)
123{
124 char *n;
125 n = new char[length+t.length+1];
126 if (length)
127 memcpy(n, data, length);
128 if (t.length)
129 memcpy(n+length, t.data, t.length);
130 length += t.length;
131 n[length] = 0;
132
133 delete [] data;
134 data = n;
135
136 return *this;
137}
138
139CString &CString::operator=(const char *c)
140{
141 if (data)
142 delete [] data;
143 length = strlen(c);
144 data = new char[length+1];
145 memcpy(data, c, length + 1);
146
147 return *this;
148}
149
150CString &CString::operator=(const CString &str)
151{
152 if (this == &str)
153 return *this;
154
155 if (data)
156 delete [] data;
157 length = str.length;
158 if (str.data) {
159 data = new char[length + 1];
160 memcpy(data, str.data, length + 1);
161 }
162 else
163 data = 0;
164
165 return *this;
166}
167
168bool operator==(const CString& c1, const CString& c2)
169{
170 size_t len = c1.size();
171 return len == c2.size() && (len == 0 || memcmp(c1.c_str(), c2.c_str(), len) == 0);
172}
173
174// Hack here to avoid a global with a constructor; point to an unsigned short instead of a UChar.
175static unsigned short almostUChar;
176UString::Rep UString::Rep::null = { 0, 0, 1, 0, 0, &UString::Rep::null, 0, 0, 0, 0, 0 };
177UString::Rep UString::Rep::empty = { 0, 0, 1, 0, 0, &UString::Rep::empty, reinterpret_cast<UChar*>(&almostUChar), 0, 0, 0, 0 };
178const int normalStatBufferSize = 4096;
179static char *statBuffer = 0; // FIXME: This buffer is never deallocated.
180static int statBufferSize = 0;
181
182PassRefPtr<UString::Rep> UString::Rep::createCopying(const UChar *d, int l)
183{
184 ASSERT(JSLock::lockCount() > 0);
185
186 int sizeInBytes = l * sizeof(UChar);
187 UChar *copyD = static_cast<UChar *>(fastMalloc(sizeInBytes));
188 memcpy(copyD, d, sizeInBytes);
189
190 return create(copyD, l);
191}
192
193PassRefPtr<UString::Rep> UString::Rep::create(UChar *d, int l)
194{
195 ASSERT(JSLock::lockCount() > 0);
196
197 Rep* r = new Rep;
198 r->offset = 0;
199 r->len = l;
200 r->rc = 1;
201 r->_hash = 0;
202 r->isIdentifier = 0;
203 r->baseString = r;
204 r->buf = d;
205 r->usedCapacity = l;
206 r->capacity = l;
207 r->usedPreCapacity = 0;
208 r->preCapacity = 0;
209
210 // steal the single reference this Rep was created with
211 return adoptRef(r);
212}
213
214PassRefPtr<UString::Rep> UString::Rep::create(PassRefPtr<Rep> base, int offset, int length)
215{
216 ASSERT(JSLock::lockCount() > 0);
217 ASSERT(base);
218
219 int baseOffset = base->offset;
220
221 base = base->baseString;
222
223 ASSERT(-(offset + baseOffset) <= base->usedPreCapacity);
224 ASSERT(offset + baseOffset + length <= base->usedCapacity);
225
226 Rep *r = new Rep;
227 r->offset = baseOffset + offset;
228 r->len = length;
229 r->rc = 1;
230 r->_hash = 0;
231 r->isIdentifier = 0;
232 r->baseString = base.releaseRef();
233 r->buf = 0;
234 r->usedCapacity = 0;
235 r->capacity = 0;
236 r->usedPreCapacity = 0;
237 r->preCapacity = 0;
238
239 // steal the single reference this Rep was created with
240 return adoptRef(r);
241}
242
243void UString::Rep::destroy()
244{
245 ASSERT(JSLock::lockCount() > 0);
246
247 if (isIdentifier)
248 Identifier::remove(this);
249 if (baseString != this) {
250 baseString->deref();
251 } else {
252 fastFree(buf);
253 }
254 delete this;
255}
256
257// Golden ratio - arbitrary start value to avoid mapping all 0's to all 0's
258// or anything like that.
259const unsigned PHI = 0x9e3779b9U;
260
261// Paul Hsieh's SuperFastHash
262// http://www.azillionmonkeys.com/qed/hash.html
263unsigned UString::Rep::computeHash(const UChar *s, int len)
264{
265 unsigned l = len;
266 uint32_t hash = PHI;
267 uint32_t tmp;
268
269 int rem = l & 1;
270 l >>= 1;
271
272 // Main loop
273 for (; l > 0; l--) {
274 hash += s[0].uc;
275 tmp = (s[1].uc << 11) ^ hash;
276 hash = (hash << 16) ^ tmp;
277 s += 2;
278 hash += hash >> 11;
279 }
280
281 // Handle end case
282 if (rem) {
283 hash += s[0].uc;
284 hash ^= hash << 11;
285 hash += hash >> 17;
286 }
287
288 // Force "avalanching" of final 127 bits
289 hash ^= hash << 3;
290 hash += hash >> 5;
291 hash ^= hash << 2;
292 hash += hash >> 15;
293 hash ^= hash << 10;
294
295 // this avoids ever returning a hash code of 0, since that is used to
296 // signal "hash not computed yet", using a value that is likely to be
297 // effectively the same as 0 when the low bits are masked
298 if (hash == 0)
299 hash = 0x80000000;
300
301 return hash;
302}
303
304// Paul Hsieh's SuperFastHash
305// http://www.azillionmonkeys.com/qed/hash.html
306unsigned UString::Rep::computeHash(const char *s)
307{
308 // This hash is designed to work on 16-bit chunks at a time. But since the normal case
309 // (above) is to hash UTF-16 characters, we just treat the 8-bit chars as if they
310 // were 16-bit chunks, which should give matching results
311
312 uint32_t hash = PHI;
313 uint32_t tmp;
314 size_t l = strlen(s);
315
316 size_t rem = l & 1;
317 l >>= 1;
318
319 // Main loop
320 for (; l > 0; l--) {
321 hash += (unsigned char)s[0];
322 tmp = ((unsigned char)s[1] << 11) ^ hash;
323 hash = (hash << 16) ^ tmp;
324 s += 2;
325 hash += hash >> 11;
326 }
327
328 // Handle end case
329 if (rem) {
330 hash += (unsigned char)s[0];
331 hash ^= hash << 11;
332 hash += hash >> 17;
333 }
334
335 // Force "avalanching" of final 127 bits
336 hash ^= hash << 3;
337 hash += hash >> 5;
338 hash ^= hash << 2;
339 hash += hash >> 15;
340 hash ^= hash << 10;
341
342 // this avoids ever returning a hash code of 0, since that is used to
343 // signal "hash not computed yet", using a value that is likely to be
344 // effectively the same as 0 when the low bits are masked
345 if (hash == 0)
346 hash = 0x80000000;
347
348 return hash;
349}
350
351// put these early so they can be inlined
352inline size_t UString::expandedSize(size_t size, size_t otherSize) const
353{
354 // Do the size calculation in two parts, returning overflowIndicator if
355 // we overflow the maximum value that we can handle.
356
357 if (size > maxUChars())
358 return overflowIndicator();
359
360 size_t expandedSize = ((size + 10) / 10 * 11) + 1;
361 if (maxUChars() - expandedSize < otherSize)
362 return overflowIndicator();
363
364 return expandedSize + otherSize;
365}
366
367inline int UString::usedCapacity() const
368{
369 return m_rep->baseString->usedCapacity;
370}
371
372inline int UString::usedPreCapacity() const
373{
374 return m_rep->baseString->usedPreCapacity;
375}
376
377void UString::expandCapacity(int requiredLength)
378{
379 Rep* r = m_rep->baseString;
380
381 if (requiredLength > r->capacity) {
382 size_t newCapacity = expandedSize(requiredLength, r->preCapacity);
383 UChar* oldBuf = r->buf;
384 r->buf = reallocChars(r->buf, newCapacity);
385 if (!r->buf) {
386 r->buf = oldBuf;
387 m_rep = &Rep::null;
388 return;
389 }
390 r->capacity = newCapacity - r->preCapacity;
391 }
392 if (requiredLength > r->usedCapacity) {
393 r->usedCapacity = requiredLength;
394 }
395}
396
397void UString::expandPreCapacity(int requiredPreCap)
398{
399 Rep* r = m_rep->baseString;
400
401 if (requiredPreCap > r->preCapacity) {
402 size_t newCapacity = expandedSize(requiredPreCap, r->capacity);
403 int delta = newCapacity - r->capacity - r->preCapacity;
404
405 UChar* newBuf = allocChars(newCapacity);
406 if (!newBuf) {
407 m_rep = &Rep::null;
408 return;
409 }
410 memcpy(newBuf + delta, r->buf, (r->capacity + r->preCapacity) * sizeof(UChar));
411 fastFree(r->buf);
412 r->buf = newBuf;
413
414 r->preCapacity = newCapacity - r->capacity;
415 }
416 if (requiredPreCap > r->usedPreCapacity) {
417 r->usedPreCapacity = requiredPreCap;
418 }
419}
420
421UString::UString(const char *c)
422{
423 if (!c) {
424 m_rep = &Rep::null;
425 return;
426 }
427
428 if (!c[0]) {
429 m_rep = &Rep::empty;
430 return;
431 }
432
433 size_t length = strlen(c);
434 UChar *d = allocChars(length);
435 if (!d)
436 m_rep = &Rep::null;
437 else {
438 for (size_t i = 0; i < length; i++)
439 d[i].uc = c[i];
440 m_rep = Rep::create(d, static_cast<int>(length));
441 }
442}
443
444UString::UString(const UChar *c, int length)
445{
446 if (length == 0)
447 m_rep = &Rep::empty;
448 else
449 m_rep = Rep::createCopying(c, length);
450}
451
452UString::UString(UChar *c, int length, bool copy)
453{
454 if (length == 0)
455 m_rep = &Rep::empty;
456 else if (copy)
457 m_rep = Rep::createCopying(c, length);
458 else
459 m_rep = Rep::create(c, length);
460}
461
462UString::UString(const Vector<UChar>& buffer)
463{
464 if (!buffer.size())
465 m_rep = &Rep::empty;
466 else
467 m_rep = Rep::createCopying(buffer.data(), buffer.size());
468}
469
470
471UString::UString(const UString &a, const UString &b)
472{
473 int aSize = a.size();
474 int aOffset = a.m_rep->offset;
475 int bSize = b.size();
476 int bOffset = b.m_rep->offset;
477 int length = aSize + bSize;
478
479 // possible cases:
480
481 if (aSize == 0) {
482 // a is empty
483 m_rep = b.m_rep;
484 } else if (bSize == 0) {
485 // b is empty
486 m_rep = a.m_rep;
487 } else if (aOffset + aSize == a.usedCapacity() && aSize >= minShareSize && 4 * aSize >= bSize &&
488 (-bOffset != b.usedPreCapacity() || aSize >= bSize)) {
489 // - a reaches the end of its buffer so it qualifies for shared append
490 // - also, it's at least a quarter the length of b - appending to a much shorter
491 // string does more harm than good
492 // - however, if b qualifies for prepend and is longer than a, we'd rather prepend
493 UString x(a);
494 x.expandCapacity(aOffset + length);
495 if (a.data() && x.data()) {
496 memcpy(const_cast<UChar *>(a.data() + aSize), b.data(), bSize * sizeof(UChar));
497 m_rep = Rep::create(a.m_rep, 0, length);
498 } else
499 m_rep = &Rep::null;
500 } else if (-bOffset == b.usedPreCapacity() && bSize >= minShareSize && 4 * bSize >= aSize) {
501 // - b reaches the beginning of its buffer so it qualifies for shared prepend
502 // - also, it's at least a quarter the length of a - prepending to a much shorter
503 // string does more harm than good
504 UString y(b);
505 y.expandPreCapacity(-bOffset + aSize);
506 if (b.data() && y.data()) {
507 memcpy(const_cast<UChar *>(b.data() - aSize), a.data(), aSize * sizeof(UChar));
508 m_rep = Rep::create(b.m_rep, -aSize, length);
509 } else
510 m_rep = &Rep::null;
511 } else {
512 // a does not qualify for append, and b does not qualify for prepend, gotta make a whole new string
513 size_t newCapacity = expandedSize(length, 0);
514 UChar* d = allocChars(newCapacity);
515 if (!d)
516 m_rep = &Rep::null;
517 else {
518 memcpy(d, a.data(), aSize * sizeof(UChar));
519 memcpy(d + aSize, b.data(), bSize * sizeof(UChar));
520 m_rep = Rep::create(d, length);
521 m_rep->capacity = newCapacity;
522 }
523 }
524}
525
526const UString& UString::null()
527{
528 static UString* n = new UString;
529 return *n;
530}
531
532UString UString::from(int i)
533{
534 UChar buf[1 + sizeof(i) * 3];
535 UChar *end = buf + sizeof(buf) / sizeof(UChar);
536 UChar *p = end;
537
538 if (i == 0) {
539 *--p = '0';
540 } else if (i == INT_MIN) {
541 char minBuf[1 + sizeof(i) * 3];
542 sprintf(minBuf, "%d", INT_MIN);
543 return UString(minBuf);
544 } else {
545 bool negative = false;
546 if (i < 0) {
547 negative = true;
548 i = -i;
549 }
550 while (i) {
551 *--p = (unsigned short)((i % 10) + '0');
552 i /= 10;
553 }
554 if (negative) {
555 *--p = '-';
556 }
557 }
558
559 return UString(p, static_cast<int>(end - p));
560}
561
562UString UString::from(unsigned int u)
563{
564 UChar buf[sizeof(u) * 3];
565 UChar *end = buf + sizeof(buf) / sizeof(UChar);
566 UChar *p = end;
567
568 if (u == 0) {
569 *--p = '0';
570 } else {
571 while (u) {
572 *--p = (unsigned short)((u % 10) + '0');
573 u /= 10;
574 }
575 }
576
577 return UString(p, static_cast<int>(end - p));
578}
579
580UString UString::from(long l)
581{
582 UChar buf[1 + sizeof(l) * 3];
583 UChar *end = buf + sizeof(buf) / sizeof(UChar);
584 UChar *p = end;
585
586 if (l == 0) {
587 *--p = '0';
588 } else if (l == LONG_MIN) {
589 char minBuf[1 + sizeof(l) * 3];
590 sprintf(minBuf, "%ld", LONG_MIN);
591 return UString(minBuf);
592 } else {
593 bool negative = false;
594 if (l < 0) {
595 negative = true;
596 l = -l;
597 }
598 while (l) {
599 *--p = (unsigned short)((l % 10) + '0');
600 l /= 10;
601 }
602 if (negative) {
603 *--p = '-';
604 }
605 }
606
607 return UString(p, static_cast<int>(end - p));
608}
609
610UString UString::from(double d)
611{
612 // avoid ever printing -NaN, in JS conceptually there is only one NaN value
613 if (isnan(d))
614 return "NaN";
615
616 char buf[80];
617 int decimalPoint;
618 int sign;
619
620 char *result = kjs_dtoa(d, 0, 0, &decimalPoint, &sign, NULL);
621 int length = static_cast<int>(strlen(result));
622
623 int i = 0;
624 if (sign) {
625 buf[i++] = '-';
626 }
627
628 if (decimalPoint <= 0 && decimalPoint > -6) {
629 buf[i++] = '0';
630 buf[i++] = '.';
631 for (int j = decimalPoint; j < 0; j++) {
632 buf[i++] = '0';
633 }
634 strcpy(buf + i, result);
635 } else if (decimalPoint <= 21 && decimalPoint > 0) {
636 if (length <= decimalPoint) {
637 strcpy(buf + i, result);
638 i += length;
639 for (int j = 0; j < decimalPoint - length; j++) {
640 buf[i++] = '0';
641 }
642 buf[i] = '\0';
643 } else {
644 strncpy(buf + i, result, decimalPoint);
645 i += decimalPoint;
646 buf[i++] = '.';
647 strcpy(buf + i, result + decimalPoint);
648 }
649 } else if (result[0] < '0' || result[0] > '9') {
650 strcpy(buf + i, result);
651 } else {
652 buf[i++] = result[0];
653 if (length > 1) {
654 buf[i++] = '.';
655 strcpy(buf + i, result + 1);
656 i += length - 1;
657 }
658
659 buf[i++] = 'e';
660 buf[i++] = (decimalPoint >= 0) ? '+' : '-';
661 // decimalPoint can't be more than 3 digits decimal given the
662 // nature of float representation
663 int exponential = decimalPoint - 1;
664 if (exponential < 0)
665 exponential = -exponential;
666 if (exponential >= 100)
667 buf[i++] = static_cast<char>('0' + exponential / 100);
668 if (exponential >= 10)
669 buf[i++] = static_cast<char>('0' + (exponential % 100) / 10);
670 buf[i++] = static_cast<char>('0' + exponential % 10);
671 buf[i++] = '\0';
672 }
673
674 kjs_freedtoa(result);
675
676 return UString(buf);
677}
678
679UString UString::spliceSubstringsWithSeparators(const Range* substringRanges, int rangeCount, const UString* separators, int separatorCount) const
680{
681 if (rangeCount == 1 && separatorCount == 0) {
682 int thisSize = size();
683 int position = substringRanges[0].position;
684 int length = substringRanges[0].length;
685 if (position <= 0 && length >= thisSize)
686 return *this;
687 return UString::Rep::create(m_rep, max(0, position), min(thisSize, length));
688 }
689
690 int totalLength = 0;
691 for (int i = 0; i < rangeCount; i++)
692 totalLength += substringRanges[i].length;
693 for (int i = 0; i < separatorCount; i++)
694 totalLength += separators[i].size();
695
696 if (totalLength == 0)
697 return "";
698
699 UChar* buffer = allocChars(totalLength);
700 if (!buffer)
701 return null();
702
703 int maxCount = max(rangeCount, separatorCount);
704 int bufferPos = 0;
705 for (int i = 0; i < maxCount; i++) {
706 if (i < rangeCount) {
707 memcpy(buffer + bufferPos, data() + substringRanges[i].position, substringRanges[i].length * sizeof(UChar));
708 bufferPos += substringRanges[i].length;
709 }
710 if (i < separatorCount) {
711 memcpy(buffer + bufferPos, separators[i].data(), separators[i].size() * sizeof(UChar));
712 bufferPos += separators[i].size();
713 }
714 }
715
716 return UString::Rep::create(buffer, totalLength);
717}
718
719UString &UString::append(const UString &t)
720{
721 int thisSize = size();
722 int thisOffset = m_rep->offset;
723 int tSize = t.size();
724 int length = thisSize + tSize;
725
726 // possible cases:
727 if (thisSize == 0) {
728 // this is empty
729 *this = t;
730 } else if (tSize == 0) {
731 // t is empty
732 } else if (m_rep->baseIsSelf() && m_rep->rc == 1) {
733 // this is direct and has refcount of 1 (so we can just alter it directly)
734 expandCapacity(thisOffset + length);
735 if (data()) {
736 memcpy(const_cast<UChar*>(data() + thisSize), t.data(), tSize * sizeof(UChar));
737 m_rep->len = length;
738 m_rep->_hash = 0;
739 }
740 } else if (thisOffset + thisSize == usedCapacity() && thisSize >= minShareSize) {
741 // this reaches the end of the buffer - extend it if it's long enough to append to
742 expandCapacity(thisOffset + length);
743 if (data()) {
744 memcpy(const_cast<UChar*>(data() + thisSize), t.data(), tSize * sizeof(UChar));
745 m_rep = Rep::create(m_rep, 0, length);
746 }
747 } else {
748 // this is shared with someone using more capacity, gotta make a whole new string
749 size_t newCapacity = expandedSize(length, 0);
750 UChar* d = allocChars(newCapacity);
751 if (!d)
752 m_rep = &Rep::null;
753 else {
754 memcpy(d, data(), thisSize * sizeof(UChar));
755 memcpy(const_cast<UChar*>(d + thisSize), t.data(), tSize * sizeof(UChar));
756 m_rep = Rep::create(d, length);
757 m_rep->capacity = newCapacity;
758 }
759 }
760
761 return *this;
762}
763
764UString &UString::append(const char *t)
765{
766 int thisSize = size();
767 int thisOffset = m_rep->offset;
768 int tSize = static_cast<int>(strlen(t));
769 int length = thisSize + tSize;
770
771 // possible cases:
772 if (thisSize == 0) {
773 // this is empty
774 *this = t;
775 } else if (tSize == 0) {
776 // t is empty, we'll just return *this below.
777 } else if (m_rep->baseIsSelf() && m_rep->rc == 1) {
778 // this is direct and has refcount of 1 (so we can just alter it directly)
779 expandCapacity(thisOffset + length);
780 UChar *d = const_cast<UChar *>(data());
781 if (d) {
782 for (int i = 0; i < tSize; ++i)
783 d[thisSize + i] = t[i];
784 m_rep->len = length;
785 m_rep->_hash = 0;
786 }
787 } else if (thisOffset + thisSize == usedCapacity() && thisSize >= minShareSize) {
788 // this string reaches the end of the buffer - extend it
789 expandCapacity(thisOffset + length);
790 UChar *d = const_cast<UChar *>(data());
791 if (d) {
792 for (int i = 0; i < tSize; ++i)
793 d[thisSize + i] = t[i];
794 m_rep = Rep::create(m_rep, 0, length);
795 }
796 } else {
797 // this is shared with someone using more capacity, gotta make a whole new string
798 size_t newCapacity = expandedSize(length, 0);
799 UChar* d = allocChars(newCapacity);
800 if (!d)
801 m_rep = &Rep::null;
802 else {
803 memcpy(d, data(), thisSize * sizeof(UChar));
804 for (int i = 0; i < tSize; ++i)
805 d[thisSize + i] = t[i];
806 m_rep = Rep::create(d, length);
807 m_rep->capacity = newCapacity;
808 }
809 }
810
811 return *this;
812}
813
814UString &UString::append(unsigned short c)
815{
816 int thisOffset = m_rep->offset;
817 int length = size();
818
819 // possible cases:
820 if (length == 0) {
821 // this is empty - must make a new m_rep because we don't want to pollute the shared empty one
822 size_t newCapacity = expandedSize(1, 0);
823 UChar* d = allocChars(newCapacity);
824 if (!d)
825 m_rep = &Rep::null;
826 else {
827 d[0] = c;
828 m_rep = Rep::create(d, 1);
829 m_rep->capacity = newCapacity;
830 }
831 } else if (m_rep->baseIsSelf() && m_rep->rc == 1) {
832 // this is direct and has refcount of 1 (so we can just alter it directly)
833 expandCapacity(thisOffset + length + 1);
834 UChar *d = const_cast<UChar *>(data());
835 if (d) {
836 d[length] = c;
837 m_rep->len = length + 1;
838 m_rep->_hash = 0;
839 }
840 } else if (thisOffset + length == usedCapacity() && length >= minShareSize) {
841 // this reaches the end of the string - extend it and share
842 expandCapacity(thisOffset + length + 1);
843 UChar *d = const_cast<UChar *>(data());
844 if (d) {
845 d[length] = c;
846 m_rep = Rep::create(m_rep, 0, length + 1);
847 }
848 } else {
849 // this is shared with someone using more capacity, gotta make a whole new string
850 size_t newCapacity = expandedSize(length + 1, 0);
851 UChar* d = allocChars(newCapacity);
852 if (!d)
853 m_rep = &Rep::null;
854 else {
855 memcpy(d, data(), length * sizeof(UChar));
856 d[length] = c;
857 m_rep = Rep::create(d, length + 1);
858 m_rep->capacity = newCapacity;
859 }
860 }
861
862 return *this;
863}
864
865CString UString::cstring() const
866{
867 return ascii();
868}
869
870char *UString::ascii() const
871{
872 // Never make the buffer smaller than normalStatBufferSize.
873 // Thus we almost never need to reallocate.
874 int length = size();
875 int neededSize = length + 1;
876 if (neededSize < normalStatBufferSize) {
877 neededSize = normalStatBufferSize;
878 }
879 if (neededSize != statBufferSize) {
880 delete [] statBuffer;
881 statBuffer = new char [neededSize];
882 statBufferSize = neededSize;
883 }
884
885 const UChar *p = data();
886 char *q = statBuffer;
887 const UChar *limit = p + length;
888 while (p != limit) {
889 *q = static_cast<char>(p->uc);
890 ++p;
891 ++q;
892 }
893 *q = '\0';
894
895 return statBuffer;
896}
897
898UString &UString::operator=(const char *c)
899{
900 if (!c) {
901 m_rep = &Rep::null;
902 return *this;
903 }
904
905 if (!c[0]) {
906 m_rep = &Rep::empty;
907 return *this;
908 }
909
910 int l = static_cast<int>(strlen(c));
911 UChar *d;
912 if (m_rep->rc == 1 && l <= m_rep->capacity && m_rep->baseIsSelf() && m_rep->offset == 0 && m_rep->preCapacity == 0) {
913 d = m_rep->buf;
914 m_rep->_hash = 0;
915 m_rep->len = l;
916 } else {
917 d = allocChars(l);
918 if (!d) {
919 m_rep = &Rep::null;
920 return *this;
921 }
922 m_rep = Rep::create(d, l);
923 }
924 for (int i = 0; i < l; i++)
925 d[i].uc = c[i];
926
927 return *this;
928}
929
930bool UString::is8Bit() const
931{
932 const UChar *u = data();
933 const UChar *limit = u + size();
934 while (u < limit) {
935 if (u->uc > 0xFF)
936 return false;
937 ++u;
938 }
939
940 return true;
941}
942
943const UChar UString::operator[](int pos) const
944{
945 if (pos >= size())
946 return '\0';
947 return data()[pos];
948}
949
950double UString::toDouble(bool tolerateTrailingJunk, bool tolerateEmptyString) const
951{
952 double d;
953
954 // FIXME: If tolerateTrailingJunk is true, then we want to tolerate non-8-bit junk
955 // after the number, so is8Bit is too strict a check.
956 if (!is8Bit())
957 return NaN;
958
959 const char *c = ascii();
960
961 // skip leading white space
962 while (isASCIISpace(*c))
963 c++;
964
965 // empty string ?
966 if (*c == '\0')
967 return tolerateEmptyString ? 0.0 : NaN;
968
969 // hex number ?
970 if (*c == '0' && (*(c+1) == 'x' || *(c+1) == 'X')) {
971 const char* firstDigitPosition = c + 2;
972 c++;
973 d = 0.0;
974 while (*(++c)) {
975 if (*c >= '0' && *c <= '9')
976 d = d * 16.0 + *c - '0';
977 else if ((*c >= 'A' && *c <= 'F') || (*c >= 'a' && *c <= 'f'))
978 d = d * 16.0 + (*c & 0xdf) - 'A' + 10.0;
979 else
980 break;
981 }
982
983 if (d >= mantissaOverflowLowerBound)
984 d = parseIntOverflow(firstDigitPosition, c - firstDigitPosition, 16);
985 } else {
986 // regular number ?
987 char *end;
988 d = kjs_strtod(c, &end);
989 if ((d != 0.0 || end != c) && d != Inf && d != -Inf) {
990 c = end;
991 } else {
992 double sign = 1.0;
993
994 if (*c == '+')
995 c++;
996 else if (*c == '-') {
997 sign = -1.0;
998 c++;
999 }
1000
1001 // We used strtod() to do the conversion. However, strtod() handles
1002 // infinite values slightly differently than JavaScript in that it
1003 // converts the string "inf" with any capitalization to infinity,
1004 // whereas the ECMA spec requires that it be converted to NaN.
1005
1006 if (c[0] == 'I' && c[1] == 'n' && c[2] == 'f' && c[3] == 'i' && c[4] == 'n' && c[5] == 'i' && c[6] == 't' && c[7] == 'y') {
1007 d = sign * Inf;
1008 c += 8;
1009 } else if ((d == Inf || d == -Inf) && *c != 'I' && *c != 'i')
1010 c = end;
1011 else
1012 return NaN;
1013 }
1014 }
1015
1016 // allow trailing white space
1017 while (isASCIISpace(*c))
1018 c++;
1019 // don't allow anything after - unless tolerant=true
1020 if (!tolerateTrailingJunk && *c != '\0')
1021 d = NaN;
1022
1023 return d;
1024}
1025
1026double UString::toDouble(bool tolerateTrailingJunk) const
1027{
1028 return toDouble(tolerateTrailingJunk, true);
1029}
1030
1031double UString::toDouble() const
1032{
1033 return toDouble(false, true);
1034}
1035
1036uint32_t UString::toUInt32(bool *ok) const
1037{
1038 double d = toDouble();
1039 bool b = true;
1040
1041 if (d != static_cast<uint32_t>(d)) {
1042 b = false;
1043 d = 0;
1044 }
1045
1046 if (ok)
1047 *ok = b;
1048
1049 return static_cast<uint32_t>(d);
1050}
1051
1052uint32_t UString::toUInt32(bool *ok, bool tolerateEmptyString) const
1053{
1054 double d = toDouble(false, tolerateEmptyString);
1055 bool b = true;
1056
1057 if (d != static_cast<uint32_t>(d)) {
1058 b = false;
1059 d = 0;
1060 }
1061
1062 if (ok)
1063 *ok = b;
1064
1065 return static_cast<uint32_t>(d);
1066}
1067
1068uint32_t UString::toStrictUInt32(bool *ok) const
1069{
1070 if (ok)
1071 *ok = false;
1072
1073 // Empty string is not OK.
1074 int len = m_rep->len;
1075 if (len == 0)
1076 return 0;
1077 const UChar *p = m_rep->data();
1078 unsigned short c = p->unicode();
1079
1080 // If the first digit is 0, only 0 itself is OK.
1081 if (c == '0') {
1082 if (len == 1 && ok)
1083 *ok = true;
1084 return 0;
1085 }
1086
1087 // Convert to UInt32, checking for overflow.
1088 uint32_t i = 0;
1089 while (1) {
1090 // Process character, turning it into a digit.
1091 if (c < '0' || c > '9')
1092 return 0;
1093 const unsigned d = c - '0';
1094
1095 // Multiply by 10, checking for overflow out of 32 bits.
1096 if (i > 0xFFFFFFFFU / 10)
1097 return 0;
1098 i *= 10;
1099
1100 // Add in the digit, checking for overflow out of 32 bits.
1101 const unsigned max = 0xFFFFFFFFU - d;
1102 if (i > max)
1103 return 0;
1104 i += d;
1105
1106 // Handle end of string.
1107 if (--len == 0) {
1108 if (ok)
1109 *ok = true;
1110 return i;
1111 }
1112
1113 // Get next character.
1114 c = (++p)->unicode();
1115 }
1116}
1117
1118int UString::find(const UString &f, int pos) const
1119{
1120 int sz = size();
1121 int fsz = f.size();
1122 if (sz < fsz)
1123 return -1;
1124 if (pos < 0)
1125 pos = 0;
1126 if (fsz == 0)
1127 return pos;
1128 const UChar *end = data() + sz - fsz;
1129 int fsizeminusone = (fsz - 1) * sizeof(UChar);
1130 const UChar *fdata = f.data();
1131 unsigned short fchar = fdata->uc;
1132 ++fdata;
1133 for (const UChar *c = data() + pos; c <= end; c++)
1134 if (c->uc == fchar && !memcmp(c + 1, fdata, fsizeminusone))
1135 return static_cast<int>(c - data());
1136
1137 return -1;
1138}
1139
1140int UString::find(UChar ch, int pos) const
1141{
1142 if (pos < 0)
1143 pos = 0;
1144 const UChar *end = data() + size();
1145 for (const UChar *c = data() + pos; c < end; c++)
1146 if (*c == ch)
1147 return static_cast<int>(c - data());
1148
1149 return -1;
1150}
1151
1152int UString::rfind(const UString &f, int pos) const
1153{
1154 int sz = size();
1155 int fsz = f.size();
1156 if (sz < fsz)
1157 return -1;
1158 if (pos < 0)
1159 pos = 0;
1160 if (pos > sz - fsz)
1161 pos = sz - fsz;
1162 if (fsz == 0)
1163 return pos;
1164 int fsizeminusone = (fsz - 1) * sizeof(UChar);
1165 const UChar *fdata = f.data();
1166 for (const UChar *c = data() + pos; c >= data(); c--) {
1167 if (*c == *fdata && !memcmp(c + 1, fdata + 1, fsizeminusone))
1168 return static_cast<int>(c - data());
1169 }
1170
1171 return -1;
1172}
1173
1174int UString::rfind(UChar ch, int pos) const
1175{
1176 if (isEmpty())
1177 return -1;
1178 if (pos + 1 >= size())
1179 pos = size() - 1;
1180 for (const UChar *c = data() + pos; c >= data(); c--) {
1181 if (*c == ch)
1182 return static_cast<int>(c-data());
1183 }
1184
1185 return -1;
1186}
1187
1188UString UString::substr(int pos, int len) const
1189{
1190 int s = size();
1191
1192 if (pos < 0)
1193 pos = 0;
1194 else if (pos >= s)
1195 pos = s;
1196 if (len < 0)
1197 len = s;
1198 if (pos + len >= s)
1199 len = s - pos;
1200
1201 if (pos == 0 && len == s)
1202 return *this;
1203
1204 return UString(Rep::create(m_rep, pos, len));
1205}
1206
1207bool operator==(const UString& s1, const UString& s2)
1208{
1209 if (s1.m_rep->len != s2.m_rep->len)
1210 return false;
1211
1212 return (memcmp(s1.m_rep->data(), s2.m_rep->data(),
1213 s1.m_rep->len * sizeof(UChar)) == 0);
1214}
1215
1216bool operator==(const UString& s1, const char *s2)
1217{
1218 if (s2 == 0) {
1219 return s1.isEmpty();
1220 }
1221
1222 const UChar *u = s1.data();
1223 const UChar *uend = u + s1.size();
1224 while (u != uend && *s2) {
1225 if (u->uc != (unsigned char)*s2)
1226 return false;
1227 s2++;
1228 u++;
1229 }
1230
1231 return u == uend && *s2 == 0;
1232}
1233
1234bool operator<(const UString& s1, const UString& s2)
1235{
1236 const int l1 = s1.size();
1237 const int l2 = s2.size();
1238 const int lmin = l1 < l2 ? l1 : l2;
1239 const UChar *c1 = s1.data();
1240 const UChar *c2 = s2.data();
1241 int l = 0;
1242 while (l < lmin && *c1 == *c2) {
1243 c1++;
1244 c2++;
1245 l++;
1246 }
1247 if (l < lmin)
1248 return (c1->uc < c2->uc);
1249
1250 return (l1 < l2);
1251}
1252
1253int compare(const UString& s1, const UString& s2)
1254{
1255 const int l1 = s1.size();
1256 const int l2 = s2.size();
1257 const int lmin = l1 < l2 ? l1 : l2;
1258 const UChar *c1 = s1.data();
1259 const UChar *c2 = s2.data();
1260 int l = 0;
1261 while (l < lmin && *c1 == *c2) {
1262 c1++;
1263 c2++;
1264 l++;
1265 }
1266
1267 if (l < lmin)
1268 return (c1->uc > c2->uc) ? 1 : -1;
1269
1270 if (l1 == l2)
1271 return 0;
1272
1273 return (l1 > l2) ? 1 : -1;
1274}
1275
1276CString UString::UTF8String(bool strict) const
1277{
1278 // Allocate a buffer big enough to hold all the characters.
1279 const int length = size();
1280 Vector<char, 1024> buffer(length * 3);
1281
1282 // Convert to runs of 8-bit characters.
1283 char* p = buffer.data();
1284 const ::UChar* d = reinterpret_cast<const ::UChar*>(&data()->uc);
1285 ConversionResult result = convertUTF16ToUTF8(&d, d + length, &p, p + buffer.size(), strict);
1286 if (result != conversionOK)
1287 return CString();
1288
1289 return CString(buffer.data(), p - buffer.data());
1290}
1291
1292
1293} // namespace KJS
Note: See TracBrowser for help on using the repository browser.