source: webkit/trunk/JavaScriptCore/kjs/ustring.cpp@ 13598

Last change on this file since 13598 was 13541, checked in by darin, 19 years ago

Reviewed by Geoff.

  • change some code that resulted in init routines on Mac OS X -- if the framework has init routines it will use memory and slow down applications that link with WebKit even in cases where those applications don't use WebKit
  • kjs/date_object.cpp: Changed constants that were derived by multiplying other constants to use immediate numbers instead. Apparently, double constant expressions of the type we had here are evaluated at load time.
  • kjs/list.cpp: Can't use OwnArrayPtr in ListImp because of the global instances of ListImp, so go back to using a plain old pointer. (KJS::List::List): Set overflow to 0 when initializing ListImp. (KJS::List::release): Replace a clear call with a delete and explicit set to 0. (KJS::List::append): Use raw pointers, and do a delete [] instead of finessing it with a swap of OwnArrayPtr. (KJS::List::copyFrom): Remove now-unneeded get(). (KJS::List::copyTail): Ditto.
  • kjs/ustring.cpp: Changed UString::Rep::empty initializer a bit so that it doesn't get a static initializer routine. Had to get rid of one level of constant to get the compiler to understand it could initialize without any code.
  • added a build step that checks for init routines
  • JavaScriptCore.xcodeproj/project.pbxproj: Deleted now-unused custom build rule that was replaced by the generate-derived-sources script a while back. Added a custom build phase that invokes the check-for-global-initializers script.
  • Property allow-tabs set to x
  • Property svn:eol-style set to native
File size: 31.2 KB
Line 
1// -*- c-basic-offset: 2 -*-
2/*
3 * This file is part of the KDE libraries
4 * Copyright (C) 1999-2000 Harri Porten ([email protected])
5 * Copyright (C) 2004 Apple Computer, Inc.
6 *
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Library General Public
9 * License as published by the Free Software Foundation; either
10 * version 2 of the License, or (at your option) any later version.
11 *
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Library General Public License for more details.
16 *
17 * You should have received a copy of the GNU Library General Public License
18 * along with this library; see the file COPYING.LIB. If not, write to
19 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20 * Boston, MA 02110-1301, USA.
21 *
22 */
23
24#include "config.h"
25#include "ustring.h"
26
27#include <assert.h>
28#include <stdlib.h>
29#include <stdio.h>
30#include <ctype.h>
31#if HAVE(STRING_H)
32#include <string.h>
33#endif
34#if HAVE(STRINGS_H)
35#include <strings.h>
36#endif
37
38#include "operations.h"
39#include "identifier.h"
40#include <math.h>
41#include "dtoa.h"
42
43#include <kxmlcore/Vector.h>
44
45using std::max;
46
47#include <unicode/uchar.h>
48
49namespace KJS {
50
51extern const double NaN;
52extern const double Inf;
53
54CString::CString(const char *c)
55{
56 length = strlen(c);
57 data = new char[length+1];
58 memcpy(data, c, length + 1);
59}
60
61CString::CString(const char *c, int len)
62{
63 length = len;
64 data = new char[len+1];
65 memcpy(data, c, len);
66 data[len] = 0;
67}
68
69CString::CString(const CString &b)
70{
71 length = b.length;
72 if (b.data) {
73 data = new char[length+1];
74 memcpy(data, b.data, length + 1);
75 }
76 else
77 data = 0;
78}
79
80CString::~CString()
81{
82 delete [] data;
83}
84
85CString &CString::append(const CString &t)
86{
87 char *n;
88 n = new char[length+t.length+1];
89 if (length)
90 memcpy(n, data, length);
91 if (t.length)
92 memcpy(n+length, t.data, t.length);
93 length += t.length;
94 n[length] = 0;
95
96 delete [] data;
97 data = n;
98
99 return *this;
100}
101
102CString &CString::operator=(const char *c)
103{
104 if (data)
105 delete [] data;
106 length = strlen(c);
107 data = new char[length+1];
108 memcpy(data, c, length + 1);
109
110 return *this;
111}
112
113CString &CString::operator=(const CString &str)
114{
115 if (this == &str)
116 return *this;
117
118 if (data)
119 delete [] data;
120 length = str.length;
121 if (str.data) {
122 data = new char[length + 1];
123 memcpy(data, str.data, length + 1);
124 }
125 else
126 data = 0;
127
128 return *this;
129}
130
131bool operator==(const CString& c1, const CString& c2)
132{
133 int len = c1.size();
134 return len == c2.size() && (len == 0 || memcmp(c1.c_str(), c2.c_str(), len) == 0);
135}
136
137// Hack here to avoid a global with a constructor; point to an unsigned short instead of a UChar.
138static unsigned short almostUChar;
139UString::Rep UString::Rep::null = { 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0 };
140UString::Rep UString::Rep::empty = { 0, 0, 1, 0, 0, 0, reinterpret_cast<UChar*>(&almostUChar), 0, 0, 0, 0 };
141const int normalStatBufferSize = 4096;
142static char *statBuffer = 0;
143static int statBufferSize = 0;
144
145UChar UChar::toLower() const
146{
147 return static_cast<unsigned short>(u_tolower(uc));
148}
149
150UChar UChar::toUpper() const
151{
152 return static_cast<unsigned short>(u_toupper(uc));
153}
154
155UCharReference& UCharReference::operator=(UChar c)
156{
157 str->copyForWriting();
158 if (offset < str->rep()->len)
159 *(str->rep()->data() + offset) = c;
160 /* TODO: lengthen string ? */
161 return *this;
162}
163
164UChar& UCharReference::ref() const
165{
166 if (offset < str->rep()->len)
167 return *(str->rep()->data() + offset);
168 else {
169 static UChar callerBetterNotModifyThis('\0');
170 return callerBetterNotModifyThis;
171 }
172}
173
174PassRefPtr<UString::Rep> UString::Rep::createCopying(const UChar *d, int l)
175{
176 int sizeInBytes = l * sizeof(UChar);
177 UChar *copyD = static_cast<UChar *>(fastMalloc(sizeInBytes));
178 memcpy(copyD, d, sizeInBytes);
179
180 return create(copyD, l);
181}
182
183PassRefPtr<UString::Rep> UString::Rep::create(UChar *d, int l)
184{
185 Rep *r = new Rep;
186 r->offset = 0;
187 r->len = l;
188 r->rc = 1;
189 r->_hash = 0;
190 r->isIdentifier = 0;
191 r->baseString = 0;
192 r->buf = d;
193 r->usedCapacity = l;
194 r->capacity = l;
195 r->usedPreCapacity = 0;
196 r->preCapacity = 0;
197
198 // steal the single reference this Rep was created with
199 return adoptRef(r);
200}
201
202PassRefPtr<UString::Rep> UString::Rep::create(PassRefPtr<Rep> base, int offset, int length)
203{
204 assert(base);
205
206 int baseOffset = base->offset;
207
208 if (base->baseString) {
209 base = base->baseString;
210 }
211
212 assert(-(offset + baseOffset) <= base->usedPreCapacity);
213 assert(offset + baseOffset + length <= base->usedCapacity);
214
215 Rep *r = new Rep;
216 r->offset = baseOffset + offset;
217 r->len = length;
218 r->rc = 1;
219 r->_hash = 0;
220 r->isIdentifier = 0;
221 r->baseString = base.release();
222 r->buf = 0;
223 r->usedCapacity = 0;
224 r->capacity = 0;
225 r->usedPreCapacity = 0;
226 r->preCapacity = 0;
227
228 // steal the single reference this Rep was created with
229 return adoptRef(r);
230}
231
232void UString::Rep::destroy()
233{
234 if (isIdentifier)
235 Identifier::remove(this);
236 if (baseString) {
237 baseString->deref();
238 } else {
239 fastFree(buf);
240 }
241 delete this;
242}
243
244// Golden ratio - arbitrary start value to avoid mapping all 0's to all 0's
245// or anything like that.
246const unsigned PHI = 0x9e3779b9U;
247
248// Paul Hsieh's SuperFastHash
249// http://www.azillionmonkeys.com/qed/hash.html
250unsigned UString::Rep::computeHash(const UChar *s, int len)
251{
252 unsigned l = len;
253 uint32_t hash = PHI;
254 uint32_t tmp;
255
256 int rem = l & 1;
257 l >>= 1;
258
259 // Main loop
260 for (; l > 0; l--) {
261 hash += s[0].uc;
262 tmp = (s[1].uc << 11) ^ hash;
263 hash = (hash << 16) ^ tmp;
264 s += 2;
265 hash += hash >> 11;
266 }
267
268 // Handle end case
269 if (rem) {
270 hash += s[0].uc;
271 hash ^= hash << 11;
272 hash += hash >> 17;
273 }
274
275 // Force "avalanching" of final 127 bits
276 hash ^= hash << 3;
277 hash += hash >> 5;
278 hash ^= hash << 2;
279 hash += hash >> 15;
280 hash ^= hash << 10;
281
282 // this avoids ever returning a hash code of 0, since that is used to
283 // signal "hash not computed yet", using a value that is likely to be
284 // effectively the same as 0 when the low bits are masked
285 if (hash == 0)
286 hash = 0x80000000;
287
288 return hash;
289}
290
291// Paul Hsieh's SuperFastHash
292// http://www.azillionmonkeys.com/qed/hash.html
293unsigned UString::Rep::computeHash(const char *s)
294{
295 // This hash is designed to work on 16-bit chunks at a time. But since the normal case
296 // (above) is to hash UTF-16 characters, we just treat the 8-bit chars as if they
297 // were 16-bit chunks, which should give matching results
298
299 uint32_t hash = PHI;
300 uint32_t tmp;
301 unsigned l = strlen(s);
302
303 int rem = l & 1;
304 l >>= 1;
305
306 // Main loop
307 for (; l > 0; l--) {
308 hash += (unsigned char)s[0];
309 tmp = ((unsigned char)s[1] << 11) ^ hash;
310 hash = (hash << 16) ^ tmp;
311 s += 2;
312 hash += hash >> 11;
313 }
314
315 // Handle end case
316 if (rem) {
317 hash += (unsigned char)s[0];
318 hash ^= hash << 11;
319 hash += hash >> 17;
320 }
321
322 // Force "avalanching" of final 127 bits
323 hash ^= hash << 3;
324 hash += hash >> 5;
325 hash ^= hash << 2;
326 hash += hash >> 15;
327 hash ^= hash << 10;
328
329 // this avoids ever returning a hash code of 0, since that is used to
330 // signal "hash not computed yet", using a value that is likely to be
331 // effectively the same as 0 when the low bits are masked
332 if (hash == 0)
333 hash = 0x80000000;
334
335 return hash;
336}
337
338// put these early so they can be inlined
339inline int UString::expandedSize(int size, int otherSize) const
340{
341 int s = (size * 11 / 10) + 1 + otherSize;
342 return s;
343}
344
345inline int UString::usedCapacity() const
346{
347 return m_rep->baseString ? m_rep->baseString->usedCapacity : m_rep->usedCapacity;
348}
349
350inline int UString::usedPreCapacity() const
351{
352 return m_rep->baseString ? m_rep->baseString->usedPreCapacity : m_rep->usedPreCapacity;
353}
354
355void UString::expandCapacity(int requiredLength)
356{
357 Rep *r = m_rep->baseString ? m_rep->baseString : rep();
358
359 if (requiredLength > r->capacity) {
360 int newCapacity = expandedSize(requiredLength, r->preCapacity);
361 r->buf = static_cast<UChar *>(fastRealloc(r->buf, newCapacity * sizeof(UChar)));
362 r->capacity = newCapacity - r->preCapacity;
363 }
364 if (requiredLength > r->usedCapacity) {
365 r->usedCapacity = requiredLength;
366 }
367}
368
369void UString::expandPreCapacity(int requiredPreCap)
370{
371 Rep *r = m_rep->baseString ? m_rep->baseString : rep();
372
373 if (requiredPreCap > r->preCapacity) {
374 int newCapacity = expandedSize(requiredPreCap, r->capacity);
375 int delta = newCapacity - r->capacity - r->preCapacity;
376
377 UChar *newBuf = static_cast<UChar *>(fastMalloc(newCapacity * sizeof(UChar)));
378 memcpy(newBuf + delta, r->buf, (r->capacity + r->preCapacity) * sizeof(UChar));
379 fastFree(r->buf);
380 r->buf = newBuf;
381
382 r->preCapacity = newCapacity - r->capacity;
383 }
384 if (requiredPreCap > r->usedPreCapacity) {
385 r->usedPreCapacity = requiredPreCap;
386 }
387}
388
389
390UString::UString(char c)
391{
392 UChar *d = static_cast<UChar *>(fastMalloc(sizeof(UChar)));
393 d[0] = c;
394 m_rep = Rep::create(d, 1);
395}
396
397UString::UString(const char *c)
398{
399 if (!c) {
400 m_rep = &Rep::null;
401 return;
402 }
403 int length = strlen(c);
404 if (length == 0) {
405 m_rep = &Rep::empty;
406 return;
407 }
408 UChar *d = static_cast<UChar *>(fastMalloc(sizeof(UChar) * length));
409 for (int i = 0; i < length; i++)
410 d[i].uc = c[i];
411 m_rep = Rep::create(d, length);
412}
413
414UString::UString(const UChar *c, int length)
415{
416 if (length == 0)
417 m_rep = &Rep::empty;
418 else
419 m_rep = Rep::createCopying(c, length);
420}
421
422UString::UString(UChar *c, int length, bool copy)
423{
424 if (length == 0)
425 m_rep = &Rep::empty;
426 else if (copy)
427 m_rep = Rep::createCopying(c, length);
428 else
429 m_rep = Rep::create(c, length);
430}
431
432UString::UString(const UString &a, const UString &b)
433{
434 int aSize = a.size();
435 int aOffset = a.m_rep->offset;
436 int bSize = b.size();
437 int bOffset = b.m_rep->offset;
438 int length = aSize + bSize;
439
440 // possible cases:
441
442 if (aSize == 0) {
443 // a is empty
444 m_rep = b.m_rep;
445 } else if (bSize == 0) {
446 // b is empty
447 m_rep = a.m_rep;
448 } else if (aOffset + aSize == a.usedCapacity() && 4 * aSize >= bSize &&
449 (-bOffset != b.usedPreCapacity() || aSize >= bSize)) {
450 // - a reaches the end of its buffer so it qualifies for shared append
451 // - also, it's at least a quarter the length of b - appending to a much shorter
452 // string does more harm than good
453 // - however, if b qualifies for prepend and is longer than a, we'd rather prepend
454 UString x(a);
455 x.expandCapacity(aOffset + length);
456 memcpy(const_cast<UChar *>(a.data() + aSize), b.data(), bSize * sizeof(UChar));
457 m_rep = Rep::create(a.m_rep, 0, length);
458 } else if (-bOffset == b.usedPreCapacity() && 4 * bSize >= aSize) {
459 // - b reaches the beginning of its buffer so it qualifies for shared prepend
460 // - also, it's at least a quarter the length of a - prepending to a much shorter
461 // string does more harm than good
462 UString y(b);
463 y.expandPreCapacity(-bOffset + aSize);
464 memcpy(const_cast<UChar *>(b.data() - aSize), a.data(), aSize * sizeof(UChar));
465 m_rep = Rep::create(b.m_rep, -aSize, length);
466 } else {
467 // a does not qualify for append, and b does not qualify for prepend, gotta make a whole new string
468 int newCapacity = expandedSize(length, 0);
469 UChar *d = static_cast<UChar *>(fastMalloc(sizeof(UChar) * newCapacity));
470 memcpy(d, a.data(), aSize * sizeof(UChar));
471 memcpy(d + aSize, b.data(), bSize * sizeof(UChar));
472 m_rep = Rep::create(d, length);
473 m_rep->capacity = newCapacity;
474 }
475}
476
477const UString &UString::null()
478{
479 static UString n;
480 return n;
481}
482
483UString UString::from(int i)
484{
485 UChar buf[1 + sizeof(i) * 3];
486 UChar *end = buf + sizeof(buf) / sizeof(UChar);
487 UChar *p = end;
488
489 if (i == 0) {
490 *--p = '0';
491 } else if (i == INT_MIN) {
492 char minBuf[1 + sizeof(i) * 3];
493 sprintf(minBuf, "%d", INT_MIN);
494 return UString(minBuf);
495 } else {
496 bool negative = false;
497 if (i < 0) {
498 negative = true;
499 i = -i;
500 }
501 while (i) {
502 *--p = (unsigned short)((i % 10) + '0');
503 i /= 10;
504 }
505 if (negative) {
506 *--p = '-';
507 }
508 }
509
510 return UString(p, end - p);
511}
512
513UString UString::from(unsigned int u)
514{
515 UChar buf[sizeof(u) * 3];
516 UChar *end = buf + sizeof(buf) / sizeof(UChar);
517 UChar *p = end;
518
519 if (u == 0) {
520 *--p = '0';
521 } else {
522 while (u) {
523 *--p = (unsigned short)((u % 10) + '0');
524 u /= 10;
525 }
526 }
527
528 return UString(p, end - p);
529}
530
531UString UString::from(long l)
532{
533 UChar buf[1 + sizeof(l) * 3];
534 UChar *end = buf + sizeof(buf) / sizeof(UChar);
535 UChar *p = end;
536
537 if (l == 0) {
538 *--p = '0';
539 } else if (l == LONG_MIN) {
540 char minBuf[1 + sizeof(l) * 3];
541 sprintf(minBuf, "%ld", LONG_MIN);
542 return UString(minBuf);
543 } else {
544 bool negative = false;
545 if (l < 0) {
546 negative = true;
547 l = -l;
548 }
549 while (l) {
550 *--p = (unsigned short)((l % 10) + '0');
551 l /= 10;
552 }
553 if (negative) {
554 *--p = '-';
555 }
556 }
557
558 return UString(p, end - p);
559}
560
561UString UString::from(double d)
562{
563 // avoid ever printing -NaN, in JS conceptually there is only one NaN value
564 if (isNaN(d))
565 return "NaN";
566
567 char buf[80];
568 int decimalPoint;
569 int sign;
570
571 char *result = kjs_dtoa(d, 0, 0, &decimalPoint, &sign, NULL);
572 int length = strlen(result);
573
574 int i = 0;
575 if (sign) {
576 buf[i++] = '-';
577 }
578
579 if (decimalPoint <= 0 && decimalPoint > -6) {
580 buf[i++] = '0';
581 buf[i++] = '.';
582 for (int j = decimalPoint; j < 0; j++) {
583 buf[i++] = '0';
584 }
585 strcpy(buf + i, result);
586 } else if (decimalPoint <= 21 && decimalPoint > 0) {
587 if (length <= decimalPoint) {
588 strcpy(buf + i, result);
589 i += length;
590 for (int j = 0; j < decimalPoint - length; j++) {
591 buf[i++] = '0';
592 }
593 buf[i] = '\0';
594 } else {
595 strncpy(buf + i, result, decimalPoint);
596 i += decimalPoint;
597 buf[i++] = '.';
598 strcpy(buf + i, result + decimalPoint);
599 }
600 } else if (result[0] < '0' || result[0] > '9') {
601 strcpy(buf + i, result);
602 } else {
603 buf[i++] = result[0];
604 if (length > 1) {
605 buf[i++] = '.';
606 strcpy(buf + i, result + 1);
607 i += length - 1;
608 }
609
610 buf[i++] = 'e';
611 buf[i++] = (decimalPoint >= 0) ? '+' : '-';
612 // decimalPoint can't be more than 3 digits decimal given the
613 // nature of float representation
614 int exponential = decimalPoint - 1;
615 if (exponential < 0) {
616 exponential = exponential * -1;
617 }
618 if (exponential >= 100) {
619 buf[i++] = '0' + exponential / 100;
620 }
621 if (exponential >= 10) {
622 buf[i++] = '0' + (exponential % 100) / 10;
623 }
624 buf[i++] = '0' + exponential % 10;
625 buf[i++] = '\0';
626 }
627
628 kjs_freedtoa(result);
629
630 return UString(buf);
631}
632
633UString UString::spliceSubstringsWithSeparators(const Range *substringRanges, int rangeCount, const UString *separators, int separatorCount) const
634{
635 int totalLength = 0;
636
637 for (int i = 0; i < rangeCount; i++) {
638 totalLength += substringRanges[i].length;
639 }
640 for (int i = 0; i < separatorCount; i++) {
641 totalLength += separators[i].size();
642 }
643
644 UChar *buffer = static_cast<UChar *>(fastMalloc(totalLength * sizeof(UChar)));
645
646 int maxCount = max(rangeCount, separatorCount);
647 int bufferPos = 0;
648 for (int i = 0; i < maxCount; i++) {
649 if (i < rangeCount) {
650 memcpy(buffer + bufferPos, data() + substringRanges[i].position, substringRanges[i].length * sizeof(UChar));
651 bufferPos += substringRanges[i].length;
652 }
653 if (i < separatorCount) {
654 memcpy(buffer + bufferPos, separators[i].data(), separators[i].size() * sizeof(UChar));
655 bufferPos += separators[i].size();
656 }
657 }
658
659 return UString(UString::Rep::create(buffer, totalLength));
660}
661
662
663
664UString &UString::append(const UString &t)
665{
666 int thisSize = size();
667 int thisOffset = m_rep->offset;
668 int tSize = t.size();
669 int length = thisSize + tSize;
670
671 // possible cases:
672 if (thisSize == 0) {
673 // this is empty
674 *this = t;
675 } else if (tSize == 0) {
676 // t is empty
677 } else if (!m_rep->baseString && m_rep->rc == 1) {
678 // this is direct and has refcount of 1 (so we can just alter it directly)
679 expandCapacity(thisOffset + length);
680 memcpy(const_cast<UChar *>(data() + thisSize), t.data(), tSize * sizeof(UChar));
681 m_rep->len = length;
682 m_rep->_hash = 0;
683 } else if (thisOffset + thisSize == usedCapacity()) {
684 // this reaches the end of the buffer - extend it
685 expandCapacity(thisOffset + length);
686 memcpy(const_cast<UChar *>(data() + thisSize), t.data(), tSize * sizeof(UChar));
687 m_rep = Rep::create(m_rep, 0, length);
688 } else {
689 // this is shared with someone using more capacity, gotta make a whole new string
690 int newCapacity = expandedSize(length, 0);
691 UChar *d = static_cast<UChar *>(fastMalloc(sizeof(UChar) * newCapacity));
692 memcpy(d, data(), thisSize * sizeof(UChar));
693 memcpy(const_cast<UChar *>(d + thisSize), t.data(), tSize * sizeof(UChar));
694 m_rep = Rep::create(d, length);
695 m_rep->capacity = newCapacity;
696 }
697
698 return *this;
699}
700
701UString &UString::append(const char *t)
702{
703 int thisSize = size();
704 int thisOffset = m_rep->offset;
705 int tSize = strlen(t);
706 int length = thisSize + tSize;
707
708 // possible cases:
709 if (thisSize == 0) {
710 // this is empty
711 *this = t;
712 } else if (tSize == 0) {
713 // t is empty, we'll just return *this below.
714 } else if (!m_rep->baseString && m_rep->rc == 1) {
715 // this is direct and has refcount of 1 (so we can just alter it directly)
716 expandCapacity(thisOffset + length);
717 UChar *d = const_cast<UChar *>(data());
718 for (int i = 0; i < tSize; ++i)
719 d[thisSize+i] = t[i];
720 m_rep->len = length;
721 m_rep->_hash = 0;
722 } else if (thisOffset + thisSize == usedCapacity()) {
723 // this string reaches the end of the buffer - extend it
724 expandCapacity(thisOffset + length);
725 UChar *d = const_cast<UChar *>(data());
726 for (int i = 0; i < tSize; ++i)
727 d[thisSize+i] = t[i];
728 m_rep = Rep::create(m_rep, 0, length);
729 } else {
730 // this is shared with someone using more capacity, gotta make a whole new string
731 int newCapacity = expandedSize(length, 0);
732 UChar *d = static_cast<UChar *>(fastMalloc(sizeof(UChar) * newCapacity));
733 memcpy(d, data(), thisSize * sizeof(UChar));
734 for (int i = 0; i < tSize; ++i)
735 d[thisSize+i] = t[i];
736 m_rep = Rep::create(d, length);
737 m_rep->capacity = newCapacity;
738 }
739
740 return *this;
741}
742
743UString &UString::append(unsigned short c)
744{
745 int thisOffset = m_rep->offset;
746 int length = size();
747
748 // possible cases:
749 if (length == 0) {
750 // this is empty - must make a new m_rep because we don't want to pollute the shared empty one
751 int newCapacity = expandedSize(1, 0);
752 UChar *d = static_cast<UChar *>(fastMalloc(sizeof(UChar) * newCapacity));
753 d[0] = c;
754 m_rep = Rep::create(d, 1);
755 m_rep->capacity = newCapacity;
756 } else if (!m_rep->baseString && m_rep->rc == 1) {
757 // this is direct and has refcount of 1 (so we can just alter it directly)
758 expandCapacity(thisOffset + length + 1);
759 UChar *d = const_cast<UChar *>(data());
760 d[length] = c;
761 m_rep->len = length + 1;
762 m_rep->_hash = 0;
763 } else if (thisOffset + length == usedCapacity()) {
764 // this reaches the end of the string - extend it and share
765 expandCapacity(thisOffset + length + 1);
766 UChar *d = const_cast<UChar *>(data());
767 d[length] = c;
768 m_rep = Rep::create(m_rep, 0, length + 1);
769 } else {
770 // this is shared with someone using more capacity, gotta make a whole new string
771 int newCapacity = expandedSize((length + 1), 0);
772 UChar *d = static_cast<UChar *>(fastMalloc(sizeof(UChar) * newCapacity));
773 memcpy(d, data(), length * sizeof(UChar));
774 d[length] = c;
775 m_rep = Rep::create(d, length);
776 m_rep->capacity = newCapacity;
777 }
778
779 return *this;
780}
781
782CString UString::cstring() const
783{
784 return ascii();
785}
786
787char *UString::ascii() const
788{
789 // Never make the buffer smaller than normalStatBufferSize.
790 // Thus we almost never need to reallocate.
791 int length = size();
792 int neededSize = length + 1;
793 if (neededSize < normalStatBufferSize) {
794 neededSize = normalStatBufferSize;
795 }
796 if (neededSize != statBufferSize) {
797 delete [] statBuffer;
798 statBuffer = new char [neededSize];
799 statBufferSize = neededSize;
800 }
801
802 const UChar *p = data();
803 char *q = statBuffer;
804 const UChar *limit = p + length;
805 while (p != limit) {
806 *q = p->uc;
807 ++p;
808 ++q;
809 }
810 *q = '\0';
811
812 return statBuffer;
813}
814
815#ifdef KJS_DEBUG_MEM
816void UString::globalClear()
817{
818 delete [] statBuffer;
819 statBuffer = 0;
820 statBufferSize = 0;
821}
822#endif
823
824UString &UString::operator=(const char *c)
825{
826 int l = c ? strlen(c) : 0;
827 UChar *d;
828 if (m_rep->rc == 1 && l <= m_rep->capacity && !m_rep->baseString && m_rep->offset == 0 && m_rep->preCapacity == 0) {
829 d = m_rep->buf;
830 m_rep->_hash = 0;
831 } else {
832 d = static_cast<UChar *>(fastMalloc(sizeof(UChar) * l));
833 m_rep = Rep::create(d, l);
834 }
835 for (int i = 0; i < l; i++)
836 d[i].uc = c[i];
837
838 return *this;
839}
840
841bool UString::is8Bit() const
842{
843 const UChar *u = data();
844 const UChar *limit = u + size();
845 while (u < limit) {
846 if (u->uc > 0xFF)
847 return false;
848 ++u;
849 }
850
851 return true;
852}
853
854UChar UString::operator[](int pos) const
855{
856 if (pos >= size())
857 return '\0';
858 return data()[pos];
859}
860
861UCharReference UString::operator[](int pos)
862{
863 /* TODO: boundary check */
864 return UCharReference(this, pos);
865}
866
867double UString::toDouble(bool tolerateTrailingJunk, bool tolerateEmptyString) const
868{
869 double d;
870
871 // FIXME: If tolerateTrailingJunk is true, then we want to tolerate non-8-bit junk
872 // after the number, so is8Bit is too strict a check.
873 if (!is8Bit())
874 return NaN;
875
876 const char *c = ascii();
877
878 // skip leading white space
879 while (isspace(*c))
880 c++;
881
882 // empty string ?
883 if (*c == '\0')
884 return tolerateEmptyString ? 0.0 : NaN;
885
886 // hex number ?
887 if (*c == '0' && (*(c+1) == 'x' || *(c+1) == 'X')) {
888 c++;
889 d = 0.0;
890 while (*(++c)) {
891 if (*c >= '0' && *c <= '9')
892 d = d * 16.0 + *c - '0';
893 else if ((*c >= 'A' && *c <= 'F') || (*c >= 'a' && *c <= 'f'))
894 d = d * 16.0 + (*c & 0xdf) - 'A' + 10.0;
895 else
896 break;
897 }
898 } else {
899 // regular number ?
900 char *end;
901 d = kjs_strtod(c, &end);
902 if ((d != 0.0 || end != c) && d != HUGE_VAL && d != -HUGE_VAL) {
903 c = end;
904 } else {
905 // infinity ?
906 d = 1.0;
907 if (*c == '+')
908 c++;
909 else if (*c == '-') {
910 d = -1.0;
911 c++;
912 }
913 if (strncmp(c, "Infinity", 8) != 0)
914 return NaN;
915 d = d * Inf;
916 c += 8;
917 }
918 }
919
920 // allow trailing white space
921 while (isspace(*c))
922 c++;
923 // don't allow anything after - unless tolerant=true
924 if (!tolerateTrailingJunk && *c != '\0')
925 d = NaN;
926
927 return d;
928}
929
930double UString::toDouble(bool tolerateTrailingJunk) const
931{
932 return toDouble(tolerateTrailingJunk, true);
933}
934
935double UString::toDouble() const
936{
937 return toDouble(false, true);
938}
939
940uint32_t UString::toUInt32(bool *ok) const
941{
942 double d = toDouble();
943 bool b = true;
944
945 if (d != static_cast<uint32_t>(d)) {
946 b = false;
947 d = 0;
948 }
949
950 if (ok)
951 *ok = b;
952
953 return static_cast<uint32_t>(d);
954}
955
956uint32_t UString::toUInt32(bool *ok, bool tolerateEmptyString) const
957{
958 double d = toDouble(false, tolerateEmptyString);
959 bool b = true;
960
961 if (d != static_cast<uint32_t>(d)) {
962 b = false;
963 d = 0;
964 }
965
966 if (ok)
967 *ok = b;
968
969 return static_cast<uint32_t>(d);
970}
971
972uint32_t UString::toStrictUInt32(bool *ok) const
973{
974 if (ok)
975 *ok = false;
976
977 // Empty string is not OK.
978 int len = m_rep->len;
979 if (len == 0)
980 return 0;
981 const UChar *p = m_rep->data();
982 unsigned short c = p->unicode();
983
984 // If the first digit is 0, only 0 itself is OK.
985 if (c == '0') {
986 if (len == 1 && ok)
987 *ok = true;
988 return 0;
989 }
990
991 // Convert to UInt32, checking for overflow.
992 uint32_t i = 0;
993 while (1) {
994 // Process character, turning it into a digit.
995 if (c < '0' || c > '9')
996 return 0;
997 const unsigned d = c - '0';
998
999 // Multiply by 10, checking for overflow out of 32 bits.
1000 if (i > 0xFFFFFFFFU / 10)
1001 return 0;
1002 i *= 10;
1003
1004 // Add in the digit, checking for overflow out of 32 bits.
1005 const unsigned max = 0xFFFFFFFFU - d;
1006 if (i > max)
1007 return 0;
1008 i += d;
1009
1010 // Handle end of string.
1011 if (--len == 0) {
1012 if (ok)
1013 *ok = true;
1014 return i;
1015 }
1016
1017 // Get next character.
1018 c = (++p)->unicode();
1019 }
1020}
1021
1022int UString::find(const UString &f, int pos) const
1023{
1024 int sz = size();
1025 int fsz = f.size();
1026 if (sz < fsz)
1027 return -1;
1028 if (pos < 0)
1029 pos = 0;
1030 if (fsz == 0)
1031 return pos;
1032 const UChar *end = data() + sz - fsz;
1033 int fsizeminusone = (fsz - 1) * sizeof(UChar);
1034 const UChar *fdata = f.data();
1035 unsigned short fchar = fdata->uc;
1036 ++fdata;
1037 for (const UChar *c = data() + pos; c <= end; c++)
1038 if (c->uc == fchar && !memcmp(c + 1, fdata, fsizeminusone))
1039 return (c-data());
1040
1041 return -1;
1042}
1043
1044int UString::find(UChar ch, int pos) const
1045{
1046 if (pos < 0)
1047 pos = 0;
1048 const UChar *end = data() + size();
1049 for (const UChar *c = data() + pos; c < end; c++)
1050 if (*c == ch)
1051 return (c-data());
1052
1053 return -1;
1054}
1055
1056int UString::rfind(const UString &f, int pos) const
1057{
1058 int sz = size();
1059 int fsz = f.size();
1060 if (sz < fsz)
1061 return -1;
1062 if (pos < 0)
1063 pos = 0;
1064 if (pos > sz - fsz)
1065 pos = sz - fsz;
1066 if (fsz == 0)
1067 return pos;
1068 int fsizeminusone = (fsz - 1) * sizeof(UChar);
1069 const UChar *fdata = f.data();
1070 for (const UChar *c = data() + pos; c >= data(); c--) {
1071 if (*c == *fdata && !memcmp(c + 1, fdata + 1, fsizeminusone))
1072 return (c-data());
1073 }
1074
1075 return -1;
1076}
1077
1078int UString::rfind(UChar ch, int pos) const
1079{
1080 if (isEmpty())
1081 return -1;
1082 if (pos + 1 >= size())
1083 pos = size() - 1;
1084 for (const UChar *c = data() + pos; c >= data(); c--) {
1085 if (*c == ch)
1086 return (c-data());
1087 }
1088
1089 return -1;
1090}
1091
1092UString UString::substr(int pos, int len) const
1093{
1094 int s = size();
1095
1096 if (pos < 0)
1097 pos = 0;
1098 else if (pos >= s)
1099 pos = s;
1100 if (len < 0)
1101 len = s;
1102 if (pos + len >= s)
1103 len = s - pos;
1104
1105 if (pos == 0 && len == s)
1106 return *this;
1107
1108 return UString(Rep::create(m_rep, pos, len));
1109}
1110
1111void UString::copyForWriting()
1112{
1113 if (m_rep->rc > 1 || m_rep->baseString) {
1114 int l = size();
1115 UChar *n = static_cast<UChar *>(fastMalloc(sizeof(UChar) * l));
1116 memcpy(n, data(), l * sizeof(UChar));
1117 m_rep = Rep::create(n, l);
1118 }
1119}
1120
1121bool operator==(const UString& s1, const UString& s2)
1122{
1123 if (s1.m_rep->len != s2.m_rep->len)
1124 return false;
1125
1126 return (memcmp(s1.m_rep->data(), s2.m_rep->data(),
1127 s1.m_rep->len * sizeof(UChar)) == 0);
1128}
1129
1130bool operator==(const UString& s1, const char *s2)
1131{
1132 if (s2 == 0) {
1133 return s1.isEmpty();
1134 }
1135
1136 const UChar *u = s1.data();
1137 const UChar *uend = u + s1.size();
1138 while (u != uend && *s2) {
1139 if (u->uc != (unsigned char)*s2)
1140 return false;
1141 s2++;
1142 u++;
1143 }
1144
1145 return u == uend && *s2 == 0;
1146}
1147
1148bool operator<(const UString& s1, const UString& s2)
1149{
1150 const int l1 = s1.size();
1151 const int l2 = s2.size();
1152 const int lmin = l1 < l2 ? l1 : l2;
1153 const UChar *c1 = s1.data();
1154 const UChar *c2 = s2.data();
1155 int l = 0;
1156 while (l < lmin && *c1 == *c2) {
1157 c1++;
1158 c2++;
1159 l++;
1160 }
1161 if (l < lmin)
1162 return (c1->uc < c2->uc);
1163
1164 return (l1 < l2);
1165}
1166
1167int compare(const UString& s1, const UString& s2)
1168{
1169 const int l1 = s1.size();
1170 const int l2 = s2.size();
1171 const int lmin = l1 < l2 ? l1 : l2;
1172 const UChar *c1 = s1.data();
1173 const UChar *c2 = s2.data();
1174 int l = 0;
1175 while (l < lmin && *c1 == *c2) {
1176 c1++;
1177 c2++;
1178 l++;
1179 }
1180
1181 if (l < lmin)
1182 return (c1->uc > c2->uc) ? 1 : -1;
1183
1184 if (l1 == l2)
1185 return 0;
1186
1187 return (l1 > l2) ? 1 : -1;
1188}
1189
1190inline int inlineUTF8SequenceLengthNonASCII(char b0)
1191{
1192 if ((b0 & 0xC0) != 0xC0)
1193 return 0;
1194 if ((b0 & 0xE0) == 0xC0)
1195 return 2;
1196 if ((b0 & 0xF0) == 0xE0)
1197 return 3;
1198 if ((b0 & 0xF8) == 0xF0)
1199 return 4;
1200 return 0;
1201}
1202
1203int UTF8SequenceLengthNonASCII(char b0)
1204{
1205 return inlineUTF8SequenceLengthNonASCII(b0);
1206}
1207
1208inline int inlineUTF8SequenceLength(char b0)
1209{
1210 return (b0 & 0x80) == 0 ? 1 : UTF8SequenceLengthNonASCII(b0);
1211}
1212
1213// Given a first byte, gives the length of the UTF-8 sequence it begins.
1214// Returns 0 for bytes that are not legal starts of UTF-8 sequences.
1215// Only allows sequences of up to 4 bytes, since that works for all Unicode characters (U-00000000 to U-0010FFFF).
1216int UTF8SequenceLength(char b0)
1217{
1218 return (b0 & 0x80) == 0 ? 1 : inlineUTF8SequenceLengthNonASCII(b0);
1219}
1220
1221// Takes a null-terminated C-style string with a UTF-8 sequence in it and converts it to a character.
1222// Only allows Unicode characters (U-00000000 to U-0010FFFF).
1223// Returns -1 if the sequence is not valid (including presence of extra bytes).
1224int decodeUTF8Sequence(const char *sequence)
1225{
1226 // Handle 0-byte sequences (never valid).
1227 const unsigned char b0 = sequence[0];
1228 const int length = inlineUTF8SequenceLength(b0);
1229 if (length == 0)
1230 return -1;
1231
1232 // Handle 1-byte sequences (plain ASCII).
1233 const unsigned char b1 = sequence[1];
1234 if (length == 1) {
1235 if (b1)
1236 return -1;
1237 return b0;
1238 }
1239
1240 // Handle 2-byte sequences.
1241 if ((b1 & 0xC0) != 0x80)
1242 return -1;
1243 const unsigned char b2 = sequence[2];
1244 if (length == 2) {
1245 if (b2)
1246 return -1;
1247 const int c = ((b0 & 0x1F) << 6) | (b1 & 0x3F);
1248 if (c < 0x80)
1249 return -1;
1250 return c;
1251 }
1252
1253 // Handle 3-byte sequences.
1254 if ((b2 & 0xC0) != 0x80)
1255 return -1;
1256 const unsigned char b3 = sequence[3];
1257 if (length == 3) {
1258 if (b3)
1259 return -1;
1260 const int c = ((b0 & 0xF) << 12) | ((b1 & 0x3F) << 6) | (b2 & 0x3F);
1261 if (c < 0x800)
1262 return -1;
1263 // UTF-16 surrogates should never appear in UTF-8 data.
1264 if (c >= 0xD800 && c <= 0xDFFF)
1265 return -1;
1266 // Backwards BOM and U+FFFF should never appear in UTF-8 data.
1267 if (c == 0xFFFE || c == 0xFFFF)
1268 return -1;
1269 return c;
1270 }
1271
1272 // Handle 4-byte sequences.
1273 if ((b3 & 0xC0) != 0x80)
1274 return -1;
1275 const unsigned char b4 = sequence[4];
1276 if (length == 4) {
1277 if (b4)
1278 return -1;
1279 const int c = ((b0 & 0x7) << 18) | ((b1 & 0x3F) << 12) | ((b2 & 0x3F) << 6) | (b3 & 0x3F);
1280 if (c < 0x10000 || c > 0x10FFFF)
1281 return -1;
1282 return c;
1283 }
1284
1285 return -1;
1286}
1287
1288CString UString::UTF8String() const
1289{
1290 // Allocate a buffer big enough to hold all the characters.
1291 const int length = size();
1292 Vector<char, 1024> buffer(length * 3);
1293
1294 // Convert to runs of 8-bit characters.
1295 char *p = buffer.begin();
1296 const UChar *d = data();
1297 for (int i = 0; i != length; ++i) {
1298 unsigned short c = d[i].unicode();
1299 if (c < 0x80) {
1300 *p++ = (char)c;
1301 } else if (c < 0x800) {
1302 *p++ = (char)((c >> 6) | 0xC0); // C0 is the 2-byte flag for UTF-8
1303 *p++ = (char)((c | 0x80) & 0xBF); // next 6 bits, with high bit set
1304 } else if (c >= 0xD800 && c <= 0xDBFF && i < length && d[i+1].uc >= 0xDC00 && d[i+1].uc <= 0xDFFF) {
1305 unsigned sc = 0x10000 + (((c & 0x3FF) << 10) | (d[i+1].uc & 0x3FF));
1306 *p++ = (char)((sc >> 18) | 0xF0); // F0 is the 4-byte flag for UTF-8
1307 *p++ = (char)(((sc >> 12) | 0x80) & 0xBF); // next 6 bits, with high bit set
1308 *p++ = (char)(((sc >> 6) | 0x80) & 0xBF); // next 6 bits, with high bit set
1309 *p++ = (char)((sc | 0x80) & 0xBF); // next 6 bits, with high bit set
1310 ++i;
1311 } else {
1312 *p++ = (char)((c >> 12) | 0xE0); // E0 is the 3-byte flag for UTF-8
1313 *p++ = (char)(((c >> 6) | 0x80) & 0xBF); // next 6 bits, with high bit set
1314 *p++ = (char)((c | 0x80) & 0xBF); // next 6 bits, with high bit set
1315 }
1316 }
1317
1318 // Return the result as a C string.
1319 CString result(buffer, p - buffer);
1320
1321 return result;
1322}
1323
1324} // namespace KJS
Note: See TracBrowser for help on using the repository browser.