Changeset 65468 in webkit for trunk/JavaScriptCore
- Timestamp:
- Aug 16, 2010, 4:31:33 PM (15 years ago)
- Location:
- trunk/JavaScriptCore
- Files:
-
- 15 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/JavaScriptCore/ChangeLog
r65445 r65468 1 2010-08-16 Gavin Barraclough <[email protected]> 2 3 Reviewed by Sam Weinig 4 5 Bug 44080 - String find/reverseFind methods need tidying up 6 These methods have a couple of problems with their interface, and implementation. 7 8 These methods take and int index, and return an int - however this is problematic 9 since on 64-bit string indices may have a full 32-bit range. This spills out into 10 surrounding code, which unsafely casts string indices from unsigned to int. Code 11 checking the result of these methods check for a mix of "== -1", "< 0", and 12 "== notFound". Clean this up by changing these methods to take an unsigned 13 starting index, and return a size_t. with a failed match indicated by notFound. 14 reverseFind also has a special meaning for the starting index argument, in that a 15 negative index is interpreted as an offset back from the end of the string. Remove 16 this functionality, in the (1!) case where it is used we should just calculate the 17 offset by subtracting from the string's length. 18 19 The implementation has a few problems too. The code is not in webkit style, in 20 using assorted abbreviations in variable names, and implementations of similar 21 find methods with differing argument types were unnecessarily inconsistent. When 22 find is passed const char* data the string would be handled as latin1 (zero 23 extended to UTF-16) for all characters but the first; this is sign extended. 24 Case-insensitive find is broken for unicode strings; the hashing optimization is 25 not unicode safe, and could result in false negatives. 26 27 Unify UString find methods to match String. 28 29 * JavaScriptCore.exp: 30 * bytecode/CodeBlock.cpp: 31 (JSC::escapeQuotes): 32 * bytecompiler/NodesCodegen.cpp: 33 (JSC::substitute): 34 * runtime/JSString.cpp: 35 (JSC::JSString::replaceCharacter): 36 * runtime/RegExp.cpp: 37 (JSC::RegExp::RegExp): 38 * runtime/RegExpKey.h: 39 (JSC::RegExpKey::getFlagsValue): 40 * runtime/StringPrototype.cpp: 41 (JSC::substituteBackreferencesSlow): 42 (JSC::substituteBackreferences): 43 (JSC::stringProtoFuncReplace): 44 (JSC::stringProtoFuncIndexOf): 45 (JSC::stringProtoFuncLastIndexOf): 46 (JSC::stringProtoFuncSplit): 47 * runtime/UString.cpp: 48 * runtime/UString.h: 49 (JSC::UString::find): 50 (JSC::UString::reverseFind): 51 * wtf/text/AtomicString.h: 52 (WTF::AtomicString::find): 53 * wtf/text/StringImpl.cpp: 54 (WTF::StringImpl::find): 55 (WTF::StringImpl::findCaseInsensitive): 56 (WTF::StringImpl::reverseFind): 57 (WTF::StringImpl::reverseFindCaseInsensitive): 58 (WTF::StringImpl::endsWith): 59 (WTF::StringImpl::replace): 60 * wtf/text/StringImpl.h: 61 (WTF::StringImpl::startsWith): 62 * wtf/text/WTFString.cpp: 63 (WTF::String::split): 64 * wtf/text/WTFString.h: 65 (WTF::String::find): 66 (WTF::String::reverseFind): 67 (WTF::String::findCaseInsensitive): 68 (WTF::String::reverseFindCaseInsensitive): 69 (WTF::String::contains): 70 (WTF::find): 71 (WTF::reverseFind): 72 1 73 2010-08-16 Kevin Ollivier <[email protected]> 2 74 -
trunk/JavaScriptCore/JavaScriptCore.exp
r65344 r65468 320 320 __ZN3JSCgtERKNS_7UStringES2_ 321 321 __ZN3JSCltERKNS_7UStringES2_ 322 __ZN3WTF10StringImpl11reverseFindEPS0_ ib323 __ZN3WTF10StringImpl11reverseFindEt i322 __ZN3WTF10StringImpl11reverseFindEPS0_j 323 __ZN3WTF10StringImpl11reverseFindEtj 324 324 __ZN3WTF10StringImpl12sharedBufferEv 325 __ZN3WTF10StringImpl16findIgnoringCaseEPKcj 326 __ZN3WTF10StringImpl16findIgnoringCaseEPS0_j 325 327 __ZN3WTF10StringImpl18simplifyWhiteSpaceEv 326 328 __ZN3WTF10StringImpl19characterStartingAtEj … … 328 330 __ZN3WTF10StringImpl22containsOnlyWhitespaceEv 329 331 __ZN3WTF10StringImpl23defaultWritingDirectionEv 332 __ZN3WTF10StringImpl23reverseFindIgnoringCaseEPS0_j 330 333 __ZN3WTF10StringImpl37createStrippingNullCharactersSlowCaseEPKtj 331 __ZN3WTF10StringImpl4findEPFbtE i332 __ZN3WTF10StringImpl4findEPKc ib333 __ZN3WTF10StringImpl4findEPS0_ ib334 __ZN3WTF10StringImpl4findEt i334 __ZN3WTF10StringImpl4findEPFbtEj 335 __ZN3WTF10StringImpl4findEPKcj 336 __ZN3WTF10StringImpl4findEPS0_j 337 __ZN3WTF10StringImpl4findEtj 335 338 __ZN3WTF10StringImpl5adoptERNS_12StringBufferE 336 339 __ZN3WTF10StringImpl5emptyEv -
trunk/JavaScriptCore/bytecode/CodeBlock.cpp
r65344 r65468 51 51 { 52 52 UString result = str; 53 unsignedpos = 0;54 while ((pos = result.find('\"', pos)) != UString::NotFound) {53 size_t pos = 0; 54 while ((pos = result.find('\"', pos)) != notFound) { 55 55 result = makeString(result.substr(0, pos), "\"\\\"\"", result.substr(pos + 1)); 56 56 pos += 4; -
trunk/JavaScriptCore/bytecompiler/NodesCodegen.cpp
r65177 r65468 79 79 static void substitute(UString& string, const UString& substring) 80 80 { 81 unsignedposition = string.find("%s");82 ASSERT(position != UString::NotFound);81 size_t position = string.find("%s"); 82 ASSERT(position != notFound); 83 83 string = makeString(string.substr(0, position), substring, string.substr(position + 2)); 84 84 } -
trunk/JavaScriptCore/runtime/JSString.cpp
r65177 r65468 109 109 { 110 110 if (!isRope()) { 111 unsignedmatchPosition = m_value.find(character);112 if (matchPosition == UString::NotFound)111 size_t matchPosition = m_value.find(character); 112 if (matchPosition == notFound) 113 113 return JSValue(this); 114 114 return jsString(exec, m_value.substr(0, matchPosition), replacement, m_value.substr(matchPosition + 1)); … … 120 120 size_t fiberCount = 0; 121 121 StringImpl* matchString = 0; 122 int matchPosition = -1;122 size_t matchPosition = notFound; 123 123 for (RopeIterator it(m_other.m_fibers.data(), m_fiberCount); it != end; ++it) { 124 124 ++fiberCount; … … 128 128 StringImpl* string = *it; 129 129 matchPosition = string->find(character); 130 if (matchPosition == -1)130 if (matchPosition == notFound) 131 131 continue; 132 132 matchString = string; -
trunk/JavaScriptCore/runtime/RegExp.cpp
r65188 r65468 57 57 // String::match and RegExpObject::match. 58 58 if (!flags.isNull()) { 59 if (flags.find('g') != UString::NotFound)59 if (flags.find('g') != notFound) 60 60 m_flagBits |= Global; 61 if (flags.find('i') != UString::NotFound)61 if (flags.find('i') != notFound) 62 62 m_flagBits |= IgnoreCase; 63 if (flags.find('m') != UString::NotFound)63 if (flags.find('m') != notFound) 64 64 m_flagBits |= Multiline; 65 65 } -
trunk/JavaScriptCore/runtime/RegExpKey.h
r65286 r65468 74 74 { 75 75 flagsValue = 0; 76 if (flags.find('g') != UString::NotFound)76 if (flags.find('g') != notFound) 77 77 flagsValue += 4; 78 if (flags.find('i') != UString::NotFound)78 if (flags.find('i') != notFound) 79 79 flagsValue += 2; 80 if (flags.find('m') != UString::NotFound)80 if (flags.find('m') != notFound) 81 81 flagsValue += 1; 82 82 return flagsValue; -
trunk/JavaScriptCore/runtime/StringPrototype.cpp
r65177 r65468 152 152 // ------------------------------ Functions -------------------------- 153 153 154 static NEVER_INLINE UString substituteBackreferencesSlow(const UString& replacement, const UString& source, const int* ovector, RegExp* reg, unsignedi)154 static NEVER_INLINE UString substituteBackreferencesSlow(const UString& replacement, const UString& source, const int* ovector, RegExp* reg, size_t i) 155 155 { 156 156 Vector<UChar> substitutedReplacement; … … 208 208 offset = i + 1; 209 209 substitutedReplacement.append(source.characters() + backrefStart, backrefLength); 210 } while ((i = replacement.find('$', i + 1)) != UString::NotFound);210 } while ((i = replacement.find('$', i + 1)) != notFound); 211 211 212 212 if (replacement.length() - offset) … … 219 219 static inline UString substituteBackreferences(const UString& replacement, const UString& source, const int* ovector, RegExp* reg) 220 220 { 221 unsignedi = replacement.find('$', 0);222 if (UNLIKELY(i != UString::NotFound))221 size_t i = replacement.find('$', 0); 222 if (UNLIKELY(i != notFound)) 223 223 return substituteBackreferencesSlow(replacement, source, ovector, reg, i); 224 224 return replacement; … … 430 430 431 431 const UString& source = sourceVal->value(exec); 432 unsignedmatchPos = source.find(patternString);433 434 if (matchPos == UString::NotFound)432 size_t matchPos = source.find(patternString); 433 434 if (matchPos == notFound) 435 435 return JSValue::encode(sourceVal); 436 436 … … 543 543 } 544 544 545 unsignedresult = s.find(u2, pos);546 if (result == UString::NotFound)545 size_t result = s.find(u2, pos); 546 if (result == notFound) 547 547 return JSValue::encode(jsNumber(exec, -1)); 548 548 return JSValue::encode(jsNumber(exec, result)); … … 572 572 #endif 573 573 574 unsigned result = s.rfind(u2, static_cast<unsigned>(dpos));575 if (result == UString::NotFound)574 size_t result = s.reverseFind(u2, static_cast<unsigned>(dpos)); 575 if (result == notFound) 576 576 return JSValue::encode(jsNumber(exec, -1)); 577 577 return JSValue::encode(jsNumber(exec, result)); … … 737 737 result->put(exec, i++, jsSingleCharacterSubstring(exec, s, p0++)); 738 738 } else { 739 unsigned pos; 740 741 while (i != limit && (pos = s.find(u2, p0)) != UString::NotFound) { 739 size_t pos; 740 while (i != limit && (pos = s.find(u2, p0)) != notFound) { 742 741 result->put(exec, i++, jsSubstring(exec, s, p0, pos - p0)); 743 742 p0 = pos + u2.length(); -
trunk/JavaScriptCore/runtime/UString.cpp
r65344 r65468 414 414 } 415 415 416 unsigned UString::find(const UString& f, unsigned pos) const417 {418 unsigned fsz = f.length();419 420 if (fsz == 1) {421 UChar ch = f[0];422 const UChar* end = characters() + length();423 for (const UChar* c = characters() + pos; c < end; c++) {424 if (*c == ch)425 return static_cast<unsigned>(c - characters());426 }427 return NotFound;428 }429 430 unsigned sz = length();431 if (sz < fsz)432 return NotFound;433 if (fsz == 0)434 return pos;435 const UChar* end = characters() + sz - fsz;436 unsigned fsizeminusone = (fsz - 1) * sizeof(UChar);437 const UChar* fdata = f.characters();438 unsigned short fchar = fdata[0];439 ++fdata;440 for (const UChar* c = characters() + pos; c <= end; c++) {441 if (c[0] == fchar && !memcmp(c + 1, fdata, fsizeminusone))442 return static_cast<unsigned>(c - characters());443 }444 445 return NotFound;446 }447 448 unsigned UString::find(UChar ch, unsigned pos) const449 {450 const UChar* end = characters() + length();451 for (const UChar* c = characters() + pos; c < end; c++) {452 if (*c == ch)453 return static_cast<unsigned>(c - characters());454 }455 456 return NotFound;457 }458 459 unsigned UString::rfind(const UString& f, unsigned pos) const460 {461 unsigned sz = length();462 unsigned fsz = f.length();463 if (sz < fsz)464 return NotFound;465 if (pos > sz - fsz)466 pos = sz - fsz;467 if (fsz == 0)468 return pos;469 unsigned fsizeminusone = (fsz - 1) * sizeof(UChar);470 const UChar* fdata = f.characters();471 for (const UChar* c = characters() + pos; c >= characters(); c--) {472 if (*c == *fdata && !memcmp(c + 1, fdata + 1, fsizeminusone))473 return static_cast<unsigned>(c - characters());474 }475 476 return NotFound;477 }478 479 unsigned UString::rfind(UChar ch, unsigned pos) const480 {481 if (isEmpty())482 return NotFound;483 if (pos + 1 >= length())484 pos = length() - 1;485 for (const UChar* c = characters() + pos; c >= characters(); c--) {486 if (*c == ch)487 return static_cast<unsigned>(c - characters());488 }489 490 return NotFound;491 }492 493 416 UString UString::substr(unsigned pos, unsigned len) const 494 417 { -
trunk/JavaScriptCore/runtime/UString.h
r65344 r65468 105 105 static UString number(double); 106 106 107 107 // Find a single character or string, also with match function & latin1 forms. 108 size_t find(UChar c, unsigned start = 0) const 109 { return m_impl ? m_impl->find(c, start) : notFound; } 110 size_t find(const UString& str, unsigned start = 0) const 111 { return m_impl ? m_impl->find(str.impl(), start) : notFound; } 112 size_t find(const char* str, unsigned start = 0) const 113 { return m_impl ? m_impl->find(str, start) : notFound; } 114 115 // Find the last instance of a single character or string. 116 size_t reverseFind(UChar c, unsigned start = UINT_MAX) const 117 { return m_impl ? m_impl->reverseFind(c, start) : notFound; } 118 size_t reverseFind(const UString& str, unsigned start = UINT_MAX) const 119 { return m_impl ? m_impl->reverseFind(str.impl(), start) : notFound; } 108 120 109 121 double toDouble(bool tolerateTrailingJunk, bool tolerateEmptyString) const; … … 115 127 uint32_t toStrictUInt32(bool* ok = 0) const; 116 128 117 static const unsigned NotFound = 0xFFFFFFFFu; 118 unsigned find(const UString& f, unsigned pos = 0) const; 119 unsigned find(UChar, unsigned pos = 0) const; 120 unsigned rfind(const UString& f, unsigned pos) const; 121 unsigned rfind(UChar, unsigned pos) const; 122 123 UString substr(unsigned pos = 0, unsigned len = 0xFFFFFFFF) const; 129 UString substr(unsigned pos = 0, unsigned len = UINT_MAX) const; 124 130 125 131 private: -
trunk/JavaScriptCore/wtf/text/AtomicString.h
r65077 r65468 72 72 { return m_string.contains(s, caseSensitive); } 73 73 74 int find(UChar c, int start = 0) const { return m_string.find(c, start); }75 int find(const char* s, int start = 0, bool caseSentitive = true) const74 size_t find(UChar c, size_t start = 0) const { return m_string.find(c, start); } 75 size_t find(const char* s, size_t start = 0, bool caseSentitive = true) const 76 76 { return m_string.find(s, start, caseSentitive); } 77 int find(const String& s, int start = 0, bool caseSentitive = true) const77 size_t find(const String& s, size_t start = 0, bool caseSentitive = true) const 78 78 { return m_string.find(s, start, caseSentitive); } 79 79 -
trunk/JavaScriptCore/wtf/text/StringImpl.cpp
r65344 r65468 499 499 } 500 500 501 int StringImpl::find(const char* chs, int index, bool caseSensitive) 502 { 503 if (!chs || index < 0) 504 return -1; 505 506 int chsLength = strlen(chs); 507 int n = m_length - index; 508 if (n < 0) 509 return -1; 510 n -= chsLength - 1; 511 if (n <= 0) 512 return -1; 513 514 const char* chsPlusOne = chs + 1; 515 int chsLengthMinusOne = chsLength - 1; 516 517 const UChar* ptr = m_data + index - 1; 518 if (caseSensitive) { 519 UChar c = *chs; 520 do { 521 if (*++ptr == c && equal(ptr + 1, chsPlusOne, chsLengthMinusOne)) 522 return m_length - chsLength - n + 1; 523 } while (--n); 524 } else { 525 UChar lc = Unicode::foldCase(*chs); 526 do { 527 if (Unicode::foldCase(*++ptr) == lc && equalIgnoringCase(ptr + 1, chsPlusOne, chsLengthMinusOne)) 528 return m_length - chsLength - n + 1; 529 } while (--n); 530 } 531 532 return -1; 533 } 534 535 int StringImpl::find(UChar c, int start) 501 size_t StringImpl::find(UChar c, unsigned start) 536 502 { 537 503 return WTF::find(m_data, m_length, c, start); 538 504 } 539 505 540 int StringImpl::find(CharacterMatchFunctionPtr matchFunction, intstart)506 size_t StringImpl::find(CharacterMatchFunctionPtr matchFunction, unsigned start) 541 507 { 542 508 return WTF::find(m_data, m_length, matchFunction, start); 543 509 } 544 510 545 int StringImpl::find(StringImpl* str, int index, bool caseSensitive) 546 { 547 /* 548 We use a simple trick for efficiency's sake. Instead of 549 comparing strings, we compare the sum of str with that of 550 a part of this string. Only if that matches, we call memcmp 551 or ucstrnicmp. 552 */ 553 ASSERT(str); 554 if (index < 0) 555 index += m_length; 556 int lstr = str->m_length; 557 int lthis = m_length - index; 558 if ((unsigned)lthis > m_length) 559 return -1; 560 int delta = lthis - lstr; 561 if (delta < 0) 562 return -1; 563 564 const UChar* uthis = m_data + index; 565 const UChar* ustr = str->m_data; 566 unsigned hthis = 0; 567 unsigned hstr = 0; 568 if (caseSensitive) { 569 for (int i = 0; i < lstr; i++) { 570 hthis += uthis[i]; 571 hstr += ustr[i]; 572 } 573 int i = 0; 574 while (1) { 575 if (hthis == hstr && memcmp(uthis + i, ustr, lstr * sizeof(UChar)) == 0) 576 return index + i; 577 if (i == delta) 578 return -1; 579 hthis += uthis[i + lstr]; 580 hthis -= uthis[i]; 581 i++; 582 } 583 } else { 584 for (int i = 0; i < lstr; i++ ) { 585 hthis += toASCIILower(uthis[i]); 586 hstr += toASCIILower(ustr[i]); 587 } 588 int i = 0; 589 while (1) { 590 if (hthis == hstr && equalIgnoringCase(uthis + i, ustr, lstr)) 591 return index + i; 592 if (i == delta) 593 return -1; 594 hthis += toASCIILower(uthis[i + lstr]); 595 hthis -= toASCIILower(uthis[i]); 596 i++; 597 } 598 } 599 } 600 601 int StringImpl::reverseFind(UChar c, int index) 511 size_t StringImpl::find(const char* matchString, unsigned index) 512 { 513 // Check for null or empty string to match against 514 if (!matchString) 515 return notFound; 516 unsigned matchLength = strlen(matchString); 517 if (!matchLength) 518 return min(index, length()); 519 520 // Optimization 1: fast case for strings of length 1. 521 if (matchLength == 1) 522 return WTF::find(characters(), length(), *(const unsigned char*)matchString, index); 523 524 // Check index & matchLength are in range. 525 if (index > length()) 526 return notFound; 527 unsigned searchLength = length() - index; 528 if (matchLength > searchLength) 529 return notFound; 530 // delta is the number of additional times to test; delta == 0 means test only once. 531 unsigned delta = searchLength - matchLength; 532 533 const UChar* searchCharacters = characters() + index; 534 const unsigned char* matchCharacters = (const unsigned char*)matchString; 535 536 // Optimization 2: keep a running hash of the strings, 537 // only call memcmp if the hashes match. 538 unsigned searchHash = 0; 539 unsigned matchHash = 0; 540 for (unsigned i = 0; i < matchLength; ++i) { 541 searchHash += searchCharacters[i]; 542 matchHash += matchCharacters[i]; 543 } 544 545 for (unsigned i = 0; i <= delta; ++i) { 546 if (searchHash == matchHash && equal(searchCharacters + i, matchString, matchLength)) 547 return index + i; 548 searchHash += searchCharacters[i + matchLength]; 549 searchHash -= searchCharacters[i]; 550 } 551 return notFound; 552 } 553 554 size_t StringImpl::findIgnoringCase(const char* matchString, unsigned index) 555 { 556 // Check for null or empty string to match against 557 if (!matchString) 558 return notFound; 559 unsigned matchLength = strlen(matchString); 560 if (!matchLength) 561 return min(index, length()); 562 563 // Check index & matchLength are in range. 564 if (index > length()) 565 return notFound; 566 unsigned searchLength = length() - index; 567 if (matchLength > searchLength) 568 return notFound; 569 // delta is the number of additional times to test; delta == 0 means test only once. 570 unsigned delta = searchLength - matchLength; 571 572 const UChar* searchCharacters = characters() + index; 573 574 for (unsigned i = 0; i <= delta; ++i) { 575 if (equalIgnoringCase(searchCharacters + i, matchString, matchLength)) 576 return index + i; 577 } 578 return notFound; 579 } 580 581 size_t StringImpl::find(StringImpl* matchString, unsigned index) 582 { 583 // Check for null or empty string to match against 584 if (!matchString) 585 return notFound; 586 unsigned matchLength = matchString->length(); 587 if (!matchLength) 588 return min(index, length()); 589 590 // Optimization 1: fast case for strings of length 1. 591 if (matchLength == 1) 592 return WTF::find(characters(), length(), matchString->characters()[0], index); 593 594 // Check index & matchLength are in range. 595 if (index > length()) 596 return notFound; 597 unsigned searchLength = length() - index; 598 if (matchLength > searchLength) 599 return notFound; 600 // delta is the number of additional times to test; delta == 0 means test only once. 601 unsigned delta = searchLength - matchLength; 602 603 const UChar* searchCharacters = characters() + index; 604 const UChar* matchCharacters = matchString->characters(); 605 606 // Optimization 2: keep a running hash of the strings, 607 // only call memcmp if the hashes match. 608 unsigned searchHash = 0; 609 unsigned matchHash = 0; 610 for (unsigned i = 0; i < matchLength; ++i) { 611 searchHash += searchCharacters[i]; 612 matchHash += matchCharacters[i]; 613 } 614 615 for (unsigned i = 0; i <= delta; ++i) { 616 if (searchHash == matchHash && memcmp(searchCharacters + i, matchCharacters, matchLength * sizeof(UChar)) == 0) 617 return index + i; 618 searchHash += searchCharacters[i + matchLength]; 619 searchHash -= searchCharacters[i]; 620 } 621 return notFound; 622 } 623 624 size_t StringImpl::findIgnoringCase(StringImpl* matchString, unsigned index) 625 { 626 // Check for null or empty string to match against 627 if (!matchString) 628 return notFound; 629 unsigned matchLength = matchString->length(); 630 if (!matchLength) 631 return min(index, length()); 632 633 // Check index & matchLength are in range. 634 if (index > length()) 635 return notFound; 636 unsigned searchLength = length() - index; 637 if (matchLength > searchLength) 638 return notFound; 639 // delta is the number of additional times to test; delta == 0 means test only once. 640 unsigned delta = searchLength - matchLength; 641 642 const UChar* searchCharacters = characters() + index; 643 const UChar* matchCharacters = matchString->characters(); 644 645 for (unsigned i = 0; i <= delta; ++i) { 646 if (equalIgnoringCase(searchCharacters + i, matchCharacters, matchLength)) 647 return index + i; 648 } 649 return notFound; 650 } 651 652 size_t StringImpl::reverseFind(UChar c, unsigned index) 602 653 { 603 654 return WTF::reverseFind(m_data, m_length, c, index); 604 655 } 605 656 606 int StringImpl::reverseFind(StringImpl* str, int index, bool caseSensitive) 607 { 608 /* 609 See StringImpl::find() for explanations. 610 */ 611 ASSERT(str); 612 int lthis = m_length; 613 if (index < 0) 614 index += lthis; 615 616 int lstr = str->m_length; 617 int delta = lthis - lstr; 618 if ( index < 0 || index > lthis || delta < 0 ) 619 return -1; 620 if ( index > delta ) 621 index = delta; 622 623 const UChar *uthis = m_data; 624 const UChar *ustr = str->m_data; 625 unsigned hthis = 0; 626 unsigned hstr = 0; 627 int i; 628 if (caseSensitive) { 629 for ( i = 0; i < lstr; i++ ) { 630 hthis += uthis[index + i]; 631 hstr += ustr[i]; 632 } 633 i = index; 634 while (1) { 635 if (hthis == hstr && memcmp(uthis + i, ustr, lstr * sizeof(UChar)) == 0) 636 return i; 637 if (i == 0) 638 return -1; 639 i--; 640 hthis -= uthis[i + lstr]; 641 hthis += uthis[i]; 642 } 643 } else { 644 for (i = 0; i < lstr; i++) { 645 hthis += toASCIILower(uthis[index + i]); 646 hstr += toASCIILower(ustr[i]); 647 } 648 i = index; 649 while (1) { 650 if (hthis == hstr && equalIgnoringCase(uthis + i, ustr, lstr) ) 651 return i; 652 if (i == 0) 653 return -1; 654 i--; 655 hthis -= toASCIILower(uthis[i + lstr]); 656 hthis += toASCIILower(uthis[i]); 657 } 658 } 659 660 // Should never get here. 661 return -1; 657 size_t StringImpl::reverseFind(StringImpl* matchString, unsigned index) 658 { 659 // Check for null or empty string to match against 660 if (!matchString) 661 return notFound; 662 unsigned matchLength = matchString->length(); 663 if (!matchLength) 664 return min(index, length()); 665 666 // Optimization 1: fast case for strings of length 1. 667 if (matchLength == 1) 668 return WTF::reverseFind(characters(), length(), matchString->characters()[0], index); 669 670 // Check index & matchLength are in range. 671 if (matchLength > length()) 672 return notFound; 673 // delta is the number of additional times to test; delta == 0 means test only once. 674 unsigned delta = min(index, length() - matchLength); 675 676 const UChar *searchCharacters = characters(); 677 const UChar *matchCharacters = matchString->characters(); 678 679 // Optimization 2: keep a running hash of the strings, 680 // only call memcmp if the hashes match. 681 unsigned searchHash = 0; 682 unsigned matchHash = 0; 683 for (unsigned i = 0; i < matchLength; ++i) { 684 searchHash += searchCharacters[delta + i]; 685 matchHash += matchCharacters[i]; 686 } 687 688 while (searchHash != matchHash || memcmp(searchCharacters + delta, matchCharacters, matchLength * sizeof(UChar))) { 689 if (!delta--) 690 return notFound; 691 searchHash -= searchCharacters[delta + matchLength]; 692 searchHash += searchCharacters[delta]; 693 } 694 return delta; 695 } 696 697 size_t StringImpl::reverseFindIgnoringCase(StringImpl* matchString, unsigned index) 698 { 699 // Check for null or empty string to match against 700 if (!matchString) 701 return notFound; 702 unsigned matchLength = matchString->length(); 703 if (!matchLength) 704 return min(index, length()); 705 706 // Check index & matchLength are in range. 707 if (matchLength > length()) 708 return notFound; 709 // delta is the number of additional times to test; delta == 0 means test only once. 710 unsigned delta = min(index, length() - matchLength); 711 712 const UChar *searchCharacters = characters(); 713 const UChar *matchCharacters = matchString->characters(); 714 715 while (!equalIgnoringCase(searchCharacters + delta, matchCharacters, matchLength)) { 716 if (!delta--) 717 return notFound; 718 } 719 return delta; 662 720 } 663 721 … … 665 723 { 666 724 ASSERT(m_data); 667 int start = m_length - m_data->m_length; 668 if (start >= 0) 669 return (find(m_data, start, caseSensitive) == start); 725 if (m_length >= m_data->m_length) { 726 unsigned start = m_length - m_data->m_length; 727 return (caseSensitive ? find(m_data, start) : findIgnoringCase(m_data, start)) == start; 728 } 670 729 return false; 671 730 } … … 717 776 return this; 718 777 719 intrepStrLength = replacement->length();720 int srcSegmentStart = 0;721 intmatchCount = 0;778 unsigned repStrLength = replacement->length(); 779 size_t srcSegmentStart = 0; 780 unsigned matchCount = 0; 722 781 723 782 // Count the matches 724 while ((srcSegmentStart = find(pattern, srcSegmentStart)) >= 0) {783 while ((srcSegmentStart = find(pattern, srcSegmentStart)) != notFound) { 725 784 ++matchCount; 726 785 ++srcSegmentStart; … … 736 795 737 796 // Construct the new data 738 int srcSegmentEnd;739 intsrcSegmentLength;797 size_t srcSegmentEnd; 798 unsigned srcSegmentLength; 740 799 srcSegmentStart = 0; 741 intdstOffset = 0;742 743 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) >= 0) {800 unsigned dstOffset = 0; 801 802 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != notFound) { 744 803 srcSegmentLength = srcSegmentEnd - srcSegmentStart; 745 804 memcpy(data + dstOffset, m_data + srcSegmentStart, srcSegmentLength * sizeof(UChar)); … … 753 812 memcpy(data + dstOffset, m_data + srcSegmentStart, srcSegmentLength * sizeof(UChar)); 754 813 755 ASSERT(dstOffset + srcSegmentLength == static_cast<int>(newImpl->length()));814 ASSERT(dstOffset + srcSegmentLength == newImpl->length()); 756 815 757 816 return newImpl; … … 763 822 return this; 764 823 765 intpatternLength = pattern->length();824 unsigned patternLength = pattern->length(); 766 825 if (!patternLength) 767 826 return this; 768 827 769 intrepStrLength = replacement->length();770 int srcSegmentStart = 0;771 intmatchCount = 0;828 unsigned repStrLength = replacement->length(); 829 size_t srcSegmentStart = 0; 830 unsigned matchCount = 0; 772 831 773 832 // Count the matches 774 while ((srcSegmentStart = find(pattern, srcSegmentStart)) >= 0) {833 while ((srcSegmentStart = find(pattern, srcSegmentStart)) != notFound) { 775 834 ++matchCount; 776 835 srcSegmentStart += patternLength; … … 786 845 787 846 // Construct the new data 788 int srcSegmentEnd;789 intsrcSegmentLength;847 size_t srcSegmentEnd; 848 unsigned srcSegmentLength; 790 849 srcSegmentStart = 0; 791 intdstOffset = 0;792 793 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) >= 0) {850 unsigned dstOffset = 0; 851 852 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != notFound) { 794 853 srcSegmentLength = srcSegmentEnd - srcSegmentStart; 795 854 memcpy(data + dstOffset, m_data + srcSegmentStart, srcSegmentLength * sizeof(UChar)); … … 803 862 memcpy(data + dstOffset, m_data + srcSegmentStart, srcSegmentLength * sizeof(UChar)); 804 863 805 ASSERT(dstOffset + srcSegmentLength == static_cast<int>(newImpl->length()));864 ASSERT(dstOffset + srcSegmentLength == newImpl->length()); 806 865 807 866 return newImpl; -
trunk/JavaScriptCore/wtf/text/StringImpl.h
r65344 r65468 291 291 PassRefPtr<StringImpl> removeCharacters(CharacterMatchFunctionPtr); 292 292 293 int find(const char*, int index = 0, bool caseSensitive = true); 294 int find(UChar, int index = 0); 295 int find(CharacterMatchFunctionPtr, int index = 0); 296 int find(StringImpl*, int index, bool caseSensitive = true); 297 298 int reverseFind(UChar, int index); 299 int reverseFind(StringImpl*, int index, bool caseSensitive = true); 300 301 bool startsWith(StringImpl* str, bool caseSensitive = true) { return reverseFind(str, 0, caseSensitive) == 0; } 293 size_t find(UChar, unsigned index = 0); 294 size_t find(CharacterMatchFunctionPtr, unsigned index = 0); 295 size_t find(const char*, unsigned index = 0); 296 size_t find(StringImpl*, unsigned index = 0); 297 size_t findIgnoringCase(const char*, unsigned index = 0); 298 size_t findIgnoringCase(StringImpl*, unsigned index = 0); 299 300 size_t reverseFind(UChar, unsigned index = UINT_MAX); 301 size_t reverseFind(StringImpl*, unsigned index = UINT_MAX); 302 size_t reverseFindIgnoringCase(StringImpl*, unsigned index = UINT_MAX); 303 304 bool startsWith(StringImpl* str, bool caseSensitive = true) { return (caseSensitive ? reverseFind(str, 0) : reverseFindIgnoringCase(str, 0)) == 0; } 302 305 bool endsWith(StringImpl*, bool caseSensitive = true); 303 306 -
trunk/JavaScriptCore/wtf/text/WTFString.cpp
r65344 r65468 577 577 result.clear(); 578 578 579 intstartPos = 0;580 int endPos;581 while ((endPos = find(separator, startPos)) != -1) {579 unsigned startPos = 0; 580 size_t endPos; 581 while ((endPos = find(separator, startPos)) != notFound) { 582 582 if (allowEmptyEntries || startPos != endPos) 583 583 result.append(substring(startPos, endPos - startPos)); 584 584 startPos = endPos + separator.length(); 585 585 } 586 if (allowEmptyEntries || startPos != static_cast<int>(length()))586 if (allowEmptyEntries || startPos != length()) 587 587 result.append(substring(startPos)); 588 588 } … … 597 597 result.clear(); 598 598 599 intstartPos = 0;600 int endPos;601 while ((endPos = find(separator, startPos)) != -1) {599 unsigned startPos = 0; 600 size_t endPos; 601 while ((endPos = find(separator, startPos)) != notFound) { 602 602 if (allowEmptyEntries || startPos != endPos) 603 603 result.append(substring(startPos, endPos - startPos)); 604 604 startPos = endPos + 1; 605 605 } 606 if (allowEmptyEntries || startPos != static_cast<int>(length()))606 if (allowEmptyEntries || startPos != length()) 607 607 result.append(substring(startPos)); 608 608 } -
trunk/JavaScriptCore/wtf/text/WTFString.h
r65344 r65468 73 73 float charactersToFloat(const UChar*, size_t, bool* ok = 0); 74 74 75 int find(const UChar*, size_t, UChar, int startPosition = 0);76 int reverseFind(const UChar*, size_t, UChar, int startPosition = -1);77 78 75 class String { 79 76 public: … … 147 144 static String number(double); 148 145 149 146 // Find a single character or string, also with match function & latin1 forms. 147 size_t find(UChar c, unsigned start = 0) const 148 { return m_impl ? m_impl->find(c, start) : notFound; } 149 size_t find(const String& str, unsigned start = 0) const 150 { return m_impl ? m_impl->find(str.impl(), start) : notFound; } 151 size_t find(CharacterMatchFunctionPtr matchFunction, unsigned start = 0) const 152 { return m_impl ? m_impl->find(matchFunction, start) : notFound; } 153 size_t find(const char* str, unsigned start = 0) const 154 { return m_impl ? m_impl->find(str, start) : notFound; } 155 156 // Find the last instance of a single character or string. 157 size_t reverseFind(UChar c, unsigned start = UINT_MAX) const 158 { return m_impl ? m_impl->reverseFind(c, start) : notFound; } 159 size_t reverseFind(const String& str, unsigned start = UINT_MAX) const 160 { return m_impl ? m_impl->reverseFind(str.impl(), start) : notFound; } 161 162 // Case insensitive string matching. 163 size_t findIgnoringCase(const char* str, unsigned start = 0) const 164 { return m_impl ? m_impl->findIgnoringCase(str, start) : notFound; } 165 size_t findIgnoringCase(const String& str, unsigned start = 0) const 166 { return m_impl ? m_impl->findIgnoringCase(str.impl(), start) : notFound; } 167 size_t reverseFindIgnoringCase(const String& str, unsigned start = UINT_MAX) const 168 { return m_impl ? m_impl->reverseFindIgnoringCase(str.impl(), start) : notFound; } 169 170 // Wrappers for find & reverseFind adding dynamic sensitivity check. 171 size_t find(const char* str, unsigned start, bool caseSensitive) const 172 { return caseSensitive ? find(str, start) : findIgnoringCase(str, start); } 173 size_t find(const String& str, unsigned start, bool caseSensitive) const 174 { return caseSensitive ? find(str, start) : findIgnoringCase(str, start); } 175 size_t reverseFind(const String& str, unsigned start, bool caseSensitive) const 176 { return caseSensitive ? reverseFind(str, start) : reverseFindIgnoringCase(str, start); } 150 177 151 178 const UChar* charactersWithNullTermination(); … … 153 180 UChar32 characterStartingAt(unsigned) const; // Ditto. 154 181 155 bool contains(UChar c) const { return find(c) != -1; } 156 bool contains(const char* str, bool caseSensitive = true) const { return find(str, 0, caseSensitive) != -1; } 157 bool contains(const String& str, bool caseSensitive = true) const { return find(str, 0, caseSensitive) != -1; } 158 159 int find(UChar c, int start = 0) const 160 { return m_impl ? m_impl->find(c, start) : -1; } 161 int find(CharacterMatchFunctionPtr matchFunction, int start = 0) const 162 { return m_impl ? m_impl->find(matchFunction, start) : -1; } 163 int find(const char* str, int start = 0, bool caseSensitive = true) const 164 { return m_impl ? m_impl->find(str, start, caseSensitive) : -1; } 165 int find(const String& str, int start = 0, bool caseSensitive = true) const 166 { return m_impl ? m_impl->find(str.impl(), start, caseSensitive) : -1; } 167 168 int reverseFind(UChar c, int start = -1) const 169 { return m_impl ? m_impl->reverseFind(c, start) : -1; } 170 int reverseFind(const String& str, int start = -1, bool caseSensitive = true) const 171 { return m_impl ? m_impl->reverseFind(str.impl(), start, caseSensitive) : -1; } 172 182 bool contains(UChar c) const { return find(c) != notFound; } 183 bool contains(const char* str, bool caseSensitive = true) const { return find(str, 0, caseSensitive) != notFound; } 184 bool contains(const String& str, bool caseSensitive = true) const { return find(str, 0, caseSensitive) != notFound; } 185 173 186 bool startsWith(const String& s, bool caseSensitive = true) const 174 187 { return m_impl ? m_impl->startsWith(s.impl(), caseSensitive) : s.isEmpty(); } … … 352 365 int codePointCompare(const String&, const String&); 353 366 354 inline int find(const UChar* characters, size_t length, UChar character, int startPosition) 355 { 356 if (startPosition >= static_cast<int>(length)) 357 return -1; 358 for (size_t i = startPosition; i < length; ++i) { 359 if (characters[i] == character) 360 return static_cast<int>(i); 361 } 362 return -1; 363 } 364 365 inline int find(const UChar* characters, size_t length, CharacterMatchFunctionPtr matchFunction, int startPosition) 366 { 367 if (startPosition >= static_cast<int>(length)) 368 return -1; 369 for (size_t i = startPosition; i < length; ++i) { 370 if (matchFunction(characters[i])) 371 return static_cast<int>(i); 372 } 373 return -1; 374 } 375 376 inline int reverseFind(const UChar* characters, size_t length, UChar character, int startPosition) 377 { 378 if (startPosition >= static_cast<int>(length) || !length) 379 return -1; 380 if (startPosition < 0) 381 startPosition += static_cast<int>(length); 382 while (true) { 383 if (characters[startPosition] == character) 384 return startPosition; 385 if (!startPosition) 386 return -1; 387 startPosition--; 388 } 389 ASSERT_NOT_REACHED(); 390 return -1; 367 inline size_t find(const UChar* characters, unsigned length, UChar matchCharacter, unsigned index = 0) 368 { 369 while (index < length) { 370 if (characters[index] == matchCharacter) 371 return index; 372 ++index; 373 } 374 return notFound; 375 } 376 377 inline size_t find(const UChar* characters, unsigned length, CharacterMatchFunctionPtr matchFunction, unsigned index = 0) 378 { 379 while (index < length) { 380 if (matchFunction(characters[index])) 381 return index; 382 ++index; 383 } 384 return notFound; 385 } 386 387 inline size_t reverseFind(const UChar* characters, unsigned length, UChar matchCharacter, unsigned index = UINT_MAX) 388 { 389 if (!length) 390 return notFound; 391 if (index >= length) 392 index = length - 1; 393 while (characters[index] != matchCharacter) { 394 if (!index--) 395 return notFound; 396 } 397 return index; 391 398 } 392 399
Note:
See TracChangeset
for help on using the changeset viewer.