Context Navigation

← Previous Change
Next Change →

Changeset 65468 in webkit for trunk/JavaScriptCore

Timestamp:

Aug 16, 2010, 4:31:33 PM (15 years ago)

Author:

Message:

Bug 44080 - String find/reverseFind methods need tidying up
These methods have a couple of problems with their interface, and implementation.

Reviewed by Sam Weinig

These methods take an int index, and return an int - however this is problematic
since on 64-bit string indices may have a full 32-bit range. This spills out into
surrounding code, which unsafely casts string indices from unsigned to int. Code
checking the result of these methods checks for a mix of "== -1", "< 0", and
"== notFound". Clean this up by changing these methods to take an unsigned
starting index, and return a size_t, with a failed match indicated by notFound.
reverseFind also has a special meaning for the starting index argument, in that a
negative index is interpreted as an offset back from the end of the string. Remove
this functionality; in the (1!) case where it is used we should just calculate the
offset by subtracting from the string's length.

The implementation has a few problems too. The code is not in webkit style, in
using assorted abbreviations in variable names, and implementations of similar
find methods with differing argument types were unnecessarily inconsistent. When
find is passed const char* data the string would be handled as latin1 (zero
extended to UTF-16) for all characters but the first, which is sign extended.
Case-insensitive find is broken for unicode strings; the hashing optimization is
not unicode safe, and could result in false negatives.

Unify UString find methods to match String.

JavaScriptCore:

JavaScriptCore.exp:
bytecode/CodeBlock.cpp:

(JSC::escapeQuotes):

bytecompiler/NodesCodegen.cpp:

(JSC::substitute):

runtime/JSString.cpp:

(JSC::JSString::replaceCharacter):

runtime/RegExp.cpp:

(JSC::RegExp::RegExp):

runtime/RegExpKey.h:

(JSC::RegExpKey::getFlagsValue):

runtime/StringPrototype.cpp:

(JSC::substituteBackreferencesSlow):
(JSC::substituteBackreferences):
(JSC::stringProtoFuncReplace):
(JSC::stringProtoFuncIndexOf):
(JSC::stringProtoFuncLastIndexOf):
(JSC::stringProtoFuncSplit):

runtime/UString.cpp:
runtime/UString.h:

(JSC::UString::find):
(JSC::UString::reverseFind):

wtf/text/AtomicString.h:

(WTF::AtomicString::find):

wtf/text/StringImpl.cpp:

(WTF::StringImpl::find):
(WTF::StringImpl::findCaseInsensitive):
(WTF::StringImpl::reverseFind):
(WTF::StringImpl::reverseFindCaseInsensitive):
(WTF::StringImpl::endsWith):
(WTF::StringImpl::replace):

wtf/text/StringImpl.h:

(WTF::StringImpl::startsWith):

wtf/text/WTFString.cpp:

(WTF::String::split):

wtf/text/WTFString.h:

(WTF::String::find):
(WTF::String::reverseFind):
(WTF::String::findCaseInsensitive):
(WTF::String::reverseFindCaseInsensitive):
(WTF::String::contains):
(WTF::find):
(WTF::reverseFind):

WebCore:

css/CSSSelector.cpp:

(WebCore::CSSSelector::RareData::parseNth):

css/CSSStyleDeclaration.cpp:

(WebCore::CSSStyleDeclaration::setProperty):

css/CSSStyleSelector.cpp:

(WebCore::CSSStyleSelector::SelectorChecker::checkOneSelector):

dom/Document.cpp:

(WebCore::Document::getImageMap):

editing/CompositeEditCommand.cpp:

(WebCore::CompositeEditCommand::inputText):

editing/InsertTextCommand.cpp:

(WebCore::InsertTextCommand::input):

editing/TextIterator.cpp:

(WebCore::TextIterator::handleTextBox):

editing/TypingCommand.cpp:

(WebCore::TypingCommand::insertText):

editing/markup.cpp:

(WebCore::fillContainerFromString):
(WebCore::createFragmentFromText):

html/File.cpp:

(WebCore::File::Init):

html/HTMLAnchorElement.cpp:

(WebCore::HTMLAnchorElement::setHost):

html/HTMLEmbedElement.cpp:

(WebCore::HTMLEmbedElement::parseMappedAttribute):

html/HTMLFormControlElement.cpp:

(WebCore::HTMLTextFormControlElement::isPlaceholderEmpty):

html/HTMLObjectElement.cpp:

(WebCore::HTMLObjectElement::parseMappedAttribute):

inspector/InspectorDOMAgent.cpp:

(WebCore::InspectorDOMAgent::performSearch):

loader/CrossOriginPreflightResultCache.cpp:

(WebCore::parseAccessControlAllowList):

loader/MainResourceLoader.cpp:

(WebCore::MainResourceLoader::substituteMIMETypeFromPluginDatabase):

loader/appcache/ApplicationCacheStorage.cpp:

(WebCore::parseHeader):
(WebCore::parseHeaders):

loader/icon/IconFetcher.cpp:

(WebCore::parseIconLink):

page/DOMWindow.cpp:

(WebCore::DOMWindow::parseModalDialogFeatures):

page/SecurityOrigin.cpp:

(WebCore::SecurityOrigin::createFromDatabaseIdentifier):

page/UserContentURLPattern.cpp:

(WebCore::UserContentURLPattern::parse):

page/XSSAuditor.cpp:

(WebCore::XSSAuditor::findInRequest):

platform/ContentType.cpp:

(WebCore::ContentType::parameter):
(WebCore::ContentType::type):

platform/KURL.cpp:

(WebCore::KURL::lastPathComponent):
(WebCore::KURL::setProtocol):
(WebCore::decodeURLEscapeSequences):
(WebCore::substituteBackslashes):
(WebCore::mimeTypeFromDataURL):

platform/Length.cpp:

(WebCore::newCoordsArray):
(WebCore::newLengthArray):

platform/LinkHash.cpp:

(WebCore::findSlashDotDotSlash):
(WebCore::findSlashSlash):
(WebCore::findSlashDotSlash):
(WebCore::cleanPath):

platform/MIMETypeRegistry.cpp:

(WebCore::MIMETypeRegistry::getMIMETypeForPath):

platform/SchemeRegistry.cpp:

(WebCore::SchemeRegistry::shouldTreatURLAsLocal):

platform/graphics/MediaPlayer.cpp:

(WebCore::MediaPlayer::load):

platform/mac/DragImageMac.mm:

(WebCore::createDragImageIconForCachedImage):

platform/network/CredentialStorage.cpp:

(WebCore::protectionSpaceMapKeyFromURL):
(WebCore::findDefaultProtectionSpaceForURL):

platform/network/HTTPParsers.cpp:

(WebCore::skipWhiteSpace):
(WebCore::skipToken):
(WebCore::parseHTTPRefresh):
(WebCore::filenameFromHTTPContentDisposition):
(WebCore::findCharsetInMediaType):
(WebCore::parseXSSProtectionHeader):
(WebCore::extractReasonPhraseFromHTTPStatusLine):

platform/network/ResourceResponseBase.cpp:

(WebCore::ResourceResponseBase::isAttachment):
(WebCore::parseCacheHeader):

rendering/RenderEmbeddedObject.cpp:

(WebCore::RenderEmbeddedObject::updateWidget):

storage/Entry.cpp:

(WebCore::Entry::Entry):

svg/SVGFont.cpp:

(WebCore::isCompatibleGlyph):

svg/SVGURIReference.cpp:

(WebCore::SVGURIReference::getTarget):

svg/animation/SVGSMILElement.cpp:

(WebCore::SVGSMILElement::parseClockValue):
(WebCore::SVGSMILElement::parseCondition):

xml/XPathFunctions.cpp:

(WebCore::XPath::FunSubstringBefore::evaluate):
(WebCore::XPath::FunSubstringAfter::evaluate):
(WebCore::XPath::FunTranslate::evaluate):
(WebCore::XPath::FunLang::evaluate):

xml/XPathParser.cpp:

(WebCore::XPath::Parser::expandQName):

Location:

trunk/JavaScriptCore

Files:

: 15 edited

ChangeLog (modified) (1 diff)
JavaScriptCore.exp (modified) (2 diffs)
bytecode/CodeBlock.cpp (modified) (1 diff)
bytecompiler/NodesCodegen.cpp (modified) (1 diff)
runtime/JSString.cpp (modified) (3 diffs)
runtime/RegExp.cpp (modified) (1 diff)
runtime/RegExpKey.h (modified) (1 diff)
runtime/StringPrototype.cpp (modified) (7 diffs)
runtime/UString.cpp (modified) (1 diff)
runtime/UString.h (modified) (2 diffs)
wtf/text/AtomicString.h (modified) (1 diff)
wtf/text/StringImpl.cpp (modified) (8 diffs)
wtf/text/StringImpl.h (modified) (1 diff)
wtf/text/WTFString.cpp (modified) (2 diffs)
wtf/text/WTFString.h (modified) (4 diffs)

Legend:

: Unmodified
: Added
: Removed

trunk/JavaScriptCore/ChangeLog

-              r65445
+              r65468
+-08-16  Gavin Barraclough  <[email protected]>
+        Reviewed by Sam Weinig
+        Bug 44080 - String find/reverseFind methods need tidying up
+        These methods have a couple of problems with their interface, and implementation.
+        These methods take and int index, and return an int - however this is problematic
+        since on 64-bit string indices may have a full 32-bit range.  This spills out into
+        surrounding code, which unsafely casts string indices from unsigned to int. Code
+        checking the result of these methods check for a mix of "== -1", "< 0", and
+        "== notFound".  Clean this up by changing these methods to take an unsigned
+        starting index, and return a size_t. with a failed match indicated by notFound.
+        reverseFind also has a special meaning for the starting index argument, in that a
+        negative index is interpreted as an offset back from the end of the string. Remove
+        this functionality, in the (1!) case where it is used we should just calculate the
+        offset by subtracting from the string's length.
+        The implementation has a few problems too.  The code is not in webkit style, in
+        using assorted abbreviations in variable names, and implementations of similar
+        find methods with differing argument types were unnecessarily inconsistent. When
+        find is passed const char* data the string would be handled as latin1 (zero
+        extended to UTF-16) for all characters but the first; this is sign extended.
+        Case-insensitive find is broken for unicode strings; the hashing optimization is
+        not unicode safe, and could result in false negatives.
+        Unify UString find methods to match String.
+        * JavaScriptCore.exp:
+        * bytecode/CodeBlock.cpp:
+        (JSC::escapeQuotes):
+        * bytecompiler/NodesCodegen.cpp:
+        (JSC::substitute):
+        * runtime/JSString.cpp:
+        (JSC::JSString::replaceCharacter):
+        * runtime/RegExp.cpp:
+        (JSC::RegExp::RegExp):
+        * runtime/RegExpKey.h:
+        (JSC::RegExpKey::getFlagsValue):
+        * runtime/StringPrototype.cpp:
+        (JSC::substituteBackreferencesSlow):
+        (JSC::substituteBackreferences):
+        (JSC::stringProtoFuncReplace):
+        (JSC::stringProtoFuncIndexOf):
+        (JSC::stringProtoFuncLastIndexOf):
+        (JSC::stringProtoFuncSplit):
+        * runtime/UString.cpp:
+        * runtime/UString.h:
+        (JSC::UString::find):
+        (JSC::UString::reverseFind):
+        * wtf/text/AtomicString.h:
+        (WTF::AtomicString::find):
+        * wtf/text/StringImpl.cpp:
+        (WTF::StringImpl::find):
+        (WTF::StringImpl::findCaseInsensitive):
+        (WTF::StringImpl::reverseFind):
+        (WTF::StringImpl::reverseFindCaseInsensitive):
+        (WTF::StringImpl::endsWith):
+        (WTF::StringImpl::replace):
+        * wtf/text/StringImpl.h:
+        (WTF::StringImpl::startsWith):
+        * wtf/text/WTFString.cpp:
+        (WTF::String::split):
+        * wtf/text/WTFString.h:
+        (WTF::String::find):
+        (WTF::String::reverseFind):
+        (WTF::String::findCaseInsensitive):
+        (WTF::String::reverseFindCaseInsensitive):
+        (WTF::String::contains):
+        (WTF::find):
+        (WTF::reverseFind):
 -08-16  Kevin Ollivier  <[email protected]>

trunk/JavaScriptCore/JavaScriptCore.exp

-              r65344
+              r65468
 __ZN3JSCgtERKNS_7UStringES2_
 __ZN3JSCltERKNS_7UStringES2_
 __ZN3WTF10StringImpl11reverseFindEPS0_ib
 __ZN3WTF10StringImpl11reverseFindEti
+__ZN3WTF10StringImpl11reverseFindEPS0_j
+__ZN3WTF10StringImpl11reverseFindEtj
 __ZN3WTF10StringImpl12sharedBufferEv
+__ZN3WTF10StringImpl16findIgnoringCaseEPKcj
+__ZN3WTF10StringImpl16findIgnoringCaseEPS0_j
 __ZN3WTF10StringImpl18simplifyWhiteSpaceEv
 __ZN3WTF10StringImpl19characterStartingAtEj
 …
 __ZN3WTF10StringImpl22containsOnlyWhitespaceEv
 __ZN3WTF10StringImpl23defaultWritingDirectionEv
+__ZN3WTF10StringImpl23reverseFindIgnoringCaseEPS0_j
 __ZN3WTF10StringImpl37createStrippingNullCharactersSlowCaseEPKtj
 __ZN3WTF10StringImpl4findEPFbtEi
 __ZN3WTF10StringImpl4findEPKcib
 __ZN3WTF10StringImpl4findEPS0_ib
 __ZN3WTF10StringImpl4findEti
+__ZN3WTF10StringImpl4findEPFbtEj
+__ZN3WTF10StringImpl4findEPKcj
+__ZN3WTF10StringImpl4findEPS0_j
+__ZN3WTF10StringImpl4findEtj
 __ZN3WTF10StringImpl5adoptERNS_12StringBufferE
 __ZN3WTF10StringImpl5emptyEv

trunk/JavaScriptCore/bytecode/CodeBlock.cpp

-              r65344
+              r65468
+{
     UString result = str;
     unsigned pos = 0;
     while ((pos = result.find('\"', pos)) != UString::NotFound) {
+    size_t pos = 0;
+    while ((pos = result.find('\"', pos)) != notFound) {
         result = makeString(result.substr(0, pos), "\"\\\"\"", result.substr(pos + 1));
         pos += 4;

trunk/JavaScriptCore/bytecompiler/NodesCodegen.cpp

r65177	r65468
79	79	static void substitute(UString& string, const UString& substring)
80	80	{
81		~~unsigned~~ position = string.find("%s");
82		ASSERT(position != ~~UString::N~~otFound);
	81	size_t position = string.find("%s");
	82	ASSERT(position != notFound);
83	83	string = makeString(string.substr(0, position), substring, string.substr(position + 2));
84	84	}

trunk/JavaScriptCore/runtime/JSString.cpp

-              r65177
+              r65468
+{
     if (!isRope()) {
         unsigned matchPosition = m_value.find(character);
         if (matchPosition == UString::NotFound)
+        size_t matchPosition = m_value.find(character);
+        if (matchPosition == notFound)
             return JSValue(this);
         return jsString(exec, m_value.substr(0, matchPosition), replacement, m_value.substr(matchPosition + 1));
 …
     size_t fiberCount = 0;
     StringImpl* matchString = 0;
     int matchPosition = -1;
+    size_t matchPosition = notFound;
     for (RopeIterator it(m_other.m_fibers.data(), m_fiberCount); it != end; ++it) {
         ++fiberCount;
 …
         StringImpl* string = *it;
         matchPosition = string->find(character);
         if (matchPosition == -1)
+        if (matchPosition == notFound)
             continue;
         matchString = string;

trunk/JavaScriptCore/runtime/RegExp.cpp

-              r65188
+              r65468
     // String::match and RegExpObject::match.
     if (!flags.isNull()) {
         if (flags.find('g') != UString::NotFound)
+        if (flags.find('g') != notFound)
             m_flagBits |= Global;
         if (flags.find('i') != UString::NotFound)
+        if (flags.find('i') != notFound)
             m_flagBits |= IgnoreCase;
         if (flags.find('m') != UString::NotFound)
+        if (flags.find('m') != notFound)
             m_flagBits |= Multiline;
+    }

trunk/JavaScriptCore/runtime/RegExpKey.h

-              r65286
+              r65468
+    {
         flagsValue = 0;
         if (flags.find('g') != UString::NotFound)
+        if (flags.find('g') != notFound)
             flagsValue += 4;
         if (flags.find('i') != UString::NotFound)
+        if (flags.find('i') != notFound)
             flagsValue += 2;
         if (flags.find('m') != UString::NotFound)
+        if (flags.find('m') != notFound)
             flagsValue += 1;
         return flagsValue;

trunk/JavaScriptCore/runtime/StringPrototype.cpp

-              r65177
+              r65468
 // ------------------------------ Functions --------------------------
 static NEVER_INLINE UString substituteBackreferencesSlow(const UString& replacement, const UString& source, const int* ovector, RegExp* reg, unsigned i)
+static NEVER_INLINE UString substituteBackreferencesSlow(const UString& replacement, const UString& source, const int* ovector, RegExp* reg, size_t i)
+{
     Vector<UChar> substitutedReplacement;
 …
         offset = i + 1;
         substitutedReplacement.append(source.characters() + backrefStart, backrefLength);
     } while ((i = replacement.find('$', i + 1)) != UString::NotFound);
+    } while ((i = replacement.find('$', i + 1)) != notFound);
     if (replacement.length() - offset)
 …
 static inline UString substituteBackreferences(const UString& replacement, const UString& source, const int* ovector, RegExp* reg)
+{
     unsigned i = replacement.find('$', 0);
     if (UNLIKELY(i != UString::NotFound))
+    size_t i = replacement.find('$', 0);
+    if (UNLIKELY(i != notFound))
         return substituteBackreferencesSlow(replacement, source, ovector, reg, i);
     return replacement;
 …
     const UString& source = sourceVal->value(exec);
     unsigned matchPos = source.find(patternString);
     if (matchPos == UString::NotFound)
+    size_t matchPos = source.find(patternString);
+    if (matchPos == notFound)
         return JSValue::encode(sourceVal);
 …
+    }
     unsigned result = s.find(u2, pos);
     if (result == UString::NotFound)
+    size_t result = s.find(u2, pos);
+    if (result == notFound)
         return JSValue::encode(jsNumber(exec, -1));
     return JSValue::encode(jsNumber(exec, result));
 …
 #endif
     unsigned result = s.rfind(u2, static_cast<unsigned>(dpos));
     if (result == UString::NotFound)
+    size_t result = s.reverseFind(u2, static_cast<unsigned>(dpos));
+    if (result == notFound)
         return JSValue::encode(jsNumber(exec, -1));
     return JSValue::encode(jsNumber(exec, result));
 …
                 result->put(exec, i++, jsSingleCharacterSubstring(exec, s, p0++));
         } else {
+            unsigned pos;
+            while (i != limit && (pos = s.find(u2, p0)) != UString::NotFound) {
+            size_t pos;
+            while (i != limit && (pos = s.find(u2, p0)) != notFound) {
                 result->put(exec, i++, jsSubstring(exec, s, p0, pos - p0));
                 p0 = pos + u2.length();

trunk/JavaScriptCore/runtime/UString.cpp

-              r65344
+              r65468
+}
-unsigned UString::find(const UString& f, unsigned pos) const
+{
-    unsigned fsz = f.length();
-    if (fsz == 1) {
-        UChar ch = f[0];
-        const UChar* end = characters() + length();
-        for (const UChar* c = characters() + pos; c < end; c++) {
-            if (*c == ch)
-                return static_cast<unsigned>(c - characters());
+        }
-        return NotFound;
+    }
-    unsigned sz = length();
-    if (sz < fsz)
-        return NotFound;
-    if (fsz == 0)
-        return pos;
-    const UChar* end = characters() + sz - fsz;
-    unsigned fsizeminusone = (fsz - 1) * sizeof(UChar);
-    const UChar* fdata = f.characters();
-    unsigned short fchar = fdata[0];
-    ++fdata;
-    for (const UChar* c = characters() + pos; c <= end; c++) {
-        if (c[0] == fchar && !memcmp(c + 1, fdata, fsizeminusone))
-            return static_cast<unsigned>(c - characters());
+    }
-    return NotFound;
+}
-unsigned UString::find(UChar ch, unsigned pos) const
+{
-    const UChar* end = characters() + length();
-    for (const UChar* c = characters() + pos; c < end; c++) {
-        if (*c == ch)
-            return static_cast<unsigned>(c - characters());
+    }
-    return NotFound;
+}
-unsigned UString::rfind(const UString& f, unsigned pos) const
+{
-    unsigned sz = length();
-    unsigned fsz = f.length();
-    if (sz < fsz)
-        return NotFound;
-    if (pos > sz - fsz)
-        pos = sz - fsz;
-    if (fsz == 0)
-        return pos;
-    unsigned fsizeminusone = (fsz - 1) * sizeof(UChar);
-    const UChar* fdata = f.characters();
-    for (const UChar* c = characters() + pos; c >= characters(); c--) {
-        if (*c == *fdata && !memcmp(c + 1, fdata + 1, fsizeminusone))
-            return static_cast<unsigned>(c - characters());
+    }
-    return NotFound;
+}
-unsigned UString::rfind(UChar ch, unsigned pos) const
+{
-    if (isEmpty())
-        return NotFound;
-    if (pos + 1 >= length())
-        pos = length() - 1;
-    for (const UChar* c = characters() + pos; c >= characters(); c--) {
-        if (*c == ch)
-            return static_cast<unsigned>(c - characters());
+    }
-    return NotFound;
+}
 UString UString::substr(unsigned pos, unsigned len) const
+{

trunk/JavaScriptCore/runtime/UString.h

-              r65344
+              r65468
     static UString number(double);
+    // Find a single character or string, also with match function & latin1 forms.
+    size_t find(UChar c, unsigned start = 0) const
+        { return m_impl ? m_impl->find(c, start) : notFound; }
+    size_t find(const UString& str, unsigned start = 0) const
+        { return m_impl ? m_impl->find(str.impl(), start) : notFound; }
+    size_t find(const char* str, unsigned start = 0) const
+        { return m_impl ? m_impl->find(str, start) : notFound; }
+    // Find the last instance of a single character or string.
+    size_t reverseFind(UChar c, unsigned start = UINT_MAX) const
+        { return m_impl ? m_impl->reverseFind(c, start) : notFound; }
+    size_t reverseFind(const UString& str, unsigned start = UINT_MAX) const
+        { return m_impl ? m_impl->reverseFind(str.impl(), start) : notFound; }
     double toDouble(bool tolerateTrailingJunk, bool tolerateEmptyString) const;
 …
     uint32_t toStrictUInt32(bool* ok = 0) const;
+    static const unsigned NotFound = 0xFFFFFFFFu;
+    unsigned find(const UString& f, unsigned pos = 0) const;
+    unsigned find(UChar, unsigned pos = 0) const;
+    unsigned rfind(const UString& f, unsigned pos) const;
+    unsigned rfind(UChar, unsigned pos) const;
+    UString substr(unsigned pos = 0, unsigned len = 0xFFFFFFFF) const;
+    UString substr(unsigned pos = 0, unsigned len = UINT_MAX) const;
 private:

trunk/JavaScriptCore/wtf/text/AtomicString.h

-              r65077
+              r65468
         { return m_string.contains(s, caseSensitive); }
     int find(UChar c, int start = 0) const { return m_string.find(c, start); }
     int find(const char* s, int start = 0, bool caseSentitive = true) const
+    size_t find(UChar c, size_t start = 0) const { return m_string.find(c, start); }
+    size_t find(const char* s, size_t start = 0, bool caseSentitive = true) const
         { return m_string.find(s, start, caseSentitive); }
     int find(const String& s, int start = 0, bool caseSentitive = true) const
+    size_t find(const String& s, size_t start = 0, bool caseSentitive = true) const
         { return m_string.find(s, start, caseSentitive); }

trunk/JavaScriptCore/wtf/text/StringImpl.cpp

-              r65344
+              r65468
+}
+int StringImpl::find(const char* chs, int index, bool caseSensitive)
+{
+    if (!chs || index < 0)
+        return -1;
+    int chsLength = strlen(chs);
+    int n = m_length - index;
+    if (n < 0)
+        return -1;
+    n -= chsLength - 1;
+    if (n <= 0)
+        return -1;
+    const char* chsPlusOne = chs + 1;
+    int chsLengthMinusOne = chsLength - 1;
+    const UChar* ptr = m_data + index - 1;
+    if (caseSensitive) {
+        UChar c = *chs;
+        do {
+            if (*++ptr == c && equal(ptr + 1, chsPlusOne, chsLengthMinusOne))
+                return m_length - chsLength - n + 1;
+        } while (--n);
+    } else {
+        UChar lc = Unicode::foldCase(*chs);
+        do {
+            if (Unicode::foldCase(*++ptr) == lc && equalIgnoringCase(ptr + 1, chsPlusOne, chsLengthMinusOne))
+                return m_length - chsLength - n + 1;
+        } while (--n);
+    }
+    return -1;
+}
+int StringImpl::find(UChar c, int start)
+size_t StringImpl::find(UChar c, unsigned start)
+{
     return WTF::find(m_data, m_length, c, start);
+}
 int StringImpl::find(CharacterMatchFunctionPtr matchFunction, int start)
+size_t StringImpl::find(CharacterMatchFunctionPtr matchFunction, unsigned start)
+{
     return WTF::find(m_data, m_length, matchFunction, start);
+}
+int StringImpl::find(StringImpl* str, int index, bool caseSensitive)
+{
+    /*
+      We use a simple trick for efficiency's sake. Instead of
+      comparing strings, we compare the sum of str with that of
+      a part of this string. Only if that matches, we call memcmp
+      or ucstrnicmp.
+    */
+    ASSERT(str);
+    if (index < 0)
+        index += m_length;
+    int lstr = str->m_length;
+    int lthis = m_length - index;
+    if ((unsigned)lthis > m_length)
+        return -1;
+    int delta = lthis - lstr;
+    if (delta < 0)
+        return -1;
+    const UChar* uthis = m_data + index;
+    const UChar* ustr = str->m_data;
+    unsigned hthis = 0;
+    unsigned hstr = 0;
+    if (caseSensitive) {
+        for (int i = 0; i < lstr; i++) {
+            hthis += uthis[i];
+            hstr += ustr[i];
+        }
+        int i = 0;
+        while (1) {
+            if (hthis == hstr && memcmp(uthis + i, ustr, lstr * sizeof(UChar)) == 0)
+                return index + i;
+            if (i == delta)
+                return -1;
+            hthis += uthis[i + lstr];
+            hthis -= uthis[i];
+            i++;
+        }
+    } else {
+        for (int i = 0; i < lstr; i++ ) {
+            hthis += toASCIILower(uthis[i]);
+            hstr += toASCIILower(ustr[i]);
+        }
+        int i = 0;
+        while (1) {
+            if (hthis == hstr && equalIgnoringCase(uthis + i, ustr, lstr))
+                return index + i;
+            if (i == delta)
+                return -1;
+            hthis += toASCIILower(uthis[i + lstr]);
+            hthis -= toASCIILower(uthis[i]);
+            i++;
+        }
+    }
+}
+int StringImpl::reverseFind(UChar c, int index)
+size_t StringImpl::find(const char* matchString, unsigned index)
+{
+    // Check for null or empty string to match against
+    if (!matchString)
+        return notFound;
+    unsigned matchLength = strlen(matchString);
+    if (!matchLength)
+        return min(index, length());
+    // Optimization 1: fast case for strings of length 1.
+    if (matchLength == 1)
+        return WTF::find(characters(), length(), *(const unsigned char*)matchString, index);
+    // Check index & matchLength are in range.
+    if (index > length())
+        return notFound;
+    unsigned searchLength = length() - index;
+    if (matchLength > searchLength)
+        return notFound;
+    // delta is the number of additional times to test; delta == 0 means test only once.
+    unsigned delta = searchLength - matchLength;
+    const UChar* searchCharacters = characters() + index;
+    const unsigned char* matchCharacters = (const unsigned char*)matchString;
+    // Optimization 2: keep a running hash of the strings,
+    // only call memcmp if the hashes match.
+    unsigned searchHash = 0;
+    unsigned matchHash = 0;
+    for (unsigned i = 0; i < matchLength; ++i) {
+        searchHash += searchCharacters[i];
+        matchHash += matchCharacters[i];
+    }
+    for (unsigned i = 0; i <= delta; ++i) {
+        if (searchHash == matchHash && equal(searchCharacters + i, matchString, matchLength))
+            return index + i;
+        searchHash += searchCharacters[i + matchLength];
+        searchHash -= searchCharacters[i];
+    }
+    return notFound;
+}
+size_t StringImpl::findIgnoringCase(const char* matchString, unsigned index)
+{
+    // Check for null or empty string to match against
+    if (!matchString)
+        return notFound;
+    unsigned matchLength = strlen(matchString);
+    if (!matchLength)
+        return min(index, length());
+    // Check index & matchLength are in range.
+    if (index > length())
+        return notFound;
+    unsigned searchLength = length() - index;
+    if (matchLength > searchLength)
+        return notFound;
+    // delta is the number of additional times to test; delta == 0 means test only once.
+    unsigned delta = searchLength - matchLength;
+    const UChar* searchCharacters = characters() + index;
+    for (unsigned i = 0; i <= delta; ++i) {
+        if (equalIgnoringCase(searchCharacters + i, matchString, matchLength))
+            return index + i;
+    }
+    return notFound;
+}
+size_t StringImpl::find(StringImpl* matchString, unsigned index)
+{
+    // Check for null or empty string to match against
+    if (!matchString)
+        return notFound;
+    unsigned matchLength = matchString->length();
+    if (!matchLength)
+        return min(index, length());
+    // Optimization 1: fast case for strings of length 1.
+    if (matchLength == 1)
+        return WTF::find(characters(), length(), matchString->characters()[0], index);
+    // Check index & matchLength are in range.
+    if (index > length())
+        return notFound;
+    unsigned searchLength = length() - index;
+    if (matchLength > searchLength)
+        return notFound;
+    // delta is the number of additional times to test; delta == 0 means test only once.
+    unsigned delta = searchLength - matchLength;
+    const UChar* searchCharacters = characters() + index;
+    const UChar* matchCharacters = matchString->characters();
+    // Optimization 2: keep a running hash of the strings,
+    // only call memcmp if the hashes match.
+    unsigned searchHash = 0;
+    unsigned matchHash = 0;
+    for (unsigned i = 0; i < matchLength; ++i) {
+        searchHash += searchCharacters[i];
+        matchHash += matchCharacters[i];
+    }
+    for (unsigned i = 0; i <= delta; ++i) {
+        if (searchHash == matchHash && memcmp(searchCharacters + i, matchCharacters, matchLength * sizeof(UChar)) == 0)
+            return index + i;
+        searchHash += searchCharacters[i + matchLength];
+        searchHash -= searchCharacters[i];
+    }
+    return notFound;
+}
+size_t StringImpl::findIgnoringCase(StringImpl* matchString, unsigned index)
+{
+    // Check for null or empty string to match against
+    if (!matchString)
+        return notFound;
+    unsigned matchLength = matchString->length();
+    if (!matchLength)
+        return min(index, length());
+    // Check index & matchLength are in range.
+    if (index > length())
+        return notFound;
+    unsigned searchLength = length() - index;
+    if (matchLength > searchLength)
+        return notFound;
+    // delta is the number of additional times to test; delta == 0 means test only once.
+    unsigned delta = searchLength - matchLength;
+    const UChar* searchCharacters = characters() + index;
+    const UChar* matchCharacters = matchString->characters();
+    for (unsigned i = 0; i <= delta; ++i) {
+        if (equalIgnoringCase(searchCharacters + i, matchCharacters, matchLength))
+            return index + i;
+    }
+    return notFound;
+}
+size_t StringImpl::reverseFind(UChar c, unsigned index)
+{
     return WTF::reverseFind(m_data, m_length, c, index);
+}
+int StringImpl::reverseFind(StringImpl* str, int index, bool caseSensitive)
+{
+    /*
+     See StringImpl::find() for explanations.
+     */
+    ASSERT(str);
+    int lthis = m_length;
+    if (index < 0)
+        index += lthis;
+    int lstr = str->m_length;
+    int delta = lthis - lstr;
+    if ( index < 0 || index > lthis || delta < 0 )
+        return -1;
+    if ( index > delta )
+        index = delta;
+    const UChar *uthis = m_data;
+    const UChar *ustr = str->m_data;
+    unsigned hthis = 0;
+    unsigned hstr = 0;
+    int i;
+    if (caseSensitive) {
+        for ( i = 0; i < lstr; i++ ) {
+            hthis += uthis[index + i];
+            hstr += ustr[i];
+        }
+        i = index;
+        while (1) {
+            if (hthis == hstr && memcmp(uthis + i, ustr, lstr * sizeof(UChar)) == 0)
+                return i;
+            if (i == 0)
+                return -1;
+            i--;
+            hthis -= uthis[i + lstr];
+            hthis += uthis[i];
+        }
+    } else {
+        for (i = 0; i < lstr; i++) {
+            hthis += toASCIILower(uthis[index + i]);
+            hstr += toASCIILower(ustr[i]);
+        }
+        i = index;
+        while (1) {
+            if (hthis == hstr && equalIgnoringCase(uthis + i, ustr, lstr) )
+                return i;
+            if (i == 0)
+                return -1;
+            i--;
+            hthis -= toASCIILower(uthis[i + lstr]);
+            hthis += toASCIILower(uthis[i]);
+        }
+    }
+    // Should never get here.
+    return -1;
+size_t StringImpl::reverseFind(StringImpl* matchString, unsigned index) // Finds the last occurrence of matchString that starts at or before index; returns its start offset, or notFound.
+{
+    // Check for null or empty string to match against: a null matchString never matches, an empty one matches at index clamped to length().
+    if (!matchString)
+        return notFound;
+    unsigned matchLength = matchString->length();
+    if (!matchLength)
+        return min(index, length());
+    // Optimization 1: fast case for strings of length 1 - delegate to the single-character WTF::reverseFind.
+    if (matchLength == 1)
+        return WTF::reverseFind(characters(), length(), matchString->characters()[0], index);
+    // Check index & matchLength are in range. This also guarantees length() - matchLength below cannot wrap around.
+    if (matchLength > length())
+        return notFound;
+    // delta is the number of additional times to test; delta == 0 means test only once. It doubles as the current candidate start offset, beginning at the highest offset allowed by both index and the string length.
+    unsigned delta = min(index, length() - matchLength);
+    const UChar *searchCharacters = characters();
+    const UChar *matchCharacters = matchString->characters();
+    // Optimization 2: keep a running hash of the strings,
+    // only call memcmp if the hashes match. The hash is the plain sum of the UChar values in the window.
+    unsigned searchHash = 0;
+    unsigned matchHash = 0;
+    for (unsigned i = 0; i < matchLength; ++i) {
+        searchHash += searchCharacters[delta + i];
+        matchHash += matchCharacters[i];
+    }
+    while (searchHash != matchHash || memcmp(searchCharacters + delta, matchCharacters, matchLength * sizeof(UChar))) { // Loop while the window at delta is not a match (memcmp returns non-zero on mismatch).
+        if (!delta--) // Tests delta before decrementing: offset 0 has just been rejected, so the search is exhausted.
+            return notFound;
+        searchHash -= searchCharacters[delta + matchLength]; // Remove the character that left the window on the right.
+        searchHash += searchCharacters[delta]; // Add the character that entered the window on the left.
+    }
+    return delta;
+}
+size_t StringImpl::reverseFindIgnoringCase(StringImpl* matchString, unsigned index) // Case-insensitive counterpart of reverseFind: last match starting at or before index, or notFound.
+{
+    // Check for null or empty string to match against: a null matchString never matches, an empty one matches at index clamped to length().
+    if (!matchString)
+        return notFound;
+    unsigned matchLength = matchString->length();
+    if (!matchLength)
+        return min(index, length());
+    // Check index & matchLength are in range. This also guarantees length() - matchLength below cannot wrap around.
+    if (matchLength > length())
+        return notFound;
+    // delta is the number of additional times to test; delta == 0 means test only once. It is also the candidate start offset being compared.
+    unsigned delta = min(index, length() - matchLength);
+    const UChar *searchCharacters = characters();
+    const UChar *matchCharacters = matchString->characters();
+    while (!equalIgnoringCase(searchCharacters + delta, matchCharacters, matchLength)) { // No running hash here: every candidate offset is compared directly with equalIgnoringCase.
+        if (!delta--) // Tests delta before decrementing: offset 0 has just been rejected, so the search is exhausted.
+            return notFound;
+    }
+    return delta;
+}
 …
+{
     ASSERT(m_data);
+    int start = m_length - m_data->m_length;
+    if (start >= 0)
+        return (find(m_data, start, caseSensitive) == start);
+    if (m_length >= m_data->m_length) {
+        unsigned start = m_length - m_data->m_length;
+        return (caseSensitive ? find(m_data, start) : findIgnoringCase(m_data, start)) == start;
+    }
     return false;
+}
 …
         return this;
     int repStrLength = replacement->length();
     int srcSegmentStart = 0;
     int matchCount = 0;
+    unsigned repStrLength = replacement->length();
+    size_t srcSegmentStart = 0;
+    unsigned matchCount = 0;
     // Count the matches
     while ((srcSegmentStart = find(pattern, srcSegmentStart)) >= 0) {
+    while ((srcSegmentStart = find(pattern, srcSegmentStart)) != notFound) {
         ++matchCount;
         ++srcSegmentStart;
 …
     // Construct the new data
     int srcSegmentEnd;
     int srcSegmentLength;
+    size_t srcSegmentEnd;
+    unsigned srcSegmentLength;
     srcSegmentStart = 0;
     int dstOffset = 0;
     while ((srcSegmentEnd = find(pattern, srcSegmentStart)) >= 0) {
+    unsigned dstOffset = 0;
+    while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != notFound) {
         srcSegmentLength = srcSegmentEnd - srcSegmentStart;
         memcpy(data + dstOffset, m_data + srcSegmentStart, srcSegmentLength * sizeof(UChar));
 …
     memcpy(data + dstOffset, m_data + srcSegmentStart, srcSegmentLength * sizeof(UChar));
     ASSERT(dstOffset + srcSegmentLength == static_cast<int>(newImpl->length()));
+    ASSERT(dstOffset + srcSegmentLength == newImpl->length());
     return newImpl;
 …
         return this;
     int patternLength = pattern->length();
+    unsigned patternLength = pattern->length();
     if (!patternLength)
         return this;
     int repStrLength = replacement->length();
     int srcSegmentStart = 0;
     int matchCount = 0;
+    unsigned repStrLength = replacement->length();
+    size_t srcSegmentStart = 0;
+    unsigned matchCount = 0;
     // Count the matches
     while ((srcSegmentStart = find(pattern, srcSegmentStart)) >= 0) {
+    while ((srcSegmentStart = find(pattern, srcSegmentStart)) != notFound) {
         ++matchCount;
         srcSegmentStart += patternLength;
 …
     // Construct the new data
     int srcSegmentEnd;
     int srcSegmentLength;
+    size_t srcSegmentEnd;
+    unsigned srcSegmentLength;
     srcSegmentStart = 0;
     int dstOffset = 0;
     while ((srcSegmentEnd = find(pattern, srcSegmentStart)) >= 0) {
+    unsigned dstOffset = 0;
+    while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != notFound) {
         srcSegmentLength = srcSegmentEnd - srcSegmentStart;
         memcpy(data + dstOffset, m_data + srcSegmentStart, srcSegmentLength * sizeof(UChar));
 …
     memcpy(data + dstOffset, m_data + srcSegmentStart, srcSegmentLength * sizeof(UChar));
     ASSERT(dstOffset + srcSegmentLength == static_cast<int>(newImpl->length()));
+    ASSERT(dstOffset + srcSegmentLength == newImpl->length());
     return newImpl;

trunk/JavaScriptCore/wtf/text/StringImpl.h

-              r65344
+              r65468
     PassRefPtr<StringImpl> removeCharacters(CharacterMatchFunctionPtr);
+    int find(const char*, int index = 0, bool caseSensitive = true);
+    int find(UChar, int index = 0);
+    int find(CharacterMatchFunctionPtr, int index = 0);
+    int find(StringImpl*, int index, bool caseSensitive = true);
+    int reverseFind(UChar, int index);
+    int reverseFind(StringImpl*, int index, bool caseSensitive = true);
+    bool startsWith(StringImpl* str, bool caseSensitive = true) { return reverseFind(str, 0, caseSensitive) == 0; }
+    size_t find(UChar, unsigned index = 0);
+    size_t find(CharacterMatchFunctionPtr, unsigned index = 0);
+    size_t find(const char*, unsigned index = 0);
+    size_t find(StringImpl*, unsigned index = 0);
+    size_t findIgnoringCase(const char*, unsigned index = 0);
+    size_t findIgnoringCase(StringImpl*, unsigned index = 0);
+    size_t reverseFind(UChar, unsigned index = UINT_MAX);
+    size_t reverseFind(StringImpl*, unsigned index = UINT_MAX);
+    size_t reverseFindIgnoringCase(StringImpl*, unsigned index = UINT_MAX);
+    bool startsWith(StringImpl* str, bool caseSensitive = true) { return (caseSensitive ? reverseFind(str, 0) : reverseFindIgnoringCase(str, 0)) == 0; }
     bool endsWith(StringImpl*, bool caseSensitive = true);

trunk/JavaScriptCore/wtf/text/WTFString.cpp

-              r65344
+              r65468
     result.clear();
     int startPos = 0;
     int endPos;
     while ((endPos = find(separator, startPos)) != -1) {
+    unsigned startPos = 0;
+    size_t endPos;
+    while ((endPos = find(separator, startPos)) != notFound) {
         if (allowEmptyEntries || startPos != endPos)
             result.append(substring(startPos, endPos - startPos));
         startPos = endPos + separator.length();
+    }
     if (allowEmptyEntries || startPos != static_cast<int>(length()))
+    if (allowEmptyEntries || startPos != length())
         result.append(substring(startPos));
+}
 …
     result.clear();
     int startPos = 0;
     int endPos;
     while ((endPos = find(separator, startPos)) != -1) {
+    unsigned startPos = 0;
+    size_t endPos;
+    while ((endPos = find(separator, startPos)) != notFound) {
         if (allowEmptyEntries || startPos != endPos)
             result.append(substring(startPos, endPos - startPos));
         startPos = endPos + 1;
+    }
     if (allowEmptyEntries || startPos != static_cast<int>(length()))
+    if (allowEmptyEntries || startPos != length())
         result.append(substring(startPos));
+}

trunk/JavaScriptCore/wtf/text/WTFString.h

-              r65344
+              r65468
 float charactersToFloat(const UChar*, size_t, bool* ok = 0);
-int find(const UChar*, size_t, UChar, int startPosition = 0);
-int reverseFind(const UChar*, size_t, UChar, int startPosition = -1);
 class String {
 public:
 …
     static String number(double);
+    // Find a single character or string, also with match function & latin1 forms.
+    size_t find(UChar c, unsigned start = 0) const
+        { return m_impl ? m_impl->find(c, start) : notFound; }
+    size_t find(const String& str, unsigned start = 0) const
+        { return m_impl ? m_impl->find(str.impl(), start) : notFound; }
+    size_t find(CharacterMatchFunctionPtr matchFunction, unsigned start = 0) const
+        { return m_impl ? m_impl->find(matchFunction, start) : notFound; }
+    size_t find(const char* str, unsigned start = 0) const
+        { return m_impl ? m_impl->find(str, start) : notFound; }
+    // Find the last instance of a single character or string.
+    size_t reverseFind(UChar c, unsigned start = UINT_MAX) const
+        { return m_impl ? m_impl->reverseFind(c, start) : notFound; }
+    size_t reverseFind(const String& str, unsigned start = UINT_MAX) const
+        { return m_impl ? m_impl->reverseFind(str.impl(), start) : notFound; }
+    // Case insensitive string matching.
+    size_t findIgnoringCase(const char* str, unsigned start = 0) const
+        { return m_impl ? m_impl->findIgnoringCase(str, start) : notFound; }
+    size_t findIgnoringCase(const String& str, unsigned start = 0) const
+        { return m_impl ? m_impl->findIgnoringCase(str.impl(), start) : notFound; }
+    size_t reverseFindIgnoringCase(const String& str, unsigned start = UINT_MAX) const
+        { return m_impl ? m_impl->reverseFindIgnoringCase(str.impl(), start) : notFound; }
+    // Wrappers for find & reverseFind adding dynamic sensitivity check.
+    size_t find(const char* str, unsigned start, bool caseSensitive) const
+        { return caseSensitive ? find(str, start) : findIgnoringCase(str, start); }
+    size_t find(const String& str, unsigned start, bool caseSensitive) const
+        { return caseSensitive ? find(str, start) : findIgnoringCase(str, start); }
+    size_t reverseFind(const String& str, unsigned start, bool caseSensitive) const
+        { return caseSensitive ? reverseFind(str, start) : reverseFindIgnoringCase(str, start); }
     const UChar* charactersWithNullTermination();
 …
     UChar32 characterStartingAt(unsigned) const; // Ditto.
+    bool contains(UChar c) const { return find(c) != -1; }
+    bool contains(const char* str, bool caseSensitive = true) const { return find(str, 0, caseSensitive) != -1; }
+    bool contains(const String& str, bool caseSensitive = true) const { return find(str, 0, caseSensitive) != -1; }
+    int find(UChar c, int start = 0) const
+        { return m_impl ? m_impl->find(c, start) : -1; }
+    int find(CharacterMatchFunctionPtr matchFunction, int start = 0) const
+        { return m_impl ? m_impl->find(matchFunction, start) : -1; }
+    int find(const char* str, int start = 0, bool caseSensitive = true) const
+        { return m_impl ? m_impl->find(str, start, caseSensitive) : -1; }
+    int find(const String& str, int start = 0, bool caseSensitive = true) const
+        { return m_impl ? m_impl->find(str.impl(), start, caseSensitive) : -1; }
+    int reverseFind(UChar c, int start = -1) const
+        { return m_impl ? m_impl->reverseFind(c, start) : -1; }
+    int reverseFind(const String& str, int start = -1, bool caseSensitive = true) const
+        { return m_impl ? m_impl->reverseFind(str.impl(), start, caseSensitive) : -1; }
+    bool contains(UChar c) const { return find(c) != notFound; }
+    bool contains(const char* str, bool caseSensitive = true) const { return find(str, 0, caseSensitive) != notFound; }
+    bool contains(const String& str, bool caseSensitive = true) const { return find(str, 0, caseSensitive) != notFound; }
     bool startsWith(const String& s, bool caseSensitive = true) const
         { return m_impl ? m_impl->startsWith(s.impl(), caseSensitive) : s.isEmpty(); }
 …
 int codePointCompare(const String&, const String&);
+inline int find(const UChar* characters, size_t length, UChar character, int startPosition) // Returns the index of the first element equal to character at or after startPosition, or -1 if there is none.
+{
+    if (startPosition >= static_cast<int>(length)) // Start at or beyond the end: nothing to search.
+        return -1;
+    for (size_t i = startPosition; i < length; ++i) { // NOTE(review): a negative startPosition converts to a very large size_t here, so the loop appears to be skipped and -1 returned - confirm.
+        if (characters[i] == character)
+            return static_cast<int>(i);
+    }
+    return -1;
+}
+inline int find(const UChar* characters, size_t length, CharacterMatchFunctionPtr matchFunction, int startPosition) // Returns the index of the first element accepted by matchFunction at or after startPosition, or -1 if there is none.
+{
+    if (startPosition >= static_cast<int>(length)) // Start at or beyond the end: nothing to search.
+        return -1;
+    for (size_t i = startPosition; i < length; ++i) { // NOTE(review): a negative startPosition converts to a very large size_t here, so the loop appears to be skipped and -1 returned - confirm.
+        if (matchFunction(characters[i]))
+            return static_cast<int>(i);
+    }
+    return -1;
+}
+inline int reverseFind(const UChar* characters, size_t length, UChar character, int startPosition)
+{
+    if (startPosition >= static_cast<int>(length) || !length)
+        return -1;
+    if (startPosition < 0)
+        startPosition += static_cast<int>(length);
+    while (true) {
+        if (characters[startPosition] == character)
+            return startPosition;
+        if (!startPosition)
+            return -1;
+        startPosition--;
+    }
+    ASSERT_NOT_REACHED();
+    return -1;
+inline size_t find(const UChar* characters, unsigned length, UChar matchCharacter, unsigned index = 0) // Returns the first position at or after index holding matchCharacter, or notFound.
+{
+    while (index < length) { // An index at or past length skips the loop entirely, so out-of-range starts yield notFound.
+        if (characters[index] == matchCharacter)
+            return index;
+        ++index;
+    }
+    return notFound;
+}
+inline size_t find(const UChar* characters, unsigned length, CharacterMatchFunctionPtr matchFunction, unsigned index = 0) // Returns the first position at or after index whose character satisfies matchFunction, or notFound.
+{
+    while (index < length) { // An index at or past length skips the loop entirely, so out-of-range starts yield notFound.
+        if (matchFunction(characters[index]))
+            return index;
+        ++index;
+    }
+    return notFound;
+}
+inline size_t reverseFind(const UChar* characters, unsigned length, UChar matchCharacter, unsigned index = UINT_MAX) // Returns the last position at or before index holding matchCharacter, or notFound.
+{
+    if (!length) // Empty input: there is no valid position to read.
+        return notFound;
+    if (index >= length) // Clamp out-of-range starts (including the UINT_MAX default) to the last character.
+        index = length - 1;
+    while (characters[index] != matchCharacter) {
+        if (!index--) // Tests index before decrementing: position 0 has just been rejected, so the search is exhausted.
+            return notFound;
+    }
+    return index;
+}

Note: See TracChangeset for help on using the changeset viewer.

Context Navigation

Changeset 65468 in webkit for trunk/JavaScriptCore

Legend:

Download in other formats: