source: webkit/trunk/JavaScriptCore/runtime/RegExp.cpp@ 65468

Last change on this file since 65468 was 65468, checked in by [email protected], 15 years ago

Bug 44080 - String find/reverseFind methods need tidying up
These methods have a couple of problems with their interface, and implementation.

Reviewed by Sam Weinig

These methods take an int index, and return an int - however this is problematic
since on 64-bit string indices may have a full 32-bit range. This spills out into
surrounding code, which unsafely casts string indices from unsigned to int. Code
checking the result of these methods checks for a mix of "== -1", "< 0", and
"== notFound". Clean this up by changing these methods to take an unsigned
starting index, and return a size_t, with a failed match indicated by notFound.
reverseFind also has a special meaning for the starting index argument, in that a
negative index is interpreted as an offset back from the end of the string. Remove
this functionality, in the (1!) case where it is used we should just calculate the
offset by subtracting from the string's length.

The implementation has a few problems too. The code is not in webkit style, in
using assorted abbreviations in variable names, and implementations of similar
find methods with differing argument types were unnecessarily inconsistent. When
find is passed const char* data the string would be handled as latin1 (zero
extended to UTF-16) for all characters but the first, which is sign extended.
Case-insensitive find is broken for unicode strings; the hashing optimization is
not unicode safe, and could result in false negatives.

Unify UString find methods to match String.

JavaScriptCore:

(JSC::escapeQuotes):

  • bytecompiler/NodesCodegen.cpp:

(JSC::substitute):

  • runtime/JSString.cpp:

(JSC::JSString::replaceCharacter):

  • runtime/RegExp.cpp:

(JSC::RegExp::RegExp):

  • runtime/RegExpKey.h:

(JSC::RegExpKey::getFlagsValue):

  • runtime/StringPrototype.cpp:

(JSC::substituteBackreferencesSlow):
(JSC::substituteBackreferences):
(JSC::stringProtoFuncReplace):
(JSC::stringProtoFuncIndexOf):
(JSC::stringProtoFuncLastIndexOf):
(JSC::stringProtoFuncSplit):

  • runtime/UString.cpp:
  • runtime/UString.h:

(JSC::UString::find):
(JSC::UString::reverseFind):

  • wtf/text/AtomicString.h:

(WTF::AtomicString::find):

  • wtf/text/StringImpl.cpp:

(WTF::StringImpl::find):
(WTF::StringImpl::findCaseInsensitive):
(WTF::StringImpl::reverseFind):
(WTF::StringImpl::reverseFindCaseInsensitive):
(WTF::StringImpl::endsWith):
(WTF::StringImpl::replace):

  • wtf/text/StringImpl.h:

(WTF::StringImpl::startsWith):

  • wtf/text/WTFString.cpp:

(WTF::String::split):

  • wtf/text/WTFString.h:

(WTF::String::find):
(WTF::String::reverseFind):
(WTF::String::findCaseInsensitive):
(WTF::String::reverseFindCaseInsensitive):
(WTF::String::contains):
(WTF::find):
(WTF::reverseFind):

WebCore:

  • css/CSSSelector.cpp:

(WebCore::CSSSelector::RareData::parseNth):

  • css/CSSStyleDeclaration.cpp:

(WebCore::CSSStyleDeclaration::setProperty):

  • css/CSSStyleSelector.cpp:

(WebCore::CSSStyleSelector::SelectorChecker::checkOneSelector):

  • dom/Document.cpp:

(WebCore::Document::getImageMap):

  • editing/CompositeEditCommand.cpp:

(WebCore::CompositeEditCommand::inputText):

  • editing/InsertTextCommand.cpp:

(WebCore::InsertTextCommand::input):

  • editing/TextIterator.cpp:

(WebCore::TextIterator::handleTextBox):

  • editing/TypingCommand.cpp:

(WebCore::TypingCommand::insertText):

  • editing/markup.cpp:

(WebCore::fillContainerFromString):
(WebCore::createFragmentFromText):

  • html/File.cpp:

(WebCore::File::Init):

  • html/HTMLAnchorElement.cpp:

(WebCore::HTMLAnchorElement::setHost):

  • html/HTMLEmbedElement.cpp:

(WebCore::HTMLEmbedElement::parseMappedAttribute):

  • html/HTMLFormControlElement.cpp:

(WebCore::HTMLTextFormControlElement::isPlaceholderEmpty):

  • html/HTMLObjectElement.cpp:

(WebCore::HTMLObjectElement::parseMappedAttribute):

  • inspector/InspectorDOMAgent.cpp:

(WebCore::InspectorDOMAgent::performSearch):

  • loader/CrossOriginPreflightResultCache.cpp:

(WebCore::parseAccessControlAllowList):

  • loader/MainResourceLoader.cpp:

(WebCore::MainResourceLoader::substituteMIMETypeFromPluginDatabase):

  • loader/appcache/ApplicationCacheStorage.cpp:

(WebCore::parseHeader):
(WebCore::parseHeaders):

  • loader/icon/IconFetcher.cpp:

(WebCore::parseIconLink):

  • page/DOMWindow.cpp:

(WebCore::DOMWindow::parseModalDialogFeatures):

  • page/SecurityOrigin.cpp:

(WebCore::SecurityOrigin::createFromDatabaseIdentifier):

  • page/UserContentURLPattern.cpp:

(WebCore::UserContentURLPattern::parse):

  • page/XSSAuditor.cpp:

(WebCore::XSSAuditor::findInRequest):

  • platform/ContentType.cpp:

(WebCore::ContentType::parameter):
(WebCore::ContentType::type):

  • platform/KURL.cpp:

(WebCore::KURL::lastPathComponent):
(WebCore::KURL::setProtocol):
(WebCore::decodeURLEscapeSequences):
(WebCore::substituteBackslashes):
(WebCore::mimeTypeFromDataURL):

  • platform/Length.cpp:

(WebCore::newCoordsArray):
(WebCore::newLengthArray):

  • platform/LinkHash.cpp:

(WebCore::findSlashDotDotSlash):
(WebCore::findSlashSlash):
(WebCore::findSlashDotSlash):
(WebCore::cleanPath):

  • platform/MIMETypeRegistry.cpp:

(WebCore::MIMETypeRegistry::getMIMETypeForPath):

  • platform/SchemeRegistry.cpp:

(WebCore::SchemeRegistry::shouldTreatURLAsLocal):

  • platform/graphics/MediaPlayer.cpp:

(WebCore::MediaPlayer::load):

  • platform/mac/DragImageMac.mm:

(WebCore::createDragImageIconForCachedImage):

  • platform/network/CredentialStorage.cpp:

(WebCore::protectionSpaceMapKeyFromURL):
(WebCore::findDefaultProtectionSpaceForURL):

  • platform/network/HTTPParsers.cpp:

(WebCore::skipWhiteSpace):
(WebCore::skipToken):
(WebCore::parseHTTPRefresh):
(WebCore::filenameFromHTTPContentDisposition):
(WebCore::findCharsetInMediaType):
(WebCore::parseXSSProtectionHeader):
(WebCore::extractReasonPhraseFromHTTPStatusLine):

  • platform/network/ResourceResponseBase.cpp:

(WebCore::ResourceResponseBase::isAttachment):
(WebCore::parseCacheHeader):

  • rendering/RenderEmbeddedObject.cpp:

(WebCore::RenderEmbeddedObject::updateWidget):

  • storage/Entry.cpp:

(WebCore::Entry::Entry):

  • svg/SVGFont.cpp:

(WebCore::isCompatibleGlyph):

  • svg/SVGURIReference.cpp:

(WebCore::SVGURIReference::getTarget):

  • svg/animation/SVGSMILElement.cpp:

(WebCore::SVGSMILElement::parseClockValue):
(WebCore::SVGSMILElement::parseCondition):

  • xml/XPathFunctions.cpp:

(WebCore::XPath::FunSubstringBefore::evaluate):
(WebCore::XPath::FunSubstringAfter::evaluate):
(WebCore::XPath::FunTranslate::evaluate):
(WebCore::XPath::FunLang::evaluate):

  • xml/XPathParser.cpp:

(WebCore::XPath::Parser::expandQName):

  • Property svn:eol-style set to native
File size: 6.6 KB
Line 
1/*
2 * Copyright (C) 1999-2001, 2004 Harri Porten ([email protected])
3 * Copyright (c) 2007, 2008 Apple Inc. All rights reserved.
4 * Copyright (C) 2009 Torch Mobile, Inc.
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 *
20 */
21
22#include "config.h"
23#include "RegExp.h"
24#include "Lexer.h"
25#include <stdio.h>
26#include <stdlib.h>
27#include <string.h>
28#include <wtf/Assertions.h>
29#include <wtf/OwnArrayPtr.h>
30
31
32#if ENABLE(YARR)
33
34#include "yarr/RegexCompiler.h"
35#if ENABLE(YARR_JIT)
36#include "yarr/RegexJIT.h"
37#else
38#include "yarr/RegexInterpreter.h"
39#endif
40
41#else
42
43#include <pcre/pcre.h>
44
45#endif
46
47namespace JSC {
48
49inline RegExp::RegExp(JSGlobalData* globalData, const UString& pattern, const UString& flags)
50 : m_pattern(pattern)
51 , m_flagBits(0)
52 , m_constructionError(0)
53 , m_numSubpatterns(0)
54 , m_lastMatchStart(-1)
55{
56 // NOTE: The global flag is handled on a case-by-case basis by functions like
57 // String::match and RegExpObject::match.
58 if (!flags.isNull()) {
59 if (flags.find('g') != notFound)
60 m_flagBits |= Global;
61 if (flags.find('i') != notFound)
62 m_flagBits |= IgnoreCase;
63 if (flags.find('m') != notFound)
64 m_flagBits |= Multiline;
65 }
66 compile(globalData);
67}
68
69#if !ENABLE(YARR)
70RegExp::~RegExp()
71{
72 jsRegExpFree(m_regExp);
73}
74#endif
75
76PassRefPtr<RegExp> RegExp::create(JSGlobalData* globalData, const UString& pattern, const UString& flags)
77{
78 return adoptRef(new RegExp(globalData, pattern, flags));
79}
80
81#if ENABLE(YARR)
82
83void RegExp::compile(JSGlobalData* globalData)
84{
85#if ENABLE(YARR_JIT)
86 Yarr::jitCompileRegex(globalData, m_regExpJITCode, m_pattern, m_numSubpatterns, m_constructionError, ignoreCase(), multiline());
87#else
88 m_regExpBytecode = Yarr::byteCompileRegex(m_pattern, m_numSubpatterns, m_constructionError, &globalData->m_regexAllocator, ignoreCase(), multiline());
89#endif
90}
91
92int RegExp::match(const UString& s, int startOffset, Vector<int, 32>* ovector)
93{
94 if (startOffset < 0)
95 startOffset = 0;
96 if (ovector)
97 ovector->resize(0);
98
99 if (static_cast<unsigned>(startOffset) > s.length() || s.isNull()) {
100 m_lastMatchString = UString();
101 m_lastMatchStart = -1;
102 m_lastOVector.shrink(0);
103 return -1;
104 }
105
106 // Perform check to see if this match call is the same as the last match invocation
107 // and if it is return the prior result.
108 if ((startOffset == m_lastMatchStart) && (s.impl() == m_lastMatchString.impl())) {
109 if (ovector)
110 *ovector = m_lastOVector;
111
112 if (m_lastOVector.isEmpty())
113 return -1;
114
115 return m_lastOVector.at(0);
116 }
117
118#if ENABLE(YARR_JIT)
119 if (!!m_regExpJITCode) {
120#else
121 if (m_regExpBytecode) {
122#endif
123 int offsetVectorSize = (m_numSubpatterns + 1) * 3; // FIXME: should be 2 - but adding temporary fallback to pcre.
124 int* offsetVector;
125 Vector<int, 32> nonReturnedOvector;
126 if (ovector) {
127 ovector->resize(offsetVectorSize);
128 offsetVector = ovector->data();
129 } else {
130 nonReturnedOvector.resize(offsetVectorSize);
131 offsetVector = nonReturnedOvector.data();
132 }
133
134 ASSERT(offsetVector);
135 for (int j = 0; j < offsetVectorSize; ++j)
136 offsetVector[j] = -1;
137
138#if ENABLE(YARR_JIT)
139 int result = Yarr::executeRegex(m_regExpJITCode, s.characters(), startOffset, s.length(), offsetVector, offsetVectorSize);
140#else
141 int result = Yarr::interpretRegex(m_regExpBytecode.get(), s.characters(), startOffset, s.length(), offsetVector);
142#endif
143
144 if (result < 0) {
145#ifndef NDEBUG
146 // TODO: define up a symbol, rather than magic -1
147 if (result != -1)
148 fprintf(stderr, "jsRegExpExecute failed with result %d\n", result);
149#endif
150 if (ovector)
151 ovector->clear();
152 }
153
154 m_lastMatchString = s;
155 m_lastMatchStart = startOffset;
156
157 if (ovector)
158 m_lastOVector = *ovector;
159 else
160 m_lastOVector = nonReturnedOvector;
161
162 return result;
163 }
164
165 m_lastMatchString = UString();
166 m_lastMatchStart = -1;
167 m_lastOVector.shrink(0);
168
169 return -1;
170}
171
172#else
173
174void RegExp::compile(JSGlobalData*)
175{
176 m_regExp = 0;
177 JSRegExpIgnoreCaseOption ignoreCaseOption = ignoreCase() ? JSRegExpIgnoreCase : JSRegExpDoNotIgnoreCase;
178 JSRegExpMultilineOption multilineOption = multiline() ? JSRegExpMultiline : JSRegExpSingleLine;
179 m_regExp = jsRegExpCompile(reinterpret_cast<const UChar*>(m_pattern.characters()), m_pattern.length(), ignoreCaseOption, multilineOption, &m_numSubpatterns, &m_constructionError);
180}
181
182int RegExp::match(const UString& s, int startOffset, Vector<int, 32>* ovector)
183{
184 if (startOffset < 0)
185 startOffset = 0;
186 if (ovector)
187 ovector->clear();
188
189 if (static_cast<unsigned>(startOffset) > s.length() || s.isNull())
190 return -1;
191
192 if (m_regExp) {
193 // Set up the offset vector for the result.
194 // First 2/3 used for result, the last third used by PCRE.
195 int* offsetVector;
196 int offsetVectorSize;
197 int fixedSizeOffsetVector[3];
198 if (!ovector) {
199 offsetVectorSize = 3;
200 offsetVector = fixedSizeOffsetVector;
201 } else {
202 offsetVectorSize = (m_numSubpatterns + 1) * 3;
203 ovector->resize(offsetVectorSize);
204 offsetVector = ovector->data();
205 }
206
207 int numMatches = jsRegExpExecute(m_regExp, reinterpret_cast<const UChar*>(s.characters()), s.length(), startOffset, offsetVector, offsetVectorSize);
208
209 if (numMatches < 0) {
210#ifndef NDEBUG
211 if (numMatches != JSRegExpErrorNoMatch)
212 fprintf(stderr, "jsRegExpExecute failed with result %d\n", numMatches);
213#endif
214 if (ovector)
215 ovector->clear();
216 return -1;
217 }
218
219 return offsetVector[0];
220 }
221
222 return -1;
223}
224
225#endif
226
227} // namespace JSC
Note: See TracBrowser for help on using the repository browser.