source: webkit/trunk/JavaScriptCore/runtime/RegExp.cpp@ 49004

Last change on this file since 49004 was 45545, checked in by [email protected], 16 years ago

2009-07-03 Yong Li <[email protected]>

Reviewed by Maciej Stachowiak (and revised slightly)

RegExp::match to be optimized
https://bugs.webkit.org/show_bug.cgi?id=26957

Allow regexp matching to use Vectors with inline capacity instead of
allocating a new ovector buffer every time.


~5% speedup on SunSpider string-unpack-code test, 0.3% on SunSpider overall.

  • runtime/RegExp.cpp: (JSC::RegExp::match):
  • runtime/RegExp.h:
  • runtime/RegExpConstructor.cpp: (JSC::RegExpConstructorPrivate::RegExpConstructorPrivate): (JSC::RegExpConstructorPrivate::lastOvector): (JSC::RegExpConstructorPrivate::tempOvector): (JSC::RegExpConstructorPrivate::changeLastOvector): (JSC::RegExpConstructor::performMatch): (JSC::RegExpMatchesArray::RegExpMatchesArray): (JSC::RegExpMatchesArray::fillArrayInstance): (JSC::RegExpConstructor::getBackref): (JSC::RegExpConstructor::getLastParen): (JSC::RegExpConstructor::getLeftContext): (JSC::RegExpConstructor::getRightContext):
  • runtime/StringPrototype.cpp: (JSC::stringProtoFuncSplit):
  • Property svn:eol-style set to native
File size: 7.3 KB
Line 
1/*
2 * Copyright (C) 1999-2001, 2004 Harri Porten ([email protected])
3 * Copyright (c) 2007, 2008 Apple Inc. All rights reserved.
4 * Copyright (C) 2009 Torch Mobile, Inc.
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 *
20 */
21
22#include "config.h"
23#include "RegExp.h"
24#include "Lexer.h"
25#include <stdio.h>
26#include <stdlib.h>
27#include <string.h>
28#include <wtf/Assertions.h>
29#include <wtf/OwnArrayPtr.h>
30
31
32#if ENABLE(YARR)
33
34#include "yarr/RegexCompiler.h"
35#if ENABLE(YARR_JIT)
36#include "yarr/RegexJIT.h"
37#else
38#include "yarr/RegexInterpreter.h"
39#endif
40
41#else
42
43#if ENABLE(WREC)
44#include "JIT.h"
45#include "WRECGenerator.h"
46#endif
47#include <pcre/pcre.h>
48
49#endif
50
51namespace JSC {
52
53#if ENABLE(WREC)
54using namespace WREC;
55#endif
56
57inline RegExp::RegExp(JSGlobalData* globalData, const UString& pattern)
58 : m_pattern(pattern)
59 , m_flagBits(0)
60 , m_constructionError(0)
61 , m_numSubpatterns(0)
62{
63 compile(globalData);
64}
65
66inline RegExp::RegExp(JSGlobalData* globalData, const UString& pattern, const UString& flags)
67 : m_pattern(pattern)
68 , m_flags(flags)
69 , m_flagBits(0)
70 , m_constructionError(0)
71 , m_numSubpatterns(0)
72{
73 // NOTE: The global flag is handled on a case-by-case basis by functions like
74 // String::match and RegExpObject::match.
75 if (flags.find('g') != -1)
76 m_flagBits |= Global;
77 if (flags.find('i') != -1)
78 m_flagBits |= IgnoreCase;
79 if (flags.find('m') != -1)
80 m_flagBits |= Multiline;
81
82 compile(globalData);
83}
84
85#if !ENABLE(YARR)
86RegExp::~RegExp()
87{
88 jsRegExpFree(m_regExp);
89}
90#endif
91
92PassRefPtr<RegExp> RegExp::create(JSGlobalData* globalData, const UString& pattern)
93{
94 return adoptRef(new RegExp(globalData, pattern));
95}
96
97PassRefPtr<RegExp> RegExp::create(JSGlobalData* globalData, const UString& pattern, const UString& flags)
98{
99 return adoptRef(new RegExp(globalData, pattern, flags));
100}
101
102#if ENABLE(YARR)
103
104void RegExp::compile(JSGlobalData* globalData)
105{
106#if ENABLE(YARR_JIT)
107 Yarr::jitCompileRegex(globalData, m_regExpJITCode, m_pattern, m_numSubpatterns, m_constructionError, ignoreCase(), multiline());
108#else
109 UNUSED_PARAM(globalData);
110 m_regExpBytecode.set(Yarr::byteCompileRegex(m_pattern, m_numSubpatterns, m_constructionError, ignoreCase(), multiline()));
111#endif
112}
113
114int RegExp::match(const UString& s, int startOffset, Vector<int, 32>* ovector)
115{
116 if (startOffset < 0)
117 startOffset = 0;
118 if (ovector)
119 ovector->clear();
120
121 if (startOffset > s.size() || s.isNull())
122 return -1;
123
124#if ENABLE(YARR_JIT)
125 if (!!m_regExpJITCode) {
126#else
127 if (m_regExpBytecode) {
128#endif
129 int offsetVectorSize = (m_numSubpatterns + 1) * 3; // FIXME: should be 2 - but adding temporary fallback to pcre.
130 int* offsetVector;
131 Vector<int, 32> nonReturnedOvector;
132 if (ovector) {
133 ovector->resize(offsetVectorSize);
134 offsetVector = ovector->data();
135 } else {
136 nonReturnedOvector.resize(offsetVectorSize);
137 offsetVector = nonReturnedOvector.data();
138 }
139
140 ASSERT(offsetVector);
141 for (int j = 0; j < offsetVectorSize; ++j)
142 offsetVector[j] = -1;
143
144
145#if ENABLE(YARR_JIT)
146 int result = Yarr::executeRegex(m_regExpJITCode, s.data(), startOffset, s.size(), offsetVector, offsetVectorSize);
147#else
148 int result = Yarr::interpretRegex(m_regExpBytecode.get(), s.data(), startOffset, s.size(), offsetVector);
149#endif
150
151 if (result < 0) {
152#ifndef NDEBUG
153 // TODO: define up a symbol, rather than magic -1
154 if (result != -1)
155 fprintf(stderr, "jsRegExpExecute failed with result %d\n", result);
156#endif
157 if (ovector)
158 ovector->clear();
159 }
160 return result;
161 }
162
163 return -1;
164}
165
166#else
167
168void RegExp::compile(JSGlobalData* globalData)
169{
170 m_regExp = 0;
171#if ENABLE(WREC)
172 m_wrecFunction = Generator::compileRegExp(globalData, m_pattern, &m_numSubpatterns, &m_constructionError, m_executablePool, ignoreCase(), multiline());
173 if (m_wrecFunction || m_constructionError)
174 return;
175 // Fall through to non-WREC case.
176#else
177 UNUSED_PARAM(globalData);
178#endif
179
180 JSRegExpIgnoreCaseOption ignoreCaseOption = ignoreCase() ? JSRegExpIgnoreCase : JSRegExpDoNotIgnoreCase;
181 JSRegExpMultilineOption multilineOption = multiline() ? JSRegExpMultiline : JSRegExpSingleLine;
182 m_regExp = jsRegExpCompile(reinterpret_cast<const UChar*>(m_pattern.data()), m_pattern.size(), ignoreCaseOption, multilineOption, &m_numSubpatterns, &m_constructionError);
183}
184
185int RegExp::match(const UString& s, int startOffset, Vector<int, 32>* ovector)
186{
187 if (startOffset < 0)
188 startOffset = 0;
189 if (ovector)
190 ovector->clear();
191
192 if (startOffset > s.size() || s.isNull())
193 return -1;
194
195#if ENABLE(WREC)
196 if (m_wrecFunction) {
197 int offsetVectorSize = (m_numSubpatterns + 1) * 2;
198 int* offsetVector;
199 Vector<int, 32> nonReturnedOvector;
200 if (ovector) {
201 ovector->resize(offsetVectorSize);
202 offsetVector = ovector->data();
203 } else {
204 nonReturnedOvector.resize(offsetVectorSize);
205 offsetVector = nonReturnedOvector.data();
206 }
207 ASSERT(offsetVector);
208 for (int j = 0; j < offsetVectorSize; ++j)
209 offsetVector[j] = -1;
210
211 int result = m_wrecFunction(s.data(), startOffset, s.size(), offsetVector);
212
213 if (result < 0) {
214#ifndef NDEBUG
215 // TODO: define up a symbol, rather than magic -1
216 if (result != -1)
217 fprintf(stderr, "jsRegExpExecute failed with result %d\n", result);
218#endif
219 if (ovector)
220 ovector->clear();
221 }
222 return result;
223 } else
224#endif
225 if (m_regExp) {
226 // Set up the offset vector for the result.
227 // First 2/3 used for result, the last third used by PCRE.
228 int* offsetVector;
229 int offsetVectorSize;
230 int fixedSizeOffsetVector[3];
231 if (!ovector) {
232 offsetVectorSize = 3;
233 offsetVector = fixedSizeOffsetVector;
234 } else {
235 offsetVectorSize = (m_numSubpatterns + 1) * 3;
236 ovector->resize(offsetVectorSize);
237 offsetVector = ovector->data();
238 }
239
240 int numMatches = jsRegExpExecute(m_regExp, reinterpret_cast<const UChar*>(s.data()), s.size(), startOffset, offsetVector, offsetVectorSize);
241
242 if (numMatches < 0) {
243#ifndef NDEBUG
244 if (numMatches != JSRegExpErrorNoMatch)
245 fprintf(stderr, "jsRegExpExecute failed with result %d\n", numMatches);
246#endif
247 if (ovector)
248 ovector->clear();
249 return -1;
250 }
251
252 return offsetVector[0];
253 }
254
255 return -1;
256}
257
258#endif
259
260} // namespace JSC
Note: See TracBrowser for help on using the repository browser.