1 | /*
|
---|
2 | * Copyright (C) 1999-2001, 2004 Harri Porten ([email protected])
|
---|
3 | * Copyright (c) 2007, 2008 Apple Inc. All rights reserved.
|
---|
4 | * Copyright (C) 2009 Torch Mobile, Inc.
|
---|
5 | *
|
---|
6 | * This library is free software; you can redistribute it and/or
|
---|
7 | * modify it under the terms of the GNU Lesser General Public
|
---|
8 | * License as published by the Free Software Foundation; either
|
---|
9 | * version 2 of the License, or (at your option) any later version.
|
---|
10 | *
|
---|
11 | * This library is distributed in the hope that it will be useful,
|
---|
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
---|
14 | * Lesser General Public License for more details.
|
---|
15 | *
|
---|
16 | * You should have received a copy of the GNU Lesser General Public
|
---|
17 | * License along with this library; if not, write to the Free Software
|
---|
18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
---|
19 | *
|
---|
20 | */
|
---|
21 |
|
---|
22 | #include "config.h"
|
---|
23 | #include "RegExp.h"
|
---|
24 | #include "Lexer.h"
|
---|
25 | #include <stdio.h>
|
---|
26 | #include <stdlib.h>
|
---|
27 | #include <string.h>
|
---|
28 | #include <wtf/Assertions.h>
|
---|
29 | #include <wtf/OwnArrayPtr.h>
|
---|
30 |
|
---|
31 |
|
---|
32 | #if ENABLE(YARR)
|
---|
33 |
|
---|
34 | #include "yarr/RegexCompiler.h"
|
---|
35 | #if ENABLE(YARR_JIT)
|
---|
36 | #include "yarr/RegexJIT.h"
|
---|
37 | #else
|
---|
38 | #include "yarr/RegexInterpreter.h"
|
---|
39 | #endif
|
---|
40 |
|
---|
41 | #else
|
---|
42 |
|
---|
43 | #if ENABLE(WREC)
|
---|
44 | #include "JIT.h"
|
---|
45 | #include "WRECGenerator.h"
|
---|
46 | #endif
|
---|
47 | #include <pcre/pcre.h>
|
---|
48 |
|
---|
49 | #endif
|
---|
50 |
|
---|
51 | namespace JSC {
|
---|
52 |
|
---|
53 | #if ENABLE(WREC)
|
---|
54 | using namespace WREC;
|
---|
55 | #endif
|
---|
56 |
|
---|
57 | inline RegExp::RegExp(JSGlobalData* globalData, const UString& pattern)
|
---|
58 | : m_pattern(pattern)
|
---|
59 | , m_flagBits(0)
|
---|
60 | , m_constructionError(0)
|
---|
61 | , m_numSubpatterns(0)
|
---|
62 | {
|
---|
63 | compile(globalData);
|
---|
64 | }
|
---|
65 |
|
---|
66 | inline RegExp::RegExp(JSGlobalData* globalData, const UString& pattern, const UString& flags)
|
---|
67 | : m_pattern(pattern)
|
---|
68 | , m_flags(flags)
|
---|
69 | , m_flagBits(0)
|
---|
70 | , m_constructionError(0)
|
---|
71 | , m_numSubpatterns(0)
|
---|
72 | {
|
---|
73 | // NOTE: The global flag is handled on a case-by-case basis by functions like
|
---|
74 | // String::match and RegExpObject::match.
|
---|
75 | if (flags.find('g') != -1)
|
---|
76 | m_flagBits |= Global;
|
---|
77 | if (flags.find('i') != -1)
|
---|
78 | m_flagBits |= IgnoreCase;
|
---|
79 | if (flags.find('m') != -1)
|
---|
80 | m_flagBits |= Multiline;
|
---|
81 |
|
---|
82 | compile(globalData);
|
---|
83 | }
|
---|
84 |
|
---|
85 | #if !ENABLE(YARR)
|
---|
86 | RegExp::~RegExp()
|
---|
87 | {
|
---|
88 | jsRegExpFree(m_regExp);
|
---|
89 | }
|
---|
90 | #endif
|
---|
91 |
|
---|
92 | PassRefPtr<RegExp> RegExp::create(JSGlobalData* globalData, const UString& pattern)
|
---|
93 | {
|
---|
94 | return adoptRef(new RegExp(globalData, pattern));
|
---|
95 | }
|
---|
96 |
|
---|
97 | PassRefPtr<RegExp> RegExp::create(JSGlobalData* globalData, const UString& pattern, const UString& flags)
|
---|
98 | {
|
---|
99 | return adoptRef(new RegExp(globalData, pattern, flags));
|
---|
100 | }
|
---|
101 |
|
---|
102 | #if ENABLE(YARR)
|
---|
103 |
|
---|
104 | void RegExp::compile(JSGlobalData* globalData)
|
---|
105 | {
|
---|
106 | #if ENABLE(YARR_JIT)
|
---|
107 | Yarr::jitCompileRegex(globalData, m_regExpJITCode, m_pattern, m_numSubpatterns, m_constructionError, ignoreCase(), multiline());
|
---|
108 | #else
|
---|
109 | UNUSED_PARAM(globalData);
|
---|
110 | m_regExpBytecode.set(Yarr::byteCompileRegex(m_pattern, m_numSubpatterns, m_constructionError, ignoreCase(), multiline()));
|
---|
111 | #endif
|
---|
112 | }
|
---|
113 |
|
---|
114 | int RegExp::match(const UString& s, int startOffset, Vector<int, 32>* ovector)
|
---|
115 | {
|
---|
116 | if (startOffset < 0)
|
---|
117 | startOffset = 0;
|
---|
118 | if (ovector)
|
---|
119 | ovector->clear();
|
---|
120 |
|
---|
121 | if (startOffset > s.size() || s.isNull())
|
---|
122 | return -1;
|
---|
123 |
|
---|
124 | #if ENABLE(YARR_JIT)
|
---|
125 | if (!!m_regExpJITCode) {
|
---|
126 | #else
|
---|
127 | if (m_regExpBytecode) {
|
---|
128 | #endif
|
---|
129 | int offsetVectorSize = (m_numSubpatterns + 1) * 3; // FIXME: should be 2 - but adding temporary fallback to pcre.
|
---|
130 | int* offsetVector;
|
---|
131 | Vector<int, 32> nonReturnedOvector;
|
---|
132 | if (ovector) {
|
---|
133 | ovector->resize(offsetVectorSize);
|
---|
134 | offsetVector = ovector->data();
|
---|
135 | } else {
|
---|
136 | nonReturnedOvector.resize(offsetVectorSize);
|
---|
137 | offsetVector = nonReturnedOvector.data();
|
---|
138 | }
|
---|
139 |
|
---|
140 | ASSERT(offsetVector);
|
---|
141 | for (int j = 0; j < offsetVectorSize; ++j)
|
---|
142 | offsetVector[j] = -1;
|
---|
143 |
|
---|
144 |
|
---|
145 | #if ENABLE(YARR_JIT)
|
---|
146 | int result = Yarr::executeRegex(m_regExpJITCode, s.data(), startOffset, s.size(), offsetVector, offsetVectorSize);
|
---|
147 | #else
|
---|
148 | int result = Yarr::interpretRegex(m_regExpBytecode.get(), s.data(), startOffset, s.size(), offsetVector);
|
---|
149 | #endif
|
---|
150 |
|
---|
151 | if (result < 0) {
|
---|
152 | #ifndef NDEBUG
|
---|
153 | // TODO: define up a symbol, rather than magic -1
|
---|
154 | if (result != -1)
|
---|
155 | fprintf(stderr, "jsRegExpExecute failed with result %d\n", result);
|
---|
156 | #endif
|
---|
157 | if (ovector)
|
---|
158 | ovector->clear();
|
---|
159 | }
|
---|
160 | return result;
|
---|
161 | }
|
---|
162 |
|
---|
163 | return -1;
|
---|
164 | }
|
---|
165 |
|
---|
166 | #else
|
---|
167 |
|
---|
168 | void RegExp::compile(JSGlobalData* globalData)
|
---|
169 | {
|
---|
170 | m_regExp = 0;
|
---|
171 | #if ENABLE(WREC)
|
---|
172 | m_wrecFunction = Generator::compileRegExp(globalData, m_pattern, &m_numSubpatterns, &m_constructionError, m_executablePool, ignoreCase(), multiline());
|
---|
173 | if (m_wrecFunction || m_constructionError)
|
---|
174 | return;
|
---|
175 | // Fall through to non-WREC case.
|
---|
176 | #else
|
---|
177 | UNUSED_PARAM(globalData);
|
---|
178 | #endif
|
---|
179 |
|
---|
180 | JSRegExpIgnoreCaseOption ignoreCaseOption = ignoreCase() ? JSRegExpIgnoreCase : JSRegExpDoNotIgnoreCase;
|
---|
181 | JSRegExpMultilineOption multilineOption = multiline() ? JSRegExpMultiline : JSRegExpSingleLine;
|
---|
182 | m_regExp = jsRegExpCompile(reinterpret_cast<const UChar*>(m_pattern.data()), m_pattern.size(), ignoreCaseOption, multilineOption, &m_numSubpatterns, &m_constructionError);
|
---|
183 | }
|
---|
184 |
|
---|
185 | int RegExp::match(const UString& s, int startOffset, Vector<int, 32>* ovector)
|
---|
186 | {
|
---|
187 | if (startOffset < 0)
|
---|
188 | startOffset = 0;
|
---|
189 | if (ovector)
|
---|
190 | ovector->clear();
|
---|
191 |
|
---|
192 | if (startOffset > s.size() || s.isNull())
|
---|
193 | return -1;
|
---|
194 |
|
---|
195 | #if ENABLE(WREC)
|
---|
196 | if (m_wrecFunction) {
|
---|
197 | int offsetVectorSize = (m_numSubpatterns + 1) * 2;
|
---|
198 | int* offsetVector;
|
---|
199 | Vector<int, 32> nonReturnedOvector;
|
---|
200 | if (ovector) {
|
---|
201 | ovector->resize(offsetVectorSize);
|
---|
202 | offsetVector = ovector->data();
|
---|
203 | } else {
|
---|
204 | nonReturnedOvector.resize(offsetVectorSize);
|
---|
205 | offsetVector = nonReturnedOvector.data();
|
---|
206 | }
|
---|
207 | ASSERT(offsetVector);
|
---|
208 | for (int j = 0; j < offsetVectorSize; ++j)
|
---|
209 | offsetVector[j] = -1;
|
---|
210 |
|
---|
211 | int result = m_wrecFunction(s.data(), startOffset, s.size(), offsetVector);
|
---|
212 |
|
---|
213 | if (result < 0) {
|
---|
214 | #ifndef NDEBUG
|
---|
215 | // TODO: define up a symbol, rather than magic -1
|
---|
216 | if (result != -1)
|
---|
217 | fprintf(stderr, "jsRegExpExecute failed with result %d\n", result);
|
---|
218 | #endif
|
---|
219 | if (ovector)
|
---|
220 | ovector->clear();
|
---|
221 | }
|
---|
222 | return result;
|
---|
223 | } else
|
---|
224 | #endif
|
---|
225 | if (m_regExp) {
|
---|
226 | // Set up the offset vector for the result.
|
---|
227 | // First 2/3 used for result, the last third used by PCRE.
|
---|
228 | int* offsetVector;
|
---|
229 | int offsetVectorSize;
|
---|
230 | int fixedSizeOffsetVector[3];
|
---|
231 | if (!ovector) {
|
---|
232 | offsetVectorSize = 3;
|
---|
233 | offsetVector = fixedSizeOffsetVector;
|
---|
234 | } else {
|
---|
235 | offsetVectorSize = (m_numSubpatterns + 1) * 3;
|
---|
236 | ovector->resize(offsetVectorSize);
|
---|
237 | offsetVector = ovector->data();
|
---|
238 | }
|
---|
239 |
|
---|
240 | int numMatches = jsRegExpExecute(m_regExp, reinterpret_cast<const UChar*>(s.data()), s.size(), startOffset, offsetVector, offsetVectorSize);
|
---|
241 |
|
---|
242 | if (numMatches < 0) {
|
---|
243 | #ifndef NDEBUG
|
---|
244 | if (numMatches != JSRegExpErrorNoMatch)
|
---|
245 | fprintf(stderr, "jsRegExpExecute failed with result %d\n", numMatches);
|
---|
246 | #endif
|
---|
247 | if (ovector)
|
---|
248 | ovector->clear();
|
---|
249 | return -1;
|
---|
250 | }
|
---|
251 |
|
---|
252 | return offsetVector[0];
|
---|
253 | }
|
---|
254 |
|
---|
255 | return -1;
|
---|
256 | }
|
---|
257 |
|
---|
258 | #endif
|
---|
259 |
|
---|
260 | } // namespace JSC
|
---|