source: webkit/trunk/JavaScriptCore/yarr/RegexPattern.h@ 57608

Last change on this file since 57608 was 57608, checked in by [email protected], 15 years ago

2010-04-14 Peter Varga <[email protected]>

Reviewed by Geoffrey Garen.

Move the YARR JIT fallback detection from RegexJIT.cpp to
RegexCompiler.cpp.

https://bugs.webkit.org/show_bug.cgi?id=37571

  • yarr/RegexCompiler.cpp: (JSC::Yarr::RegexPatternConstructor::atomBackReference): (JSC::Yarr::RegexPatternConstructor::quantifyAtom):
  • yarr/RegexJIT.cpp: (JSC::Yarr::RegexGenerator::generateTerm): (JSC::Yarr::RegexGenerator::RegexGenerator): (JSC::Yarr::jitCompileRegex):
  • yarr/RegexJIT.h: (JSC::Yarr::RegexCodeBlock::operator!):
  • yarr/RegexPattern.h: (JSC::Yarr::RegexPattern::RegexPattern): (JSC::Yarr::RegexPattern::reset):
File size: 9.6 KB
Line 
1/*
2 * Copyright (C) 2009 Apple Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26#ifndef RegexPattern_h
27#define RegexPattern_h
28
29
30#if ENABLE(YARR)
31
32#include <wtf/Vector.h>
33#include <wtf/unicode/Unicode.h>
34
35
36namespace JSC { namespace Yarr {
37
38#define RegexStackSpaceForBackTrackInfoPatternCharacter 1 // Only for !fixed quantifiers.
39#define RegexStackSpaceForBackTrackInfoCharacterClass 1 // Only for !fixed quantifiers.
40#define RegexStackSpaceForBackTrackInfoBackReference 2
41#define RegexStackSpaceForBackTrackInfoAlternative 1 // One per alternative.
42#define RegexStackSpaceForBackTrackInfoParentheticalAssertion 1
43#define RegexStackSpaceForBackTrackInfoParenthesesOnce 1 // Only for !fixed quantifiers.
44#define RegexStackSpaceForBackTrackInfoParentheses 4
45
46struct PatternDisjunction;
47
48struct CharacterRange {
49 UChar begin;
50 UChar end;
51
52 CharacterRange(UChar begin, UChar end)
53 : begin(begin)
54 , end(end)
55 {
56 }
57};
58
59struct CharacterClass : FastAllocBase {
60 Vector<UChar> m_matches;
61 Vector<CharacterRange> m_ranges;
62 Vector<UChar> m_matchesUnicode;
63 Vector<CharacterRange> m_rangesUnicode;
64};
65
66enum QuantifierType {
67 QuantifierFixedCount,
68 QuantifierGreedy,
69 QuantifierNonGreedy,
70};
71
72struct PatternTerm {
73 enum Type {
74 TypeAssertionBOL,
75 TypeAssertionEOL,
76 TypeAssertionWordBoundary,
77 TypePatternCharacter,
78 TypeCharacterClass,
79 TypeBackReference,
80 TypeForwardReference,
81 TypeParenthesesSubpattern,
82 TypeParentheticalAssertion,
83 } type;
84 bool invertOrCapture;
85 union {
86 UChar patternCharacter;
87 CharacterClass* characterClass;
88 unsigned subpatternId;
89 struct {
90 PatternDisjunction* disjunction;
91 unsigned subpatternId;
92 unsigned lastSubpatternId;
93 bool isCopy;
94 } parentheses;
95 };
96 QuantifierType quantityType;
97 unsigned quantityCount;
98 int inputPosition;
99 unsigned frameLocation;
100
101 PatternTerm(UChar ch)
102 : type(PatternTerm::TypePatternCharacter)
103 {
104 patternCharacter = ch;
105 quantityType = QuantifierFixedCount;
106 quantityCount = 1;
107 }
108
109 PatternTerm(CharacterClass* charClass, bool invert)
110 : type(PatternTerm::TypeCharacterClass)
111 , invertOrCapture(invert)
112 {
113 characterClass = charClass;
114 quantityType = QuantifierFixedCount;
115 quantityCount = 1;
116 }
117
118 PatternTerm(Type type, unsigned subpatternId, PatternDisjunction* disjunction, bool invertOrCapture)
119 : type(type)
120 , invertOrCapture(invertOrCapture)
121 {
122 parentheses.disjunction = disjunction;
123 parentheses.subpatternId = subpatternId;
124 parentheses.isCopy = false;
125 quantityType = QuantifierFixedCount;
126 quantityCount = 1;
127 }
128
129 PatternTerm(Type type, bool invert = false)
130 : type(type)
131 , invertOrCapture(invert)
132 {
133 quantityType = QuantifierFixedCount;
134 quantityCount = 1;
135 }
136
137 PatternTerm(unsigned spatternId)
138 : type(TypeBackReference)
139 , invertOrCapture(false)
140 {
141 subpatternId = spatternId;
142 quantityType = QuantifierFixedCount;
143 quantityCount = 1;
144 }
145
146 static PatternTerm ForwardReference()
147 {
148 return PatternTerm(TypeForwardReference);
149 }
150
151 static PatternTerm BOL()
152 {
153 return PatternTerm(TypeAssertionBOL);
154 }
155
156 static PatternTerm EOL()
157 {
158 return PatternTerm(TypeAssertionEOL);
159 }
160
161 static PatternTerm WordBoundary(bool invert)
162 {
163 return PatternTerm(TypeAssertionWordBoundary, invert);
164 }
165
166 bool invert()
167 {
168 return invertOrCapture;
169 }
170
171 bool capture()
172 {
173 return invertOrCapture;
174 }
175
176 void quantify(unsigned count, QuantifierType type)
177 {
178 quantityCount = count;
179 quantityType = type;
180 }
181};
182
183struct PatternAlternative : FastAllocBase {
184 PatternAlternative(PatternDisjunction* disjunction)
185 : m_parent(disjunction)
186 {
187 }
188
189 PatternTerm& lastTerm()
190 {
191 ASSERT(m_terms.size());
192 return m_terms[m_terms.size() - 1];
193 }
194
195 void removeLastTerm()
196 {
197 ASSERT(m_terms.size());
198 m_terms.shrink(m_terms.size() - 1);
199 }
200
201 Vector<PatternTerm> m_terms;
202 PatternDisjunction* m_parent;
203 unsigned m_minimumSize;
204 bool m_hasFixedSize;
205};
206
207struct PatternDisjunction : FastAllocBase {
208 PatternDisjunction(PatternAlternative* parent = 0)
209 : m_parent(parent)
210 {
211 }
212
213 ~PatternDisjunction()
214 {
215 deleteAllValues(m_alternatives);
216 }
217
218 PatternAlternative* addNewAlternative()
219 {
220 PatternAlternative* alternative = new PatternAlternative(this);
221 m_alternatives.append(alternative);
222 return alternative;
223 }
224
225 Vector<PatternAlternative*> m_alternatives;
226 PatternAlternative* m_parent;
227 unsigned m_minimumSize;
228 unsigned m_callFrameSize;
229 bool m_hasFixedSize;
230};
231
232// You probably don't want to be calling these functions directly
233// (please to be calling newlineCharacterClass() et al on your
234// friendly neighborhood RegexPattern instance to get nicely
235// cached copies).
236CharacterClass* newlineCreate();
237CharacterClass* digitsCreate();
238CharacterClass* spacesCreate();
239CharacterClass* wordcharCreate();
240CharacterClass* nondigitsCreate();
241CharacterClass* nonspacesCreate();
242CharacterClass* nonwordcharCreate();
243
244struct RegexPattern {
245 RegexPattern(bool ignoreCase, bool multiline)
246 : m_ignoreCase(ignoreCase)
247 , m_multiline(multiline)
248 , m_numSubpatterns(0)
249 , m_maxBackReference(0)
250 , m_shouldFallBack(false)
251 , newlineCached(0)
252 , digitsCached(0)
253 , spacesCached(0)
254 , wordcharCached(0)
255 , nondigitsCached(0)
256 , nonspacesCached(0)
257 , nonwordcharCached(0)
258 {
259 }
260
261 ~RegexPattern()
262 {
263 deleteAllValues(m_disjunctions);
264 deleteAllValues(m_userCharacterClasses);
265 }
266
267 void reset()
268 {
269 m_numSubpatterns = 0;
270 m_maxBackReference = 0;
271
272 m_shouldFallBack = false;
273
274 newlineCached = 0;
275 digitsCached = 0;
276 spacesCached = 0;
277 wordcharCached = 0;
278 nondigitsCached = 0;
279 nonspacesCached = 0;
280 nonwordcharCached = 0;
281
282 deleteAllValues(m_disjunctions);
283 m_disjunctions.clear();
284 deleteAllValues(m_userCharacterClasses);
285 m_userCharacterClasses.clear();
286 }
287
288 bool containsIllegalBackReference()
289 {
290 return m_maxBackReference > m_numSubpatterns;
291 }
292
293 CharacterClass* newlineCharacterClass()
294 {
295 if (!newlineCached)
296 m_userCharacterClasses.append(newlineCached = newlineCreate());
297 return newlineCached;
298 }
299 CharacterClass* digitsCharacterClass()
300 {
301 if (!digitsCached)
302 m_userCharacterClasses.append(digitsCached = digitsCreate());
303 return digitsCached;
304 }
305 CharacterClass* spacesCharacterClass()
306 {
307 if (!spacesCached)
308 m_userCharacterClasses.append(spacesCached = spacesCreate());
309 return spacesCached;
310 }
311 CharacterClass* wordcharCharacterClass()
312 {
313 if (!wordcharCached)
314 m_userCharacterClasses.append(wordcharCached = wordcharCreate());
315 return wordcharCached;
316 }
317 CharacterClass* nondigitsCharacterClass()
318 {
319 if (!nondigitsCached)
320 m_userCharacterClasses.append(nondigitsCached = nondigitsCreate());
321 return nondigitsCached;
322 }
323 CharacterClass* nonspacesCharacterClass()
324 {
325 if (!nonspacesCached)
326 m_userCharacterClasses.append(nonspacesCached = nonspacesCreate());
327 return nonspacesCached;
328 }
329 CharacterClass* nonwordcharCharacterClass()
330 {
331 if (!nonwordcharCached)
332 m_userCharacterClasses.append(nonwordcharCached = nonwordcharCreate());
333 return nonwordcharCached;
334 }
335
336 bool m_ignoreCase;
337 bool m_multiline;
338 unsigned m_numSubpatterns;
339 unsigned m_maxBackReference;
340 bool m_shouldFallBack;
341 PatternDisjunction* m_body;
342 Vector<PatternDisjunction*, 4> m_disjunctions;
343 Vector<CharacterClass*> m_userCharacterClasses;
344
345private:
346 CharacterClass* newlineCached;
347 CharacterClass* digitsCached;
348 CharacterClass* spacesCached;
349 CharacterClass* wordcharCached;
350 CharacterClass* nondigitsCached;
351 CharacterClass* nonspacesCached;
352 CharacterClass* nonwordcharCached;
353};
354
355} } // namespace JSC::Yarr
356
357#endif
358
359#endif // RegexPattern_h
Note: See TracBrowser for help on using the repository browser.