1 | /*
|
---|
2 | * Copyright (C) 2009 Apple Inc. All rights reserved.
|
---|
3 | *
|
---|
4 | * Redistribution and use in source and binary forms, with or without
|
---|
5 | * modification, are permitted provided that the following conditions
|
---|
6 | * are met:
|
---|
7 | * 1. Redistributions of source code must retain the above copyright
|
---|
8 | * notice, this list of conditions and the following disclaimer.
|
---|
9 | * 2. Redistributions in binary form must reproduce the above copyright
|
---|
10 | * notice, this list of conditions and the following disclaimer in the
|
---|
11 | * documentation and/or other materials provided with the distribution.
|
---|
12 | *
|
---|
13 | * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
|
---|
14 | * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
---|
15 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
---|
16 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
|
---|
17 | * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
---|
18 | * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
---|
19 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
---|
20 | * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
---|
21 | * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
---|
22 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
---|
23 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
---|
24 | */
|
---|
25 |
|
---|
26 | #ifndef RegexPattern_h
|
---|
27 | #define RegexPattern_h
|
---|
28 |
|
---|
29 |
|
---|
30 | #if ENABLE(YARR)
|
---|
31 |
|
---|
32 | #include <wtf/Vector.h>
|
---|
33 | #include <wtf/unicode/Unicode.h>
|
---|
34 |
|
---|
35 |
|
---|
36 | namespace JSC { namespace Yarr {
|
---|
37 |
|
---|
38 | #define RegexStackSpaceForBackTrackInfoPatternCharacter 1 // Only for !fixed quantifiers.
|
---|
39 | #define RegexStackSpaceForBackTrackInfoCharacterClass 1 // Only for !fixed quantifiers.
|
---|
40 | #define RegexStackSpaceForBackTrackInfoBackReference 2
|
---|
41 | #define RegexStackSpaceForBackTrackInfoAlternative 1 // One per alternative.
|
---|
42 | #define RegexStackSpaceForBackTrackInfoParentheticalAssertion 1
|
---|
43 | #define RegexStackSpaceForBackTrackInfoParenthesesOnce 1 // Only for !fixed quantifiers.
|
---|
44 | #define RegexStackSpaceForBackTrackInfoParentheses 4
|
---|
45 |
|
---|
46 | struct PatternDisjunction;
|
---|
47 |
|
---|
48 | struct CharacterRange {
|
---|
49 | UChar begin;
|
---|
50 | UChar end;
|
---|
51 |
|
---|
52 | CharacterRange(UChar begin, UChar end)
|
---|
53 | : begin(begin)
|
---|
54 | , end(end)
|
---|
55 | {
|
---|
56 | }
|
---|
57 | };
|
---|
58 |
|
---|
59 | struct CharacterClass : FastAllocBase {
|
---|
60 | Vector<UChar> m_matches;
|
---|
61 | Vector<CharacterRange> m_ranges;
|
---|
62 | Vector<UChar> m_matchesUnicode;
|
---|
63 | Vector<CharacterRange> m_rangesUnicode;
|
---|
64 | };
|
---|
65 |
|
---|
66 | enum QuantifierType {
|
---|
67 | QuantifierFixedCount,
|
---|
68 | QuantifierGreedy,
|
---|
69 | QuantifierNonGreedy,
|
---|
70 | };
|
---|
71 |
|
---|
72 | struct PatternTerm {
|
---|
73 | enum Type {
|
---|
74 | TypeAssertionBOL,
|
---|
75 | TypeAssertionEOL,
|
---|
76 | TypeAssertionWordBoundary,
|
---|
77 | TypePatternCharacter,
|
---|
78 | TypeCharacterClass,
|
---|
79 | TypeBackReference,
|
---|
80 | TypeForwardReference,
|
---|
81 | TypeParenthesesSubpattern,
|
---|
82 | TypeParentheticalAssertion,
|
---|
83 | } type;
|
---|
84 | bool invertOrCapture;
|
---|
85 | union {
|
---|
86 | UChar patternCharacter;
|
---|
87 | CharacterClass* characterClass;
|
---|
88 | unsigned subpatternId;
|
---|
89 | struct {
|
---|
90 | PatternDisjunction* disjunction;
|
---|
91 | unsigned subpatternId;
|
---|
92 | unsigned lastSubpatternId;
|
---|
93 | bool isCopy;
|
---|
94 | } parentheses;
|
---|
95 | };
|
---|
96 | QuantifierType quantityType;
|
---|
97 | unsigned quantityCount;
|
---|
98 | int inputPosition;
|
---|
99 | unsigned frameLocation;
|
---|
100 |
|
---|
101 | PatternTerm(UChar ch)
|
---|
102 | : type(PatternTerm::TypePatternCharacter)
|
---|
103 | {
|
---|
104 | patternCharacter = ch;
|
---|
105 | quantityType = QuantifierFixedCount;
|
---|
106 | quantityCount = 1;
|
---|
107 | }
|
---|
108 |
|
---|
109 | PatternTerm(CharacterClass* charClass, bool invert)
|
---|
110 | : type(PatternTerm::TypeCharacterClass)
|
---|
111 | , invertOrCapture(invert)
|
---|
112 | {
|
---|
113 | characterClass = charClass;
|
---|
114 | quantityType = QuantifierFixedCount;
|
---|
115 | quantityCount = 1;
|
---|
116 | }
|
---|
117 |
|
---|
118 | PatternTerm(Type type, unsigned subpatternId, PatternDisjunction* disjunction, bool invertOrCapture)
|
---|
119 | : type(type)
|
---|
120 | , invertOrCapture(invertOrCapture)
|
---|
121 | {
|
---|
122 | parentheses.disjunction = disjunction;
|
---|
123 | parentheses.subpatternId = subpatternId;
|
---|
124 | parentheses.isCopy = false;
|
---|
125 | quantityType = QuantifierFixedCount;
|
---|
126 | quantityCount = 1;
|
---|
127 | }
|
---|
128 |
|
---|
129 | PatternTerm(Type type, bool invert = false)
|
---|
130 | : type(type)
|
---|
131 | , invertOrCapture(invert)
|
---|
132 | {
|
---|
133 | quantityType = QuantifierFixedCount;
|
---|
134 | quantityCount = 1;
|
---|
135 | }
|
---|
136 |
|
---|
137 | PatternTerm(unsigned spatternId)
|
---|
138 | : type(TypeBackReference)
|
---|
139 | , invertOrCapture(false)
|
---|
140 | {
|
---|
141 | subpatternId = spatternId;
|
---|
142 | quantityType = QuantifierFixedCount;
|
---|
143 | quantityCount = 1;
|
---|
144 | }
|
---|
145 |
|
---|
146 | static PatternTerm ForwardReference()
|
---|
147 | {
|
---|
148 | return PatternTerm(TypeForwardReference);
|
---|
149 | }
|
---|
150 |
|
---|
151 | static PatternTerm BOL()
|
---|
152 | {
|
---|
153 | return PatternTerm(TypeAssertionBOL);
|
---|
154 | }
|
---|
155 |
|
---|
156 | static PatternTerm EOL()
|
---|
157 | {
|
---|
158 | return PatternTerm(TypeAssertionEOL);
|
---|
159 | }
|
---|
160 |
|
---|
161 | static PatternTerm WordBoundary(bool invert)
|
---|
162 | {
|
---|
163 | return PatternTerm(TypeAssertionWordBoundary, invert);
|
---|
164 | }
|
---|
165 |
|
---|
166 | bool invert()
|
---|
167 | {
|
---|
168 | return invertOrCapture;
|
---|
169 | }
|
---|
170 |
|
---|
171 | bool capture()
|
---|
172 | {
|
---|
173 | return invertOrCapture;
|
---|
174 | }
|
---|
175 |
|
---|
176 | void quantify(unsigned count, QuantifierType type)
|
---|
177 | {
|
---|
178 | quantityCount = count;
|
---|
179 | quantityType = type;
|
---|
180 | }
|
---|
181 | };
|
---|
182 |
|
---|
183 | struct PatternAlternative : FastAllocBase {
|
---|
184 | PatternAlternative(PatternDisjunction* disjunction)
|
---|
185 | : m_parent(disjunction)
|
---|
186 | {
|
---|
187 | }
|
---|
188 |
|
---|
189 | PatternTerm& lastTerm()
|
---|
190 | {
|
---|
191 | ASSERT(m_terms.size());
|
---|
192 | return m_terms[m_terms.size() - 1];
|
---|
193 | }
|
---|
194 |
|
---|
195 | void removeLastTerm()
|
---|
196 | {
|
---|
197 | ASSERT(m_terms.size());
|
---|
198 | m_terms.shrink(m_terms.size() - 1);
|
---|
199 | }
|
---|
200 |
|
---|
201 | Vector<PatternTerm> m_terms;
|
---|
202 | PatternDisjunction* m_parent;
|
---|
203 | unsigned m_minimumSize;
|
---|
204 | bool m_hasFixedSize;
|
---|
205 | };
|
---|
206 |
|
---|
207 | struct PatternDisjunction : FastAllocBase {
|
---|
208 | PatternDisjunction(PatternAlternative* parent = 0)
|
---|
209 | : m_parent(parent)
|
---|
210 | {
|
---|
211 | }
|
---|
212 |
|
---|
213 | ~PatternDisjunction()
|
---|
214 | {
|
---|
215 | deleteAllValues(m_alternatives);
|
---|
216 | }
|
---|
217 |
|
---|
218 | PatternAlternative* addNewAlternative()
|
---|
219 | {
|
---|
220 | PatternAlternative* alternative = new PatternAlternative(this);
|
---|
221 | m_alternatives.append(alternative);
|
---|
222 | return alternative;
|
---|
223 | }
|
---|
224 |
|
---|
225 | Vector<PatternAlternative*> m_alternatives;
|
---|
226 | PatternAlternative* m_parent;
|
---|
227 | unsigned m_minimumSize;
|
---|
228 | unsigned m_callFrameSize;
|
---|
229 | bool m_hasFixedSize;
|
---|
230 | };
|
---|
231 |
|
---|
232 | // You probably don't want to be calling these functions directly
|
---|
233 | // (please to be calling newlineCharacterClass() et al on your
|
---|
234 | // friendly neighborhood RegexPattern instance to get nicely
|
---|
235 | // cached copies).
|
---|
236 | CharacterClass* newlineCreate();
|
---|
237 | CharacterClass* digitsCreate();
|
---|
238 | CharacterClass* spacesCreate();
|
---|
239 | CharacterClass* wordcharCreate();
|
---|
240 | CharacterClass* nondigitsCreate();
|
---|
241 | CharacterClass* nonspacesCreate();
|
---|
242 | CharacterClass* nonwordcharCreate();
|
---|
243 |
|
---|
244 | struct RegexPattern {
|
---|
245 | RegexPattern(bool ignoreCase, bool multiline)
|
---|
246 | : m_ignoreCase(ignoreCase)
|
---|
247 | , m_multiline(multiline)
|
---|
248 | , m_numSubpatterns(0)
|
---|
249 | , m_maxBackReference(0)
|
---|
250 | , m_shouldFallBack(false)
|
---|
251 | , newlineCached(0)
|
---|
252 | , digitsCached(0)
|
---|
253 | , spacesCached(0)
|
---|
254 | , wordcharCached(0)
|
---|
255 | , nondigitsCached(0)
|
---|
256 | , nonspacesCached(0)
|
---|
257 | , nonwordcharCached(0)
|
---|
258 | {
|
---|
259 | }
|
---|
260 |
|
---|
261 | ~RegexPattern()
|
---|
262 | {
|
---|
263 | deleteAllValues(m_disjunctions);
|
---|
264 | deleteAllValues(m_userCharacterClasses);
|
---|
265 | }
|
---|
266 |
|
---|
267 | void reset()
|
---|
268 | {
|
---|
269 | m_numSubpatterns = 0;
|
---|
270 | m_maxBackReference = 0;
|
---|
271 |
|
---|
272 | m_shouldFallBack = false;
|
---|
273 |
|
---|
274 | newlineCached = 0;
|
---|
275 | digitsCached = 0;
|
---|
276 | spacesCached = 0;
|
---|
277 | wordcharCached = 0;
|
---|
278 | nondigitsCached = 0;
|
---|
279 | nonspacesCached = 0;
|
---|
280 | nonwordcharCached = 0;
|
---|
281 |
|
---|
282 | deleteAllValues(m_disjunctions);
|
---|
283 | m_disjunctions.clear();
|
---|
284 | deleteAllValues(m_userCharacterClasses);
|
---|
285 | m_userCharacterClasses.clear();
|
---|
286 | }
|
---|
287 |
|
---|
288 | bool containsIllegalBackReference()
|
---|
289 | {
|
---|
290 | return m_maxBackReference > m_numSubpatterns;
|
---|
291 | }
|
---|
292 |
|
---|
293 | CharacterClass* newlineCharacterClass()
|
---|
294 | {
|
---|
295 | if (!newlineCached)
|
---|
296 | m_userCharacterClasses.append(newlineCached = newlineCreate());
|
---|
297 | return newlineCached;
|
---|
298 | }
|
---|
299 | CharacterClass* digitsCharacterClass()
|
---|
300 | {
|
---|
301 | if (!digitsCached)
|
---|
302 | m_userCharacterClasses.append(digitsCached = digitsCreate());
|
---|
303 | return digitsCached;
|
---|
304 | }
|
---|
305 | CharacterClass* spacesCharacterClass()
|
---|
306 | {
|
---|
307 | if (!spacesCached)
|
---|
308 | m_userCharacterClasses.append(spacesCached = spacesCreate());
|
---|
309 | return spacesCached;
|
---|
310 | }
|
---|
311 | CharacterClass* wordcharCharacterClass()
|
---|
312 | {
|
---|
313 | if (!wordcharCached)
|
---|
314 | m_userCharacterClasses.append(wordcharCached = wordcharCreate());
|
---|
315 | return wordcharCached;
|
---|
316 | }
|
---|
317 | CharacterClass* nondigitsCharacterClass()
|
---|
318 | {
|
---|
319 | if (!nondigitsCached)
|
---|
320 | m_userCharacterClasses.append(nondigitsCached = nondigitsCreate());
|
---|
321 | return nondigitsCached;
|
---|
322 | }
|
---|
323 | CharacterClass* nonspacesCharacterClass()
|
---|
324 | {
|
---|
325 | if (!nonspacesCached)
|
---|
326 | m_userCharacterClasses.append(nonspacesCached = nonspacesCreate());
|
---|
327 | return nonspacesCached;
|
---|
328 | }
|
---|
329 | CharacterClass* nonwordcharCharacterClass()
|
---|
330 | {
|
---|
331 | if (!nonwordcharCached)
|
---|
332 | m_userCharacterClasses.append(nonwordcharCached = nonwordcharCreate());
|
---|
333 | return nonwordcharCached;
|
---|
334 | }
|
---|
335 |
|
---|
336 | bool m_ignoreCase;
|
---|
337 | bool m_multiline;
|
---|
338 | unsigned m_numSubpatterns;
|
---|
339 | unsigned m_maxBackReference;
|
---|
340 | bool m_shouldFallBack;
|
---|
341 | PatternDisjunction* m_body;
|
---|
342 | Vector<PatternDisjunction*, 4> m_disjunctions;
|
---|
343 | Vector<CharacterClass*> m_userCharacterClasses;
|
---|
344 |
|
---|
345 | private:
|
---|
346 | CharacterClass* newlineCached;
|
---|
347 | CharacterClass* digitsCached;
|
---|
348 | CharacterClass* spacesCached;
|
---|
349 | CharacterClass* wordcharCached;
|
---|
350 | CharacterClass* nondigitsCached;
|
---|
351 | CharacterClass* nonspacesCached;
|
---|
352 | CharacterClass* nonwordcharCached;
|
---|
353 | };
|
---|
354 |
|
---|
355 | } } // namespace JSC::Yarr
|
---|
356 |
|
---|
357 | #endif
|
---|
358 |
|
---|
359 | #endif // RegexPattern_h
|
---|