source: webkit/trunk/JavaScriptCore/yarr/RegexJIT.cpp@ 44514

Last change on this file since 44514 was 44514, checked in by [email protected], 16 years ago

2009-06-08 Gavin Barraclough <[email protected]>

Reviewed by Geoff Garen.

Add (incomplete) support to YARR for running with the jit enabled
on Arm thumb2 platforms. Adds new Assembler/MacroAssembler classes,
along with cache flushing support, tweaks to MacroAssemblerCodePtr
to support decorated thumb code pointers, and new enter/exit code
to YARR jit for the platform.

Support for this platform is still under development - the assembler
currrently only supports planting and linking jumps with a 16Mb range.
As such, initially commiting in a disabled state.

Add new assembler files.

  • assembler/ARMv7Assembler.h: Added.

Add new Assembler.

  • assembler/AbstractMacroAssembler.h:

Tweaks to ensure sizes of pointer values planted in JIT code do not change.

  • assembler/MacroAssembler.h:

On ARMv7 platforms use MacroAssemblerARMv7.

  • assembler/MacroAssemblerARMv7.h: Added.

Add new MacroAssembler.

  • assembler/MacroAssemblerCodeRef.h: (JSC::FunctionPtr::FunctionPtr):

Add better ASSERT.

(JSC::ReturnAddressPtr::ReturnAddressPtr):

Add better ASSERT.

(JSC::MacroAssemblerCodePtr::MacroAssemblerCodePtr):

On ARMv7, MacroAssemblerCodePtr's mush be 'decorated' with a low bit set,
to indicate to the processor that the code is thumb code, not traditional
32-bit ARM.

(JSC::MacroAssemblerCodePtr::dataLocation):

On ARMv7, decoration must be removed.

  • jit/ExecutableAllocator.h: (JSC::ExecutableAllocator::makeWritable):

Reformatted, no change.

(JSC::ExecutableAllocator::makeExecutable):

When marking code executable also cache flush it, where necessary.

(JSC::ExecutableAllocator::MakeWritable::MakeWritable):

Only use the null implementation of this class if both !ASSEMBLER_WX_EXCLUSIVE
and running on x86(_64) - on other platforms we may also need ensure that
makeExecutable is called at the end to flush caches.

(JSC::ExecutableAllocator::reprotectRegion):

Reformatted, no change.

(JSC::ExecutableAllocator::cacheFlush):

Cache flush a region of memory, or platforms where this is necessary.

  • wtf/Platform.h:

Add changes necessary to allow YARR jit to build on this platform, disabled.

  • yarr/RegexJIT.cpp: (JSC::Yarr::RegexGenerator::generateEnter): (JSC::Yarr::RegexGenerator::generateReturn):

Add support to these methods for ARMv7.

File size: 55.8 KB
Line 
1/*
2 * Copyright (C) 2009 Apple Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26#include "config.h"
27#include "RegexJIT.h"
28
29#include "ASCIICType.h"
30#include "JSGlobalData.h"
31#include "MacroAssembler.h"
32#include "RegexCompiler.h"
33
34#include "pcre.h" // temporary, remove when fallback is removed.
35
36#if ENABLE(YARR_JIT)
37
38using namespace WTF;
39
40namespace JSC { namespace Yarr {
41
42
43class RegexGenerator : private MacroAssembler {
44 friend void jitCompileRegex(JSGlobalData* globalData, RegexCodeBlock& jitObject, const UString& pattern, unsigned& numSubpatterns, const char*& error, bool ignoreCase, bool multiline);
45
46#if PLATFORM(ARM_V7)
47 static const RegisterID input = ARM::r0;
48 static const RegisterID index = ARM::r1;
49 static const RegisterID length = ARM::r2;
50
51 static const RegisterID output = ARM::r4;
52 static const RegisterID regT0 = ARM::r5;
53 static const RegisterID regT1 = ARM::r6;
54
55 static const RegisterID returnRegister = ARM::r0;
56#endif
57#if PLATFORM(X86)
58 static const RegisterID input = X86::eax;
59 static const RegisterID index = X86::edx;
60 static const RegisterID length = X86::ecx;
61 static const RegisterID output = X86::edi;
62
63 static const RegisterID regT0 = X86::ebx;
64 static const RegisterID regT1 = X86::esi;
65
66 static const RegisterID returnRegister = X86::eax;
67#endif
68#if PLATFORM(X86_64)
69 static const RegisterID input = X86::edi;
70 static const RegisterID index = X86::esi;
71 static const RegisterID length = X86::edx;
72 static const RegisterID output = X86::ecx;
73
74 static const RegisterID regT0 = X86::eax;
75 static const RegisterID regT1 = X86::ebx;
76
77 static const RegisterID returnRegister = X86::eax;
78#endif
79
80 void optimizeAlternative(PatternAlternative* alternative)
81 {
82 if (!alternative->m_terms.size())
83 return;
84
85 for (unsigned i = 0; i < alternative->m_terms.size() - 1; ++i) {
86 PatternTerm& term = alternative->m_terms[i];
87 PatternTerm& nextTerm = alternative->m_terms[i + 1];
88
89 if ((term.type == PatternTerm::TypeCharacterClass)
90 && (term.quantityType == QuantifierFixedCount)
91 && (nextTerm.type == PatternTerm::TypePatternCharacter)
92 && (nextTerm.quantityType == QuantifierFixedCount)) {
93 PatternTerm termCopy = term;
94 alternative->m_terms[i] = nextTerm;
95 alternative->m_terms[i + 1] = termCopy;
96 }
97 }
98 }
99
100 void matchCharacterClassRange(RegisterID character, JumpList& failures, JumpList& matchDest, const CharacterRange* ranges, unsigned count, unsigned* matchIndex, const UChar* matches, unsigned matchCount)
101 {
102 do {
103 // pick which range we're going to generate
104 int which = count >> 1;
105 char lo = ranges[which].begin;
106 char hi = ranges[which].end;
107
108 // check if there are any ranges or matches below lo. If not, just jl to failure -
109 // if there is anything else to check, check that first, if it falls through jmp to failure.
110 if ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) {
111 Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo));
112
113 // generate code for all ranges before this one
114 if (which)
115 matchCharacterClassRange(character, failures, matchDest, ranges, which, matchIndex, matches, matchCount);
116
117 while ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) {
118 matchDest.append(branch32(Equal, character, Imm32((unsigned short)matches[*matchIndex])));
119 ++*matchIndex;
120 }
121 failures.append(jump());
122
123 loOrAbove.link(this);
124 } else if (which) {
125 Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo));
126
127 matchCharacterClassRange(character, failures, matchDest, ranges, which, matchIndex, matches, matchCount);
128 failures.append(jump());
129
130 loOrAbove.link(this);
131 } else
132 failures.append(branch32(LessThan, character, Imm32((unsigned short)lo)));
133
134 while ((*matchIndex < matchCount) && (matches[*matchIndex] <= hi))
135 ++*matchIndex;
136
137 matchDest.append(branch32(LessThanOrEqual, character, Imm32((unsigned short)hi)));
138 // fall through to here, the value is above hi.
139
140 // shuffle along & loop around if there are any more matches to handle.
141 unsigned next = which + 1;
142 ranges += next;
143 count -= next;
144 } while (count);
145 }
146
147 void matchCharacterClass(RegisterID character, JumpList& matchDest, const CharacterClass* charClass)
148 {
149 Jump unicodeFail;
150 if (charClass->m_matchesUnicode.size() || charClass->m_rangesUnicode.size()) {
151 Jump isAscii = branch32(LessThanOrEqual, character, Imm32(0x7f));
152
153 if (charClass->m_matchesUnicode.size()) {
154 for (unsigned i = 0; i < charClass->m_matchesUnicode.size(); ++i) {
155 UChar ch = charClass->m_matchesUnicode[i];
156 matchDest.append(branch32(Equal, character, Imm32(ch)));
157 }
158 }
159
160 if (charClass->m_rangesUnicode.size()) {
161 for (unsigned i = 0; i < charClass->m_rangesUnicode.size(); ++i) {
162 UChar lo = charClass->m_rangesUnicode[i].begin;
163 UChar hi = charClass->m_rangesUnicode[i].end;
164
165 Jump below = branch32(LessThan, character, Imm32(lo));
166 matchDest.append(branch32(LessThanOrEqual, character, Imm32(hi)));
167 below.link(this);
168 }
169 }
170
171 unicodeFail = jump();
172 isAscii.link(this);
173 }
174
175 if (charClass->m_ranges.size()) {
176 unsigned matchIndex = 0;
177 JumpList failures;
178 matchCharacterClassRange(character, failures, matchDest, charClass->m_ranges.begin(), charClass->m_ranges.size(), &matchIndex, charClass->m_matches.begin(), charClass->m_matches.size());
179 while (matchIndex < charClass->m_matches.size())
180 matchDest.append(branch32(Equal, character, Imm32((unsigned short)charClass->m_matches[matchIndex++])));
181
182 failures.link(this);
183 } else if (charClass->m_matches.size()) {
184 // optimization: gather 'a','A' etc back together, can mask & test once.
185 Vector<char> matchesAZaz;
186
187 for (unsigned i = 0; i < charClass->m_matches.size(); ++i) {
188 char ch = charClass->m_matches[i];
189 if (m_pattern.m_ignoreCase) {
190 if (isASCIILower(ch)) {
191 matchesAZaz.append(ch);
192 continue;
193 }
194 if (isASCIIUpper(ch))
195 continue;
196 }
197 matchDest.append(branch32(Equal, character, Imm32((unsigned short)ch)));
198 }
199
200 if (unsigned countAZaz = matchesAZaz.size()) {
201 or32(Imm32(32), character);
202 for (unsigned i = 0; i < countAZaz; ++i)
203 matchDest.append(branch32(Equal, character, Imm32(matchesAZaz[i])));
204 }
205 }
206
207 if (charClass->m_matchesUnicode.size() || charClass->m_rangesUnicode.size())
208 unicodeFail.link(this);
209 }
210
211 // Jumps if input not available; will have (incorrectly) incremented already!
212 Jump jumpIfNoAvailableInput(unsigned countToCheck)
213 {
214 add32(Imm32(countToCheck), index);
215 return branch32(Above, index, length);
216 }
217
218 Jump jumpIfAvailableInput(unsigned countToCheck)
219 {
220 add32(Imm32(countToCheck), index);
221 return branch32(BelowOrEqual, index, length);
222 }
223
224 Jump checkInput()
225 {
226 return branch32(BelowOrEqual, index, length);
227 }
228
229 Jump atEndOfInput()
230 {
231 return branch32(Equal, index, length);
232 }
233
234 Jump notAtEndOfInput()
235 {
236 return branch32(NotEqual, index, length);
237 }
238
239 Jump jumpIfCharEquals(UChar ch, int inputPosition)
240 {
241 return branch16(Equal, BaseIndex(input, index, TimesTwo, inputPosition * sizeof(UChar)), Imm32(ch));
242 }
243
244 Jump jumpIfCharNotEquals(UChar ch, int inputPosition)
245 {
246 return branch16(NotEqual, BaseIndex(input, index, TimesTwo, inputPosition * sizeof(UChar)), Imm32(ch));
247 }
248
249 void readCharacter(int inputPosition, RegisterID reg)
250 {
251 load16(BaseIndex(input, index, TimesTwo, inputPosition * sizeof(UChar)), reg);
252 }
253
254 void storeToFrame(RegisterID reg, unsigned frameLocation)
255 {
256 poke(reg, frameLocation);
257 }
258
259 void storeToFrame(Imm32 imm, unsigned frameLocation)
260 {
261 poke(imm, frameLocation);
262 }
263
264 DataLabelPtr storeToFrameWithPatch(unsigned frameLocation)
265 {
266 return storePtrWithPatch(ImmPtr(0), Address(stackPointerRegister, frameLocation * sizeof(void*)));
267 }
268
269 void loadFromFrame(unsigned frameLocation, RegisterID reg)
270 {
271 peek(reg, frameLocation);
272 }
273
274 void loadFromFrameAndJump(unsigned frameLocation)
275 {
276 jump(Address(stackPointerRegister, frameLocation * sizeof(void*)));
277 }
278
279 struct AlternativeBacktrackRecord {
280 DataLabelPtr dataLabel;
281 Label backtrackLocation;
282
283 AlternativeBacktrackRecord(DataLabelPtr dataLabel, Label backtrackLocation)
284 : dataLabel(dataLabel)
285 , backtrackLocation(backtrackLocation)
286 {
287 }
288 };
289
290 struct TermGenerationState {
291 TermGenerationState(PatternDisjunction* disjunction, unsigned checkedTotal)
292 : disjunction(disjunction)
293 , checkedTotal(checkedTotal)
294 {
295 }
296
297 void resetAlternative()
298 {
299 isBackTrackGenerated = false;
300 alt = 0;
301 }
302 bool alternativeValid()
303 {
304 return alt < disjunction->m_alternatives.size();
305 }
306 void nextAlternative()
307 {
308 ++alt;
309 }
310 PatternAlternative* alternative()
311 {
312 return disjunction->m_alternatives[alt];
313 }
314
315 void resetTerm()
316 {
317 ASSERT(alternativeValid());
318 t = 0;
319 }
320 bool termValid()
321 {
322 ASSERT(alternativeValid());
323 return t < alternative()->m_terms.size();
324 }
325 void nextTerm()
326 {
327 ASSERT(alternativeValid());
328 ++t;
329 }
330 PatternTerm& term()
331 {
332 ASSERT(alternativeValid());
333 return alternative()->m_terms[t];
334 }
335
336 PatternTerm& lookaheadTerm()
337 {
338 ASSERT(alternativeValid());
339 ASSERT((t + 1) < alternative()->m_terms.size());
340 return alternative()->m_terms[t + 1];
341 }
342 bool isSinglePatternCharacterLookaheadTerm()
343 {
344 ASSERT(alternativeValid());
345 return ((t + 1) < alternative()->m_terms.size())
346 && (lookaheadTerm().type == PatternTerm::TypePatternCharacter)
347 && (lookaheadTerm().quantityType == QuantifierFixedCount)
348 && (lookaheadTerm().quantityCount == 1);
349 }
350
351 int inputOffset()
352 {
353 return term().inputPosition - checkedTotal;
354 }
355
356 void jumpToBacktrack(Jump jump, MacroAssembler* masm)
357 {
358 if (isBackTrackGenerated)
359 jump.linkTo(backtrackLabel, masm);
360 else
361 backTrackJumps.append(jump);
362 }
363 void jumpToBacktrack(JumpList& jumps, MacroAssembler* masm)
364 {
365 if (isBackTrackGenerated)
366 jumps.linkTo(backtrackLabel, masm);
367 else
368 backTrackJumps.append(jumps);
369 }
370 bool plantJumpToBacktrackIfExists(MacroAssembler* masm)
371 {
372 if (isBackTrackGenerated) {
373 masm->jump(backtrackLabel);
374 return true;
375 }
376 return false;
377 }
378 void addBacktrackJump(Jump jump)
379 {
380 backTrackJumps.append(jump);
381 }
382 void setBacktrackGenerated(Label label)
383 {
384 isBackTrackGenerated = true;
385 backtrackLabel = label;
386 }
387 void linkAlternativeBacktracks(MacroAssembler* masm)
388 {
389 isBackTrackGenerated = false;
390 backTrackJumps.link(masm);
391 }
392 void linkAlternativeBacktracksTo(Label label, MacroAssembler* masm)
393 {
394 isBackTrackGenerated = false;
395 backTrackJumps.linkTo(label, masm);
396 }
397 void propagateBacktrackingFrom(TermGenerationState& nestedParenthesesState, MacroAssembler* masm)
398 {
399 jumpToBacktrack(nestedParenthesesState.backTrackJumps, masm);
400 if (nestedParenthesesState.isBackTrackGenerated)
401 setBacktrackGenerated(nestedParenthesesState.backtrackLabel);
402 }
403
404 PatternDisjunction* disjunction;
405 int checkedTotal;
406 private:
407 unsigned alt;
408 unsigned t;
409 JumpList backTrackJumps;
410 Label backtrackLabel;
411 bool isBackTrackGenerated;
412 };
413
414 void generateAssertionBOL(TermGenerationState& state)
415 {
416 PatternTerm& term = state.term();
417
418 if (m_pattern.m_multiline) {
419 const RegisterID character = regT0;
420
421 JumpList matchDest;
422 if (!term.inputPosition)
423 matchDest.append(branch32(Equal, index, Imm32(state.checkedTotal)));
424
425 readCharacter(state.inputOffset() - 1, character);
426 matchCharacterClass(character, matchDest, m_pattern.newlineCharacterClass());
427 state.jumpToBacktrack(jump(), this);
428
429 matchDest.link(this);
430 } else {
431 // Erk, really should poison out these alternatives early. :-/
432 if (term.inputPosition)
433 state.jumpToBacktrack(jump(), this);
434 else
435 state.jumpToBacktrack(branch32(NotEqual, index, Imm32(state.checkedTotal)), this);
436 }
437 }
438
439 void generateAssertionEOL(TermGenerationState& state)
440 {
441 PatternTerm& term = state.term();
442
443 if (m_pattern.m_multiline) {
444 const RegisterID character = regT0;
445
446 JumpList matchDest;
447 if (term.inputPosition == state.checkedTotal)
448 matchDest.append(atEndOfInput());
449
450 readCharacter(state.inputOffset(), character);
451 matchCharacterClass(character, matchDest, m_pattern.newlineCharacterClass());
452 state.jumpToBacktrack(jump(), this);
453
454 matchDest.link(this);
455 } else {
456 if (term.inputPosition == state.checkedTotal)
457 state.jumpToBacktrack(notAtEndOfInput(), this);
458 // Erk, really should poison out these alternatives early. :-/
459 else
460 state.jumpToBacktrack(jump(), this);
461 }
462 }
463
464 // Also falls though on nextIsNotWordChar.
465 void matchAssertionWordchar(TermGenerationState& state, JumpList& nextIsWordChar, JumpList& nextIsNotWordChar)
466 {
467 const RegisterID character = regT0;
468 PatternTerm& term = state.term();
469
470 if (term.inputPosition == state.checkedTotal)
471 nextIsNotWordChar.append(atEndOfInput());
472
473 readCharacter(state.inputOffset(), character);
474 matchCharacterClass(character, nextIsWordChar, m_pattern.wordcharCharacterClass());
475 }
476
477 void generateAssertionWordBoundary(TermGenerationState& state)
478 {
479 const RegisterID character = regT0;
480 PatternTerm& term = state.term();
481
482 Jump atBegin;
483 JumpList matchDest;
484 if (!term.inputPosition)
485 atBegin = branch32(Equal, index, Imm32(state.checkedTotal));
486 readCharacter(state.inputOffset() - 1, character);
487 matchCharacterClass(character, matchDest, m_pattern.wordcharCharacterClass());
488 if (!term.inputPosition)
489 atBegin.link(this);
490
491 // We fall through to here if the last character was not a wordchar.
492 JumpList nonWordCharThenWordChar;
493 JumpList nonWordCharThenNonWordChar;
494 if (term.invertOrCapture) {
495 matchAssertionWordchar(state, nonWordCharThenNonWordChar, nonWordCharThenWordChar);
496 nonWordCharThenWordChar.append(jump());
497 } else {
498 matchAssertionWordchar(state, nonWordCharThenWordChar, nonWordCharThenNonWordChar);
499 nonWordCharThenNonWordChar.append(jump());
500 }
501 state.jumpToBacktrack(nonWordCharThenNonWordChar, this);
502
503 // We jump here if the last character was a wordchar.
504 matchDest.link(this);
505 JumpList wordCharThenWordChar;
506 JumpList wordCharThenNonWordChar;
507 if (term.invertOrCapture) {
508 matchAssertionWordchar(state, wordCharThenNonWordChar, wordCharThenWordChar);
509 wordCharThenWordChar.append(jump());
510 } else {
511 matchAssertionWordchar(state, wordCharThenWordChar, wordCharThenNonWordChar);
512 // This can fall-though!
513 }
514
515 state.jumpToBacktrack(wordCharThenWordChar, this);
516
517 nonWordCharThenWordChar.link(this);
518 wordCharThenNonWordChar.link(this);
519 }
520
521 void generatePatternCharacterSingle(TermGenerationState& state)
522 {
523 const RegisterID character = regT0;
524 UChar ch = state.term().patternCharacter;
525
526 if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
527 readCharacter(state.inputOffset(), character);
528 or32(Imm32(32), character);
529 state.jumpToBacktrack(branch32(NotEqual, character, Imm32(Unicode::toLower(ch))), this);
530 } else {
531 ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch)));
532 state.jumpToBacktrack(jumpIfCharNotEquals(ch, state.inputOffset()), this);
533 }
534 }
535
536 void generatePatternCharacterPair(TermGenerationState& state)
537 {
538 const RegisterID character = regT0;
539 UChar ch1 = state.term().patternCharacter;
540 UChar ch2 = state.lookaheadTerm().patternCharacter;
541
542 int mask = 0;
543 int chPair = ch1 | (ch2 << 16);
544
545 if (m_pattern.m_ignoreCase) {
546 if (isASCIIAlpha(ch1))
547 mask |= 32;
548 if (isASCIIAlpha(ch2))
549 mask |= 32 << 16;
550 }
551
552 if (mask) {
553 load32(BaseIndex(input, index, TimesTwo, state.inputOffset() * sizeof(UChar)), character);
554 or32(Imm32(mask), character);
555 state.jumpToBacktrack(branch32(NotEqual, character, Imm32(chPair | mask)), this);
556 } else
557 state.jumpToBacktrack(branch32(NotEqual, BaseIndex(input, index, TimesTwo, state.inputOffset() * sizeof(UChar)), Imm32(chPair)), this);
558 }
559
560 void generatePatternCharacterFixed(TermGenerationState& state)
561 {
562 const RegisterID character = regT0;
563 const RegisterID countRegister = regT1;
564 PatternTerm& term = state.term();
565 UChar ch = term.patternCharacter;
566
567 move(index, countRegister);
568 sub32(Imm32(term.quantityCount), countRegister);
569
570 Label loop(this);
571 if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
572 load16(BaseIndex(input, countRegister, TimesTwo, (state.inputOffset() + term.quantityCount) * sizeof(UChar)), character);
573 or32(Imm32(32), character);
574 state.jumpToBacktrack(branch32(NotEqual, character, Imm32(Unicode::toLower(ch))), this);
575 } else {
576 ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch)));
577 state.jumpToBacktrack(branch16(NotEqual, BaseIndex(input, countRegister, TimesTwo, (state.inputOffset() + term.quantityCount) * sizeof(UChar)), Imm32(ch)), this);
578 }
579 add32(Imm32(1), countRegister);
580 branch32(NotEqual, countRegister, index).linkTo(loop, this);
581 }
582
583 void generatePatternCharacterGreedy(TermGenerationState& state)
584 {
585 const RegisterID character = regT0;
586 const RegisterID countRegister = regT1;
587 PatternTerm& term = state.term();
588 UChar ch = term.patternCharacter;
589
590 move(Imm32(0), countRegister);
591
592 JumpList failures;
593 Label loop(this);
594 failures.append(atEndOfInput());
595 if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
596 readCharacter(state.inputOffset(), character);
597 or32(Imm32(32), character);
598 failures.append(branch32(NotEqual, character, Imm32(Unicode::toLower(ch))));
599 } else {
600 ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch)));
601 failures.append(jumpIfCharNotEquals(ch, state.inputOffset()));
602 }
603 add32(Imm32(1), countRegister);
604 add32(Imm32(1), index);
605 branch32(NotEqual, countRegister, Imm32(term.quantityCount)).linkTo(loop, this);
606 failures.append(jump());
607
608 Label backtrackBegin(this);
609 loadFromFrame(term.frameLocation, countRegister);
610 state.jumpToBacktrack(branchTest32(Zero, countRegister), this);
611 sub32(Imm32(1), countRegister);
612 sub32(Imm32(1), index);
613
614 failures.link(this);
615
616 storeToFrame(countRegister, term.frameLocation);
617
618 state.setBacktrackGenerated(backtrackBegin);
619 }
620
621 void generatePatternCharacterNonGreedy(TermGenerationState& state)
622 {
623 const RegisterID character = regT0;
624 const RegisterID countRegister = regT1;
625 PatternTerm& term = state.term();
626 UChar ch = term.patternCharacter;
627
628 move(Imm32(0), countRegister);
629
630 Jump firstTimeDoNothing = jump();
631
632 Label hardFail(this);
633 sub32(countRegister, index);
634 state.jumpToBacktrack(jump(), this);
635
636 Label backtrackBegin(this);
637 loadFromFrame(term.frameLocation, countRegister);
638
639 atEndOfInput().linkTo(hardFail, this);
640 branch32(Equal, countRegister, Imm32(term.quantityCount), hardFail);
641 if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
642 readCharacter(state.inputOffset(), character);
643 or32(Imm32(32), character);
644 branch32(NotEqual, character, Imm32(Unicode::toLower(ch))).linkTo(hardFail, this);
645 } else {
646 ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch)));
647 jumpIfCharNotEquals(ch, state.inputOffset()).linkTo(hardFail, this);
648 }
649
650 add32(Imm32(1), countRegister);
651 add32(Imm32(1), index);
652
653 firstTimeDoNothing.link(this);
654 storeToFrame(countRegister, term.frameLocation);
655
656 state.setBacktrackGenerated(backtrackBegin);
657 }
658
659 void generateCharacterClassSingle(TermGenerationState& state)
660 {
661 const RegisterID character = regT0;
662 PatternTerm& term = state.term();
663
664 JumpList matchDest;
665 readCharacter(state.inputOffset(), character);
666 matchCharacterClass(character, matchDest, term.characterClass);
667
668 if (term.invertOrCapture)
669 state.jumpToBacktrack(matchDest, this);
670 else {
671 state.jumpToBacktrack(jump(), this);
672 matchDest.link(this);
673 }
674 }
675
676 void generateCharacterClassFixed(TermGenerationState& state)
677 {
678 const RegisterID character = regT0;
679 const RegisterID countRegister = regT1;
680 PatternTerm& term = state.term();
681
682 move(index, countRegister);
683 sub32(Imm32(term.quantityCount), countRegister);
684
685 Label loop(this);
686 JumpList matchDest;
687 load16(BaseIndex(input, countRegister, TimesTwo, (state.inputOffset() + term.quantityCount) * sizeof(UChar)), character);
688 matchCharacterClass(character, matchDest, term.characterClass);
689
690 if (term.invertOrCapture)
691 state.jumpToBacktrack(matchDest, this);
692 else {
693 state.jumpToBacktrack(jump(), this);
694 matchDest.link(this);
695 }
696
697 add32(Imm32(1), countRegister);
698 branch32(NotEqual, countRegister, index).linkTo(loop, this);
699 }
700
701 void generateCharacterClassGreedy(TermGenerationState& state)
702 {
703 const RegisterID character = regT0;
704 const RegisterID countRegister = regT1;
705 PatternTerm& term = state.term();
706
707 move(Imm32(0), countRegister);
708
709 JumpList failures;
710 Label loop(this);
711 failures.append(atEndOfInput());
712
713 if (term.invertOrCapture) {
714 readCharacter(state.inputOffset(), character);
715 matchCharacterClass(character, failures, term.characterClass);
716 } else {
717 JumpList matchDest;
718 readCharacter(state.inputOffset(), character);
719 matchCharacterClass(character, matchDest, term.characterClass);
720 failures.append(jump());
721 matchDest.link(this);
722 }
723
724 add32(Imm32(1), countRegister);
725 add32(Imm32(1), index);
726 branch32(NotEqual, countRegister, Imm32(term.quantityCount)).linkTo(loop, this);
727 failures.append(jump());
728
729 Label backtrackBegin(this);
730 loadFromFrame(term.frameLocation, countRegister);
731 state.jumpToBacktrack(branchTest32(Zero, countRegister), this);
732 sub32(Imm32(1), countRegister);
733 sub32(Imm32(1), index);
734
735 failures.link(this);
736
737 storeToFrame(countRegister, term.frameLocation);
738
739 state.setBacktrackGenerated(backtrackBegin);
740 }
741
742 void generateCharacterClassNonGreedy(TermGenerationState& state)
743 {
744 const RegisterID character = regT0;
745 const RegisterID countRegister = regT1;
746 PatternTerm& term = state.term();
747
748 move(Imm32(0), countRegister);
749
750 Jump firstTimeDoNothing = jump();
751
752 Label hardFail(this);
753 sub32(countRegister, index);
754 state.jumpToBacktrack(jump(), this);
755
756 Label backtrackBegin(this);
757 loadFromFrame(term.frameLocation, countRegister);
758
759 atEndOfInput().linkTo(hardFail, this);
760 branch32(Equal, countRegister, Imm32(term.quantityCount), hardFail);
761
762 JumpList matchDest;
763 readCharacter(state.inputOffset(), character);
764 matchCharacterClass(character, matchDest, term.characterClass);
765
766 if (term.invertOrCapture)
767 matchDest.linkTo(hardFail, this);
768 else {
769 jump(hardFail);
770 matchDest.link(this);
771 }
772
773 add32(Imm32(1), countRegister);
774 add32(Imm32(1), index);
775
776 firstTimeDoNothing.link(this);
777 storeToFrame(countRegister, term.frameLocation);
778
779 state.setBacktrackGenerated(backtrackBegin);
780 }
781
782 void generateParenthesesDisjunction(PatternTerm& parenthesesTerm, TermGenerationState& state, unsigned alternativeFrameLocation)
783 {
784 ASSERT((parenthesesTerm.type == PatternTerm::TypeParenthesesSubpattern) || (parenthesesTerm.type == PatternTerm::TypeParentheticalAssertion));
785 ASSERT(parenthesesTerm.quantityCount == 1);
786
787 PatternDisjunction* disjunction = parenthesesTerm.parentheses.disjunction;
788 unsigned preCheckedCount = ((parenthesesTerm.quantityType == QuantifierFixedCount) && (parenthesesTerm.type != PatternTerm::TypeParentheticalAssertion)) ? disjunction->m_minimumSize : 0;
789
790 if (disjunction->m_alternatives.size() == 1) {
791 state.resetAlternative();
792 ASSERT(state.alternativeValid());
793 PatternAlternative* alternative = state.alternative();
794 optimizeAlternative(alternative);
795
796 int countToCheck = alternative->m_minimumSize - preCheckedCount;
797 if (countToCheck) {
798 ASSERT((parenthesesTerm.type == PatternTerm::TypeParentheticalAssertion) || (parenthesesTerm.quantityType != QuantifierFixedCount));
799
800 // FIXME: This is quite horrible. The call to 'plantJumpToBacktrackIfExists'
801 // will be forced to always trampoline into here, just to decrement the index.
802 // Ick.
803 Jump skip = jump();
804
805 Label backtrackBegin(this);
806 sub32(Imm32(countToCheck), index);
807 state.addBacktrackJump(jump());
808
809 skip.link(this);
810
811 state.setBacktrackGenerated(backtrackBegin);
812
813 state.jumpToBacktrack(jumpIfNoAvailableInput(countToCheck), this);
814 state.checkedTotal += countToCheck;
815 }
816
817 for (state.resetTerm(); state.termValid(); state.nextTerm())
818 generateTerm(state);
819
820 state.checkedTotal -= countToCheck;
821 } else {
822 JumpList successes;
823
824 for (state.resetAlternative(); state.alternativeValid(); state.nextAlternative()) {
825
826 PatternAlternative* alternative = state.alternative();
827 optimizeAlternative(alternative);
828
829 ASSERT(alternative->m_minimumSize >= preCheckedCount);
830 int countToCheck = alternative->m_minimumSize - preCheckedCount;
831 if (countToCheck) {
832 state.addBacktrackJump(jumpIfNoAvailableInput(countToCheck));
833 state.checkedTotal += countToCheck;
834 }
835
836 for (state.resetTerm(); state.termValid(); state.nextTerm())
837 generateTerm(state);
838
839 // Matched an alternative.
840 DataLabelPtr dataLabel = storeToFrameWithPatch(alternativeFrameLocation);
841 successes.append(jump());
842
843 // Alternative did not match.
844 Label backtrackLocation(this);
845
846 // Can we backtrack the alternative? - if so, do so. If not, just fall through to the next one.
847 state.plantJumpToBacktrackIfExists(this);
848
849 state.linkAlternativeBacktracks(this);
850
851 if (countToCheck) {
852 sub32(Imm32(countToCheck), index);
853 state.checkedTotal -= countToCheck;
854 }
855
856 m_backtrackRecords.append(AlternativeBacktrackRecord(dataLabel, backtrackLocation));
857 }
858 // We fall through to here when the last alternative fails.
859 // Add a backtrack out of here for the parenthese handling code to link up.
860 state.addBacktrackJump(jump());
861
862 // Generate a trampoline for the parens code to backtrack to, to retry the
863 // next alternative.
864 state.setBacktrackGenerated(label());
865 loadFromFrameAndJump(alternativeFrameLocation);
866
867 // FIXME: both of the above hooks are a little inefficient, in that you
868 // may end up trampolining here, just to trampoline back out to the
869 // parentheses code, or vice versa. We can probably eliminate a jump
870 // by restructuring, but coding this way for now for simplicity during
871 // development.
872
873 successes.link(this);
874 }
875 }
876
877 void generateParenthesesSingle(TermGenerationState& state)
878 {
879 const RegisterID indexTemporary = regT0;
880 PatternTerm& term = state.term();
881 PatternDisjunction* disjunction = term.parentheses.disjunction;
882 ASSERT(term.quantityCount == 1);
883
884 unsigned preCheckedCount = ((term.quantityCount == 1) && (term.quantityType == QuantifierFixedCount)) ? disjunction->m_minimumSize : 0;
885
886 unsigned parenthesesFrameLocation = term.frameLocation;
887 unsigned alternativeFrameLocation = parenthesesFrameLocation;
888 if (term.quantityType != QuantifierFixedCount)
889 alternativeFrameLocation += RegexStackSpaceForBackTrackInfoParenthesesOnce;
890
891 // optimized case - no capture & no quantifier can be handled in a light-weight manner.
892 if (!term.invertOrCapture && (term.quantityType == QuantifierFixedCount)) {
893 TermGenerationState parenthesesState(disjunction, state.checkedTotal);
894 generateParenthesesDisjunction(state.term(), parenthesesState, alternativeFrameLocation);
895 // this expects that any backtracks back out of the parentheses will be in the
896 // parenthesesState's backTrackJumps vector, and that if they need backtracking
897 // they will have set an entry point on the parenthesesState's backtrackLabel.
898 state.propagateBacktrackingFrom(parenthesesState, this);
899 } else {
900 Jump nonGreedySkipParentheses;
901 Label nonGreedyTryParentheses;
902 if (term.quantityType == QuantifierGreedy)
903 storeToFrame(Imm32(1), parenthesesFrameLocation);
904 else if (term.quantityType == QuantifierNonGreedy) {
905 storeToFrame(Imm32(0), parenthesesFrameLocation);
906 nonGreedySkipParentheses = jump();
907 nonGreedyTryParentheses = label();
908 storeToFrame(Imm32(1), parenthesesFrameLocation);
909 }
910
911 // store the match start index
912 if (term.invertOrCapture) {
913 int inputOffset = state.inputOffset() - preCheckedCount;
914 if (inputOffset) {
915 move(index, indexTemporary);
916 add32(Imm32(inputOffset), indexTemporary);
917 store32(indexTemporary, Address(output, (term.parentheses.subpatternId << 1) * sizeof(int)));
918 } else
919 store32(index, Address(output, (term.parentheses.subpatternId << 1) * sizeof(int)));
920 }
921
922 // generate the body of the parentheses
923 TermGenerationState parenthesesState(disjunction, state.checkedTotal);
924 generateParenthesesDisjunction(state.term(), parenthesesState, alternativeFrameLocation);
925
926 // store the match end index
927 if (term.invertOrCapture) {
928 int inputOffset = state.inputOffset();
929 if (inputOffset) {
930 move(index, indexTemporary);
931 add32(Imm32(state.inputOffset()), indexTemporary);
932 store32(indexTemporary, Address(output, ((term.parentheses.subpatternId << 1) + 1) * sizeof(int)));
933 } else
934 store32(index, Address(output, ((term.parentheses.subpatternId << 1) + 1) * sizeof(int)));
935 }
936 Jump success = jump();
937
938 // A failure AFTER the parens jumps here
939 Label backtrackFromAfterParens(this);
940
941 if (term.quantityType == QuantifierGreedy) {
942 // If this is zero we have now tested with both with and without the parens.
943 loadFromFrame(parenthesesFrameLocation, indexTemporary);
944 state.jumpToBacktrack(branchTest32(Zero, indexTemporary), this);
945 } else if (term.quantityType == QuantifierNonGreedy) {
946 // If this is zero we have now tested with both with and without the parens.
947 loadFromFrame(parenthesesFrameLocation, indexTemporary);
948 branchTest32(Zero, indexTemporary).linkTo(nonGreedyTryParentheses, this);
949 }
950
951 parenthesesState.plantJumpToBacktrackIfExists(this);
952 // A failure WITHIN the parens jumps here
953 parenthesesState.linkAlternativeBacktracks(this);
954 if (term.invertOrCapture) {
955 store32(Imm32(-1), Address(output, (term.parentheses.subpatternId << 1) * sizeof(int)));
956 store32(Imm32(-1), Address(output, ((term.parentheses.subpatternId << 1) + 1) * sizeof(int)));
957 }
958
959 if (term.quantityType == QuantifierGreedy)
960 storeToFrame(Imm32(0), parenthesesFrameLocation);
961 else
962 state.jumpToBacktrack(jump(), this);
963
964 state.setBacktrackGenerated(backtrackFromAfterParens);
965 if (term.quantityType == QuantifierNonGreedy)
966 nonGreedySkipParentheses.link(this);
967 success.link(this);
968 }
969 }
970
971 void generateParentheticalAssertion(TermGenerationState& state)
972 {
973 PatternTerm& term = state.term();
974 PatternDisjunction* disjunction = term.parentheses.disjunction;
975 ASSERT(term.quantityCount == 1);
976 ASSERT(term.quantityType == QuantifierFixedCount);
977
978 unsigned parenthesesFrameLocation = term.frameLocation;
979 unsigned alternativeFrameLocation = parenthesesFrameLocation + RegexStackSpaceForBackTrackInfoParentheticalAssertion;
980
981 int countCheckedAfterAssertion = state.checkedTotal - term.inputPosition;
982
983 if (term.invertOrCapture) {
984 // Inverted case
985 storeToFrame(index, parenthesesFrameLocation);
986
987 state.checkedTotal -= countCheckedAfterAssertion;
988 if (countCheckedAfterAssertion)
989 sub32(Imm32(countCheckedAfterAssertion), index);
990
991 TermGenerationState parenthesesState(disjunction, state.checkedTotal);
992 generateParenthesesDisjunction(state.term(), parenthesesState, alternativeFrameLocation);
993 // Success! - which means - Fail!
994 loadFromFrame(parenthesesFrameLocation, index);
995 state.jumpToBacktrack(jump(), this);
996
997 // And fail means success.
998 parenthesesState.linkAlternativeBacktracks(this);
999 loadFromFrame(parenthesesFrameLocation, index);
1000
1001 state.checkedTotal += countCheckedAfterAssertion;
1002 } else {
1003 // Normal case
1004 storeToFrame(index, parenthesesFrameLocation);
1005
1006 state.checkedTotal -= countCheckedAfterAssertion;
1007 if (countCheckedAfterAssertion)
1008 sub32(Imm32(countCheckedAfterAssertion), index);
1009
1010 TermGenerationState parenthesesState(disjunction, state.checkedTotal);
1011 generateParenthesesDisjunction(state.term(), parenthesesState, alternativeFrameLocation);
1012 // Success! - which means - Success!
1013 loadFromFrame(parenthesesFrameLocation, index);
1014 Jump success = jump();
1015
1016 parenthesesState.linkAlternativeBacktracks(this);
1017 loadFromFrame(parenthesesFrameLocation, index);
1018 state.jumpToBacktrack(jump(), this);
1019
1020 success.link(this);
1021
1022 state.checkedTotal += countCheckedAfterAssertion;
1023 }
1024 }
1025
1026 void generateTerm(TermGenerationState& state)
1027 {
1028 PatternTerm& term = state.term();
1029
1030 switch (term.type) {
1031 case PatternTerm::TypeAssertionBOL:
1032 generateAssertionBOL(state);
1033 break;
1034
1035 case PatternTerm::TypeAssertionEOL:
1036 generateAssertionEOL(state);
1037 break;
1038
1039 case PatternTerm::TypeAssertionWordBoundary:
1040 generateAssertionWordBoundary(state);
1041 break;
1042
1043 case PatternTerm::TypePatternCharacter:
1044 switch (term.quantityType) {
1045 case QuantifierFixedCount:
1046 if (term.quantityCount == 1) {
1047 if (state.isSinglePatternCharacterLookaheadTerm() && (state.lookaheadTerm().inputPosition == (term.inputPosition + 1))) {
1048 generatePatternCharacterPair(state);
1049 state.nextTerm();
1050 } else
1051 generatePatternCharacterSingle(state);
1052 } else
1053 generatePatternCharacterFixed(state);
1054 break;
1055 case QuantifierGreedy:
1056 generatePatternCharacterGreedy(state);
1057 break;
1058 case QuantifierNonGreedy:
1059 generatePatternCharacterNonGreedy(state);
1060 break;
1061 }
1062 break;
1063
1064 case PatternTerm::TypeCharacterClass:
1065 switch (term.quantityType) {
1066 case QuantifierFixedCount:
1067 if (term.quantityCount == 1)
1068 generateCharacterClassSingle(state);
1069 else
1070 generateCharacterClassFixed(state);
1071 break;
1072 case QuantifierGreedy:
1073 generateCharacterClassGreedy(state);
1074 break;
1075 case QuantifierNonGreedy:
1076 generateCharacterClassNonGreedy(state);
1077 break;
1078 }
1079 break;
1080
1081 case PatternTerm::TypeBackReference:
1082 m_generationFailed = true;
1083 break;
1084
1085 case PatternTerm::TypeForwardReference:
1086 break;
1087
1088 case PatternTerm::TypeParenthesesSubpattern:
1089 if ((term.quantityCount == 1) && !term.parentheses.isCopy)
1090 generateParenthesesSingle(state);
1091 else
1092 m_generationFailed = true;
1093 break;
1094
1095 case PatternTerm::TypeParentheticalAssertion:
1096 generateParentheticalAssertion(state);
1097 break;
1098 }
1099 }
1100
1101 void generateDisjunction(PatternDisjunction* disjunction)
1102 {
1103 TermGenerationState state(disjunction, 0);
1104 state.resetAlternative();
1105
1106 // Plant a check to see if there is sufficient input available to run the first alternative.
1107 // Jumping back to the label 'firstAlternative' will get to this check, jumping to
1108 // 'firstAlternativeInputChecked' will jump directly to matching the alternative having
1109 // skipped this check.
1110
1111 Label firstAlternative(this);
1112
1113 // check availability for the next alternative
1114 int countCheckedForCurrentAlternative = 0;
1115 int countToCheckForFirstAlternative = 0;
1116 bool hasShorterAlternatives = false;
1117 JumpList notEnoughInputForPreviousAlternative;
1118
1119 if (state.alternativeValid()) {
1120 PatternAlternative* alternative = state.alternative();
1121 countToCheckForFirstAlternative = alternative->m_minimumSize;
1122 state.checkedTotal += countToCheckForFirstAlternative;
1123 if (countToCheckForFirstAlternative)
1124 notEnoughInputForPreviousAlternative.append(jumpIfNoAvailableInput(countToCheckForFirstAlternative));
1125 countCheckedForCurrentAlternative = countToCheckForFirstAlternative;
1126 }
1127
1128 Label firstAlternativeInputChecked(this);
1129
1130 while (state.alternativeValid()) {
1131 // Track whether any alternatives are shorter than the first one.
1132 hasShorterAlternatives = hasShorterAlternatives || (countCheckedForCurrentAlternative < countToCheckForFirstAlternative);
1133
1134 PatternAlternative* alternative = state.alternative();
1135 optimizeAlternative(alternative);
1136
1137 for (state.resetTerm(); state.termValid(); state.nextTerm())
1138 generateTerm(state);
1139
1140 // If we get here, the alternative matched.
1141 if (m_pattern.m_body->m_callFrameSize)
1142 addPtr(Imm32(m_pattern.m_body->m_callFrameSize * sizeof(void*)), stackPointerRegister);
1143
1144 ASSERT(index != returnRegister);
1145 if (m_pattern.m_body->m_hasFixedSize) {
1146 move(index, returnRegister);
1147 if (alternative->m_minimumSize)
1148 sub32(Imm32(alternative->m_minimumSize), returnRegister);
1149 } else
1150 pop(returnRegister);
1151 store32(index, Address(output, 4));
1152 store32(returnRegister, output);
1153
1154 generateReturn();
1155
1156 state.nextAlternative();
1157
1158 // if there are any more alternatives, plant the check for input before looping.
1159 if (state.alternativeValid()) {
1160 PatternAlternative* nextAlternative = state.alternative();
1161 int countToCheckForNextAlternative = nextAlternative->m_minimumSize;
1162
1163 if (countCheckedForCurrentAlternative > countToCheckForNextAlternative) { // CASE 1: current alternative was longer than the next one.
1164 // If we get here, there the last input checked failed.
1165 notEnoughInputForPreviousAlternative.link(this);
1166
1167 // Check if sufficent input available to run the next alternative
1168 notEnoughInputForPreviousAlternative.append(jumpIfNoAvailableInput(countToCheckForNextAlternative - countCheckedForCurrentAlternative));
1169 // We are now in the correct state to enter the next alternative; this add is only required
1170 // to mirror and revert operation of the sub32, just below.
1171 add32(Imm32(countCheckedForCurrentAlternative - countToCheckForNextAlternative), index);
1172
1173 // If we get here, there the last input checked passed.
1174 state.linkAlternativeBacktracks(this);
1175 // No need to check if we can run the next alternative, since it is shorter -
1176 // just update index.
1177 sub32(Imm32(countCheckedForCurrentAlternative - countToCheckForNextAlternative), index);
1178 } else if (countCheckedForCurrentAlternative < countToCheckForNextAlternative) { // CASE 2: next alternative is longer than the current one.
1179 // If we get here, there the last input checked failed.
1180 // If there is insufficient input to run the current alternative, and the next alternative is longer,
1181 // then there is definitely not enough input to run it - don't even check. Just adjust index, as if
1182 // we had checked.
1183 notEnoughInputForPreviousAlternative.link(this);
1184 add32(Imm32(countToCheckForNextAlternative - countCheckedForCurrentAlternative), index);
1185 notEnoughInputForPreviousAlternative.append(jump());
1186
1187 // The next alternative is longer than the current one; check the difference.
1188 state.linkAlternativeBacktracks(this);
1189 notEnoughInputForPreviousAlternative.append(jumpIfNoAvailableInput(countToCheckForNextAlternative - countCheckedForCurrentAlternative));
1190 } else { // CASE 3: Both alternatives are the same length.
1191 ASSERT(countCheckedForCurrentAlternative == countToCheckForNextAlternative);
1192
1193 // If the next alterative is the same length as this one, then no need to check the input -
1194 // if there was sufficent input to run the current alternative then there is sufficient
1195 // input to run the next one; if not, there isn't.
1196 state.linkAlternativeBacktracks(this);
1197 }
1198
1199 state.checkedTotal -= countCheckedForCurrentAlternative;
1200 countCheckedForCurrentAlternative = countToCheckForNextAlternative;
1201 state.checkedTotal += countCheckedForCurrentAlternative;
1202 }
1203 }
1204
1205 // If we get here, all Alternatives failed...
1206
1207 state.checkedTotal -= countCheckedForCurrentAlternative;
1208
1209 // How much more input need there be to be able to retry from the first alternative?
1210 // examples:
1211 // /yarr_jit/ or /wrec|pcre/
1212 // In these examples we need check for one more input before looping.
1213 // /yarr_jit|pcre/
1214 // In this case we need check for 5 more input to loop (+4 to allow for the first alterative
1215 // being four longer than the last alternative checked, and another +1 to effectively move
1216 // the start position along by one).
1217 // /yarr|rules/ or /wrec|notsomuch/
1218 // In these examples, provided that there was sufficient input to have just been matching for
1219 // the second alternative we can loop without checking for available input (since the second
1220 // alternative is longer than the first). In the latter example we need to decrement index
1221 // (by 4) so the start position is only progressed by 1 from the last iteration.
1222 int incrementForNextIter = (countToCheckForFirstAlternative - countCheckedForCurrentAlternative) + 1;
1223
1224 // First, deal with the cases where there was sufficient input to try the last alternative.
1225 if (incrementForNextIter > 0) // We need to check for more input anyway, fall through to the checking below.
1226 state.linkAlternativeBacktracks(this);
1227 else if (m_pattern.m_body->m_hasFixedSize && !incrementForNextIter) // No need to update anything, link these backtracks straight to the to pof the loop!
1228 state.linkAlternativeBacktracksTo(firstAlternativeInputChecked, this);
1229 else { // no need to check the input, but we do have some bookkeeping to do first.
1230 state.linkAlternativeBacktracks(this);
1231
1232 // Where necessary update our preserved start position.
1233 if (!m_pattern.m_body->m_hasFixedSize) {
1234 move(index, regT0);
1235 sub32(Imm32(countCheckedForCurrentAlternative - 1), regT0);
1236 poke(regT0, m_pattern.m_body->m_callFrameSize);
1237 }
1238
1239 // Update index if necessary, and loop (without checking).
1240 if (incrementForNextIter)
1241 add32(Imm32(incrementForNextIter), index);
1242 jump().linkTo(firstAlternativeInputChecked, this);
1243 }
1244
1245 notEnoughInputForPreviousAlternative.link(this);
1246 // Update our idea of the start position, if we're tracking this.
1247 if (!m_pattern.m_body->m_hasFixedSize) {
1248 if (countCheckedForCurrentAlternative - 1) {
1249 move(index, regT0);
1250 sub32(Imm32(countCheckedForCurrentAlternative - 1), regT0);
1251 poke(regT0, m_pattern.m_body->m_callFrameSize);
1252 } else
1253 poke(index, m_pattern.m_body->m_callFrameSize);
1254 }
1255 // Check if there is sufficent input to run the first alternative again.
1256 jumpIfAvailableInput(incrementForNextIter).linkTo(firstAlternativeInputChecked, this);
1257 // No - insufficent input to run the first alteranative, are there any other alternatives we
1258 // might need to check? If so, the last check will have left the index incremented by
1259 // (countToCheckForFirstAlternative + 1), so we need test whether countToCheckForFirstAlternative
1260 // LESS input is available, to have the effect of just progressing the start position by 1
1261 // from the last iteration. If this check passes we can just jump up to the check associated
1262 // with the first alternative in the loop. This is a bit sad, since we'll end up trying the
1263 // first alternative again, and this check will fail (otherwise the check planted just above
1264 // here would have passed). This is a bit sad, however it saves trying to do something more
1265 // complex here in compilation, and in the common case we should end up coallescing the checks.
1266 //
1267 // FIXME: a nice improvement here may be to stop trying to match sooner, based on the least
1268 // of the minimum-alterantive-lengths. E.g. if I have two alternatives of length 200 and 150,
1269 // and a string of length 100, we'll end up looping index from 0 to 100, checking whether there
1270 // is sufficient input to run either alternative (constantly failing). If there had been only
1271 // one alternative, or if the shorter alternative had come first, we would have terminated
1272 // immediately. :-/
1273 if (hasShorterAlternatives)
1274 jumpIfAvailableInput(-countToCheckForFirstAlternative).linkTo(firstAlternative, this);
1275 // index will now be a bit garbled (depending on whether 'hasShorterAlternatives' is true,
1276 // it has either been incremented by 1 or by (countToCheckForFirstAlternative + 1) ...
1277 // but since we're about to return a failure this doesn't really matter!)
1278
1279 unsigned frameSize = m_pattern.m_body->m_callFrameSize;
1280 if (!m_pattern.m_body->m_hasFixedSize)
1281 ++frameSize;
1282 if (frameSize)
1283 addPtr(Imm32(frameSize * sizeof(void*)), stackPointerRegister);
1284
1285 move(Imm32(-1), returnRegister);
1286
1287 generateReturn();
1288 }
1289
1290 void generateEnter()
1291 {
1292#if PLATFORM(X86_64)
1293 push(X86::ebp);
1294 move(stackPointerRegister, X86::ebp);
1295#elif PLATFORM(X86)
1296 push(X86::ebp);
1297 move(stackPointerRegister, X86::ebp);
1298 // TODO: do we need spill registers to fill the output pointer if there are no sub captures?
1299 push(X86::ebx);
1300 push(X86::edi);
1301 push(X86::esi);
1302 // load output into edi (2 = saved ebp + return address).
1303 #if COMPILER(MSVC)
1304 loadPtr(Address(X86::ebp, 2 * sizeof(void*)), input);
1305 loadPtr(Address(X86::ebp, 3 * sizeof(void*)), index);
1306 loadPtr(Address(X86::ebp, 4 * sizeof(void*)), length);
1307 loadPtr(Address(X86::ebp, 5 * sizeof(void*)), output);
1308 #else
1309 loadPtr(Address(X86::ebp, 2 * sizeof(void*)), output);
1310 #endif
1311#elif PLATFORM(ARM_V7)
1312 push(ARM::r4);
1313 push(ARM::r5);
1314 push(ARM::r6);
1315 move(ARM::r3, output);
1316#endif
1317 }
1318
1319 void generateReturn()
1320 {
1321#if PLATFORM(X86_64)
1322 pop(X86::ebp);
1323#elif PLATFORM(X86)
1324 pop(X86::esi);
1325 pop(X86::edi);
1326 pop(X86::ebx);
1327 pop(X86::ebp);
1328#elif PLATFORM(ARM_V7)
1329 pop(ARM::r6);
1330 pop(ARM::r5);
1331 pop(ARM::r4);
1332#endif
1333 ret();
1334 }
1335
1336public:
1337 RegexGenerator(RegexPattern& pattern)
1338 : m_pattern(pattern)
1339 , m_generationFailed(false)
1340 {
1341 }
1342
1343 void generate()
1344 {
1345 generateEnter();
1346
1347 // TODO: do I really want this on the stack?
1348 if (!m_pattern.m_body->m_hasFixedSize)
1349 push(index);
1350
1351 if (m_pattern.m_body->m_callFrameSize)
1352 subPtr(Imm32(m_pattern.m_body->m_callFrameSize * sizeof(void*)), stackPointerRegister);
1353
1354 generateDisjunction(m_pattern.m_body);
1355 }
1356
1357 void compile(JSGlobalData* globalData, RegexCodeBlock& jitObject)
1358 {
1359 generate();
1360
1361 PatchBuffer patchBuffer(this, globalData->executableAllocator.poolForSize(size()));
1362
1363 for (unsigned i = 0; i < m_backtrackRecords.size(); ++i)
1364 patchBuffer.patch(m_backtrackRecords[i].dataLabel, patchBuffer.locationOf(m_backtrackRecords[i].backtrackLocation));
1365
1366 jitObject.set(patchBuffer.finalizeCode());
1367 }
1368
1369 bool generationFailed()
1370 {
1371 return m_generationFailed;
1372 }
1373
1374private:
1375 RegexPattern& m_pattern;
1376 Vector<AlternativeBacktrackRecord> m_backtrackRecords;
1377 bool m_generationFailed;
1378};
1379
1380void jitCompileRegex(JSGlobalData* globalData, RegexCodeBlock& jitObject, const UString& patternString, unsigned& numSubpatterns, const char*& error, bool ignoreCase, bool multiline)
1381{
1382 RegexPattern pattern(ignoreCase, multiline);
1383
1384 if ((error = compileRegex(patternString, pattern)))
1385 return;
1386
1387 numSubpatterns = pattern.m_numSubpatterns;
1388
1389 RegexGenerator generator(pattern);
1390 generator.compile(globalData, jitObject);
1391
1392 if (generator.generationFailed()) {
1393 JSRegExpIgnoreCaseOption ignoreCaseOption = ignoreCase ? JSRegExpIgnoreCase : JSRegExpDoNotIgnoreCase;
1394 JSRegExpMultilineOption multilineOption = multiline ? JSRegExpMultiline : JSRegExpSingleLine;
1395 jitObject.setFallback(jsRegExpCompile(reinterpret_cast<const UChar*>(patternString.data()), patternString.size(), ignoreCaseOption, multilineOption, &numSubpatterns, &error));
1396 }
1397}
1398
1399int executeRegex(RegexCodeBlock& jitObject, const UChar* input, unsigned start, unsigned length, int* output, int outputArraySize)
1400{
1401 if (JSRegExp* fallback = jitObject.getFallback())
1402 return (jsRegExpExecute(fallback, input, length, start, output, outputArraySize) < 0) ? -1 : output[0];
1403
1404 return jitObject.execute(input, start, length, output);
1405}
1406
1407}}
1408
1409#endif
1410
1411
1412
1413
1414
Note: See TracBrowser for help on using the repository browser.