source: webkit/trunk/Source/JavaScriptCore/testRegExp.cpp@ 225695

Last change on this file since 225695 was 225695, checked in by [email protected], 7 years ago

YARR: JIT RegExps with greedy parenthesized sub patterns
https://bugs.webkit.org/show_bug.cgi?id=180538

Reviewed by JF Bastien.

This patch adds JIT support for regular expressions containing greedy counted
parenthesis. An example expression that couldn't be JIT'ed before is /q(a|b)*q/.

Just like in the interpreter, expressions with nested parenthetical subpatterns
require saving the results of previous matches of the parentheses contents along
with any associated state. This saved state is needed in the case that we need
to backtrack. This state is called ParenContext within the code space allocated
for this ParenContext is managed using a simple block allocator within the JIT'ed
code. The raw space managed by this allocator is passed into the JIT'ed function.

Since this fixed sized space may be exceeded, this patch adds a fallback mechanism.
If the JIT'ed code exhausts all its ParenContext space, it returns a new error
JSRegExpJITCodeFailure. The caller will then bytecompile and interpret the
expression.

Due to increased register usage by the parenthesis handling code, the use of
registers by the JIT engine was restructured, with registers used for Unicode
pattern matching replaced with constants.

Reworked some of the context structures that are used across the interpreter
and JIT implementations to make them a little more uniform and to handle the
needs of JIT'ing the new parentheses forms.

To help with development and debugging of this code, compiled patterns dumping
code was enhanced. Also added the ability to also dump interpreter ByteCodes.

  • runtime/RegExp.cpp:

(JSC::byteCodeCompilePattern):
(JSC::RegExp::byteCodeCompileIfNecessary):
(JSC::RegExp::compile):
(JSC::RegExp::compileMatchOnly):

  • runtime/RegExp.h:
  • runtime/RegExpInlines.h:

(JSC::RegExp::matchInline):

  • testRegExp.cpp:

(parseRegExpLine):
(runFromFiles):

  • yarr/Yarr.h:
  • yarr/YarrInterpreter.cpp:

(JSC::Yarr::ByteCompiler::compile):
(JSC::Yarr::ByteCompiler::dumpDisjunction):

  • yarr/YarrJIT.cpp:

(JSC::Yarr::YarrGenerator::ParenContextSizes::ParenContextSizes):
(JSC::Yarr::YarrGenerator::ParenContextSizes::numSubpatterns):
(JSC::Yarr::YarrGenerator::ParenContextSizes::frameSlots):
(JSC::Yarr::YarrGenerator::ParenContext::sizeFor):
(JSC::Yarr::YarrGenerator::ParenContext::nextOffset):
(JSC::Yarr::YarrGenerator::ParenContext::beginOffset):
(JSC::Yarr::YarrGenerator::ParenContext::matchAmountOffset):
(JSC::Yarr::YarrGenerator::ParenContext::subpatternOffset):
(JSC::Yarr::YarrGenerator::ParenContext::savedFrameOffset):
(JSC::Yarr::YarrGenerator::initParenContextFreeList):
(JSC::Yarr::YarrGenerator::allocatePatternContext):
(JSC::Yarr::YarrGenerator::freePatternContext):
(JSC::Yarr::YarrGenerator::savePatternContext):
(JSC::Yarr::YarrGenerator::restorePatternContext):
(JSC::Yarr::YarrGenerator::tryReadUnicodeCharImpl):
(JSC::Yarr::YarrGenerator::storeToFrame):
(JSC::Yarr::YarrGenerator::generateJITFailReturn):
(JSC::Yarr::YarrGenerator::clearMatches):
(JSC::Yarr::YarrGenerator::generate):
(JSC::Yarr::YarrGenerator::backtrack):
(JSC::Yarr::YarrGenerator::opCompileParenthesesSubpattern):
(JSC::Yarr::YarrGenerator::generateEnter):
(JSC::Yarr::YarrGenerator::generateReturn):
(JSC::Yarr::YarrGenerator::YarrGenerator):
(JSC::Yarr::YarrGenerator::compile):

  • yarr/YarrJIT.h:

(JSC::Yarr::YarrCodeBlock::execute):

  • yarr/YarrPattern.cpp:

(JSC::Yarr::indentForNestingLevel):
(JSC::Yarr::dumpUChar32):
(JSC::Yarr::dumpCharacterClass):
(JSC::Yarr::PatternTerm::dump):
(JSC::Yarr::YarrPattern::dumpPattern):

  • yarr/YarrPattern.h:

(JSC::Yarr::PatternTerm::containsAnyCaptures):
(JSC::Yarr::BackTrackInfoParenthesesOnce::returnAddressIndex):
(JSC::Yarr::BackTrackInfoParentheses::beginIndex):
(JSC::Yarr::BackTrackInfoParentheses::returnAddressIndex):
(JSC::Yarr::BackTrackInfoParentheses::matchAmountIndex):
(JSC::Yarr::BackTrackInfoParentheses::patternContextHeadIndex):
(JSC::Yarr::BackTrackInfoAlternative::offsetIndex): Deleted.

  • Property svn:eol-style set to native
File size: 15.3 KB
Line 
1/*
2 * Copyright (C) 2011, 2015 Apple Inc. All rights reserved.
3 *
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Library General Public
6 * License as published by the Free Software Foundation; either
7 * version 2 of the License, or (at your option) any later version.
8 *
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Library General Public License for more details.
13 *
14 * You should have received a copy of the GNU Library General Public License
15 * along with this library; see the file COPYING.LIB. If not, write to
16 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
17 * Boston, MA 02110-1301, USA.
18 *
19 */
20
21#include "config.h"
22#include "RegExp.h"
23
24#include <wtf/CurrentTime.h>
25#include "InitializeThreading.h"
26#include "JSCInlines.h"
27#include "JSGlobalObject.h"
28#include <errno.h>
29#include <stdio.h>
30#include <stdlib.h>
31#include <string.h>
32#include <wtf/text/StringBuilder.h>
33
34#if !OS(WINDOWS)
35#include <unistd.h>
36#endif
37
38#if HAVE(SYS_TIME_H)
39#include <sys/time.h>
40#endif
41
42#if COMPILER(MSVC)
43#include <crtdbg.h>
44#include <mmsystem.h>
45#include <windows.h>
46#endif
47
48const int MaxLineLength = 100 * 1024;
49
50using namespace JSC;
51using namespace WTF;
52
53struct CommandLine {
54 CommandLine()
55 : interactive(false)
56 , verbose(false)
57 {
58 }
59
60 bool interactive;
61 bool verbose;
62 Vector<String> arguments;
63 Vector<String> files;
64};
65
66class StopWatch {
67public:
68 void start();
69 void stop();
70 long getElapsedMS(); // call stop() first
71
72private:
73 double m_startTime;
74 double m_stopTime;
75};
76
77void StopWatch::start()
78{
79 m_startTime = monotonicallyIncreasingTime();
80}
81
82void StopWatch::stop()
83{
84 m_stopTime = monotonicallyIncreasingTime();
85}
86
87long StopWatch::getElapsedMS()
88{
89 return static_cast<long>((m_stopTime - m_startTime) * 1000);
90}
91
92struct RegExpTest {
93 RegExpTest()
94 : offset(0)
95 , result(0)
96 {
97 }
98
99 String subject;
100 int offset;
101 int result;
102 Vector<int, 32> expectVector;
103};
104
105class GlobalObject : public JSGlobalObject {
106private:
107 GlobalObject(VM&, Structure*, const Vector<String>& arguments);
108
109public:
110 typedef JSGlobalObject Base;
111
112 static GlobalObject* create(VM& vm, Structure* structure, const Vector<String>& arguments)
113 {
114 GlobalObject* globalObject = new (NotNull, allocateCell<GlobalObject>(vm.heap)) GlobalObject(vm, structure, arguments);
115 return globalObject;
116 }
117
118 DECLARE_INFO;
119
120 static const bool needsDestructor = false;
121
122 static Structure* createStructure(VM& vm, JSValue prototype)
123 {
124 return Structure::create(vm, 0, prototype, TypeInfo(GlobalObjectType, StructureFlags), info());
125 }
126
127protected:
128 void finishCreation(VM& vm, const Vector<String>& arguments)
129 {
130 Base::finishCreation(vm);
131 UNUSED_PARAM(arguments);
132 }
133};
134
135const ClassInfo GlobalObject::s_info = { "global", &JSGlobalObject::s_info, nullptr, nullptr, CREATE_METHOD_TABLE(GlobalObject) };
136
137GlobalObject::GlobalObject(VM& vm, Structure* structure, const Vector<String>& arguments)
138 : JSGlobalObject(vm, structure)
139{
140 finishCreation(vm, arguments);
141}
142
143// Use SEH for Release builds only to get rid of the crash report dialog
144// (luckily the same tests fail in Release and Debug builds so far). Need to
145// be in a separate main function because the realMain function requires object
146// unwinding.
147
148#if COMPILER(MSVC) && !defined(_DEBUG)
149#define TRY __try {
150#define EXCEPT(x) } __except (EXCEPTION_EXECUTE_HANDLER) { x; }
151#else
152#define TRY
153#define EXCEPT(x)
154#endif
155
156int realMain(int argc, char** argv);
157
158int main(int argc, char** argv)
159{
160#if OS(WINDOWS)
161 // Cygwin calls ::SetErrorMode(SEM_FAILCRITICALERRORS), which we will inherit. This is bad for
162 // testing/debugging, as it causes the post-mortem debugger not to be invoked. We reset the
163 // error mode here to work around Cygwin's behavior. See <http://webkit.org/b/55222>.
164 ::SetErrorMode(0);
165
166#if defined(_DEBUG)
167 _CrtSetReportFile(_CRT_WARN, _CRTDBG_FILE_STDERR);
168 _CrtSetReportMode(_CRT_WARN, _CRTDBG_MODE_FILE);
169 _CrtSetReportFile(_CRT_ERROR, _CRTDBG_FILE_STDERR);
170 _CrtSetReportMode(_CRT_ERROR, _CRTDBG_MODE_FILE);
171 _CrtSetReportFile(_CRT_ASSERT, _CRTDBG_FILE_STDERR);
172 _CrtSetReportMode(_CRT_ASSERT, _CRTDBG_MODE_FILE);
173#endif
174
175 timeBeginPeriod(1);
176#endif
177
178 // Initialize JSC before getting VM.
179 JSC::initializeThreading();
180
181 // We can't use destructors in the following code because it uses Windows
182 // Structured Exception Handling
183 int res = 0;
184 TRY
185 res = realMain(argc, argv);
186 EXCEPT(res = 3)
187 return res;
188}
189
190static bool testOneRegExp(VM& vm, RegExp* regexp, RegExpTest* regExpTest, bool verbose, unsigned int lineNumber)
191{
192 bool result = true;
193 Vector<int> outVector;
194 outVector.resize(regExpTest->expectVector.size());
195 int matchResult = regexp->match(vm, regExpTest->subject, regExpTest->offset, outVector);
196
197 if (matchResult != regExpTest->result) {
198 result = false;
199 if (verbose)
200 printf("Line %d: results mismatch - expected %d got %d\n", lineNumber, regExpTest->result, matchResult);
201 } else if (matchResult != -1) {
202 if (outVector.size() != regExpTest->expectVector.size()) {
203 result = false;
204 if (verbose) {
205#if OS(WINDOWS)
206 printf("Line %d: output vector size mismatch - expected %Iu got %Iu\n", lineNumber, regExpTest->expectVector.size(), outVector.size());
207#else
208 printf("Line %d: output vector size mismatch - expected %zu got %zu\n", lineNumber, regExpTest->expectVector.size(), outVector.size());
209#endif
210 }
211 } else if (outVector.size() % 2) {
212 result = false;
213 if (verbose) {
214#if OS(WINDOWS)
215 printf("Line %d: output vector size is odd (%Iu), should be even\n", lineNumber, outVector.size());
216#else
217 printf("Line %d: output vector size is odd (%zu), should be even\n", lineNumber, outVector.size());
218#endif
219 }
220 } else {
221 // Check in pairs since the first value of the pair could be -1 in which case the second doesn't matter.
222 size_t pairCount = outVector.size() / 2;
223 for (size_t i = 0; i < pairCount; ++i) {
224 size_t startIndex = i*2;
225 if (outVector[startIndex] != regExpTest->expectVector[startIndex]) {
226 result = false;
227 if (verbose) {
228#if OS(WINDOWS)
229 printf("Line %d: output vector mismatch at index %Iu - expected %d got %d\n", lineNumber, startIndex, regExpTest->expectVector[startIndex], outVector[startIndex]);
230#else
231 printf("Line %d: output vector mismatch at index %zu - expected %d got %d\n", lineNumber, startIndex, regExpTest->expectVector[startIndex], outVector[startIndex]);
232#endif
233 }
234 }
235 if ((i > 0) && (regExpTest->expectVector[startIndex] != -1) && (outVector[startIndex+1] != regExpTest->expectVector[startIndex+1])) {
236 result = false;
237 if (verbose) {
238#if OS(WINDOWS)
239 printf("Line %d: output vector mismatch at index %Iu - expected %d got %d\n", lineNumber, startIndex + 1, regExpTest->expectVector[startIndex + 1], outVector[startIndex + 1]);
240#else
241 printf("Line %d: output vector mismatch at index %zu - expected %d got %d\n", lineNumber, startIndex + 1, regExpTest->expectVector[startIndex + 1], outVector[startIndex + 1]);
242#endif
243 }
244 }
245 }
246 }
247 }
248
249 return result;
250}
251
252static int scanString(char* buffer, int bufferLength, StringBuilder& builder, char termChar)
253{
254 bool escape = false;
255
256 for (int i = 0; i < bufferLength; ++i) {
257 UChar c = buffer[i];
258
259 if (escape) {
260 switch (c) {
261 case '0':
262 c = '\0';
263 break;
264 case 'a':
265 c = '\a';
266 break;
267 case 'b':
268 c = '\b';
269 break;
270 case 'f':
271 c = '\f';
272 break;
273 case 'n':
274 c = '\n';
275 break;
276 case 'r':
277 c = '\r';
278 break;
279 case 't':
280 c = '\t';
281 break;
282 case 'v':
283 c = '\v';
284 break;
285 case '\\':
286 c = '\\';
287 break;
288 case '?':
289 c = '\?';
290 break;
291 case 'u':
292 if ((i + 4) >= bufferLength)
293 return -1;
294 unsigned int charValue;
295 if (sscanf(buffer+i+1, "%04x", &charValue) != 1)
296 return -1;
297 c = static_cast<UChar>(charValue);
298 i += 4;
299 break;
300 }
301
302 builder.append(c);
303 escape = false;
304 } else {
305 if (c == termChar)
306 return i;
307
308 if (c == '\\')
309 escape = true;
310 else
311 builder.append(c);
312 }
313 }
314
315 return -1;
316}
317
318static RegExp* parseRegExpLine(VM& vm, char* line, int lineLength, const char** regexpError)
319{
320 StringBuilder pattern;
321
322 if (line[0] != '/')
323 return 0;
324
325 int i = scanString(line + 1, lineLength - 1, pattern, '/') + 1;
326
327 if ((i >= lineLength) || (line[i] != '/'))
328 return 0;
329
330 ++i;
331
332 RegExp* r = RegExp::create(vm, pattern.toString(), regExpFlags(line + i));
333 if (!r->isValid()) {
334 *regexpError = r->errorMessage();
335 return nullptr;
336 }
337 return r;
338}
339
340static RegExpTest* parseTestLine(char* line, int lineLength)
341{
342 StringBuilder subjectString;
343
344 if ((line[0] != ' ') || (line[1] != '"'))
345 return 0;
346
347 int i = scanString(line + 2, lineLength - 2, subjectString, '"') + 2;
348
349 if ((i >= (lineLength - 2)) || (line[i] != '"') || (line[i+1] != ',') || (line[i+2] != ' '))
350 return 0;
351
352 i += 3;
353
354 int offset;
355
356 if (sscanf(line + i, "%d, ", &offset) != 1)
357 return 0;
358
359 while (line[i] && line[i] != ' ')
360 ++i;
361
362 ++i;
363
364 int matchResult;
365
366 if (sscanf(line + i, "%d, ", &matchResult) != 1)
367 return 0;
368
369 while (line[i] && line[i] != ' ')
370 ++i;
371
372 ++i;
373
374 if (line[i++] != '(')
375 return 0;
376
377 int start, end;
378
379 RegExpTest* result = new RegExpTest();
380
381 result->subject = subjectString.toString();
382 result->offset = offset;
383 result->result = matchResult;
384
385 while (line[i] && line[i] != ')') {
386 if (sscanf(line + i, "%d, %d", &start, &end) != 2) {
387 delete result;
388 return 0;
389 }
390
391 result->expectVector.append(start);
392 result->expectVector.append(end);
393
394 while (line[i] && (line[i] != ',') && (line[i] != ')'))
395 i++;
396 i++;
397 while (line[i] && (line[i] != ',') && (line[i] != ')'))
398 i++;
399
400 if (line[i] == ')')
401 break;
402 if (!line[i] || (line[i] != ',')) {
403 delete result;
404 return 0;
405 }
406 i++;
407 }
408
409 return result;
410}
411
412static bool runFromFiles(GlobalObject* globalObject, const Vector<String>& files, bool verbose)
413{
414 String script;
415 String fileName;
416 Vector<char> scriptBuffer;
417 unsigned tests = 0;
418 unsigned failures = 0;
419 char* lineBuffer = new char[MaxLineLength + 1];
420
421 VM& vm = globalObject->vm();
422
423 bool success = true;
424 for (size_t i = 0; i < files.size(); i++) {
425 FILE* testCasesFile = fopen(files[i].utf8().data(), "rb");
426
427 if (!testCasesFile) {
428 printf("Unable to open test data file \"%s\"\n", files[i].utf8().data());
429 continue;
430 }
431
432 RegExp* regexp = 0;
433 size_t lineLength = 0;
434 char* linePtr = 0;
435 unsigned int lineNumber = 0;
436 const char* regexpError = nullptr;
437
438 while ((linePtr = fgets(&lineBuffer[0], MaxLineLength, testCasesFile))) {
439 lineLength = strlen(linePtr);
440 if (linePtr[lineLength - 1] == '\n') {
441 linePtr[lineLength - 1] = '\0';
442 --lineLength;
443 }
444 ++lineNumber;
445
446 if (linePtr[0] == '#')
447 continue;
448
449 if (linePtr[0] == '/') {
450 regexp = parseRegExpLine(vm, linePtr, lineLength, &regexpError);
451 if (!regexp) {
452 failures++;
453 fprintf(stderr, "Failure on line %u. '%s' %s\n", lineNumber, linePtr, regexpError);
454 }
455 } else if (linePtr[0] == ' ') {
456 RegExpTest* regExpTest = parseTestLine(linePtr, lineLength);
457
458 if (regexp && regExpTest) {
459 ++tests;
460 if (!testOneRegExp(vm, regexp, regExpTest, verbose, lineNumber)) {
461 failures++;
462 printf("Failure on line %u\n", lineNumber);
463 }
464 }
465
466 if (regExpTest)
467 delete regExpTest;
468 } else if (linePtr[0] == '-') {
469 tests++;
470 regexp = 0; // Reset the live regexp to avoid confusing other subsequent tests
471 bool successfullyParsed = parseRegExpLine(vm, linePtr + 1, lineLength - 1, &regexpError);
472 if (successfullyParsed) {
473 failures++;
474 fprintf(stderr, "Failure on line %u. '%s' %s\n", lineNumber, linePtr + 1, regexpError);
475 }
476 }
477 }
478
479 fclose(testCasesFile);
480 }
481
482 if (failures)
483 printf("%u tests run, %u failures\n", tests, failures);
484 else
485 printf("%u tests passed\n", tests);
486
487 delete[] lineBuffer;
488
489#if ENABLE(REGEXP_TRACING)
490 vm.dumpRegExpTrace();
491#endif
492 return success;
493}
494
495#define RUNNING_FROM_XCODE 0
496
497static NO_RETURN void printUsageStatement(bool help = false)
498{
499 fprintf(stderr, "Usage: regexp_test [options] file\n");
500 fprintf(stderr, " -h|--help Prints this help message\n");
501 fprintf(stderr, " -v|--verbose Verbose output\n");
502
503 exit(help ? EXIT_SUCCESS : EXIT_FAILURE);
504}
505
506static void parseArguments(int argc, char** argv, CommandLine& options)
507{
508 int i = 1;
509 for (; i < argc; ++i) {
510 const char* arg = argv[i];
511 if (!strcmp(arg, "-h") || !strcmp(arg, "--help"))
512 printUsageStatement(true);
513 if (!strcmp(arg, "-v") || !strcmp(arg, "--verbose"))
514 options.verbose = true;
515 else
516 options.files.append(argv[i]);
517 }
518
519 for (; i < argc; ++i)
520 options.arguments.append(argv[i]);
521}
522
523int realMain(int argc, char** argv)
524{
525 VM* vm = &VM::create(LargeHeap).leakRef();
526 JSLockHolder locker(vm);
527
528 CommandLine options;
529 parseArguments(argc, argv, options);
530
531 GlobalObject* globalObject = GlobalObject::create(*vm, GlobalObject::createStructure(*vm, jsNull()), options.arguments);
532 bool success = runFromFiles(globalObject, options.files, options.verbose);
533
534 return success ? 0 : 3;
535}
536
537#if OS(WINDOWS)
538extern "C" __declspec(dllexport) int WINAPI dllLauncherEntryPoint(int argc, const char* argv[])
539{
540 return main(argc, const_cast<char**>(argv));
541}
542#endif
Note: See TracBrowser for help on using the repository browser.