Changeset 27686 in webkit for trunk/JavaScriptCore/pcre/pcre_exec.cpp
- Timestamp:
- Nov 11, 2007, 10:56:13 AM (18 years ago)
- File:
-
- 1 moved
Legend:
- Unmodified
- Added
- Removed
-
trunk/JavaScriptCore/pcre/pcre_exec.cpp
r27681 r27686 1 /************************************************* 2 * Perl-Compatible Regular Expressions * 3 *************************************************/ 4 5 /* PCRE is a library of functions to support regular expressions whose syntax 6 and semantics are as close as possible to those of the Perl 5 language. 7 8 Written by Philip Hazel 1 /* This is JavaScriptCore's variant of the PCRE library. While this library 2 started out as a copy of PCRE, many of the features of PCRE have been 3 removed. This library now supports only the regular expression features 4 required by the JavaScript language specification, and has only the functions 5 needed by JavaScriptCore and the rest of WebKit. 6 7 Originally written by Philip Hazel 9 8 Copyright (c) 1997-2006 University of Cambridge 10 11 9 Copyright (C) 2002, 2004, 2006, 2007 Apple Inc. All rights reserved. 12 10 … … 40 38 */ 41 39 42 43 /* This module contains pcre_exec(), the externally visible function that does 44 pattern matching using an NFA algorithm, trying to mimic Perl as closely as 45 possible. There are also some static supporting functions. */ 46 40 /* This module contains jsRegExpExecute(), the externally visible function 41 that does pattern matching using an NFA algorithm, following the rules from 42 the JavaScript specification. There are also some supporting functions. */ 47 43 48 44 #include "pcre_internal.h" 45 46 #include <wtf/ASCIICType.h> 47 #include <wtf/Vector.h> 48 49 using namespace WTF; 50 51 #ifdef __GNUC__ 52 #define USE_COMPUTED_GOTO_FOR_MATCH_RECURSION 53 //#define USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP 54 #endif 49 55 50 56 /* Avoid warnings on Windows. */ … … 62 68 USPTR epb_saved_eptr; 63 69 } eptrblock; 70 71 /* Structure for remembering the local variables in a private frame */ 72 73 typedef struct matchframe { 74 /* Where to jump back to */ 75 #ifndef USE_COMPUTED_GOTO_FOR_MATCH_RECURSION 76 int where; 77 #else 78 void *where; 79 #endif 80 81 struct matchframe *prevframe; 82 83 /* Function arguments that may change */ 84 85 const pcre_uchar *eptr; 86 const uschar *ecode; 87 int offset_top; 88 eptrblock *eptrb; 89 90 /* Function local variables */ 91 92 const uschar *data; 93 const uschar *next; 94 const pcre_uchar *pp; 95 const uschar *prev; 96 const pcre_uchar *saved_eptr; 97 98 int repeat_othercase; 99 100 int ctype; 101 int fc; 102 int fi; 103 int length; 104 int max; 105 int number; 106 int offset; 107 int save_offset1, save_offset2, save_offset3; 108 109 eptrblock newptrb; 110 } matchframe; 111 112 /* Structure for passing "static" information around between the functions 113 doing traditional NFA matching, so that they are thread-safe. */ 114 115 typedef struct match_data { 116 unsigned long int match_call_count; /* As it says */ 117 int *offset_vector; /* Offset vector */ 118 int offset_end; /* One past the end */ 119 int offset_max; /* The maximum usable for return data */ 120 const uschar *lcc; /* Points to lower casing table */ 121 const uschar *ctypes; /* Points to table of type maps */ 122 BOOL offset_overflow; /* Set if too many extractions */ 123 USPTR start_subject; /* Start of the subject string */ 124 USPTR end_subject; /* End of the subject string */ 125 USPTR end_match_ptr; /* Subject position at end match */ 126 int end_offset_top; /* Highwater mark at end of match */ 127 BOOL multiline; 128 BOOL caseless; 129 } match_data; 64 130 65 131 #define match_isgroup TRUE /* Set if start of bracketed group */ … … 171 237 RECURSION IN THE match() FUNCTION 172 238 173 The match() function is highly recursive, though not every recursive call 174 increases the recursive depth. Nevertheless, some regular expressions can cause 175 it to recurse to a great depth. I was writing for Unix, so I just let it call 176 itself recursively. This uses the stack for saving everything that has to be 177 saved for a recursive call. On Unix, the stack can be large, and this works 178 fine. 179 180 It turns out that on some non-Unix-like systems there are problems with 181 programs that use a lot of stack. (This despite the fact that every last chip 182 has oodles of memory these days, and techniques for extending the stack have 183 been known for decades.) So.... 184 185 There is a fudge, triggered by defining NO_RECURSE, which avoids recursive 186 calls by keeping local variables that need to be preserved in blocks of memory 187 obtained from malloc() instead instead of on the stack. Macros are used to 188 achieve this so that the actual code doesn't look very different to what it 189 always used to. 239 The original match() function was highly recursive. The current version 240 still has the remnants of the original in that recursive processing of the 241 regular expression is triggered by invoking a macro named RMATCH. This is 242 no longer really much like a recursive call to match() itself. 190 243 **************************************************************************** 191 244 ***************************************************************************/ 192 245 193 194 246 /* These versions of the macros use the stack, as normal. There are debugging 195 247 versions and production versions. */ 196 248 197 #ifndef __GNUC__249 #ifndef USE_COMPUTED_GOTO_FOR_MATCH_RECURSION 198 250 199 251 /* Use numbered labels and switch statement at the bottom of the match function. */ … … 215 267 216 268 #endif 217 218 269 219 270 #define RMATCH(num, ra, rb, rc)\ … … 222 273 newframe = frame + 1;\ 223 274 else\ 224 newframe = (pcre_malloc)(sizeof(matchframe));\ 225 frame->where = RMATCH_WHERE(num);\ 275 newframe = new matchframe;\ 226 276 newframe->eptr = frame->eptr;\ 227 277 newframe->ecode = (ra);\ … … 232 282 newframe->prevframe = frame;\ 233 283 frame = newframe;\ 284 frame->where = RMATCH_WHERE(num);\ 234 285 DPRINTF(("restarting from line %d\n", __LINE__));\ 235 goto HEAP_RECURSE;\286 goto RECURSE;\ 236 287 RRETURN_##num:\ 288 newframe = frame;\ 289 frame = frame->prevframe;\ 290 if (!(newframe >= stackframes && newframe < stackframesend))\ 291 delete newframe;\ 237 292 --rdepth;\ 238 293 DPRINTF(("did a goto back to line %d\n", __LINE__));\ 239 294 } 240 241 #define RRETURN(ra)\ 295 296 #define RRETURN goto RRETURN_LABEL 297 298 #define RRETURN_NO_MATCH \ 242 299 {\ 243 newframe = frame;\ 244 frame = newframe->prevframe;\ 245 if (!(newframe >= stackframes && newframe < stackframesend))\ 246 (pcre_free)(newframe);\ 247 if (frame != NULL)\ 248 {\ 249 rrc = (ra);\ 250 goto RRETURN_LABEL;\ 251 }\ 252 return ra;\ 300 is_match = FALSE;\ 301 RRETURN;\ 253 302 } 254 303 255 /* Structure for remembering the local variables in a private frame */ 256 257 typedef struct matchframe { 258 struct matchframe *prevframe; 259 260 /* Function arguments that may change */ 261 262 const pcre_uchar *eptr; 263 const uschar *ecode; 264 int offset_top; 265 eptrblock *eptrb; 266 267 /* Function local variables */ 268 269 const uschar *data; 270 const uschar *next; 271 const pcre_uchar *pp; 272 const uschar *prev; 273 const pcre_uchar *saved_eptr; 274 275 int repeat_othercase; 276 277 int ctype; 278 int fc; 279 int fi; 280 int length; 281 int max; 282 int number; 283 int offset; 284 int save_offset1, save_offset2, save_offset3; 285 286 eptrblock newptrb; 287 288 /* Where to jump back to */ 289 290 #ifndef __GNUC__ 291 int where; 292 #else 293 void *where; 294 #endif 295 296 } matchframe; 297 298 299 /*************************************************************************** 300 ***************************************************************************/ 301 302 304 #define RRETURN_ERROR(error) \ 305 { \ 306 i = (error); \ 307 goto RETURN_ERROR; \ 308 } 303 309 304 310 /************************************************* … … 325 331 */ 326 332 327 static int 328 match(USPTR eptr, const uschar *ecode, int offset_top, match_data *md) 333 static int match(USPTR eptr, const uschar *ecode, int offset_top, match_data *md) 329 334 { 330 /* These variables do not need to be preserved over recursion in this function, 331 so they can be ordinary variables in all cases. Mark them with "register" 332 because they are used a lot in loops. */ 333 334 register int rrc; /* Returns from recursive calls */ 335 register int i; /* Used for loops not involving calls to RMATCH() */ 336 register int c; /* Character values not kept over RMATCH() calls */ 335 register int is_match = FALSE; 336 register int i; 337 register int c; 338 337 339 unsigned rdepth = 0; 338 340 … … 343 345 BOOL minimize = FALSE; /* Initialization not really needed, but some compilers think so. */ 344 346 345 /* When recursion is not being used, all "local" variables that have to be346 preserved over calls to RMATCH() are part of a "frame" which is obtained from347 heap storage. Set up the top-level frame here; others are obtained from the348 heap whenever RMATCH() does a "recursion". See the macro definitions above. */349 350 347 /* The value 16 here is large enough that most regular expressions don't require 351 348 any calls to pcre_stack_malloc, yet the amount of stack used for the array is … … 356 353 matchframe *frame = stackframes; 357 354 matchframe *newframe; 358 frame->prevframe = NULL; /* Marks the top level */ 359 360 /* Copy in the original argument variables */ 355 356 /* The opcode jump table. */ 357 #ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP 358 #define EMIT_JUMP_TABLE_ENTRY(opcode) &&LABEL_OP_##opcode, 359 static void* opcode_jump_table[256] = { FOR_EACH_OPCODE(EMIT_JUMP_TABLE_ENTRY) }; 360 #undef EMIT_JUMP_TABLE_ENTRY 361 #endif 362 363 /* One-time setup of the opcode jump table. */ 364 #ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP 365 i = 255; 366 while (!opcode_jump_table[i]) 367 opcode_jump_table[i--] = &&CAPTURING_BRACKET; 368 #endif 369 370 #ifdef USE_COMPUTED_GOTO_FOR_MATCH_RECURSION 371 frame->where = &&RETURN; 372 #else 373 frame->where = 0; 374 #endif 361 375 362 376 frame->eptr = eptr; … … 367 381 /* This is where control jumps back to to effect "recursion" */ 368 382 369 HEAP_RECURSE:383 RECURSE: 370 384 371 385 /* OK, now we can get on with the real code of the function. Recursive calls … … 380 394 haven't exceeded the recursive call limit. */ 381 395 382 if (md->match_call_count++ >= MATCH_LIMIT) RRETURN (JS_REGEXP_ERROR_MATCHLIMIT);383 if (rdepth >= MATCH_LIMIT_RECURSION) RRETURN (JS_REGEXP_ERROR_RECURSIONLIMIT);396 if (md->match_call_count++ >= MATCH_LIMIT) RRETURN_ERROR(JSRegExpErrorMatchLimit); 397 if (rdepth >= MATCH_LIMIT_RECURSION) RRETURN_ERROR(JSRegExpErrorRecursionLimit); 384 398 385 399 /* At the start of a bracketed group, add the current subject pointer to the … … 397 411 /* Now start processing the operations. */ 398 412 413 #ifndef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP 399 414 for (;;) 415 #endif 400 416 { 417 418 #ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP 419 #define BEGIN_OPCODE(opcode) LABEL_OP_##opcode 420 #define NEXT_OPCODE goto *opcode_jump_table[*frame->ecode] 421 #else 422 #define BEGIN_OPCODE(opcode) case OP_##opcode 423 #define NEXT_OPCODE continue 424 #endif 425 426 #ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP 427 NEXT_OPCODE; 428 #else 401 429 switch (*frame->ecode) 430 #endif 402 431 { 403 case OP_BRA: /* Non-capturing bracket: optimized */ 432 /* Non-capturing bracket: optimized */ 433 434 BEGIN_OPCODE(BRA): 404 435 NON_CAPTURING_BRACKET: 405 436 DPRINTF(("start bracket 0\n")); … … 407 438 { 408 439 RMATCH(2, frame->ecode + 1 + LINK_SIZE, frame->eptrb, match_isgroup); 409 if ( rrc != MATCH_NOMATCH) RRETURN(rrc);440 if (is_match) RRETURN; 410 441 frame->ecode += GET(frame->ecode, 1); 411 442 } 412 443 while (*frame->ecode == OP_ALT); 413 444 DPRINTF(("bracket 0 failed\n")); 414 RRETURN (MATCH_NOMATCH);445 RRETURN; 415 446 416 447 /* End of the pattern. */ 417 448 418 case OP_END:449 BEGIN_OPCODE(END): 419 450 md->end_match_ptr = frame->eptr; /* Record where we ended */ 420 451 md->end_offset_top = frame->offset_top; /* and how many extracts were taken */ 421 RRETURN(MATCH_MATCH); 452 is_match = TRUE; 453 RRETURN; 422 454 423 455 /* Assertion brackets. Check the alternative branches in turn - the … … 427 459 this level is identical to the lookahead case. */ 428 460 429 case OP_ASSERT:461 BEGIN_OPCODE(ASSERT): 430 462 do 431 463 { 432 464 RMATCH(6, frame->ecode + 1 + LINK_SIZE, NULL, match_isgroup); 433 if (rrc == MATCH_MATCH) break; 434 if (rrc != MATCH_NOMATCH) RRETURN(rrc); 465 if (is_match) break; 435 466 frame->ecode += GET(frame->ecode, 1); 436 467 } 437 468 while (*frame->ecode == OP_ALT); 438 if (*frame->ecode == OP_KET) RRETURN (MATCH_NOMATCH);469 if (*frame->ecode == OP_KET) RRETURN_NO_MATCH; 439 470 440 471 /* Continue from after the assertion, updating the offsets high water … … 444 475 frame->ecode += 1 + LINK_SIZE; 445 476 frame->offset_top = md->end_offset_top; 446 continue;477 NEXT_OPCODE; 447 478 448 479 /* Negative assertion: all branches must fail to match */ 449 480 450 case OP_ASSERT_NOT:481 BEGIN_OPCODE(ASSERT_NOT): 451 482 do 452 483 { 453 484 RMATCH(7, frame->ecode + 1 + LINK_SIZE, NULL, match_isgroup); 454 if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH); 455 if (rrc != MATCH_NOMATCH) RRETURN(rrc); 485 if (is_match) RRETURN_NO_MATCH; 456 486 frame->ecode += GET(frame->ecode,1); 457 487 } … … 459 489 460 490 frame->ecode += 1 + LINK_SIZE; 461 continue;491 NEXT_OPCODE; 462 492 463 493 /* "Once" brackets are like assertion brackets except that after a match, … … 468 498 the end of a normal bracket, leaving the subject pointer. */ 469 499 470 case OP_ONCE:500 BEGIN_OPCODE(ONCE): 471 501 { 472 502 frame->prev = frame->ecode; … … 476 506 { 477 507 RMATCH(9, frame->ecode + 1 + LINK_SIZE, frame->eptrb, match_isgroup); 478 if (rrc == MATCH_MATCH) break; 479 if (rrc != MATCH_NOMATCH) RRETURN(rrc); 508 if (is_match) break; 480 509 frame->ecode += GET(frame->ecode,1); 481 510 } … … 484 513 /* If hit the end of the group (which could be repeated), fail */ 485 514 486 if (*frame->ecode != OP_ONCE && *frame->ecode != OP_ALT) RRETURN (MATCH_NOMATCH);515 if (*frame->ecode != OP_ONCE && *frame->ecode != OP_ALT) RRETURN; 487 516 488 517 /* Continue as from after the assertion, updating the offsets high water … … 503 532 { 504 533 frame->ecode += 1+LINK_SIZE; 505 break;534 NEXT_OPCODE; 506 535 } 507 536 … … 514 543 { 515 544 RMATCH(10, frame->ecode + 1 + LINK_SIZE, frame->eptrb, 0); 516 if ( rrc != MATCH_NOMATCH) RRETURN(rrc);545 if (is_match) RRETURN; 517 546 RMATCH(11, frame->prev, frame->eptrb, match_isgroup); 518 if ( rrc != MATCH_NOMATCH) RRETURN(rrc);547 if (is_match) RRETURN; 519 548 } 520 549 else /* OP_KETRMAX */ 521 550 { 522 551 RMATCH(12, frame->prev, frame->eptrb, match_isgroup); 523 if ( rrc != MATCH_NOMATCH) RRETURN(rrc);552 if (is_match) RRETURN; 524 553 RMATCH(13, frame->ecode + 1+LINK_SIZE, frame->eptrb, 0); 525 if ( rrc != MATCH_NOMATCH) RRETURN(rrc);526 } 527 } 528 RRETURN (MATCH_NOMATCH);554 if (is_match) RRETURN; 555 } 556 } 557 RRETURN; 529 558 530 559 /* An alternation is the end of a branch; scan along to find the end of the 531 560 bracketed group and go to there. */ 532 561 533 case OP_ALT:562 BEGIN_OPCODE(ALT): 534 563 do frame->ecode += GET(frame->ecode,1); while (*frame->ecode == OP_ALT); 535 break;564 NEXT_OPCODE; 536 565 537 566 /* BRAZERO and BRAMINZERO occur just before a bracket group, indicating … … 541 570 preceded by BRAZERO or BRAMINZERO. */ 542 571 543 case OP_BRAZERO:572 BEGIN_OPCODE(BRAZERO): 544 573 { 545 574 frame->next = frame->ecode+1; 546 575 RMATCH(14, frame->next, frame->eptrb, match_isgroup); 547 if ( rrc != MATCH_NOMATCH) RRETURN(rrc);576 if (is_match) RRETURN; 548 577 do frame->next += GET(frame->next,1); while (*frame->next == OP_ALT); 549 578 frame->ecode = frame->next + 1+LINK_SIZE; 550 579 } 551 break;552 553 case OP_BRAMINZERO:580 NEXT_OPCODE; 581 582 BEGIN_OPCODE(BRAMINZERO): 554 583 { 555 584 frame->next = frame->ecode+1; 556 585 do frame->next += GET(frame->next,1); while (*frame->next == OP_ALT); 557 586 RMATCH(15, frame->next + 1+LINK_SIZE, frame->eptrb, match_isgroup); 558 if ( rrc != MATCH_NOMATCH) RRETURN(rrc);587 if (is_match) RRETURN; 559 588 frame->ecode++; 560 589 } 561 break;590 NEXT_OPCODE; 562 591 563 592 /* End of a group, repeated or non-repeating. If we are at the end of … … 566 595 for the "once" (not-backup up) groups. */ 567 596 568 case OP_KET:569 case OP_KETRMIN:570 case OP_KETRMAX:597 BEGIN_OPCODE(KET): 598 BEGIN_OPCODE(KETRMIN): 599 BEGIN_OPCODE(KETRMAX): 571 600 { 572 601 frame->prev = frame->ecode - GET(frame->ecode, 1); … … 581 610 md->end_match_ptr = frame->eptr; /* For ONCE */ 582 611 md->end_offset_top = frame->offset_top; 583 RRETURN(MATCH_MATCH); 612 is_match = TRUE; 613 RRETURN; 584 614 } 585 615 … … 628 658 { 629 659 frame->ecode += 1 + LINK_SIZE; 630 break;660 NEXT_OPCODE; 631 661 } 632 662 … … 637 667 { 638 668 RMATCH(16, frame->ecode + 1+LINK_SIZE, frame->eptrb, 0); 639 if ( rrc != MATCH_NOMATCH) RRETURN(rrc);669 if (is_match) RRETURN; 640 670 RMATCH(17, frame->prev, frame->eptrb, match_isgroup); 641 if ( rrc != MATCH_NOMATCH) RRETURN(rrc);671 if (is_match) RRETURN; 642 672 } 643 673 else /* OP_KETRMAX */ 644 674 { 645 675 RMATCH(18, frame->prev, frame->eptrb, match_isgroup); 646 if ( rrc != MATCH_NOMATCH) RRETURN(rrc);676 if (is_match) RRETURN; 647 677 RMATCH(19, frame->ecode + 1+LINK_SIZE, frame->eptrb, 0); 648 if (rrc != MATCH_NOMATCH) RRETURN(rrc); 649 } 650 } 651 652 RRETURN(MATCH_NOMATCH); 678 if (is_match) RRETURN; 679 } 680 } 681 RRETURN; 653 682 654 683 /* Start of subject unless notbol, or after internal newline if multiline */ 655 684 656 case OP_CIRC:685 BEGIN_OPCODE(CIRC): 657 686 if (md->multiline) 658 687 { 659 688 if (frame->eptr != md->start_subject && !IS_NEWLINE(frame->eptr[-1])) 660 RRETURN (MATCH_NOMATCH);689 RRETURN_NO_MATCH; 661 690 frame->ecode++; 662 break;663 } 664 if (frame->eptr != md->start_subject) RRETURN (MATCH_NOMATCH);691 NEXT_OPCODE; 692 } 693 if (frame->eptr != md->start_subject) RRETURN_NO_MATCH; 665 694 frame->ecode++; 666 break;695 NEXT_OPCODE; 667 696 668 697 /* Assert before internal newline if multiline, or before a terminating 669 698 newline unless endonly is set, else end of subject unless noteol is set. */ 670 699 671 case OP_DOLL:700 BEGIN_OPCODE(DOLL): 672 701 if (md->multiline) 673 702 { 674 703 if (frame->eptr < md->end_subject) 675 { if (!IS_NEWLINE(*frame->eptr)) RRETURN (MATCH_NOMATCH); }704 { if (!IS_NEWLINE(*frame->eptr)) RRETURN_NO_MATCH; } 676 705 frame->ecode++; 677 break;678 706 } 679 707 else … … 681 709 if (frame->eptr < md->end_subject - 1 || 682 710 (frame->eptr == md->end_subject - 1 && !IS_NEWLINE(*frame->eptr))) 683 RRETURN (MATCH_NOMATCH);711 RRETURN_NO_MATCH; 684 712 frame->ecode++; 685 break; 686 } 687 break; 713 } 714 NEXT_OPCODE; 688 715 689 716 /* Word boundary assertions */ 690 717 691 case OP_NOT_WORD_BOUNDARY:692 case OP_WORD_BOUNDARY:718 BEGIN_OPCODE(NOT_WORD_BOUNDARY): 719 BEGIN_OPCODE(WORD_BOUNDARY): 693 720 { 694 721 /* Find out if the previous and current characters are "word" characters. … … 715 742 if ((*frame->ecode++ == OP_WORD_BOUNDARY)? 716 743 cur_is_word == prev_is_word : cur_is_word != prev_is_word) 717 RRETURN (MATCH_NOMATCH);718 } 719 break;744 RRETURN_NO_MATCH; 745 } 746 NEXT_OPCODE; 720 747 721 748 /* Match a single character type; inline for speed */ 722 749 723 case OP_ANY:750 BEGIN_OPCODE(ANY): 724 751 if (frame->eptr < md->end_subject && IS_NEWLINE(*frame->eptr)) 725 RRETURN (MATCH_NOMATCH);726 if (frame->eptr++ >= md->end_subject) RRETURN (MATCH_NOMATCH);752 RRETURN_NO_MATCH; 753 if (frame->eptr++ >= md->end_subject) RRETURN_NO_MATCH; 727 754 while (frame->eptr < md->end_subject && ISMIDCHAR(*frame->eptr)) frame->eptr++; 728 755 frame->ecode++; 729 break;730 731 case OP_NOT_DIGIT:732 if (frame->eptr >= md->end_subject) RRETURN (MATCH_NOMATCH);756 NEXT_OPCODE; 757 758 BEGIN_OPCODE(NOT_DIGIT): 759 if (frame->eptr >= md->end_subject) RRETURN_NO_MATCH; 733 760 GETCHARINCTEST(c, frame->eptr); 734 if ( 735 c < 128 && 736 (md->ctypes[c] & ctype_digit) != 0 737 ) 738 RRETURN(MATCH_NOMATCH); 761 if (isASCIIDigit(c)) 762 RRETURN_NO_MATCH; 739 763 frame->ecode++; 740 break;741 742 case OP_DIGIT:743 if (frame->eptr >= md->end_subject) RRETURN (MATCH_NOMATCH);764 NEXT_OPCODE; 765 766 BEGIN_OPCODE(DIGIT): 767 if (frame->eptr >= md->end_subject) RRETURN_NO_MATCH; 744 768 GETCHARINCTEST(c, frame->eptr); 745 if ( 746 c >= 128 || 747 (md->ctypes[c] & ctype_digit) == 0 748 ) 749 RRETURN(MATCH_NOMATCH); 769 if (!isASCIIDigit(c)) 770 RRETURN_NO_MATCH; 750 771 frame->ecode++; 751 break;752 753 case OP_NOT_WHITESPACE:754 if (frame->eptr >= md->end_subject) RRETURN (MATCH_NOMATCH);772 NEXT_OPCODE; 773 774 BEGIN_OPCODE(NOT_WHITESPACE): 775 if (frame->eptr >= md->end_subject) RRETURN_NO_MATCH; 755 776 GETCHARINCTEST(c, frame->eptr); 756 if ( 757 c < 128 && 758 (md->ctypes[c] & ctype_space) != 0 759 ) 760 RRETURN(MATCH_NOMATCH); 777 if (c < 128 && (md->ctypes[c] & ctype_space) != 0) 778 RRETURN_NO_MATCH; 761 779 frame->ecode++; 762 break;763 764 case OP_WHITESPACE:765 if (frame->eptr >= md->end_subject) RRETURN (MATCH_NOMATCH);780 NEXT_OPCODE; 781 782 BEGIN_OPCODE(WHITESPACE): 783 if (frame->eptr >= md->end_subject) RRETURN_NO_MATCH; 766 784 GETCHARINCTEST(c, frame->eptr); 767 if ( 768 c >= 128 || 769 (md->ctypes[c] & ctype_space) == 0 770 ) 771 RRETURN(MATCH_NOMATCH); 785 if (c >= 128 || (md->ctypes[c] & ctype_space) == 0) 786 RRETURN_NO_MATCH; 772 787 frame->ecode++; 773 break;774 775 case OP_NOT_WORDCHAR:776 if (frame->eptr >= md->end_subject) RRETURN (MATCH_NOMATCH);788 NEXT_OPCODE; 789 790 BEGIN_OPCODE(NOT_WORDCHAR): 791 if (frame->eptr >= md->end_subject) RRETURN_NO_MATCH; 777 792 GETCHARINCTEST(c, frame->eptr); 778 if ( 779 c < 128 && 780 (md->ctypes[c] & ctype_word) != 0 781 ) 782 RRETURN(MATCH_NOMATCH); 793 if (c < 128 && (md->ctypes[c] & ctype_word) != 0) 794 RRETURN_NO_MATCH; 783 795 frame->ecode++; 784 break;785 786 case OP_WORDCHAR:787 if (frame->eptr >= md->end_subject) RRETURN (MATCH_NOMATCH);796 NEXT_OPCODE; 797 798 BEGIN_OPCODE(WORDCHAR): 799 if (frame->eptr >= md->end_subject) RRETURN_NO_MATCH; 788 800 GETCHARINCTEST(c, frame->eptr); 789 if ( 790 c >= 128 || 791 (md->ctypes[c] & ctype_word) == 0 792 ) 793 RRETURN(MATCH_NOMATCH); 801 if (c >= 128 || (md->ctypes[c] & ctype_word) == 0) 802 RRETURN_NO_MATCH; 794 803 frame->ecode++; 795 break;804 NEXT_OPCODE; 796 805 797 806 /* Match a back reference, possibly repeatedly. Look past the end of the … … 803 812 loops). */ 804 813 805 case OP_REF:814 BEGIN_OPCODE(REF): 806 815 { 807 816 frame->offset = GET2(frame->ecode, 1) << 1; /* Doubled ref number */ … … 844 853 845 854 default: /* No repeat follows */ 846 if (!match_ref(frame->offset, frame->eptr, frame->length, md)) RRETURN (MATCH_NOMATCH);855 if (!match_ref(frame->offset, frame->eptr, frame->length, md)) RRETURN_NO_MATCH; 847 856 frame->eptr += frame->length; 848 continue; /* With the main loop */857 NEXT_OPCODE; 849 858 } 850 859 … … 852 861 main loop. */ 853 862 854 if (frame->length == 0) continue; 863 if (frame->length == 0) 864 NEXT_OPCODE; 855 865 856 866 /* First, ensure the minimum number of matches are present. */ … … 858 868 for (i = 1; i <= min; i++) 859 869 { 860 if (!match_ref(frame->offset, frame->eptr, frame->length, md)) RRETURN (MATCH_NOMATCH);870 if (!match_ref(frame->offset, frame->eptr, frame->length, md)) RRETURN_NO_MATCH; 861 871 frame->eptr += frame->length; 862 872 } … … 865 875 They are not both allowed to be zero. */ 866 876 867 if (min == frame->max) continue; 877 if (min == frame->max) 878 NEXT_OPCODE; 868 879 869 880 /* If minimizing, keep trying and advancing the pointer */ … … 874 885 { 875 886 RMATCH(20, frame->ecode, frame->eptrb, 0); 876 if ( rrc != MATCH_NOMATCH) RRETURN(rrc);887 if (is_match) RRETURN; 877 888 if (frame->fi >= frame->max || !match_ref(frame->offset, frame->eptr, frame->length, md)) 878 RRETURN (MATCH_NOMATCH);889 RRETURN; 879 890 frame->eptr += frame->length; 880 891 } … … 895 906 { 896 907 RMATCH(21, frame->ecode, frame->eptrb, 0); 897 if ( rrc != MATCH_NOMATCH) RRETURN(rrc);908 if (is_match) RRETURN; 898 909 frame->eptr -= frame->length; 899 910 } 900 RRETURN (MATCH_NOMATCH);911 RRETURN_NO_MATCH; 901 912 } 902 913 } 903 914 /* Control never gets here */ 904 905 906 915 907 916 /* Match a bit-mapped character class, possibly repeatedly. This op code is … … 916 925 again for speed. */ 917 926 918 case OP_NCLASS:919 case OP_CLASS:927 BEGIN_OPCODE(NCLASS): 928 BEGIN_OPCODE(CLASS): 920 929 { 921 930 frame->data = frame->ecode + 1; /* Save for matching */ … … 956 965 for (i = 1; i <= min; i++) 957 966 { 958 if (frame->eptr >= md->end_subject) RRETURN (MATCH_NOMATCH);967 if (frame->eptr >= md->end_subject) RRETURN_NO_MATCH; 959 968 GETCHARINC(c, frame->eptr); 960 969 if (c > 255) 961 970 { 962 if (frame->data[-1] == OP_CLASS) RRETURN (MATCH_NOMATCH);971 if (frame->data[-1] == OP_CLASS) RRETURN_NO_MATCH; 963 972 } 964 973 else 965 974 { 966 if ((frame->data[c/8] & (1 << (c&7))) == 0) RRETURN (MATCH_NOMATCH);975 if ((frame->data[c/8] & (1 << (c&7))) == 0) RRETURN_NO_MATCH; 967 976 } 968 977 } … … 972 981 need to recurse. */ 973 982 974 if (min == frame->max) continue; 983 if (min == frame->max) 984 NEXT_OPCODE; 975 985 976 986 /* If minimizing, keep testing the rest of the expression and advancing 977 987 the pointer while it matches the class. */ 978 979 988 if (minimize) 980 989 { … … 983 992 { 984 993 RMATCH(22, frame->ecode, frame->eptrb, 0); 985 if ( rrc != MATCH_NOMATCH) RRETURN(rrc);986 if (frame->fi >= frame->max || frame->eptr >= md->end_subject) RRETURN (MATCH_NOMATCH);994 if (is_match) RRETURN; 995 if (frame->fi >= frame->max || frame->eptr >= md->end_subject) RRETURN; 987 996 GETCHARINC(c, frame->eptr); 988 997 if (c > 255) 989 998 { 990 if (frame->data[-1] == OP_CLASS) RRETURN (MATCH_NOMATCH);999 if (frame->data[-1] == OP_CLASS) RRETURN; 991 1000 } 992 1001 else 993 1002 { 994 if ((frame->data[c/8] & (1 << (c&7))) == 0) RRETURN (MATCH_NOMATCH);1003 if ((frame->data[c/8] & (1 << (c&7))) == 0) RRETURN; 995 1004 } 996 1005 } … … 998 1007 /* Control never gets here */ 999 1008 } 1000 1001 1009 /* If maximizing, find the longest possible run, then work backwards. */ 1002 1003 1010 else 1004 1011 { 1005 1012 frame->pp = frame->eptr; 1006 1013 1007 {1008 1014 for (i = min; i < frame->max; i++) 1009 1015 { … … 1024 1030 { 1025 1031 RMATCH(24, frame->ecode, frame->eptrb, 0); 1026 if ( rrc != MATCH_NOMATCH) RRETURN(rrc);1032 if (is_match) RRETURN; 1027 1033 if (frame->eptr-- == frame->pp) break; /* Stop if tried at original pos */ 1028 1034 BACKCHAR(frame->eptr); 1029 1035 } 1030 } 1031 RRETURN (MATCH_NOMATCH);1036 1037 RRETURN; 1032 1038 } 1033 1039 } 1034 1040 /* Control never gets here */ 1035 1036 1041 1037 1042 /* Match an extended character class. This opcode is encountered only 1038 1043 in UTF-8 mode, because that's the only time it is compiled. */ 1039 1044 1040 case OP_XCLASS:1045 BEGIN_OPCODE(XCLASS): 1041 1046 { 1042 1047 frame->data = frame->ecode + 1 + LINK_SIZE; /* Save for matching */ … … 1069 1074 default: /* No repeat follows */ 1070 1075 min = frame->max = 1; 1071 break;1072 1076 } 1073 1077 … … 1076 1080 for (i = 1; i <= min; i++) 1077 1081 { 1078 if (frame->eptr >= md->end_subject) RRETURN (MATCH_NOMATCH);1082 if (frame->eptr >= md->end_subject) RRETURN_NO_MATCH; 1079 1083 GETCHARINC(c, frame->eptr); 1080 if (!_pcre_xclass(c, frame->data)) RRETURN (MATCH_NOMATCH);1084 if (!_pcre_xclass(c, frame->data)) RRETURN_NO_MATCH; 1081 1085 } 1082 1086 … … 1084 1088 need to recurse. */ 1085 1089 1086 if (min == frame->max) continue; 1090 if (min == frame->max) 1091 NEXT_OPCODE; 1087 1092 1088 1093 /* If minimizing, keep testing the rest of the expression and advancing … … 1094 1099 { 1095 1100 RMATCH(26, frame->ecode, frame->eptrb, 0); 1096 if ( rrc != MATCH_NOMATCH) RRETURN(rrc);1097 if (frame->fi >= frame->max || frame->eptr >= md->end_subject) RRETURN (MATCH_NOMATCH);1101 if (is_match) RRETURN; 1102 if (frame->fi >= frame->max || frame->eptr >= md->end_subject) RRETURN; 1098 1103 GETCHARINC(c, frame->eptr); 1099 if (!_pcre_xclass(c, frame->data)) RRETURN (MATCH_NOMATCH);1104 if (!_pcre_xclass(c, frame->data)) RRETURN; 1100 1105 } 1101 1106 /* Control never gets here */ … … 1118 1123 { 1119 1124 RMATCH(27, frame->ecode, frame->eptrb, 0); 1120 if ( rrc != MATCH_NOMATCH) RRETURN(rrc);1125 if (is_match) RRETURN; 1121 1126 if (frame->eptr-- == frame->pp) break; /* Stop if tried at original pos */ 1122 1127 BACKCHAR(frame->eptr) 1123 1128 } 1124 RRETURN (MATCH_NOMATCH);1129 RRETURN; 1125 1130 } 1126 1131 … … 1130 1135 /* Match a single character, casefully */ 1131 1136 1132 case OP_CHAR:1137 BEGIN_OPCODE(CHAR): 1133 1138 { 1134 1139 frame->length = 1; … … 1141 1146 { 1142 1147 case 0: 1143 RRETURN (MATCH_NOMATCH);1148 RRETURN_NO_MATCH; 1144 1149 case 1: 1145 1150 dc = *frame->eptr++; 1146 1151 if (IS_LEADING_SURROGATE(dc)) 1147 RRETURN (MATCH_NOMATCH);1152 RRETURN_NO_MATCH; 1148 1153 break; 1149 1154 default: 1150 1155 GETCHARINC(dc, frame->eptr); 1151 1156 } 1152 if (frame->fc != dc) RRETURN (MATCH_NOMATCH);1153 }1154 } 1155 break;1157 if (frame->fc != dc) RRETURN_NO_MATCH; 1158 } 1159 } 1160 NEXT_OPCODE; 1156 1161 1157 1162 /* Match a single character, caselessly */ 1158 1163 1159 case OP_CHARNC:1164 BEGIN_OPCODE(CHARNC): 1160 1165 { 1161 1166 frame->length = 1; … … 1163 1168 GETUTF8CHARLEN(frame->fc, frame->ecode, frame->length); 1164 1169 1165 if (md->end_subject - frame->eptr == 0) RRETURN(MATCH_NOMATCH); 1166 1167 /* If the pattern character's value is < 128, we have only one byte, and 1168 can use the fast lookup table. */ 1169 1170 if (frame->fc < 128) 1171 { 1172 int dc; 1173 frame->ecode++; 1174 dc = *frame->eptr++; 1175 if (dc >= 128 || md->lcc[frame->fc] != md->lcc[dc]) RRETURN(MATCH_NOMATCH); 1176 } 1177 1178 /* Otherwise we must pick up the subject character */ 1179 1180 else 1170 if (md->end_subject - frame->eptr == 0) RRETURN_NO_MATCH; 1171 1181 1172 { 1182 1173 int dc; … … 1184 1175 dc = *frame->eptr++; 1185 1176 if (IS_LEADING_SURROGATE(dc)) 1186 RRETURN (MATCH_NOMATCH);1177 RRETURN_NO_MATCH; 1187 1178 } else 1188 1179 GETCHARINC(dc, frame->eptr); … … 1195 1186 { 1196 1187 if (dc != _pcre_ucp_othercase(frame->fc)) 1197 RRETURN (MATCH_NOMATCH);1198 } 1199 } 1200 } 1201 break;1188 RRETURN_NO_MATCH; 1189 } 1190 } 1191 } 1192 NEXT_OPCODE; 1202 1193 1203 1194 /* Match a single ASCII character. */ 1204 1195 1205 case OP_ASCII_CHAR:1196 BEGIN_OPCODE(ASCII_CHAR): 1206 1197 if (md->end_subject == frame->eptr) 1207 RRETURN (MATCH_NOMATCH);1198 RRETURN_NO_MATCH; 1208 1199 if (*frame->eptr != frame->ecode[1]) 1209 RRETURN (MATCH_NOMATCH);1200 RRETURN_NO_MATCH; 1210 1201 ++frame->eptr; 1211 1202 frame->ecode += 2; 1212 break;1203 NEXT_OPCODE; 1213 1204 1214 1205 /* Match one of two cases of an ASCII character. */ 1215 1206 1216 case OP_ASCII_LETTER_NC:1207 BEGIN_OPCODE(ASCII_LETTER_NC): 1217 1208 if (md->end_subject == frame->eptr) 1218 RRETURN (MATCH_NOMATCH);1209 RRETURN_NO_MATCH; 1219 1210 if ((*frame->eptr | 0x20) != frame->ecode[1]) 1220 RRETURN (MATCH_NOMATCH);1211 RRETURN_NO_MATCH; 1221 1212 ++frame->eptr; 1222 1213 frame->ecode += 2; 1223 break;1214 NEXT_OPCODE; 1224 1215 1225 1216 /* Match a single character repeatedly; different opcodes share code. */ 1226 1217 1227 case OP_EXACT:1218 BEGIN_OPCODE(EXACT): 1228 1219 min = frame->max = GET2(frame->ecode, 1); 1229 1220 minimize = FALSE; … … 1231 1222 goto REPEATCHAR; 1232 1223 1233 case OP_UPTO:1234 case OP_MINUPTO:1224 BEGIN_OPCODE(UPTO): 1225 BEGIN_OPCODE(MINUPTO): 1235 1226 min = 0; 1236 1227 frame->max = GET2(frame->ecode, 1); … … 1239 1230 goto REPEATCHAR; 1240 1231 1241 case OP_STAR:1242 case OP_MINSTAR:1243 case OP_PLUS:1244 case OP_MINPLUS:1245 case OP_QUERY:1246 case OP_MINQUERY:1232 BEGIN_OPCODE(STAR): 1233 BEGIN_OPCODE(MINSTAR): 1234 BEGIN_OPCODE(PLUS): 1235 BEGIN_OPCODE(MINPLUS): 1236 BEGIN_OPCODE(QUERY): 1237 BEGIN_OPCODE(MINQUERY): 1247 1238 c = *frame->ecode++ - OP_STAR; 1248 1239 minimize = (c & 1) != 0; … … 1260 1251 GETUTF8CHARLEN(frame->fc, frame->ecode, frame->length); 1261 1252 { 1262 if (min * (frame->fc > 0xFFFF ? 2 : 1) > md->end_subject - frame->eptr) RRETURN (MATCH_NOMATCH);1253 if (min * (frame->fc > 0xFFFF ? 2 : 1) > md->end_subject - frame->eptr) RRETURN_NO_MATCH; 1263 1254 frame->ecode += frame->length; 1264 1255 … … 1269 1260 for (i = 1; i <= min; i++) 1270 1261 { 1271 if (*frame->eptr != frame->fc && *frame->eptr != othercase) RRETURN (MATCH_NOMATCH);1262 if (*frame->eptr != frame->fc && *frame->eptr != othercase) RRETURN_NO_MATCH; 1272 1263 ++frame->eptr; 1273 1264 } 1274 1265 1275 if (min == frame->max) continue; 1266 if (min == frame->max) 1267 NEXT_OPCODE; 1276 1268 1277 1269 if (minimize) … … 1281 1273 { 1282 1274 RMATCH(28, frame->ecode, frame->eptrb, 0); 1283 if ( rrc != MATCH_NOMATCH) RRETURN(rrc);1284 if (frame->fi >= frame->max || frame->eptr >= md->end_subject) RRETURN (MATCH_NOMATCH);1285 if (*frame->eptr != frame->fc && *frame->eptr != frame->repeat_othercase) RRETURN (MATCH_NOMATCH);1275 if (is_match) RRETURN; 1276 if (frame->fi >= frame->max || frame->eptr >= md->end_subject) RRETURN; 1277 if (*frame->eptr != frame->fc && *frame->eptr != frame->repeat_othercase) RRETURN; 1286 1278 ++frame->eptr; 1287 1279 } … … 1300 1292 { 1301 1293 RMATCH(29, frame->ecode, frame->eptrb, 0); 1302 if ( rrc != MATCH_NOMATCH) RRETURN(rrc);1294 if (is_match) RRETURN; 1303 1295 --frame->eptr; 1304 1296 } 1305 RRETURN (MATCH_NOMATCH);1297 RRETURN_NO_MATCH; 1306 1298 } 1307 1299 /* Control never gets here */ … … 1315 1307 int nc; 1316 1308 GETCHAR(nc, frame->eptr); 1317 if (nc != frame->fc) RRETURN (MATCH_NOMATCH);1309 if (nc != frame->fc) RRETURN_NO_MATCH; 1318 1310 frame->eptr += 2; 1319 1311 } 1320 1312 1321 if (min == frame->max) continue; 1313 if (min == frame->max) 1314 NEXT_OPCODE; 1322 1315 1323 1316 if (minimize) … … 1327 1320 int nc; 1328 1321 RMATCH(30, frame->ecode, frame->eptrb, 0); 1329 if ( rrc != MATCH_NOMATCH) RRETURN(rrc);1330 if (frame->fi >= frame->max || frame->eptr >= md->end_subject) RRETURN (MATCH_NOMATCH);1322 if (is_match) RRETURN; 1323 if (frame->fi >= frame->max || frame->eptr >= md->end_subject) RRETURN; 1331 1324 GETCHAR(nc, frame->eptr); 1332 if (*frame->eptr != frame->fc) RRETURN (MATCH_NOMATCH);1325 if (*frame->eptr != frame->fc) RRETURN; 1333 1326 frame->eptr += 2; 1334 1327 } … … 1349 1342 { 1350 1343 RMATCH(31, frame->ecode, frame->eptrb, 0); 1351 if ( rrc != MATCH_NOMATCH) RRETURN(rrc);1344 if (is_match) RRETURN; 1352 1345 frame->eptr -= 2; 1353 1346 } 1354 RRETURN (MATCH_NOMATCH);1347 RRETURN_NO_MATCH; 1355 1348 } 1356 1349 /* Control never gets here */ … … 1362 1355 checking can be multibyte. */ 1363 1356 1364 case OP_NOT:1365 if (frame->eptr >= md->end_subject) RRETURN (MATCH_NOMATCH);1357 BEGIN_OPCODE(NOT): 1358 if (frame->eptr >= md->end_subject) RRETURN_NO_MATCH; 1366 1359 frame->ecode++; 1367 1360 GETCHARINCTEST(c, frame->eptr); … … 1370 1363 if (c < 128) 1371 1364 c = md->lcc[c]; 1372 if (md->lcc[*frame->ecode++] == c) RRETURN (MATCH_NOMATCH);1365 if (md->lcc[*frame->ecode++] == c) RRETURN_NO_MATCH; 1373 1366 } 1374 1367 else 1375 1368 { 1376 if (*frame->ecode++ == c) RRETURN (MATCH_NOMATCH);1377 } 1378 break;1369 if (*frame->ecode++ == c) RRETURN_NO_MATCH; 1370 } 1371 NEXT_OPCODE; 1379 1372 1380 1373 /* Match a negated single one-byte character repeatedly. This is almost a … … 1385 1378 about... */ 1386 1379 1387 case OP_NOTEXACT:1380 BEGIN_OPCODE(NOTEXACT): 1388 1381 min = frame->max = GET2(frame->ecode, 1); 1389 1382 minimize = FALSE; … … 1391 1384 goto REPEATNOTCHAR; 1392 1385 1393 case OP_NOTUPTO:1394 case OP_NOTMINUPTO:1386 BEGIN_OPCODE(NOTUPTO): 1387 BEGIN_OPCODE(NOTMINUPTO): 1395 1388 min = 0; 1396 1389 frame->max = GET2(frame->ecode, 1); … … 1399 1392 goto REPEATNOTCHAR; 1400 1393 1401 case OP_NOTSTAR:1402 case OP_NOTMINSTAR:1403 case OP_NOTPLUS:1404 case OP_NOTMINPLUS:1405 case OP_NOTQUERY:1406 case OP_NOTMINQUERY:1394 BEGIN_OPCODE(NOTSTAR): 1395 BEGIN_OPCODE(NOTMINSTAR): 1396 BEGIN_OPCODE(NOTPLUS): 1397 BEGIN_OPCODE(NOTMINPLUS): 1398 BEGIN_OPCODE(NOTQUERY): 1399 BEGIN_OPCODE(NOTMINQUERY): 1407 1400 c = *frame->ecode++ - OP_NOTSTAR; 1408 1401 minimize = (c & 1) != 0; … … 1416 1409 1417 1410 REPEATNOTCHAR: 1418 if (min > md->end_subject - frame->eptr) RRETURN (MATCH_NOMATCH);1411 if (min > md->end_subject - frame->eptr) RRETURN_NO_MATCH; 1419 1412 frame->fc = *frame->ecode++; 1420 1413 … … 1431 1424 if (md->caseless) 1432 1425 { 1433 frame->fc = md->lcc[frame->fc]; 1426 if (frame->fc < 128) 1427 frame->fc = md->lcc[frame->fc]; 1434 1428 1435 1429 { … … 1439 1433 GETCHARINC(d, frame->eptr); 1440 1434 if (d < 128) d = md->lcc[d]; 1441 if (frame->fc == d) RRETURN(MATCH_NOMATCH); 1442 } 1443 } 1444 1445 if (min == frame->max) continue; 1435 if (frame->fc == d) RRETURN_NO_MATCH; 1436 } 1437 } 1438 1439 if (min == frame->max) 1440 NEXT_OPCODE; 1446 1441 1447 1442 if (minimize) … … 1452 1447 { 1453 1448 RMATCH(38, frame->ecode, frame->eptrb, 0); 1454 if ( rrc != MATCH_NOMATCH) RRETURN(rrc);1449 if (is_match) RRETURN; 1455 1450 GETCHARINC(d, frame->eptr); 1456 1451 if (d < 128) d = md->lcc[d]; 1457 1452 if (frame->fi >= frame->max || frame->eptr >= md->end_subject || frame->fc == d) 1458 RRETURN (MATCH_NOMATCH);1453 RRETURN; 1459 1454 } 1460 1455 } … … 1482 1477 { 1483 1478 RMATCH(40, frame->ecode, frame->eptrb, 0); 1484 if ( rrc != MATCH_NOMATCH) RRETURN(rrc);1479 if (is_match) RRETURN; 1485 1480 if (frame->eptr-- == frame->pp) break; /* Stop if tried at original pos */ 1486 1481 BACKCHAR(frame->eptr); … … 1488 1483 } 1489 1484 1490 RRETURN (MATCH_NOMATCH);1485 RRETURN; 1491 1486 } 1492 1487 /* Control never gets here */ … … 1502 1497 { 1503 1498 GETCHARINC(d, frame->eptr); 1504 if (frame->fc == d) RRETURN(MATCH_NOMATCH); 1505 } 1506 } 1507 1508 if (min == frame->max) continue; 1499 if (frame->fc == d) RRETURN_NO_MATCH; 1500 } 1501 } 1502 1503 if (min == frame->max) 1504 NEXT_OPCODE; 1509 1505 1510 1506 if (minimize) … … 1515 1511 { 1516 1512 RMATCH(42, frame->ecode, frame->eptrb, 0); 1517 if ( rrc != MATCH_NOMATCH) RRETURN(rrc);1513 if (is_match) RRETURN; 1518 1514 GETCHARINC(d, frame->eptr); 1519 1515 if (frame->fi >= frame->max || frame->eptr >= md->end_subject || frame->fc == d) 1520 RRETURN (MATCH_NOMATCH);1516 RRETURN; 1521 1517 } 1522 1518 } … … 1543 1539 { 1544 1540 RMATCH(44, frame->ecode, frame->eptrb, 0); 1545 if ( rrc != MATCH_NOMATCH) RRETURN(rrc);1541 if (is_match) RRETURN; 1546 1542 if (frame->eptr-- == frame->pp) break; /* Stop if tried at original pos */ 1547 1543 BACKCHAR(frame->eptr); … … 1549 1545 } 1550 1546 1551 RRETURN (MATCH_NOMATCH);1547 RRETURN; 1552 1548 } 1553 1549 } … … 1558 1554 repeat it in the interests of efficiency. */ 1559 1555 1560 case OP_TYPEEXACT:1556 BEGIN_OPCODE(TYPEEXACT): 1561 1557 min = frame->max = GET2(frame->ecode, 1); 1562 1558 minimize = TRUE; … … 1564 1560 goto REPEATTYPE; 1565 1561 1566 case OP_TYPEUPTO:1567 case OP_TYPEMINUPTO:1562 BEGIN_OPCODE(TYPEUPTO): 1563 BEGIN_OPCODE(TYPEMINUPTO): 1568 1564 min = 0; 1569 1565 frame->max = GET2(frame->ecode, 1); … … 1572 1568 goto REPEATTYPE; 1573 1569 1574 case OP_TYPESTAR:1575 case OP_TYPEMINSTAR:1576 case OP_TYPEPLUS:1577 case OP_TYPEMINPLUS:1578 case OP_TYPEQUERY:1579 case OP_TYPEMINQUERY:1570 BEGIN_OPCODE(TYPESTAR): 1571 BEGIN_OPCODE(TYPEMINSTAR): 1572 BEGIN_OPCODE(TYPEPLUS): 1573 BEGIN_OPCODE(TYPEMINPLUS): 1574 BEGIN_OPCODE(TYPEQUERY): 1575 BEGIN_OPCODE(TYPEMINQUERY): 1580 1576 c = *frame->ecode++ - OP_TYPESTAR; 1581 1577 minimize = (c & 1) != 0; … … 1599 1595 and single-bytes. */ 1600 1596 1601 if (min > md->end_subject - frame->eptr) RRETURN (MATCH_NOMATCH);1597 if (min > md->end_subject - frame->eptr) RRETURN_NO_MATCH; 1602 1598 if (min > 0) 1603 1599 { … … 1608 1604 { 1609 1605 if (frame->eptr >= md->end_subject || IS_NEWLINE(*frame->eptr)) 1610 RRETURN (MATCH_NOMATCH);1606 RRETURN_NO_MATCH; 1611 1607 ++frame->eptr; 1612 1608 while (frame->eptr < md->end_subject && ISMIDCHAR(*frame->eptr)) frame->eptr++; … … 1617 1613 for (i = 1; i <= min; i++) 1618 1614 { 1619 if (frame->eptr >= md->end_subject) RRETURN (MATCH_NOMATCH);1615 if (frame->eptr >= md->end_subject) RRETURN_NO_MATCH; 1620 1616 GETCHARINC(c, frame->eptr); 1621 if ( c < 128 && (md->ctypes[c] & ctype_digit) != 0)1622 RRETURN (MATCH_NOMATCH);1617 if (isASCIIDigit(c)) 1618 RRETURN_NO_MATCH; 1623 1619 } 1624 1620 break; … … 1627 1623 for (i = 1; i <= min; i++) 1628 1624 { 1629 if (frame->eptr >= md->end_subject || 1630 *frame->eptr >= 128 || (md->ctypes[*frame->eptr++] & ctype_digit) == 0) 1631 RRETURN(MATCH_NOMATCH); 1625 if (frame->eptr >= md->end_subject || !isASCIIDigit(*frame->eptr++)) 1626 RRETURN_NO_MATCH; 1632 1627 /* No need to skip more bytes - we know it's a 1-byte character */ 1633 1628 } … … 1639 1634 if (frame->eptr >= md->end_subject || 1640 1635 (*frame->eptr < 128 && (md->ctypes[*frame->eptr] & ctype_space) != 0)) 1641 RRETURN (MATCH_NOMATCH);1636 RRETURN_NO_MATCH; 1642 1637 while (++frame->eptr < md->end_subject && ISMIDCHAR(*frame->eptr)); 1643 1638 } … … 1649 1644 if (frame->eptr >= md->end_subject || 1650 1645 *frame->eptr >= 128 || (md->ctypes[*frame->eptr++] & ctype_space) == 0) 1651 RRETURN (MATCH_NOMATCH);1646 RRETURN_NO_MATCH; 1652 1647 /* No need to skip more bytes - we know it's a 1-byte character */ 1653 1648 } … … 1659 1654 if (frame->eptr >= md->end_subject || 1660 1655 (*frame->eptr < 128 && (md->ctypes[*frame->eptr] & ctype_word) != 0)) 1661 RRETURN (MATCH_NOMATCH);1656 RRETURN_NO_MATCH; 1662 1657 while (++frame->eptr < md->end_subject && ISMIDCHAR(*frame->eptr)); 1663 1658 } … … 1669 1664 if (frame->eptr >= md->end_subject || 1670 1665 *frame->eptr >= 128 || (md->ctypes[*frame->eptr++] & ctype_word) == 0) 1671 RRETURN (MATCH_NOMATCH);1666 RRETURN_NO_MATCH; 1672 1667 /* No need to skip more bytes - we know it's a 1-byte character */ 1673 1668 } … … 1675 1670 1676 1671 default: 1677 RRETURN(JS_REGEXP_ERROR_INTERNAL); 1672 ASSERT_NOT_REACHED(); 1673 RRETURN_ERROR(JSRegExpErrorInternal); 1678 1674 } /* End switch(frame->ctype) */ 1679 1675 } … … 1681 1677 /* If min = max, continue at the same level without recursing */ 1682 1678 1683 if (min == frame->max) continue; 1679 if (min == frame->max) 1680 NEXT_OPCODE; 1684 1681 1685 1682 /* If minimizing, we have to test the rest of the pattern before each … … 1692 1689 { 1693 1690 RMATCH(48, frame->ecode, frame->eptrb, 0); 1694 if ( rrc != MATCH_NOMATCH) RRETURN(rrc);1695 if (frame->fi >= frame->max || frame->eptr >= md->end_subject) RRETURN (MATCH_NOMATCH);1691 if (is_match) RRETURN; 1692 if (frame->fi >= frame->max || frame->eptr >= md->end_subject) RRETURN; 1696 1693 1697 1694 GETCHARINC(c, frame->eptr); … … 1699 1696 { 1700 1697 case OP_ANY: 1701 if (IS_NEWLINE(c)) RRETURN (MATCH_NOMATCH);1698 if (IS_NEWLINE(c)) RRETURN; 1702 1699 break; 1703 1700 1704 1701 case OP_NOT_DIGIT: 1705 if ( c < 128 && (md->ctypes[c] & ctype_digit) != 0)1706 RRETURN (MATCH_NOMATCH);1702 if (isASCIIDigit(c)) 1703 RRETURN; 1707 1704 break; 1708 1705 1709 1706 case OP_DIGIT: 1710 if ( c >= 128 || (md->ctypes[c] & ctype_digit) == 0)1711 RRETURN (MATCH_NOMATCH);1707 if (!isASCIIDigit(c)) 1708 RRETURN; 1712 1709 break; 1713 1710 1714 1711 case OP_NOT_WHITESPACE: 1715 1712 if (c < 128 && (md->ctypes[c] & ctype_space) != 0) 1716 RRETURN (MATCH_NOMATCH);1713 RRETURN; 1717 1714 break; 1718 1715 1719 1716 case OP_WHITESPACE: 1720 1717 if (c >= 128 || (md->ctypes[c] & ctype_space) == 0) 1721 RRETURN (MATCH_NOMATCH);1718 RRETURN; 1722 1719 break; 1723 1720 1724 1721 case OP_NOT_WORDCHAR: 1725 1722 if (c < 128 && (md->ctypes[c] & ctype_word) != 0) 1726 RRETURN (MATCH_NOMATCH);1723 RRETURN; 1727 1724 break; 1728 1725 1729 1726 case OP_WORDCHAR: 1730 1727 if (c >= 128 || (md->ctypes[c] & ctype_word) == 0) 1731 RRETURN (MATCH_NOMATCH);1728 RRETURN; 1732 1729 break; 1733 1730 1734 1731 default: 1735 RRETURN(JS_REGEXP_ERROR_INTERNAL); 1732 ASSERT_NOT_REACHED(); 1733 RRETURN_ERROR(JSRegExpErrorInternal); 1736 1734 } 1737 1735 } … … 1747 1745 frame->pp = frame->eptr; /* Remember where we started */ 1748 1746 1749 {1750 1747 switch(frame->ctype) 1751 1748 { … … 1789 1786 if (frame->eptr >= md->end_subject) break; 1790 1787 GETCHARLEN(c, frame->eptr, len); 1791 if ( c < 128 && (md->ctypes[c] & ctype_digit) != 0) break;1788 if (isASCIIDigit(c)) break; 1792 1789 frame->eptr+= len; 1793 1790 } … … 1800 1797 if (frame->eptr >= md->end_subject) break; 1801 1798 GETCHARLEN(c, frame->eptr, len); 1802 if ( c >= 128 ||(md->ctypes[c] & ctype_digit) == 0) break;1799 if (!isASCIIDigit(c)) break; 1803 1800 frame->eptr+= len; 1804 1801 } … … 1850 1847 1851 1848 default: 1852 RRETURN(JS_REGEXP_ERROR_INTERNAL); 1849 ASSERT_NOT_REACHED(); 1850 RRETURN_ERROR(JSRegExpErrorInternal); 1853 1851 } 1854 1852 … … 1858 1856 { 1859 1857 RMATCH(52, frame->ecode, frame->eptrb, 0); 1860 if ( rrc != MATCH_NOMATCH) RRETURN(rrc);1858 if (is_match) RRETURN; 1861 1859 if (frame->eptr-- == frame->pp) break; /* Stop if tried at original pos */ 1862 1860 BACKCHAR(frame->eptr); 1863 1861 } 1864 }1865 1862 1866 1863 /* Get here if we can't make it match with any permitted repetitions */ 1867 1864 1868 RRETURN (MATCH_NOMATCH);1865 RRETURN; 1869 1866 } 1870 1867 /* Control never gets here */ 1871 1868 1869 BEGIN_OPCODE(BRANUMBER): 1870 BEGIN_OPCODE(CRMINPLUS): 1871 BEGIN_OPCODE(CRMINQUERY): 1872 BEGIN_OPCODE(CRMINRANGE): 1873 BEGIN_OPCODE(CRMINSTAR): 1874 BEGIN_OPCODE(CRPLUS): 1875 BEGIN_OPCODE(CRQUERY): 1876 BEGIN_OPCODE(CRRANGE): 1877 BEGIN_OPCODE(CRSTAR): 1878 ASSERT_NOT_REACHED(); 1879 RRETURN_ERROR(JSRegExpErrorInternal); 1880 1881 #ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP 1882 CAPTURING_BRACKET: 1883 #else 1872 1884 default: 1885 #endif 1873 1886 /* Opening capturing bracket. If there is space in the offset vector, save 1874 1887 the current subject position in the working slot at the top of the vector. We … … 1885 1898 here; that is handled in the code for KET. */ 1886 1899 1887 if (*frame->ecode > OP_BRA)1888 { 1900 ASSERT(*frame->ecode > OP_BRA); 1901 1889 1902 frame->number = *frame->ecode - OP_BRA; 1890 1903 … … 1905 1918 { 1906 1919 frame->save_offset1 = md->offset_vector[frame->offset]; 1907 frame->save_offset2 = md->offset_vector[frame->offset +1];1920 frame->save_offset2 = md->offset_vector[frame->offset + 1]; 1908 1921 frame->save_offset3 = md->offset_vector[md->offset_end - frame->number]; 1909 1922 … … 1914 1927 { 1915 1928 RMATCH(1, frame->ecode + 1 + LINK_SIZE, frame->eptrb, match_isgroup); 1916 if ( rrc != MATCH_NOMATCH) RRETURN(rrc);1929 if (is_match) RRETURN; 1917 1930 frame->ecode += GET(frame->ecode, 1); 1918 1931 } … … 1922 1935 1923 1936 md->offset_vector[frame->offset] = frame->save_offset1; 1924 md->offset_vector[frame->offset +1] = frame->save_offset2;1937 md->offset_vector[frame->offset + 1] = frame->save_offset2; 1925 1938 md->offset_vector[md->offset_end - frame->number] = frame->save_offset3; 1926 1939 1927 RRETURN (MATCH_NOMATCH);1940 RRETURN; 1928 1941 } 1929 1942 … … 1931 1944 1932 1945 goto NON_CAPTURING_BRACKET; 1933 }1934 1935 /* There's been some horrible disaster. Since all codes > OP_BRA are1936 for capturing brackets, and there shouldn't be any gaps between 0 and1937 OP_BRA, arrival here can only mean there is something seriously wrong1938 in the code above or the OP_xxx definitions. */1939 1940 DPRINTF(("Unknown opcode %d\n", *frame->ecode));1941 RRETURN(JS_REGEXP_ERROR_INTERNAL);1942 1946 } 1943 1947 … … 1946 1950 loop. */ 1947 1951 1948 } /* End of main loop */ 1952 } /* End of main loop */ 1953 1949 1954 /* Control never reaches here */ 1950 1955 1951 #ifndef __GNUC__1956 #ifndef USE_COMPUTED_GOTO_FOR_MATCH_RECURSION 1952 1957 1953 1958 RRETURN_SWITCH: 1954 1959 switch (frame->where) 1955 1960 { 1961 case 0: goto RETURN; 1956 1962 case 1: goto RRETURN_1; 1957 1963 case 2: goto RRETURN_2; … … 1988 1994 1989 1995 abort(); 1990 return 0;1996 RRETURN_ERROR(JSRegExpErrorInternal); 1991 1997 1992 1998 #endif 1993 1999 2000 RETURN: 2001 return is_match ? MATCH_MATCH : MATCH_NOMATCH; 2002 2003 RETURN_ERROR: 2004 while (!(frame >= stackframes && frame < stackframesend)) { 2005 newframe = frame->prevframe; 2006 delete frame; 2007 frame = newframe; 2008 } 2009 return i; 1994 2010 } 1995 2011 … … 2021 2037 int 2022 2038 jsRegExpExecute(const pcre *argument_re, 2023 const JSRegExpChar* subject, int length, int start_offset, int *offsets,2039 const UChar* subject, int length, int start_offset, int *offsets, 2024 2040 int offsetcount) 2025 2041 { … … 2076 2092 { 2077 2093 ocount = re->top_backref * 3 + 3; 2078 match_block.offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int));2079 if (match_block.offset_vector == NULL) return JS _REGEXP_ERROR_NOMEMORY;2094 match_block.offset_vector = new int[ocount]; 2095 if (match_block.offset_vector == NULL) return JSRegExpErrorNoMemory; 2080 2096 using_temporary_offsets = TRUE; 2081 2097 DPRINTF(("Got memory to hold back references\n")); … … 2111 2127 studied, there may be a bitmap of possible first characters. */ 2112 2128 2113 {2114 2129 if ((re->options & PCRE_FIRSTSET) != 0) 2115 2130 { … … 2118 2133 first_byte = match_block.lcc[first_byte]; 2119 2134 } 2120 }2121 2135 2122 2136 /* For anchored or unanchored matches, there may be a "last known required … … 2254 2268 if certain parts of the pattern were not used. */ 2255 2269 2256 match_block.start_match = start_match;2257 2270 match_block.match_call_count = 0; 2258 2271 … … 2295 2308 2296 2309 DPRINTF(("Freeing temporary memory\n")); 2297 (pcre_free)(match_block.offset_vector);2310 delete [] match_block.offset_vector; 2298 2311 } 2299 2312 … … 2317 2330 { 2318 2331 DPRINTF(("Freeing temporary memory\n")); 2319 (pcre_free)(match_block.offset_vector);2332 delete [] match_block.offset_vector; 2320 2333 } 2321 2334 2322 2335 DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n")); 2323 return JS _REGEXP_ERROR_NOMATCH;2336 return JSRegExpErrorNoMatch; 2324 2337 } 2325 2326 /* End of pcre_exec.c */
Note:
See TracChangeset
for help on using the changeset viewer.