Changeset 18517 in webkit for trunk/JavaScriptCore/pcre/pcre_compile.c
- Timestamp:
- Jan 1, 2007, 9:13:00 PM (18 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/JavaScriptCore/pcre/pcre_compile.c
r18498 r18517 356 356 357 357 static BOOL 358 compile_regex(int, int, int *, uschar **, const pcre_uchar **, int *, BOOL, int,358 compile_regex(int, int, int *, uschar **, const pcre_uchar **, const pcre_uchar const*, int *, BOOL, int, 359 359 int *, int *, branch_chain *, compile_data *); 360 360 … … 384 384 385 385 static int 386 check_escape(const pcre_uchar **ptrptr, int *errorcodeptr, int bracount,386 check_escape(const pcre_uchar **ptrptr, const pcre_uchar const* patternEnd, int *errorcodeptr, int bracount, 387 387 int options, BOOL isclass) 388 388 { … … 392 392 /* If backslash is at the end of the pattern, it's an error. */ 393 393 394 c = *(++ptr); 395 if (c == 0) *errorcodeptr = ERR1; 394 if (++ptr == patternEnd) { 395 *errorcodeptr = ERR1; 396 *ptrptr = ptr; 397 return 0; 398 } 399 400 c = *ptr; 396 401 397 402 /* Non-alphamerics are literals. For digits or letters, do an initial lookup in … … 400 405 401 406 #if !EBCDIC /* ASCII coding */ 402 elseif (c < '0' || c > 'z') {} /* Not alphameric */407 if (c < '0' || c > 'z') {} /* Not alphameric */ 403 408 else if ((i = escapes[c - '0']) != 0) c = i; 404 409 405 410 #else /* EBCDIC coding */ 406 elseif (c < 'a' || (ebcdic_chartab[c] & 0x0E) == 0) {} /* Not alphameric */411 if (c < 'a' || (ebcdic_chartab[c] & 0x0E) == 0) {} /* Not alphameric */ 407 412 else if ((i = escapes[c - 0x48]) != 0) c = i; 408 413 #endif … … 445 450 oldptr = ptr; 446 451 c -= '0'; 447 while ( (DIGITAB(ptr[1]) & ctype_digit) != 0)452 while (ptr + 1 < patternEnd && (DIGITAB(ptr[1]) & ctype_digit) != 0) 448 453 c = c * 10 + *(++ptr) - '0'; 449 454 if (c < 10 || c <= bracount) … … 471 476 case '0': 472 477 c -= '0'; 473 while (i++ < 2&& ptr[1] >= '0' && ptr[1] <= '7')478 while (i++ < 2 && ptr + 1 < patternEnd && ptr[1] >= '0' && ptr[1] <= '7') 474 479 c = c * 8 + *(++ptr) - '0'; 475 480 c &= 255; /* Take least significant 8 bits */ … … 481 486 case 'x': 482 487 #ifdef SUPPORT_UTF8 483 if (ptr [1] == '{' && (options & PCRE_UTF8) != 0)488 if (ptr + 1 < patternEnd && ptr[1] == '{' && (options & PCRE_UTF8) != 0) 484 489 { 485 490 const pcre_uchar *pt = ptr + 2; 486 491 register int count = 0; 487 492 c = 0; 488 while ( (DIGITAB(*pt) & ctype_xdigit) != 0)493 while (pt < patternEnd && (DIGITAB(*pt) & ctype_xdigit) != 0) 489 494 { 490 495 int cc = *pt++; … … 498 503 #endif 499 504 } 500 if ( *pt == '}')505 if (pt < patternEnd && *pt == '}') 501 506 { 502 507 if (c < 0 || count > 8 || (c >= 0xd800 && c <= 0xdbff) || (c >= 0xfdd0 && c <= 0xfdef) || c == 0xfffe || c == 0xffff || c > 0x10FFFF) *errorcodeptr = ERR34; … … 512 517 513 518 c = 0; 514 while (i++ < 2 && (DIGITAB(ptr[1]) & ctype_xdigit) != 0)519 while (i++ < 2 && ptr + 1 < patternEnd && (DIGITAB(ptr[1]) & ctype_xdigit) != 0) 515 520 { 516 521 int cc; /* Some compilers don't like ++ */ … … 529 534 530 535 case 'c': 531 c = *(++ptr); 532 if (c == 0) 536 if (++ptr == patternEnd) 533 537 { 534 538 *errorcodeptr = ERR2; 535 539 return 0; 536 540 } 541 c = *ptr; 537 542 538 543 /* A letter is upper-cased; then the 0x40 bit is flipped. This coding … … 591 596 592 597 static int 593 get_ucp(const pcre_uchar **ptrptr, BOOL *negptr, int *errorcodeptr)598 get_ucp(const pcre_uchar **ptrptr, const pcre_uchar const* patternEnd, BOOL *negptr, int *errorcodeptr) 594 599 { 595 600 int c, i, bot, top; … … 597 602 char name[4]; 598 603 599 c = *(++ptr);600 if (c == 0) goto ERROR_RETURN;604 if (++ptr == patternEnd) goto ERROR_RETURN; 605 c = *ptr; 601 606 602 607 *negptr = FALSE; … … 607 612 if (c == '{') 608 613 { 609 if (ptr [1] == '^')614 if (ptr + 1 < patternEnd && ptr[1] == '^') 610 615 { 611 616 *negptr = TRUE; … … 614 619 for (i = 0; i <= 2; i++) 615 620 { 616 c = *(++ptr);617 if (c == 0) goto ERROR_RETURN;621 if (++ptr == patternEnd) goto ERROR_RETURN; 622 c = *ptr; 618 623 if (c == '}') break; 619 624 if (c > 127) goto ERROR_RETURN; … … 622 627 if (c !='}') /* Try to distinguish error cases */ 623 628 { 624 while (*(++ptr) != 0 && *ptr != '}'); 629 while (++ptr < patternEnd && *ptr != '}') 630 ; 625 631 if (*ptr == '}') goto UNKNOWN_RETURN; else goto ERROR_RETURN; 626 632 } … … 683 689 684 690 static BOOL 685 is_counted_repeat(const pcre_uchar *p )691 is_counted_repeat(const pcre_uchar *p, const pcre_uchar const* patternEnd) 686 692 { 687 if ((DIGITAB(*p) & ctype_digit) == 0) return FALSE; 693 if (p >= patternEnd || (DIGITAB(*p) & ctype_digit) == 0) 694 return FALSE; 688 695 p++; 689 while ((DIGITAB(*p) & ctype_digit) != 0) p++; 690 if (*p == '}') return TRUE; 691 692 if (*p++ != ',') return FALSE; 693 if (*p == '}') return TRUE; 694 695 if ((DIGITAB(*p) & ctype_digit) == 0) return FALSE; 696 while (p < patternEnd && (DIGITAB(*p) & ctype_digit) != 0) 697 p++; 698 if (p < patternEnd && *p == '}') 699 return TRUE; 700 701 if (p >= patternEnd || *p++ != ',') 702 return FALSE; 703 if (p < patternEnd && *p == '}') 704 return TRUE; 705 706 if (p >= patternEnd || (DIGITAB(*p) & ctype_digit) == 0) 707 return FALSE; 696 708 p++; 697 while ((DIGITAB(*p) & ctype_digit) != 0) p++; 698 699 return (*p == '}'); 709 while (p < patternEnd && (DIGITAB(*p) & ctype_digit) != 0) 710 p++; 711 712 return (p < patternEnd && *p == '}'); 700 713 } 701 714 … … 1360 1373 1361 1374 static BOOL 1362 check_posix_syntax(const pcre_uchar *ptr, const pcre_uchar **endptr, compile_data *cd)1375 check_posix_syntax(const pcre_uchar *ptr, const pcre_uchar const *patternEnd, const pcre_uchar **endptr, compile_data *cd) 1363 1376 { 1364 1377 int terminator; /* Don't combine these lines; the Solaris cc */ 1365 1378 terminator = *(++ptr); /* compiler warns about "non-constant" initializer. */ 1366 1379 if (*(++ptr) == '^') ptr++; 1367 while ((CTYPES(cd, *ptr) & ctype_letter) != 0) ptr++; 1368 if (*ptr == terminator && ptr[1] == ']') 1380 while (ptr < patternEnd && (CTYPES(cd, *ptr) & ctype_letter) != 0) 1381 ptr++; 1382 if (ptr + 1 < patternEnd && *ptr == terminator && ptr[1] == ']') 1369 1383 { 1370 1384 *endptr = ptr; … … 1586 1600 static BOOL 1587 1601 compile_branch(int *optionsptr, int *brackets, uschar **codeptr, 1588 const pcre_uchar **ptrptr, int *errorcodeptr, int *firstbyteptr,1602 const pcre_uchar **ptrptr, const pcre_uchar const* patternEnd, int *errorcodeptr, int *firstbyteptr, 1589 1603 int *reqbyteptr, branch_chain *bcptr, compile_data *cd) 1590 1604 { … … 1662 1676 /* Next byte in the pattern */ 1663 1677 1664 c = *ptr;1678 c = ptr < patternEnd ? *ptr : 0; 1665 1679 1666 1680 /* If in \Q...\E, check for the end; if not, we have a literal */ 1667 1681 1668 if (inescq && c != 0)1682 if (inescq && ptr < patternEnd) 1669 1683 { 1670 if (c == '\\' && ptr [1] == 'E')1684 if (c == '\\' && ptr + 1 < patternEnd && ptr[1] == 'E') 1671 1685 { 1672 1686 inescq = FALSE; … … 1694 1708 1695 1709 is_quantifier = c == '*' || c == '+' || c == '?' || 1696 (c == '{' && is_counted_repeat(ptr+1 ));1710 (c == '{' && is_counted_repeat(ptr+1, patternEnd)); 1697 1711 1698 1712 if (!is_quantifier && previous_callout != NULL && … … 1710 1724 if (c == '#') 1711 1725 { 1712 /* The space before the ; is to avoid a warning on a silly compiler 1713 on the Macintosh. */ 1714 while ((c = *(++ptr)) != 0 && c != NEWLINE) ; 1715 if (c != 0) continue; /* Else fall through to handle end of string */ 1726 while (++ptr < patternEnd && (c = *ptr) != NEWLINE) 1727 ; 1728 if (ptr < patternEnd) 1729 continue; /* Else fall through to handle end of string */ 1730 c = 0; 1716 1731 } 1717 1732 } … … 1730 1745 1731 1746 case 0: 1747 if (ptr < patternEnd) 1748 goto NORMAL_CHAR; 1749 // End of string; fall through 1732 1750 case '|': 1733 1751 case ')': … … 1785 1803 1786 1804 if ((ptr[1] == ':' || ptr[1] == '.' || ptr[1] == '=') && 1787 check_posix_syntax(ptr, &tempptr, cd))1805 check_posix_syntax(ptr, patternEnd, &tempptr, cd)) 1788 1806 { 1789 1807 *errorcodeptr = (ptr[1] == ':')? ERR13 : ERR31; … … 1858 1876 if (c == '[' && 1859 1877 (ptr[1] == ':' || ptr[1] == '.' || ptr[1] == '=') && 1860 check_posix_syntax(ptr, &tempptr, cd))1878 check_posix_syntax(ptr, patternEnd, &tempptr, cd)) 1861 1879 { 1862 1880 BOOL local_negate = FALSE; … … 1900 1918 for (i = 0; i < 3; i++) 1901 1919 { 1902 BOOL blankclass = STREQUAL(ptr, 5, "blank");1920 BOOL blankclass = ptr + 5 <= patternEnd && STREQUAL(ptr, 5, "blank"); 1903 1921 int taboffset = posix_class_maps[posix_class + i]; 1904 1922 if (taboffset < 0) break; … … 1933 1951 if (c == '\\') 1934 1952 { 1935 c = check_escape(&ptr, errorcodeptr, *brackets, options, TRUE);1953 c = check_escape(&ptr, patternEnd, errorcodeptr, *brackets, options, TRUE); 1936 1954 1937 1955 if (-c == ESC_b) c = '\b'; /* \b is backslash in a class */ … … 1939 1957 else if (-c == ESC_Q) /* Handle start of quoted string */ 1940 1958 { 1941 if (ptr [1] == '\\' && ptr[2] == 'E')1959 if (ptr + 2 < patternEnd && ptr[1] == '\\' && ptr[2] == 'E') 1942 1960 { 1943 1961 ptr += 2; /* avoid empty string */ … … 1986 2004 { 1987 2005 BOOL negated; 1988 int property = get_ucp(&ptr, &negated, errorcodeptr);2006 int property = get_ucp(&ptr, patternEnd, &negated, errorcodeptr); 1989 2007 if (property < 0) goto FAILED; 1990 2008 class_utf8 = TRUE; … … 2042 2060 { 2043 2061 const pcre_uchar *oldptr = ptr; 2044 d = check_escape(&ptr, errorcodeptr, *brackets, options, TRUE);2062 d = check_escape(&ptr, patternEnd, errorcodeptr, *brackets, options, TRUE); 2045 2063 2046 2064 /* \b is backslash; \X is literal X; any other special means the '-' … … 2376 2394 repeat type to the non-default. */ 2377 2395 2378 if (ptr [1] == '+')2396 if (ptr + 1 < patternEnd && ptr[1] == '+') 2379 2397 { 2380 2398 repeat_type = 0; /* Force greedy */ … … 2382 2400 ptr++; 2383 2401 } 2384 else if (ptr [1] == '?')2402 else if (ptr + 1 < patternEnd && ptr[1] == '?') 2385 2403 { 2386 2404 repeat_type = greedy_non_default; … … 3177 3195 &tempcode, /* Where to put code (updated) */ 3178 3196 &ptr, /* Input pointer (updated) */ 3197 patternEnd, 3179 3198 errorcodeptr, /* Where to put an error message */ 3180 3199 (bravalue == OP_ASSERTBACK || … … 3276 3295 /* Error if hit end of pattern */ 3277 3296 3278 if ( *ptr != ')')3297 if (ptr > patternEnd || *ptr != ')') 3279 3298 { 3280 3299 *errorcodeptr = ERR14; … … 3289 3308 case '\\': 3290 3309 tempptr = ptr; 3291 c = check_escape(&ptr, errorcodeptr, *brackets, options, FALSE);3310 c = check_escape(&ptr, patternEnd, errorcodeptr, *brackets, options, FALSE); 3292 3311 3293 3312 /* Handle metacharacters introduced by \. For ones like \d, the ESC_ values … … 3302 3321 if (-c == ESC_Q) /* Handle start of quoted string */ 3303 3322 { 3304 if (ptr [1] == '\\' && ptr[2] == 'E') ptr += 2; /* avoid empty string */3323 if (ptr + 2 < patternEnd && ptr[1] == '\\' && ptr[2] == 'E') ptr += 2; /* avoid empty string */ 3305 3324 else inescq = TRUE; 3306 3325 continue; … … 3335 3354 { 3336 3355 BOOL negated; 3337 int value = get_ucp(&ptr, &negated, errorcodeptr);3356 int value = get_ucp(&ptr, patternEnd, &negated, errorcodeptr); 3338 3357 previous = code; 3339 3358 *code++ = ((-c == ESC_p) != negated)? OP_PROP : OP_NOTPROP; … … 3391 3410 if (utf8 && (c & 0xc0) == 0xc0) 3392 3411 { 3393 while ( (ptr[1] & 0xc0) == 0x80)3412 while (ptr + 1 < patternEnd && (ptr[1] & 0xc0) == 0x80) 3394 3413 mcbuffer[mclength++] = *(++ptr); 3395 3414 } … … 3484 3503 static BOOL 3485 3504 compile_regex(int options, int oldims, int *brackets, uschar **codeptr, 3486 const pcre_uchar **ptrptr, int *errorcodeptr, BOOL lookbehind, int skipbytes,3505 const pcre_uchar **ptrptr, const pcre_uchar const* patternEnd, int *errorcodeptr, BOOL lookbehind, int skipbytes, 3487 3506 int *firstbyteptr, int *reqbyteptr, branch_chain *bcptr, compile_data *cd) 3488 3507 { … … 3529 3548 /* Now compile the branch */ 3530 3549 3531 if (!compile_branch(&options, brackets, &code, &ptr, errorcodeptr,3550 if (!compile_branch(&options, brackets, &code, &ptr, patternEnd, errorcodeptr, 3532 3551 &branchfirstbyte, &branchreqbyte, &bc, cd)) 3533 3552 { … … 3603 3622 at the terminating char. */ 3604 3623 3605 if ( *ptr != '|')3624 if (ptr >= patternEnd || *ptr != '|') 3606 3625 { 3607 3626 int length = INT_CAST(code - last_branch); … … 3623 3642 /* Resetting option if needed */ 3624 3643 3625 if ((options & PCRE_IMS) != oldims && *ptr == ')')3644 if ((options & PCRE_IMS) != oldims && ptr < patternEnd && *ptr == ')') 3626 3645 { 3627 3646 *code++ = OP_OPT; … … 3904 3923 3905 3924 PCRE_EXPORT pcre * 3906 pcre_compile(const pcre_char *pattern, int options, const char **errorptr,3925 pcre_compile(const pcre_char *pattern, int patternLength, int options, const char **errorptr, 3907 3926 int *erroroffset, const unsigned char *tables) 3908 3927 { 3909 return pcre_compile2(pattern, options, NULL, errorptr, erroroffset, tables);3928 return pcre_compile2(pattern, patternLength, options, NULL, errorptr, erroroffset, tables); 3910 3929 } 3911 3930 3912 3931 3913 3932 PCRE_EXPORT pcre * 3914 pcre_compile2(const pcre_char *pattern, int options, int *errorcodeptr,3933 pcre_compile2(const pcre_char *pattern, int patternLength, int options, int *errorcodeptr, 3915 3934 const char **errorptr, int *erroroffset, const unsigned char *tables) 3916 3935 { … … 4027 4046 4028 4047 ptr = (const pcre_uchar *)(pattern - 1); 4029 while ((c = *(++ptr)) != 0) 4048 const pcre_uchar const* patternEnd = (const pcre_uchar *)(pattern + patternLength); 4049 4050 while (++ptr < patternEnd) 4030 4051 { 4052 c = *ptr; 4053 4031 4054 int min, max; 4032 4055 int class_optcount; … … 4049 4072 if (c == '#') 4050 4073 { 4051 /* The space before the ; is to avoid a warning on a silly compiler4052 on the Macintosh. */4053 while ((c = *(++ptr)) != 0 && c != NEWLINE) ;4054 if (c == 0)break;4074 while (++ptr < patternEnd && (c = *ptr) != NEWLINE) 4075 ; 4076 if (ptr == patternEnd) 4077 break; 4055 4078 continue; 4056 4079 } … … 4063 4086 if ((options & PCRE_AUTO_CALLOUT) != 0 && 4064 4087 c != '*' && c != '+' && c != '?' && 4065 (c != '{' || !is_counted_repeat(ptr + 1 )))4088 (c != '{' || !is_counted_repeat(ptr + 1, patternEnd))) 4066 4089 length += 2 + 2*LINK_SIZE; 4067 4090 … … 4072 4095 4073 4096 case '\\': 4074 c = check_escape(&ptr, &errorcode, bracount, options, FALSE);4097 c = check_escape(&ptr, patternEnd, &errorcode, bracount, options, FALSE); 4075 4098 if (errorcode != 0) goto PCRE_ERROR_RETURN; 4076 4099 … … 4122 4145 length += 2; 4123 4146 lastitemlength = 2; 4124 if (get_ucp(&ptr, &negated, &errorcode) < 0) goto PCRE_ERROR_RETURN;4147 if (get_ucp(&ptr, patternEnd, &negated, &errorcode) < 0) goto PCRE_ERROR_RETURN; 4125 4148 continue; 4126 4149 #else … … 4145 4168 compile_block.top_backref = refnum; 4146 4169 length += 2; /* For single back reference */ 4147 if (ptr [1] == '{' && is_counted_repeat(ptr+2))4170 if (ptr + 1 < patternEnd && ptr[1] == '{' && is_counted_repeat(ptr+2, patternEnd)) 4148 4171 { 4149 4172 ptr = read_repeat_counts(ptr+2, &min, &max, &errorcode); … … 4153 4176 length++; 4154 4177 else length += 5; 4155 if (ptr[1] == '?') ptr++; 4178 if (ptr + 1 < patternEnd && ptr[1] == '?') 4179 ptr++; 4156 4180 } 4157 4181 } … … 4175 4199 4176 4200 case '{': 4177 if (!is_counted_repeat(ptr+1)) goto NORMAL_CHAR; 4201 if (!is_counted_repeat(ptr+1, patternEnd)) 4202 goto NORMAL_CHAR; 4178 4203 ptr = read_repeat_counts(ptr+1, &min, &max, &errorcode); 4179 4204 if (errorcode != 0) goto PCRE_ERROR_RETURN; … … 4197 4222 } 4198 4223 4199 if (ptr[1] == '?') ptr++; /* Needs no extra length */ 4224 if (ptr + 1 < patternEnd && ptr[1] == '?') 4225 ptr++; /* Needs no extra length */ 4200 4226 4201 4227 POSESSIVE: /* Test for possessive quantifier */ 4202 if (ptr [1] == '+')4228 if (ptr + 1 < patternEnd && ptr[1] == '+') 4203 4229 { 4204 4230 ptr++; … … 4225 4251 4226 4252 case '[': 4227 if ( *(++ptr)== '^')4253 if (++ptr < patternEnd && *ptr == '^') 4228 4254 { 4229 4255 class_optcount = 10; /* Greater than one */ … … 4238 4264 /* Written as a "do" so that an initial ']' is taken as data */ 4239 4265 4240 if ( *ptr != 0) do4266 if (ptr < patternEnd) do 4241 4267 { 4242 4268 /* Inside \Q...\E everything is literal except \E */ … … 4244 4270 if (inescq) 4245 4271 { 4246 if (*ptr != '\\' || ptr[1] != 'E') goto GET_ONE_CHARACTER; 4272 if (*ptr != '\\' || ptr + 1 >= patternEnd || ptr[1] != 'E') 4273 goto GET_ONE_CHARACTER; 4247 4274 inescq = FALSE; 4248 4275 ptr += 1; … … 4254 4281 if (*ptr == '\\') 4255 4282 { 4256 c = check_escape(&ptr, &errorcode, bracount, options, TRUE);4283 c = check_escape(&ptr, patternEnd, &errorcode, bracount, options, TRUE); 4257 4284 if (errorcode != 0) goto PCRE_ERROR_RETURN; 4258 4285 … … 4297 4324 checked during the real compile phase. */ 4298 4325 4299 else if (*ptr == '[' && check_posix_syntax(ptr, &ptr, &compile_block))4326 else if (*ptr == '[' && ptr + 2 < patternEnd && check_posix_syntax(ptr, patternEnd, &ptr, &compile_block)) 4300 4327 { 4301 4328 ptr++; … … 4319 4346 { 4320 4347 int extra = 0; 4321 GETCHARLEN (c, ptr, extra);4348 GETCHARLENEND(c, ptr, patternEnd, extra); 4322 4349 ptr += extra; 4323 4350 } … … 4333 4360 4334 4361 d = -1; 4335 if (ptr [1] == '-')4362 if (ptr + 1 < patternEnd && ptr[1] == '-') 4336 4363 { 4337 4364 pcre_uchar const *hyptr = ptr++; 4338 if (ptr[1] == '\\') 4339 { 4340 ptr++; 4341 d = check_escape(&ptr, &errorcode, bracount, options, TRUE); 4342 if (errorcode != 0) goto PCRE_ERROR_RETURN; 4343 if (-d == ESC_b) d = '\b'; /* backspace */ 4344 else if (-d == ESC_X) d = 'X'; /* literal X in a class */ 4345 } 4346 else if (ptr[1] != 0 && ptr[1] != ']') 4347 { 4348 ptr++; 4349 #ifdef SUPPORT_UTF8 4350 if (utf8) 4351 { 4352 int extra = 0; 4353 GETCHARLEN(d, ptr, extra); 4354 ptr += extra; 4355 } 4356 else 4357 #endif 4358 d = *ptr; 4359 } 4365 if (ptr + 1 < patternEnd) { 4366 if (ptr[1] == '\\') 4367 { 4368 ptr++; 4369 d = check_escape(&ptr, patternEnd, &errorcode, bracount, options, TRUE); 4370 if (errorcode != 0) goto PCRE_ERROR_RETURN; 4371 if (-d == ESC_b) d = '\b'; /* backspace */ 4372 else if (-d == ESC_X) d = 'X'; /* literal X in a class */ 4373 } 4374 else if (ptr[1] != ']') 4375 { 4376 ptr++; 4377 #ifdef SUPPORT_UTF8 4378 if (utf8) 4379 { 4380 int extra = 0; 4381 GETCHARLENEND(d, ptr, patternEnd, extra); 4382 ptr += extra; 4383 } 4384 else 4385 #endif 4386 d = *ptr; 4387 } 4388 } 4360 4389 if (d < 0) ptr = hyptr; /* go back to hyphen as data */ 4361 4390 } … … 4454 4483 } 4455 4484 } 4456 while ( *(++ptr) != 0&& (inescq || *ptr != ']')); /* Concludes "do" above */4457 4458 if ( *ptr == 0) /* Missing terminating ']' */4485 while (++ptr < patternEnd && (inescq || *ptr != ']')); /* Concludes "do" above */ 4486 4487 if (ptr >= patternEnd) /* Missing terminating ']' */ 4459 4488 { 4460 4489 errorcode = ERR6; … … 4473 4502 we also need extra for wrapping the whole thing in a sub-pattern. */ 4474 4503 4475 if ( *ptr != 0 && ptr[1] == '{' && is_counted_repeat(ptr+2))4504 if (ptr + 1 < patternEnd && ptr[1] == '{' && is_counted_repeat(ptr+2, patternEnd)) 4476 4505 { 4477 4506 ptr = read_repeat_counts(ptr+2, &min, &max, &errorcode); … … 4481 4510 length++; 4482 4511 else length += 5; 4483 if (ptr[1] == '+') 4484 { 4485 ptr++; 4486 length += 2 + 2*LINK_SIZE; 4487 } 4488 else if (ptr[1] == '?') ptr++; 4512 if (ptr + 1 < patternEnd) { 4513 if (ptr[1] == '+') 4514 { 4515 ptr++; 4516 length += 2 + 2*LINK_SIZE; 4517 } 4518 else if (ptr[1] == '?') 4519 ptr++; 4520 } 4489 4521 } 4490 4522 } … … 4500 4532 /* Handle special forms of bracket, which all start (? */ 4501 4533 4502 if (ptr [1] == '?')4534 if (ptr + 1 < patternEnd && ptr[1] == '?') 4503 4535 { 4504 4536 int set, unset; 4505 4537 int *optset; 4506 4538 4507 switch (c = ptr[2])4539 switch (c = (ptr + 2 < patternEnd ? ptr[2] : 0)) 4508 4540 { 4509 4541 /* Skip over comments entirely */ 4510 4542 case '#': 4511 4543 ptr += 3; 4512 while (*ptr != 0 && *ptr != ')') ptr++; 4513 if (*ptr == 0) 4544 while (ptr < patternEnd && *ptr != ')') 4545 ptr++; 4546 if (ptr == patternEnd) 4514 4547 { 4515 4548 errorcode = ERR18; … … 4547 4580 { 4548 4581 ++ptr; 4549 while ( (DIGITAB(*ptr) & ctype_digit) != 0)4582 while (ptr < patternEnd && (DIGITAB(*ptr) & ctype_digit) != 0) 4550 4583 ++ptr; 4551 4584 } 4552 if ( *ptr != ')')4585 if (ptr >= patternEnd || *ptr != ')') 4553 4586 { 4554 4587 errorcode = ERR29; … … 4561 4594 code that handles this for real brackets. */ 4562 4595 4563 if (ptr [1] == '+' || ptr[1] == '*' || ptr[1] == '?' || ptr[1] == '{')4596 if (ptr + 1 < patternEnd && (ptr[1] == '+' || ptr[1] == '*' || ptr[1] == '?' || ptr[1] == '{')) 4564 4597 { 4565 4598 length += 2 + 2 * LINK_SIZE; /* to make bracketed */ … … 4575 4608 case 'C': 4576 4609 ptr += 3; 4577 while ( (DIGITAB(*ptr) & ctype_digit) != 0)4610 while (ptr < patternEnd && (DIGITAB(*ptr) & ctype_digit) != 0) 4578 4611 ++ptr; 4579 if ( *ptr != ')')4612 if (ptr >= patternEnd || *ptr != ')') 4580 4613 { 4581 4614 errorcode = ERR39; … … 4592 4625 /* Handle the definition of a named subpattern */ 4593 4626 4594 if (*ptr == '<') 4595 { 4596 const pcre_uchar *p; /* Don't amalgamate; some compilers */ 4597 p = ++ptr; /* grumble at autoincrement in declaration */ 4598 while ((CTYPES(&compile_block, *ptr) & ctype_word) != 0) ptr++; 4599 if (*ptr != '>') 4600 { 4601 errorcode = ERR42; 4602 goto PCRE_ERROR_RETURN; 4603 } 4604 name_count++; 4605 if (ptr - p > max_name_size) max_name_size = INT_CAST(ptr - p); 4606 capturing = TRUE; /* Named parentheses are always capturing */ 4607 break; 4608 } 4609 4610 /* Handle back references and recursive calls to named subpatterns */ 4611 4612 if (*ptr == '=' || *ptr == '>') 4613 { 4614 ++ptr; 4615 while ((CTYPES(&compile_block, *ptr) & ctype_word) != 0) 4616 ++ptr; 4617 if (*ptr != ')') 4618 { 4619 errorcode = ERR42; 4620 goto PCRE_ERROR_RETURN; 4621 } 4622 break; 4623 } 4627 if (ptr < patternEnd) { 4628 if (*ptr == '<') 4629 { 4630 const pcre_uchar *p; /* Don't amalgamate; some compilers */ 4631 p = ++ptr; /* grumble at autoincrement in declaration */ 4632 while (ptr < patternEnd && (CTYPES(&compile_block, *ptr) & ctype_word) != 0) 4633 ptr++; 4634 if (ptr >= patternEnd || *ptr != '>') 4635 { 4636 errorcode = ERR42; 4637 goto PCRE_ERROR_RETURN; 4638 } 4639 name_count++; 4640 if (ptr - p > max_name_size) max_name_size = INT_CAST(ptr - p); 4641 capturing = TRUE; /* Named parentheses are always capturing */ 4642 break; 4643 } 4644 4645 /* Handle back references and recursive calls to named subpatterns */ 4646 4647 if (*ptr == '=' || *ptr == '>') 4648 { 4649 ++ptr; 4650 while (ptr < patternEnd && (CTYPES(&compile_block, *ptr) & ctype_word) != 0) 4651 ++ptr; 4652 if (ptr >= patternEnd || *ptr != ')') 4653 { 4654 errorcode = ERR42; 4655 goto PCRE_ERROR_RETURN; 4656 } 4657 break; 4658 } 4659 } 4624 4660 4625 4661 /* Unknown character after (?P */ … … 4632 4668 case '<': 4633 4669 ptr += 3; 4634 if ( *ptr == '=' || *ptr == '!')4670 if (ptr < patternEnd && (*ptr == '=' || *ptr == '!')) 4635 4671 { 4636 4672 branch_newextra = 1 + LINK_SIZE; … … 4646 4682 4647 4683 case '(': 4648 if (ptr [3] == 'R' && ptr[4] == ')')4684 if (ptr + 4 < patternEnd && ptr[3] == 'R' && ptr[4] == ')') 4649 4685 { 4650 4686 ptr += 4; 4651 4687 length += 3; 4652 4688 } 4653 else if ( (DIGITAB(ptr[3]) & ctype_digit) != 0)4689 else if (ptr + 3 < patternEnd && (DIGITAB(ptr[3]) & ctype_digit) != 0) 4654 4690 { 4655 4691 ptr += 4; 4656 4692 length += 3; 4657 while ((DIGITAB(*ptr) & ctype_digit) != 0) ptr++; 4658 if (*ptr != ')') 4693 while (ptr < patternEnd && (DIGITAB(*ptr) & ctype_digit) != 0) 4694 ptr++; 4695 if (ptr >= patternEnd || *ptr != ')') 4659 4696 { 4660 4697 errorcode = ERR26; … … 4665 4702 { 4666 4703 ptr++; /* Can treat like ':' as far as spacing is concerned */ 4667 if (ptr [2] != '?' ||4704 if (ptr + 3 >= patternEnd || ptr[2] != '?' || 4668 4705 (ptr[3] != '=' && ptr[3] != '!' && ptr[3] != '<') ) 4669 4706 { … … 4687 4724 for (;; ptr++) 4688 4725 { 4689 c = *ptr;4726 c = ptr < patternEnd ? *ptr : 0; 4690 4727 switch (c) 4691 4728 { … … 4861 4898 automatically; for the others we need an increment. */ 4862 4899 4863 if ( (c = ptr[1]) == '{' && is_counted_repeat(ptr+2))4900 if (ptr + 1 < patternEnd && (c = ptr[1]) == '{' && is_counted_repeat(ptr+2, patternEnd)) 4864 4901 { 4865 4902 ptr = read_repeat_counts(ptr+2, &min, &max, &errorcode); … … 4898 4935 /* Allow space for once brackets for "possessive quantifier" */ 4899 4936 4900 if (ptr [1] == '+')4937 if (ptr + 1 < patternEnd && ptr[1] == '+') 4901 4938 { 4902 4939 ptr++; … … 4912 4949 NORMAL_CHAR: 4913 4950 4914 if (inescq && c == '\\' && ptr [1] == 'E')4951 if (inescq && c == '\\' && ptr + 1 < patternEnd && ptr[1] == 'E') 4915 4952 { 4916 4953 inescq = FALSE; … … 4929 4966 if (IS_LEADING_SURROGATE(c)) 4930 4967 { 4931 c = DECODE_SURROGATE_PAIR(c, *ptr);4968 c = DECODE_SURROGATE_PAIR(c, ptr < patternEnd ? *ptr : 0); 4932 4969 ++ptr; 4933 4970 } … … 4945 4982 if (utf8 && (c & 0xc0) == 0xc0) 4946 4983 { 4947 while ( (ptr[1] & 0xc0) == 0x80) /* Can't flow over the end */4984 while (ptr + 1 < patternEnd && (ptr[1] & 0xc0) == 0x80) /* Can't flow over the end */ 4948 4985 { /* because the end is marked */ 4949 4986 lastitemlength++; /* by a zero byte. */ … … 5018 5055 *code = OP_BRA; 5019 5056 bracount = 0; 5020 (void)compile_regex(options, options & PCRE_IMS, &bracount, &code, &ptr, 5057 (void)compile_regex(options, options & PCRE_IMS, &bracount, &code, &ptr, patternEnd, 5021 5058 &errorcode, FALSE, 0, &firstbyte, &reqbyte, NULL, &compile_block); 5022 5059 re->top_bracket = bracount; … … 5027 5064 /* If not reached end of pattern on success, there's an excess bracket. */ 5028 5065 5029 if (errorcode == 0 && *ptr != 0) errorcode = ERR22;5066 if (errorcode == 0 && ptr < patternEnd) errorcode = ERR22; 5030 5067 5031 5068 /* Fill in the terminating state and check for disastrous overflow, but
Note:
See TracChangeset
for help on using the changeset viewer.