Changeset 27730 in webkit for trunk/JavaScriptCore/pcre/pcre_compile.cpp
- Timestamp:
- Nov 12, 2007, 3:04:41 PM (18 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/JavaScriptCore/pcre/pcre_compile.cpp
r27686 r27730 41 41 supporting internal functions that are not used by other modules. */ 42 42 43 #include "config.h" 44 43 45 #include "pcre_internal.h" 44 46 … … 47 49 48 50 using namespace WTF; 49 50 /* WARNING: This macro evaluates its parameters more than once. */51 #define DIGITAB(x) ((x) < 128 ? digitab[(x)] : 0)52 51 53 52 /************************************************* … … 130 129 } 131 130 132 /* Table to hex digits. This is used when compiling133 patterns. Note that the tables in chartables are dependent on the locale, and134 may mark arbitrary characters as digits - but the PCRE compiling code expects135 to handle only 0-9, a-z, and A-Z as digits when compiling. That is why we have136 a private table here. It costs 256 bytes, but it is a lot faster than doing137 character value tests (at least in some simple cases I timed), and in some138 applications one wants PCRE to compile efficiently as well as match139 efficiently.140 141 For convenience, we use the same bit definition as in chartables:142 143 0x08 hexadecimal digit144 145 Then we can use ctype_xdigit in the code. */146 147 static const unsigned char digitab[] =148 {149 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */150 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 8- 15 */151 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */152 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */153 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* - ' */154 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* ( - / */155 0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,0x0c, /* 0 - 7 */156 0x0c,0x0c,0x00,0x00,0x00,0x00,0x00,0x00, /* 8 - ? */157 0x00,0x08,0x08,0x08,0x08,0x08,0x08,0x00, /* @ - G */158 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* H - O */159 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* P - W */160 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* X - _ */161 0x00,0x08,0x08,0x08,0x08,0x08,0x08,0x00, /* ` - g */162 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* h - o */163 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* p - w */164 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* x -127 */165 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */166 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */167 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */168 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */169 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */170 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */171 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */172 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */173 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */174 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */175 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */176 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */177 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */178 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */179 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */180 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */181 182 131 /* Definition to allow mutual recursion */ 183 132 … … 201 150 bracount number of previous extracting brackets 202 151 options the options bits 203 isclass TRUEif inside a character class152 isclass true if inside a character class 204 153 205 154 Returns: zero or positive => a data character … … 302 251 303 252 c = 0; 304 while (pt < patternEnd && (DIGITAB(*pt) & ctype_xdigit) != 0)253 while (pt < patternEnd && isASCIIHexDigit(*pt)) 305 254 { 306 255 register int cc = *pt++; … … 331 280 332 281 c = 0; 333 while (i++ < 2 && ptr + 1 < patternEnd && (DIGITAB(ptr[1]) & ctype_xdigit) != 0)282 while (i++ < 2 && ptr + 1 < patternEnd && isASCIIHexDigit(ptr[1])) 334 283 { 335 284 int cc; /* Some compilers don't like ++ */ … … 345 294 while (i++ < 4) 346 295 { 347 if (pt + 1 >= patternEnd || (DIGITAB(pt[1]) & ctype_xdigit) == 0)296 if (pt + 1 >= patternEnd || !isASCIIHexDigit(pt[1])) 348 297 { 349 298 pt = ptr; … … 374 323 375 324 /* A letter is upper-cased; then the 0x40 bit is flipped. This coding 376 is ASCII-specific, but then the whole concept of \cx is ASCII-specific. 377 (However, an EBCDIC equivalent has now been added.) */ 325 is ASCII-specific, but then the whole concept of \cx is ASCII-specific. */ 378 326 379 327 if (c >= 'a' && c <= 'z') c -= 32; 380 328 c ^= 0x40; 381 break;382 383 default:384 329 break; 385 330 } … … 404 349 p pointer to the first char after '{' 405 350 406 Returns: TRUE or FALSE351 Returns: true or false 407 352 */ 408 353 … … 411 356 { 412 357 if (p >= patternEnd || !isASCIIDigit(*p)) 413 return FALSE;358 return false; 414 359 p++; 415 360 while (p < patternEnd && isASCIIDigit(*p)) 416 361 p++; 417 362 if (p < patternEnd && *p == '}') 418 return TRUE;363 return true; 419 364 420 365 if (p >= patternEnd || *p++ != ',') 421 return FALSE;366 return false; 422 367 if (p < patternEnd && *p == '}') 423 return TRUE;368 return true; 424 369 425 370 if (p >= patternEnd || !isASCIIDigit(*p)) 426 return FALSE;371 return false; 427 372 p++; 428 373 while (p < patternEnd && isASCIIDigit(*p)) … … 513 458 Arguments: 514 459 code pointer to the start of the group 515 skipassert TRUEif certain assertions are to be skipped460 skipassert true if certain assertions are to be skipped 516 461 517 462 Returns: pointer to the first significant opcode … … 726 671 endcode points to where to stop 727 672 728 Returns: TRUEif what is matched could be empty673 Returns: true if what is matched could be empty 729 674 */ 730 675 … … 733 678 { 734 679 register int c; 735 for (code = first_significant_code(code + 1 + LINK_SIZE, TRUE);680 for (code = first_significant_code(code + 1 + LINK_SIZE, true); 736 681 code < endcode; 737 code = first_significant_code(code + OP_lengths[c], TRUE))682 code = first_significant_code(code + OP_lengths[c], true)) 738 683 { 739 684 const uschar *ccode; … … 744 689 { 745 690 BOOL empty_branch; 746 if (GET(code, 1) == 0) return TRUE; /* Hit unclosed bracket */691 if (GET(code, 1) == 0) return true; /* Hit unclosed bracket */ 747 692 748 693 /* Scan a closed bracket */ 749 694 750 empty_branch = FALSE;695 empty_branch = false; 751 696 do 752 697 { 753 698 if (!empty_branch && could_be_empty_branch(code, endcode)) 754 empty_branch = TRUE;699 empty_branch = true; 755 700 code += GET(code, 1); 756 701 } 757 702 while (*code == OP_ALT); 758 if (!empty_branch) return FALSE; /* All branches are non-empty */703 if (!empty_branch) return false; /* All branches are non-empty */ 759 704 code += 1 + LINK_SIZE; 760 705 c = *code; … … 786 731 case OP_CRPLUS: /* These repeats aren't empty */ 787 732 case OP_CRMINPLUS: 788 return FALSE;733 return false; 789 734 790 735 case OP_CRRANGE: 791 736 case OP_CRMINRANGE: 792 if (GET2(ccode, 1) > 0) return FALSE; /* Minimum > 0 */737 if (GET2(ccode, 1) > 0) return false; /* Minimum > 0 */ 793 738 break; 794 739 } … … 818 763 case OP_TYPEMINPLUS: 819 764 case OP_TYPEEXACT: 820 return FALSE;765 return false; 821 766 822 767 /* End of branch */ … … 826 771 case OP_KETRMIN: 827 772 case OP_ALT: 828 return TRUE;773 return true; 829 774 830 775 /* In UTF-8 mode, STAR, MINSTAR, QUERY, MINQUERY, UPTO, and MINUPTO may be … … 842 787 } 843 788 844 return TRUE;789 return true; 845 790 } 846 791 … … 887 832 odptr where to put end of othercase range 888 833 889 Yield: TRUE when range returned; FALSEwhen no more834 Yield: true when range returned; false when no more 890 835 */ 891 836 … … 898 843 { if ((othercase = _pcre_ucp_othercase(c)) >= 0) break; } 899 844 900 if (c > d) return FALSE;845 if (c > d) return false; 901 846 902 847 *ocptr = othercase; … … 912 857 *cptr = c; 913 858 914 return TRUE;859 return true; 915 860 } 916 861 … … 934 879 cd contains pointers to tables etc. 935 880 936 Returns: TRUEon success937 FALSE, with *errorcodeptr set non-zero on error881 Returns: true on success 882 false, with *errorcodeptr set non-zero on error 938 883 */ 939 884 … … 953 898 register uschar *code = *codeptr; 954 899 uschar *tempcode; 955 BOOL groupsetfirstbyte = FALSE;900 BOOL groupsetfirstbyte = false; 956 901 const pcre_uchar *ptr = *ptrptr; 957 902 const pcre_uchar *tempptr; … … 1030 975 *codeptr = code; 1031 976 *ptrptr = ptr; 1032 return TRUE;977 return true; 1033 978 1034 979 /* Handle single-character metacharacters. In multiline mode, ^ disables … … 1074 1019 case '[': 1075 1020 previous = code; 1076 should_flip_negation = FALSE;1021 should_flip_negation = false; 1077 1022 1078 1023 /* PCRE supports POSIX class stuff inside a class. Perl gives an error if … … 1083 1028 if ((c = *(++ptr)) == '^') 1084 1029 { 1085 negate_class = TRUE;1030 negate_class = true; 1086 1031 c = *(++ptr); 1087 1032 } 1088 1033 else 1089 1034 { 1090 negate_class = FALSE;1035 negate_class = false; 1091 1036 } 1092 1037 … … 1098 1043 class_lastchar = -1; 1099 1044 1100 class_utf8 = FALSE; /* No chars >= 256 */1045 class_utf8 = false; /* No chars >= 256 */ 1101 1046 class_utf8data = code + LINK_SIZE + 34; /* For UTF-8 items */ 1102 1047 … … 1131 1076 if (c == '\\') 1132 1077 { 1133 c = check_escape(&ptr, patternEnd, errorcodeptr, *brackets, TRUE);1078 c = check_escape(&ptr, patternEnd, errorcodeptr, *brackets, true); 1134 1079 1135 1080 if (-c == ESC_b) c = '\b'; /* \b is backslash in a class */ … … 1146 1091 1147 1092 case ESC_D: 1148 should_flip_negation = TRUE;1093 should_flip_negation = true; 1149 1094 for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_digit]; 1150 1095 continue; … … 1155 1100 1156 1101 case ESC_W: 1157 should_flip_negation = TRUE;1102 should_flip_negation = true; 1158 1103 for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_word]; 1159 1104 continue; … … 1164 1109 1165 1110 case ESC_S: 1166 should_flip_negation = TRUE;1111 should_flip_negation = true; 1167 1112 for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_space]; 1168 1113 continue; … … 1201 1146 { 1202 1147 const pcre_uchar *oldptr = ptr; 1203 d = check_escape(&ptr, patternEnd, errorcodeptr, *brackets, TRUE);1148 d = check_escape(&ptr, patternEnd, errorcodeptr, *brackets, true); 1204 1149 1205 1150 /* \b is backslash; \X is literal X; any other special means the '-' … … 1229 1174 if ((d > 255 || ((options & PCRE_CASELESS) != 0 && d > 127))) 1230 1175 { 1231 class_utf8 = TRUE;1176 class_utf8 = true; 1232 1177 1233 1178 /* With UCP support, we can find the other case equivalents of … … 1311 1256 if ((c > 255 || ((options & PCRE_CASELESS) != 0 && c > 127))) 1312 1257 { 1313 class_utf8 = TRUE;1258 class_utf8 = true; 1314 1259 *class_utf8data++ = XCL_SINGLE; 1315 1260 class_utf8data += _pcre_ord2utf8(c, class_utf8data); … … 1486 1431 1487 1432 op_type = 0; /* Default single-char op codes */ 1488 possessive_quantifier = FALSE; /* Default not possessive quantifier */1433 possessive_quantifier = false; /* Default not possessive quantifier */ 1489 1434 1490 1435 /* Save start of previous item, in case we have to move it up to make space … … 1502 1447 { 1503 1448 repeat_type = 0; /* Force greedy */ 1504 possessive_quantifier = TRUE;1449 possessive_quantifier = true; 1505 1450 ptr++; 1506 1451 } … … 2003 1948 zeroreqbyte = reqbyte; 2004 1949 zerofirstbyte = firstbyte; 2005 groupsetfirstbyte = FALSE;1950 groupsetfirstbyte = false; 2006 1951 2007 1952 if (bravalue >= OP_BRA || bravalue == OP_ONCE) … … 2018 1963 { 2019 1964 firstbyte = subfirstbyte; 2020 groupsetfirstbyte = TRUE;1965 groupsetfirstbyte = true; 2021 1966 } 2022 1967 else firstbyte = REQ_NONE; … … 2066 2011 case '\\': 2067 2012 tempptr = ptr; 2068 c = check_escape(&ptr, patternEnd, errorcodeptr, *brackets, FALSE);2013 c = check_escape(&ptr, patternEnd, errorcodeptr, *brackets, false); 2069 2014 2070 2015 /* Handle metacharacters introduced by \. For ones like \d, the ESC_ values … … 2186 2131 FAILED: 2187 2132 *ptrptr = ptr; 2188 return FALSE;2133 return false; 2189 2134 } 2190 2135 … … 2215 2160 cd points to the data block with tables pointers etc. 2216 2161 2217 Returns: TRUEon success2162 Returns: true on success 2218 2163 */ 2219 2164 … … 2247 2192 { 2248 2193 *ptrptr = ptr; 2249 return FALSE;2194 return false; 2250 2195 } 2251 2196 … … 2322 2267 *firstbyteptr = firstbyte; 2323 2268 *reqbyteptr = reqbyte; 2324 return TRUE;2269 return true; 2325 2270 } 2326 2271 … … 2377 2322 backref_map the back reference bitmap 2378 2323 2379 Returns: TRUE or FALSE2324 Returns: true or false 2380 2325 */ 2381 2326 … … 2386 2331 do { 2387 2332 const uschar *scode = 2388 first_significant_code(code + 1+LINK_SIZE, FALSE);2333 first_significant_code(code + 1+LINK_SIZE, false); 2389 2334 register int op = *scode; 2390 2335 … … 2397 2342 if (op > EXTRACT_BASIC_MAX) op = GET2(scode, 2+LINK_SIZE); 2398 2343 new_map = bracket_map | ((op < 32)? (1 << op) : 1); 2399 if (!is_anchored(scode, options, new_map, backref_map)) return FALSE;2344 if (!is_anchored(scode, options, new_map, backref_map)) return false; 2400 2345 } 2401 2346 … … 2404 2349 else if (op == OP_BRA || op == OP_ASSERT || op == OP_ONCE) 2405 2350 { 2406 if (!is_anchored(scode, options, bracket_map, backref_map)) return FALSE;2351 if (!is_anchored(scode, options, bracket_map, backref_map)) return false; 2407 2352 } 2408 2353 … … 2410 2355 2411 2356 else if (((options & PCRE_MULTILINE) != 0 || op != OP_CIRC)) 2412 return FALSE;2357 return false; 2413 2358 code += GET(code, 1); 2414 2359 } 2415 2360 while (*code == OP_ALT); /* Loop for each alternative */ 2416 return TRUE;2361 return true; 2417 2362 } 2418 2363 … … 2437 2382 backref_map the back reference bitmap 2438 2383 2439 Returns: TRUE or FALSE2384 Returns: true or false 2440 2385 */ 2441 2386 … … 2445 2390 { 2446 2391 do { 2447 const uschar *scode = first_significant_code(code + 1+LINK_SIZE, FALSE);2392 const uschar *scode = first_significant_code(code + 1+LINK_SIZE, false); 2448 2393 register int op = *scode; 2449 2394 … … 2456 2401 if (op > EXTRACT_BASIC_MAX) op = GET2(scode, 2+LINK_SIZE); 2457 2402 new_map = bracket_map | ((op < 32)? (1 << op) : 1); 2458 if (!is_startline(scode, new_map, backref_map)) return FALSE;2403 if (!is_startline(scode, new_map, backref_map)) return false; 2459 2404 } 2460 2405 … … 2462 2407 2463 2408 else if (op == OP_BRA || op == OP_ASSERT || op == OP_ONCE) 2464 { if (!is_startline(scode, bracket_map, backref_map)) return FALSE; }2409 { if (!is_startline(scode, bracket_map, backref_map)) return false; } 2465 2410 2466 2411 /* .* means "start at start or after \n" if it isn't in brackets that … … 2469 2414 else if (op == OP_TYPESTAR || op == OP_TYPEMINSTAR) 2470 2415 { 2471 if (scode[1] != OP_ANY || (bracket_map & backref_map) != 0) return FALSE;2416 if (scode[1] != OP_ANY || (bracket_map & backref_map) != 0) return false; 2472 2417 } 2473 2418 2474 2419 /* Check for explicit circumflex */ 2475 2420 2476 else if (op != OP_CIRC) return FALSE;2421 else if (op != OP_CIRC) return false; 2477 2422 2478 2423 /* Move on to the next alternative */ … … 2481 2426 } 2482 2427 while (*code == OP_ALT); /* Loop for each alternative */ 2483 return TRUE;2428 return true; 2484 2429 } 2485 2430 … … 2501 2446 code points to start of expression (the bracket) 2502 2447 options pointer to the options (used to check casing changes) 2503 inassert TRUEif in an assertion2448 inassert true if in an assertion 2504 2449 2505 2450 Returns: -1 or the fixed first char … … 2513 2458 int d; 2514 2459 const uschar *scode = 2515 first_significant_code(code + 1+LINK_SIZE, TRUE);2460 first_significant_code(code + 1+LINK_SIZE, true); 2516 2461 register int op = *scode; 2517 2462 … … 2657 2602 c = *ptr; 2658 2603 2659 /* If we are inside a \Q...\E sequence, all chars are literal */2660 2661 2604 item_count++; /* Is zero for the first non-comment item */ 2662 2605 … … 2667 2610 2668 2611 case '\\': 2669 c = check_escape(&ptr, patternEnd, &errorcode, bracount, FALSE);2612 c = check_escape(&ptr, patternEnd, &errorcode, bracount, false); 2670 2613 if (errorcode != 0) goto PCRE_ERROR_RETURN; 2671 2614 … … 2791 2734 else class_optcount = 0; 2792 2735 2793 class_utf8 = FALSE;2736 class_utf8 = false; 2794 2737 2795 2738 /* Written as a "do" so that an initial ']' is taken as data */ … … 2801 2744 if (*ptr == '\\') 2802 2745 { 2803 c = check_escape(&ptr, patternEnd, &errorcode, bracount, TRUE);2746 c = check_escape(&ptr, patternEnd, &errorcode, bracount, true); 2804 2747 if (errorcode != 0) goto PCRE_ERROR_RETURN; 2805 2748 … … 2849 2792 { 2850 2793 ptr++; 2851 d = check_escape(&ptr, patternEnd, &errorcode, bracount, TRUE);2794 d = check_escape(&ptr, patternEnd, &errorcode, bracount, true); 2852 2795 if (errorcode != 0) goto PCRE_ERROR_RETURN; 2853 2796 if (-d == ESC_b) d = '\b'; /* backspace */ … … 2882 2825 if (!class_utf8) /* Allow for XCLASS overhead */ 2883 2826 { 2884 class_utf8 = TRUE;2827 class_utf8 = true; 2885 2828 length += LINK_SIZE + 2; 2886 2829 } … … 2939 2882 if (!class_utf8) /* Allow for XCLASS overhead */ 2940 2883 { 2941 class_utf8 = TRUE;2884 class_utf8 = true; 2942 2885 length += LINK_SIZE + 2; 2943 2886 } … … 2989 2932 branch_newextra = 0; 2990 2933 bracket_length = 1 + LINK_SIZE; 2991 capturing = FALSE;2934 capturing = false; 2992 2935 2993 2936 /* Handle special forms of bracket, which all start (? */ … … 3237 3180 { 3238 3181 if (firstbyte < 0) 3239 firstbyte = find_firstassertedchar(codestart, re->options, FALSE);3182 firstbyte = find_firstassertedchar(codestart, re->options, false); 3240 3183 if (firstbyte >= 0) /* Remove caseless flag for non-caseable chars */ 3241 3184 {
Note:
See TracChangeset
for help on using the changeset viewer.