Changeset 27686 in webkit for trunk/JavaScriptCore/pcre/pcre_internal.h
- Timestamp:
- Nov 11, 2007, 10:56:13 AM (18 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/JavaScriptCore/pcre/pcre_internal.h
r27422 r27686 1 /************************************************* 2 * Perl-Compatible Regular Expressions * 3 *************************************************/ 4 5 6 /* PCRE is a library of functions to support regular expressions whose syntax 7 and semantics are as close as possible to those of the Perl 5 language. 8 9 Written by Philip Hazel 1 /* This is JavaScriptCore's variant of the PCRE library. While this library 2 started out as a copy of PCRE, many of the features of PCRE have been 3 removed. This library now supports only the regular expression features 4 required by the JavaScript language specification, and has only the functions 5 needed by JavaScriptCore and the rest of WebKit. 6 7 Originally written by Philip Hazel 10 8 Copyright (c) 1997-2006 University of Cambridge 11 Copyright (c) 2004, 2005 Apple Computer, Inc.9 Copyright (C) 2002, 2004, 2006, 2007 Apple Inc. All rights reserved. 12 10 13 11 ----------------------------------------------------------------------------- … … 54 52 #endif 55 53 56 #define _pcre_OP_lengths kjs_pcre_OP_lengths 54 /* The value of LINK_SIZE determines the number of bytes used to store links as 55 offsets within the compiled regex. The default is 2, which allows for compiled 56 patterns up to 64K long. This covers the vast majority of cases. However, PCRE 57 can also be compiled to use 3 or 4 bytes instead. This allows for longer 58 patterns in extreme cases. On systems that support it, "configure" can be used 59 to override this default. */ 60 61 #define LINK_SIZE 2 62 63 /* The value of MATCH_LIMIT determines the default number of times the internal 64 match() function can be called during a single execution of pcre_exec(). There 65 is a runtime interface for setting a different limit. The limit exists in order 66 to catch runaway regular expressions that take for ever to determine that they 67 do not match. The default is set very large so that it does not accidentally 68 catch legitimate cases. 
On systems that support it, "configure" can be used to 69 override this default. */ 70 71 #define MATCH_LIMIT 10000000 72 73 /* The above limit applies to all calls of match(), whether or not they 74 increase the recursion depth. In some environments it is desirable to limit the 75 depth of recursive calls of match() more strictly, in order to restrict the 76 maximum amount of stack (or heap, if NO_RECURSE is defined) that is used. The 77 value of MATCH_LIMIT_RECURSION applies only to recursive calls of match(). To 78 have any useful effect, it must be less than the value of MATCH_LIMIT. There is 79 a runtime method for setting a different limit. On systems that support it, 80 "configure" can be used to override this default. */ 81 82 #define MATCH_LIMIT_RECURSION MATCH_LIMIT 83 57 84 #define _pcre_default_tables kjs_pcre_default_tables 58 85 #define _pcre_ord2utf8 kjs_pcre_ord2utf8 59 #define _pcre_printint kjs_pcre_printint60 #define _pcre_try_flipped kjs_pcre_try_flipped61 #define _pcre_ucp_findchar kjs_pcre_ucp_findchar62 86 #define _pcre_utf8_table1 kjs_pcre_utf8_table1 63 87 #define _pcre_utf8_table1_size kjs_pcre_utf8_table1_size … … 65 89 #define _pcre_utf8_table3 kjs_pcre_utf8_table3 66 90 #define _pcre_utf8_table4 kjs_pcre_utf8_table4 67 #define _pcre_utt kjs_pcre_utt68 #define _pcre_utt_size kjs_pcre_utt_size69 #define _pcre_valid_utf8 kjs_pcre_valid_utf870 91 #define _pcre_xclass kjs_pcre_xclass 71 92 … … 86 107 #define DPRINTF(p) /*nothing*/ 87 108 #endif 88 89 90 /* Get the definitions provided by running "configure" */91 92 #include "pcre-config.h"93 109 94 110 /* Standard C headers plus the external interface definition. The only time … … 115 131 typedef JSRegExp pcre; 116 132 117 typedef JSRegExpChar pcre_char; 118 typedef JSRegExpChar pcre_uchar; 119 typedef const JSRegExpChar* USPTR; 120 121 /* Temporary fastMalloc/fastFree until we port to C++. 
*/ 122 #ifdef __cplusplus 123 extern "C" { 124 #endif 125 extern void* (*pcre_malloc)(size_t); 126 extern void (*pcre_free)(void*); 127 #ifdef __cplusplus 128 } /* extern "C" */ 129 #endif 133 typedef UChar pcre_char; 134 typedef UChar pcre_uchar; 135 typedef const UChar* USPTR; 130 136 131 137 /* PCRE keeps offsets in its compiled code as 2-byte quantities (always stored … … 287 293 #define ISMIDCHAR(c) IS_TRAILING_SURROGATE(c) 288 294 289 /* If the pointer is not at the start of a character, move it back until290 it is. Called only in UTF-8 mode. */291 292 295 #define BACKCHAR(eptr) while(ISMIDCHAR(*eptr)) eptr--; 293 294 295 /* In case there is no definition of offsetof() provided - though any proper296 Standard C system should have one. */297 298 #ifndef offsetof299 #define offsetof(p_type,field) ((size_t)&(((p_type *)0)->field))300 #endif301 302 303 /* Private options flags start at the most significant end of the four bytes,304 but skip the top bit so we can use ints for convenience without getting tangled305 with negative values. The public options defined in pcre.h start at the least306 significant end. Make sure they don't overlap! */307 296 308 297 #define PCRE_FIRSTSET 0x40000000 /* first_byte is set */ … … 310 299 #define PCRE_STARTLINE 0x10000000 /* start after \n for multiline */ 311 300 #define PCRE_ANCHORED 0x02000000 /* can't use partial with this regex */ 312 #define PCRE_CASELESS JS_REGEXP_CASELESS313 #define PCRE_MULTILINE JS_REGEXP_MULTILINE301 #define PCRE_CASELESS 0x00000001 302 #define PCRE_MULTILINE 0x00000002 314 303 315 304 /* Negative values for the firstchar and reqchar variables */ … … 365 354 must also be updated to match. 
*/ 366 355 367 enum { 368 OP_END, /* End of pattern */ 369 370 /* Values corresponding to backslashed metacharacters */ 371 372 OP_NOT_WORD_BOUNDARY, /* \B */ 373 OP_WORD_BOUNDARY, /* \b */ 374 OP_NOT_DIGIT, /* \D */ 375 OP_DIGIT, /* \d */ 376 OP_NOT_WHITESPACE, /* \S */ 377 OP_WHITESPACE, /* \s */ 378 OP_NOT_WORDCHAR, /* \W */ 379 OP_WORDCHAR, /* \w */ 380 381 OP_ANY, /* . -- Match any character */ 382 383 OP_CIRC, /* ^ */ 384 OP_DOLL, /* $ */ 385 OP_CHAR, /* Match one character, casefully */ 386 OP_CHARNC, /* Match one character, caselessly */ 387 OP_ASCII_CHAR, /* Match one ASCII (0-127) character. */ 388 OP_ASCII_LETTER_NC, /* Match one ASCII letter, caselessly. */ 389 OP_NOT, /* Match anything but the following char */ 390 391 OP_STAR, /* The maximizing and minimizing versions of */ 392 OP_MINSTAR, /* all these opcodes must come in pairs, with */ 393 OP_PLUS, /* the minimizing one second. */ 394 OP_MINPLUS, /* This first set applies to single characters */ 395 OP_QUERY, 396 OP_MINQUERY, 397 OP_UPTO, /* From 0 to n matches */ 398 OP_MINUPTO, 399 OP_EXACT, /* Exactly n matches */ 400 401 OP_NOTSTAR, /* This set applies to "not" single characters */ 402 OP_NOTMINSTAR, 403 OP_NOTPLUS, 404 OP_NOTMINPLUS, 405 OP_NOTQUERY, 406 OP_NOTMINQUERY, 407 OP_NOTUPTO, 408 OP_NOTMINUPTO, 409 OP_NOTEXACT, 410 411 OP_TYPESTAR, /* This set applies to character types such as \d */ 412 OP_TYPEMINSTAR, 413 OP_TYPEPLUS, 414 OP_TYPEMINPLUS, 415 OP_TYPEQUERY, 416 OP_TYPEMINQUERY, 417 OP_TYPEUPTO, 418 OP_TYPEMINUPTO, 419 OP_TYPEEXACT, 420 421 OP_CRSTAR, /* These are for character classes and back refs */ 422 OP_CRMINSTAR, 423 OP_CRPLUS, 424 OP_CRMINPLUS, 425 OP_CRQUERY, 426 OP_CRMINQUERY, 427 OP_CRRANGE, /* These are different to the three sets above. 
*/ 428 OP_CRMINRANGE, 429 430 OP_CLASS, /* Match a character class, chars < 256 only */ 431 OP_NCLASS, /* Same, but the bitmap was created from a negative 432 class - the difference is relevant when a UTF-8 433 character > 255 is encountered. */ 434 435 OP_XCLASS, /* Extended class for handling UTF-8 chars within the 436 class. This does both positive and negative. */ 437 438 OP_REF, /* Match a back reference */ 439 440 OP_ALT, /* Start of alternation */ 441 OP_KET, /* End of group that doesn't have an unbounded repeat */ 442 OP_KETRMAX, /* These two must remain together and in this */ 443 OP_KETRMIN, /* order. They are for groups the repeat for ever. */ 444 445 /* The assertions must come before ONCE and COND */ 446 447 OP_ASSERT, /* Positive lookahead */ 448 OP_ASSERT_NOT, /* Negative lookahead */ 449 450 /* ONCE and COND must come after the assertions, with ONCE first, as there's 451 a test for >= ONCE for a subpattern that isn't an assertion. */ 452 453 OP_ONCE, /* Once matched, don't back up into the subpattern */ 454 455 OP_BRAZERO, /* These two must remain together and in this */ 456 OP_BRAMINZERO, /* order. */ 457 458 OP_BRANUMBER, /* Used for extracting brackets whose number is greater 459 than can fit into an opcode. */ 460 461 OP_BRA /* This and greater values are used for brackets that 462 extract substrings up to EXTRACT_BASIC_MAX. After 463 that, use is made of OP_BRANUMBER. 
*/ 464 }; 356 #define FOR_EACH_OPCODE(macro) \ 357 macro(END) \ 358 \ 359 macro(NOT_WORD_BOUNDARY) \ 360 macro(WORD_BOUNDARY) \ 361 macro(NOT_DIGIT) \ 362 macro(DIGIT) \ 363 macro(NOT_WHITESPACE) \ 364 macro(WHITESPACE) \ 365 macro(NOT_WORDCHAR) \ 366 macro(WORDCHAR) \ 367 \ 368 macro(ANY) \ 369 \ 370 macro(CIRC) \ 371 macro(DOLL) \ 372 macro(CHAR) \ 373 macro(CHARNC) \ 374 macro(ASCII_CHAR) \ 375 macro(ASCII_LETTER_NC) \ 376 macro(NOT) \ 377 \ 378 macro(STAR) \ 379 macro(MINSTAR) \ 380 macro(PLUS) \ 381 macro(MINPLUS) \ 382 macro(QUERY) \ 383 macro(MINQUERY) \ 384 macro(UPTO) \ 385 macro(MINUPTO) \ 386 macro(EXACT) \ 387 \ 388 macro(NOTSTAR) \ 389 macro(NOTMINSTAR) \ 390 macro(NOTPLUS) \ 391 macro(NOTMINPLUS) \ 392 macro(NOTQUERY) \ 393 macro(NOTMINQUERY) \ 394 macro(NOTUPTO) \ 395 macro(NOTMINUPTO) \ 396 macro(NOTEXACT) \ 397 \ 398 macro(TYPESTAR) \ 399 macro(TYPEMINSTAR) \ 400 macro(TYPEPLUS) \ 401 macro(TYPEMINPLUS) \ 402 macro(TYPEQUERY) \ 403 macro(TYPEMINQUERY) \ 404 macro(TYPEUPTO) \ 405 macro(TYPEMINUPTO) \ 406 macro(TYPEEXACT) \ 407 \ 408 macro(CRSTAR) \ 409 macro(CRMINSTAR) \ 410 macro(CRPLUS) \ 411 macro(CRMINPLUS) \ 412 macro(CRQUERY) \ 413 macro(CRMINQUERY) \ 414 macro(CRRANGE) \ 415 macro(CRMINRANGE) \ 416 \ 417 macro(CLASS) \ 418 macro(NCLASS) \ 419 macro(XCLASS) \ 420 \ 421 macro(REF) \ 422 \ 423 macro(ALT) \ 424 macro(KET) \ 425 macro(KETRMAX) \ 426 macro(KETRMIN) \ 427 \ 428 macro(ASSERT) \ 429 macro(ASSERT_NOT) \ 430 \ 431 macro(ONCE) \ 432 \ 433 macro(BRAZERO) \ 434 macro(BRAMINZERO) \ 435 macro(BRANUMBER) \ 436 macro(BRA) 437 438 #define OPCODE_ENUM_VALUE(opcode) OP_##opcode, 439 enum { FOR_EACH_OPCODE(OPCODE_ENUM_VALUE) }; 465 440 466 441 /* WARNING WARNING WARNING: There is an implicit assumption in pcre.c and … … 475 450 476 451 #define EXTRACT_BASIC_MAX 100 477 478 452 479 453 /* This macro defines the length of fixed length operations in the compiled … … 522 496 523 497 524 /* Error code numbers. 
They are given names so that they can more easily be525 tracked. */526 527 enum { ERR0, ERR1, ERR2, ERR3, ERR4, ERR5, ERR6, ERR7, ERR8, ERR9,528 ERR10, ERR11, ERR12, ERR13, ERR14, ERR15, ERR16, ERR17, ERR18, ERR19,529 ERR20, ERR21, ERR22, ERR23, ERR24, ERR25, ERR26, ERR27, ERR28, ERR29,530 ERR30, ERR31, ERR32, ERR33, ERR34, ERR35, ERR36, ERR37, ERR38, ERR39,531 ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47 };532 533 498 /* The real format of the start of the pcre block; the index of names and the 534 499 code vector run on as long as necessary after the end. We store an explicit … … 573 538 } compile_data; 574 539 575 /* When compiling in a mode that doesn't use recursive calls to match(),576 a structure is used to remember local variables on the heap. It is defined in577 pcre.c, close to the match() function, so that it is easy to keep it in step578 with any changes of local variable. However, the pointer to the current frame579 must be saved in some "static" place over a longjmp(). We declare the580 structure here so that we can put a pointer in the match_data structure.581 NOTE: This isn't used for a "normal" compilation of pcre. */582 583 struct heapframe;584 585 /* Structure for passing "static" information around between the functions586 doing traditional NFA matching, so that they are thread-safe. 
*/587 588 typedef struct match_data {589 unsigned long int match_call_count; /* As it says */590 int *offset_vector; /* Offset vector */591 int offset_end; /* One past the end */592 int offset_max; /* The maximum usable for return data */593 const uschar *lcc; /* Points to lower casing table */594 const uschar *ctypes; /* Points to table of type maps */595 BOOL offset_overflow; /* Set if too many extractions */596 USPTR start_subject; /* Start of the subject string */597 USPTR end_subject; /* End of the subject string */598 USPTR start_match; /* Start of this match attempt */599 USPTR end_match_ptr; /* Subject position at end match */600 int end_offset_top; /* Highwater mark at end of match */601 BOOL multiline;602 BOOL caseless;603 } match_data;604 605 540 /* Bit definitions for entries in the pcre_ctypes table. */ 606 541 607 542 #define ctype_space 0x01 608 #define ctype_digit 0x04609 543 #define ctype_xdigit 0x08 610 544 #define ctype_word 0x10 /* alphameric or '_' */ … … 651 585 extern const uschar _pcre_default_tables[]; 652 586 653 extern const uschar _pcre_OP_lengths[];654 655 587 656 588 /* Internal shared functions. These are functions that are used by more than
Note:
See TracChangeset
for help on using the changeset viewer.