Changeset 28139 in webkit for trunk/JavaScriptCore/pcre/pcre_exec.cpp
- Timestamp: Nov 29, 2007, 3:05:07 AM (18 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/JavaScriptCore/pcre/pcre_exec.cpp
r28138 r28139 81 81 #endif 82 82 83 typedef struct matchframe {83 struct MatchFrame { 84 84 ReturnLocation returnLocation; 85 85 86 struct matchframe* prevframe;86 struct MatchFrame* previousFrame; 87 87 88 88 /* Function arguments that may change */ 89 89 90 const pcre_uchar *eptr;90 const pcre_uchar* eptr; 91 91 const uschar* ecode; 92 92 int offset_top; 93 eptrblock *eptrb;93 eptrblock* eptrb; 94 94 95 95 /* Function local variables */ 96 96 97 const uschar *data;98 const uschar *next;99 const pcre_uchar *pp;100 const uschar *prev;101 const pcre_uchar *saved_eptr;97 const uschar* data; 98 const uschar* next; 99 const pcre_uchar* pp; 100 const uschar* prev; 101 const pcre_uchar* saved_eptr; 102 102 103 103 int repeat_othercase; … … 113 113 114 114 eptrblock newptrb; 115 } matchframe;115 }; 116 116 117 117 /* Structure for passing "static" information around between the functions … … 197 197 198 198 static BOOL 199 match_ref(int offset, registerUSPTR eptr, int length, match_data *md)199 match_ref(int offset, USPTR eptr, int length, match_data *md) 200 200 { 201 201 USPTR p = md->start_subject + md->offset_vector[offset]; … … 327 327 any calls to pcre_stack_malloc, yet the amount of stack used for the array is 328 328 modest enough that we don't run out of stack. 
*/ 329 matchframe frames[16];330 matchframe* framesEnd;331 matchframe* currentFrame;329 MatchFrame frames[16]; 330 MatchFrame* framesEnd; 331 MatchFrame* currentFrame; 332 332 333 333 inline bool canUseStackBufferForNextFrame() … … 336 336 } 337 337 338 inline matchframe* allocateNextFrame()338 inline MatchFrame* allocateNextFrame() 339 339 { 340 340 if (canUseStackBufferForNextFrame()) 341 341 return currentFrame + 1; 342 return new matchframe;342 return new MatchFrame; 343 343 } 344 344 345 345 inline void pushNewFrame(const uschar* ecode, eptrblock* eptrb, ReturnLocation returnLocation) 346 346 { 347 matchframe* newframe = allocateNextFrame();348 newframe->prev frame = currentFrame;347 MatchFrame* newframe = allocateNextFrame(); 348 newframe->previousFrame = currentFrame; 349 349 350 350 newframe->eptr = currentFrame->eptr; … … 357 357 } 358 358 359 inline bool frameIsStackAllocated( matchframe* frame)359 inline bool frameIsStackAllocated(MatchFrame* frame) 360 360 { 361 361 return (frame >= frames && frame < framesEnd); … … 364 364 inline void popCurrentFrame() 365 365 { 366 matchframe* oldFrame = currentFrame;367 currentFrame = currentFrame->prev frame;366 MatchFrame* oldFrame = currentFrame; 367 currentFrame = currentFrame->previousFrame; 368 368 if (!frameIsStackAllocated(oldFrame)) 369 369 delete oldFrame; … … 373 373 { 374 374 while (!frameIsStackAllocated(currentFrame)) { 375 matchframe* oldFrame = currentFrame;376 currentFrame = currentFrame->prev frame;375 MatchFrame* oldFrame = currentFrame; 376 currentFrame = currentFrame->previousFrame; 377 377 delete oldFrame; 378 378 } … … 386 386 } 387 387 388 /* Get the next UTF-8 character, not advancing the pointer, incrementing length 389 if there are extra bytes. This is called when we know we are in UTF-8 mode. 
*/ 390 391 static inline void getUTF8CharAndIncrementLength(int& c, const uschar* eptr, int& len) 392 { 393 c = *eptr; 394 if ((c & 0xc0) == 0xc0) { 395 int gcaa = _pcre_utf8_table4[c & 0x3f]; /* Number of additional bytes */ 396 int gcss = 6 * gcaa; 397 c = (c & _pcre_utf8_table3[gcaa]) << gcss; 398 for (int gcii = 1; gcii <= gcaa; gcii++) { 399 gcss -= 6; 400 c |= (eptr[gcii] & 0x3f) << gcss; 401 } 402 len += gcaa; 403 } 404 } 405 388 406 static int match(USPTR eptr, const uschar* ecode, int offset_top, match_data* md) 389 407 { 390 registerint is_match = false;391 registerint i;392 registerint c;408 int is_match = false; 409 int i; 410 int c; 393 411 394 412 unsigned rdepth = 0; … … 759 777 else { 760 778 const pcre_uchar *lastptr = stack.currentFrame->eptr - 1; 761 while( ISMIDCHAR(*lastptr))779 while(isTrailingSurrogate(*lastptr)) 762 780 lastptr--; 763 GETCHAR(c, lastptr);781 getChar(c, lastptr); 764 782 prev_is_word = c < 128 && (md->ctypes[c] & ctype_word) != 0; 765 783 } … … 767 785 cur_is_word = false; 768 786 else { 769 GETCHAR(c, stack.currentFrame->eptr);787 getChar(c, stack.currentFrame->eptr); 770 788 cur_is_word = c < 128 && (md->ctypes[c] & ctype_word) != 0; 771 789 } … … 784 802 if (stack.currentFrame->eptr++ >= md->end_subject) 785 803 RRETURN_NO_MATCH; 786 while (stack.currentFrame->eptr < md->end_subject && ISMIDCHAR(*stack.currentFrame->eptr))804 while (stack.currentFrame->eptr < md->end_subject && isTrailingSurrogate(*stack.currentFrame->eptr)) 787 805 stack.currentFrame->eptr++; 788 806 stack.currentFrame->ecode++; … … 1172 1190 stack.currentFrame->length = 1; 1173 1191 stack.currentFrame->ecode++; 1174 GETUTF8CHARLEN(stack.currentFrame->fc, stack.currentFrame->ecode, stack.currentFrame->length);1192 getUTF8CharAndIncrementLength(stack.currentFrame->fc, stack.currentFrame->ecode, stack.currentFrame->length); 1175 1193 { 1176 1194 int dc; … … 1181 1199 case 1: 1182 1200 dc = *stack.currentFrame->eptr++; 1183 if ( IS_LEADING_SURROGATE(dc))1201 
if (isLeadingSurrogate(dc)) 1184 1202 RRETURN_NO_MATCH; 1185 1203 break; … … 1197 1215 stack.currentFrame->length = 1; 1198 1216 stack.currentFrame->ecode++; 1199 GETUTF8CHARLEN(stack.currentFrame->fc, stack.currentFrame->ecode, stack.currentFrame->length);1217 getUTF8CharAndIncrementLength(stack.currentFrame->fc, stack.currentFrame->ecode, stack.currentFrame->length); 1200 1218 1201 1219 if (md->end_subject - stack.currentFrame->eptr == 0) … … 1206 1224 if (md->end_subject - stack.currentFrame->eptr == 1) { 1207 1225 dc = *stack.currentFrame->eptr++; 1208 if ( IS_LEADING_SURROGATE(dc))1226 if (isLeadingSurrogate(dc)) 1209 1227 RRETURN_NO_MATCH; 1210 1228 } else … … 1280 1298 1281 1299 stack.currentFrame->length = 1; 1282 GETUTF8CHARLEN(stack.currentFrame->fc, stack.currentFrame->ecode, stack.currentFrame->length);1300 getUTF8CharAndIncrementLength(stack.currentFrame->fc, stack.currentFrame->ecode, stack.currentFrame->length); 1283 1301 if (min * (stack.currentFrame->fc > 0xFFFF ? 2 : 1) > md->end_subject - stack.currentFrame->eptr) 1284 1302 RRETURN_NO_MATCH; … … 1333 1351 for (i = 1; i <= min; i++) { 1334 1352 int nc; 1335 GETCHAR(nc, stack.currentFrame->eptr);1353 getChar(nc, stack.currentFrame->eptr); 1336 1354 if (nc != stack.currentFrame->fc) 1337 1355 RRETURN_NO_MATCH; … … 1350 1368 if (stack.currentFrame->fi >= stack.currentFrame->max || stack.currentFrame->eptr >= md->end_subject) 1351 1369 RRETURN; 1352 GETCHAR(nc, stack.currentFrame->eptr);1370 getChar(nc, stack.currentFrame->eptr); 1353 1371 if (*stack.currentFrame->eptr != stack.currentFrame->fc) 1354 1372 RRETURN; … … 1362 1380 if (stack.currentFrame->eptr > md->end_subject - 2) 1363 1381 break; 1364 GETCHAR(nc, stack.currentFrame->eptr);1382 getChar(nc, stack.currentFrame->eptr); 1365 1383 if (*stack.currentFrame->eptr != stack.currentFrame->fc) 1366 1384 break; … … 1455 1473 1456 1474 { 1457 registerint d;1475 int d; 1458 1476 for (i = 1; i <= min; i++) { 1459 1477 GETCHARINC(d, 
stack.currentFrame->eptr); … … 1469 1487 1470 1488 if (minimize) { 1471 registerint d;1489 int d; 1472 1490 for (stack.currentFrame->fi = min;; stack.currentFrame->fi++) { 1473 1491 RMATCH(38, stack.currentFrame->ecode, stack.currentFrame->eptrb, 0); … … 1489 1507 1490 1508 { 1491 registerint d;1509 int d; 1492 1510 for (i = min; i < stack.currentFrame->max; i++) { 1493 1511 int len = 1; … … 1520 1538 else { 1521 1539 { 1522 registerint d;1540 int d; 1523 1541 for (i = 1; i <= min; i++) { 1524 1542 GETCHARINC(d, stack.currentFrame->eptr); … … 1532 1550 1533 1551 if (minimize) { 1534 registerint d;1552 int d; 1535 1553 for (stack.currentFrame->fi = min;; stack.currentFrame->fi++) { 1536 1554 RMATCH(42, stack.currentFrame->ecode, stack.currentFrame->eptrb, 0); … … 1550 1568 1551 1569 { 1552 registerint d;1570 int d; 1553 1571 for (i = min; i < stack.currentFrame->max; i++) { 1554 1572 int len = 1; … … 1630 1648 RRETURN_NO_MATCH; 1631 1649 ++stack.currentFrame->eptr; 1632 while (stack.currentFrame->eptr < md->end_subject && ISMIDCHAR(*stack.currentFrame->eptr))1650 while (stack.currentFrame->eptr < md->end_subject && isTrailingSurrogate(*stack.currentFrame->eptr)) 1633 1651 stack.currentFrame->eptr++; 1634 1652 } … … 1658 1676 (*stack.currentFrame->eptr < 128 && (md->ctypes[*stack.currentFrame->eptr] & ctype_space) != 0)) 1659 1677 RRETURN_NO_MATCH; 1660 while (++stack.currentFrame->eptr < md->end_subject && ISMIDCHAR(*stack.currentFrame->eptr)) { }1678 while (++stack.currentFrame->eptr < md->end_subject && isTrailingSurrogate(*stack.currentFrame->eptr)) { } 1661 1679 } 1662 1680 break; … … 1676 1694 (*stack.currentFrame->eptr < 128 && (md->ctypes[*stack.currentFrame->eptr] & ctype_word) != 0)) 1677 1695 RRETURN_NO_MATCH; 1678 while (++stack.currentFrame->eptr < md->end_subject && ISMIDCHAR(*stack.currentFrame->eptr)) { }1696 while (++stack.currentFrame->eptr < md->end_subject && isTrailingSurrogate(*stack.currentFrame->eptr)) { } 1679 1697 } 1680 1698 break; … … 2132 
2150 USPTR start_match = (USPTR)subject + start_offset; 2133 2151 USPTR req_byte_ptr = start_match - 1; 2152 bool startline = re->options & PCRE_STARTLINE; 2134 2153 2135 2154 do { … … 2138 2157 /* Reset the maximum number of extractions we might see. */ 2139 2158 2140 if (match_block.offset_vector != NULL) {2159 if (match_block.offset_vector) { 2141 2160 int* iptr = match_block.offset_vector; 2142 2161 int* iend = iptr + resetcount; … … 2171 2190 /* Or to just after \n for a multiline match if possible */ 2172 2191 2173 else if ( re->options & PCRE_STARTLINE) {2192 else if (startline) { 2174 2193 if (start_match > match_block.start_subject + start_offset) { 2175 2194 while (start_match < end_subject && !isNewline(start_match[-1])) … … 2265 2284 if (returnCode == MATCH_NOMATCH) { 2266 2285 start_match++; 2267 while(start_match < end_subject && ISMIDCHAR(*start_match))2286 while(start_match < end_subject && isTrailingSurrogate(*start_match)) 2268 2287 start_match++; 2269 2288 continue;
Note: See TracChangeset for help on using the changeset viewer.