Changeset 15455 in webkit for trunk/JavaScriptCore


Ignore:
Timestamp:
Jul 15, 2006, 8:30:03 AM (19 years ago)
Author:
darin
Message:

JavaScriptCore:

Reviewed by Geoff.

Test: fast/js/regexp-extended-characters-more.html

  • pcre/pcre_exec.c: (match): Got rid of utf16Length local variable to guarantee there's no extra stack usage in recursive calls. Fixed two places in the PCRE_UTF16 code that were using the length variable, which is the UTF-8 length of a character in the pattern, to move in the UTF-16 subject string. They now hardcode lengths of 1 and 2, since the code already handles BMP characters and surrogate pairs separately. Also fixed some DPRINTF so I could compile with DEBUG on. (pcre_exec): Changed a place that was checking for multibyte characters in the subject string to use ISMIDCHAR. Previously it was using hardcoded logic that was right for UTF-8 but wrong for UTF-16.
  • pcre/pcre_compile.c: (pcre_compile2): Fixed a DPRINTF so I could compile with DEBUG on.

LayoutTests:

Reviewed by Geoff.

  • fast/js/regexp-extended-characters-more-expected.txt: Added.
  • fast/js/regexp-extended-characters-more.html: Added.
  • fast/js/resources/regexp-extended-characters-more.js: Added.
Location:
trunk/JavaScriptCore
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • trunk/JavaScriptCore/ChangeLog

    r15444 r15455  
     1 2006-07-15  Darin Adler  <[email protected]>
     2
     3        Reviewed by Geoff.
     4
     5        - fix http://bugzilla.opendarwin.org/show_bug.cgi?id=8395
     6          <rdar://problem/4613467>
     7          REGRESSION: RegEx seems broken for hex escaped non breaking space
     8
     9        Test: fast/js/regexp-extended-characters-more.html
     10
     11        * pcre/pcre_exec.c:
     12        (match): Got rid of utf16Length local variable to guarantee there's no
     13        extra stack usage in recursive calls. Fixed two places in the PCRE_UTF16
     14        code that were using the length variable, which is the UTF-8 length of
     15        a character in the pattern, to move in the UTF-16 subject string. Instead
     16        they hardcode lengths of 1 and 2 since the code already handles BMP
     17        characters and surrogate pairs separately. Also fixed some DPRINTF so
     18        I could compile with DEBUG on.
     19        (pcre_exec): Changed a place that was checking for multibyte characters
     20        in the subject string to use ISMIDCHAR. Instead it was using hardcoded
     21        logic that was right for UTF-8 but wrong for UTF-16.
     22
     23        * pcre/pcre_compile.c: (pcre_compile2): Fixed a DPRINTF so I could compile
     24        with DEBUG on.
     25
    1 26 2006-07-14  Geoffrey Garen  <[email protected]>
    2 27
  • trunk/JavaScriptCore/pcre/pcre_compile.c

    r14736 r15455  
    3996 3996
    3997 3997 DPRINTF(("------------------------------------------------------------------\n"));
         3998 #if !PCRE_UTF16
    3998 3999 DPRINTF(("%s\n", pattern));
         4000 #endif
    3999 4001
    4000 4002 /* The first thing to do is to make a pass over the pattern to compute the
  • trunk/JavaScriptCore/pcre/pcre_exec.c

    r14457 r15455  
    1952 1952       GETUTF8CHARLEN(fc, ecode, length);
    1953 1953       {
    1954            int utf16Length; /* don't initialize on this line as workaround for Win32 compile problem */
    1955            utf16Length = fc > 0xFFFF ? 2 : 1;
    1956            if (min * utf16Length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
         1954       if (min * (fc > 0xFFFF ? 2 : 1) > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
    1957 1955       ecode += length;
    1958 1956
    1959            if (utf16Length == 1)
         1957       if (fc <= 0xFFFF)
    1960 1958         {
    1961 1959 #ifdef SUPPORT_UCP
     
    1991 1989           for (i = min; i < max; i++)
    1992 1990             {
    1993                  if (eptr > md->end_subject - length) break;
         1991             if (eptr >= md->end_subject) break;
    1994 1992             if (*eptr != fc && *eptr != othercase) break;
    1995 1993             ++eptr;
     
    2039 2037             {
    2040 2038             int nc;
    2041                  if (eptr > md->end_subject - length) break;
         2039             if (eptr > md->end_subject - 2) break;
    2042 2040             GETCHAR(nc, eptr);
    2043 2041             if (*eptr != fc) break;
     
    2162 2160     maximizing, find the maximum number of characters and work backwards. */
    2163 2161
         2162 #if PCRE_UTF16
         2163     DPRINTF(("matching %c{%d,%d}\n", fc, min, max));
         2164 #else
    2164 2165     DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
    2165 2166       max, eptr));
         2167 #endif
    2166 2168
    2167 2169     if ((ims & PCRE_CASELESS) != 0)
     
    2307 2309     characters and work backwards. */
    2308 2310
         2311 #if PCRE_UTF16
         2312     DPRINTF(("negative matching %c{%d,%d}\n", fc, min, max));
         2313 #else
    2309 2314     DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
    2310 2315       max, eptr));
         2316 #endif
    2311 2317
    2312 2318     if ((ims & PCRE_CASELESS) != 0)
     
    3733 3739 #ifdef SUPPORT_UTF8
    3734 3740     if (match_block.utf8)
    3735            while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
         3741       while(start_match < end_subject && ISMIDCHAR(*start_match))
    3736 3742         start_match++;
    3737 3743 #endif
Note: See TracChangeset for help on using the changeset viewer.