Context Navigation

← Previous Change
Next Change →

pcre_compile.c

Timestamp:

Jan 1, 2007, 9:13:00 PM (18 years ago)

Author:

ddkilzer

Message:

JavaScriptCore:

Reviewed by Darin.

fix http://bugs.webkit.org/show_bug.cgi?id=11849 REGRESSION (r18182): Google Calendar is broken (a regular expression containing a null character is not parsed correctly)

Modified pcre_compile() (and the functions that it calls) to work with patterns
containing null characters.

Covered by JavaScriptCore tests ecma_3/RegExp/octal-002.js and ecma_3/RegExp/regress-85721.js

kjs/regexp.cpp: (KJS::RegExp::RegExp): Changed to not null-terminate the pattern string and instead pass its length to pcre_compile.
pcre/pcre.h:
pcre/pcre_compile.c: (check_escape): (get_ucp): (is_counted_repeat): (check_posix_syntax): (compile_branch): (compile_regex): (pcre_compile): Added a parameter specifying the length of the pattern, which is no longer required to be null-terminated and may contain null characters. (pcre_compile2):
pcre/pcre_internal.h:
tests/mozilla/expected.html: Updated for the two tests that this patch fixes. Also updated failing results for ecma_3/RegExp/regress-100199.js which were not updated after bug 6257 was fixed.

WebCore:

Reviewed by Darin.

WebCore changes for http://bugs.webkit.org/show_bug.cgi?id=11849 REGRESSION (r18182): Google Calendar is broken (a regular expression containing a null character is not parsed correctly)

platform/RegularExpression.cpp: (WebCore::RegularExpression::Private::compile): Changed to not null-terminate the pattern string and instead pass its length to pcre_compile.

File:

1 edited

trunk/JavaScriptCore/pcre/pcre_compile.c (modified) (77 diffs)

Legend:

: Unmodified
: Added
: Removed

trunk/JavaScriptCore/pcre/pcre_compile.c

-              r18498
+              r18517
 static BOOL
   compile_regex(int, int, int *, uschar **, const pcre_uchar **, int *, BOOL, int,
+  compile_regex(int, int, int *, uschar **, const pcre_uchar **, const pcre_uchar const*, int *, BOOL, int,
     int *, int *, branch_chain *, compile_data *);
 …
 static int
 check_escape(const pcre_uchar **ptrptr, int *errorcodeptr, int bracount,
+check_escape(const pcre_uchar **ptrptr, const pcre_uchar const* patternEnd, int *errorcodeptr, int bracount,
   int options, BOOL isclass)
+{
 …
 /* If backslash is at the end of the pattern, it's an error. */
+c = *(++ptr);
+if (c == 0) *errorcodeptr = ERR1;
+if (++ptr == patternEnd) {
+    *errorcodeptr = ERR1;
+    *ptrptr = ptr;
+    return 0;
+}
+c = *ptr;
 /* Non-alphamerics are literals. For digits or letters, do an initial lookup in
 …
 #if !EBCDIC    /* ASCII coding */
 else if (c < '0' || c > 'z') {}                           /* Not alphameric */
+if (c < '0' || c > 'z') {}                           /* Not alphameric */
 else if ((i = escapes[c - '0']) != 0) c = i;
 #else          /* EBCDIC coding */
 else if (c < 'a' || (ebcdic_chartab[c] & 0x0E) == 0) {}   /* Not alphameric */
+if (c < 'a' || (ebcdic_chartab[c] & 0x0E) == 0) {}   /* Not alphameric */
 else if ((i = escapes[c - 0x48]) != 0)  c = i;
 #endif
 …
       oldptr = ptr;
       c -= '0';
       while ((DIGITAB(ptr[1]) & ctype_digit) != 0)
+      while (ptr + 1 < patternEnd && (DIGITAB(ptr[1]) & ctype_digit) != 0)
         c = c * 10 + *(++ptr) - '0';
       if (c < 10 || c <= bracount)
 …
     case '0':
     c -= '0';
     while(i++ < 2 && ptr[1] >= '0' && ptr[1] <= '7')
+    while (i++ < 2 && ptr + 1 < patternEnd && ptr[1] >= '0' && ptr[1] <= '7')
         c = c * 8 + *(++ptr) - '0';
     c &= 255;     /* Take least significant 8 bits */
 …
     case 'x':
 #ifdef SUPPORT_UTF8
     if (ptr[1] == '{' && (options & PCRE_UTF8) != 0)
+    if (ptr + 1 < patternEnd && ptr[1] == '{' && (options & PCRE_UTF8) != 0)
+      {
       const pcre_uchar *pt = ptr + 2;
       register int count = 0;
       c = 0;
       while ((DIGITAB(*pt) & ctype_xdigit) != 0)
+      while (pt < patternEnd && (DIGITAB(*pt) & ctype_xdigit) != 0)
+        {
         int cc = *pt++;
 …
 #endif
+        }
       if (*pt == '}')
+      if (pt < patternEnd && *pt == '}')
+        {
         if (c < 0 || count > 8 || (c >= 0xd800 && c <= 0xdbff) || (c >= 0xfdd0 && c <= 0xfdef) || c == 0xfffe || c == 0xffff || c > 0x10FFFF) *errorcodeptr = ERR34;
 …
     c = 0;
     while (i++ < 2 && (DIGITAB(ptr[1]) & ctype_xdigit) != 0)
+    while (i++ < 2 && ptr + 1 < patternEnd && (DIGITAB(ptr[1]) & ctype_xdigit) != 0)
+      {
       int cc;                               /* Some compilers don't like ++ */
 …
     case 'c':
+    c = *(++ptr);
+    if (c == 0)
+    if (++ptr == patternEnd)
+      {
       *errorcodeptr = ERR2;
       return 0;
+      }
+    c = *ptr;
     /* A letter is upper-cased; then the 0x40 bit is flipped. This coding
 …
 static int
 get_ucp(const pcre_uchar **ptrptr, BOOL *negptr, int *errorcodeptr)
+get_ucp(const pcre_uchar **ptrptr, const pcre_uchar const* patternEnd, BOOL *negptr, int *errorcodeptr)
+{
 int c, i, bot, top;
 …
 char name[4];
 c = *(++ptr);
 if (c == 0) goto ERROR_RETURN;
+if (++ptr == patternEnd) goto ERROR_RETURN;
+c = *ptr;
 *negptr = FALSE;
 …
 if (c == '{')
+  {
   if (ptr[1] == '^')
+  if (ptr + 1 < patternEnd && ptr[1] == '^')
+    {
     *negptr = TRUE;
 …
   for (i = 0; i <= 2; i++)
+    {
     c = *(++ptr);
     if (c == 0) goto ERROR_RETURN;
+    if (++ptr == patternEnd) goto ERROR_RETURN;
+    c = *ptr;
     if (c == '}') break;
     if (c > 127) goto ERROR_RETURN;
 …
   if (c !='}')   /* Try to distinguish error cases */
+    {
+    while (*(++ptr) != 0 && *ptr != '}');
+    while (++ptr < patternEnd && *ptr != '}')
+        ;
     if (*ptr == '}') goto UNKNOWN_RETURN; else goto ERROR_RETURN;
+    }
 …
 static BOOL
 is_counted_repeat(const pcre_uchar *p)
+is_counted_repeat(const pcre_uchar *p, const pcre_uchar const* patternEnd)
+{
+if ((DIGITAB(*p) & ctype_digit) == 0) return FALSE;
+if (p >= patternEnd || (DIGITAB(*p) & ctype_digit) == 0)
+    return FALSE;
 p++;
+while ((DIGITAB(*p) & ctype_digit) != 0) p++;
+if (*p == '}') return TRUE;
+if (*p++ != ',') return FALSE;
+if (*p == '}') return TRUE;
+if ((DIGITAB(*p) & ctype_digit) == 0) return FALSE;
+while (p < patternEnd && (DIGITAB(*p) & ctype_digit) != 0)
+    p++;
+if (p < patternEnd && *p == '}')
+    return TRUE;
+if (p >= patternEnd || *p++ != ',')
+    return FALSE;
+if (p < patternEnd && *p == '}')
+    return TRUE;
+if (p >= patternEnd || (DIGITAB(*p) & ctype_digit) == 0)
+    return FALSE;
 p++;
+while ((DIGITAB(*p) & ctype_digit) != 0) p++;
+return (*p == '}');
+while (p < patternEnd && (DIGITAB(*p) & ctype_digit) != 0)
+    p++;
+return (p < patternEnd && *p == '}');
+}
 …
 static BOOL
 check_posix_syntax(const pcre_uchar *ptr, const pcre_uchar **endptr, compile_data *cd)
+check_posix_syntax(const pcre_uchar *ptr, const pcre_uchar const *patternEnd, const pcre_uchar **endptr, compile_data *cd)
+{
 int terminator;          /* Don't combine these lines; the Solaris cc */
 terminator = *(++ptr);   /* compiler warns about "non-constant" initializer. */
 if (*(++ptr) == '^') ptr++;
+while ((CTYPES(cd, *ptr) & ctype_letter) != 0) ptr++;
+if (*ptr == terminator && ptr[1] == ']')
+while (ptr < patternEnd && (CTYPES(cd, *ptr) & ctype_letter) != 0)
+    ptr++;
+if (ptr + 1 < patternEnd && *ptr == terminator && ptr[1] == ']')
+  {
   *endptr = ptr;
 …
 static BOOL
 compile_branch(int *optionsptr, int *brackets, uschar **codeptr,
   const pcre_uchar **ptrptr, int *errorcodeptr, int *firstbyteptr,
+  const pcre_uchar **ptrptr, const pcre_uchar const* patternEnd, int *errorcodeptr, int *firstbyteptr,
   int *reqbyteptr, branch_chain *bcptr, compile_data *cd)
+{
 …
   /* Next byte in the pattern */
   c = *ptr;
+  c = ptr < patternEnd ? *ptr : 0;
   /* If in \Q...\E, check for the end; if not, we have a literal */
   if (inescq && c != 0)
+  if (inescq && ptr < patternEnd)
+    {
     if (c == '\\' && ptr[1] == 'E')
+    if (c == '\\' && ptr + 1 < patternEnd && ptr[1] == 'E')
+      {
       inescq = FALSE;
 …
   is_quantifier = c == '*' || c == '+' || c == '?' ||
     (c == '{' && is_counted_repeat(ptr+1));
+    (c == '{' && is_counted_repeat(ptr+1, patternEnd));
   if (!is_quantifier && previous_callout != NULL &&
 …
     if (c == '#')
+      {
+      /* The space before the ; is to avoid a warning on a silly compiler
+      on the Macintosh. */
+      while ((c = *(++ptr)) != 0 && c != NEWLINE) ;
+      if (c != 0) continue;   /* Else fall through to handle end of string */
+      while (++ptr < patternEnd && (c = *ptr) != NEWLINE)
+          ;
+      if (ptr < patternEnd)
+          continue;   /* Else fall through to handle end of string */
+      c = 0;
+      }
+    }
 …
     case 0:
+        if (ptr < patternEnd)
+            goto NORMAL_CHAR;
+        // End of string; fall through
     case '|':
     case ')':
 …
     if ((ptr[1] == ':' || ptr[1] == '.' || ptr[1] == '=') &&
         check_posix_syntax(ptr, &tempptr, cd))
+        check_posix_syntax(ptr, patternEnd, &tempptr, cd))
+      {
       *errorcodeptr = (ptr[1] == ':')? ERR13 : ERR31;
 …
       if (c == '[' &&
           (ptr[1] == ':' || ptr[1] == '.' || ptr[1] == '=') &&
           check_posix_syntax(ptr, &tempptr, cd))
+          check_posix_syntax(ptr, patternEnd, &tempptr, cd))
+        {
         BOOL local_negate = FALSE;
 …
         for (i = 0; i < 3; i++)
+          {
           BOOL blankclass = STREQUAL(ptr, 5, "blank");
+          BOOL blankclass = ptr + 5 <= patternEnd && STREQUAL(ptr, 5, "blank");
           int taboffset = posix_class_maps[posix_class + i];
           if (taboffset < 0) break;
 …
       if (c == '\\')
+        {
         c = check_escape(&ptr, errorcodeptr, *brackets, options, TRUE);
+        c = check_escape(&ptr, patternEnd, errorcodeptr, *brackets, options, TRUE);
         if (-c == ESC_b) c = '\b';       /* \b is backslash in a class */
 …
         else if (-c == ESC_Q)            /* Handle start of quoted string */
+          {
           if (ptr[1] == '\\' && ptr[2] == 'E')
+          if (ptr + 2 < patternEnd && ptr[1] == '\\' && ptr[2] == 'E')
+            {
             ptr += 2; /* avoid empty string */
 …
+              {
               BOOL negated;
               int property = get_ucp(&ptr, &negated, errorcodeptr);
+              int property = get_ucp(&ptr, patternEnd, &negated, errorcodeptr);
               if (property < 0) goto FAILED;
               class_utf8 = TRUE;
 …
+          {
           const pcre_uchar *oldptr = ptr;
           d = check_escape(&ptr, errorcodeptr, *brackets, options, TRUE);
+          d = check_escape(&ptr, patternEnd, errorcodeptr, *brackets, options, TRUE);
           /* \b is backslash; \X is literal X; any other special means the '-'
 …
     repeat type to the non-default. */
     if (ptr[1] == '+')
+    if (ptr + 1 < patternEnd && ptr[1] == '+')
+      {
       repeat_type = 0;                  /* Force greedy */
 …
       ptr++;
+      }
     else if (ptr[1] == '?')
+    else if (ptr + 1 < patternEnd && ptr[1] == '?')
+      {
       repeat_type = greedy_non_default;
 …
          &tempcode,                    /* Where to put code (updated) */
          &ptr,                         /* Input pointer (updated) */
+         patternEnd,
          errorcodeptr,                 /* Where to put an error message */
          (bravalue == OP_ASSERTBACK ||
 …
     /* Error if hit end of pattern */
     if (*ptr != ')')
+    if (ptr > patternEnd || *ptr != ')')
+      {
       *errorcodeptr = ERR14;
 …
     case '\\':
     tempptr = ptr;
     c = check_escape(&ptr, errorcodeptr, *brackets, options, FALSE);
+    c = check_escape(&ptr, patternEnd, errorcodeptr, *brackets, options, FALSE);
     /* Handle metacharacters introduced by \. For ones like \d, the ESC_ values
 …
       if (-c == ESC_Q)            /* Handle start of quoted string */
+        {
         if (ptr[1] == '\\' && ptr[2] == 'E') ptr += 2; /* avoid empty string */
+        if (ptr + 2 < patternEnd && ptr[1] == '\\' && ptr[2] == 'E') ptr += 2; /* avoid empty string */
           else inescq = TRUE;
         continue;
 …
+        {
         BOOL negated;
         int value = get_ucp(&ptr, &negated, errorcodeptr);
+        int value = get_ucp(&ptr, patternEnd, &negated, errorcodeptr);
         previous = code;
         *code++ = ((-c == ESC_p) != negated)? OP_PROP : OP_NOTPROP;
 …
     if (utf8 && (c & 0xc0) == 0xc0)
+      {
       while ((ptr[1] & 0xc0) == 0x80)
+      while (ptr + 1 < patternEnd && (ptr[1] & 0xc0) == 0x80)
         mcbuffer[mclength++] = *(++ptr);
+      }
 …
 static BOOL
 compile_regex(int options, int oldims, int *brackets, uschar **codeptr,
   const pcre_uchar **ptrptr, int *errorcodeptr, BOOL lookbehind, int skipbytes,
+  const pcre_uchar **ptrptr, const pcre_uchar const* patternEnd, int *errorcodeptr, BOOL lookbehind, int skipbytes,
   int *firstbyteptr, int *reqbyteptr, branch_chain *bcptr, compile_data *cd)
+{
 …
   /* Now compile the branch */
   if (!compile_branch(&options, brackets, &code, &ptr, errorcodeptr,
+  if (!compile_branch(&options, brackets, &code, &ptr, patternEnd, errorcodeptr,
         &branchfirstbyte, &branchreqbyte, &bc, cd))
+    {
 …
   at the terminating char. */
   if (*ptr != '|')
+  if (ptr >= patternEnd || *ptr != '|')
+    {
     int length = INT_CAST(code - last_branch);
 …
     /* Resetting option if needed */
     if ((options & PCRE_IMS) != oldims && *ptr == ')')
+    if ((options & PCRE_IMS) != oldims && ptr < patternEnd && *ptr == ')')
+      {
       *code++ = OP_OPT;
 …
 PCRE_EXPORT pcre *
 pcre_compile(const pcre_char *pattern, int options, const char **errorptr,
+pcre_compile(const pcre_char *pattern, int patternLength, int options, const char **errorptr,
   int *erroroffset, const unsigned char *tables)
+{
 return pcre_compile2(pattern, options, NULL, errorptr, erroroffset, tables);
+return pcre_compile2(pattern, patternLength, options, NULL, errorptr, erroroffset, tables);
+}
 PCRE_EXPORT pcre *
 pcre_compile2(const pcre_char *pattern, int options, int *errorcodeptr,
+pcre_compile2(const pcre_char *pattern, int patternLength, int options, int *errorcodeptr,
   const char **errorptr, int *erroroffset, const unsigned char *tables)
+{
 …
 ptr = (const pcre_uchar *)(pattern - 1);
+while ((c = *(++ptr)) != 0)
+const pcre_uchar const* patternEnd = (const pcre_uchar *)(pattern + patternLength);
+while (++ptr < patternEnd)
+  {
+  c = *ptr;
   int min, max;
   int class_optcount;
 …
     if (c == '#')
+      {
       /* The space before the ; is to avoid a warning on a silly compiler
       on the Macintosh. */
       while ((c = *(++ptr)) != 0 && c != NEWLINE) ;
       if (c == 0) break;
+      while (++ptr < patternEnd && (c = *ptr) != NEWLINE)
+        ;
+      if (ptr == patternEnd)
+        break;
       continue;
+      }
 …
   if ((options & PCRE_AUTO_CALLOUT) != 0 &&
        c != '*' && c != '+' && c != '?' &&
        (c != '{' || !is_counted_repeat(ptr + 1)))
+       (c != '{' || !is_counted_repeat(ptr + 1, patternEnd)))
     length += 2 + 2*LINK_SIZE;
 …
     case '\\':
     c = check_escape(&ptr, &errorcode, bracount, options, FALSE);
+    c = check_escape(&ptr, patternEnd, &errorcode, bracount, options, FALSE);
     if (errorcode != 0) goto PCRE_ERROR_RETURN;
 …
       length += 2;
       lastitemlength = 2;
       if (get_ucp(&ptr, &negated, &errorcode) < 0) goto PCRE_ERROR_RETURN;
+      if (get_ucp(&ptr, patternEnd, &negated, &errorcode) < 0) goto PCRE_ERROR_RETURN;
       continue;
 #else
 …
         compile_block.top_backref = refnum;
       length += 2;   /* For single back reference */
       if (ptr[1] == '{' && is_counted_repeat(ptr+2))
+      if (ptr + 1 < patternEnd && ptr[1] == '{' && is_counted_repeat(ptr+2, patternEnd))
+        {
         ptr = read_repeat_counts(ptr+2, &min, &max, &errorcode);
 …
             length++;
         else length += 5;
+        if (ptr[1] == '?') ptr++;
+        if (ptr + 1 < patternEnd && ptr[1] == '?')
+            ptr++;
+        }
+      }
 …
     case '{':
+    if (!is_counted_repeat(ptr+1)) goto NORMAL_CHAR;
+    if (!is_counted_repeat(ptr+1, patternEnd))
+        goto NORMAL_CHAR;
     ptr = read_repeat_counts(ptr+1, &min, &max, &errorcode);
     if (errorcode != 0) goto PCRE_ERROR_RETURN;
 …
+      }
+    if (ptr[1] == '?') ptr++;      /* Needs no extra length */
+    if (ptr + 1 < patternEnd && ptr[1] == '?')
+        ptr++;                     /* Needs no extra length */
     POSESSIVE:                     /* Test for possessive quantifier */
     if (ptr[1] == '+')
+    if (ptr + 1 < patternEnd && ptr[1] == '+')
+      {
       ptr++;
 …
     case '[':
     if (*(++ptr) == '^')
+    if (++ptr < patternEnd && *ptr == '^')
+      {
       class_optcount = 10;  /* Greater than one */
 …
     /* Written as a "do" so that an initial ']' is taken as data */
     if (*ptr != 0) do
+    if (ptr < patternEnd) do
+      {
       /* Inside \Q...\E everything is literal except \E */
 …
       if (inescq)
+        {
+        if (*ptr != '\\' || ptr[1] != 'E') goto GET_ONE_CHARACTER;
+        if (*ptr != '\\' || ptr + 1 >= patternEnd || ptr[1] != 'E')
+            goto GET_ONE_CHARACTER;
         inescq = FALSE;
         ptr += 1;
 …
       if (*ptr == '\\')
+        {
         c = check_escape(&ptr, &errorcode, bracount, options, TRUE);
+        c = check_escape(&ptr, patternEnd, &errorcode, bracount, options, TRUE);
         if (errorcode != 0) goto PCRE_ERROR_RETURN;
 …
       checked during the real compile phase. */
       else if (*ptr == '[' && check_posix_syntax(ptr, &ptr, &compile_block))
+      else if (*ptr == '[' && ptr + 2 < patternEnd && check_posix_syntax(ptr, patternEnd, &ptr, &compile_block))
+        {
         ptr++;
 …
+          {
           int extra = 0;
           GETCHARLEN(c, ptr, extra);
+          GETCHARLENEND(c, ptr, patternEnd, extra);
           ptr += extra;
+          }
 …
         d = -1;
         if (ptr[1] == '-')
+        if (ptr + 1 < patternEnd && ptr[1] == '-')
+          {
           pcre_uchar const *hyptr = ptr++;
+          if (ptr[1] == '\\')
+            {
+            ptr++;
+            d = check_escape(&ptr, &errorcode, bracount, options, TRUE);
+            if (errorcode != 0) goto PCRE_ERROR_RETURN;
+            if (-d == ESC_b) d = '\b';        /* backspace */
+            else if (-d == ESC_X) d = 'X';    /* literal X in a class */
+            }
+          else if (ptr[1] != 0 && ptr[1] != ']')
+            {
+            ptr++;
+#ifdef SUPPORT_UTF8
+            if (utf8)
+              {
+              int extra = 0;
+              GETCHARLEN(d, ptr, extra);
+              ptr += extra;
+              }
+            else
+#endif
+            d = *ptr;
+            }
+          if (ptr + 1 < patternEnd) {
+              if (ptr[1] == '\\')
+                {
+                ptr++;
+                d = check_escape(&ptr, patternEnd, &errorcode, bracount, options, TRUE);
+                if (errorcode != 0) goto PCRE_ERROR_RETURN;
+                if (-d == ESC_b) d = '\b';        /* backspace */
+                else if (-d == ESC_X) d = 'X';    /* literal X in a class */
+                }
+              else if (ptr[1] != ']')
+                {
+                ptr++;
+    #ifdef SUPPORT_UTF8
+                if (utf8)
+                  {
+                  int extra = 0;
+                  GETCHARLENEND(d, ptr, patternEnd, extra);
+                  ptr += extra;
+                  }
+                else
+    #endif
+                d = *ptr;
+                }
+          }
           if (d < 0) ptr = hyptr;      /* go back to hyphen as data */
+          }
 …
+        }
+      }
     while (*(++ptr) != 0 && (inescq || *ptr != ']')); /* Concludes "do" above */
     if (*ptr == 0)                          /* Missing terminating ']' */
+    while (++ptr < patternEnd && (inescq || *ptr != ']')); /* Concludes "do" above */
+    if (ptr >= patternEnd)                          /* Missing terminating ']' */
+      {
       errorcode = ERR6;
 …
       we also need extra for wrapping the whole thing in a sub-pattern. */
       if (*ptr != 0 && ptr[1] == '{' && is_counted_repeat(ptr+2))
+      if (ptr + 1 < patternEnd && ptr[1] == '{' && is_counted_repeat(ptr+2, patternEnd))
+        {
         ptr = read_repeat_counts(ptr+2, &min, &max, &errorcode);
 …
             length++;
         else length += 5;
+        if (ptr[1] == '+')
+          {
+          ptr++;
+          length += 2 + 2*LINK_SIZE;
+          }
+        else if (ptr[1] == '?') ptr++;
+        if (ptr + 1 < patternEnd) {
+            if (ptr[1] == '+')
+              {
+              ptr++;
+              length += 2 + 2*LINK_SIZE;
+              }
+            else if (ptr[1] == '?')
+                ptr++;
+        }
+        }
+      }
 …
     /* Handle special forms of bracket, which all start (? */
     if (ptr[1] == '?')
+    if (ptr + 1 < patternEnd && ptr[1] == '?')
+      {
       int set, unset;
       int *optset;
       switch (c = ptr[2])
+      switch (c = (ptr + 2 < patternEnd ? ptr[2] : 0))
+        {
         /* Skip over comments entirely */
         case '#':
         ptr += 3;
+        while (*ptr != 0 && *ptr != ')') ptr++;
+        if (*ptr == 0)
+        while (ptr < patternEnd && *ptr != ')')
+            ptr++;
+        if (ptr == patternEnd)
+          {
           errorcode = ERR18;
 …
+          {
           ++ptr;
           while ((DIGITAB(*ptr) & ctype_digit) != 0)
+          while (ptr < patternEnd && (DIGITAB(*ptr) & ctype_digit) != 0)
             ++ptr;
+          }
         if (*ptr != ')')
+        if (ptr >= patternEnd || *ptr != ')')
+          {
           errorcode = ERR29;
 …
         code that handles this for real brackets. */
         if (ptr[1] == '+' || ptr[1] == '*' || ptr[1] == '?' || ptr[1] == '{')
+        if (ptr + 1 < patternEnd && (ptr[1] == '+' || ptr[1] == '*' || ptr[1] == '?' || ptr[1] == '{'))
+          {
           length += 2 + 2 * LINK_SIZE;       /* to make bracketed */
 …
         case 'C':
         ptr += 3;
         while ((DIGITAB(*ptr) & ctype_digit) != 0)
+        while (ptr < patternEnd && (DIGITAB(*ptr) & ctype_digit) != 0)
           ++ptr;
         if (*ptr != ')')
+        if (ptr >= patternEnd || *ptr != ')')
+          {
           errorcode = ERR39;
 …
         /* Handle the definition of a named subpattern */
+        if (*ptr == '<')
+          {
+          const pcre_uchar *p;    /* Don't amalgamate; some compilers */
+          p = ++ptr;          /* grumble at autoincrement in declaration */
+          while ((CTYPES(&compile_block, *ptr) & ctype_word) != 0) ptr++;
+          if (*ptr != '>')
+            {
+            errorcode = ERR42;
+            goto PCRE_ERROR_RETURN;
+            }
+          name_count++;
+          if (ptr - p > max_name_size) max_name_size = INT_CAST(ptr - p);
+          capturing = TRUE;   /* Named parentheses are always capturing */
+          break;
+          }
+        /* Handle back references and recursive calls to named subpatterns */
+        if (*ptr == '=' || *ptr == '>')
+          {
+          ++ptr;
+          while ((CTYPES(&compile_block, *ptr) & ctype_word) != 0)
+            ++ptr;
+          if (*ptr != ')')
+            {
+            errorcode = ERR42;
+            goto PCRE_ERROR_RETURN;
+            }
+          break;
+          }
+        if (ptr < patternEnd) {
+            if (*ptr == '<')
+              {
+              const pcre_uchar *p;    /* Don't amalgamate; some compilers */
+              p = ++ptr;          /* grumble at autoincrement in declaration */
+              while (ptr < patternEnd && (CTYPES(&compile_block, *ptr) & ctype_word) != 0)
+                  ptr++;
+              if (ptr >= patternEnd || *ptr != '>')
+                {
+                errorcode = ERR42;
+                goto PCRE_ERROR_RETURN;
+                }
+              name_count++;
+              if (ptr - p > max_name_size) max_name_size = INT_CAST(ptr - p);
+              capturing = TRUE;   /* Named parentheses are always capturing */
+              break;
+              }
+            /* Handle back references and recursive calls to named subpatterns */
+            if (*ptr == '=' || *ptr == '>')
+              {
+              ++ptr;
+              while (ptr < patternEnd && (CTYPES(&compile_block, *ptr) & ctype_word) != 0)
+                ++ptr;
+              if (ptr >= patternEnd || *ptr != ')')
+                {
+                errorcode = ERR42;
+                goto PCRE_ERROR_RETURN;
+                }
+              break;
+              }
+        }
         /* Unknown character after (?P */
 …
         case '<':
         ptr += 3;
         if (*ptr == '=' || *ptr == '!')
+        if (ptr < patternEnd && (*ptr == '=' || *ptr == '!'))
+          {
           branch_newextra = 1 + LINK_SIZE;
 …
         case '(':
         if (ptr[3] == 'R' && ptr[4] == ')')
+        if (ptr + 4 < patternEnd && ptr[3] == 'R' && ptr[4] == ')')
+          {
           ptr += 4;
           length += 3;
+          }
         else if ((DIGITAB(ptr[3]) & ctype_digit) != 0)
+        else if (ptr + 3 < patternEnd && (DIGITAB(ptr[3]) & ctype_digit) != 0)
+          {
           ptr += 4;
           length += 3;
+          while ((DIGITAB(*ptr) & ctype_digit) != 0) ptr++;
+          if (*ptr != ')')
+          while (ptr < patternEnd && (DIGITAB(*ptr) & ctype_digit) != 0)
+              ptr++;
+          if (ptr >= patternEnd || *ptr != ')')
+            {
             errorcode = ERR26;
 …
+          {
           ptr++;   /* Can treat like ':' as far as spacing is concerned */
           if (ptr[2] != '?' ||
+          if (ptr + 3 >= patternEnd || ptr[2] != '?' ||
              (ptr[3] != '=' && ptr[3] != '!' && ptr[3] != '<') )
+            {
 …
         for (;; ptr++)
+          {
           c = *ptr;
+          c = ptr < patternEnd ? *ptr : 0;
           switch (c)
+            {
 …
     automatically; for the others we need an increment. */
     if ((c = ptr[1]) == '{' && is_counted_repeat(ptr+2))
+    if (ptr + 1 < patternEnd && (c = ptr[1]) == '{' && is_counted_repeat(ptr+2, patternEnd))
+      {
       ptr = read_repeat_counts(ptr+2, &min, &max, &errorcode);
 …
     /* Allow space for once brackets for "possessive quantifier" */
     if (ptr[1] == '+')
+    if (ptr + 1 < patternEnd && ptr[1] == '+')
+      {
       ptr++;
 …
     NORMAL_CHAR:
     if (inescq && c == '\\' && ptr[1] == 'E')
+    if (inescq && c == '\\' && ptr + 1 < patternEnd && ptr[1] == 'E')
+      {
       inescq = FALSE;
 …
         if (IS_LEADING_SURROGATE(c))
+          {
           c = DECODE_SURROGATE_PAIR(c, *ptr);
+          c = DECODE_SURROGATE_PAIR(c, ptr < patternEnd ? *ptr : 0);
           ++ptr;
+          }
 …
     if (utf8 && (c & 0xc0) == 0xc0)
+      {
       while ((ptr[1] & 0xc0) == 0x80)         /* Can't flow over the end */
+      while (ptr + 1 < patternEnd && (ptr[1] & 0xc0) == 0x80)         /* Can't flow over the end */
         {                                     /* because the end is marked */
         lastitemlength++;                     /* by a zero byte. */
 …
 *code = OP_BRA;
 bracount = 0;
 (void)compile_regex(options, options & PCRE_IMS, &bracount, &code, &ptr,
+(void)compile_regex(options, options & PCRE_IMS, &bracount, &code, &ptr, patternEnd,
   &errorcode, FALSE, 0, &firstbyte, &reqbyte, NULL, &compile_block);
 re->top_bracket = bracount;
 …
 /* If not reached end of pattern on success, there's an excess bracket. */
 if (errorcode == 0 && *ptr != 0) errorcode = ERR22;
+if (errorcode == 0 && ptr < patternEnd) errorcode = ERR22;
 /* Fill in the terminating state and check for disastrous overflow, but

Note: See TracChangeset for help on using the changeset viewer.

Context Navigation

Changeset 18517 in webkit for trunk/JavaScriptCore/pcre/pcre_compile.c

Legend:

trunk/JavaScriptCore/pcre/pcre_compile.c

Download in other formats: