Fix some regex issues with out-of-range characters and large char ranges.

author Tom Lane <[email protected]>

Mon, 8 Feb 2016 15:25:40 +0000 (10:25 -0500)

committer Tom Lane <[email protected]>

Mon, 8 Feb 2016 15:25:40 +0000 (10:25 -0500)
author Tom Lane <[email protected]>
Mon, 8 Feb 2016 15:25:40 +0000 (10:25 -0500)
committer Tom Lane <[email protected]>
Mon, 8 Feb 2016 15:25:40 +0000 (10:25 -0500)
diff --git a/src/backend/regex/regc_lex.c b/src/backend/regex/regc_lex.c

index bfd9dcd2a492b25142d9f932bf74c00b6324dfc1..962cb230bb75a8e2d7dd56c66c4a9fc7940cd033 100644 (file)
--- a/src/backend/regex/regc_lex.c
+++ b/src/backend/regex/regc_lex.c
@@ -813,13 +813,13 @@ lexescape(struct vars * v)
             break;
         case CHR('u'):
             c = lexdigits(v, 16, 4, 4);
-           if (ISERR())
+           if (ISERR() || c < CHR_MIN || c > CHR_MAX)
                 FAILW(REG_EESCAPE);
             RETV(PLAIN, c);
             break;
         case CHR('U'):
             c = lexdigits(v, 16, 8, 8);
-           if (ISERR())
+           if (ISERR() || c < CHR_MIN || c > CHR_MAX)
                 FAILW(REG_EESCAPE);
             RETV(PLAIN, c);
             break;
@@ -837,7 +837,7 @@ lexescape(struct vars * v)
         case CHR('x'):
             NOTE(REG_UUNPORT);
             c = lexdigits(v, 16, 1, 255);       /* REs >255 long outside spec */
-           if (ISERR())
+           if (ISERR() || c < CHR_MIN || c > CHR_MAX)
                 FAILW(REG_EESCAPE);
             RETV(PLAIN, c);
             break;
@@ -899,6 +899,9 @@ lexescape(struct vars * v)
  
  /*
   * lexdigits - slurp up digits and return chr value
+ *
+ * This does not account for overflow; callers should range-check the result
+ * if maxlen is large enough to make that possible.
   */
  static chr                     /* chr value; errors signalled via ERR */
  lexdigits(struct vars * v,
diff --git a/src/backend/regex/regc_locale.c b/src/backend/regex/regc_locale.c

index e7bbb50ef46680d01b8ac254dccb217495410645..4fe62921e3b438d92d4e7d4d85ff673f8882b74c 100644 (file)
--- a/src/backend/regex/regc_locale.c
+++ b/src/backend/regex/regc_locale.c
@@ -408,8 +408,7 @@ range(struct vars * v,          /* context */
     int         nchrs;
     struct cvec *cv;
     celt        c,
-               lc,
-               uc;
+               cc;
  
     if (a != b && !before(a, b))
     {
@@ -427,24 +426,51 @@ range(struct vars * v,            /* context */
  
     /*
      * When case-independent, it's hard to decide when cvec ranges are usable,
-    * so for now at least, we won't try.  We allocate enough space for two
-    * case variants plus a little extra for the two title case variants.
+    * so for now at least, we won't try.  We use a range for the originally
+    * specified chrs and then add on any case-equivalents that are outside
+    * that range as individual chrs.
+    *
+    * To ensure sane behavior if someone specifies a very large range, limit
+    * the allocation size to 100000 chrs (arbitrary) and check for overrun
+    * inside the loop below.
      */
+   nchrs = b - a + 1;
+   if (nchrs <= 0 || nchrs > 100000)
+       nchrs = 100000;
  
-   nchrs = (b - a + 1) * 2 + 4;
-
-   cv = getcvec(v, nchrs, 0);
+   cv = getcvec(v, nchrs, 1);
     NOERRN();
+   addrange(cv, a, b);
  
     for (c = a; c <= b; c++)
     {
-       addchr(cv, c);
-       lc = pg_wc_tolower((chr) c);
-       if (c != lc)
-           addchr(cv, lc);
-       uc = pg_wc_toupper((chr) c);
-       if (c != uc)
-           addchr(cv, uc);
+       cc = pg_wc_tolower((chr) c);
+       if (cc != c &&
+           (before(cc, a) || before(b, cc)))
+       {
+           if (cv->nchrs >= cv->chrspace)
+           {
+               ERR(REG_ETOOBIG);
+               return NULL;
+           }
+           addchr(cv, cc);
+       }
+       cc = pg_wc_toupper((chr) c);
+       if (cc != c &&
+           (before(cc, a) || before(b, cc)))
+       {
+           if (cv->nchrs >= cv->chrspace)
+           {
+               ERR(REG_ETOOBIG);
+               return NULL;
+           }
+           addchr(cv, cc);
+       }
+       if (CANCEL_REQUESTED(v->re))
+       {
+           ERR(REG_CANCEL);
+           return NULL;
+       }
     }
  
     return cv;
diff --git a/src/backend/regex/regcomp.c b/src/backend/regex/regcomp.c

index a165b3b1ca5676973376c13c0ef84f067b5135ec..cc589b0930101da518ab8e117070d2d71df51a30 100644 (file)
--- a/src/backend/regex/regcomp.c
+++ b/src/backend/regex/regcomp.c
@@ -1593,6 +1593,7 @@ dovec(struct vars * v,
     {
         ch = *p;
         newarc(v->nfa, PLAIN, subcolor(v->cm, ch), lp, rp);
+       NOERR();
     }
  
     /* and the ranges */
@@ -1602,6 +1603,7 @@ dovec(struct vars * v,
         to = *(p + 1);
         if (from <= to)
             subrange(v, from, to, lp, rp);
+       NOERR();
     }
  }
  
diff --git a/src/include/regex/regcustom.h b/src/include/regex/regcustom.h

index dbb461a0ce70fd928953270d2cbfa4c724802e05..3f1d14e19082097833a880d9291a8746562d6548 100644 (file)
--- a/src/include/regex/regcustom.h
+++ b/src/include/regex/regcustom.h
@@ -65,7 +65,8 @@ typedef int celt;             /* type to hold chr, or NOCELT */
  #define DIGITVAL(c) ((c)-'0')  /* turn chr digit into its value */
  #define CHRBITS 32             /* bits in a chr; must not use sizeof */
  #define CHR_MIN 0x00000000     /* smallest and largest chr; the value */
-#define CHR_MAX 0xfffffffe     /* CHR_MAX-CHR_MIN+1 should fit in uchr */
+#define CHR_MAX 0x7ffffffe     /* CHR_MAX-CHR_MIN+1 must fit in an int, and
+                                * CHR_MAX+1 must fit in both chr and celt */
  
  /* functions operating on chr */
  #define iscalnum(x) pg_wc_isalnum(x)
diff --git a/src/test/regress/expected/regex.out b/src/test/regress/expected/regex.out

index 07fb023534fa576e2b1a0357542c5acc5ce36a63..af097193c52dee740033f31db4e8a76b79af8a55 100644 (file)
--- a/src/test/regress/expected/regex.out
+++ b/src/test/regress/expected/regex.out
@@ -495,3 +495,5 @@ select 'xyz' ~ 'x(\w)(?=\1)';  -- no backrefs in LACONs
  ERROR:  invalid regular expression: invalid backreference number
  select 'xyz' ~ 'x(\w)(?=(\1))';
  ERROR:  invalid regular expression: invalid backreference number
+select 'a' ~ '\x7fffffff';  -- invalid chr code
+ERROR:  invalid regular expression: invalid escape \ sequence
diff --git a/src/test/regress/sql/regex.sql b/src/test/regress/sql/regex.sql

index c45bdc91d853419e2fd094adb4a18f9de1ab8e5e..1028ca6dcdcd0e8cae17c13ce4281af678c45708 100644 (file)
--- a/src/test/regress/sql/regex.sql
+++ b/src/test/regress/sql/regex.sql
@@ -121,3 +121,4 @@ select 'a' ~ '()+\1';
  -- Error conditions
  select 'xyz' ~ 'x(\w)(?=\1)';  -- no backrefs in LACONs
  select 'xyz' ~ 'x(\w)(?=(\1))';
+select 'a' ~ '\x7fffffff';  -- invalid chr code
author	Tom Lane <[email protected]>
	Mon, 8 Feb 2016 15:25:40 +0000 (10:25 -0500)
committer	Tom Lane <[email protected]>
	Mon, 8 Feb 2016 15:25:40 +0000 (10:25 -0500)
src/backend/regex/regc_lex.c		patch | blob | blame | history
src/backend/regex/regc_locale.c		patch | blob | blame | history
src/backend/regex/regcomp.c		patch | blob | blame | history
src/include/regex/regcustom.h		patch | blob | blame | history
src/test/regress/expected/regex.out		patch | blob | blame | history
src/test/regress/sql/regex.sql		patch | blob | blame | history