Suppress unnecessary regex subre nodes in a couple more cases.

author Tom Lane <[email protected]>

Tue, 2 Mar 2021 17:14:14 +0000 (12:14 -0500)

committer Tom Lane <[email protected]>

Tue, 2 Mar 2021 17:14:14 +0000 (12:14 -0500)
author Tom Lane <[email protected]>
Tue, 2 Mar 2021 17:14:14 +0000 (12:14 -0500)
committer Tom Lane <[email protected]>
Tue, 2 Mar 2021 17:14:14 +0000 (12:14 -0500)
diff --git a/src/backend/regex/regcomp.c b/src/backend/regex/regcomp.c

index 3c7627a955eabc8fd5956dde71c2c566e35b7a93..ba8dd8646459b4ba69fe71b3bd02c9fe6e5e0bfc 100644 (file)
--- a/src/backend/regex/regcomp.c
+++ b/src/backend/regex/regcomp.c
@@ -1216,6 +1216,23 @@ parseqatom(struct vars *v,
         /* rest of branch can be strung starting from atom->end */
         s2 = atom->end;
     }
+   else if (!(atom->flags & (CAP | BACKR)))
+   {
+       /*
+        * If there's no captures nor backrefs in the atom being repeated, we
+        * don't really care where the submatches of the iteration are, so we
+        * don't need an iteration node.  Make a plain DFA node instead.
+        */
+       EMPTYARC(s, atom->begin);   /* empty prefix */
+       repeat(v, atom->begin, atom->end, m, n);
+       f = COMBINE(qprefer, atom->flags);
+       t = subre(v, '=', f, atom->begin, atom->end);
+       NOERR();
+       freesubre(v, atom);
+       *atomp = t;
+       /* rest of branch can be strung starting from t->end */
+       s2 = t->end;
+   }
     else if (m > 0 && !(atom->flags & BACKR))
     {
         /*
@@ -1271,6 +1288,10 @@ parseqatom(struct vars *v,
         t->flags |= COMBINE(t->flags, t->child->sibling->flags);
         top->flags |= COMBINE(top->flags, t->flags);
  
+       /* neither t nor top could be directly marked for capture as yet */
+       assert(t->capno == 0);
+       assert(top->capno == 0);
+
         /*
          * At this point both top and t are concatenation (op == '.') subres,
          * and we have top->child = prefix of branch, top->child->sibling = t,
@@ -1291,6 +1312,24 @@ parseqatom(struct vars *v,
             top->child = t->child;
             freesrnode(v, t);
         }
+
+       /*
+        * Otherwise, it's possible that t->child is not messy in itself, but
+        * we considered it messy because its greediness conflicts with what
+        * preceded it.  Then it could be that the combination of t->child and
+        * the rest of the branch is also not messy, in which case we can get
+        * rid of the child concatenation by merging t->child and the rest of
+        * the branch into one plain DFA node.
+        */
+       else if (t->child->op == '=' &&
+                t->child->sibling->op == '=' &&
+                !MESSY(UP(t->child->flags | t->child->sibling->flags)))
+       {
+           t->op = '=';
+           t->flags = COMBINE(t->child->flags, t->child->sibling->flags);
+           freesubreandsiblings(v, t->child);
+           t->child = NULL;
+       }
     }
     else
     {
author	Tom Lane <[email protected]>
	Tue, 2 Mar 2021 17:14:14 +0000 (12:14 -0500)
committer	Tom Lane <[email protected]>
	Tue, 2 Mar 2021 17:14:14 +0000 (12:14 -0500)