Fix regexp misbehavior with capturing parens inside "{0}".

author Tom Lane <[email protected]>

Tue, 24 Aug 2021 20:37:26 +0000 (16:37 -0400)

committer Tom Lane <[email protected]>

Tue, 24 Aug 2021 20:37:26 +0000 (16:37 -0400)
author Tom Lane <[email protected]>
Tue, 24 Aug 2021 20:37:26 +0000 (16:37 -0400)
committer Tom Lane <[email protected]>
Tue, 24 Aug 2021 20:37:26 +0000 (16:37 -0400)
diff --git a/src/backend/regex/regcomp.c b/src/backend/regex/regcomp.c

index ae3a7b6a38c08faf7bf763f6db7f01ab32b0df2a..d9840171a3393905ffada2e9c9dc63c951d73b2b 100644 (file)
--- a/src/backend/regex/regcomp.c
+++ b/src/backend/regex/regcomp.c
@@ -1089,11 +1089,23 @@ parseqatom(struct vars *v,
     /* annoying special case:  {0} or {0,0} cancels everything */
     if (m == 0 && n == 0)
     {
-       if (atom != NULL)
-           freesubre(v, atom);
-       if (atomtype == '(')
-           v->subs[subno] = NULL;
-       delsub(v->nfa, lp, rp);
+       /*
+        * If we had capturing subexpression(s) within the atom, we don't want
+        * to destroy them, because it's legal (if useless) to back-ref them
+        * later.  Hence, just unlink the atom from lp/rp and then ignore it.
+        */
+       if (atom != NULL && (atom->flags & CAP))
+       {
+           delsub(v->nfa, lp, atom->begin);
+           delsub(v->nfa, atom->end, rp);
+       }
+       else
+       {
+           /* Otherwise, we can clean up any subre infrastructure we made */
+           if (atom != NULL)
+               freesubre(v, atom);
+           delsub(v->nfa, lp, rp);
+       }
         EMPTYARC(lp, rp);
         return top;
     }
diff --git a/src/test/modules/test_regex/expected/test_regex.out b/src/test/modules/test_regex/expected/test_regex.out

index 4886858d66d2bbf8cdfca76646961df41db1aa75..731ba506d3545141230b9ba73bbdac9591c6ec06 100644 (file)
--- a/src/test/modules/test_regex/expected/test_regex.out
+++ b/src/test/modules/test_regex/expected/test_regex.out
@@ -3576,6 +3576,28 @@ select * from test_regex('((.))(\2)', 'xyy', 'oRP');
   {yy,NULL,NULL,NULL}
  (2 rows)
  
+-- expectNomatch   21.39 PQR   {(.){0}(\1)}    xxx
+select * from test_regex('(.){0}(\1)', 'xxx', 'PQR');
+                 test_regex                 
+--------------------------------------------
+ {2,REG_UBACKREF,REG_UBOUNDS,REG_UNONPOSIX}
+(1 row)
+
+-- expectNomatch   21.40 PQR   {((.)){0}(\2)}  xxx
+select * from test_regex('((.)){0}(\2)', 'xxx', 'PQR');
+                 test_regex                 
+--------------------------------------------
+ {3,REG_UBACKREF,REG_UBOUNDS,REG_UNONPOSIX}
+(1 row)
+
+-- expectMatch 21.41 NPQR  {((.)){0}(\2){0}}   xyz {}  {}  {}  {}
+select * from test_regex('((.)){0}(\2){0}', 'xyz', 'NPQR');
+                         test_regex                         
+------------------------------------------------------------
+ {3,REG_UBACKREF,REG_UBOUNDS,REG_UNONPOSIX,REG_UEMPTYMATCH}
+ {"",NULL,NULL,NULL}
+(2 rows)
+
  -- doing 22 "multicharacter collating elements"
  -- # again ugh
  -- MCCEs are not implemented in Postgres, so we skip all these tests
diff --git a/src/test/modules/test_regex/sql/test_regex.sql b/src/test/modules/test_regex/sql/test_regex.sql

index 418527da3df00a9beba39be9a70dd70fd08432a1..478fa2c5475288428e1b5b987363f5d512e5bfa8 100644 (file)
--- a/src/test/modules/test_regex/sql/test_regex.sql
+++ b/src/test/modules/test_regex/sql/test_regex.sql
@@ -1036,6 +1036,12 @@ select * from test_regex('((.))(\2){0}', 'xy', 'RPQ');
  select * from test_regex('((.))(\2)', 'xyy', 'RP');
  -- expectMatch 21.38 oRP   ((.))(\2)   xyy yy  {}  {}  {}
  select * from test_regex('((.))(\2)', 'xyy', 'oRP');
+-- expectNomatch   21.39 PQR   {(.){0}(\1)}    xxx
+select * from test_regex('(.){0}(\1)', 'xxx', 'PQR');
+-- expectNomatch   21.40 PQR   {((.)){0}(\2)}  xxx
+select * from test_regex('((.)){0}(\2)', 'xxx', 'PQR');
+-- expectMatch 21.41 NPQR  {((.)){0}(\2){0}}   xyz {}  {}  {}  {}
+select * from test_regex('((.)){0}(\2){0}', 'xyz', 'NPQR');
  
  -- doing 22 "multicharacter collating elements"
  -- # again ugh
diff --git a/src/test/regress/expected/regex.out b/src/test/regress/expected/regex.out

index cbe2cfc3ea163af11e2c68a258b1caf8a499a514..ae0de7307db738c5446e24fc7106b523aec78bf3 100644 (file)
--- a/src/test/regress/expected/regex.out
+++ b/src/test/regress/expected/regex.out
@@ -567,6 +567,25 @@ select 'a' ~ '()+\1';
   t
  (1 row)
  
+-- Test incorrect removal of capture groups within {0}
+select 'xxx' ~ '(.){0}(\1)' as f;
+ f 
+---
+ f
+(1 row)
+
+select 'xxx' ~ '((.)){0}(\2)' as f;
+ f 
+---
+ f
+(1 row)
+
+select 'xyz' ~ '((.)){0}(\2){0}' as t;
+ t 
+---
+ t
+(1 row)
+
  -- Test ancient oversight in when to apply zaptreesubs
  select 'abcdef' ~ '^(.)\1|\1.' as f;
   f 
diff --git a/src/test/regress/sql/regex.sql b/src/test/regress/sql/regex.sql

index c6974a43d118739161140447e0d5fc6e1d198e88..56217104ce63971cb4950683d7c10500345b4edd 100644 (file)
--- a/src/test/regress/sql/regex.sql
+++ b/src/test/regress/sql/regex.sql
@@ -135,6 +135,11 @@ select 'a' ~ '.. ()|\1';
  select 'a' ~ '()*\1';
  select 'a' ~ '()+\1';
  
+-- Test incorrect removal of capture groups within {0}
+select 'xxx' ~ '(.){0}(\1)' as f;
+select 'xxx' ~ '((.)){0}(\2)' as f;
+select 'xyz' ~ '((.)){0}(\2){0}' as t;
+
  -- Test ancient oversight in when to apply zaptreesubs
  select 'abcdef' ~ '^(.)\1|\1.' as f;
  select 'abadef' ~ '^((.)\2|..)\2' as f;
author	Tom Lane <[email protected]>
	Tue, 24 Aug 2021 20:37:26 +0000 (16:37 -0400)
committer	Tom Lane <[email protected]>
	Tue, 24 Aug 2021 20:37:26 +0000 (16:37 -0400)
src/backend/regex/regcomp.c		patch | blob | blame | history
src/test/modules/test_regex/expected/test_regex.out		patch | blob | blame | history
src/test/modules/test_regex/sql/test_regex.sql		patch | blob | blame | history
src/test/regress/expected/regex.out		patch | blob | blame | history
src/test/regress/sql/regex.sql		patch | blob | blame | history