Pathify RHS unique-ification for semijoin planning

author Richard Guo <[email protected]>

Tue, 19 Aug 2025 00:35:40 +0000 (09:35 +0900)

committer Richard Guo <[email protected]>

Tue, 19 Aug 2025 00:35:40 +0000 (09:35 +0900)
author Richard Guo <[email protected]>
Tue, 19 Aug 2025 00:35:40 +0000 (09:35 +0900)
committer Richard Guo <[email protected]>
Tue, 19 Aug 2025 00:35:40 +0000 (09:35 +0900)
diff --git a/src/backend/optimizer/README b/src/backend/optimizer/README

index 9c724ccfabf831ba813b8fe33fe8909190d8dc92..843368096fd0d243dfea24cf2a0d0ee6dac2e794 100644 (file)
--- a/src/backend/optimizer/README
+++ b/src/backend/optimizer/README
@@ -640,7 +640,6 @@ RelOptInfo      - a relation or joined relations
    GroupResultPath - childless Result plan node (used for degenerate grouping)
    MaterialPath  - a Material plan node
    MemoizePath   - a Memoize plan node for caching tuples from sub-paths
-  UniquePath    - remove duplicate rows (either by hashing or sorting)
    GatherPath    - collect the results of parallel workers
    GatherMergePath - collect parallel results, preserving their common sort order
    ProjectionPath - a Result plan node with child (used for projection)
@@ -648,7 +647,7 @@ RelOptInfo      - a relation or joined relations
    SortPath      - a Sort plan node applied to some sub-path
    IncrementalSortPath - an IncrementalSort plan node applied to some sub-path
    GroupPath     - a Group plan node applied to some sub-path
-  UpperUniquePath - a Unique plan node applied to some sub-path
+  UniquePath    - a Unique plan node applied to some sub-path
    AggPath       - an Agg plan node applied to some sub-path
    GroupingSetsPath - an Agg plan node used to implement GROUPING SETS
    MinMaxAggPath - a Result plan node with subplans performing MIN/MAX
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c

index 344a3188317b165c8acd5724d0f51cc89a89b545..783dca8a4acbe01c16a3610b54fe0949ff32216a 100644 (file)
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -3966,10 +3966,12 @@ final_cost_mergejoin(PlannerInfo *root, MergePath *path,
      * when we should not.  Can we do better without expensive selectivity
      * computations?
      *
-    * The whole issue is moot if we are working from a unique-ified outer
-    * input, or if we know we don't need to mark/restore at all.
+    * The whole issue is moot if we know we don't need to mark/restore at
+    * all, or if we are working from a unique-ified outer input.
      */
-   if (IsA(outer_path, UniquePath) || path->skip_mark_restore)
+   if (path->skip_mark_restore ||
+       RELATION_WAS_MADE_UNIQUE(outer_path->parent, extra->sjinfo,
+                                path->jpath.jointype))
         rescannedtuples = 0;
     else
     {
@@ -4364,7 +4366,8 @@ final_cost_hashjoin(PlannerInfo *root, HashPath *path,
      * because we avoid contaminating the cache with a value that's wrong for
      * non-unique-ified paths.
      */
-   if (IsA(inner_path, UniquePath))
+   if (RELATION_WAS_MADE_UNIQUE(inner_path->parent, extra->sjinfo,
+                                path->jpath.jointype))
     {
         innerbucketsize = 1.0 / virtualbuckets;
         innermcvfreq = 0.0;
diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c

index ebedc5574ca9c32284c7be0667ac1520340d185b..3b9407eb2eb794f357dcb885e3dbc92c5c4a48b6 100644 (file)
--- a/src/backend/optimizer/path/joinpath.c
+++ b/src/backend/optimizer/path/joinpath.c
@@ -112,12 +112,12 @@ static void generate_mergejoin_paths(PlannerInfo *root,
   * "flipped around" if we are considering joining the rels in the opposite
   * direction from what's indicated in sjinfo.
   *
- * Also, this routine and others in this module accept the special JoinTypes
- * JOIN_UNIQUE_OUTER and JOIN_UNIQUE_INNER to indicate that we should
- * unique-ify the outer or inner relation and then apply a regular inner
- * join.  These values are not allowed to propagate outside this module,
- * however.  Path cost estimation code may need to recognize that it's
- * dealing with such a case --- the combination of nominal jointype INNER
+ * Also, this routine accepts the special JoinTypes JOIN_UNIQUE_OUTER and
+ * JOIN_UNIQUE_INNER to indicate that the outer or inner relation has been
+ * unique-ified and a regular inner join should then be applied.  These values
+ * do not propagate outside this routine, except to the FDW and extension
+ * hooks.  Path cost estimation code, as well as match_unsorted_outer, may need
+ * to recognize such a case --- the combination of nominal jointype INNER
   * with sjinfo->jointype == JOIN_SEMI indicates that.
   */
  void
@@ -129,6 +129,7 @@ add_paths_to_joinrel(PlannerInfo *root,
                      SpecialJoinInfo *sjinfo,
                      List *restrictlist)
  {
+   JoinType    save_jointype = jointype;
     JoinPathExtraData extra;
     bool        mergejoin_allowed = true;
     ListCell   *lc;
@@ -165,10 +166,10 @@ add_paths_to_joinrel(PlannerInfo *root,
      * reduce_unique_semijoins would've simplified it), so there's no point in
      * calling innerrel_is_unique.  However, if the LHS covers all of the
      * semijoin's min_lefthand, then it's appropriate to set inner_unique
-    * because the path produced by create_unique_path will be unique relative
-    * to the LHS.  (If we have an LHS that's only part of the min_lefthand,
-    * that is *not* true.)  For JOIN_UNIQUE_OUTER, pass JOIN_INNER to avoid
-    * letting that value escape this module.
+    * because the unique relation produced by create_unique_paths will be
+    * unique relative to the LHS.  (If we have an LHS that's only part of the
+    * min_lefthand, that is *not* true.)  For JOIN_UNIQUE_OUTER, pass
+    * JOIN_INNER to avoid letting that value escape this module.
      */
     switch (jointype)
     {
@@ -199,6 +200,13 @@ add_paths_to_joinrel(PlannerInfo *root,
             break;
     }
  
+   /*
+    * If the outer or inner relation has been unique-ified, handle as a plain
+    * inner join.
+    */
+   if (jointype == JOIN_UNIQUE_OUTER || jointype == JOIN_UNIQUE_INNER)
+       jointype = JOIN_INNER;
+
     /*
      * Find potential mergejoin clauses.  We can skip this if we are not
      * interested in doing a mergejoin.  However, mergejoin may be our only
@@ -329,7 +337,7 @@ add_paths_to_joinrel(PlannerInfo *root,
         joinrel->fdwroutine->GetForeignJoinPaths)
         joinrel->fdwroutine->GetForeignJoinPaths(root, joinrel,
                                                  outerrel, innerrel,
-                                                jointype, &extra);
+                                                save_jointype, &extra);
  
     /*
      * 6. Finally, give extensions a chance to manipulate the path list.  They
@@ -339,7 +347,7 @@ add_paths_to_joinrel(PlannerInfo *root,
      */
     if (set_join_pathlist_hook)
         set_join_pathlist_hook(root, joinrel, outerrel, innerrel,
-                              jointype, &extra);
+                              save_jointype, &extra);
  }
  
  /*
@@ -1364,7 +1372,6 @@ sort_inner_and_outer(PlannerInfo *root,
                      JoinType jointype,
                      JoinPathExtraData *extra)
  {
-   JoinType    save_jointype = jointype;
     Path       *outer_path;
     Path       *inner_path;
     Path       *cheapest_partial_outer = NULL;
@@ -1402,38 +1409,16 @@ sort_inner_and_outer(PlannerInfo *root,
         PATH_PARAM_BY_REL(inner_path, outerrel))
         return;
  
-   /*
-    * If unique-ification is requested, do it and then handle as a plain
-    * inner join.
-    */
-   if (jointype == JOIN_UNIQUE_OUTER)
-   {
-       outer_path = (Path *) create_unique_path(root, outerrel,
-                                                outer_path, extra->sjinfo);
-       Assert(outer_path);
-       jointype = JOIN_INNER;
-   }
-   else if (jointype == JOIN_UNIQUE_INNER)
-   {
-       inner_path = (Path *) create_unique_path(root, innerrel,
-                                                inner_path, extra->sjinfo);
-       Assert(inner_path);
-       jointype = JOIN_INNER;
-   }
-
     /*
      * If the joinrel is parallel-safe, we may be able to consider a partial
-    * merge join.  However, we can't handle JOIN_UNIQUE_OUTER, because the
-    * outer path will be partial, and therefore we won't be able to properly
-    * guarantee uniqueness.  Similarly, we can't handle JOIN_FULL, JOIN_RIGHT
-    * and JOIN_RIGHT_ANTI, because they can produce false null extended rows.
+    * merge join.  However, we can't handle JOIN_FULL, JOIN_RIGHT and
+    * JOIN_RIGHT_ANTI, because they can produce false null extended rows.
      * Also, the resulting path must not be parameterized.
      */
     if (joinrel->consider_parallel &&
-       save_jointype != JOIN_UNIQUE_OUTER &&
-       save_jointype != JOIN_FULL &&
-       save_jointype != JOIN_RIGHT &&
-       save_jointype != JOIN_RIGHT_ANTI &&
+       jointype != JOIN_FULL &&
+       jointype != JOIN_RIGHT &&
+       jointype != JOIN_RIGHT_ANTI &&
         outerrel->partial_pathlist != NIL &&
         bms_is_empty(joinrel->lateral_relids))
     {
@@ -1441,7 +1426,7 @@ sort_inner_and_outer(PlannerInfo *root,
  
         if (inner_path->parallel_safe)
             cheapest_safe_inner = inner_path;
-       else if (save_jointype != JOIN_UNIQUE_INNER)
+       else
             cheapest_safe_inner =
                 get_cheapest_parallel_safe_total_inner(innerrel->pathlist);
     }
@@ -1580,13 +1565,9 @@ generate_mergejoin_paths(PlannerInfo *root,
     List       *trialsortkeys;
     Path       *cheapest_startup_inner;
     Path       *cheapest_total_inner;
-   JoinType    save_jointype = jointype;
     int         num_sortkeys;
     int         sortkeycnt;
  
-   if (jointype == JOIN_UNIQUE_OUTER || jointype == JOIN_UNIQUE_INNER)
-       jointype = JOIN_INNER;
-
     /* Look for useful mergeclauses (if any) */
     mergeclauses =
         find_mergeclauses_for_outer_pathkeys(root,
@@ -1636,10 +1617,6 @@ generate_mergejoin_paths(PlannerInfo *root,
                        extra,
                        is_partial);
  
-   /* Can't do anything else if inner path needs to be unique'd */
-   if (save_jointype == JOIN_UNIQUE_INNER)
-       return;
-
     /*
      * Look for presorted inner paths that satisfy the innersortkey list ---
      * or any truncation thereof, if we are allowed to build a mergejoin using
@@ -1819,7 +1796,6 @@ match_unsorted_outer(PlannerInfo *root,
                      JoinType jointype,
                      JoinPathExtraData *extra)
  {
-   JoinType    save_jointype = jointype;
     bool        nestjoinOK;
     bool        useallclauses;
     Path       *inner_cheapest_total = innerrel->cheapest_total_path;
@@ -1855,12 +1831,6 @@ match_unsorted_outer(PlannerInfo *root,
             nestjoinOK = false;
             useallclauses = true;
             break;
-       case JOIN_UNIQUE_OUTER:
-       case JOIN_UNIQUE_INNER:
-           jointype = JOIN_INNER;
-           nestjoinOK = true;
-           useallclauses = false;
-           break;
         default:
             elog(ERROR, "unrecognized join type: %d",
                  (int) jointype);
@@ -1873,24 +1843,20 @@ match_unsorted_outer(PlannerInfo *root,
      * If inner_cheapest_total is parameterized by the outer rel, ignore it;
      * we will consider it below as a member of cheapest_parameterized_paths,
      * but the other possibilities considered in this routine aren't usable.
+    *
+    * Furthermore, in that case, if the inner side is a unique-ified relation,
+    * we cannot generate any valid paths here, because the inner rel's
+    * dependency on the outer rel makes unique-ification meaningless.
      */
     if (PATH_PARAM_BY_REL(inner_cheapest_total, outerrel))
+   {
         inner_cheapest_total = NULL;
  
-   /*
-    * If we need to unique-ify the inner path, we will consider only the
-    * cheapest-total inner.
-    */
-   if (save_jointype == JOIN_UNIQUE_INNER)
-   {
-       /* No way to do this with an inner path parameterized by outer rel */
-       if (inner_cheapest_total == NULL)
+       if (RELATION_WAS_MADE_UNIQUE(innerrel, extra->sjinfo, jointype))
             return;
-       inner_cheapest_total = (Path *)
-           create_unique_path(root, innerrel, inner_cheapest_total, extra->sjinfo);
-       Assert(inner_cheapest_total);
     }
-   else if (nestjoinOK)
+
+   if (nestjoinOK)
     {
         /*
          * Consider materializing the cheapest inner path, unless
@@ -1914,20 +1880,6 @@ match_unsorted_outer(PlannerInfo *root,
         if (PATH_PARAM_BY_REL(outerpath, innerrel))
             continue;
  
-       /*
-        * If we need to unique-ify the outer path, it's pointless to consider
-        * any but the cheapest outer.  (XXX we don't consider parameterized
-        * outers, nor inners, for unique-ified cases.  Should we?)
-        */
-       if (save_jointype == JOIN_UNIQUE_OUTER)
-       {
-           if (outerpath != outerrel->cheapest_total_path)
-               continue;
-           outerpath = (Path *) create_unique_path(root, outerrel,
-                                                   outerpath, extra->sjinfo);
-           Assert(outerpath);
-       }
-
         /*
          * The result will have this sort order (even if it is implemented as
          * a nestloop, and even if some of the mergeclauses are implemented by
@@ -1936,21 +1888,7 @@ match_unsorted_outer(PlannerInfo *root,
         merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
                                              outerpath->pathkeys);
  
-       if (save_jointype == JOIN_UNIQUE_INNER)
-       {
-           /*
-            * Consider nestloop join, but only with the unique-ified cheapest
-            * inner path
-            */
-           try_nestloop_path(root,
-                             joinrel,
-                             outerpath,
-                             inner_cheapest_total,
-                             merge_pathkeys,
-                             jointype,
-                             extra);
-       }
-       else if (nestjoinOK)
+       if (nestjoinOK)
         {
             /*
              * Consider nestloop joins using this outer path and various
@@ -2001,17 +1939,13 @@ match_unsorted_outer(PlannerInfo *root,
                                   extra);
         }
  
-       /* Can't do anything else if outer path needs to be unique'd */
-       if (save_jointype == JOIN_UNIQUE_OUTER)
-           continue;
-
         /* Can't do anything else if inner rel is parameterized by outer */
         if (inner_cheapest_total == NULL)
             continue;
  
         /* Generate merge join paths */
         generate_mergejoin_paths(root, joinrel, innerrel, outerpath,
-                                save_jointype, extra, useallclauses,
+                                jointype, extra, useallclauses,
                                  inner_cheapest_total, merge_pathkeys,
                                  false);
     }
@@ -2019,41 +1953,35 @@ match_unsorted_outer(PlannerInfo *root,
     /*
      * Consider partial nestloop and mergejoin plan if outerrel has any
      * partial path and the joinrel is parallel-safe.  However, we can't
-    * handle JOIN_UNIQUE_OUTER, because the outer path will be partial, and
-    * therefore we won't be able to properly guarantee uniqueness.  Nor can
-    * we handle joins needing lateral rels, since partial paths must not be
-    * parameterized. Similarly, we can't handle JOIN_FULL, JOIN_RIGHT and
+    * handle joins needing lateral rels, since partial paths must not be
+    * parameterized.  Similarly, we can't handle JOIN_FULL, JOIN_RIGHT and
      * JOIN_RIGHT_ANTI, because they can produce false null extended rows.
      */
     if (joinrel->consider_parallel &&
-       save_jointype != JOIN_UNIQUE_OUTER &&
-       save_jointype != JOIN_FULL &&
-       save_jointype != JOIN_RIGHT &&
-       save_jointype != JOIN_RIGHT_ANTI &&
+       jointype != JOIN_FULL &&
+       jointype != JOIN_RIGHT &&
+       jointype != JOIN_RIGHT_ANTI &&
         outerrel->partial_pathlist != NIL &&
         bms_is_empty(joinrel->lateral_relids))
     {
         if (nestjoinOK)
             consider_parallel_nestloop(root, joinrel, outerrel, innerrel,
-                                      save_jointype, extra);
+                                      jointype, extra);
  
         /*
          * If inner_cheapest_total is NULL or non parallel-safe then find the
-        * cheapest total parallel safe path.  If doing JOIN_UNIQUE_INNER, we
-        * can't use any alternative inner path.
+        * cheapest total parallel safe path.
          */
         if (inner_cheapest_total == NULL ||
             !inner_cheapest_total->parallel_safe)
         {
-           if (save_jointype == JOIN_UNIQUE_INNER)
-               return;
-
-           inner_cheapest_total = get_cheapest_parallel_safe_total_inner(innerrel->pathlist);
+           inner_cheapest_total =
+               get_cheapest_parallel_safe_total_inner(innerrel->pathlist);
         }
  
         if (inner_cheapest_total)
             consider_parallel_mergejoin(root, joinrel, outerrel, innerrel,
-                                       save_jointype, extra,
+                                       jointype, extra,
                                         inner_cheapest_total);
     }
  }
@@ -2118,24 +2046,17 @@ consider_parallel_nestloop(PlannerInfo *root,
                            JoinType jointype,
                            JoinPathExtraData *extra)
  {
-   JoinType    save_jointype = jointype;
     Path       *inner_cheapest_total = innerrel->cheapest_total_path;
     Path       *matpath = NULL;
     ListCell   *lc1;
  
-   if (jointype == JOIN_UNIQUE_INNER)
-       jointype = JOIN_INNER;
-
     /*
-    * Consider materializing the cheapest inner path, unless: 1) we're doing
-    * JOIN_UNIQUE_INNER, because in this case we have to unique-ify the
-    * cheapest inner path, 2) enable_material is off, 3) the cheapest inner
-    * path is not parallel-safe, 4) the cheapest inner path is parameterized
-    * by the outer rel, or 5) the cheapest inner path materializes its output
-    * anyway.
+    * Consider materializing the cheapest inner path, unless: 1)
+    * enable_material is off, 2) the cheapest inner path is not
+    * parallel-safe, 3) the cheapest inner path is parameterized by the outer
+    * rel, or 4) the cheapest inner path materializes its output anyway.
      */
-   if (save_jointype != JOIN_UNIQUE_INNER &&
-       enable_material && inner_cheapest_total->parallel_safe &&
+   if (enable_material && inner_cheapest_total->parallel_safe &&
         !PATH_PARAM_BY_REL(inner_cheapest_total, outerrel) &&
         !ExecMaterializesOutput(inner_cheapest_total->pathtype))
     {
@@ -2169,23 +2090,6 @@ consider_parallel_nestloop(PlannerInfo *root,
             if (!innerpath->parallel_safe)
                 continue;
  
-           /*
-            * If we're doing JOIN_UNIQUE_INNER, we can only use the inner's
-            * cheapest_total_path, and we have to unique-ify it.  (We might
-            * be able to relax this to allow other safe, unparameterized
-            * inner paths, but right now create_unique_path is not on board
-            * with that.)
-            */
-           if (save_jointype == JOIN_UNIQUE_INNER)
-           {
-               if (innerpath != innerrel->cheapest_total_path)
-                   continue;
-               innerpath = (Path *) create_unique_path(root, innerrel,
-                                                       innerpath,
-                                                       extra->sjinfo);
-               Assert(innerpath);
-           }
-
             try_partial_nestloop_path(root, joinrel, outerpath, innerpath,
                                       pathkeys, jointype, extra);
  
@@ -2227,7 +2131,6 @@ hash_inner_and_outer(PlannerInfo *root,
                      JoinType jointype,
                      JoinPathExtraData *extra)
  {
-   JoinType    save_jointype = jointype;
     bool        isouterjoin = IS_OUTER_JOIN(jointype);
     List       *hashclauses;
     ListCell   *l;
@@ -2290,6 +2193,8 @@ hash_inner_and_outer(PlannerInfo *root,
         Path       *cheapest_startup_outer = outerrel->cheapest_startup_path;
         Path       *cheapest_total_outer = outerrel->cheapest_total_path;
         Path       *cheapest_total_inner = innerrel->cheapest_total_path;
+       ListCell   *lc1;
+       ListCell   *lc2;
  
         /*
          * If either cheapest-total path is parameterized by the other rel, we
@@ -2301,114 +2206,64 @@ hash_inner_and_outer(PlannerInfo *root,
             PATH_PARAM_BY_REL(cheapest_total_inner, outerrel))
             return;
  
-       /* Unique-ify if need be; we ignore parameterized possibilities */
-       if (jointype == JOIN_UNIQUE_OUTER)
-       {
-           cheapest_total_outer = (Path *)
-               create_unique_path(root, outerrel,
-                                  cheapest_total_outer, extra->sjinfo);
-           Assert(cheapest_total_outer);
-           jointype = JOIN_INNER;
-           try_hashjoin_path(root,
-                             joinrel,
-                             cheapest_total_outer,
-                             cheapest_total_inner,
-                             hashclauses,
-                             jointype,
-                             extra);
-           /* no possibility of cheap startup here */
-       }
-       else if (jointype == JOIN_UNIQUE_INNER)
-       {
-           cheapest_total_inner = (Path *)
-               create_unique_path(root, innerrel,
-                                  cheapest_total_inner, extra->sjinfo);
-           Assert(cheapest_total_inner);
-           jointype = JOIN_INNER;
+       /*
+        * Consider the cheapest startup outer together with the cheapest
+        * total inner, and then consider pairings of cheapest-total paths
+        * including parameterized ones.  There is no use in generating
+        * parameterized paths on the basis of possibly cheap startup cost, so
+        * this is sufficient.
+        */
+       if (cheapest_startup_outer != NULL)
             try_hashjoin_path(root,
                               joinrel,
-                             cheapest_total_outer,
+                             cheapest_startup_outer,
                               cheapest_total_inner,
                               hashclauses,
                               jointype,
                               extra);
-           if (cheapest_startup_outer != NULL &&
-               cheapest_startup_outer != cheapest_total_outer)
-               try_hashjoin_path(root,
-                                 joinrel,
-                                 cheapest_startup_outer,
-                                 cheapest_total_inner,
-                                 hashclauses,
-                                 jointype,
-                                 extra);
-       }
-       else
+
+       foreach(lc1, outerrel->cheapest_parameterized_paths)
         {
+           Path       *outerpath = (Path *) lfirst(lc1);
+
             /*
-            * For other jointypes, we consider the cheapest startup outer
-            * together with the cheapest total inner, and then consider
-            * pairings of cheapest-total paths including parameterized ones.
-            * There is no use in generating parameterized paths on the basis
-            * of possibly cheap startup cost, so this is sufficient.
+            * We cannot use an outer path that is parameterized by the inner
+            * rel.
              */
-           ListCell   *lc1;
-           ListCell   *lc2;
-
-           if (cheapest_startup_outer != NULL)
-               try_hashjoin_path(root,
-                                 joinrel,
-                                 cheapest_startup_outer,
-                                 cheapest_total_inner,
-                                 hashclauses,
-                                 jointype,
-                                 extra);
+           if (PATH_PARAM_BY_REL(outerpath, innerrel))
+               continue;
  
-           foreach(lc1, outerrel->cheapest_parameterized_paths)
+           foreach(lc2, innerrel->cheapest_parameterized_paths)
             {
-               Path       *outerpath = (Path *) lfirst(lc1);
+               Path       *innerpath = (Path *) lfirst(lc2);
  
                 /*
-                * We cannot use an outer path that is parameterized by the
-                * inner rel.
+                * We cannot use an inner path that is parameterized by the
+                * outer rel, either.
                  */
-               if (PATH_PARAM_BY_REL(outerpath, innerrel))
+               if (PATH_PARAM_BY_REL(innerpath, outerrel))
                     continue;
  
-               foreach(lc2, innerrel->cheapest_parameterized_paths)
-               {
-                   Path       *innerpath = (Path *) lfirst(lc2);
-
-                   /*
-                    * We cannot use an inner path that is parameterized by
-                    * the outer rel, either.
-                    */
-                   if (PATH_PARAM_BY_REL(innerpath, outerrel))
-                       continue;
+               if (outerpath == cheapest_startup_outer &&
+                   innerpath == cheapest_total_inner)
+                   continue;   /* already tried it */
  
-                   if (outerpath == cheapest_startup_outer &&
-                       innerpath == cheapest_total_inner)
-                       continue;   /* already tried it */
-
-                   try_hashjoin_path(root,
-                                     joinrel,
-                                     outerpath,
-                                     innerpath,
-                                     hashclauses,
-                                     jointype,
-                                     extra);
-               }
+               try_hashjoin_path(root,
+                                 joinrel,
+                                 outerpath,
+                                 innerpath,
+                                 hashclauses,
+                                 jointype,
+                                 extra);
             }
         }
  
         /*
          * If the joinrel is parallel-safe, we may be able to consider a
-        * partial hash join.  However, we can't handle JOIN_UNIQUE_OUTER,
-        * because the outer path will be partial, and therefore we won't be
-        * able to properly guarantee uniqueness.  Also, the resulting path
-        * must not be parameterized.
+        * partial hash join.  However, the resulting path must not be
+        * parameterized.
          */
         if (joinrel->consider_parallel &&
-           save_jointype != JOIN_UNIQUE_OUTER &&
             outerrel->partial_pathlist != NIL &&
             bms_is_empty(joinrel->lateral_relids))
         {
@@ -2421,11 +2276,9 @@ hash_inner_and_outer(PlannerInfo *root,
  
             /*
              * Can we use a partial inner plan too, so that we can build a
-            * shared hash table in parallel?  We can't handle
-            * JOIN_UNIQUE_INNER because we can't guarantee uniqueness.
+            * shared hash table in parallel?
              */
             if (innerrel->partial_pathlist != NIL &&
-               save_jointype != JOIN_UNIQUE_INNER &&
                 enable_parallel_hash)
             {
                 cheapest_partial_inner =
@@ -2441,19 +2294,18 @@ hash_inner_and_outer(PlannerInfo *root,
              * Normally, given that the joinrel is parallel-safe, the cheapest
              * total inner path will also be parallel-safe, but if not, we'll
              * have to search for the cheapest safe, unparameterized inner
-            * path.  If doing JOIN_UNIQUE_INNER, we can't use any alternative
-            * inner path.  If full, right, right-semi or right-anti join, we
-            * can't use parallelism (building the hash table in each backend)
+            * path.  If full, right, right-semi or right-anti join, we can't
+            * use parallelism (building the hash table in each backend)
              * because no one process has all the match bits.
              */
-           if (save_jointype == JOIN_FULL ||
-               save_jointype == JOIN_RIGHT ||
-               save_jointype == JOIN_RIGHT_SEMI ||
-               save_jointype == JOIN_RIGHT_ANTI)
+           if (jointype == JOIN_FULL ||
+               jointype == JOIN_RIGHT ||
+               jointype == JOIN_RIGHT_SEMI ||
+               jointype == JOIN_RIGHT_ANTI)
                 cheapest_safe_inner = NULL;
             else if (cheapest_total_inner->parallel_safe)
                 cheapest_safe_inner = cheapest_total_inner;
-           else if (save_jointype != JOIN_UNIQUE_INNER)
+           else
                 cheapest_safe_inner =
                     get_cheapest_parallel_safe_total_inner(innerrel->pathlist);
  
diff --git a/src/backend/optimizer/path/joinrels.c b/src/backend/optimizer/path/joinrels.c

index aad41b940091db693e2b570199c3db3ca3d9d3ea..535248aa525161fd23ca5d2343becc25d57bdf49 100644 (file)
--- a/src/backend/optimizer/path/joinrels.c
+++ b/src/backend/optimizer/path/joinrels.c
@@ -19,6 +19,7 @@
  #include "optimizer/joininfo.h"
  #include "optimizer/pathnode.h"
  #include "optimizer/paths.h"
+#include "optimizer/planner.h"
  #include "partitioning/partbounds.h"
  #include "utils/memutils.h"
  
@@ -444,8 +445,7 @@ join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
         }
         else if (sjinfo->jointype == JOIN_SEMI &&
                  bms_equal(sjinfo->syn_righthand, rel2->relids) &&
-                create_unique_path(root, rel2, rel2->cheapest_total_path,
-                                   sjinfo) != NULL)
+                create_unique_paths(root, rel2, sjinfo) != NULL)
         {
             /*----------
              * For a semijoin, we can join the RHS to anything else by
@@ -477,8 +477,7 @@ join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
         }
         else if (sjinfo->jointype == JOIN_SEMI &&
                  bms_equal(sjinfo->syn_righthand, rel1->relids) &&
-                create_unique_path(root, rel1, rel1->cheapest_total_path,
-                                   sjinfo) != NULL)
+                create_unique_paths(root, rel1, sjinfo) != NULL)
         {
             /* Reversed semijoin case */
             if (match_sjinfo)
@@ -886,6 +885,8 @@ populate_joinrel_with_paths(PlannerInfo *root, RelOptInfo *rel1,
                             RelOptInfo *rel2, RelOptInfo *joinrel,
                             SpecialJoinInfo *sjinfo, List *restrictlist)
  {
+   RelOptInfo *unique_rel2;
+
     /*
      * Consider paths using each rel as both outer and inner.  Depending on
      * the join type, a provably empty outer or inner rel might mean the join
@@ -991,14 +992,13 @@ populate_joinrel_with_paths(PlannerInfo *root, RelOptInfo *rel1,
             /*
              * If we know how to unique-ify the RHS and one input rel is
              * exactly the RHS (not a superset) we can consider unique-ifying
-            * it and then doing a regular join.  (The create_unique_path
+            * it and then doing a regular join.  (The create_unique_paths
              * check here is probably redundant with what join_is_legal did,
              * but if so the check is cheap because it's cached.  So test
              * anyway to be sure.)
              */
             if (bms_equal(sjinfo->syn_righthand, rel2->relids) &&
-               create_unique_path(root, rel2, rel2->cheapest_total_path,
-                                  sjinfo) != NULL)
+               (unique_rel2 = create_unique_paths(root, rel2, sjinfo)) != NULL)
             {
                 if (is_dummy_rel(rel1) || is_dummy_rel(rel2) ||
                     restriction_is_constant_false(restrictlist, joinrel, false))
@@ -1006,10 +1006,10 @@ populate_joinrel_with_paths(PlannerInfo *root, RelOptInfo *rel1,
                     mark_dummy_rel(joinrel);
                     break;
                 }
-               add_paths_to_joinrel(root, joinrel, rel1, rel2,
+               add_paths_to_joinrel(root, joinrel, rel1, unique_rel2,
                                      JOIN_UNIQUE_INNER, sjinfo,
                                      restrictlist);
-               add_paths_to_joinrel(root, joinrel, rel2, rel1,
+               add_paths_to_joinrel(root, joinrel, unique_rel2, rel1,
                                      JOIN_UNIQUE_OUTER, sjinfo,
                                      restrictlist);
             }
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c

index 9fd5c31edf228673db35a16e585777cb673889cd..6791cbeb416edf973b5116929203658f4b267443 100644 (file)
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -95,8 +95,6 @@ static Material *create_material_plan(PlannerInfo *root, MaterialPath *best_path
                                       int flags);
  static Memoize *create_memoize_plan(PlannerInfo *root, MemoizePath *best_path,
                                     int flags);
-static Plan *create_unique_plan(PlannerInfo *root, UniquePath *best_path,
-                               int flags);
  static Gather *create_gather_plan(PlannerInfo *root, GatherPath *best_path);
  static Plan *create_projection_plan(PlannerInfo *root,
                                     ProjectionPath *best_path,
@@ -106,8 +104,7 @@ static Sort *create_sort_plan(PlannerInfo *root, SortPath *best_path, int flags)
  static IncrementalSort *create_incrementalsort_plan(PlannerInfo *root,
                                                     IncrementalSortPath *best_path, int flags);
  static Group *create_group_plan(PlannerInfo *root, GroupPath *best_path);
-static Unique *create_upper_unique_plan(PlannerInfo *root, UpperUniquePath *best_path,
-                                       int flags);
+static Unique *create_unique_plan(PlannerInfo *root, UniquePath *best_path, int flags);
  static Agg *create_agg_plan(PlannerInfo *root, AggPath *best_path);
  static Plan *create_groupingsets_plan(PlannerInfo *root, GroupingSetsPath *best_path);
  static Result *create_minmaxagg_plan(PlannerInfo *root, MinMaxAggPath *best_path);
@@ -296,9 +293,9 @@ static WindowAgg *make_windowagg(List *tlist, WindowClause *wc,
  static Group *make_group(List *tlist, List *qual, int numGroupCols,
                          AttrNumber *grpColIdx, Oid *grpOperators, Oid *grpCollations,
                          Plan *lefttree);
-static Unique *make_unique_from_sortclauses(Plan *lefttree, List *distinctList);
  static Unique *make_unique_from_pathkeys(Plan *lefttree,
-                                        List *pathkeys, int numCols);
+                                        List *pathkeys, int numCols,
+                                        Relids relids);
  static Gather *make_gather(List *qptlist, List *qpqual,
                            int nworkers, int rescan_param, bool single_copy, Plan *subplan);
  static SetOp *make_setop(SetOpCmd cmd, SetOpStrategy strategy,
@@ -470,19 +467,9 @@ create_plan_recurse(PlannerInfo *root, Path *best_path, int flags)
                                                 flags);
             break;
         case T_Unique:
-           if (IsA(best_path, UpperUniquePath))
-           {
-               plan = (Plan *) create_upper_unique_plan(root,
-                                                        (UpperUniquePath *) best_path,
-                                                        flags);
-           }
-           else
-           {
-               Assert(IsA(best_path, UniquePath));
-               plan = create_unique_plan(root,
-                                         (UniquePath *) best_path,
-                                         flags);
-           }
+           plan = (Plan *) create_unique_plan(root,
+                                              (UniquePath *) best_path,
+                                              flags);
             break;
         case T_Gather:
             plan = (Plan *) create_gather_plan(root,
@@ -1764,207 +1751,6 @@ create_memoize_plan(PlannerInfo *root, MemoizePath *best_path, int flags)
     return plan;
  }
  
-/*
- * create_unique_plan
- *   Create a Unique plan for 'best_path' and (recursively) plans
- *   for its subpaths.
- *
- *   Returns a Plan node.
- */
-static Plan *
-create_unique_plan(PlannerInfo *root, UniquePath *best_path, int flags)
-{
-   Plan       *plan;
-   Plan       *subplan;
-   List       *in_operators;
-   List       *uniq_exprs;
-   List       *newtlist;
-   int         nextresno;
-   bool        newitems;
-   int         numGroupCols;
-   AttrNumber *groupColIdx;
-   Oid        *groupCollations;
-   int         groupColPos;
-   ListCell   *l;
-
-   /* Unique doesn't project, so tlist requirements pass through */
-   subplan = create_plan_recurse(root, best_path->subpath, flags);
-
-   /* Done if we don't need to do any actual unique-ifying */
-   if (best_path->umethod == UNIQUE_PATH_NOOP)
-       return subplan;
-
-   /*
-    * As constructed, the subplan has a "flat" tlist containing just the Vars
-    * needed here and at upper levels.  The values we are supposed to
-    * unique-ify may be expressions in these variables.  We have to add any
-    * such expressions to the subplan's tlist.
-    *
-    * The subplan may have a "physical" tlist if it is a simple scan plan. If
-    * we're going to sort, this should be reduced to the regular tlist, so
-    * that we don't sort more data than we need to.  For hashing, the tlist
-    * should be left as-is if we don't need to add any expressions; but if we
-    * do have to add expressions, then a projection step will be needed at
-    * runtime anyway, so we may as well remove unneeded items. Therefore
-    * newtlist starts from build_path_tlist() not just a copy of the
-    * subplan's tlist; and we don't install it into the subplan unless we are
-    * sorting or stuff has to be added.
-    */
-   in_operators = best_path->in_operators;
-   uniq_exprs = best_path->uniq_exprs;
-
-   /* initialize modified subplan tlist as just the "required" vars */
-   newtlist = build_path_tlist(root, &best_path->path);
-   nextresno = list_length(newtlist) + 1;
-   newitems = false;
-
-   foreach(l, uniq_exprs)
-   {
-       Expr       *uniqexpr = lfirst(l);
-       TargetEntry *tle;
-
-       tle = tlist_member(uniqexpr, newtlist);
-       if (!tle)
-       {
-           tle = makeTargetEntry((Expr *) uniqexpr,
-                                 nextresno,
-                                 NULL,
-                                 false);
-           newtlist = lappend(newtlist, tle);
-           nextresno++;
-           newitems = true;
-       }
-   }
-
-   /* Use change_plan_targetlist in case we need to insert a Result node */
-   if (newitems || best_path->umethod == UNIQUE_PATH_SORT)
-       subplan = change_plan_targetlist(subplan, newtlist,
-                                        best_path->path.parallel_safe);
-
-   /*
-    * Build control information showing which subplan output columns are to
-    * be examined by the grouping step.  Unfortunately we can't merge this
-    * with the previous loop, since we didn't then know which version of the
-    * subplan tlist we'd end up using.
-    */
-   newtlist = subplan->targetlist;
-   numGroupCols = list_length(uniq_exprs);
-   groupColIdx = (AttrNumber *) palloc(numGroupCols * sizeof(AttrNumber));
-   groupCollations = (Oid *) palloc(numGroupCols * sizeof(Oid));
-
-   groupColPos = 0;
-   foreach(l, uniq_exprs)
-   {
-       Expr       *uniqexpr = lfirst(l);
-       TargetEntry *tle;
-
-       tle = tlist_member(uniqexpr, newtlist);
-       if (!tle)               /* shouldn't happen */
-           elog(ERROR, "failed to find unique expression in subplan tlist");
-       groupColIdx[groupColPos] = tle->resno;
-       groupCollations[groupColPos] = exprCollation((Node *) tle->expr);
-       groupColPos++;
-   }
-
-   if (best_path->umethod == UNIQUE_PATH_HASH)
-   {
-       Oid        *groupOperators;
-
-       /*
-        * Get the hashable equality operators for the Agg node to use.
-        * Normally these are the same as the IN clause operators, but if
-        * those are cross-type operators then the equality operators are the
-        * ones for the IN clause operators' RHS datatype.
-        */
-       groupOperators = (Oid *) palloc(numGroupCols * sizeof(Oid));
-       groupColPos = 0;
-       foreach(l, in_operators)
-       {
-           Oid         in_oper = lfirst_oid(l);
-           Oid         eq_oper;
-
-           if (!get_compatible_hash_operators(in_oper, NULL, &eq_oper))
-               elog(ERROR, "could not find compatible hash operator for operator %u",
-                    in_oper);
-           groupOperators[groupColPos++] = eq_oper;
-       }
-
-       /*
-        * Since the Agg node is going to project anyway, we can give it the
-        * minimum output tlist, without any stuff we might have added to the
-        * subplan tlist.
-        */
-       plan = (Plan *) make_agg(build_path_tlist(root, &best_path->path),
-                                NIL,
-                                AGG_HASHED,
-                                AGGSPLIT_SIMPLE,
-                                numGroupCols,
-                                groupColIdx,
-                                groupOperators,
-                                groupCollations,
-                                NIL,
-                                NIL,
-                                best_path->path.rows,
-                                0,
-                                subplan);
-   }
-   else
-   {
-       List       *sortList = NIL;
-       Sort       *sort;
-
-       /* Create an ORDER BY list to sort the input compatibly */
-       groupColPos = 0;
-       foreach(l, in_operators)
-       {
-           Oid         in_oper = lfirst_oid(l);
-           Oid         sortop;
-           Oid         eqop;
-           TargetEntry *tle;
-           SortGroupClause *sortcl;
-
-           sortop = get_ordering_op_for_equality_op(in_oper, false);
-           if (!OidIsValid(sortop))    /* shouldn't happen */
-               elog(ERROR, "could not find ordering operator for equality operator %u",
-                    in_oper);
-
-           /*
-            * The Unique node will need equality operators.  Normally these
-            * are the same as the IN clause operators, but if those are
-            * cross-type operators then the equality operators are the ones
-            * for the IN clause operators' RHS datatype.
-            */
-           eqop = get_equality_op_for_ordering_op(sortop, NULL);
-           if (!OidIsValid(eqop))  /* shouldn't happen */
-               elog(ERROR, "could not find equality operator for ordering operator %u",
-                    sortop);
-
-           tle = get_tle_by_resno(subplan->targetlist,
-                                  groupColIdx[groupColPos]);
-           Assert(tle != NULL);
-
-           sortcl = makeNode(SortGroupClause);
-           sortcl->tleSortGroupRef = assignSortGroupRef(tle,
-                                                        subplan->targetlist);
-           sortcl->eqop = eqop;
-           sortcl->sortop = sortop;
-           sortcl->reverse_sort = false;
-           sortcl->nulls_first = false;
-           sortcl->hashable = false;   /* no need to make this accurate */
-           sortList = lappend(sortList, sortcl);
-           groupColPos++;
-       }
-       sort = make_sort_from_sortclauses(sortList, subplan);
-       label_sort_with_costsize(root, sort, -1.0);
-       plan = (Plan *) make_unique_from_sortclauses((Plan *) sort, sortList);
-   }
-
-   /* Copy cost data from Path to Plan */
-   copy_generic_path_info(plan, &best_path->path);
-
-   return plan;
-}
-
  /*
   * create_gather_plan
   *
@@ -2322,13 +2108,13 @@ create_group_plan(PlannerInfo *root, GroupPath *best_path)
  }
  
  /*
- * create_upper_unique_plan
+ * create_unique_plan
   *
   *   Create a Unique plan for 'best_path' and (recursively) plans
   *   for its subpaths.
   */
  static Unique *
-create_upper_unique_plan(PlannerInfo *root, UpperUniquePath *best_path, int flags)
+create_unique_plan(PlannerInfo *root, UniquePath *best_path, int flags)
  {
     Unique     *plan;
     Plan       *subplan;
@@ -2340,9 +2126,17 @@ create_upper_unique_plan(PlannerInfo *root, UpperUniquePath *best_path, int flag
     subplan = create_plan_recurse(root, best_path->subpath,
                                   flags | CP_LABEL_TLIST);
  
+   /*
+    * make_unique_from_pathkeys calls find_ec_member_matching_expr, which
+    * will ignore any child EC members that don't belong to the given relids.
+    * Thus, if this unique path is based on a child relation, we must pass
+    * its relids.
+    */
     plan = make_unique_from_pathkeys(subplan,
                                      best_path->path.pathkeys,
-                                    best_path->numkeys);
+                                    best_path->numkeys,
+                                    IS_OTHER_REL(best_path->path.parent) ?
+                                    best_path->path.parent->relids : NULL);
  
     copy_generic_path_info(&plan->plan, (Path *) best_path);
  
@@ -6880,61 +6674,14 @@ make_group(List *tlist,
  }
  
  /*
- * distinctList is a list of SortGroupClauses, identifying the targetlist items
- * that should be considered by the Unique filter.  The input path must
- * already be sorted accordingly.
- */
-static Unique *
-make_unique_from_sortclauses(Plan *lefttree, List *distinctList)
-{
-   Unique     *node = makeNode(Unique);
-   Plan       *plan = &node->plan;
-   int         numCols = list_length(distinctList);
-   int         keyno = 0;
-   AttrNumber *uniqColIdx;
-   Oid        *uniqOperators;
-   Oid        *uniqCollations;
-   ListCell   *slitem;
-
-   plan->targetlist = lefttree->targetlist;
-   plan->qual = NIL;
-   plan->lefttree = lefttree;
-   plan->righttree = NULL;
-
-   /*
-    * convert SortGroupClause list into arrays of attr indexes and equality
-    * operators, as wanted by executor
-    */
-   Assert(numCols > 0);
-   uniqColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols);
-   uniqOperators = (Oid *) palloc(sizeof(Oid) * numCols);
-   uniqCollations = (Oid *) palloc(sizeof(Oid) * numCols);
-
-   foreach(slitem, distinctList)
-   {
-       SortGroupClause *sortcl = (SortGroupClause *) lfirst(slitem);
-       TargetEntry *tle = get_sortgroupclause_tle(sortcl, plan->targetlist);
-
-       uniqColIdx[keyno] = tle->resno;
-       uniqOperators[keyno] = sortcl->eqop;
-       uniqCollations[keyno] = exprCollation((Node *) tle->expr);
-       Assert(OidIsValid(uniqOperators[keyno]));
-       keyno++;
-   }
-
-   node->numCols = numCols;
-   node->uniqColIdx = uniqColIdx;
-   node->uniqOperators = uniqOperators;
-   node->uniqCollations = uniqCollations;
-
-   return node;
-}
-
-/*
- * as above, but use pathkeys to identify the sort columns and semantics
+ * pathkeys is a list of PathKeys, identifying the sort columns and semantics.
+ * The input plan must already be sorted accordingly.
+ *
+ * relids identifies the child relation being unique-ified, if any.
   */
  static Unique *
-make_unique_from_pathkeys(Plan *lefttree, List *pathkeys, int numCols)
+make_unique_from_pathkeys(Plan *lefttree, List *pathkeys, int numCols,
+                         Relids relids)
  {
     Unique     *node = makeNode(Unique);
     Plan       *plan = &node->plan;
@@ -6997,7 +6744,7 @@ make_unique_from_pathkeys(Plan *lefttree, List *pathkeys, int numCols)
             foreach(j, plan->targetlist)
             {
                 tle = (TargetEntry *) lfirst(j);
-               em = find_ec_member_matching_expr(ec, tle->expr, NULL);
+               em = find_ec_member_matching_expr(ec, tle->expr, relids);
                 if (em)
                 {
                     /* found expr already in tlist */
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c

index 0d5a692e5fdca96ea2095c5da0f9e732df348cf3..65f1710159194ff4ed044f4a74ce4722a1e222f2 100644 (file)
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -268,6 +268,12 @@ static bool group_by_has_partkey(RelOptInfo *input_rel,
  static int common_prefix_cmp(const void *a, const void *b);
  static List *generate_setop_child_grouplist(SetOperationStmt *op,
                                             List *targetlist);
+static void create_final_unique_paths(PlannerInfo *root, RelOptInfo *input_rel,
+                                     List *sortPathkeys, List *groupClause,
+                                     SpecialJoinInfo *sjinfo, RelOptInfo *unique_rel);
+static void create_partial_unique_paths(PlannerInfo *root, RelOptInfo *input_rel,
+                                       List *sortPathkeys, List *groupClause,
+                                       SpecialJoinInfo *sjinfo, RelOptInfo *unique_rel);
  
  
  /*****************************************************************************
@@ -4939,10 +4945,10 @@ create_partial_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel,
                 else
                 {
                     add_partial_path(partial_distinct_rel, (Path *)
-                                    create_upper_unique_path(root, partial_distinct_rel,
-                                                             sorted_path,
-                                                             list_length(root->distinct_pathkeys),
-                                                             numDistinctRows));
+                                    create_unique_path(root, partial_distinct_rel,
+                                                       sorted_path,
+                                                       list_length(root->distinct_pathkeys),
+                                                       numDistinctRows));
                 }
             }
         }
@@ -5133,10 +5139,10 @@ create_final_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel,
                 else
                 {
                     add_path(distinct_rel, (Path *)
-                            create_upper_unique_path(root, distinct_rel,
-                                                     sorted_path,
-                                                     list_length(root->distinct_pathkeys),
-                                                     numDistinctRows));
+                            create_unique_path(root, distinct_rel,
+                                               sorted_path,
+                                               list_length(root->distinct_pathkeys),
+                                               numDistinctRows));
                 }
             }
         }
@@ -8270,3 +8276,499 @@ generate_setop_child_grouplist(SetOperationStmt *op, List *targetlist)
  
     return grouplist;
  }
+
+/*
+ * create_unique_paths
+ *    Build a new RelOptInfo containing Paths that represent elimination of
+ *    duplicate rows from the input data.  Distinct-ness is defined according to
+ *    the needs of the semijoin represented by sjinfo.  If it is not possible
+ *    to identify how to make the data unique, NULL is returned.
+ *
+ * If used at all, this is likely to be called repeatedly on the same rel,
+ * so we cache the result.
+ */
+RelOptInfo *
+create_unique_paths(PlannerInfo *root, RelOptInfo *rel, SpecialJoinInfo *sjinfo)
+{
+   RelOptInfo *unique_rel;
+   List       *sortPathkeys = NIL;
+   List       *groupClause = NIL;
+   MemoryContext oldcontext;
+
+   /* Caller made a mistake if SpecialJoinInfo is the wrong one */
+   Assert(sjinfo->jointype == JOIN_SEMI);
+   Assert(bms_equal(rel->relids, sjinfo->syn_righthand));
+
+   /* If result already cached, return it */
+   if (rel->unique_rel)
+       return rel->unique_rel;
+
+   /* If it's not possible to unique-ify, return NULL */
+   if (!(sjinfo->semi_can_btree || sjinfo->semi_can_hash))
+       return NULL;
+
+   /*
+    * When called during GEQO join planning, we are in a short-lived memory
+    * context.  We must make sure that the unique rel and any subsidiary data
+    * structures created for a baserel survive the GEQO cycle, else the
+    * baserel is trashed for future GEQO cycles.  On the other hand, when we
+    * are creating those for a joinrel during GEQO, we don't want them to
+    * clutter the main planning context.  Upshot is that the best solution is
+    * to explicitly allocate memory in the same context the given RelOptInfo
+    * is in.
+    */
+   oldcontext = MemoryContextSwitchTo(GetMemoryChunkContext(rel));
+
+   unique_rel = makeNode(RelOptInfo);
+   memcpy(unique_rel, rel, sizeof(RelOptInfo));
+
+   /*
+    * clear path info
+    */
+   unique_rel->pathlist = NIL;
+   unique_rel->ppilist = NIL;
+   unique_rel->partial_pathlist = NIL;
+   unique_rel->cheapest_startup_path = NULL;
+   unique_rel->cheapest_total_path = NULL;
+   unique_rel->cheapest_parameterized_paths = NIL;
+
+   /*
+    * Build the target list for the unique rel.  We also build the pathkeys
+    * that represent the ordering requirements for the sort-based
+    * implementation, and the list of SortGroupClause nodes that represent
+    * the columns to be grouped on for the hash-based implementation.
+    *
+    * For a child rel, we can construct these fields from those of its
+    * parent.
+    */
+   if (IS_OTHER_REL(rel))
+   {
+       PathTarget *child_unique_target;
+       PathTarget *parent_unique_target;
+
+       parent_unique_target = rel->top_parent->unique_rel->reltarget;
+
+       child_unique_target = copy_pathtarget(parent_unique_target);
+
+       /* Translate the target expressions */
+       child_unique_target->exprs = (List *)
+           adjust_appendrel_attrs_multilevel(root,
+                                             (Node *) parent_unique_target->exprs,
+                                             rel,
+                                             rel->top_parent);
+
+       unique_rel->reltarget = child_unique_target;
+
+       sortPathkeys = rel->top_parent->unique_pathkeys;
+       groupClause = rel->top_parent->unique_groupclause;
+   }
+   else
+   {
+       List       *newtlist;
+       int         nextresno;
+       List       *sortList = NIL;
+       ListCell   *lc1;
+       ListCell   *lc2;
+
+       /*
+        * The values we are supposed to unique-ify may be expressions in the
+        * variables of the input rel's targetlist.  We have to add any such
+        * expressions to the unique rel's targetlist.
+        *
+        * While in the loop, build the lists of SortGroupClause's that
+        * represent the ordering for the sort-based implementation and the
+        * grouping for the hash-based implementation.
+        */
+       newtlist = make_tlist_from_pathtarget(rel->reltarget);
+       nextresno = list_length(newtlist) + 1;
+
+       forboth(lc1, sjinfo->semi_rhs_exprs, lc2, sjinfo->semi_operators)
+       {
+           Expr       *uniqexpr = lfirst(lc1);
+           Oid         in_oper = lfirst_oid(lc2);
+           Oid         sortop = InvalidOid;
+           TargetEntry *tle;
+
+           tle = tlist_member(uniqexpr, newtlist);
+           if (!tle)
+           {
+               tle = makeTargetEntry((Expr *) uniqexpr,
+                                     nextresno,
+                                     NULL,
+                                     false);
+               newtlist = lappend(newtlist, tle);
+               nextresno++;
+           }
+
+           if (sjinfo->semi_can_btree)
+           {
+               /* Create an ORDER BY list to sort the input compatibly */
+               Oid         eqop;
+               SortGroupClause *sortcl;
+
+               sortop = get_ordering_op_for_equality_op(in_oper, false);
+               if (!OidIsValid(sortop))    /* shouldn't happen */
+                   elog(ERROR, "could not find ordering operator for equality operator %u",
+                        in_oper);
+
+               /*
+                * The Unique node will need equality operators.  Normally
+                * these are the same as the IN clause operators, but if those
+                * are cross-type operators then the equality operators are
+                * the ones for the IN clause operators' RHS datatype.
+                */
+               eqop = get_equality_op_for_ordering_op(sortop, NULL);
+               if (!OidIsValid(eqop))  /* shouldn't happen */
+                   elog(ERROR, "could not find equality operator for ordering operator %u",
+                        sortop);
+
+               sortcl = makeNode(SortGroupClause);
+               sortcl->tleSortGroupRef = assignSortGroupRef(tle, newtlist);
+               sortcl->eqop = eqop;
+               sortcl->sortop = sortop;
+               sortcl->reverse_sort = false;
+               sortcl->nulls_first = false;
+               sortcl->hashable = false;   /* no need to make this accurate */
+               sortList = lappend(sortList, sortcl);
+           }
+
+           if (sjinfo->semi_can_hash)
+           {
+               /* Create a GROUP BY list for the Agg node to use */
+               Oid         eq_oper;
+               SortGroupClause *groupcl;
+
+               /*
+                * Get the hashable equality operators for the Agg node to
+                * use. Normally these are the same as the IN clause
+                * operators, but if those are cross-type operators then the
+                * equality operators are the ones for the IN clause
+                * operators' RHS datatype.
+                */
+               if (!get_compatible_hash_operators(in_oper, NULL, &eq_oper))
+                   elog(ERROR, "could not find compatible hash operator for operator %u",
+                        in_oper);
+
+               groupcl = makeNode(SortGroupClause);
+               groupcl->tleSortGroupRef = assignSortGroupRef(tle, newtlist);
+               groupcl->eqop = eq_oper;
+               groupcl->sortop = sortop;
+               groupcl->reverse_sort = false;
+               groupcl->nulls_first = false;
+               groupcl->hashable = true;
+               groupClause = lappend(groupClause, groupcl);
+           }
+       }
+
+       unique_rel->reltarget = create_pathtarget(root, newtlist);
+       sortPathkeys = make_pathkeys_for_sortclauses(root, sortList, newtlist);
+   }
+
+   /* build unique paths based on input rel's pathlist */
+   create_final_unique_paths(root, rel, sortPathkeys, groupClause,
+                             sjinfo, unique_rel);
+
+   /* build unique paths based on input rel's partial_pathlist */
+   create_partial_unique_paths(root, rel, sortPathkeys, groupClause,
+                               sjinfo, unique_rel);
+
+   /* Now choose the best path(s) */
+   set_cheapest(unique_rel);
+
+   /*
+    * There shouldn't be any partial paths for the unique relation;
+    * otherwise, we won't be able to properly guarantee uniqueness.
+    */
+   Assert(unique_rel->partial_pathlist == NIL);
+
+   /* Cache the result */
+   rel->unique_rel = unique_rel;
+   rel->unique_pathkeys = sortPathkeys;
+   rel->unique_groupclause = groupClause;
+
+   MemoryContextSwitchTo(oldcontext);
+
+   return unique_rel;
+}
+
+/*
+ * create_final_unique_paths
+ *    Create unique paths in 'unique_rel' based on 'input_rel' pathlist
+ */
+static void
+create_final_unique_paths(PlannerInfo *root, RelOptInfo *input_rel,
+                         List *sortPathkeys, List *groupClause,
+                         SpecialJoinInfo *sjinfo, RelOptInfo *unique_rel)
+{
+   Path       *cheapest_input_path = input_rel->cheapest_total_path;
+
+   /* Estimate number of output rows */
+   unique_rel->rows = estimate_num_groups(root,
+                                          sjinfo->semi_rhs_exprs,
+                                          cheapest_input_path->rows,
+                                          NULL,
+                                          NULL);
+
+   /* Consider sort-based implementations, if possible. */
+   if (sjinfo->semi_can_btree)
+   {
+       ListCell   *lc;
+
+       /*
+        * Use any available suitably-sorted path as input, and also consider
+        * sorting the cheapest-total path and incremental sort on any paths
+        * with presorted keys.
+        *
+        * To save planning time, we ignore parameterized input paths unless
+        * they are the cheapest-total path.
+        */
+       foreach(lc, input_rel->pathlist)
+       {
+           Path       *input_path = (Path *) lfirst(lc);
+           Path       *path;
+           bool        is_sorted;
+           int         presorted_keys;
+
+           /*
+            * Ignore parameterized paths that are not the cheapest-total
+            * path.
+            */
+           if (input_path->param_info &&
+               input_path != cheapest_input_path)
+               continue;
+
+           is_sorted = pathkeys_count_contained_in(sortPathkeys,
+                                                   input_path->pathkeys,
+                                                   &presorted_keys);
+
+           /*
+            * Ignore paths that are not suitably or partially sorted, unless
+            * they are the cheapest total path (no need to deal with paths
+            * which have presorted keys when incremental sort is disabled).
+            */
+           if (!is_sorted && input_path != cheapest_input_path &&
+               (presorted_keys == 0 || !enable_incremental_sort))
+               continue;
+
+           /*
+            * Make a separate ProjectionPath in case we need a Result node.
+            */
+           path = (Path *) create_projection_path(root,
+                                                  unique_rel,
+                                                  input_path,
+                                                  unique_rel->reltarget);
+
+           if (!is_sorted)
+           {
+               /*
+                * We've no need to consider both a sort and incremental sort.
+                * We'll just do a sort if there are no presorted keys and an
+                * incremental sort when there are presorted keys.
+                */
+               if (presorted_keys == 0 || !enable_incremental_sort)
+                   path = (Path *) create_sort_path(root,
+                                                    unique_rel,
+                                                    path,
+                                                    sortPathkeys,
+                                                    -1.0);
+               else
+                   path = (Path *) create_incremental_sort_path(root,
+                                                                unique_rel,
+                                                                path,
+                                                                sortPathkeys,
+                                                                presorted_keys,
+                                                                -1.0);
+           }
+
+           path = (Path *) create_unique_path(root, unique_rel, path,
+                                              list_length(sortPathkeys),
+                                              unique_rel->rows);
+
+           add_path(unique_rel, path);
+       }
+   }
+
+   /* Consider hash-based implementation, if possible. */
+   if (sjinfo->semi_can_hash)
+   {
+       Path       *path;
+
+       /*
+        * Make a separate ProjectionPath in case we need a Result node.
+        */
+       path = (Path *) create_projection_path(root,
+                                              unique_rel,
+                                              cheapest_input_path,
+                                              unique_rel->reltarget);
+
+       path = (Path *) create_agg_path(root,
+                                       unique_rel,
+                                       path,
+                                       cheapest_input_path->pathtarget,
+                                       AGG_HASHED,
+                                       AGGSPLIT_SIMPLE,
+                                       groupClause,
+                                       NIL,
+                                       NULL,
+                                       unique_rel->rows);
+
+       add_path(unique_rel, path);
+   }
+}
+
+/*
+ * create_partial_unique_paths
+ *    Create unique paths in 'unique_rel' based on 'input_rel' partial_pathlist
+ */
+static void
+create_partial_unique_paths(PlannerInfo *root, RelOptInfo *input_rel,
+                           List *sortPathkeys, List *groupClause,
+                           SpecialJoinInfo *sjinfo, RelOptInfo *unique_rel)
+{
+   RelOptInfo *partial_unique_rel;
+   Path       *cheapest_partial_path;
+
+   /* nothing to do when there are no partial paths in the input rel */
+   if (!input_rel->consider_parallel || input_rel->partial_pathlist == NIL)
+       return;
+
+   /*
+    * nothing to do if there's anything in the targetlist that's
+    * parallel-restricted.
+    */
+   if (!is_parallel_safe(root, (Node *) unique_rel->reltarget->exprs))
+       return;
+
+   cheapest_partial_path = linitial(input_rel->partial_pathlist);
+
+   partial_unique_rel = makeNode(RelOptInfo);
+   memcpy(partial_unique_rel, input_rel, sizeof(RelOptInfo));
+
+   /*
+    * clear path info
+    */
+   partial_unique_rel->pathlist = NIL;
+   partial_unique_rel->ppilist = NIL;
+   partial_unique_rel->partial_pathlist = NIL;
+   partial_unique_rel->cheapest_startup_path = NULL;
+   partial_unique_rel->cheapest_total_path = NULL;
+   partial_unique_rel->cheapest_parameterized_paths = NIL;
+
+   /* Estimate number of output rows */
+   partial_unique_rel->rows = estimate_num_groups(root,
+                                                  sjinfo->semi_rhs_exprs,
+                                                  cheapest_partial_path->rows,
+                                                  NULL,
+                                                  NULL);
+   partial_unique_rel->reltarget = unique_rel->reltarget;
+
+   /* Consider sort-based implementations, if possible. */
+   if (sjinfo->semi_can_btree)
+   {
+       ListCell   *lc;
+
+       /*
+        * Use any available suitably-sorted path as input, and also consider
+        * sorting the cheapest partial path and incremental sort on any paths
+        * with presorted keys.
+        */
+       foreach(lc, input_rel->partial_pathlist)
+       {
+           Path       *input_path = (Path *) lfirst(lc);
+           Path       *path;
+           bool        is_sorted;
+           int         presorted_keys;
+
+           is_sorted = pathkeys_count_contained_in(sortPathkeys,
+                                                   input_path->pathkeys,
+                                                   &presorted_keys);
+
+           /*
+            * Ignore paths that are not suitably or partially sorted, unless
+            * they are the cheapest partial path (no need to deal with paths
+            * which have presorted keys when incremental sort is disabled).
+            */
+           if (!is_sorted && input_path != cheapest_partial_path &&
+               (presorted_keys == 0 || !enable_incremental_sort))
+               continue;
+
+           /*
+            * Make a separate ProjectionPath in case we need a Result node.
+            */
+           path = (Path *) create_projection_path(root,
+                                                  partial_unique_rel,
+                                                  input_path,
+                                                  partial_unique_rel->reltarget);
+
+           if (!is_sorted)
+           {
+               /*
+                * We've no need to consider both a sort and incremental sort.
+                * We'll just do a sort if there are no presorted keys and an
+                * incremental sort when there are presorted keys.
+                */
+               if (presorted_keys == 0 || !enable_incremental_sort)
+                   path = (Path *) create_sort_path(root,
+                                                    partial_unique_rel,
+                                                    path,
+                                                    sortPathkeys,
+                                                    -1.0);
+               else
+                   path = (Path *) create_incremental_sort_path(root,
+                                                                partial_unique_rel,
+                                                                path,
+                                                                sortPathkeys,
+                                                                presorted_keys,
+                                                                -1.0);
+           }
+
+           path = (Path *) create_unique_path(root, partial_unique_rel, path,
+                                              list_length(sortPathkeys),
+                                              partial_unique_rel->rows);
+
+           add_partial_path(partial_unique_rel, path);
+       }
+   }
+
+   /* Consider hash-based implementation, if possible. */
+   if (sjinfo->semi_can_hash)
+   {
+       Path       *path;
+
+       /*
+        * Make a separate ProjectionPath in case we need a Result node.
+        */
+       path = (Path *) create_projection_path(root,
+                                              partial_unique_rel,
+                                              cheapest_partial_path,
+                                              partial_unique_rel->reltarget);
+
+       path = (Path *) create_agg_path(root,
+                                       partial_unique_rel,
+                                       path,
+                                       cheapest_partial_path->pathtarget,
+                                       AGG_HASHED,
+                                       AGGSPLIT_SIMPLE,
+                                       groupClause,
+                                       NIL,
+                                       NULL,
+                                       partial_unique_rel->rows);
+
+       add_partial_path(partial_unique_rel, path);
+   }
+
+   if (partial_unique_rel->partial_pathlist != NIL)
+   {
+       generate_useful_gather_paths(root, partial_unique_rel, true);
+       set_cheapest(partial_unique_rel);
+
+       /*
+        * Finally, create paths to unique-ify the final result.  This step is
+        * needed to remove any duplicates due to combining rows from parallel
+        * workers.
+        */
+       create_final_unique_paths(root, partial_unique_rel,
+                                 sortPathkeys, groupClause,
+                                 sjinfo, unique_rel);
+   }
+}
diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c

index eab44da65b8f078c98a890157d196483460e5fe0..28a4ae644406812273c8208cb18c490f736a5ab8 100644 (file)
--- a/src/backend/optimizer/prep/prepunion.c
+++ b/src/backend/optimizer/prep/prepunion.c
@@ -929,11 +929,11 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
                                                  make_pathkeys_for_sortclauses(root, groupList, tlist),
                                                  -1.0);
  
-           path = (Path *) create_upper_unique_path(root,
-                                                    result_rel,
-                                                    path,
-                                                    list_length(path->pathkeys),
-                                                    dNumGroups);
+           path = (Path *) create_unique_path(root,
+                                              result_rel,
+                                              path,
+                                              list_length(path->pathkeys),
+                                              dNumGroups);
  
             add_path(result_rel, path);
  
@@ -946,11 +946,11 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
                                                  make_pathkeys_for_sortclauses(root, groupList, tlist),
                                                  -1.0);
  
-               path = (Path *) create_upper_unique_path(root,
-                                                        result_rel,
-                                                        path,
-                                                        list_length(path->pathkeys),
-                                                        dNumGroups);
+               path = (Path *) create_unique_path(root,
+                                                  result_rel,
+                                                  path,
+                                                  list_length(path->pathkeys),
+                                                  dNumGroups);
                 add_path(result_rel, path);
             }
         }
@@ -970,11 +970,11 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
                                                      NULL);
  
             /* and make the MergeAppend unique */
-           path = (Path *) create_upper_unique_path(root,
-                                                    result_rel,
-                                                    path,
-                                                    list_length(tlist),
-                                                    dNumGroups);
+           path = (Path *) create_unique_path(root,
+                                              result_rel,
+                                              path,
+                                              list_length(tlist),
+                                              dNumGroups);
  
             add_path(result_rel, path);
         }
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c

index a4c5867cdcb8477b1f33f044a2349f3a578a098f..b0da28150d32cf4ea7c3ae99670b2e4b1359c41d 100644 (file)
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -46,7 +46,6 @@ typedef enum
   */
  #define STD_FUZZ_FACTOR 1.01
  
-static List *translate_sub_tlist(List *tlist, int relid);
  static int append_total_cost_compare(const ListCell *a, const ListCell *b);
  static int append_startup_cost_compare(const ListCell *a, const ListCell *b);
  static List *reparameterize_pathlist_by_child(PlannerInfo *root,
@@ -381,7 +380,6 @@ set_cheapest(RelOptInfo *parent_rel)
  
     parent_rel->cheapest_startup_path = cheapest_startup_path;
     parent_rel->cheapest_total_path = cheapest_total_path;
-   parent_rel->cheapest_unique_path = NULL;    /* computed only if needed */
     parent_rel->cheapest_parameterized_paths = parameterized_paths;
  }
  
@@ -1740,246 +1738,6 @@ create_memoize_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
     return pathnode;
  }
  
-/*
- * create_unique_path
- *   Creates a path representing elimination of distinct rows from the
- *   input data.  Distinct-ness is defined according to the needs of the
- *   semijoin represented by sjinfo.  If it is not possible to identify
- *   how to make the data unique, NULL is returned.
- *
- * If used at all, this is likely to be called repeatedly on the same rel;
- * and the input subpath should always be the same (the cheapest_total path
- * for the rel).  So we cache the result.
- */
-UniquePath *
-create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
-                  SpecialJoinInfo *sjinfo)
-{
-   UniquePath *pathnode;
-   Path        sort_path;      /* dummy for result of cost_sort */
-   Path        agg_path;       /* dummy for result of cost_agg */
-   MemoryContext oldcontext;
-   int         numCols;
-
-   /* Caller made a mistake if subpath isn't cheapest_total ... */
-   Assert(subpath == rel->cheapest_total_path);
-   Assert(subpath->parent == rel);
-   /* ... or if SpecialJoinInfo is the wrong one */
-   Assert(sjinfo->jointype == JOIN_SEMI);
-   Assert(bms_equal(rel->relids, sjinfo->syn_righthand));
-
-   /* If result already cached, return it */
-   if (rel->cheapest_unique_path)
-       return (UniquePath *) rel->cheapest_unique_path;
-
-   /* If it's not possible to unique-ify, return NULL */
-   if (!(sjinfo->semi_can_btree || sjinfo->semi_can_hash))
-       return NULL;
-
-   /*
-    * When called during GEQO join planning, we are in a short-lived memory
-    * context.  We must make sure that the path and any subsidiary data
-    * structures created for a baserel survive the GEQO cycle, else the
-    * baserel is trashed for future GEQO cycles.  On the other hand, when we
-    * are creating those for a joinrel during GEQO, we don't want them to
-    * clutter the main planning context.  Upshot is that the best solution is
-    * to explicitly allocate memory in the same context the given RelOptInfo
-    * is in.
-    */
-   oldcontext = MemoryContextSwitchTo(GetMemoryChunkContext(rel));
-
-   pathnode = makeNode(UniquePath);
-
-   pathnode->path.pathtype = T_Unique;
-   pathnode->path.parent = rel;
-   pathnode->path.pathtarget = rel->reltarget;
-   pathnode->path.param_info = subpath->param_info;
-   pathnode->path.parallel_aware = false;
-   pathnode->path.parallel_safe = rel->consider_parallel &&
-       subpath->parallel_safe;
-   pathnode->path.parallel_workers = subpath->parallel_workers;
-
-   /*
-    * Assume the output is unsorted, since we don't necessarily have pathkeys
-    * to represent it.  (This might get overridden below.)
-    */
-   pathnode->path.pathkeys = NIL;
-
-   pathnode->subpath = subpath;
-
-   /*
-    * Under GEQO and when planning child joins, the sjinfo might be
-    * short-lived, so we'd better make copies of data structures we extract
-    * from it.
-    */
-   pathnode->in_operators = copyObject(sjinfo->semi_operators);
-   pathnode->uniq_exprs = copyObject(sjinfo->semi_rhs_exprs);
-
-   /*
-    * If the input is a relation and it has a unique index that proves the
-    * semi_rhs_exprs are unique, then we don't need to do anything.  Note
-    * that relation_has_unique_index_for automatically considers restriction
-    * clauses for the rel, as well.
-    */
-   if (rel->rtekind == RTE_RELATION && sjinfo->semi_can_btree &&
-       relation_has_unique_index_for(root, rel, NIL,
-                                     sjinfo->semi_rhs_exprs,
-                                     sjinfo->semi_operators))
-   {
-       pathnode->umethod = UNIQUE_PATH_NOOP;
-       pathnode->path.rows = rel->rows;
-       pathnode->path.disabled_nodes = subpath->disabled_nodes;
-       pathnode->path.startup_cost = subpath->startup_cost;
-       pathnode->path.total_cost = subpath->total_cost;
-       pathnode->path.pathkeys = subpath->pathkeys;
-
-       rel->cheapest_unique_path = (Path *) pathnode;
-
-       MemoryContextSwitchTo(oldcontext);
-
-       return pathnode;
-   }
-
-   /*
-    * If the input is a subquery whose output must be unique already, then we
-    * don't need to do anything.  The test for uniqueness has to consider
-    * exactly which columns we are extracting; for example "SELECT DISTINCT
-    * x,y" doesn't guarantee that x alone is distinct. So we cannot check for
-    * this optimization unless semi_rhs_exprs consists only of simple Vars
-    * referencing subquery outputs.  (Possibly we could do something with
-    * expressions in the subquery outputs, too, but for now keep it simple.)
-    */
-   if (rel->rtekind == RTE_SUBQUERY)
-   {
-       RangeTblEntry *rte = planner_rt_fetch(rel->relid, root);
-
-       if (query_supports_distinctness(rte->subquery))
-       {
-           List       *sub_tlist_colnos;
-
-           sub_tlist_colnos = translate_sub_tlist(sjinfo->semi_rhs_exprs,
-                                                  rel->relid);
-
-           if (sub_tlist_colnos &&
-               query_is_distinct_for(rte->subquery,
-                                     sub_tlist_colnos,
-                                     sjinfo->semi_operators))
-           {
-               pathnode->umethod = UNIQUE_PATH_NOOP;
-               pathnode->path.rows = rel->rows;
-               pathnode->path.disabled_nodes = subpath->disabled_nodes;
-               pathnode->path.startup_cost = subpath->startup_cost;
-               pathnode->path.total_cost = subpath->total_cost;
-               pathnode->path.pathkeys = subpath->pathkeys;
-
-               rel->cheapest_unique_path = (Path *) pathnode;
-
-               MemoryContextSwitchTo(oldcontext);
-
-               return pathnode;
-           }
-       }
-   }
-
-   /* Estimate number of output rows */
-   pathnode->path.rows = estimate_num_groups(root,
-                                             sjinfo->semi_rhs_exprs,
-                                             rel->rows,
-                                             NULL,
-                                             NULL);
-   numCols = list_length(sjinfo->semi_rhs_exprs);
-
-   if (sjinfo->semi_can_btree)
-   {
-       /*
-        * Estimate cost for sort+unique implementation
-        */
-       cost_sort(&sort_path, root, NIL,
-                 subpath->disabled_nodes,
-                 subpath->total_cost,
-                 rel->rows,
-                 subpath->pathtarget->width,
-                 0.0,
-                 work_mem,
-                 -1.0);
-
-       /*
-        * Charge one cpu_operator_cost per comparison per input tuple. We
-        * assume all columns get compared at most of the tuples. (XXX
-        * probably this is an overestimate.)  This should agree with
-        * create_upper_unique_path.
-        */
-       sort_path.total_cost += cpu_operator_cost * rel->rows * numCols;
-   }
-
-   if (sjinfo->semi_can_hash)
-   {
-       /*
-        * Estimate the overhead per hashtable entry at 64 bytes (same as in
-        * planner.c).
-        */
-       int         hashentrysize = subpath->pathtarget->width + 64;
-
-       if (hashentrysize * pathnode->path.rows > get_hash_memory_limit())
-       {
-           /*
-            * We should not try to hash.  Hack the SpecialJoinInfo to
-            * remember this, in case we come through here again.
-            */
-           sjinfo->semi_can_hash = false;
-       }
-       else
-           cost_agg(&agg_path, root,
-                    AGG_HASHED, NULL,
-                    numCols, pathnode->path.rows,
-                    NIL,
-                    subpath->disabled_nodes,
-                    subpath->startup_cost,
-                    subpath->total_cost,
-                    rel->rows,
-                    subpath->pathtarget->width);
-   }
-
-   if (sjinfo->semi_can_btree && sjinfo->semi_can_hash)
-   {
-       if (agg_path.disabled_nodes < sort_path.disabled_nodes ||
-           (agg_path.disabled_nodes == sort_path.disabled_nodes &&
-            agg_path.total_cost < sort_path.total_cost))
-           pathnode->umethod = UNIQUE_PATH_HASH;
-       else
-           pathnode->umethod = UNIQUE_PATH_SORT;
-   }
-   else if (sjinfo->semi_can_btree)
-       pathnode->umethod = UNIQUE_PATH_SORT;
-   else if (sjinfo->semi_can_hash)
-       pathnode->umethod = UNIQUE_PATH_HASH;
-   else
-   {
-       /* we can get here only if we abandoned hashing above */
-       MemoryContextSwitchTo(oldcontext);
-       return NULL;
-   }
-
-   if (pathnode->umethod == UNIQUE_PATH_HASH)
-   {
-       pathnode->path.disabled_nodes = agg_path.disabled_nodes;
-       pathnode->path.startup_cost = agg_path.startup_cost;
-       pathnode->path.total_cost = agg_path.total_cost;
-   }
-   else
-   {
-       pathnode->path.disabled_nodes = sort_path.disabled_nodes;
-       pathnode->path.startup_cost = sort_path.startup_cost;
-       pathnode->path.total_cost = sort_path.total_cost;
-   }
-
-   rel->cheapest_unique_path = (Path *) pathnode;
-
-   MemoryContextSwitchTo(oldcontext);
-
-   return pathnode;
-}
-
  /*
   * create_gather_merge_path
   *
@@ -2031,36 +1789,6 @@ create_gather_merge_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
     return pathnode;
  }
  
-/*
- * translate_sub_tlist - get subquery column numbers represented by tlist
- *
- * The given targetlist usually contains only Vars referencing the given relid.
- * Extract their varattnos (ie, the column numbers of the subquery) and return
- * as an integer List.
- *
- * If any of the tlist items is not a simple Var, we cannot determine whether
- * the subquery's uniqueness condition (if any) matches ours, so punt and
- * return NIL.
- */
-static List *
-translate_sub_tlist(List *tlist, int relid)
-{
-   List       *result = NIL;
-   ListCell   *l;
-
-   foreach(l, tlist)
-   {
-       Var        *var = (Var *) lfirst(l);
-
-       if (!var || !IsA(var, Var) ||
-           var->varno != relid)
-           return NIL;         /* punt */
-
-       result = lappend_int(result, var->varattno);
-   }
-   return result;
-}
-
  /*
   * create_gather_path
   *   Creates a path corresponding to a gather scan, returning the
@@ -2818,8 +2546,7 @@ create_projection_path(PlannerInfo *root,
     pathnode->path.pathtype = T_Result;
     pathnode->path.parent = rel;
     pathnode->path.pathtarget = target;
-   /* For now, assume we are above any joins, so no parameterization */
-   pathnode->path.param_info = NULL;
+   pathnode->path.param_info = subpath->param_info;
     pathnode->path.parallel_aware = false;
     pathnode->path.parallel_safe = rel->consider_parallel &&
         subpath->parallel_safe &&
@@ -3074,8 +2801,7 @@ create_incremental_sort_path(PlannerInfo *root,
     pathnode->path.parent = rel;
     /* Sort doesn't project, so use source path's pathtarget */
     pathnode->path.pathtarget = subpath->pathtarget;
-   /* For now, assume we are above any joins, so no parameterization */
-   pathnode->path.param_info = NULL;
+   pathnode->path.param_info = subpath->param_info;
     pathnode->path.parallel_aware = false;
     pathnode->path.parallel_safe = rel->consider_parallel &&
         subpath->parallel_safe;
@@ -3122,8 +2848,7 @@ create_sort_path(PlannerInfo *root,
     pathnode->path.parent = rel;
     /* Sort doesn't project, so use source path's pathtarget */
     pathnode->path.pathtarget = subpath->pathtarget;
-   /* For now, assume we are above any joins, so no parameterization */
-   pathnode->path.param_info = NULL;
+   pathnode->path.param_info = subpath->param_info;
     pathnode->path.parallel_aware = false;
     pathnode->path.parallel_safe = rel->consider_parallel &&
         subpath->parallel_safe;
@@ -3199,13 +2924,10 @@ create_group_path(PlannerInfo *root,
  }
  
  /*
- * create_upper_unique_path
+ * create_unique_path
   *   Creates a pathnode that represents performing an explicit Unique step
   *   on presorted input.
   *
- * This produces a Unique plan node, but the use-case is so different from
- * create_unique_path that it doesn't seem worth trying to merge the two.
- *
   * 'rel' is the parent relation associated with the result
   * 'subpath' is the path representing the source of data
   * 'numCols' is the number of grouping columns
@@ -3214,21 +2936,20 @@ create_group_path(PlannerInfo *root,
   * The input path must be sorted on the grouping columns, plus possibly
   * additional columns; so the first numCols pathkeys are the grouping columns
   */
-UpperUniquePath *
-create_upper_unique_path(PlannerInfo *root,
-                        RelOptInfo *rel,
-                        Path *subpath,
-                        int numCols,
-                        double numGroups)
+UniquePath *
+create_unique_path(PlannerInfo *root,
+                  RelOptInfo *rel,
+                  Path *subpath,
+                  int numCols,
+                  double numGroups)
  {
-   UpperUniquePath *pathnode = makeNode(UpperUniquePath);
+   UniquePath *pathnode = makeNode(UniquePath);
  
     pathnode->path.pathtype = T_Unique;
     pathnode->path.parent = rel;
     /* Unique doesn't project, so use source path's pathtarget */
     pathnode->path.pathtarget = subpath->pathtarget;
-   /* For now, assume we are above any joins, so no parameterization */
-   pathnode->path.param_info = NULL;
+   pathnode->path.param_info = subpath->param_info;
     pathnode->path.parallel_aware = false;
     pathnode->path.parallel_safe = rel->consider_parallel &&
         subpath->parallel_safe;
@@ -3284,8 +3005,7 @@ create_agg_path(PlannerInfo *root,
     pathnode->path.pathtype = T_Agg;
     pathnode->path.parent = rel;
     pathnode->path.pathtarget = target;
-   /* For now, assume we are above any joins, so no parameterization */
-   pathnode->path.param_info = NULL;
+   pathnode->path.param_info = subpath->param_info;
     pathnode->path.parallel_aware = false;
     pathnode->path.parallel_safe = rel->consider_parallel &&
         subpath->parallel_safe;
diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c

index ff507331a061a0139d57a86826e905ce04ec760f..0e523d2eb5b4486fb2b47ea8463199a3fdb21829 100644 (file)
--- a/src/backend/optimizer/util/relnode.c
+++ b/src/backend/optimizer/util/relnode.c
@@ -217,7 +217,6 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptInfo *parent)
     rel->partial_pathlist = NIL;
     rel->cheapest_startup_path = NULL;
     rel->cheapest_total_path = NULL;
-   rel->cheapest_unique_path = NULL;
     rel->cheapest_parameterized_paths = NIL;
     rel->relid = relid;
     rel->rtekind = rte->rtekind;
@@ -269,6 +268,9 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptInfo *parent)
     rel->fdw_private = NULL;
     rel->unique_for_rels = NIL;
     rel->non_unique_for_rels = NIL;
+   rel->unique_rel = NULL;
+   rel->unique_pathkeys = NIL;
+   rel->unique_groupclause = NIL;
     rel->baserestrictinfo = NIL;
     rel->baserestrictcost.startup = 0;
     rel->baserestrictcost.per_tuple = 0;
@@ -713,7 +715,6 @@ build_join_rel(PlannerInfo *root,
     joinrel->partial_pathlist = NIL;
     joinrel->cheapest_startup_path = NULL;
     joinrel->cheapest_total_path = NULL;
-   joinrel->cheapest_unique_path = NULL;
     joinrel->cheapest_parameterized_paths = NIL;
     /* init direct_lateral_relids from children; we'll finish it up below */
     joinrel->direct_lateral_relids =
@@ -748,6 +749,9 @@ build_join_rel(PlannerInfo *root,
     joinrel->fdw_private = NULL;
     joinrel->unique_for_rels = NIL;
     joinrel->non_unique_for_rels = NIL;
+   joinrel->unique_rel = NULL;
+   joinrel->unique_pathkeys = NIL;
+   joinrel->unique_groupclause = NIL;
     joinrel->baserestrictinfo = NIL;
     joinrel->baserestrictcost.startup = 0;
     joinrel->baserestrictcost.per_tuple = 0;
@@ -906,7 +910,6 @@ build_child_join_rel(PlannerInfo *root, RelOptInfo *outer_rel,
     joinrel->partial_pathlist = NIL;
     joinrel->cheapest_startup_path = NULL;
     joinrel->cheapest_total_path = NULL;
-   joinrel->cheapest_unique_path = NULL;
     joinrel->cheapest_parameterized_paths = NIL;
     joinrel->direct_lateral_relids = NULL;
     joinrel->lateral_relids = NULL;
@@ -933,6 +936,9 @@ build_child_join_rel(PlannerInfo *root, RelOptInfo *outer_rel,
     joinrel->useridiscurrent = false;
     joinrel->fdwroutine = NULL;
     joinrel->fdw_private = NULL;
+   joinrel->unique_rel = NULL;
+   joinrel->unique_pathkeys = NIL;
+   joinrel->unique_groupclause = NIL;
     joinrel->baserestrictinfo = NIL;
     joinrel->baserestrictcost.startup = 0;
     joinrel->baserestrictcost.per_tuple = 0;
@@ -1488,7 +1494,6 @@ fetch_upper_rel(PlannerInfo *root, UpperRelationKind kind, Relids relids)
     upperrel->pathlist = NIL;
     upperrel->cheapest_startup_path = NULL;
     upperrel->cheapest_total_path = NULL;
-   upperrel->cheapest_unique_path = NULL;
     upperrel->cheapest_parameterized_paths = NIL;
  
     root->upper_rels[kind] = lappend(root->upper_rels[kind], upperrel);
diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h

index b2dc380b57b101884d4236529c8cadb5973ecbbd..fb3957e75e5f1a6a9d08904c4ed1662d2bd67483 100644 (file)
--- a/src/include/nodes/nodes.h
+++ b/src/include/nodes/nodes.h
@@ -323,8 +323,8 @@ typedef enum JoinType
      * These codes are used internally in the planner, but are not supported
      * by the executor (nor, indeed, by most of the planner).
      */
-   JOIN_UNIQUE_OUTER,          /* LHS path must be made unique */
-   JOIN_UNIQUE_INNER,          /* RHS path must be made unique */
+   JOIN_UNIQUE_OUTER,          /* LHS has to be made unique */
+   JOIN_UNIQUE_INNER,          /* RHS has to be made unique */
  
     /*
      * We might need additional join types someday.
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h

index ad2726f026f7dff5a17c16f19a9eaf12d99a0ecb..4a903d1ec1832213d39673c3c8186cc66c8abffa 100644 (file)
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -703,8 +703,6 @@ typedef struct PartitionSchemeData *PartitionScheme;
   *         (regardless of ordering) among the unparameterized paths;
   *         or if there is no unparameterized path, the path with lowest
   *         total cost among the paths with minimum parameterization
- *     cheapest_unique_path - for caching cheapest path to produce unique
- *         (no duplicates) output from relation; NULL if not yet requested
   *     cheapest_parameterized_paths - best paths for their parameterizations;
   *         always includes cheapest_total_path, even if that's unparameterized
   *     direct_lateral_relids - rels this rel has direct LATERAL references to
@@ -770,6 +768,21 @@ typedef struct PartitionSchemeData *PartitionScheme;
   *                 other rels for which we have tried and failed to prove
   *                 this one unique
   *
+ * Three fields are used to cache information about unique-ification of this
+ * relation.  This is used to support semijoins where the relation appears on
+ * the RHS: the relation is first unique-ified, and then a regular join is
+ * performed:
+ *
+ *     unique_rel - the unique-ified version of the relation, containing paths
+ *                 that produce unique (no duplicates) output from relation;
+ *                 NULL if not yet requested
+ *     unique_pathkeys - pathkeys that represent the ordering requirements for
+ *                 the relation's output in sort-based unique-ification
+ *                 implementations
+ *     unique_groupclause - a list of SortGroupClause nodes that represent the
+ *                 columns to be grouped on in hash-based unique-ification
+ *                 implementations
+ *
   * The presence of the following fields depends on the restrictions
   * and joins that the relation participates in:
   *
@@ -930,7 +943,6 @@ typedef struct RelOptInfo
     List       *partial_pathlist;   /* partial Paths */
     struct Path *cheapest_startup_path;
     struct Path *cheapest_total_path;
-   struct Path *cheapest_unique_path;
     List       *cheapest_parameterized_paths;
  
     /*
@@ -1004,6 +1016,16 @@ typedef struct RelOptInfo
     /* known not unique for these set(s) */
     List       *non_unique_for_rels;
  
+   /*
+    * information about unique-ification of this relation
+    */
+   /* the unique-ified version of the relation */
+   struct RelOptInfo *unique_rel;
+   /* pathkeys for sort-based unique-ification implementations */
+   List       *unique_pathkeys;
+   /* SortGroupClause nodes for hash-based unique-ification implementations */
+   List       *unique_groupclause;
+
     /*
      * used by various scans and joins:
      */
@@ -1097,6 +1119,17 @@ typedef struct RelOptInfo
     ((rel)->part_scheme && (rel)->boundinfo && (rel)->nparts > 0 && \
      (rel)->part_rels && (rel)->partexprs && (rel)->nullable_partexprs)
  
+/*
+ * Is given relation unique-ified?
+ *
+ * When the nominal jointype is JOIN_INNER, sjinfo->jointype is JOIN_SEMI, and
+ * the given rel is exactly the RHS of the semijoin, it indicates that the rel
+ * has been unique-ified.
+ */
+#define RELATION_WAS_MADE_UNIQUE(rel, sjinfo, nominal_jointype) \
+   ((nominal_jointype) == JOIN_INNER && (sjinfo)->jointype == JOIN_SEMI && \
+    bms_equal((sjinfo)->syn_righthand, (rel)->relids))
+
  /*
   * IndexOptInfo
   *     Per-index information for planning/optimization
@@ -1741,8 +1774,8 @@ typedef struct ParamPathInfo
   * and the specified outer rel(s).
   *
   * "rows" is the same as parent->rows in simple paths, but in parameterized
- * paths and UniquePaths it can be less than parent->rows, reflecting the
- * fact that we've filtered by extra join conditions or removed duplicates.
+ * paths it can be less than parent->rows, reflecting the fact that we've
+ * filtered by extra join conditions.
   *
   * "pathkeys" is a List of PathKey nodes (see above), describing the sort
   * ordering of the path's output rows.
@@ -2141,34 +2174,6 @@ typedef struct MemoizePath
     double      est_hit_ratio;  /* estimated cache hit ratio, for EXPLAIN */
  } MemoizePath;
  
-/*
- * UniquePath represents elimination of distinct rows from the output of
- * its subpath.
- *
- * This can represent significantly different plans: either hash-based or
- * sort-based implementation, or a no-op if the input path can be proven
- * distinct already.  The decision is sufficiently localized that it's not
- * worth having separate Path node types.  (Note: in the no-op case, we could
- * eliminate the UniquePath node entirely and just return the subpath; but
- * it's convenient to have a UniquePath in the path tree to signal upper-level
- * routines that the input is known distinct.)
- */
-typedef enum UniquePathMethod
-{
-   UNIQUE_PATH_NOOP,           /* input is known unique already */
-   UNIQUE_PATH_HASH,           /* use hashing */
-   UNIQUE_PATH_SORT,           /* use sorting */
-} UniquePathMethod;
-
-typedef struct UniquePath
-{
-   Path        path;
-   Path       *subpath;
-   UniquePathMethod umethod;
-   List       *in_operators;   /* equality operators of the IN clause */
-   List       *uniq_exprs;     /* expressions to be made unique */
-} UniquePath;
-
  /*
   * GatherPath runs several copies of a plan in parallel and collects the
   * results.  The parallel leader may also execute the plan, unless the
@@ -2375,17 +2380,17 @@ typedef struct GroupPath
  } GroupPath;
  
  /*
- * UpperUniquePath represents adjacent-duplicate removal (in presorted input)
+ * UniquePath represents adjacent-duplicate removal (in presorted input)
   *
   * The columns to be compared are the first numkeys columns of the path's
   * pathkeys.  The input is presumed already sorted that way.
   */
-typedef struct UpperUniquePath
+typedef struct UniquePath
  {
     Path        path;
     Path       *subpath;        /* path representing input source */
     int         numkeys;        /* number of pathkey columns to compare */
-} UpperUniquePath;
+} UniquePath;
  
  /*
   * AggPath represents generic computation of aggregate functions
diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h

index 58936e963cb6bd9250018e6d701fca1f6b041841..763cd25bb3c9a0a4f0d243fd0b574da1693d5057 100644 (file)
--- a/src/include/optimizer/pathnode.h
+++ b/src/include/optimizer/pathnode.h
@@ -91,8 +91,6 @@ extern MemoizePath *create_memoize_path(PlannerInfo *root,
                                         bool singlerow,
                                         bool binary_mode,
                                         Cardinality est_calls);
-extern UniquePath *create_unique_path(PlannerInfo *root, RelOptInfo *rel,
-                                     Path *subpath, SpecialJoinInfo *sjinfo);
  extern GatherPath *create_gather_path(PlannerInfo *root,
                                       RelOptInfo *rel, Path *subpath, PathTarget *target,
                                       Relids required_outer, double *rows);
@@ -223,11 +221,11 @@ extern GroupPath *create_group_path(PlannerInfo *root,
                                     List *groupClause,
                                     List *qual,
                                     double numGroups);
-extern UpperUniquePath *create_upper_unique_path(PlannerInfo *root,
-                                                RelOptInfo *rel,
-                                                Path *subpath,
-                                                int numCols,
-                                                double numGroups);
+extern UniquePath *create_unique_path(PlannerInfo *root,
+                                     RelOptInfo *rel,
+                                     Path *subpath,
+                                     int numCols,
+                                     double numGroups);
  extern AggPath *create_agg_path(PlannerInfo *root,
                                 RelOptInfo *rel,
                                 Path *subpath,
diff --git a/src/include/optimizer/planner.h b/src/include/optimizer/planner.h

index 347c582a78927854d54dd71d5842671c68b4db61..f220e9a270d5cfdeba54cfd2a9aceffcb7726c85 100644 (file)
--- a/src/include/optimizer/planner.h
+++ b/src/include/optimizer/planner.h
@@ -59,4 +59,7 @@ extern Path *get_cheapest_fractional_path(RelOptInfo *rel,
  
  extern Expr *preprocess_phv_expression(PlannerInfo *root, Expr *expr);
  
+extern RelOptInfo *create_unique_paths(PlannerInfo *root, RelOptInfo *rel,
+                                      SpecialJoinInfo *sjinfo);
+
  #endif                         /* PLANNER_H */
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out

index 4d5d35d07270dce88ee4c6b3c35314f53fd501fb..98b05c94a1195b34695cec872c2493ae929fb118 100644 (file)
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -9468,23 +9468,20 @@ where exists (select 1 from tenk1 t3
  ---------------------------------------------------------------------------------
   Nested Loop
     Output: t1.unique1, t2.hundred
-   ->  Hash Join
+   ->  Merge Join
           Output: t1.unique1, t3.tenthous
-         Hash Cond: (t3.thousand = t1.unique1)
-         ->  HashAggregate
+         Merge Cond: (t3.thousand = t1.unique1)
+         ->  Unique
                 Output: t3.thousand, t3.tenthous
-               Group Key: t3.thousand, t3.tenthous
                 ->  Index Only Scan using tenk1_thous_tenthous on public.tenk1 t3
                       Output: t3.thousand, t3.tenthous
-         ->  Hash
+         ->  Index Only Scan using onek_unique1 on public.onek t1
                 Output: t1.unique1
-               ->  Index Only Scan using onek_unique1 on public.onek t1
-                     Output: t1.unique1
-                     Index Cond: (t1.unique1 < 1)
+               Index Cond: (t1.unique1 < 1)
     ->  Index Only Scan using tenk1_hundred on public.tenk1 t2
           Output: t2.hundred
           Index Cond: (t2.hundred = t3.tenthous)
-(18 rows)
+(15 rows)
  
  -- ... unless it actually is unique
  create table j3 as select unique1, tenthous from onek;
diff --git a/src/test/regress/expected/partition_join.out b/src/test/regress/expected/partition_join.out

index d5368186caa9f0b97cd50d4b4a54151f402afa88..24e06845f921e71b2848f910c24f32dde0cf74bf 100644 (file)
--- a/src/test/regress/expected/partition_join.out
+++ b/src/test/regress/expected/partition_join.out
@@ -1134,48 +1134,50 @@ EXPLAIN (COSTS OFF)
  SELECT t1.* FROM prt1 t1 WHERE t1.a IN (SELECT t1.b FROM prt2 t1, prt1_e t2 WHERE t1.a = 0 AND t1.b = (t2.a + t2.b)/2) AND t1.b = 0 ORDER BY t1.a;
                                     QUERY PLAN                                    
  ---------------------------------------------------------------------------------
- Sort
+ Merge Append
     Sort Key: t1.a
-   ->  Append
-         ->  Nested Loop
-               Join Filter: (t1_2.a = t1_5.b)
-               ->  HashAggregate
-                     Group Key: t1_5.b
+   ->  Nested Loop
+         Join Filter: (t1_2.a = t1_5.b)
+         ->  Unique
+               ->  Sort
+                     Sort Key: t1_5.b
                       ->  Hash Join
                             Hash Cond: (((t2_1.a + t2_1.b) / 2) = t1_5.b)
                             ->  Seq Scan on prt1_e_p1 t2_1
                             ->  Hash
                                   ->  Seq Scan on prt2_p1 t1_5
                                         Filter: (a = 0)
-               ->  Index Scan using iprt1_p1_a on prt1_p1 t1_2
-                     Index Cond: (a = ((t2_1.a + t2_1.b) / 2))
-                     Filter: (b = 0)
-         ->  Nested Loop
-               Join Filter: (t1_3.a = t1_6.b)
-               ->  HashAggregate
-                     Group Key: t1_6.b
+         ->  Index Scan using iprt1_p1_a on prt1_p1 t1_2
+               Index Cond: (a = ((t2_1.a + t2_1.b) / 2))
+               Filter: (b = 0)
+   ->  Nested Loop
+         Join Filter: (t1_3.a = t1_6.b)
+         ->  Unique
+               ->  Sort
+                     Sort Key: t1_6.b
                       ->  Hash Join
                             Hash Cond: (((t2_2.a + t2_2.b) / 2) = t1_6.b)
                             ->  Seq Scan on prt1_e_p2 t2_2
                             ->  Hash
                                   ->  Seq Scan on prt2_p2 t1_6
                                         Filter: (a = 0)
-               ->  Index Scan using iprt1_p2_a on prt1_p2 t1_3
-                     Index Cond: (a = ((t2_2.a + t2_2.b) / 2))
-                     Filter: (b = 0)
-         ->  Nested Loop
-               Join Filter: (t1_4.a = t1_7.b)
-               ->  HashAggregate
-                     Group Key: t1_7.b
+         ->  Index Scan using iprt1_p2_a on prt1_p2 t1_3
+               Index Cond: (a = ((t2_2.a + t2_2.b) / 2))
+               Filter: (b = 0)
+   ->  Nested Loop
+         Join Filter: (t1_4.a = t1_7.b)
+         ->  Unique
+               ->  Sort
+                     Sort Key: t1_7.b
                       ->  Nested Loop
                             ->  Seq Scan on prt2_p3 t1_7
                                   Filter: (a = 0)
                             ->  Index Scan using iprt1_e_p3_ab2 on prt1_e_p3 t2_3
                                   Index Cond: (((a + b) / 2) = t1_7.b)
-               ->  Index Scan using iprt1_p3_a on prt1_p3 t1_4
-                     Index Cond: (a = ((t2_3.a + t2_3.b) / 2))
-                     Filter: (b = 0)
-(41 rows)
+         ->  Index Scan using iprt1_p3_a on prt1_p3 t1_4
+               Index Cond: (a = ((t2_3.a + t2_3.b) / 2))
+               Filter: (b = 0)
+(43 rows)
  
  SELECT t1.* FROM prt1 t1 WHERE t1.a IN (SELECT t1.b FROM prt2 t1, prt1_e t2 WHERE t1.a = 0 AND t1.b = (t2.a + t2.b)/2) AND t1.b = 0 ORDER BY t1.a;
    a  | b |  c   
@@ -1190,46 +1192,48 @@ EXPLAIN (COSTS OFF)
  SELECT t1.* FROM prt1 t1 WHERE t1.a IN (SELECT t1.b FROM prt2 t1 WHERE t1.b IN (SELECT (t1.a + t1.b)/2 FROM prt1_e t1 WHERE t1.c = 0)) AND t1.b = 0 ORDER BY t1.a;
                                  QUERY PLAN                                 
  ---------------------------------------------------------------------------
- Sort
+ Merge Append
     Sort Key: t1.a
-   ->  Append
-         ->  Nested Loop
-               ->  HashAggregate
-                     Group Key: t1_6.b
+   ->  Nested Loop
+         ->  Unique
+               ->  Sort
+                     Sort Key: t1_6.b
                       ->  Hash Semi Join
                             Hash Cond: (t1_6.b = ((t1_9.a + t1_9.b) / 2))
                             ->  Seq Scan on prt2_p1 t1_6
                             ->  Hash
                                   ->  Seq Scan on prt1_e_p1 t1_9
                                         Filter: (c = 0)
-               ->  Index Scan using iprt1_p1_a on prt1_p1 t1_3
-                     Index Cond: (a = t1_6.b)
-                     Filter: (b = 0)
-         ->  Nested Loop
-               ->  HashAggregate
-                     Group Key: t1_7.b
+         ->  Index Scan using iprt1_p1_a on prt1_p1 t1_3
+               Index Cond: (a = t1_6.b)
+               Filter: (b = 0)
+   ->  Nested Loop
+         ->  Unique
+               ->  Sort
+                     Sort Key: t1_7.b
                       ->  Hash Semi Join
                             Hash Cond: (t1_7.b = ((t1_10.a + t1_10.b) / 2))
                             ->  Seq Scan on prt2_p2 t1_7
                             ->  Hash
                                   ->  Seq Scan on prt1_e_p2 t1_10
                                         Filter: (c = 0)
-               ->  Index Scan using iprt1_p2_a on prt1_p2 t1_4
-                     Index Cond: (a = t1_7.b)
-                     Filter: (b = 0)
-         ->  Nested Loop
-               ->  HashAggregate
-                     Group Key: t1_8.b
+         ->  Index Scan using iprt1_p2_a on prt1_p2 t1_4
+               Index Cond: (a = t1_7.b)
+               Filter: (b = 0)
+   ->  Nested Loop
+         ->  Unique
+               ->  Sort
+                     Sort Key: t1_8.b
                       ->  Hash Semi Join
                             Hash Cond: (t1_8.b = ((t1_11.a + t1_11.b) / 2))
                             ->  Seq Scan on prt2_p3 t1_8
                             ->  Hash
                                   ->  Seq Scan on prt1_e_p3 t1_11
                                         Filter: (c = 0)
-               ->  Index Scan using iprt1_p3_a on prt1_p3 t1_5
-                     Index Cond: (a = t1_8.b)
-                     Filter: (b = 0)
-(39 rows)
+         ->  Index Scan using iprt1_p3_a on prt1_p3 t1_5
+               Index Cond: (a = t1_8.b)
+               Filter: (b = 0)
+(41 rows)
  
  SELECT t1.* FROM prt1 t1 WHERE t1.a IN (SELECT t1.b FROM prt2 t1 WHERE t1.b IN (SELECT (t1.a + t1.b)/2 FROM prt1_e t1 WHERE t1.c = 0)) AND t1.b = 0 ORDER BY t1.a;
    a  | b |  c   
diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out

index 18fed63e7381a8e641a171bbf583c02be36243f9..0563d0cd5a19974a17ba5d36a2a6c842b7d25636 100644 (file)
--- a/src/test/regress/expected/subselect.out
+++ b/src/test/regress/expected/subselect.out
@@ -707,6 +707,212 @@ select * from numeric_table
                         3
  (4 rows)
  
+--
+-- Test that a semijoin implemented by unique-ifying the RHS can explore
+-- different paths of the RHS rel.
+--
+create table semijoin_unique_tbl (a int, b int);
+insert into semijoin_unique_tbl select i%10, i%10 from generate_series(1,1000)i;
+create index on semijoin_unique_tbl(a, b);
+analyze semijoin_unique_tbl;
+-- Ensure that we get a plan with Unique + IndexScan
+explain (verbose, costs off)
+select * from semijoin_unique_tbl t1, semijoin_unique_tbl t2
+where (t1.a, t2.a) in (select a, b from semijoin_unique_tbl t3)
+order by t1.a, t2.a;
+                                              QUERY PLAN                                              
+------------------------------------------------------------------------------------------------------
+ Nested Loop
+   Output: t1.a, t1.b, t2.a, t2.b
+   ->  Merge Join
+         Output: t1.a, t1.b, t3.b
+         Merge Cond: (t3.a = t1.a)
+         ->  Unique
+               Output: t3.a, t3.b
+               ->  Index Only Scan using semijoin_unique_tbl_a_b_idx on public.semijoin_unique_tbl t3
+                     Output: t3.a, t3.b
+         ->  Index Only Scan using semijoin_unique_tbl_a_b_idx on public.semijoin_unique_tbl t1
+               Output: t1.a, t1.b
+   ->  Memoize
+         Output: t2.a, t2.b
+         Cache Key: t3.b
+         Cache Mode: logical
+         ->  Index Only Scan using semijoin_unique_tbl_a_b_idx on public.semijoin_unique_tbl t2
+               Output: t2.a, t2.b
+               Index Cond: (t2.a = t3.b)
+(18 rows)
+
+-- Ensure that we can unique-ify expressions more complex than plain Vars
+explain (verbose, costs off)
+select * from semijoin_unique_tbl t1, semijoin_unique_tbl t2
+where (t1.a, t2.a) in (select a+1, b+1 from semijoin_unique_tbl t3)
+order by t1.a, t2.a;
+                                           QUERY PLAN                                           
+------------------------------------------------------------------------------------------------
+ Incremental Sort
+   Output: t1.a, t1.b, t2.a, t2.b
+   Sort Key: t1.a, t2.a
+   Presorted Key: t1.a
+   ->  Merge Join
+         Output: t1.a, t1.b, t2.a, t2.b
+         Merge Cond: (t1.a = ((t3.a + 1)))
+         ->  Index Only Scan using semijoin_unique_tbl_a_b_idx on public.semijoin_unique_tbl t1
+               Output: t1.a, t1.b
+         ->  Sort
+               Output: t2.a, t2.b, t3.a, ((t3.a + 1))
+               Sort Key: ((t3.a + 1))
+               ->  Hash Join
+                     Output: t2.a, t2.b, t3.a, (t3.a + 1)
+                     Hash Cond: (t2.a = (t3.b + 1))
+                     ->  Seq Scan on public.semijoin_unique_tbl t2
+                           Output: t2.a, t2.b
+                     ->  Hash
+                           Output: t3.a, t3.b
+                           ->  HashAggregate
+                                 Output: t3.a, t3.b
+                                 Group Key: (t3.a + 1), (t3.b + 1)
+                                 ->  Seq Scan on public.semijoin_unique_tbl t3
+                                       Output: t3.a, t3.b, (t3.a + 1), (t3.b + 1)
+(24 rows)
+
+-- encourage use of parallel plans
+set parallel_setup_cost=0;
+set parallel_tuple_cost=0;
+set min_parallel_table_scan_size=0;
+set max_parallel_workers_per_gather=4;
+set enable_indexscan to off;
+-- Ensure that we get a parallel plan for the unique-ification
+explain (verbose, costs off)
+select * from semijoin_unique_tbl t1, semijoin_unique_tbl t2
+where (t1.a, t2.a) in (select a, b from semijoin_unique_tbl t3)
+order by t1.a, t2.a;
+                                       QUERY PLAN                                       
+----------------------------------------------------------------------------------------
+ Nested Loop
+   Output: t1.a, t1.b, t2.a, t2.b
+   ->  Merge Join
+         Output: t1.a, t1.b, t3.b
+         Merge Cond: (t3.a = t1.a)
+         ->  Unique
+               Output: t3.a, t3.b
+               ->  Gather Merge
+                     Output: t3.a, t3.b
+                     Workers Planned: 2
+                     ->  Sort
+                           Output: t3.a, t3.b
+                           Sort Key: t3.a, t3.b
+                           ->  HashAggregate
+                                 Output: t3.a, t3.b
+                                 Group Key: t3.a, t3.b
+                                 ->  Parallel Seq Scan on public.semijoin_unique_tbl t3
+                                       Output: t3.a, t3.b
+         ->  Materialize
+               Output: t1.a, t1.b
+               ->  Gather Merge
+                     Output: t1.a, t1.b
+                     Workers Planned: 2
+                     ->  Sort
+                           Output: t1.a, t1.b
+                           Sort Key: t1.a
+                           ->  Parallel Seq Scan on public.semijoin_unique_tbl t1
+                                 Output: t1.a, t1.b
+   ->  Memoize
+         Output: t2.a, t2.b
+         Cache Key: t3.b
+         Cache Mode: logical
+         ->  Bitmap Heap Scan on public.semijoin_unique_tbl t2
+               Output: t2.a, t2.b
+               Recheck Cond: (t2.a = t3.b)
+               ->  Bitmap Index Scan on semijoin_unique_tbl_a_b_idx
+                     Index Cond: (t2.a = t3.b)
+(37 rows)
+
+reset enable_indexscan;
+reset max_parallel_workers_per_gather;
+reset min_parallel_table_scan_size;
+reset parallel_tuple_cost;
+reset parallel_setup_cost;
+drop table semijoin_unique_tbl;
+create table unique_tbl_p (a int, b int) partition by range(a);
+create table unique_tbl_p1 partition of unique_tbl_p for values from (0) to (5);
+create table unique_tbl_p2 partition of unique_tbl_p for values from (5) to (10);
+create table unique_tbl_p3 partition of unique_tbl_p for values from (10) to (20);
+insert into unique_tbl_p select i%12, i from generate_series(0, 1000)i;
+create index on unique_tbl_p1(a);
+create index on unique_tbl_p2(a);
+create index on unique_tbl_p3(a);
+analyze unique_tbl_p;
+set enable_partitionwise_join to on;
+-- Ensure that the unique-ification works for partition-wise join
+explain (verbose, costs off)
+select * from unique_tbl_p t1, unique_tbl_p t2
+where (t1.a, t2.a) in (select a, a from unique_tbl_p t3)
+order by t1.a, t2.a;
+                                           QUERY PLAN                                           
+------------------------------------------------------------------------------------------------
+ Merge Append
+   Sort Key: t1.a
+   ->  Nested Loop
+         Output: t1_1.a, t1_1.b, t2_1.a, t2_1.b
+         ->  Nested Loop
+               Output: t1_1.a, t1_1.b, t3_1.a
+               ->  Unique
+                     Output: t3_1.a
+                     ->  Index Only Scan using unique_tbl_p1_a_idx on public.unique_tbl_p1 t3_1
+                           Output: t3_1.a
+               ->  Index Scan using unique_tbl_p1_a_idx on public.unique_tbl_p1 t1_1
+                     Output: t1_1.a, t1_1.b
+                     Index Cond: (t1_1.a = t3_1.a)
+         ->  Memoize
+               Output: t2_1.a, t2_1.b
+               Cache Key: t1_1.a
+               Cache Mode: logical
+               ->  Index Scan using unique_tbl_p1_a_idx on public.unique_tbl_p1 t2_1
+                     Output: t2_1.a, t2_1.b
+                     Index Cond: (t2_1.a = t1_1.a)
+   ->  Nested Loop
+         Output: t1_2.a, t1_2.b, t2_2.a, t2_2.b
+         ->  Nested Loop
+               Output: t1_2.a, t1_2.b, t3_2.a
+               ->  Unique
+                     Output: t3_2.a
+                     ->  Index Only Scan using unique_tbl_p2_a_idx on public.unique_tbl_p2 t3_2
+                           Output: t3_2.a
+               ->  Index Scan using unique_tbl_p2_a_idx on public.unique_tbl_p2 t1_2
+                     Output: t1_2.a, t1_2.b
+                     Index Cond: (t1_2.a = t3_2.a)
+         ->  Memoize
+               Output: t2_2.a, t2_2.b
+               Cache Key: t1_2.a
+               Cache Mode: logical
+               ->  Index Scan using unique_tbl_p2_a_idx on public.unique_tbl_p2 t2_2
+                     Output: t2_2.a, t2_2.b
+                     Index Cond: (t2_2.a = t1_2.a)
+   ->  Nested Loop
+         Output: t1_3.a, t1_3.b, t2_3.a, t2_3.b
+         ->  Nested Loop
+               Output: t1_3.a, t1_3.b, t3_3.a
+               ->  Unique
+                     Output: t3_3.a
+                     ->  Sort
+                           Output: t3_3.a
+                           Sort Key: t3_3.a
+                           ->  Seq Scan on public.unique_tbl_p3 t3_3
+                                 Output: t3_3.a
+               ->  Index Scan using unique_tbl_p3_a_idx on public.unique_tbl_p3 t1_3
+                     Output: t1_3.a, t1_3.b
+                     Index Cond: (t1_3.a = t3_3.a)
+         ->  Memoize
+               Output: t2_3.a, t2_3.b
+               Cache Key: t1_3.a
+               Cache Mode: logical
+               ->  Index Scan using unique_tbl_p3_a_idx on public.unique_tbl_p3 t2_3
+                     Output: t2_3.a, t2_3.b
+                     Index Cond: (t2_3.a = t1_3.a)
+(59 rows)
+
+reset enable_partitionwise_join;
+drop table unique_tbl_p;
  --
  -- Test case for bug #4290: bogus calculation of subplan param sets
  --
@@ -2672,18 +2878,17 @@ EXPLAIN (COSTS OFF)
  SELECT * FROM onek
    WHERE (unique1,ten) IN (VALUES (1,1), (20,0), (99,9), (17,99))
    ORDER BY unique1;
-                           QUERY PLAN                            
------------------------------------------------------------------
- Sort
-   Sort Key: onek.unique1
-   ->  Nested Loop
-         ->  HashAggregate
-               Group Key: "*VALUES*".column1, "*VALUES*".column2
+                           QUERY PLAN                           
+----------------------------------------------------------------
+ Nested Loop
+   ->  Unique
+         ->  Sort
+               Sort Key: "*VALUES*".column1, "*VALUES*".column2
                 ->  Values Scan on "*VALUES*"
-         ->  Index Scan using onek_unique1 on onek
-               Index Cond: (unique1 = "*VALUES*".column1)
-               Filter: ("*VALUES*".column2 = ten)
-(9 rows)
+   ->  Index Scan using onek_unique1 on onek
+         Index Cond: (unique1 = "*VALUES*".column1)
+         Filter: ("*VALUES*".column2 = ten)
+(8 rows)
  
  EXPLAIN (COSTS OFF)
  SELECT * FROM onek
@@ -2858,12 +3063,10 @@ SELECT ten FROM onek WHERE unique1 IN (VALUES (1), (2) ORDER BY 1);
     ->  Unique
           ->  Sort
                 Sort Key: "*VALUES*".column1
-               ->  Sort
-                     Sort Key: "*VALUES*".column1
-                     ->  Values Scan on "*VALUES*"
+               ->  Values Scan on "*VALUES*"
     ->  Index Scan using onek_unique1 on onek
           Index Cond: (unique1 = "*VALUES*".column1)
-(9 rows)
+(7 rows)
  
  EXPLAIN (COSTS OFF)
  SELECT ten FROM onek WHERE unique1 IN (VALUES (1), (2) LIMIT 1);
diff --git a/src/test/regress/sql/subselect.sql b/src/test/regress/sql/subselect.sql

index d9a841fbc9ffdd4fba98be6eb4dc1d945c8e07c9..a6d276a115b2efc181fd7191853957c4e78164f1 100644 (file)
--- a/src/test/regress/sql/subselect.sql
+++ b/src/test/regress/sql/subselect.sql
@@ -361,6 +361,73 @@ select * from float_table
  select * from numeric_table
    where num_col in (select float_col from float_table);
  
+--
+-- Test that a semijoin implemented by unique-ifying the RHS can explore
+-- different paths of the RHS rel.
+--
+
+create table semijoin_unique_tbl (a int, b int);
+insert into semijoin_unique_tbl select i%10, i%10 from generate_series(1,1000)i;
+create index on semijoin_unique_tbl(a, b);
+analyze semijoin_unique_tbl;
+
+-- Ensure that we get a plan with Unique + IndexScan
+explain (verbose, costs off)
+select * from semijoin_unique_tbl t1, semijoin_unique_tbl t2
+where (t1.a, t2.a) in (select a, b from semijoin_unique_tbl t3)
+order by t1.a, t2.a;
+
+-- Ensure that we can unique-ify expressions more complex than plain Vars
+explain (verbose, costs off)
+select * from semijoin_unique_tbl t1, semijoin_unique_tbl t2
+where (t1.a, t2.a) in (select a+1, b+1 from semijoin_unique_tbl t3)
+order by t1.a, t2.a;
+
+-- encourage use of parallel plans
+set parallel_setup_cost=0;
+set parallel_tuple_cost=0;
+set min_parallel_table_scan_size=0;
+set max_parallel_workers_per_gather=4;
+
+set enable_indexscan to off;
+
+-- Ensure that we get a parallel plan for the unique-ification
+explain (verbose, costs off)
+select * from semijoin_unique_tbl t1, semijoin_unique_tbl t2
+where (t1.a, t2.a) in (select a, b from semijoin_unique_tbl t3)
+order by t1.a, t2.a;
+
+reset enable_indexscan;
+
+reset max_parallel_workers_per_gather;
+reset min_parallel_table_scan_size;
+reset parallel_tuple_cost;
+reset parallel_setup_cost;
+
+drop table semijoin_unique_tbl;
+
+create table unique_tbl_p (a int, b int) partition by range(a);
+create table unique_tbl_p1 partition of unique_tbl_p for values from (0) to (5);
+create table unique_tbl_p2 partition of unique_tbl_p for values from (5) to (10);
+create table unique_tbl_p3 partition of unique_tbl_p for values from (10) to (20);
+insert into unique_tbl_p select i%12, i from generate_series(0, 1000)i;
+create index on unique_tbl_p1(a);
+create index on unique_tbl_p2(a);
+create index on unique_tbl_p3(a);
+analyze unique_tbl_p;
+
+set enable_partitionwise_join to on;
+
+-- Ensure that the unique-ification works for partition-wise join
+explain (verbose, costs off)
+select * from unique_tbl_p t1, unique_tbl_p t2
+where (t1.a, t2.a) in (select a, a from unique_tbl_p t3)
+order by t1.a, t2.a;
+
+reset enable_partitionwise_join;
+
+drop table unique_tbl_p;
+
  --
  -- Test case for bug #4290: bogus calculation of subplan param sets
  --
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list

index e6f2e93b2d6faca0eb68ada2eb130bcd0deb4bd1..e4a9ec65ab4daa4e174edb367a29190624e637da 100644 (file)
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -3159,7 +3159,6 @@ UnicodeNormalizationForm
  UnicodeNormalizationQC
  Unique
  UniquePath
-UniquePathMethod
  UniqueRelInfo
  UniqueState
  UnlistenStmt
@@ -3175,7 +3174,6 @@ UpgradeTaskSlotState
  UpgradeTaskStep
  UploadManifestCmd
  UpperRelationKind
-UpperUniquePath
  UserAuth
  UserContext
  UserMapping
author	Richard Guo <[email protected]>
	Tue, 19 Aug 2025 00:35:40 +0000 (09:35 +0900)
committer	Richard Guo <[email protected]>
	Tue, 19 Aug 2025 00:35:40 +0000 (09:35 +0900)
src/backend/optimizer/README		patch \| blob \| blame \| history
src/backend/optimizer/path/costsize.c		patch \| blob \| blame \| history
src/backend/optimizer/path/joinpath.c		patch \| blob \| blame \| history
src/backend/optimizer/path/joinrels.c		patch \| blob \| blame \| history
src/backend/optimizer/plan/createplan.c		patch \| blob \| blame \| history
src/backend/optimizer/plan/planner.c		patch \| blob \| blame \| history
src/backend/optimizer/prep/prepunion.c		patch \| blob \| blame \| history
src/backend/optimizer/util/pathnode.c		patch \| blob \| blame \| history
src/backend/optimizer/util/relnode.c		patch \| blob \| blame \| history
src/include/nodes/nodes.h		patch \| blob \| blame \| history
src/include/nodes/pathnodes.h		patch \| blob \| blame \| history
src/include/optimizer/pathnode.h		patch \| blob \| blame \| history
src/include/optimizer/planner.h		patch \| blob \| blame \| history
src/test/regress/expected/join.out		patch \| blob \| blame \| history
src/test/regress/expected/partition_join.out		patch \| blob \| blame \| history
src/test/regress/expected/subselect.out		patch \| blob \| blame \| history
src/test/regress/sql/subselect.sql		patch \| blob \| blame \| history
src/tools/pgindent/typedefs.list		patch \| blob \| blame \| history