Fix handling of root->distribution during redistribution
authorTomas Vondra <[email protected]>
Thu, 5 Oct 2017 15:21:43 +0000 (17:21 +0200)
committerTomas Vondra <[email protected]>
Thu, 19 Oct 2017 13:48:20 +0000 (15:48 +0200)
This fixes some remaining bugs in handling root->distribution, caused
by the upper-planner pathification (in PostgreSQL 9.6).

Prior to the pathification (so in PostgreSQL 9.5 and Postgres-XL 9.5),
the root->distribution was used for two purposes:

* To track distribution expected by ModifyTable (UPDATE,DELETE), so
  that grouping_planner() knew how to redistribute the data.

* To communicate the resulting distribution from grouping_planner()
  back to standard_planner().

This worked fine in 9.5 as grouping_planner() was only dealing with
a single remaining path (plan) when considering the redistribution,
and so it was OK to tweak root->distribution.

But since the pathification in 9.6 that is no longer true. There is
no obvious reason why all the paths would have to share the same
distribution, and we don't know which one will be the cheapest one.

So from now on root->distribution is used to track the distribution
expected by ModifyTable. Distribution for each path is available in
path->distribution if needed.

Note: We still use subroot->distribution to pass information about
distribution of subqueries, though. But we only set it after the
one cheapest path is selected.

src/backend/optimizer/path/allpaths.c
src/backend/optimizer/plan/planner.c
src/backend/optimizer/plan/subselect.c

index 9ebe69ebad3057dc6bb357d8169138b99a1b2add..8873a298d2a441d2cc90933dab9c3c7029b456d1 100644 (file)
@@ -1964,7 +1964,7 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
                                                                                         subpath->pathkeys,
                                                                                         make_tlist_from_pathtarget(subpath->pathtarget));
 
-               if (subroot->distribution && subroot->distribution->distributionExpr)
+               if (subpath->distribution && subpath->distribution->distributionExpr)
                {
                        ListCell *lc;
 
@@ -1976,14 +1976,14 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
                         * be from the rel, need conversion.
                         */
                        distribution = makeNode(Distribution);
-                       distribution->distributionType = subroot->distribution->distributionType;
-                       distribution->nodes = bms_copy(subroot->distribution->nodes);
-                       distribution->restrictNodes = bms_copy(subroot->distribution->restrictNodes);
+                       distribution->distributionType = subpath->distribution->distributionType;
+                       distribution->nodes = bms_copy(subpath->distribution->nodes);
+                       distribution->restrictNodes = bms_copy(subpath->distribution->restrictNodes);
 
                        foreach(lc, targetlist)
                        {
                                TargetEntry *tle = (TargetEntry *) lfirst(lc);
-                               if (equal(tle->expr, subroot->distribution->distributionExpr))
+                               if (equal(tle->expr, subpath->distribution->distributionExpr))
                                {
                                        distribution->distributionExpr = (Node *)
                                                        makeVarFromTargetEntry(rel->relid, tle);
@@ -1992,7 +1992,7 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
                        }
                }
                else
-                       distribution = subroot->distribution;
+                       distribution = subpath->distribution;
 
                /* Generate outer path using this subpath */
                add_path(rel, (Path *)
index c2166808896d4fd9f56325fc430cbda86003a14c..bce977ec81854868bf236363b2ea5fe6889537ab 100644 (file)
@@ -368,6 +368,9 @@ standard_planner(Query *parse, int cursorOptions, ParamListInfo boundParams)
        final_rel = fetch_upper_rel(root, UPPERREL_FINAL, NULL);
        best_path = get_cheapest_fractional_path(final_rel, tuple_fraction);
 
+       if (!root->distribution)
+               root->distribution = best_path->distribution;
+
        top_plan = create_plan(root, best_path);
 #ifdef XCP
        if (root->distribution)
@@ -6915,23 +6918,45 @@ equal_distributions(PlannerInfo *root, Distribution *dst1,
        return false;
 }
 
+/*
+ * adjust_path_distribution
+ *             Adjust distribution of the path to match what's expected by ModifyTable.
+ *
+ * We use root->distribution to communicate distribution expected by a ModifyTable.
+ * Currently it's set either in preprocess_targetlist() for simple target relations,
+ * or in inheritance_planner() for targets that are inheritance trees.
+ *
+ * If root->distribution is NULL, we don't need to do anything and we can leave the
+ * path distribution as it is. This happens when there is no ModifyTable node, for
+ * example.
+ *
+ * If the root->distribution is set, we need to inspect it and redistribute the data
+ * if needed (when it root->distribution does not match path->distribution).
+ *
+ * We also detect DML (e.g. correlated UPDATE/DELETE or updates of distribution key)
+ * that we can't handle at this point.
+ *
+ * XXX We must not update root->distribution here, because we need to do this on all
+ * paths considered by grouping_planner(), and there's no obvious guarantee all the
+ * paths will share the same distribution. Postgres-XL 9.5 was allowed to do that,
+ * because prior to the pathification (in PostgreSQL 9.6) grouping_planner() picked
+ * before the distributions were adjusted.
+ */
 static Path *
 adjust_path_distribution(PlannerInfo *root, Query *parse, Path *path)
 {
-       /* if the root distribution is NULL, set it to path distribution */
+       /* if there is no root distribution, no redistribution is needed */
        if (!root->distribution)
-       {
-               root->distribution = path->distribution;
-               return path;
-       }
-
-       /* don't touch paths without distribution attached (catalogs etc.) */
-       if ((path->distribution == NULL) && (root->distribution == NULL))
                return path;
 
+       /* and also skip dummy paths */
        if (IS_DUMMY_PATH(path))
                return path;
 
+       /*
+        * Both the path and root have distribution. Let's see if they differ,
+        * and do a redistribution if not.
+        */
        if (equal_distributions(root, root->distribution, path->distribution))
        {
                if (IsLocatorReplicated(path->distribution->distributionType) &&
index beeebd3a57ec9bea0d5b1ec34604c2ae7091d1dc..e7f82bf9b55a63421e88bcb450e22a2a719fa93a 100644 (file)
@@ -548,6 +548,9 @@ make_subplan(PlannerInfo *root, Query *orig_subquery,
        final_rel = fetch_upper_rel(subroot, UPPERREL_FINAL, NULL);
        best_path = get_cheapest_fractional_path(final_rel, tuple_fraction);
 
+       if (!subroot->distribution)
+               subroot->distribution = best_path->distribution;
+
        plan = create_plan(subroot, best_path);
 
 #ifdef XCP
@@ -1223,6 +1226,9 @@ SS_process_ctes(PlannerInfo *root)
                final_rel = fetch_upper_rel(subroot, UPPERREL_FINAL, NULL);
                best_path = final_rel->cheapest_total_path;
 
+               if (!subroot->distribution)
+                       subroot->distribution = best_path->distribution;
+
                plan = create_plan(subroot, best_path);
 
 #ifdef XCP