static List *build_index_tlist(PlannerInfo *root, IndexOptInfo *index,
Relation heapRelation);
static List *get_relation_statistics(RelOptInfo *rel, Relation relation);
+static void set_relation_partition_info(PlannerInfo *root, RelOptInfo *rel,
+ Relation relation);
+static PartitionScheme find_partition_scheme(PlannerInfo *root, Relation rel);
+static List **build_baserel_partition_key_exprs(Relation relation, Index varno);
/*
* get_relation_info -
/* Collect info about relation's foreign keys, if relevant */
get_relation_foreign_keys(root, rel, relation, inhparent);
+ /*
+ * Collect info about relation's partitioning scheme, if any. Only
+ * inheritance parents may be partitioned.
+ */
+ if (inhparent && relation->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+ set_relation_partition_info(root, rel, relation);
+
heap_close(relation, NoLock);
/*
heap_close(relation, NoLock);
return result;
}
+
+/*
+ * set_relation_partition_info
+ *
+ * Fill in the partitioning fields of "rel" (part_scheme, nparts, boundinfo
+ * and partexprs) from the partitioned table "relation".
+ *
+ * The relation must have relkind RELKIND_PARTITIONED_TABLE.  boundinfo is
+ * stored as the pointer found in the relation's PartitionDesc; it is not
+ * copied here.
+ */
+static void
+set_relation_partition_info(PlannerInfo *root, RelOptInfo *rel,
+							Relation relation)
+{
+	PartitionDesc desc;
+	PartitionScheme scheme;
+
+	Assert(relation->rd_rel->relkind == RELKIND_PARTITIONED_TABLE);
+
+	/* Look up the partition descriptor and the canonical scheme. */
+	desc = RelationGetPartitionDesc(relation);
+	scheme = find_partition_scheme(root, relation);
+	Assert(desc != NULL && scheme != NULL);
+
+	/* Publish everything in the RelOptInfo. */
+	rel->part_scheme = scheme;
+	rel->nparts = desc->nparts;
+	rel->boundinfo = desc->boundinfo;
+	rel->partexprs = build_baserel_partition_key_exprs(relation, rel->relid);
+}
+
+/*
+ * find_partition_scheme
+ *
+ * Return the PartitionScheme describing how "relation" is partitioned.
+ *
+ * If root->part_schemes already holds a scheme with the same strategy,
+ * number of partition keys, and per-key operator family, opclass input type
+ * and collation, that entry is returned.  Otherwise a new scheme is built,
+ * appended to root->part_schemes, and returned.
+ */
+static PartitionScheme
+find_partition_scheme(PlannerInfo *root, Relation relation)
+{
+	PartitionKey partkey = RelationGetPartitionKey(relation);
+	ListCell   *lc;
+	int			partnatts;
+	PartitionScheme part_scheme;
+
+	/* A partitioned table should have a partition key. */
+	Assert(partkey != NULL);
+
+	partnatts = partkey->partnatts;
+
+	/* Search for a matching partition scheme and return it if found. */
+	foreach(lc, root->part_schemes)
+	{
+		part_scheme = lfirst(lc);
+
+		/* Match partitioning strategy and number of keys. */
+		if (partkey->strategy != part_scheme->strategy ||
+			partnatts != part_scheme->partnatts)
+			continue;
+
+		/* Match the partition key types. */
+		if (memcmp(partkey->partopfamily, part_scheme->partopfamily,
+				   sizeof(Oid) * partnatts) != 0 ||
+			memcmp(partkey->partopcintype, part_scheme->partopcintype,
+				   sizeof(Oid) * partnatts) != 0 ||
+			memcmp(partkey->parttypcoll, part_scheme->parttypcoll,
+				   sizeof(Oid) * partnatts) != 0)
+			continue;
+
+		/*
+		 * Length and byval information should match when partopcintype
+		 * matches.
+		 */
+		Assert(memcmp(partkey->parttyplen, part_scheme->parttyplen,
+					  sizeof(int16) * partnatts) == 0);
+		Assert(memcmp(partkey->parttypbyval, part_scheme->parttypbyval,
+					  sizeof(bool) * partnatts) == 0);
+
+		/* Found matching partition scheme. */
+		return part_scheme;
+	}
+
+	/*
+	 * Did not find a matching partition scheme, so create one.  Copy the
+	 * contents of the PartitionKey's arrays instead of storing its pointers:
+	 * the scheme stays reachable from root->part_schemes after the caller
+	 * has closed the relation, and the PartitionKey's arrays may not survive
+	 * that long.
+	 */
+	part_scheme = (PartitionScheme) palloc0(sizeof(PartitionSchemeData));
+	part_scheme->strategy = partkey->strategy;
+	part_scheme->partnatts = partkey->partnatts;
+
+	part_scheme->partopfamily = (Oid *) palloc(sizeof(Oid) * partnatts);
+	memcpy(part_scheme->partopfamily, partkey->partopfamily,
+		   sizeof(Oid) * partnatts);
+
+	part_scheme->partopcintype = (Oid *) palloc(sizeof(Oid) * partnatts);
+	memcpy(part_scheme->partopcintype, partkey->partopcintype,
+		   sizeof(Oid) * partnatts);
+
+	part_scheme->parttypcoll = (Oid *) palloc(sizeof(Oid) * partnatts);
+	memcpy(part_scheme->parttypcoll, partkey->parttypcoll,
+		   sizeof(Oid) * partnatts);
+
+	part_scheme->parttyplen = (int16 *) palloc(sizeof(int16) * partnatts);
+	memcpy(part_scheme->parttyplen, partkey->parttyplen,
+		   sizeof(int16) * partnatts);
+
+	part_scheme->parttypbyval = (bool *) palloc(sizeof(bool) * partnatts);
+	memcpy(part_scheme->parttypbyval, partkey->parttypbyval,
+		   sizeof(bool) * partnatts);
+
+	/* Add the partitioning scheme to PlannerInfo. */
+	root->part_schemes = lappend(root->part_schemes, part_scheme);
+
+	return part_scheme;
+}
+
+/*
+ * build_baserel_partition_key_exprs
+ *
+ * Build the partition key expression array for a base relation.
+ *
+ * The result is a palloc'd array with one entry per partition key, each
+ * entry being a single-element list.  A key that is a plain column (valid
+ * attribute number) is represented by a Var built with the given varno.  A
+ * key that is an expression is a copy of the next entry of the partition
+ * key's partexprs list, re-stamped with the given varno.
+ */
+static List **
+build_baserel_partition_key_exprs(Relation relation, Index varno)
+{
+	PartitionKey key = RelationGetPartitionKey(relation);
+	ListCell   *next_expr;
+	List	  **result;
+	int			nkeys;
+	int			i;
+
+	/* A partitioned table should have a partition key. */
+	Assert(key != NULL);
+
+	nkeys = key->partnatts;
+	result = (List **) palloc(sizeof(List *) * nkeys);
+
+	/* Expression keys are consumed from this list in key order. */
+	next_expr = list_head(key->partexprs);
+
+	for (i = 0; i < nkeys; i++)
+	{
+		AttrNumber	attno = key->partattrs[i];
+		Expr	   *keyexpr;
+
+		if (attno == InvalidAttrNumber)
+		{
+			/* Expression key: take the next stored expression. */
+			if (next_expr == NULL)
+				elog(ERROR, "wrong number of partition key expressions");
+
+			/* Work on a copy and re-stamp it with the given varno. */
+			keyexpr = (Expr *) copyObject(lfirst(next_expr));
+			ChangeVarNodes((Node *) keyexpr, 1, varno, 0);
+			next_expr = lnext(next_expr);
+		}
+		else
+		{
+			/* Single column partition key is stored as a Var node. */
+			Assert(attno > 0);
+
+			keyexpr = (Expr *) makeVar(varno, attno,
+									   key->parttypid[i],
+									   key->parttypmod[i],
+									   key->parttypcoll[i], 0);
+		}
+
+		result[i] = list_make1(keyexpr);
+	}
+
+	return result;
+}
rel->baserestrict_min_security = UINT_MAX;
rel->joininfo = NIL;
rel->has_eclass_joins = false;
+ rel->part_scheme = NULL;
+ rel->nparts = 0;
+ rel->boundinfo = NULL;
+ rel->part_rels = NULL;
+ rel->partexprs = NULL;
/*
* Pass top parent's relids down the inheritance hierarchy. If the parent
if (rte->inh)
{
ListCell *l;
+ int nparts = rel->nparts;
+ int cnt_parts = 0;
+
+ if (nparts > 0)
+ rel->part_rels = (RelOptInfo **)
+ palloc(sizeof(RelOptInfo *) * nparts);
foreach(l, root->append_rel_list)
{
AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(l);
+ RelOptInfo *childrel;
/* append_rel_list contains all append rels; ignore others */
if (appinfo->parent_relid != relid)
continue;
- (void) build_simple_rel(root, appinfo->child_relid,
- rel);
+ childrel = build_simple_rel(root, appinfo->child_relid,
+ rel);
+
+ /* Nothing more to do for an unpartitioned table. */
+ if (!rel->part_scheme)
+ continue;
+
+ /*
+ * The order of partition OIDs in append_rel_list is the same as
+ * the order in the PartitionDesc, so the order of part_rels will
+ * also match the PartitionDesc. See expand_partitioned_rtentry.
+ */
+ Assert(cnt_parts < nparts);
+ rel->part_rels[cnt_parts] = childrel;
+ cnt_parts++;
}
+
+ /* We should have seen all the child partitions. */
+ Assert(cnt_parts == nparts);
}
return rel;
joinrel->joininfo = NIL;
joinrel->has_eclass_joins = false;
joinrel->top_parent_relids = NULL;
+ joinrel->part_scheme = NULL;
+ joinrel->nparts = 0;
+ joinrel->boundinfo = NULL;
+ joinrel->part_rels = NULL;
+ joinrel->partexprs = NULL;
/* Compute information relevant to the foreign relations. */
set_foreign_rel_properties(joinrel, outer_rel, inner_rel);
List *distinct_pathkeys; /* distinctClause pathkeys, if any */
List *sort_pathkeys; /* sortClause pathkeys, if any */
+ List *part_schemes; /* Canonicalised partition schemes used in the
+ * query. */
+
List *initial_rels; /* RelOptInfos we are now trying to join */
/* Use fetch_upper_rel() to get any particular upper rel */
((root)->simple_rte_array ? (root)->simple_rte_array[rti] : \
rt_fetch(rti, (root)->parse->rtable))
+/*
+ * If multiple relations are partitioned the same way, all such relations
+ * will have a pointer to the same PartitionScheme. A list of PartitionScheme
+ * objects is attached to the PlannerInfo. By design, the partition scheme
+ * incorporates only the general properties of the partition method (LIST vs.
+ * RANGE, number of partitioning columns and the type information for each)
+ * and not the specific bounds.
+ *
+ * We store the opclass-declared input data types instead of the partition key
+ * datatypes since the former rather than the latter are used to compare
+ * partition bounds. Since partition key data types and the opclass declared
+ * input data types are expected to be binary compatible (per ResolveOpClass),
+ * both of those should have the same byval and length properties.
+ *
+ * Each array member below has partnatts entries, one per partition key.
+ */
+typedef struct PartitionSchemeData
+{
+ char strategy; /* partition strategy */
+ int16 partnatts; /* number of partition attributes */
+ Oid *partopfamily; /* OIDs of operator families */
+ Oid *partopcintype; /* OIDs of opclass declared input data types */
+ Oid *parttypcoll; /* OIDs of collations of partition keys. */
+
+ /* Cached information about partition key data types. */
+ int16 *parttyplen; /* length of each partition key type */
+ bool *parttypbyval; /* byval property of each partition key type */
+} PartitionSchemeData;
+
+typedef struct PartitionSchemeData *PartitionScheme;
/*----------
* RelOptInfo
* other rels for which we have tried and failed to prove
* this one unique
*
- * The presence of the remaining fields depends on the restrictions
+ * The presence of the following fields depends on the restrictions
* and joins that the relation participates in:
*
* baserestrictinfo - List of RestrictInfo nodes, containing info about
* We store baserestrictcost in the RelOptInfo (for base relations) because
* we know we will need it at least once (to price the sequential scan)
* and may need it multiple times to price index scans.
+ *
+ * If the relation is partitioned, these fields will be set:
+ *
+ * part_scheme - Partitioning scheme of the relation
+ * boundinfo - Partition bounds
+ * nparts - Number of partitions
+ * part_rels - RelOptInfos for each partition
+ * partexprs - Partition key expressions
+ *
+ * Note: A base relation always has only one set of partition keys, but a join
+ * relation may have as many sets of partition keys as the number of relations
+ * being joined. partexprs is an array containing part_scheme->partnatts
+ * elements, each of which is a list of partition key expressions. For a base
+ * relation each list contains only one expression, but for a join relation
+ * there can be one per baserel.
*----------
*/
typedef enum RelOptKind
/* used by "other" relations */
Relids top_parent_relids; /* Relids of topmost parents */
+
+ /* used for partitioned relations */
+ PartitionScheme part_scheme; /* Partitioning scheme. */
+ int nparts; /* number of partitions */
+ struct PartitionBoundInfoData *boundinfo; /* Partition bounds */
+ struct RelOptInfo **part_rels; /* Array of RelOptInfos of partitions,
+ * stored in the same order as the bounds */
+ List **partexprs; /* Partition key expressions. */
} RelOptInfo;
/*