From 6e5f49085195eff2c2574217980c2a6df067b04d Mon Sep 17 00:00:00 2001 From: Corey Huinker Date: Sat, 7 Jun 2025 23:17:03 -0400 Subject: [PATCH 1/5] Refactor output format of pg_ndistinct and add working input function. The existing format of pg_ndistinct uses a single-object JSON structure where each key is itself a comma-separated list of attnums. While this is a very compact format, it is confusing to read, and it is difficult to manipulate values within the object. This wasn't a concern until statistics import functions were introduced, enabling users to inject hypothetical statistics into an object to observe their effect on the query planner. The new format is an array of objects. Each object must have the keys "attributes", which must contain an array of attnums, and "ndistinct", which must be an integer. The integer requirement is a quirk: the underlying internal storage is a double, but the value stored has always been an integer. The change in format is adequately illustrated by the changes to src/test/regress/expected/stats_ext.out, so a further description here would be redundant. 
--- src/backend/statistics/mvdistinct.c | 463 +++++++++++++++++++++++- src/test/regress/expected/stats_ext.out | 56 ++- src/test/regress/sql/stats_ext.sql | 12 + 3 files changed, 503 insertions(+), 28 deletions(-) diff --git a/src/backend/statistics/mvdistinct.c b/src/backend/statistics/mvdistinct.c index 7e7a63405c8b..003dc3a74abf 100644 --- a/src/backend/statistics/mvdistinct.c +++ b/src/backend/statistics/mvdistinct.c @@ -27,9 +27,15 @@ #include "catalog/pg_statistic_ext.h" #include "catalog/pg_statistic_ext_data.h" +#include "common/int.h" +#include "common/jsonapi.h" #include "lib/stringinfo.h" +#include "mb/pg_wchar.h" +#include "nodes/miscnodes.h" +#include "nodes/pg_list.h" #include "statistics/extended_stats_internal.h" #include "statistics/statistics.h" +#include "utils/builtins.h" #include "utils/fmgrprotos.h" #include "utils/syscache.h" #include "utils/typcache.h" @@ -328,28 +334,453 @@ statext_ndistinct_deserialize(bytea *data) return ndistinct; } +typedef enum +{ + NDIST_EXPECT_START = 0, + NDIST_EXPECT_ITEM, + NDIST_EXPECT_KEY, + NDIST_EXPECT_ATTNUM_LIST, + NDIST_EXPECT_ATTNUM, + NDIST_EXPECT_NDISTINCT, + NDIST_EXPECT_COMPLETE +} ndistinctSemanticState; + +typedef struct +{ + const char *str; + ndistinctSemanticState state; + + List *distinct_items; /* Accumulated complete MVNDistinctItems */ + Node *escontext; + + bool found_attributes; /* Item has an attributes key */ + bool found_ndistinct; /* Item has ndistinct key */ + List *attnum_list; /* Accumulated attributes attnums */ + int64 ndistinct; +} ndistinctParseState; + +/* + * Invoked at the start of each MVNDistinctItem. + * + * The entire JSON document shoul be one array of MVNDistinctItem objects. + * + * If we're anywhere else in the document, it's an error. 
+ */ +static JsonParseErrorType +ndistinct_object_start(void *state) +{ + ndistinctParseState *parse = state; + + if (parse->state != NDIST_EXPECT_ITEM) + { + ereturn(parse->escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed pg_ndistinct: \"%s\"", parse->str), + errdetail("Expected Item object"))); + return JSON_SEM_ACTION_FAILED; + } + + /* Now we expect to see attributes/ndistinct keys */ + parse->state = NDIST_EXPECT_KEY; + return JSON_SUCCESS; +} + +/* + * Routine to allow qsorting of AttNumbers + */ +static int +attnum_compare(const void *aptr, const void *bptr) +{ + AttrNumber a = *(const AttrNumber *) aptr; + AttrNumber b = *(const AttrNumber *) bptr; + + return pg_cmp_s16(a, b); +} + + +/* + * Invoked at the end of an object. + * + * Check to ensure that it was a complete MVNDistinctItem + * + */ +static JsonParseErrorType +ndistinct_object_end(void *state) +{ + ndistinctParseState *parse = state; + + int natts = 0; + AttrNumber *attrsort; + + MVNDistinctItem *item; + + if (!parse->found_attributes) + { + ereturn(parse->escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed pg_ndistinct: \"%s\"", parse->str), + errdetail("Item must contain \"attributes\" key"))); + return JSON_SEM_ACTION_FAILED; + } + + if (!parse->found_ndistinct) + { + ereturn(parse->escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed pg_ndistinct: \"%s\"", parse->str), + errdetail("Item must contain \"ndistinct\" key"))); + return JSON_SEM_ACTION_FAILED; + } + + if (parse->attnum_list == NIL) + { + ereturn(parse->escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed pg_ndistinct: \"%s\"", parse->str), + errdetail("The \"attributes\" key must be an non-empty array"))); + return JSON_SEM_ACTION_FAILED; + } + + /* + * We need at least 2 attnums for a ndistinct item, anything less is + * malformed. 
+ */ + natts = parse->attnum_list->length; + if (natts < 2) + { + ereturn(parse->escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed pg_ndistinct: \"%s\"", parse->str), + errdetail("The attributes key must contain an array of at least two attnums"))); + + return JSON_SEM_ACTION_FAILED; + } + attrsort = palloc0(natts * sizeof(AttrNumber)); + + /* Create the MVNDistinctItem */ + item = palloc(sizeof(MVNDistinctItem)); + item->nattributes = natts; + item->attributes = palloc0(natts * sizeof(AttrNumber)); + item->ndistinct = (double) parse->ndistinct; + + /* fill out both attnum list and sortable list */ + for (int i = 0; i < natts; i++) + { + attrsort[i] = (AttrNumber) parse->attnum_list->elements[i].int_value; + item->attributes[i] = attrsort[i]; + } + + /* Check attrsort for uniqueness */ + qsort(attrsort, natts, sizeof(AttrNumber), attnum_compare); + for (int i = 1; i < natts; i++) + if (attrsort[i] == attrsort[i - 1]) + { + ereturn(parse->escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed pg_ndistinct: \"%s\"", parse->str), + errdetail("attnum list duplicate value found: %d", attrsort[i]))); + + return JSON_SEM_ACTION_FAILED; + } + pfree(attrsort); + + parse->distinct_items = lappend(parse->distinct_items, (void *) item); + + /* reset item state vars */ + list_free(parse->attnum_list); + parse->attnum_list = NIL; + parse->ndistinct = 0; + parse->found_attributes = false; + parse->found_ndistinct = false; + + /* Now we are looking for the next MVNDistinctItem */ + parse->state = NDIST_EXPECT_ITEM; + return JSON_SUCCESS; +} + + +/* + * ndsitinct input format has two types of arrays, the outer MVNDistinctItem + * array, and the attnum list array within each MVNDistinctItem. 
+ */ +static JsonParseErrorType +ndistinct_array_start(void *state) +{ + ndistinctParseState *parse = state; + + switch (parse->state) + { + case NDIST_EXPECT_ATTNUM_LIST: + parse->state = NDIST_EXPECT_ATTNUM; + break; + + case NDIST_EXPECT_START: + parse->state = NDIST_EXPECT_ITEM; + break; + + default: + ereturn(parse->escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed pg_ndistinct: \"%s\"", parse->str), + errdetail("Array found in unexpected place"))); + return JSON_SEM_ACTION_FAILED; + } + + return JSON_SUCCESS; +} + + +static JsonParseErrorType +ndistinct_array_end(void *state) +{ + ndistinctParseState *parse = state; + + /* The attnum list is complete, look for more MVNDistinctItem keys */ + if (parse->state == NDIST_EXPECT_ATTNUM) + { + parse->state = NDIST_EXPECT_KEY; + return JSON_SUCCESS; + } + + if (parse->state == NDIST_EXPECT_ITEM) + { + parse->state = NDIST_EXPECT_COMPLETE; + return JSON_SUCCESS; + } + + ereturn(parse->escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed pg_ndistinct: \"%s\"", parse->str), + errdetail("Array found in unexpected place"))); + return JSON_SEM_ACTION_FAILED; +} + + +/* + * The valid keys for the MVNDistinctItem object are: + * - attributes + * - ndistinct + */ +static JsonParseErrorType +ndistinct_object_field_start(void *state, char *fname, bool isnull) +{ + ndistinctParseState *parse = state; + + const char *attributes = "attributes"; + const char *ndistinct = "ndistinct"; + + if (strcmp(fname, attributes) == 0) + { + parse->found_attributes = true; + parse->state = NDIST_EXPECT_ATTNUM_LIST; + return JSON_SUCCESS; + } + + if (strcmp(fname, ndistinct) == 0) + { + parse->found_ndistinct = true; + parse->state = NDIST_EXPECT_NDISTINCT; + return JSON_SUCCESS; + } + + ereturn(parse->escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed pg_ndistinct: \"%s\"", parse->str), + errdetail("Only allowed keys are \%s\" 
and \%s\".", attributes, ndistinct))); + return JSON_SEM_ACTION_FAILED; +} + +/* + * + */ +static JsonParseErrorType +ndistinct_array_element_start(void *state, bool isnull) +{ + ndistinctParseState *parse = state; + + if (parse->state == NDIST_EXPECT_ATTNUM) + { + if (isnull) + { + ereturn(parse->escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed pg_ndistinct: \"%s\"", parse->str), + errdetail("Attnum list elements cannot be null."))); + + return JSON_SEM_ACTION_FAILED; + } + return JSON_SUCCESS; + } + + if (parse->state == NDIST_EXPECT_ITEM) + { + if (isnull) + { + ereturn(parse->escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed pg_ndistinct: \"%s\"", parse->str), + errdetail("Item list elements cannot be null."))); + + return JSON_SEM_ACTION_FAILED; + } + + return JSON_SUCCESS; + } + + ereturn(parse->escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed pg_ndistinct: \"%s\"", parse->str), + errdetail("Unexpected array element."))); + + return JSON_SEM_ACTION_FAILED; +} + +/* + * Handle scalar events from the ndistinct input parser. + * + */ +static JsonParseErrorType +ndistinct_scalar(void *state, char *token, JsonTokenType tokentype) +{ + ndistinctParseState *parse = state; + + if (parse->state == NDIST_EXPECT_ATTNUM) + { + AttrNumber attnum = pg_strtoint16_safe(token, parse->escontext); + + if (SOFT_ERROR_OCCURRED(parse->escontext)) + return JSON_SEM_ACTION_FAILED; + + parse->attnum_list = lappend_int(parse->attnum_list, (int) attnum); + return JSON_SUCCESS; + } + + if (parse->state == NDIST_EXPECT_NDISTINCT) + { + /* + * While the structure dictates that ndistinct in a double precision + * floating point, in practice it has always been an integer, and it + * is output as such. Therefore, we follow usage precendent over the + * actual storage structure, and read it in as an integer. 
+ */ + parse->ndistinct = pg_strtoint64_safe(token, parse->escontext); + + if (SOFT_ERROR_OCCURRED(parse->escontext)) + return JSON_SEM_ACTION_FAILED; + + parse->state = NDIST_EXPECT_KEY; + return JSON_SUCCESS; + } + + ereturn(parse->escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed pg_ndistinct: \"%s\"", parse->str), + errdetail("Unexpected scalar."))); + + return JSON_SEM_ACTION_FAILED; +} + /* * pg_ndistinct_in * input routine for type pg_ndistinct * - * pg_ndistinct is real enough to be a table column, but it has no - * operations of its own, and disallows input (just like pg_node_tree). + * example input: + * [{"attributes": [6, -1], "ndistinct": 14}, + * {"attributes": [6, -2], "ndistinct": 9143}, + * {"attributes": [-1,-2], "ndistinct": 13454}, + * {"attributes": [6, -1, -2], "ndistinct": 14549}] */ Datum pg_ndistinct_in(PG_FUNCTION_ARGS) { - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("cannot accept a value of type %s", "pg_ndistinct"))); + char *str = PG_GETARG_CSTRING(0); + + ndistinctParseState parse_state; + JsonParseErrorType result; + JsonLexContext *lex; + JsonSemAction sem_action; + + /* initialize semantic state */ + parse_state.str = str; + parse_state.state = NDIST_EXPECT_START; + parse_state.distinct_items = NIL; + parse_state.escontext = fcinfo->context; + parse_state.found_attributes = false; + parse_state.found_ndistinct = false; + parse_state.attnum_list = NIL; + parse_state.ndistinct = 0; + + /* set callbacks */ + sem_action.semstate = (void *) &parse_state; + sem_action.object_start = ndistinct_object_start; + sem_action.object_end = ndistinct_object_end; + sem_action.array_start = ndistinct_array_start; + sem_action.array_end = ndistinct_array_end; + sem_action.object_field_start = ndistinct_object_field_start; + sem_action.object_field_end = NULL; + sem_action.array_element_start = ndistinct_array_element_start; + sem_action.array_element_end = NULL; + sem_action.scalar = 
ndistinct_scalar; + + lex = makeJsonLexContextCstringLen(NULL, str, strlen(str), + PG_UTF8, true); + result = pg_parse_json(lex, &sem_action); + freeJsonLexContext(lex); + + if (result == JSON_SUCCESS) + { + MVNDistinct *ndistinct; + int nitems = parse_state.distinct_items->length; + bytea *bytes; - PG_RETURN_VOID(); /* keep compiler quiet */ + ndistinct = palloc(offsetof(MVNDistinct, items) + + nitems * sizeof(MVNDistinctItem)); + + ndistinct->magic = STATS_NDISTINCT_MAGIC; + ndistinct->type = STATS_NDISTINCT_TYPE_BASIC; + ndistinct->nitems = nitems; + + for (int i = 0; i < nitems; i++) + { + MVNDistinctItem *item = parse_state.distinct_items->elements[i].ptr_value; + + ndistinct->items[i].ndistinct = item->ndistinct; + ndistinct->items[i].nattributes = item->nattributes; + ndistinct->items[i].attributes = item->attributes; + + /* + * free the MVNDistinctItem, but not the attributes we're still + * using + */ + pfree(item); + } + bytes = statext_ndistinct_serialize(ndistinct); + + list_free(parse_state.distinct_items); + for (int i = 0; i < nitems; i++) + pfree(ndistinct->items[i].attributes); + pfree(ndistinct); + + PG_RETURN_BYTEA_P(bytes); + } + else if (result == JSON_SEM_ACTION_FAILED) + PG_RETURN_NULL(); /* escontext already set */ + + /* Anything else is a generic JSON parse error */ + ereturn(parse_state.escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed pg_ndistinct: \"%s\"", str), + errdetail("Must be valid JSON."))); + PG_RETURN_NULL(); } /* * pg_ndistinct * output routine for type pg_ndistinct * - * Produces a human-readable representation of the value. + * Produces a human-readable representation of the value, in the format: + * [{"attributes": [attnum,. ..], "ndistinct": int}, ...] 
+ * */ Datum pg_ndistinct_out(PG_FUNCTION_ARGS) @@ -360,26 +791,26 @@ pg_ndistinct_out(PG_FUNCTION_ARGS) StringInfoData str; initStringInfo(&str); - appendStringInfoChar(&str, '{'); + appendStringInfoChar(&str, '['); for (i = 0; i < ndist->nitems; i++) { - int j; MVNDistinctItem item = ndist->items[i]; if (i > 0) appendStringInfoString(&str, ", "); - for (j = 0; j < item.nattributes; j++) - { - AttrNumber attnum = item.attributes[j]; + Assert(item.nattributes > 0); /* TODO: elog? */ - appendStringInfo(&str, "%s%d", (j == 0) ? "\"" : ", ", attnum); - } - appendStringInfo(&str, "\": %d", (int) item.ndistinct); + appendStringInfo(&str, "{\"attributes\": [%d", item.attributes[0]); + + for (int j = 1; j < item.nattributes; j++) + appendStringInfo(&str, ", %d", item.attributes[j]); + + appendStringInfo(&str, "], \"ndistinct\": %d}", (int) item.ndistinct); } - appendStringInfoChar(&str, '}'); + appendStringInfoChar(&str, ']'); PG_RETURN_CSTRING(str.data); } diff --git a/src/test/regress/expected/stats_ext.out b/src/test/regress/expected/stats_ext.out index 6359e5fb689c..ae79eb57c673 100644 --- a/src/test/regress/expected/stats_ext.out +++ b/src/test/regress/expected/stats_ext.out @@ -447,9 +447,9 @@ SELECT s.stxkind, d.stxdndistinct FROM pg_statistic_ext s, pg_statistic_ext_data d WHERE s.stxrelid = 'ndistinct'::regclass AND d.stxoid = s.oid; - stxkind | stxdndistinct ----------+----------------------------------------------------- - {d,f,m} | {"3, 4": 11, "3, 6": 11, "4, 6": 11, "3, 4, 6": 11} + stxkind | stxdndistinct +---------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + {d,f,m} | [{"attributes": [3, 4], "ndistinct": 11}, {"attributes": [3, 6], "ndistinct": 11}, {"attributes": [4, 6], "ndistinct": 11}, {"attributes": [3, 4, 6], "ndistinct": 11}] (1 row) -- minor improvement, make sure the ctid does not break the matching @@ -529,9 +529,9 @@ 
SELECT s.stxkind, d.stxdndistinct FROM pg_statistic_ext s, pg_statistic_ext_data d WHERE s.stxrelid = 'ndistinct'::regclass AND d.stxoid = s.oid; - stxkind | stxdndistinct ----------+---------------------------------------------------------- - {d,f,m} | {"3, 4": 221, "3, 6": 247, "4, 6": 323, "3, 4, 6": 1000} + stxkind | stxdndistinct +---------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ + {d,f,m} | [{"attributes": [3, 4], "ndistinct": 221}, {"attributes": [3, 6], "ndistinct": 247}, {"attributes": [4, 6], "ndistinct": 323}, {"attributes": [3, 4, 6], "ndistinct": 1000}] (1 row) -- correct estimates @@ -678,9 +678,9 @@ SELECT s.stxkind, d.stxdndistinct FROM pg_statistic_ext s, pg_statistic_ext_data d WHERE s.stxrelid = 'ndistinct'::regclass AND d.stxoid = s.oid; - stxkind | stxdndistinct ----------+------------------------------------------------------------------- - {d,e} | {"-1, -2": 221, "-1, -3": 247, "-2, -3": 323, "-1, -2, -3": 1000} + stxkind | stxdndistinct +---------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + {d,e} | [{"attributes": [-1, -2], "ndistinct": 221}, {"attributes": [-1, -3], "ndistinct": 247}, {"attributes": [-2, -3], "ndistinct": 323}, {"attributes": [-1, -2, -3], "ndistinct": 1000}] (1 row) SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a+1), (b+100)'); @@ -727,9 +727,9 @@ SELECT s.stxkind, d.stxdndistinct FROM pg_statistic_ext s, pg_statistic_ext_data d WHERE s.stxrelid = 'ndistinct'::regclass AND d.stxoid = s.oid; - stxkind | stxdndistinct ----------+------------------------------------------------------------- - {d,e} | {"3, 4": 221, "3, -1": 247, "4, -1": 323, "3, 4, -1": 1000} + stxkind | stxdndistinct 
+---------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + {d,e} | [{"attributes": [3, 4], "ndistinct": 221}, {"attributes": [3, -1], "ndistinct": 247}, {"attributes": [4, -1], "ndistinct": 323}, {"attributes": [3, 4, -1], "ndistinct": 1000}] (1 row) SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b'); @@ -3455,4 +3455,36 @@ SELECT FROM sb_1 LEFT JOIN sb_2 RESET enable_nestloop; RESET enable_mergejoin; +-- Test input function of pg_ndistinct. +SELECT '[{"attributes" : [2,3], "ndistinct" : 4}, + {"attributes" : [2,-1], "ndistinct" : 4}, + {"attributes" : [2,3,-1], "ndistinct" : 4}, + {"attributes" : [1,3,-1,-2], "ndistinct" : 4}]'::pg_ndistinct; + pg_ndistinct +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + [{"attributes": [2, 3], "ndistinct": 4}, {"attributes": [2, -1], "ndistinct": 4}, {"attributes": [2, 3, -1], "ndistinct": 4}, {"attributes": [1, 3, -1, -2], "ndistinct": 4}] +(1 row) + +-- error, cannot duplicate attribute +SELECT '[{"attributes" : [2,3], "ndistinct" : 4}, + {"attributes" : [2,-1], "ndistinct" : 4}, + {"attributes" : [2,3,2], "ndistinct" : 4}, + {"attributes" : [1,3,-1,-2], "ndistinct" : 4}]'::pg_ndistinct; +ERROR: malformed pg_ndistinct: "[{"attributes" : [2,3], "ndistinct" : 4}, + {"attributes" : [2,-1], "ndistinct" : 4}, + {"attributes" : [2,3,2], "ndistinct" : 4}, + {"attributes" : [1,3,-1,-2], "ndistinct" : 4}]" +LINE 1: SELECT '[{"attributes" : [2,3], "ndistinct" : 4}, + ^ +DETAIL: attnum list duplicate value found: 2 +-- Test input function of pg_dependencies. 
+SELECT '[{"attributes" : [2,3], "dependency" : 4, "degree": 1.0000}, + {"attributes" : [2,-1], "dependency" : 4, "degree": 0.0000}, + {"attributes" : [2,3,-1], "dependency" : 4, "degree": 0.5000}, + {"attributes" : [1,3,-1,-2], "dependency" : 4, "degree": 1.0000}]'::pg_dependencies; + pg_dependencies +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + [{"attributes": [2, 3], "dependency": 4, "degree": 1.000000}, {"attributes": [2, -1], "dependency": 4, "degree": 0.000000}, {"attributes": [2, 3, -1], "dependency": 4, "degree": 0.500000}, {"attributes": [1, 3, -1, -2], "dependency": 4, "degree": 1.000000}] +(1 row) + DROP TABLE sb_1, sb_2 CASCADE; diff --git a/src/test/regress/sql/stats_ext.sql b/src/test/regress/sql/stats_ext.sql index da4f2fe9c938..4ffc33d84573 100644 --- a/src/test/regress/sql/stats_ext.sql +++ b/src/test/regress/sql/stats_ext.sql @@ -1758,4 +1758,16 @@ SELECT FROM sb_1 LEFT JOIN sb_2 RESET enable_nestloop; RESET enable_mergejoin; +-- Test input function of pg_ndistinct. +SELECT '[{"attributes" : [2,3], "ndistinct" : 4}, + {"attributes" : [2,-1], "ndistinct" : 4}, + {"attributes" : [2,3,-1], "ndistinct" : 4}, + {"attributes" : [1,3,-1,-2], "ndistinct" : 4}]'::pg_ndistinct; + +-- error, cannot duplicate attribute +SELECT '[{"attributes" : [2,3], "ndistinct" : 4}, + {"attributes" : [2,-1], "ndistinct" : 4}, + {"attributes" : [2,3,2], "ndistinct" : 4}, + {"attributes" : [1,3,-1,-2], "ndistinct" : 4}]'::pg_ndistinct; + DROP TABLE sb_1, sb_2 CASCADE; From cffd1b41cfc7b4631287afe09e99ac2a7daa7c9f Mon Sep 17 00:00:00 2001 From: Corey Huinker Date: Sun, 8 Jun 2025 00:15:40 -0400 Subject: [PATCH 2/5] Refactor output format of pg_dependencies and add working input function. 
The existing format of pg_dependencies uses a single-object JSON structure where each key is itself a comma-separated list of attnums. While this is a very compact format, it's confusing to read and is difficult to manipulate values within the object. This wasn't a concern until statistics import functions were introduced, enabling users to inject hypothetical statistics into an object to observe their effect on the query planner. The new format is an array of objects, each object must have the keys "attributes", which must contain an array of attnums, "dependency", which must be an integer, and "degree", which must be a float. The change in format is adequately described from the changes to src/test/regress/expected/stats_ext.out so description here is redundant. --- src/backend/statistics/dependencies.c | 491 ++++++++++++++++++++++-- src/test/regress/expected/stats_ext.out | 24 +- src/test/regress/sql/stats_ext.sql | 12 + 3 files changed, 496 insertions(+), 31 deletions(-) diff --git a/src/backend/statistics/dependencies.c b/src/backend/statistics/dependencies.c index eb2fc4366b4a..fd6125fc9da8 100644 --- a/src/backend/statistics/dependencies.c +++ b/src/backend/statistics/dependencies.c @@ -13,18 +13,26 @@ */ #include "postgres.h" +#include "access/attnum.h" #include "access/htup_details.h" #include "catalog/pg_statistic_ext.h" #include "catalog/pg_statistic_ext_data.h" +#include "common/int.h" +#include "common/jsonapi.h" #include "lib/stringinfo.h" +#include "mb/pg_wchar.h" +#include "nodes/miscnodes.h" #include "nodes/nodeFuncs.h" #include "nodes/nodes.h" #include "nodes/pathnodes.h" +#include "nodes/pg_list.h" #include "optimizer/clauses.h" #include "optimizer/optimizer.h" #include "parser/parsetree.h" #include "statistics/extended_stats_internal.h" #include "statistics/statistics.h" +#include "utils/builtins.h" +#include "utils/float.h" #include "utils/fmgroids.h" #include "utils/fmgrprotos.h" #include "utils/lsyscache.h" @@ -643,24 +651,459 @@ 
statext_dependencies_load(Oid mvoid, bool inh) return result; } +typedef enum +{ + DEPS_EXPECT_START = 0, + DEPS_EXPECT_ITEM, + DEPS_EXPECT_KEY, + DEPS_EXPECT_ATTNUM_LIST, + DEPS_EXPECT_ATTNUM, + DEPS_EXPECT_DEPENDENCY, + DEPS_EXPECT_DEGREE, + DEPS_PARSE_COMPLETE +} depsParseSemanticState; + +typedef struct +{ + const char *str; + depsParseSemanticState state; + + List *dependency_list; + Node *escontext; + + bool found_attributes; /* Item has an attributes key */ + bool found_dependency; /* Item has an dependency key */ + bool found_degree; /* Item has degree key */ + List *attnum_list; /* Accumulated attributes attnums */ + AttrNumber dependency; + double degree; +} dependenciesParseState; + +/* + * Invoked at the start of each MVDependency object. + * + * The entire JSON document shoul be one array of MVDependency objects. + * + * If we're anywhere else in the document, it's an error. + */ +static JsonParseErrorType +dependencies_object_start(void *state) +{ + dependenciesParseState *parse = state; + + if (parse->state != DEPS_EXPECT_ITEM) + { + ereturn(parse->escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed pg_dependencies: \"%s\"", parse->str), + errdetail("Expected Item object"))); + return JSON_SEM_ACTION_FAILED; + } + + /* Now we expect to see attributes/dependency/degree keys */ + parse->state = DEPS_EXPECT_KEY; + return JSON_SUCCESS; +} + +static int +attnum_compare(const void *aptr, const void *bptr) +{ + AttrNumber a = *(const AttrNumber *) aptr; + AttrNumber b = *(const AttrNumber *) bptr; + + return pg_cmp_s16(a, b); +} + +static JsonParseErrorType +dependencies_object_end(void *state) +{ + dependenciesParseState *parse = state; + + MVDependency *dep; + AttrNumber *attrsort; + + int natts = 0; + + if (!parse->found_attributes) + { + ereturn(parse->escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed pg_dependencies: \"%s\"", parse->str), + errdetail("Item must contain 
\"attributes\" key"))); + return JSON_SEM_ACTION_FAILED; + } + + if (!parse->found_dependency) + { + ereturn(parse->escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed pg_dependencies: \"%s\"", parse->str), + errdetail("Item must contain \"dependencies\" key"))); + return JSON_SEM_ACTION_FAILED; + } + + if (!parse->found_degree) + { + ereturn(parse->escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed pg_dependencies: \"%s\"", parse->str), + errdetail("Item must contain \"degree\" key"))); + return JSON_SEM_ACTION_FAILED; + } + + if (parse->attnum_list == NIL) + { + ereturn(parse->escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed pg_dependencies: \"%s\"", parse->str), + errdetail("The \"attributes\" key must be an non-empty array"))); + return JSON_SEM_ACTION_FAILED; + } + + /* + * We need at least 1 attnum for a dependencies item, anything less is + * malformed. + */ + natts = parse->attnum_list->length; + if (natts < 1) + { + ereturn(parse->escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed pg_dependencies: \"%s\"", parse->str), + errdetail("The attributes key must contain an array of at least one attnum"))); + + return JSON_SEM_ACTION_FAILED; + } + attrsort = palloc0(natts * sizeof(AttrNumber)); + + /* + * Allocate enough space for the dependency, the attnums in the list, plus + * the final attnum + */ + dep = palloc0(offsetof(MVDependency, attributes) + ((natts + 1) * sizeof(AttrNumber))); + dep->nattributes = natts + 1; + + dep->attributes[natts] = parse->dependency; + dep->degree = parse->degree; + + attrsort = palloc0(dep->nattributes * sizeof(AttrNumber)); + attrsort[natts] = parse->dependency; + + for (int i = 0; i < natts; i++) + { + attrsort[i] = (AttrNumber) parse->attnum_list->elements[i].int_value; + dep->attributes[i] = attrsort[i]; + } + + /* Check attrsort for uniqueness */ + qsort(attrsort, 
natts + 1, sizeof(AttrNumber), attnum_compare); + for (int i = 1; i < dep->nattributes; i++) + if (attrsort[i] == attrsort[i - 1]) + { + ereturn(parse->escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed pg_dependencies: \"%s\"", parse->str), + errdetail("attnum list duplicate value found: %d", attrsort[i]))); + + return JSON_SEM_ACTION_FAILED; + } + pfree(attrsort); + + parse->dependency_list = lappend(parse->dependency_list, (void *) dep); + + /* reset dep item state vars */ + list_free(parse->attnum_list); + parse->attnum_list = NIL; + parse->dependency = 0; + parse->degree = 0.0; + parse->found_attributes = false; + parse->found_dependency = false; + parse->found_degree = false; + + /* Now we are looking for the next MVDependency */ + parse->state = DEPS_EXPECT_ITEM; + return JSON_SUCCESS; +} + +/* + * dependencies input format does not have arrays, so any array elements encountered + * are an error. + */ +static JsonParseErrorType +dependencies_array_start(void *state) +{ + dependenciesParseState *parse = state; + + switch (parse->state) + { + case DEPS_EXPECT_ATTNUM_LIST: + parse->state = DEPS_EXPECT_ATTNUM; + break; + case DEPS_EXPECT_START: + parse->state = DEPS_EXPECT_ITEM; + break; + default: + ereturn(parse->escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed pg_dependencies: \"%s\"", parse->str), + errdetail("Array found in unexpected place"))); + return JSON_SEM_ACTION_FAILED; + } + + return JSON_SUCCESS; +} + +/* + * Either the end of an attnum list or the whole object + */ +static JsonParseErrorType +dependencies_array_end(void *state) +{ + dependenciesParseState *parse = state; + + switch (parse->state) + { + case DEPS_EXPECT_ATTNUM: + parse->state = DEPS_EXPECT_KEY; + break; + + case DEPS_EXPECT_ITEM: + parse->state = DEPS_PARSE_COMPLETE; + break; + + default: + ereturn(parse->escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed 
pg_dependencies: \"%s\"", parse->str), + errdetail("Array found in unexpected place"))); + return JSON_SEM_ACTION_FAILED; + } + return JSON_SUCCESS; +} + +/* + * The valid keys for the MVDependency object are: + * - attributes + * - depeendency + * - degree + */ +static JsonParseErrorType +dependencies_object_field_start(void *state, char *fname, bool isnull) +{ + dependenciesParseState *parse = state; + + const char *attributes = "attributes"; + const char *dependency = "dependency"; + const char *degree = "degree"; + + if (strcmp(fname, attributes) == 0) + { + parse->found_attributes = true; + parse->state = DEPS_EXPECT_ATTNUM_LIST; + return JSON_SUCCESS; + } + + if (strcmp(fname, dependency) == 0) + { + parse->found_dependency = true; + parse->state = DEPS_EXPECT_DEPENDENCY; + return JSON_SUCCESS; + } + + if (strcmp(fname, degree) == 0) + { + parse->found_degree = true; + parse->state = DEPS_EXPECT_DEGREE; + return JSON_SUCCESS; + } + + ereturn(parse->escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed pg_dependencies: \"%s\"", parse->str), + errdetail("Only allowed keys are \%s\", \"%s\" and \%s\".", + attributes, dependency, degree))); + return JSON_SEM_ACTION_FAILED; +} + +/* + * ndsitinct input format does not have arrays, so any array elements encountered + * are an error. 
+ */ +static JsonParseErrorType +dependencies_array_element_start(void *state, bool isnull) +{ + dependenciesParseState *parse = state; + + if (parse->state == DEPS_EXPECT_ATTNUM) + { + if (isnull) + { + ereturn(parse->escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed pg_dependencies: \"%s\"", parse->str), + errdetail("Attnum list elements cannot be null."))); + + return JSON_SEM_ACTION_FAILED; + } + return JSON_SUCCESS; + } + + if (parse->state == DEPS_EXPECT_ITEM) + { + if (isnull) + { + ereturn(parse->escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed pg_dependencies: \"%s\"", parse->str), + errdetail("Item list elements cannot be null."))); + + return JSON_SEM_ACTION_FAILED; + } + + return JSON_SUCCESS; + } + + ereturn(parse->escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed pg_dependencies: \"%s\"", parse->str), + errdetail("Unexpected array element."))); + + return JSON_SEM_ACTION_FAILED; +} + +/* + * Handle scalar events from the dependencies input parser. + * + * There is only one case where we will encounter a scalar, and that is the + * dependency degree for the previous object key. 
+ */
+static JsonParseErrorType
+dependencies_scalar(void *state, char *token, JsonTokenType tokentype)
+{
+	dependenciesParseState *parse = state;
+
+	if (parse->state == DEPS_EXPECT_ATTNUM)
+	{
+		AttrNumber	attnum = pg_strtoint16_safe(token, parse->escontext);
+
+		if (SOFT_ERROR_OCCURRED(parse->escontext))
+			return JSON_SEM_ACTION_FAILED;
+
+		parse->attnum_list = lappend_int(parse->attnum_list, (int) attnum);
+		return JSON_SUCCESS;
+	}
+
+	if (parse->state == DEPS_EXPECT_DEPENDENCY)
+	{
+		parse->dependency = (AttrNumber) pg_strtoint16_safe(token, parse->escontext);
+
+		if (SOFT_ERROR_OCCURRED(parse->escontext))
+			return JSON_SEM_ACTION_FAILED;
+
+		return JSON_SUCCESS;
+	}
+
+
+	if (parse->state == DEPS_EXPECT_DEGREE)
+	{
+		parse->degree = float8in_internal(token, NULL, "double",
+										  token, parse->escontext);
+
+		if (SOFT_ERROR_OCCURRED(parse->escontext))
+			return JSON_SEM_ACTION_FAILED;
+
+		return JSON_SUCCESS;
+	}
+
+	ereturn(parse->escontext, (Datum) 0,
+			(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+			 errmsg("malformed pg_dependencies: \"%s\"", parse->str),
+			 errdetail("Unexpected scalar.")));
+	return JSON_SEM_ACTION_FAILED;
+}
+
 /*
  * pg_dependencies_in		- input routine for type pg_dependencies.
  *
- * pg_dependencies is real enough to be a table column, but it has no operations
- * of its own, and disallows input too
+ * This format is valid JSON, with the expected format:
+ *    [{"attributes": [1,2], "dependency": -1, "degree": 1.0000},
+ *     {"attributes": [1,-1], "dependency": 2, "degree": 0.0000},
+ *     {"attributes": [2,-1], "dependency": 1, "degree": 1.0000}]
+ * Errors are reported via ereturn(), so soft-error callers get NULL back.
  */
 Datum
 pg_dependencies_in(PG_FUNCTION_ARGS)
 {
-	/*
-	 * pg_node_list stores the data in binary form and parsing text input is
-	 * not needed, so disallow this.
-	 */
-	ereport(ERROR,
-			(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-			 errmsg("cannot accept a value of type %s", "pg_dependencies")));
+	char	   *str = PG_GETARG_CSTRING(0);
+
+	dependenciesParseState parse_state;
+	JsonParseErrorType result;
+	JsonLexContext *lex;
+	JsonSemAction sem_action;
+
+	/* initialize the semantic state */
+	parse_state.str = str;
+	parse_state.state = DEPS_EXPECT_START;
+	parse_state.dependency_list = NIL;
+	parse_state.attnum_list = NIL;
+	parse_state.dependency = 0;
+	parse_state.degree = 0.0;
+	parse_state.found_attributes = false;
+	parse_state.found_dependency = false;
+	parse_state.found_degree = false;
+	parse_state.escontext = fcinfo->context;
+
+	/* set callbacks */
+	sem_action.semstate = (void *) &parse_state;
+	sem_action.object_start = dependencies_object_start;
+	sem_action.object_end = dependencies_object_end;
+	sem_action.array_start = dependencies_array_start;
+	sem_action.array_end = dependencies_array_end;
+	sem_action.array_element_start = dependencies_array_element_start;
+	sem_action.array_element_end = NULL;
+	sem_action.object_field_start = dependencies_object_field_start;
+	sem_action.object_field_end = NULL;
+	sem_action.scalar = dependencies_scalar;
+
+	lex = makeJsonLexContextCstringLen(NULL, str, strlen(str), PG_UTF8, true);
+
+	result = pg_parse_json(lex, &sem_action);
+	freeJsonLexContext(lex);
+
+	if (result == JSON_SUCCESS)
+	{
+		List	   *list = parse_state.dependency_list;
+		int			ndeps = list_length(list);	/* safe when list is NIL */
+		MVDependencies *mvdeps;
+		bytea	   *bytes;
+
+		mvdeps = palloc0(offsetof(MVDependencies, deps) + ndeps * sizeof(MVDependency *));
+		mvdeps->magic = STATS_DEPS_MAGIC;
+		mvdeps->type = STATS_DEPS_TYPE_BASIC;
+		mvdeps->ndeps = ndeps;
+
+		/* move the MVDependency pointers from the list into the MVDependencies */
+		for (int i = 0; i < ndeps; i++)
+			mvdeps->deps[i] = list_nth(list, i);
+		bytes = statext_dependencies_serialize(mvdeps);
+
+		list_free(list);
+		for (int i = 0; i < ndeps; i++)
+			pfree(mvdeps->deps[i]);
+		pfree(mvdeps);
+
+		PG_RETURN_BYTEA_P(bytes);
+	}
+	else if (result == JSON_SEM_ACTION_FAILED)
+		PG_RETURN_NULL();
 
-	PG_RETURN_VOID();			/* keep compiler quiet */
+	/* Anything else is a generic JSON parse error */
+	ereturn(parse_state.escontext, (Datum) 0,
+			(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+			 errmsg("malformed pg_dependencies: \"%s\"", str),
+			 errdetail("Must be valid JSON.")));
+
+	PG_RETURN_NULL();			/* keep compiler quiet */
 }
 
 /*
@@ -671,34 +1114,32 @@ pg_dependencies_out(PG_FUNCTION_ARGS)
 {
 	bytea	   *data = PG_GETARG_BYTEA_PP(0);
 	MVDependencies *dependencies = statext_dependencies_deserialize(data);
-	int			i,
-				j;
 	StringInfoData str;
 
 	initStringInfo(&str);
-	appendStringInfoChar(&str, '{');
+	appendStringInfoChar(&str, '[');
 
-	for (i = 0; i < dependencies->ndeps; i++)
+	for (int i = 0; i < dependencies->ndeps; i++)
 	{
 		MVDependency *dependency = dependencies->deps[i];
 
 		if (i > 0)
 			appendStringInfoString(&str, ", ");
 
-		appendStringInfoChar(&str, '"');
-		for (j = 0; j < dependency->nattributes; j++)
-		{
-			if (j == dependency->nattributes - 1)
-				appendStringInfoString(&str, " => ");
-			else if (j > 0)
-				appendStringInfoString(&str, ", ");
+		Assert(dependency->nattributes > 1);	/* TODO: elog?
*/ - appendStringInfo(&str, "%d", dependency->attributes[j]); - } - appendStringInfo(&str, "\": %f", dependency->degree); + appendStringInfo(&str, "{\"attributes\": [%d", + dependency->attributes[0]); + + for (int j = 1; j < dependency->nattributes - 1; j++) + appendStringInfo(&str, ", %d", dependency->attributes[j]); + + appendStringInfo(&str, "], \"dependency\": %d, \"degree\": %f}", + dependency->attributes[dependency->nattributes - 1], + dependency->degree); } - appendStringInfoChar(&str, '}'); + appendStringInfoChar(&str, ']'); PG_RETURN_CSTRING(str.data); } diff --git a/src/test/regress/expected/stats_ext.out b/src/test/regress/expected/stats_ext.out index ae79eb57c673..babfe4acda0e 100644 --- a/src/test/regress/expected/stats_ext.out +++ b/src/test/regress/expected/stats_ext.out @@ -1281,9 +1281,9 @@ CREATE STATISTICS func_deps_stat (dependencies) ON a, b, c FROM functional_depen ANALYZE functional_dependencies; -- print the detected dependencies SELECT dependencies FROM pg_stats_ext WHERE statistics_name = 'func_deps_stat'; - dependencies ------------------------------------------------------------------------------------------------------------- - {"3 => 4": 1.000000, "3 => 6": 1.000000, "4 => 6": 1.000000, "3, 4 => 6": 1.000000, "3, 6 => 4": 1.000000} + dependencies +---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + [{"attributes": [3], "dependency": 4, "degree": 1.000000}, {"attributes": [3], "dependency": 6, "degree": 1.000000}, {"attributes": [4], "dependency": 6, "degree": 1.000000}, {"attributes": [3, 4], "dependency": 6, "degree": 1.000000}, {"attributes": [3, 6], "dependency": 4, "degree": 1.000000}] (1 row) SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = 1 AND b = 
''1'''); @@ -1623,9 +1623,9 @@ CREATE STATISTICS func_deps_stat (dependencies) ON (a * 2), upper(b), (c + 1) FR ANALYZE functional_dependencies; -- print the detected dependencies SELECT dependencies FROM pg_stats_ext WHERE statistics_name = 'func_deps_stat'; - dependencies ------------------------------------------------------------------------------------------------------------------------- - {"-1 => -2": 1.000000, "-1 => -3": 1.000000, "-2 => -3": 1.000000, "-1, -2 => -3": 1.000000, "-1, -3 => -2": 1.000000} + dependencies +---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + [{"attributes": [-1], "dependency": -2, "degree": 1.000000}, {"attributes": [-1], "dependency": -3, "degree": 1.000000}, {"attributes": [-2], "dependency": -3, "degree": 1.000000}, {"attributes": [-1, -2], "dependency": -3, "degree": 1.000000}, {"attributes": [-1, -3], "dependency": -2, "degree": 1.000000}] (1 row) SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = 2 AND upper(b) = ''1'''); @@ -3487,4 +3487,16 @@ SELECT '[{"attributes" : [2,3], "dependency" : 4, "degree": 1.0000}, [{"attributes": [2, 3], "dependency": 4, "degree": 1.000000}, {"attributes": [2, -1], "dependency": 4, "degree": 0.000000}, {"attributes": [2, 3, -1], "dependency": 4, "degree": 0.500000}, {"attributes": [1, 3, -1, -2], "dependency": 4, "degree": 1.000000}] (1 row) +-- error, cannot duplicate attribute +SELECT '[{"attributes": [6], "dependency": 6, "degree": 0.292508}, + {"attributes": [-2], "dependency": -1, "degree": 0.113999}, + {"attributes": [6, -2], "dependency": -1, "degree": 0.348479}, + {"attributes": [-1, -2], "dependency": 6, "degree": 0.839691}]'::pg_dependencies; +ERROR: malformed pg_dependencies: 
"[{"attributes": [6], "dependency": 6, "degree": 0.292508}, + {"attributes": [-2], "dependency": -1, "degree": 0.113999}, + {"attributes": [6, -2], "dependency": -1, "degree": 0.348479}, + {"attributes": [-1, -2], "dependency": 6, "degree": 0.839691}]" +LINE 1: SELECT '[{"attributes": [6], "dependency": 6, "degree": 0.29... + ^ +DETAIL: attnum list duplicate value found: 6 DROP TABLE sb_1, sb_2 CASCADE; diff --git a/src/test/regress/sql/stats_ext.sql b/src/test/regress/sql/stats_ext.sql index 4ffc33d84573..879c5ad370b1 100644 --- a/src/test/regress/sql/stats_ext.sql +++ b/src/test/regress/sql/stats_ext.sql @@ -1770,4 +1770,16 @@ SELECT '[{"attributes" : [2,3], "ndistinct" : 4}, {"attributes" : [2,3,2], "ndistinct" : 4}, {"attributes" : [1,3,-1,-2], "ndistinct" : 4}]'::pg_ndistinct; +-- Test input function of pg_dependencies. +SELECT '[{"attributes" : [2,3], "dependency" : 4, "degree": 1.0000}, + {"attributes" : [2,-1], "dependency" : 4, "degree": 0.0000}, + {"attributes" : [2,3,-1], "dependency" : 4, "degree": 0.5000}, + {"attributes" : [1,3,-1,-2], "dependency" : 4, "degree": 1.0000}]'::pg_dependencies; + +-- error, cannot duplicate attribute +SELECT '[{"attributes": [6], "dependency": 6, "degree": 0.292508}, + {"attributes": [-2], "dependency": -1, "degree": 0.113999}, + {"attributes": [6, -2], "dependency": -1, "degree": 0.348479}, + {"attributes": [-1, -2], "dependency": 6, "degree": 0.839691}]'::pg_dependencies; + DROP TABLE sb_1, sb_2 CASCADE; From c0ba4bf6142ee0c0ef68f6dd5181cd31beaa2d19 Mon Sep 17 00:00:00 2001 From: Corey Huinker Date: Thu, 26 Dec 2024 05:02:06 -0500 Subject: [PATCH 3/5] Expose attribute statistics functions for use in extended_stats. Many of the operations of attribute stats have analogous operations in extended stats. 
* get_attr_stat_type() * init_empty_stats_tuple() * text_to_stavalues() * get_elem_stat_type() --- src/backend/statistics/attribute_stats.c | 24 +++++------------------- src/include/statistics/statistics.h | 17 +++++++++++++++++ 2 files changed, 22 insertions(+), 19 deletions(-) diff --git a/src/backend/statistics/attribute_stats.c b/src/backend/statistics/attribute_stats.c index ab198076401b..6d5006a13c1a 100644 --- a/src/backend/statistics/attribute_stats.c +++ b/src/backend/statistics/attribute_stats.c @@ -100,23 +100,9 @@ static struct StatsArgInfo cleararginfo[] = static bool attribute_statistics_update(FunctionCallInfo fcinfo); static Node *get_attr_expr(Relation rel, int attnum); -static void get_attr_stat_type(Oid reloid, AttrNumber attnum, - Oid *atttypid, int32 *atttypmod, - char *atttyptype, Oid *atttypcoll, - Oid *eq_opr, Oid *lt_opr); -static bool get_elem_stat_type(Oid atttypid, char atttyptype, - Oid *elemtypid, Oid *elem_eq_opr); -static Datum text_to_stavalues(const char *staname, FmgrInfo *array_in, Datum d, - Oid typid, int32 typmod, bool *ok); -static void set_stats_slot(Datum *values, bool *nulls, bool *replaces, - int16 stakind, Oid staop, Oid stacoll, - Datum stanumbers, bool stanumbers_isnull, - Datum stavalues, bool stavalues_isnull); static void upsert_pg_statistic(Relation starel, HeapTuple oldtup, Datum *values, bool *nulls, bool *replaces); static bool delete_pg_statistic(Oid reloid, AttrNumber attnum, bool stainherit); -static void init_empty_stats_tuple(Oid reloid, int16 attnum, bool inherited, - Datum *values, bool *nulls, bool *replaces); /* * Insert or Update Attribute Statistics @@ -568,7 +554,7 @@ get_attr_expr(Relation rel, int attnum) /* * Derive type information from the attribute. 
*/ -static void +void get_attr_stat_type(Oid reloid, AttrNumber attnum, Oid *atttypid, int32 *atttypmod, char *atttyptype, Oid *atttypcoll, @@ -650,7 +636,7 @@ get_attr_stat_type(Oid reloid, AttrNumber attnum, /* * Derive element type information from the attribute type. */ -static bool +bool get_elem_stat_type(Oid atttypid, char atttyptype, Oid *elemtypid, Oid *elem_eq_opr) { @@ -690,7 +676,7 @@ get_elem_stat_type(Oid atttypid, char atttyptype, * to false. If the resulting array contains NULLs, raise a WARNING and set ok * to false. Otherwise, set ok to true. */ -static Datum +Datum text_to_stavalues(const char *staname, FmgrInfo *array_in, Datum d, Oid typid, int32 typmod, bool *ok) { @@ -743,7 +729,7 @@ text_to_stavalues(const char *staname, FmgrInfo *array_in, Datum d, Oid typid, * Find and update the slot with the given stakind, or use the first empty * slot. */ -static void +void set_stats_slot(Datum *values, bool *nulls, bool *replaces, int16 stakind, Oid staop, Oid stacoll, Datum stanumbers, bool stanumbers_isnull, @@ -867,7 +853,7 @@ delete_pg_statistic(Oid reloid, AttrNumber attnum, bool stainherit) /* * Initialize values and nulls for a new stats tuple. 
*/ -static void +void init_empty_stats_tuple(Oid reloid, int16 attnum, bool inherited, Datum *values, bool *nulls, bool *replaces) { diff --git a/src/include/statistics/statistics.h b/src/include/statistics/statistics.h index 7dd0f9755454..a0ab4b7633c2 100644 --- a/src/include/statistics/statistics.h +++ b/src/include/statistics/statistics.h @@ -127,4 +127,21 @@ extern StatisticExtInfo *choose_best_statistics(List *stats, char requiredkind, int nclauses); extern HeapTuple statext_expressions_load(Oid stxoid, bool inh, int idx); +extern void get_attr_stat_type(Oid reloid, AttrNumber attnum, + Oid *atttypid, int32 *atttypmod, + char *atttyptype, Oid *atttypcoll, + Oid *eq_opr, Oid *lt_opr); +extern void init_empty_stats_tuple(Oid reloid, int16 attnum, bool inherited, + Datum *values, bool *nulls, bool *replaces); + +extern void set_stats_slot(Datum *values, bool *nulls, bool *replaces, + int16 stakind, Oid staop, Oid stacoll, + Datum stanumbers, bool stanumbers_isnull, + Datum stavalues, bool stavalues_isnull); + +extern Datum text_to_stavalues(const char *staname, FmgrInfo *array_in, Datum d, + Oid typid, int32 typmod, bool *ok); +extern bool get_elem_stat_type(Oid atttypid, char atttyptype, + Oid *elemtypid, Oid *elem_eq_opr); + #endif /* STATISTICS_H */ From 907077ca43afac15c555eb1d62ae0099181323f4 Mon Sep 17 00:00:00 2001 From: Corey Huinker Date: Fri, 3 Jan 2025 13:43:29 -0500 Subject: [PATCH 4/5] Add extended statistics support functions. Add pg_restore_extended_stats() and pg_clear_extended_stats(). These functions closely mirror their relation and attribute counterparts, but for extended statistics (i.e. CREATE STATISTICS) objects. 
--- doc/src/sgml/func.sgml | 98 ++ src/backend/statistics/dependencies.c | 65 + src/backend/statistics/extended_stats.c | 1104 +++++++++++++++++ src/backend/statistics/mcv.c | 144 +++ src/backend/statistics/mvdistinct.c | 62 + src/include/catalog/pg_proc.dat | 18 + .../statistics/extended_stats_internal.h | 17 + src/test/regress/expected/stats_import.out | 588 +++++++++ src/test/regress/sql/stats_import.sql | 452 +++++++ 9 files changed, 2548 insertions(+) diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index 74a16af04ad3..6b0acf84dbcd 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -30703,6 +30703,104 @@ SELECT pg_restore_attribute_stats( + + + + pg_restore_extended_stats + + pg_restore_extended_stats ( + VARIADIC kwargs "any" ) + boolean + + + Creates or updates statistics for statistics objects. Ordinarily, + these statistics are collected automatically or updated as a part of + or , so + it's not necessary to call this function. However, it is useful + after a restore to enable the optimizer to choose better plans if + ANALYZE has not been run yet. 
+
+
+         The tracked statistics may change from version to version, so
+         arguments are passed as pairs of argname
+         and argvalue in the form:
+
+ SELECT pg_restore_extended_stats(
+    'arg1name', 'arg1value'::arg1type,
+    'arg2name', 'arg2value'::arg2type,
+    'arg3name', 'arg3value'::arg3type);
+
+
+
+         For example, to set the n_distinct,
+         dependencies, and exprs
+         values for the statistics object myschema.mystatsobj:
+
+ SELECT pg_restore_extended_stats(
+    'statistics_schemaname', 'myschema'::name,
+    'statistics_name',       'mystatsobj'::name,
+    'inherited',             false,
+    'n_distinct',            '[{"attributes": [2, 3], "ndistinct": 4}, {"attributes": [2, -1], "ndistinct": 4}, {"attributes": [2, -2], "ndistinct": 4}, {"attributes": [3, -1], "ndistinct": 4}, {"attributes": [3, -2], "ndistinct": 4}]'::pg_ndistinct,
+    'dependencies',          '[{"attributes": [2], "dependency": 1, "degree": 1.000000}, {"attributes": [2], "dependency": -1, "degree": 1.000000}, {"attributes": [2], "dependency": -2, "degree": 1.000000}]'::pg_dependencies,
+    'exprs',                 '{{0,4,-0.75,"{1}","{0.5}","{-1,0}",-0.6,NULL,NULL,NULL},{0.25,4,-0.5,"{2}","{0.5}",NULL,1,NULL,NULL,NULL}}'::text[]);
+
+
+
+         The required arguments are statistics_schemaname with a value
+         of type name, which specifies the statistics object's schema;
+         statistics_name with a value of type name, which specifies
+         the name of the statistics object; and inherited, which
+         specifies whether the statistics include values from child tables.
+         Other arguments are the names and values of statistics corresponding
+         to columns in pg_stats_ext
+         . To supply statistics for the expressions in the extended statistics object, use the
+         parameter exprs of type text[]. The array
+         must be two-dimensional: the outer array has one entry per expression in
+         the object, and each inner array holds one element per statistical column in pg_stats_ext_exprs, some
+         of which are themselves arrays.
+
+
+         Additionally, this function accepts argument name
+         version of type integer, which
+         specifies the server version from which the statistics originated.
+         This is anticipated to be helpful in porting statistics from older
+         versions of PostgreSQL.
+ + + Minor errors are reported as a WARNING and + ignored, and remaining statistics will still be restored. If all + specified statistics are successfully restored, returns + true, otherwise false. + + + The caller must have the MAINTAIN privilege on the + table or be the owner of the database. + + + + + + + + pg_clear_extended_stats + + pg_clear_extended_stats ( + statistics_schemaname name, + statistics_name name, + inherited boolean ) + void + + + Clears statistics for the given statistics object, as + though the object was newly created. + + + The caller must have the MAINTAIN privilege on + the table or be the owner of the database. + + + diff --git a/src/backend/statistics/dependencies.c b/src/backend/statistics/dependencies.c index fd6125fc9da8..aee0bcb90d89 100644 --- a/src/backend/statistics/dependencies.c +++ b/src/backend/statistics/dependencies.c @@ -336,6 +336,10 @@ dependency_degree(StatsBuildData *data, int k, AttrNumber *dependency) return (n_supporting_rows * 1.0 / data->numrows); } + +void +free_pg_dependencies(MVDependencies *dependencies); + /* * detects functional dependencies between groups of columns * @@ -1022,6 +1026,55 @@ dependencies_scalar(void *state, char *token, JsonTokenType tokentype) return JSON_SEM_ACTION_FAILED; } +/* + * Validate an MVDependencies against the extended statistics object definition. + * + * Every MVDependencies must be checked to ensure that the attnums in the + * attributes list correspond to attnums/expressions defined by the + * extended statistics object. + * + * Positive attnums are attributes which must be found in the stxkeys, + * while negative attnums correspond to an expr number, so the attnum + * can't be below (0 - numexprs). 
+ */
+bool
+pg_dependencies_validate_deps(MVDependencies *dependencies, int2vector *stxkeys, int numexprs, int elevel)
+{
+	int			attnum_expr_lowbound = 0 - numexprs;	/* lowest valid expression attnum */
+
+	for (int i = 0; i < dependencies->ndeps; i++)
+	{
+		MVDependency *dep = dependencies->deps[i];
+
+		for (int j = 0; j < dep->nattributes; j++)
+		{
+			AttrNumber	attnum = dep->attributes[j];
+			bool		ok = false;
+
+			if (attnum > 0)
+			{
+				for (int k = 0; k < stxkeys->dim1; k++)
+					if (attnum == stxkeys->values[k])
+					{
+						ok = true;
+						break;
+					}
+			}
+			else if ((attnum < 0) && (attnum >= attnum_expr_lowbound))
+				ok = true;
+
+			if (!ok)
+			{
+				ereport(elevel,
+						(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+						 errmsg("pg_dependencies: invalid attnum for this statistics object: %d", attnum)));
+				return false;
+			}
+		}
+	}
+	return true;
+}
+
 /*
  * pg_dependencies_in		- input routine for type pg_dependencies.
  *
@@ -1106,6 +1159,18 @@ pg_dependencies_in(PG_FUNCTION_ARGS)
 	PG_RETURN_NULL();			/* keep compiler quiet */
 }
 
+/*
+ * Free allocations of an MVDependencies
+ */
+void
+free_pg_dependencies(MVDependencies *dependencies)
+{
+	for (int i = 0; i < dependencies->ndeps; i++)
+		pfree(dependencies->deps[i]);
+
+	pfree(dependencies);
+}
+
 /*
  * pg_dependencies		- output routine for type pg_dependencies.
*/ diff --git a/src/backend/statistics/extended_stats.c b/src/backend/statistics/extended_stats.c index a8b63ec0884a..ce883a01cc1e 100644 --- a/src/backend/statistics/extended_stats.c +++ b/src/backend/statistics/extended_stats.c @@ -18,11 +18,16 @@ #include "access/detoast.h" #include "access/genam.h" +#include "access/heapam.h" +#include "access/htup.h" #include "access/htup_details.h" #include "access/table.h" #include "catalog/indexing.h" +#include "catalog/pg_collation.h" #include "catalog/pg_statistic_ext.h" #include "catalog/pg_statistic_ext_data.h" +#include "catalog/pg_type_d.h" +#include "catalog/namespace.h" #include "commands/defrem.h" #include "commands/progress.h" #include "executor/executor.h" @@ -33,6 +38,7 @@ #include "pgstat.h" #include "postmaster/autovacuum.h" #include "statistics/extended_stats_internal.h" +#include "statistics/stat_utils.h" #include "statistics/statistics.h" #include "utils/acl.h" #include "utils/array.h" @@ -72,6 +78,71 @@ typedef struct StatExtEntry List *exprs; /* expressions */ } StatExtEntry; +enum extended_stats_argnum +{ + STATSCHEMA_ARG = 0, + STATNAME_ARG, + INHERITED_ARG, + NDISTINCT_ARG, + DEPENDENCIES_ARG, + MOST_COMMON_VALS_ARG, + MOST_COMMON_VAL_NULLS_ARG, + MOST_COMMON_FREQS_ARG, + MOST_COMMON_BASE_FREQS_ARG, + EXPRESSIONS_ARG, + NUM_EXTENDED_STATS_ARGS +}; + +static struct StatsArgInfo extarginfo[] = +{ + [STATSCHEMA_ARG] = {"statistics_schemaname", TEXTOID}, + [STATNAME_ARG] = {"statistics_name", TEXTOID}, + [INHERITED_ARG] = {"inherited", BOOLOID}, + [NDISTINCT_ARG] = {"n_distinct", PG_NDISTINCTOID}, + [DEPENDENCIES_ARG] = {"dependencies", PG_DEPENDENCIESOID}, + [MOST_COMMON_VALS_ARG] = {"most_common_vals", TEXTARRAYOID}, + [MOST_COMMON_VAL_NULLS_ARG] = {"most_common_val_nulls", BOOLARRAYOID}, + [MOST_COMMON_FREQS_ARG] = {"most_common_freqs", FLOAT8ARRAYOID}, + [MOST_COMMON_BASE_FREQS_ARG] = {"most_common_base_freqs", FLOAT8ARRAYOID}, + [EXPRESSIONS_ARG] = {"exprs", TEXTARRAYOID}, + [NUM_EXTENDED_STATS_ARGS] 
= {0} +}; + +/* + * NOTE: the RANGE_LENGTH & RANGE_BOUNDS stats are not yet reflected in any + * version of pg_stat_ext_exprs. + */ +enum extended_stats_exprs_element +{ + NULL_FRAC_ELEM = 0, + AVG_WIDTH_ELEM, + N_DISTINCT_ELEM, + MOST_COMMON_VALS_ELEM, + MOST_COMMON_FREQS_ELEM, + HISTOGRAM_BOUNDS_ELEM, + CORRELATION_ELEM, + MOST_COMMON_ELEMS_ELEM, + MOST_COMMON_ELEM_FREQS_ELEM, + ELEM_COUNT_HISTOGRAM_ELEM, + NUM_ATTRIBUTE_STATS_ELEMS +}; + +static struct StatsArgInfo extexprarginfo[] = +{ + [NULL_FRAC_ELEM] = {"null_frac", FLOAT4OID}, + [AVG_WIDTH_ELEM] = {"avg_width", INT4OID}, + [N_DISTINCT_ELEM] = {"n_distinct", FLOAT4OID}, + [MOST_COMMON_VALS_ELEM] = {"most_common_vals", TEXTOID}, + [MOST_COMMON_FREQS_ELEM] = {"most_common_freqs", FLOAT4ARRAYOID}, + [HISTOGRAM_BOUNDS_ELEM] = {"histogram_bounds", TEXTOID}, + [CORRELATION_ELEM] = {"correlation", FLOAT4OID}, + [MOST_COMMON_ELEMS_ELEM] = {"most_common_elems", TEXTOID}, + [MOST_COMMON_ELEM_FREQS_ELEM] = {"most_common_elem_freqs", FLOAT4ARRAYOID}, + [ELEM_COUNT_HISTOGRAM_ELEM] = {"elem_count_histogram", FLOAT4ARRAYOID}, + [NUM_ATTRIBUTE_STATS_ELEMS] = {0} +}; + +static bool extended_statistics_update(FunctionCallInfo fcinfo); static List *fetch_statentries_for_relation(Relation pg_statext, Oid relid); static VacAttrStats **lookup_var_attr_stats(Bitmapset *attrs, List *exprs, @@ -99,6 +170,28 @@ static StatsBuildData *make_build_data(Relation rel, StatExtEntry *stat, int numrows, HeapTuple *rows, VacAttrStats **stats, int stattarget); +static HeapTuple get_pg_statistic_ext(Relation pg_stext, Oid nspoid, + const char *stxname); +static bool delete_pg_statistic_ext_data(Oid stxoid, bool inherited); + +typedef struct +{ + bool ndistinct; + bool dependencies; + bool mcv; + bool expressions; +} stakindFlags; + +static void expand_stxkind(HeapTuple tup, stakindFlags * enabled); +static void upsert_pg_statistic_ext_data(Datum *values, bool *nulls, bool *replaces); +static bool check_mcvlist_array(ArrayType *arr, int 
argindex, + int required_ndimss, int mcv_length); +static Datum import_expressions(Relation pgsd, int numexprs, + Oid *atttypids, int32 *atttypmods, + Oid *atttypcolls, ArrayType *exprs_arr); +static bool text_to_float4(Datum input, Datum *output); +static bool text_to_int4(Datum input, Datum *output); + /* * Compute requested extended stats, using the rows sampled for the plain @@ -2631,3 +2724,1014 @@ make_build_data(Relation rel, StatExtEntry *stat, int numrows, HeapTuple *rows, return result; } + +static HeapTuple +get_pg_statistic_ext(Relation pg_stext, Oid nspoid, const char *stxname) +{ + ScanKeyData key[2]; + SysScanDesc scan; + HeapTuple tup; + Oid stxoid = InvalidOid; + + ScanKeyInit(&key[0], + Anum_pg_statistic_ext_stxname, + BTEqualStrategyNumber, + F_NAMEEQ, + CStringGetDatum(stxname)); + ScanKeyInit(&key[1], + Anum_pg_statistic_ext_stxnamespace, + BTEqualStrategyNumber, + F_OIDEQ, + ObjectIdGetDatum(nspoid)); + + /* + * Try to find matching pg_statistic_ext row. + */ + scan = systable_beginscan(pg_stext, + StatisticExtNameIndexId, + true, + NULL, + 2, + key); + + /* Unique index, either we get a tuple or we don't. */ + tup = systable_getnext(scan); + + if (HeapTupleIsValid(tup)) + stxoid = ((Form_pg_statistic_ext) GETSTRUCT(tup))->oid; + + systable_endscan(scan); + + if (!OidIsValid(stxoid)) + return NULL; + + return SearchSysCacheCopy1(STATEXTOID, ObjectIdGetDatum(stxoid)); +} + +/* + * Decode the stxkind column so that we know which stats types to expect. 
+ */ +static void +expand_stxkind(HeapTuple tup, stakindFlags * enabled) +{ + Datum datum; + ArrayType *arr; + char *kinds; + + datum = SysCacheGetAttrNotNull(STATEXTOID, + tup, + Anum_pg_statistic_ext_stxkind); + arr = DatumGetArrayTypeP(datum); + if (ARR_NDIM(arr) != 1 || ARR_HASNULL(arr) || ARR_ELEMTYPE(arr) != CHAROID) + elog(ERROR, "stxkind is not a 1-D char array"); + + kinds = (char *) ARR_DATA_PTR(arr); + + for (int i = 0; i < ARR_DIMS(arr)[0]; i++) + if (kinds[i] == STATS_EXT_NDISTINCT) + enabled->ndistinct = true; + else if (kinds[i] == STATS_EXT_DEPENDENCIES) + enabled->dependencies = true; + else if (kinds[i] == STATS_EXT_MCV) + enabled->mcv = true; + else if (kinds[i] == STATS_EXT_EXPRESSIONS) + enabled->expressions = true; +} + +static void +upsert_pg_statistic_ext_data(Datum *values, bool *nulls, bool *replaces) +{ + Relation pg_stextdata; + HeapTuple stxdtup; + HeapTuple newtup; + + pg_stextdata = table_open(StatisticExtDataRelationId, RowExclusiveLock); + + stxdtup = SearchSysCache2(STATEXTDATASTXOID, + values[Anum_pg_statistic_ext_data_stxoid - 1], + values[Anum_pg_statistic_ext_data_stxdinherit - 1]); + + if (HeapTupleIsValid(stxdtup)) + { + newtup = heap_modify_tuple(stxdtup, + RelationGetDescr(pg_stextdata), + values, + nulls, + replaces); + CatalogTupleUpdate(pg_stextdata, &newtup->t_self, newtup); + ReleaseSysCache(stxdtup); + } + else + { + newtup = heap_form_tuple(RelationGetDescr(pg_stextdata), values, nulls); + CatalogTupleInsert(pg_stextdata, newtup); + } + + heap_freetuple(newtup); + + CommandCounterIncrement(); + + table_close(pg_stextdata, RowExclusiveLock); +} + +/* + * Insert or Update Extended Statistics + * + * Major errors, such as the table not existing, the statistics object not + * existing, or a permissions failure are always reported at ERROR. Other + * errors, such as a conversion failure on one statistic kind, are reported + * as WARNINGs, and other statistic kinds may still be updated. 
+ */ +static bool +extended_statistics_update(FunctionCallInfo fcinfo) +{ + Oid nspoid; + char *nspname; + char *stxname; + bool inherited; + Relation pg_stext; + HeapTuple tup = NULL; + + stakindFlags enabled; + stakindFlags has; + + Form_pg_statistic_ext stxform; + + Datum values[Natts_pg_statistic_ext_data]; + bool nulls[Natts_pg_statistic_ext_data]; + bool replaces[Natts_pg_statistic_ext_data]; + + bool success = true; + + Datum exprdatum; + bool isnull; + List *exprs = NIL; + int numattnums = 0; + int numexprs = 0; + int numattrs = 0; + + /* arrays of type info, if we need them */ + Oid *atttypids = NULL; + int32 *atttypmods = NULL; + Oid *atttypcolls = NULL; + + memset(nulls, false, sizeof(nulls)); + memset(values, 0, sizeof(values)); + memset(replaces, 0, sizeof(replaces)); + memset(&enabled, 0, sizeof(enabled)); + + has.mcv = (!PG_ARGISNULL(MOST_COMMON_VALS_ARG) && + !PG_ARGISNULL(MOST_COMMON_VAL_NULLS_ARG) && + !PG_ARGISNULL(MOST_COMMON_FREQS_ARG) && + !PG_ARGISNULL(MOST_COMMON_BASE_FREQS_ARG)); + has.ndistinct = !PG_ARGISNULL(NDISTINCT_ARG); + has.dependencies = !PG_ARGISNULL(DEPENDENCIES_ARG); + has.expressions = !PG_ARGISNULL(EXPRESSIONS_ARG); + + if (RecoveryInProgress()) + { + ereport(WARNING, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("recovery is in progress"), + errhint("Statistics cannot be modified during recovery."))); + PG_RETURN_BOOL(false); + } + + stats_check_required_arg(fcinfo, extarginfo, STATSCHEMA_ARG); + nspname = TextDatumGetCString(PG_GETARG_DATUM(STATSCHEMA_ARG)); + stats_check_required_arg(fcinfo, extarginfo, STATNAME_ARG); + stxname = TextDatumGetCString(PG_GETARG_DATUM(STATNAME_ARG)); + stats_check_required_arg(fcinfo, extarginfo, INHERITED_ARG); + inherited = PG_GETARG_NAME(INHERITED_ARG); + + nspoid = get_namespace_oid(nspname, true); + if (nspoid == InvalidOid) + { + ereport(WARNING, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("Namespace \"%s\" not found.", stxname))); + PG_RETURN_BOOL(false); + } + + 
pg_stext = table_open(StatisticExtRelationId, RowExclusiveLock); + tup = get_pg_statistic_ext(pg_stext, nspoid, stxname); + + if (!HeapTupleIsValid(tup)) + { + table_close(pg_stext, RowExclusiveLock); + ereport(WARNING, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("Extended Statistics Object \"%s\".\"%s\" not found.", + get_namespace_name(nspoid), stxname))); + PG_RETURN_BOOL(false); + } + + stxform = (Form_pg_statistic_ext) GETSTRUCT(tup); + expand_stxkind(tup, &enabled); + numattnums = stxform->stxkeys.dim1; + + /* decode expression (if any) */ + exprdatum = SysCacheGetAttr(STATEXTOID, + tup, + Anum_pg_statistic_ext_stxexprs, + &isnull); + + if (!isnull) + { + char *s; + + s = TextDatumGetCString(exprdatum); + exprs = (List *) stringToNode(s); + pfree(s); + + /* + * Run the expressions through eval_const_expressions. This is not + * just an optimization, but is necessary, because the planner + * will be comparing them to similarly-processed qual clauses, and + * may fail to detect valid matches without this. We must not use + * canonicalize_qual, however, since these aren't qual + * expressions. 
+ */ + exprs = (List *) eval_const_expressions(NULL, (Node *) exprs); + + /* May as well fix opfuncids too */ + fix_opfuncids((Node *) exprs); + } + numexprs = list_length(exprs); + numattrs = numattnums + numexprs; + + /* lock table */ + stats_lock_check_privileges(stxform->stxrelid); + + if (has.mcv) + { + if (!enabled.mcv) + { + ereport(WARNING, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("MCV parameters \"%s\", \"%s\", \"%s\", and \"%s\" were all " + "specified for extended statistics object that does not expect MCV ", + extarginfo[MOST_COMMON_VALS_ARG].argname, + extarginfo[MOST_COMMON_VAL_NULLS_ARG].argname, + extarginfo[MOST_COMMON_FREQS_ARG].argname, + extarginfo[MOST_COMMON_BASE_FREQS_ARG].argname))); + has.mcv = false; + success = false; + } + } + else + { + /* The MCV args must all be NULL */ + if (!PG_ARGISNULL(MOST_COMMON_VALS_ARG) || + !PG_ARGISNULL(MOST_COMMON_VAL_NULLS_ARG) || + !PG_ARGISNULL(MOST_COMMON_FREQS_ARG) || + !PG_ARGISNULL(MOST_COMMON_BASE_FREQS_ARG)) + { + ereport(WARNING, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("MCV parameters \"%s\", \"%s\", \"%s\", and \"%s\" must be all specified if any are specified", + extarginfo[MOST_COMMON_VALS_ARG].argname, + extarginfo[MOST_COMMON_VAL_NULLS_ARG].argname, + extarginfo[MOST_COMMON_FREQS_ARG].argname, + extarginfo[MOST_COMMON_BASE_FREQS_ARG].argname))); + success = false; + } + } + + if (has.ndistinct && !enabled.ndistinct) + { + ereport(WARNING, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("Parameters \"%s\" was specified for extended statistics object " + "that does not expect \"%s\"", + extarginfo[NDISTINCT_ARG].argname, + extarginfo[NDISTINCT_ARG].argname))); + has.ndistinct = false; + success = false; + } + + if (has.dependencies && !enabled.dependencies) + { + ereport(WARNING, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("Parameters \"%s\" was specified for extended statistics object " + "that does not expect \"%s\"", + 
extarginfo[DEPENDENCIES_ARG].argname, + extarginfo[DEPENDENCIES_ARG].argname))); + has.dependencies = false; + success = false; + } + + if (has.expressions && !enabled.expressions) + { + ereport(WARNING, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("Parameters \"%s\" was specified for extended statistics object " + "that does not expect \"%s\"", + extarginfo[DEPENDENCIES_ARG].argname, + extarginfo[DEPENDENCIES_ARG].argname))); + has.expressions = false; + success = false; + } + + /* + * Either of these statsistic types requires that we supply + * semi-filled-out VacAttrStatP array. + * + * + * It is not possible to use the existing lookup_var_attr_stats() and + * examine_attribute() because these functions will skip attributes for + * which attstattarget is 0, and we may have stats to import for those + * attributes. + */ + if (has.mcv || has.expressions) + { + atttypids = palloc0(numattrs * sizeof(Oid)); + atttypmods = palloc0(numattrs * sizeof(int32)); + atttypcolls = palloc0(numattrs * sizeof(Oid)); + + for (int i = 0; i < numattnums; i++) + { + AttrNumber attnum = stxform->stxkeys.values[i]; + + Oid lt_opr; + Oid eq_opr; + char typetype; + + /* + * fetch attribute entries the same as are done for attribute + * stats + */ + get_attr_stat_type(stxform->stxrelid, + attnum, + &atttypids[i], + &atttypmods[i], + &typetype, + &atttypcolls[i], + <_opr, + &eq_opr); + } + + for (int i = numattnums; i < numattrs; i++) + { + Node *expr = list_nth(exprs, i - numattnums); + + atttypids[i] = exprType(expr); + atttypmods[i] = exprTypmod(expr); + atttypcolls[i] = exprCollation(expr); + + /* + * Duplicate logic from get_attr_stat_type + */ + + /* + * If it's a multirange, step down to the range type, as is done + * by multirange_typanalyze(). + */ + if (type_is_multirange(atttypids[i])) + atttypids[i] = get_multirange_range(atttypids[i]); + + /* + * Special case: collation for tsvector is DEFAULT_COLLATION_OID. + * See compute_tsvector_stats(). 
+ */ + if (atttypids[i] == TSVECTOROID) + atttypcolls[i] = DEFAULT_COLLATION_OID; + + } + } + + /* Primary Key: cannot be NULL or replaced. */ + values[Anum_pg_statistic_ext_data_stxoid - 1] = ObjectIdGetDatum(stxform->oid); + values[Anum_pg_statistic_ext_data_stxdinherit - 1] = BoolGetDatum(inherited); + + if (has.ndistinct) + { + Datum ndistinct_datum = PG_GETARG_DATUM(NDISTINCT_ARG); + bytea *data = DatumGetByteaPP(ndistinct_datum); + MVNDistinct *ndistinct = statext_ndistinct_deserialize(data); + + if (pg_ndistinct_validate_items(ndistinct, &stxform->stxkeys, numexprs, WARNING)) + { + values[Anum_pg_statistic_ext_data_stxdndistinct - 1] = ndistinct_datum; + replaces[Anum_pg_statistic_ext_data_stxdndistinct - 1] = true; + } + else + { + nulls[Anum_pg_statistic_ext_data_stxdndistinct - 1] = true; + success = false; + } + + free_pg_ndistinct(ndistinct); + } + else + nulls[Anum_pg_statistic_ext_data_stxdndistinct - 1] = true; + + if (has.dependencies) + { + Datum dependencies_datum = PG_GETARG_DATUM(DEPENDENCIES_ARG); + bytea *data = DatumGetByteaPP(dependencies_datum); + MVDependencies *dependencies = statext_dependencies_deserialize(data); + + if (pg_dependencies_validate_deps(dependencies, &stxform->stxkeys, numexprs, WARNING)) + { + values[Anum_pg_statistic_ext_data_stxddependencies - 1] = dependencies_datum; + replaces[Anum_pg_statistic_ext_data_stxddependencies - 1] = true; + } + else + { + nulls[Anum_pg_statistic_ext_data_stxddependencies - 1] = true; + success = false; + } + + free_pg_dependencies(dependencies); + } + else + nulls[Anum_pg_statistic_ext_data_stxddependencies - 1] = true; + + if (has.mcv) + { + Datum datum; + ArrayType *mcv_arr = PG_GETARG_ARRAYTYPE_P(MOST_COMMON_VALS_ARG); + ArrayType *nulls_arr = PG_GETARG_ARRAYTYPE_P(MOST_COMMON_VAL_NULLS_ARG); + ArrayType *freqs_arr = PG_GETARG_ARRAYTYPE_P(MOST_COMMON_FREQS_ARG); + ArrayType *base_freqs_arr = PG_GETARG_ARRAYTYPE_P(MOST_COMMON_BASE_FREQS_ARG); + int nitems; + Datum *mcv_elems; + bool 
*mcv_nulls; + int check_nummcv; + + /* + * The mcv_arr is an array of arrays of text, and we use it as the + * reference array for checking the lengths of the other 3 arrays. + */ + if (ARR_NDIM(mcv_arr) != 2) + { + ereport(WARNING, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("Parameters \"%s\" must be a text array of 2 dimensions.", + extarginfo[MOST_COMMON_VALS_ARG].argname))); + return (Datum) 0; + } + + nitems = ARR_DIMS(mcv_arr)[0]; + + /* fixed length arrays that cannot contain NULLs */ + if (!check_mcvlist_array(nulls_arr, MOST_COMMON_VAL_NULLS_ARG, + 2, nitems) || + !check_mcvlist_array(freqs_arr, MOST_COMMON_FREQS_ARG, + 1, nitems) || + !check_mcvlist_array(base_freqs_arr, MOST_COMMON_BASE_FREQS_ARG, + 1, nitems)) + return (Datum) 0; + + + deconstruct_array_builtin(mcv_arr, TEXTOID, &mcv_elems, + &mcv_nulls, &check_nummcv); + + Assert(check_nummcv == (nitems * numattrs)); + + datum = import_mcvlist(tup, WARNING, numattrs, + atttypids, atttypmods, atttypcolls, + nitems, mcv_elems, mcv_nulls, + (bool *) ARR_DATA_PTR(nulls_arr), + (float8 *) ARR_DATA_PTR(freqs_arr), + (float8 *) ARR_DATA_PTR(base_freqs_arr)); + + values[Anum_pg_statistic_ext_data_stxdmcv - 1] = datum; + replaces[Anum_pg_statistic_ext_data_stxdmcv - 1] = true; + } + else + nulls[Anum_pg_statistic_ext_data_stxdmcv - 1] = true; + + if (has.expressions) + { + Datum datum; + Relation pgsd; + + pgsd = table_open(StatisticRelationId, RowExclusiveLock); + + datum = import_expressions(pgsd, numexprs, + &atttypids[numattnums], &atttypmods[numattnums], + &atttypcolls[numattnums], + PG_GETARG_ARRAYTYPE_P(EXPRESSIONS_ARG)); + + table_close(pgsd, RowExclusiveLock); + + values[Anum_pg_statistic_ext_data_stxdexpr - 1] = datum; + replaces[Anum_pg_statistic_ext_data_stxdexpr - 1] = true; + } + else + nulls[Anum_pg_statistic_ext_data_stxdexpr - 1] = true; + + upsert_pg_statistic_ext_data(values, nulls, replaces); + + heap_freetuple(tup); + table_close(pg_stext, RowExclusiveLock); + + if (atttypids != 
NULL) + pfree(atttypids); + if (atttypmods != NULL) + pfree(atttypmods); + if (atttypcolls != NULL) + pfree(atttypcolls); + return success; +} + +/* + * Consistency checks to ensure that other mcvlist arrays are in alignment + * with the mcv array. + */ +static bool +check_mcvlist_array(ArrayType *arr, int argindex, int required_ndims, + int mcv_length) +{ + if (ARR_NDIM(arr) != required_ndims) + { + ereport(WARNING, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("Parameter \"%s\" must be an array of %d dimensions.", + extarginfo[argindex].argname, required_ndims))); + return false; + } + + if (array_contains_nulls(arr)) + { + ereport(WARNING, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("Array \"%s\" cannot contain NULLs.", + extarginfo[argindex].argname))); + return false; + } + + if (ARR_DIMS(arr)[0] != mcv_length) + { + ereport(WARNING, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("Parameters \"%s\" must have the same number of elements as \"%s\"", + extarginfo[argindex].argname, + extarginfo[MOST_COMMON_VALS_ARG].argname))); + return false; + } + + return true; +} + +/* + * Create the stxdexprs datum using the user input in an array of array of + * text, referenced against the datatypes for the expressions. 
+ */ +static Datum +import_expressions(Relation pgsd, int numexprs, + Oid *atttypids, int32 *atttypmods, + Oid *atttypcolls, ArrayType *exprs_arr) +{ + Datum *exprs_elems; + bool *exprs_nulls; + int check_numexprs; + int offset = 0; + + FmgrInfo array_in_fn; + + Oid pgstypoid = get_rel_type_id(StatisticRelationId); + + ArrayBuildState *astate = NULL; + + + if (ARR_NDIM(exprs_arr) != 2) + { + ereport(WARNING, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("Parameter \"%s\" must be a text array of 2 dimensions.", + extarginfo[EXPRESSIONS_ARG].argname))); + return (Datum) 0; + } + + if (ARR_DIMS(exprs_arr)[0] != numexprs) + { + ereport(WARNING, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("Parameter \"%s\" must have an outer dimension of %d elements.", + extarginfo[EXPRESSIONS_ARG].argname, numexprs))); + return (Datum) 0; + } + if (ARR_DIMS(exprs_arr)[1] != NUM_ATTRIBUTE_STATS_ELEMS) + { + ereport(WARNING, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("Parameter \"%s\" must have an inner dimension of %d elements.", + extarginfo[EXPRESSIONS_ARG].argname, + NUM_ATTRIBUTE_STATS_ELEMS))); + return (Datum) 0; + } + + fmgr_info(F_ARRAY_IN, &array_in_fn); + + deconstruct_array_builtin(exprs_arr, TEXTOID, &exprs_elems, + &exprs_nulls, &check_numexprs); + + for (int i = 0; i < numexprs; i++) + { + Oid typid = atttypids[i]; + int32 typmod = atttypmods[i]; + Oid stacoll = atttypcolls[i]; + TypeCacheEntry *typcache; + + Oid elemtypid = InvalidOid; + Oid elem_eq_opr = InvalidOid; + + bool ok; + + Datum values[Natts_pg_statistic]; + bool nulls[Natts_pg_statistic]; + bool replaces[Natts_pg_statistic]; + + HeapTuple pgstup; + Datum pgstdat; + + /* finds the right operators even if atttypid is a domain */ + typcache = lookup_type_cache(typid, TYPECACHE_LT_OPR | TYPECACHE_EQ_OPR); + + init_empty_stats_tuple(InvalidOid, InvalidAttrNumber, false, + values, nulls, replaces); + + if (!exprs_nulls[offset + NULL_FRAC_ELEM]) + { + ok = 
text_to_float4(exprs_elems[offset + NULL_FRAC_ELEM], + &values[Anum_pg_statistic_stanullfrac - 1]); + + if (!ok) + { + char *s = TextDatumGetCString(exprs_elems[offset + NULL_FRAC_ELEM]); + + ereport(WARNING, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("Expression %s element \"%s\" does not match expected input type.", + extexprarginfo[NULL_FRAC_ELEM].argname, s))); + pfree(s); + return (Datum) 0; + } + } + + if (!exprs_nulls[offset + AVG_WIDTH_ELEM]) + { + ok = text_to_int4(exprs_elems[offset + AVG_WIDTH_ELEM], + &values[Anum_pg_statistic_stawidth - 1]); + + if (!ok) + { + char *s = TextDatumGetCString(exprs_elems[offset + NULL_FRAC_ELEM]); + + ereport(WARNING, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("Expression %s element \"%s\" does not match expected input type.", + extexprarginfo[AVG_WIDTH_ELEM].argname, s))); + pfree(s); + return (Datum) 0; + } + } + + if (!exprs_nulls[offset + N_DISTINCT_ELEM]) + { + ok = text_to_float4(exprs_elems[offset + N_DISTINCT_ELEM], + &values[Anum_pg_statistic_stadistinct - 1]); + + if (!ok) + { + char *s = TextDatumGetCString(exprs_elems[offset + NULL_FRAC_ELEM]); + + ereport(WARNING, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("Expression %s element \"%s\" does not match expected input type.", + extexprarginfo[N_DISTINCT_ELEM].argname, s))); + pfree(s); + return (Datum) 0; + } + } + + /* + * The STAKIND statistics are the same as the ones found in attribute + * stats. However, these are all derived from text columns, whereas + * the ones derived for attribute stats are a mix of datatypes. This + * limits the opportunities for code sharing between the two. 
+ */ + + /* STATISTIC_KIND_MCV */ + if (exprs_nulls[offset + MOST_COMMON_VALS_ELEM] != + exprs_nulls[offset + MOST_COMMON_FREQS_ELEM]) + { + ereport(WARNING, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("Expression %s and %s must both be NOT NULL or both NULL.", + extexprarginfo[MOST_COMMON_VALS_ELEM].argname, + extexprarginfo[MOST_COMMON_FREQS_ELEM].argname))); + return (Datum) 0; + } + + if (!exprs_nulls[offset + MOST_COMMON_VALS_ELEM]) + { + Datum stavalues; + Datum stanumbers; + + stavalues = text_to_stavalues(extexprarginfo[MOST_COMMON_VALS_ELEM].argname, + &array_in_fn, exprs_elems[offset + MOST_COMMON_VALS_ELEM], + typid, typmod, &ok); + + if (!ok) + return (Datum) 0; + + stanumbers = text_to_stavalues(extexprarginfo[MOST_COMMON_VALS_ELEM].argname, + &array_in_fn, exprs_elems[offset + MOST_COMMON_FREQS_ELEM], + FLOAT4OID, -1, &ok); + + if (!ok) + return (Datum) 0; + + set_stats_slot(values, nulls, replaces, + STATISTIC_KIND_MCV, + typcache->eq_opr, stacoll, + stanumbers, false, stavalues, false); + } + + /* STATISTIC_KIND_HISTOGRAM */ + if (!exprs_nulls[offset + HISTOGRAM_BOUNDS_ELEM]) + { + Datum stavalues; + + stavalues = text_to_stavalues(extexprarginfo[HISTOGRAM_BOUNDS_ELEM].argname, + &array_in_fn, exprs_elems[offset + HISTOGRAM_BOUNDS_ELEM], + typid, typmod, &ok); + + if (!ok) + return (Datum) 0; + + set_stats_slot(values, nulls, replaces, + STATISTIC_KIND_HISTOGRAM, + typcache->lt_opr, stacoll, + 0, true, stavalues, false); + } + + /* STATISTIC_KIND_CORRELATION */ + if (!exprs_nulls[offset + CORRELATION_ELEM]) + { + Datum corr[] = {(Datum) 0}; + ArrayType *arry; + Datum stanumbers; + + ok = text_to_float4(exprs_elems[offset + CORRELATION_ELEM], &corr[0]); + + if (!ok) + { + char *s = TextDatumGetCString(exprs_elems[offset + CORRELATION_ELEM]); + + ereport(WARNING, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("Expression %s element \"%s\" does not match expected input type.", + extexprarginfo[CORRELATION_ELEM].argname, s))); + return 
(Datum) 0; + } + + arry = construct_array_builtin(corr, 1, FLOAT4OID); + + stanumbers = PointerGetDatum(arry); + + set_stats_slot(values, nulls, replaces, + STATISTIC_KIND_CORRELATION, + typcache->lt_opr, stacoll, + stanumbers, false, 0, true); + } + + /* STATISTIC_KIND_MCELEM */ + if (exprs_nulls[offset + MOST_COMMON_ELEMS_ELEM] != + exprs_nulls[offset + MOST_COMMON_ELEM_FREQS_ELEM]) + { + ereport(WARNING, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("Expression %s and %s must both be NOT NULL or both NULL.", + extexprarginfo[MOST_COMMON_ELEMS_ELEM].argname, + extexprarginfo[MOST_COMMON_ELEM_FREQS_ELEM].argname))); + return (Datum) 0; + } + + /* + * We only need to fetch element type and eq operator if we have a + * stat of type MCELEM or DECHIST. + */ + if (!exprs_nulls[offset + MOST_COMMON_ELEMS_ELEM] || + !exprs_nulls[offset + ELEM_COUNT_HISTOGRAM_ELEM]) + { + if (!get_elem_stat_type(typid, typcache->typtype, + &elemtypid, &elem_eq_opr)) + { + ereport(WARNING, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + (errmsg("unable to determine element type of expression")))); + return (Datum) 0; + } + } + + if (!exprs_nulls[offset + MOST_COMMON_ELEMS_ELEM]) + { + Datum stavalues; + Datum stanumbers; + + stavalues = text_to_stavalues(extexprarginfo[MOST_COMMON_ELEMS_ELEM].argname, + &array_in_fn, + exprs_elems[offset + MOST_COMMON_ELEMS_ELEM], + elemtypid, typmod, &ok); + + if (!ok) + return (Datum) 0; + + stanumbers = text_to_stavalues(extexprarginfo[MOST_COMMON_ELEM_FREQS_ELEM].argname, + &array_in_fn, + exprs_elems[offset + MOST_COMMON_ELEM_FREQS_ELEM], + FLOAT4OID, -1, &ok); + + if (!ok) + return (Datum) 0; + + set_stats_slot(values, nulls, replaces, + STATISTIC_KIND_MCELEM, + elem_eq_opr, stacoll, + stanumbers, false, stavalues, false); + } + + if (!exprs_nulls[offset + ELEM_COUNT_HISTOGRAM_ELEM]) + { + Datum stanumbers; + + stanumbers = text_to_stavalues(extexprarginfo[ELEM_COUNT_HISTOGRAM_ELEM].argname, + &array_in_fn, + exprs_elems[offset + 
ELEM_COUNT_HISTOGRAM_ELEM], + FLOAT4OID, -1, &ok); + + if (!ok) + return (Datum) 0; + + set_stats_slot(values, nulls, replaces, STATISTIC_KIND_DECHIST, + elem_eq_opr, stacoll, + stanumbers, false, 0, true); + } + + /* + * Currently there are no extended stats exports of the statistic + * kinds STATISTIC_KIND_BOUNDS_HISTOGRAM or + * STATISTIC_KIND_RANGE_LENGTH_HISTOGRAM so these cannot be imported. + * These may be added in the future. + */ + + pgstup = heap_form_tuple(RelationGetDescr(pgsd), values, nulls); + pgstdat = heap_copy_tuple_as_datum(pgstup, RelationGetDescr(pgsd)); + astate = accumArrayResult(astate, pgstdat, false, pgstypoid, + CurrentMemoryContext); + + offset += NUM_ATTRIBUTE_STATS_ELEMS; + } + + pfree(exprs_elems); + pfree(exprs_nulls); + + return makeArrayResult(astate, CurrentMemoryContext); +} + +static bool +text_to_float4(Datum input, Datum *output) +{ + ErrorSaveContext escontext = {T_ErrorSaveContext}; + + char *s; + bool ok; + + s = TextDatumGetCString(input); + ok = DirectInputFunctionCallSafe(float4in, s, InvalidOid, -1, + (Node *) &escontext, output); + + pfree(s); + return ok; +} + + +static bool +text_to_int4(Datum input, Datum *output) +{ + ErrorSaveContext escontext = {T_ErrorSaveContext}; + + char *s; + bool ok; + + s = TextDatumGetCString(input); + ok = DirectInputFunctionCallSafe(int4in, s, InvalidOid, -1, + (Node *) &escontext, output); + + pfree(s); + return ok; +} + +static bool +delete_pg_statistic_ext_data(Oid stxoid, bool inherited) +{ + Relation sed = table_open(StatisticExtDataRelationId, RowExclusiveLock); + HeapTuple oldtup; + bool result = false; + + /* Is there already a pg_statistic tuple for this attribute? 
*/ + oldtup = SearchSysCache2(STATEXTDATASTXOID, + ObjectIdGetDatum(stxoid), + BoolGetDatum(inherited)); + + if (HeapTupleIsValid(oldtup)) + { + CatalogTupleDelete(sed, &oldtup->t_self); + ReleaseSysCache(oldtup); + result = true; + } + + table_close(sed, RowExclusiveLock); + + CommandCounterIncrement(); + + return result; +} + +Datum +pg_restore_extended_stats(PG_FUNCTION_ARGS) +{ + LOCAL_FCINFO(positional_fcinfo, NUM_EXTENDED_STATS_ARGS); + bool result = true; + + InitFunctionCallInfoData(*positional_fcinfo, NULL, NUM_EXTENDED_STATS_ARGS, + InvalidOid, NULL, NULL); + + if (!stats_fill_fcinfo_from_arg_pairs(fcinfo, positional_fcinfo, extarginfo)) + result = false; + + if (!extended_statistics_update(positional_fcinfo)) + result = false; + + PG_RETURN_BOOL(result); +} + +/* + * Delete statistics for the given statistics object. + */ +Datum +pg_clear_extended_stats(PG_FUNCTION_ARGS) +{ + char *nspname; + Oid nspoid; + char *stxname; + bool inherited; + Relation pg_stext; + HeapTuple tup; + + Form_pg_statistic_ext stxform; + + stats_check_required_arg(fcinfo, extarginfo, STATSCHEMA_ARG); + nspname = TextDatumGetCString(PG_GETARG_DATUM(STATSCHEMA_ARG)); + stats_check_required_arg(fcinfo, extarginfo, STATNAME_ARG); + stxname = TextDatumGetCString(PG_GETARG_DATUM(STATNAME_ARG)); + stats_check_required_arg(fcinfo, extarginfo, INHERITED_ARG); + inherited = PG_GETARG_NAME(INHERITED_ARG); + + nspoid = get_namespace_oid(nspname, true); + if (nspoid == InvalidOid) + { + ereport(WARNING, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("Namespace \"%s\" not found.", stxname))); + PG_RETURN_VOID(); + } + + if (RecoveryInProgress()) + { + ereport(WARNING, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("recovery is in progress"), + errhint("Statistics cannot be modified during recovery."))); + PG_RETURN_VOID(); + } + + pg_stext = table_open(StatisticExtRelationId, RowExclusiveLock); + tup = get_pg_statistic_ext(pg_stext, nspoid, stxname); + + if 
(!HeapTupleIsValid(tup)) + { + table_close(pg_stext, RowExclusiveLock); + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("Extended Statistics Object \"%s\".\"%s\" not found.", + nspname, stxname))); + PG_RETURN_VOID(); + } + + stxform = (Form_pg_statistic_ext) GETSTRUCT(tup); + + stats_lock_check_privileges(stxform->stxrelid); + + delete_pg_statistic_ext_data(stxform->oid, inherited); + heap_freetuple(tup); + table_close(pg_stext, RowExclusiveLock); + + PG_RETURN_VOID(); +} diff --git a/src/backend/statistics/mcv.c b/src/backend/statistics/mcv.c index d98cda698d94..73f78e060785 100644 --- a/src/backend/statistics/mcv.c +++ b/src/backend/statistics/mcv.c @@ -2173,3 +2173,147 @@ mcv_clause_selectivity_or(PlannerInfo *root, StatisticExtInfo *stat, return s; } + +/* + * The MCV is an array of records, but this is expected as 4 separate arrays. + * It is not possible to have a generic input function for pg_mcv_list + * because the most_common_values is a composite type with element types + * defined by the specific statistics object. + */ +Datum +import_mcvlist(HeapTuple tup, int elevel, int numattrs, Oid *atttypids, + int32 *atttypmods, Oid *atttypcolls, int nitems, + Datum *mcv_elems, bool *mcv_nulls, + bool *mcv_elem_nulls, float8 *freqs, float8 *base_freqs) +{ + MCVList *mcvlist; + bytea *bytes; + + HeapTuple *vatuples; + VacAttrStats **vastats; + + /* + * Allocate the MCV list structure, set the global parameters. 
+ */ + mcvlist = (MCVList *) palloc0(offsetof(MCVList, items) + + (sizeof(MCVItem) * nitems)); + + mcvlist->magic = STATS_MCV_MAGIC; + mcvlist->type = STATS_MCV_TYPE_BASIC; + mcvlist->ndimensions = numattrs; + mcvlist->nitems = nitems; + + /* Set the values for the 1-D arrays and allocate space for the 2-D arrays */ + for (int i = 0; i < nitems; i++) + { + MCVItem *item = &mcvlist->items[i]; + + item->frequency = freqs[i]; + item->base_frequency = base_freqs[i]; + item->values = (Datum *) palloc0(sizeof(Datum) * numattrs); + item->isnull = (bool *) palloc0(sizeof(bool) * numattrs); + } + + /* Walk through each dimension */ + for (int j = 0; j < numattrs; j++) + { + FmgrInfo finfo; + Oid ioparam; + Oid infunc; + int index = j; + + getTypeInputInfo(atttypids[j], &infunc, &ioparam); + fmgr_info(infunc, &finfo); + + /* store info about data type OIDs */ + mcvlist->types[j] = atttypids[j]; + + for (int i = 0; i < nitems; i++) + { + MCVItem *item = &mcvlist->items[i]; + + /* These should be in agreement, but just to be safe check both */ + if (mcv_elem_nulls[index] || mcv_nulls[index]) + { + item->values[j] = (Datum) 0; + item->isnull[j] = true; + } + else + { + char *s = TextDatumGetCString(mcv_elems[index]); + ErrorSaveContext escontext = {T_ErrorSaveContext}; + + if (!InputFunctionCallSafe(&finfo, s, ioparam, atttypmods[j], + (fmNodePtr) &escontext, &item->values[j])) + { + ereport(elevel, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("MCV elemement \"%s\" does not match expected input type.", s))); + return (Datum) 0; + } + + pfree(s); + } + + index += numattrs; + } + } + + /* + * The function statext_mcv_serialize() requires an array of pointers to + * VacAttrStats records, but only a few fields within those records have + * to be filled out. 
+ */ + vastats = (VacAttrStats **) palloc0(numattrs * sizeof(VacAttrStats)); + vatuples = (HeapTuple *) palloc0(numattrs * sizeof(HeapTuple)); + + for (int i = 0; i < numattrs; i++) + { + Oid typid = atttypids[i]; + HeapTuple typtuple; + + typtuple = SearchSysCacheCopy1(TYPEOID, ObjectIdGetDatum(typid)); + + if (!HeapTupleIsValid(typtuple)) + elog(ERROR, "cache lookup failed for type %u", typid); + + vatuples[i] = typtuple; + + vastats[i] = palloc0(sizeof(VacAttrStats)); + + vastats[i]->attrtype = (Form_pg_type) GETSTRUCT(typtuple); + vastats[i]->attrtypid = typid; + vastats[i]->attrcollid = atttypcolls[i]; + } + + bytes = statext_mcv_serialize(mcvlist, vastats); + + for (int i = 0; i < numattrs; i++) + { + pfree(vatuples[i]); + pfree(vastats[i]); + } + pfree((void *) vatuples); + pfree((void *) vastats); + + if (bytes == NULL) + { + ereport(elevel, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("Unable to import mcv list"))); + return (Datum) 0; + } + + for (int i = 0; i < nitems; i++) + { + MCVItem *item = &mcvlist->items[i]; + + pfree(item->values); + pfree(item->isnull); + } + pfree(mcvlist); + pfree(mcv_elems); + pfree(mcv_nulls); + + return PointerGetDatum(bytes); +} diff --git a/src/backend/statistics/mvdistinct.c b/src/backend/statistics/mvdistinct.c index 003dc3a74abf..4c24e580c2a4 100644 --- a/src/backend/statistics/mvdistinct.c +++ b/src/backend/statistics/mvdistinct.c @@ -774,6 +774,68 @@ pg_ndistinct_in(PG_FUNCTION_ARGS) PG_RETURN_NULL(); } +/* + * Free allocations of an MVNDistinct + */ +void +free_pg_ndistinct(MVNDistinct *ndistinct) +{ + for (int i = 0; i < ndistinct->nitems; i++) + pfree(ndistinct->items[i].attributes); + + pfree(ndistinct); +} + +/* + * Validate an MVNDistinct against the extended statistics object definition. + * + * Every MVNDistinctItem must be checked to ensure that the attnums in the + * attributes list correspond to attnums/expressions defined by the + * extended statistics object. 
+ * + * Positive attnums are attributes which must be found in the stxkeys, + * while negative attnums correspond to an expr number, so the attnum + * can't be below (0 - numexprs). + */ +bool +pg_ndistinct_validate_items(MVNDistinct *ndistinct, int2vector *stxkeys, int numexprs, int elevel) +{ + int attnum_expr_lowbound = 0 - numexprs; + + for (int i = 0; i < ndistinct->nitems; i++) + { + MVNDistinctItem item = ndistinct->items[i]; + + for (int j = 0; j < item.nattributes; j++) + { + AttrNumber attnum = item.attributes[j]; + bool ok = false; + + if (attnum > 0) + { + for (int k = 0; k < stxkeys->dim1; k++) + if (attnum == stxkeys->values[k]) + { + ok = true; + break; + } + } + else if ((attnum < 0) && (attnum >= attnum_expr_lowbound)) + ok = true; + + if (!ok) + { + ereport(elevel, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("pg_ndistinct: invalid attnum for this statistics object: %d", attnum))); + return false; + } + } + } + return true; +} + + /* * pg_ndistinct * output routine for type pg_ndistinct diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index 3ee8fed7e537..807802744921 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -12566,6 +12566,24 @@ proname => 'gist_translate_cmptype_common', prorettype => 'int2', proargtypes => 'int4', prosrc => 'gist_translate_cmptype_common' }, +# Extended Statistics Import +{ oid => '9947', + descr => 'restore statistics on extended statistics object', + proname => 'pg_restore_extended_stats', provolatile => 'v', proisstrict => 'f', + provariadic => 'any', + proparallel => 'u', prorettype => 'bool', + proargtypes => 'any', + proargnames => '{kwargs}', + proargmodes => '{v}', + prosrc => 'pg_restore_extended_stats' }, +{ oid => '9948', + descr => 'clear statistics on extended statistics object', + proname => 'pg_clear_extended_stats', provolatile => 'v', proisstrict => 'f', + proparallel => 'u', prorettype => 'void', + proargtypes => 'text text 
bool', + proargnames => '{statistics_schemaname,statistics_name,inherited}', + prosrc => 'pg_clear_extended_stats' }, + # AIO related functions { oid => '6399', descr => 'information about in-progress asynchronous IOs', proname => 'pg_get_aios', prorows => '100', proretset => 't', diff --git a/src/include/statistics/extended_stats_internal.h b/src/include/statistics/extended_stats_internal.h index efcb7dc35461..ba7f5dcad829 100644 --- a/src/include/statistics/extended_stats_internal.h +++ b/src/include/statistics/extended_stats_internal.h @@ -127,4 +127,21 @@ extern Selectivity mcv_clause_selectivity_or(PlannerInfo *root, Selectivity *overlap_basesel, Selectivity *totalsel); +extern Datum import_mcvlist(HeapTuple tup, int elevel, int numattrs, + Oid *atttypids, int32 *atttypmods, Oid *atttypcolls, + int nitems, Datum *mcv_elems, bool *mcv_nulls, + bool *mcv_elem_nulls, float8 *freqs, float8 *base_freqs); + +extern Datum import_mcvlist(HeapTuple tup, int elevel, int numattrs, + Oid *atttypids, int32 *atttypmods, Oid *atttypcolls, + int nitems, Datum *mcv_elems, bool *mcv_nulls, + bool *mcv_elem_nulls, float8 *freqs, float8 *base_freqs); +extern bool pg_ndistinct_validate_items(MVNDistinct *ndistinct, int2vector *stxkeys, + int numexprs, int elevel); +extern void free_pg_ndistinct(MVNDistinct *ndistinct); +extern bool pg_dependencies_validate_deps(MVDependencies *dependencies, + int2vector *stxkeys, int numexprs, + int elevel); +extern void free_pg_dependencies(MVDependencies *dependencies); + #endif /* EXTENDED_STATS_INTERNAL_H */ diff --git a/src/test/regress/expected/stats_import.out b/src/test/regress/expected/stats_import.out index 48d6392b4ad4..d852e046f9ec 100644 --- a/src/test/regress/expected/stats_import.out +++ b/src/test/regress/expected/stats_import.out @@ -1084,11 +1084,15 @@ SELECT 3, 'tre', (3, 3.3, 'TRE', '2003-03-03', NULL)::stats_import.complex_type, UNION ALL SELECT 4, 'four', NULL, int4range(0,100), NULL; CREATE INDEX is_odd ON 
stats_import.test(((comp).a % 2 = 1)); +CREATE STATISTICS stats_import.test_stat ON name, comp, lower(arange), array_length(tags,1) +FROM stats_import.test; -- Generate statistics on table with data ANALYZE stats_import.test; CREATE TABLE stats_import.test_clone ( LIKE stats_import.test ) WITH (autovacuum_enabled = false); CREATE INDEX is_odd_clone ON stats_import.test_clone(((comp).a % 2 = 1)); +CREATE STATISTICS stats_import.test_stat_clone ON name, comp, lower(arange), array_length(tags,1) +FROM stats_import.test_clone; -- -- Copy stats from test to test_clone, and is_odd to is_odd_clone -- @@ -1342,6 +1346,590 @@ AND attname = 'i'; (1 row) DROP TABLE stats_temp; +-- set n_distinct using at attnum (1) that is not in the statistics object +SELECT + pg_catalog.pg_restore_extended_stats( + 'statistics_schemaname', 'stats_import', + 'statistics_name', 'test_stat_clone', + 'inherited', false, + 'n_distinct', '[{"attributes" : [2,3], "ndistinct" : 4}, + {"attributes" : [2,-1], "ndistinct" : 4}, + {"attributes" : [2,-2], "ndistinct" : 4}, + {"attributes" : [3,-1], "ndistinct" : 4}, + {"attributes" : [3,-2], "ndistinct" : 4}, + {"attributes" : [-1,-2], "ndistinct" : 3}, + {"attributes" : [2,3,-1], "ndistinct" : 4}, + {"attributes" : [2,3,-2], "ndistinct" : 4}, + {"attributes" : [2,-1,-2], "ndistinct" : 4}, + {"attributes" : [3,-1,-2], "ndistinct" : 4}, + {"attributes" : [1,3,-1,-2], "ndistinct" : 4}]'::pg_ndistinct + ); +WARNING: pg_ndistinct: invalid attnum for this statistics object: 1 + pg_restore_extended_stats +--------------------------- + f +(1 row) + +-- set n_distinct using at attnum that is 0 +SELECT + pg_catalog.pg_restore_extended_stats( + 'statistics_schemaname', 'stats_import', + 'statistics_name', 'test_stat_clone', + 'inherited', false, + 'n_distinct', '[{"attributes" : [2,3], "ndistinct" : 4}, + {"attributes" : [2,-1], "ndistinct" : 4}, + {"attributes" : [2,0], "ndistinct" : 4}, + {"attributes" : [3,-1], "ndistinct" : 4}, + {"attributes" : [3,-2], 
"ndistinct" : 4}, + {"attributes" : [-1,-2], "ndistinct" : 3}, + {"attributes" : [2,3,-1], "ndistinct" : 4}, + {"attributes" : [2,3,-2], "ndistinct" : 4}, + {"attributes" : [2,-1,-2], "ndistinct" : 4}, + {"attributes" : [3,-1,-2], "ndistinct" : 4}]'::pg_ndistinct + ); +WARNING: pg_ndistinct: invalid attnum for this statistics object: 0 + pg_restore_extended_stats +--------------------------- + f +(1 row) + +-- set n_distinct using at attnum that is outside the expression bounds(below -2) +SELECT + pg_catalog.pg_restore_extended_stats( + 'statistics_schemaname', 'stats_import', + 'statistics_name', 'test_stat_clone', + 'inherited', false, + 'n_distinct', '[{"attributes" : [2,3], "ndistinct" : 4}, + {"attributes" : [2,-4], "ndistinct" : 4}, + {"attributes" : [3,-1], "ndistinct" : 4}, + {"attributes" : [3,-2], "ndistinct" : 4}, + {"attributes" : [-1,-2], "ndistinct" : 3}, + {"attributes" : [2,3,-1], "ndistinct" : 4}, + {"attributes" : [2,3,-2], "ndistinct" : 4}, + {"attributes" : [2,-1,-2], "ndistinct" : 4}, + {"attributes" : [3,-1,-2], "ndistinct" : 4}]'::pg_ndistinct + ); +WARNING: pg_ndistinct: invalid attnum for this statistics object: -4 + pg_restore_extended_stats +--------------------------- + f +(1 row) + +-- ok +SELECT + pg_catalog.pg_restore_extended_stats( + 'statistics_schemaname', 'stats_import', + 'statistics_name', 'test_stat_clone', + 'inherited', false, + 'n_distinct', '[{"attributes" : [2,3], "ndistinct" : 4}, + {"attributes" : [2,-1], "ndistinct" : 4}, + {"attributes" : [3,-1], "ndistinct" : 4}, + {"attributes" : [3,-2], "ndistinct" : 4}, + {"attributes" : [-1,-2], "ndistinct" : 3}, + {"attributes" : [2,3,-1], "ndistinct" : 4}, + {"attributes" : [2,3,-2], "ndistinct" : 4}, + {"attributes" : [2,-1,-2], "ndistinct" : 4}, + {"attributes" : [3,-1,-2], "ndistinct" : 4}]'::pg_ndistinct + ); + pg_restore_extended_stats +--------------------------- + t +(1 row) + +SELECT + e.n_distinct, e.dependencies, e.most_common_vals, e.most_common_val_nulls, + 
e.most_common_freqs, e.most_common_base_freqs +FROM pg_stats_ext AS e +WHERE e.statistics_schemaname = 'stats_import' +AND e.statistics_name = 'test_stat_clone' +AND e.inherited = false +\gx +-[ RECORD 1 ]----------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ +n_distinct | [{"attributes": [2, 3], "ndistinct": 4}, {"attributes": [2, -1], "ndistinct": 4}, {"attributes": [3, -1], "ndistinct": 4}, {"attributes": [3, -2], "ndistinct": 4}, {"attributes": [-1, -2], "ndistinct": 3}, {"attributes": [2, 3, -1], "ndistinct": 4}, {"attributes": [2, 3, -2], "ndistinct": 4}, {"attributes": [2, -1, -2], "ndistinct": 4}, {"attributes": [3, -1, -2], "ndistinct": 4}] +dependencies | +most_common_vals | +most_common_val_nulls | +most_common_freqs | +most_common_base_freqs | + +-- set dependencies using at attnum (1) that is not in the statistics object +SELECT + pg_catalog.pg_restore_extended_stats( + 'statistics_schemaname', 'stats_import', + 'statistics_name', 'test_stat_clone', + 'inherited', false, + 'dependencies', '[{"attributes": [2], "dependency": 1, "degree": 1.000000}, + {"attributes": [2], "dependency": -1, "degree": 1.000000}, + {"attributes": [2], "dependency": -2, "degree": 1.000000}, + {"attributes": [3], "dependency": 2, "degree": 1.000000}, + {"attributes": [3], "dependency": -1, "degree": 1.000000}, + {"attributes": [3], "dependency": -2, "degree": 1.000000}, + {"attributes": [-1], "dependency": 2, "degree": 0.500000}, + {"attributes": [-1], "dependency": 3, "degree": 0.500000}, + {"attributes": [-1], "dependency": -2, "degree": 1.000000}, + {"attributes": [-2], "dependency": 2, "degree": 0.500000}, + {"attributes": [-2], 
"dependency": 3, "degree": 0.500000}, + {"attributes": [-2], "dependency": -1, "degree": 1.000000}, + {"attributes": [2,3], "dependency": -1, "degree": 1.000000}, + {"attributes": [2,3], "dependency": -2, "degree": 1.000000}, + {"attributes": [2,-1], "dependency": 3, "degree": 1.000000}, + {"attributes": [2,-1], "dependency": -2, "degree": 1.000000}, + {"attributes": [2,-2], "dependency": 3, "degree": 1.000000}, + {"attributes": [2,-2], "dependency": -1, "degree": 1.000000}, + {"attributes": [3,-1], "dependency": 2, "degree": 1.000000}, + {"attributes": [3,-1], "dependency": -2, "degree": 1.000000}, + {"attributes": [3,-2], "dependency": 2, "degree": 1.000000}, + {"attributes": [3,-2], "dependency": -1, "degree": 1.000000}, + {"attributes": [-1,-2], "dependency": 2, "degree": 0.500000}, + {"attributes": [-1,-2], "dependency": 3, "degree": 0.500000}, + {"attributes": [2,3], "dependency": -1, "degree": 1.000000}, + {"attributes": [2,3], "dependency": -2, "degree": 1.000000}, + {"attributes": [2,3,-2], "dependency": -1, "degree": 1.000000}, + {"attributes": [2,-1,-2], "dependency": 3, "degree": 1.000000}, + {"attributes": [3,-1,-2], "dependency": 2, "degree": 1.000000}]'::pg_dependencies + ); +WARNING: pg_dependencies: invalid attnum for this statistics object: 1 + pg_restore_extended_stats +--------------------------- + f +(1 row) + +-- set dependencies using at attnum that is 0 +SELECT + pg_catalog.pg_restore_extended_stats( + 'statistics_schemaname', 'stats_import', + 'statistics_name', 'test_stat_clone', + 'inherited', false, + 'dependencies', '[{"attributes": [2], "dependency": 3, "degree": 1.000000}, + {"attributes": [0], "dependency": -1, "degree": 1.000000}, + {"attributes": [2], "dependency": -2, "degree": 1.000000}, + {"attributes": [3], "dependency": 2, "degree": 1.000000}, + {"attributes": [3], "dependency": -1, "degree": 1.000000}, + {"attributes": [3], "dependency": -2, "degree": 1.000000}, + {"attributes": [-1], "dependency": 2, "degree": 0.500000}, + 
{"attributes": [-1], "dependency": 3, "degree": 0.500000}, + {"attributes": [-1], "dependency": -2, "degree": 1.000000}, + {"attributes": [-2], "dependency": 2, "degree": 0.500000}, + {"attributes": [-2], "dependency": 3, "degree": 0.500000}, + {"attributes": [-2], "dependency": -1, "degree": 1.000000}, + {"attributes": [2,3], "dependency": -1, "degree": 1.000000}, + {"attributes": [2,3], "dependency": -2, "degree": 1.000000}, + {"attributes": [2,-1], "dependency": 3, "degree": 1.000000}, + {"attributes": [2,-1], "dependency": -2, "degree": 1.000000}, + {"attributes": [2,-2], "dependency": 3, "degree": 1.000000}, + {"attributes": [2,-2], "dependency": -1, "degree": 1.000000}, + {"attributes": [3,-1], "dependency": 2, "degree": 1.000000}, + {"attributes": [3,-1], "dependency": -2, "degree": 1.000000}, + {"attributes": [3,-2], "dependency": 2, "degree": 1.000000}, + {"attributes": [3,-2], "dependency": -1, "degree": 1.000000}, + {"attributes": [-1,-2], "dependency": 2, "degree": 0.500000}, + {"attributes": [-1,-2], "dependency": 3, "degree": 0.500000}, + {"attributes": [2,3], "dependency": -1, "degree": 1.000000}, + {"attributes": [2,3], "dependency": -2, "degree": 1.000000}, + {"attributes": [2,3,-2], "dependency": -1, "degree": 1.000000}, + {"attributes": [2,-1,-2], "dependency": 3, "degree": 1.000000}, + {"attributes": [3,-1,-2], "dependency": 2, "degree": 1.000000}]'::pg_dependencies + ); +WARNING: pg_dependencies: invalid attnum for this statistics object: 0 + pg_restore_extended_stats +--------------------------- + f +(1 row) + +-- set dependencies using at attnum that is outside the expression bounds(below -2) +SELECT + pg_catalog.pg_restore_extended_stats( + 'statistics_schemaname', 'stats_import', + 'statistics_name', 'test_stat_clone', + 'inherited', false, + 'dependencies', '[{"attributes": [2], "dependency": -3, "degree": 1.000000}, + {"attributes": [2], "dependency": -1, "degree": 1.000000}, + {"attributes": [2], "dependency": -2, "degree": 1.000000}, + 
{"attributes": [3], "dependency": 2, "degree": 1.000000}, + {"attributes": [3], "dependency": -1, "degree": 1.000000}, + {"attributes": [3], "dependency": -2, "degree": 1.000000}, + {"attributes": [-1], "dependency": 2, "degree": 0.500000}, + {"attributes": [-1], "dependency": 3, "degree": 0.500000}, + {"attributes": [-1], "dependency": -2, "degree": 1.000000}, + {"attributes": [-2], "dependency": 2, "degree": 0.500000}, + {"attributes": [-2], "dependency": 3, "degree": 0.500000}, + {"attributes": [-2], "dependency": -1, "degree": 1.000000}, + {"attributes": [2,3], "dependency": -1, "degree": 1.000000}, + {"attributes": [2,3], "dependency": -2, "degree": 1.000000}, + {"attributes": [2,-1], "dependency": 3, "degree": 1.000000}, + {"attributes": [2,-1], "dependency": -2, "degree": 1.000000}, + {"attributes": [2,-2], "dependency": 3, "degree": 1.000000}, + {"attributes": [2,-2], "dependency": -1, "degree": 1.000000}, + {"attributes": [3,-1], "dependency": 2, "degree": 1.000000}, + {"attributes": [3,-1], "dependency": -2, "degree": 1.000000}, + {"attributes": [3,-2], "dependency": 2, "degree": 1.000000}, + {"attributes": [3,-2], "dependency": -1, "degree": 1.000000}, + {"attributes": [-1,-2], "dependency": 2, "degree": 0.500000}, + {"attributes": [-1,-2], "dependency": 3, "degree": 0.500000}, + {"attributes": [2,3], "dependency": -1, "degree": 1.000000}, + {"attributes": [2,3], "dependency": -2, "degree": 1.000000}, + {"attributes": [2,3,-2], "dependency": -1, "degree": 1.000000}, + {"attributes": [2,-1,-2], "dependency": 3, "degree": 1.000000}, + {"attributes": [3,-1,-2], "dependency": 2, "degree": 1.000000}]'::pg_dependencies + ); +WARNING: pg_dependencies: invalid attnum for this statistics object: -3 + pg_restore_extended_stats +--------------------------- + f +(1 row) + +-- ok +SELECT + pg_catalog.pg_restore_extended_stats( + 'statistics_schemaname', 'stats_import', + 'statistics_name', 'test_stat_clone', + 'inherited', false, + 'dependencies', '[{"attributes": 
[2], "dependency": 3, "degree": 1.000000}, + {"attributes": [2], "dependency": -1, "degree": 1.000000}, + {"attributes": [2], "dependency": -2, "degree": 1.000000}, + {"attributes": [3], "dependency": 2, "degree": 1.000000}, + {"attributes": [3], "dependency": -1, "degree": 1.000000}, + {"attributes": [3], "dependency": -2, "degree": 1.000000}, + {"attributes": [-1], "dependency": 2, "degree": 0.500000}, + {"attributes": [-1], "dependency": 3, "degree": 0.500000}, + {"attributes": [-1], "dependency": -2, "degree": 1.000000}, + {"attributes": [-2], "dependency": 2, "degree": 0.500000}, + {"attributes": [-2], "dependency": 3, "degree": 0.500000}, + {"attributes": [-2], "dependency": -1, "degree": 1.000000}, + {"attributes": [2,3], "dependency": -1, "degree": 1.000000}, + {"attributes": [2,3], "dependency": -2, "degree": 1.000000}, + {"attributes": [2,-1], "dependency": 3, "degree": 1.000000}, + {"attributes": [2,-1], "dependency": -2, "degree": 1.000000}, + {"attributes": [2,-2], "dependency": 3, "degree": 1.000000}, + {"attributes": [2,-2], "dependency": -1, "degree": 1.000000}, + {"attributes": [3,-1], "dependency": 2, "degree": 1.000000}, + {"attributes": [3,-1], "dependency": -2, "degree": 1.000000}, + {"attributes": [3,-2], "dependency": 2, "degree": 1.000000}, + {"attributes": [3,-2], "dependency": -1, "degree": 1.000000}, + {"attributes": [-1,-2], "dependency": 2, "degree": 0.500000}, + {"attributes": [-1,-2], "dependency": 3, "degree": 0.500000}, + {"attributes": [2,3], "dependency": -1, "degree": 1.000000}, + {"attributes": [2,3], "dependency": -2, "degree": 1.000000}, + {"attributes": [2,3,-2], "dependency": -1, "degree": 1.000000}, + {"attributes": [2,-1,-2], "dependency": 3, "degree": 1.000000}, + {"attributes": [3,-1,-2], "dependency": 2, "degree": 1.000000}]'::pg_dependencies + ); + pg_restore_extended_stats +--------------------------- + t +(1 row) + +SELECT + e.n_distinct, e.dependencies, e.most_common_vals, e.most_common_val_nulls, + 
e.most_common_freqs, e.most_common_base_freqs +FROM pg_stats_ext AS e +WHERE e.statistics_schemaname = 'stats_import' +AND e.statistics_name = 'test_stat_clone' +AND e.inherited = false +\gx +-[ RECORD 1 ]----------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- 
+n_distinct | [{"attributes": [2, 3], "ndistinct": 4}, {"attributes": [2, -1], "ndistinct": 4}, {"attributes": [3, -1], "ndistinct": 4}, {"attributes": [3, -2], "ndistinct": 4}, {"attributes": [-1, -2], "ndistinct": 3}, {"attributes": [2, 3, -1], "ndistinct": 4}, {"attributes": [2, 3, -2], "ndistinct": 4}, {"attributes": [2, -1, -2], "ndistinct": 4}, {"attributes": [3, -1, -2], "ndistinct": 4}] +dependencies | [{"attributes": [2], "dependency": 3, "degree": 1.000000}, {"attributes": [2], "dependency": -1, "degree": 1.000000}, {"attributes": [2], "dependency": -2, "degree": 1.000000}, {"attributes": [3], "dependency": 2, "degree": 1.000000}, {"attributes": [3], "dependency": -1, "degree": 1.000000}, {"attributes": [3], "dependency": -2, "degree": 1.000000}, {"attributes": [-1], "dependency": 2, "degree": 0.500000}, {"attributes": [-1], "dependency": 3, "degree": 0.500000}, {"attributes": [-1], "dependency": -2, "degree": 1.000000}, {"attributes": [-2], "dependency": 2, "degree": 0.500000}, {"attributes": [-2], "dependency": 3, "degree": 0.500000}, {"attributes": [-2], "dependency": -1, "degree": 1.000000}, {"attributes": [2, 3], "dependency": -1, "degree": 1.000000}, {"attributes": [2, 3], "dependency": -2, "degree": 1.000000}, {"attributes": [2, -1], "dependency": 3, "degree": 1.000000}, {"attributes": [2, -1], "dependency": -2, "degree": 1.000000}, {"attributes": [2, -2], "dependency": 3, "degree": 1.000000}, {"attributes": [2, -2], "dependency": -1, "degree": 1.000000}, {"attributes": [3, -1], "dependency": 2, "degree": 1.000000}, {"attributes": [3, -1], "dependency": -2, "degree": 1.000000}, {"attributes": [3, -2], "dependency": 2, "degree": 1.000000}, {"attributes": [3, -2], "dependency": -1, "degree": 1.000000}, {"attributes": [-1, -2], "dependency": 2, "degree": 0.500000}, {"attributes": [-1, -2], "dependency": 3, "degree": 0.500000}, {"attributes": [2, 3], "dependency": -1, "degree": 1.000000}, {"attributes": [2, 3], "dependency": -2, "degree": 1.000000}, 
{"attributes": [2, 3, -2], "dependency": -1, "degree": 1.000000}, {"attributes": [2, -1, -2], "dependency": 3, "degree": 1.000000}, {"attributes": [3, -1, -2], "dependency": 2, "degree": 1.000000}] +most_common_vals | +most_common_val_nulls | +most_common_freqs | +most_common_base_freqs | + +-- if any one mcv param specified, all four must be specified (part 1) +SELECT + pg_catalog.pg_restore_extended_stats( + 'statistics_schemaname', 'stats_import', + 'statistics_name', 'test_stat_clone', + 'inherited', false, + 'most_common_vals', '{{four,NULL,0,NULL},{one,"(1,1.1,ONE,01-01-2001,\"{\"\"xkey\"\": \"\"xval\"\"}\")",1,2},{tre,"(3,3.3,TRE,03-03-2003,)",-1,3},{two,"(2,2.2,TWO,02-02-2002,\"[true, 4, \"\"six\"\"]\")",1,2}}'::text[] + ); +WARNING: MCV parameters "most_common_vals", "most_common_val_nulls", "most_common_freqs", and "most_common_base_freqs" must be all specified if any are specified + pg_restore_extended_stats +--------------------------- + f +(1 row) + +-- if any one mcv param specified, all four must be specified (part 2) +SELECT + pg_catalog.pg_restore_extended_stats( + 'statistics_schemaname', 'stats_import', + 'statistics_name', 'test_stat_clone', + 'inherited', false, + 'most_common_val_nulls', '{{f,t,f,t},{f,f,f,f},{f,f,f,f},{f,f,f,f}}'::boolean[] + ); +WARNING: MCV parameters "most_common_vals", "most_common_val_nulls", "most_common_freqs", and "most_common_base_freqs" must be all specified if any are specified + pg_restore_extended_stats +--------------------------- + f +(1 row) + +-- if any one mcv param specified, all four must be specified (part 3) +SELECT + pg_catalog.pg_restore_extended_stats( + 'statistics_schemaname', 'stats_import', + 'statistics_name', 'test_stat_clone', + 'inherited', false, + 'most_common_freqs', '{0.25,0.25,0.25,0.25}'::double precision[] + ); +WARNING: MCV parameters "most_common_vals", "most_common_val_nulls", "most_common_freqs", and "most_common_base_freqs" must be all specified if any are specified + 
pg_restore_extended_stats +--------------------------- + f +(1 row) + +-- if any one mcv param specified, all four must be specified (part 4) +SELECT + pg_catalog.pg_restore_extended_stats( + 'statistics_schemaname', 'stats_import', + 'statistics_name', 'test_stat_clone', + 'inherited', false, + 'most_common_base_freqs', '{0.00390625,0.015625,0.00390625,0.015625}'::double precision[] + ); +WARNING: MCV parameters "most_common_vals", "most_common_val_nulls", "most_common_freqs", and "most_common_base_freqs" must be all specified if any are specified + pg_restore_extended_stats +--------------------------- + f +(1 row) + +-- ok +SELECT + pg_catalog.pg_restore_extended_stats( + 'statistics_schemaname', 'stats_import', + 'statistics_name', 'test_stat_clone', + 'inherited', false, + 'most_common_vals', '{{four,NULL,0,NULL},{one,"(1,1.1,ONE,01-01-2001,\"{\"\"xkey\"\": \"\"xval\"\"}\")",1,2},{tre,"(3,3.3,TRE,03-03-2003,)",-1,3},{two,"(2,2.2,TWO,02-02-2002,\"[true, 4, \"\"six\"\"]\")",1,2}}'::text[], + 'most_common_val_nulls', '{{f,t,f,t},{f,f,f,f},{f,f,f,f},{f,f,f,f}}'::boolean[], + 'most_common_freqs', '{0.25,0.25,0.25,0.25}'::double precision[], + 'most_common_base_freqs', '{0.00390625,0.015625,0.00390625,0.015625}'::double precision[] + ); + pg_restore_extended_stats +--------------------------- + t +(1 row) + +SELECT + e.n_distinct, e.dependencies, e.most_common_vals, e.most_common_val_nulls, + e.most_common_freqs, e.most_common_base_freqs +FROM pg_stats_ext AS e +WHERE e.statistics_schemaname = 'stats_import' +AND e.statistics_name = 'test_stat_clone' +AND e.inherited = false +\gx +-[ RECORD 1 
]----------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +n_distinct | [{"attributes": [2, 3], "ndistinct": 4}, {"attributes": [2, -1], "ndistinct": 4}, {"attributes": [3, -1], "ndistinct": 4}, {"attributes": [3, -2], "ndistinct": 4}, {"attributes": [-1, -2], 
"ndistinct": 3}, {"attributes": [2, 3, -1], "ndistinct": 4}, {"attributes": [2, 3, -2], "ndistinct": 4}, {"attributes": [2, -1, -2], "ndistinct": 4}, {"attributes": [3, -1, -2], "ndistinct": 4}] +dependencies | [{"attributes": [2], "dependency": 3, "degree": 1.000000}, {"attributes": [2], "dependency": -1, "degree": 1.000000}, {"attributes": [2], "dependency": -2, "degree": 1.000000}, {"attributes": [3], "dependency": 2, "degree": 1.000000}, {"attributes": [3], "dependency": -1, "degree": 1.000000}, {"attributes": [3], "dependency": -2, "degree": 1.000000}, {"attributes": [-1], "dependency": 2, "degree": 0.500000}, {"attributes": [-1], "dependency": 3, "degree": 0.500000}, {"attributes": [-1], "dependency": -2, "degree": 1.000000}, {"attributes": [-2], "dependency": 2, "degree": 0.500000}, {"attributes": [-2], "dependency": 3, "degree": 0.500000}, {"attributes": [-2], "dependency": -1, "degree": 1.000000}, {"attributes": [2, 3], "dependency": -1, "degree": 1.000000}, {"attributes": [2, 3], "dependency": -2, "degree": 1.000000}, {"attributes": [2, -1], "dependency": 3, "degree": 1.000000}, {"attributes": [2, -1], "dependency": -2, "degree": 1.000000}, {"attributes": [2, -2], "dependency": 3, "degree": 1.000000}, {"attributes": [2, -2], "dependency": -1, "degree": 1.000000}, {"attributes": [3, -1], "dependency": 2, "degree": 1.000000}, {"attributes": [3, -1], "dependency": -2, "degree": 1.000000}, {"attributes": [3, -2], "dependency": 2, "degree": 1.000000}, {"attributes": [3, -2], "dependency": -1, "degree": 1.000000}, {"attributes": [-1, -2], "dependency": 2, "degree": 0.500000}, {"attributes": [-1, -2], "dependency": 3, "degree": 0.500000}, {"attributes": [2, 3], "dependency": -1, "degree": 1.000000}, {"attributes": [2, 3], "dependency": -2, "degree": 1.000000}, {"attributes": [2, 3, -2], "dependency": -1, "degree": 1.000000}, {"attributes": [2, -1, -2], "dependency": 3, "degree": 1.000000}, {"attributes": [3, -1, -2], "dependency": 2, "degree": 1.000000}] 
+most_common_vals | {{four,NULL,0,NULL},{one,"(1,1.1,ONE,01-01-2001,\"{\"\"xkey\"\": \"\"xval\"\"}\")",1,2},{tre,"(3,3.3,TRE,03-03-2003,)",-1,3},{two,"(2,2.2,TWO,02-02-2002,\"[true, 4, \"\"six\"\"]\")",1,2}} +most_common_val_nulls | {{f,t,f,t},{f,f,f,f},{f,f,f,f},{f,f,f,f}} +most_common_freqs | {0.25,0.25,0.25,0.25} +most_common_base_freqs | {0.00390625,0.015625,0.00390625,0.015625} + +SELECT + pg_catalog.pg_restore_extended_stats( + 'statistics_schemaname', 'stats_import', + 'statistics_name', 'test_stat_clone', + 'inherited', false, + 'exprs', '{{0,4,-0.75,"{1}","{0.5}","{-1,0}",-0.6,NULL,NULL,NULL},{0.25,4,-0.5,"{2}","{0.5}",NULL,1,NULL,NULL,NULL}}'::text[] + ); + pg_restore_extended_stats +--------------------------- + t +(1 row) + +SELECT + e.inherited, e.null_frac, e.avg_width, e.n_distinct, e.most_common_vals, + e.most_common_freqs, e.histogram_bounds, e.correlation, + e.most_common_elems, e.most_common_elem_freqs, e.elem_count_histogram +FROM pg_stats_ext_exprs AS e +WHERE e.statistics_schemaname = 'stats_import' +AND e.statistics_name = 'test_stat_clone' +and e.inherited = false +\gx +-[ RECORD 1 ]----------+------- +inherited | f +null_frac | 0 +avg_width | 4 +n_distinct | -0.75 +most_common_vals | {1} +most_common_freqs | {0.5} +histogram_bounds | {-1,0} +correlation | -0.6 +most_common_elems | +most_common_elem_freqs | +elem_count_histogram | +-[ RECORD 2 ]----------+------- +inherited | f +null_frac | 0.25 +avg_width | 4 +n_distinct | -0.5 +most_common_vals | {2} +most_common_freqs | {0.5} +histogram_bounds | +correlation | 1 +most_common_elems | +most_common_elem_freqs | +elem_count_histogram | + +SELECT + pg_catalog.pg_clear_extended_stats( + statistics_schemaname => 'stats_import', + statistics_name => 'test_stat_clone', + inherited => false); + pg_clear_extended_stats +------------------------- + +(1 row) + +SELECT COUNT(*) +FROM pg_stats_ext AS e +WHERE e.statistics_schemaname = 'stats_import' +AND e.statistics_name = 'test_stat_clone' +AND 
e.inherited = false; + count +------- + 0 +(1 row) + +SELECT COUNT(*) +FROM pg_stats_ext_exprs AS e +WHERE e.statistics_schemaname = 'stats_import' +AND e.statistics_name = 'test_stat_clone' +AND e.inherited = false; + count +------- + 0 +(1 row) + +-- +-- Copy stats from test_stat to test_stat_clone +-- +SELECT + e.statistics_name, + pg_catalog.pg_restore_extended_stats( + 'statistics_schemaname', e.statistics_schemaname::text, + 'statistics_name', 'test_stat_clone', + 'inherited', e.inherited, + 'n_distinct', e.n_distinct, + 'dependencies', e.dependencies, + 'most_common_vals', e.most_common_vals, + 'most_common_val_nulls', e.most_common_val_nulls, + 'most_common_freqs', e.most_common_freqs, + 'most_common_base_freqs', e.most_common_base_freqs, + 'exprs', x.exprs + ) +FROM pg_stats_ext AS e +CROSS JOIN LATERAL ( + SELECT + array_agg( + ARRAY[ee.null_frac::text, ee.avg_width::text, + ee.n_distinct::text, ee.most_common_vals::text, + ee.most_common_freqs::text, ee.histogram_bounds::text, + ee.correlation::text, ee.most_common_elems::text, + ee.most_common_elem_freqs::text, + ee.elem_count_histogram::text]) + FROM pg_stats_ext_exprs AS ee + WHERE ee.statistics_schemaname = e.statistics_schemaname + AND ee.statistics_name = e.statistics_name + AND ee.inherited = e.inherited + ) AS x(exprs) +WHERE e.statistics_schemaname = 'stats_import' +AND e.statistics_name = 'test_stat'; + statistics_name | pg_restore_extended_stats +-----------------+--------------------------- + test_stat | t +(1 row) + +SELECT o.inherited, + o.n_distinct, o.dependencies, o.most_common_vals, + o.most_common_val_nulls, o.most_common_freqs, + o.most_common_base_freqs +FROM pg_stats_ext AS o +WHERE o.statistics_schemaname = 'stats_import' +AND o.statistics_name = 'test_stat' +EXCEPT +SELECT n.inherited, + n.n_distinct, n.dependencies, n.most_common_vals, + n.most_common_val_nulls, n.most_common_freqs, + n.most_common_base_freqs +FROM pg_stats_ext AS n +WHERE n.statistics_schemaname = 'stats_import' 
+AND n.statistics_name = 'test_stat_clone'; + inherited | n_distinct | dependencies | most_common_vals | most_common_val_nulls | most_common_freqs | most_common_base_freqs +-----------+------------+--------------+------------------+-----------------------+-------------------+------------------------ +(0 rows) + +SELECT n.inherited, + n.n_distinct, n.dependencies, n.most_common_vals, + n.most_common_val_nulls, n.most_common_freqs, + n.most_common_base_freqs +FROM pg_stats_ext AS n +WHERE n.statistics_schemaname = 'stats_import' +AND n.statistics_name = 'test_stat_clone' +EXCEPT +SELECT o.inherited, + o.n_distinct, o.dependencies, o.most_common_vals, + o.most_common_val_nulls, o.most_common_freqs, + o.most_common_base_freqs +FROM pg_stats_ext AS o +WHERE o.statistics_schemaname = 'stats_import' +AND o.statistics_name = 'test_stat'; + inherited | n_distinct | dependencies | most_common_vals | most_common_val_nulls | most_common_freqs | most_common_base_freqs +-----------+------------+--------------+------------------+-----------------------+-------------------+------------------------ +(0 rows) + +SELECT o.inherited, + o.null_frac, o.avg_width, o.n_distinct, + o.most_common_vals::text AS most_common_vals, + o.most_common_freqs, + o.histogram_bounds::text AS histogram_bounds, + o.correlation, + o.most_common_elems::text AS most_common_elems, + o.most_common_elem_freqs, o.elem_count_histogram +FROM pg_stats_ext_exprs AS o +WHERE o.statistics_schemaname = 'stats_import' +AND o.statistics_name = 'test_stat' +EXCEPT +SELECT n.inherited, + n.null_frac, n.avg_width, n.n_distinct, + n.most_common_vals::text AS most_common_vals, + n.most_common_freqs, + n.histogram_bounds::text AS histogram_bounds, + n.correlation, + n.most_common_elems::text AS most_common_elems, + n.most_common_elem_freqs, n.elem_count_histogram +FROM pg_stats_ext_exprs AS n +WHERE n.statistics_schemaname = 'stats_import' +AND n.statistics_name = 'test_stat_clone'; + inherited | null_frac | avg_width | 
n_distinct | most_common_vals | most_common_freqs | histogram_bounds | correlation | most_common_elems | most_common_elem_freqs | elem_count_histogram +-----------+-----------+-----------+------------+------------------+-------------------+------------------+-------------+-------------------+------------------------+---------------------- +(0 rows) + +SELECT n.inherited, + n.null_frac, n.avg_width, n.n_distinct, + n.most_common_vals::text AS most_common_vals, + n.most_common_freqs, + n.histogram_bounds::text AS histogram_bounds, + n.correlation, + n.most_common_elems::text AS most_common_elems, + n.most_common_elem_freqs, n.elem_count_histogram +FROM pg_stats_ext_exprs AS n +WHERE n.statistics_schemaname = 'stats_import' +AND n.statistics_name = 'test_stat_clone' +EXCEPT +SELECT o.inherited, + o.null_frac, o.avg_width, o.n_distinct, + o.most_common_vals::text AS most_common_vals, + o.most_common_freqs, + o.histogram_bounds::text AS histogram_bounds, + o.correlation, + o.most_common_elems::text AS most_common_elems, + o.most_common_elem_freqs, o.elem_count_histogram +FROM pg_stats_ext_exprs AS o +WHERE o.statistics_schemaname = 'stats_import' +AND o.statistics_name = 'test_stat'; + inherited | null_frac | avg_width | n_distinct | most_common_vals | most_common_freqs | histogram_bounds | correlation | most_common_elems | most_common_elem_freqs | elem_count_histogram +-----------+-----------+-----------+------------+------------------+-------------------+------------------+-------------+-------------------+------------------------+---------------------- +(0 rows) + DROP SCHEMA stats_import CASCADE; NOTICE: drop cascades to 6 other objects DETAIL: drop cascades to type stats_import.complex_type diff --git a/src/test/regress/sql/stats_import.sql b/src/test/regress/sql/stats_import.sql index d140733a7502..4dd568be9fcf 100644 --- a/src/test/regress/sql/stats_import.sql +++ b/src/test/regress/sql/stats_import.sql @@ -766,6 +766,9 @@ SELECT 4, 'four', NULL, 
int4range(0,100), NULL; CREATE INDEX is_odd ON stats_import.test(((comp).a % 2 = 1)); +CREATE STATISTICS stats_import.test_stat ON name, comp, lower(arange), array_length(tags,1) +FROM stats_import.test; + -- Generate statistics on table with data ANALYZE stats_import.test; @@ -774,6 +777,9 @@ CREATE TABLE stats_import.test_clone ( LIKE stats_import.test ) CREATE INDEX is_odd_clone ON stats_import.test_clone(((comp).a % 2 = 1)); +CREATE STATISTICS stats_import.test_stat_clone ON name, comp, lower(arange), array_length(tags,1) +FROM stats_import.test_clone; + -- -- Copy stats from test to test_clone, and is_odd to is_odd_clone -- @@ -970,4 +976,450 @@ AND tablename = 'stats_temp' AND inherited = false AND attname = 'i'; DROP TABLE stats_temp; + +-- set n_distinct using at attnum (1) that is not in the statistics object +SELECT + pg_catalog.pg_restore_extended_stats( + 'statistics_schemaname', 'stats_import', + 'statistics_name', 'test_stat_clone', + 'inherited', false, + 'n_distinct', '[{"attributes" : [2,3], "ndistinct" : 4}, + {"attributes" : [2,-1], "ndistinct" : 4}, + {"attributes" : [2,-2], "ndistinct" : 4}, + {"attributes" : [3,-1], "ndistinct" : 4}, + {"attributes" : [3,-2], "ndistinct" : 4}, + {"attributes" : [-1,-2], "ndistinct" : 3}, + {"attributes" : [2,3,-1], "ndistinct" : 4}, + {"attributes" : [2,3,-2], "ndistinct" : 4}, + {"attributes" : [2,-1,-2], "ndistinct" : 4}, + {"attributes" : [3,-1,-2], "ndistinct" : 4}, + {"attributes" : [1,3,-1,-2], "ndistinct" : 4}]'::pg_ndistinct + ); + +-- set n_distinct using at attnum that is 0 +SELECT + pg_catalog.pg_restore_extended_stats( + 'statistics_schemaname', 'stats_import', + 'statistics_name', 'test_stat_clone', + 'inherited', false, + 'n_distinct', '[{"attributes" : [2,3], "ndistinct" : 4}, + {"attributes" : [2,-1], "ndistinct" : 4}, + {"attributes" : [2,0], "ndistinct" : 4}, + {"attributes" : [3,-1], "ndistinct" : 4}, + {"attributes" : [3,-2], "ndistinct" : 4}, + {"attributes" : [-1,-2], "ndistinct" : 3}, + 
{"attributes" : [2,3,-1], "ndistinct" : 4}, + {"attributes" : [2,3,-2], "ndistinct" : 4}, + {"attributes" : [2,-1,-2], "ndistinct" : 4}, + {"attributes" : [3,-1,-2], "ndistinct" : 4}]'::pg_ndistinct + ); + +-- set n_distinct using at attnum that is outside the expression bounds(below -2) +SELECT + pg_catalog.pg_restore_extended_stats( + 'statistics_schemaname', 'stats_import', + 'statistics_name', 'test_stat_clone', + 'inherited', false, + 'n_distinct', '[{"attributes" : [2,3], "ndistinct" : 4}, + {"attributes" : [2,-4], "ndistinct" : 4}, + {"attributes" : [3,-1], "ndistinct" : 4}, + {"attributes" : [3,-2], "ndistinct" : 4}, + {"attributes" : [-1,-2], "ndistinct" : 3}, + {"attributes" : [2,3,-1], "ndistinct" : 4}, + {"attributes" : [2,3,-2], "ndistinct" : 4}, + {"attributes" : [2,-1,-2], "ndistinct" : 4}, + {"attributes" : [3,-1,-2], "ndistinct" : 4}]'::pg_ndistinct + ); + +-- ok +SELECT + pg_catalog.pg_restore_extended_stats( + 'statistics_schemaname', 'stats_import', + 'statistics_name', 'test_stat_clone', + 'inherited', false, + 'n_distinct', '[{"attributes" : [2,3], "ndistinct" : 4}, + {"attributes" : [2,-1], "ndistinct" : 4}, + {"attributes" : [3,-1], "ndistinct" : 4}, + {"attributes" : [3,-2], "ndistinct" : 4}, + {"attributes" : [-1,-2], "ndistinct" : 3}, + {"attributes" : [2,3,-1], "ndistinct" : 4}, + {"attributes" : [2,3,-2], "ndistinct" : 4}, + {"attributes" : [2,-1,-2], "ndistinct" : 4}, + {"attributes" : [3,-1,-2], "ndistinct" : 4}]'::pg_ndistinct + ); + +SELECT + e.n_distinct, e.dependencies, e.most_common_vals, e.most_common_val_nulls, + e.most_common_freqs, e.most_common_base_freqs +FROM pg_stats_ext AS e +WHERE e.statistics_schemaname = 'stats_import' +AND e.statistics_name = 'test_stat_clone' +AND e.inherited = false +\gx + +-- set dependencies using at attnum (1) that is not in the statistics object +SELECT + pg_catalog.pg_restore_extended_stats( + 'statistics_schemaname', 'stats_import', + 'statistics_name', 'test_stat_clone', + 'inherited', 
false, + 'dependencies', '[{"attributes": [2], "dependency": 1, "degree": 1.000000}, + {"attributes": [2], "dependency": -1, "degree": 1.000000}, + {"attributes": [2], "dependency": -2, "degree": 1.000000}, + {"attributes": [3], "dependency": 2, "degree": 1.000000}, + {"attributes": [3], "dependency": -1, "degree": 1.000000}, + {"attributes": [3], "dependency": -2, "degree": 1.000000}, + {"attributes": [-1], "dependency": 2, "degree": 0.500000}, + {"attributes": [-1], "dependency": 3, "degree": 0.500000}, + {"attributes": [-1], "dependency": -2, "degree": 1.000000}, + {"attributes": [-2], "dependency": 2, "degree": 0.500000}, + {"attributes": [-2], "dependency": 3, "degree": 0.500000}, + {"attributes": [-2], "dependency": -1, "degree": 1.000000}, + {"attributes": [2,3], "dependency": -1, "degree": 1.000000}, + {"attributes": [2,3], "dependency": -2, "degree": 1.000000}, + {"attributes": [2,-1], "dependency": 3, "degree": 1.000000}, + {"attributes": [2,-1], "dependency": -2, "degree": 1.000000}, + {"attributes": [2,-2], "dependency": 3, "degree": 1.000000}, + {"attributes": [2,-2], "dependency": -1, "degree": 1.000000}, + {"attributes": [3,-1], "dependency": 2, "degree": 1.000000}, + {"attributes": [3,-1], "dependency": -2, "degree": 1.000000}, + {"attributes": [3,-2], "dependency": 2, "degree": 1.000000}, + {"attributes": [3,-2], "dependency": -1, "degree": 1.000000}, + {"attributes": [-1,-2], "dependency": 2, "degree": 0.500000}, + {"attributes": [-1,-2], "dependency": 3, "degree": 0.500000}, + {"attributes": [2,3], "dependency": -1, "degree": 1.000000}, + {"attributes": [2,3], "dependency": -2, "degree": 1.000000}, + {"attributes": [2,3,-2], "dependency": -1, "degree": 1.000000}, + {"attributes": [2,-1,-2], "dependency": 3, "degree": 1.000000}, + {"attributes": [3,-1,-2], "dependency": 2, "degree": 1.000000}]'::pg_dependencies + ); + +-- set dependencies using at attnum that is 0 +SELECT + pg_catalog.pg_restore_extended_stats( + 'statistics_schemaname', 
'stats_import', + 'statistics_name', 'test_stat_clone', + 'inherited', false, + 'dependencies', '[{"attributes": [2], "dependency": 3, "degree": 1.000000}, + {"attributes": [0], "dependency": -1, "degree": 1.000000}, + {"attributes": [2], "dependency": -2, "degree": 1.000000}, + {"attributes": [3], "dependency": 2, "degree": 1.000000}, + {"attributes": [3], "dependency": -1, "degree": 1.000000}, + {"attributes": [3], "dependency": -2, "degree": 1.000000}, + {"attributes": [-1], "dependency": 2, "degree": 0.500000}, + {"attributes": [-1], "dependency": 3, "degree": 0.500000}, + {"attributes": [-1], "dependency": -2, "degree": 1.000000}, + {"attributes": [-2], "dependency": 2, "degree": 0.500000}, + {"attributes": [-2], "dependency": 3, "degree": 0.500000}, + {"attributes": [-2], "dependency": -1, "degree": 1.000000}, + {"attributes": [2,3], "dependency": -1, "degree": 1.000000}, + {"attributes": [2,3], "dependency": -2, "degree": 1.000000}, + {"attributes": [2,-1], "dependency": 3, "degree": 1.000000}, + {"attributes": [2,-1], "dependency": -2, "degree": 1.000000}, + {"attributes": [2,-2], "dependency": 3, "degree": 1.000000}, + {"attributes": [2,-2], "dependency": -1, "degree": 1.000000}, + {"attributes": [3,-1], "dependency": 2, "degree": 1.000000}, + {"attributes": [3,-1], "dependency": -2, "degree": 1.000000}, + {"attributes": [3,-2], "dependency": 2, "degree": 1.000000}, + {"attributes": [3,-2], "dependency": -1, "degree": 1.000000}, + {"attributes": [-1,-2], "dependency": 2, "degree": 0.500000}, + {"attributes": [-1,-2], "dependency": 3, "degree": 0.500000}, + {"attributes": [2,3], "dependency": -1, "degree": 1.000000}, + {"attributes": [2,3], "dependency": -2, "degree": 1.000000}, + {"attributes": [2,3,-2], "dependency": -1, "degree": 1.000000}, + {"attributes": [2,-1,-2], "dependency": 3, "degree": 1.000000}, + {"attributes": [3,-1,-2], "dependency": 2, "degree": 1.000000}]'::pg_dependencies + ); + +-- set dependencies using at attnum that is outside the 
expression bounds(below -2) +SELECT + pg_catalog.pg_restore_extended_stats( + 'statistics_schemaname', 'stats_import', + 'statistics_name', 'test_stat_clone', + 'inherited', false, + 'dependencies', '[{"attributes": [2], "dependency": -3, "degree": 1.000000}, + {"attributes": [2], "dependency": -1, "degree": 1.000000}, + {"attributes": [2], "dependency": -2, "degree": 1.000000}, + {"attributes": [3], "dependency": 2, "degree": 1.000000}, + {"attributes": [3], "dependency": -1, "degree": 1.000000}, + {"attributes": [3], "dependency": -2, "degree": 1.000000}, + {"attributes": [-1], "dependency": 2, "degree": 0.500000}, + {"attributes": [-1], "dependency": 3, "degree": 0.500000}, + {"attributes": [-1], "dependency": -2, "degree": 1.000000}, + {"attributes": [-2], "dependency": 2, "degree": 0.500000}, + {"attributes": [-2], "dependency": 3, "degree": 0.500000}, + {"attributes": [-2], "dependency": -1, "degree": 1.000000}, + {"attributes": [2,3], "dependency": -1, "degree": 1.000000}, + {"attributes": [2,3], "dependency": -2, "degree": 1.000000}, + {"attributes": [2,-1], "dependency": 3, "degree": 1.000000}, + {"attributes": [2,-1], "dependency": -2, "degree": 1.000000}, + {"attributes": [2,-2], "dependency": 3, "degree": 1.000000}, + {"attributes": [2,-2], "dependency": -1, "degree": 1.000000}, + {"attributes": [3,-1], "dependency": 2, "degree": 1.000000}, + {"attributes": [3,-1], "dependency": -2, "degree": 1.000000}, + {"attributes": [3,-2], "dependency": 2, "degree": 1.000000}, + {"attributes": [3,-2], "dependency": -1, "degree": 1.000000}, + {"attributes": [-1,-2], "dependency": 2, "degree": 0.500000}, + {"attributes": [-1,-2], "dependency": 3, "degree": 0.500000}, + {"attributes": [2,3], "dependency": -1, "degree": 1.000000}, + {"attributes": [2,3], "dependency": -2, "degree": 1.000000}, + {"attributes": [2,3,-2], "dependency": -1, "degree": 1.000000}, + {"attributes": [2,-1,-2], "dependency": 3, "degree": 1.000000}, + {"attributes": [3,-1,-2], "dependency": 2, 
"degree": 1.000000}]'::pg_dependencies + ); + +-- ok +SELECT + pg_catalog.pg_restore_extended_stats( + 'statistics_schemaname', 'stats_import', + 'statistics_name', 'test_stat_clone', + 'inherited', false, + 'dependencies', '[{"attributes": [2], "dependency": 3, "degree": 1.000000}, + {"attributes": [2], "dependency": -1, "degree": 1.000000}, + {"attributes": [2], "dependency": -2, "degree": 1.000000}, + {"attributes": [3], "dependency": 2, "degree": 1.000000}, + {"attributes": [3], "dependency": -1, "degree": 1.000000}, + {"attributes": [3], "dependency": -2, "degree": 1.000000}, + {"attributes": [-1], "dependency": 2, "degree": 0.500000}, + {"attributes": [-1], "dependency": 3, "degree": 0.500000}, + {"attributes": [-1], "dependency": -2, "degree": 1.000000}, + {"attributes": [-2], "dependency": 2, "degree": 0.500000}, + {"attributes": [-2], "dependency": 3, "degree": 0.500000}, + {"attributes": [-2], "dependency": -1, "degree": 1.000000}, + {"attributes": [2,3], "dependency": -1, "degree": 1.000000}, + {"attributes": [2,3], "dependency": -2, "degree": 1.000000}, + {"attributes": [2,-1], "dependency": 3, "degree": 1.000000}, + {"attributes": [2,-1], "dependency": -2, "degree": 1.000000}, + {"attributes": [2,-2], "dependency": 3, "degree": 1.000000}, + {"attributes": [2,-2], "dependency": -1, "degree": 1.000000}, + {"attributes": [3,-1], "dependency": 2, "degree": 1.000000}, + {"attributes": [3,-1], "dependency": -2, "degree": 1.000000}, + {"attributes": [3,-2], "dependency": 2, "degree": 1.000000}, + {"attributes": [3,-2], "dependency": -1, "degree": 1.000000}, + {"attributes": [-1,-2], "dependency": 2, "degree": 0.500000}, + {"attributes": [-1,-2], "dependency": 3, "degree": 0.500000}, + {"attributes": [2,3], "dependency": -1, "degree": 1.000000}, + {"attributes": [2,3], "dependency": -2, "degree": 1.000000}, + {"attributes": [2,3,-2], "dependency": -1, "degree": 1.000000}, + {"attributes": [2,-1,-2], "dependency": 3, "degree": 1.000000}, + {"attributes": 
[3,-1,-2], "dependency": 2, "degree": 1.000000}]'::pg_dependencies + ); + +SELECT + e.n_distinct, e.dependencies, e.most_common_vals, e.most_common_val_nulls, + e.most_common_freqs, e.most_common_base_freqs +FROM pg_stats_ext AS e +WHERE e.statistics_schemaname = 'stats_import' +AND e.statistics_name = 'test_stat_clone' +AND e.inherited = false +\gx + +-- if any one mcv param specified, all four must be specified (part 1) +SELECT + pg_catalog.pg_restore_extended_stats( + 'statistics_schemaname', 'stats_import', + 'statistics_name', 'test_stat_clone', + 'inherited', false, + 'most_common_vals', '{{four,NULL,0,NULL},{one,"(1,1.1,ONE,01-01-2001,\"{\"\"xkey\"\": \"\"xval\"\"}\")",1,2},{tre,"(3,3.3,TRE,03-03-2003,)",-1,3},{two,"(2,2.2,TWO,02-02-2002,\"[true, 4, \"\"six\"\"]\")",1,2}}'::text[] + ); + +-- if any one mcv param specified, all four must be specified (part 2) +SELECT + pg_catalog.pg_restore_extended_stats( + 'statistics_schemaname', 'stats_import', + 'statistics_name', 'test_stat_clone', + 'inherited', false, + 'most_common_val_nulls', '{{f,t,f,t},{f,f,f,f},{f,f,f,f},{f,f,f,f}}'::boolean[] + ); + +-- if any one mcv param specified, all four must be specified (part 3) +SELECT + pg_catalog.pg_restore_extended_stats( + 'statistics_schemaname', 'stats_import', + 'statistics_name', 'test_stat_clone', + 'inherited', false, + 'most_common_freqs', '{0.25,0.25,0.25,0.25}'::double precision[] + ); + +-- if any one mcv param specified, all four must be specified (part 4) +SELECT + pg_catalog.pg_restore_extended_stats( + 'statistics_schemaname', 'stats_import', + 'statistics_name', 'test_stat_clone', + 'inherited', false, + 'most_common_base_freqs', '{0.00390625,0.015625,0.00390625,0.015625}'::double precision[] + ); + +-- ok +SELECT + pg_catalog.pg_restore_extended_stats( + 'statistics_schemaname', 'stats_import', + 'statistics_name', 'test_stat_clone', + 'inherited', false, + 'most_common_vals', '{{four,NULL,0,NULL},{one,"(1,1.1,ONE,01-01-2001,\"{\"\"xkey\"\": 
\"\"xval\"\"}\")",1,2},{tre,"(3,3.3,TRE,03-03-2003,)",-1,3},{two,"(2,2.2,TWO,02-02-2002,\"[true, 4, \"\"six\"\"]\")",1,2}}'::text[], + 'most_common_val_nulls', '{{f,t,f,t},{f,f,f,f},{f,f,f,f},{f,f,f,f}}'::boolean[], + 'most_common_freqs', '{0.25,0.25,0.25,0.25}'::double precision[], + 'most_common_base_freqs', '{0.00390625,0.015625,0.00390625,0.015625}'::double precision[] + ); + +SELECT + e.n_distinct, e.dependencies, e.most_common_vals, e.most_common_val_nulls, + e.most_common_freqs, e.most_common_base_freqs +FROM pg_stats_ext AS e +WHERE e.statistics_schemaname = 'stats_import' +AND e.statistics_name = 'test_stat_clone' +AND e.inherited = false +\gx + +SELECT + pg_catalog.pg_restore_extended_stats( + 'statistics_schemaname', 'stats_import', + 'statistics_name', 'test_stat_clone', + 'inherited', false, + 'exprs', '{{0,4,-0.75,"{1}","{0.5}","{-1,0}",-0.6,NULL,NULL,NULL},{0.25,4,-0.5,"{2}","{0.5}",NULL,1,NULL,NULL,NULL}}'::text[] + ); + +SELECT + e.inherited, e.null_frac, e.avg_width, e.n_distinct, e.most_common_vals, + e.most_common_freqs, e.histogram_bounds, e.correlation, + e.most_common_elems, e.most_common_elem_freqs, e.elem_count_histogram +FROM pg_stats_ext_exprs AS e +WHERE e.statistics_schemaname = 'stats_import' +AND e.statistics_name = 'test_stat_clone' +and e.inherited = false +\gx + +SELECT + pg_catalog.pg_clear_extended_stats( + statistics_schemaname => 'stats_import', + statistics_name => 'test_stat_clone', + inherited => false); + +SELECT COUNT(*) +FROM pg_stats_ext AS e +WHERE e.statistics_schemaname = 'stats_import' +AND e.statistics_name = 'test_stat_clone' +AND e.inherited = false; + +SELECT COUNT(*) +FROM pg_stats_ext_exprs AS e +WHERE e.statistics_schemaname = 'stats_import' +AND e.statistics_name = 'test_stat_clone' +AND e.inherited = false; + +-- +-- Copy stats from test_stat to test_stat_clone +-- +SELECT + e.statistics_name, + pg_catalog.pg_restore_extended_stats( + 'statistics_schemaname', e.statistics_schemaname::text, + 
'statistics_name', 'test_stat_clone', + 'inherited', e.inherited, + 'n_distinct', e.n_distinct, + 'dependencies', e.dependencies, + 'most_common_vals', e.most_common_vals, + 'most_common_val_nulls', e.most_common_val_nulls, + 'most_common_freqs', e.most_common_freqs, + 'most_common_base_freqs', e.most_common_base_freqs, + 'exprs', x.exprs + ) +FROM pg_stats_ext AS e +CROSS JOIN LATERAL ( + SELECT + array_agg( + ARRAY[ee.null_frac::text, ee.avg_width::text, + ee.n_distinct::text, ee.most_common_vals::text, + ee.most_common_freqs::text, ee.histogram_bounds::text, + ee.correlation::text, ee.most_common_elems::text, + ee.most_common_elem_freqs::text, + ee.elem_count_histogram::text]) + FROM pg_stats_ext_exprs AS ee + WHERE ee.statistics_schemaname = e.statistics_schemaname + AND ee.statistics_name = e.statistics_name + AND ee.inherited = e.inherited + ) AS x(exprs) +WHERE e.statistics_schemaname = 'stats_import' +AND e.statistics_name = 'test_stat'; + +SELECT o.inherited, + o.n_distinct, o.dependencies, o.most_common_vals, + o.most_common_val_nulls, o.most_common_freqs, + o.most_common_base_freqs +FROM pg_stats_ext AS o +WHERE o.statistics_schemaname = 'stats_import' +AND o.statistics_name = 'test_stat' +EXCEPT +SELECT n.inherited, + n.n_distinct, n.dependencies, n.most_common_vals, + n.most_common_val_nulls, n.most_common_freqs, + n.most_common_base_freqs +FROM pg_stats_ext AS n +WHERE n.statistics_schemaname = 'stats_import' +AND n.statistics_name = 'test_stat_clone'; + +SELECT n.inherited, + n.n_distinct, n.dependencies, n.most_common_vals, + n.most_common_val_nulls, n.most_common_freqs, + n.most_common_base_freqs +FROM pg_stats_ext AS n +WHERE n.statistics_schemaname = 'stats_import' +AND n.statistics_name = 'test_stat_clone' +EXCEPT +SELECT o.inherited, + o.n_distinct, o.dependencies, o.most_common_vals, + o.most_common_val_nulls, o.most_common_freqs, + o.most_common_base_freqs +FROM pg_stats_ext AS o +WHERE o.statistics_schemaname = 'stats_import' +AND 
o.statistics_name = 'test_stat'; + +SELECT o.inherited, + o.null_frac, o.avg_width, o.n_distinct, + o.most_common_vals::text AS most_common_vals, + o.most_common_freqs, + o.histogram_bounds::text AS histogram_bounds, + o.correlation, + o.most_common_elems::text AS most_common_elems, + o.most_common_elem_freqs, o.elem_count_histogram +FROM pg_stats_ext_exprs AS o +WHERE o.statistics_schemaname = 'stats_import' +AND o.statistics_name = 'test_stat' +EXCEPT +SELECT n.inherited, + n.null_frac, n.avg_width, n.n_distinct, + n.most_common_vals::text AS most_common_vals, + n.most_common_freqs, + n.histogram_bounds::text AS histogram_bounds, + n.correlation, + n.most_common_elems::text AS most_common_elems, + n.most_common_elem_freqs, n.elem_count_histogram +FROM pg_stats_ext_exprs AS n +WHERE n.statistics_schemaname = 'stats_import' +AND n.statistics_name = 'test_stat_clone'; + +SELECT n.inherited, + n.null_frac, n.avg_width, n.n_distinct, + n.most_common_vals::text AS most_common_vals, + n.most_common_freqs, + n.histogram_bounds::text AS histogram_bounds, + n.correlation, + n.most_common_elems::text AS most_common_elems, + n.most_common_elem_freqs, n.elem_count_histogram +FROM pg_stats_ext_exprs AS n +WHERE n.statistics_schemaname = 'stats_import' +AND n.statistics_name = 'test_stat_clone' +EXCEPT +SELECT o.inherited, + o.null_frac, o.avg_width, o.n_distinct, + o.most_common_vals::text AS most_common_vals, + o.most_common_freqs, + o.histogram_bounds::text AS histogram_bounds, + o.correlation, + o.most_common_elems::text AS most_common_elems, + o.most_common_elem_freqs, o.elem_count_histogram +FROM pg_stats_ext_exprs AS o +WHERE o.statistics_schemaname = 'stats_import' +AND o.statistics_name = 'test_stat'; + DROP SCHEMA stats_import CASCADE; From 8344a2a866b47b71524e07d1198216d0f4a6c9ff Mon Sep 17 00:00:00 2001 From: Corey Huinker Date: Sat, 21 Jun 2025 03:16:24 -0400 Subject: [PATCH 5/5] Include Extended Statistics in pg_dump. 
Incorporate the new pg_restore_extended_stats() function into pg_dump. This detects the existence of statistics data for extended statistics objects (i.e. pg_statistic_ext_data rows). This handles many of the changes that have happened to extended statistics data over the various versions, including: * Format change for pg_ndistinct and pg_dependencies in the current development version. Earlier versions have the format translated via the pg_dump SQL statement. * Inherited extended statistics were introduced in v15. * Expressions were introduced to extended statistics in v14. * MCV extended statistics were introduced in v13. * pg_statistic_ext_data and pg_stats_ext were introduced in v12; prior to that, ndistinct and dependencies data (the only kinds of stats that existed) were stored directly in pg_statistic_ext. * Extended Statistics were introduced in v10, so no support for earlier versions is necessary. --- src/bin/pg_dump/pg_backup.h | 1 + src/bin/pg_dump/pg_backup_archiver.c | 3 +- src/bin/pg_dump/pg_dump.c | 229 +++++++++++++++++++++++++++ src/bin/pg_dump/t/002_pg_dump.pl | 28 ++++ 4 files changed, 260 insertions(+), 1 deletion(-) diff --git a/src/bin/pg_dump/pg_backup.h b/src/bin/pg_dump/pg_backup.h index 4ebef1e86445..9ebf5e12ede2 100644 --- a/src/bin/pg_dump/pg_backup.h +++ b/src/bin/pg_dump/pg_backup.h @@ -68,6 +68,7 @@ enum _dumpPreparedQueries PREPQUERY_DUMPCOMPOSITETYPE, PREPQUERY_DUMPDOMAIN, PREPQUERY_DUMPENUMTYPE, + PREPQUERY_DUMPEXTSTATSSTATS, PREPQUERY_DUMPFUNC, PREPQUERY_DUMPOPR, PREPQUERY_DUMPRANGETYPE, diff --git a/src/bin/pg_dump/pg_backup_archiver.c b/src/bin/pg_dump/pg_backup_archiver.c index dce88f040ace..d629fec95f47 100644 --- a/src/bin/pg_dump/pg_backup_archiver.c +++ b/src/bin/pg_dump/pg_backup_archiver.c @@ -2989,7 +2989,8 @@ _tocEntryRequired(TocEntry *te, teSection curSection, ArchiveHandle *AH) strcmp(te->desc, "SEARCHPATH") == 0) return REQ_SPECIAL; - if (strcmp(te->desc, "STATISTICS DATA") == 0) + if ((strcmp(te->desc, "STATISTICS DATA") == 0) || +
(strcmp(te->desc, "EXTENDED STATISTICS DATA") == 0)) + { if (!ropt->dumpStatistics) return 0; diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index 273117c977c5..5e3c6631c834 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -324,6 +324,7 @@ static void dumpSequenceData(Archive *fout, const TableDataInfo *tdinfo); static void dumpIndex(Archive *fout, const IndxInfo *indxinfo); static void dumpIndexAttach(Archive *fout, const IndexAttachInfo *attachinfo); static void dumpStatisticsExt(Archive *fout, const StatsExtInfo *statsextinfo); +static void dumpStatisticsExtStats(Archive *fout, const StatsExtInfo *statsextinfo); static void dumpConstraint(Archive *fout, const ConstraintInfo *coninfo); static void dumpTableConstraintComment(Archive *fout, const ConstraintInfo *coninfo); static void dumpTSParser(Archive *fout, const TSParserInfo *prsinfo); @@ -8165,6 +8166,9 @@ getExtendedStatistics(Archive *fout) /* Decide whether we want to dump it */ selectDumpableStatisticsObject(&(statsextinfo[i]), fout); + + if (fout->dopt->dumpStatistics) + statsextinfo[i].dobj.components |= DUMP_COMPONENT_STATISTICS; } PQclear(res); @@ -11617,6 +11621,7 @@ dumpDumpableObject(Archive *fout, DumpableObject *dobj) break; case DO_STATSEXT: dumpStatisticsExt(fout, (const StatsExtInfo *) dobj); + dumpStatisticsExtStats(fout, (const StatsExtInfo *) dobj); break; case DO_REFRESH_MATVIEW: refreshMatViewData(fout, (const TableDataInfo *) dobj); @@ -18420,6 +18425,230 @@ dumpStatisticsExt(Archive *fout, const StatsExtInfo *statsextinfo) free(qstatsextname); } +/* + * dumpStatisticsExtStats + * write out to fout one pg_restore_extended_stats() call per stats row of an extended statistics object; does nothing unless statistics are being dumped + */ +static void +dumpStatisticsExtStats(Archive *fout, const StatsExtInfo *statsextinfo) +{ + DumpOptions *dopt = fout->dopt; + PQExpBuffer query; + PGresult *res; + int nstats; + + /* Do nothing if not dumping statistics */ + if (!dopt->dumpStatistics) + return; + + if
(!fout->is_prepared[PREPQUERY_DUMPEXTSTATSSTATS]) + { + PQExpBuffer pq = createPQExpBuffer(); + + /* + * Set up the prepared query that fetches extended statistics data. + * + * 19+: query pg_stats_ext and pg_stats_ext_exprs as-is. + * 15-18: translate the old ndistinct and dependencies formats. + * 12-14: no inherited column (reported as false); 12-13: no exprs. + * 10-11: no pg_stats_ext, join pg_statistic_ext and pg_namespace. + */ + + appendPQExpBufferStr(pq, + "PREPARE getExtStatsStats(pg_catalog.name, pg_catalog.name) AS\n" + "SELECT "); + + /* Versions 15+ have inherited stats */ + if (fout->remoteVersion >= 150000) + appendPQExpBufferStr(pq, "e.inherited, "); + else + appendPQExpBufferStr(pq, "false AS inherited, "); + + /* + * Versions < 19 use the old ndistinct and dependencies formats. + * Versions < 12 use the pg_statistic_ext columns. + * + * TODO: Until v18 is released the master branch has a + * server_version_num of 180000. We will update this to 190000 as soon + * as the master branch updates.
+ */ + if (fout->remoteVersion >= 180000) + appendPQExpBufferStr(pq, "e.n_distinct, e.dependencies, "); + else + appendPQExpBufferStr(pq, + "( " + "SELECT json_agg( " + " json_build_object( " + " 'attributes', " + " string_to_array(kv.key, ', ')::integer[], " + " 'ndistinct', " + " kv.value::bigint )) " + "FROM json_each_text(e.n_distinct::text::json) AS kv" + ") AS n_distinct, " + "( " + "SELECT json_agg( " + " json_build_object( " + " 'attributes', " + " string_to_array( " + " split_part(kv.key, ' => ', 1), " + " ', ')::integer[], " + " 'dependency', " + " split_part(kv.key, ' => ', 2)::integer, " + " 'degree', " + " kv.value::double precision )) " + "FROM json_each_text(e.dependencies::text::json) AS kv " + ") AS dependencies, "); + + /* MCV columns are only read from v13+, older servers get NULLs (NOTE(review): pg_stats_ext may already expose MCV columns in v12 -- confirm) */ + if (fout->remoteVersion >= 130000) + appendPQExpBufferStr(pq, + "e.most_common_vals, e.most_common_val_nulls, " + "e.most_common_freqs, e.most_common_base_freqs, "); + else + appendPQExpBufferStr(pq, + "NULL AS most_common_vals, NULL AS most_common_val_nulls, " + "NULL AS most_common_freqs, NULL AS most_common_base_freqs, "); + + /* Expressions were introduced in v14 */ + if (fout->remoteVersion >= 140000) + { + appendPQExpBufferStr(pq, + "( " + "SELECT array_agg( " + " ARRAY[ee.null_frac::text, ee.avg_width::text, " + " ee.n_distinct::text, ee.most_common_vals::text, " + " ee.most_common_freqs::text, ee.histogram_bounds::text, " + " ee.correlation::text, ee.most_common_elems::text, " + " ee.most_common_elem_freqs::text, " + " ee.elem_count_histogram::text]) " + "FROM pg_stats_ext_exprs AS ee " + "WHERE ee.statistics_schemaname = $1 " + "AND ee.statistics_name = $2 "); + + /* Inherited expressions introduced in v15 */ + if (fout->remoteVersion >= 150000) + appendPQExpBufferStr(pq, "AND ee.inherited = e.inherited"); + + appendPQExpBufferStr(pq, ") AS exprs "); + } + else + appendPQExpBufferStr(pq, "NULL AS exprs "); + + /* pg_stats_ext introduced in v12 */ + if (fout->remoteVersion >= 120000) +
appendPQExpBufferStr(pq, + "FROM pg_catalog.pg_stats_ext AS e " + "WHERE e.statistics_schemaname = $1 " + "AND e.statistics_name = $2 "); + else + appendPQExpBufferStr(pq, + "FROM ( " + "SELECT s.stxndistinct AS n_distinct, " + " s.stxdependencies AS dependencies " + "FROM pg_catalog.pg_statistic_ext AS s " + "JOIN pg_catalog.pg_namespace AS n " + "ON n.oid = s.stxnamespace " + "WHERE n.nspname = $1 " + "AND s.stxname = $2 " + ") AS e "); + + appendPQExpBufferStr(pq, "ORDER BY inherited"); /* output column: e.inherited does not exist before v15 */ + + ExecuteSqlStatement(fout, pq->data); + + fout->is_prepared[PREPQUERY_DUMPEXTSTATSSTATS] = true; + + destroyPQExpBuffer(pq); + } + + query = createPQExpBuffer(); + + appendPQExpBufferStr(query, "EXECUTE getExtStatsStats("); + appendStringLiteralAH(query, statsextinfo->dobj.namespace->dobj.name, fout); + appendPQExpBufferStr(query, "::pg_catalog.name, "); + appendStringLiteralAH(query, statsextinfo->dobj.name, fout); + appendPQExpBufferStr(query, "::pg_catalog.name)"); + + res = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK); + + destroyPQExpBuffer(query); + + nstats = PQntuples(res); + + if (nstats > 0) + { + PQExpBuffer out = createPQExpBuffer(); + + int i_inherited = PQfnumber(res, "inherited"); + int i_ndistinct = PQfnumber(res, "n_distinct"); + int i_dependencies = PQfnumber(res, "dependencies"); + int i_mcv = PQfnumber(res, "most_common_vals"); + int i_mcv_nulls = PQfnumber(res, "most_common_val_nulls"); + int i_mcf = PQfnumber(res, "most_common_freqs"); + int i_mcbf = PQfnumber(res, "most_common_base_freqs"); + int i_exprs = PQfnumber(res, "exprs"); + + for (int i = 0; i < nstats; i++) + { + if (PQgetisnull(res, i, i_inherited)) + pg_fatal("inherited cannot be NULL"); + + appendPQExpBufferStr(out, + "SELECT * FROM pg_catalog.pg_restore_extended_stats(\n"); + appendPQExpBuffer(out, "\t'version', '%d'::integer,\n", + fout->remoteVersion); + appendPQExpBufferStr(out, "\t'statistics_schemaname', "); + appendStringLiteralAH(out,
statsextinfo->dobj.namespace->dobj.name, fout); + appendPQExpBufferStr(out, ",\n\t'statistics_name', "); + appendStringLiteralAH(out, statsextinfo->dobj.name, fout); + appendNamedArgument(out, fout, "inherited", "boolean", + PQgetvalue(res, i, i_inherited)); + + if (!PQgetisnull(res, i, i_ndistinct)) + appendNamedArgument(out, fout, "n_distinct", "pg_ndistinct", + PQgetvalue(res, i, i_ndistinct)); + + if (!PQgetisnull(res, i, i_dependencies)) + appendNamedArgument(out, fout, "dependencies", "pg_dependencies", + PQgetvalue(res, i, i_dependencies)); + + if (!PQgetisnull(res, i, i_mcv)) + appendNamedArgument(out, fout, "most_common_vals", "text[]", + PQgetvalue(res, i, i_mcv)); + + if (!PQgetisnull(res, i, i_mcv_nulls)) + appendNamedArgument(out, fout, "most_common_val_nulls", "boolean[]", + PQgetvalue(res, i, i_mcv_nulls)); + + if (!PQgetisnull(res, i, i_mcf)) + appendNamedArgument(out, fout, "most_common_freqs", "double precision[]", + PQgetvalue(res, i, i_mcf)); + + if (!PQgetisnull(res, i, i_mcbf)) + appendNamedArgument(out, fout, "most_common_base_freqs", "double precision[]", + PQgetvalue(res, i, i_mcbf)); + + if (!PQgetisnull(res, i, i_exprs)) + appendNamedArgument(out, fout, "exprs", "text[]", + PQgetvalue(res, i, i_exprs)); + + appendPQExpBufferStr(out, "\n);\n"); + } + + ArchiveEntry(fout, nilCatalogId, createDumpId(), + ARCHIVE_OPTS(.tag = statsextinfo->dobj.name, + .namespace = statsextinfo->dobj.namespace->dobj.name, + .owner = statsextinfo->rolname, + .description = "EXTENDED STATISTICS DATA", + .section = SECTION_POST_DATA, + .createStmt = out->data, + .deps = &statsextinfo->dobj.dumpId, + .nDeps = 1)); + destroyPQExpBuffer(out); + } + PQclear(res); +} + /* * dumpConstraint * write out to fout a user-defined constraint diff --git a/src/bin/pg_dump/t/002_pg_dump.pl b/src/bin/pg_dump/t/002_pg_dump.pl index 6c7ec80e271c..bb5ddb5e98e1 100644 --- a/src/bin/pg_dump/t/002_pg_dump.pl +++ b/src/bin/pg_dump/t/002_pg_dump.pl @@ -4991,6 +4991,34 @@ }, }, + # + #
EXTENDED stats will end up in SECTION_POST_DATA. + # + 'extended_statistics_import' => { + create_sql => ' + CREATE TABLE dump_test.has_ext_stats + AS SELECT g.g AS x, g.g / 2 AS y FROM generate_series(1,100) AS g(g); + CREATE STATISTICS dump_test.es1 ON x, (y % 2) FROM dump_test.has_ext_stats; + ANALYZE dump_test.has_ext_stats;', + regexp => qr/^ + \QSELECT * FROM pg_catalog.pg_restore_extended_stats(\E\s+/xm, + like => { + %full_runs, + %dump_test_schema_runs, + no_data_no_schema => 1, + no_schema => 1, + section_post_data => 1, + statistics_only => 1, + schema_only_with_statistics => 1, + }, + unlike => { + exclude_dump_test_schema => 1, + no_statistics => 1, + only_dump_measurement => 1, + schema_only => 1, + }, + }, + # # While attribute stats (aka pg_statistic stats) only appear for tables # that have been analyzed, all tables will have relation stats because