From: Michael Paquier Date: Wed, 21 May 2025 01:22:12 +0000 (+0900) Subject: Fix regression with location calculation of nested statements X-Git-Url: http://git.postgresql.org/gitweb/?a=commitdiff_plain;h=06450c7b8c70120cebe91e4b6ed3cec9d7051d0a;p=postgresql.git Fix regression with location calculation of nested statements The statement location calculated for some nested query cases was wrong when multiple queries are sent as a single string, these being separated by semicolons. As pointed by Sami Imseih, the location calculation was incorrect when the last query of nested statement with multiple queries does **NOT** finish with a semicolon for the last statement. In this case, the statement length tracked by RawStmt is 0, which is equivalent to say that the string should be used until its end. The code previously discarded this case entirely, causing the location to remain at 0, the same as pointing at the beginning of the string. This caused pg_stat_statements to store incorrect query strings. This issue has been introduced in 499edb09741b. I have looked at the diffs generated by pgaudit back then, and noticed the difference generated for this nested query case, but I have missed the point that it was an actual regression with an existing case. A test case is added in pg_stat_statements to provide some coverage, restoring the pre-17 behavior for the calculation of the query locations. Special thanks to David Steele, who, through an analysis of the test diffs generated by pgaudit with the new v18 logic, has poked me about the fact that my original analysis of the matter was wrong. The test output of pg_overexplain is updated to reflect the new logic, as the new locations refer to the beginning of the argument passed to the function explain_filter(). When the module was introduced in 8d5ceb113e3f, which was after 499edb09741b (for the new calculation method), the locations of the test were not actually right: the plan generated for the query string given in input of the function pointed to the top-level query, not the nested one. Reported-by: David Steele Author: Michael Paquier Reviewed-by: Anthonin Bonnefoy Reviewed-by: Jian He Reviewed-by: Sami Imseih Reviewed-by: David Steele Discussion: https://postgr.es/m/844a3b38-bbf1-4fb2-9fd6-f58c35c09917@pgbackrest.org --- diff --git a/contrib/pg_overexplain/expected/pg_overexplain.out b/contrib/pg_overexplain/expected/pg_overexplain.out index 44120c388af..cb5c396c519 100644 --- a/contrib/pg_overexplain/expected/pg_overexplain.out +++ b/contrib/pg_overexplain/expected/pg_overexplain.out @@ -119,7 +119,7 @@ $$); Subplans Needing Rewind: none Relation OIDs: NNN... Executor Parameter Types: none - Parse Location: 0 to end + Parse Location: 41 to end RTI 1 (relation, inherited, in-from-clause): Eref: vegetables (id, name, genus) Relation: vegetables @@ -240,7 +240,7 @@ $$); none + NNN... + none + - 0 to end + + 53 to end + + + + @@ -344,7 +344,7 @@ $$); Subplans Needing Rewind: none Relation OIDs: NNN... Executor Parameter Types: none - Parse Location: 0 to end + Parse Location: 28 to end (37 rows) SET debug_parallel_query = false; @@ -372,7 +372,7 @@ $$); Subplans Needing Rewind: none Relation OIDs: NNN... Executor Parameter Types: 0 - Parse Location: 0 to end + Parse Location: 28 to end (15 rows) -- Create an index, and then attempt to force a nested loop with inner index diff --git a/contrib/pg_stat_statements/expected/level_tracking.out b/contrib/pg_stat_statements/expected/level_tracking.out index 03bea14d5da..75e785e1719 100644 --- a/contrib/pg_stat_statements/expected/level_tracking.out +++ b/contrib/pg_stat_statements/expected/level_tracking.out @@ -1319,6 +1319,57 @@ SELECT toplevel, calls, query FROM pg_stat_statements t | 1 | SELECT pg_stat_statements_reset() IS NOT NULL AS t (4 rows) +-- DO block --- multiple inner queries with separators +SET pg_stat_statements.track = 'all'; +SET pg_stat_statements.track_utility = TRUE; +CREATE TABLE pgss_do_util_tab_1 (a int); +CREATE TABLE pgss_do_util_tab_2 (a int); +SELECT pg_stat_statements_reset() IS NOT NULL AS t; + t +--- + t +(1 row) + +DO $$ +DECLARE BEGIN + EXECUTE 'CREATE TABLE pgss_do_table (id INT); DROP TABLE pgss_do_table'; + EXECUTE 'SELECT a FROM pgss_do_util_tab_1; SELECT a FROM pgss_do_util_tab_2'; +END $$; +SELECT toplevel, calls, rows, query FROM pg_stat_statements + WHERE toplevel IS FALSE + ORDER BY query COLLATE "C"; + toplevel | calls | rows | query +----------+-------+------+------------------------------------- + f | 1 | 0 | CREATE TABLE pgss_do_table (id INT) + f | 1 | 0 | DROP TABLE pgss_do_table + f | 1 | 0 | SELECT a FROM pgss_do_util_tab_1 + f | 1 | 0 | SELECT a FROM pgss_do_util_tab_2 +(4 rows) + +SELECT pg_stat_statements_reset() IS NOT NULL AS t; + t +--- + t +(1 row) + +-- Note the extra semicolon at the end of the query. +DO $$ +DECLARE BEGIN + EXECUTE 'CREATE TABLE pgss_do_table (id INT); DROP TABLE pgss_do_table;'; + EXECUTE 'SELECT a FROM pgss_do_util_tab_1; SELECT a FROM pgss_do_util_tab_2;'; +END $$; +SELECT toplevel, calls, rows, query FROM pg_stat_statements + WHERE toplevel IS FALSE + ORDER BY query COLLATE "C"; + toplevel | calls | rows | query +----------+-------+------+------------------------------------- + f | 1 | 0 | CREATE TABLE pgss_do_table (id INT) + f | 1 | 0 | DROP TABLE pgss_do_table + f | 1 | 0 | SELECT a FROM pgss_do_util_tab_1 + f | 1 | 0 | SELECT a FROM pgss_do_util_tab_2 +(4 rows) + +DROP TABLE pgss_do_util_tab_1, pgss_do_util_tab_2; -- PL/pgSQL function - top-level tracking. SET pg_stat_statements.track = 'top'; SET pg_stat_statements.track_utility = FALSE; diff --git a/contrib/pg_stat_statements/sql/level_tracking.sql b/contrib/pg_stat_statements/sql/level_tracking.sql index 6b81230f186..86f007e8552 100644 --- a/contrib/pg_stat_statements/sql/level_tracking.sql +++ b/contrib/pg_stat_statements/sql/level_tracking.sql @@ -334,6 +334,32 @@ END; $$; SELECT toplevel, calls, query FROM pg_stat_statements ORDER BY query COLLATE "C", toplevel; +-- DO block --- multiple inner queries with separators +SET pg_stat_statements.track = 'all'; +SET pg_stat_statements.track_utility = TRUE; +CREATE TABLE pgss_do_util_tab_1 (a int); +CREATE TABLE pgss_do_util_tab_2 (a int); +SELECT pg_stat_statements_reset() IS NOT NULL AS t; +DO $$ +DECLARE BEGIN + EXECUTE 'CREATE TABLE pgss_do_table (id INT); DROP TABLE pgss_do_table'; + EXECUTE 'SELECT a FROM pgss_do_util_tab_1; SELECT a FROM pgss_do_util_tab_2'; +END $$; +SELECT toplevel, calls, rows, query FROM pg_stat_statements + WHERE toplevel IS FALSE + ORDER BY query COLLATE "C"; +SELECT pg_stat_statements_reset() IS NOT NULL AS t; +-- Note the extra semicolon at the end of the query. +DO $$ +DECLARE BEGIN + EXECUTE 'CREATE TABLE pgss_do_table (id INT); DROP TABLE pgss_do_table;'; + EXECUTE 'SELECT a FROM pgss_do_util_tab_1; SELECT a FROM pgss_do_util_tab_2;'; +END $$; +SELECT toplevel, calls, rows, query FROM pg_stat_statements + WHERE toplevel IS FALSE + ORDER BY query COLLATE "C"; +DROP TABLE pgss_do_util_tab_1, pgss_do_util_tab_2; + -- PL/pgSQL function - top-level tracking. SET pg_stat_statements.track = 'top'; SET pg_stat_statements.track_utility = FALSE; diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c index 1f4d6adda52..a16fdd65601 100644 --- a/src/backend/parser/analyze.c +++ b/src/backend/parser/analyze.c @@ -253,20 +253,14 @@ parse_sub_analyze(Node *parseTree, ParseState *parentParseState, * statements. However, we have the statement's location plus the length * (p_stmt_len) and location (p_stmt_location) of the top level RawStmt, * stored in pstate. Thus, the statement's length is the RawStmt's length - * minus how much we've advanced in the RawStmt's string. + * minus how much we've advanced in the RawStmt's string. If p_stmt_len + * is 0, the SQL string is used up to its end. */ static void setQueryLocationAndLength(ParseState *pstate, Query *qry, Node *parseTree) { ParseLoc stmt_len = 0; - /* - * If there is no information about the top RawStmt's length, leave it at - * 0 to use the whole string. - */ - if (pstate->p_stmt_len == 0) - return; - switch (nodeTag(parseTree)) { case T_InsertStmt: @@ -308,11 +302,12 @@ setQueryLocationAndLength(ParseState *pstate, Query *qry, Node *parseTree) /* Statement's length is known, use it */ qry->stmt_len = stmt_len; } - else + else if (pstate->p_stmt_len > 0) { /* - * Compute the statement's length from the statement's location and - * the RawStmt's length and location. + * The top RawStmt's length is known, so calculate the statement's + * length from the statement's location and the RawStmt's length and + * location. */ qry->stmt_len = pstate->p_stmt_len - (qry->stmt_location - pstate->p_stmt_location); }