diff --git a/.github/scripts/job/aqo_instance_launch.sh b/.github/scripts/job/aqo_instance_launch.sh new file mode 100755 index 00000000..f43d6b8e --- /dev/null +++ b/.github/scripts/job/aqo_instance_launch.sh @@ -0,0 +1,47 @@ +#!/bin/bash +ulimit -c unlimited + +# Kill all orphan processes +pkill -U `whoami` -9 -e postgres +pkill -U `whoami` -9 -e pgbench +pkill -U `whoami` -9 -e psql + +sleep 1 + +M=`pwd`/PGDATA +U=`whoami` + +rm -rf $M || true +mkdir $M +rm -rf logfile.log || true + +export LC_ALL=C +export LANGUAGE="en_US:en" +initdb -D $M --locale=C + +# PG Version-specific settings +ver=$(pg_ctl -V | egrep -o "[0-9]." | head -1) +echo "PostgreSQL version: $ver" +if [ $ver -gt 13 ] +then + echo "compute_query_id = 'regress'" >> $M/postgresql.conf +fi + +# Speed up the 'Join Order Benchmark' test +echo "shared_buffers = 1GB" >> $M/postgresql.conf +echo "work_mem = 128MB" >> $M/postgresql.conf +echo "fsync = off" >> $M/postgresql.conf +echo "autovacuum = 'off'" >> $M/postgresql.conf + +# AQO preferences +echo "shared_preload_libraries = 'aqo, pg_stat_statements'" >> $M/postgresql.conf +echo "aqo.mode = 'disabled'" >> $M/postgresql.conf +echo "aqo.join_threshold = 0" >> $M/postgresql.conf +echo "aqo.force_collect_stat = 'off'" >> $M/postgresql.conf +echo "aqo.fs_max_items = 10000" >> $M/postgresql.conf +echo "aqo.fss_max_items = 20000" >> $M/postgresql.conf + +pg_ctl -w -D $M -l logfile.log start +createdb $U +psql -c "CREATE EXTENSION aqo;" +psql -c "CREATE EXTENSION pg_stat_statements" diff --git a/.github/scripts/job/check_result.sh b/.github/scripts/job/check_result.sh new file mode 100755 index 00000000..ab194cfc --- /dev/null +++ b/.github/scripts/job/check_result.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +# ############################################################################## +# +# +# ############################################################################## + +# Show error delta (Negative result is a signal of possible issue) +result=$(psql -t -c 
"SELECT count(*) FROM aqo_cardinality_error(true) c JOIN aqo_cardinality_error(false) o USING (id) WHERE (o.error - c.error) < 0") + +if [ $result -gt 0 ]; then + exit 1; +fi + +exit 0; diff --git a/.github/scripts/job/dump_knowledge.sh b/.github/scripts/job/dump_knowledge.sh new file mode 100755 index 00000000..c5cb9736 --- /dev/null +++ b/.github/scripts/job/dump_knowledge.sh @@ -0,0 +1,17 @@ +#!/bin/bash + +# ############################################################################## +# +# Make dump of a knowledge base +# +# ############################################################################## + +psql -c "CREATE TABLE aqo_data_dump AS SELECT * FROM aqo_data;" +psql -c "CREATE TABLE aqo_queries_dump AS SELECT * FROM aqo_queries;" +psql -c "CREATE TABLE aqo_query_texts_dump AS SELECT * FROM aqo_query_texts;" +psql -c "CREATE TABLE aqo_query_stat_dump AS SELECT * FROM aqo_query_stat;" + +pg_dump --table='aqo*' -f knowledge_base.dump $PGDATABASE + +psql -c "DROP TABLE aqo_data_dump, aqo_queries_dump, aqo_query_texts_dump, aqo_query_stat_dump" + diff --git a/.github/scripts/job/job_pass.sh b/.github/scripts/job/job_pass.sh new file mode 100755 index 00000000..1ad62fbd --- /dev/null +++ b/.github/scripts/job/job_pass.sh @@ -0,0 +1,58 @@ +#!/bin/bash + +# ############################################################################## +# +# Pass each JOB query over the DBMS instance. Use $1 to specify a number of +# iterations, if needed. 
+# +# Results: +# - explains.txt - explain of each query +# - job_onepass_aqo_stat.dat - short report on execution time +# - knowledge_base.dump - dump of the AQO knowledge base +# +# ############################################################################## + +echo "The Join Order Benchmark 1Pass" +echo -e "Query Number\tITER\tQuery Name\tExecution Time, ms" > report.txt +echo -e "Clear a file with explains" > explains.txt + +if [ $# -eq 0 ] +then + ITERS=1 +else + ITERS=$1 +fi + +echo "Execute JOB with the $ITERS iterations" + +filenum=1 +for file in $JOB_DIR/queries/*.sql +do + # Get filename + short_file=$(basename "$file") + + echo -n "EXPLAIN (ANALYZE, VERBOSE, FORMAT JSON) " > test.sql + cat $file >> test.sql + + for (( i=1; i<=$ITERS; i++ )) + do + result=$(psql -f test.sql) + echo -e $result >> explains.txt + exec_time=$(echo $result | sed -n 's/.*"Execution Time": \([0-9]*\.[0-9]*\).*/\1/p') + echo -e "$filenum\t$short_file\t$i\t$exec_time" >> report.txt + echo -e "$filenum\t$i\t$short_file\t$exec_time" + done +filenum=$((filenum+1)) +done + +# Show total optimizer error in the test +psql -c "SELECT sum(error) AS total_error FROM aqo_cardinality_error(false)" +psql -c "SELECT sum(error) AS total_error_aqo FROM aqo_cardinality_error(true)" + +# Show error delta (Negative result is a signal of possible issue) +psql -c " +SELECT id, (o.error - c.error) AS errdelta + FROM aqo_cardinality_error(true) c JOIN aqo_cardinality_error(false) o + USING (id) +" + diff --git a/.github/scripts/job/load_imdb.sh b/.github/scripts/job/load_imdb.sh new file mode 100755 index 00000000..3cb44fb2 --- /dev/null +++ b/.github/scripts/job/load_imdb.sh @@ -0,0 +1,5 @@ +#!/bin/bash + +psql -f $JOB_DIR/schema.sql +psql -vdatadir="'$JOB_DIR'" -f $JOB_DIR/copy.sql + diff --git a/.github/scripts/job/set_test_conditions_1.sh b/.github/scripts/job/set_test_conditions_1.sh new file mode 100755 index 00000000..2140893d --- /dev/null +++ b/.github/scripts/job/set_test_conditions_1.sh @@ 
-0,0 +1,41 @@ +#!/bin/bash + +# ############################################################################## +# +# Test conditions No.1: Quick pass in 'disabled' mode with statistics and +# forced usage of a bunch of parallel workers. +# +# - Disabled mode with a stat gathering and AQO details in explain +# - Force usage of parallel workers aggressively +# - Enable pg_stat_statements statistics +# +# ############################################################################## + +# AQO specific settings +psql -c "ALTER SYSTEM SET aqo.mode = 'disabled'" +psql -c "ALTER SYSTEM SET aqo.force_collect_stat = 'on'" +psql -c "ALTER SYSTEM SET aqo.show_details = 'on'" +psql -c "ALTER SYSTEM SET aqo.show_hash = 'on'" + +# Core settings: force parallel workers +psql -c "ALTER SYSTEM SET max_parallel_workers_per_gather = 16" +psql -c "ALTER SYSTEM SET force_parallel_mode = 'on'" +psql -c "ALTER SYSTEM SET from_collapse_limit = 20" +psql -c "ALTER SYSTEM SET join_collapse_limit = 20" +psql -c "ALTER SYSTEM SET parallel_setup_cost = 1.0" +psql -c "ALTER SYSTEM SET parallel_tuple_cost = 0.00001" +psql -c "ALTER SYSTEM SET min_parallel_table_scan_size = 0" +psql -c "ALTER SYSTEM SET min_parallel_index_scan_size = 0" + +# pg_stat_statements +psql -c "ALTER SYSTEM SET pg_stat_statements.track = 'all'" +psql -c "ALTER SYSTEM SET pg_stat_statements.track_planning = 'on'" + +psql -c "SELECT pg_reload_conf();" + +# Enable all previously executed queries which could be disabled +psql -c " + SELECT count(*) FROM aqo_queries, LATERAL aqo_disable_class(queryid) + WHERE queryid <> 0 +" + diff --git a/.github/scripts/job/set_test_conditions_2.sh b/.github/scripts/job/set_test_conditions_2.sh new file mode 100755 index 00000000..609b9624 --- /dev/null +++ b/.github/scripts/job/set_test_conditions_2.sh @@ -0,0 +1,42 @@ +#!/bin/bash + +# ############################################################################## +# +# Test conditions No.2: Learn mode with forced parallel workers +# +# - 
Disabled mode with a stat gathering and AQO details in explain +# - Force usage of parallel workers aggressively +# - Enable pg_stat_statements statistics +# +# ############################################################################## + +# AQO specific settings +psql -c "ALTER SYSTEM SET aqo.mode = 'learn'" +psql -c "ALTER SYSTEM SET aqo.force_collect_stat = 'off'" +psql -c "ALTER SYSTEM SET aqo.show_details = 'on'" +psql -c "ALTER SYSTEM SET aqo.show_hash = 'on'" +psql -c "ALTER SYSTEM SET aqo.join_threshold = 0" +psql -c "ALTER SYSTEM SET aqo.wide_search = 'off'" + +# Core settings: force parallel workers +psql -c "ALTER SYSTEM SET max_parallel_workers_per_gather = 16" +psql -c "ALTER SYSTEM SET force_parallel_mode = 'on'" +psql -c "ALTER SYSTEM SET from_collapse_limit = 20" +psql -c "ALTER SYSTEM SET join_collapse_limit = 20" +psql -c "ALTER SYSTEM SET parallel_setup_cost = 1.0" +psql -c "ALTER SYSTEM SET parallel_tuple_cost = 0.00001" +psql -c "ALTER SYSTEM SET min_parallel_table_scan_size = 0" +psql -c "ALTER SYSTEM SET min_parallel_index_scan_size = 0" + +# pg_stat_statements +psql -c "ALTER SYSTEM SET pg_stat_statements.track = 'all'" +psql -c "ALTER SYSTEM SET pg_stat_statements.track_planning = 'on'" + +psql -c "SELECT pg_reload_conf();" + +# Enable all previously executed queries which could be disabled +psql -c " + SELECT count(*) FROM aqo_queries, LATERAL aqo_enable_class(queryid) + WHERE queryid <> 0 +" + diff --git a/.github/scripts/job/set_test_conditions_3.sh b/.github/scripts/job/set_test_conditions_3.sh new file mode 100755 index 00000000..00f4dbf3 --- /dev/null +++ b/.github/scripts/job/set_test_conditions_3.sh @@ -0,0 +1,42 @@ +#!/bin/bash + +# ############################################################################## +# +# Test conditions No.3: Freeze ML base and forced parallel workers +# +# - Disabled mode with a stat gathering and AQO details in explain +# - Force usage of parallel workers aggressively +# - Enable pg_stat_statements 
statistics +# +# ############################################################################## + +# AQO specific settings +psql -c "ALTER SYSTEM SET aqo.mode = 'frozen'" +psql -c "ALTER SYSTEM SET aqo.force_collect_stat = 'off'" +psql -c "ALTER SYSTEM SET aqo.show_details = 'on'" +psql -c "ALTER SYSTEM SET aqo.show_hash = 'on'" +psql -c "ALTER SYSTEM SET aqo.join_threshold = 0" +psql -c "ALTER SYSTEM SET aqo.wide_search = 'off'" + +# Core settings: force parallel workers +psql -c "ALTER SYSTEM SET max_parallel_workers_per_gather = 16" +psql -c "ALTER SYSTEM SET force_parallel_mode = 'on'" +psql -c "ALTER SYSTEM SET from_collapse_limit = 20" +psql -c "ALTER SYSTEM SET join_collapse_limit = 20" +psql -c "ALTER SYSTEM SET parallel_setup_cost = 1.0" +psql -c "ALTER SYSTEM SET parallel_tuple_cost = 0.00001" +psql -c "ALTER SYSTEM SET min_parallel_table_scan_size = 0" +psql -c "ALTER SYSTEM SET min_parallel_index_scan_size = 0" + +# pg_stat_statements +psql -c "ALTER SYSTEM SET pg_stat_statements.track = 'all'" +psql -c "ALTER SYSTEM SET pg_stat_statements.track_planning = 'on'" + +psql -c "SELECT pg_reload_conf();" + +# Enable all previously executed queries which could be disabled +psql -c " + SELECT count(*) FROM aqo_queries, LATERAL aqo_enable_class(queryid) + WHERE queryid <> 0 +" + diff --git a/.github/workflows/c-cpp.yml b/.github/workflows/c-cpp.yml new file mode 100644 index 00000000..74e90277 --- /dev/null +++ b/.github/workflows/c-cpp.yml @@ -0,0 +1,93 @@ +name: 'AQO basic CI' + +env: + # Use it just for a report + BRANCH_NAME: ${{ github.head_ref || github.ref_name }} + +# Trigger it each timeon push or pull request. Honestly, it will be redundant +# most of the time, but external pull-request checks don't be missed out. 
+on: + push: + pull_request: + +jobs: + build: + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + - name: "Define PostreSQL major version" + run: | + echo "The action workflow is triggered by the $BRANCH_NAME" + sudo apt install libipc-run-perl + git config --global user.email "ci@postgrespro.ru" + git config --global user.name "CI PgPro admin" + + patch_name=$(ls aqo_*.patch|tail -1) + echo "CORE_PATCH_NAME=$patch_name" >> $GITHUB_ENV + + # we can get number, otherwise set up master + vers_number=$(echo "$patch_name"|tr -d -c 0-9) + echo "PG_MAJOR_VERSION=$vers_number" >> $GITHUB_ENV + + branch_name="REL_${vers_number}_STABLE" + echo "PG_BRANCH=$branch_name" >> $GITHUB_ENV + - name: "Set master branch name, if needed" + if: env.PG_MAJOR_VERSION == '' + run: | + branch_name="master" + echo "PG_BRANCH=$branch_name" >> $GITHUB_ENV + + # Create workspace directory and environment variable. + # It is the second step because on the first we define versions and branches + - name: "Initial dir" + run: | + git clone -b $PG_BRANCH --depth=1 --single-branch https://github.com/postgres/postgres.git $GITHUB_WORKSPACE/../pg + + # Invent variable with full path to PG directory just because github + # actions don't like relative paths ... 
+ cd $GITHUB_WORKSPACE/../pg + echo PG_DIR=`pwd` >> $GITHUB_ENV + + - name: "Prepare PG directory" + run: | + cd $PG_DIR + cp -r ../aqo contrib/aqo + patch -p1 --no-backup-if-mismatch < contrib/aqo/$CORE_PATCH_NAME + echo "COPT=-Werror" >> $GITHUB_ENV + echo "CONFIGURE_OPTS=--prefix=`pwd`/tmp_install --enable-tap-tests --enable-cassert" >> $GITHUB_ENV + + # Just for debug + - name: "Environment (debug output)" + if: ${{ always() }} + run: | + echo "PG_MAJOR_VERSION: $PG_MAJOR_VERSION" + echo "PG_DIR: $PG_DIR" + echo "PG_BRANCH: $PG_BRANCH" + echo "CORE_PATCH_NAME: $CORE_PATCH_NAME" + + - name: "make check" + run: | + cd $PG_DIR + ./configure $CONFIGURE_OPTS CFLAGS="-O2" > /dev/null + make -j4 > /dev/null && make -j4 -C contrib > /dev/null + env CLIENTS=50 THREADS=50 make -C contrib/aqo check + + echo "Use AQO with debug code included" + git clean -fdx + git -C contrib/aqo clean -fdx + ./configure $CONFIGURE_OPTS CFLAGS="-DAQO_DEBUG_PRINT -O0" > /dev/null + make -j4 > /dev/null && make -j4 -C contrib > /dev/null + env CLIENTS=50 THREADS=50 make -C contrib/aqo check + + - name: Archive artifacts + if: ${{ failure() }} + uses: actions/upload-artifact@v3 + with: + name: make_check_logs + path: | + ${{ env.PG_DIR }}/contrib/aqo/regression.diffs + ${{ env.PG_DIR }}/contrib/aqo/log + ${{ env.PG_DIR }}/contrib/aqo/tmp_check/log + retention-days: 7 diff --git a/.github/workflows/installchecks.yml b/.github/workflows/installchecks.yml new file mode 100644 index 00000000..075034a0 --- /dev/null +++ b/.github/workflows/installchecks.yml @@ -0,0 +1,166 @@ +name: "InstallChecks" + +env: + # Use it just for a report + BRANCH_NAME: ${{ github.head_ref || github.ref_name }} + +# Trigger it each timeon push or pull request. Honestly, it will be redundant +# most of the time, but external pull-request checks don't be missed out. 
+on: + push: + pull_request: + +jobs: + build: + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + - name: "Define PostreSQL major version and set basic environment" + run: | + echo "The action workflow is triggered by the $BRANCH_NAME" + sudo apt install libipc-run-perl + git config --global user.email "ci@postgrespro.ru" + git config --global user.name "CI PgPro admin" + + patch_name=$(ls aqo_*.patch|tail -1) + echo "CORE_PATCH_NAME=$patch_name" >> $GITHUB_ENV + + # we can get number, otherwise set up master + vers_number=$(echo "$patch_name"|tr -d -c 0-9) + echo "PG_MAJOR_VERSION=$vers_number" >> $GITHUB_ENV + + branch_name="REL_${vers_number}_STABLE" + echo "PG_BRANCH=$branch_name" >> $GITHUB_ENV + - name: "Set master branch name, if needed" + if: env.PG_MAJOR_VERSION == '' + run: | + branch_name="master" + echo "PG_BRANCH=$branch_name" >> $GITHUB_ENV + + # Create workspace directory and environment variable. + # It is the second step because on the first we define versions and branches + - name: "Initial dir" + run: | + git clone -b $PG_BRANCH --depth=1 --single-branch https://github.com/postgres/postgres.git $GITHUB_WORKSPACE/../pg + + # Invent variable with full path to PG directory just because github + # actions don't like relative paths ... 
+ cd $GITHUB_WORKSPACE/../pg + echo PG_DIR=`pwd` >> $GITHUB_ENV + + - name: "Prepare PG directory" + run: | + cd $PG_DIR + cp -r ../aqo contrib/aqo + patch -p1 --no-backup-if-mismatch < contrib/aqo/$CORE_PATCH_NAME + echo "COPT=-Werror" >> $GITHUB_ENV + echo "CONFIGURE_OPTS=--prefix=`pwd`/tmp_install --enable-tap-tests --enable-cassert" >> $GITHUB_ENV + + # Instance-related environment + echo "$PG_DIR/tmp_install/bin" >> $GITHUB_PATH + echo "LD_LIBRARY_PATH=$PG_DIR/tmp_install/lib" >> $GITHUB_ENV + echo `pwd`/contrib/aqo/.github/scripts/job >> $GITHUB_PATH + + # Just for debug + - name: "Environment (debug output)" + if: ${{ always() }} + run: | + echo "PG_MAJOR_VERSION: $PG_MAJOR_VERSION" + echo "PG_DIR: $PG_DIR" + echo "PG_BRANCH: $PG_BRANCH" + echo "CORE_PATCH_NAME: $CORE_PATCH_NAME" + # See these paths to understand correctness of the instance initialization + echo "PATHs: $PATH" + echo "PG Libs: $LD_LIBRARY_PATH" + + - name: "Compilation" + run: | + cd $PG_DIR + echo "COPT: $COPT" + echo "CONFIGURE_OPTS: $CONFIGURE_OPTS" + ./configure $CONFIGURE_OPTS CFLAGS="-O2" > /dev/null + make -j4 > /dev/null && make -j4 -C contrib > /dev/null + make install >> make.log && make -C contrib install > /dev/null + + - name: "Launch AQO instance" + run: | + cd $PG_DIR + + # Launch an instance with AQO extension + aqo_instance_launch.sh + AQO_VERSION=$(psql -t -c "SELECT extversion FROM pg_extension WHERE extname='aqo'") + echo "AQO_VERSION=$AQO_VERSION" >> $GITHUB_ENV + echo "Use AQO v.$AQO_VERSION" + + # Pass installcheck in disabled mode + - name: installcheck_disabled + run: | + cd $PG_DIR + psql -c "ALTER SYSTEM SET aqo.force_collect_stat = 'off'" + psql -c "SELECT pg_reload_conf()" + make installcheck-world + + - name: installcheck_disabled_forced_stat + run: | + cd $PG_DIR + psql -c "ALTER SYSTEM SET aqo.force_collect_stat = 'on'" + psql -c "SELECT pg_reload_conf()" + make installcheck-world + + - name: installcheck_frozen + run: | + cd $PG_DIR + psql -c "ALTER SYSTEM 
SET aqo.mode = 'frozen'" + psql -c "ALTER SYSTEM SET aqo.force_collect_stat = 'on'" + psql -c "SELECT pg_reload_conf()" + make installcheck-world + + - name: installcheck_controlled + run: | + cd $PG_DIR + psql -c "ALTER SYSTEM SET aqo.mode = 'controlled'" + psql -c "ALTER SYSTEM SET aqo.force_collect_stat = 'on'" + psql -c "SELECT pg_reload_conf()" + make installcheck-world + + - name: installcheck_learn + continue-on-error: true + run: | + cd $PG_DIR + psql -c "ALTER SYSTEM SET aqo.mode = 'learn'" + psql -c "ALTER SYSTEM SET aqo.force_collect_stat = 'on'" + psql -c "SELECT pg_reload_conf()" + learn_result=$(make -k installcheck-world) + + - name: installcheck_intelligent + continue-on-error: true + run: | + cd $PG_DIR + psql -c "ALTER SYSTEM SET aqo.mode = 'intelligent'" + psql -c "ALTER SYSTEM SET aqo.force_collect_stat = 'on'" + psql -c "SELECT pg_reload_conf()" + make -k installcheck-world + + - name: installcheck_forced + continue-on-error: true + run: | + cd $PG_DIR + psql -c "ALTER SYSTEM SET aqo.mode = 'forced'" + psql -c "ALTER SYSTEM SET aqo.force_collect_stat = 'on'" + psql -c "SELECT pg_reload_conf()" + make -k installcheck-world + + # Save Artifacts + - name: Archive artifacts + if: ${{ always() }} + uses: actions/upload-artifact@v3 + with: + name: ${{ env.AQO_VERSION }}-${{ env.PG_BRANCH }}-${{ env.CORE_PATCH_NAME }}-artifacts + path: | + ${{ env.PG_DIR }}/src/test/regress/regression.diffs + ${{ env.PG_DIR }}/logfile.log + ${{ env.PG_DIR }}/contrib/aqo/tmp_check/log + retention-days: 2 + diff --git a/.github/workflows/job.yml b/.github/workflows/job.yml new file mode 100644 index 00000000..817f0047 --- /dev/null +++ b/.github/workflows/job.yml @@ -0,0 +1,169 @@ +name: 'Join Order Benchmark' + +env: + # Use it just for a report + BRANCH_NAME: ${{ github.head_ref || github.ref_name }} + +# Trigger the workflow on each release or on a manual action +on: + workflow_dispatch: + release: + +jobs: + AQO_JOB_Benchmark: + + runs-on: self-hosted + + steps: + - 
uses: actions/checkout@v3 + - name: "Define PostreSQL major version and set basic environment" + run: | + echo "The action workflow is triggered by the $BRANCH_NAME" + + # Cleanup, because of self-hosted runner + rm -rf $GITHUB_WORKSPACE/../pg + + patch_name=$(ls aqo_*.patch|tail -1) + echo "CORE_PATCH_NAME=$patch_name" >> $GITHUB_ENV + + # we can get number, otherwise set up master + vers_number=$(echo "$patch_name"|tr -d -c 0-9) + echo "PG_MAJOR_VERSION=$vers_number" >> $GITHUB_ENV + + branch_name="REL_${vers_number}_STABLE" + echo "PG_BRANCH=$branch_name" >> $GITHUB_ENV + - name: "Set master branch name, if needed" + if: env.PG_MAJOR_VERSION == '' + run: | + branch_name="master" + echo "PG_BRANCH=$branch_name" >> $GITHUB_ENV + + # Create workspace directory and environment variable. + # It is the second step because on the first we define versions and branches + - name: "Initial dir" + run: | + git clone -b $PG_BRANCH --depth=1 --single-branch https://github.com/postgres/postgres.git $GITHUB_WORKSPACE/../pg + + # Invent variable with full path to PG directory just because github + # actions don't like relative paths ... 
+ cd $GITHUB_WORKSPACE/../pg + echo PG_DIR=`pwd` >> $GITHUB_ENV + + - name: "Prepare PG directory" + run: | + cd $PG_DIR + cp -r ../aqo contrib/aqo + patch -p1 --no-backup-if-mismatch < contrib/aqo/$CORE_PATCH_NAME + echo "COPT=-Werror" >> $GITHUB_ENV + echo "CONFIGURE_OPTS=--prefix=`pwd`/tmp_install --enable-tap-tests --enable-cassert" >> $GITHUB_ENV + + # Instance-related environment + echo "$PG_DIR/tmp_install/bin" >> $GITHUB_PATH + echo "LD_LIBRARY_PATH=$PG_DIR/tmp_install/lib" >> $GITHUB_ENV + echo `pwd`/contrib/aqo/.github/scripts/job >> $GITHUB_PATH + + # JOB-specific environment + echo "JOB_DIR=$HOME/jo-bench" >> $GITHUB_ENV + echo "PGDATABASE=`whoami`" >> $GITHUB_ENV + echo "PGHOST=localhost" >> $GITHUB_ENV + echo "PGDATA=PGDATA" >> $GITHUB_ENV + echo "PGUSER=`whoami`" >> $GITHUB_ENV + echo "PGPORT=5432" >> $GITHUB_ENV + + # Just for debug + - name: "Environment (debug output)" + if: ${{ always() }} + run: | + echo "PG_MAJOR_VERSION: $PG_MAJOR_VERSION" + echo "PG_DIR: $PG_DIR" + echo "PG_BRANCH: $PG_BRANCH" + echo "CORE_PATCH_NAME: $CORE_PATCH_NAME" + # See these paths to understand correctness of the instance initialization + echo "PATHs: $PATH" + echo "PG Libs: $LD_LIBRARY_PATH" + + # JOB-specific environment variable + echo "JOB path: $JOB_DIR" + echo "PG Environment: dbname: $PGDATABASE, host: $PGHOST, pgdata: $PGDATA, pguser: $PGUSER, pgport: $PGPORT" + + - name: "Compilation" + run: | + cd $PG_DIR + ./configure $CONFIGURE_OPTS CFLAGS="-O0" + make clean > /dev/null + make -C contrib clean > /dev/null + make -j2 > /dev/null && make -j2 -C contrib > /dev/null + make install >> make.log + make -C contrib install >> make.log + make -C doc install > /dev/null + + - name: "Launch AQO instance" + run: | + cd $PG_DIR + + # Launch an instance with AQO extension + aqo_instance_launch.sh + AQO_VERSION=$(psql -t -c "SELECT extversion FROM pg_extension WHERE extname='aqo'") + echo "AQO_VERSION=$AQO_VERSION" >> $GITHUB_ENV + + - name: "Load a dump of the test 
database" + run: | + cd $PG_DIR + echo "AQO_VERSION: $AQO_VERSION" + load_imdb.sh + + # Quick pass in parallel mode with statistics + - name: "Test No.1: Gather statistics in disabled mode" + run: | + cd $PG_DIR + set_test_conditions_1.sh + job_pass.sh + dump_knowledge.sh + + - name: "Archive JOB test results" + if: ${{ always() }} + uses: actions/upload-artifact@v3 + with: + name: ${{ env.AQO_VERSION }}-${{ env.CORE_BRANCH_NAME }}-${{ env.BRANCH_NAME }}-result_base_stat + path: | + # Relative paths not allowed ... + ${{ env.PG_DIR }}/explains.txt + ${{ env.PG_DIR }}/report.txt + ${{ env.PG_DIR }}/knowledge_base.dump + ${{ env.PG_DIR }}/logfile.log + retention-days: 1 + + # Test No.2: Learn on all incoming queries + - name: "Test No.2: Learning stage" + run: | + cd $PG_DIR + set_test_conditions_2.sh + job_pass.sh 10 + check_result.sh + + # One pass on frozen AQO data, dump knowledge base, check total error + - name: "Test No.3: Frozen execution" + run: | + cd $PG_DIR + set_test_conditions_3.sh + job_pass.sh + dump_knowledge.sh + + - name: "Archive JOB test results - frozen" + if: ${{ always() }} + uses: actions/upload-artifact@v3 + with: + name: ${{ env.AQO_VERSION }}-${{ env.CORE_BRANCH_NAME }}-${{ env.BRANCH_NAME }}-result_frozen + path: | + # Relative paths not allowed ... 
+ ${{ env.PG_DIR }}/explains.txt + ${{ env.PG_DIR }}/report.txt + ${{ env.PG_DIR }}/knowledge_base.dump + ${{ env.PG_DIR }}/logfile.log + retention-days: 7 + + - name: "Cleanup" + run: | + cd $PG_DIR + pg_ctl -D PGDATA stop + diff --git a/.gitignore b/.gitignore index e2fcd401..1811e98d 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,6 @@ regression.out *.gcov tags +# Generated subdirectories +/log/ +/tmp_check/ diff --git a/Makefile b/Makefile old mode 100644 new mode 100755 index 30d5967a..1da2994c --- a/Makefile +++ b/Makefile @@ -1,26 +1,37 @@ # contrib/aqo/Makefile EXTENSION = aqo -EXTVERSION = 1.2 -PGFILEDESC = "AQO - adaptive query optimization" -MODULES = aqo -OBJS = aqo.o auto_tuning.o cardinality_estimation.o cardinality_hooks.o \ -hash.o machine_learning.o path_utils.o postprocessing.o preprocessing.o \ -selectivity_cache.o storage.o utils.o $(WIN32RES) +EXTVERSION = 1.6 +PGFILEDESC = "AQO - Adaptive Query Optimization" +MODULE_big = aqo +OBJS = $(WIN32RES) \ + aqo.o auto_tuning.o cardinality_estimation.o cardinality_hooks.o \ + hash.o machine_learning.o path_utils.o postprocessing.o preprocessing.o \ + selectivity_cache.o storage.o utils.o aqo_shared.o -REGRESS = aqo_disabled \ - aqo_controlled \ - aqo_intelligent \ - aqo_forced \ - aqo_learn \ - schema \ - aqo_CVE-2020-14350 +TAP_TESTS = 1 -EXTRA_REGRESS_OPTS=--temp-config=$(top_srcdir)/$(subdir)/conf.add +# Use an empty dummy test to define the variable REGRESS and therefore run all +# regression tests. regress_schedule contains the full list of real tests. 
+REGRESS = aqo_dummy_test +REGRESS_OPTS = --schedule=$(srcdir)/regress_schedule -DATA = aqo--1.0.sql aqo--1.0--1.1.sql aqo--1.1--1.2.sql aqo--1.2.sql +# Set default values of some gucs to be stable on custom settings during +# a kind of installcheck +PGOPTIONS = --aqo.force_collect_stat=off --max_parallel_maintenance_workers=1 \ + --aqo.join_threshold=0 --max_parallel_workers_per_gather=1 +export PGOPTIONS + +fdw_srcdir = $(top_srcdir)/contrib/postgres_fdw +stat_srcdir = $(top_srcdir)/contrib/pg_stat_statements +PG_CPPFLAGS += -I$(libpq_srcdir) -I$(fdw_srcdir) -I$(stat_srcdir) +EXTRA_REGRESS_OPTS=--temp-config=$(top_srcdir)/$(subdir)/aqo.conf +EXTRA_INSTALL = contrib/postgres_fdw contrib/pg_stat_statements + +DATA = aqo--1.0.sql aqo--1.0--1.1.sql aqo--1.1--1.2.sql aqo--1.2.sql \ + aqo--1.2--1.3.sql aqo--1.3--1.4.sql aqo--1.4--1.5.sql \ + aqo--1.5--1.6.sql aqo--1.6.sql -MODULE_big = aqo ifdef USE_PGXS PG_CONFIG ?= pg_config PGXS := $(shell $(PG_CONFIG) --pgxs) @@ -31,4 +42,3 @@ top_builddir = ../.. include $(top_builddir)/src/Makefile.global include $(top_srcdir)/contrib/contrib-global.mk endif - diff --git a/README.md b/README.md index 1b5284dd..252c74ad 100644 --- a/README.md +++ b/README.md @@ -9,6 +9,14 @@ complicated queries. ## Installation The module works with PostgreSQL 9.6 and above. +To avoid compatibility issues, the following branches in the git-repository are allocated: +* `stable9_6`. +* `stable11` - for PG v10 and v11. +* `stable12` - for PG v12. +* `stable13` - for PG v13. +* `stable14` - for PG v14. +* `stable15` - for PG v15. +* the `master` branch of the AQO repository correctly works with PGv15 and the PostgreSQL `master` branch. The module contains a patch and an extension. Patch has to be applied to the sources of PostgresSQL. Patch affects header files, that is why PostgreSQL @@ -19,7 +27,7 @@ installed with `make install`. 
``` cd postgresql-9.6 # enter postgresql source directory -git clone https://github.com/tigvarts/aqo.git contrib/aqo # clone aqo into contrib +git clone https://github.com/postgrespro/aqo.git contrib/aqo # clone aqo into contrib patch -p1 --no-backup-if-mismatch < contrib/aqo/aqo_pg.patch # patch postgresql make clean && make && make install # recompile postgresql cd contrib/aqo # enter aqo directory @@ -28,7 +36,7 @@ make check # check whether it works ``` Tag `version` at the patch name corresponds to suitable PostgreSQL release. -For PostgreSQL 10 use aqo_pg10.patch; for PostgreSQL 11 use aqo_pg11.patch and so on. +For PostgreSQL 9.6 use the 'aqo_pg9_6.patch' file; PostgreSQL 10 use aqo_pg10.patch; for PostgreSQL 11 use aqo_pg11.patch and so on. Also, you can see git tags at the master branch for more accurate definition of suitable PostgreSQL version. @@ -50,7 +58,7 @@ of per-database. The typical case is follows: you have complicated query, which executes too long. `EXPLAIN ANALYZE` shows, that the possible reason is bad cardinality -estimnation. +estimation. Example: ``` @@ -127,16 +135,16 @@ When the plan stops changing, you can often observe performance improvement: (23 rows) ``` -The settings system in AQO works with normalized queries, i. e. queries with -removed constants. For example, the normalized version of +The settings system in AQO works with normalised queries, i. e. queries with +removed constants. For example, the normalised version of `SELECT * FROM tbl WHERE a < 25 AND b = 'str';` is `SELECT * FROM tbl WHERE a < CONST and b = CONST;` -So the queries have equal normalization if and only if they differ only +So the queries have equal normalisation if and only if they differ only in their constants. -Each normalized query has its own hash. The correspondence between normalized +Each normalised query has its own hash. 
The correspondence between normalised query hash and query text is stored in aqo_query_texts table: ``` SELECT * FROM aqo_query_texts; @@ -174,6 +182,10 @@ if the data tends to change significantly), you can do `UPDATE SET aqo_learn=false WHERE query_hash = ;` before commit. +The extension includes two GUC's to display the executed cardinality predictions for a query. +The `aqo.show_details = 'on'` (default - off) allows to see the aqo cardinality prediction results for each node of a query plan and an AQO summary. +The `aqo.show_hash = 'on'` (default - off) will print hash signature for each plan node and overall query. It is system-specific information and should be used for situational analysis. + The more detailed reference of AQO settings mechanism is available further. ## Advanced tuning @@ -203,7 +215,7 @@ execution of such query type. Disabling of AQO usage is reasonable for that cases in which query execution time increases after applying AQO. It happens sometimes because of cost models incompleteness. -`Fspace_hash` setting is for extra advanced AQO tuning. It may be changed manually +`fs` setting is for extra advanced AQO tuning. It may be changed manually to optimize a number of queries using the same model. It may decrease the amount of memory for models and even the query execution time, but also it may cause the bad AQO's behavior, so please use it only if you know exactly @@ -221,7 +233,7 @@ ignored. If `aqo.mode` is `'learn'`, then the normalized query hash appends to aqo_queries with the default settings `learn_aqo=true`, `use_aqo=true`, `auto_tuning=false`, and -`fspace_hash = query_hash` which means that AQO uses separate machine learning +`fs = queryid` which means that AQO uses separate machine learning model for this query type optimization. After that the query is processed as if it already was in aqo_queries. @@ -316,7 +328,7 @@ Dynamically generated constants are okay. 
## License -© [Postgres Professional](https://postgrespro.com/), 2016-2020. Licensed under +© [Postgres Professional](https://postgrespro.com/), 2016-2022. Licensed under [The PostgreSQL License](LICENSE). ## Reference diff --git a/aqo--1.0.sql b/aqo--1.0.sql index 1f207718..67395744 100644 --- a/aqo--1.0.sql +++ b/aqo--1.0.sql @@ -2,20 +2,20 @@ \echo Use "CREATE EXTENSION aqo" to load this file. \quit CREATE TABLE public.aqo_queries ( - query_hash int PRIMARY KEY, + query_hash bigint PRIMARY KEY, learn_aqo boolean NOT NULL, use_aqo boolean NOT NULL, - fspace_hash int NOT NULL, + fspace_hash bigint NOT NULL, auto_tuning boolean NOT NULL ); CREATE TABLE public.aqo_query_texts ( - query_hash int PRIMARY KEY REFERENCES public.aqo_queries ON DELETE CASCADE, + query_hash bigint PRIMARY KEY REFERENCES public.aqo_queries ON DELETE CASCADE, query_text varchar NOT NULL ); CREATE TABLE public.aqo_query_stat ( - query_hash int PRIMARY KEY REFERENCES public.aqo_queries ON DELETE CASCADE, + query_hash bigint PRIMARY KEY REFERENCES public.aqo_queries ON DELETE CASCADE, execution_time_with_aqo double precision[], execution_time_without_aqo double precision[], planning_time_with_aqo double precision[], @@ -27,7 +27,7 @@ CREATE TABLE public.aqo_query_stat ( ); CREATE TABLE public.aqo_data ( - fspace_hash int NOT NULL REFERENCES public.aqo_queries ON DELETE CASCADE, + fspace_hash bigint NOT NULL REFERENCES public.aqo_queries ON DELETE CASCADE, fsspace_hash int NOT NULL, nfeatures int NOT NULL, features double precision[][], diff --git a/aqo--1.1--1.2.sql b/aqo--1.1--1.2.sql index 719f455b..9291e7b7 100644 --- a/aqo--1.1--1.2.sql +++ b/aqo--1.1--1.2.sql @@ -28,12 +28,12 @@ DROP FUNCTION aqo_migrate_to_1_2_get_pk(regclass); -- -- Show query state at the AQO knowledge base -CREATE OR REPLACE FUNCTION public.aqo_status(hash int) +CREATE OR REPLACE FUNCTION public.aqo_status(hash bigint) RETURNS TABLE ( "learn" BOOL, "use aqo" BOOL, "auto tune" 
BOOL, - "fspace hash" INT, + "fspace hash" bigINT, "t_naqo" TEXT, "err_naqo" TEXT, "iters" BIGINT, @@ -63,7 +63,7 @@ WHERE (aqs.query_hash = aq.query_hash) AND aqs.query_hash = $1; $func$ LANGUAGE SQL; -CREATE OR REPLACE FUNCTION public.aqo_enable_query(hash int) +CREATE OR REPLACE FUNCTION public.aqo_enable_query(hash bigint) RETURNS VOID AS $func$ UPDATE public.aqo_queries SET @@ -72,7 +72,7 @@ UPDATE public.aqo_queries SET WHERE query_hash = $1; $func$ LANGUAGE SQL; -CREATE OR REPLACE FUNCTION public.aqo_disable_query(hash int) +CREATE OR REPLACE FUNCTION public.aqo_disable_query(hash bigint) RETURNS VOID AS $func$ UPDATE public.aqo_queries SET @@ -82,7 +82,7 @@ UPDATE public.aqo_queries SET WHERE query_hash = $1; $func$ LANGUAGE SQL; -CREATE OR REPLACE FUNCTION public.aqo_clear_hist(hash int) +CREATE OR REPLACE FUNCTION public.aqo_clear_hist(hash bigint) RETURNS VOID AS $func$ DELETE FROM public.aqo_data WHERE fspace_hash=$1; @@ -96,7 +96,7 @@ SELECT query_hash FROM public.aqo_query_stat aqs WHERE -1 = ANY (cardinality_error_with_aqo::double precision[]); $func$ LANGUAGE SQL; -CREATE OR REPLACE FUNCTION public.aqo_drop(hash int) +CREATE OR REPLACE FUNCTION public.aqo_drop(hash bigint) RETURNS VOID AS $func$ DELETE FROM public.aqo_queries aq WHERE (aq.query_hash = $1); diff --git a/aqo--1.2--1.3.sql b/aqo--1.2--1.3.sql new file mode 100755 index 00000000..c29a6f10 --- /dev/null +++ b/aqo--1.2--1.3.sql @@ -0,0 +1,138 @@ +ALTER TABLE public.aqo_data ADD COLUMN oids text [] DEFAULT NULL; + +-- +-- Remove data, related to previously dropped tables, from the AQO tables. 
+-- +CREATE OR REPLACE FUNCTION public.clean_aqo_data() RETURNS void AS $$ +DECLARE + aqo_data_row aqo_data%ROWTYPE; + aqo_queries_row aqo_queries%ROWTYPE; + aqo_query_texts_row aqo_query_texts%ROWTYPE; + aqo_query_stat_row aqo_query_stat%ROWTYPE; + oid_var text; + fspace_hash_var bigint; + delete_row boolean DEFAULT false; +BEGIN + RAISE NOTICE 'Cleaning aqo_data records'; + + FOR aqo_data_row IN (SELECT * FROM aqo_data) + LOOP + delete_row = false; + SELECT aqo_data_row.fspace_hash INTO fspace_hash_var FROM aqo_data; + + IF (aqo_data_row.oids IS NOT NULL) THEN + FOREACH oid_var IN ARRAY aqo_data_row.oids + LOOP + IF NOT EXISTS (SELECT relname FROM pg_class WHERE oid::regclass::text = oid_var) THEN + delete_row = true; + END IF; + END LOOP; + END IF; + + FOR aqo_queries_row IN (SELECT * FROM public.aqo_queries) + LOOP + IF (delete_row = true AND fspace_hash_var <> 0 AND + fspace_hash_var = aqo_queries_row.fspace_hash AND + aqo_queries_row.fspace_hash = aqo_queries_row.query_hash) THEN + DELETE FROM aqo_data WHERE aqo_data = aqo_data_row; + DELETE FROM aqo_queries WHERE aqo_queries = aqo_queries_row; + + FOR aqo_query_texts_row IN (SELECT * FROM aqo_query_texts) + LOOP + DELETE FROM aqo_query_texts + WHERE aqo_query_texts_row.query_hash = fspace_hash_var AND + aqo_query_texts = aqo_query_texts_row; + END LOOP; + + FOR aqo_query_stat_row IN (SELECT * FROM aqo_query_stat) + LOOP + DELETE FROM aqo_query_stat + WHERE aqo_query_stat_row.query_hash = fspace_hash_var AND + aqo_query_stat = aqo_query_stat_row; + END LOOP; + END IF; + END LOOP; + END LOOP; +END; +$$ LANGUAGE plpgsql; + +CREATE OR REPLACE FUNCTION array_avg(arr double precision[]) RETURNS double precision as $$ +BEGIN + RETURN (SELECT AVG(a) FROM UNNEST(arr) AS a); +END; +$$ LANGUAGE plpgsql; + +CREATE OR REPLACE FUNCTION array_mse(arr double precision[]) RETURNS double precision as $$ +DECLARE + mean double precision; +BEGIN + mean = array_avg(arr); + RETURN (SELECT AVG(POWER(a - mean, 2)) FROM UNNEST(arr) 
AS a); +END; +$$ LANGUAGE plpgsql; + + +-- +-- Show top N of 'bad' queries. +-- +-- The AQO extension must be installed, but disabled. +-- Strictly speaking, these functions show 'query classes' that include all +-- queries of the same structure. A query example of a class can be found in the +-- aqo_query_texts table. +-- These functions can be used for a gentle search of 'bad' queries. User must set: +-- aqo.mode = 'disabled' +-- aqo.force_collect_stat = 'on' +-- + +-- +-- Top of queries with the highest value of execution time. +-- +CREATE OR REPLACE FUNCTION public.top_time_queries(n int) + RETURNS TABLE(num bigint, + fspace_hash bigint, + query_hash bigint, + execution_time float, + deviation float + ) +AS $$ +BEGIN + RAISE NOTICE 'Top % execution time queries', n; + RETURN QUERY + SELECT row_number() OVER(ORDER BY execution_time_without_aqo DESC) num, + aqo_queries.fspace_hash, + aqo_queries.query_hash, + to_char(array_avg(execution_time_without_aqo), '9.99EEEE')::float, + to_char(array_mse(execution_time_without_aqo), '9.99EEEE')::float + FROM public.aqo_queries INNER JOIN aqo_query_stat + ON aqo_queries.query_hash = aqo_query_stat.query_hash + GROUP BY (execution_time_without_aqo, aqo_queries.fspace_hash, aqo_queries.query_hash) + ORDER BY execution_time DESC LIMIT n; +END; +$$ LANGUAGE plpgsql; + +-- +-- Top of queries with largest value of total cardinality error.
+-- +CREATE OR REPLACE FUNCTION public.top_error_queries(n int) + RETURNS TABLE(num bigint, + fspace_hash bigint, + query_hash bigint, + error float, + deviation float + ) +AS $$ +BEGIN + RAISE NOTICE 'Top % cardinality error queries', n; + RETURN QUERY + SELECT row_number() OVER (ORDER BY cardinality_error_without_aqo DESC) num, + aqo_queries.fspace_hash, + aqo_queries.query_hash, + to_char(array_avg(cardinality_error_without_aqo), '9.99EEEE')::float, + to_char(array_mse(cardinality_error_without_aqo), '9.99EEEE')::float + FROM public.aqo_queries INNER JOIN aqo_query_stat + ON aqo_queries.query_hash = aqo_query_stat.query_hash + GROUP BY (cardinality_error_without_aqo, aqo_queries.fspace_hash, aqo_queries.query_hash) + ORDER BY error DESC LIMIT n; +END; +$$ LANGUAGE plpgsql; + diff --git a/aqo--1.2.sql b/aqo--1.2.sql index 3d96f0cc..7e3abf4a 100644 --- a/aqo--1.2.sql +++ b/aqo--1.2.sql @@ -2,20 +2,20 @@ \echo Use "CREATE EXTENSION aqo" to load this file. \quit CREATE TABLE public.aqo_queries ( - query_hash int CONSTRAINT aqo_queries_query_hash_idx PRIMARY KEY, + query_hash bigint CONSTRAINT aqo_queries_query_hash_idx PRIMARY KEY, learn_aqo boolean NOT NULL, use_aqo boolean NOT NULL, - fspace_hash int NOT NULL, + fspace_hash bigint NOT NULL, auto_tuning boolean NOT NULL ); CREATE TABLE public.aqo_query_texts ( - query_hash int CONSTRAINT aqo_query_texts_query_hash_idx PRIMARY KEY REFERENCES public.aqo_queries ON DELETE CASCADE, + query_hash bigint CONSTRAINT aqo_query_texts_query_hash_idx PRIMARY KEY REFERENCES public.aqo_queries ON DELETE CASCADE, query_text text NOT NULL ); CREATE TABLE public.aqo_query_stat ( - query_hash int CONSTRAINT aqo_query_stat_idx PRIMARY KEY REFERENCES public.aqo_queries ON DELETE CASCADE, + query_hash bigint CONSTRAINT aqo_query_stat_idx PRIMARY KEY REFERENCES public.aqo_queries ON DELETE CASCADE, execution_time_with_aqo double precision[], execution_time_without_aqo double precision[], planning_time_with_aqo double precision[], @@ 
-27,7 +27,7 @@ CREATE TABLE public.aqo_query_stat ( ); CREATE TABLE public.aqo_data ( - fspace_hash int NOT NULL REFERENCES public.aqo_queries ON DELETE CASCADE, + fspace_hash bigint NOT NULL REFERENCES public.aqo_queries ON DELETE CASCADE, fsspace_hash int NOT NULL, nfeatures int NOT NULL, features double precision[][], @@ -52,12 +52,12 @@ CREATE TRIGGER aqo_queries_invalidate AFTER UPDATE OR DELETE OR TRUNCATE -- -- Show query state at the AQO knowledge base -CREATE FUNCTION public.aqo_status(hash int) +CREATE FUNCTION public.aqo_status(hash bigint) RETURNS TABLE ( "learn" BOOL, "use aqo" BOOL, "auto tune" BOOL, - "fspace hash" INT, + "fspace hash" bigINT, "t_naqo" TEXT, "err_naqo" TEXT, "iters" BIGINT, @@ -87,7 +87,7 @@ WHERE (aqs.query_hash = aq.query_hash) AND aqs.query_hash = $1; $func$ LANGUAGE SQL; -CREATE FUNCTION public.aqo_enable_query(hash int) +CREATE FUNCTION public.aqo_enable_query(hash bigint) RETURNS VOID AS $func$ UPDATE public.aqo_queries SET @@ -96,7 +96,7 @@ UPDATE public.aqo_queries SET WHERE query_hash = $1; $func$ LANGUAGE SQL; -CREATE FUNCTION public.aqo_disable_query(hash int) +CREATE FUNCTION public.aqo_disable_query(hash bigint) RETURNS VOID AS $func$ UPDATE public.aqo_queries SET @@ -106,7 +106,7 @@ UPDATE public.aqo_queries SET WHERE query_hash = $1; $func$ LANGUAGE SQL; -CREATE FUNCTION public.aqo_clear_hist(hash int) +CREATE FUNCTION public.aqo_clear_hist(hash bigint) RETURNS VOID AS $func$ DELETE FROM public.aqo_data WHERE fspace_hash=$1; @@ -120,7 +120,7 @@ SELECT query_hash FROM public.aqo_query_stat aqs WHERE -1 = ANY (cardinality_error_with_aqo::double precision[]); $func$ LANGUAGE SQL; -CREATE FUNCTION public.aqo_drop(hash int) +CREATE FUNCTION public.aqo_drop(hash bigint) RETURNS VOID AS $func$ DELETE FROM public.aqo_queries aq WHERE (aq.query_hash = $1); diff --git a/aqo--1.3--1.4.sql b/aqo--1.3--1.4.sql new file mode 100755 index 00000000..f6df0263 --- /dev/null +++ b/aqo--1.3--1.4.sql @@ -0,0 +1,64 @@ +/* 
contrib/aqo/aqo--1.3--1.4.sql */ + +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "ALTER EXTENSION aqo UPDATE TO '1.4'" to load this file. \quit + +ALTER TABLE public.aqo_data ADD COLUMN reliability double precision []; + +DROP FUNCTION public.top_error_queries(int); + +-- +-- Get cardinality error of queries the last time they were executed. +-- IN: +-- controlled - show queries executed under a control of AQO (true); +-- executed without an AQO control, but AQO has a stat on the query (false). +-- +-- OUT: +-- num - sequental number. Smaller number corresponds to higher error. +-- id - ID of a query. +-- fshash - feature space. Usually equal to zero or ID. +-- error - AQO error that calculated on plan nodes of the query. +-- nexecs - number of executions of queries associated with this ID. +-- +CREATE OR REPLACE FUNCTION public.show_cardinality_errors(controlled boolean) +RETURNS TABLE(num bigint, id bigint, fshash bigint, error float, nexecs bigint) +AS $$ +BEGIN +IF (controlled) THEN + RETURN QUERY + SELECT + row_number() OVER (ORDER BY (cerror, query_id, fs_hash) DESC) AS nn, + query_id, fs_hash, cerror, execs + FROM ( + SELECT + aq.query_hash AS query_id, + aq.fspace_hash AS fs_hash, + cardinality_error_with_aqo[array_length(cardinality_error_with_aqo, 1)] AS cerror, + executions_with_aqo AS execs + FROM public.aqo_queries aq JOIN public.aqo_query_stat aqs + ON aq.query_hash = aqs.query_hash + WHERE TRUE = ANY (SELECT unnest(cardinality_error_with_aqo) IS NOT NULL) + ) AS q1 + ORDER BY nn ASC; +ELSE + RETURN QUERY + SELECT + row_number() OVER (ORDER BY (cerror, query_id, fs_hash) DESC) AS nn, + query_id, fs_hash, cerror, execs + FROM ( + SELECT + aq.query_hash AS query_id, + aq.fspace_hash AS fs_hash, + array_avg(cardinality_error_without_aqo) AS cerror, + executions_without_aqo AS execs + FROM public.aqo_queries aq JOIN public.aqo_query_stat aqs + ON aq.query_hash = aqs.query_hash + WHERE TRUE = ANY (SELECT 
unnest(cardinality_error_without_aqo) IS NOT NULL) + ) AS q1 + ORDER BY (nn) ASC; +END IF; +END; +$$ LANGUAGE plpgsql; + +COMMENT ON FUNCTION public.show_cardinality_errors(boolean) IS +'Get cardinality error of queries the last time they were executed. Order queries according to an error value.'; diff --git a/aqo--1.4--1.5.sql b/aqo--1.4--1.5.sql new file mode 100644 index 00000000..622bb7fa --- /dev/null +++ b/aqo--1.4--1.5.sql @@ -0,0 +1,162 @@ +/* contrib/aqo/aqo--1.4--1.5.sql */ + +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "ALTER EXTENSION aqo UPDATE TO '1.5'" to load this file. \quit + +/* Remove old interface of the extension */ +DROP FUNCTION array_mse; +DROP FUNCTION array_avg; +DROP FUNCTION public.aqo_clear_hist; -- Should be renamed and reworked +DROP FUNCTION public.aqo_disable_query; +DROP FUNCTION public.aqo_drop; +DROP FUNCTION public.aqo_enable_query; +DROP FUNCTION public.aqo_ne_queries; -- Not needed anymore due to changing in the logic +DROP FUNCTION public.aqo_status; +DROP FUNCTION public.clean_aqo_data; +DROP FUNCTION public.show_cardinality_errors; +DROP FUNCTION public.top_time_queries; +DROP TABLE public.aqo_data CASCADE; +DROP TABLE public.aqo_queries CASCADE; +DROP TABLE public.aqo_query_texts CASCADE; +DROP TABLE public.aqo_query_stat CASCADE; +DROP FUNCTION invalidate_deactivated_queries_cache; + + +/* + * VIEWs to discover AQO data. 
+ */ +CREATE FUNCTION aqo_queries ( + OUT queryid bigint, + OUT fs bigint, + OUT learn_aqo boolean, + OUT use_aqo boolean, + OUT auto_tuning boolean +) +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'aqo_queries' +LANGUAGE C STRICT VOLATILE PARALLEL SAFE; + +CREATE FUNCTION aqo_query_texts(OUT queryid bigint, OUT query_text text) +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'aqo_query_texts' +LANGUAGE C STRICT VOLATILE PARALLEL SAFE; + +CREATE FUNCTION aqo_query_stat ( + OUT queryid bigint, + OUT execution_time_with_aqo double precision[], + OUT execution_time_without_aqo double precision[], + OUT planning_time_with_aqo double precision[], + OUT planning_time_without_aqo double precision[], + OUT cardinality_error_with_aqo double precision[], + OUT cardinality_error_without_aqo double precision[], + OUT executions_with_aqo bigint, + OUT executions_without_aqo bigint +) +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'aqo_query_stat' +LANGUAGE C STRICT VOLATILE PARALLEL SAFE; + +CREATE FUNCTION aqo_data ( + OUT fs bigint, + OUT fss integer, + OUT nfeatures integer, + OUT features double precision[][], + OUT targets double precision[], + OUT reliability double precision[], + OUT oids Oid[] +) +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'aqo_data' +LANGUAGE C STRICT VOLATILE PARALLEL SAFE; + +CREATE VIEW aqo_query_stat AS SELECT * FROM aqo_query_stat(); +CREATE VIEW aqo_query_texts AS SELECT * FROM aqo_query_texts(); +CREATE VIEW aqo_data AS SELECT * FROM aqo_data(); +CREATE VIEW aqo_queries AS SELECT * FROM aqo_queries(); + +/* UI functions */ + + +CREATE FUNCTION aqo_enable_query(queryid bigint) +RETURNS void +AS 'MODULE_PATHNAME', 'aqo_enable_query' +LANGUAGE C STRICT VOLATILE; + +CREATE FUNCTION aqo_disable_query(queryid bigint) +RETURNS void +AS 'MODULE_PATHNAME', 'aqo_disable_query' +LANGUAGE C STRICT VOLATILE; + +CREATE FUNCTION aqo_queries_update( + queryid bigint, fs bigint, learn_aqo bool, use_aqo bool, auto_tuning bool) +RETURNS bool +AS 'MODULE_PATHNAME',
'aqo_queries_update' +LANGUAGE C VOLATILE; + +-- +-- Get cardinality error of queries the last time they were executed. +-- IN: +-- controlled - show queries executed under a control of AQO (true); +-- executed without an AQO control, but AQO has a stat on the query (false). +-- +-- OUT: +-- num - sequental number. Smaller number corresponds to higher error. +-- id - ID of a query. +-- fshash - feature space. Usually equal to zero or ID. +-- error - AQO error that calculated on plan nodes of the query. +-- nexecs - number of executions of queries associated with this ID. +-- +CREATE OR REPLACE FUNCTION aqo_cardinality_error(controlled boolean) +RETURNS TABLE(num integer, id bigint, fshash bigint, error double precision, nexecs bigint) +AS 'MODULE_PATHNAME', 'aqo_cardinality_error' +LANGUAGE C STRICT VOLATILE; +COMMENT ON FUNCTION aqo_cardinality_error(boolean) IS +'Get cardinality error of queries the last time they were executed. Order queries according to an error value.'; + +-- +-- Show execution time of queries, for which AQO has statistics. +-- controlled - show stat on executions where AQO was used for cardinality +-- estimations, or not used (controlled = false). +-- Last case is possible in disabled mode with aqo.force_collect_stat = 'on'. +-- +CREATE OR REPLACE FUNCTION aqo_execution_time(controlled boolean) +RETURNS TABLE(num integer, id bigint, fshash bigint, exec_time double precision, nexecs bigint) +AS 'MODULE_PATHNAME', 'aqo_execution_time' +LANGUAGE C STRICT VOLATILE; +COMMENT ON FUNCTION aqo_execution_time(boolean) IS +'Get execution time of queries. If controlled = true (AQO could advise cardinality estimations), show time of last execution attempt. Another case (AQO not used), return an average value of execution time across all known executions.'; + +-- +-- Remove query class settings, text, statistics and ML data from AQO storage. +-- Return number of FSS records, removed from the storage. 
+-- +CREATE OR REPLACE FUNCTION aqo_drop_class(queryid bigint) +RETURNS integer +AS 'MODULE_PATHNAME', 'aqo_drop_class' +LANGUAGE C STRICT VOLATILE; +COMMENT ON FUNCTION aqo_drop_class(bigint) IS +'Remove info about an query class from AQO ML knowledge base.'; + +-- +-- Remove unneeded rows from the AQO ML storage. +-- For common feature space, remove rows from aqo_data only. +-- For custom feature space - remove all rows related to the space from all AQO +-- tables even if only one oid for one feature subspace of the space is illegal. +-- Returns number of deleted rows from aqo_queries and aqo_data tables. +-- +CREATE OR REPLACE FUNCTION aqo_cleanup(OUT nfs integer, OUT nfss integer) +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'aqo_cleanup' +LANGUAGE C STRICT VOLATILE; +COMMENT ON FUNCTION aqo_cleanup() IS +'Remove unneeded rows from the AQO ML storage'; + +-- +-- Remove all records in the AQO storage. +-- Return number of rows removed. +-- +CREATE FUNCTION aqo_reset() RETURNS bigint +AS 'MODULE_PATHNAME', 'aqo_reset' +LANGUAGE C PARALLEL SAFE; +COMMENT ON FUNCTION aqo_reset() IS +'Reset all data gathered by AQO'; diff --git a/aqo--1.5--1.6.sql b/aqo--1.5--1.6.sql new file mode 100644 index 00000000..448b6023 --- /dev/null +++ b/aqo--1.5--1.6.sql @@ -0,0 +1,114 @@ +/* contrib/aqo/aqo--1.5--1.6.sql */ + +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "ALTER EXTENSION aqo UPDATE TO '1.6'" to load this file. \quit + +DROP VIEW aqo_queries; + +DROP FUNCTION aqo_enable_query; +DROP FUNCTION aqo_disable_query; +DROP FUNCTION aqo_cleanup; +DROP FUNCTION aqo_queries; + +CREATE FUNCTION aqo_enable_class(queryid bigint) +RETURNS void +AS 'MODULE_PATHNAME', 'aqo_enable_query' +LANGUAGE C STRICT VOLATILE; + +CREATE FUNCTION aqo_disable_class(queryid bigint) +RETURNS void +AS 'MODULE_PATHNAME', 'aqo_disable_query' +LANGUAGE C STRICT VOLATILE; + +-- +-- Remove unneeded rows from the AQO ML storage. 
+-- For common feature space, remove rows from aqo_data only. +-- For custom feature space - remove all rows related to the space from all AQO +-- tables even if only one oid for one feature subspace of the space is illegal. +-- Returns number of deleted rows from aqo_queries and aqo_data tables. +-- +CREATE FUNCTION aqo_cleanup(OUT nfs integer, OUT nfss integer) +RETURNS record +AS 'MODULE_PATHNAME', 'aqo_cleanup' +LANGUAGE C STRICT VOLATILE; +COMMENT ON FUNCTION aqo_cleanup() IS +'Remove unneeded rows from the AQO ML storage'; + +-- +-- Update or insert an aqo_query_texts +-- table record for given 'queryid'. +-- + +CREATE FUNCTION aqo_query_texts_update( + queryid bigint, query_text text) +RETURNS bool +AS 'MODULE_PATHNAME', 'aqo_query_texts_update' +LANGUAGE C VOLATILE; + +-- +-- Update or insert an aqo_query_stat +-- table record for given 'queryid'. +-- + +CREATE FUNCTION aqo_query_stat_update( + queryid bigint, + execution_time_with_aqo double precision[], + execution_time_without_aqo double precision[], + planning_time_with_aqo double precision[], + planning_time_without_aqo double precision[], + cardinality_error_with_aqo double precision[], + cardinality_error_without_aqo double precision[], + executions_with_aqo bigint, + executions_without_aqo bigint) +RETURNS bool +AS 'MODULE_PATHNAME', 'aqo_query_stat_update' +LANGUAGE C VOLATILE; + +-- +-- Update or insert an aqo_data +-- table record for given 'fs' & 'fss'. +-- + +CREATE FUNCTION aqo_data_update( + fs bigint, + fss integer, + nfeatures integer, + features double precision[][], + targets double precision[], + reliability double precision[], + oids Oid[]) +RETURNS bool +AS 'MODULE_PATHNAME', 'aqo_data_update' +LANGUAGE C VOLATILE; + +/* + * VIEWs to discover AQO data. 
+ */ +CREATE FUNCTION aqo_queries ( + OUT queryid bigint, + OUT fs bigint, + OUT learn_aqo boolean, + OUT use_aqo boolean, + OUT auto_tuning boolean, + OUT smart_timeout bigint, + OUT count_increase_timeout bigint +) +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'aqo_queries' +LANGUAGE C STRICT VOLATILE PARALLEL SAFE; + +CREATE VIEW aqo_queries AS SELECT * FROM aqo_queries(); + +-- Show how much shared memory AQO are using at the moment +CREATE FUNCTION aqo_memory_usage( + OUT name text, + OUT allocated_size int, + OUT used_size int +) +RETURNS SETOF record +AS $$ + SELECT name, allocated_size, size FROM pg_shmem_allocations + WHERE name LIKE 'AQO%'; +$$ LANGUAGE SQL; +COMMENT ON FUNCTION aqo_memory_usage() IS +'Show how much shared memory AQO are using at the moment'; diff --git a/aqo--1.6.sql b/aqo--1.6.sql new file mode 100644 index 00000000..90d4fb06 --- /dev/null +++ b/aqo--1.6.sql @@ -0,0 +1,208 @@ +/* contrib/aqo/aqo--1.6.sql */ + +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "CREATE EXTENSION aqo" to load this file. \quit + +-- +-- Get cardinality error of queries the last time they were executed. +-- IN: +-- controlled - show queries executed under a control of AQO (true); +-- executed without an AQO control, but AQO has a stat on the query (false). +-- +-- OUT: +-- num - sequental number. Smaller number corresponds to higher error. +-- id - ID of a query. +-- fshash - feature space. Usually equal to zero or ID. +-- error - AQO error that calculated on plan nodes of the query. +-- nexecs - number of executions of queries associated with this ID. +-- +CREATE FUNCTION aqo_cardinality_error(controlled boolean) +RETURNS TABLE(num integer, id bigint, fshash bigint, error double precision, nexecs bigint) +AS 'MODULE_PATHNAME', 'aqo_cardinality_error' +LANGUAGE C STRICT VOLATILE; +COMMENT ON FUNCTION aqo_cardinality_error(boolean) IS +'Get cardinality error of queries the last time they were executed. 
Order queries according to an error value.'; + +-- +-- Remove unneeded rows from the AQO ML storage. +-- For common feature space, remove rows from aqo_data only. +-- For custom feature space - remove all rows related to the space from all AQO +-- tables even if only one oid for one feature subspace of the space is illegal. +-- Returns number of deleted rows from aqo_queries and aqo_data tables. +-- +CREATE FUNCTION aqo_cleanup(OUT nfs integer, OUT nfss integer) +RETURNS record +AS 'MODULE_PATHNAME', 'aqo_cleanup' +LANGUAGE C STRICT VOLATILE; +COMMENT ON FUNCTION aqo_cleanup() IS +'Remove unneeded rows from the AQO ML storage'; + +CREATE FUNCTION aqo_disable_class(queryid bigint) +RETURNS void +AS 'MODULE_PATHNAME', 'aqo_disable_query' +LANGUAGE C STRICT VOLATILE; +COMMENT ON FUNCTION aqo_disable_class(bigint) IS +'Set learn_aqo, use_aqo and auto_tuning into false for a class of queries with specific queryid.'; + +-- +-- Remove query class settings, text, statistics and ML data from AQO storage. +-- Return number of FSS records, removed from the storage. +-- +CREATE FUNCTION aqo_drop_class(queryid bigint) +RETURNS integer +AS 'MODULE_PATHNAME', 'aqo_drop_class' +LANGUAGE C STRICT VOLATILE; +COMMENT ON FUNCTION aqo_drop_class(bigint) IS +'Remove info about an query class from AQO ML knowledge base.'; + +CREATE FUNCTION aqo_enable_class(queryid bigint) +RETURNS void +AS 'MODULE_PATHNAME', 'aqo_enable_query' +LANGUAGE C STRICT VOLATILE; +COMMENT ON FUNCTION aqo_enable_class(bigint) IS +'Set learn_aqo, use_aqo and auto_tuning (in intelligent mode) into true for a class of queries with specific queryid.'; + +-- +-- Show execution time of queries, for which AQO has statistics. +-- controlled - show stat on executions where AQO was used for cardinality +-- estimations, or not used (controlled = false). +-- Last case is possible in disabled mode with aqo.force_collect_stat = 'on'. 
+-- +CREATE FUNCTION aqo_execution_time(controlled boolean) +RETURNS TABLE(num integer, id bigint, fshash bigint, exec_time double precision, nexecs bigint) +AS 'MODULE_PATHNAME', 'aqo_execution_time' +LANGUAGE C STRICT VOLATILE; +COMMENT ON FUNCTION aqo_execution_time(boolean) IS +'Get execution time of queries. If controlled = true (AQO could advise cardinality estimations), show time of last execution attempt. Another case (AQO not used), return an average value of execution time across all known executions.'; + +-- Show how much shared memory AQO are using at the moment +CREATE FUNCTION aqo_memory_usage( + OUT name text, + OUT allocated_size int, + OUT used_size int +) +RETURNS SETOF record +AS $$ + SELECT name, allocated_size, size FROM pg_shmem_allocations + WHERE name LIKE 'AQO%'; +$$ LANGUAGE SQL; +COMMENT ON FUNCTION aqo_memory_usage() IS +'Show how much shared memory AQO are using at the moment'; + +-- +-- Update or insert an aqo_data +-- table record for given 'fs' & 'fss'. +-- + +CREATE FUNCTION aqo_data_update( + fs bigint, + fss integer, + nfeatures integer, + features double precision[][], + targets double precision[], + reliability double precision[], + oids Oid[]) +RETURNS bool +AS 'MODULE_PATHNAME', 'aqo_data_update' +LANGUAGE C VOLATILE; + +CREATE FUNCTION aqo_queries_update( + queryid bigint, fs bigint, learn_aqo bool, use_aqo bool, auto_tuning bool) +RETURNS bool +AS 'MODULE_PATHNAME', 'aqo_queries_update' +LANGUAGE C VOLATILE; + +-- +-- Update or insert an aqo_query_stat +-- table record for given 'queryid'. 
+-- +CREATE FUNCTION aqo_query_stat_update( + queryid bigint, + execution_time_with_aqo double precision[], + execution_time_without_aqo double precision[], + planning_time_with_aqo double precision[], + planning_time_without_aqo double precision[], + cardinality_error_with_aqo double precision[], + cardinality_error_without_aqo double precision[], + executions_with_aqo bigint, + executions_without_aqo bigint) +RETURNS bool +AS 'MODULE_PATHNAME', 'aqo_query_stat_update' +LANGUAGE C VOLATILE; + +-- +-- Update or insert an aqo_query_texts +-- table record for given 'queryid'. +-- +CREATE FUNCTION aqo_query_texts_update( + queryid bigint, query_text text) +RETURNS bool +AS 'MODULE_PATHNAME', 'aqo_query_texts_update' +LANGUAGE C VOLATILE; + +-- +-- Remove all records in the AQO storage. +-- Return number of rows removed. +-- +CREATE FUNCTION aqo_reset() RETURNS bigint +AS 'MODULE_PATHNAME', 'aqo_reset' +LANGUAGE C PARALLEL SAFE; +COMMENT ON FUNCTION aqo_reset() IS +'Reset all data gathered by AQO'; + +-- ----------------------------------------------------------------------------- +-- +-- VIEWs +-- +-- ----------------------------------------------------------------------------- + +CREATE FUNCTION aqo_data ( + OUT fs bigint, + OUT fss integer, + OUT nfeatures integer, + OUT features double precision[][], + OUT targets double precision[], + OUT reliability double precision[], + OUT oids Oid[] +) +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'aqo_data' +LANGUAGE C STRICT VOLATILE PARALLEL SAFE; + +CREATE FUNCTION aqo_queries ( + OUT queryid bigint, + OUT fs bigint, + OUT learn_aqo boolean, + OUT use_aqo boolean, + OUT auto_tuning boolean, + OUT smart_timeout bigint, + OUT count_increase_timeout bigint +) +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'aqo_queries' +LANGUAGE C STRICT VOLATILE PARALLEL SAFE; + +CREATE FUNCTION aqo_query_stat ( + OUT queryid bigint, + OUT execution_time_with_aqo double precision[], + OUT execution_time_without_aqo double precision[], + OUT 
planning_time_with_aqo double precision[], + OUT planning_time_without_aqo double precision[], + OUT cardinality_error_with_aqo double precision[], + OUT cardinality_error_without_aqo double precision[], + OUT executions_with_aqo bigint, + OUT executions_without_aqo bigint +) +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'aqo_query_stat' +LANGUAGE C STRICT VOLATILE PARALLEL SAFE; + +CREATE FUNCTION aqo_query_texts(OUT queryid bigint, OUT query_text text) +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'aqo_query_texts' +LANGUAGE C STRICT VOLATILE PARALLEL SAFE; + +CREATE VIEW aqo_data AS SELECT * FROM aqo_data(); +CREATE VIEW aqo_queries AS SELECT * FROM aqo_queries(); +CREATE VIEW aqo_query_stat AS SELECT * FROM aqo_query_stat(); +CREATE VIEW aqo_query_texts AS SELECT * FROM aqo_query_texts(); diff --git a/aqo.c b/aqo.c index 4f0eac87..3e8796cd 100644 --- a/aqo.c +++ b/aqo.c @@ -2,22 +2,52 @@ * aqo.c * Adaptive query optimization extension * - * Copyright (c) 2016-2020, Postgres Professional + * Copyright (c) 2016-2023, Postgres Professional * * IDENTIFICATION * aqo/aqo.c */ +#include "postgres.h" + +#include "access/relation.h" +#include "access/table.h" +#include "catalog/pg_extension.h" +#include "commands/extension.h" +#include "miscadmin.h" +#include "utils/selfuncs.h" + #include "aqo.h" +#include "aqo_shared.h" +#include "path_utils.h" +#include "storage.h" + PG_MODULE_MAGIC; void _PG_init(void); +#define AQO_MODULE_MAGIC (1234) /* Strategy of determining feature space for new queries. */ -int aqo_mode; +int aqo_mode = AQO_MODE_CONTROLLED; bool force_collect_stat; +bool aqo_predict_with_few_neighbors; +int aqo_statement_timeout; + +/* + * Show special info in EXPLAIN mode. + * + * aqo_show_hash - show query class (hash) and a feature space value (hash) + * of each plan node. This is instance-dependent value and can't be used + * in regression and TAP tests. + * + * aqo_show_details - show AQO settings for this class and prediction + * for each plan node. 
+ */ +bool aqo_show_hash; +bool aqo_show_details; +bool change_flex_timeout; /* GUC variables */ static const struct config_enum_entry format_options[] = { @@ -31,7 +61,6 @@ static const struct config_enum_entry format_options[] = { }; /* Parameters of autotuning */ -int aqo_stat_size = 20; int auto_tuning_window_size = 5; double auto_tuning_exploration = 0.1; int auto_tuning_max_iterations = 50; @@ -41,47 +70,34 @@ int auto_tuning_infinite_loop = 8; /* Machine learning parameters */ -/* - * Defines where we do not perform learning procedure - */ -const double object_selection_prediction_threshold = 0.3; - -/* - * This parameter tell us that the new learning sample object has very small - * distance from one whose features stored in matrix already. - * In this case we will not to add new line in matrix, but will modify this - * nearest neighbor features and cardinality with linear smoothing by - * learning_rate coefficient. - */ -const double object_selection_threshold = 0.1; -const double learning_rate = 1e-1; - /* The number of nearest neighbors which will be chosen for ML-operations */ -int aqo_k = 3; +int aqo_k; double log_selectivity_lower_bound = -30; /* * Currently we use it only to store query_text string which is initialized * after a query parsing and is used during the query planning. 
*/ -MemoryContext AQOMemoryContext; + QueryContextData query_context; + +MemoryContext AQOTopMemCtx = NULL; + +/* Is released at the end of transaction */ +MemoryContext AQOCacheMemCtx = NULL; + +/* Is released at the end of planning */ +MemoryContext AQOPredictMemCtx = NULL; + +/* Is released at the end of learning */ +MemoryContext AQOLearnMemCtx = NULL; + +/* Is released at the end of load/store routines */ +MemoryContext AQOStorageMemCtx = NULL; + /* Additional plan info */ -int njoins; - -char *query_text = NULL; - -/* Saved hook values */ -post_parse_analyze_hook_type prev_post_parse_analyze_hook; -planner_hook_type prev_planner_hook; -ExecutorStart_hook_type prev_ExecutorStart_hook; -ExecutorEnd_hook_type prev_ExecutorEnd_hook; -set_baserel_rows_estimate_hook_type prev_set_baserel_rows_estimate_hook; -get_parameterized_baserel_size_hook_type prev_get_parameterized_baserel_size_hook; -set_joinrel_size_estimates_hook_type prev_set_joinrel_size_estimates_hook; -get_parameterized_joinrel_size_hook_type prev_get_parameterized_joinrel_size_hook; -copy_generic_path_info_hook_type prev_copy_generic_path_info_hook; -ExplainOnePlan_hook_type prev_ExplainOnePlan_hook; +int njoins = -1; + /***************************************************************************** * @@ -89,9 +105,36 @@ ExplainOnePlan_hook_type prev_ExplainOnePlan_hook; * *****************************************************************************/ +static void +aqo_free_callback(ResourceReleasePhase phase, + bool isCommit, + bool isTopLevel, + void *arg) +{ + if (phase != RESOURCE_RELEASE_AFTER_LOCKS) + return; + + if (isTopLevel) + { + MemoryContextReset(AQOCacheMemCtx); + cur_classes = NIL; + aqo_eclass_collector = NIL; + } +} + void _PG_init(void) { + /* + * In order to create our shared memory area, we have to be loaded via + * shared_preload_libraries. If not, report an ERROR. 
+ */ + if (!process_shared_preload_libraries_in_progress) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("AQO module could be loaded only on startup."), + errdetail("Add 'aqo' into the shared_preload_libraries list."))); + DefineCustomEnumVariable("aqo.mode", "Mode of aqo usage.", NULL, @@ -102,7 +145,8 @@ _PG_init(void) 0, NULL, NULL, - NULL); + NULL + ); DefineCustomBoolVariable( "aqo.force_collect_stat", @@ -115,34 +159,220 @@ _PG_init(void) NULL, NULL, NULL - ); - - prev_planner_hook = planner_hook; - planner_hook = aqo_planner; - prev_post_parse_analyze_hook = post_parse_analyze_hook; - post_parse_analyze_hook = get_query_text; - prev_ExecutorStart_hook = ExecutorStart_hook; - ExecutorStart_hook = aqo_ExecutorStart; - prev_ExecutorEnd_hook = ExecutorEnd_hook; - ExecutorEnd_hook = aqo_ExecutorEnd; - prev_set_baserel_rows_estimate_hook = set_baserel_rows_estimate_hook; - set_baserel_rows_estimate_hook = aqo_set_baserel_rows_estimate; - prev_get_parameterized_baserel_size_hook = get_parameterized_baserel_size_hook; - get_parameterized_baserel_size_hook = aqo_get_parameterized_baserel_size; - prev_set_joinrel_size_estimates_hook = set_joinrel_size_estimates_hook; - set_joinrel_size_estimates_hook = aqo_set_joinrel_size_estimates; - prev_get_parameterized_joinrel_size_hook = get_parameterized_joinrel_size_hook; - get_parameterized_joinrel_size_hook = aqo_get_parameterized_joinrel_size; - prev_copy_generic_path_info_hook = copy_generic_path_info_hook; - copy_generic_path_info_hook = aqo_copy_generic_path_info; - prev_ExplainOnePlan_hook = ExplainOnePlan_hook; - ExplainOnePlan_hook = print_into_explain; - parampathinfo_postinit_hook = ppi_hook; + ); + + DefineCustomBoolVariable( + "aqo.show_hash", + "Show query and node hash on explain.", + "Hash value depend on each instance and is not good to enable it in regression or TAP tests.", + &aqo_show_hash, + false, + PGC_USERSET, + 0, + NULL, + NULL, + NULL + ); + + DefineCustomBoolVariable( + 
"aqo.show_details", + "Show AQO state on a query.", + NULL, + &aqo_show_details, + false, + PGC_USERSET, + 0, + NULL, + NULL, + NULL + ); + + DefineCustomBoolVariable( + "aqo.learn_statement_timeout", + "Learn on a plan interrupted by statement timeout.", + "ML data stored in a backend cache, so it works only locally.", + &aqo_learn_statement_timeout, + false, + PGC_USERSET, + 0, + NULL, + NULL, + NULL + ); + + DefineCustomBoolVariable( + "aqo.wide_search", + "Search ML data in neighbour feature spaces.", + NULL, + &use_wide_search, + false, + PGC_USERSET, + 0, + NULL, + NULL, + NULL + ); + + DefineCustomIntVariable("aqo.join_threshold", + "Sets the threshold of number of JOINs in query beyond which AQO is used.", + NULL, + &aqo_join_threshold, + 3, + 0, INT_MAX / 1000, + PGC_USERSET, + 0, + NULL, + NULL, + NULL + ); + + DefineCustomIntVariable("aqo.fs_max_items", + "Max number of feature spaces that AQO can operate with.", + NULL, + &fs_max_items, + 10000, + 1, INT_MAX, + PGC_POSTMASTER, + 0, + NULL, + NULL, + NULL + ); + + DefineCustomIntVariable("aqo.fss_max_items", + "Max number of feature subspaces that AQO can operate with.", + NULL, + &fss_max_items, + 100000, + 0, INT_MAX, + PGC_POSTMASTER, + 0, + NULL, + NULL, + NULL + ); + + DefineCustomIntVariable("aqo.querytext_max_size", + "Query max size in aqo_query_texts.", + NULL, + &querytext_max_size, + 1000, + 1, INT_MAX, + PGC_SUSET, + 0, + NULL, + NULL, + NULL + ); + + DefineCustomIntVariable("aqo.dsm_size_max", + "Maximum size of dynamic shared memory which AQO could allocate to store learning data.", + NULL, + &dsm_size_max, + 100, + 0, INT_MAX, + PGC_POSTMASTER, + GUC_UNIT_MB, + NULL, + NULL, + NULL + ); + DefineCustomIntVariable("aqo.statement_timeout", + "Time limit on learning.", + NULL, + &aqo_statement_timeout, + 0, + 0, INT_MAX, + PGC_USERSET, + 0, + NULL, + NULL, + NULL); + + DefineCustomIntVariable("aqo.min_neighbors_for_predicting", + "Set how many neighbors the cardinality prediction will be 
calculated", + NULL, + &aqo_k, + 3, + 1, INT_MAX / 1000, + PGC_USERSET, + 0, + NULL, + NULL, + NULL); + + DefineCustomBoolVariable("aqo.predict_with_few_neighbors", + "Establish the ability to make predictions with fewer neighbors than were found.", + NULL, + &aqo_predict_with_few_neighbors, + true, + PGC_USERSET, + 0, + NULL, + NULL, + NULL); + + aqo_shmem_init(); + aqo_preprocessing_init(); + aqo_postprocessing_init(); + aqo_cardinality_hooks_init(); + aqo_path_utils_init(); init_deactivated_queries_storage(); - AQOMemoryContext = AllocSetContextCreate(TopMemoryContext, - "AQOMemoryContext", + + /* + * Create own Top memory Context for reporting AQO memory in the future. + */ + AQOTopMemCtx = AllocSetContextCreate(TopMemoryContext, + "AQOTopMemoryContext", + ALLOCSET_DEFAULT_SIZES); + /* + * AQO Cache Memory Context containe environment data. + */ + AQOCacheMemCtx = AllocSetContextCreate(AQOTopMemCtx, + "AQOCacheMemCtx", + ALLOCSET_DEFAULT_SIZES); + + /* + * AQOPredictMemoryContext save necessary information for making predict of plan nodes + * and clean up in the execution stage of query. + */ + AQOPredictMemCtx = AllocSetContextCreate(AQOTopMemCtx, + "AQOPredictMemoryContext", + ALLOCSET_DEFAULT_SIZES); + /* + * AQOLearnMemoryContext save necessary information for writing down to AQO knowledge table + * and clean up after doing this operation. + */ + AQOLearnMemCtx = AllocSetContextCreate(AQOTopMemCtx, + "AQOLearnMemoryContext", + ALLOCSET_DEFAULT_SIZES); + /* + * AQOStorageMemoryContext containe data for load/store routines. + */ + AQOStorageMemCtx = AllocSetContextCreate(AQOTopMemCtx, + "AQOStorageMemoryContext", ALLOCSET_DEFAULT_SIZES); + RegisterResourceReleaseCallback(aqo_free_callback, NULL); + RegisterAQOPlanNodeMethods(); + + EmitWarningsOnPlaceholders("aqo"); +} + +/* + * AQO is really needed for any activity? 
+ */ +bool +IsQueryDisabled(void) +{ + if (!query_context.learn_aqo && !query_context.use_aqo && + !query_context.auto_tuning && !query_context.collect_stat && + !query_context.adding_query && !query_context.explain_only && + INSTR_TIME_IS_ZERO(query_context.start_planning_time) && + query_context.planning_time < 0.) + return true; + + return false; } PG_FUNCTION_INFO_V1(invalidate_deactivated_queries_cache); @@ -154,7 +384,5 @@ PG_FUNCTION_INFO_V1(invalidate_deactivated_queries_cache); Datum invalidate_deactivated_queries_cache(PG_FUNCTION_ARGS) { - fini_deactivated_queries_storage(); - init_deactivated_queries_storage(); PG_RETURN_POINTER(NULL); } diff --git a/aqo.conf b/aqo.conf new file mode 100644 index 00000000..705e3dde --- /dev/null +++ b/aqo.conf @@ -0,0 +1,2 @@ +autovacuum = off +shared_preload_libraries = 'postgres_fdw, aqo' diff --git a/aqo.control b/aqo.control index 8edc5fc7..4ca0ecb6 100644 --- a/aqo.control +++ b/aqo.control @@ -1,5 +1,5 @@ # AQO extension comment = 'machine learning for cardinality estimation in optimizer' -default_version = '1.2' +default_version = '1.6' module_pathname = '$libdir/aqo' -relocatable = false +relocatable = true diff --git a/aqo.h b/aqo.h index 080d076b..04d9b8b3 100644 --- a/aqo.h +++ b/aqo.h @@ -105,7 +105,7 @@ * Module storage.c is responsible for storage query settings and models * (i. e. all information which is used in extension). 
* - * Copyright (c) 2016-2020, Postgres Professional + * Copyright (c) 2016-2023, Postgres Professional * * IDENTIFICATION * aqo/aqo.h @@ -116,10 +116,6 @@ #include -#include "postgres.h" - -#include "fmgr.h" - #include "access/hash.h" #include "access/htup_details.h" #include "access/xact.h" @@ -135,20 +131,17 @@ #include "nodes/makefuncs.h" #include "nodes/nodeFuncs.h" #include "optimizer/pathnode.h" -#include "optimizer/planmain.h" #include "optimizer/planner.h" -#include "optimizer/cost.h" #include "parser/analyze.h" #include "parser/parsetree.h" -#include "utils/array.h" #include "utils/builtins.h" #include "utils/guc.h" #include "utils/hsearch.h" #include "utils/memutils.h" #include "utils/rel.h" -#include "utils/fmgroids.h" #include "utils/snapmgr.h" +#include "machine_learning.h" /* Check PostgreSQL version (9.6.0 contains important changes in planner) */ #if PG_VERSION_NUM < 90600 @@ -174,55 +167,51 @@ typedef enum extern int aqo_mode; extern bool force_collect_stat; - -/* - * It is mostly needed for auto tuning of query. with auto tuning mode aqo - * checks stability of last executions of the query, bad influence of strong - * cardinality estimation on query execution (planner bug?) and so on. - * It can induce aqo to suppress machine learning for this query. 
- */ -typedef struct -{ - double *execution_time_with_aqo; - double *execution_time_without_aqo; - double *planning_time_with_aqo; - double *planning_time_without_aqo; - double *cardinality_error_with_aqo; - double *cardinality_error_without_aqo; - - int execution_time_with_aqo_size; - int execution_time_without_aqo_size; - int planning_time_with_aqo_size; - int planning_time_without_aqo_size; - int cardinality_error_with_aqo_size; - int cardinality_error_without_aqo_size; - - int64 executions_with_aqo; - int64 executions_without_aqo; -} QueryStat; +extern bool aqo_show_hash; +extern bool aqo_show_details; +extern int aqo_join_threshold; +extern bool use_wide_search; +extern bool aqo_learn_statement_timeout; +extern bool aqo_learn_statement_timeout_enable; /* Parameters for current query */ typedef struct QueryContextData { - int query_hash; + uint64 query_hash; + uint64 fspace_hash; bool learn_aqo; bool use_aqo; - int fspace_hash; bool auto_tuning; bool collect_stat; bool adding_query; bool explain_only; - /* Query execution time */ - instr_time query_starttime; - double query_planning_time; + /* + * Timestamp of start of query planning process. Must be zeroed on execution + * start or in the case of ERROR. Query context is stored in an query env + * field. So, if query has a cached plan, a planning step could be skipped + * by an optimizer. We should realize it at an execution stage by zero value + * of this field. 
+ */ + instr_time start_planning_time; + + instr_time start_execution_time; + double planning_time; + int64 smart_timeout; + int64 count_increase_timeout; } QueryContextData; +/* + * Indicator for using smart statement timeout for query + */ +extern bool change_flex_timeout; + +struct StatEntry; + extern double predicted_ppi_rows; extern double fss_ppi_hash; /* Parameters of autotuning */ -extern int aqo_stat_size; extern int auto_tuning_window_size; extern double auto_tuning_exploration; extern int auto_tuning_max_iterations; @@ -231,148 +220,51 @@ extern double auto_tuning_convergence_error; /* Machine learning parameters */ -/* Max number of matrix rows - max number of possible neighbors. */ -#define aqo_K (30) - -extern const double object_selection_prediction_threshold; -extern const double object_selection_threshold; -extern const double learning_rate; extern int aqo_k; +extern bool aqo_predict_with_few_neighbors; extern double log_selectivity_lower_bound; /* Parameters for current query */ extern QueryContextData query_context; extern int njoins; -extern char *query_text; -/* Memory context for long-live data */ -extern MemoryContext AQOMemoryContext; +/* AQO Memory contexts */ +extern MemoryContext AQOTopMemCtx; +extern MemoryContext AQOCacheMemCtx; +extern MemoryContext AQOPredictMemCtx; +extern MemoryContext AQOLearnMemCtx; +extern MemoryContext AQOStorageMemCtx; -/* Saved hook values in case of unload */ -extern post_parse_analyze_hook_type prev_post_parse_analyze_hook; -extern planner_hook_type prev_planner_hook; -extern ExecutorStart_hook_type prev_ExecutorStart_hook; -extern ExecutorEnd_hook_type prev_ExecutorEnd_hook; -extern set_baserel_rows_estimate_hook_type - prev_set_baserel_rows_estimate_hook; -extern get_parameterized_baserel_size_hook_type - prev_get_parameterized_baserel_size_hook; -extern set_joinrel_size_estimates_hook_type - prev_set_joinrel_size_estimates_hook; -extern get_parameterized_joinrel_size_hook_type - 
prev_get_parameterized_joinrel_size_hook; -extern copy_generic_path_info_hook_type prev_copy_generic_path_info_hook; -extern ExplainOnePlan_hook_type prev_ExplainOnePlan_hook; - -extern void ppi_hook(ParamPathInfo *ppi); - -/* Hash functions */ -int get_query_hash(Query *parse, const char *query_text); -extern int get_fss_for_object(List *clauselist, List *selectivities, - List *relidslist, int *nfeatures, - double **features); -void get_eclasses(List *clauselist, int *nargs, int **args_hash, - int **eclass_hash); -int get_clause_hash(Expr *clause, int nargs, int *args_hash, int *eclass_hash); - - -/* Storage interaction */ -bool find_query(int query_hash, - Datum *search_values, - bool *search_nulls); -bool add_query(int query_hash, bool learn_aqo, bool use_aqo, - int fspace_hash, bool auto_tuning); -bool update_query(int query_hash, bool learn_aqo, bool use_aqo, - int fspace_hash, bool auto_tuning); -bool add_query_text(int query_hash, const char *query_text); -bool load_fss(int fss_hash, int ncols, - double **matrix, double *targets, int *rows); -extern bool update_fss(int fss_hash, int nrows, int ncols, - double **matrix, double *targets); -QueryStat *get_aqo_stat(int query_hash); -void update_aqo_stat(int query_hash, QueryStat * stat); -void init_deactivated_queries_storage(void); -void fini_deactivated_queries_storage(void); -bool query_is_deactivated(int query_hash); -void add_deactivated_query(int query_hash); - -/* Query preprocessing hooks */ -void get_query_text(ParseState *pstate, Query *query); -PlannedStmt *call_default_planner(Query *parse, - const char *query_string, - int cursorOptions, - ParamListInfo boundParams); -PlannedStmt *aqo_planner(Query *parse, - const char *query_string, - int cursorOptions, - ParamListInfo boundParams); -void print_into_explain(PlannedStmt *plannedstmt, IntoClause *into, - ExplainState *es, const char *queryString, - ParamListInfo params, const instr_time *planduration, - QueryEnvironment *queryEnv); -void 
disable_aqo_for_query(void); - -/* Cardinality estimation hooks */ -extern void aqo_set_baserel_rows_estimate(PlannerInfo *root, RelOptInfo *rel); -double aqo_get_parameterized_baserel_size(PlannerInfo *root, - RelOptInfo *rel, - List *param_clauses); -void aqo_set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, - RelOptInfo *outer_rel, - RelOptInfo *inner_rel, - SpecialJoinInfo *sjinfo, - List *restrictlist); -double aqo_get_parameterized_joinrel_size(PlannerInfo *root, - RelOptInfo *rel, - Path *outer_path, - Path *inner_path, - SpecialJoinInfo *sjinfo, - List *restrict_clauses); - -/* Extracting path information utilities */ -List *get_selectivities(PlannerInfo *root, - List *clauses, - int varRelid, - JoinType jointype, - SpecialJoinInfo *sjinfo); -List *get_list_of_relids(PlannerInfo *root, Relids relids); -List *get_path_clauses(Path *path, PlannerInfo *root, List **selectivities); +extern int aqo_statement_timeout; /* Cardinality estimation */ -double predict_for_relation(List *restrict_clauses, List *selectivities, - List *relids, int *fss_hash); - -/* Query execution statistics collecting hooks */ -void aqo_ExecutorStart(QueryDesc *queryDesc, int eflags); -void aqo_copy_generic_path_info(PlannerInfo *root, Plan *dest, Path *src); -void aqo_ExecutorEnd(QueryDesc *queryDesc); - -/* Machine learning techniques */ -extern double OkNNr_predict(int nrows, int ncols, - double **matrix, const double *targets, - double *features); -extern int OkNNr_learn(int matrix_rows, int matrix_cols, - double **matrix, double *targets, - double *features, double target); +extern double predict_for_relation(List *restrict_clauses, List *selectivities, + List *relsigns, int *fss); /* Automatic query tuning */ -void automatical_query_tuning(int query_hash, QueryStat * stat); +extern void automatical_query_tuning(uint64 query_hash, struct StatEntry *stat); +extern double get_mean(double *elems, int nelems); /* Utilities */ -int int_cmp(const void *a, const void *b); -int 
double_cmp(const void *a, const void *b); -int *argsort(void *a, int n, size_t es, - int (*cmp) (const void *, const void *)); -int *inverse_permutation(int *a, int n); -QueryStat *palloc_query_stat(void); -void pfree_query_stat(QueryStat *stat); +extern int int_cmp(const void *a, const void *b); +extern int double_cmp(const void *a, const void *b); +extern int *argsort(void *a, int n, size_t es, + int (*cmp) (const void *, const void *)); +extern int *inverse_permutation(int *a, int n); /* Selectivity cache for parametrized baserels */ -void cache_selectivity(int clause_hash, - int relid, - int global_relid, - double selectivity); -double *selectivity_cache_find_global_relid(int clause_hash, int global_relid); -void selectivity_cache_clear(void); +extern void cache_selectivity(int clause_hash, int relid, int global_relid, + double selectivity); +extern double *selectivity_cache_find_global_relid(int clause_hash, + int global_relid); +extern void selectivity_cache_clear(void); + +extern bool IsQueryDisabled(void); + +extern List *cur_classes; + +extern void aqo_cardinality_hooks_init(void); +extern void aqo_preprocessing_init(void); +extern void aqo_postprocessing_init(void); #endif diff --git a/aqo_pg10.patch b/aqo_pg10.patch deleted file mode 100644 index 5c0cdf73..00000000 --- a/aqo_pg10.patch +++ /dev/null @@ -1,795 +0,0 @@ -diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c -index 953e74d..e3f381f 100644 ---- a/src/backend/commands/explain.c -+++ b/src/backend/commands/explain.c -@@ -46,6 +46,9 @@ ExplainOneQuery_hook_type ExplainOneQuery_hook = NULL; - /* Hook for plugins to get control in explain_get_index_name() */ - explain_get_index_name_hook_type explain_get_index_name_hook = NULL; - -+/* Hook for plugins to get control in ExplainOnePlan() */ -+ExplainOnePlan_hook_type ExplainOnePlan_hook = NULL; -+ - - /* OR-able flags for ExplainXMLTag() */ - #define X_OPENING 0 -@@ -599,6 +602,10 @@ ExplainOnePlan(PlannedStmt *plannedstmt, 
IntoClause *into, ExplainState *es, - 3, es); - } - -+ if (ExplainOnePlan_hook) -+ ExplainOnePlan_hook(plannedstmt, into, es, -+ queryString, params, planduration); -+ - ExplainCloseGroup("Query", NULL, true, es); - } - -diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c -index 4d67070..6b98fb5 100644 ---- a/src/backend/nodes/copyfuncs.c -+++ b/src/backend/nodes/copyfuncs.c -@@ -126,6 +126,12 @@ CopyPlanFields(const Plan *from, Plan *newnode) - COPY_NODE_FIELD(lefttree); - COPY_NODE_FIELD(righttree); - COPY_NODE_FIELD(initPlan); -+ COPY_SCALAR_FIELD(had_path); -+ COPY_NODE_FIELD(path_clauses); -+ COPY_NODE_FIELD(path_relids); -+ COPY_SCALAR_FIELD(path_jointype); -+ COPY_SCALAR_FIELD(path_parallel_workers); -+ COPY_SCALAR_FIELD(was_parametrized); - COPY_BITMAPSET_FIELD(extParam); - COPY_BITMAPSET_FIELD(allParam); - } -diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c -index b35acb7..c3da124 100644 ---- a/src/backend/optimizer/path/costsize.c -+++ b/src/backend/optimizer/path/costsize.c -@@ -100,6 +100,10 @@ - - #define LOG2(x) (log(x) / 0.693147180559945) - -+set_baserel_rows_estimate_hook_type set_baserel_rows_estimate_hook = NULL; -+get_parameterized_baserel_size_hook_type get_parameterized_baserel_size_hook = NULL; -+get_parameterized_joinrel_size_hook_type get_parameterized_joinrel_size_hook = NULL; -+set_joinrel_size_estimates_hook_type set_joinrel_size_estimates_hook = NULL; - - double seq_page_cost = DEFAULT_SEQ_PAGE_COST; - double random_page_cost = DEFAULT_RANDOM_PAGE_COST; -@@ -3996,6 +4000,49 @@ approx_tuple_count(PlannerInfo *root, JoinPath *path, List *quals) - - - /* -+ * set_baserel_rows_estimate -+ * Set the rows estimate for the given base relation. -+ * -+ * Rows is the estimated number of output tuples after applying -+ * restriction clauses. 
-+ * -+ * To support loadable plugins that monitor or modify cardinality estimation, -+ * we provide a hook variable that lets a plugin get control before and -+ * after the cardinality estimation. -+ * The hook must set rel->rows. -+ */ -+void -+set_baserel_rows_estimate(PlannerInfo *root, RelOptInfo *rel) -+{ -+ if (set_baserel_rows_estimate_hook) -+ (*set_baserel_rows_estimate_hook) (root, rel); -+ else -+ set_baserel_rows_estimate_standard(root, rel); -+} -+ -+/* -+ * set_baserel_rows_estimate -+ * Set the rows estimate for the given base relation. -+ * -+ * Rows is the estimated number of output tuples after applying -+ * restriction clauses. -+ */ -+void -+set_baserel_rows_estimate_standard(PlannerInfo *root, RelOptInfo *rel) -+{ -+ double nrows; -+ -+ nrows = rel->tuples * -+ clauselist_selectivity(root, -+ rel->baserestrictinfo, -+ 0, -+ JOIN_INNER, -+ NULL); -+ -+ rel->rows = clamp_row_est(nrows); -+} -+ -+/* - * set_baserel_size_estimates - * Set the size estimates for the given base relation. - * -@@ -4011,19 +4058,10 @@ approx_tuple_count(PlannerInfo *root, JoinPath *path, List *quals) - void - set_baserel_size_estimates(PlannerInfo *root, RelOptInfo *rel) - { -- double nrows; -- - /* Should only be applied to base relations */ - Assert(rel->relid > 0); - -- nrows = rel->tuples * -- clauselist_selectivity(root, -- rel->baserestrictinfo, -- 0, -- JOIN_INNER, -- NULL); -- -- rel->rows = clamp_row_est(nrows); -+ set_baserel_rows_estimate(root, rel); - - cost_qual_eval(&rel->baserestrictcost, rel->baserestrictinfo, root); - -@@ -4034,13 +4072,33 @@ set_baserel_size_estimates(PlannerInfo *root, RelOptInfo *rel) - * get_parameterized_baserel_size - * Make a size estimate for a parameterized scan of a base relation. - * -+ * To support loadable plugins that monitor or modify cardinality estimation, -+ * we provide a hook variable that lets a plugin get control before and -+ * after the cardinality estimation. 
-+ */ -+double -+get_parameterized_baserel_size(PlannerInfo *root, RelOptInfo *rel, -+ List *param_clauses) -+{ -+ if (get_parameterized_baserel_size_hook) -+ return (*get_parameterized_baserel_size_hook) (root, rel, -+ param_clauses); -+ else -+ return get_parameterized_baserel_size_standard(root, rel, -+ param_clauses); -+} -+ -+/* -+ * get_parameterized_baserel_size_standard -+ * Make a size estimate for a parameterized scan of a base relation. -+ * - * 'param_clauses' lists the additional join clauses to be used. - * - * set_baserel_size_estimates must have been applied already. - */ - double --get_parameterized_baserel_size(PlannerInfo *root, RelOptInfo *rel, -- List *param_clauses) -+get_parameterized_baserel_size_standard(PlannerInfo *root, RelOptInfo *rel, -+ List *param_clauses) - { - List *allclauses; - double nrows; -@@ -4070,6 +4128,36 @@ get_parameterized_baserel_size(PlannerInfo *root, RelOptInfo *rel, - * set_joinrel_size_estimates - * Set the size estimates for the given join relation. - * -+ * To support loadable plugins that monitor or modify cardinality estimation, -+ * we provide a hook variable that lets a plugin get control before and -+ * after the cardinality estimation. -+ * The hook must set rel->rows value. -+ */ -+void -+set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, -+ RelOptInfo *outer_rel, -+ RelOptInfo *inner_rel, -+ SpecialJoinInfo *sjinfo, -+ List *restrictlist) -+{ -+ if (set_joinrel_size_estimates_hook) -+ (*set_joinrel_size_estimates_hook) (root, rel, -+ outer_rel, -+ inner_rel, -+ sjinfo, -+ restrictlist); -+ else -+ set_joinrel_size_estimates_standard(root, rel, -+ outer_rel, -+ inner_rel, -+ sjinfo, -+ restrictlist); -+} -+ -+/* -+ * set_joinrel_size_estimates_standard -+ * Set the size estimates for the given join relation. -+ * - * The rel's targetlist must have been constructed already, and a - * restriction clause list that matches the given component rels must - * be provided. 
-@@ -4089,11 +4177,11 @@ get_parameterized_baserel_size(PlannerInfo *root, RelOptInfo *rel, - * build_joinrel_tlist, and baserestrictcost is not used for join rels. - */ - void --set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, -- RelOptInfo *outer_rel, -- RelOptInfo *inner_rel, -- SpecialJoinInfo *sjinfo, -- List *restrictlist) -+set_joinrel_size_estimates_standard(PlannerInfo *root, RelOptInfo *rel, -+ RelOptInfo *outer_rel, -+ RelOptInfo *inner_rel, -+ SpecialJoinInfo *sjinfo, -+ List *restrictlist) - { - rel->rows = calc_joinrel_size_estimate(root, - outer_rel, -@@ -4108,6 +4196,35 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, - * get_parameterized_joinrel_size - * Make a size estimate for a parameterized scan of a join relation. - * -+ * To support loadable plugins that monitor or modify cardinality estimation, -+ * we provide a hook variable that lets a plugin get control before and -+ * after the cardinality estimation. -+ */ -+double -+get_parameterized_joinrel_size(PlannerInfo *root, RelOptInfo *rel, -+ Path *outer_path, -+ Path *inner_path, -+ SpecialJoinInfo *sjinfo, -+ List *restrict_clauses) -+{ -+ if (get_parameterized_joinrel_size_hook) -+ return (*get_parameterized_joinrel_size_hook) (root, rel, -+ outer_path, -+ inner_path, -+ sjinfo, -+ restrict_clauses); -+ else -+ return get_parameterized_joinrel_size_standard(root, rel, -+ outer_path, -+ inner_path, -+ sjinfo, -+ restrict_clauses); -+} -+ -+/* -+ * get_parameterized_joinrel_size_standard -+ * Make a size estimate for a parameterized scan of a join relation. -+ * - * 'rel' is the joinrel under consideration. - * 'outer_path', 'inner_path' are (probably also parameterized) Paths that - * produce the relations being joined. -@@ -4120,11 +4237,11 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, - * set_joinrel_size_estimates must have been applied already. 
- */ - double --get_parameterized_joinrel_size(PlannerInfo *root, RelOptInfo *rel, -- Path *outer_path, -- Path *inner_path, -- SpecialJoinInfo *sjinfo, -- List *restrict_clauses) -+get_parameterized_joinrel_size_standard(PlannerInfo *root, RelOptInfo *rel, -+ Path *outer_path, -+ Path *inner_path, -+ SpecialJoinInfo *sjinfo, -+ List *restrict_clauses) - { - double nrows; - -diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c -index 2821662..12e643f 100644 ---- a/src/backend/optimizer/plan/createplan.c -+++ b/src/backend/optimizer/plan/createplan.c -@@ -67,6 +67,8 @@ - #define CP_SMALL_TLIST 0x0002 /* Prefer narrower tlists */ - #define CP_LABEL_TLIST 0x0004 /* tlist must contain sortgrouprefs */ - -+/* Hook for plugins to get control in creating plan from path */ -+copy_generic_path_info_hook_type copy_generic_path_info_hook = NULL; - - static Plan *create_plan_recurse(PlannerInfo *root, Path *best_path, - int flags); -@@ -160,7 +162,7 @@ static List *fix_indexorderby_references(PlannerInfo *root, IndexPath *index_pat - static Node *fix_indexqual_operand(Node *node, IndexOptInfo *index, int indexcol); - static List *get_switched_clauses(List *clauses, Relids outerrelids); - static List *order_qual_clauses(PlannerInfo *root, List *clauses); --static void copy_generic_path_info(Plan *dest, Path *src); -+static void copy_generic_path_info(PlannerInfo *root, Plan *dest, Path *src); - static void copy_plan_costsize(Plan *dest, Plan *src); - static void label_sort_with_costsize(PlannerInfo *root, Sort *plan, - double limit_tuples); -@@ -1025,7 +1027,7 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path) - false)), - NULL); - -- copy_generic_path_info(plan, (Path *) best_path); -+ copy_generic_path_info(root, plan, (Path *) best_path); - - return plan; - } -@@ -1051,7 +1053,7 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path) - - plan = make_append(subplans, tlist, best_path->partitioned_rels); - -- 
copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return (Plan *) plan; - } -@@ -1079,7 +1081,7 @@ create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path) - * prepare_sort_from_pathkeys on it before we do so on the individual - * child plans, to make cross-checking the sort info easier. - */ -- copy_generic_path_info(plan, (Path *) best_path); -+ copy_generic_path_info(root, plan, (Path *) best_path); - plan->targetlist = tlist; - plan->qual = NIL; - plan->lefttree = NULL; -@@ -1185,7 +1187,7 @@ create_result_plan(PlannerInfo *root, ResultPath *best_path) - - plan = make_result(tlist, (Node *) quals, NULL); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ -1210,7 +1212,7 @@ create_project_set_plan(PlannerInfo *root, ProjectSetPath *best_path) - - plan = make_project_set(tlist, subplan); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ -1238,7 +1240,7 @@ create_material_plan(PlannerInfo *root, MaterialPath *best_path, int flags) - - plan = make_material(subplan); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ -1442,7 +1444,7 @@ create_unique_plan(PlannerInfo *root, UniquePath *best_path, int flags) - } - - /* Copy cost data from Path to Plan */ -- copy_generic_path_info(plan, &best_path->path); -+ copy_generic_path_info(root, plan, &best_path->path); - - return plan; - } -@@ -1475,7 +1477,7 @@ create_gather_plan(PlannerInfo *root, GatherPath *best_path) - best_path->single_copy, - subplan); - -- copy_generic_path_info(&gather_plan->plan, &best_path->path); -+ copy_generic_path_info(root, &gather_plan->plan, &best_path->path); - - /* use parallel mode for parallel 
plans. */ - root->glob->parallelModeNeeded = true; -@@ -1504,7 +1506,7 @@ create_gather_merge_plan(PlannerInfo *root, GatherMergePath *best_path) - gm_plan = makeNode(GatherMerge); - gm_plan->plan.targetlist = tlist; - gm_plan->num_workers = best_path->num_workers; -- copy_generic_path_info(&gm_plan->plan, &best_path->path); -+ copy_generic_path_info(root, &gm_plan->plan, &best_path->path); - - /* Assign the rescan Param. */ - gm_plan->rescan_param = SS_assign_special_param(root); -@@ -1595,7 +1597,7 @@ create_projection_plan(PlannerInfo *root, ProjectionPath *best_path) - /* We need a Result node */ - plan = (Plan *) make_result(tlist, NULL, subplan); - -- copy_generic_path_info(plan, (Path *) best_path); -+ copy_generic_path_info(root, plan, (Path *) best_path); - } - - return plan; -@@ -1654,7 +1656,7 @@ create_sort_plan(PlannerInfo *root, SortPath *best_path, int flags) - - plan = make_sort_from_pathkeys(subplan, best_path->path.pathkeys); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ -1691,7 +1693,7 @@ create_group_plan(PlannerInfo *root, GroupPath *best_path) - extract_grouping_ops(best_path->groupClause), - subplan); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ -1719,7 +1721,7 @@ create_upper_unique_plan(PlannerInfo *root, UpperUniquePath *best_path, int flag - best_path->path.pathkeys, - best_path->numkeys); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ -1760,7 +1762,7 @@ create_agg_plan(PlannerInfo *root, AggPath *best_path) - best_path->numGroups, - subplan); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ -1959,7 +1961,7 @@ 
create_groupingsets_plan(PlannerInfo *root, GroupingSetsPath *best_path) - subplan); - - /* Copy cost data from Path to Plan */ -- copy_generic_path_info(&plan->plan, &best_path->path); -+ copy_generic_path_info(root, &plan->plan, &best_path->path); - } - - return (Plan *) plan; -@@ -2015,7 +2017,7 @@ create_minmaxagg_plan(PlannerInfo *root, MinMaxAggPath *best_path) - - plan = make_result(tlist, (Node *) best_path->quals, NULL); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - /* - * During setrefs.c, we'll need to replace references to the Agg nodes -@@ -2109,7 +2111,7 @@ create_windowagg_plan(PlannerInfo *root, WindowAggPath *best_path) - wc->endOffset, - subplan); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ -2251,7 +2253,7 @@ create_setop_plan(PlannerInfo *root, SetOpPath *best_path, int flags) - best_path->firstFlag, - numGroups); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ -2287,7 +2289,7 @@ create_recursiveunion_plan(PlannerInfo *root, RecursiveUnionPath *best_path) - best_path->distinctList, - numGroups); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ -2310,7 +2312,7 @@ create_lockrows_plan(PlannerInfo *root, LockRowsPath *best_path, - - plan = make_lockrows(subplan, best_path->rowMarks, best_path->epqParam); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ -2369,7 +2371,7 @@ create_modifytable_plan(PlannerInfo *root, ModifyTablePath *best_path) - best_path->onconflict, - best_path->epqParam); - -- copy_generic_path_info(&plan->plan, &best_path->path); -+ 
copy_generic_path_info(root, &plan->plan, &best_path->path); - - return plan; - } -@@ -2393,7 +2395,7 @@ create_limit_plan(PlannerInfo *root, LimitPath *best_path, int flags) - best_path->limitOffset, - best_path->limitCount); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ -2439,7 +2441,7 @@ create_seqscan_plan(PlannerInfo *root, Path *best_path, - scan_clauses, - scan_relid); - -- copy_generic_path_info(&scan_plan->plan, best_path); -+ copy_generic_path_info(root, &scan_plan->plan, best_path); - - return scan_plan; - } -@@ -2485,7 +2487,7 @@ create_samplescan_plan(PlannerInfo *root, Path *best_path, - scan_relid, - tsc); - -- copy_generic_path_info(&scan_plan->scan.plan, best_path); -+ copy_generic_path_info(root, &scan_plan->scan.plan, best_path); - - return scan_plan; - } -@@ -2666,7 +2668,7 @@ create_indexscan_plan(PlannerInfo *root, - indexorderbyops, - best_path->indexscandir); - -- copy_generic_path_info(&scan_plan->plan, &best_path->path); -+ copy_generic_path_info(root, &scan_plan->plan, &best_path->path); - - return scan_plan; - } -@@ -2781,7 +2783,7 @@ create_bitmap_scan_plan(PlannerInfo *root, - bitmapqualorig, - baserelid); - -- copy_generic_path_info(&scan_plan->scan.plan, &best_path->path); -+ copy_generic_path_info(root, &scan_plan->scan.plan, &best_path->path); - - return scan_plan; - } -@@ -3046,7 +3048,7 @@ create_tidscan_plan(PlannerInfo *root, TidPath *best_path, - scan_relid, - tidquals); - -- copy_generic_path_info(&scan_plan->scan.plan, &best_path->path); -+ copy_generic_path_info(root, &scan_plan->scan.plan, &best_path->path); - - return scan_plan; - } -@@ -3096,7 +3098,7 @@ create_subqueryscan_plan(PlannerInfo *root, SubqueryScanPath *best_path, - scan_relid, - subplan); - -- copy_generic_path_info(&scan_plan->scan.plan, &best_path->path); -+ copy_generic_path_info(root, &scan_plan->scan.plan, &best_path->path); - - return scan_plan; - 
} -@@ -3139,7 +3141,7 @@ create_functionscan_plan(PlannerInfo *root, Path *best_path, - scan_plan = make_functionscan(tlist, scan_clauses, scan_relid, - functions, rte->funcordinality); - -- copy_generic_path_info(&scan_plan->scan.plan, best_path); -+ copy_generic_path_info(root, &scan_plan->scan.plan, best_path); - - return scan_plan; - } -@@ -3182,7 +3184,7 @@ create_tablefuncscan_plan(PlannerInfo *root, Path *best_path, - scan_plan = make_tablefuncscan(tlist, scan_clauses, scan_relid, - tablefunc); - -- copy_generic_path_info(&scan_plan->scan.plan, best_path); -+ copy_generic_path_info(root, &scan_plan->scan.plan, best_path); - - return scan_plan; - } -@@ -3226,7 +3228,7 @@ create_valuesscan_plan(PlannerInfo *root, Path *best_path, - scan_plan = make_valuesscan(tlist, scan_clauses, scan_relid, - values_lists); - -- copy_generic_path_info(&scan_plan->scan.plan, best_path); -+ copy_generic_path_info(root, &scan_plan->scan.plan, best_path); - - return scan_plan; - } -@@ -3319,7 +3321,7 @@ create_ctescan_plan(PlannerInfo *root, Path *best_path, - scan_plan = make_ctescan(tlist, scan_clauses, scan_relid, - plan_id, cte_param_id); - -- copy_generic_path_info(&scan_plan->scan.plan, best_path); -+ copy_generic_path_info(root, &scan_plan->scan.plan, best_path); - - return scan_plan; - } -@@ -3358,7 +3360,7 @@ create_namedtuplestorescan_plan(PlannerInfo *root, Path *best_path, - scan_plan = make_namedtuplestorescan(tlist, scan_clauses, scan_relid, - rte->enrname); - -- copy_generic_path_info(&scan_plan->scan.plan, best_path); -+ copy_generic_path_info(root, &scan_plan->scan.plan, best_path); - - return scan_plan; - } -@@ -3418,7 +3420,7 @@ create_worktablescan_plan(PlannerInfo *root, Path *best_path, - scan_plan = make_worktablescan(tlist, scan_clauses, scan_relid, - cteroot->wt_param_id); - -- copy_generic_path_info(&scan_plan->scan.plan, best_path); -+ copy_generic_path_info(root, &scan_plan->scan.plan, best_path); - - return scan_plan; - } -@@ -3478,7 +3480,7 @@ 
create_foreignscan_plan(PlannerInfo *root, ForeignPath *best_path, - outer_plan); - - /* Copy cost data from Path to Plan; no need to make FDW do this */ -- copy_generic_path_info(&scan_plan->scan.plan, &best_path->path); -+ copy_generic_path_info(root, &scan_plan->scan.plan, &best_path->path); - - /* Copy foreign server OID; likewise, no need to make FDW do this */ - scan_plan->fs_server = rel->serverid; -@@ -3612,7 +3614,7 @@ create_customscan_plan(PlannerInfo *root, CustomPath *best_path, - * Copy cost data from Path to Plan; no need to make custom-plan providers - * do this - */ -- copy_generic_path_info(&cplan->scan.plan, &best_path->path); -+ copy_generic_path_info(root, &cplan->scan.plan, &best_path->path); - - /* Likewise, copy the relids that are represented by this custom scan */ - cplan->custom_relids = best_path->path.parent->relids; -@@ -3744,7 +3746,7 @@ create_nestloop_plan(PlannerInfo *root, - best_path->jointype, - best_path->inner_unique); - -- copy_generic_path_info(&join_plan->join.plan, &best_path->path); -+ copy_generic_path_info(root, &join_plan->join.plan, &best_path->path); - - return join_plan; - } -@@ -4049,7 +4051,7 @@ create_mergejoin_plan(PlannerInfo *root, - best_path->skip_mark_restore); - - /* Costs of sort and material steps are included in path cost already */ -- copy_generic_path_info(&join_plan->join.plan, &best_path->jpath.path); -+ copy_generic_path_info(root, &join_plan->join.plan, &best_path->jpath.path); - - return join_plan; - } -@@ -4183,7 +4185,7 @@ create_hashjoin_plan(PlannerInfo *root, - best_path->jpath.jointype, - best_path->jpath.inner_unique); - -- copy_generic_path_info(&join_plan->join.plan, &best_path->jpath.path); -+ copy_generic_path_info(root, &join_plan->join.plan, &best_path->jpath.path); - - return join_plan; - } -@@ -4857,7 +4859,7 @@ order_qual_clauses(PlannerInfo *root, List *clauses) - * Also copy the parallel-related flags, which the executor *will* use. 
- */ - static void --copy_generic_path_info(Plan *dest, Path *src) -+copy_generic_path_info(PlannerInfo *root, Plan *dest, Path *src) - { - dest->startup_cost = src->startup_cost; - dest->total_cost = src->total_cost; -@@ -4865,6 +4867,9 @@ copy_generic_path_info(Plan *dest, Path *src) - dest->plan_width = src->pathtarget->width; - dest->parallel_aware = src->parallel_aware; - dest->parallel_safe = src->parallel_safe; -+ -+ if (copy_generic_path_info_hook) -+ (*copy_generic_path_info_hook) (root, dest, src); - } - - /* -diff --git a/src/include/commands/explain.h b/src/include/commands/explain.h -index 78822b7..da814ad 100644 ---- a/src/include/commands/explain.h -+++ b/src/include/commands/explain.h -@@ -60,6 +60,12 @@ extern PGDLLIMPORT ExplainOneQuery_hook_type ExplainOneQuery_hook; - typedef const char *(*explain_get_index_name_hook_type) (Oid indexId); - extern PGDLLIMPORT explain_get_index_name_hook_type explain_get_index_name_hook; - -+/* Hook for plugins to get control in ExplainOnePlan() */ -+typedef void (*ExplainOnePlan_hook_type) (PlannedStmt *plannedstmt, IntoClause *into, -+ ExplainState *es, const char *queryString, -+ ParamListInfo params, const instr_time *planduration); -+extern PGDLLIMPORT ExplainOnePlan_hook_type ExplainOnePlan_hook; -+ - - extern void ExplainQuery(ParseState *pstate, ExplainStmt *stmt, const char *queryString, - ParamListInfo params, QueryEnvironment *queryEnv, DestReceiver *dest); -diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h -index a382331..a014e17 100644 ---- a/src/include/nodes/plannodes.h -+++ b/src/include/nodes/plannodes.h -@@ -149,6 +149,16 @@ typedef struct Plan - * subselects) */ - - /* -+ * information for adaptive query optimization -+ */ -+ bool had_path; -+ List *path_clauses; -+ List *path_relids; -+ JoinType path_jointype; -+ int path_parallel_workers; -+ bool was_parametrized; -+ -+ /* - * Information for management of parameter-change-driven rescanning - * - * extParam includes 
the paramIDs of all external PARAM_EXEC params -diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h -index 63feba0..9c5fa96 100644 ---- a/src/include/optimizer/cost.h -+++ b/src/include/optimizer/cost.h -@@ -39,6 +39,34 @@ typedef enum - } ConstraintExclusionType; - - -+/* Hook for plugins to get control of cardinality estimation */ -+typedef void (*set_baserel_rows_estimate_hook_type) (PlannerInfo *root, -+ RelOptInfo *rel); -+extern PGDLLIMPORT set_baserel_rows_estimate_hook_type -+ set_baserel_rows_estimate_hook; -+typedef double (*get_parameterized_baserel_size_hook_type) (PlannerInfo *root, -+ RelOptInfo *rel, -+ List *param_clauses); -+extern PGDLLIMPORT get_parameterized_baserel_size_hook_type -+ get_parameterized_baserel_size_hook; -+typedef double (*get_parameterized_joinrel_size_hook_type) (PlannerInfo *root, -+ RelOptInfo *rel, -+ Path *outer_path, -+ Path *inner_path, -+ SpecialJoinInfo *sjinfo, -+ List *restrict_clauses); -+extern PGDLLIMPORT get_parameterized_joinrel_size_hook_type -+ get_parameterized_joinrel_size_hook; -+typedef void (*set_joinrel_size_estimates_hook_type) (PlannerInfo *root, -+ RelOptInfo *rel, -+ RelOptInfo *outer_rel, -+ RelOptInfo *inner_rel, -+ SpecialJoinInfo *sjinfo, -+ List *restrictlist); -+extern PGDLLIMPORT set_joinrel_size_estimates_hook_type -+ set_joinrel_size_estimates_hook; -+ -+ - /* - * prototypes for costsize.c - * routines to compute costs and sizes -@@ -164,21 +192,37 @@ extern void compute_semi_anti_join_factors(PlannerInfo *root, - SpecialJoinInfo *sjinfo, - List *restrictlist, - SemiAntiJoinFactors *semifactors); -+extern void set_baserel_rows_estimate(PlannerInfo *root, RelOptInfo *rel); -+extern void set_baserel_rows_estimate_standard(PlannerInfo *root, RelOptInfo *rel); - extern void set_baserel_size_estimates(PlannerInfo *root, RelOptInfo *rel); - extern double get_parameterized_baserel_size(PlannerInfo *root, - RelOptInfo *rel, - List *param_clauses); -+extern double 
get_parameterized_baserel_size_standard(PlannerInfo *root, -+ RelOptInfo *rel, -+ List *param_clauses); - extern double get_parameterized_joinrel_size(PlannerInfo *root, - RelOptInfo *rel, - Path *outer_path, - Path *inner_path, - SpecialJoinInfo *sjinfo, - List *restrict_clauses); -+extern double get_parameterized_joinrel_size_standard(PlannerInfo *root, -+ RelOptInfo *rel, -+ Path *outer_path, -+ Path *inner_path, -+ SpecialJoinInfo *sjinfo, -+ List *restrict_clauses); - extern void set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, - RelOptInfo *outer_rel, - RelOptInfo *inner_rel, - SpecialJoinInfo *sjinfo, - List *restrictlist); -+extern void set_joinrel_size_estimates_standard(PlannerInfo *root, RelOptInfo *rel, -+ RelOptInfo *outer_rel, -+ RelOptInfo *inner_rel, -+ SpecialJoinInfo *sjinfo, -+ List *restrictlist); - extern void set_subquery_size_estimates(PlannerInfo *root, RelOptInfo *rel); - extern void set_function_size_estimates(PlannerInfo *root, RelOptInfo *rel); - extern void set_values_size_estimates(PlannerInfo *root, RelOptInfo *rel); -diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h -index f1d16cf..4229886 100644 ---- a/src/include/optimizer/planmain.h -+++ b/src/include/optimizer/planmain.h -@@ -33,6 +33,12 @@ extern int force_parallel_mode; - /* query_planner callback to compute query_pathkeys */ - typedef void (*query_pathkeys_callback) (PlannerInfo *root, void *extra); - -+/* hook for plugins to get control in creating plan from path */ -+typedef void (*copy_generic_path_info_hook_type) (PlannerInfo *root, -+ Plan *dest, Path *src); -+ -+extern PGDLLIMPORT copy_generic_path_info_hook_type copy_generic_path_info_hook; -+ - /* - * prototypes for plan/planmain.c - */ diff --git a/aqo_pg11.patch b/aqo_pg11.patch deleted file mode 100644 index df6a6d79..00000000 --- a/aqo_pg11.patch +++ /dev/null @@ -1,909 +0,0 @@ -diff --git a/contrib/Makefile b/contrib/Makefile -index 92184ed487..9b91ad1952 100644 ---- 
a/contrib/Makefile -+++ b/contrib/Makefile -@@ -7,6 +7,7 @@ include $(top_builddir)/src/Makefile.global - SUBDIRS = \ - adminpack \ - amcheck \ -+ aqo \ - auth_delay \ - auto_explain \ - bloom \ -diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c -index 799a22e9d5..ece4ffffd9 100644 ---- a/src/backend/commands/explain.c -+++ b/src/backend/commands/explain.c -@@ -47,6 +47,9 @@ ExplainOneQuery_hook_type ExplainOneQuery_hook = NULL; - /* Hook for plugins to get control in explain_get_index_name() */ - explain_get_index_name_hook_type explain_get_index_name_hook = NULL; - -+/* Hook for plugins to get control in ExplainOnePlan() */ -+ExplainOnePlan_hook_type ExplainOnePlan_hook = NULL; -+ - - /* OR-able flags for ExplainXMLTag() */ - #define X_OPENING 0 -@@ -594,6 +597,10 @@ ExplainOnePlan(PlannedStmt *plannedstmt, IntoClause *into, ExplainState *es, - ExplainPropertyFloat("Execution Time", "ms", 1000.0 * totaltime, 3, - es); - -+ if (ExplainOnePlan_hook) -+ ExplainOnePlan_hook(plannedstmt, into, es, -+ queryString, params, planduration); -+ - ExplainCloseGroup("Query", NULL, true, es); - } - -@@ -1448,6 +1455,24 @@ ExplainNode(PlanState *planstate, List *ancestors, - appendStringInfo(es->str, - " (actual rows=%.0f loops=%.0f)", - rows, nloops); -+#ifdef AQO_EXPLAIN -+ if (es->verbose && plan) -+ { -+ int wrkrs = 1; -+ double error = -1.; -+ -+ if (planstate->worker_instrument && plan->parallel_aware) -+ wrkrs = planstate->worker_instrument->num_workers + 1; -+ -+ if (plan->predicted_cardinality > 0.) -+ { -+ error = 100. 
* (plan->predicted_cardinality-(rows*wrkrs)) / (rows * wrkrs); -+ appendStringInfo(es->str, -+ " (AQO predicted: cardinality=%.0lf, error=%.0lf%%, fss=%d)", -+ plan->predicted_cardinality, error, plan->fss_hash); -+ } -+ } -+#endif - } - else - { -diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c -index 648758de4a..5cc1491507 100644 ---- a/src/backend/nodes/copyfuncs.c -+++ b/src/backend/nodes/copyfuncs.c -@@ -127,6 +127,12 @@ CopyPlanFields(const Plan *from, Plan *newnode) - COPY_NODE_FIELD(lefttree); - COPY_NODE_FIELD(righttree); - COPY_NODE_FIELD(initPlan); -+ COPY_SCALAR_FIELD(had_path); -+ COPY_NODE_FIELD(path_clauses); -+ COPY_NODE_FIELD(path_relids); -+ COPY_SCALAR_FIELD(path_jointype); -+ COPY_SCALAR_FIELD(path_parallel_workers); -+ COPY_SCALAR_FIELD(was_parametrized); - COPY_BITMAPSET_FIELD(extParam); - COPY_BITMAPSET_FIELD(allParam); - } -diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c -index c7400941ee..a07c1551dc 100644 ---- a/src/backend/optimizer/path/costsize.c -+++ b/src/backend/optimizer/path/costsize.c -@@ -97,6 +97,10 @@ - #include "utils/spccache.h" - #include "utils/tuplesort.h" - -+set_baserel_rows_estimate_hook_type set_baserel_rows_estimate_hook = NULL; -+get_parameterized_baserel_size_hook_type get_parameterized_baserel_size_hook = NULL; -+get_parameterized_joinrel_size_hook_type get_parameterized_joinrel_size_hook = NULL; -+set_joinrel_size_estimates_hook_type set_joinrel_size_estimates_hook = NULL; - - #define LOG2(x) (log(x) / 0.693147180559945) - -@@ -4283,6 +4287,49 @@ approx_tuple_count(PlannerInfo *root, JoinPath *path, List *quals) - } - - -+/* -+ * set_baserel_rows_estimate -+ * Set the rows estimate for the given base relation. -+ * -+ * Rows is the estimated number of output tuples after applying -+ * restriction clauses. 
-+ * -+ * To support loadable plugins that monitor or modify cardinality estimation, -+ * we provide a hook variable that lets a plugin get control before and -+ * after the cardinality estimation. -+ * The hook must set rel->rows. -+ */ -+void -+set_baserel_rows_estimate(PlannerInfo *root, RelOptInfo *rel) -+{ -+ if (set_baserel_rows_estimate_hook) -+ (*set_baserel_rows_estimate_hook) (root, rel); -+ else -+ set_baserel_rows_estimate_standard(root, rel); -+} -+ -+/* -+ * set_baserel_rows_estimate -+ * Set the rows estimate for the given base relation. -+ * -+ * Rows is the estimated number of output tuples after applying -+ * restriction clauses. -+ */ -+void -+set_baserel_rows_estimate_standard(PlannerInfo *root, RelOptInfo *rel) -+{ -+ double nrows; -+ -+ nrows = rel->tuples * -+ clauselist_selectivity(root, -+ rel->baserestrictinfo, -+ 0, -+ JOIN_INNER, -+ NULL); -+ -+ rel->rows = clamp_row_est(nrows); -+} -+ - /* - * set_baserel_size_estimates - * Set the size estimates for the given base relation. -@@ -4299,19 +4346,10 @@ approx_tuple_count(PlannerInfo *root, JoinPath *path, List *quals) - void - set_baserel_size_estimates(PlannerInfo *root, RelOptInfo *rel) - { -- double nrows; -- - /* Should only be applied to base relations */ - Assert(rel->relid > 0); - -- nrows = rel->tuples * -- clauselist_selectivity(root, -- rel->baserestrictinfo, -- 0, -- JOIN_INNER, -- NULL); -- -- rel->rows = clamp_row_est(nrows); -+ set_baserel_rows_estimate(root, rel); - - cost_qual_eval(&rel->baserestrictcost, rel->baserestrictinfo, root); - -@@ -4322,13 +4360,33 @@ set_baserel_size_estimates(PlannerInfo *root, RelOptInfo *rel) - * get_parameterized_baserel_size - * Make a size estimate for a parameterized scan of a base relation. - * -+ * To support loadable plugins that monitor or modify cardinality estimation, -+ * we provide a hook variable that lets a plugin get control before and -+ * after the cardinality estimation. 
-+ */ -+double -+get_parameterized_baserel_size(PlannerInfo *root, RelOptInfo *rel, -+ List *param_clauses) -+{ -+ if (get_parameterized_baserel_size_hook) -+ return (*get_parameterized_baserel_size_hook) (root, rel, -+ param_clauses); -+ else -+ return get_parameterized_baserel_size_standard(root, rel, -+ param_clauses); -+} -+ -+/* -+ * get_parameterized_baserel_size_standard -+ * Make a size estimate for a parameterized scan of a base relation. -+ * - * 'param_clauses' lists the additional join clauses to be used. - * - * set_baserel_size_estimates must have been applied already. - */ - double --get_parameterized_baserel_size(PlannerInfo *root, RelOptInfo *rel, -- List *param_clauses) -+get_parameterized_baserel_size_standard(PlannerInfo *root, RelOptInfo *rel, -+ List *param_clauses) - { - List *allclauses; - double nrows; -@@ -4358,6 +4416,36 @@ get_parameterized_baserel_size(PlannerInfo *root, RelOptInfo *rel, - * set_joinrel_size_estimates - * Set the size estimates for the given join relation. - * -+ * To support loadable plugins that monitor or modify cardinality estimation, -+ * we provide a hook variable that lets a plugin get control before and -+ * after the cardinality estimation. -+ * The hook must set rel->rows value. -+ */ -+void -+set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, -+ RelOptInfo *outer_rel, -+ RelOptInfo *inner_rel, -+ SpecialJoinInfo *sjinfo, -+ List *restrictlist) -+{ -+ if (set_joinrel_size_estimates_hook) -+ (*set_joinrel_size_estimates_hook) (root, rel, -+ outer_rel, -+ inner_rel, -+ sjinfo, -+ restrictlist); -+ else -+ set_joinrel_size_estimates_standard(root, rel, -+ outer_rel, -+ inner_rel, -+ sjinfo, -+ restrictlist); -+} -+ -+/* -+ * set_joinrel_size_estimates_standard -+ * Set the size estimates for the given join relation. -+ * - * The rel's targetlist must have been constructed already, and a - * restriction clause list that matches the given component rels must - * be provided. 
-@@ -4377,11 +4465,11 @@ get_parameterized_baserel_size(PlannerInfo *root, RelOptInfo *rel, - * build_joinrel_tlist, and baserestrictcost is not used for join rels. - */ - void --set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, -- RelOptInfo *outer_rel, -- RelOptInfo *inner_rel, -- SpecialJoinInfo *sjinfo, -- List *restrictlist) -+set_joinrel_size_estimates_standard(PlannerInfo *root, RelOptInfo *rel, -+ RelOptInfo *outer_rel, -+ RelOptInfo *inner_rel, -+ SpecialJoinInfo *sjinfo, -+ List *restrictlist) - { - rel->rows = calc_joinrel_size_estimate(root, - rel, -@@ -4397,6 +4485,35 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, - * get_parameterized_joinrel_size - * Make a size estimate for a parameterized scan of a join relation. - * -+ * To support loadable plugins that monitor or modify cardinality estimation, -+ * we provide a hook variable that lets a plugin get control before and -+ * after the cardinality estimation. -+ */ -+double -+get_parameterized_joinrel_size(PlannerInfo *root, RelOptInfo *rel, -+ Path *outer_path, -+ Path *inner_path, -+ SpecialJoinInfo *sjinfo, -+ List *restrict_clauses) -+{ -+ if (get_parameterized_joinrel_size_hook) -+ return (*get_parameterized_joinrel_size_hook) (root, rel, -+ outer_path, -+ inner_path, -+ sjinfo, -+ restrict_clauses); -+ else -+ return get_parameterized_joinrel_size_standard(root, rel, -+ outer_path, -+ inner_path, -+ sjinfo, -+ restrict_clauses); -+} -+ -+/* -+ * get_parameterized_joinrel_size_standard -+ * Make a size estimate for a parameterized scan of a join relation. -+ * - * 'rel' is the joinrel under consideration. - * 'outer_path', 'inner_path' are (probably also parameterized) Paths that - * produce the relations being joined. -@@ -4409,11 +4526,11 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, - * set_joinrel_size_estimates must have been applied already. 
- */ - double --get_parameterized_joinrel_size(PlannerInfo *root, RelOptInfo *rel, -- Path *outer_path, -- Path *inner_path, -- SpecialJoinInfo *sjinfo, -- List *restrict_clauses) -+get_parameterized_joinrel_size_standard(PlannerInfo *root, RelOptInfo *rel, -+ Path *outer_path, -+ Path *inner_path, -+ SpecialJoinInfo *sjinfo, -+ List *restrict_clauses) - { - double nrows; - -diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c -index 5f6d2bad7b..eecdf53c21 100644 ---- a/src/backend/optimizer/plan/createplan.c -+++ b/src/backend/optimizer/plan/createplan.c -@@ -71,6 +71,8 @@ - #define CP_LABEL_TLIST 0x0004 /* tlist must contain sortgrouprefs */ - #define CP_IGNORE_TLIST 0x0008 /* caller will replace tlist */ - -+/* Hook for plugins to get control in creating plan from path */ -+copy_generic_path_info_hook_type copy_generic_path_info_hook = NULL; - - static Plan *create_plan_recurse(PlannerInfo *root, Path *best_path, - int flags); -@@ -157,7 +159,7 @@ static List *fix_indexorderby_references(PlannerInfo *root, IndexPath *index_pat - static Node *fix_indexqual_operand(Node *node, IndexOptInfo *index, int indexcol); - static List *get_switched_clauses(List *clauses, Relids outerrelids); - static List *order_qual_clauses(PlannerInfo *root, List *clauses); --static void copy_generic_path_info(Plan *dest, Path *src); -+static void copy_generic_path_info(PlannerInfo *root, Plan *dest, Path *src); - static void copy_plan_costsize(Plan *dest, Plan *src); - static void label_sort_with_costsize(PlannerInfo *root, Sort *plan, - double limit_tuples); -@@ -1052,7 +1054,7 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path) - false)), - NULL); - -- copy_generic_path_info(plan, (Path *) best_path); -+ copy_generic_path_info(root, plan, (Path *) best_path); - - return plan; - } -@@ -1119,7 +1121,7 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path) - tlist, best_path->partitioned_rels, - partpruneinfo); - -- 
copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return (Plan *) plan; - } -@@ -1150,7 +1152,7 @@ create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path, - * prepare_sort_from_pathkeys on it before we do so on the individual - * child plans, to make cross-checking the sort info easier. - */ -- copy_generic_path_info(plan, (Path *) best_path); -+ copy_generic_path_info(root, plan, (Path *) best_path); - plan->targetlist = tlist; - plan->qual = NIL; - plan->lefttree = NULL; -@@ -1274,7 +1276,7 @@ create_result_plan(PlannerInfo *root, ResultPath *best_path) - - plan = make_result(tlist, (Node *) quals, NULL); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ -1299,7 +1301,7 @@ create_project_set_plan(PlannerInfo *root, ProjectSetPath *best_path) - - plan = make_project_set(tlist, subplan); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ -1327,7 +1329,7 @@ create_material_plan(PlannerInfo *root, MaterialPath *best_path, int flags) - - plan = make_material(subplan); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ -1521,7 +1523,7 @@ create_unique_plan(PlannerInfo *root, UniquePath *best_path, int flags) - } - - /* Copy cost data from Path to Plan */ -- copy_generic_path_info(plan, &best_path->path); -+ copy_generic_path_info(root, plan, &best_path->path); - - return plan; - } -@@ -1554,7 +1556,7 @@ create_gather_plan(PlannerInfo *root, GatherPath *best_path) - best_path->single_copy, - subplan); - -- copy_generic_path_info(&gather_plan->plan, &best_path->path); -+ copy_generic_path_info(root, &gather_plan->plan, &best_path->path); - - /* use parallel mode for parallel 
plans. */ - root->glob->parallelModeNeeded = true; -@@ -1583,7 +1585,7 @@ create_gather_merge_plan(PlannerInfo *root, GatherMergePath *best_path) - gm_plan = makeNode(GatherMerge); - gm_plan->plan.targetlist = tlist; - gm_plan->num_workers = best_path->num_workers; -- copy_generic_path_info(&gm_plan->plan, &best_path->path); -+ copy_generic_path_info(root, &gm_plan->plan, &best_path->path); - - /* Assign the rescan Param. */ - gm_plan->rescan_param = assign_special_exec_param(root); -@@ -1711,7 +1713,7 @@ create_projection_plan(PlannerInfo *root, ProjectionPath *best_path, int flags) - /* We need a Result node */ - plan = (Plan *) make_result(tlist, NULL, subplan); - -- copy_generic_path_info(plan, (Path *) best_path); -+ copy_generic_path_info(root, plan, (Path *) best_path); - } - - return plan; -@@ -1812,7 +1814,7 @@ create_sort_plan(PlannerInfo *root, SortPath *best_path, int flags) - IS_OTHER_REL(best_path->subpath->parent) ? - best_path->path.parent->relids : NULL); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ -1849,7 +1851,7 @@ create_group_plan(PlannerInfo *root, GroupPath *best_path) - extract_grouping_ops(best_path->groupClause), - subplan); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ -1877,7 +1879,7 @@ create_upper_unique_plan(PlannerInfo *root, UpperUniquePath *best_path, int flag - best_path->path.pathkeys, - best_path->numkeys); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ -1918,7 +1920,7 @@ create_agg_plan(PlannerInfo *root, AggPath *best_path) - best_path->numGroups, - subplan); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ 
-2117,7 +2119,7 @@ create_groupingsets_plan(PlannerInfo *root, GroupingSetsPath *best_path) - subplan); - - /* Copy cost data from Path to Plan */ -- copy_generic_path_info(&plan->plan, &best_path->path); -+ copy_generic_path_info(root, &plan->plan, &best_path->path); - } - - return (Plan *) plan; -@@ -2173,7 +2175,7 @@ create_minmaxagg_plan(PlannerInfo *root, MinMaxAggPath *best_path) - - plan = make_result(tlist, (Node *) best_path->quals, NULL); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - /* - * During setrefs.c, we'll need to replace references to the Agg nodes -@@ -2281,7 +2283,7 @@ create_windowagg_plan(PlannerInfo *root, WindowAggPath *best_path) - wc->inRangeNullsFirst, - subplan); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ -2317,7 +2319,7 @@ create_setop_plan(PlannerInfo *root, SetOpPath *best_path, int flags) - best_path->firstFlag, - numGroups); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ -2353,7 +2355,7 @@ create_recursiveunion_plan(PlannerInfo *root, RecursiveUnionPath *best_path) - best_path->distinctList, - numGroups); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ -2376,7 +2378,7 @@ create_lockrows_plan(PlannerInfo *root, LockRowsPath *best_path, - - plan = make_lockrows(subplan, best_path->rowMarks, best_path->epqParam); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ -2437,7 +2439,7 @@ create_modifytable_plan(PlannerInfo *root, ModifyTablePath *best_path) - best_path->onconflict, - best_path->epqParam); - -- 
copy_generic_path_info(&plan->plan, &best_path->path); -+ copy_generic_path_info(root, &plan->plan, &best_path->path); - - return plan; - } -@@ -2461,7 +2463,7 @@ create_limit_plan(PlannerInfo *root, LimitPath *best_path, int flags) - best_path->limitOffset, - best_path->limitCount); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ -2507,7 +2509,7 @@ create_seqscan_plan(PlannerInfo *root, Path *best_path, - scan_clauses, - scan_relid); - -- copy_generic_path_info(&scan_plan->plan, best_path); -+ copy_generic_path_info(root, &scan_plan->plan, best_path); - - return scan_plan; - } -@@ -2553,7 +2555,7 @@ create_samplescan_plan(PlannerInfo *root, Path *best_path, - scan_relid, - tsc); - -- copy_generic_path_info(&scan_plan->scan.plan, best_path); -+ copy_generic_path_info(root, &scan_plan->scan.plan, best_path); - - return scan_plan; - } -@@ -2734,7 +2736,7 @@ create_indexscan_plan(PlannerInfo *root, - indexorderbyops, - best_path->indexscandir); - -- copy_generic_path_info(&scan_plan->plan, &best_path->path); -+ copy_generic_path_info(root, &scan_plan->plan, &best_path->path); - - return scan_plan; - } -@@ -2849,7 +2851,7 @@ create_bitmap_scan_plan(PlannerInfo *root, - bitmapqualorig, - baserelid); - -- copy_generic_path_info(&scan_plan->scan.plan, &best_path->path); -+ copy_generic_path_info(root, &scan_plan->scan.plan, &best_path->path); - - return scan_plan; - } -@@ -3114,7 +3116,7 @@ create_tidscan_plan(PlannerInfo *root, TidPath *best_path, - scan_relid, - tidquals); - -- copy_generic_path_info(&scan_plan->scan.plan, &best_path->path); -+ copy_generic_path_info(root, &scan_plan->scan.plan, &best_path->path); - - return scan_plan; - } -@@ -3164,7 +3166,7 @@ create_subqueryscan_plan(PlannerInfo *root, SubqueryScanPath *best_path, - scan_relid, - subplan); - -- copy_generic_path_info(&scan_plan->scan.plan, &best_path->path); -+ copy_generic_path_info(root, 
&scan_plan->scan.plan, &best_path->path); - - return scan_plan; - } -@@ -3207,7 +3209,7 @@ create_functionscan_plan(PlannerInfo *root, Path *best_path, - scan_plan = make_functionscan(tlist, scan_clauses, scan_relid, - functions, rte->funcordinality); - -- copy_generic_path_info(&scan_plan->scan.plan, best_path); -+ copy_generic_path_info(root, &scan_plan->scan.plan, best_path); - - return scan_plan; - } -@@ -3250,7 +3252,7 @@ create_tablefuncscan_plan(PlannerInfo *root, Path *best_path, - scan_plan = make_tablefuncscan(tlist, scan_clauses, scan_relid, - tablefunc); - -- copy_generic_path_info(&scan_plan->scan.plan, best_path); -+ copy_generic_path_info(root, &scan_plan->scan.plan, best_path); - - return scan_plan; - } -@@ -3294,7 +3296,7 @@ create_valuesscan_plan(PlannerInfo *root, Path *best_path, - scan_plan = make_valuesscan(tlist, scan_clauses, scan_relid, - values_lists); - -- copy_generic_path_info(&scan_plan->scan.plan, best_path); -+ copy_generic_path_info(root, &scan_plan->scan.plan, best_path); - - return scan_plan; - } -@@ -3387,7 +3389,7 @@ create_ctescan_plan(PlannerInfo *root, Path *best_path, - scan_plan = make_ctescan(tlist, scan_clauses, scan_relid, - plan_id, cte_param_id); - -- copy_generic_path_info(&scan_plan->scan.plan, best_path); -+ copy_generic_path_info(root, &scan_plan->scan.plan, best_path); - - return scan_plan; - } -@@ -3426,7 +3428,7 @@ create_namedtuplestorescan_plan(PlannerInfo *root, Path *best_path, - scan_plan = make_namedtuplestorescan(tlist, scan_clauses, scan_relid, - rte->enrname); - -- copy_generic_path_info(&scan_plan->scan.plan, best_path); -+ copy_generic_path_info(root, &scan_plan->scan.plan, best_path); - - return scan_plan; - } -@@ -3486,7 +3488,7 @@ create_worktablescan_plan(PlannerInfo *root, Path *best_path, - scan_plan = make_worktablescan(tlist, scan_clauses, scan_relid, - cteroot->wt_param_id); - -- copy_generic_path_info(&scan_plan->scan.plan, best_path); -+ copy_generic_path_info(root, &scan_plan->scan.plan, 
best_path); - - return scan_plan; - } -@@ -3546,7 +3548,7 @@ create_foreignscan_plan(PlannerInfo *root, ForeignPath *best_path, - outer_plan); - - /* Copy cost data from Path to Plan; no need to make FDW do this */ -- copy_generic_path_info(&scan_plan->scan.plan, &best_path->path); -+ copy_generic_path_info(root, &scan_plan->scan.plan, &best_path->path); - - /* Copy foreign server OID; likewise, no need to make FDW do this */ - scan_plan->fs_server = rel->serverid; -@@ -3680,7 +3682,7 @@ create_customscan_plan(PlannerInfo *root, CustomPath *best_path, - * Copy cost data from Path to Plan; no need to make custom-plan providers - * do this - */ -- copy_generic_path_info(&cplan->scan.plan, &best_path->path); -+ copy_generic_path_info(root, &cplan->scan.plan, &best_path->path); - - /* Likewise, copy the relids that are represented by this custom scan */ - cplan->custom_relids = best_path->path.parent->relids; -@@ -3782,7 +3784,7 @@ create_nestloop_plan(PlannerInfo *root, - best_path->jointype, - best_path->inner_unique); - -- copy_generic_path_info(&join_plan->join.plan, &best_path->path); -+ copy_generic_path_info(root, &join_plan->join.plan, &best_path->path); - - return join_plan; - } -@@ -4089,7 +4091,7 @@ create_mergejoin_plan(PlannerInfo *root, - best_path->skip_mark_restore); - - /* Costs of sort and material steps are included in path cost already */ -- copy_generic_path_info(&join_plan->join.plan, &best_path->jpath.path); -+ copy_generic_path_info(root, &join_plan->join.plan, &best_path->jpath.path); - - return join_plan; - } -@@ -4235,7 +4237,7 @@ create_hashjoin_plan(PlannerInfo *root, - best_path->jpath.jointype, - best_path->jpath.inner_unique); - -- copy_generic_path_info(&join_plan->join.plan, &best_path->jpath.path); -+ copy_generic_path_info(root, &join_plan->join.plan, &best_path->jpath.path); - - return join_plan; - } -@@ -4781,7 +4783,7 @@ order_qual_clauses(PlannerInfo *root, List *clauses) - * Also copy the parallel-related flags, which the 
executor *will* use. - */ - static void --copy_generic_path_info(Plan *dest, Path *src) -+copy_generic_path_info(PlannerInfo *root, Plan *dest, Path *src) - { - dest->startup_cost = src->startup_cost; - dest->total_cost = src->total_cost; -@@ -4789,6 +4791,9 @@ copy_generic_path_info(Plan *dest, Path *src) - dest->plan_width = src->pathtarget->width; - dest->parallel_aware = src->parallel_aware; - dest->parallel_safe = src->parallel_safe; -+ -+ if (copy_generic_path_info_hook) -+ (*copy_generic_path_info_hook) (root, dest, src); - } - - /* -diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c -index 284215a717..f5249aca95 100644 ---- a/src/backend/optimizer/util/relnode.c -+++ b/src/backend/optimizer/util/relnode.c -@@ -1226,6 +1226,7 @@ find_childrel_parents(PlannerInfo *root, RelOptInfo *rel) - } - - -+set_parampathinfo_postinit_hook_type parampathinfo_postinit_hook = NULL; - /* - * get_baserel_parampathinfo - * Get the ParamPathInfo for a parameterized path for a base relation, -@@ -1294,6 +1295,10 @@ get_baserel_parampathinfo(PlannerInfo *root, RelOptInfo *baserel, - ppi->ppi_req_outer = required_outer; - ppi->ppi_rows = rows; - ppi->ppi_clauses = pclauses; -+ -+ if (parampathinfo_postinit_hook) -+ (*parampathinfo_postinit_hook)(ppi); -+ - baserel->ppilist = lappend(baserel->ppilist, ppi); - - return ppi; -@@ -1519,6 +1524,10 @@ get_joinrel_parampathinfo(PlannerInfo *root, RelOptInfo *joinrel, - ppi->ppi_req_outer = required_outer; - ppi->ppi_rows = rows; - ppi->ppi_clauses = NIL; -+ -+ if (parampathinfo_postinit_hook) -+ (*parampathinfo_postinit_hook)(ppi); -+ - joinrel->ppilist = lappend(joinrel->ppilist, ppi); - - return ppi; -diff --git a/src/include/commands/explain.h b/src/include/commands/explain.h -index d3f70fda08..2dd4200282 100644 ---- a/src/include/commands/explain.h -+++ b/src/include/commands/explain.h -@@ -61,6 +61,12 @@ extern PGDLLIMPORT ExplainOneQuery_hook_type ExplainOneQuery_hook; - typedef const char 
*(*explain_get_index_name_hook_type) (Oid indexId); - extern PGDLLIMPORT explain_get_index_name_hook_type explain_get_index_name_hook; - -+/* Hook for plugins to get control in ExplainOnePlan() */ -+typedef void (*ExplainOnePlan_hook_type) (PlannedStmt *plannedstmt, IntoClause *into, -+ ExplainState *es, const char *queryString, -+ ParamListInfo params, const instr_time *planduration); -+extern PGDLLIMPORT ExplainOnePlan_hook_type ExplainOnePlan_hook; -+ - - extern void ExplainQuery(ParseState *pstate, ExplainStmt *stmt, const char *queryString, - ParamListInfo params, QueryEnvironment *queryEnv, DestReceiver *dest); -diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h -index 249aa6520a..13d0961ed7 100644 ---- a/src/include/nodes/plannodes.h -+++ b/src/include/nodes/plannodes.h -@@ -151,6 +151,19 @@ typedef struct Plan - List *initPlan; /* Init Plan nodes (un-correlated expr - * subselects) */ - -+ /* -+ * information for adaptive query optimization -+ */ -+ bool had_path; -+ List *path_clauses; -+ List *path_relids; -+ JoinType path_jointype; -+ int path_parallel_workers; -+ bool was_parametrized; -+ /* For Adaptive optimization DEBUG purposes */ -+ double predicted_cardinality; -+ int fss_hash; -+ - /* - * Information for management of parameter-change-driven rescanning - * -diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h -index e61c1a2a29..2be4cad86b 100644 ---- a/src/include/nodes/relation.h -+++ b/src/include/nodes/relation.h -@@ -694,6 +694,10 @@ typedef struct RelOptInfo - Relids top_parent_relids; /* Relids of topmost parents (if "other" - * rel) */ - -+ /* For Adaptive optimization DEBUG purposes */ -+ double predicted_cardinality; -+ int fss_hash; -+ - /* used for partitioned relations */ - PartitionScheme part_scheme; /* Partitioning scheme. 
*/ - int nparts; /* number of partitions */ -@@ -1048,6 +1052,10 @@ typedef struct ParamPathInfo - Relids ppi_req_outer; /* rels supplying parameters used by path */ - double ppi_rows; /* estimated number of result tuples */ - List *ppi_clauses; /* join clauses available from outer rels */ -+ -+ /* AQO DEBUG purposes */ -+ double predicted_ppi_rows; -+ double fss_ppi_hash; - } ParamPathInfo; - - -diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h -index 77ca7ff837..f95ea34063 100644 ---- a/src/include/optimizer/cost.h -+++ b/src/include/optimizer/cost.h -@@ -39,6 +39,34 @@ typedef enum - } ConstraintExclusionType; - - -+/* Hook for plugins to get control of cardinality estimation */ -+typedef void (*set_baserel_rows_estimate_hook_type) (PlannerInfo *root, -+ RelOptInfo *rel); -+extern PGDLLIMPORT set_baserel_rows_estimate_hook_type -+ set_baserel_rows_estimate_hook; -+typedef double (*get_parameterized_baserel_size_hook_type) (PlannerInfo *root, -+ RelOptInfo *rel, -+ List *param_clauses); -+extern PGDLLIMPORT get_parameterized_baserel_size_hook_type -+ get_parameterized_baserel_size_hook; -+typedef double (*get_parameterized_joinrel_size_hook_type) (PlannerInfo *root, -+ RelOptInfo *rel, -+ Path *outer_path, -+ Path *inner_path, -+ SpecialJoinInfo *sjinfo, -+ List *restrict_clauses); -+extern PGDLLIMPORT get_parameterized_joinrel_size_hook_type -+ get_parameterized_joinrel_size_hook; -+typedef void (*set_joinrel_size_estimates_hook_type) (PlannerInfo *root, -+ RelOptInfo *rel, -+ RelOptInfo *outer_rel, -+ RelOptInfo *inner_rel, -+ SpecialJoinInfo *sjinfo, -+ List *restrictlist); -+extern PGDLLIMPORT set_joinrel_size_estimates_hook_type -+ set_joinrel_size_estimates_hook; -+ -+ - /* - * prototypes for costsize.c - * routines to compute costs and sizes -@@ -174,21 +202,37 @@ extern void compute_semi_anti_join_factors(PlannerInfo *root, - SpecialJoinInfo *sjinfo, - List *restrictlist, - SemiAntiJoinFactors *semifactors); -+extern void 
set_baserel_rows_estimate(PlannerInfo *root, RelOptInfo *rel); -+extern void set_baserel_rows_estimate_standard(PlannerInfo *root, RelOptInfo *rel); - extern void set_baserel_size_estimates(PlannerInfo *root, RelOptInfo *rel); - extern double get_parameterized_baserel_size(PlannerInfo *root, - RelOptInfo *rel, - List *param_clauses); -+extern double get_parameterized_baserel_size_standard(PlannerInfo *root, -+ RelOptInfo *rel, -+ List *param_clauses); - extern double get_parameterized_joinrel_size(PlannerInfo *root, - RelOptInfo *rel, - Path *outer_path, - Path *inner_path, - SpecialJoinInfo *sjinfo, - List *restrict_clauses); -+extern double get_parameterized_joinrel_size_standard(PlannerInfo *root, -+ RelOptInfo *rel, -+ Path *outer_path, -+ Path *inner_path, -+ SpecialJoinInfo *sjinfo, -+ List *restrict_clauses); - extern void set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, - RelOptInfo *outer_rel, - RelOptInfo *inner_rel, - SpecialJoinInfo *sjinfo, - List *restrictlist); -+extern void set_joinrel_size_estimates_standard(PlannerInfo *root, RelOptInfo *rel, -+ RelOptInfo *outer_rel, -+ RelOptInfo *inner_rel, -+ SpecialJoinInfo *sjinfo, -+ List *restrictlist); - extern void set_subquery_size_estimates(PlannerInfo *root, RelOptInfo *rel); - extern void set_function_size_estimates(PlannerInfo *root, RelOptInfo *rel); - extern void set_values_size_estimates(PlannerInfo *root, RelOptInfo *rel); -diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h -index 7c5ff22650..178e7888af 100644 ---- a/src/include/optimizer/pathnode.h -+++ b/src/include/optimizer/pathnode.h -@@ -17,6 +17,9 @@ - #include "nodes/bitmapset.h" - #include "nodes/relation.h" - -+typedef void (*set_parampathinfo_postinit_hook_type) (ParamPathInfo *ppi); -+ -+extern PGDLLIMPORT set_parampathinfo_postinit_hook_type parampathinfo_postinit_hook; - - /* - * prototypes for pathnode.c -diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h 
-index a081ca689a..d42da2980f 100644 ---- a/src/include/optimizer/planmain.h -+++ b/src/include/optimizer/planmain.h -@@ -34,6 +34,12 @@ extern bool parallel_leader_participation; - /* query_planner callback to compute query_pathkeys */ - typedef void (*query_pathkeys_callback) (PlannerInfo *root, void *extra); - -+/* hook for plugins to get control in creating plan from path */ -+typedef void (*copy_generic_path_info_hook_type) (PlannerInfo *root, -+ Plan *dest, Path *src); -+ -+extern PGDLLIMPORT copy_generic_path_info_hook_type copy_generic_path_info_hook; -+ - /* - * prototypes for plan/planmain.c - */ diff --git a/aqo_pg12.patch b/aqo_pg12.patch deleted file mode 100644 index 2075911d..00000000 --- a/aqo_pg12.patch +++ /dev/null @@ -1,1059 +0,0 @@ -diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c -index 92969636b7..d05b07e037 100644 ---- a/src/backend/commands/explain.c -+++ b/src/backend/commands/explain.c -@@ -24,6 +24,7 @@ - #include "nodes/extensible.h" - #include "nodes/makefuncs.h" - #include "nodes/nodeFuncs.h" -+#include "optimizer/cost.h" - #include "parser/parsetree.h" - #include "rewrite/rewriteHandler.h" - #include "storage/bufmgr.h" -@@ -46,6 +47,9 @@ ExplainOneQuery_hook_type ExplainOneQuery_hook = NULL; - /* Hook for plugins to get control in explain_get_index_name() */ - explain_get_index_name_hook_type explain_get_index_name_hook = NULL; - -+/* Hook for plugins to get control in ExplainOnePlan() */ -+ExplainOnePlan_hook_type ExplainOnePlan_hook = NULL; -+ - - /* OR-able flags for ExplainXMLTag() */ - #define X_OPENING 0 -@@ -596,6 +600,10 @@ ExplainOnePlan(PlannedStmt *plannedstmt, IntoClause *into, ExplainState *es, - ExplainPropertyFloat("Execution Time", "ms", 1000.0 * totaltime, 3, - es); - -+ if (ExplainOnePlan_hook) -+ ExplainOnePlan_hook(plannedstmt, into, es, -+ queryString, params, planduration, queryEnv); -+ - ExplainCloseGroup("Query", NULL, true, es); - } - -@@ -1523,6 +1531,38 @@ ExplainNode(PlanState 
*planstate, List *ancestors, - appendStringInfo(es->str, - " (actual rows=%.0f loops=%.0f)", - rows, nloops); -+ -+#ifdef AQO_EXPLAIN -+ if (es->verbose && plan && planstate->instrument) -+ { -+ int wrkrs = 1; -+ double error = -1.; -+ -+ if (planstate->worker_instrument && IsParallelTuplesProcessing(plan)) -+ { -+ int i; -+ for (i = 0; i < planstate->worker_instrument->num_workers; i++) -+ { -+ Instrumentation *instrument = &planstate->worker_instrument->instrument[i]; -+ if (instrument->nloops <= 0) -+ continue; -+ wrkrs++; -+ } -+ } -+ -+ if (plan->predicted_cardinality > 0.) -+ { -+ error = 100. * (plan->predicted_cardinality - (rows*wrkrs)) -+ / plan->predicted_cardinality; -+ appendStringInfo(es->str, -+ " (AQO: cardinality=%.0lf, error=%.0lf%%, fsspace_hash=%d)", -+ plan->predicted_cardinality, error, plan->fss_hash); -+ } -+ else -+ appendStringInfo(es->str, " (AQO not used, fsspace_hash=%d)", -+ plan->fss_hash); -+ } -+#endif - } - else - { -diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c -index 78deade89b..b1470147e9 100644 ---- a/src/backend/nodes/copyfuncs.c -+++ b/src/backend/nodes/copyfuncs.c -@@ -126,6 +126,12 @@ CopyPlanFields(const Plan *from, Plan *newnode) - COPY_NODE_FIELD(lefttree); - COPY_NODE_FIELD(righttree); - COPY_NODE_FIELD(initPlan); -+ COPY_SCALAR_FIELD(had_path); -+ COPY_NODE_FIELD(path_clauses); -+ COPY_NODE_FIELD(path_relids); -+ COPY_SCALAR_FIELD(path_jointype); -+ COPY_SCALAR_FIELD(path_parallel_workers); -+ COPY_SCALAR_FIELD(was_parametrized); - COPY_BITMAPSET_FIELD(extParam); - COPY_BITMAPSET_FIELD(allParam); - } -diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c -index a2a9b1f7be..4b766b9885 100644 ---- a/src/backend/optimizer/path/costsize.c -+++ b/src/backend/optimizer/path/costsize.c -@@ -96,6 +96,10 @@ - #include "utils/spccache.h" - #include "utils/tuplesort.h" - -+set_baserel_rows_estimate_hook_type set_baserel_rows_estimate_hook = NULL; 
-+get_parameterized_baserel_size_hook_type get_parameterized_baserel_size_hook = NULL; -+get_parameterized_joinrel_size_hook_type get_parameterized_joinrel_size_hook = NULL; -+set_joinrel_size_estimates_hook_type set_joinrel_size_estimates_hook = NULL; - - #define LOG2(x) (log(x) / 0.693147180559945) - -@@ -176,7 +180,6 @@ static Cost append_nonpartial_cost(List *subpaths, int numpaths, - static void set_rel_width(PlannerInfo *root, RelOptInfo *rel); - static double relation_byte_size(double tuples, int width); - static double page_size(double tuples, int width); --static double get_parallel_divisor(Path *path); - - - /* -@@ -254,7 +257,7 @@ cost_seqscan(Path *path, PlannerInfo *root, - /* Adjust costing for parallelism, if used. */ - if (path->parallel_workers > 0) - { -- double parallel_divisor = get_parallel_divisor(path); -+ double parallel_divisor = get_parallel_divisor(path->parallel_workers); - - /* The CPU cost is divided among all the workers. */ - cpu_run_cost /= parallel_divisor; -@@ -733,7 +736,7 @@ cost_index(IndexPath *path, PlannerInfo *root, double loop_count, - /* Adjust costing for parallelism, if used. */ - if (path->path.parallel_workers > 0) - { -- double parallel_divisor = get_parallel_divisor(&path->path); -+ double parallel_divisor = get_parallel_divisor(path->path.parallel_workers); - - path->path.rows = clamp_row_est(path->path.rows / parallel_divisor); - -@@ -1014,7 +1017,7 @@ cost_bitmap_heap_scan(Path *path, PlannerInfo *root, RelOptInfo *baserel, - /* Adjust costing for parallelism, if used. */ - if (path->parallel_workers > 0) - { -- double parallel_divisor = get_parallel_divisor(path); -+ double parallel_divisor = get_parallel_divisor(path->parallel_workers); - - /* The CPU cost is divided among all the workers. 
*/ - cpu_run_cost /= parallel_divisor; -@@ -1960,7 +1963,7 @@ cost_append(AppendPath *apath) - else /* parallel-aware */ - { - int i = 0; -- double parallel_divisor = get_parallel_divisor(&apath->path); -+ double parallel_divisor = get_parallel_divisor(apath->path.parallel_workers); - - /* Parallel-aware Append never produces ordered output. */ - Assert(apath->path.pathkeys == NIL); -@@ -1994,7 +1997,7 @@ cost_append(AppendPath *apath) - { - double subpath_parallel_divisor; - -- subpath_parallel_divisor = get_parallel_divisor(subpath); -+ subpath_parallel_divisor = get_parallel_divisor(subpath->parallel_workers); - apath->path.rows += subpath->rows * (subpath_parallel_divisor / - parallel_divisor); - apath->path.total_cost += subpath->total_cost; -@@ -2517,7 +2520,7 @@ final_cost_nestloop(PlannerInfo *root, NestPath *path, - /* For partial paths, scale row estimate. */ - if (path->path.parallel_workers > 0) - { -- double parallel_divisor = get_parallel_divisor(&path->path); -+ double parallel_divisor = get_parallel_divisor(path->path.parallel_workers); - - path->path.rows = - clamp_row_est(path->path.rows / parallel_divisor); -@@ -2963,7 +2966,7 @@ final_cost_mergejoin(PlannerInfo *root, MergePath *path, - /* For partial paths, scale row estimate. */ - if (path->jpath.path.parallel_workers > 0) - { -- double parallel_divisor = get_parallel_divisor(&path->jpath.path); -+ double parallel_divisor = get_parallel_divisor(path->jpath.path.parallel_workers); - - path->jpath.path.rows = - clamp_row_est(path->jpath.path.rows / parallel_divisor); -@@ -3297,7 +3300,7 @@ initial_cost_hashjoin(PlannerInfo *root, JoinCostWorkspace *workspace, - * number, so we need to undo the division. - */ - if (parallel_hash) -- inner_path_rows_total *= get_parallel_divisor(inner_path); -+ inner_path_rows_total *= get_parallel_divisor(inner_path->parallel_workers); - - /* - * Get hash table size that executor would use for inner relation. 
-@@ -3393,7 +3396,7 @@ final_cost_hashjoin(PlannerInfo *root, HashPath *path, - /* For partial paths, scale row estimate. */ - if (path->jpath.path.parallel_workers > 0) - { -- double parallel_divisor = get_parallel_divisor(&path->jpath.path); -+ double parallel_divisor = get_parallel_divisor(path->jpath.path.parallel_workers); - - path->jpath.path.rows = - clamp_row_est(path->jpath.path.rows / parallel_divisor); -@@ -4387,6 +4390,49 @@ approx_tuple_count(PlannerInfo *root, JoinPath *path, List *quals) - } - - -+/* -+ * set_baserel_rows_estimate -+ * Set the rows estimate for the given base relation. -+ * -+ * Rows is the estimated number of output tuples after applying -+ * restriction clauses. -+ * -+ * To support loadable plugins that monitor or modify cardinality estimation, -+ * we provide a hook variable that lets a plugin get control before and -+ * after the cardinality estimation. -+ * The hook must set rel->rows. -+ */ -+void -+set_baserel_rows_estimate(PlannerInfo *root, RelOptInfo *rel) -+{ -+ if (set_baserel_rows_estimate_hook) -+ (*set_baserel_rows_estimate_hook) (root, rel); -+ else -+ set_baserel_rows_estimate_standard(root, rel); -+} -+ -+/* -+ * set_baserel_rows_estimate -+ * Set the rows estimate for the given base relation. -+ * -+ * Rows is the estimated number of output tuples after applying -+ * restriction clauses. -+ */ -+void -+set_baserel_rows_estimate_standard(PlannerInfo *root, RelOptInfo *rel) -+{ -+ double nrows; -+ -+ nrows = rel->tuples * -+ clauselist_selectivity(root, -+ rel->baserestrictinfo, -+ 0, -+ JOIN_INNER, -+ NULL); -+ -+ rel->rows = clamp_row_est(nrows); -+} -+ - /* - * set_baserel_size_estimates - * Set the size estimates for the given base relation. 
-@@ -4403,19 +4449,10 @@ approx_tuple_count(PlannerInfo *root, JoinPath *path, List *quals) - void - set_baserel_size_estimates(PlannerInfo *root, RelOptInfo *rel) - { -- double nrows; -- - /* Should only be applied to base relations */ - Assert(rel->relid > 0); - -- nrows = rel->tuples * -- clauselist_selectivity(root, -- rel->baserestrictinfo, -- 0, -- JOIN_INNER, -- NULL); -- -- rel->rows = clamp_row_est(nrows); -+ set_baserel_rows_estimate(root, rel); - - cost_qual_eval(&rel->baserestrictcost, rel->baserestrictinfo, root); - -@@ -4426,13 +4463,33 @@ set_baserel_size_estimates(PlannerInfo *root, RelOptInfo *rel) - * get_parameterized_baserel_size - * Make a size estimate for a parameterized scan of a base relation. - * -+ * To support loadable plugins that monitor or modify cardinality estimation, -+ * we provide a hook variable that lets a plugin get control before and -+ * after the cardinality estimation. -+ */ -+double -+get_parameterized_baserel_size(PlannerInfo *root, RelOptInfo *rel, -+ List *param_clauses) -+{ -+ if (get_parameterized_baserel_size_hook) -+ return (*get_parameterized_baserel_size_hook) (root, rel, -+ param_clauses); -+ else -+ return get_parameterized_baserel_size_standard(root, rel, -+ param_clauses); -+} -+ -+/* -+ * get_parameterized_baserel_size_standard -+ * Make a size estimate for a parameterized scan of a base relation. -+ * - * 'param_clauses' lists the additional join clauses to be used. - * - * set_baserel_size_estimates must have been applied already. - */ - double --get_parameterized_baserel_size(PlannerInfo *root, RelOptInfo *rel, -- List *param_clauses) -+get_parameterized_baserel_size_standard(PlannerInfo *root, RelOptInfo *rel, -+ List *param_clauses) - { - List *allclauses; - double nrows; -@@ -4462,6 +4519,36 @@ get_parameterized_baserel_size(PlannerInfo *root, RelOptInfo *rel, - * set_joinrel_size_estimates - * Set the size estimates for the given join relation. 
- * -+ * To support loadable plugins that monitor or modify cardinality estimation, -+ * we provide a hook variable that lets a plugin get control before and -+ * after the cardinality estimation. -+ * The hook must set rel->rows value. -+ */ -+void -+set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, -+ RelOptInfo *outer_rel, -+ RelOptInfo *inner_rel, -+ SpecialJoinInfo *sjinfo, -+ List *restrictlist) -+{ -+ if (set_joinrel_size_estimates_hook) -+ (*set_joinrel_size_estimates_hook) (root, rel, -+ outer_rel, -+ inner_rel, -+ sjinfo, -+ restrictlist); -+ else -+ set_joinrel_size_estimates_standard(root, rel, -+ outer_rel, -+ inner_rel, -+ sjinfo, -+ restrictlist); -+} -+ -+/* -+ * set_joinrel_size_estimates_standard -+ * Set the size estimates for the given join relation. -+ * - * The rel's targetlist must have been constructed already, and a - * restriction clause list that matches the given component rels must - * be provided. -@@ -4481,11 +4568,11 @@ get_parameterized_baserel_size(PlannerInfo *root, RelOptInfo *rel, - * build_joinrel_tlist, and baserestrictcost is not used for join rels. - */ - void --set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, -- RelOptInfo *outer_rel, -- RelOptInfo *inner_rel, -- SpecialJoinInfo *sjinfo, -- List *restrictlist) -+set_joinrel_size_estimates_standard(PlannerInfo *root, RelOptInfo *rel, -+ RelOptInfo *outer_rel, -+ RelOptInfo *inner_rel, -+ SpecialJoinInfo *sjinfo, -+ List *restrictlist) - { - rel->rows = calc_joinrel_size_estimate(root, - rel, -@@ -4501,6 +4588,35 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, - * get_parameterized_joinrel_size - * Make a size estimate for a parameterized scan of a join relation. - * -+ * To support loadable plugins that monitor or modify cardinality estimation, -+ * we provide a hook variable that lets a plugin get control before and -+ * after the cardinality estimation. 
-+ */ -+double -+get_parameterized_joinrel_size(PlannerInfo *root, RelOptInfo *rel, -+ Path *outer_path, -+ Path *inner_path, -+ SpecialJoinInfo *sjinfo, -+ List *restrict_clauses) -+{ -+ if (get_parameterized_joinrel_size_hook) -+ return (*get_parameterized_joinrel_size_hook) (root, rel, -+ outer_path, -+ inner_path, -+ sjinfo, -+ restrict_clauses); -+ else -+ return get_parameterized_joinrel_size_standard(root, rel, -+ outer_path, -+ inner_path, -+ sjinfo, -+ restrict_clauses); -+} -+ -+/* -+ * get_parameterized_joinrel_size_standard -+ * Make a size estimate for a parameterized scan of a join relation. -+ * - * 'rel' is the joinrel under consideration. - * 'outer_path', 'inner_path' are (probably also parameterized) Paths that - * produce the relations being joined. -@@ -4513,11 +4629,11 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, - * set_joinrel_size_estimates must have been applied already. - */ - double --get_parameterized_joinrel_size(PlannerInfo *root, RelOptInfo *rel, -- Path *outer_path, -- Path *inner_path, -- SpecialJoinInfo *sjinfo, -- List *restrict_clauses) -+get_parameterized_joinrel_size_standard(PlannerInfo *root, RelOptInfo *rel, -+ Path *outer_path, -+ Path *inner_path, -+ SpecialJoinInfo *sjinfo, -+ List *restrict_clauses) - { - double nrows; - -@@ -5474,14 +5590,25 @@ page_size(double tuples, int width) - return ceil(relation_byte_size(tuples, width) / BLCKSZ); - } - -+bool -+IsParallelTuplesProcessing(const Plan *plan) -+{ -+ if (plan->path_parallel_workers > 0 && ( -+ plan->parallel_aware || nodeTag(plan) == T_HashJoin || -+ nodeTag(plan) == T_MergeJoin || -+ nodeTag(plan) == T_NestLoop)) -+ return true; -+ return false; -+} -+ - /* - * Estimate the fraction of the work that each worker will do given the - * number of workers budgeted for the path. 
- */ --static double --get_parallel_divisor(Path *path) -+double -+get_parallel_divisor(int parallel_workers) - { -- double parallel_divisor = path->parallel_workers; -+ double parallel_divisor = parallel_workers; - - /* - * Early experience with parallel query suggests that when there is only -@@ -5498,7 +5625,7 @@ get_parallel_divisor(Path *path) - { - double leader_contribution; - -- leader_contribution = 1.0 - (0.3 * path->parallel_workers); -+ leader_contribution = 1.0 - (0.3 * parallel_workers); - if (leader_contribution > 0) - parallel_divisor += leader_contribution; - } -diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c -index 608d5adfed..222a7a34f3 100644 ---- a/src/backend/optimizer/plan/createplan.c -+++ b/src/backend/optimizer/plan/createplan.c -@@ -70,6 +70,8 @@ - #define CP_LABEL_TLIST 0x0004 /* tlist must contain sortgrouprefs */ - #define CP_IGNORE_TLIST 0x0008 /* caller will replace tlist */ - -+/* Hook for plugins to get control in creating plan from path */ -+copy_generic_path_info_hook_type copy_generic_path_info_hook = NULL; - - static Plan *create_plan_recurse(PlannerInfo *root, Path *best_path, - int flags); -@@ -164,7 +166,7 @@ static Node *fix_indexqual_clause(PlannerInfo *root, - static Node *fix_indexqual_operand(Node *node, IndexOptInfo *index, int indexcol); - static List *get_switched_clauses(List *clauses, Relids outerrelids); - static List *order_qual_clauses(PlannerInfo *root, List *clauses); --static void copy_generic_path_info(Plan *dest, Path *src); -+static void copy_generic_path_info(PlannerInfo *root, Plan *dest, Path *src); - static void copy_plan_costsize(Plan *dest, Plan *src); - static void label_sort_with_costsize(PlannerInfo *root, Sort *plan, - double limit_tuples); -@@ -1094,7 +1096,7 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path, int flags) - false)), - NULL); - -- copy_generic_path_info(plan, (Path *) best_path); -+ copy_generic_path_info(root, plan, (Path 
*) best_path); - - return plan; - } -@@ -1241,7 +1243,7 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path, int flags) - plan->first_partial_plan = best_path->first_partial_path; - plan->part_prune_info = partpruneinfo; - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, &best_path->path); - - /* - * If prepare_sort_from_pathkeys added sort columns, but we were told to -@@ -1287,7 +1289,7 @@ create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path, - * prepare_sort_from_pathkeys on it before we do so on the individual - * child plans, to make cross-checking the sort info easier. - */ -- copy_generic_path_info(plan, (Path *) best_path); -+ copy_generic_path_info(root, plan, &best_path->path); - plan->targetlist = tlist; - plan->qual = NIL; - plan->lefttree = NULL; -@@ -1440,7 +1442,7 @@ create_group_result_plan(PlannerInfo *root, GroupResultPath *best_path) - - plan = make_result(tlist, (Node *) quals, NULL); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, &best_path->path); - - return plan; - } -@@ -1465,7 +1467,7 @@ create_project_set_plan(PlannerInfo *root, ProjectSetPath *best_path) - - plan = make_project_set(tlist, subplan); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, &best_path->path); - - return plan; - } -@@ -1493,7 +1495,7 @@ create_material_plan(PlannerInfo *root, MaterialPath *best_path, int flags) - - plan = make_material(subplan); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, &best_path->path); - - return plan; - } -@@ -1692,7 +1694,7 @@ create_unique_plan(PlannerInfo *root, UniquePath *best_path, int flags) - } - - /* Copy cost data from Path to Plan */ -- copy_generic_path_info(plan, &best_path->path); -+ copy_generic_path_info(root, plan, &best_path->path); - - return plan; - } -@@ -1725,7 
+1727,7 @@ create_gather_plan(PlannerInfo *root, GatherPath *best_path) - best_path->single_copy, - subplan); - -- copy_generic_path_info(&gather_plan->plan, &best_path->path); -+ copy_generic_path_info(root, &gather_plan->plan, &best_path->path); - - /* use parallel mode for parallel plans. */ - root->glob->parallelModeNeeded = true; -@@ -1754,7 +1756,7 @@ create_gather_merge_plan(PlannerInfo *root, GatherMergePath *best_path) - gm_plan = makeNode(GatherMerge); - gm_plan->plan.targetlist = tlist; - gm_plan->num_workers = best_path->num_workers; -- copy_generic_path_info(&gm_plan->plan, &best_path->path); -+ copy_generic_path_info(root, &gm_plan->plan, &best_path->path); - - /* Assign the rescan Param. */ - gm_plan->rescan_param = assign_special_exec_param(root); -@@ -1882,7 +1884,7 @@ create_projection_plan(PlannerInfo *root, ProjectionPath *best_path, int flags) - /* We need a Result node */ - plan = (Plan *) make_result(tlist, NULL, subplan); - -- copy_generic_path_info(plan, (Path *) best_path); -+ copy_generic_path_info(root, plan, (Path *) best_path); - } - - return plan; -@@ -1983,7 +1985,7 @@ create_sort_plan(PlannerInfo *root, SortPath *best_path, int flags) - IS_OTHER_REL(best_path->subpath->parent) ? 
- best_path->path.parent->relids : NULL); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ -2022,7 +2024,7 @@ create_group_plan(PlannerInfo *root, GroupPath *best_path) - subplan->targetlist), - subplan); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ -2050,7 +2052,7 @@ create_upper_unique_plan(PlannerInfo *root, UpperUniquePath *best_path, int flag - best_path->path.pathkeys, - best_path->numkeys); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ -2093,7 +2095,7 @@ create_agg_plan(PlannerInfo *root, AggPath *best_path) - best_path->numGroups, - subplan); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ -2294,7 +2296,7 @@ create_groupingsets_plan(PlannerInfo *root, GroupingSetsPath *best_path) - subplan); - - /* Copy cost data from Path to Plan */ -- copy_generic_path_info(&plan->plan, &best_path->path); -+ copy_generic_path_info(root, &plan->plan, &best_path->path); - } - - return (Plan *) plan; -@@ -2350,7 +2352,7 @@ create_minmaxagg_plan(PlannerInfo *root, MinMaxAggPath *best_path) - - plan = make_result(tlist, (Node *) best_path->quals, NULL); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - /* - * During setrefs.c, we'll need to replace references to the Agg nodes -@@ -2466,7 +2468,7 @@ create_windowagg_plan(PlannerInfo *root, WindowAggPath *best_path) - wc->inRangeNullsFirst, - subplan); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ -2502,7 +2504,7 @@ 
create_setop_plan(PlannerInfo *root, SetOpPath *best_path, int flags) - best_path->firstFlag, - numGroups); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ -2538,7 +2540,7 @@ create_recursiveunion_plan(PlannerInfo *root, RecursiveUnionPath *best_path) - best_path->distinctList, - numGroups); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ -2561,7 +2563,7 @@ create_lockrows_plan(PlannerInfo *root, LockRowsPath *best_path, - - plan = make_lockrows(subplan, best_path->rowMarks, best_path->epqParam); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ -2622,7 +2624,7 @@ create_modifytable_plan(PlannerInfo *root, ModifyTablePath *best_path) - best_path->onconflict, - best_path->epqParam); - -- copy_generic_path_info(&plan->plan, &best_path->path); -+ copy_generic_path_info(root, &plan->plan, &best_path->path); - - return plan; - } -@@ -2646,7 +2648,7 @@ create_limit_plan(PlannerInfo *root, LimitPath *best_path, int flags) - best_path->limitOffset, - best_path->limitCount); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ -2692,7 +2694,7 @@ create_seqscan_plan(PlannerInfo *root, Path *best_path, - scan_clauses, - scan_relid); - -- copy_generic_path_info(&scan_plan->plan, best_path); -+ copy_generic_path_info(root, &scan_plan->plan, best_path); - - return scan_plan; - } -@@ -2738,7 +2740,7 @@ create_samplescan_plan(PlannerInfo *root, Path *best_path, - scan_relid, - tsc); - -- copy_generic_path_info(&scan_plan->scan.plan, best_path); -+ copy_generic_path_info(root, &scan_plan->scan.plan, best_path); - - return scan_plan; - } -@@ -2916,7 +2918,7 @@ 
create_indexscan_plan(PlannerInfo *root, - indexorderbyops, - best_path->indexscandir); - -- copy_generic_path_info(&scan_plan->plan, &best_path->path); -+ copy_generic_path_info(root, &scan_plan->plan, &best_path->path); - - return scan_plan; - } -@@ -3031,7 +3033,7 @@ create_bitmap_scan_plan(PlannerInfo *root, - bitmapqualorig, - baserelid); - -- copy_generic_path_info(&scan_plan->scan.plan, &best_path->path); -+ copy_generic_path_info(root, &scan_plan->scan.plan, &best_path->path); - - return scan_plan; - } -@@ -3351,7 +3353,7 @@ create_tidscan_plan(PlannerInfo *root, TidPath *best_path, - scan_relid, - tidquals); - -- copy_generic_path_info(&scan_plan->scan.plan, &best_path->path); -+ copy_generic_path_info(root, &scan_plan->scan.plan, &best_path->path); - - return scan_plan; - } -@@ -3401,7 +3403,7 @@ create_subqueryscan_plan(PlannerInfo *root, SubqueryScanPath *best_path, - scan_relid, - subplan); - -- copy_generic_path_info(&scan_plan->scan.plan, &best_path->path); -+ copy_generic_path_info(root, &scan_plan->scan.plan, &best_path->path); - - return scan_plan; - } -@@ -3444,7 +3446,7 @@ create_functionscan_plan(PlannerInfo *root, Path *best_path, - scan_plan = make_functionscan(tlist, scan_clauses, scan_relid, - functions, rte->funcordinality); - -- copy_generic_path_info(&scan_plan->scan.plan, best_path); -+ copy_generic_path_info(root, &scan_plan->scan.plan, best_path); - - return scan_plan; - } -@@ -3487,7 +3489,7 @@ create_tablefuncscan_plan(PlannerInfo *root, Path *best_path, - scan_plan = make_tablefuncscan(tlist, scan_clauses, scan_relid, - tablefunc); - -- copy_generic_path_info(&scan_plan->scan.plan, best_path); -+ copy_generic_path_info(root, &scan_plan->scan.plan, best_path); - - return scan_plan; - } -@@ -3531,7 +3533,7 @@ create_valuesscan_plan(PlannerInfo *root, Path *best_path, - scan_plan = make_valuesscan(tlist, scan_clauses, scan_relid, - values_lists); - -- copy_generic_path_info(&scan_plan->scan.plan, best_path); -+ 
copy_generic_path_info(root, &scan_plan->scan.plan, best_path); - - return scan_plan; - } -@@ -3624,7 +3626,7 @@ create_ctescan_plan(PlannerInfo *root, Path *best_path, - scan_plan = make_ctescan(tlist, scan_clauses, scan_relid, - plan_id, cte_param_id); - -- copy_generic_path_info(&scan_plan->scan.plan, best_path); -+ copy_generic_path_info(root, &scan_plan->scan.plan, best_path); - - return scan_plan; - } -@@ -3663,7 +3665,7 @@ create_namedtuplestorescan_plan(PlannerInfo *root, Path *best_path, - scan_plan = make_namedtuplestorescan(tlist, scan_clauses, scan_relid, - rte->enrname); - -- copy_generic_path_info(&scan_plan->scan.plan, best_path); -+ copy_generic_path_info(root, &scan_plan->scan.plan, best_path); - - return scan_plan; - } -@@ -3701,7 +3703,7 @@ create_resultscan_plan(PlannerInfo *root, Path *best_path, - - scan_plan = make_result(tlist, (Node *) scan_clauses, NULL); - -- copy_generic_path_info(&scan_plan->plan, best_path); -+ copy_generic_path_info(root, &scan_plan->plan, best_path); - - return scan_plan; - } -@@ -3761,7 +3763,7 @@ create_worktablescan_plan(PlannerInfo *root, Path *best_path, - scan_plan = make_worktablescan(tlist, scan_clauses, scan_relid, - cteroot->wt_param_id); - -- copy_generic_path_info(&scan_plan->scan.plan, best_path); -+ copy_generic_path_info(root, &scan_plan->scan.plan, best_path); - - return scan_plan; - } -@@ -3821,7 +3823,7 @@ create_foreignscan_plan(PlannerInfo *root, ForeignPath *best_path, - outer_plan); - - /* Copy cost data from Path to Plan; no need to make FDW do this */ -- copy_generic_path_info(&scan_plan->scan.plan, &best_path->path); -+ copy_generic_path_info(root, &scan_plan->scan.plan, &best_path->path); - - /* Copy foreign server OID; likewise, no need to make FDW do this */ - scan_plan->fs_server = rel->serverid; -@@ -3955,7 +3957,7 @@ create_customscan_plan(PlannerInfo *root, CustomPath *best_path, - * Copy cost data from Path to Plan; no need to make custom-plan providers - * do this - */ -- 
copy_generic_path_info(&cplan->scan.plan, &best_path->path); -+ copy_generic_path_info(root, &cplan->scan.plan, &best_path->path); - - /* Likewise, copy the relids that are represented by this custom scan */ - cplan->custom_relids = best_path->path.parent->relids; -@@ -4057,7 +4059,7 @@ create_nestloop_plan(PlannerInfo *root, - best_path->jointype, - best_path->inner_unique); - -- copy_generic_path_info(&join_plan->join.plan, &best_path->path); -+ copy_generic_path_info(root, &join_plan->join.plan, &best_path->path); - - return join_plan; - } -@@ -4364,7 +4366,7 @@ create_mergejoin_plan(PlannerInfo *root, - best_path->skip_mark_restore); - - /* Costs of sort and material steps are included in path cost already */ -- copy_generic_path_info(&join_plan->join.plan, &best_path->jpath.path); -+ copy_generic_path_info(root, &join_plan->join.plan, &best_path->jpath.path); - - return join_plan; - } -@@ -4510,7 +4512,7 @@ create_hashjoin_plan(PlannerInfo *root, - best_path->jpath.jointype, - best_path->jpath.inner_unique); - -- copy_generic_path_info(&join_plan->join.plan, &best_path->jpath.path); -+ copy_generic_path_info(root, &join_plan->join.plan, &best_path->jpath.path); - - return join_plan; - } -@@ -5010,7 +5012,7 @@ order_qual_clauses(PlannerInfo *root, List *clauses) - * Also copy the parallel-related flags, which the executor *will* use. 
- */ - static void --copy_generic_path_info(Plan *dest, Path *src) -+copy_generic_path_info(PlannerInfo *root, Plan *dest, Path *src) - { - dest->startup_cost = src->startup_cost; - dest->total_cost = src->total_cost; -@@ -5018,6 +5020,9 @@ copy_generic_path_info(Plan *dest, Path *src) - dest->plan_width = src->pathtarget->width; - dest->parallel_aware = src->parallel_aware; - dest->parallel_safe = src->parallel_safe; -+ -+ if (copy_generic_path_info_hook) -+ (*copy_generic_path_info_hook) (root, dest, src); - } - - /* -diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c -index 6054bd2b53..1c8434174e 100644 ---- a/src/backend/optimizer/util/relnode.c -+++ b/src/backend/optimizer/util/relnode.c -@@ -1233,6 +1233,7 @@ find_childrel_parents(PlannerInfo *root, RelOptInfo *rel) - } - - -+set_parampathinfo_postinit_hook_type parampathinfo_postinit_hook = NULL; - /* - * get_baserel_parampathinfo - * Get the ParamPathInfo for a parameterized path for a base relation, -@@ -1301,6 +1302,10 @@ get_baserel_parampathinfo(PlannerInfo *root, RelOptInfo *baserel, - ppi->ppi_req_outer = required_outer; - ppi->ppi_rows = rows; - ppi->ppi_clauses = pclauses; -+ -+ if (parampathinfo_postinit_hook) -+ (*parampathinfo_postinit_hook)(ppi); -+ - baserel->ppilist = lappend(baserel->ppilist, ppi); - - return ppi; -@@ -1526,6 +1531,10 @@ get_joinrel_parampathinfo(PlannerInfo *root, RelOptInfo *joinrel, - ppi->ppi_req_outer = required_outer; - ppi->ppi_rows = rows; - ppi->ppi_clauses = NIL; -+ -+ if (parampathinfo_postinit_hook) -+ (*parampathinfo_postinit_hook)(ppi); -+ - joinrel->ppilist = lappend(joinrel->ppilist, ppi); - - return ppi; -diff --git a/src/include/commands/explain.h b/src/include/commands/explain.h -index f8b79ec120..b5eda01907 100644 ---- a/src/include/commands/explain.h -+++ b/src/include/commands/explain.h -@@ -62,6 +62,12 @@ extern PGDLLIMPORT ExplainOneQuery_hook_type ExplainOneQuery_hook; - typedef const char 
*(*explain_get_index_name_hook_type) (Oid indexId); - extern PGDLLIMPORT explain_get_index_name_hook_type explain_get_index_name_hook; - -+/* Hook for plugins to get control in ExplainOnePlan() */ -+typedef void (*ExplainOnePlan_hook_type) (PlannedStmt *plannedstmt, IntoClause *into, -+ ExplainState *es, const char *queryString, -+ ParamListInfo params, const instr_time *planduration, -+ QueryEnvironment *queryEnv); -+extern PGDLLIMPORT ExplainOnePlan_hook_type ExplainOnePlan_hook; - - extern void ExplainQuery(ParseState *pstate, ExplainStmt *stmt, const char *queryString, - ParamListInfo params, QueryEnvironment *queryEnv, DestReceiver *dest); -diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h -index 441e64eca9..484bca379a 100644 ---- a/src/include/nodes/pathnodes.h -+++ b/src/include/nodes/pathnodes.h -@@ -710,6 +710,10 @@ typedef struct RelOptInfo - Relids top_parent_relids; /* Relids of topmost parents (if "other" - * rel) */ - -+ /* For Adaptive optimization DEBUG purposes */ -+ double predicted_cardinality; -+ int fss_hash; -+ - /* used for partitioned relations */ - PartitionScheme part_scheme; /* Partitioning scheme. 
*/ - int nparts; /* number of partitions */ -@@ -1069,6 +1073,10 @@ typedef struct ParamPathInfo - Relids ppi_req_outer; /* rels supplying parameters used by path */ - double ppi_rows; /* estimated number of result tuples */ - List *ppi_clauses; /* join clauses available from outer rels */ -+ -+ /* AQO DEBUG purposes */ -+ double predicted_ppi_rows; -+ double fss_ppi_hash; - } ParamPathInfo; - - -diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h -index 70f8b8e22b..d188c2596a 100644 ---- a/src/include/nodes/plannodes.h -+++ b/src/include/nodes/plannodes.h -@@ -144,6 +144,19 @@ typedef struct Plan - List *initPlan; /* Init Plan nodes (un-correlated expr - * subselects) */ - -+ /* -+ * information for adaptive query optimization -+ */ -+ bool had_path; -+ List *path_clauses; -+ List *path_relids; -+ JoinType path_jointype; -+ int path_parallel_workers; -+ bool was_parametrized; -+ /* For Adaptive optimization DEBUG purposes */ -+ double predicted_cardinality; -+ int fss_hash; -+ - /* - * Information for management of parameter-change-driven rescanning - * -diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h -index 9b6bdbc518..2a0caa6474 100644 ---- a/src/include/optimizer/cost.h -+++ b/src/include/optimizer/cost.h -@@ -39,6 +39,33 @@ typedef enum - } ConstraintExclusionType; - - -+/* Hook for plugins to get control of cardinality estimation */ -+typedef void (*set_baserel_rows_estimate_hook_type) (PlannerInfo *root, -+ RelOptInfo *rel); -+extern PGDLLIMPORT set_baserel_rows_estimate_hook_type -+ set_baserel_rows_estimate_hook; -+typedef double (*get_parameterized_baserel_size_hook_type) (PlannerInfo *root, -+ RelOptInfo *rel, -+ List *param_clauses); -+extern PGDLLIMPORT get_parameterized_baserel_size_hook_type -+ get_parameterized_baserel_size_hook; -+typedef double (*get_parameterized_joinrel_size_hook_type) (PlannerInfo *root, -+ RelOptInfo *rel, -+ Path *outer_path, -+ Path *inner_path, -+ SpecialJoinInfo *sjinfo, -+ 
List *restrict_clauses); -+extern PGDLLIMPORT get_parameterized_joinrel_size_hook_type -+ get_parameterized_joinrel_size_hook; -+typedef void (*set_joinrel_size_estimates_hook_type) (PlannerInfo *root, -+ RelOptInfo *rel, -+ RelOptInfo *outer_rel, -+ RelOptInfo *inner_rel, -+ SpecialJoinInfo *sjinfo, -+ List *restrictlist); -+extern PGDLLIMPORT set_joinrel_size_estimates_hook_type -+ set_joinrel_size_estimates_hook; -+ - /* - * prototypes for costsize.c - * routines to compute costs and sizes -@@ -171,10 +198,21 @@ extern void compute_semi_anti_join_factors(PlannerInfo *root, - SpecialJoinInfo *sjinfo, - List *restrictlist, - SemiAntiJoinFactors *semifactors); -+extern void set_baserel_rows_estimate(PlannerInfo *root, RelOptInfo *rel); -+extern void set_baserel_rows_estimate_standard(PlannerInfo *root, RelOptInfo *rel); - extern void set_baserel_size_estimates(PlannerInfo *root, RelOptInfo *rel); - extern double get_parameterized_baserel_size(PlannerInfo *root, - RelOptInfo *rel, - List *param_clauses); -+extern double get_parameterized_baserel_size_standard(PlannerInfo *root, -+ RelOptInfo *rel, -+ List *param_clauses); -+extern double get_parameterized_joinrel_size_standard(PlannerInfo *root, -+ RelOptInfo *rel, -+ Path *outer_path, -+ Path *inner_path, -+ SpecialJoinInfo *sjinfo, -+ List *restrict_clauses); - extern double get_parameterized_joinrel_size(PlannerInfo *root, - RelOptInfo *rel, - Path *outer_path, -@@ -186,6 +224,11 @@ extern void set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, - RelOptInfo *inner_rel, - SpecialJoinInfo *sjinfo, - List *restrictlist); -+extern void set_joinrel_size_estimates_standard(PlannerInfo *root, RelOptInfo *rel, -+ RelOptInfo *outer_rel, -+ RelOptInfo *inner_rel, -+ SpecialJoinInfo *sjinfo, -+ List *restrictlist); - extern void set_subquery_size_estimates(PlannerInfo *root, RelOptInfo *rel); - extern void set_function_size_estimates(PlannerInfo *root, RelOptInfo *rel); - extern void 
set_values_size_estimates(PlannerInfo *root, RelOptInfo *rel); -@@ -198,5 +241,7 @@ extern void set_foreign_size_estimates(PlannerInfo *root, RelOptInfo *rel); - extern PathTarget *set_pathtarget_cost_width(PlannerInfo *root, PathTarget *target); - extern double compute_bitmap_pages(PlannerInfo *root, RelOptInfo *baserel, - Path *bitmapqual, int loop_count, Cost *cost, double *tuple); -+extern bool IsParallelTuplesProcessing(const Plan *plan); -+extern double get_parallel_divisor(int parallel_workers); - - #endif /* COST_H */ -diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h -index e70d6a3f18..21dbfee508 100644 ---- a/src/include/optimizer/pathnode.h -+++ b/src/include/optimizer/pathnode.h -@@ -18,6 +18,10 @@ - #include "nodes/pathnodes.h" - - -+typedef void (*set_parampathinfo_postinit_hook_type) (ParamPathInfo *ppi); -+ -+extern PGDLLIMPORT set_parampathinfo_postinit_hook_type parampathinfo_postinit_hook; -+ - /* - * prototypes for pathnode.c - */ -diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h -index e7aaddd50d..56e58dee25 100644 ---- a/src/include/optimizer/planmain.h -+++ b/src/include/optimizer/planmain.h -@@ -24,6 +24,12 @@ extern double cursor_tuple_fraction; - /* query_planner callback to compute query_pathkeys */ - typedef void (*query_pathkeys_callback) (PlannerInfo *root, void *extra); - -+/* hook for plugins to get control in creating plan from path */ -+typedef void (*copy_generic_path_info_hook_type) (PlannerInfo *root, -+ Plan *dest, Path *src); -+ -+extern PGDLLIMPORT copy_generic_path_info_hook_type copy_generic_path_info_hook; -+ - /* - * prototypes for plan/planmain.c - */ diff --git a/aqo_pg13.patch b/aqo_pg13.patch index b933ca49..d7ecb41c 100644 --- a/aqo_pg13.patch +++ b/aqo_pg13.patch @@ -1,5 +1,5 @@ diff --git a/contrib/Makefile b/contrib/Makefile -index 7a4866e338..47a18b9698 100644 +index 1846d415b6f..95519ac11de 100644 --- a/contrib/Makefile +++ b/contrib/Makefile @@ -7,6 
+7,7 @@ include $(top_builddir)/src/Makefile.global @@ -11,7 +11,7 @@ index 7a4866e338..47a18b9698 100644 auto_explain \ bloom \ diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c -index 43f9b01e83..707211308c 100644 +index bc05c96b4ce..b6a3abe0d2b 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -24,6 +24,7 @@ @@ -22,17 +22,20 @@ index 43f9b01e83..707211308c 100644 #include "parser/parsetree.h" #include "rewrite/rewriteHandler.h" #include "storage/bufmgr.h" -@@ -46,6 +47,9 @@ ExplainOneQuery_hook_type ExplainOneQuery_hook = NULL; +@@ -46,6 +47,12 @@ ExplainOneQuery_hook_type ExplainOneQuery_hook = NULL; /* Hook for plugins to get control in explain_get_index_name() */ explain_get_index_name_hook_type explain_get_index_name_hook = NULL; +/* Hook for plugins to get control in ExplainOnePlan() */ +ExplainOnePlan_hook_type ExplainOnePlan_hook = NULL; ++ ++/* Hook for plugins to get control in ExplainOnePlan() */ ++ExplainOneNode_hook_type ExplainOneNode_hook = NULL; + /* OR-able flags for ExplainXMLTag() */ #define X_OPENING 0 -@@ -638,6 +642,10 @@ ExplainOnePlan(PlannedStmt *plannedstmt, IntoClause *into, ExplainState *es, +@@ -638,6 +645,10 @@ ExplainOnePlan(PlannedStmt *plannedstmt, IntoClause *into, ExplainState *es, ExplainPropertyFloat("Execution Time", "ms", 1000.0 * totaltime, 3, es); @@ -43,78 +46,70 @@ index 43f9b01e83..707211308c 100644 ExplainCloseGroup("Query", NULL, true, es); } -@@ -1579,6 +1587,38 @@ ExplainNode(PlanState *planstate, List *ancestors, - appendStringInfo(es->str, - " (actual rows=%.0f loops=%.0f)", - rows, nloops); -+ -+#ifdef AQO_EXPLAIN -+ if (es->verbose && plan && planstate->instrument) -+ { -+ int wrkrs = 1; -+ double error = -1.; -+ -+ if (planstate->worker_instrument && IsParallelTuplesProcessing(plan)) -+ { -+ int i; -+ for (i = 0; i < planstate->worker_instrument->num_workers; i++) -+ { -+ Instrumentation *instrument = &planstate->worker_instrument->instrument[i]; -+ 
if (instrument->nloops <= 0) -+ continue; -+ wrkrs++; -+ } -+ } -+ -+ if (plan->predicted_cardinality > 0.) -+ { -+ error = 100. * (plan->predicted_cardinality - (rows*wrkrs)) -+ / plan->predicted_cardinality; -+ appendStringInfo(es->str, -+ " (AQO: cardinality=%.0lf, error=%.0lf%%, fsspace_hash=%d)", -+ plan->predicted_cardinality, error, plan->fss_hash); -+ } -+ else -+ appendStringInfo(es->str, " (AQO not used, fsspace_hash=%d)", -+ plan->fss_hash); -+ } -+#endif +@@ -1612,6 +1623,9 @@ ExplainNode(PlanState *planstate, List *ancestors, } - else - { + } + ++ if (ExplainOneNode_hook) ++ ExplainOneNode_hook(es, planstate, plan); ++ + /* in text format, first line ends here */ + if (es->format == EXPLAIN_FORMAT_TEXT) + appendStringInfoChar(es->str, '\n'); diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c -index 530aac68a7..1d94feadb9 100644 +index 692b6c1559f..580d04d7844 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c -@@ -126,6 +126,12 @@ CopyPlanFields(const Plan *from, Plan *newnode) - COPY_NODE_FIELD(lefttree); - COPY_NODE_FIELD(righttree); +@@ -132,6 +132,7 @@ CopyPlanFields(const Plan *from, Plan *newnode) COPY_NODE_FIELD(initPlan); -+ COPY_SCALAR_FIELD(had_path); -+ COPY_NODE_FIELD(path_clauses); -+ COPY_NODE_FIELD(path_relids); -+ COPY_SCALAR_FIELD(path_jointype); -+ COPY_SCALAR_FIELD(path_parallel_workers); -+ COPY_SCALAR_FIELD(was_parametrized); COPY_BITMAPSET_FIELD(extParam); COPY_BITMAPSET_FIELD(allParam); ++ COPY_NODE_FIELD(ext_nodes); } + + /* +diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c +index 21ececf0c2f..ebfd3ba86de 100644 +--- a/src/backend/nodes/outfuncs.c ++++ b/src/backend/nodes/outfuncs.c +@@ -342,6 +342,7 @@ _outPlanInfo(StringInfo str, const Plan *node) + WRITE_NODE_FIELD(initPlan); + WRITE_BITMAPSET_FIELD(extParam); + WRITE_BITMAPSET_FIELD(allParam); ++ WRITE_NODE_FIELD(ext_nodes); + } + + /* +diff --git a/src/backend/nodes/readfuncs.c 
b/src/backend/nodes/readfuncs.c +index 7976b369ba8..2e47bd8d950 100644 +--- a/src/backend/nodes/readfuncs.c ++++ b/src/backend/nodes/readfuncs.c +@@ -1580,6 +1580,7 @@ ReadCommonPlan(Plan *local_node) + READ_NODE_FIELD(initPlan); + READ_BITMAPSET_FIELD(extParam); + READ_BITMAPSET_FIELD(allParam); ++ READ_NODE_FIELD(ext_nodes); + } + + /* diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c -index f1dfdc1a4a..359cafa531 100644 +index 4edc859cb57..988f2e6ab75 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c -@@ -97,6 +97,10 @@ - #include "utils/spccache.h" +@@ -98,6 +98,12 @@ #include "utils/tuplesort.h" + +set_baserel_rows_estimate_hook_type set_baserel_rows_estimate_hook = NULL; ++set_foreign_rows_estimate_hook_type set_foreign_rows_estimate_hook = NULL; +get_parameterized_baserel_size_hook_type get_parameterized_baserel_size_hook = NULL; +get_parameterized_joinrel_size_hook_type get_parameterized_joinrel_size_hook = NULL; +set_joinrel_size_estimates_hook_type set_joinrel_size_estimates_hook = NULL; ++ + /* source-code-compatibility hacks for pull_varnos() API change */ + #define pull_varnos(a,b) pull_varnos_new(a,b) - #define LOG2(x) (log(x) / 0.693147180559945) - -@@ -185,7 +189,6 @@ static Cost append_nonpartial_cost(List *subpaths, int numpaths, +@@ -181,7 +187,6 @@ static Cost append_nonpartial_cost(List *subpaths, int numpaths, static void set_rel_width(PlannerInfo *root, RelOptInfo *rel); static double relation_byte_size(double tuples, int width); static double page_size(double tuples, int width); @@ -122,91 +117,19 @@ index f1dfdc1a4a..359cafa531 100644 /* -@@ -266,7 +269,7 @@ cost_seqscan(Path *path, PlannerInfo *root, - /* Adjust costing for parallelism, if used. 
*/ - if (path->parallel_workers > 0) - { -- double parallel_divisor = get_parallel_divisor(path); -+ double parallel_divisor = get_parallel_divisor(path->parallel_workers); - - /* The CPU cost is divided among all the workers. */ - cpu_run_cost /= parallel_divisor; -@@ -745,7 +748,7 @@ cost_index(IndexPath *path, PlannerInfo *root, double loop_count, - /* Adjust costing for parallelism, if used. */ - if (path->path.parallel_workers > 0) - { -- double parallel_divisor = get_parallel_divisor(&path->path); -+ double parallel_divisor = get_parallel_divisor(path->path.parallel_workers); - - path->path.rows = clamp_row_est(path->path.rows / parallel_divisor); - -@@ -1026,7 +1029,7 @@ cost_bitmap_heap_scan(Path *path, PlannerInfo *root, RelOptInfo *baserel, - /* Adjust costing for parallelism, if used. */ - if (path->parallel_workers > 0) - { -- double parallel_divisor = get_parallel_divisor(path); -+ double parallel_divisor = get_parallel_divisor(path->parallel_workers); - - /* The CPU cost is divided among all the workers. */ - cpu_run_cost /= parallel_divisor; -@@ -2129,7 +2132,7 @@ cost_append(AppendPath *apath) - else /* parallel-aware */ - { - int i = 0; -- double parallel_divisor = get_parallel_divisor(&apath->path); -+ double parallel_divisor = get_parallel_divisor(apath->path.parallel_workers); - - /* Parallel-aware Append never produces ordered output. */ - Assert(apath->path.pathkeys == NIL); -@@ -2163,7 +2166,7 @@ cost_append(AppendPath *apath) - { - double subpath_parallel_divisor; - -- subpath_parallel_divisor = get_parallel_divisor(subpath); -+ subpath_parallel_divisor = get_parallel_divisor(subpath->parallel_workers); - apath->path.rows += subpath->rows * (subpath_parallel_divisor / - parallel_divisor); - apath->path.total_cost += subpath->total_cost; -@@ -2761,7 +2764,7 @@ final_cost_nestloop(PlannerInfo *root, NestPath *path, - /* For partial paths, scale row estimate. 
*/ - if (path->path.parallel_workers > 0) - { -- double parallel_divisor = get_parallel_divisor(&path->path); -+ double parallel_divisor = get_parallel_divisor(path->path.parallel_workers); - - path->path.rows = - clamp_row_est(path->path.rows / parallel_divisor); -@@ -3207,7 +3210,7 @@ final_cost_mergejoin(PlannerInfo *root, MergePath *path, - /* For partial paths, scale row estimate. */ - if (path->jpath.path.parallel_workers > 0) - { -- double parallel_divisor = get_parallel_divisor(&path->jpath.path); -+ double parallel_divisor = get_parallel_divisor(path->jpath.path.parallel_workers); - - path->jpath.path.rows = - clamp_row_est(path->jpath.path.rows / parallel_divisor); -@@ -3541,7 +3544,7 @@ initial_cost_hashjoin(PlannerInfo *root, JoinCostWorkspace *workspace, - * number, so we need to undo the division. - */ - if (parallel_hash) -- inner_path_rows_total *= get_parallel_divisor(inner_path); -+ inner_path_rows_total *= get_parallel_divisor(inner_path->parallel_workers); - - /* - * Get hash table size that executor would use for inner relation. -@@ -3638,7 +3641,7 @@ final_cost_hashjoin(PlannerInfo *root, HashPath *path, - /* For partial paths, scale row estimate. */ - if (path->jpath.path.parallel_workers > 0) - { -- double parallel_divisor = get_parallel_divisor(&path->jpath.path); -+ double parallel_divisor = get_parallel_divisor(path->jpath.path.parallel_workers); - - path->jpath.path.rows = - clamp_row_est(path->jpath.path.rows / parallel_divisor); -@@ -4633,6 +4636,49 @@ approx_tuple_count(PlannerInfo *root, JoinPath *path, List *quals) +@@ -4632,6 +4637,58 @@ approx_tuple_count(PlannerInfo *root, JoinPath *path, List *quals) } ++void ++set_foreign_rows_estimate(PlannerInfo *root, RelOptInfo *rel) ++{ ++ if (set_foreign_rows_estimate_hook) ++ (*set_foreign_rows_estimate_hook) (root, rel); ++ else ++ rel->rows = 1000; /* entirely bogus default estimate */ ++} ++ +/* + * set_baserel_rows_estimate + * Set the rows estimate for the given base relation. 
@@ -253,7 +176,7 @@ index f1dfdc1a4a..359cafa531 100644 /* * set_baserel_size_estimates * Set the size estimates for the given base relation. -@@ -4649,19 +4695,10 @@ approx_tuple_count(PlannerInfo *root, JoinPath *path, List *quals) +@@ -4648,19 +4705,10 @@ approx_tuple_count(PlannerInfo *root, JoinPath *path, List *quals) void set_baserel_size_estimates(PlannerInfo *root, RelOptInfo *rel) { @@ -274,7 +197,7 @@ index f1dfdc1a4a..359cafa531 100644 cost_qual_eval(&rel->baserestrictcost, rel->baserestrictinfo, root); -@@ -4672,13 +4709,33 @@ set_baserel_size_estimates(PlannerInfo *root, RelOptInfo *rel) +@@ -4671,13 +4719,33 @@ set_baserel_size_estimates(PlannerInfo *root, RelOptInfo *rel) * get_parameterized_baserel_size * Make a size estimate for a parameterized scan of a base relation. * @@ -310,7 +233,7 @@ index f1dfdc1a4a..359cafa531 100644 { List *allclauses; double nrows; -@@ -4707,6 +4764,36 @@ get_parameterized_baserel_size(PlannerInfo *root, RelOptInfo *rel, +@@ -4706,6 +4774,36 @@ get_parameterized_baserel_size(PlannerInfo *root, RelOptInfo *rel, * set_joinrel_size_estimates * Set the size estimates for the given join relation. * @@ -347,7 +270,7 @@ index f1dfdc1a4a..359cafa531 100644 * The rel's targetlist must have been constructed already, and a * restriction clause list that matches the given component rels must * be provided. -@@ -4726,11 +4813,11 @@ get_parameterized_baserel_size(PlannerInfo *root, RelOptInfo *rel, +@@ -4725,11 +4823,11 @@ get_parameterized_baserel_size(PlannerInfo *root, RelOptInfo *rel, * build_joinrel_tlist, and baserestrictcost is not used for join rels. 
*/ void @@ -364,7 +287,7 @@ index f1dfdc1a4a..359cafa531 100644 { rel->rows = calc_joinrel_size_estimate(root, rel, -@@ -4746,6 +4833,35 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, +@@ -4745,6 +4843,35 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, * get_parameterized_joinrel_size * Make a size estimate for a parameterized scan of a join relation. * @@ -400,7 +323,7 @@ index f1dfdc1a4a..359cafa531 100644 * 'rel' is the joinrel under consideration. * 'outer_path', 'inner_path' are (probably also parameterized) Paths that * produce the relations being joined. -@@ -4758,11 +4874,11 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, +@@ -4757,11 +4884,11 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, * set_joinrel_size_estimates must have been applied already. */ double @@ -417,459 +340,173 @@ index f1dfdc1a4a..359cafa531 100644 { double nrows; -@@ -5760,14 +5876,25 @@ page_size(double tuples, int width) - return ceil(relation_byte_size(tuples, width) / BLCKSZ); - } +@@ -5430,7 +5557,7 @@ set_foreign_size_estimates(PlannerInfo *root, RelOptInfo *rel) + /* Should only be applied to base relations */ + Assert(rel->relid > 0); -+bool -+IsParallelTuplesProcessing(const Plan *plan) -+{ -+ if (plan->path_parallel_workers > 0 && ( -+ plan->parallel_aware || nodeTag(plan) == T_HashJoin || -+ nodeTag(plan) == T_MergeJoin || -+ nodeTag(plan) == T_NestLoop)) -+ return true; -+ return false; -+} -+ - /* +- rel->rows = 1000; /* entirely bogus default estimate */ ++ set_foreign_rows_estimate(root, rel); + + cost_qual_eval(&rel->baserestrictcost, rel->baserestrictinfo, root); + +@@ -5716,7 +5843,7 @@ page_size(double tuples, int width) * Estimate the fraction of the work that each worker will do given the * number of workers budgeted for the path. 
*/ -static double --get_parallel_divisor(Path *path) +double -+get_parallel_divisor(int parallel_workers) + get_parallel_divisor(Path *path) { -- double parallel_divisor = path->parallel_workers; -+ double parallel_divisor = parallel_workers; - - /* - * Early experience with parallel query suggests that when there is only -@@ -5784,7 +5911,7 @@ get_parallel_divisor(Path *path) - { - double leader_contribution; - -- leader_contribution = 1.0 - (0.3 * path->parallel_workers); -+ leader_contribution = 1.0 - (0.3 * parallel_workers); - if (leader_contribution > 0) - parallel_divisor += leader_contribution; - } + double parallel_divisor = path->parallel_workers; diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c -index 40abe6f9f6..9edd6daeff 100644 +index 917713c1633..5b7bf1cec69 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c -@@ -70,6 +70,8 @@ +@@ -70,6 +70,7 @@ #define CP_LABEL_TLIST 0x0004 /* tlist must contain sortgrouprefs */ #define CP_IGNORE_TLIST 0x0008 /* caller will replace tlist */ -+/* Hook for plugins to get control in creating plan from path */ -+copy_generic_path_info_hook_type copy_generic_path_info_hook = NULL; ++create_plan_hook_type create_plan_hook = NULL; static Plan *create_plan_recurse(PlannerInfo *root, Path *best_path, int flags); -@@ -166,7 +168,7 @@ static Node *fix_indexqual_clause(PlannerInfo *root, - static Node *fix_indexqual_operand(Node *node, IndexOptInfo *index, int indexcol); - static List *get_switched_clauses(List *clauses, Relids outerrelids); - static List *order_qual_clauses(PlannerInfo *root, List *clauses); --static void copy_generic_path_info(Plan *dest, Path *src); -+static void copy_generic_path_info(PlannerInfo *root, Plan *dest, Path *src); - static void copy_plan_costsize(Plan *dest, Plan *src); - static void label_sort_with_costsize(PlannerInfo *root, Sort *plan, - double limit_tuples); -@@ -1110,7 +1112,7 @@ 
create_append_plan(PlannerInfo *root, AppendPath *best_path, int flags) - false)), - NULL); - -- copy_generic_path_info(plan, (Path *) best_path); -+ copy_generic_path_info(root, plan, (Path *) best_path); - - return plan; - } -@@ -1257,7 +1259,7 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path, int flags) - plan->first_partial_plan = best_path->first_partial_path; - plan->part_prune_info = partpruneinfo; - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, &best_path->path); - - /* - * If prepare_sort_from_pathkeys added sort columns, but we were told to -@@ -1303,7 +1305,7 @@ create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path, - * prepare_sort_from_pathkeys on it before we do so on the individual - * child plans, to make cross-checking the sort info easier. - */ -- copy_generic_path_info(plan, (Path *) best_path); -+ copy_generic_path_info(root, plan, &best_path->path); - plan->targetlist = tlist; - plan->qual = NIL; - plan->lefttree = NULL; -@@ -1456,7 +1458,7 @@ create_group_result_plan(PlannerInfo *root, GroupResultPath *best_path) - - plan = make_result(tlist, (Node *) quals, NULL); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, &best_path->path); - - return plan; - } -@@ -1481,7 +1483,7 @@ create_project_set_plan(PlannerInfo *root, ProjectSetPath *best_path) - - plan = make_project_set(tlist, subplan); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, &best_path->path); - - return plan; - } -@@ -1509,7 +1511,7 @@ create_material_plan(PlannerInfo *root, MaterialPath *best_path, int flags) - - plan = make_material(subplan); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, &best_path->path); - - return plan; - } -@@ -1709,7 +1711,7 @@ create_unique_plan(PlannerInfo *root, UniquePath *best_path, int 
flags) +@@ -524,6 +525,10 @@ create_plan_recurse(PlannerInfo *root, Path *best_path, int flags) + break; } - /* Copy cost data from Path to Plan */ -- copy_generic_path_info(plan, &best_path->path); -+ copy_generic_path_info(root, plan, &best_path->path); - - return plan; - } -@@ -1744,7 +1746,7 @@ create_gather_plan(PlannerInfo *root, GatherPath *best_path) - best_path->single_copy, - subplan); - -- copy_generic_path_info(&gather_plan->plan, &best_path->path); -+ copy_generic_path_info(root, &gather_plan->plan, &best_path->path); - - /* use parallel mode for parallel plans. */ - root->glob->parallelModeNeeded = true; -@@ -1773,7 +1775,7 @@ create_gather_merge_plan(PlannerInfo *root, GatherMergePath *best_path) - gm_plan = makeNode(GatherMerge); - gm_plan->plan.targetlist = tlist; - gm_plan->num_workers = best_path->num_workers; -- copy_generic_path_info(&gm_plan->plan, &best_path->path); -+ copy_generic_path_info(root, &gm_plan->plan, &best_path->path); - - /* Assign the rescan Param. */ - gm_plan->rescan_param = assign_special_exec_param(root); -@@ -1901,7 +1903,7 @@ create_projection_plan(PlannerInfo *root, ProjectionPath *best_path, int flags) - /* We need a Result node */ - plan = (Plan *) make_result(tlist, NULL, subplan); - -- copy_generic_path_info(plan, (Path *) best_path); -+ copy_generic_path_info(root, plan, (Path *) best_path); - } - - return plan; -@@ -2002,7 +2004,7 @@ create_sort_plan(PlannerInfo *root, SortPath *best_path, int flags) - IS_OTHER_REL(best_path->subpath->parent) ? 
- best_path->path.parent->relids : NULL); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ -2028,7 +2030,7 @@ create_incrementalsort_plan(PlannerInfo *root, IncrementalSortPath *best_path, - best_path->spath.path.parent->relids : NULL, - best_path->nPresortedCols); - -- copy_generic_path_info(&plan->sort.plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->sort.plan, (Path *) best_path); - - return plan; - } -@@ -2067,7 +2069,7 @@ create_group_plan(PlannerInfo *root, GroupPath *best_path) - subplan->targetlist), - subplan); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ -2095,7 +2097,7 @@ create_upper_unique_plan(PlannerInfo *root, UpperUniquePath *best_path, int flag - best_path->path.pathkeys, - best_path->numkeys); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ -2139,7 +2141,7 @@ create_agg_plan(PlannerInfo *root, AggPath *best_path) - best_path->transitionSpace, - subplan); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ -2341,7 +2343,7 @@ create_groupingsets_plan(PlannerInfo *root, GroupingSetsPath *best_path) - subplan); - - /* Copy cost data from Path to Plan */ -- copy_generic_path_info(&plan->plan, &best_path->path); -+ copy_generic_path_info(root, &plan->plan, &best_path->path); - } - - return (Plan *) plan; -@@ -2399,7 +2401,7 @@ create_minmaxagg_plan(PlannerInfo *root, MinMaxAggPath *best_path) - - plan = make_result(tlist, (Node *) best_path->quals, NULL); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - /* - * During setrefs.c, we'll need to 
replace references to the Agg nodes -@@ -2518,7 +2520,7 @@ create_windowagg_plan(PlannerInfo *root, WindowAggPath *best_path) - wc->inRangeNullsFirst, - subplan); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ -2554,7 +2556,7 @@ create_setop_plan(PlannerInfo *root, SetOpPath *best_path, int flags) - best_path->firstFlag, - numGroups); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ -2590,7 +2592,7 @@ create_recursiveunion_plan(PlannerInfo *root, RecursiveUnionPath *best_path) - best_path->distinctList, - numGroups); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ -2613,7 +2615,7 @@ create_lockrows_plan(PlannerInfo *root, LockRowsPath *best_path, - - plan = make_lockrows(subplan, best_path->rowMarks, best_path->epqParam); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ -2674,7 +2676,7 @@ create_modifytable_plan(PlannerInfo *root, ModifyTablePath *best_path) - best_path->onconflict, - best_path->epqParam); - -- copy_generic_path_info(&plan->plan, &best_path->path); -+ copy_generic_path_info(root, &plan->plan, &best_path->path); - - return plan; - } -@@ -2728,7 +2730,7 @@ create_limit_plan(PlannerInfo *root, LimitPath *best_path, int flags) - best_path->limitOption, - numUniqkeys, uniqColIdx, uniqOperators, uniqCollations); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - ++ if (create_plan_hook) ++ /* Give an extension a chance to do something */ ++ (*create_plan_hook)(root, best_path, &plan); ++ return plan; } -@@ -2774,7 +2776,7 @@ create_seqscan_plan(PlannerInfo *root, 
Path *best_path, - scan_clauses, - scan_relid); - -- copy_generic_path_info(&scan_plan->plan, best_path); -+ copy_generic_path_info(root, &scan_plan->plan, best_path); - - return scan_plan; - } -@@ -2820,7 +2822,7 @@ create_samplescan_plan(PlannerInfo *root, Path *best_path, - scan_relid, - tsc); - -- copy_generic_path_info(&scan_plan->scan.plan, best_path); -+ copy_generic_path_info(root, &scan_plan->scan.plan, best_path); - - return scan_plan; - } -@@ -2998,7 +3000,7 @@ create_indexscan_plan(PlannerInfo *root, - indexorderbyops, - best_path->indexscandir); - -- copy_generic_path_info(&scan_plan->plan, &best_path->path); -+ copy_generic_path_info(root, &scan_plan->plan, &best_path->path); - - return scan_plan; - } -@@ -3113,7 +3115,7 @@ create_bitmap_scan_plan(PlannerInfo *root, - bitmapqualorig, - baserelid); - -- copy_generic_path_info(&scan_plan->scan.plan, &best_path->path); -+ copy_generic_path_info(root, &scan_plan->scan.plan, &best_path->path); - - return scan_plan; - } -@@ -3433,7 +3435,7 @@ create_tidscan_plan(PlannerInfo *root, TidPath *best_path, - scan_relid, - tidquals); - -- copy_generic_path_info(&scan_plan->scan.plan, &best_path->path); -+ copy_generic_path_info(root, &scan_plan->scan.plan, &best_path->path); - - return scan_plan; - } -@@ -3483,7 +3485,7 @@ create_subqueryscan_plan(PlannerInfo *root, SubqueryScanPath *best_path, - scan_relid, - subplan); - -- copy_generic_path_info(&scan_plan->scan.plan, &best_path->path); -+ copy_generic_path_info(root, &scan_plan->scan.plan, &best_path->path); - - return scan_plan; - } -@@ -3526,7 +3528,7 @@ create_functionscan_plan(PlannerInfo *root, Path *best_path, - scan_plan = make_functionscan(tlist, scan_clauses, scan_relid, - functions, rte->funcordinality); - -- copy_generic_path_info(&scan_plan->scan.plan, best_path); -+ copy_generic_path_info(root, &scan_plan->scan.plan, best_path); - - return scan_plan; - } -@@ -3569,7 +3571,7 @@ create_tablefuncscan_plan(PlannerInfo *root, Path *best_path, - 
scan_plan = make_tablefuncscan(tlist, scan_clauses, scan_relid, - tablefunc); - -- copy_generic_path_info(&scan_plan->scan.plan, best_path); -+ copy_generic_path_info(root, &scan_plan->scan.plan, best_path); - - return scan_plan; - } -@@ -3613,7 +3615,7 @@ create_valuesscan_plan(PlannerInfo *root, Path *best_path, - scan_plan = make_valuesscan(tlist, scan_clauses, scan_relid, - values_lists); - -- copy_generic_path_info(&scan_plan->scan.plan, best_path); -+ copy_generic_path_info(root, &scan_plan->scan.plan, best_path); - - return scan_plan; - } -@@ -3706,7 +3708,7 @@ create_ctescan_plan(PlannerInfo *root, Path *best_path, - scan_plan = make_ctescan(tlist, scan_clauses, scan_relid, - plan_id, cte_param_id); - -- copy_generic_path_info(&scan_plan->scan.plan, best_path); -+ copy_generic_path_info(root, &scan_plan->scan.plan, best_path); - - return scan_plan; - } -@@ -3745,7 +3747,7 @@ create_namedtuplestorescan_plan(PlannerInfo *root, Path *best_path, - scan_plan = make_namedtuplestorescan(tlist, scan_clauses, scan_relid, - rte->enrname); - -- copy_generic_path_info(&scan_plan->scan.plan, best_path); -+ copy_generic_path_info(root, &scan_plan->scan.plan, best_path); - - return scan_plan; - } -@@ -3783,7 +3785,7 @@ create_resultscan_plan(PlannerInfo *root, Path *best_path, - - scan_plan = make_result(tlist, (Node *) scan_clauses, NULL); - -- copy_generic_path_info(&scan_plan->plan, best_path); -+ copy_generic_path_info(root, &scan_plan->plan, best_path); - - return scan_plan; - } -@@ -3843,7 +3845,7 @@ create_worktablescan_plan(PlannerInfo *root, Path *best_path, - scan_plan = make_worktablescan(tlist, scan_clauses, scan_relid, - cteroot->wt_param_id); - -- copy_generic_path_info(&scan_plan->scan.plan, best_path); -+ copy_generic_path_info(root, &scan_plan->scan.plan, best_path); - - return scan_plan; - } -@@ -3903,7 +3905,7 @@ create_foreignscan_plan(PlannerInfo *root, ForeignPath *best_path, - outer_plan); - - /* Copy cost data from Path to Plan; no need to make FDW 
do this */ -- copy_generic_path_info(&scan_plan->scan.plan, &best_path->path); -+ copy_generic_path_info(root, &scan_plan->scan.plan, &best_path->path); - - /* Copy foreign server OID; likewise, no need to make FDW do this */ - scan_plan->fs_server = rel->serverid; -@@ -4037,7 +4039,7 @@ create_customscan_plan(PlannerInfo *root, CustomPath *best_path, - * Copy cost data from Path to Plan; no need to make custom-plan providers - * do this - */ -- copy_generic_path_info(&cplan->scan.plan, &best_path->path); -+ copy_generic_path_info(root, &cplan->scan.plan, &best_path->path); - - /* Likewise, copy the relids that are represented by this custom scan */ - cplan->custom_relids = best_path->path.parent->relids; -@@ -4139,7 +4141,7 @@ create_nestloop_plan(PlannerInfo *root, - best_path->jointype, - best_path->inner_unique); -- copy_generic_path_info(&join_plan->join.plan, &best_path->path); -+ copy_generic_path_info(root, &join_plan->join.plan, &best_path->path); - - return join_plan; - } -@@ -4446,7 +4448,7 @@ create_mergejoin_plan(PlannerInfo *root, - best_path->skip_mark_restore); - - /* Costs of sort and material steps are included in path cost already */ -- copy_generic_path_info(&join_plan->join.plan, &best_path->jpath.path); -+ copy_generic_path_info(root, &join_plan->join.plan, &best_path->jpath.path); - - return join_plan; - } -@@ -4619,7 +4621,7 @@ create_hashjoin_plan(PlannerInfo *root, - best_path->jpath.jointype, - best_path->jpath.inner_unique); - -- copy_generic_path_info(&join_plan->join.plan, &best_path->jpath.path); -+ copy_generic_path_info(root, &join_plan->join.plan, &best_path->jpath.path); - - return join_plan; - } -@@ -5119,7 +5121,7 @@ order_qual_clauses(PlannerInfo *root, List *clauses) - * Also copy the parallel-related flags, which the executor *will* use. 
- */ - static void --copy_generic_path_info(Plan *dest, Path *src) -+copy_generic_path_info(PlannerInfo *root, Plan *dest, Path *src) - { - dest->startup_cost = src->startup_cost; - dest->total_cost = src->total_cost; -@@ -5127,6 +5129,9 @@ copy_generic_path_info(Plan *dest, Path *src) +@@ -5163,6 +5168,7 @@ copy_generic_path_info(Plan *dest, Path *src) dest->plan_width = src->pathtarget->width; dest->parallel_aware = src->parallel_aware; dest->parallel_safe = src->parallel_safe; -+ -+ if (copy_generic_path_info_hook) -+ (*copy_generic_path_info_hook) (root, dest, src); ++ dest->ext_nodes = NIL; } /* +diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c +index 821693c60ee..fa627f472f9 100644 +--- a/src/backend/optimizer/plan/planner.c ++++ b/src/backend/optimizer/plan/planner.c +@@ -145,7 +145,8 @@ static List *extract_rollup_sets(List *groupingSets); + static List *reorder_grouping_sets(List *groupingSets, List *sortclause); + static void standard_qp_callback(PlannerInfo *root, void *extra); + static double get_number_of_groups(PlannerInfo *root, +- double path_rows, ++ Path *subpath, ++ RelOptInfo *grouped_rel, + grouping_sets_data *gd, + List *target_list); + static RelOptInfo *create_grouping_paths(PlannerInfo *root, +@@ -3682,7 +3683,8 @@ standard_qp_callback(PlannerInfo *root, void *extra) + */ + static double + get_number_of_groups(PlannerInfo *root, +- double path_rows, ++ Path *subpath, ++ RelOptInfo *grouped_rel, + grouping_sets_data *gd, + List *target_list) + { +@@ -3719,7 +3721,7 @@ get_number_of_groups(PlannerInfo *root, + GroupingSetData *gs = lfirst_node(GroupingSetData, lc2); + double numGroups = estimate_num_groups(root, + groupExprs, +- path_rows, ++ subpath->rows, + &gset); + + gs->numGroups = numGroups; +@@ -3744,7 +3746,7 @@ get_number_of_groups(PlannerInfo *root, + GroupingSetData *gs = lfirst_node(GroupingSetData, lc2); + double numGroups = estimate_num_groups(root, + groupExprs, +- path_rows, ++ 
subpath->rows, + &gset); + + gs->numGroups = numGroups; +@@ -3760,8 +3762,8 @@ get_number_of_groups(PlannerInfo *root, + groupExprs = get_sortgrouplist_exprs(parse->groupClause, + target_list); + +- dNumGroups = estimate_num_groups(root, groupExprs, path_rows, +- NULL); ++ dNumGroups = estimate_num_groups_ext(root, groupExprs, subpath, ++ grouped_rel, NULL); + } + } + else if (parse->groupingSets) +@@ -4147,7 +4149,8 @@ create_ordinary_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel, + * Estimate number of groups. + */ + dNumGroups = get_number_of_groups(root, +- cheapest_path->rows, ++ cheapest_path, ++ grouped_rel, + gd, + extra->targetList); + +@@ -6931,13 +6934,15 @@ create_partial_grouping_paths(PlannerInfo *root, + if (cheapest_total_path != NULL) + dNumPartialGroups = + get_number_of_groups(root, +- cheapest_total_path->rows, ++ cheapest_total_path, ++ partially_grouped_rel, + gd, + extra->targetList); + if (cheapest_partial_path != NULL) + dNumPartialPartialGroups = + get_number_of_groups(root, +- cheapest_partial_path->rows, ++ cheapest_partial_path, ++ partially_grouped_rel, + gd, + extra->targetList); + diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c -index 76245c1ff3..cac6adf35e 100644 +index a203e6f1ff5..d31bf5bae63 100644 --- a/src/backend/optimizer/util/relnode.c +++ b/src/backend/optimizer/util/relnode.c -@@ -1261,6 +1261,7 @@ find_childrel_parents(PlannerInfo *root, RelOptInfo *rel) +@@ -258,6 +258,7 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptInfo *parent) + rel->partexprs = NULL; + rel->nullable_partexprs = NULL; + rel->partitioned_child_rels = NIL; ++ rel->ext_nodes = NULL; + + /* + * Pass assorted information down the inheritance hierarchy. 
+@@ -383,7 +384,6 @@ find_base_rel(PlannerInfo *root, int relid) + if (rel) + return rel; + } +- + elog(ERROR, "no relation entry for relid %d", relid); + + return NULL; /* keep compiler quiet */ +@@ -673,6 +673,7 @@ build_join_rel(PlannerInfo *root, + joinrel->partexprs = NULL; + joinrel->nullable_partexprs = NULL; + joinrel->partitioned_child_rels = NIL; ++ joinrel->ext_nodes = NULL; + + /* Compute information relevant to the foreign relations. */ + set_foreign_rel_properties(joinrel, outer_rel, inner_rel); +@@ -851,6 +852,7 @@ build_child_join_rel(PlannerInfo *root, RelOptInfo *outer_rel, + joinrel->partexprs = NULL; + joinrel->nullable_partexprs = NULL; + joinrel->partitioned_child_rels = NIL; ++ joinrel->ext_nodes = NULL; + + joinrel->top_parent_relids = bms_union(outer_rel->top_parent_relids, + inner_rel->top_parent_relids); +@@ -1264,6 +1266,7 @@ find_childrel_parents(PlannerInfo *root, RelOptInfo *rel) } @@ -877,7 +514,7 @@ index 76245c1ff3..cac6adf35e 100644 /* * get_baserel_parampathinfo * Get the ParamPathInfo for a parameterized path for a base relation, -@@ -1329,6 +1330,10 @@ get_baserel_parampathinfo(PlannerInfo *root, RelOptInfo *baserel, +@@ -1332,6 +1335,10 @@ get_baserel_parampathinfo(PlannerInfo *root, RelOptInfo *baserel, ppi->ppi_req_outer = required_outer; ppi->ppi_rows = rows; ppi->ppi_clauses = pclauses; @@ -888,7 +525,7 @@ index 76245c1ff3..cac6adf35e 100644 baserel->ppilist = lappend(baserel->ppilist, ppi); return ppi; -@@ -1554,6 +1559,10 @@ get_joinrel_parampathinfo(PlannerInfo *root, RelOptInfo *joinrel, +@@ -1557,6 +1564,10 @@ get_joinrel_parampathinfo(PlannerInfo *root, RelOptInfo *joinrel, ppi->ppi_req_outer = required_outer; ppi->ppi_rows = rows; ppi->ppi_clauses = NIL; @@ -899,11 +536,43 @@ index 76245c1ff3..cac6adf35e 100644 joinrel->ppilist = lappend(joinrel->ppilist, ppi); return ppi; +diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c +index 37458da096d..248a1875a18 100644 +--- 
a/src/backend/utils/adt/selfuncs.c ++++ b/src/backend/utils/adt/selfuncs.c +@@ -147,6 +147,7 @@ + /* Hooks for plugins to get control when we ask for stats */ + get_relation_stats_hook_type get_relation_stats_hook = NULL; + get_index_stats_hook_type get_index_stats_hook = NULL; ++estimate_num_groups_hook_type estimate_num_groups_hook = NULL; + + static double eqsel_internal(PG_FUNCTION_ARGS, bool negate); + static double eqjoinsel_inner(Oid opfuncoid, Oid collation, +@@ -3295,6 +3296,19 @@ add_unique_group_var(PlannerInfo *root, List *varinfos, + return varinfos; + } + ++double ++estimate_num_groups_ext(PlannerInfo *root, List *groupExprs, Path *subpath, ++ RelOptInfo *grouped_rel, List **pgset) ++{ ++ double input_rows = subpath->rows; ++ ++ if (estimate_num_groups_hook != NULL) ++ return (*estimate_num_groups_hook)(root, groupExprs, subpath, ++ grouped_rel, pgset); ++ ++ return estimate_num_groups(root, groupExprs, input_rows, pgset); ++} ++ + /* + * estimate_num_groups - Estimate number of groups in a grouped query + * diff --git a/src/include/commands/explain.h b/src/include/commands/explain.h -index ba661d32a6..3c2595d639 100644 +index ba661d32a63..09d0abe58be 100644 --- a/src/include/commands/explain.h +++ b/src/include/commands/explain.h -@@ -75,6 +75,12 @@ extern PGDLLIMPORT ExplainOneQuery_hook_type ExplainOneQuery_hook; +@@ -75,6 +75,18 @@ extern PGDLLIMPORT ExplainOneQuery_hook_type ExplainOneQuery_hook; typedef const char *(*explain_get_index_name_hook_type) (Oid indexId); extern PGDLLIMPORT explain_get_index_name_hook_type explain_get_index_name_hook; @@ -913,14 +582,20 @@ index ba661d32a6..3c2595d639 100644 + ParamListInfo params, const instr_time *planduration, + QueryEnvironment *queryEnv); +extern PGDLLIMPORT ExplainOnePlan_hook_type ExplainOnePlan_hook; ++ ++/* Explain a node info */ ++typedef void (*ExplainOneNode_hook_type) (ExplainState *es, ++ PlanState *ps, ++ Plan *plan); ++extern PGDLLIMPORT ExplainOneNode_hook_type ExplainOneNode_hook; 
extern void ExplainQuery(ParseState *pstate, ExplainStmt *stmt, ParamListInfo params, DestReceiver *dest); diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h -index 8f62d61702..cfcd2c249d 100644 +index d2b4271de9d..559b9db7121 100644 --- a/src/include/nodes/pathnodes.h +++ b/src/include/nodes/pathnodes.h -@@ -734,6 +734,10 @@ typedef struct RelOptInfo +@@ -739,6 +739,10 @@ typedef struct RelOptInfo Relids top_parent_relids; /* Relids of topmost parents (if "other" * rel) */ @@ -931,7 +606,20 @@ index 8f62d61702..cfcd2c249d 100644 /* used for partitioned relations: */ PartitionScheme part_scheme; /* Partitioning scheme */ int nparts; /* Number of partitions; -1 if not yet set; in -@@ -1101,6 +1105,10 @@ typedef struct ParamPathInfo +@@ -754,6 +758,12 @@ typedef struct RelOptInfo + List **partexprs; /* Non-nullable partition key expressions */ + List **nullable_partexprs; /* Nullable partition key expressions */ + List *partitioned_child_rels; /* List of RT indexes */ ++ ++ /* ++ * At this list an extension can add additional nodes to pass an info along ++ * the planning and executing stages. 
++ */ ++ List *ext_nodes; + } RelOptInfo; + + /* +@@ -1105,6 +1115,10 @@ typedef struct ParamPathInfo Relids ppi_req_outer; /* rels supplying parameters used by path */ double ppi_rows; /* estimated number of result tuples */ List *ppi_clauses; /* join clauses available from outer rels */ @@ -943,42 +631,36 @@ index 8f62d61702..cfcd2c249d 100644 diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h -index 7e6b10f86b..148720a566 100644 +index 90f02ce6fdd..88c332164dd 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h -@@ -140,6 +140,19 @@ typedef struct Plan - List *initPlan; /* Init Plan nodes (un-correlated expr - * subselects) */ - -+ /* -+ * information for adaptive query optimization -+ */ -+ bool had_path; -+ List *path_clauses; -+ List *path_relids; -+ JoinType path_jointype; -+ int path_parallel_workers; -+ bool was_parametrized; -+ /* For Adaptive optimization DEBUG purposes */ -+ double predicted_cardinality; -+ int fss_hash; +@@ -159,6 +159,9 @@ typedef struct Plan + */ + Bitmapset *extParam; + Bitmapset *allParam; + - /* - * Information for management of parameter-change-driven rescanning - * ++ /* Additional field for an extension purposes. 
*/ ++ List *ext_nodes; + } Plan; + + /* ---------------- diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h -index 6141654e47..0915da8618 100644 +index 6141654e478..e6b28cbb05f 100644 --- a/src/include/optimizer/cost.h +++ b/src/include/optimizer/cost.h -@@ -39,6 +39,33 @@ typedef enum +@@ -39,6 +39,37 @@ typedef enum } ConstraintExclusionType; +/* Hook for plugins to get control of cardinality estimation */ +typedef void (*set_baserel_rows_estimate_hook_type) (PlannerInfo *root, + RelOptInfo *rel); ++typedef void (*set_foreign_rows_estimate_hook_type) (PlannerInfo *root, ++ RelOptInfo *rel); +extern PGDLLIMPORT set_baserel_rows_estimate_hook_type + set_baserel_rows_estimate_hook; ++extern PGDLLIMPORT set_foreign_rows_estimate_hook_type ++ set_foreign_rows_estimate_hook; +typedef double (*get_parameterized_baserel_size_hook_type) (PlannerInfo *root, + RelOptInfo *rel, + List *param_clauses); @@ -1004,10 +686,11 @@ index 6141654e47..0915da8618 100644 /* * prototypes for costsize.c * routines to compute costs and sizes -@@ -175,10 +202,21 @@ extern void compute_semi_anti_join_factors(PlannerInfo *root, +@@ -175,10 +206,22 @@ extern void compute_semi_anti_join_factors(PlannerInfo *root, SpecialJoinInfo *sjinfo, List *restrictlist, SemiAntiJoinFactors *semifactors); ++extern void set_foreign_rows_estimate(PlannerInfo *root, RelOptInfo *rel); +extern void set_baserel_rows_estimate(PlannerInfo *root, RelOptInfo *rel); +extern void set_baserel_rows_estimate_standard(PlannerInfo *root, RelOptInfo *rel); extern void set_baserel_size_estimates(PlannerInfo *root, RelOptInfo *rel); @@ -1026,7 +709,7 @@ index 6141654e47..0915da8618 100644 extern double get_parameterized_joinrel_size(PlannerInfo *root, RelOptInfo *rel, Path *outer_path, -@@ -190,6 +228,11 @@ extern void set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, +@@ -190,6 +233,11 @@ extern void set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, RelOptInfo *inner_rel, 
SpecialJoinInfo *sjinfo, List *restrictlist); @@ -1038,16 +721,15 @@ index 6141654e47..0915da8618 100644 extern void set_subquery_size_estimates(PlannerInfo *root, RelOptInfo *rel); extern void set_function_size_estimates(PlannerInfo *root, RelOptInfo *rel); extern void set_values_size_estimates(PlannerInfo *root, RelOptInfo *rel); -@@ -202,5 +245,7 @@ extern void set_foreign_size_estimates(PlannerInfo *root, RelOptInfo *rel); +@@ -202,5 +250,6 @@ extern void set_foreign_size_estimates(PlannerInfo *root, RelOptInfo *rel); extern PathTarget *set_pathtarget_cost_width(PlannerInfo *root, PathTarget *target); extern double compute_bitmap_pages(PlannerInfo *root, RelOptInfo *baserel, Path *bitmapqual, int loop_count, Cost *cost, double *tuple); -+extern bool IsParallelTuplesProcessing(const Plan *plan); -+extern double get_parallel_divisor(int parallel_workers); ++extern double get_parallel_divisor(Path *path); #endif /* COST_H */ diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h -index 715a24ad29..7311ba92f4 100644 +index 3bd7072ae8c..21bbaba11c8 100644 --- a/src/include/optimizer/pathnode.h +++ b/src/include/optimizer/pathnode.h @@ -18,6 +18,10 @@ @@ -1062,19 +744,46 @@ index 715a24ad29..7311ba92f4 100644 * prototypes for pathnode.c */ diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h -index 81c4a7e560..59daf7fb81 100644 +index 8ce60e202e5..75415102c2e 100644 --- a/src/include/optimizer/planmain.h +++ b/src/include/optimizer/planmain.h @@ -24,6 +24,12 @@ extern double cursor_tuple_fraction; /* query_planner callback to compute query_pathkeys */ typedef void (*query_pathkeys_callback) (PlannerInfo *root, void *extra); -+/* hook for plugins to get control in creating plan from path */ -+typedef void (*copy_generic_path_info_hook_type) (PlannerInfo *root, -+ Plan *dest, Path *src); -+ -+extern PGDLLIMPORT copy_generic_path_info_hook_type copy_generic_path_info_hook; + ++/* Hook for plugins to get control in 
ExecutorRun() */ ++typedef void (*create_plan_hook_type) (PlannerInfo *root, ++ Path *best_path, ++ Plan **plan); ++extern PGDLLIMPORT create_plan_hook_type create_plan_hook; /* * prototypes for plan/planmain.c */ +diff --git a/src/include/utils/selfuncs.h b/src/include/utils/selfuncs.h +index 7ac4a063915..74fe91b89f1 100644 +--- a/src/include/utils/selfuncs.h ++++ b/src/include/utils/selfuncs.h +@@ -127,6 +127,12 @@ typedef bool (*get_index_stats_hook_type) (PlannerInfo *root, + AttrNumber indexattnum, + VariableStatData *vardata); + extern PGDLLIMPORT get_index_stats_hook_type get_index_stats_hook; ++typedef double (*estimate_num_groups_hook_type) (PlannerInfo *root, ++ List *groupExprs, ++ Path *subpath, ++ RelOptInfo *grouped_rel, ++ List **pgset); ++extern PGDLLIMPORT estimate_num_groups_hook_type estimate_num_groups_hook; + + /* Functions in selfuncs.c */ + +@@ -193,6 +199,9 @@ extern void mergejoinscansel(PlannerInfo *root, Node *clause, + Selectivity *leftstart, Selectivity *leftend, + Selectivity *rightstart, Selectivity *rightend); + ++extern double estimate_num_groups_ext(PlannerInfo *root, List *groupExprs, ++ Path *subpath, RelOptInfo *grouped_rel, ++ List **pgset); + extern double estimate_num_groups(PlannerInfo *root, List *groupExprs, + double input_rows, List **pgset); + diff --git a/aqo_pg9_6.patch b/aqo_pg9_6.patch deleted file mode 100644 index 68ceacdb..00000000 --- a/aqo_pg9_6.patch +++ /dev/null @@ -1,758 +0,0 @@ -diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c -index 3244c76..8229702 100644 ---- a/src/backend/nodes/copyfuncs.c -+++ b/src/backend/nodes/copyfuncs.c -@@ -121,6 +121,12 @@ CopyPlanFields(const Plan *from, Plan *newnode) - COPY_NODE_FIELD(lefttree); - COPY_NODE_FIELD(righttree); - COPY_NODE_FIELD(initPlan); -+ COPY_SCALAR_FIELD(had_path); -+ COPY_NODE_FIELD(path_clauses); -+ COPY_NODE_FIELD(path_relids); -+ COPY_SCALAR_FIELD(path_jointype); -+ COPY_SCALAR_FIELD(path_parallel_workers); -+ 
COPY_SCALAR_FIELD(was_parametrized); - COPY_BITMAPSET_FIELD(extParam); - COPY_BITMAPSET_FIELD(allParam); - } -diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c -index 2a49639..7b9bdef 100644 ---- a/src/backend/optimizer/path/costsize.c -+++ b/src/backend/optimizer/path/costsize.c -@@ -100,6 +100,10 @@ - - #define LOG2(x) (log(x) / 0.693147180559945) - -+set_baserel_rows_estimate_hook_type set_baserel_rows_estimate_hook = NULL; -+get_parameterized_baserel_size_hook_type get_parameterized_baserel_size_hook = NULL; -+get_parameterized_joinrel_size_hook_type get_parameterized_joinrel_size_hook = NULL; -+set_joinrel_size_estimates_hook_type set_joinrel_size_estimates_hook = NULL; - - double seq_page_cost = DEFAULT_SEQ_PAGE_COST; - double random_page_cost = DEFAULT_RANDOM_PAGE_COST; -@@ -3754,6 +3758,49 @@ approx_tuple_count(PlannerInfo *root, JoinPath *path, List *quals) - - - /* -+ * set_baserel_rows_estimate -+ * Set the rows estimate for the given base relation. -+ * -+ * Rows is the estimated number of output tuples after applying -+ * restriction clauses. -+ * -+ * To support loadable plugins that monitor or modify cardinality estimation, -+ * we provide a hook variable that lets a plugin get control before and -+ * after the cardinality estimation. -+ * The hook must set rel->rows. -+ */ -+void -+set_baserel_rows_estimate(PlannerInfo *root, RelOptInfo *rel) -+{ -+ if (set_baserel_rows_estimate_hook) -+ (*set_baserel_rows_estimate_hook) (root, rel); -+ else -+ set_baserel_rows_estimate_standard(root, rel); -+} -+ -+/* -+ * set_baserel_rows_estimate -+ * Set the rows estimate for the given base relation. -+ * -+ * Rows is the estimated number of output tuples after applying -+ * restriction clauses. 
-+ */ -+void -+set_baserel_rows_estimate_standard(PlannerInfo *root, RelOptInfo *rel) -+{ -+ double nrows; -+ -+ nrows = rel->tuples * -+ clauselist_selectivity(root, -+ rel->baserestrictinfo, -+ 0, -+ JOIN_INNER, -+ NULL); -+ -+ rel->rows = clamp_row_est(nrows); -+} -+ -+/* - * set_baserel_size_estimates - * Set the size estimates for the given base relation. - * -@@ -3769,19 +3816,10 @@ approx_tuple_count(PlannerInfo *root, JoinPath *path, List *quals) - void - set_baserel_size_estimates(PlannerInfo *root, RelOptInfo *rel) - { -- double nrows; -- - /* Should only be applied to base relations */ - Assert(rel->relid > 0); - -- nrows = rel->tuples * -- clauselist_selectivity(root, -- rel->baserestrictinfo, -- 0, -- JOIN_INNER, -- NULL); -- -- rel->rows = clamp_row_est(nrows); -+ set_baserel_rows_estimate(root, rel); - - cost_qual_eval(&rel->baserestrictcost, rel->baserestrictinfo, root); - -@@ -3792,13 +3830,33 @@ set_baserel_size_estimates(PlannerInfo *root, RelOptInfo *rel) - * get_parameterized_baserel_size - * Make a size estimate for a parameterized scan of a base relation. - * -+ * To support loadable plugins that monitor or modify cardinality estimation, -+ * we provide a hook variable that lets a plugin get control before and -+ * after the cardinality estimation. -+ */ -+double -+get_parameterized_baserel_size(PlannerInfo *root, RelOptInfo *rel, -+ List *param_clauses) -+{ -+ if (get_parameterized_baserel_size_hook) -+ return (*get_parameterized_baserel_size_hook) (root, rel, -+ param_clauses); -+ else -+ return get_parameterized_baserel_size_standard(root, rel, -+ param_clauses); -+} -+ -+/* -+ * get_parameterized_baserel_size_standard -+ * Make a size estimate for a parameterized scan of a base relation. -+ * - * 'param_clauses' lists the additional join clauses to be used. - * - * set_baserel_size_estimates must have been applied already. 
- */ - double --get_parameterized_baserel_size(PlannerInfo *root, RelOptInfo *rel, -- List *param_clauses) -+get_parameterized_baserel_size_standard(PlannerInfo *root, RelOptInfo *rel, -+ List *param_clauses) - { - List *allclauses; - double nrows; -@@ -3828,6 +3886,36 @@ get_parameterized_baserel_size(PlannerInfo *root, RelOptInfo *rel, - * set_joinrel_size_estimates - * Set the size estimates for the given join relation. - * -+ * To support loadable plugins that monitor or modify cardinality estimation, -+ * we provide a hook variable that lets a plugin get control before and -+ * after the cardinality estimation. -+ * The hook must set rel->rows value. -+ */ -+void -+set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, -+ RelOptInfo *outer_rel, -+ RelOptInfo *inner_rel, -+ SpecialJoinInfo *sjinfo, -+ List *restrictlist) -+{ -+ if (set_joinrel_size_estimates_hook) -+ (*set_joinrel_size_estimates_hook) (root, rel, -+ outer_rel, -+ inner_rel, -+ sjinfo, -+ restrictlist); -+ else -+ set_joinrel_size_estimates_standard(root, rel, -+ outer_rel, -+ inner_rel, -+ sjinfo, -+ restrictlist); -+} -+ -+/* -+ * set_joinrel_size_estimates_standard -+ * Set the size estimates for the given join relation. -+ * - * The rel's targetlist must have been constructed already, and a - * restriction clause list that matches the given component rels must - * be provided. -@@ -3847,11 +3935,11 @@ get_parameterized_baserel_size(PlannerInfo *root, RelOptInfo *rel, - * build_joinrel_tlist, and baserestrictcost is not used for join rels. 
- */ - void --set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, -- RelOptInfo *outer_rel, -- RelOptInfo *inner_rel, -- SpecialJoinInfo *sjinfo, -- List *restrictlist) -+set_joinrel_size_estimates_standard(PlannerInfo *root, RelOptInfo *rel, -+ RelOptInfo *outer_rel, -+ RelOptInfo *inner_rel, -+ SpecialJoinInfo *sjinfo, -+ List *restrictlist) - { - rel->rows = calc_joinrel_size_estimate(root, - outer_rel, -@@ -3866,6 +3954,35 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, - * get_parameterized_joinrel_size - * Make a size estimate for a parameterized scan of a join relation. - * -+ * To support loadable plugins that monitor or modify cardinality estimation, -+ * we provide a hook variable that lets a plugin get control before and -+ * after the cardinality estimation. -+ */ -+double -+get_parameterized_joinrel_size(PlannerInfo *root, RelOptInfo *rel, -+ Path *outer_path, -+ Path *inner_path, -+ SpecialJoinInfo *sjinfo, -+ List *restrict_clauses) -+{ -+ if (get_parameterized_joinrel_size_hook) -+ return (*get_parameterized_joinrel_size_hook) (root, rel, -+ outer_path, -+ inner_path, -+ sjinfo, -+ restrict_clauses); -+ else -+ return get_parameterized_joinrel_size_standard(root, rel, -+ outer_path, -+ inner_path, -+ sjinfo, -+ restrict_clauses); -+} -+ -+/* -+ * get_parameterized_joinrel_size_standard -+ * Make a size estimate for a parameterized scan of a join relation. -+ * - * 'rel' is the joinrel under consideration. - * 'outer_path', 'inner_path' are (probably also parameterized) Paths that - * produce the relations being joined. -@@ -3878,11 +3995,11 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, - * set_joinrel_size_estimates must have been applied already. 
- */ - double --get_parameterized_joinrel_size(PlannerInfo *root, RelOptInfo *rel, -- Path *outer_path, -- Path *inner_path, -- SpecialJoinInfo *sjinfo, -- List *restrict_clauses) -+get_parameterized_joinrel_size_standard(PlannerInfo *root, RelOptInfo *rel, -+ Path *outer_path, -+ Path *inner_path, -+ SpecialJoinInfo *sjinfo, -+ List *restrict_clauses) - { - double nrows; - -diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c -index 54d601f..b212325 100644 ---- a/src/backend/optimizer/plan/createplan.c -+++ b/src/backend/optimizer/plan/createplan.c -@@ -67,6 +67,8 @@ - #define CP_SMALL_TLIST 0x0002 /* Prefer narrower tlists */ - #define CP_LABEL_TLIST 0x0004 /* tlist must contain sortgrouprefs */ - -+/* Hook for plugins to get control in creating plan from path */ -+copy_generic_path_info_hook_type copy_generic_path_info_hook = NULL; - - static Plan *create_plan_recurse(PlannerInfo *root, Path *best_path, - int flags); -@@ -154,7 +156,7 @@ static List *fix_indexorderby_references(PlannerInfo *root, IndexPath *index_pat - static Node *fix_indexqual_operand(Node *node, IndexOptInfo *index, int indexcol); - static List *get_switched_clauses(List *clauses, Relids outerrelids); - static List *order_qual_clauses(PlannerInfo *root, List *clauses); --static void copy_generic_path_info(Plan *dest, Path *src); -+static void copy_generic_path_info(PlannerInfo *root, Plan *dest, Path *src); - static void copy_plan_costsize(Plan *dest, Plan *src); - static void label_sort_with_costsize(PlannerInfo *root, Sort *plan, - double limit_tuples); -@@ -977,7 +979,7 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path) - false)), - NULL); - -- copy_generic_path_info(plan, (Path *) best_path); -+ copy_generic_path_info(root, plan, (Path *) best_path); - - return plan; - } -@@ -1003,7 +1005,7 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path) - - plan = make_append(subplans, tlist); - -- copy_generic_path_info(&plan->plan, 
(Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return (Plan *) plan; - } -@@ -1031,7 +1033,7 @@ create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path) - * prepare_sort_from_pathkeys on it before we do so on the individual - * child plans, to make cross-checking the sort info easier. - */ -- copy_generic_path_info(plan, (Path *) best_path); -+ copy_generic_path_info(root, plan, (Path *) best_path); - plan->targetlist = tlist; - plan->qual = NIL; - plan->lefttree = NULL; -@@ -1136,7 +1138,7 @@ create_result_plan(PlannerInfo *root, ResultPath *best_path) - - plan = make_result(tlist, (Node *) quals, NULL); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ -1164,7 +1166,7 @@ create_material_plan(PlannerInfo *root, MaterialPath *best_path, int flags) - - plan = make_material(subplan); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ -1367,7 +1369,7 @@ create_unique_plan(PlannerInfo *root, UniquePath *best_path, int flags) - } - - /* Copy cost data from Path to Plan */ -- copy_generic_path_info(plan, &best_path->path); -+ copy_generic_path_info(root, plan, &best_path->path); - - return plan; - } -@@ -1399,7 +1401,7 @@ create_gather_plan(PlannerInfo *root, GatherPath *best_path) - best_path->single_copy, - subplan); - -- copy_generic_path_info(&gather_plan->plan, &best_path->path); -+ copy_generic_path_info(root, &gather_plan->plan, &best_path->path); - - /* use parallel mode for parallel plans. 
*/ - root->glob->parallelModeNeeded = true; -@@ -1460,7 +1462,7 @@ create_projection_plan(PlannerInfo *root, ProjectionPath *best_path) - /* We need a Result node */ - plan = (Plan *) make_result(tlist, NULL, subplan); - -- copy_generic_path_info(plan, (Path *) best_path); -+ copy_generic_path_info(root, plan, (Path *) best_path); - } - - return plan; -@@ -1515,7 +1517,7 @@ create_sort_plan(PlannerInfo *root, SortPath *best_path, int flags) - - plan = make_sort_from_pathkeys(subplan, best_path->path.pathkeys); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ -1552,7 +1554,7 @@ create_group_plan(PlannerInfo *root, GroupPath *best_path) - extract_grouping_ops(best_path->groupClause), - subplan); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ -1580,7 +1582,7 @@ create_upper_unique_plan(PlannerInfo *root, UpperUniquePath *best_path, int flag - best_path->path.pathkeys, - best_path->numkeys); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ -1621,7 +1623,7 @@ create_agg_plan(PlannerInfo *root, AggPath *best_path) - best_path->numGroups, - subplan); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ -1809,7 +1811,7 @@ create_groupingsets_plan(PlannerInfo *root, GroupingSetsPath *best_path) - subplan); - - /* Copy cost data from Path to Plan */ -- copy_generic_path_info(&plan->plan, &best_path->path); -+ copy_generic_path_info(root, &plan->plan, &best_path->path); - } - - return (Plan *) plan; -@@ -1864,7 +1866,7 @@ create_minmaxagg_plan(PlannerInfo *root, MinMaxAggPath *best_path) - - plan = make_result(tlist, (Node *) best_path->quals, NULL); - -- 
copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - /* - * During setrefs.c, we'll need to replace references to the Agg nodes -@@ -1958,7 +1960,7 @@ create_windowagg_plan(PlannerInfo *root, WindowAggPath *best_path) - wc->endOffset, - subplan); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ -2100,7 +2102,7 @@ create_setop_plan(PlannerInfo *root, SetOpPath *best_path, int flags) - best_path->firstFlag, - numGroups); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ -2136,7 +2138,7 @@ create_recursiveunion_plan(PlannerInfo *root, RecursiveUnionPath *best_path) - best_path->distinctList, - numGroups); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ -2159,7 +2161,7 @@ create_lockrows_plan(PlannerInfo *root, LockRowsPath *best_path, - - plan = make_lockrows(subplan, best_path->rowMarks, best_path->epqParam); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ -2217,7 +2219,7 @@ create_modifytable_plan(PlannerInfo *root, ModifyTablePath *best_path) - best_path->onconflict, - best_path->epqParam); - -- copy_generic_path_info(&plan->plan, &best_path->path); -+ copy_generic_path_info(root, &plan->plan, &best_path->path); - - return plan; - } -@@ -2241,7 +2243,7 @@ create_limit_plan(PlannerInfo *root, LimitPath *best_path, int flags) - best_path->limitOffset, - best_path->limitCount); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ -2287,7 +2289,7 @@ create_seqscan_plan(PlannerInfo *root, 
Path *best_path, - scan_clauses, - scan_relid); - -- copy_generic_path_info(&scan_plan->plan, best_path); -+ copy_generic_path_info(root, &scan_plan->plan, best_path); - - return scan_plan; - } -@@ -2333,7 +2335,7 @@ create_samplescan_plan(PlannerInfo *root, Path *best_path, - scan_relid, - tsc); - -- copy_generic_path_info(&scan_plan->scan.plan, best_path); -+ copy_generic_path_info(root, &scan_plan->scan.plan, best_path); - - return scan_plan; - } -@@ -2514,7 +2516,7 @@ create_indexscan_plan(PlannerInfo *root, - indexorderbyops, - best_path->indexscandir); - -- copy_generic_path_info(&scan_plan->plan, &best_path->path); -+ copy_generic_path_info(root, &scan_plan->plan, &best_path->path); - - return scan_plan; - } -@@ -2627,7 +2629,7 @@ create_bitmap_scan_plan(PlannerInfo *root, - bitmapqualorig, - baserelid); - -- copy_generic_path_info(&scan_plan->scan.plan, &best_path->path); -+ copy_generic_path_info(root, &scan_plan->scan.plan, &best_path->path); - - return scan_plan; - } -@@ -2888,7 +2890,7 @@ create_tidscan_plan(PlannerInfo *root, TidPath *best_path, - scan_relid, - tidquals); - -- copy_generic_path_info(&scan_plan->scan.plan, &best_path->path); -+ copy_generic_path_info(root, &scan_plan->scan.plan, &best_path->path); - - return scan_plan; - } -@@ -2938,7 +2940,7 @@ create_subqueryscan_plan(PlannerInfo *root, SubqueryScanPath *best_path, - scan_relid, - subplan); - -- copy_generic_path_info(&scan_plan->scan.plan, &best_path->path); -+ copy_generic_path_info(root, &scan_plan->scan.plan, &best_path->path); - - return scan_plan; - } -@@ -2981,7 +2983,7 @@ create_functionscan_plan(PlannerInfo *root, Path *best_path, - scan_plan = make_functionscan(tlist, scan_clauses, scan_relid, - functions, rte->funcordinality); - -- copy_generic_path_info(&scan_plan->scan.plan, best_path); -+ copy_generic_path_info(root, &scan_plan->scan.plan, best_path); - - return scan_plan; - } -@@ -3025,7 +3027,7 @@ create_valuesscan_plan(PlannerInfo *root, Path *best_path, - scan_plan = 
make_valuesscan(tlist, scan_clauses, scan_relid, - values_lists); - -- copy_generic_path_info(&scan_plan->scan.plan, best_path); -+ copy_generic_path_info(root, &scan_plan->scan.plan, best_path); - - return scan_plan; - } -@@ -3118,7 +3120,7 @@ create_ctescan_plan(PlannerInfo *root, Path *best_path, - scan_plan = make_ctescan(tlist, scan_clauses, scan_relid, - plan_id, cte_param_id); - -- copy_generic_path_info(&scan_plan->scan.plan, best_path); -+ copy_generic_path_info(root, &scan_plan->scan.plan, best_path); - - return scan_plan; - } -@@ -3178,7 +3180,7 @@ create_worktablescan_plan(PlannerInfo *root, Path *best_path, - scan_plan = make_worktablescan(tlist, scan_clauses, scan_relid, - cteroot->wt_param_id); - -- copy_generic_path_info(&scan_plan->scan.plan, best_path); -+ copy_generic_path_info(root, &scan_plan->scan.plan, best_path); - - return scan_plan; - } -@@ -3238,7 +3240,7 @@ create_foreignscan_plan(PlannerInfo *root, ForeignPath *best_path, - outer_plan); - - /* Copy cost data from Path to Plan; no need to make FDW do this */ -- copy_generic_path_info(&scan_plan->scan.plan, &best_path->path); -+ copy_generic_path_info(root, &scan_plan->scan.plan, &best_path->path); - - /* Copy foreign server OID; likewise, no need to make FDW do this */ - scan_plan->fs_server = rel->serverid; -@@ -3365,7 +3367,7 @@ create_customscan_plan(PlannerInfo *root, CustomPath *best_path, - * Copy cost data from Path to Plan; no need to make custom-plan providers - * do this - */ -- copy_generic_path_info(&cplan->scan.plan, &best_path->path); -+ copy_generic_path_info(root, &cplan->scan.plan, &best_path->path); - - /* Likewise, copy the relids that are represented by this custom scan */ - cplan->custom_relids = best_path->path.parent->relids; -@@ -3496,7 +3498,7 @@ create_nestloop_plan(PlannerInfo *root, - inner_plan, - best_path->jointype); - -- copy_generic_path_info(&join_plan->join.plan, &best_path->path); -+ copy_generic_path_info(root, &join_plan->join.plan, 
&best_path->path); - - return join_plan; - } -@@ -3800,7 +3802,7 @@ create_mergejoin_plan(PlannerInfo *root, - best_path->jpath.jointype); - - /* Costs of sort and material steps are included in path cost already */ -- copy_generic_path_info(&join_plan->join.plan, &best_path->jpath.path); -+ copy_generic_path_info(root, &join_plan->join.plan, &best_path->jpath.path); - - return join_plan; - } -@@ -3939,7 +3941,7 @@ create_hashjoin_plan(PlannerInfo *root, - (Plan *) hash_plan, - best_path->jpath.jointype); - -- copy_generic_path_info(&join_plan->join.plan, &best_path->jpath.path); -+ copy_generic_path_info(root, &join_plan->join.plan, &best_path->jpath.path); - - return join_plan; - } -@@ -4578,13 +4580,16 @@ order_qual_clauses(PlannerInfo *root, List *clauses) - * Also copy the parallel-aware flag, which the executor *will* use. - */ - static void --copy_generic_path_info(Plan *dest, Path *src) -+copy_generic_path_info(PlannerInfo *root, Plan *dest, Path *src) - { - dest->startup_cost = src->startup_cost; - dest->total_cost = src->total_cost; - dest->plan_rows = src->rows; - dest->plan_width = src->pathtarget->width; - dest->parallel_aware = src->parallel_aware; -+ -+ if (copy_generic_path_info_hook) -+ (*copy_generic_path_info_hook) (root, dest, src); - } - - /* -diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h -index 369179f..6e81ae8 100644 ---- a/src/include/nodes/plannodes.h -+++ b/src/include/nodes/plannodes.h -@@ -125,6 +125,16 @@ typedef struct Plan - * subselects) */ - - /* -+ * information for adaptive query optimization -+ */ -+ bool had_path; -+ List *path_clauses; -+ List *path_relids; -+ JoinType path_jointype; -+ int path_parallel_workers; -+ bool was_parametrized; -+ -+ /* - * Information for management of parameter-change-driven rescanning - * - * extParam includes the paramIDs of all external PARAM_EXEC params -diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h -index 2a4df2f..64994a7 100644 ---- 
a/src/include/optimizer/cost.h -+++ b/src/include/optimizer/cost.h -@@ -39,6 +39,34 @@ typedef enum - } ConstraintExclusionType; - - -+/* Hook for plugins to get control of cardinality estimation */ -+typedef void (*set_baserel_rows_estimate_hook_type) (PlannerInfo *root, -+ RelOptInfo *rel); -+extern PGDLLIMPORT set_baserel_rows_estimate_hook_type -+ set_baserel_rows_estimate_hook; -+typedef double (*get_parameterized_baserel_size_hook_type) (PlannerInfo *root, -+ RelOptInfo *rel, -+ List *param_clauses); -+extern PGDLLIMPORT get_parameterized_baserel_size_hook_type -+ get_parameterized_baserel_size_hook; -+typedef double (*get_parameterized_joinrel_size_hook_type) (PlannerInfo *root, -+ RelOptInfo *rel, -+ Path *outer_path, -+ Path *inner_path, -+ SpecialJoinInfo *sjinfo, -+ List *restrict_clauses); -+extern PGDLLIMPORT get_parameterized_joinrel_size_hook_type -+ get_parameterized_joinrel_size_hook; -+typedef void (*set_joinrel_size_estimates_hook_type) (PlannerInfo *root, -+ RelOptInfo *rel, -+ RelOptInfo *outer_rel, -+ RelOptInfo *inner_rel, -+ SpecialJoinInfo *sjinfo, -+ List *restrictlist); -+extern PGDLLIMPORT set_joinrel_size_estimates_hook_type -+ set_joinrel_size_estimates_hook; -+ -+ - /* - * prototypes for costsize.c - * routines to compute costs and sizes -@@ -161,21 +189,37 @@ extern void compute_semi_anti_join_factors(PlannerInfo *root, - SpecialJoinInfo *sjinfo, - List *restrictlist, - SemiAntiJoinFactors *semifactors); -+extern void set_baserel_rows_estimate(PlannerInfo *root, RelOptInfo *rel); -+extern void set_baserel_rows_estimate_standard(PlannerInfo *root, RelOptInfo *rel); - extern void set_baserel_size_estimates(PlannerInfo *root, RelOptInfo *rel); - extern double get_parameterized_baserel_size(PlannerInfo *root, - RelOptInfo *rel, - List *param_clauses); -+extern double get_parameterized_baserel_size_standard(PlannerInfo *root, -+ RelOptInfo *rel, -+ List *param_clauses); - extern double get_parameterized_joinrel_size(PlannerInfo *root, - 
RelOptInfo *rel, - Path *outer_path, - Path *inner_path, - SpecialJoinInfo *sjinfo, - List *restrict_clauses); -+extern double get_parameterized_joinrel_size_standard(PlannerInfo *root, -+ RelOptInfo *rel, -+ Path *outer_path, -+ Path *inner_path, -+ SpecialJoinInfo *sjinfo, -+ List *restrict_clauses); - extern void set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, - RelOptInfo *outer_rel, - RelOptInfo *inner_rel, - SpecialJoinInfo *sjinfo, - List *restrictlist); -+extern void set_joinrel_size_estimates_standard(PlannerInfo *root, RelOptInfo *rel, -+ RelOptInfo *outer_rel, -+ RelOptInfo *inner_rel, -+ SpecialJoinInfo *sjinfo, -+ List *restrictlist); - extern void set_subquery_size_estimates(PlannerInfo *root, RelOptInfo *rel); - extern void set_function_size_estimates(PlannerInfo *root, RelOptInfo *rel); - extern void set_values_size_estimates(PlannerInfo *root, RelOptInfo *rel); -diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h -index 4fbb6cc..def55e5 100644 ---- a/src/include/optimizer/planmain.h -+++ b/src/include/optimizer/planmain.h -@@ -33,6 +33,12 @@ extern int force_parallel_mode; - /* query_planner callback to compute query_pathkeys */ - typedef void (*query_pathkeys_callback) (PlannerInfo *root, void *extra); - -+/* hook for plugins to get control in creating plan from path */ -+typedef void (*copy_generic_path_info_hook_type) (PlannerInfo *root, -+ Plan *dest, Path *src); -+ -+extern PGDLLIMPORT copy_generic_path_info_hook_type copy_generic_path_info_hook; -+ - /* - * prototypes for plan/planmain.c - */ -diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c -index 7a6545f..42b58c0 100644 ---- a/src/backend/commands/explain.c -+++ b/src/backend/commands/explain.c -@@ -46,6 +46,9 @@ ExplainOneQuery_hook_type ExplainOneQuery_hook = NULL; - /* Hook for plugins to get control in explain_get_index_name() */ - explain_get_index_name_hook_type explain_get_index_name_hook = NULL; - -+/* Hook for 
plugins to get control in ExplainOnePlan() */ -+ExplainOnePlan_hook_type ExplainOnePlan_hook = NULL; -+ - - /* OR-able flags for ExplainXMLTag() */ - #define X_OPENING 0 -@@ -558,6 +561,10 @@ ExplainOnePlan(PlannedStmt *plannedstmt, IntoClause *into, ExplainState *es, - 3, es); - } - -+ if (ExplainOnePlan_hook) -+ ExplainOnePlan_hook(plannedstmt, into, es, -+ queryString, params, planduration); -+ - ExplainCloseGroup("Query", NULL, true, es); - } - -diff --git a/src/include/commands/explain.h b/src/include/commands/explain.h -index 1f0bde7..9ca637c 100644 ---- a/src/include/commands/explain.h -+++ b/src/include/commands/explain.h -@@ -58,6 +58,12 @@ extern PGDLLIMPORT ExplainOneQuery_hook_type ExplainOneQuery_hook; - typedef const char *(*explain_get_index_name_hook_type) (Oid indexId); - extern PGDLLIMPORT explain_get_index_name_hook_type explain_get_index_name_hook; - -+/* Hook for plugins to get control in ExplainOnePlan() */ -+typedef void (*ExplainOnePlan_hook_type) (PlannedStmt *plannedstmt, IntoClause *into, -+ ExplainState *es, const char *queryString, -+ ParamListInfo params, const instr_time *planduration); -+extern PGDLLIMPORT ExplainOnePlan_hook_type ExplainOnePlan_hook; -+ - - extern void ExplainQuery(ExplainStmt *stmt, const char *queryString, - ParamListInfo params, DestReceiver *dest); diff --git a/aqo_shared.c b/aqo_shared.c new file mode 100644 index 00000000..9b478552 --- /dev/null +++ b/aqo_shared.c @@ -0,0 +1,149 @@ +/* + * + */ + +#include "postgres.h" + +#include "lib/dshash.h" +#include "miscadmin.h" +#include "storage/ipc.h" +#include "storage/shmem.h" + +#include "aqo_shared.h" +#include "storage.h" + + +AQOSharedState *aqo_state = NULL; +int fs_max_items = 10000; /* Max number of different feature spaces in ML model */ +int fss_max_items = 100000; /* Max number of different feature subspaces in ML model */ + +static shmem_startup_hook_type aqo_shmem_startup_next = NULL; + +static void on_shmem_shutdown(int code, Datum arg); + +static void 
+aqo_init_shmem(void) +{ + bool found; + HASHCTL info; + + if (aqo_shmem_startup_next) + (*aqo_shmem_startup_next)(); + + aqo_state = NULL; + stat_htab = NULL; + qtexts_htab = NULL; + data_htab = NULL; + queries_htab = NULL; + + LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE); + aqo_state = ShmemInitStruct("AQO", sizeof(AQOSharedState), &found); + if (!found) + { + /* First time through ... */ + + aqo_state->qtexts_dsa_handler = DSM_HANDLE_INVALID; + aqo_state->data_dsa_handler = DSM_HANDLE_INVALID; + + aqo_state->qtext_trancheid = LWLockNewTrancheId(); + + aqo_state->qtexts_changed = false; + aqo_state->stat_changed = false; + aqo_state->data_changed = false; + aqo_state->queries_changed = false; + + LWLockInitialize(&aqo_state->lock, LWLockNewTrancheId()); + LWLockInitialize(&aqo_state->stat_lock, LWLockNewTrancheId()); + LWLockInitialize(&aqo_state->qtexts_lock, LWLockNewTrancheId()); + LWLockInitialize(&aqo_state->data_lock, LWLockNewTrancheId()); + LWLockInitialize(&aqo_state->queries_lock, LWLockNewTrancheId()); + } + + info.keysize = sizeof(((StatEntry *) 0)->queryid); + info.entrysize = sizeof(StatEntry); + stat_htab = ShmemInitHash("AQO Stat HTAB", fs_max_items, fs_max_items, + &info, HASH_ELEM | HASH_BLOBS); + + /* Init shared memory table for query texts */ + info.keysize = sizeof(((QueryTextEntry *) 0)->queryid); + info.entrysize = sizeof(QueryTextEntry); + qtexts_htab = ShmemInitHash("AQO Query Texts HTAB", fs_max_items, fs_max_items, + &info, HASH_ELEM | HASH_BLOBS); + + /* Shared memory hash table for the data */ + info.keysize = sizeof(data_key); + info.entrysize = sizeof(DataEntry); + data_htab = ShmemInitHash("AQO Data HTAB", fss_max_items, fss_max_items, + &info, HASH_ELEM | HASH_BLOBS); + + /* Shared memory hash table for queries */ + info.keysize = sizeof(((QueriesEntry *) 0)->queryid); + info.entrysize = sizeof(QueriesEntry); + queries_htab = ShmemInitHash("AQO Queries HTAB", fs_max_items, fs_max_items, + &info, HASH_ELEM | HASH_BLOBS); + + 
LWLockRelease(AddinShmemInitLock); + LWLockRegisterTranche(aqo_state->lock.tranche, "AQO"); + LWLockRegisterTranche(aqo_state->stat_lock.tranche, "AQO Stat Lock Tranche"); + LWLockRegisterTranche(aqo_state->qtexts_lock.tranche, "AQO QTexts Lock Tranche"); + LWLockRegisterTranche(aqo_state->qtext_trancheid, "AQO Query Texts Tranche"); + LWLockRegisterTranche(aqo_state->data_lock.tranche, "AQO Data Lock Tranche"); + LWLockRegisterTranche(aqo_state->queries_lock.tranche, "AQO Queries Lock Tranche"); + + if (!IsUnderPostmaster && !found) + { + before_shmem_exit(on_shmem_shutdown, (Datum) 0); + + /* Doesn't use DSA, so can be loaded in postmaster */ + aqo_stat_load(); + aqo_queries_load(); + + check_dsa_file_size(); + } +} + +/* + * Main idea here is to store all ML data in temp files on postmaster shutdown. + */ +static void +on_shmem_shutdown(int code, Datum arg) +{ + Assert(!IsUnderPostmaster); + + /* + * Save ML data to a permanent storage. Do it on postmaster shutdown only + * to save time. We can't do so for query_texts and aqo_data because of DSM + * limits. + */ + aqo_stat_flush(); + aqo_queries_flush(); + return; +} + + +/* + * Requests any additional shared memory required for aqo. 
+ */ +static void +aqo_shmem_request(void) +{ + Size size; + + size = MAXALIGN(sizeof(AQOSharedState)); + size = add_size(size, hash_estimate_size(fs_max_items, sizeof(AQOSharedState))); + size = add_size(size, hash_estimate_size(fs_max_items, sizeof(StatEntry))); + size = add_size(size, hash_estimate_size(fs_max_items, sizeof(QueryTextEntry))); + size = add_size(size, hash_estimate_size(fss_max_items, sizeof(DataEntry))); + size = add_size(size, hash_estimate_size(fs_max_items, sizeof(QueriesEntry))); + + RequestAddinShmemSpace(size); +} + +void +aqo_shmem_init(void) +{ + aqo_shmem_startup_next = shmem_startup_hook; + shmem_startup_hook = aqo_init_shmem; + + aqo_shmem_request(); +} diff --git a/aqo_shared.h b/aqo_shared.h new file mode 100644 index 00000000..ee9e3087 --- /dev/null +++ b/aqo_shared.h @@ -0,0 +1,38 @@ +#ifndef AQO_SHARED_H +#define AQO_SHARED_H + +#include "storage/lwlock.h" +#include "utils/dsa.h" + +#define AQO_SHARED_MAGIC 0x053163 + +typedef struct AQOSharedState +{ + LWLock lock; /* mutual exclusion */ + + /* Storage fields */ + LWLock stat_lock; /* lock for access to stat storage */ + bool stat_changed; + + LWLock qtexts_lock; /* Lock for shared fields below */ + dsa_handle qtexts_dsa_handler; /* DSA area for storing of query texts */ + int qtext_trancheid; + bool qtexts_changed; + + LWLock data_lock; /* Lock for shared fields below */ + dsa_handle data_dsa_handler; + bool data_changed; + + LWLock queries_lock; /* lock for access to queries storage */ + bool queries_changed; +} AQOSharedState; + + +extern AQOSharedState *aqo_state; + +extern int fs_max_items; /* Max number of feature spaces that AQO can operate */ +extern int fss_max_items; + +extern void aqo_shmem_init(void); + +#endif /* AQO_SHARED_H */ diff --git a/auto_tuning.c b/auto_tuning.c index a19f42d0..36dfe2ef 100644 --- a/auto_tuning.c +++ b/auto_tuning.c @@ -8,14 +8,17 @@ * ******************************************************************************* * - * Copyright (c) 
2016-2020, Postgres Professional + * Copyright (c) 2016-2023, Postgres Professional * * IDENTIFICATION * aqo/auto_tuning.c * */ +#include "postgres.h" + #include "aqo.h" +#include "storage.h" /* * Auto tuning criteria criteria of an query convergence by overall cardinality @@ -23,13 +26,11 @@ */ double auto_tuning_convergence_error = 0.01; -static double get_mean(double *elems, int nelems); static double get_estimation(double *elems, int nelems); static bool is_stable(double *elems, int nelems); static bool converged_cq(double *elems, int nelems); static bool is_in_infinite_loop_cq(double *elems, int nelems); - /* * Returns mean value of the array of doubles. */ @@ -39,7 +40,7 @@ get_mean(double *elems, int nelems) double sum = 0; int i; - AssertArg(nelems > 0); + Assert(nelems > 0); for (i = 0; i < nelems; ++i) sum += elems[i]; @@ -50,12 +51,12 @@ get_mean(double *elems, int nelems) * Having a time series it tries to predict its next value. * Now it do simple window averaging. */ -double +static double get_estimation(double *elems, int nelems) { int start; - AssertArg(nelems > 0); + Assert(nelems > 0); if (nelems > auto_tuning_window_size) start = nelems - auto_tuning_window_size; @@ -68,13 +69,13 @@ get_estimation(double *elems, int nelems) /* * Checks whether the series is stable with absolute or relative error. */ -bool +static bool is_stable(double *elems, int nelems) { double est, last; - AssertArg(nelems > 1); + Assert(nelems > 1); est = get_mean(elems, nelems - 1); last = elems[nelems - 1]; @@ -89,7 +90,7 @@ is_stable(double *elems, int nelems) * Now it checks whether the cardinality quality stopped decreasing with * absolute or relative error. */ -bool +static bool converged_cq(double *elems, int nelems) { if (nelems < auto_tuning_window_size + 2) @@ -105,7 +106,7 @@ converged_cq(double *elems, int nelems) * Now it checks whether the cardinality quality stopped decreasing with * absolute or relative error 0.1. 
*/ -bool +static bool is_in_infinite_loop_cq(double *elems, int nelems) { if (nelems - auto_tuning_infinite_loop < auto_tuning_window_size + 2) @@ -142,22 +143,21 @@ is_in_infinite_loop_cq(double *elems, int nelems) * this query to false. */ void -automatical_query_tuning(int query_hash, QueryStat * stat) +automatical_query_tuning(uint64 queryid, StatEntry *stat) { - double unstability = auto_tuning_exploration; - double t_aqo, - t_not_aqo; - double p_use = -1; - int64 num_iterations; + double unstability = auto_tuning_exploration; + double t_aqo, + t_not_aqo; + double p_use = -1; + int64 num_iterations; - num_iterations = stat->executions_with_aqo + stat->executions_without_aqo; + num_iterations = stat->execs_with_aqo + stat->execs_without_aqo; query_context.learn_aqo = true; - if (stat->executions_without_aqo < auto_tuning_window_size + 1) + if (stat->execs_without_aqo < auto_tuning_window_size + 1) query_context.use_aqo = false; - else if (!converged_cq(stat->cardinality_error_with_aqo, - stat->cardinality_error_with_aqo_size) && - !is_in_infinite_loop_cq(stat->cardinality_error_with_aqo, - stat->cardinality_error_with_aqo_size)) + else if (!converged_cq(stat->est_error_aqo, stat->cur_stat_slot_aqo) && + !is_in_infinite_loop_cq(stat->est_error_aqo, + stat->cur_stat_slot_aqo)) query_context.use_aqo = true; else { @@ -166,29 +166,44 @@ automatical_query_tuning(int query_hash, QueryStat * stat) * by execution time. It is volatile, probabilistic part of code. * XXX: this logic of auto tuning may be reworked later. 
*/ - t_aqo = get_estimation(stat->execution_time_with_aqo, - stat->execution_time_with_aqo_size) + - get_estimation(stat->planning_time_with_aqo, - stat->planning_time_with_aqo_size); + t_aqo = get_estimation(stat->exec_time_aqo, stat->cur_stat_slot_aqo) + + get_estimation(stat->plan_time_aqo, stat->cur_stat_slot_aqo); - t_not_aqo = get_estimation(stat->execution_time_without_aqo, - stat->execution_time_without_aqo_size) + - get_estimation(stat->planning_time_without_aqo, - stat->planning_time_without_aqo_size); + t_not_aqo = get_estimation(stat->exec_time, stat->cur_stat_slot) + + get_estimation(stat->plan_time, stat->cur_stat_slot); p_use = t_not_aqo / (t_not_aqo + t_aqo); + + /* + * Here p_use<0.5 and p_use->0, if AQO decreases performance, + * Otherwise, p_use>0.5 and p_use->1. + */ + p_use = 1 / (1 + exp((p_use - 0.5) / unstability)); + + /* + * Here p_use in (0.5..max) if AQO decreases preformance. + * p_use in (0..0.5), otherwise. + */ + p_use -= 1 / (1 + exp(-0.5 / unstability)); p_use /= 1 - 2 / (1 + exp(-0.5 / unstability)); - /* borrowed from drandom() in float.c */ - query_context.use_aqo = (random() / ((double) MAX_RANDOM_VALUE + 1)) < p_use; + /* + * If our decision is using AQO for this query class, then learn on new + * queries of this type. Otherwise, turn off. 
+ */ + query_context.use_aqo = + (random() / ((double) MAX_RANDOM_VALUE + 1)) < p_use; query_context.learn_aqo = query_context.use_aqo; } if (num_iterations <= auto_tuning_max_iterations || p_use > 0.5) - update_query(query_hash, query_context.learn_aqo, query_context.use_aqo, - query_context.fspace_hash, true); + aqo_queries_store(queryid, query_context.fspace_hash, + query_context.learn_aqo, query_context.use_aqo, true, + &aqo_queries_nulls); else - update_query(query_hash, false, false, query_context.fspace_hash, false); + aqo_queries_store(queryid, + query_context.fspace_hash, false, false, false, + &aqo_queries_nulls); } diff --git a/cardinality_estimation.c b/cardinality_estimation.c index 89ddf1ee..f0cca328 100644 --- a/cardinality_estimation.c +++ b/cardinality_estimation.c @@ -8,57 +8,104 @@ * ******************************************************************************* * - * Copyright (c) 2016-2020, Postgres Professional + * Copyright (c) 2016-2023, Postgres Professional * * IDENTIFICATION * aqo/cardinality_estimation.c * */ -#include "aqo.h" +#include "postgres.h" + #include "optimizer/optimizer.h" +#include "aqo.h" +#include "hash.h" +#include "machine_learning.h" +#include "storage.h" + + +bool use_wide_search = false; + +#ifdef AQO_DEBUG_PRINT +static void +predict_debug_output(List *clauses, List *selectivities, + List *reloids, int fss, double result) +{ + StringInfoData debug_str; + ListCell *lc; + + initStringInfo(&debug_str); + appendStringInfo(&debug_str, "fss: %d, clausesNum: %d, ", + fss, list_length(clauses)); + + appendStringInfoString(&debug_str, ", selectivities: { "); + foreach(lc, selectivities) + { + Selectivity *s = (Selectivity *) lfirst(lc); + appendStringInfo(&debug_str, "%lf ", *s); + } + + appendStringInfoString(&debug_str, "}, reloids: { "); + foreach(lc, reloids) + { + Oid relname = lfirst_oid(lc); + appendStringInfo(&debug_str, "%d ", relname); + } + + appendStringInfo(&debug_str, "}, result: %lf", result); + elog(DEBUG1, 
"Prediction: %s", debug_str.data); +} +#endif + /* * General method for prediction the cardinality of given relation. */ double -predict_for_relation(List *restrict_clauses, List *selectivities, - List *relids, int *fss_hash) +predict_for_relation(List *clauses, List *selectivities, List *relsigns, + int *fss) { - int nfeatures; - double *matrix[aqo_K]; - double targets[aqo_K]; - double *features; - double result; - int rows; - int i; - - *fss_hash = get_fss_for_object(restrict_clauses, selectivities, relids, - &nfeatures, &features); - - if (nfeatures > 0) - for (i = 0; i < aqo_K; ++i) - matrix[i] = palloc0(sizeof(**matrix) * nfeatures); - - if (load_fss(*fss_hash, nfeatures, matrix, targets, &rows)) - result = OkNNr_predict(rows, nfeatures, matrix, targets, features); + double *features; + double result; + int ncols; + OkNNrdata *data; + + if (relsigns == NIL) + /* + * Don't make prediction for query plans without any underlying plane + * tables. Use return value -4 for debug purposes. + */ + return -4.; + + *fss = get_fss_for_object(relsigns, clauses, selectivities, + &ncols, &features); + data = OkNNr_allocate(ncols); + + if (load_aqo_data(query_context.fspace_hash, *fss, data, false) && + data->rows >= (aqo_predict_with_few_neighbors ? 1 : aqo_k)) + result = OkNNr_predict(data, features); + /* Try to search in surrounding feature spaces for the same node */ + else if (use_wide_search && load_aqo_data(query_context.fspace_hash, *fss, data, true)) + { + elog(DEBUG5, "[AQO] Make prediction for fss "INT64_FORMAT" by a neighbour " + "includes %d feature(s) and %d fact(s).", + (int64) *fss, data->cols, data->rows); + result = OkNNr_predict(data, features); + } else { /* * Due to planning optimizer tries to build many alternate paths. Many - * of these not used in final query execution path. Consequently, only - * small part of paths was used for AQO learning and fetch into the AQO - * knowledge base. + * of them aren't used in final query execution path. 
Consequently, only + * small part of paths was used for AQO learning and stored into + * the AQO knowledge base. */ result = -1; } - pfree(features); - if (nfeatures > 0) - { - for (i = 0; i < aqo_K; ++i) - pfree(matrix[i]); - } +#ifdef AQO_DEBUG_PRINT + predict_debug_output(clauses, selectivities, relsigns, *fss, result); +#endif if (result < 0) return -1; diff --git a/cardinality_hooks.c b/cardinality_hooks.c index 76f54d68..6546499c 100644 --- a/cardinality_hooks.c +++ b/cardinality_hooks.c @@ -18,126 +18,61 @@ * ******************************************************************************* * - * Copyright (c) 2016-2020, Postgres Professional + * Copyright (c) 2016-2023, Postgres Professional * * IDENTIFICATION * aqo/cardinality_hooks.c * */ +#include "postgres.h" + +#include "optimizer/cost.h" +#include "utils/selfuncs.h" + #include "aqo.h" +#include "hash.h" +#include "machine_learning.h" +#include "path_utils.h" +#include "storage.h" double predicted_ppi_rows; double fss_ppi_hash; -static void call_default_set_baserel_rows_estimate(PlannerInfo *root, - RelOptInfo *rel); -static double call_default_get_parameterized_baserel_size(PlannerInfo *root, - RelOptInfo *rel, - List *param_clauses); -static void call_default_set_joinrel_size_estimates(PlannerInfo *root, - RelOptInfo *rel, - RelOptInfo *outer_rel, - RelOptInfo *inner_rel, - SpecialJoinInfo *sjinfo, - List *restrictlist); -static double call_default_get_parameterized_joinrel_size(PlannerInfo *root, - RelOptInfo *rel, - Path *outer_path, - Path *inner_path, - SpecialJoinInfo *sjinfo, - List *restrict_clauses); - - -/* - * Calls standard set_baserel_rows_estimate or its previous hook. - */ -void -call_default_set_baserel_rows_estimate(PlannerInfo *root, RelOptInfo *rel) -{ - if (prev_set_baserel_rows_estimate_hook) - prev_set_baserel_rows_estimate_hook(root, rel); - else - set_baserel_rows_estimate_standard(root, rel); -} - /* - * Calls standard get_parameterized_baserel_size or its previous hook. 
+ * Cardinality prediction hooks. + * It isn't clear what to do if someone else tries to live in this chain. + * Of course, someone may want to just report some stat or something like that. + * So, it can be legal, sometimees. So far, we only report this fact. */ -double -call_default_get_parameterized_baserel_size(PlannerInfo *root, - RelOptInfo *rel, - List *param_clauses) -{ - if (prev_get_parameterized_baserel_size_hook) - return prev_get_parameterized_baserel_size_hook(root, rel, param_clauses); - else - return get_parameterized_baserel_size_standard(root, rel, param_clauses); -} - -/* - * Calls standard get_parameterized_joinrel_size or its previous hook. - */ -double -call_default_get_parameterized_joinrel_size(PlannerInfo *root, - RelOptInfo *rel, - Path *outer_path, - Path *inner_path, - SpecialJoinInfo *sjinfo, - List *restrict_clauses) -{ - if (prev_get_parameterized_joinrel_size_hook) - return prev_get_parameterized_joinrel_size_hook(root, rel, - outer_path, - inner_path, - sjinfo, - restrict_clauses); - else - return get_parameterized_joinrel_size_standard(root, rel, - outer_path, - inner_path, - sjinfo, - restrict_clauses); -} - -/* - * Calls standard set_joinrel_size_estimates or its previous hook. 
- */ -void -call_default_set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, - RelOptInfo *outer_rel, - RelOptInfo *inner_rel, - SpecialJoinInfo *sjinfo, - List *restrictlist) -{ - if (prev_set_joinrel_size_estimates_hook) - prev_set_joinrel_size_estimates_hook(root, rel, - outer_rel, - inner_rel, - sjinfo, - restrictlist); - else - set_joinrel_size_estimates_standard(root, rel, - outer_rel, - inner_rel, - sjinfo, - restrictlist); -} +static set_baserel_rows_estimate_hook_type aqo_set_baserel_rows_estimate_next = NULL; +static get_parameterized_baserel_size_hook_type aqo_get_parameterized_baserel_size_next = NULL; +static set_joinrel_size_estimates_hook_type aqo_set_joinrel_size_estimates_next = NULL; +static get_parameterized_joinrel_size_hook_type aqo_get_parameterized_joinrel_size_next = NULL; +static set_parampathinfo_postinit_hook_type aqo_set_parampathinfo_postinit_next = NULL; +static estimate_num_groups_hook_type aqo_estimate_num_groups_next = NULL; /* * Our hook for setting baserel rows estimate. * Extracts clauses, their selectivities and list of relation relids and * passes them to predict_for_relation. */ -void +static void aqo_set_baserel_rows_estimate(PlannerInfo *root, RelOptInfo *rel) { - double predicted; - Oid relid; - List *relids; - List *selectivities = NULL; - List *restrict_clauses; - int fss = 0; + double predicted; + RangeTblEntry *rte; + RelSortOut rels = {NIL, NIL}; + List *selectivities = NULL; + List *clauses; + int fss = 0; + MemoryContext old_ctx_m; + + if (IsQueryDisabled()) + /* Fast path. 
*/ + goto default_estimator; + + old_ctx_m = MemoryContextSwitchTo(AQOPredictMemCtx); if (query_context.use_aqo || query_context.learn_aqo) selectivities = get_selectivities(root, rel->baserestrictinfo, 0, @@ -145,40 +80,59 @@ aqo_set_baserel_rows_estimate(PlannerInfo *root, RelOptInfo *rel) if (!query_context.use_aqo) { - if (query_context.learn_aqo) - list_free_deep(selectivities); - - call_default_set_baserel_rows_estimate(root, rel); - return; + MemoryContextSwitchTo(old_ctx_m); + MemoryContextReset(AQOPredictMemCtx); + goto default_estimator; } - relid = planner_rt_fetch(rel->relid, root)->relid; - relids = list_make1_int(relid); + rte = planner_rt_fetch(rel->relid, root); + if (rte && OidIsValid(rte->relid)) + { + /* Predict for a plane table. */ + Assert(rte->eref && rte->eref->aliasname); + get_list_of_relids(root, rel->relids, &rels); + } - restrict_clauses = list_copy(rel->baserestrictinfo); - predicted = predict_for_relation(restrict_clauses, selectivities, relids, &fss); + clauses = aqo_get_clauses(root, rel->baserestrictinfo); + predicted = predict_for_relation(clauses, selectivities, rels.signatures, + &fss); rel->fss_hash = fss; - if (predicted >= 0) - { - rel->rows = predicted; - rel->predicted_cardinality = predicted; - } - else - { - call_default_set_baserel_rows_estimate(root, rel); - rel->predicted_cardinality = -1.; - } + /* Return to the caller's memory context. */ + MemoryContextSwitchTo(old_ctx_m); + MemoryContextReset(AQOPredictMemCtx); - list_free_deep(selectivities); - list_free(restrict_clauses); - list_free(relids); -} + if (predicted < 0) + goto default_estimator; + if ((aqo_set_baserel_rows_estimate_next != set_baserel_rows_estimate_standard || + set_baserel_rows_estimate_hook != aqo_set_baserel_rows_estimate)) + /* It is unclear that to do in situation of such kind. 
Just report it */ + elog(WARNING, "AQO is in the middle of the set_baserel_rows_estimate_hook chain"); -void -ppi_hook(ParamPathInfo *ppi) + rel->rows = predicted; + rel->predicted_cardinality = predicted; + return; + +default_estimator: + rel->predicted_cardinality = -1.; + (*aqo_set_baserel_rows_estimate_next)(root, rel); +} + +static void +aqo_parampathinfo_postinit(ParamPathInfo *ppi) { + if (aqo_set_parampathinfo_postinit_next) + (*aqo_set_parampathinfo_postinit_next)(ppi); + + if (IsQueryDisabled()) + return; + + if ((aqo_set_parampathinfo_postinit_next != NULL || + parampathinfo_postinit_hook != aqo_parampathinfo_postinit)) + /* It is unclear that to do in situation of such kind. Just report it */ + elog(WARNING, "AQO is in the middle of the parampathinfo_postinit_hook chain"); + ppi->predicted_ppi_rows = predicted_ppi_rows; ppi->fss_ppi_hash = fss_ppi_hash; } @@ -188,14 +142,14 @@ ppi_hook(ParamPathInfo *ppi) * Extracts clauses (including parametrization ones), their selectivities * and list of relation relids and passes them to predict_for_relation. 
*/ -double +static double aqo_get_parameterized_baserel_size(PlannerInfo *root, RelOptInfo *rel, List *param_clauses) { double predicted; - Oid relid = InvalidOid; - List *relids = NULL; + RangeTblEntry *rte = NULL; + RelSortOut rels = {NIL, NIL}; List *allclauses = NULL; List *selectivities = NULL; ListCell *l; @@ -204,51 +158,79 @@ aqo_get_parameterized_baserel_size(PlannerInfo *root, int *args_hash; int *eclass_hash; int current_hash; - int fss = 0; + int fss = 0; + MemoryContext oldctx; + + if (IsQueryDisabled()) + /* Fast path */ + goto default_estimator; + + oldctx = MemoryContextSwitchTo(AQOPredictMemCtx); if (query_context.use_aqo || query_context.learn_aqo) { - allclauses = list_concat(list_copy(param_clauses), - list_copy(rel->baserestrictinfo)); - selectivities = get_selectivities(root, allclauses, rel->relid, - JOIN_INNER, NULL); - relid = planner_rt_fetch(rel->relid, root)->relid; + + selectivities = list_concat( + get_selectivities(root, param_clauses, rel->relid, + JOIN_INNER, NULL), + get_selectivities(root, rel->baserestrictinfo, + rel->relid, + JOIN_INNER, NULL)); + + /* Make specific copy of clauses with mutated subplans */ + allclauses = list_concat(aqo_get_clauses(root, param_clauses), + aqo_get_clauses(root, rel->baserestrictinfo)); + + rte = planner_rt_fetch(rel->relid, root); get_eclasses(allclauses, &nargs, &args_hash, &eclass_hash); + forboth(l, allclauses, l2, selectivities) { - current_hash = get_clause_hash( - ((RestrictInfo *) lfirst(l))->clause, + current_hash = get_clause_hash((AQOClause *) lfirst(l), nargs, args_hash, eclass_hash); - cache_selectivity(current_hash, rel->relid, relid, + cache_selectivity(current_hash, rel->relid, rte->relid, *((double *) lfirst(l2))); } + pfree(args_hash); pfree(eclass_hash); } if (!query_context.use_aqo) { - if (query_context.learn_aqo) - { - list_free_deep(selectivities); - list_free(allclauses); - } - return call_default_get_parameterized_baserel_size(root, rel, - param_clauses); + 
MemoryContextSwitchTo(oldctx); + MemoryContextReset(AQOPredictMemCtx); + goto default_estimator; } - relids = list_make1_int(relid); + if (rte && OidIsValid(rte->relid)) + { + /* Predict for a plane table. */ + Assert(rte->eref && rte->eref->aliasname); + get_list_of_relids(root, rel->relids, &rels); + } + + predicted = predict_for_relation(allclauses, selectivities, rels.signatures, &fss); - predicted = predict_for_relation(allclauses, selectivities, relids, &fss); + /* Return to the caller's memory context */ + MemoryContextSwitchTo(oldctx); + MemoryContextReset(AQOPredictMemCtx); predicted_ppi_rows = predicted; fss_ppi_hash = fss; - if (predicted >= 0) - return predicted; - else - return call_default_get_parameterized_baserel_size(root, rel, - param_clauses); + if (predicted < 0) + goto default_estimator; + + if ((aqo_get_parameterized_baserel_size_next != get_parameterized_baserel_size_standard || + get_parameterized_baserel_size_hook != aqo_get_parameterized_baserel_size)) + /* It is unclear that to do in situation of such kind. Just report it */ + elog(WARNING, "AQO is in the middle of the aqo_get_parameterized_baserel_size_next chain"); + + return predicted; + +default_estimator: + return (*aqo_get_parameterized_baserel_size_next)(root, rel, param_clauses); } /* @@ -256,7 +238,7 @@ aqo_get_parameterized_baserel_size(PlannerInfo *root, * Extracts clauses, their selectivities and list of relation relids and * passes them to predict_for_relation. 
*/ -void +static void aqo_set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, RelOptInfo *outer_rel, RelOptInfo *inner_rel, @@ -264,7 +246,7 @@ aqo_set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, List *restrictlist) { double predicted; - List *relids; + RelSortOut rels = {NIL, NIL}; List *outer_clauses; List *inner_clauses; List *allclauses; @@ -272,53 +254,61 @@ aqo_set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, List *inner_selectivities; List *outer_selectivities; List *current_selectivities = NULL; - int fss = 0; + int fss = 0; + MemoryContext old_ctx_m; + + if (IsQueryDisabled()) + /* Fast path */ + goto default_estimator; + + old_ctx_m = MemoryContextSwitchTo(AQOPredictMemCtx); if (query_context.use_aqo || query_context.learn_aqo) current_selectivities = get_selectivities(root, restrictlist, 0, sjinfo->jointype, sjinfo); - if (!query_context.use_aqo) { - if (query_context.learn_aqo) - list_free_deep(current_selectivities); - - call_default_set_joinrel_size_estimates(root, rel, - outer_rel, - inner_rel, - sjinfo, - restrictlist); - return; + MemoryContextSwitchTo(old_ctx_m); + MemoryContextReset(AQOPredictMemCtx); + goto default_estimator; } - relids = get_list_of_relids(root, rel->relids); + get_list_of_relids(root, rel->relids, &rels); outer_clauses = get_path_clauses(outer_rel->cheapest_total_path, root, &outer_selectivities); inner_clauses = get_path_clauses(inner_rel->cheapest_total_path, root, &inner_selectivities); - allclauses = list_concat(list_copy(restrictlist), + allclauses = list_concat(aqo_get_clauses(root, restrictlist), list_concat(outer_clauses, inner_clauses)); selectivities = list_concat(current_selectivities, list_concat(outer_selectivities, inner_selectivities)); - predicted = predict_for_relation(allclauses, selectivities, relids, &fss); + predicted = predict_for_relation(allclauses, selectivities, rels.signatures, + &fss); + + /* Return to the caller's memory context */ + 
MemoryContextSwitchTo(old_ctx_m); + MemoryContextReset(AQOPredictMemCtx); + rel->fss_hash = fss; - if (predicted >= 0) - { - rel->predicted_cardinality = predicted; - rel->rows = predicted; - } - else - { - rel->predicted_cardinality = -1; - call_default_set_joinrel_size_estimates(root, rel, - outer_rel, - inner_rel, - sjinfo, - restrictlist); - } + if (predicted < 0) + goto default_estimator; + + if ((aqo_set_joinrel_size_estimates_next != set_joinrel_size_estimates_standard || + set_joinrel_size_estimates_hook != aqo_set_joinrel_size_estimates)) + /* It is unclear that to do in situation of such kind. Just report it */ + elog(WARNING, "AQO is in the middle of the set_joinrel_size_estimates_hook chain"); + + rel->predicted_cardinality = predicted; + rel->rows = predicted; + return; + +default_estimator: + rel->predicted_cardinality = -1; + (*aqo_set_joinrel_size_estimates_next)(root, rel, outer_rel, inner_rel, + sjinfo, restrictlist); } /* @@ -326,16 +316,16 @@ aqo_set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, * Extracts clauses (including parametrization ones), their selectivities * and list of relation relids and passes them to predict_for_relation. 
*/ -double +static double aqo_get_parameterized_joinrel_size(PlannerInfo *root, RelOptInfo *rel, Path *outer_path, Path *inner_path, SpecialJoinInfo *sjinfo, - List *restrict_clauses) + List *clauses) { double predicted; - List *relids; + RelSortOut rels = {NIL, NIL}; List *outer_clauses; List *inner_clauses; List *allclauses; @@ -344,43 +334,175 @@ aqo_get_parameterized_joinrel_size(PlannerInfo *root, List *outer_selectivities; List *current_selectivities = NULL; int fss = 0; + MemoryContext old_ctx_m; + + if (IsQueryDisabled()) + /* Fast path */ + goto default_estimator; + + old_ctx_m = MemoryContextSwitchTo(AQOPredictMemCtx); if (query_context.use_aqo || query_context.learn_aqo) - current_selectivities = get_selectivities(root, restrict_clauses, 0, + current_selectivities = get_selectivities(root, clauses, 0, sjinfo->jointype, sjinfo); if (!query_context.use_aqo) { - if (query_context.learn_aqo) - list_free_deep(current_selectivities); - - return call_default_get_parameterized_joinrel_size(root, rel, - outer_path, - inner_path, - sjinfo, - restrict_clauses); + MemoryContextSwitchTo(old_ctx_m); + MemoryContextReset(AQOPredictMemCtx); + goto default_estimator; } - relids = get_list_of_relids(root, rel->relids); + get_list_of_relids(root, rel->relids, &rels); outer_clauses = get_path_clauses(outer_path, root, &outer_selectivities); inner_clauses = get_path_clauses(inner_path, root, &inner_selectivities); - allclauses = list_concat(list_copy(restrict_clauses), + allclauses = list_concat(aqo_get_clauses(root, clauses), list_concat(outer_clauses, inner_clauses)); selectivities = list_concat(current_selectivities, list_concat(outer_selectivities, inner_selectivities)); - predicted = predict_for_relation(allclauses, selectivities, relids, &fss); + predicted = predict_for_relation(allclauses, selectivities, rels.signatures, + &fss); + /* Return to the caller's memory context */ + MemoryContextSwitchTo(old_ctx_m); + MemoryContextReset(AQOPredictMemCtx); predicted_ppi_rows 
= predicted; fss_ppi_hash = fss; - if (predicted >= 0) + if (predicted < 0) + goto default_estimator; + + if ((aqo_get_parameterized_joinrel_size_next != get_parameterized_joinrel_size_standard || + get_parameterized_joinrel_size_hook != aqo_get_parameterized_joinrel_size)) + /* It is unclear that to do in situation of such kind. Just report it */ + elog(WARNING, "AQO is in the middle of the get_parameterized_joinrel_size_hook chain"); + + return predicted; + +default_estimator: + return (*aqo_get_parameterized_joinrel_size_next)(root, rel, + outer_path, inner_path, + sjinfo, clauses); +} + +static double +predict_num_groups(PlannerInfo *root, Path *subpath, List *group_exprs, + int *fss) +{ + int child_fss = 0; + double prediction; + OkNNrdata data; + + if (subpath->parent->predicted_cardinality > 0.) + /* A fast path. Here we can use a fss hash of a leaf. */ + child_fss = subpath->parent->fss_hash; + else + { + RelSortOut rels = {NIL, NIL}; + List *clauses; + List *selectivities = NIL; + + get_list_of_relids(root, subpath->parent->relids, &rels); + clauses = get_path_clauses(subpath, root, &selectivities); + (void) predict_for_relation(clauses, selectivities, rels.signatures, + &child_fss); + } + + *fss = get_grouped_exprs_hash(child_fss, group_exprs); + memset(&data, 0, sizeof(OkNNrdata)); + + if (!load_aqo_data(query_context.fspace_hash, *fss, &data, false)) + return -1; + + Assert(data.rows == 1); + prediction = exp(data.targets[0]); + return (prediction <= 0) ? 
-1 : prediction; +} + +static double +aqo_estimate_num_groups(PlannerInfo *root, List *groupExprs, + Path *subpath, RelOptInfo *grouped_rel, + List **pgset) +{ + int fss; + double predicted; + MemoryContext old_ctx_m; + + if (!query_context.use_aqo) + goto default_estimator; + + if (pgset || groupExprs == NIL) + /* XXX: Don't support some GROUPING options */ + goto default_estimator; + + if (aqo_estimate_num_groups_next != NULL || + estimate_num_groups_hook != aqo_estimate_num_groups) + /* It is unclear that to do in situation of such kind. Just report it */ + elog(WARNING, "AQO is in the middle of the estimate_num_groups_hook chain"); + + old_ctx_m = MemoryContextSwitchTo(AQOPredictMemCtx); + + predicted = predict_num_groups(root, subpath, groupExprs, &fss); + grouped_rel->fss_hash = fss; + if (predicted > 0.) + { + grouped_rel->predicted_cardinality = predicted; + grouped_rel->rows = predicted; + MemoryContextSwitchTo(old_ctx_m); + MemoryContextReset(AQOPredictMemCtx); return predicted; + } + else + /* + * Some nodes AQO doesn't know yet, some nodes are ignored by AQO + * permanently - as an example, SubqueryScan. 
+ */ + grouped_rel->predicted_cardinality = -1; + + MemoryContextSwitchTo(old_ctx_m); + MemoryContextReset(AQOPredictMemCtx); + +default_estimator: + if (aqo_estimate_num_groups_next) + return (*aqo_estimate_num_groups_next)(root, groupExprs, subpath, + grouped_rel, pgset); else - return call_default_get_parameterized_joinrel_size(root, rel, - outer_path, - inner_path, - sjinfo, - restrict_clauses); + return estimate_num_groups(root, groupExprs, subpath->rows, + pgset); +} + +void +aqo_cardinality_hooks_init(void) +{ + if (set_baserel_rows_estimate_hook || + set_foreign_rows_estimate_hook || + get_parameterized_baserel_size_hook || + set_joinrel_size_estimates_hook || + get_parameterized_joinrel_size_hook || + parampathinfo_postinit_hook || + estimate_num_groups_hook) + elog(ERROR, "AQO estimation hooks shouldn't be intercepted"); + + aqo_set_baserel_rows_estimate_next = set_baserel_rows_estimate_standard; + set_baserel_rows_estimate_hook = aqo_set_baserel_rows_estimate; + + /* XXX: we have a problem here. 
Should be redesigned later */ + set_foreign_rows_estimate_hook = aqo_set_baserel_rows_estimate; + + aqo_get_parameterized_baserel_size_next = get_parameterized_baserel_size_standard; + get_parameterized_baserel_size_hook = aqo_get_parameterized_baserel_size; + + aqo_set_joinrel_size_estimates_next = set_joinrel_size_estimates_standard; + set_joinrel_size_estimates_hook = aqo_set_joinrel_size_estimates; + + aqo_get_parameterized_joinrel_size_next = get_parameterized_joinrel_size_standard; + get_parameterized_joinrel_size_hook = aqo_get_parameterized_joinrel_size; + + aqo_set_parampathinfo_postinit_next = parampathinfo_postinit_hook; + parampathinfo_postinit_hook = aqo_parampathinfo_postinit; + + aqo_estimate_num_groups_next = estimate_num_groups_hook; + estimate_num_groups_hook = aqo_estimate_num_groups; } diff --git a/conf.add b/conf.add deleted file mode 100644 index 21843d00..00000000 --- a/conf.add +++ /dev/null @@ -1 +0,0 @@ -shared_preload_libraries = 'aqo' diff --git a/expected/aqo_CVE-2020-14350.out b/expected/aqo_CVE-2020-14350.out index 5dafac09..5deb45ae 100644 --- a/expected/aqo_CVE-2020-14350.out +++ b/expected/aqo_CVE-2020-14350.out @@ -9,6 +9,7 @@ CREATE ROLE regress_hacker LOGIN; -- Test 1 RESET ROLE; ALTER ROLE regress_hacker NOSUPERUSER; +GRANT CREATE ON SCHEMA public TO regress_hacker; SET ROLE regress_hacker; SHOW is_superuser; is_superuser @@ -26,6 +27,7 @@ END $$ LANGUAGE plpgsql RETURNS NULL ON NULL INPUT; RESET ROLE; CREATE EXTENSION aqo; +SET aqo.join_threshold = 0; -- Test result (must be 'off') SET ROLE regress_hacker; SHOW is_superuser; @@ -47,51 +49,32 @@ SHOW is_superuser; off (1 row) -CREATE FUNCTION aqo_status(hash int) -RETURNS TABLE ( - "learn" BOOL, - "use aqo" BOOL, - "auto tune" BOOL, - "fspace hash" INT, - "t_naqo" TEXT, - "err_naqo" TEXT, - "iters" BIGINT, - "t_aqo" TEXT, - "err_aqo" TEXT, - "iters_aqo" BIGINT -) +CREATE FUNCTION aqo_reset() +RETURNS bigint AS $$ BEGIN + RETURN 1; END $$ LANGUAGE plpgsql; RESET ROLE; -- Test 
result (error expected) CREATE EXTENSION aqo; -ERROR: function "aqo_status" already exists with same argument types +ERROR: function "aqo_reset" already exists with same argument types SET ROLE regress_hacker; -CREATE OR REPLACE FUNCTION aqo_status(hash int) -RETURNS TABLE ( - "learn" BOOL, - "use aqo" BOOL, - "auto tune" BOOL, - "fspace hash" INT, - "t_naqo" TEXT, - "err_naqo" TEXT, - "iters" BIGINT, - "t_aqo" TEXT, - "err_aqo" TEXT, - "iters_aqo" BIGINT -) +CREATE OR REPLACE FUNCTION aqo_reset() +RETURNS bigint AS $$ BEGIN ALTER ROLE regress_hacker SUPERUSER; + RETURN 2; END $$ LANGUAGE plpgsql; RESET ROLE; -SELECT aqo_status(42); - aqo_status ------------- -(0 rows) +SELECT aqo_reset(); + aqo_reset +----------- + 2 +(1 row) SET ROLE regress_hacker; SHOW is_superuser; @@ -101,7 +84,7 @@ SHOW is_superuser; (1 row) RESET ROLE; -DROP FUNCTION aqo_status(int); +DROP FUNCTION aqo_reset(); DROP EXTENSION IF EXISTS aqo; NOTICE: extension "aqo" does not exist, skipping -- Test 3 @@ -114,7 +97,7 @@ SHOW is_superuser; off (1 row) -CREATE FUNCTION aqo_enable_query(hash int) +CREATE FUNCTION aqo_enable_class(hash bigint) RETURNS VOID AS $$ BEGIN @@ -123,9 +106,9 @@ $$ LANGUAGE plpgsql; RESET ROLE; -- Test result (error expected) CREATE EXTENSION aqo; -ERROR: function "aqo_enable_query" already exists with same argument types +ERROR: function "aqo_enable_class" already exists with same argument types SET ROLE regress_hacker; -CREATE OR REPLACE FUNCTION aqo_enable_query(hash int) +CREATE OR REPLACE FUNCTION aqo_enable_class(hash bigint) RETURNS VOID AS $$ BEGIN @@ -133,8 +116,8 @@ BEGIN END $$ LANGUAGE plpgsql; RESET ROLE; -SELECT aqo_enable_query(42); - aqo_enable_query +SELECT aqo_enable_class(42); + aqo_enable_class ------------------ (1 row) @@ -147,7 +130,7 @@ SHOW is_superuser; (1 row) RESET ROLE; -DROP FUNCTION aqo_enable_query(int); +DROP FUNCTION aqo_enable_class(bigint); DROP EXTENSION IF EXISTS aqo; NOTICE: extension "aqo" does not exist, skipping -- Test 4 @@ 
-160,7 +143,7 @@ SHOW is_superuser; off (1 row) -CREATE FUNCTION aqo_disable_query(hash int) +CREATE FUNCTION aqo_disable_class(hash bigint) RETURNS VOID AS $$ BEGIN @@ -169,9 +152,9 @@ $$ LANGUAGE plpgsql; RESET ROLE; -- Test result (error expected) CREATE EXTENSION aqo; -ERROR: function "aqo_disable_query" already exists with same argument types +ERROR: function "aqo_disable_class" already exists with same argument types SET ROLE regress_hacker; -CREATE OR REPLACE FUNCTION aqo_disable_query(hash int) +CREATE OR REPLACE FUNCTION aqo_disable_class(hash bigint) RETURNS VOID AS $$ BEGIN @@ -179,8 +162,8 @@ BEGIN END $$ LANGUAGE plpgsql; RESET ROLE; -SELECT aqo_disable_query(42); - aqo_disable_query +SELECT aqo_disable_class(42); + aqo_disable_class ------------------- (1 row) @@ -193,7 +176,7 @@ SHOW is_superuser; (1 row) RESET ROLE; -DROP FUNCTION aqo_disable_query(int); +DROP FUNCTION aqo_disable_class(bigint); DROP EXTENSION IF EXISTS aqo; NOTICE: extension "aqo" does not exist, skipping -- Test 5 @@ -206,29 +189,31 @@ SHOW is_superuser; off (1 row) -CREATE FUNCTION aqo_clear_hist(hash int) -RETURNS VOID +CREATE FUNCTION aqo_drop_class(queryid bigint) +RETURNS integer AS $$ BEGIN + RETURN 1; END $$ LANGUAGE plpgsql; RESET ROLE; -- Test result (error expected) CREATE EXTENSION aqo; -ERROR: function "aqo_clear_hist" already exists with same argument types +ERROR: function "aqo_drop_class" already exists with same argument types SET ROLE regress_hacker; -CREATE OR REPLACE FUNCTION aqo_clear_hist(hash int) -RETURNS VOID +CREATE OR REPLACE FUNCTION aqo_drop_class(queryid bigint) +RETURNS integer AS $$ BEGIN ALTER ROLE regress_hacker SUPERUSER; + RETURN 2; END $$ LANGUAGE plpgsql; RESET ROLE; -SELECT aqo_clear_hist(42); - aqo_clear_hist +SELECT aqo_drop_class(42); + aqo_drop_class ---------------- - + 2 (1 row) SET ROLE regress_hacker; @@ -239,7 +224,7 @@ SHOW is_superuser; (1 row) RESET ROLE; -DROP FUNCTION aqo_clear_hist(int); +DROP FUNCTION aqo_drop_class(bigint); 
DROP EXTENSION IF EXISTS aqo; NOTICE: extension "aqo" does not exist, skipping -- Test 6 @@ -252,8 +237,8 @@ SHOW is_superuser; off (1 row) -CREATE FUNCTION aqo_drop(hash int) -RETURNS VOID +CREATE FUNCTION aqo_execution_time(controlled boolean) +RETURNS TABLE(num integer, id bigint, fshash bigint, exec_time double precision, nexecs bigint) AS $$ BEGIN END @@ -261,21 +246,20 @@ $$ LANGUAGE plpgsql; RESET ROLE; -- Test result (error expected) CREATE EXTENSION aqo; -ERROR: function "aqo_drop" already exists with same argument types +ERROR: function "aqo_execution_time" already exists with same argument types SET ROLE regress_hacker; -CREATE OR REPLACE FUNCTION aqo_drop(hash int) -RETURNS VOID +CREATE OR REPLACE FUNCTION aqo_execution_time(controlled boolean) +RETURNS TABLE(num integer, id bigint, fshash bigint, exec_time double precision, nexecs bigint) AS $$ BEGIN ALTER ROLE regress_hacker SUPERUSER; END $$ LANGUAGE plpgsql; RESET ROLE; -SELECT aqo_drop(42); - aqo_drop ----------- - -(1 row) +SELECT aqo_execution_time(true); + aqo_execution_time +-------------------- +(0 rows) SET ROLE regress_hacker; SHOW is_superuser; @@ -285,7 +269,7 @@ SHOW is_superuser; (1 row) RESET ROLE; -DROP FUNCTION aqo_drop(int); +DROP FUNCTION aqo_execution_time(boolean); DROP EXTENSION IF EXISTS aqo; NOTICE: extension "aqo" does not exist, skipping -- Test 7 @@ -298,8 +282,8 @@ SHOW is_superuser; off (1 row) -CREATE FUNCTION aqo_ne_queries() -RETURNS SETOF int +CREATE FUNCTION aqo_memory_usage(OUT name text, OUT allocated_size int, OUT used_size int) +RETURNS SETOF record AS $$ BEGIN END @@ -307,19 +291,19 @@ $$ LANGUAGE plpgsql; RESET ROLE; -- Test result (error expected) CREATE EXTENSION aqo; -ERROR: function "aqo_ne_queries" already exists with same argument types +ERROR: function "aqo_memory_usage" already exists with same argument types SET ROLE regress_hacker; -CREATE OR REPLACE FUNCTION aqo_ne_queries() -RETURNS SETOF int +CREATE OR REPLACE FUNCTION aqo_memory_usage(OUT name 
text, OUT allocated_size int, OUT used_size int) +RETURNS SETOF record AS $$ BEGIN ALTER ROLE regress_hacker SUPERUSER; END $$ LANGUAGE plpgsql; RESET ROLE; -SELECT aqo_ne_queries(); - aqo_ne_queries ----------------- +SELECT aqo_memory_usage(); + aqo_memory_usage +------------------ (0 rows) SET ROLE regress_hacker; @@ -330,43 +314,10 @@ SHOW is_superuser; (1 row) RESET ROLE; -DROP FUNCTION aqo_ne_queries(); +DROP FUNCTION aqo_memory_usage(); DROP EXTENSION IF EXISTS aqo; NOTICE: extension "aqo" does not exist, skipping --- Test 8 -RESET ROLE; -ALTER ROLE regress_hacker NOSUPERUSER; -SET ROLE regress_hacker; -SHOW is_superuser; - is_superuser --------------- - off -(1 row) - -CREATE FUNCTION aqo_migrate_to_1_1_get_pk(rel text) -RETURNS regclass -AS $$ -DECLARE - ret regclass; -BEGIN - ALTER ROLE regress_hacker SUPERUSER; - SELECT * FROM aqo_migrate_to_1_1_get_pk(rel::regclass) INTO ret; - RETURN ret; -END -$$ LANGUAGE plpgsql; -RESET ROLE; -CREATE EXTENSION aqo; --- Test result (must be 'off') -SET ROLE regress_hacker; -SHOW is_superuser; - is_superuser --------------- - off -(1 row) - -RESET ROLE; -DROP FUNCTION aqo_migrate_to_1_1_get_pk(text); -DROP EXTENSION IF EXISTS aqo; -- Cleanup RESET ROLE; +DROP OWNED BY regress_hacker CASCADE; DROP ROLE regress_hacker; diff --git a/expected/aqo_controlled.out b/expected/aqo_controlled.out index 316ade00..43d27d74 100644 --- a/expected/aqo_controlled.out +++ b/expected/aqo_controlled.out @@ -1,3 +1,10 @@ +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + CREATE TABLE aqo_test0(a int, b int, c int, d int); WITH RECURSIVE t(a, b, c, d) AS ( @@ -25,7 +32,6 @@ AS ( ) INSERT INTO aqo_test2 (SELECT * FROM t); CREATE INDEX aqo_test2_idx_a ON aqo_test2 (a); ANALYZE aqo_test2; -CREATE EXTENSION aqo; SET aqo.mode = 'controlled'; EXPLAIN (COSTS FALSE) SELECT * FROM aqo_test0 @@ -106,9 +112,15 @@ SELECT count(*) FROM tmp1; DROP TABLE tmp1; SET aqo.mode = 'controlled'; 
-UPDATE aqo_queries SET auto_tuning=false; -UPDATE aqo_queries SET learn_aqo=true; -UPDATE aqo_queries SET use_aqo=false; +SELECT count(*) FROM + (SELECT queryid AS id FROM aqo_queries) AS q1, + LATERAL aqo_queries_update(q1.id, NULL, true, false, false) +; -- learn = true, use = false, tuning = false + count +------- + 12 +(1 row) + EXPLAIN (COSTS FALSE) SELECT * FROM aqo_test0 WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; @@ -190,7 +202,16 @@ WHERE t1.a = t2.b AND t2.a = t3.b; -> Seq Scan on aqo_test1 t3 (9 rows) -UPDATE aqo_queries SET use_aqo=true; +SELECT count(*) FROM + (SELECT queryid AS id FROM aqo_queries) AS q1, + LATERAL aqo_queries_update(q1.id, NULL, NULL, true, NULL) AS ret +WHERE NOT ret +; -- set use = true + count +------- + 1 +(1 row) + EXPLAIN (COSTS FALSE) SELECT * FROM aqo_test0 WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; diff --git a/expected/aqo_disabled.out b/expected/aqo_disabled.out index 3162fa6a..cf12e2fb 100644 --- a/expected/aqo_disabled.out +++ b/expected/aqo_disabled.out @@ -1,3 +1,12 @@ +-- Create the extension. Drop all lumps which could survive from +-- previous pass (repeated installcheck as an example). 
+CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + CREATE TABLE aqo_test0(a int, b int, c int, d int); WITH RECURSIVE t(a, b, c, d) AS ( @@ -16,6 +25,58 @@ AS ( ) INSERT INTO aqo_test1 (SELECT * FROM t); CREATE INDEX aqo_test1_idx_a ON aqo_test1 (a); ANALYZE aqo_test1; +SET aqo.mode = 'controlled'; +CREATE TABLE tmp1 AS SELECT * FROM aqo_test0 +WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; +SELECT count(*) FROM tmp1; + count +------- + 3 +(1 row) + +DROP TABLE tmp1; +CREATE TABLE tmp1 AS SELECT t1.a, t2.b, t3.c +FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 +WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; +SELECT count(*) FROM tmp1; + count +------- + 0 +(1 row) + +DROP TABLE tmp1; +EXPLAIN SELECT * FROM aqo_test0 +WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; + QUERY PLAN +---------------------------------------------------------------------------------- + Index Scan using aqo_test0_idx_a on aqo_test0 (cost=0.28..8.35 rows=1 width=16) + Index Cond: (a < 3) + Filter: ((b < 3) AND (c < 3) AND (d < 3)) +(3 rows) + +EXPLAIN SELECT t1.a, t2.b, t3.c +FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 +WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; + QUERY PLAN +------------------------------------------------------------------------------------------------ + Nested Loop (cost=0.28..50.59 rows=1 width=12) + Join Filter: (t1.b = t3.b) + -> Nested Loop (cost=0.28..9.56 rows=1 width=12) + -> Seq Scan on aqo_test1 t1 (cost=0.00..1.25 rows=1 width=8) + Filter: (a < 1) + -> Index Scan using aqo_test0_idx_a on aqo_test0 t2 (cost=0.28..8.30 rows=1 width=8) + Index Cond: (a = t1.a) + Filter: (c < 1) + -> Seq Scan on aqo_test0 t3 (cost=0.00..41.02 rows=1 width=8) + Filter: ((b < 1) AND (d < 0)) +(10 rows) + +SELECT count(*) FROM aqo_queries WHERE queryid <> fs; -- Should be zero + count +------- + 0 +(1 row) + SET aqo.mode = 
'disabled'; CREATE TABLE tmp1 AS SELECT * FROM aqo_test0 WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; @@ -62,7 +123,12 @@ WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = Filter: ((b < 1) AND (d < 0)) (10 rows) -CREATE EXTENSION aqo; +SELECT count(*) FROM aqo_queries WHERE queryid <> fs; -- Should be zero + count +------- + 0 +(1 row) + SET aqo.mode = 'intelligent'; CREATE TABLE tmp1 AS SELECT * FROM aqo_test0 WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; @@ -83,8 +149,23 @@ SELECT count(*) FROM tmp1; (1 row) DROP TABLE tmp1; +SELECT count(*) FROM aqo_queries WHERE queryid <> fs; -- Should be zero + count +------- + 0 +(1 row) + SET aqo.mode = 'controlled'; -UPDATE aqo_queries SET learn_aqo = true, use_aqo = true, auto_tuning = false; +SELECT count(*) FROM + (SELECT queryid AS id FROM aqo_queries) AS q1, + LATERAL aqo_queries_update(q1.id, NULL, true, true, false) AS ret +WHERE NOT ret +; -- Enable all disabled query classes + count +------- + 1 +(1 row) + EXPLAIN SELECT * FROM aqo_test0 WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; QUERY PLAN @@ -111,6 +192,12 @@ WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = Filter: ((b < 1) AND (d < 0)) (10 rows) +SELECT count(*) FROM aqo_queries WHERE queryid <> fs; -- Should be zero + count +------- + 0 +(1 row) + SET aqo.mode = 'disabled'; EXPLAIN SELECT * FROM aqo_test0 WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; @@ -138,8 +225,14 @@ WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = Filter: ((b < 1) AND (d < 0)) (10 rows) -DROP EXTENSION aqo; +SELECT count(*) FROM aqo_queries WHERE queryid <> fs; -- Should be zero + count +------- + 0 +(1 row) + DROP INDEX aqo_test0_idx_a; DROP TABLE aqo_test0; DROP INDEX aqo_test1_idx_a; DROP TABLE aqo_test1; +DROP EXTENSION aqo; diff --git a/expected/aqo_dummy_test.out b/expected/aqo_dummy_test.out new file mode 100644 index 00000000..e69de29b diff --git a/expected/aqo_fdw.out b/expected/aqo_fdw.out new 
file mode 100644 index 00000000..ca69fab4 --- /dev/null +++ b/expected/aqo_fdw.out @@ -0,0 +1,302 @@ +-- Tests on cardinality estimation of FDW-queries: +-- simple ForeignScan. +-- JOIN push-down (check push of baserestrictinfo and joininfo) +-- Aggregate push-down +-- Push-down of groupings with HAVING clause. +CREATE EXTENSION IF NOT EXISTS aqo; +CREATE EXTENSION IF NOT EXISTS postgres_fdw; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +SET aqo.mode = 'learn'; +SET aqo.show_details = 'true'; -- show AQO info for each node and entire query. +SET aqo.show_hash = 'false'; -- a hash value is system-depended. Ignore it. +DO $d$ + BEGIN + EXECUTE $$CREATE SERVER loopback FOREIGN DATA WRAPPER postgres_fdw + OPTIONS (dbname '$$||current_database()||$$', + port '$$||current_setting('port')||$$' + )$$; + END; +$d$; +CREATE USER MAPPING FOR PUBLIC SERVER loopback; +CREATE TABLE local (x int); +CREATE FOREIGN TABLE frgn(x int) SERVER loopback OPTIONS (table_name 'local'); +INSERT INTO frgn (x) VALUES (1); +ANALYZE local; +-- Utility tool. Allow to filter system-dependent strings from explain output. +CREATE FUNCTION expln(query_string text) RETURNS SETOF text AS $$ +BEGIN + RETURN QUERY + EXECUTE format('%s', query_string); + RETURN; +END; +$$ LANGUAGE PLPGSQL; +-- Trivial foreign scan. +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT x FROM frgn; + QUERY PLAN +---------------------------------------------- + Foreign Scan on frgn (actual rows=1 loops=1) + AQO not used + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(5 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT x FROM frgn; + QUERY PLAN +---------------------------------------------- + Foreign Scan on frgn (actual rows=1 loops=1) + AQO: rows=1, error=0% + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(5 rows) + +-- Push down base filters. Use verbose mode to see filters. 
+SELECT str FROM expln(' + EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE) + SELECT x FROM frgn WHERE x < 10; +') AS str; + str +----------------------------------------------------------- + Foreign Scan on public.frgn (actual rows=1 loops=1) + AQO not used + Output: x + Remote SQL: SELECT x FROM public.local WHERE ((x < 10)) + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(7 rows) + +SELECT str FROM expln(' + EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE) + SELECT x FROM frgn WHERE x < 10; +') AS str; + str +----------------------------------------------------------- + Foreign Scan on public.frgn (actual rows=1 loops=1) + AQO: rows=1, error=0% + Output: x + Remote SQL: SELECT x FROM public.local WHERE ((x < 10)) + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT x FROM frgn WHERE x < -10; -- AQO ignores constants + QUERY PLAN +---------------------------------------------- + Foreign Scan on frgn (actual rows=0 loops=1) + AQO: rows=1, error=100% + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(5 rows) + +-- Trivial JOIN push-down. 
+SELECT str FROM expln(' + EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT * FROM frgn AS a, frgn AS b WHERE a.x=b.x; +') AS str WHERE str NOT LIKE '%Sort Method%'; + str +------------------------------------------------------------ + Merge Join (actual rows=1 loops=1) + AQO not used + Merge Cond: (a.x = b.x) + -> Sort (actual rows=1 loops=1) + Sort Key: a.x + -> Foreign Scan on frgn a (actual rows=1 loops=1) + AQO not used + -> Sort (actual rows=1 loops=1) + Sort Key: b.x + -> Foreign Scan on frgn b (actual rows=1 loops=1) + AQO not used + Using aqo: true + AQO mode: LEARN + JOINS: 1 +(14 rows) + +-- Should learn on postgres_fdw nodes +SELECT str FROM expln(' + EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE) + SELECT * FROM frgn AS a, frgn AS b WHERE a.x=b.x; +') AS str; + str +-------------------------------------------------------------------------------------------------------- + Foreign Scan (actual rows=1 loops=1) + AQO: rows=1, error=0% + Output: a.x, b.x + Relations: (public.frgn a) INNER JOIN (public.frgn b) + Remote SQL: SELECT r1.x, r2.x FROM (public.local r1 INNER JOIN public.local r2 ON (((r1.x = r2.x)))) + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(8 rows) + +CREATE TABLE local_a(aid int primary key, aval text); +CREATE TABLE local_b(bid int primary key, aid int references local_a(aid), bval text); +INSERT INTO local_a SELECT i, 'val_' || i FROM generate_series(1,100) i; +INSERT INTO local_b SELECT i, mod((i+random()*10)::numeric, 10) + 1, 'val_' || i FROM generate_series(1,1000) i; +ANALYZE local_a, local_b; +CREATE FOREIGN TABLE frgn_a(aid int, aval text) SERVER loopback OPTIONS (table_name 'local_a'); +CREATE FOREIGN TABLE frgn_b(bid int, aid int, bval text) SERVER loopback OPTIONS (table_name 'local_b'); +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * from frgn_a AS a, frgn_b AS b +WHERE a.aid = b.aid AND b.bval like 'val%'; + QUERY PLAN +----------------------------------------------- + Foreign 
Scan (actual rows=1000 loops=1) + AQO not used + Relations: (frgn_a a) INNER JOIN (frgn_b b) + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(6 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * from frgn_a AS a, frgn_b AS b +WHERE a.aid = b.aid AND b.bval like 'val%'; + QUERY PLAN +----------------------------------------------- + Foreign Scan (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Relations: (frgn_a a) INNER JOIN (frgn_b b) + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(6 rows) + +-- Partitioned join over foreign tables +set enable_partitionwise_join = on; +ALTER SERVER loopback OPTIONS (ADD fdw_tuple_cost '1.0'); +CREATE TABLE local_main_p0(aid int, aval text); +CREATE TABLE local_main_p1(aid int, aval text); +CREATE TABLE main (aid int, aval text) PARTITION BY HASH(aid); +CREATE FOREIGN TABLE main_p0 PARTITION OF main FOR VALUES WITH (MODULUS 3, REMAINDER 0) + SERVER loopback OPTIONS (table_name 'local_main_p0'); +CREATE FOREIGN TABLE main_p1 PARTITION OF main FOR VALUES WITH (MODULUS 3, REMAINDER 1) + SERVER loopback OPTIONS (table_name 'local_main_p1'); +CREATE TABLE main_p2 PARTITION OF main FOR VALUES WITH (MODULUS 3, REMAINDER 2); +CREATE TABLE local_ref_p0(bid int, aid int, bval text); +CREATE TABLE local_ref_p1(bid int, aid int, bval text); +CREATE TABLE ref (bid int, aid int, bval text) PARTITION BY HASH(aid); +CREATE FOREIGN TABLE ref_p0 PARTITION OF ref FOR VALUES WITH (MODULUS 3, REMAINDER 0) + SERVER loopback OPTIONS (table_name 'local_ref_p0'); +CREATE FOREIGN TABLE ref_p1 PARTITION OF ref FOR VALUES WITH (MODULUS 3, REMAINDER 1) + SERVER loopback OPTIONS (table_name 'local_ref_p1'); +CREATE TABLE ref_p2 PARTITION OF ref FOR VALUES WITH (MODULUS 3, REMAINDER 2); +INSERT INTO main SELECT i, 'val_' || i FROM generate_series(1,100) i; +INSERT INTO ref SELECT i, mod(i, 10) + 1, 'val_' || i FROM generate_series(1,1000) i; +ANALYZE local_main_p0, local_main_p1, main_p2; +ANALYZE local_ref_p0, local_ref_p1, ref_p2; 
+SELECT str AS result +FROM expln(' +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * from main AS a, ref AS b +WHERE a.aid = b.aid AND b.bval like ''val%''') AS str +WHERE str NOT LIKE '%Memory%'; + result +-------------------------------------------------------------------- + Append (actual rows=1000 loops=1) + AQO not used + -> Foreign Scan (actual rows=400 loops=1) + AQO not used + Relations: (main_p0 a_1) INNER JOIN (ref_p0 b_1) + -> Foreign Scan (actual rows=300 loops=1) + AQO not used + Relations: (main_p1 a_2) INNER JOIN (ref_p1 b_2) + -> Hash Join (actual rows=300 loops=1) + AQO not used + Hash Cond: (b_3.aid = a_3.aid) + -> Seq Scan on ref_p2 b_3 (actual rows=300 loops=1) + AQO not used + Filter: (bval ~~ 'val%'::text) + -> Hash (actual rows=38 loops=1) + -> Seq Scan on main_p2 a_3 (actual rows=38 loops=1) + AQO not used + Using aqo: true + AQO mode: LEARN + JOINS: 1 +(20 rows) + +SELECT str AS result +FROM expln(' +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * from main AS a, ref AS b +WHERE a.aid = b.aid AND b.bval like ''val%''') AS str +WHERE str NOT LIKE '%Memory%'; + result +-------------------------------------------------------------------- + Append (actual rows=1000 loops=1) + AQO not used + -> Foreign Scan (actual rows=400 loops=1) + AQO: rows=400, error=0% + Relations: (main_p0 a_1) INNER JOIN (ref_p0 b_1) + -> Foreign Scan (actual rows=300 loops=1) + AQO: rows=300, error=0% + Relations: (main_p1 a_2) INNER JOIN (ref_p1 b_2) + -> Hash Join (actual rows=300 loops=1) + AQO: rows=300, error=0% + Hash Cond: (b_3.aid = a_3.aid) + -> Seq Scan on ref_p2 b_3 (actual rows=300 loops=1) + AQO: rows=300, error=0% + Filter: (bval ~~ 'val%'::text) + -> Hash (actual rows=38 loops=1) + -> Seq Scan on main_p2 a_3 (actual rows=38 loops=1) + AQO: rows=38, error=0% + Using aqo: true + AQO mode: LEARN + JOINS: 1 +(20 rows) + +DROP TABLE main, local_main_p0, local_main_p1; +DROP TABLE ref, local_ref_p0, local_ref_p1; +ALTER SERVER 
loopback OPTIONS (DROP fdw_tuple_cost); +reset enable_partitionwise_join; +-- TODO: Non-mergejoinable join condition. +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM frgn AS a, frgn AS b WHERE a.x Seq Scan on aqo_test1 t4 (cost=0.00..1.20 rows=20 width=8) (13 rows) -UPDATE aqo_queries SET learn_aqo = false, use_aqo = true, auto_tuning = false; +SELECT count(*) FROM + (SELECT queryid AS id FROM aqo_queries) AS q1, + LATERAL aqo_queries_update(q1.id, NULL, false, true, false) +; -- set learn = false, use = true, tuning = false + count +------- + 25 +(1 row) + EXPLAIN SELECT * FROM aqo_test0 WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; QUERY PLAN diff --git a/expected/aqo_learn.out b/expected/aqo_learn.out index 9e6c21ee..9a5ca8dd 100644 --- a/expected/aqo_learn.out +++ b/expected/aqo_learn.out @@ -1,3 +1,30 @@ +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +-- The function just copied from stats_ext.sql +create function check_estimated_rows(text) returns table (estimated int, actual int) +language plpgsql as +$$ +declare + ln text; + tmp text[]; + first_row bool := true; +begin + for ln in + execute format('explain analyze %s', $1) + loop + if first_row then + first_row := false; + tmp := regexp_match(ln, 'rows=(\d*) .* rows=(\d*)'); + return query select tmp[1]::int, tmp[2]::int; + end if; + end loop; +end; +$$; CREATE TABLE aqo_test0(a int, b int, c int, d int); WITH RECURSIVE t(a, b, c, d) AS ( @@ -16,7 +43,6 @@ AS ( ) INSERT INTO aqo_test1 (SELECT * FROM t); CREATE INDEX aqo_test1_idx_a ON aqo_test1 (a); ANALYZE aqo_test1; -CREATE EXTENSION aqo; SET aqo.mode = 'intelligent'; EXPLAIN SELECT * FROM aqo_test0 WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; @@ -214,9 +240,57 @@ SELECT count(*) FROM tmp1; 17 (1 row) +-- Remove data on some unneeded instances of tmp1 table. 
+SELECT true AS success FROM aqo_cleanup(); + success +--------- + t +(1 row) + +-- Result of the query below should be empty +SELECT * FROM aqo_query_texts aqt1, aqo_query_texts aqt2 +WHERE aqt1.query_text = aqt2.query_text AND aqt1.queryid <> aqt2.queryid; + queryid | query_text | queryid | query_text +---------+------------+---------+------------ +(0 rows) + +-- Fix the state of the AQO data +SELECT min(reliability),sum(nfeatures),query_text +FROM aqo_data ad, aqo_query_texts aqt +WHERE aqt.queryid = ad.fs +GROUP BY (query_text) ORDER BY (md5(query_text)) +; + min | sum | query_text +---------+-----+---------------------------------------------------------------------------------------- + {1} | 10 | EXPLAIN SELECT t1.a, t2.b FROM aqo_test1 AS t1, aqo_test0 AS t2 + + | | WHERE t1.a < 1 AND t2.b < 1 AND t2.c < 1 AND t2.d < 1 AND t1.a = t2.a; + {1,1,1} | 4 | EXPLAIN SELECT * FROM aqo_test0 + + | | WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; + {1} | 0 | SELECT count(*) FROM tmp1; + {1} | 14 | EXPlAIN SELECT t1.a, t2.b, t3.c + + | | FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 + + | | WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; + {1} | 8 | EXPLAIN SELECT t1.a, t2.b FROM aqo_test0 AS t1, aqo_test0 AS t2 + + | | WHERE t1.a < 1 AND t1.b < 1 AND t2.c < 1 AND t2.d < 1; + {1} | 3 | EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c + + | | FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3 + + | | WHERE t1.a = t2.b AND t2.a = t3.b; + {1} | 6 | EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c, t4.a AS d + + | | FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4 + + | | WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; +(7 rows) + DROP TABLE tmp1; SET aqo.mode = 'controlled'; -UPDATE aqo_queries SET learn_aqo = false, use_aqo = false, auto_tuning = false; +SELECT count(*) FROM + (SELECT queryid AS id FROM aqo_queries) AS q1, + LATERAL aqo_queries_update(q1.id, NULL, false, false, false) +; -- Disable all 
AQO query classes + count +------- + 8 +(1 row) + EXPLAIN SELECT * FROM aqo_test0 WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; QUERY PLAN @@ -268,7 +342,7 @@ WHERE t1.a < 1 AND t2.b < 1 AND t2.c < 1 AND t2.d < 1 AND t1.a = t2.a; Filter: ((b < 1) AND (c < 1) AND (d < 1)) (6 rows) -EXPlAIN SELECT t1.a, t2.b, t3.c +EXPLAIN SELECT t1.a, t2.b, t3.c FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; QUERY PLAN @@ -285,21 +359,15 @@ WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = Filter: ((b < 1) AND (d < 0)) (10 rows) -EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c -FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3 -WHERE t1.a = t2.b AND t2.a = t3.b; - QUERY PLAN -------------------------------------------------------------------------------- - Hash Join (cost=2.90..4.65 rows=20 width=12) - Hash Cond: (t2.a = t3.b) - -> Hash Join (cost=1.45..2.92 rows=20 width=8) - Hash Cond: (t1.a = t2.b) - -> Seq Scan on aqo_test1 t1 (cost=0.00..1.20 rows=20 width=4) - -> Hash (cost=1.20..1.20 rows=20 width=8) - -> Seq Scan on aqo_test1 t2 (cost=0.00..1.20 rows=20 width=8) - -> Hash (cost=1.20..1.20 rows=20 width=8) - -> Seq Scan on aqo_test1 t3 (cost=0.00..1.20 rows=20 width=8) -(9 rows) +SELECT * FROM check_estimated_rows(' + SELECT t1.a AS a, t2.a AS b, t3.a AS c + FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3 + WHERE t1.a = t2.b AND t2.a = t3.b; +'); + estimated | actual +-----------+-------- + 20 | 18 +(1 row) EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c, t4.a AS d FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4 @@ -321,7 +389,15 @@ WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; -> Seq Scan on aqo_test1 t4 (cost=0.00..1.20 rows=20 width=8) (13 rows) -UPDATE aqo_queries SET learn_aqo = false, use_aqo = true, auto_tuning = false; +SELECT count(*) FROM + (SELECT queryid AS id FROM aqo_queries) AS q1, + LATERAL 
aqo_queries_update(q1.id, NULL, false, true, false) +; -- learn = false, use = true, tuning = false + count +------- + 8 +(1 row) + EXPLAIN SELECT * FROM aqo_test0 WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; QUERY PLAN @@ -373,7 +449,7 @@ WHERE t1.a < 1 AND t2.b < 1 AND t2.c < 1 AND t2.d < 1 AND t1.a = t2.a; Filter: ((b < 1) AND (c < 1) AND (d < 1)) (6 rows) -EXPlAIN SELECT t1.a, t2.b, t3.c +EXPLAIN SELECT t1.a, t2.b, t3.c FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; QUERY PLAN @@ -390,42 +466,257 @@ WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = Filter: ((b < 1) AND (d < 0)) (10 rows) -EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c -FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3 -WHERE t1.a = t2.b AND t2.a = t3.b; - QUERY PLAN -------------------------------------------------------------------------------- - Hash Join (cost=2.90..4.64 rows=18 width=12) - Hash Cond: (t2.a = t3.b) - -> Hash Join (cost=1.45..2.92 rows=19 width=8) - Hash Cond: (t1.a = t2.b) - -> Seq Scan on aqo_test1 t1 (cost=0.00..1.20 rows=20 width=4) - -> Hash (cost=1.20..1.20 rows=20 width=8) - -> Seq Scan on aqo_test1 t2 (cost=0.00..1.20 rows=20 width=8) - -> Hash (cost=1.20..1.20 rows=20 width=8) - -> Seq Scan on aqo_test1 t3 (cost=0.00..1.20 rows=20 width=8) -(9 rows) +SELECT * FROM check_estimated_rows(' + SELECT t1.a AS a, t2.a AS b, t3.a AS c + FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3 + WHERE t1.a = t2.b AND t2.a = t3.b; +'); + estimated | actual +-----------+-------- + 18 | 18 +(1 row) -EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c, t4.a AS d -FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4 -WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; - QUERY PLAN -------------------------------------------------------------------------------------- - Hash Join (cost=4.35..6.33 rows=17 width=16) - Hash Cond: (t3.a = t4.b) - -> 
Hash Join (cost=2.90..4.64 rows=18 width=12) - Hash Cond: (t2.a = t3.b) - -> Hash Join (cost=1.45..2.92 rows=19 width=8) - Hash Cond: (t1.a = t2.b) - -> Seq Scan on aqo_test1 t1 (cost=0.00..1.20 rows=20 width=4) - -> Hash (cost=1.20..1.20 rows=20 width=8) - -> Seq Scan on aqo_test1 t2 (cost=0.00..1.20 rows=20 width=8) - -> Hash (cost=1.20..1.20 rows=20 width=8) - -> Seq Scan on aqo_test1 t3 (cost=0.00..1.20 rows=20 width=8) - -> Hash (cost=1.20..1.20 rows=20 width=8) - -> Seq Scan on aqo_test1 t4 (cost=0.00..1.20 rows=20 width=8) -(13 rows) +SELECT * FROM check_estimated_rows(' + SELECT t1.a AS a, t2.a AS b, t3.a AS c, t4.a AS d + FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4 + WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; +'); + estimated | actual +-----------+-------- + 17 | 17 +(1 row) + +-- Test limit on number of joins +SET aqo.mode = 'learn'; +SELECT * FROM aqo_drop_class(0); +ERROR: [AQO] Cannot remove basic class 0. +SELECT * FROM aqo_drop_class(42); +ERROR: [AQO] Nothing to remove for the class 42. 
+-- Remove all data from ML knowledge base +SELECT count(*) FROM ( +SELECT aqo_drop_class(q1.id::bigint) FROM ( + SELECT queryid AS id + FROM aqo_queries WHERE queryid <> 0) AS q1 +) AS q2; + count +------- + 7 +(1 row) + +SELECT count(*) FROM aqo_data; + count +------- + 0 +(1 row) + +SET aqo.join_threshold = 3; +SELECT * FROM check_estimated_rows('SELECT * FROM aqo_test1;'); + estimated | actual +-----------+-------- + 20 | 20 +(1 row) + +SELECT * FROM check_estimated_rows(' + SELECT * FROM aqo_test1 AS t1, aqo_test1 AS t2 WHERE t1.a = t2.b'); + estimated | actual +-----------+-------- + 20 | 19 +(1 row) + +SELECT count(*) FROM aqo_data; -- Return 0 - do not learn on the queries above + count +------- + 0 +(1 row) + +SELECT * FROM check_estimated_rows(' + SELECT * + FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4 + WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; +'); -- Learn on the query + estimated | actual +-----------+-------- + 20 | 17 +(1 row) + +SELECT count(*) FROM + (SELECT fs FROM aqo_data GROUP BY (fs)) AS q1 +; + count +------- + 1 +(1 row) + +SELECT query_text FROM aqo_query_texts WHERE queryid <> 0; -- Check query + query_text +---------------------------------------------------------------------------- + explain analyze + + SELECT * + + FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4+ + WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; + + +(1 row) + +SET aqo.join_threshold = 1; +SELECT * FROM check_estimated_rows('SELECT * FROM aqo_test1;'); + estimated | actual +-----------+-------- + 20 | 20 +(1 row) + +SELECT * FROM check_estimated_rows( + 'SELECT * FROM aqo_test1 AS t1, aqo_test1 AS t2 WHERE t1.a = t2.b'); + estimated | actual +-----------+-------- + 20 | 19 +(1 row) + +SELECT count(*) FROM + (SELECT fs FROM aqo_data GROUP BY (fs)) AS q1 +; -- Learn on a query with one join + count +------- + 2 +(1 row) + +SET aqo.join_threshold = 0; +SELECT * FROM check_estimated_rows('SELECT * FROM 
aqo_test1;'); + estimated | actual +-----------+-------- + 20 | 20 +(1 row) + +SELECT count(*) FROM + (SELECT fs FROM aqo_data GROUP BY (fs)) AS q1 +; -- Learn on the query without any joins now + count +------- + 3 +(1 row) + +SET aqo.join_threshold = 1; +SELECT * FROM check_estimated_rows('SELECT * FROM aqo_test1 t1 JOIN aqo_test1 AS t2 USING (a)'); + estimated | actual +-----------+-------- + 20 | 20 +(1 row) + +SELECT count(*) FROM + (SELECT fs FROM aqo_data GROUP BY (fs)) AS q1 +; -- See one more query in the AQO knowledge base + count +------- + 4 +(1 row) + +SELECT * FROM check_estimated_rows('WITH selected AS (SELECT * FROM aqo_test1 t1) SELECT count(*) FROM selected'); + estimated | actual +-----------+-------- + 1 | 1 +(1 row) + +SELECT * FROM check_estimated_rows(' + WITH selected AS ( + SELECT * FROM aqo_test1 t1 JOIN aqo_test1 AS t2 USING (a) + ) SELECT count(*) FROM selected') +; + estimated | actual +-----------+-------- + 1 | 1 +(1 row) + +SELECT count(*) FROM (SELECT fs FROM aqo_data GROUP BY (fs)) AS q1; -- +1 + count +------- + 5 +(1 row) + +-- InitPlan +SELECT * FROM check_estimated_rows(' + SELECT * FROM aqo_test1 AS t1 WHERE t1.a IN ( + SELECT t2.a FROM aqo_test1 AS t2 JOIN aqo_test1 AS t3 ON (t2.b = t3.a) + )'); + estimated | actual +-----------+-------- + 20 | 19 +(1 row) + +SELECT count(*) FROM (SELECT fs FROM aqo_data GROUP BY (fs)) AS q1; -- +1 + count +------- + 6 +(1 row) + +-- SubPlan +SELECT * FROM check_estimated_rows(' + SELECT ( + SELECT avg(t2.a) FROM aqo_test1 AS t2 JOIN aqo_test1 AS t3 ON (t2.b = t3.a) AND (t2.a = t1.a) + ) FROM aqo_test1 AS t1; +'); + estimated | actual +-----------+-------- + 20 | 20 +(1 row) + +SELECT count(*) FROM (SELECT fs FROM aqo_data GROUP BY (fs)) AS q1; -- +1 + count +------- + 7 +(1 row) + +-- Subquery +SET aqo.join_threshold = 3; +SELECT * FROM check_estimated_rows(' + SELECT * FROM aqo_test1 AS t1, + (SELECT t2.a FROM aqo_test1 AS t2 JOIN aqo_test1 AS t3 ON (t2.b = t3.a)) q1 + WHERE q1.a*t1.a = 
t1.a + 15; +'); -- Two JOINs, ignore it + estimated | actual +-----------+-------- + 2 | 4 +(1 row) + +SELECT count(*) FROM (SELECT fs FROM aqo_data GROUP BY (fs)) AS q1; -- +1 + count +------- + 7 +(1 row) + +SET aqo.join_threshold = 2; +SELECT * FROM check_estimated_rows(' + SELECT * FROM aqo_test1 AS t1, + (SELECT t2.a FROM aqo_test1 AS t2 JOIN aqo_test1 AS t3 ON (t2.b = t3.a)) q1 + WHERE q1.a*t1.a = t1.a + 15; +'); -- One JOIN from subquery, another one from the query + estimated | actual +-----------+-------- + 2 | 4 +(1 row) + +SELECT count(*) FROM (SELECT fs FROM aqo_data GROUP BY (fs)) AS q1; -- +1 + count +------- + 8 +(1 row) + +SELECT * FROM check_estimated_rows(' + WITH selected AS ( + SELECT t2.a FROM aqo_test1 t1 JOIN aqo_test1 AS t2 USING (a) + ) SELECT count(*) FROM aqo_test1 t3, selected WHERE selected.a = t3.a') +; -- One JOIN extracted from CTE, another - from a FROM part of the query + estimated | actual +-----------+-------- + 1 | 1 +(1 row) + +SELECT count(*) FROM (SELECT fs FROM aqo_data GROUP BY (fs)) AS q1; -- +1 + count +------- + 9 +(1 row) +DROP FUNCTION check_estimated_rows; +RESET aqo.join_threshold; DROP INDEX aqo_test0_idx_a; DROP TABLE aqo_test0; DROP INDEX aqo_test1_idx_a; diff --git a/expected/aqo_query_stat.out b/expected/aqo_query_stat.out new file mode 100644 index 00000000..2478b4e5 --- /dev/null +++ b/expected/aqo_query_stat.out @@ -0,0 +1,155 @@ +-- Testing aqo_query_stat update logic +-- Note: this test assumes STAT_SAMPLE_SIZE to be 20. 
+CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +DROP TABLE IF EXISTS A; +NOTICE: table "a" does not exist, skipping +CREATE TABLE A AS SELECT x FROM generate_series(1, 20) as x; +ANALYZE A; +DROP TABLE IF EXISTS B; +NOTICE: table "b" does not exist, skipping +CREATE TABLE B AS SELECT y FROM generate_series(1, 10) as y; +ANALYZE B; +CREATE OR REPLACE FUNCTION round_array (double precision[]) +RETURNS double precision[] +LANGUAGE SQL +AS $$ + SELECT array_agg(round(elem::numeric, 3)) + FROM unnest($1) as arr(elem); +$$ +SET aqo.mode = 'learn'; +SET aqo.force_collect_stat = 'on'; +SET aqo.min_neighbors_for_predicting = 1; +-- First test: adding real records +SET aqo.mode = 'disabled'; +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 15 AND B.y < 5; + count +------- + 20 +(1 row) + +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 16 AND B.y < 6; + count +------- + 20 +(1 row) + +SET aqo.mode = 'learn'; +SELECT aqo_enable_class(queryid) FROM aqo_queries WHERE queryid != 0; + aqo_enable_class +------------------ + +(1 row) + +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 17 AND B.y < 7; + count +------- + 18 +(1 row) + +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 18 AND B.y < 8; + count +------- + 14 +(1 row) + +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 19 AND B.y < 9; + count +------- + 8 +(1 row) + +-- Ignore unstable time-related columns +SELECT round_array(cardinality_error_with_aqo) AS error_aqo, round_array(cardinality_error_without_aqo) AS error_no_aqo, executions_with_aqo, executions_without_aqo FROM aqo_query_stat; + error_aqo | error_no_aqo | executions_with_aqo | executions_without_aqo +--------------------+--------------+---------------------+------------------------ + {0.22,0.362,0.398} | {0.392,0.21} | 3 | 2 +(1 row) + +SELECT true AS success from aqo_reset(); + success +--------- + t +(1 row) + +-- Second test: fake data in 
aqo_query_stat +SET aqo.mode = 'disabled'; +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 5 AND B.y < 100; + count +------- + 135 +(1 row) + +SELECT aqo_query_stat_update( + queryid, + '{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20}', '{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20}', + '{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20}', '{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20}', + '{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20}', '{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20}', + 100, 50) +FROM aqo_query_stat; + aqo_query_stat_update +----------------------- + t +(1 row) + +SELECT round_array(cardinality_error_with_aqo) AS error_aqo, round_array(cardinality_error_without_aqo) AS error_no_aqo, executions_with_aqo, executions_without_aqo FROM aqo_query_stat; + error_aqo | error_no_aqo | executions_with_aqo | executions_without_aqo +------------------------------------------------------+------------------------------------------------------+---------------------+------------------------ + {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20} | {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20} | 100 | 50 +(1 row) + +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 10 AND B.y < 100; + count +------- + 100 +(1 row) + +SET aqo.mode = 'learn'; +SELECT aqo_enable_class(queryid) FROM aqo_queries WHERE queryid != 0; + aqo_enable_class +------------------ + +(1 row) + +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 15 AND B.y < 5; + count +------- + 20 +(1 row) + +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 16 AND B.y < 6; + count +------- + 20 +(1 row) + +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 17 AND B.y < 7; + count +------- + 18 +(1 row) + +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 18 AND B.y < 8; + count +------- + 14 +(1 row) + +SELECT round_array(cardinality_error_with_aqo) AS error_aqo, round_array(cardinality_error_without_aqo) AS error_no_aqo, 
executions_with_aqo, executions_without_aqo FROM aqo_query_stat; + error_aqo | error_no_aqo | executions_with_aqo | executions_without_aqo +---------------------------------------------------------------------+----------------------------------------------------------+---------------------+------------------------ + {5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,0.392,0.344,0.34,0.362} | {2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,0.218} | 104 | 51 +(1 row) + +SET aqo.mode TO DEFAULT; +SET aqo.force_collect_stat TO DEFAULT; +SET aqo.min_neighbors_for_predicting TO DEFAULT; +DROP FUNCTION round_array; +DROP TABLE A; +DROP TABLE B; +DROP EXTENSION aqo CASCADE; diff --git a/expected/clean_aqo_data.out b/expected/clean_aqo_data.out new file mode 100644 index 00000000..49b64832 --- /dev/null +++ b/expected/clean_aqo_data.out @@ -0,0 +1,300 @@ +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +SET aqo.mode = 'learn'; +DROP TABLE IF EXISTS a; +NOTICE: table "a" does not exist, skipping +DROP TABLE IF EXISTS b; +NOTICE: table "b" does not exist, skipping +CREATE TABLE a(); +SELECT * FROM a; +-- +(0 rows) + +SELECT 'a'::regclass::oid AS a_oid \gset +SELECT true AS success FROM aqo_cleanup(); + success +--------- + t +(1 row) + +/* + * lines with a_oid in aqo_data, + * lines with fs corresponding to a_oid in aqo_queries, + * lines with queryid corresponding to a_oid's fs in aqo_query_texts, + * lines with queryid corresponding to a_oid's fs in aqo_query_stat + * should remain + */ +SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); + count +------- + 1 +(1 row) + +SELECT count(*) FROM aqo_queries WHERE + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)); + count +------- + 1 +(1 row) + +SELECT count(*) FROM aqo_query_texts WHERE + aqo_query_texts.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE 
:a_oid=ANY(oids))); + count +------- + 1 +(1 row) + +SELECT count(*) FROM aqo_query_stat WHERE + aqo_query_stat.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids))); + count +------- + 1 +(1 row) + +DROP TABLE a; +SELECT true AS success FROM aqo_cleanup(); + success +--------- + t +(1 row) + +/* + * lines with a_oid in aqo_data, + * lines with a_oid's fs EQUAL TO queryid in aqo_queries, + * lines with queryid corresponding to a_oid's fs in aqo_query_texts, + * lines with queryid corresponding to a_oid's fs in aqo_query_stat, + * should be deleted +*/ +SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); + count +------- + 0 +(1 row) + +SELECT count(*) FROM aqo_queries WHERE + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid; + count +------- + 0 +(1 row) + +SELECT count(*) FROM aqo_query_texts WHERE + aqo_query_texts.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid); + count +------- + 0 +(1 row) + +SELECT count(*) FROM aqo_query_stat WHERE + aqo_query_stat.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid); + count +------- + 0 +(1 row) + +CREATE TABLE a(); +CREATE TABLE b(); +SELECT * FROM a; +-- +(0 rows) + +SELECT * FROM b; +-- +(0 rows) + +SELECT * FROM b CROSS JOIN a; +-- +(0 rows) + +SELECT 'a'::regclass::oid AS a_oid \gset +SELECT 'b'::regclass::oid AS b_oid \gset +-- new lines added to aqo_data +SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); + count +------- + 3 +(1 row) + +SELECT count(*) FROM aqo_queries WHERE + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)); + count +------- + 2 +(1 
row) + +SELECT count(*) FROM aqo_query_texts WHERE + aqo_query_texts.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids))); + count +------- + 2 +(1 row) + +SELECT count(*) FROM aqo_query_stat WHERE + aqo_query_stat.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids))); + count +------- + 2 +(1 row) + +SELECT count(*) FROM aqo_data WHERE :b_oid=ANY(oids); + count +------- + 3 +(1 row) + +SELECT count(*) FROM aqo_queries WHERE + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)); + count +------- + 2 +(1 row) + +SELECT count(*) FROM aqo_query_texts WHERE + aqo_query_texts.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids))); + count +------- + 2 +(1 row) + +SELECT count(*) FROM aqo_query_stat WHERE + aqo_query_stat.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids))); + count +------- + 2 +(1 row) + +DROP TABLE a; +SELECT true AS success FROM aqo_cleanup(); + success +--------- + t +(1 row) + +/* + * lines corresponding to a_oid and both a_oid's fs deleted in aqo_data, + * lines with fs corresponding to a_oid deleted in aqo_queries, + * lines with queryid corresponding to a_oid's fs deleted in aqo_query_texts, + * lines with queryid corresponding to a_oid's fs deleted in aqo_query_stat, + */ +SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); + count +------- + 0 +(1 row) + +SELECT count(*) FROM aqo_queries WHERE + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid; + count +------- + 0 +(1 row) + +SELECT count(*) FROM aqo_query_texts WHERE + aqo_query_texts.queryid = ANY(SELECT aqo_queries.queryid FROM 
aqo_queries WHERE + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid); + count +------- + 0 +(1 row) + +SELECT count(*) FROM aqo_query_stat WHERE + aqo_query_stat.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid); + count +------- + 0 +(1 row) + +-- lines corresponding to b_oid in all theese tables should remain +SELECT count(*) FROM aqo_data WHERE :b_oid=ANY(oids); + count +------- + 1 +(1 row) + +SELECT count(*) FROM aqo_queries WHERE + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid; + count +------- + 1 +(1 row) + +SELECT count(*) FROM aqo_query_texts WHERE + aqo_query_texts.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid); + count +------- + 1 +(1 row) + +SELECT count(*) FROM aqo_query_stat WHERE + aqo_query_stat.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid); + count +------- + 1 +(1 row) + +DROP TABLE b; +SELECT true AS success FROM aqo_cleanup(); + success +--------- + t +(1 row) + +-- lines corresponding to b_oid in theese tables deleted +SELECT count(*) FROM aqo_data WHERE :b_oid=ANY(oids); + count +------- + 0 +(1 row) + +SELECT count(*) FROM aqo_queries WHERE + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid; + count +------- + 0 +(1 row) + +SELECT count(*) FROM aqo_query_texts WHERE + aqo_query_texts.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE 
:b_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid); + count +------- + 0 +(1 row) + +SELECT count(*) FROM aqo_query_stat WHERE + aqo_query_stat.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid); + count +------- + 0 +(1 row) + +DROP EXTENSION aqo; diff --git a/expected/eclasses.out b/expected/eclasses.out new file mode 100644 index 00000000..01650286 --- /dev/null +++ b/expected/eclasses.out @@ -0,0 +1,1085 @@ +-- Testing for working with equivalence classes +CREATE EXTENSION IF NOT EXISTS aqo; +SET aqo.show_details = 'on'; +SET aqo.show_hash = 'off'; +SET aqo.mode = 'forced'; +-- +-- Returns string-by-string explain of a query. Made for removing some strings +-- from the explain output. +-- +CREATE OR REPLACE FUNCTION expln(query_string text) RETURNS SETOF text AS $$ +BEGIN + RETURN QUERY + EXECUTE format('EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) %s', query_string); + RETURN; +END; +$$ LANGUAGE PLPGSQL; +-- Integer fields +CREATE TABLE aqo_test_int(a int, b int, c int); +INSERT INTO aqo_test_int SELECT (x/10)::int, (x/100)::int, (x/1000)::int +FROM generate_series(0, 9999) x; +ANALYZE aqo_test_int; +CREATE TABLE aqo_test_int1(a int, b int, c int); +INSERT INTO aqo_test_int1 SELECT (x/10)::int, (x/10)::int, (x/10)::int +FROM generate_series(0, 999) x; +ANALYZE aqo_test_int1; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +-- Not equivalent queries +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE a = b AND a = 0; + QUERY PLAN +--------------------------------------------------- + Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO not used + Filter: ((a = 0) AND (b = 0)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM 
aqo_test_int +WHERE a = c AND a = 0; + QUERY PLAN +--------------------------------------------------- + Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO not used + Filter: ((a = 0) AND (c = 0)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE b = c AND a = 0; + QUERY PLAN +--------------------------------------------------- + Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO not used + Filter: ((b = c) AND (a = 0)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE b = a AND c = b AND a = any('{0, 1, 2}'::int[]); + QUERY PLAN +---------------------------------------------------------------------- + Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO not used + Filter: ((b = a) AND (a = c) AND (a = ANY ('{0,1,2}'::integer[]))) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE b = a AND c = b AND a = all('{0, 1, 2}'::int[]); + QUERY PLAN +---------------------------------------------------------------------- + Seq Scan on aqo_test_int (actual rows=0 loops=1) + AQO not used + Filter: ((b = a) AND (a = c) AND (a = ALL ('{0,1,2}'::integer[]))) + Rows Removed by Filter: 10000 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +-- Must be 5 +SELECT count(*) FROM aqo_data; + count +------- + 5 +(1 row) + +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +-- Equivalent queries +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE a = b AND a = c AND a = 0; + QUERY PLAN +--------------------------------------------------- + Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO not used + Filter: 
((a = 0) AND (b = 0) AND (c = 0)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE a = b AND b = c AND a = 0; + QUERY PLAN +--------------------------------------------------- + Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = 0) AND (b = 0) AND (c = 0)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE a = b AND a = c AND b = c AND a = 0; + QUERY PLAN +--------------------------------------------------- + Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = 0) AND (b = 0) AND (c = 0)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE a = b AND b = c AND a = 0 AND b = 0; + QUERY PLAN +--------------------------------------------------- + Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = 0) AND (b = 0) AND (c = 0)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE a = b AND b = c AND a = 0 AND c = 0; + QUERY PLAN +--------------------------------------------------- + Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = 0) AND (b = 0) AND (c = 0)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE a = b AND b = c AND a = 0 AND b = 0 AND c = 0; + QUERY PLAN +--------------------------------------------------- + Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO: 
rows=10, error=0% + Filter: ((a = 0) AND (b = 0) AND (c = 0)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE b = a AND c = b AND 0 = a AND 0 = b AND 0 = c; + QUERY PLAN +--------------------------------------------------- + Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((b = 0) AND (a = 0) AND (c = 0)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +-- Must be 1 +SELECT count(*) FROM aqo_data; + count +------- + 1 +(1 row) + +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +-- Tests with JOIN clauses. +-- Case 1. +-- 4 cols in 1 eclass, all of them is 0. +-- 3 nodes with unique FSS. +SELECT str AS result FROM expln(' +SELECT * FROM ( + SELECT * FROM aqo_test_int + WHERE a = b AND b = 0) t1 +JOIN ( + SELECT * FROM aqo_test_int1 + WHERE a = b) t2 +ON t1.a = t2.a') AS str +WHERE str NOT LIKE '%Memory%'; + result +----------------------------------------------------------- + Nested Loop (actual rows=100 loops=1) + AQO not used + -> Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO not used + Filter: ((a = 0) AND (b = 0)) + Rows Removed by Filter: 9990 + -> Seq Scan on aqo_test_int1 (actual rows=10 loops=10) + AQO not used + Filter: ((a = 0) AND (b = 0)) + Rows Removed by Filter: 990 + Using aqo: true + AQO mode: FORCED + JOINS: 1 +(13 rows) + +-- Case 2. +-- 4 cols in 2 eclasses, 2 is 0 and 2 is 1. +-- The top node must be unique, but all of nodes like in a query of case 1. 
+SELECT str AS result FROM expln(' +SELECT * FROM ( + SELECT * FROM aqo_test_int + WHERE a = b AND b = 0) t1 +JOIN ( + SELECT * FROM aqo_test_int1 + WHERE a = b) t2 +ON 1 = t2.a') AS str +WHERE str NOT LIKE '%Memory%'; + result +---------------------------------------------------------------- + Nested Loop (actual rows=100 loops=1) + AQO not used + -> Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = 0) AND (b = 0)) + Rows Removed by Filter: 9990 + -> Materialize (actual rows=10 loops=10) + AQO: rows=10, error=0% + -> Seq Scan on aqo_test_int1 (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = 1) AND (b = 1)) + Rows Removed by Filter: 990 + Using aqo: true + AQO mode: FORCED + JOINS: 1 +(15 rows) + +-- Case 3. +-- 4 cols in 2 eclasses, 2 is 0 and 2 is equal but not a const. +-- 1 scan node with FSS like in case 2 and 2 nodes with unique FSS. +SELECT str AS result FROM expln(' +SELECT * FROM ( + SELECT * FROM aqo_test_int + WHERE a = b) t1 +JOIN ( + SELECT * FROM aqo_test_int1 + WHERE a = b) t2 +ON t1.a = 0') AS str +WHERE str NOT LIKE '%Memory%'; + result +------------------------------------------------------------------ + Nested Loop (actual rows=10000 loops=1) + AQO not used + -> Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = 0) AND (b = 0)) + Rows Removed by Filter: 9990 + -> Materialize (actual rows=1000 loops=10) + AQO not used + -> Seq Scan on aqo_test_int1 (actual rows=1000 loops=1) + AQO not used + Filter: (a = b) + Using aqo: true + AQO mode: FORCED + JOINS: 1 +(14 rows) + +-- Case 4. +-- 4 cols in 1 eclass, all of them is 0. +-- 3 nodes with unique FSS. This is not case 1, because it is SEMI-JOIN. 
+SELECT str AS result FROM expln(' +SELECT * FROM ( + SELECT * FROM aqo_test_int + WHERE a = b AND b = 0) t1 +WHERE EXISTS ( + SELECT * FROM aqo_test_int1 + WHERE a = b AND t1.a = a)') AS str +WHERE str NOT LIKE '%Memory%'; + result +---------------------------------------------------------- + Nested Loop Semi Join (actual rows=10 loops=1) + AQO not used + -> Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO not used + Filter: ((a = 0) AND (b = 0)) + Rows Removed by Filter: 9990 + -> Seq Scan on aqo_test_int1 (actual rows=1 loops=10) + AQO not used + Filter: ((a = 0) AND (b = 0)) + Using aqo: true + AQO mode: FORCED + JOINS: 1 +(12 rows) + +-- Case 5. +-- 4 cols in 1 eclass, all of them is 0. +-- The top node with unique FSS. Leaf nodes like in the case 4. +SELECT str AS result FROM expln(' +SELECT * FROM ( + SELECT * FROM aqo_test_int + WHERE a = b AND b = 0) t1 +WHERE NOT EXISTS ( + SELECT * FROM aqo_test_int1 + WHERE a = b AND t1.a = a)') AS str +WHERE str NOT LIKE '%Memory%'; + result +--------------------------------------------------------------- + Nested Loop Anti Join (actual rows=0 loops=1) + AQO not used + Join Filter: (aqo_test_int.a = aqo_test_int1.a) + -> Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = 0) AND (b = 0)) + Rows Removed by Filter: 9990 + -> Materialize (actual rows=1 loops=10) + AQO: rows=1, error=0% + -> Seq Scan on aqo_test_int1 (actual rows=1 loops=1) + AQO: rows=1, error=0% + Filter: ((a = 0) AND (b = 0)) + Using aqo: true + AQO mode: FORCED + JOINS: 1 +(15 rows) + +-- Must be 10 rows. +SELECT count(*) FROM aqo_data; + count +------- + 13 +(1 row) + +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +-- Case 6. +-- 4 cols in 1 eclass. 
+SELECT str AS result FROM expln(' +SELECT * FROM ( + SELECT * FROM aqo_test_int + WHERE b = a) t1 +JOIN ( + SELECT * FROM aqo_test_int1 + WHERE b = a) t2 +ON t1.a = t2.b') AS str +WHERE str NOT LIKE '%Memory%'; + result +------------------------------------------------------------------ + Hash Join (actual rows=100 loops=1) + AQO not used + Hash Cond: (aqo_test_int.b = aqo_test_int1.b) + -> Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO not used + Filter: (b = a) + Rows Removed by Filter: 9990 + -> Hash (actual rows=1000 loops=1) + -> Seq Scan on aqo_test_int1 (actual rows=1000 loops=1) + AQO not used + Filter: (b = a) + Using aqo: true + AQO mode: FORCED + JOINS: 1 +(14 rows) + +SELECT str AS result FROM expln(' +SELECT * FROM ( + SELECT * FROM aqo_test_int + WHERE b = a) t1 +JOIN ( + SELECT * FROM aqo_test_int1 + WHERE b = a) t2 +ON t1.a = t2.a') AS str +WHERE str NOT LIKE '%Memory%'; + result +------------------------------------------------------------------ + Hash Join (actual rows=100 loops=1) + AQO: rows=100, error=0% + Hash Cond: (aqo_test_int.b = aqo_test_int1.b) + -> Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: (b = a) + Rows Removed by Filter: 9990 + -> Hash (actual rows=1000 loops=1) + -> Seq Scan on aqo_test_int1 (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Filter: (b = a) + Using aqo: true + AQO mode: FORCED + JOINS: 1 +(14 rows) + +SELECT str AS result FROM expln(' +SELECT * FROM ( + SELECT * FROM aqo_test_int + WHERE a = b) t1 +JOIN ( + SELECT * FROM aqo_test_int1 + WHERE a = b) t2 +ON t1.a = t2.a') AS str +WHERE str NOT LIKE '%Memory%'; + result +------------------------------------------------------------------ + Hash Join (actual rows=100 loops=1) + AQO: rows=100, error=0% + Hash Cond: (aqo_test_int.a = aqo_test_int1.a) + -> Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: (a = b) + Rows Removed by Filter: 9990 + -> Hash (actual rows=1000 loops=1) + 
-> Seq Scan on aqo_test_int1 (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Filter: (a = b) + Using aqo: true + AQO mode: FORCED + JOINS: 1 +(14 rows) + +SELECT str AS result FROM expln(' +SELECT * FROM ( + SELECT * FROM aqo_test_int + WHERE a = b) t1 +JOIN ( + SELECT * FROM aqo_test_int1 + WHERE a = b) t2 +ON t1.b = t2.b') AS str +WHERE str NOT LIKE '%Memory%'; + result +------------------------------------------------------------------ + Hash Join (actual rows=100 loops=1) + AQO: rows=100, error=0% + Hash Cond: (aqo_test_int.a = aqo_test_int1.a) + -> Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: (a = b) + Rows Removed by Filter: 9990 + -> Hash (actual rows=1000 loops=1) + -> Seq Scan on aqo_test_int1 (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Filter: (a = b) + Using aqo: true + AQO mode: FORCED + JOINS: 1 +(14 rows) + +SELECT str AS result FROM expln(' +SELECT * FROM ( + SELECT * FROM aqo_test_int + WHERE a = b) t1 +JOIN ( + SELECT * FROM aqo_test_int1 + WHERE a = b) t2 +ON t1.b::text = t2.b::text') AS str +WHERE str NOT LIKE '%Memory%'; + result +----------------------------------------------------------------- + Hash Join (actual rows=100 loops=1) + AQO not used + Hash Cond: ((aqo_test_int1.b)::text = (aqo_test_int.b)::text) + -> Seq Scan on aqo_test_int1 (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Filter: (a = b) + -> Hash (actual rows=10 loops=1) + -> Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: (a = b) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 1 +(14 rows) + +-- Must be 4 rows. 
+SELECT count(*) FROM aqo_data; + count +------- + 4 +(1 row) + +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +-- Text fields +CREATE TABLE aqo_test_text(a text, b text, c text); +INSERT INTO aqo_test_text +SELECT (x/10)::text, (x/100)::text, (x/1000)::text +FROM generate_series(0, 9999) x; +ANALYZE aqo_test_text; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +-- Not equivalent queries +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_text +WHERE a = b AND a = '0'; + QUERY PLAN +---------------------------------------------------- + Seq Scan on aqo_test_text (actual rows=10 loops=1) + AQO not used + Filter: ((a = '0'::text) AND (b = '0'::text)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_text +WHERE a = c AND a = '0'; + QUERY PLAN +---------------------------------------------------- + Seq Scan on aqo_test_text (actual rows=10 loops=1) + AQO not used + Filter: ((a = '0'::text) AND (c = '0'::text)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_text +WHERE b = c AND a = '0'; + QUERY PLAN +---------------------------------------------------- + Seq Scan on aqo_test_text (actual rows=10 loops=1) + AQO not used + Filter: ((b = c) AND (a = '0'::text)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +-- Must be 3 +SELECT count(*) FROM aqo_data; + count +------- + 3 +(1 row) + +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +-- Equivalent queries +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_text +WHERE a = b AND a = c AND a = '0'; + QUERY PLAN +--------------------------------------------------------------------- + Seq Scan on 
aqo_test_text (actual rows=10 loops=1) + AQO not used + Filter: ((a = '0'::text) AND (b = '0'::text) AND (c = '0'::text)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_text +WHERE a = b AND b = c AND a = '0'; + QUERY PLAN +--------------------------------------------------------------------- + Seq Scan on aqo_test_text (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = '0'::text) AND (b = '0'::text) AND (c = '0'::text)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_text +WHERE a = b AND a = c AND b = c AND a = '0'; + QUERY PLAN +--------------------------------------------------------------------- + Seq Scan on aqo_test_text (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = '0'::text) AND (b = '0'::text) AND (c = '0'::text)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_text +WHERE a = b AND b = c AND a = '0' AND b = '0'; + QUERY PLAN +--------------------------------------------------------------------- + Seq Scan on aqo_test_text (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = '0'::text) AND (b = '0'::text) AND (c = '0'::text)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_text +WHERE a = b AND b = c AND a = '0' AND c = '0'; + QUERY PLAN +--------------------------------------------------------------------- + Seq Scan on aqo_test_text (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = '0'::text) AND (b = '0'::text) AND (c = '0'::text)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 
+(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_text +WHERE a = b AND b = c AND a = '0' AND b = '0' AND c = '0'; + QUERY PLAN +--------------------------------------------------------------------- + Seq Scan on aqo_test_text (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = '0'::text) AND (b = '0'::text) AND (c = '0'::text)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +-- Must be 1 +SELECT count(*) FROM aqo_data; + count +------- + 1 +(1 row) + +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +-- JSONB fields +CREATE TABLE aqo_test_jsonb(a jsonb, b jsonb, c jsonb); +INSERT INTO aqo_test_jsonb SELECT +to_jsonb(x/10), to_jsonb(x/100), to_jsonb(x/1000) +FROM generate_series(0, 9999) x; +ANALYZE aqo_test_jsonb; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +-- Not equivalent queries +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_jsonb +WHERE a = b AND a = '0'::jsonb; + QUERY PLAN +----------------------------------------------------- + Seq Scan on aqo_test_jsonb (actual rows=10 loops=1) + AQO not used + Filter: ((a = '0'::jsonb) AND (b = '0'::jsonb)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_jsonb +WHERE a = c AND a = '0'::jsonb; + QUERY PLAN +----------------------------------------------------- + Seq Scan on aqo_test_jsonb (actual rows=10 loops=1) + AQO not used + Filter: ((a = '0'::jsonb) AND (c = '0'::jsonb)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_jsonb +WHERE b = c AND a = '0'::jsonb; + QUERY PLAN +----------------------------------------------------- + Seq Scan on aqo_test_jsonb (actual rows=10 loops=1) + AQO not 
used + Filter: ((b = c) AND (a = '0'::jsonb)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +-- Must be 3 +SELECT count(*) FROM aqo_data; + count +------- + 3 +(1 row) + +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +-- Equivalent queries +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_jsonb +WHERE a = b AND a = c AND a = '0'::jsonb; + QUERY PLAN +------------------------------------------------------------------------ + Seq Scan on aqo_test_jsonb (actual rows=10 loops=1) + AQO not used + Filter: ((a = '0'::jsonb) AND (b = '0'::jsonb) AND (c = '0'::jsonb)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_jsonb +WHERE a = b AND b = c AND a = '0'::jsonb; + QUERY PLAN +------------------------------------------------------------------------ + Seq Scan on aqo_test_jsonb (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = '0'::jsonb) AND (b = '0'::jsonb) AND (c = '0'::jsonb)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_jsonb +WHERE a = b AND a = c AND b = c AND a = '0'::jsonb; + QUERY PLAN +------------------------------------------------------------------------ + Seq Scan on aqo_test_jsonb (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = '0'::jsonb) AND (b = '0'::jsonb) AND (c = '0'::jsonb)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_jsonb +WHERE a = b AND b = c AND a = '0'::jsonb AND b = '0'::jsonb; + QUERY PLAN +------------------------------------------------------------------------ + Seq Scan on aqo_test_jsonb (actual rows=10 loops=1) + AQO: rows=10, error=0% + 
Filter: ((a = '0'::jsonb) AND (b = '0'::jsonb) AND (c = '0'::jsonb)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_jsonb +WHERE a = b AND b = c AND a = '0'::jsonb AND c = '0'::jsonb; + QUERY PLAN +------------------------------------------------------------------------ + Seq Scan on aqo_test_jsonb (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = '0'::jsonb) AND (b = '0'::jsonb) AND (c = '0'::jsonb)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_jsonb +WHERE a = b AND b = c AND a = '0'::jsonb AND b = '0'::jsonb AND c = '0'::jsonb; + QUERY PLAN +------------------------------------------------------------------------ + Seq Scan on aqo_test_jsonb (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = '0'::jsonb) AND (b = '0'::jsonb) AND (c = '0'::jsonb)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +-- Must be 1 +SELECT count(*) FROM aqo_data; + count +------- + 1 +(1 row) + +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +-- BOX fields +CREATE TABLE aqo_test_box(a box, b box, c box); +INSERT INTO aqo_test_box SELECT +('((0,0), ('||(x/10)||', '||(x/10)||'))')::box, +('((0,0), ('||(x/100)||', '||(x/100)||'))')::box, +('((0,0), ('||(x/1000)||', '||(x/1000)||'))')::box +FROM generate_series(0, 9999) x; +ANALYZE aqo_test_box; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +-- Not equivalent queries +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a = b AND a = c AND a = '((0,0), (0,0))'::box; + QUERY PLAN +-------------------------------------------------------------- + Seq Scan on aqo_test_box (actual rows=10 loops=1) + AQO not used + Filter: ((a = b) AND 
(a = c) AND (a = '(0,0),(0,0)'::box)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a = b AND b = c AND a = '((0,0), (0,0))'::box; + QUERY PLAN +-------------------------------------------------------------- + Seq Scan on aqo_test_box (actual rows=10 loops=1) + AQO not used + Filter: ((a = b) AND (b = c) AND (a = '(0,0),(0,0)'::box)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a = b AND a = c AND b = c AND a = '((0,0), (0,0))'::box; + QUERY PLAN +-------------------------------------------------------------------------- + Seq Scan on aqo_test_box (actual rows=10 loops=1) + AQO not used + Filter: ((a = b) AND (a = c) AND (b = c) AND (a = '(0,0),(0,0)'::box)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a = b AND b = c AND a = '((0,0), (0,0))'::box AND b = '((0,0), (0,0))'::box; + QUERY PLAN +------------------------------------------------------------------------------------------- + Seq Scan on aqo_test_box (actual rows=10 loops=1) + AQO not used + Filter: ((a = b) AND (b = c) AND (a = '(0,0),(0,0)'::box) AND (b = '(0,0),(0,0)'::box)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a = b AND b = c AND a = '((0,0), (0,0))'::box AND c = '((0,0), (0,0))'::box; + QUERY PLAN +------------------------------------------------------------------------------------------- + Seq Scan on aqo_test_box (actual rows=10 loops=1) + AQO not used + Filter: ((a = b) AND (b = c) AND (a = '(0,0),(0,0)'::box) AND (c = '(0,0),(0,0)'::box)) + Rows Removed by 
Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a = b AND b = c AND a = '((0,0), (0,0))'::box AND b = '((0,0), (0,0))'::box AND c = '((0,0), (0,0))'::box; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------ + Seq Scan on aqo_test_box (actual rows=10 loops=1) + AQO not used + Filter: ((a = b) AND (b = c) AND (a = '(0,0),(0,0)'::box) AND (b = '(0,0),(0,0)'::box) AND (c = '(0,0),(0,0)'::box)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +-- Must be 6 +SELECT count(*) FROM aqo_data; + count +------- + 6 +(1 row) + +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +-- Not equivalent queries too +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a ~= b AND a ~= c AND a ~= '((0,0), (0,0))'::box; + QUERY PLAN +----------------------------------------------------------------- + Seq Scan on aqo_test_box (actual rows=10 loops=1) + AQO not used + Filter: ((a ~= b) AND (a ~= c) AND (a ~= '(0,0),(0,0)'::box)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a ~= b AND b ~= c AND a ~= '((0,0), (0,0))'::box; + QUERY PLAN +----------------------------------------------------------------- + Seq Scan on aqo_test_box (actual rows=10 loops=1) + AQO not used + Filter: ((a ~= b) AND (b ~= c) AND (a ~= '(0,0),(0,0)'::box)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a ~= b AND a ~= c AND b ~= c AND a ~= '((0,0), (0,0))'::box; + QUERY PLAN +------------------------------------------------------------------------------ + 
Seq Scan on aqo_test_box (actual rows=10 loops=1) + AQO not used + Filter: ((a ~= b) AND (a ~= c) AND (b ~= c) AND (a ~= '(0,0),(0,0)'::box)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a ~= b AND b ~= c AND a ~= '((0,0), (0,0))'::box AND b ~= '((0,0), (0,0))'::box; + QUERY PLAN +----------------------------------------------------------------------------------------------- + Seq Scan on aqo_test_box (actual rows=10 loops=1) + AQO not used + Filter: ((a ~= b) AND (b ~= c) AND (a ~= '(0,0),(0,0)'::box) AND (b ~= '(0,0),(0,0)'::box)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a ~= b AND b ~= c AND a ~= '((0,0), (0,0))'::box AND c ~= '((0,0), (0,0))'::box; + QUERY PLAN +----------------------------------------------------------------------------------------------- + Seq Scan on aqo_test_box (actual rows=10 loops=1) + AQO not used + Filter: ((a ~= b) AND (b ~= c) AND (a ~= '(0,0),(0,0)'::box) AND (c ~= '(0,0),(0,0)'::box)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a ~= b AND b ~= c AND a ~= '((0,0), (0,0))'::box AND b ~= '((0,0), (0,0))'::box AND c ~= '((0,0), (0,0))'::box; + QUERY PLAN +----------------------------------------------------------------------------------------------------------------------------- + Seq Scan on aqo_test_box (actual rows=10 loops=1) + AQO not used + Filter: ((a ~= b) AND (b ~= c) AND (a ~= '(0,0),(0,0)'::box) AND (b ~= '(0,0),(0,0)'::box) AND (c ~= '(0,0),(0,0)'::box)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +-- Must be 6 +SELECT count(*) FROM aqo_data; + count +------- + 6 +(1 row) 
+ +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +DROP TABLE aqo_test_int; +DROP TABLE aqo_test_text; +DROP TABLE aqo_test_jsonb; +DROP TABLE aqo_test_box; +DROP EXTENSION aqo; diff --git a/expected/eclasses_mchar.out b/expected/eclasses_mchar.out new file mode 100644 index 00000000..5593e045 --- /dev/null +++ b/expected/eclasses_mchar.out @@ -0,0 +1,6 @@ +-- Testing for working with equivalence classes for mchar type +-- Skip test if mchar extension does not exist +SELECT count(*) = 0 AS skip_test +FROM pg_available_extensions WHERE name = 'mchar' \gset +\if :skip_test +\quit diff --git a/expected/eclasses_mchar_1.out b/expected/eclasses_mchar_1.out new file mode 100644 index 00000000..a50422cb --- /dev/null +++ b/expected/eclasses_mchar_1.out @@ -0,0 +1,181 @@ +-- Testing for working with equivalence classes for mchar type +-- Skip test if mchar extension does not exist +SELECT count(*) = 0 AS skip_test +FROM pg_available_extensions WHERE name = 'mchar' \gset +\if :skip_test +\quit +\endif +CREATE EXTENSION IF NOT EXISTS aqo; +SET aqo.show_details = 'on'; +SET aqo.show_hash = 'off'; +SET aqo.mode = 'forced'; +-- MCHAR fields +CREATE EXTENSION MCHAR; +CREATE TABLE aqo_test_mchar(a mchar, b mchar, c mchar); +INSERT INTO aqo_test_mchar +SELECT (x/10)::text::mchar, (x/100)::text::mchar, (x/1000)::text::mchar +FROM generate_series(0, 9999) x; +ANALYZE aqo_test_mchar; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +-- Not equivalent queries +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_mchar +WHERE a = b AND a = '0'; + QUERY PLAN +----------------------------------------------------- + Seq Scan on aqo_test_mchar (actual rows=10 loops=1) + AQO not used + Filter: ((a = '0'::mchar) AND (b = '0'::mchar)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_mchar 
+WHERE a = c AND a = '0'; + QUERY PLAN +----------------------------------------------------- + Seq Scan on aqo_test_mchar (actual rows=10 loops=1) + AQO not used + Filter: ((a = '0'::mchar) AND (c = '0'::mchar)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_mchar +WHERE b = c AND a = '0'; + QUERY PLAN +----------------------------------------------------- + Seq Scan on aqo_test_mchar (actual rows=10 loops=1) + AQO not used + Filter: ((b = c) AND (a = '0'::mchar)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +-- Must be 3 +SELECT count(*) FROM aqo_data; + count +------- + 3 +(1 row) + +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +-- Equivalent queries +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_mchar +WHERE a = b AND a = c AND a = '0'; + QUERY PLAN +------------------------------------------------------------------------ + Seq Scan on aqo_test_mchar (actual rows=10 loops=1) + AQO not used + Filter: ((a = '0'::mchar) AND (b = '0'::mchar) AND (c = '0'::mchar)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_mchar +WHERE a = b AND b = c AND a = '0'; + QUERY PLAN +------------------------------------------------------------------------ + Seq Scan on aqo_test_mchar (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = '0'::mchar) AND (b = '0'::mchar) AND (c = '0'::mchar)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_mchar +WHERE a = b AND a = c AND b = c AND a = '0'; + QUERY PLAN +------------------------------------------------------------------------ + Seq Scan on aqo_test_mchar 
(actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = '0'::mchar) AND (b = '0'::mchar) AND (c = '0'::mchar)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_mchar +WHERE a = b AND b = c AND a = '0' AND b = '0'; + QUERY PLAN +------------------------------------------------------------------------ + Seq Scan on aqo_test_mchar (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = '0'::mchar) AND (b = '0'::mchar) AND (c = '0'::mchar)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_mchar +WHERE a = b AND b = c AND a = '0' AND c = '0'; + QUERY PLAN +------------------------------------------------------------------------ + Seq Scan on aqo_test_mchar (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = '0'::mchar) AND (b = '0'::mchar) AND (c = '0'::mchar)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_mchar +WHERE a = b AND b = c AND a = '0' AND b = '0' AND c = '0'; + QUERY PLAN +------------------------------------------------------------------------ + Seq Scan on aqo_test_mchar (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = '0'::mchar) AND (b = '0'::mchar) AND (c = '0'::mchar)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +-- Must be 1 +SELECT count(*) FROM aqo_data; + count +------- + 1 +(1 row) + +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +DROP TABLE aqo_test_mchar; +DROP EXTENSION mchar; +DROP EXTENSION aqo; diff --git a/expected/feature_subspace.out b/expected/feature_subspace.out new file mode 100644 index 00000000..eceb0eb1 --- /dev/null +++ 
b/expected/feature_subspace.out @@ -0,0 +1,83 @@ +-- This test related to some issues on feature subspace calculation +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +SET aqo.mode = 'learn'; +SET aqo.show_details = 'on'; +CREATE TABLE a AS (SELECT gs AS x FROM generate_series(1,10) AS gs); +CREATE TABLE b AS (SELECT gs AS x FROM generate_series(1,100) AS gs); +-- +-- Returns string-by-string explain of a query. Made for removing some strings +-- from the explain output. +-- +CREATE OR REPLACE FUNCTION expln(query_string text) RETURNS SETOF text AS $$ +BEGIN + RETURN QUERY + EXECUTE format('EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) %s', query_string); + RETURN; +END; +$$ LANGUAGE PLPGSQL; +-- +-- A LEFT JOIN B isn't equal B LEFT JOIN A. +-- +SELECT str AS result +FROM expln(' +SELECT * FROM a LEFT JOIN b USING (x);') AS str +WHERE str NOT LIKE '%Memory%'; + result +----------------------------------------------------- + Merge Left Join (actual rows=10 loops=1) + AQO not used + Merge Cond: (a.x = b.x) + -> Sort (actual rows=10 loops=1) + Sort Key: a.x + -> Seq Scan on a (actual rows=10 loops=1) + AQO not used + -> Sort (actual rows=11 loops=1) + Sort Key: b.x + -> Seq Scan on b (actual rows=100 loops=1) + AQO not used + Using aqo: true + AQO mode: LEARN + JOINS: 1 +(14 rows) + +-- TODO: Using method of other classes neighbours we get a bad estimation. 
+SELECT str AS result +FROM expln(' +SELECT * FROM b LEFT JOIN a USING (x);') AS str +WHERE str NOT LIKE '%Memory%'; + result +----------------------------------------------------- + Merge Left Join (actual rows=100 loops=1) + AQO not used + Merge Cond: (b.x = a.x) + -> Sort (actual rows=100 loops=1) + Sort Key: b.x + -> Seq Scan on b (actual rows=100 loops=1) + AQO not used + -> Sort (actual rows=10 loops=1) + Sort Key: a.x + -> Seq Scan on a (actual rows=10 loops=1) + AQO not used + Using aqo: true + AQO mode: LEARN + JOINS: 1 +(14 rows) + +-- Look into the reason: two JOINs from different classes have the same FSS. +SELECT to_char(d1.targets[1], 'FM999.00') AS target FROM aqo_data d1 +JOIN aqo_data d2 ON (d1.fs <> d2.fs AND d1.fss = d2.fss) +WHERE 'a'::regclass = ANY (d1.oids) AND 'b'::regclass = ANY (d1.oids) order by target; + target +-------- + 2.30 + 4.61 +(2 rows) + +DROP TABLE a,b CASCADE; +DROP EXTENSION aqo; diff --git a/expected/forced_stat_collection.out b/expected/forced_stat_collection.out new file mode 100644 index 00000000..c5a6ac0e --- /dev/null +++ b/expected/forced_stat_collection.out @@ -0,0 +1,72 @@ +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +\set citizens 1000 +SET aqo.mode = 'disabled'; +SET aqo.force_collect_stat = 'off'; +CREATE TABLE person ( + id serial PRIMARY KEY, + age integer, + gender text, + passport integer +); +-- Fill the person table with workers data. 
+INSERT INTO person (id,age,gender,passport) + (SELECT q1.id,q1.age, + CASE WHEN q1.id % 4 = 0 THEN 'Female' + ELSE 'Male' + END, + CASE WHEN (q1.age>18) THEN 1E6 + q1.id * 1E3 + ELSE NULL + END + FROM (SELECT *, 14+(id % 60) AS age FROM generate_series(1, :citizens) id) AS q1 + ); +SET aqo.force_collect_stat = 'on'; +SELECT count(*) FROM person WHERE age<18; + count +------- + 67 +(1 row) + +SELECT count(*) FROM person WHERE age<18 AND passport IS NOT NULL; + count +------- + 0 +(1 row) + +SELECT * FROM aqo_data; + fs | fss | nfeatures | features | targets | reliability | oids +----+-----+-----------+----------+---------+-------------+------ +(0 rows) + +CREATE OR REPLACE FUNCTION round_array (double precision[]) +RETURNS double precision[] +LANGUAGE SQL +AS $$ + SELECT array_agg(round(elem::numeric, 3)) + FROM unnest($1) as arr(elem); +$$; +SELECT learn_aqo,use_aqo,auto_tuning,round_array(cardinality_error_without_aqo) ce,executions_without_aqo nex +FROM aqo_queries AS aq JOIN aqo_query_stat AS aqs +ON aq.queryid = aqs.queryid +ORDER BY (cardinality_error_without_aqo); + learn_aqo | use_aqo | auto_tuning | ce | nex +-----------+---------+-------------+---------+----- + f | f | f | {0.864} | 1 + f | f | f | {2.963} | 1 +(2 rows) + +SELECT query_text FROM aqo_query_texts ORDER BY (md5(query_text)); + query_text +-------------------------------------------------------------------- + SELECT count(*) FROM person WHERE age<18; + COMMON feature space (do not delete!) + SELECT count(*) FROM person WHERE age<18 AND passport IS NOT NULL; +(3 rows) + +DROP TABLE person; +DROP EXTENSION aqo; diff --git a/expected/gucs.out b/expected/gucs.out new file mode 100644 index 00000000..a31219df --- /dev/null +++ b/expected/gucs.out @@ -0,0 +1,146 @@ +-- Preliminaries +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +-- Utility tool. Allow to filter system-dependent strings from an explain output. 
+CREATE OR REPLACE FUNCTION expln(query_string text) RETURNS SETOF text AS $$ +BEGIN + RETURN QUERY + EXECUTE format('%s', query_string); + RETURN; +END; +$$ LANGUAGE PLPGSQL; +SET aqo.mode = 'learn'; +SET aqo.show_details = true; +CREATE TABLE t(x int); +INSERT INTO t (x) (SELECT * FROM generate_series(1, 100) AS gs); +ANALYZE t; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +-- Check AQO addons to explain (the only stable data) +SELECT regexp_replace( + str,'Query Identifier: -?\m\d+\M','Query Identifier: N','g') as str FROM expln(' + EXPLAIN (ANALYZE, VERBOSE, COSTS OFF, TIMING OFF, SUMMARY OFF) + SELECT x FROM t; +') AS str; + str +------------------------------------------------ + Seq Scan on public.t (actual rows=100 loops=1) + AQO not used + Output: x + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(6 rows) + +SELECT regexp_replace( + str,'Query Identifier: -?\m\d+\M','Query Identifier: N','g') as str FROM expln(' + EXPLAIN (ANALYZE, VERBOSE, COSTS OFF, TIMING OFF, SUMMARY OFF) + SELECT x FROM t; +') AS str; + str +------------------------------------------------ + Seq Scan on public.t (actual rows=100 loops=1) + AQO: rows=100, error=0% + Output: x + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(6 rows) + +SET aqo.mode = 'disabled'; +-- Check existence of the interface functions. +SELECT obj_description('aqo_cardinality_error'::regproc::oid); + obj_description +--------------------------------------------------------------------------------------------------------------- + Get cardinality error of queries the last time they were executed. Order queries according to an error value. 
+(1 row) + +SELECT obj_description('aqo_execution_time'::regproc::oid); + obj_description +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ + Get execution time of queries. If controlled = true (AQO could advise cardinality estimations), show time of last execution attempt. Another case (AQO not used), return an average value of execution time across all known executions. +(1 row) + +SELECT obj_description('aqo_drop_class'::regproc::oid); + obj_description +-------------------------------------------------------------- + Remove info about an query class from AQO ML knowledge base. +(1 row) + +SELECT obj_description('aqo_cleanup'::regproc::oid); + obj_description +---------------------------------------------- + Remove unneeded rows from the AQO ML storage +(1 row) + +SELECT obj_description('aqo_reset'::regproc::oid); + obj_description +-------------------------------- + Reset all data gathered by AQO +(1 row) + +\df aqo_cardinality_error + List of functions + Schema | Name | Result data type | Argument data types | Type +--------+-----------------------+-------------------------------------------------------------------------------------+---------------------+------ + public | aqo_cardinality_error | TABLE(num integer, id bigint, fshash bigint, error double precision, nexecs bigint) | controlled boolean | func +(1 row) + +\df aqo_execution_time + List of functions + Schema | Name | Result data type | Argument data types | Type +--------+--------------------+-----------------------------------------------------------------------------------------+---------------------+------ + public | aqo_execution_time | TABLE(num integer, id bigint, fshash bigint, exec_time double precision, nexecs bigint) | controlled boolean | func +(1 row) + +\df aqo_drop_class + List of functions + Schema | 
Name | Result data type | Argument data types | Type +--------+----------------+------------------+---------------------+------ + public | aqo_drop_class | integer | queryid bigint | func +(1 row) + +\df aqo_cleanup + List of functions + Schema | Name | Result data type | Argument data types | Type +--------+-------------+------------------+-----------------------------------+------ + public | aqo_cleanup | record | OUT nfs integer, OUT nfss integer | func +(1 row) + +\df aqo_reset + List of functions + Schema | Name | Result data type | Argument data types | Type +--------+-----------+------------------+---------------------+------ + public | aqo_reset | bigint | | func +(1 row) + +-- Check stat reset +SELECT count(*) FROM aqo_query_stat; + count +------- + 1 +(1 row) + +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +SELECT count(*) FROM aqo_query_stat; + count +------- + 0 +(1 row) + +DROP TABLE t; +DROP EXTENSION aqo; diff --git a/expected/look_a_like.out b/expected/look_a_like.out new file mode 100644 index 00000000..854bb852 --- /dev/null +++ b/expected/look_a_like.out @@ -0,0 +1,684 @@ +-- Preliminaries +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +SET aqo.wide_search = 'on'; +SET aqo.mode = 'learn'; +SET aqo.show_details = 'on'; +SET aqo.show_hash = 'off'; +SET aqo.min_neighbors_for_predicting = 1; +SET aqo.predict_with_few_neighbors = 'off'; +SET enable_nestloop = 'off'; +SET enable_mergejoin = 'off'; +SET enable_material = 'off'; +DROP TABLE IF EXISTS a,b CASCADE; +NOTICE: table "a" does not exist, skipping +NOTICE: table "b" does not exist, skipping +-- Create tables with correlated datas in columns +CREATE TABLE a (x1 int, x2 int, x3 int); +INSERT INTO a (x1, x2, x3) SELECT mod(ival,10), mod(ival,10), mod(ival,10) FROM generate_series(1,100) As ival; +CREATE TABLE b (y1 int, y2 int, y3 int); +INSERT INTO b (y1, y2, y3) SELECT mod(ival + 1,10), mod(ival + 
1,10), mod(ival + 1,10) FROM generate_series(1,1000) As ival; +ANALYZE a, b; +-- +-- Returns string-by-string explain of a query. Made for removing some strings +-- from the explain output. +-- +CREATE OR REPLACE FUNCTION expln(query_string text) RETURNS SETOF text AS $$ +BEGIN + RETURN QUERY + EXECUTE format('EXPLAIN (ANALYZE, VERBOSE, COSTS OFF, TIMING OFF, SUMMARY OFF) %s', query_string); + RETURN; +END; +$$ LANGUAGE PLPGSQL; +-- no one predicted rows. we use knowledge cardinalities of the query +-- in the next queries with the same fss_hash +SELECT str AS result +FROM expln(' +SELECT x1,y1 FROM A,B WHERE x1 = 5 AND x2 = 5 AND A.x1 = B.y1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +------------------------------------------------------- + Nested Loop (actual rows=1000 loops=1) + AQO not used + Output: a.x1, b.y1 + -> Seq Scan on public.a (actual rows=10 loops=1) + AQO not used + Output: a.x1, a.x2, a.x3 + Filter: ((a.x1 = 5) AND (a.x2 = 5)) + Rows Removed by Filter: 90 + -> Seq Scan on public.b (actual rows=100 loops=10) + AQO not used + Output: b.y1, b.y2, b.y3 + Filter: (b.y1 = 5) + Rows Removed by Filter: 900 + Using aqo: true + AQO mode: LEARN + JOINS: 1 +(16 rows) + +SELECT str AS result +FROM expln(' +SELECT x1,y1 FROM A LEFT JOIN b ON A.x1 = B.y1 WHERE x1 = 5 AND x2 = 5;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +----------------------------------------------------------- + Hash Right Join (actual rows=1000 loops=1) + AQO not used + Output: a.x1, b.y1 + Hash Cond: (b.y1 = a.x1) + -> Seq Scan on public.b (actual rows=100 loops=1) + AQO: rows=100, error=0% + Output: b.y1, b.y2, b.y3 + Filter: (b.y1 = 5) + Rows Removed by Filter: 900 + -> Hash (actual rows=10 loops=1) + Output: a.x1 + -> Seq Scan on public.a (actual rows=10 loops=1) + AQO: rows=10, error=0% + Output: a.x1 + Filter: ((a.x1 = 5) AND (a.x2 = 
5)) + Rows Removed by Filter: 90 + Using aqo: true + AQO mode: LEARN + JOINS: 1 +(19 rows) + +SELECT str AS result +FROM expln(' +SELECT x1,y1 FROM A,B WHERE x1 < 5 AND x2 < 5 AND A.x1 = B.y1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +------------------------------------------------------------- + Hash Join (actual rows=5000 loops=1) + AQO not used + Output: a.x1, b.y1 + Hash Cond: (a.x1 = b.y1) + -> Seq Scan on public.a (actual rows=50 loops=1) + AQO not used + Output: a.x1, a.x2, a.x3 + Filter: ((a.x1 < 5) AND (a.x2 < 5)) + Rows Removed by Filter: 50 + -> Hash (actual rows=1000 loops=1) + Output: b.y1 + -> Seq Scan on public.b (actual rows=1000 loops=1) + AQO not used + Output: b.y1 + Using aqo: true + AQO mode: LEARN + JOINS: 1 +(17 rows) + +--query contains nodes that have already been predicted +SELECT str AS result +FROM expln(' +SELECT x1,y1 FROM A,B WHERE x1 < 10 AND x2 < 5 AND A.x1 = B.y1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +----------------------------------------------------------- + Hash Join (actual rows=5000 loops=1) + AQO: rows=5000, error=0% + Output: a.x1, b.y1 + Hash Cond: (b.y1 = a.x1) + -> Seq Scan on public.b (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Output: b.y1, b.y2, b.y3 + -> Hash (actual rows=50 loops=1) + Output: a.x1 + -> Seq Scan on public.a (actual rows=50 loops=1) + AQO: rows=50, error=0% + Output: a.x1 + Filter: ((a.x1 < 10) AND (a.x2 < 5)) + Rows Removed by Filter: 50 + Using aqo: true + AQO mode: LEARN + JOINS: 1 +(17 rows) + +SELECT str AS result +FROM expln(' +SELECT x1,y1 FROM A,B WHERE x1 > 2 AND x2 > 2 AND A.x1 = B.y1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +----------------------------------------------------------- + Hash Join (actual rows=7000 loops=1) + AQO not used + 
Output: a.x1, b.y1 + Hash Cond: (b.y1 = a.x1) + -> Seq Scan on public.b (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Output: b.y1, b.y2, b.y3 + -> Hash (actual rows=70 loops=1) + Output: a.x1 + -> Seq Scan on public.a (actual rows=70 loops=1) + AQO not used + Output: a.x1 + Filter: ((a.x1 > 2) AND (a.x2 > 2)) + Rows Removed by Filter: 30 + Using aqo: true + AQO mode: LEARN + JOINS: 1 +(17 rows) + +SELECT str AS result +FROM expln(' +SELECT x1,y1 FROM A,B WHERE x1 > 5 AND x2 > 5 AND x3 < 10 AND A.x1 = B.y1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +------------------------------------------------------------------- + Hash Join (actual rows=4000 loops=1) + AQO not used + Output: a.x1, b.y1 + Hash Cond: (b.y1 = a.x1) + -> Seq Scan on public.b (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Output: b.y1, b.y2, b.y3 + -> Hash (actual rows=40 loops=1) + Output: a.x1 + -> Seq Scan on public.a (actual rows=40 loops=1) + AQO not used + Output: a.x1 + Filter: ((a.x1 > 5) AND (a.x2 > 5) AND (a.x3 < 10)) + Rows Removed by Filter: 60 + Using aqo: true + AQO mode: LEARN + JOINS: 1 +(17 rows) + +SELECT str AS result +FROM expln(' +SELECT x1,y1 FROM A,B WHERE x1 < 5 AND x2 < 5 AND x3 < 10 AND A.x1 = B.y1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +------------------------------------------------------------- + Hash Join (actual rows=5000 loops=1) + AQO not used + Output: a.x1, b.y1 + Hash Cond: (a.x1 = b.y1) + -> Seq Scan on public.a (actual rows=50 loops=1) + AQO not used + Output: a.x1, a.x2, a.x3 + Filter: ((a.x1 < 5) AND (a.x2 < 5) AND (a.x3 < 10)) + Rows Removed by Filter: 50 + -> Hash (actual rows=1000 loops=1) + Output: b.y1 + -> Seq Scan on public.b (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Output: b.y1 + Using aqo: true + AQO mode: LEARN + JOINS: 1 +(17 rows) + +--query contains nodes 
that have already been predicted +SELECT str AS result +FROM expln(' +SELECT x1,y1 FROM A,B WHERE x1 < 5 AND x2 < 4 AND x3 < 5 AND A.x1 = B.y1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +------------------------------------------------------------------ + Hash Join (actual rows=4000 loops=1) + AQO: rows=5000, error=20% + Output: a.x1, b.y1 + Hash Cond: (b.y1 = a.x1) + -> Seq Scan on public.b (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Output: b.y1, b.y2, b.y3 + -> Hash (actual rows=40 loops=1) + Output: a.x1 + -> Seq Scan on public.a (actual rows=40 loops=1) + AQO: rows=50, error=20% + Output: a.x1 + Filter: ((a.x1 < 5) AND (a.x2 < 4) AND (a.x3 < 5)) + Rows Removed by Filter: 60 + Using aqo: true + AQO mode: LEARN + JOINS: 1 +(17 rows) + +SELECT str AS result +FROM expln(' +SELECT x1 FROM A,B WHERE x1 < 4 AND x3 > 1 GROUP BY(x1);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +-------------------------------------------------------------------- + Group (actual rows=2 loops=1) + AQO not used + Output: a.x1 + Group Key: a.x1 + -> Sort (actual rows=20000 loops=1) + AQO not used + Output: a.x1 + Sort Key: a.x1 + -> Nested Loop (actual rows=20000 loops=1) + AQO not used + Output: a.x1 + -> Seq Scan on public.a (actual rows=20 loops=1) + AQO not used + Output: a.x1, a.x2, a.x3 + Filter: ((a.x1 < 4) AND (a.x3 > 1)) + Rows Removed by Filter: 80 + -> Seq Scan on public.b (actual rows=1000 loops=20) + AQO: rows=1000, error=0% + Output: b.y1, b.y2, b.y3 + Using aqo: true + AQO mode: LEARN + JOINS: 1 +(22 rows) + +--query contains nodes that have already been predicted +SELECT str AS result +FROM expln(' +SELECT x1 FROM A,B WHERE x1 < 4 AND x3 > 1 GROUP BY(x1);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result 
+-------------------------------------------------------------------- + Group (actual rows=2 loops=1) + AQO not used + Output: a.x1 + Group Key: a.x1 + -> Sort (actual rows=20000 loops=1) + AQO not used + Output: a.x1 + Sort Key: a.x1 + -> Nested Loop (actual rows=20000 loops=1) + AQO: rows=20000, error=0% + Output: a.x1 + -> Seq Scan on public.a (actual rows=20 loops=1) + AQO: rows=20, error=0% + Output: a.x1, a.x2, a.x3 + Filter: ((a.x1 < 4) AND (a.x3 > 1)) + Rows Removed by Filter: 80 + -> Seq Scan on public.b (actual rows=1000 loops=20) + AQO: rows=1000, error=0% + Output: b.y1, b.y2, b.y3 + Using aqo: true + AQO mode: LEARN + JOINS: 1 +(22 rows) + +SELECT str AS result +FROM expln(' +SELECT x1 FROM A,B WHERE x1 < 4 AND x3 > 2 GROUP BY(x1);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +-------------------------------------------------------------------- + Group (actual rows=1 loops=1) + AQO not used + Output: a.x1 + Group Key: a.x1 + -> Sort (actual rows=10000 loops=1) + AQO not used + Output: a.x1 + Sort Key: a.x1 + -> Nested Loop (actual rows=10000 loops=1) + AQO: rows=20000, error=50% + Output: a.x1 + -> Seq Scan on public.a (actual rows=10 loops=1) + AQO: rows=20, error=50% + Output: a.x1, a.x2, a.x3 + Filter: ((a.x1 < 4) AND (a.x3 > 2)) + Rows Removed by Filter: 90 + -> Seq Scan on public.b (actual rows=1000 loops=10) + AQO: rows=1000, error=0% + Output: b.y1, b.y2, b.y3 + Using aqo: true + AQO mode: LEARN + JOINS: 1 +(22 rows) + +SELECT str AS result +FROM expln(' +SELECT x1 FROM A,B WHERE x1 < 3 AND x2 < 5 AND x3 > 1 GROUP BY(x1);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +------------------------------------------------------------------------ + Group (actual rows=1 loops=1) + AQO not used + Output: a.x1 + Group Key: a.x1 + -> Sort (actual rows=10000 loops=1) + AQO not used + Output: a.x1 + Sort Key: 
a.x1 + -> Nested Loop (actual rows=10000 loops=1) + AQO not used + Output: a.x1 + -> Seq Scan on public.a (actual rows=10 loops=1) + AQO not used + Output: a.x1, a.x2, a.x3 + Filter: ((a.x1 < 3) AND (a.x2 < 5) AND (a.x3 > 1)) + Rows Removed by Filter: 90 + -> Seq Scan on public.b (actual rows=1000 loops=10) + AQO: rows=1000, error=0% + Output: b.y1, b.y2, b.y3 + Using aqo: true + AQO mode: LEARN + JOINS: 1 +(22 rows) + +SELECT str AS result +FROM expln(' +SELECT x1 FROM A,B WHERE x1 > 1 AND x2 < 4 AND x3 > 1 GROUP BY(x1);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +------------------------------------------------------------------------ + Group (actual rows=2 loops=1) + AQO not used + Output: a.x1 + Group Key: a.x1 + -> Sort (actual rows=20000 loops=1) + AQO not used + Output: a.x1 + Sort Key: a.x1 + -> Nested Loop (actual rows=20000 loops=1) + AQO not used + Output: a.x1 + -> Seq Scan on public.a (actual rows=20 loops=1) + AQO not used + Output: a.x1, a.x2, a.x3 + Filter: ((a.x1 > 1) AND (a.x2 < 4) AND (a.x3 > 1)) + Rows Removed by Filter: 80 + -> Seq Scan on public.b (actual rows=1000 loops=20) + AQO: rows=1000, error=0% + Output: b.y1, b.y2, b.y3 + Using aqo: true + AQO mode: LEARN + JOINS: 1 +(22 rows) + +SELECT str AS result +FROM expln(' +SELECT x1 FROM A,B WHERE x1 > 1 AND x2 < 4 AND x3 < 5 GROUP BY(x1);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +------------------------------------------------------------------------ + Group (actual rows=2 loops=1) + AQO not used + Output: a.x1 + Group Key: a.x1 + -> Sort (actual rows=20000 loops=1) + AQO not used + Output: a.x1 + Sort Key: a.x1 + -> Nested Loop (actual rows=20000 loops=1) + AQO not used + Output: a.x1 + -> Seq Scan on public.a (actual rows=20 loops=1) + AQO not used + Output: a.x1, a.x2, a.x3 + Filter: ((a.x1 > 1) AND (a.x2 < 4) AND (a.x3 < 5)) + Rows 
Removed by Filter: 80 + -> Seq Scan on public.b (actual rows=1000 loops=20) + AQO: rows=1000, error=0% + Output: b.y1, b.y2, b.y3 + Using aqo: true + AQO mode: LEARN + JOINS: 1 +(22 rows) + +SELECT str AS result +FROM expln(' +SELECT x1 FROM A,B WHERE x1 < 4 AND x2 < 5 AND x3 > 1 and y1 > 2 GROUP BY(x1);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +------------------------------------------------------------------------ + Group (actual rows=2 loops=1) + AQO not used + Output: a.x1 + Group Key: a.x1 + -> Sort (actual rows=14000 loops=1) + AQO not used + Output: a.x1 + Sort Key: a.x1 + -> Nested Loop (actual rows=14000 loops=1) + AQO not used + Output: a.x1 + -> Seq Scan on public.a (actual rows=20 loops=1) + AQO: rows=10, error=-100% + Output: a.x1, a.x2, a.x3 + Filter: ((a.x1 < 4) AND (a.x2 < 5) AND (a.x3 > 1)) + Rows Removed by Filter: 80 + -> Seq Scan on public.b (actual rows=700 loops=20) + AQO not used + Output: b.y1, b.y2, b.y3 + Filter: (b.y1 > 2) + Rows Removed by Filter: 300 + Using aqo: true + AQO mode: LEARN + JOINS: 1 +(24 rows) + +--query contains nodes that have already been predicted +SELECT str AS result +FROM expln(' +SELECT x1 FROM A,B WHERE x1 < 3 AND x2 < 4 AND x3 > 1 and y1 > 2 GROUP BY(x1);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +------------------------------------------------------------------------ + Group (actual rows=1 loops=1) + AQO not used + Output: a.x1 + Group Key: a.x1 + -> Sort (actual rows=7000 loops=1) + AQO not used + Output: a.x1 + Sort Key: a.x1 + -> Nested Loop (actual rows=7000 loops=1) + AQO: rows=14000, error=50% + Output: a.x1 + -> Seq Scan on public.a (actual rows=10 loops=1) + AQO: rows=20, error=50% + Output: a.x1, a.x2, a.x3 + Filter: ((a.x1 < 3) AND (a.x2 < 4) AND (a.x3 > 1)) + Rows Removed by Filter: 90 + -> Seq Scan on public.b (actual rows=700 loops=10) + AQO: 
rows=700, error=0% + Output: b.y1, b.y2, b.y3 + Filter: (b.y1 > 2) + Rows Removed by Filter: 300 + Using aqo: true + AQO mode: LEARN + JOINS: 1 +(24 rows) + +CREATE TABLE c (z1 int, z2 int, z3 int); +INSERT INTO c (z1, z2, z3) SELECT mod(ival + 1,10), mod(ival + 1,10), mod(ival + 1,10) FROM generate_series(1,1000) As ival; +ANALYZE c; +SELECT str AS result +FROM expln(' +SELECT * FROM (a LEFT JOIN b ON a.x1 = b.y1) sc WHERE +not exists (SELECT z1 FROM c WHERE sc.x1=c.z1 );') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +------------------------------------------------------------------------- + Hash Right Join (actual rows=0 loops=1) + AQO not used + Output: a.x1, a.x2, a.x3, b.y1, b.y2, b.y3 + Hash Cond: (b.y1 = a.x1) + -> Seq Scan on public.b (never executed) + AQO: rows=1000 + Output: b.y1, b.y2, b.y3 + -> Hash (actual rows=0 loops=1) + Output: a.x1, a.x2, a.x3 + -> Hash Anti Join (actual rows=0 loops=1) + AQO not used + Output: a.x1, a.x2, a.x3 + Hash Cond: (a.x1 = c.z1) + -> Seq Scan on public.a (actual rows=100 loops=1) + AQO not used + Output: a.x1, a.x2, a.x3 + -> Hash (actual rows=1000 loops=1) + Output: c.z1 + -> Seq Scan on public.c (actual rows=1000 loops=1) + AQO not used + Output: c.z1 + Using aqo: true + AQO mode: LEARN + JOINS: 2 +(24 rows) + +SELECT str AS result +FROM expln(' +SELECT * FROM (A LEFT JOIN B ON A.x1 = B.y1) sc left join C on sc.x1=C.z1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +------------------------------------------------------------------------ + Hash Right Join (actual rows=1000000 loops=1) + AQO: rows=1, error=-99999900% + Output: a.x1, a.x2, a.x3, b.y1, b.y2, b.y3, c.z1, c.z2, c.z3 + Hash Cond: (b.y1 = a.x1) + -> Seq Scan on public.b (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Output: b.y1, b.y2, b.y3 + -> Hash (actual rows=10000 loops=1) + Output: a.x1, a.x2, 
a.x3, c.z1, c.z2, c.z3 + -> Hash Right Join (actual rows=10000 loops=1) + AQO: rows=1, error=-999900% + Output: a.x1, a.x2, a.x3, c.z1, c.z2, c.z3 + Hash Cond: (c.z1 = a.x1) + -> Seq Scan on public.c (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Output: c.z1, c.z2, c.z3 + -> Hash (actual rows=100 loops=1) + Output: a.x1, a.x2, a.x3 + -> Seq Scan on public.a (actual rows=100 loops=1) + AQO: rows=100, error=0% + Output: a.x1, a.x2, a.x3 + Using aqo: true + AQO mode: LEARN + JOINS: 2 +(24 rows) + +-- Next few test cases focus on fss corresponding to (x1 > ? AND x2 < ? AND x3 < ?). We will denote +-- it by fss0. At this moment there is exactly one fs with (fs, fss0, dbid) record in aqo_data. We'll +-- refer to it as fs0. +-- Let's create another fs for fss0. We'll call this fs fs1. Since aqo.wide_search='on', +-- aqo.min_neighbors_for_predicting=1, and there is (fs0, fss0, dbid) data record, AQO must be used here. +SELECT str AS result +FROM expln(' +SELECT * FROM A WHERE x1 > -100 AND x2 < 10 AND x3 < 10;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +---------------------------------------------------------------------- + Seq Scan on public.a (actual rows=100 loops=1) + AQO: rows=20, error=-400% + Output: x1, x2, x3 + Filter: ((a.x1 > '-100'::integer) AND (a.x2 < 10) AND (a.x3 < 10)) + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(7 rows) + +-- Now there are 2 data records for fss0: one for (fs0, fss0, dbid) and one for (fs1, fss0, dbid) +-- We repeat previous query, but set aqo.min_neighbors_for_predicting to 2. Since aqo.predict_with_few_neighbors +-- is 'off', AQO is obliged to use both data records for fss0. 
+SET aqo.min_neighbors_for_predicting = 2; +SELECT str AS result +FROM expln(' +SELECT * FROM A WHERE x1 > 1 AND x2 < 10 AND x3 < 10;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +-------------------------------------------------------- + Seq Scan on public.a (actual rows=80 loops=1) + AQO: rows=77, error=-4% + Output: x1, x2, x3 + Filter: ((a.x1 > 1) AND (a.x2 < 10) AND (a.x3 < 10)) + Rows Removed by Filter: 20 + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(8 rows) + +-- Now there are 3 data records for fss0: 1 for (fs0, fss0, dbid) and 2 for (fs1, fss0, dbid) +-- Lastly, we run invoke query with previously unseen fs with fss0 feature subspace. AQO must use +-- three data records from two neighbors for this one. +SET aqo.min_neighbors_for_predicting = 3; +SELECT str AS result +FROM expln(' +SELECT x2 FROM A WHERE x1 > 3 AND x2 < 10 AND x3 < 10 GROUP BY(x2);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +-------------------------------------------------------------- + HashAggregate (actual rows=6 loops=1) + AQO not used + Output: x2 + Group Key: a.x2 + -> Seq Scan on public.a (actual rows=60 loops=1) + AQO: rows=71, error=15% + Output: x1, x2, x3 + Filter: ((a.x1 > 3) AND (a.x2 < 10) AND (a.x3 < 10)) + Rows Removed by Filter: 40 + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(12 rows) + +----- +DROP TABLE IF EXISTS t; +NOTICE: table "t" does not exist, skipping +CREATE TABLE t AS SELECT x, x AS y, x AS z FROM generate_series(1, 10000) x; +ANALYZE t; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +-- Test that when there are less records than aqo.min_neighbors_for_predicting for given (fs, fss, dbid) +-- and aqo.predict_with_few_neighbors is off, those records have higher precedence for cardinality estimation +-- than neighbors' records. 
+SELECT str AS result +FROM expln(' +select * from t where x <= 10000 and y <= 10000 and z <= 10000;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +------------------------------------------------------------------ + Seq Scan on public.t (actual rows=10000 loops=1) + AQO not used + Output: x, y, z + Filter: ((t.x <= 10000) AND (t.y <= 10000) AND (t.z <= 10000)) + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(7 rows) + +DO +$$ +BEGIN + for counter in 1..20 loop + EXECUTE format('explain analyze select *, 1 from t where x <= 1 and y <= 1 and z <= %L;', 10 * counter); + EXECUTE format('explain analyze select *, 1 from t where x <= 1 and y <= %L and z <= 1;', 10 * counter); + EXECUTE format('explain analyze select *, 1 from t where x <= %L and y <= 1 and z <= 1;', 10 * counter); + end loop; +END; +$$ LANGUAGE PLPGSQL; +-- AQO should predict ~1000 rows to indicate that the record from previous invocation was used. +SELECT str AS result +FROM expln(' +select * from t where x <= 10000 and y <= 10000 and z <= 10000;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +------------------------------------------------------------------ + Seq Scan on public.t (actual rows=10000 loops=1) + AQO: rows=9987, error=-0% + Output: x, y, z + Filter: ((t.x <= 10000) AND (t.y <= 10000) AND (t.z <= 10000)) + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(7 rows) + +RESET aqo.wide_search; +RESET aqo.predict_with_few_neighbors; +RESET aqo.min_neighbors_for_predicting; +DROP EXTENSION aqo CASCADE; +DROP TABLE a; +DROP TABLE b; +DROP TABLE c; +DROP TABLE t; +DROP FUNCTION expln; diff --git a/expected/parallel_workers.out b/expected/parallel_workers.out new file mode 100644 index 00000000..c64aed61 --- /dev/null +++ b/expected/parallel_workers.out @@ -0,0 +1,128 @@ +-- Specifically test AQO machinery for queries uses partial paths and executed +-- 
with parallel workers. +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +-- Utility tool. Allow to filter system-dependent strings from explain output. +CREATE OR REPLACE FUNCTION expln(query_string text) RETURNS SETOF text AS $$ +BEGIN + RETURN QUERY + EXECUTE format('%s', query_string); + RETURN; +END; +$$ LANGUAGE PLPGSQL; +SET aqo.mode = 'learn'; +SET aqo.show_details = true; +-- Be generous with a number parallel workers to test the machinery +SET max_parallel_workers = 64; +SET max_parallel_workers_per_gather = 64; +-- Enforce usage of parallel workers +SET parallel_setup_cost = 0.1; +SET parallel_tuple_cost = 0.0001; +CREATE TABLE t AS ( + SELECT x AS id, repeat('a', 512) AS payload FROM generate_series(1, 1E5) AS x +); +ANALYZE t; +-- Simple test. Check serialization machinery mostly. +SELECT count(*) FROM t WHERE id % 100 = 0; -- Learning stage + count +------- + 1000 +(1 row) + +SELECT str FROM expln(' + EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) + SELECT count(*) FROM t WHERE id % 100 = 0;') AS str +WHERE str NOT LIKE '%Worker%'; + str +-------------------------------------------------------------------- + Finalize Aggregate (actual rows=1 loops=1) + AQO not used + -> Gather (actual rows=3 loops=1) + AQO not used + -> Partial Aggregate (actual rows=1 loops=3) + AQO not used + -> Parallel Seq Scan on t (actual rows=333 loops=3) + AQO: rows=1000, error=0% + Filter: ((id % '100'::numeric) = '0'::numeric) + Rows Removed by Filter: 33000 + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(13 rows) + +-- More complex query just to provoke errors +SELECT count(*) FROM + (SELECT id FROM t WHERE id % 100 = 0 GROUP BY (id)) AS q1, + (SELECT max(id) AS id, payload FROM t + WHERE id % 101 = 0 GROUP BY (payload)) AS q2 +WHERE q1.id = q2.id; -- Learning stage + count +------- + 0 +(1 row) + +-- XXX: Why grouping prediction isn't working here? 
+SELECT str FROM expln(' +EXPLAIN (COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) FROM + (SELECT id FROM t WHERE id % 100 = 0 GROUP BY (id)) AS q1, + (SELECT max(id) AS id, payload FROM t + WHERE id % 101 = 0 GROUP BY (payload)) AS q2 +WHERE q1.id = q2.id;') AS str +WHERE str NOT LIKE '%Workers%'; + str +-------------------------------------------------------------------------------------------------- + Aggregate + AQO not used + -> Merge Join + AQO not used + Merge Cond: (q2.id = t_1.id) + -> Sort + Sort Key: q2.id + -> Subquery Scan on q2 + AQO not used + -> Finalize GroupAggregate + AQO not used + Group Key: t.payload + -> Gather Merge + AQO not used + -> Partial GroupAggregate + AQO not used + Group Key: t.payload + -> Sort + AQO not used + Sort Key: t.payload + -> Parallel Seq Scan on t + AQO: rows=991 + Filter: ((id % '101'::numeric) = '0'::numeric) + -> Group + AQO not used + Group Key: t_1.id + -> Gather Merge + AQO not used + -> Group + AQO not used + Group Key: t_1.id + -> Sort + AQO not used + Sort Key: t_1.id + -> Parallel Seq Scan on t t_1 + AQO: rows=991 + Filter: ((id % '100'::numeric) = '0'::numeric) + Using aqo: true + AQO mode: LEARN + JOINS: 1 +(40 rows) + +RESET parallel_tuple_cost; +RESET parallel_setup_cost; +RESET max_parallel_workers; +RESET max_parallel_workers_per_gather; +DROP TABLE t; +DROP FUNCTION expln; +DROP EXTENSION aqo; diff --git a/expected/plancache.out b/expected/plancache.out new file mode 100644 index 00000000..88698463 --- /dev/null +++ b/expected/plancache.out @@ -0,0 +1,52 @@ +-- Tests on interaction of AQO with cached plans. 
+CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +SET aqo.mode = 'intelligent'; +SET aqo.show_details = 'on'; +SET aqo.show_hash = 'off'; +CREATE TABLE test AS SELECT x FROM generate_series(1,10) AS x; +ANALYZE test; +-- Function which implements a test where AQO is used for both situations where +-- a query is planned or got from a plan cache. +-- Use a function to hide a system dependent hash value. +CREATE FUNCTION f1() RETURNS TABLE ( + nnex bigint, + nex bigint, + pt double precision[] +) AS $$ +DECLARE + i integer; + qhash bigint; +BEGIN + PREPARE fooplan (int) AS SELECT count(*) FROM test WHERE x = $1; + + FOR i IN 1..10 LOOP + execute 'EXECUTE fooplan(1)'; + END LOOP; + + SELECT queryid FROM aqo_query_texts + WHERE query_text LIKE '%count(*) FROM test WHERE x%' INTO qhash; + + RETURN QUERY SELECT executions_without_aqo nnex, + executions_with_aqo nex, + planning_time_with_aqo pt + FROM aqo_query_stat WHERE queryid = qhash; +END $$ LANGUAGE 'plpgsql'; +-- The function shows 6 executions without an AQO support (nnex) and +-- 4 executions with usage of an AQO knowledge base (nex). Planning time in the +-- case of AQO support (pt) is equal to '-1', because the query plan is extracted +-- from the plan cache. 
+SELECT * FROM f1(); + nnex | nex | pt +------+-----+--------------- + 6 | 4 | {-1,-1,-1,-1} +(1 row) + +DROP FUNCTION f1; +DROP TABLE test CASCADE; +DROP EXTENSION aqo; diff --git a/expected/relocatable.out b/expected/relocatable.out new file mode 100644 index 00000000..3d7f386f --- /dev/null +++ b/expected/relocatable.out @@ -0,0 +1,127 @@ +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +SET aqo.mode = 'learn'; -- use this mode for unconditional learning +CREATE TABLE test AS (SELECT id, 'payload' || id FROM generate_series(1,100) id); +ANALYZE test; +-- Learn on a query +SELECT count(*) FROM test; + count +------- + 100 +(1 row) + +SELECT query_text, learn_aqo, use_aqo, auto_tuning +FROM aqo_query_texts aqt JOIN aqo_queries aq ON (aqt.queryid = aq.queryid) +ORDER BY (md5(query_text)) +; -- Check result. TODO: use aqo_status() + query_text | learn_aqo | use_aqo | auto_tuning +---------------------------------------+-----------+---------+------------- + SELECT count(*) FROM test; | t | t | f + COMMON feature space (do not delete!) | f | f | f +(2 rows) + +-- Create a schema and move AQO into it. +CREATE SCHEMA IF NOT EXISTS test; +ALTER EXTENSION aqo SET SCHEMA test; +-- Do something to be confident that AQO works +SELECT count(*) FROM test; + count +------- + 100 +(1 row) + +SELECT count(*) FROM test WHERE id < 10; + count +------- + 9 +(1 row) + +SELECT query_text, learn_aqo, use_aqo, auto_tuning +FROM test.aqo_query_texts aqt JOIN test.aqo_queries aq ON (aqt.queryid = aq.queryid) +ORDER BY (md5(query_text)) +; -- Find out both queries executed above + query_text | learn_aqo | use_aqo | auto_tuning +------------------------------------------+-----------+---------+------------- + SELECT count(*) FROM test; | t | t | f + COMMON feature space (do not delete!) 
| f | f | f + SELECT count(*) FROM test WHERE id < 10; | t | t | f +(3 rows) + +-- Add schema which contains AQO to the end of search_path +SELECT set_config('search_path', current_setting('search_path') || ', test', false); + set_config +----------------------- + "$user", public, test +(1 row) + +SELECT count(*) FROM test; + count +------- + 100 +(1 row) + +SELECT count(*) FROM test WHERE id < 10; + count +------- + 9 +(1 row) + +SELECT query_text, learn_aqo, use_aqo, auto_tuning +FROM test.aqo_query_texts aqt JOIN test.aqo_queries aq ON (aqt.queryid = aq.queryid) +ORDER BY (md5(query_text)) +; -- Check result. + query_text | learn_aqo | use_aqo | auto_tuning +------------------------------------------+-----------+---------+------------- + SELECT count(*) FROM test; | t | t | f + COMMON feature space (do not delete!) | f | f | f + SELECT count(*) FROM test WHERE id < 10; | t | t | f +(3 rows) + +/* + * Below, we should check each UI function + */ +SELECT aqo_disable_class(id) FROM ( + SELECT queryid AS id FROM aqo_queries WHERE queryid <> 0) AS q1; + aqo_disable_class +------------------- + + +(2 rows) + +SELECT learn_aqo, use_aqo, auto_tuning FROM test.aqo_queries +ORDER BY (learn_aqo, use_aqo, auto_tuning); + learn_aqo | use_aqo | auto_tuning +-----------+---------+------------- + f | f | f + f | f | f + f | f | f +(3 rows) + +SELECT aqo_enable_class(id) FROM ( + SELECT queryid AS id FROM aqo_queries WHERE queryid <> 0) AS q1; + aqo_enable_class +------------------ + + +(2 rows) + +SELECT learn_aqo, use_aqo, auto_tuning FROM test.aqo_queries +ORDER BY (learn_aqo, use_aqo, auto_tuning); + learn_aqo | use_aqo | auto_tuning +-----------+---------+------------- + f | f | f + t | t | f + t | t | f +(3 rows) + +RESET search_path; +DROP TABLE test CASCADE; +DROP SCHEMA IF EXISTS test CASCADE; +NOTICE: drop cascades to extension aqo +DROP EXTENSION IF EXISTS aqo CASCADE; +NOTICE: extension "aqo" does not exist, skipping diff --git a/expected/schema.out 
b/expected/schema.out index 8508f582..e712f407 100644 --- a/expected/schema.out +++ b/expected/schema.out @@ -1,5 +1,3 @@ -DROP EXTENSION IF EXISTS aqo CASCADE; -NOTICE: extension "aqo" does not exist, skipping DROP SCHEMA IF EXISTS test CASCADE; NOTICE: schema "test" does not exist, skipping -- Check Zero-schema path behaviour @@ -12,6 +10,12 @@ ERROR: no schema has been selected to create in CREATE SCHEMA IF NOT EXISTS test1; SET search_path TO test1, public; CREATE EXTENSION aqo; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + SET aqo.mode = 'intelligent'; CREATE TABLE test (id SERIAL, data TEXT); INSERT INTO test (data) VALUES ('string'); @@ -24,24 +28,21 @@ SELECT * FROM test; -- Check AQO service relations state after some manipulations -- Exclude fields with hash values from the queries. Hash is depend on -- nodefuncs code which is highly PostgreSQL version specific. -SELECT query_text FROM public.aqo_query_texts; - query_text ------------------------------------------------- +SELECT query_text FROM aqo_query_texts +ORDER BY (md5(query_text)) DESC; + query_text +--------------------------------------- COMMON feature space (do not delete!) 
- INSERT INTO test (data) VALUES ('string'); SELECT * FROM test; - SELECT query_text FROM public.aqo_query_texts; -(4 rows) +(2 rows) -SELECT learn_aqo, use_aqo, auto_tuning FROM public.aqo_queries; +SELECT learn_aqo, use_aqo, auto_tuning FROM aqo_queries +ORDER BY (learn_aqo, use_aqo, auto_tuning); learn_aqo | use_aqo | auto_tuning -----------+---------+------------- f | f | f t | f | t - t | f | t - t | f | t - t | f | t -(5 rows) +(2 rows) DROP SCHEMA IF EXISTS test1 CASCADE; NOTICE: drop cascades to 2 other objects diff --git a/expected/statement_timeout.out b/expected/statement_timeout.out new file mode 100644 index 00000000..1d957df7 --- /dev/null +++ b/expected/statement_timeout.out @@ -0,0 +1,158 @@ +-- Check the learning-on-timeout feature +-- For stabilized reproduction autovacuum must be disabled. +CREATE FUNCTION check_estimated_rows(text) RETURNS TABLE (estimated int) +LANGUAGE plpgsql AS $$ +DECLARE + ln text; + tmp text[]; + first_row bool := true; +BEGIN + FOR ln IN + execute format('explain %s', $1) + LOOP + IF first_row THEN + first_row := false; + tmp := regexp_match(ln, 'rows=(\d*)'); + RETURN QUERY SELECT tmp[1]::int; + END IF; + END LOOP; +END; $$; +-- Preliminaries +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +CREATE TABLE t AS SELECT * FROM generate_series(1,50) AS x; +ANALYZE t; +DELETE FROM t WHERE x > 5; -- Force optimizer to make overestimated prediction. +SET aqo.mode = 'learn'; +SET aqo.show_details = 'off'; +SET aqo.learn_statement_timeout = 'on'; +SET statement_timeout = 80; -- [0.1s] +SELECT *, pg_sleep(0.1) FROM t; +NOTICE: [AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data. 
+ERROR: canceling statement due to statement timeout +RESET statement_timeout; +SELECT check_estimated_rows('SELECT *, pg_sleep(0.1) FROM t;'); -- haven't any partial data + check_estimated_rows +---------------------- + 50 +(1 row) + +-- Don't learn because running node has smaller cardinality than an optimizer prediction +SET statement_timeout = 350; +SELECT *, pg_sleep(0.1) FROM t; +NOTICE: [AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data. +ERROR: canceling statement due to statement timeout +RESET statement_timeout; +SELECT check_estimated_rows('SELECT *, pg_sleep(0.1) FROM t;'); + check_estimated_rows +---------------------- + 50 +(1 row) + +-- We have a real learning data. +SET statement_timeout = 800; +SELECT *, pg_sleep(0.1) FROM t; + x | pg_sleep +---+---------- + 1 | + 2 | + 3 | + 4 | + 5 | +(5 rows) + +RESET statement_timeout; +SELECT check_estimated_rows('SELECT *, pg_sleep(0.1) FROM t;'); + check_estimated_rows +---------------------- + 5 +(1 row) + +-- Force to make an underestimated prediction +DELETE FROM t WHERE x > 2; +ANALYZE t; +INSERT INTO t (x) (SELECT * FROM generate_series(3,5) AS x); +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +SET statement_timeout = 80; +SELECT *, pg_sleep(0.1) FROM t; -- Not learned +NOTICE: [AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data. +ERROR: canceling statement due to statement timeout +RESET statement_timeout; +SELECT check_estimated_rows('SELECT *, pg_sleep(0.1) FROM t;'); + check_estimated_rows +---------------------- + 2 +(1 row) + +SET statement_timeout = 350; +SELECT *, pg_sleep(0.1) FROM t; -- Learn! +NOTICE: [AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data. 
+ERROR: canceling statement due to statement timeout +RESET statement_timeout; +SELECT check_estimated_rows('SELECT *, pg_sleep(0.1) FROM t;'); + check_estimated_rows +---------------------- + 3 +(1 row) + +SET statement_timeout = 550; +SELECT *, pg_sleep(0.1) FROM t; -- Get reliable data + x | pg_sleep +---+---------- + 1 | + 2 | + 3 | + 4 | + 5 | +(5 rows) + +RESET statement_timeout; +SELECT check_estimated_rows('SELECT *, pg_sleep(0.1) FROM t;'); + check_estimated_rows +---------------------- + 5 +(1 row) + +-- Interrupted query should immediately appear in aqo_data +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +SET statement_timeout = 500; +SELECT count(*) FROM aqo_data; -- Must be zero + count +------- + 0 +(1 row) + +SELECT x, pg_sleep(0.1) FROM t WHERE x > 0; +NOTICE: [AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data. +ERROR: canceling statement due to statement timeout +RESET statement_timeout; +SELECT count(*) FROM aqo_data; -- Must be one + count +------- + 1 +(1 row) + +DROP TABLE t; +DROP FUNCTION check_estimated_rows; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +DROP EXTENSION aqo; diff --git a/expected/temp_tables.out b/expected/temp_tables.out new file mode 100644 index 00000000..9fa20e7c --- /dev/null +++ b/expected/temp_tables.out @@ -0,0 +1,198 @@ +-- Preliminaries +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +SET aqo.wide_search = 'on'; +SET aqo.mode = 'learn'; +CREATE TEMP TABLE tt(); +CREATE TABLE pt(); +-- Ignore queries with the only temp tables +SELECT count(*) FROM tt; + count +------- + 0 +(1 row) + +SELECT count(*) FROM tt AS t1, tt AS t2; + count +------- + 0 +(1 row) + +SELECT query_text FROM aqo_query_texts; -- Default row should be returned + query_text +--------------------------------------- + COMMON feature space (do not delete!) 
+(1 row) + +-- Should be stored in the ML base +SELECT count(*) FROM pt; + count +------- + 0 +(1 row) + +SELECT count(*) FROM pt, tt; + count +------- + 0 +(1 row) + +SELECT count(*) FROM pt AS pt1, tt AS tt1, tt AS tt2, pt AS pt2; + count +------- + 0 +(1 row) + +SELECT count(*) FROM aqo_data; -- Don't bother about false negatives because of trivial query plans + count +------- + 10 +(1 row) + +DROP TABLE tt; +SELECT true AS success FROM aqo_cleanup(); + success +--------- + t +(1 row) + +SELECT count(*) FROM aqo_data; -- Should return the same as previous call above + count +------- + 10 +(1 row) + +DROP TABLE pt; +SELECT true AS success FROM aqo_cleanup(); + success +--------- + t +(1 row) + +SELECT count(*) FROM aqo_data; -- Should be 0 + count +------- + 0 +(1 row) + +SELECT query_text FROM aqo_queries aq LEFT JOIN aqo_query_texts aqt +ON aq.queryid = aqt.queryid +ORDER BY (md5(query_text)); -- The only the common class is returned + query_text +--------------------------------------- + COMMON feature space (do not delete!) 
+(1 row) + +-- Test learning on temporary table +CREATE TABLE pt AS SELECT x AS x, (x % 10) AS y FROM generate_series(1,100) AS x; +CREATE TEMP TABLE tt AS SELECT -x AS x, (x % 7) AS y FROM generate_series(1,100) AS x; +CREATE TEMP TABLE ttd AS -- the same structure as tt + SELECT -(x*3) AS x, (x % 9) AS y FROM generate_series(1,100) AS x; +ANALYZE pt,tt,ttd; +create function check_estimated_rows(text) returns table (estimated int, actual int) +language plpgsql as +$$ +declare + ln text; + tmp text[]; + first_row bool := true; +begin + for ln in + execute format('explain analyze %s', $1) + loop + if first_row then + first_row := false; + tmp := regexp_match(ln, 'rows=(\d*) .* rows=(\d*)'); + return query select tmp[1]::int, tmp[2]::int; + end if; + end loop; +end; +$$; +-- Check: AQO learns on queries with temp tables +SELECT * FROM check_estimated_rows(' + SELECT pt.x, avg(pt.y) FROM pt,tt WHERE pt.x = tt.x GROUP BY (pt.x); +'); -- Estimation failed. Learn. + estimated | actual +-----------+-------- + 100 | 0 +(1 row) + +SELECT * FROM check_estimated_rows(' + SELECT pt.x, avg(pt.y) FROM pt,tt WHERE pt.x = tt.x GROUP BY (pt.x); +'); -- Should use AQO estimation + estimated | actual +-----------+-------- + 1 | 0 +(1 row) + +SELECT * FROM check_estimated_rows(' + SELECT pt.x, avg(pt.y) FROM pt,ttd WHERE pt.x = ttd.x GROUP BY (pt.x); +'); -- Should use AQO estimation with another temp table of the same structure + estimated | actual +-----------+-------- + 1 | 0 +(1 row) + +SET aqo.mode = 'forced'; -- Now we use all fss records for each query +DROP TABLE pt; +SELECT true AS success FROM aqo_cleanup(); + success +--------- + t +(1 row) + +CREATE TABLE pt AS SELECT x AS x, (x % 10) AS y FROM generate_series(1,100) AS x; +CREATE TEMP TABLE ttd1 AS + SELECT -(x*3) AS x, (x % 9) AS y1 FROM generate_series(1,100) AS x; +ANALYZE; +-- Check: use AQO knowledge with different temp table of the same structure +SELECT * FROM check_estimated_rows(' + SELECT pt.x, avg(pt.y) FROM 
pt,tt WHERE pt.x = tt.x GROUP BY (pt.x); +'); -- Estimation failed. Learn. + estimated | actual +-----------+-------- + 100 | 0 +(1 row) + +SELECT * FROM check_estimated_rows(' + SELECT pt.x, avg(pt.y) FROM pt,tt WHERE pt.x = tt.x GROUP BY (pt.x); +'); -- Should use AQO estimation + estimated | actual +-----------+-------- + 1 | 0 +(1 row) + +SELECT * FROM check_estimated_rows(' + SELECT pt.x, avg(pt.y) FROM pt,ttd WHERE pt.x = ttd.x GROUP BY (pt.x); +'); -- Should use AQO estimation with another temp table of the same structure + estimated | actual +-----------+-------- + 1 | 0 +(1 row) + +SELECT * FROM check_estimated_rows(' + SELECT pt1.x, avg(pt1.y) FROM pt AS pt1,ttd WHERE pt1.x = ttd.x GROUP BY (pt1.x); +'); -- Alias doesn't influence feature space + estimated | actual +-----------+-------- + 1 | 0 +(1 row) + +SELECT * FROM check_estimated_rows(' + SELECT pt.x, avg(pt.y) FROM pt,ttd1 WHERE pt.x = ttd1.x GROUP BY (pt.x); +'); -- Don't use AQO for temp table because of different attname + estimated | actual +-----------+-------- + 100 | 0 +(1 row) + +-- Clear common parts of AQO state +RESET aqo.wide_search; +DROP EXTENSION aqo CASCADE; +DROP TABLE pt CASCADE; +DROP FUNCTION check_estimated_rows; diff --git a/expected/top_queries.out b/expected/top_queries.out new file mode 100644 index 00000000..62186efc --- /dev/null +++ b/expected/top_queries.out @@ -0,0 +1,104 @@ +-- Preliminaries +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +SET aqo.mode = 'disabled'; +SET aqo.force_collect_stat = 'on'; +-- +-- Dummy test. CREATE TABLE shouldn't be found in the ML storage. But a simple +-- select must recorded. Also here we test on gathering a stat on temp and plain +-- relations. +-- XXX: Right now we ignore queries if no one permanent table is touched. 
+-- +CREATE TEMP TABLE ttt AS SELECT count(*) AS cnt FROM generate_series(1,10); +CREATE TABLE ttp AS SELECT count(*) AS cnt FROM generate_series(1,10); +SELECT count(*) AS cnt FROM ttt WHERE cnt % 100 = 0; -- Ignore it + cnt +----- + 0 +(1 row) + +SELECT count(*) AS cnt FROM ttp WHERE cnt % 100 = 0; + cnt +----- + 0 +(1 row) + +SELECT num FROM aqo_execution_time(true); -- Just for checking, return zero. + num +----- +(0 rows) + +SELECT num FROM aqo_execution_time(false); + num +----- + 1 +(1 row) + +-- Without the AQO control queries with and without temp tables are logged. +SELECT query_text,nexecs +FROM aqo_execution_time(false) ce, aqo_query_texts aqt +WHERE ce.id = aqt.queryid +ORDER BY (md5(query_text)); + query_text | nexecs +------------------------------------------------------+-------- + SELECT count(*) AS cnt FROM ttp WHERE cnt % 100 = 0; | 1 +(1 row) + +-- +-- num of query which uses the table t2 should be bigger than num of query which +-- uses the table t1 and must be the first +-- +CREATE TABLE t1 AS SELECT mod(gs,10) AS x, mod(gs+1,10) AS y + FROM generate_series(1,1000) AS gs; +CREATE TABLE t2 AS SELECT mod(gs,10) AS x, mod(gs+1,10) AS y + FROM generate_series(1,100000) AS gs; +SELECT count(*) FROM (SELECT x, y FROM t1 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1; + count +------- + 31 +(1 row) + +SELECT count(*) FROM (SELECT x, y FROM t2 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1; + count +------- + 31 +(1 row) + +SELECT to_char(error, '9.99EEEE') FROM aqo_cardinality_error(false) AS te +WHERE te.fshash = ( + SELECT fs FROM aqo_queries + WHERE aqo_queries.queryid = ( + SELECT aqo_query_texts.queryid FROM aqo_query_texts + WHERE query_text = 'SELECT count(*) FROM (SELECT x, y FROM t2 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1;' + ) +); + to_char +----------- + 1.94e+00 +(1 row) + +-- Should return zero +SELECT count(*) FROM aqo_cardinality_error(true); + count +------- + 0 +(1 row) + +-- Fix list of logged queries +SELECT 
query_text,nexecs +FROM aqo_cardinality_error(false) ce, aqo_query_texts aqt +WHERE ce.id = aqt.queryid +ORDER BY (md5(query_text)); + query_text | nexecs +------------------------------------------------------------------------------------------------+-------- + SELECT count(*) FROM (SELECT x, y FROM t2 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1; | 1 + SELECT count(*) AS cnt FROM ttp WHERE cnt % 100 = 0; | 1 + SELECT count(*) FROM (SELECT x, y FROM t1 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1; | 1 +(3 rows) + +DROP EXTENSION aqo; diff --git a/expected/unsupported.out b/expected/unsupported.out new file mode 100644 index 00000000..a088a47c --- /dev/null +++ b/expected/unsupported.out @@ -0,0 +1,703 @@ +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +-- Utility tool. Allow to filter system-dependent strings from an explain output. +CREATE OR REPLACE FUNCTION expln(query_string text) RETURNS SETOF text AS $$ +BEGIN + RETURN QUERY + EXECUTE format('%s', query_string); + RETURN; +END; +$$ LANGUAGE PLPGSQL; +SET aqo.mode = 'learn'; +SET aqo.show_details = 'on'; +DROP TABLE IF EXISTS t; +NOTICE: table "t" does not exist, skipping +CREATE TABLE t AS SELECT (gs.* / 50) AS x FROM generate_series(1,1000) AS gs; +ANALYZE t; +CREATE TABLE t1 AS SELECT mod(gs,10) AS x, mod(gs+1,10) AS y + FROM generate_series(1,1000) AS gs; +ANALYZE t, t1; +-- +-- Do not support HAVING clauses for now. 
+-- +SELECT count(*) FROM (SELECT * FROM t GROUP BY (x) HAVING x > 3) AS q1; + count +------- + 17 +(1 row) + +EXPLAIN (COSTS OFF) + SELECT count(*) FROM (SELECT * FROM t GROUP BY (x) HAVING x > 3) AS q1; + QUERY PLAN +------------------------------- + Aggregate + AQO not used + -> HashAggregate + AQO: rows=17 + Group Key: t.x + -> Seq Scan on t + AQO: rows=801 + Filter: (x > 3) + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(11 rows) + +SELECT str FROM expln(' +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT * FROM t GROUP BY (x) HAVING x > 3; +') AS str WHERE str NOT LIKE '%Memory Usage%'; + str +----------------------------------------------- + HashAggregate (actual rows=17 loops=1) + AQO not used + Group Key: x + -> Seq Scan on t (actual rows=801 loops=1) + AQO not used + Filter: (x > 3) + Rows Removed by Filter: 199 + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(10 rows) + +-- +-- Doesn't estimates GROUP BY clause +-- +SELECT count(*) FROM (SELECT count(*) FROM t1 GROUP BY (x,y)) AS q1; + count +------- + 10 +(1 row) + +EXPLAIN (COSTS OFF) + SELECT count(*) FROM (SELECT count(*) FROM t1 GROUP BY (x,y)) AS q1; + QUERY PLAN +------------------------------- + Aggregate + AQO not used + -> HashAggregate + AQO: rows=10 + Group Key: t1.x, t1.y + -> Seq Scan on t1 + AQO: rows=1000 + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(10 rows) + +SELECT count(*) FROM (SELECT count(*) FROM t1 GROUP BY (x,x*y)) AS q1; + count +------- + 10 +(1 row) + +EXPLAIN (COSTS OFF) + SELECT count(*) FROM (SELECT count(*) FROM t1 GROUP BY (x,x*y)) AS q1; + QUERY PLAN +---------------------------------------- + Aggregate + AQO not used + -> HashAggregate + AQO: rows=10 + Group Key: t1.x, (t1.x * t1.y) + -> Seq Scan on t1 + AQO: rows=1000 + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(10 rows) + +SELECT count(*) FROM ( + SELECT count(*) AS x FROM ( + SELECT count(*) FROM t1 GROUP BY (x,y) + ) AS q1 +) AS q2 +WHERE q2.x > 1; + count +------- + 1 +(1 row) + +SELECT 
count(*) FROM ( + SELECT count(*) AS x FROM ( + SELECT count(*) FROM t1 GROUP BY (x,y) + ) AS q1 +) AS q2 +WHERE q2.x > 1; + count +------- + 1 +(1 row) + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM ( + SELECT count(*) AS x FROM ( + SELECT count(*) FROM t1 GROUP BY (x,y) + ) AS q1 +) AS q2 +WHERE q2.x > 1; + QUERY PLAN +------------------------------------- + Aggregate + AQO not used + -> Aggregate + AQO not used + Filter: (count(*) > 1) + -> HashAggregate + AQO: rows=10 + Group Key: t1.x, t1.y + -> Seq Scan on t1 + AQO: rows=1000 + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(13 rows) + +-- +-- Doesn't support GROUPING SETS clause +-- +SELECT count(*) FROM (SELECT x, y FROM t1 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1; + count +------- + 31 +(1 row) + +EXPLAIN (COSTS OFF) + SELECT count(*) FROM (SELECT x, y FROM t1 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1; + QUERY PLAN +------------------------------ + Aggregate + AQO not used + -> MixedAggregate + AQO not used + Hash Key: t1.x, t1.y + Hash Key: t1.x + Hash Key: t1.y + Group Key: () + -> Seq Scan on t1 + AQO: rows=1000 + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(13 rows) + +-- +-- The subplans issue +-- +SELECT count(*) FROM t WHERE x = (SELECT avg(x) FROM t WHERE x = 1); + count +------- + 50 +(1 row) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT count(*) FROM t WHERE x = ( + SELECT avg(x) FROM t WHERE x = 1 + ); + QUERY PLAN +---------------------------------------------------------- + Aggregate (actual rows=1 loops=1) + AQO not used + InitPlan 1 (returns $0) + -> Aggregate (actual rows=1 loops=1) + AQO not used + -> Seq Scan on t t_1 (actual rows=50 loops=1) + AQO: rows=50, error=0% + Filter: (x = 1) + Rows Removed by Filter: 950 + -> Seq Scan on t (actual rows=50 loops=1) + AQO: rows=50, error=0% + Filter: ((x)::numeric = $0) + Rows Removed by Filter: 950 + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(16 rows) + +SELECT count(*) FROM t WHERE x = (SELECT avg(x) 
FROM t t0 WHERE t0.x = t.x); + count +------- + 1000 +(1 row) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT count(*) FROM t WHERE x = ( + SELECT avg(x) FROM t t0 WHERE t0.x = t.x + ); + QUERY PLAN +------------------------------------------------------------------ + Aggregate (actual rows=1 loops=1) + AQO not used + -> Seq Scan on t (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Filter: ((x)::numeric = (SubPlan 1)) + SubPlan 1 + -> Aggregate (actual rows=1 loops=1000) + AQO not used + -> Seq Scan on t t0 (actual rows=50 loops=1000) + AQO: rows=50, error=0% + Filter: (x = t.x) + Rows Removed by Filter: 950 + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(15 rows) + +-- Two identical subplans in a clause list +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT count(*) FROM t WHERE + x = (SELECT avg(x) FROM t t0 WHERE t0.x = t.x + 21) OR + x IN (SELECT avg(x) FROM t t0 WHERE t0.x = t.x + 21); + QUERY PLAN +------------------------------------------------------------------- + Aggregate (actual rows=1 loops=1) + AQO not used + -> Seq Scan on t (actual rows=0 loops=1) + AQO not used + Filter: (((x)::numeric = (SubPlan 1)) OR (SubPlan 2)) + Rows Removed by Filter: 1000 + SubPlan 1 + -> Aggregate (actual rows=1 loops=1000) + AQO not used + -> Seq Scan on t t0 (actual rows=0 loops=1000) + AQO not used + Filter: (x = (t.x + 21)) + Rows Removed by Filter: 1000 + SubPlan 2 + -> Aggregate (actual rows=1 loops=1000) + AQO not used + -> Seq Scan on t t0_1 (actual rows=0 loops=1000) + AQO not used + Filter: (x = (t.x + 21)) + Rows Removed by Filter: 1000 + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(23 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT count(*) FROM t WHERE + x = (SELECT avg(x) FROM t t0 WHERE t0.x = t.x + 21) OR + x IN (SELECT avg(x) FROM t t0 WHERE t0.x = t.x + 21); + QUERY PLAN +------------------------------------------------------------------- + Aggregate (actual rows=1 loops=1) + AQO not used 
+ -> Seq Scan on t (actual rows=0 loops=1) + AQO: rows=1, error=100% + Filter: (((x)::numeric = (SubPlan 1)) OR (SubPlan 2)) + Rows Removed by Filter: 1000 + SubPlan 1 + -> Aggregate (actual rows=1 loops=1000) + AQO not used + -> Seq Scan on t t0 (actual rows=0 loops=1000) + AQO: rows=1, error=100% + Filter: (x = (t.x + 21)) + Rows Removed by Filter: 1000 + SubPlan 2 + -> Aggregate (actual rows=1 loops=1000) + AQO not used + -> Seq Scan on t t0_1 (actual rows=0 loops=1000) + AQO: rows=1, error=100% + Filter: (x = (t.x + 21)) + Rows Removed by Filter: 1000 + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(23 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT * FROM t WHERE + x = (SELECT x FROM t t0 WHERE t0.x = t.x LIMIT 1) AND + x IN (SELECT x FROM t t0 WHERE t0.x = t.x); + QUERY PLAN +----------------------------------------------------------- + Seq Scan on t (actual rows=1000 loops=1) + AQO not used + Filter: ((x = (SubPlan 1)) AND (SubPlan 2)) + SubPlan 1 + -> Limit (actual rows=1 loops=1000) + AQO not used + -> Seq Scan on t t0 (actual rows=1 loops=1000) + AQO not used + Filter: (x = t.x) + Rows Removed by Filter: 475 + SubPlan 2 + -> Seq Scan on t t0_1 (actual rows=1 loops=1000) + AQO not used + Filter: (x = t.x) + Rows Removed by Filter: 475 + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(18 rows) + +-- No prediction for top SeqScan, because it fss is changed +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT * FROM t WHERE + x = (SELECT x FROM t t0 WHERE t0.x = t.x LIMIT 1) AND + x IN (SELECT x FROM t t0 WHERE t0.x = t.x); + QUERY PLAN +----------------------------------------------------------- + Seq Scan on t (actual rows=1000 loops=1) + AQO not used + Filter: ((SubPlan 2) AND (x = (SubPlan 1))) + SubPlan 2 + -> Seq Scan on t t0_1 (actual rows=1 loops=1000) + AQO: rows=1, error=0% + Filter: (x = t.x) + Rows Removed by Filter: 475 + SubPlan 1 + -> Limit (actual rows=1 loops=1000) + AQO not used + -> Seq Scan on t t0 (actual 
rows=1 loops=1000) + AQO: rows=1, error=0% + Filter: (x = t.x) + Rows Removed by Filter: 475 + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(18 rows) + +-- It's OK to use the knowledge for a query with different constants. +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT count(*) FROM t WHERE + x = (SELECT avg(x) FROM t t0 WHERE t0.x = t.x + 22) OR + x IN (SELECT avg(x) FROM t t0 WHERE t0.x = t.x + 23); + QUERY PLAN +------------------------------------------------------------------- + Aggregate (actual rows=1 loops=1) + AQO not used + -> Seq Scan on t (actual rows=0 loops=1) + AQO: rows=1, error=100% + Filter: (((x)::numeric = (SubPlan 1)) OR (SubPlan 2)) + Rows Removed by Filter: 1000 + SubPlan 1 + -> Aggregate (actual rows=1 loops=1000) + AQO not used + -> Seq Scan on t t0 (actual rows=0 loops=1000) + AQO: rows=1, error=100% + Filter: (x = (t.x + 22)) + Rows Removed by Filter: 1000 + SubPlan 2 + -> Aggregate (actual rows=1 loops=1000) + AQO not used + -> Seq Scan on t t0_1 (actual rows=0 loops=1000) + AQO: rows=1, error=100% + Filter: (x = (t.x + 23)) + Rows Removed by Filter: 1000 + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(23 rows) + +-- Different SubPlans in the quals of leafs of JOIN. 
+SELECT count(*) FROM + (SELECT * FROM t WHERE x % 3 < (SELECT avg(x) FROM t t0 WHERE t0.x = t.x)) AS q1 + JOIN + (SELECT * FROM t WHERE x % 3 < (SELECT avg(x) FROM t t0 WHERE t0.x <> t.x)) AS q2 + ON q1.x = q2.x+1; + count +------- + 42550 +(1 row) + +SELECT str FROM expln(' +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT count(*) FROM + (SELECT * FROM t WHERE x % 3 < (SELECT avg(x) FROM t t0 WHERE t0.x = t.x)) AS q1 + JOIN + (SELECT * FROM t WHERE x % 3 < (SELECT avg(x) FROM t t0 WHERE t0.x <> t.x)) AS q2 + ON q1.x = q2.x+1; +') AS str WHERE str NOT LIKE '%Memory Usage%'; + str +------------------------------------------------------------------------------ + Aggregate (actual rows=1 loops=1) + AQO not used + -> Hash Join (actual rows=42550 loops=1) + AQO: rows=42550, error=0% + Hash Cond: ((t_1.x + 1) = t.x) + -> Seq Scan on t t_1 (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Filter: (((x % 3))::numeric < (SubPlan 2)) + SubPlan 2 + -> Aggregate (actual rows=1 loops=1000) + AQO not used + -> Seq Scan on t t0_1 (actual rows=950 loops=1000) + AQO: rows=950, error=-0% + Filter: (x <> t_1.x) + Rows Removed by Filter: 50 + -> Hash (actual rows=851 loops=1) + -> Seq Scan on t (actual rows=851 loops=1) + AQO: rows=851, error=0% + Filter: (((x % 3))::numeric < (SubPlan 1)) + Rows Removed by Filter: 149 + SubPlan 1 + -> Aggregate (actual rows=1 loops=1000) + AQO not used + -> Seq Scan on t t0 (actual rows=50 loops=1000) + AQO: rows=50, error=0% + Filter: (x = t.x) + Rows Removed by Filter: 950 + Using aqo: true + AQO mode: LEARN + JOINS: 1 +(30 rows) + +-- Two identical subplans in a clause +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT count(*) FROM t WHERE (SELECT avg(x) FROM t t0 WHERE t0.x = t.x) = + (SELECT avg(x) FROM t t0 WHERE t0.x = t.x); + QUERY PLAN +-------------------------------------------------------------------- + Aggregate (actual rows=1 loops=1) + AQO not used + -> Seq Scan on t (actual rows=1000 loops=1) + AQO 
not used + Filter: ((SubPlan 1) = (SubPlan 2)) + SubPlan 1 + -> Aggregate (actual rows=1 loops=1000) + AQO not used + -> Seq Scan on t t0 (actual rows=50 loops=1000) + AQO not used + Filter: (x = t.x) + Rows Removed by Filter: 950 + SubPlan 2 + -> Aggregate (actual rows=1 loops=1000) + AQO not used + -> Seq Scan on t t0_1 (actual rows=50 loops=1000) + AQO not used + Filter: (x = t.x) + Rows Removed by Filter: 950 + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(22 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT count(*) FROM t WHERE (SELECT avg(x) FROM t t0 WHERE t0.x = t.x) = + (SELECT avg(x) FROM t t0 WHERE t0.x = t.x); + QUERY PLAN +-------------------------------------------------------------------- + Aggregate (actual rows=1 loops=1) + AQO not used + -> Seq Scan on t (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Filter: ((SubPlan 1) = (SubPlan 2)) + SubPlan 1 + -> Aggregate (actual rows=1 loops=1000) + AQO not used + -> Seq Scan on t t0 (actual rows=50 loops=1000) + AQO: rows=50, error=0% + Filter: (x = t.x) + Rows Removed by Filter: 950 + SubPlan 2 + -> Aggregate (actual rows=1 loops=1000) + AQO not used + -> Seq Scan on t t0_1 (actual rows=50 loops=1000) + AQO: rows=50, error=0% + Filter: (x = t.x) + Rows Removed by Filter: 950 + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(22 rows) + +-- +-- Not executed nodes +-- +SELECT * FROM + (SELECT * FROM t WHERE x < 0) AS t0 + JOIN + (SELECT * FROM t WHERE x > 20) AS t1 + USING(x); + x +--- +(0 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM + (SELECT * FROM t WHERE x < 0) AS t0 + JOIN + (SELECT * FROM t WHERE x > 20) AS t1 + USING(x); + QUERY PLAN +--------------------------------------------- + Nested Loop (actual rows=0 loops=1) + AQO: rows=1, error=100% + Join Filter: (t.x = t_1.x) + -> Seq Scan on t (actual rows=0 loops=1) + AQO: rows=1, error=100% + Filter: (x < 0) + Rows Removed by Filter: 1000 + -> Seq Scan on t t_1 (never executed) + AQO: 
rows=1 + Filter: (x > 20) + Using aqo: true + AQO mode: LEARN + JOINS: 1 +(13 rows) + +-- AQO needs to predict total fetched tuples in a table. +-- +-- At a non-leaf node we have prediction about input tuples - is a number of +-- predicted output rows in underlying node. But for Scan nodes we don't have +-- any prediction on number of fetched tuples. +-- So, if selectivity was wrong we could make bad choice of Scan operation. +-- For example, we could choose suboptimal index. +-- Turn off statistics gathering for simple demonstration of filtering problem. +ALTER TABLE t SET (autovacuum_enabled = 'false'); +CREATE INDEX ind1 ON t(x); +SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1; + count +------- + 50 +(1 row) + +EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) + SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1; + QUERY PLAN +---------------------------------------------------------------- + Aggregate (actual rows=1 loops=1) + AQO not used + -> Index Only Scan using ind1 on t (actual rows=50 loops=1) + AQO: rows=50, error=0% + Index Cond: (x < 3) + Filter: (mod(x, 3) = 1) + Rows Removed by Filter: 99 + Heap Fetches: 149 + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(11 rows) + +-- Because of bad statistics we use a last created index instead of best choice. +-- Here we filter more tuples than with the ind1 index. 
+CREATE INDEX ind2 ON t(mod(x,3)); +SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1; + count +------- + 50 +(1 row) + +SELECT str FROM expln(' + EXPLAIN (ANALYZE, VERBOSE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1') AS str +WHERE str NOT LIKE '%Heap Blocks%'; + str +----------------------------------------------------------------- + Aggregate (actual rows=1 loops=1) + AQO not used + Output: count(*) + -> Bitmap Heap Scan on public.t (actual rows=50 loops=1) + AQO: rows=50, error=0% + Recheck Cond: (mod(t.x, 3) = 1) + Filter: (t.x < 3) + Rows Removed by Filter: 300 + -> Bitmap Index Scan on ind2 (actual rows=350 loops=1) + Index Cond: (mod(t.x, 3) = 1) + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(13 rows) + +-- Best choice is ... +ANALYZE t; +EXPLAIN (COSTS OFF) + SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1; + QUERY PLAN +--------------------------------------- + Aggregate + AQO not used + -> Index Only Scan using ind1 on t + AQO: rows=50 + Index Cond: (x < 3) + Filter: (mod(x, 3) = 1) + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(9 rows) + +-- XXX: Do we stuck into an unstable behavior of an error value? +-- Live with this variant of the test for some time. 
+SELECT round(error::numeric, 3) AS error, query_text +FROM aqo_cardinality_error(true) cef, aqo_query_texts aqt +WHERE aqt.queryid = cef.id +ORDER BY (md5(query_text),error) DESC; + error | query_text +-------+------------------------------------------------------------------------------------------------ + 0.768 | SELECT count(*) FROM (SELECT count(*) FROM t1 GROUP BY (x,y)) AS q1; + 0.070 | SELECT count(*) FROM (SELECT * FROM t GROUP BY (x) HAVING x > 3) AS q1; + 1.554 | EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + + | SELECT * FROM t WHERE + + | x = (SELECT x FROM t t0 WHERE t0.x = t.x LIMIT 1) AND + + | x IN (SELECT x FROM t t0 WHERE t0.x = t.x); + 0.000 | SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1; + 0.000 | SELECT * FROM + + | (SELECT * FROM t WHERE x < 0) AS t0 + + | JOIN + + | (SELECT * FROM t WHERE x > 20) AS t1 + + | USING(x); + 0.000 | SELECT count(*) FROM t WHERE x = (SELECT avg(x) FROM t t0 WHERE t0.x = t.x); + 0.000 | EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + + | SELECT count(*) FROM t WHERE + + | x = (SELECT avg(x) FROM t t0 WHERE t0.x = t.x + 21) OR + + | x IN (SELECT avg(x) FROM t t0 WHERE t0.x = t.x + 21); + 0.454 | SELECT count(*) FROM (SELECT x, y FROM t1 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1; + 0.000 | SELECT count(*) FROM ( + + | SELECT count(*) AS x FROM ( + + | SELECT count(*) FROM t1 GROUP BY (x,y) + + | ) AS q1 + + | ) AS q2 + + | WHERE q2.x > 1; + 0.768 | SELECT count(*) FROM (SELECT count(*) FROM t1 GROUP BY (x,x*y)) AS q1; + 0.000 | SELECT count(*) FROM t WHERE x = (SELECT avg(x) FROM t WHERE x = 1); + 0.000 | EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + + | SELECT count(*) FROM t WHERE (SELECT avg(x) FROM t t0 WHERE t0.x = t.x) = + + | (SELECT avg(x) FROM t t0 WHERE t0.x = t.x); + 0.106 | + + | EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + + | SELECT * FROM t GROUP BY (x) HAVING x > 3; + + | + 0.000 | SELECT count(*) FROM + + | (SELECT * FROM t WHERE x % 3 < (SELECT 
avg(x) FROM t t0 WHERE t0.x = t.x)) AS q1 + + | JOIN + + | (SELECT * FROM t WHERE x % 3 < (SELECT avg(x) FROM t t0 WHERE t0.x <> t.x)) AS q2 + + | ON q1.x = q2.x+1; +(14 rows) + +DROP TABLE t,t1 CASCADE; -- delete all tables used in the test +SELECT count(*) FROM aqo_data; -- Just to detect some changes in the logic. May some false positives really bother us here? + count +------- + 48 +(1 row) + +SELECT true AS success FROM aqo_cleanup(); + success +--------- + t +(1 row) + +SELECT count(*) FROM aqo_data; -- No one row should be returned + count +------- + 0 +(1 row) + +-- Look for any remaining queries in the ML storage. +SELECT to_char(error, '9.99EEEE')::text AS error, query_text +FROM aqo_cardinality_error(true) cef, aqo_query_texts aqt +WHERE aqt.queryid = cef.id +ORDER BY (md5(query_text),error) DESC; + error | query_text +-------+------------ +(0 rows) + +DROP EXTENSION aqo; diff --git a/expected/update_functions.out b/expected/update_functions.out new file mode 100644 index 00000000..d2e7c84c --- /dev/null +++ b/expected/update_functions.out @@ -0,0 +1,449 @@ +-- Preliminaries +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +CREATE TABLE aqo_test1(a int, b int); +WITH RECURSIVE t(a, b) +AS ( + VALUES (1, 2) + UNION ALL + SELECT t.a + 1, t.b + 1 FROM t WHERE t.a < 20 +) INSERT INTO aqo_test1 (SELECT * FROM t); +CREATE INDEX aqo_test1_idx_a ON aqo_test1 (a); +ANALYZE aqo_test1; +CREATE TABLE aqo_test2(a int); +WITH RECURSIVE t(a) +AS ( + VALUES (0) + UNION ALL + SELECT t.a + 1 FROM t WHERE t.a < 100000 +) INSERT INTO aqo_test2 (SELECT * FROM t); +CREATE INDEX aqo_test2_idx_a ON aqo_test2 (a); +ANALYZE aqo_test2; +SET aqo.mode='intelligent'; +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a; + count +------- + 20 +(1 row) + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b, aqo_test2 c WHERE a.a = b.a AND b.a = c.a; + count +------- + 20 +(1 row) + +SET aqo.mode='learn'; +SELECT 
count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a; + count +------- + 20 +(1 row) + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a; + count +------- + 20 +(1 row) + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a; + count +------- + 20 +(1 row) + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 10; + count +------- + 10 +(1 row) + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 10 and b.a > 200; + count +------- + 0 +(1 row) + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 11 and b.a > 200; + count +------- + 0 +(1 row) + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 12 and b.a > 200; + count +------- + 0 +(1 row) + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 14 and b.a > 200; + count +------- + 0 +(1 row) + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 15 and +b.a > 300 and b.a < 500; + count +------- + 0 +(1 row) + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 15 and +b.a > 300 and b.a < 500; + count +------- + 0 +(1 row) + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 15 and +b.a > 300 and b.a < 500; + count +------- + 0 +(1 row) + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 15 and +(b.a > 300 and b.a < 500 or b.a > 100 and b.a < 200); + count +------- + 0 +(1 row) + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 15 and +(b.a > 300 and b.a < 500 or b.a > 100 and b.a < 200); + count +------- + 0 +(1 row) + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 15 and +(b.a > 300 and b.a < 500 or b.a > 100 and b.a < 200); + count +------- + 0 +(1 row) + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 15 and +(b.a > 300 and b.a < 500 or b.a > 100 and b.a < 200); + count +------- + 0 +(1 row) + +SET aqo.mode='controlled'; +CREATE TABLE aqo_query_texts_dump 
AS SELECT * FROM aqo_query_texts; +CREATE TABLE aqo_queries_dump AS SELECT * FROM aqo_queries; +CREATE TABLE aqo_query_stat_dump AS SELECT * FROM aqo_query_stat; +CREATE TABLE aqo_data_dump AS SELECT * FROM aqo_data; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +-- +-- aqo_query_texts_update() testing. +-- +-- Populate aqo_query_texts with dump data. +SELECT aqo_query_texts_update(queryid, query_text) AS res +FROM aqo_query_texts_dump +ORDER BY res; + res +----- + f + t + t + t + t + t + t +(7 rows) + +-- Check if data is the same as in source, no result rows expected. +(TABLE aqo_query_texts EXCEPT TABLE aqo_query_texts_dump) +UNION ALL +(TABLE aqo_query_texts_dump EXCEPT TABLE aqo_query_texts); + queryid | query_text +---------+------------ +(0 rows) + +-- Update aqo_query_texts with dump data. +SELECT aqo_query_texts_update(queryid, query_text) AS res +FROM aqo_query_texts_dump +ORDER BY res; + res +----- + f + t + t + t + t + t + t +(7 rows) + +-- Check if data is the same as in source, no result rows expected. +(TABLE aqo_query_texts EXCEPT TABLE aqo_query_texts_dump) +UNION ALL +(TABLE aqo_query_texts_dump EXCEPT TABLE aqo_query_texts); + queryid | query_text +---------+------------ +(0 rows) + +-- +-- aqo_queries_update testing. +-- +-- Populate aqo_queries with dump data. +SELECT aqo_queries_update(queryid, fs, learn_aqo, use_aqo, auto_tuning) AS res +FROM aqo_queries_dump +ORDER BY res; + res +----- + f + t + t + t + t + t + t +(7 rows) + +-- Check if data is the same as in source, no result rows expected. +(TABLE aqo_queries_dump EXCEPT TABLE aqo_queries) +UNION ALL +(TABLE aqo_queries EXCEPT TABLE aqo_queries_dump); + queryid | fs | learn_aqo | use_aqo | auto_tuning | smart_timeout | count_increase_timeout +---------+----+-----------+---------+-------------+---------------+------------------------ +(0 rows) + +-- Update aqo_queries with dump data. 
+SELECT aqo_queries_update(queryid, fs, learn_aqo, use_aqo, auto_tuning) AS res +FROM aqo_queries_dump +ORDER BY res; + res +----- + f + t + t + t + t + t + t +(7 rows) + +-- Check if data is the same as in source, no result rows expected. +(TABLE aqo_queries_dump EXCEPT TABLE aqo_queries) +UNION ALL +(TABLE aqo_queries EXCEPT TABLE aqo_queries_dump); + queryid | fs | learn_aqo | use_aqo | auto_tuning | smart_timeout | count_increase_timeout +---------+----+-----------+---------+-------------+---------------+------------------------ +(0 rows) + +-- +-- aqo_query_stat_update() testing. +-- +-- Populate aqo_query_stat with dump data. +SELECT aqo_query_stat_update(queryid, execution_time_with_aqo, +execution_time_without_aqo, planning_time_with_aqo, planning_time_without_aqo, +cardinality_error_with_aqo, cardinality_error_without_aqo, executions_with_aqo, +executions_without_aqo) AS res +FROM aqo_query_stat_dump +ORDER BY res; + res +----- + t + t + t + t + t + t +(6 rows) + +-- Check if data is the same as in source, no result rows expected. +(TABLE aqo_query_stat_dump EXCEPT TABLE aqo_query_stat) +UNION ALL +(TABLE aqo_query_stat EXCEPT TABLE aqo_query_stat_dump); + queryid | execution_time_with_aqo | execution_time_without_aqo | planning_time_with_aqo | planning_time_without_aqo | cardinality_error_with_aqo | cardinality_error_without_aqo | executions_with_aqo | executions_without_aqo +---------+-------------------------+----------------------------+------------------------+---------------------------+----------------------------+-------------------------------+---------------------+------------------------ +(0 rows) + +-- Update aqo_query_stat with dump data. 
+SELECT aqo_query_stat_update(queryid, execution_time_with_aqo, +execution_time_without_aqo, planning_time_with_aqo, planning_time_without_aqo, +cardinality_error_with_aqo, cardinality_error_without_aqo, executions_with_aqo, +executions_without_aqo) AS res +FROM aqo_query_stat_dump +ORDER BY res; + res +----- + t + t + t + t + t + t +(6 rows) + +-- Check if data is the same as in source, no result rows expected. +(TABLE aqo_query_stat_dump EXCEPT TABLE aqo_query_stat) +UNION ALL +(TABLE aqo_query_stat EXCEPT TABLE aqo_query_stat_dump); + queryid | execution_time_with_aqo | execution_time_without_aqo | planning_time_with_aqo | planning_time_without_aqo | cardinality_error_with_aqo | cardinality_error_without_aqo | executions_with_aqo | executions_without_aqo +---------+-------------------------+----------------------------+------------------------+---------------------------+----------------------------+-------------------------------+---------------------+------------------------ +(0 rows) + +-- +-- aqo_data_update() testing. +-- +-- Populate aqo_data with dump data. +SELECT count(*) AS res1 FROM + aqo_data_dump, + LATERAL aqo_data_update(fs, fss, nfeatures, features, targets, reliability, oids) AS ret +WHERE ret \gset +-- Check if data is the same as in source, no result rows expected. +(TABLE aqo_data_dump EXCEPT TABLE aqo_data) +UNION ALL +(TABLE aqo_data EXCEPT TABLE aqo_data_dump); + fs | fss | nfeatures | features | targets | reliability | oids +----+-----+-----------+----------+---------+-------------+------ +(0 rows) + +-- Update aqo_data with dump data. +SELECT count(*) AS res2 FROM + aqo_data_dump, + LATERAL aqo_data_update(fs, fss, nfeatures, features, targets, reliability, oids) AS ret +WHERE ret \gset +SELECT :res1 = :res2 AS ml_sizes_are_equal; + ml_sizes_are_equal +-------------------- + t +(1 row) + +-- Check if data is the same as in source, no result rows expected. 
+(TABLE aqo_data_dump EXCEPT TABLE aqo_data) +UNION ALL +(TABLE aqo_data EXCEPT TABLE aqo_data_dump); + fs | fss | nfeatures | features | targets | reliability | oids +----+-----+-----------+----------+---------+-------------+------ +(0 rows) + +-- Reject aqo_query_stat_update if there is NULL elements in array arg. +SELECT aqo_query_stat_update(1, '{NULL, 1}', '{1, 1}', '{1, 1}', '{1, 1}', +'{1, 1}', '{1, 1}', 1, 1); + aqo_query_stat_update +----------------------- + f +(1 row) + +-- Reject aqo_query_stat_update if arrays don't have the same size. +SELECT aqo_query_stat_update(1, '{1, 1}', '{1, 1, 1}', '{1, 1}', '{1, 1}', +'{1, 1}', '{1, 1}', 1, 1); + aqo_query_stat_update +----------------------- + f +(1 row) + +-- Reject aqo_query_stat_update if there are negative executions. +SELECT aqo_query_stat_update(1, '{1, 1}', '{1, 1}', '{1, 1}', '{1, 1}', +'{1, 1}', '{1, 1}', -1, 1); + aqo_query_stat_update +----------------------- + f +(1 row) + +SELECT aqo_query_stat_update(1, '{1, 1}', '{1, 1}', '{1, 1}', '{1, 1}', +'{1, 1}', '{1, 1}', 1, -1); + aqo_query_stat_update +----------------------- + f +(1 row) + +-- Reject aqo_query_data_update if number of matrix columns and nfeatures +-- are different. +SELECT aqo_data_update(1, 1, 0, '{{1}}', '{1, 1}', '{1, 1}', '{1, 2, 3}'); + aqo_data_update +----------------- + f +(1 row) + +-- Reject aqo_query_data_update if there is NULL elements in array arg. +SELECT aqo_data_update(1, 1, 1, '{{NULL}}', '{1}', '{1}', '{1, 2, 3}'); + aqo_data_update +----------------- + f +(1 row) + +SELECT aqo_data_update(1, 1, 1, '{{1}}', '{NULL}', '{1}', '{1, 2, 3}'); + aqo_data_update +----------------- + f +(1 row) + +SELECT aqo_data_update(1, 1, 1, '{{1}}', '{1}', '{NULL}', '{1, 2, 3}'); + aqo_data_update +----------------- + f +(1 row) + +-- Reject aqo_query_data_update if Oids is NULL. 
+SELECT aqo_data_update(1, 1, 1, '{{1}}', '{1}', '{1}', NULL); + aqo_data_update +----------------- + f +(1 row) + +-- Reject aqo_query_data_update if arrays don't have the same number of rows. +SELECT aqo_data_update(1, 1, 1, '{{1}}', '{1, 1}', '{1}', '{1, 2, 3}'); + aqo_data_update +----------------- + f +(1 row) + +SELECT aqo_data_update(1, 1, 1, '{{1}}', '{1}', '{1, 1}', '{1, 2, 3}'); + aqo_data_update +----------------- + f +(1 row) + +SELECT aqo_data_update(1, 1, 1, '{{1}, {2}}', '{1}', '{1}', '{1, 2, 3}'); + aqo_data_update +----------------- + f +(1 row) + +SET aqo.mode='disabled'; +-- Testing the minimum querytext size +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +SET aqo.querytext_max_size = 0; +ERROR: 0 is outside the valid range for parameter "aqo.querytext_max_size" (1 .. 2147483647) +SET aqo.querytext_max_size = 1; +SELECT queryid, query_text FROM aqo_query_texts WHERE queryid > 0; + queryid | query_text +---------+------------ +(0 rows) + +SELECT aqo_query_texts_update(1, 'test'); + aqo_query_texts_update +------------------------ + t +(1 row) + +SELECT queryid, query_text FROM aqo_query_texts WHERE queryid > 0; + queryid | query_text +---------+------------ + 1 | +(1 row) + +DROP EXTENSION aqo CASCADE; +DROP TABLE aqo_test1, aqo_test2; +DROP TABLE aqo_query_texts_dump, aqo_queries_dump, aqo_query_stat_dump, aqo_data_dump; diff --git a/hash.c b/hash.c index b039be9e..2b0d3675 100644 --- a/hash.c +++ b/hash.c @@ -12,22 +12,29 @@ * ******************************************************************************* * - * Copyright (c) 2016-2020, Postgres Professional + * Copyright (c) 2016-2023, Postgres Professional * * IDENTIFICATION * aqo/hash.c * */ +#include "postgres.h" + +#include "access/htup.h" +#include "common/fe_memutils.h" + +#include "math.h" #include "aqo.h" +#include "hash.h" +#include "path_utils.h" static int get_str_hash(const char *str); static int get_node_hash(Node *node); -static int 
get_int_array_hash(int *arr, int len); static int get_unsorted_unsafe_int_array_hash(int *arr, int len); static int get_unordered_int_list_hash(List *lst); -static int get_relidslist_hash(List *relidslist); +static int get_relations_hash(List *relsigns); static int get_fss_hash(int clauses_hash, int eclasses_hash, int relidslist_hash); @@ -40,46 +47,148 @@ static int get_id_in_sorted_int_array(int val, int n, int *arr); static int get_arg_eclass(int arg_hash, int nargs, int *args_hash, int *eclass_hash); -static void get_clauselist_args(List *clauselist, int *nargs, int **args_hash); -static int disjoint_set_get_parent(int *p, int v); -static void disjoint_set_merge_eclasses(int *p, int v1, int v2); -static int *perform_eclasses_join(List *clauselist, int nargs, int *args_hash); +static int *get_clauselist_args(List *clauselist, int *nargs, int **args_hash); static bool is_brace(char ch); static bool has_consts(List *lst); static List **get_clause_args_ptr(Expr *clause); -static bool clause_is_eq_clause(Expr *clause); /* - * Computes hash for given query. + * Computes hash for given query.Query Identifier: = * Hash is supposed to be constant-insensitive. * XXX: Hashing depend on Oids of database objects. It is restrict usability of * the AQO knowledge base by current database at current Postgres instance. */ -int +uint64 get_query_hash(Query *parse, const char *query_text) { char *str_repr; - int hash; + uint64 hash; + /* XXX: remove_locations and remove_consts are heavy routines. 
*/ str_repr = remove_locations(remove_consts(nodeToString(parse))); - hash = DatumGetInt32(hash_any((const unsigned char *) str_repr, - strlen(str_repr) * sizeof(*str_repr))); - pfree(str_repr); + hash = DatumGetUInt64(hash_any_extended((void *) str_repr, strlen(str_repr),0)); return hash; } +/********************************************************************************* + * + * Because List natively works with OID, integer and a postgres node types, + * implement separate set of functions which manages list of uint64 values + * (need for the query hash type). + * + ********************************************************************************/ + +bool +list_member_uint64(const List *list, uint64 datum) +{ + const ListCell *cell; + + foreach(cell, list) + { + if (*((uint64 *)lfirst(cell)) == datum) + return true; + } + + return false; +} + +/* + * Deep copy of uint64 list. + * Each element here is dynamically allocated in some memory context. + * If we copy the list in another memctx we should allocate memory for new + * elements too. + */ +List * +list_copy_uint64(List *list) +{ + ListCell *lc; + List *nlist = NIL; + + foreach(lc, list) + { + uint64 *val = palloc(sizeof(uint64)); + + *val = *(uint64 *) lfirst(lc); + nlist = lappend(nlist, (void *) val); + } + + return nlist; +} + +List * +lappend_uint64(List *list, uint64 datum) +{ + uint64 *val = palloc(sizeof(uint64)); + + *val = datum; + list = lappend(list, (void *) val); + return list; +} + +/* + * Remove element from a list and free the memory which was allocated to it. + * Looks unconventional, but we unconventionally allocate memory on append, so + * it maybe ok. 
+ */ +List * +ldelete_uint64(List *list, uint64 datum) +{ + ListCell *cell; + + foreach(cell, list) + { + if (*((uint64 *)lfirst(cell)) == datum) + { + list = list_delete_ptr(list, lfirst(cell)); + return list; + } + } + return list; +} + +/********************************************************************************/ + +int +get_grouped_exprs_hash(int child_fss, List *group_exprs) +{ + ListCell *lc; + int *hashes = palloc(list_length(group_exprs) * sizeof(int)); + int i = 0; + int final_hashes[2]; + + /* Calculate hash of each grouping expression. */ + foreach(lc, group_exprs) + { + Node *clause = (Node *) lfirst(lc); + + hashes[i++] = get_node_hash(clause); + } + + /* Sort to get rid of expressions permutation. */ + qsort(hashes, i, sizeof(int), int_cmp); + + final_hashes[0] = child_fss; + final_hashes[1] = get_int_array_hash(hashes, i); + + pfree(hashes); + + return get_int_array_hash(final_hashes, 2); +} + /* - * For given object (clauselist, selectivities, relidslist) creates feature + * For given object (clauselist, selectivities, reloids) creates feature * subspace: * sets nfeatures * creates and computes fss_hash * transforms selectivities to features + * + * Special case for nfeatures == NULL: don't calculate features. */ int -get_fss_for_object(List *clauselist, List *selectivities, List *relidslist, - int *nfeatures, double **features) +get_fss_for_object(List *relsigns, List *clauselist, + List *selectivities, int *nfeatures, double **features) { int n; int *clause_hashes; @@ -92,49 +201,74 @@ get_fss_for_object(List *clauselist, List *selectivities, List *relidslist, int *eclass_hash; int clauses_hash; int eclasses_hash; - int relidslist_hash; + int relations_hash; List **args; - ListCell *l; + ListCell *lc; int i, j, k, m; int sh = 0, old_sh; - int fss_hash; + int fss_hash; n = list_length(clauselist); - get_eclasses(clauselist, &nargs, &args_hash, &eclass_hash); + /* Check parameters state invariant. 
*/ + Assert(n == list_length(selectivities) || + (nfeatures == NULL && features == NULL)); + + /* + * It should be allocated in a caller memory context, because it will be + * returned. + */ + if (nfeatures != NULL) + *features = palloc0(sizeof(**features) * n); + get_eclasses(clauselist, &nargs, &args_hash, &eclass_hash); clause_hashes = palloc(sizeof(*clause_hashes) * n); clause_has_consts = palloc(sizeof(*clause_has_consts) * n); sorted_clauses = palloc(sizeof(*sorted_clauses) * n); - *features = palloc0(sizeof(**features) * n); i = 0; - foreach(l, clauselist) + foreach(lc, clauselist) { - clause_hashes[i] = get_clause_hash( - ((RestrictInfo *) lfirst(l))->clause, - nargs, args_hash, eclass_hash); - args = get_clause_args_ptr(((RestrictInfo *) lfirst(l))->clause); + AQOClause *clause = (AQOClause *) lfirst(lc); + + clause_hashes[i] = get_clause_hash(clause, nargs, args_hash, + eclass_hash); + args = get_clause_args_ptr(clause->clause); clause_has_consts[i] = (args != NULL && has_consts(*args)); i++; } + pfree(args_hash); idx = argsort(clause_hashes, n, sizeof(*clause_hashes), int_cmp); inverse_idx = inverse_permutation(idx, n); i = 0; - foreach(l, selectivities) + foreach(lc, clauselist) { - (*features)[inverse_idx[i]] = log(*((double *) (lfirst(l)))); - if ((*features)[inverse_idx[i]] < log_selectivity_lower_bound) - (*features)[inverse_idx[i]] = log_selectivity_lower_bound; sorted_clauses[inverse_idx[i]] = clause_hashes[i]; i++; } + pfree(clause_hashes); + + i = 0; + foreach(lc, selectivities) + { + Selectivity *s = (Selectivity *) lfirst(lc); + + if (nfeatures != NULL) + { + (*features)[inverse_idx[i]] = log(*s); + Assert(!isnan(log(*s))); + if ((*features)[inverse_idx[i]] < log_selectivity_lower_bound) + (*features)[inverse_idx[i]] = log_selectivity_lower_bound; + } + i++; + } + pfree(inverse_idx); for (i = 0; i < n;) { @@ -146,36 +280,37 @@ get_fss_for_object(List *clauselist, List *selectivities, List *relidslist, for (j = i; j < n && sorted_clauses[j] == 
sorted_clauses[i]; ++j) if (clause_has_consts[idx[j]] || k + 1 == m - i) { - (*features)[j - sh] = (*features)[j]; + if (nfeatures != NULL) + (*features)[j - sh] = (*features)[j]; sorted_clauses[j - sh] = sorted_clauses[j]; } else sh++; - qsort(&((*features)[i - old_sh]), j - sh - (i - old_sh), - sizeof(**features), double_cmp); + + if (nfeatures != NULL) + qsort(&((*features)[i - old_sh]), j - sh - (i - old_sh), + sizeof(**features), double_cmp); i = j; } - - *nfeatures = n - sh; - (*features) = repalloc(*features, (*nfeatures) * sizeof(**features)); + pfree(idx); + pfree(clause_has_consts); /* * Generate feature subspace hash. - * XXX: Remember! that relidslist_hash isn't portable between postgres - * instances. */ - clauses_hash = get_int_array_hash(sorted_clauses, *nfeatures); - eclasses_hash = get_int_array_hash(eclass_hash, nargs); - relidslist_hash = get_relidslist_hash(relidslist); - fss_hash = get_fss_hash(clauses_hash, eclasses_hash, relidslist_hash); - pfree(clause_hashes); + clauses_hash = get_int_array_hash(sorted_clauses, n - sh); + eclasses_hash = get_int_array_hash(eclass_hash, nargs); + relations_hash = get_relations_hash(relsigns); + fss_hash = get_fss_hash(clauses_hash, eclasses_hash, relations_hash); pfree(sorted_clauses); - pfree(idx); - pfree(inverse_idx); - pfree(clause_has_consts); - pfree(args_hash); pfree(eclass_hash); + + if (nfeatures != NULL) + { + *nfeatures = n - sh; + (*features) = repalloc(*features, (*nfeatures) * sizeof(**features)); + } return fss_hash; } @@ -185,29 +320,29 @@ get_fss_for_object(List *clauselist, List *selectivities, List *relidslist, * Also args-order-insensitiveness for equal clause is required. 
*/ int -get_clause_hash(Expr *clause, int nargs, int *args_hash, int *eclass_hash) +get_clause_hash(AQOClause *clause, int nargs, int *args_hash, int *eclass_hash) { Expr *cclause; - List **args = get_clause_args_ptr(clause); + List **args = get_clause_args_ptr(clause->clause); int arg_eclass; ListCell *l; if (args == NULL) - return get_node_hash((Node *) clause); + return get_node_hash((Node *) clause->clause); - cclause = copyObject(clause); + cclause = copyObject(clause->clause); args = get_clause_args_ptr(cclause); + foreach(l, *args) { arg_eclass = get_arg_eclass(get_node_hash(lfirst(l)), nargs, args_hash, eclass_hash); if (arg_eclass != 0) { - lfirst(l) = makeNode(Param); - ((Param *) lfirst(l))->paramid = arg_eclass; + lfirst(l) = create_aqo_const_node(AQO_NODE_EXPR, arg_eclass); } } - if (!clause_is_eq_clause(clause) || has_consts(*args)) + if (!clause->is_eq_clause || has_consts(*args)) return get_node_hash((Node *) cclause); return get_node_hash((Node *) linitial(*args)); } @@ -215,7 +350,7 @@ get_clause_hash(Expr *clause, int nargs, int *args_hash, int *eclass_hash) /* * Computes hash for given string. */ -int +static int get_str_hash(const char *str) { return DatumGetInt32(hash_any((const unsigned char *) str, @@ -225,15 +360,21 @@ get_str_hash(const char *str) /* * Computes hash for given node. */ -int +static int get_node_hash(Node *node) { char *str; + char *no_consts; + char *no_locations; int hash; - str = remove_locations(remove_consts(nodeToString(node))); - hash = get_str_hash(str); + str = nodeToString(node); + no_consts = remove_consts(str); pfree(str); + no_locations = remove_locations(no_consts); + pfree(no_consts); + hash = get_str_hash(no_locations); + pfree(no_locations); return hash; } @@ -252,7 +393,7 @@ get_int_array_hash(int *arr, int len) * Sorts given array in-place to compute hash. * The hash is order-insensitive. 
*/ -int +static int get_unsorted_unsafe_int_array_hash(int *arr, int len) { qsort(arr, len, sizeof(*arr), int_cmp); @@ -267,7 +408,7 @@ get_unsorted_unsafe_int_array_hash(int *arr, int len) * using 'hash_any'. * Frees allocated memory before returning hash. */ -int +static int get_unordered_int_list_hash(List *lst) { int i = 0; @@ -290,7 +431,7 @@ get_unordered_int_list_hash(List *lst) * "[^]*" are replaced with substring * "". */ -char * +static char * replace_patterns(const char *str, const char *start_pattern, bool (*end_pattern) (char ch)) { @@ -319,7 +460,7 @@ replace_patterns(const char *str, const char *start_pattern, * Computes hash for given feature subspace. * Hash is supposed to be clause-order-insensitive. */ -int +static int get_fss_hash(int clauses_hash, int eclasses_hash, int relidslist_hash) { int hashes[3]; @@ -332,26 +473,48 @@ get_fss_hash(int clauses_hash, int eclasses_hash, int relidslist_hash) } /* - * Computes hash for given list of relids. - * Hash is supposed to be relids-order-insensitive. + * Computes hash for given list of relations. + * Hash is supposed to be relations-order-insensitive. + * Each element of a list must have a String type, */ -int -get_relidslist_hash(List *relidslist) +static int +get_relations_hash(List *relsigns) { - return get_unordered_int_list_hash(relidslist); + int nhashes = 0; + uint32 *hashes = palloc(list_length(relsigns) * sizeof(uint32)); + ListCell *lc; + int result; + + foreach(lc, relsigns) + { + hashes[nhashes++] = (uint32) lfirst_int(lc); + } + + /* Sort the array to make query insensitive to input order of relations. */ + qsort(hashes, nhashes, sizeof(uint32), int_cmp); + + /* Make a final hash value */ + + result = DatumGetInt32(hash_any((const unsigned char *) hashes, + nhashes * sizeof(uint32))); + pfree(hashes); + + return result; } /* * Returns the C-string in which the substrings of kind "{CONST.*}" are * replaced with substring "{CONST}". 
*/ -char * +static char * remove_consts(const char *str) { char *res; + char *tmp; - res = replace_patterns(str, "{CONST", is_brace); - res = replace_patterns(res, ":stmt_len", is_brace); + tmp = replace_patterns(str, "{CONST", is_brace); + res = replace_patterns(tmp, ":stmt_len", is_brace); + pfree(tmp); return res; } @@ -359,7 +522,7 @@ remove_consts(const char *str) * Returns the C-string in which the substrings of kind " :location.*}" are * replaced with substring " :location}". */ -char * +static char * remove_locations(const char *str) { return replace_patterns(str, " :location", is_brace); @@ -369,7 +532,7 @@ remove_locations(const char *str) * Returns index of given value in given sorted integer array * or -1 if not found. */ -int +static int get_id_in_sorted_int_array(int val, int n, int *arr) { int *i; @@ -388,7 +551,7 @@ get_id_in_sorted_int_array(int val, int n, int *arr) * Returns class of equivalence for given argument hash or 0 if such hash * does not belong to any equivalence class. */ -int +static int get_arg_eclass(int arg_hash, int nargs, int *args_hash, int *eclass_hash) { int di = get_id_in_sorted_int_array(arg_hash, nargs, args_hash); @@ -403,121 +566,108 @@ get_arg_eclass(int arg_hash, int nargs, int *args_hash, int *eclass_hash) * Builds list of non-constant arguments of equivalence clauses * of given clauselist. 
*/ -void +static int * get_clauselist_args(List *clauselist, int *nargs, int **args_hash) { - RestrictInfo *rinfo; + AQOClause *clause; List **args; ListCell *l; - ListCell *l2; int i = 0; int sh = 0; int cnt = 0; + int *p; + int *p_sorted; + int *args_hash_sorted; + int *idx; + + /* Not more than 2 args in each clause from clauselist */ + *args_hash = palloc(2 * list_length(clauselist) * sizeof(**args_hash)); + p = palloc(2 * list_length(clauselist) * sizeof(*p)); foreach(l, clauselist) { - rinfo = (RestrictInfo *) lfirst(l); - args = get_clause_args_ptr(rinfo->clause); - if (args != NULL && clause_is_eq_clause(rinfo->clause)) - foreach(l2, *args) - if (!IsA(lfirst(l2), Const)) - cnt++; + Expr *e; + + clause = (AQOClause *) lfirst(l); + args = get_clause_args_ptr(clause->clause); + if (args == NULL || !clause->is_eq_clause) + continue; + + /* Left argument */ + e = (args != NULL && list_length(*args) ? linitial(*args) : NULL); + if (e && !IsA(e, Const)) + { + (*args_hash)[cnt] = get_node_hash((Node *) e); + p[cnt++] = clause->left_ec; + } + + /* Right argument */ + e = (args != NULL && list_length(*args) >= 2 ? 
lsecond(*args) : NULL); + if (e && !IsA(e, Const)) + { + (*args_hash)[cnt] = get_node_hash((Node *) e); + p[cnt++] = clause->right_ec; + } } - *args_hash = palloc(cnt * sizeof(**args_hash)); - foreach(l, clauselist) + /* Use argsort for simultaniously sorting of args_hash and p arrays */ + idx = argsort(*args_hash, cnt, sizeof(**args_hash), int_cmp); + + args_hash_sorted = palloc(cnt * sizeof(*args_hash_sorted)); + p_sorted = palloc(cnt * sizeof(*p_sorted)); + + for (i = 0; i < cnt; ++i) { - rinfo = (RestrictInfo *) lfirst(l); - args = get_clause_args_ptr(rinfo->clause); - if (args != NULL && clause_is_eq_clause(rinfo->clause)) - foreach(l2, *args) - if (!IsA(lfirst(l2), Const)) - (*args_hash)[i++] = get_node_hash(lfirst(l2)); + args_hash_sorted[i] = (*args_hash)[idx[i]]; + p_sorted[i] = p[idx[i]]; } - qsort(*args_hash, cnt, sizeof(**args_hash), int_cmp); + pfree(idx); + pfree(p); + pfree(*args_hash); + + *args_hash = args_hash_sorted; + /* Remove duplicates of the hashes */ for (i = 1; i < cnt; ++i) if ((*args_hash)[i - 1] == (*args_hash)[i]) sh++; else + { (*args_hash)[i - sh] = (*args_hash)[i]; + p_sorted[i - sh] = p_sorted[i]; + } *nargs = cnt - sh; *args_hash = repalloc(*args_hash, (*nargs) * sizeof(**args_hash)); -} + p_sorted = repalloc(p_sorted, (*nargs) * sizeof(*p_sorted)); -/* - * Returns class of an object in disjoint set. - */ -int -disjoint_set_get_parent(int *p, int v) -{ - if (p[v] == -1) - return v; - else - return p[v] = disjoint_set_get_parent(p, p[v]); -} - -/* - * Merges two equivalence classes in disjoint set. - */ -void -disjoint_set_merge_eclasses(int *p, int v1, int v2) -{ - int p1, - p2; - - p1 = disjoint_set_get_parent(p, v1); - p2 = disjoint_set_get_parent(p, v2); - if (p1 != p2) - { - if ((v1 + v2) % 2) - p[p1] = p2; - else - p[p2] = p1; - } -} - -/* - * Constructs disjoint set on arguments. 
- */ -int * -perform_eclasses_join(List *clauselist, int nargs, int *args_hash) -{ - RestrictInfo *rinfo; - int *p; - ListCell *l, - *l2; - List **args; - int h2; - int i2, - i3; - - p = palloc(nargs * sizeof(*p)); - memset(p, -1, nargs * sizeof(*p)); + /* + * Compress the values of eclasses. + * It is only sorted in order of args_hash. + * Get the indexes in ascending order of the elements. + */ + idx = argsort(p_sorted, *nargs, sizeof(*p_sorted), int_cmp); - foreach(l, clauselist) + /* + * Remove the holes from given array. + * Later we can use it as indexes of args_hash. + */ + if (*nargs > 0) { - rinfo = (RestrictInfo *) lfirst(l); - args = get_clause_args_ptr(rinfo->clause); - if (args != NULL && clause_is_eq_clause(rinfo->clause)) + int prev = p_sorted[idx[0]]; + p_sorted[idx[0]] = 0; + for (i = 1; i < *nargs; i++) { - i3 = -1; - foreach(l2, *args) - { - if (!IsA(lfirst(l2), Const)) - { - h2 = get_node_hash(lfirst(l2)); - i2 = get_id_in_sorted_int_array(h2, nargs, args_hash); - if (i3 != -1) - disjoint_set_merge_eclasses(p, i2, i3); - i3 = i2; - } - } + int cur = p_sorted[idx[i]]; + if (cur == prev) + p_sorted[idx[i]] = p_sorted[idx[i-1]]; + else + p_sorted[idx[i]] = p_sorted[idx[i-1]] + 1; + prev = cur; } } - return p; + return p_sorted; } /* @@ -529,42 +679,39 @@ get_eclasses(List *clauselist, int *nargs, int **args_hash, int **eclass_hash) { int *p; List **lsts; - int i, - v; + int i; + /* + * An auxiliary array of equivalence clauses hashes + * used to improve performance. 
+ */ int *e_hashes; - get_clauselist_args(clauselist, nargs, args_hash); - - p = perform_eclasses_join(clauselist, *nargs, *args_hash); + p = get_clauselist_args(clauselist, nargs, args_hash); + *eclass_hash = palloc((*nargs) * sizeof(**eclass_hash)); - lsts = palloc((*nargs) * sizeof(*lsts)); + lsts = palloc0((*nargs) * sizeof(*lsts)); e_hashes = palloc((*nargs) * sizeof(*e_hashes)); - for (i = 0; i < *nargs; ++i) - lsts[i] = NIL; + /* Combine args hashes corresponding to the same eclass into one list. */ for (i = 0; i < *nargs; ++i) - { - v = disjoint_set_get_parent(p, i); - lsts[v] = lappend_int(lsts[v], (*args_hash)[i]); - } - for (i = 0; i < *nargs; ++i) - e_hashes[i] = get_unordered_int_list_hash(lsts[i]); + lsts[p[i]] = lappend_int(lsts[p[i]], (*args_hash)[i]); - *eclass_hash = palloc((*nargs) * sizeof(**eclass_hash)); + /* Precompute eclasses hashes only once per eclass. */ for (i = 0; i < *nargs; ++i) - (*eclass_hash)[i] = e_hashes[disjoint_set_get_parent(p, i)]; + if (lsts[i] != NIL) + e_hashes[i] = get_unordered_int_list_hash(lsts[i]); + /* Determine the hashes of each eclass. */ for (i = 0; i < *nargs; ++i) - list_free(lsts[i]); - pfree(lsts); - pfree(p); + (*eclass_hash)[i] = e_hashes[p[i]]; + pfree(e_hashes); } /* * Checks whether the given char is brace, i. e. '{' or '}'. */ -bool +static bool is_brace(char ch) { return ch == '{' || ch == '}'; @@ -573,7 +720,7 @@ is_brace(char ch) /* * Returns whether arguments list contain constants. */ -bool +static bool has_consts(List *lst) { ListCell *l; @@ -587,7 +734,7 @@ has_consts(List *lst) /* * Returns pointer on the args list in clause or NULL. */ -List ** +static List ** get_clause_args_ptr(Expr *clause) { switch (clause->type) @@ -609,75 +756,3 @@ get_clause_args_ptr(Expr *clause) break; } } - -/* - * Returns whether the clause is an equivalence clause. 
- */ -bool -clause_is_eq_clause(Expr *clause) -{ - /* TODO: fix this horrible mess */ - return ( - clause->type == T_OpExpr || - clause->type == T_DistinctExpr || - clause->type == T_NullIfExpr || - clause->type == T_ScalarArrayOpExpr - ) && ( - ((OpExpr *) clause)->opno == Int4EqualOperator || - ((OpExpr *) clause)->opno == BooleanEqualOperator || - ((OpExpr *) clause)->opno == TextEqualOperator || - ((OpExpr *) clause)->opno == TIDEqualOperator || - ((OpExpr *) clause)->opno == ARRAY_EQ_OP || - ((OpExpr *) clause)->opno == RECORD_EQ_OP || - ((OpExpr *) clause)->opno == 15 || - ((OpExpr *) clause)->opno == 92 || - ((OpExpr *) clause)->opno == 93 || - ((OpExpr *) clause)->opno == 94 || - ((OpExpr *) clause)->opno == 352 || - ((OpExpr *) clause)->opno == 353 || - ((OpExpr *) clause)->opno == 385 || - ((OpExpr *) clause)->opno == 386 || - ((OpExpr *) clause)->opno == 410 || - ((OpExpr *) clause)->opno == 416 || - ((OpExpr *) clause)->opno == 503 || - ((OpExpr *) clause)->opno == 532 || - ((OpExpr *) clause)->opno == 533 || - ((OpExpr *) clause)->opno == 560 || - ((OpExpr *) clause)->opno == 566 || - ((OpExpr *) clause)->opno == 607 || - ((OpExpr *) clause)->opno == 649 || - ((OpExpr *) clause)->opno == 620 || - ((OpExpr *) clause)->opno == 670 || - ((OpExpr *) clause)->opno == 792 || - ((OpExpr *) clause)->opno == 811 || - ((OpExpr *) clause)->opno == 900 || - ((OpExpr *) clause)->opno == 1093 || - ((OpExpr *) clause)->opno == 1108 || - ((OpExpr *) clause)->opno == 1550 || - ((OpExpr *) clause)->opno == 1120 || - ((OpExpr *) clause)->opno == 1130 || - ((OpExpr *) clause)->opno == 1320 || - ((OpExpr *) clause)->opno == 1330 || - ((OpExpr *) clause)->opno == 1500 || - ((OpExpr *) clause)->opno == 1535 || - ((OpExpr *) clause)->opno == 1616 || - ((OpExpr *) clause)->opno == 1220 || - ((OpExpr *) clause)->opno == 1201 || - ((OpExpr *) clause)->opno == 1752 || - ((OpExpr *) clause)->opno == 1784 || - ((OpExpr *) clause)->opno == 1804 || - ((OpExpr *) clause)->opno == 1862 
|| - ((OpExpr *) clause)->opno == 1868 || - ((OpExpr *) clause)->opno == 1955 || - ((OpExpr *) clause)->opno == 2060 || - ((OpExpr *) clause)->opno == 2542 || - ((OpExpr *) clause)->opno == 2972 || - ((OpExpr *) clause)->opno == 3222 || - ((OpExpr *) clause)->opno == 3516 || - ((OpExpr *) clause)->opno == 3629 || - ((OpExpr *) clause)->opno == 3676 || - ((OpExpr *) clause)->opno == 3882 || - ((OpExpr *) clause)->opno == 3240 || - ((OpExpr *) clause)->opno == 3240 - ); -} diff --git a/hash.h b/hash.h new file mode 100644 index 00000000..d9d3cbfd --- /dev/null +++ b/hash.h @@ -0,0 +1,24 @@ +#ifndef AQO_HASH_H +#define AQO_HASH_H + +#include "nodes/pg_list.h" +#include "path_utils.h" + +extern uint64 get_query_hash(Query *parse, const char *query_text); +extern bool list_member_uint64(const List *list, uint64 datum); +extern List *list_copy_uint64(List *list); +extern List *lappend_uint64(List *list, uint64 datum); +extern List *ldelete_uint64(List *list, uint64 datum); +extern int get_fss_for_object(List *relsigns, List *clauselist, + List *selectivities, int *nfeatures, + double **features); +extern int get_int_array_hash(int *arr, int len); +extern int get_grouped_exprs_hash(int fss, List *group_exprs); + +/* Hash functions */ +void get_eclasses(List *clauselist, int *nargs, int **args_hash, + int **eclass_hash); +int get_clause_hash(AQOClause *clause, int nargs, int *args_hash, + int *eclass_hash); + +#endif /* AQO_HASH_H */ \ No newline at end of file diff --git a/machine_learning.c b/machine_learning.c index 7b4612cd..d7520a94 100644 --- a/machine_learning.c +++ b/machine_learning.c @@ -12,40 +12,76 @@ * ******************************************************************************* * - * Copyright (c) 2016-2020, Postgres Professional + * Copyright (c) 2016-2023, Postgres Professional * * IDENTIFICATION * aqo/machine_learning.c * */ +#include "postgres.h" + #include "aqo.h" +#include "machine_learning.h" + + +/* + * This parameter tell us that the new learning 
sample object has very small + * distance from one whose features stored in matrix already. + * In this case we will not to add new line in matrix, but will modify this + * nearest neighbor features and cardinality with linear smoothing by + * learning_rate coefficient. + */ +const double object_selection_threshold = 0.1; +const double learning_rate = 1e-1; + static double fs_distance(double *a, double *b, int len); static double fs_similarity(double dist); static double compute_weights(double *distances, int nrows, double *w, int *idx); +OkNNrdata* +OkNNr_allocate(int ncols) +{ + OkNNrdata *data = palloc(sizeof(OkNNrdata)); + int i; + + if (ncols > 0) + for (i = 0; i < aqo_K; i++) + data->matrix[i] = palloc0(ncols * sizeof(double)); + else + for (i = 0; i < aqo_K; i++) + data->matrix[i] = NULL; + + data->cols = ncols; + data->rows = -1; + return data; +} + /* * Computes L2-distance between two given vectors. */ -double +static double fs_distance(double *a, double *b, int len) { double res = 0; int i; for (i = 0; i < len; ++i) + { + Assert(!isnan(a[i])); res += (a[i] - b[i]) * (a[i] - b[i]); + } if (len != 0) - res = sqrt(res / len); + res = sqrt(res); return res; } /* * Returns similarity between objects based on distance between them. */ -double +static double fs_similarity(double dist) { return 1.0 / (0.001 + dist); @@ -58,7 +94,7 @@ fs_similarity(double dist) * Appeared as a separate function because of "don't repeat your code" * principle. */ -double +static double compute_weights(double *distances, int nrows, double *w, int *idx) { int i, @@ -101,31 +137,36 @@ compute_weights(double *distances, int nrows, double *w, int *idx) * positive targets are assumed. 
*/ double -OkNNr_predict(int nrows, int ncols, double **matrix, const double *targets, - double *features) +OkNNr_predict(OkNNrdata *data, double *features) { double distances[aqo_K]; int i; int idx[aqo_K]; /* indexes of nearest neighbors */ double w[aqo_K]; double w_sum; - double result = 0; + double result = 0.; - for (i = 0; i < nrows; ++i) - distances[i] = fs_distance(matrix[i], features, ncols); + Assert(data != NULL); + + if (!aqo_predict_with_few_neighbors && data->rows < aqo_k) + return -1.; + Assert(data->rows > 0); + + for (i = 0; i < data->rows; ++i) + distances[i] = fs_distance(data->matrix[i], features, data->cols); - w_sum = compute_weights(distances, nrows, w, idx); + w_sum = compute_weights(distances, data->rows, w, idx); for (i = 0; i < aqo_k; ++i) if (idx[i] != -1) - result += targets[idx[i]] * w[i] / w_sum; + result += data->targets[idx[i]] * w[i] / w_sum; - if (result < 0) - result = 0; + if (result < 0.) + result = 0.; /* this should never happen */ if (idx[0] == -1) - result = -1; + result = -1.; return result; } @@ -137,23 +178,25 @@ OkNNr_predict(int nrows, int ncols, double **matrix, const double *targets, * updates this line in database, otherwise adds new line with given index. * It is supposed that indexes of new lines are consequent numbers * starting from matrix_rows. + * reliability: 1 - value after normal end of a query; 0.1 - data from partially + * executed node (we don't want this part); 0.9 - from finished node, but + * partially executed statement. 
*/ int -OkNNr_learn(int nrows, int nfeatures, double **matrix, double *targets, - double *features, double target) +OkNNr_learn(OkNNrdata *data, double *features, double target, double rfactor) { - double distances[aqo_K]; - int i, - j; - int mid = 0; /* index of row with minimum distance value */ - int idx[aqo_K]; + double distances[aqo_K]; + int i; + int j; + int mid = 0; /* index of row with minimum distance value */ + int idx[aqo_K]; /* * For each neighbor compute distance and search for nearest object. */ - for (i = 0; i < nrows; ++i) + for (i = 0; i < data->rows; ++i) { - distances[i] = fs_distance(matrix[i], features, nfeatures); + distances[i] = fs_distance(data->matrix[i], features, data->cols); if (distances[i] < distances[mid]) mid = i; } @@ -163,32 +206,44 @@ OkNNr_learn(int nrows, int nfeatures, double **matrix, double *targets, * replace data for the neighbor to avoid some fluctuations. * We will change it's row with linear smoothing by learning_rate. */ - if (nrows > 0 && distances[mid] < object_selection_threshold) + if (data->rows > 0 && distances[mid] < object_selection_threshold) { - for (j = 0; j < nfeatures; ++j) - matrix[mid][j] += learning_rate * (features[j] - matrix[mid][j]); - targets[mid] += learning_rate * (target - targets[mid]); + double lr = learning_rate * rfactor / data->rfactors[mid]; - return nrows; - } + if (lr > 1.) + { + elog(WARNING, "[AQO] Something goes wrong in the ML core: learning rate = %lf", lr); + lr = 1.; + } - if (nrows < aqo_K) + Assert(lr > 0.); + Assert(data->rfactors[mid] > 0. 
&& data->rfactors[mid] <= 1.); + + for (j = 0; j < data->cols; ++j) + data->matrix[mid][j] += lr * (features[j] - data->matrix[mid][j]); + data->targets[mid] += lr * (target - data->targets[mid]); + data->rfactors[mid] += lr * (rfactor - data->rfactors[mid]); + + return data->rows; + } + else if (data->rows < aqo_K) { - /* We can't reached limit of stored neighbors */ + /* We don't reach a limit of stored neighbors */ /* - * Add new line into the matrix. We can do this because matrix_rows + * Add new line into the matrix. We can do this because data->rows * is not the boundary of matrix. Matrix has aqo_K free lines */ - for (j = 0; j < nfeatures; ++j) - matrix[nrows][j] = features[j]; - targets[nrows] = target; + for (j = 0; j < data->cols; ++j) + data->matrix[data->rows][j] = features[j]; + data->targets[data->rows] = target; + data->rfactors[data->rows] = rfactor; - return nrows+1; + return data->rows + 1; } else { - double *feature; + double *feature; double avg_target = 0; double tc_coef; /* Target correction coefficient */ double fc_coef; /* Feature correction coefficient */ @@ -206,34 +261,45 @@ OkNNr_learn(int nrows, int nfeatures, double **matrix, double *targets, * idx array. Compute weight for each nearest neighbor and total weight * of all nearest neighbor. */ - w_sum = compute_weights(distances, nrows, w, idx); + w_sum = compute_weights(distances, data->rows, w, idx); /* * Compute average value for target by nearest neighbors. We need to * check idx[i] != -1 because we may have smaller value of nearest * neighbors than aqo_k. - * Semantics of coef1: it is defined distance between new object and + * Semantics of tc_coef: it is defined distance between new object and * this superposition value (with linear smoothing). + * fc_coef - feature changing rate. 
* */ for (i = 0; i < aqo_k && idx[i] != -1; ++i) - avg_target += targets[idx[i]] * w[i] / w_sum; + avg_target += data->targets[idx[i]] * w[i] / w_sum; tc_coef = learning_rate * (avg_target - target); /* Modify targets and features of each nearest neighbor row. */ for (i = 0; i < aqo_k && idx[i] != -1; ++i) { - fc_coef = tc_coef * (targets[idx[i]] - avg_target) * w[i] * w[i] / - sqrt(nfeatures) / w_sum; + double lr = learning_rate * rfactor / data->rfactors[mid]; - targets[idx[i]] -= tc_coef * w[i] / w_sum; - for (j = 0; j < nfeatures; ++j) + if (lr > 1.) { - feature = matrix[idx[i]]; + elog(WARNING, "[AQO] Something goes wrong in the ML core: learning rate = %lf", lr); + lr = 1.; + } + + Assert(lr > 0.); + Assert(data->rfactors[mid] > 0. && data->rfactors[mid] <= 1.); + + fc_coef = tc_coef * lr * (data->targets[idx[i]] - avg_target) * + w[i] * w[i] / sqrt(data->cols) / w_sum; + + data->targets[idx[i]] -= tc_coef * lr * w[i] / w_sum; + for (j = 0; j < data->cols; ++j) + { + feature = data->matrix[idx[i]]; feature[j] -= fc_coef * (features[j] - feature[j]) / distances[idx[i]]; } } } - - return nrows; + return data->rows; } diff --git a/machine_learning.h b/machine_learning.h new file mode 100644 index 00000000..1d6d8303 --- /dev/null +++ b/machine_learning.h @@ -0,0 +1,48 @@ +#ifndef MACHINE_LEARNING_H +#define MACHINE_LEARNING_H + +/* Max number of matrix rows - max number of possible neighbors. */ +#define aqo_K (30) + +extern const double object_selection_threshold; +extern const double learning_rate; + +#define RELIABILITY_MIN (0.1) +#define RELIABILITY_MAX (1.0) + +typedef struct OkNNrdata +{ + int rows; /* Number of filled rows in the matrix */ + int cols; /* Number of columns in the matrix */ + + double *matrix[aqo_K]; /* Contains the matrix - learning data for the same + * value of (fs, fss), but different features. 
*/ + double targets[aqo_K]; /* Right side of the equations system */ + double rfactors[aqo_K]; +} OkNNrdata; + +/* + * Auxiliary struct, used for passing arguments + * to aqo_data_store() function. + */ +typedef struct AqoDataArgs +{ + int rows; /* Number of filled rows in the matrix */ + int cols; /* Number of columns in the matrix */ + int nrels; /* Number of oids */ + + double **matrix; /* Pointer ot matrix array */ + double *targets; /* Pointer to array of 'targets' */ + double *rfactors; /* Pointer to array of 'rfactors' */ + Oid *oids; /* Array of relation OIDs */ +} AqoDataArgs; + +extern OkNNrdata* OkNNr_allocate(int ncols); +extern void OkNNr_free(OkNNrdata *data); + +/* Machine learning techniques */ +extern double OkNNr_predict(OkNNrdata *data, double *features); +extern int OkNNr_learn(OkNNrdata *data, + double *features, double target, double rfactor); + +#endif /* MACHINE_LEARNING_H */ diff --git a/path_utils.c b/path_utils.c index 6e809818..15cf20ad 100644 --- a/path_utils.c +++ b/path_utils.c @@ -5,15 +5,173 @@ * ******************************************************************************* * - * Copyright (c) 2016-2020, Postgres Professional + * Copyright (c) 2016-2023, Postgres Professional * * IDENTIFICATION * aqo/path_utils.c * */ +#include "postgres.h" -#include "aqo.h" +#include "access/relation.h" +#include "nodes/readfuncs.h" +#include "optimizer/cost.h" #include "optimizer/optimizer.h" +#include "optimizer/planmain.h" +#include "path_utils.h" +#include "storage/lmgr.h" +#include "utils/syscache.h" +#include "utils/lsyscache.h" +#include "common/shortest_dec.h" + +#include "aqo.h" +#include "hash.h" + +#include "postgres_fdw.h" + +#ifdef PGPRO_STD +# define expression_tree_mutator(node, mutator, context) \ + expression_tree_mutator(node, mutator, context, 0) +#endif + + +static AQOPlanNode DefaultAQOPlanNode = +{ + .node.type = T_ExtensibleNode, + .node.extnodename = AQO_PLAN_NODE, + .had_path = false, + .rels.hrels = NIL, + 
.rels.signatures = NIL, + .clauses = NIL, + .selectivities = NIL, + .grouping_exprs = NIL, + .jointype = -1, + .parallel_divisor = -1., + .was_parametrized = false, + .fss = INT_MAX, + .prediction = -1. +}; + +/* + * Auxiliary list for relabel equivalence classes + * from pointers to the serial numbers - indexes of this list. + * XXX: Maybe it's need to use some smart data structure such a HTAB? + * It must be allocated in AQOCacheMemCtx. + */ +List *aqo_eclass_collector = NIL; + +/* + * Hook on creation of a plan node. We need to store AQO-specific data to + * support learning stage. + */ +static create_plan_hook_type aqo_create_plan_next = NULL; + +/*static create_upper_paths_hook_type aqo_create_upper_paths_next = NULL;*/ + + +/* Return a copy of the given list of AQOClause structs */ +static List * +copy_aqo_clauses(List *src) +{ + List *result = NIL; + ListCell *lc; + + foreach(lc, src) + { + AQOClause *old = (AQOClause *) lfirst(lc); + AQOClause *new = palloc(sizeof(AQOClause)); + + memcpy(new, old, sizeof(AQOClause)); + new->clause = copyObject(old->clause); + + result = lappend(result, (void *) new); + } + + return result; +} + +static AQOPlanNode * +create_aqo_plan_node() +{ + AQOPlanNode *node = (AQOPlanNode *) newNode(sizeof(AQOPlanNode), + T_ExtensibleNode); + Assert(node != NULL); + memcpy(node, &DefaultAQOPlanNode, sizeof(AQOPlanNode)); + return node; +} + +AQOConstNode * +create_aqo_const_node(AQOConstType type, int fss) +{ + AQOConstNode *node = (AQOConstNode *) newNode(sizeof(AQOConstNode), + T_ExtensibleNode); + Assert(node != NULL); + node->node.extnodename = AQO_CONST_NODE; + node->type = type; + node->fss = fss; + return node; +} + +/* Ensure that it's postgres_fdw's foreign server oid */ +static bool +is_postgres_fdw_server(Oid serverid) +{ + ForeignServer *server; + ForeignDataWrapper *fdw; + + if (!OidIsValid(serverid)) + return false; + + server = GetForeignServerExtended(serverid, FSV_MISSING_OK); + if (!server) + return false; + + fdw = 
GetForeignDataWrapperExtended(server->fdwid, FDW_MISSING_OK); + if (!fdw || !fdw->fdwname) + return false; + + if (strcmp(fdw->fdwname, "postgres_fdw") != 0) + return false; + + return true; +} + +/* + * Extract an AQO node from the plan private field. + * If no one node was found, return pointer to the default value or return NULL. + */ +AQOPlanNode * +get_aqo_plan_node(Plan *plan, bool create) +{ + AQOPlanNode *node = NULL; + ListCell *lc; + + foreach(lc, plan->ext_nodes) + { + AQOPlanNode *candidate = (AQOPlanNode *) lfirst(lc); + + if (!IsA(candidate, ExtensibleNode)) + continue; + + if (strcmp(candidate->node.extnodename, AQO_PLAN_NODE) != 0) + continue; + + node = candidate; + break; + } + + if (node == NULL) + { + if (!create) + return NULL; + + node = create_aqo_plan_node(); + plan->ext_nodes = lappend(plan->ext_nodes, node); + } + + Assert(node); + return node; +} /* * Returns list of marginal selectivities using as an arguments for each clause @@ -44,42 +202,232 @@ get_selectivities(PlannerInfo *root, } /* - * Transforms given relids from path optimization stage format to list of - * an absolute (independent on query optimization context) relids. + * Based on the hashTupleDesc() routine */ -List * -get_list_of_relids(PlannerInfo *root, Relids relids) +static uint32 +hashTempTupleDesc(TupleDesc desc) { + uint32 s; int i; - RangeTblEntry *entry; - List *l = NIL; + + s = hash_combine(0, hash_uint32(desc->natts)); + + for (i = 0; i < desc->natts; ++i) + { + const char *attname = NameStr(TupleDescAttr(desc, i)->attname); + uint32 s1; + + s = hash_combine(s, hash_uint32(TupleDescAttr(desc, i)->atttypid)); + s1 = hash_bytes((const unsigned char *) attname, strlen(attname)); + s = hash_combine(s, s1); + } + return s; +} + +/* + * Get list of relation indexes and prepare list of permanent table reloids, + * list of temporary table reloids (can be changed between query launches) and + * array of table signatures. 
+ */ +void +get_list_of_relids(PlannerInfo *root, Relids relids, RelSortOut *rels) +{ + int index; + RangeTblEntry *entry; + List *hrels = NIL; + List *hashes = NIL; if (relids == NULL) - return NIL; + return; + + index = -1; + while ((index = bms_next_member(relids, index)) >= 0) + { + HeapTuple htup; + Form_pg_class classForm; + char *relname = NULL; + Oid relrewrite; + char relpersistence; + + entry = planner_rt_fetch(index, root); + + if (!OidIsValid(entry->relid)) + { + /* TODO: Explain this logic. */ + hashes = lappend_int(hashes, INT32_MAX / 3); + continue; + } + + htup = SearchSysCache1(RELOID, ObjectIdGetDatum(entry->relid)); + if (!HeapTupleIsValid(htup)) + elog(PANIC, "cache lookup failed for reloid %u", entry->relid); + + /* Copy the fields from syscache and release the slot as quickly as possible. */ + classForm = (Form_pg_class) GETSTRUCT(htup); + relpersistence = classForm->relpersistence; + relrewrite = classForm->relrewrite; + relname = pstrdup(NameStr(classForm->relname)); + ReleaseSysCache(htup); + + if (relpersistence == RELPERSISTENCE_TEMP) + { + /* The case of temporary table */ + + Relation trel; + TupleDesc tdesc; - i = -1; - while ((i = bms_next_member(relids, i)) >= 0) + trel = relation_open(entry->relid, NoLock); + tdesc = RelationGetDescr(trel); + Assert(CheckRelationLockedByMe(trel, AccessShareLock, true)); + hashes = lappend_int(hashes, hashTempTupleDesc(tdesc)); + relation_close(trel, NoLock); + } + else + { + /* The case of regular table */ + relname = quote_qualified_identifier( + get_namespace_name(get_rel_namespace(entry->relid)), + relrewrite ? get_rel_name(relrewrite) : relname); + + hashes = lappend_int(hashes, DatumGetInt32(hash_any( + (unsigned char *) relname, + strlen(relname)))); + + hrels = lappend_oid(hrels, entry->relid); + } + } + + rels->hrels = list_concat(rels->hrels, hrels); + rels->signatures = list_concat(rels->signatures, hashes); + return; +} + +/* + * Search for any subplans or initplans. 
+ * if subplan is found, replace it by zero Const. + */ +static Node * +subplan_hunter(Node *node, void *context) +{ + if (node == NULL) + /* Continue recursion in other subtrees. */ + return false; + + if (IsA(node, SubPlan)) { - entry = planner_rt_fetch(i, root); - l = lappend_int(l, entry->relid); + /* TODO: use fss of SubPlan here */ + return (Node *) create_aqo_const_node(AQO_NODE_SUBPLAN, 0); } - return l; + return expression_tree_mutator(node, subplan_hunter, context); } /* - * For given path returns the list of all clauses used in it. - * Also returns selectivities for the clauses throw the selectivities variable. - * Both clauses and selectivities returned lists are copies and therefore - * may be modified without corruption of the input data. + * Get independent copy of the clauses list. + * During this operation clauses could be changed and we couldn't walk across + * this list next. + */ +static List * +aqo_get_raw_clauses(PlannerInfo *root, List *restrictlist) +{ + List *clauses = NIL; + ListCell *lc; + + foreach(lc, restrictlist) + { + RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc); + + rinfo = copyObject(rinfo); + rinfo->clause = (Expr *) expression_tree_mutator((Node *) rinfo->clause, + subplan_hunter, + (void *) root); + clauses = lappend(clauses, (void *) rinfo); + } + return clauses; +} + +static int +get_eclass_index(EquivalenceClass *ec) +{ + ListCell *lc; + int i = 0; + MemoryContext old_ctx; + + if (ec == NULL) + return -1; + + /* Get the top of merged eclasses */ + while(ec->ec_merged) + ec = ec->ec_merged; + + foreach (lc, aqo_eclass_collector) + { + if (lfirst(lc) == ec) + break; + i++; + } + + old_ctx = MemoryContextSwitchTo(AQOCacheMemCtx); + if (i == list_length(aqo_eclass_collector)) + aqo_eclass_collector = lappend(aqo_eclass_collector, ec); + MemoryContextSwitchTo(old_ctx); + + return i; +} + +static List * +copy_aqo_clauses_from_rinfo(List *src) +{ + List *result = NIL; + ListCell *lc; + + foreach(lc, src) + { + RestrictInfo 
*old = (RestrictInfo *) lfirst(lc); + AQOClause *new = palloc(sizeof(AQOClause)); + + new->clause = copyObject(old->clause); + new->norm_selec = old->norm_selec; + new->outer_selec = old->outer_selec; + + new->left_ec = get_eclass_index(old->left_ec); + new->right_ec = get_eclass_index(old->right_ec); + + new->is_eq_clause = (old->left_ec != NULL || old->left_ec != NULL); + + result = lappend(result, (void *) new); + } + + return result; +} + +/* + * Return copy of clauses returned from the aqo_get_raw_clause() routine + * and convert it into AQOClause struct. */ List * -get_path_clauses(Path *path, PlannerInfo *root, List **selectivities) +aqo_get_clauses(PlannerInfo *root, List *restrictlist) +{ + List *clauses = aqo_get_raw_clauses(root, restrictlist); + List *result = copy_aqo_clauses_from_rinfo(clauses); + + list_free_deep(clauses); + return result; +} + +/* + * Returns a list of all used clauses for the given path. + * Also returns selectivities for the clauses to 'selectivities' variable. + * The returned list of the selectivities is a copy and therefore + * may be modified without corruption of the input data. 
+ */ +static List * +get_path_clauses_recurse(Path *path, PlannerInfo *root, List **selectivities) { List *inner; List *inner_sel = NIL; List *outer; List *outer_sel = NIL; - List *cur; + List *cur = NIL; List *cur_sel = NIL; Assert(selectivities != NULL); @@ -93,84 +441,691 @@ get_path_clauses(Path *path, PlannerInfo *root, List **selectivities) case T_NestPath: case T_MergePath: case T_HashPath: - cur = ((JoinPath *) path)->joinrestrictinfo; + cur = list_concat(cur, ((JoinPath *) path)->joinrestrictinfo); /* Not quite correct to avoid sjinfo, but we believe in caching */ cur_sel = get_selectivities(root, cur, 0, ((JoinPath *) path)->jointype, NULL); - outer = get_path_clauses(((JoinPath *) path)->outerjoinpath, root, + outer = get_path_clauses_recurse(((JoinPath *) path)->outerjoinpath, root, &outer_sel); - inner = get_path_clauses(((JoinPath *) path)->innerjoinpath, root, + inner = get_path_clauses_recurse(((JoinPath *) path)->innerjoinpath, root, &inner_sel); *selectivities = list_concat(cur_sel, list_concat(outer_sel, inner_sel)); - return list_concat(list_copy(cur), list_concat(outer, inner)); + return list_concat(cur, list_concat(outer, inner)); break; case T_UniquePath: - return get_path_clauses(((UniquePath *) path)->subpath, root, + return get_path_clauses_recurse(((UniquePath *) path)->subpath, root, selectivities); break; case T_GatherPath: - return get_path_clauses(((GatherPath *) path)->subpath, root, + case T_GatherMergePath: + return get_path_clauses_recurse(((GatherPath *) path)->subpath, root, selectivities); break; case T_MaterialPath: - return get_path_clauses(((MaterialPath *) path)->subpath, root, + return get_path_clauses_recurse(((MaterialPath *) path)->subpath, root, selectivities); break; case T_ProjectionPath: - return get_path_clauses(((ProjectionPath *) path)->subpath, root, + return get_path_clauses_recurse(((ProjectionPath *) path)->subpath, root, + selectivities); + break; + case T_ProjectSetPath: + return 
get_path_clauses_recurse(((ProjectSetPath *) path)->subpath, root, selectivities); break; case T_SortPath: - return get_path_clauses(((SortPath *) path)->subpath, root, + return get_path_clauses_recurse(((SortPath *) path)->subpath, root, selectivities); break; + case T_IncrementalSortPath: + { + IncrementalSortPath *p = (IncrementalSortPath *) path; + return get_path_clauses_recurse(p->spath.subpath, root, + selectivities); + } + break; case T_GroupPath: - return get_path_clauses(((GroupPath *) path)->subpath, root, + return get_path_clauses_recurse(((GroupPath *) path)->subpath, root, selectivities); break; case T_UpperUniquePath: - return get_path_clauses(((UpperUniquePath *) path)->subpath, root, + return get_path_clauses_recurse(((UpperUniquePath *) path)->subpath, root, selectivities); break; case T_AggPath: - return get_path_clauses(((AggPath *) path)->subpath, root, + return get_path_clauses_recurse(((AggPath *) path)->subpath, root, selectivities); break; case T_GroupingSetsPath: - return get_path_clauses(((GroupingSetsPath *) path)->subpath, root, + return get_path_clauses_recurse(((GroupingSetsPath *) path)->subpath, root, selectivities); break; case T_WindowAggPath: - return get_path_clauses(((WindowAggPath *) path)->subpath, root, + return get_path_clauses_recurse(((WindowAggPath *) path)->subpath, root, selectivities); break; case T_SetOpPath: - return get_path_clauses(((SetOpPath *) path)->subpath, root, + return get_path_clauses_recurse(((SetOpPath *) path)->subpath, root, selectivities); break; case T_LockRowsPath: - return get_path_clauses(((LockRowsPath *) path)->subpath, root, + return get_path_clauses_recurse(((LockRowsPath *) path)->subpath, root, selectivities); break; case T_LimitPath: - return get_path_clauses(((LimitPath *) path)->subpath, root, + return get_path_clauses_recurse(((LimitPath *) path)->subpath, root, + selectivities); + break; + case T_SubqueryScanPath: + /* Recursing into Subquery we must use subroot */ + 
Assert(path->parent->subroot != NULL); + return get_path_clauses_recurse(((SubqueryScanPath *) path)->subpath, + path->parent->subroot, selectivities); break; + case T_ModifyTablePath: + { + ListCell *lc; + + foreach (lc, ((ModifyTablePath *) path)->subpaths) + { + Path *subpath = lfirst(lc); + + cur = list_concat(cur, + get_path_clauses_recurse(subpath, root, selectivities)); + cur_sel = list_concat(cur_sel, *selectivities); + } + cur = list_concat(cur, aqo_get_raw_clauses(root, + path->parent->baserestrictinfo)); + *selectivities = list_concat(cur_sel, + get_selectivities(root, + path->parent->baserestrictinfo, + 0, JOIN_INNER, NULL)); + return cur; + } + break; + /* TODO: RecursiveUnionPath */ + case T_AppendPath: + case T_MergeAppendPath: + { + ListCell *lc; + + /* + * It isn't a safe style, but we use the only subpaths field that is + * the first at both Append and MergeAppend nodes. + */ + foreach (lc, ((AppendPath *) path)->subpaths) + { + Path *subpath = lfirst(lc); + + cur = list_concat(cur, + get_path_clauses_recurse(subpath, root, selectivities)); + cur_sel = list_concat(cur_sel, *selectivities); + } + cur = list_concat(cur, aqo_get_raw_clauses(root, + path->parent->baserestrictinfo)); + *selectivities = list_concat(cur_sel, + get_selectivities(root, + path->parent->baserestrictinfo, + 0, JOIN_INNER, NULL)); + return cur; + } + break; + case T_ForeignPath: + /* The same as in the default case */ default: - cur = list_concat(list_copy(path->parent->baserestrictinfo), + cur = list_concat(list_concat(cur, path->parent->baserestrictinfo), path->param_info ? 
- list_copy(path->param_info->ppi_clauses) : NIL); + path->param_info->ppi_clauses : NIL); if (path->param_info) cur_sel = get_selectivities(root, cur, path->parent->relid, JOIN_INNER, NULL); else cur_sel = get_selectivities(root, cur, 0, JOIN_INNER, NULL); *selectivities = cur_sel; + cur = aqo_get_raw_clauses(root, cur); return cur; break; } } + +/* + * Returns a list of AQOClauses for the given path, which is a copy + * of the clauses returned from the get_path_clauses_recurse() routine. + * Also returns selectivities for the clauses to 'selectivities' variable. + * Both returned lists are copies and therefore may be modified without + * corruption of the input data. + */ +List * +get_path_clauses(Path *path, PlannerInfo *root, List **selectivities) +{ + return copy_aqo_clauses_from_rinfo( + get_path_clauses_recurse(path, root, selectivities)); +} + +/* + * Some of paths are kind of utility path. I mean, It isn't corresponding to + * specific RelOptInfo node. So, it should be omitted in process of clauses + * gathering to avoid duplication of the same clauses. + * XXX: only a dump plug implemented for now. + */ +static bool +is_appropriate_path(Path *path) +{ + bool appropriate = true; + + switch (path->type) + { + case T_SortPath: + case T_IncrementalSortPath: + case T_GatherPath: + case T_GatherMergePath: + appropriate = false; + break; + default: + break; + } + + return appropriate; +} + +/* + * Add AQO data into the plan node, if necessary. + * + * The necesssary case is when AQO is learning on this query, used for a + * prediction (and we will need the data to show prediction error at the end) or + * just to gather a plan statistics. + * Don't switch here to any AQO-specific memory contexts, because we should + * store AQO prediction in the same context, as the plan. So, explicitly free + * all unneeded data. 
+ */ +static void +aqo_create_plan(PlannerInfo *root, Path *src, Plan **dest) +{ + bool is_join_path; + Plan *plan = *dest; + AQOPlanNode *node; + + if (aqo_create_plan_next) + (*aqo_create_plan_next)(root, src, dest); + + if (!query_context.use_aqo && !query_context.learn_aqo && + !query_context.collect_stat) + return; + + is_join_path = (src->type == T_NestPath || src->type == T_MergePath || + src->type == T_HashPath || + (src->type == T_ForeignPath && IS_JOIN_REL(src->parent))); + + node = get_aqo_plan_node(plan, true); + + if (node->had_path) + { + /* + * The convention is that any extension that sets had_path is also + * responsible for setting path_clauses, path_jointype, path_relids, + * path_parallel_workers, and was_parameterized. + */ + return; + } + + if (is_join_path) + { + if (IsA(src, ForeignPath)) + { + PgFdwRelationInfo *fpinfo = (PgFdwRelationInfo *) src->parent->fdw_private; + List *restrictclauses = NIL; + + if (!fpinfo) + return; + + /* We have to ensure that this is postgres_fdw ForeignPath */ + if (!is_postgres_fdw_server(src->parent->serverid)) + return; + + restrictclauses = list_concat(restrictclauses, fpinfo->joinclauses); + restrictclauses = list_concat(restrictclauses, fpinfo->remote_conds); + restrictclauses = list_concat(restrictclauses, fpinfo->local_conds); + + node->clauses = aqo_get_clauses(root, restrictclauses); + node->jointype = fpinfo->jointype; + + list_free(restrictclauses); + } + else + { + node->clauses = aqo_get_clauses(root, ((JoinPath *) src)->joinrestrictinfo); + node->jointype = ((JoinPath *) src)->jointype; + } + } + else if (IsA(src, AggPath)) + /* Aggregation node must store grouping clauses. */ + { + AggPath *ap = (AggPath *) src; + + /* Get TLE's from child target list corresponding to the list of exprs. */ + List *groupExprs = get_sortgrouplist_exprs(ap->groupClause, + (*dest)->lefttree->targetlist); + /* Copy bare expressions for further AQO learning case. 
*/ + node->grouping_exprs = copyObject(groupExprs); + get_list_of_relids(root, ap->subpath->parent->relids, &node->rels); + node->jointype = JOIN_INNER; + } + else if (is_appropriate_path(src)) + { + node->clauses = list_concat( + aqo_get_clauses(root, src->parent->baserestrictinfo), + src->param_info ? aqo_get_clauses(root, src->param_info->ppi_clauses) : NIL); + node->jointype = JOIN_INNER; + } + + get_list_of_relids(root, src->parent->relids, &node->rels); + + if (src->parallel_workers > 0) + node->parallel_divisor = get_parallel_divisor(src); + node->was_parametrized = (src->param_info != NULL); + + if (src->param_info) + { + node->prediction = src->param_info->predicted_ppi_rows; + node->fss = src->param_info->fss_ppi_hash; + } + else + { + /* + * In the case of forced stat gathering AQO must store fss as well as + * parallel divisor. Negative predicted cardinality field will be a sign + * that it is not a prediction, just statistics. + */ + node->prediction = src->parent->predicted_cardinality; + node->fss = src->parent->fss_hash; + } + + node->had_path = true; +} + +static void +AQOnodeCopy(struct ExtensibleNode *enew, const struct ExtensibleNode *eold) +{ + AQOPlanNode *new = (AQOPlanNode *) enew; + AQOPlanNode *old = (AQOPlanNode *) eold; + + Assert(IsA(old, ExtensibleNode)); + Assert(strcmp(old->node.extnodename, AQO_PLAN_NODE) == 0); + Assert(new && old); + + /* + * Copy static fields in one command. + * But do not copy fields of the old->node. + * Elsewise, we can use pointers that will be freed. + * For example, it is old->node.extnodename. + */ + memcpy(&new->had_path, &old->had_path, sizeof(AQOPlanNode) - offsetof(AQOPlanNode, had_path)); + + /* These lists couldn't contain AQO nodes. 
Use basic machinery */ + new->rels.hrels = list_copy(old->rels.hrels); + new->rels.signatures = list_copy(old->rels.signatures); + + new->clauses = copy_aqo_clauses(old->clauses); + new->grouping_exprs = copyObject(old->grouping_exprs); + new->selectivities = copyObject(old->selectivities); + enew = (ExtensibleNode *) new; +} + +static bool +AQOnodeEqual(const struct ExtensibleNode *a, const struct ExtensibleNode *b) +{ + return false; +} + +static void +AQOconstCopy(struct ExtensibleNode *enew, const struct ExtensibleNode *eold) +{ + AQOConstNode *new = (AQOConstNode *) enew; + AQOConstNode *old = (AQOConstNode *) eold; + + Assert(IsA(old, ExtensibleNode)); + Assert(strcmp(old->node.extnodename, AQO_CONST_NODE) == 0); + Assert(new && old); + + new->type = old->type; + new->fss = old->fss; + enew = (ExtensibleNode *) new; +} + +static bool +AQOconstEqual(const struct ExtensibleNode *a, const struct ExtensibleNode *b) +{ + return false; +} + +/* + * Convert a double value, attempting to ensure the value is preserved exactly. 
+ */ +static void +outDouble(StringInfo str, double d) +{ + char buf[DOUBLE_SHORTEST_DECIMAL_LEN]; + + double_to_shortest_decimal_buf(d, buf); + appendStringInfoString(str, buf); +} + +#define WRITE_INT_FIELD(fldname) \ + appendStringInfo(str, " :" CppAsString(fldname) " %d", node->fldname) + +/* Write a boolean field */ +#define WRITE_BOOL_FIELD(fldname) \ + appendStringInfo(str, " :" CppAsString(fldname) " %s", \ + booltostr(node->fldname)) + +#define WRITE_NODE_FIELD(fldname) \ + (appendStringInfoString(str, " :" CppAsString(fldname) " "), \ + outNode(str, node->fldname)) + +/* Write an enumerated-type field as an integer code */ +#define WRITE_ENUM_FIELD(fldname, enumtype) \ + appendStringInfo(str, " :" CppAsString(fldname) " %d", \ + (int) node->fldname) + +/* Write a float field */ +#define WRITE_FLOAT_FIELD(fldname) \ + (appendStringInfo(str, " :" CppAsString(fldname) " "), \ + outDouble(str, node->fldname)) + +/* The start part of a custom list writer */ +#define WRITE_CUSTOM_LIST_START(fldname) \ + { \ + appendStringInfo(str, " :N_" CppAsString(fldname) " %d ", \ + list_length(node->fldname)); \ + /* Serialize this list like an array */ \ + if (list_length(node->fldname)) \ + { \ + ListCell *lc; \ + appendStringInfo(str, "("); \ + foreach (lc, node->fldname) + +/* The end part of a custom list writer */ +#define WRITE_CUSTOM_LIST_END() \ + appendStringInfo(str, " )"); \ + } \ + else \ + appendStringInfo(str, "<>"); \ + } + +/* Write a list of int values */ +#define WRITE_INT_LIST(fldname) \ + WRITE_CUSTOM_LIST_START(fldname) \ + { \ + int val = lfirst_int(lc); \ + appendStringInfo(str, " %d", val); \ + } \ + WRITE_CUSTOM_LIST_END() + +/* Write a list of AQOClause values */ +#define WRITE_AQOCLAUSE_LIST(fldname) \ + WRITE_CUSTOM_LIST_START(clauses) \ + { \ + AQOClause *node = lfirst(lc); \ + /* Serialize this struct like a node */ \ + appendStringInfo(str, " {"); \ + WRITE_NODE_FIELD(clause); \ + WRITE_FLOAT_FIELD(norm_selec); \ + 
WRITE_FLOAT_FIELD(outer_selec); \ + WRITE_INT_FIELD(left_ec); \ + WRITE_INT_FIELD(right_ec); \ + WRITE_BOOL_FIELD(is_eq_clause); \ + appendStringInfo(str, " }"); \ + } \ + WRITE_CUSTOM_LIST_END() + +/* + * Serialize AQO plan node to a string. + * + * Some extensions may manipulate by parts of serialized plan too. + */ +static void +AQOnodeOut(struct StringInfoData *str, const struct ExtensibleNode *enode) +{ + AQOPlanNode *node = (AQOPlanNode *) enode; + + WRITE_BOOL_FIELD(had_path); + + WRITE_NODE_FIELD(rels.hrels); + WRITE_INT_LIST(rels.signatures); + + WRITE_AQOCLAUSE_LIST(clauses); + + WRITE_NODE_FIELD(selectivities); + WRITE_NODE_FIELD(grouping_exprs); + WRITE_ENUM_FIELD(jointype, JoinType); + + WRITE_FLOAT_FIELD(parallel_divisor); + WRITE_BOOL_FIELD(was_parametrized); + + WRITE_INT_FIELD(fss); + WRITE_FLOAT_FIELD(prediction); +} + +/* + * Serialize AQO const node to a string. + * + * Some extensions may manipulate by parts of serialized plan too. + */ +static void +AQOconstOut(struct StringInfoData *str, const struct ExtensibleNode *enode) +{ + AQOConstNode *node = (AQOConstNode *) enode; + + WRITE_ENUM_FIELD(type, AQOConstType); + WRITE_INT_FIELD(fss); +} + +/* Read an integer field (anything written as ":fldname %d") */ +#define READ_INT_FIELD(fldname) \ + token = pg_strtok(&length); /* skip :fldname */ \ + token = pg_strtok(&length); /* get field value */ \ + local_node->fldname = atoi(token) + +/* Read an enumerated-type field that was written as an integer code */ +#define READ_ENUM_FIELD(fldname, enumtype) \ + token = pg_strtok(&length); /* skip :fldname */ \ + token = pg_strtok(&length); /* get field value */ \ + local_node->fldname = (enumtype) atoi(token) + +/* Read a float field */ +#define READ_FLOAT_FIELD(fldname) \ + token = pg_strtok(&length); /* skip :fldname */ \ + token = pg_strtok(&length); /* get field value */ \ + local_node->fldname = atof(token) + +/* Read a boolean field */ +#define READ_BOOL_FIELD(fldname) \ + token = 
pg_strtok(&length); /* skip :fldname */ \ + token = pg_strtok(&length); /* get field value */ \ + local_node->fldname = strtobool(token) + +/* Read a Node field */ +#define READ_NODE_FIELD(fldname) \ + token = pg_strtok(&length); /* skip :fldname */ \ + (void) token; /* in case not used elsewhere */ \ + local_node->fldname = nodeRead(NULL, 0) + +/* The start part of a custom list reader */ +#define READ_CUSTOM_LIST_START() \ + { \ + int counter; \ + token = pg_strtok(&length); /* skip the name */ \ + token = pg_strtok(&length); \ + counter = atoi(token); \ + token = pg_strtok(&length); /* left bracket "(" */ \ + if (length) \ + { \ + for (int i = 0; i < counter; i++) + +/* The end part of a custom list reader */ +#define READ_CUSTOM_LIST_END(fldname) \ + token = pg_strtok(&length); /* right bracket ")" */ \ + } \ + else \ + local_node->fldname = NIL; \ + } + +/* Read a list of int values */ +#define READ_INT_LIST(fldname) \ + READ_CUSTOM_LIST_START() \ + { \ + int val; \ + token = pg_strtok(&length); \ + val = atoi(token); \ + local_node->fldname = lappend_int( \ + local_node->fldname, val); \ + } \ + READ_CUSTOM_LIST_END(fldname) + +/* Read a list of AQOClause values */ +#define READ_AQOCLAUSE_LIST(fldname) \ + READ_CUSTOM_LIST_START() \ + { \ + /* copy to use in the inner blocks of code */ \ + AQOPlanNode *node_copy = local_node; \ + AQOClause *local_node = palloc(sizeof(AQOClause)); \ + token = pg_strtok(&length); /* left bracket "{" */ \ + READ_NODE_FIELD(clause); \ + READ_FLOAT_FIELD(norm_selec); \ + READ_FLOAT_FIELD(outer_selec); \ + READ_INT_FIELD(left_ec); \ + READ_INT_FIELD(right_ec); \ + READ_BOOL_FIELD(is_eq_clause); \ + token = pg_strtok(&length); /* right bracket "}" */ \ + node_copy->fldname = lappend(node_copy->fldname, local_node); \ + } \ + READ_CUSTOM_LIST_END(fldname) + +/* + * Deserialize AQO plan node from a string to internal representation. + * + * Should work in coherence with AQOnodeOut(). 
+ */ +static void +AQOnodeRead(struct ExtensibleNode *enode) +{ + AQOPlanNode *local_node = (AQOPlanNode *) enode; + const char *token; + int length; + + READ_BOOL_FIELD(had_path); + + READ_NODE_FIELD(rels.hrels); + READ_INT_LIST(rels.signatures); + + READ_AQOCLAUSE_LIST(clauses); + + READ_NODE_FIELD(selectivities); + READ_NODE_FIELD(grouping_exprs); + READ_ENUM_FIELD(jointype, JoinType); + + READ_FLOAT_FIELD(parallel_divisor); + READ_BOOL_FIELD(was_parametrized); + + READ_INT_FIELD(fss); + READ_FLOAT_FIELD(prediction); +} + +/* + * Deserialize AQO const node from a string to internal representation. + * + * Should work in coherence with AQOconstOut(). + */ +static void +AQOconstRead(struct ExtensibleNode *enode) +{ + AQOConstNode *local_node = (AQOConstNode *) enode; + const char *token; + int length; + + READ_ENUM_FIELD(type, AQOConstType); + READ_INT_FIELD(fss); +} + +static const ExtensibleNodeMethods aqo_node_method = +{ + .extnodename = AQO_PLAN_NODE, + .node_size = sizeof(AQOPlanNode), + .nodeCopy = AQOnodeCopy, + .nodeEqual = AQOnodeEqual, + .nodeOut = AQOnodeOut, + .nodeRead = AQOnodeRead +}; + +static const ExtensibleNodeMethods aqo_const_method = +{ + .extnodename = AQO_CONST_NODE, + .node_size = sizeof(AQOConstNode), + .nodeCopy = AQOconstCopy, + .nodeEqual = AQOconstEqual, + .nodeOut = AQOconstOut, + .nodeRead = AQOconstRead +}; + +void +RegisterAQOPlanNodeMethods(void) +{ + RegisterExtensibleNodeMethods(&aqo_node_method); + RegisterExtensibleNodeMethods(&aqo_const_method); +} + +/* + * Warning! This function does not word properly. + * Because value of Const nodes removed by hash routine. + * + * Hook for create_upper_paths_hook + * + * Assume, that we are last in the chain of path creators. 
+ */ +/*static void +aqo_store_upper_signature(PlannerInfo *root, + UpperRelationKind stage, + RelOptInfo *input_rel, + RelOptInfo *output_rel, + void *extra) +{ + A_Const *fss_node = makeNode(A_Const); + RelSortOut rels = {NIL, NIL}; + List *clauses; + List *selectivities; + + if (aqo_create_upper_paths_next) + (*aqo_create_upper_paths_next)(root, stage, input_rel, output_rel, extra); + + if (!query_context.use_aqo && !query_context.learn_aqo && !force_collect_stat) + / * Includes 'disabled query' state. * / + return; + + if (stage != UPPERREL_FINAL) + return; + + set_cheapest(input_rel); + clauses = get_path_clauses(input_rel->cheapest_total_path, + root, &selectivities); + get_list_of_relids(root, input_rel->relids, &rels); + fss_node->val.type = T_Integer; + fss_node->location = -1; + fss_node->val.val.ival = get_fss_for_object(rels.signatures, clauses, NIL, + NULL, NULL); + output_rel->ext_nodes = lappend(output_rel->ext_nodes, (void *) fss_node); +}*/ + +void +aqo_path_utils_init(void) +{ + aqo_create_plan_next = create_plan_hook; + create_plan_hook = aqo_create_plan; + + /*aqo_create_upper_paths_next = create_upper_paths_hook; + create_upper_paths_hook = aqo_store_upper_signature;*/ +} diff --git a/path_utils.h b/path_utils.h new file mode 100644 index 00000000..dec9eb1e --- /dev/null +++ b/path_utils.h @@ -0,0 +1,120 @@ +#ifndef PATH_UTILS_H +#define PATH_UTILS_H + +#include "nodes/extensible.h" +#include "nodes/pathnodes.h" +#include "optimizer/planner.h" + +#define AQO_PLAN_NODE "AQOPlanNode" +#define AQO_CONST_NODE "AQOConstNode" + +extern List *aqo_eclass_collector; + +/* + * Find and sort out relations that used in the query: + * Use oids of relations to store dependency of ML row on a set of tables. + * Use oids of temporary tables to get access to these structure for preparing + * a kind of signature. 
+ */ +typedef struct +{ + List *hrels; /* oids of persistent relations */ + List *signatures; /* list of hashes: on qualified name of a persistent + * table or on a table structure for temp table */ +} RelSortOut; + +/* + * Fields of the RestrictInfo needed in the AQOPlanNode + */ +typedef struct AQOClause +{ + /* the represented clause of WHERE or JOIN */ + Expr *clause; + /* selectivity for "normal" (JOIN_INNER) semantics; -1 if not yet set */ + Selectivity norm_selec; + /* selectivity for outer join semantics; -1 if not yet set */ + Selectivity outer_selec; + + /* Serial number of EquivalenceClass containing lefthand */ + int left_ec; + /* Serial number of EquivalenceClass containing righthand */ + int right_ec; + /* Quick check for equivalence class */ + bool is_eq_clause; + + EquivalenceClass *ec; +} AQOClause; + +/* + * information for adaptive query optimization + */ +typedef struct AQOPlanNode +{ + ExtensibleNode node; + bool had_path; + RelSortOut rels; + List *clauses; + List *selectivities; + + /* Grouping expressions from a target list. */ + List *grouping_exprs; + + JoinType jointype; + double parallel_divisor; + bool was_parametrized; + + /* For Adaptive optimization DEBUG purposes */ + int fss; + double prediction; +} AQOPlanNode; + +/* + * The type of a node that is replaced by AQOConstNode. + */ +typedef enum AQOConstType +{ + AQO_NODE_EXPR = 0, + AQO_NODE_SUBPLAN +} AQOConstType; + +/* + * A custom node that is used to calcucate a fss instead of regular node, + * such as SubPlan or Expr. + */ +typedef struct AQOConstNode +{ + ExtensibleNode node; + AQOConstType type; /* The type of the replaced node */ + int fss; /* The fss of the replaced node */ +} AQOConstNode; + +#define strtobool(x) ((*(x) == 't') ? true : false) + +#define nullable_string(token,length) \ + ((length) == 0 ? NULL : debackslash(token, length)) + +#define booltostr(x) ((x) ? 
"true" : "false") + +/* Extracting path information utilities */ +extern List *get_selectivities(PlannerInfo *root, + List *clauses, + int varRelid, + JoinType jointype, + SpecialJoinInfo *sjinfo); +extern void get_list_of_relids(PlannerInfo *root, Relids relids, + RelSortOut *rels); + +extern List *get_path_clauses(Path *path, + PlannerInfo *root, + List **selectivities); + +extern AQOConstNode *create_aqo_const_node(AQOConstType type, int fss); + +extern AQOPlanNode *get_aqo_plan_node(Plan *plan, bool create); +extern void RegisterAQOPlanNodeMethods(void); + +extern List *aqo_get_clauses(PlannerInfo *root, List *restrictlist); + +void aqo_path_utils_init(void); + +#endif /* PATH_UTILS_H */ diff --git a/postprocessing.c b/postprocessing.c index db38b2d4..50d27624 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -9,17 +9,31 @@ * ******************************************************************************* * - * Copyright (c) 2016-2020, Postgres Professional + * Copyright (c) 2016-2023, Postgres Professional * * IDENTIFICATION * aqo/postprocessing.c * */ -#include "aqo.h" +#include "postgres.h" + #include "access/parallel.h" #include "optimizer/optimizer.h" +#include "postgres_fdw.h" #include "utils/queryenvironment.h" +#include "miscadmin.h" + +#include "aqo.h" +#include "hash.h" +#include "path_utils.h" +#include "machine_learning.h" +#include "storage.h" + +#define SMART_TIMEOUT_ERROR_THRESHOLD (0.1) + + +bool aqo_learn_statement_timeout = false; typedef struct { @@ -27,42 +41,51 @@ typedef struct List *selectivities; List *relidslist; bool learn; + bool isTimedOut; /* Is execution was interrupted by timeout? */ } aqo_obj_stat; static double cardinality_sum_errors; static int cardinality_num_objects; +static int64 max_timeout_value; +static int64 growth_rate = 3; -/* It is needed to recognize stored Query-related aqo data in the query +/* + * Store an AQO-related query data into the Query Environment structure. 
+ * + * It is very sad that we have to use such unsuitable field, but alternative is + * to introduce a private field in a PlannedStmt struct. + * It is needed to recognize stored Query-related aqo data in the query * environment field. */ static char *AQOPrivateData = "AQOPrivateData"; static char *PlanStateInfo = "PlanStateInfo"; +/* Saved hooks */ +static ExecutorStart_hook_type aqo_ExecutorStart_next = NULL; +static ExecutorRun_hook_type aqo_ExecutorRun_next = NULL; +static ExecutorEnd_hook_type aqo_ExecutorEnd_next = NULL; +static ExplainOnePlan_hook_type aqo_ExplainOnePlan_next = NULL; +static ExplainOneNode_hook_type aqo_ExplainOneNode_next = NULL; + /* Query execution statistics collecting utilities */ -static void atomic_fss_learn_step(int fss_hash, int ncols, - double **matrix, double *targets, - double *features, double target); -static void learn_sample(List *clauselist, - List *selectivities, - List *relidslist, - double true_cardinality, - double predicted_cardinality); +static void atomic_fss_learn_step(uint64 fhash, int fss, OkNNrdata *data, + double *features, double target, + double rfactor, List *reloids); +static bool learnOnPlanState(PlanState *p, void *context); +static void learn_agg_sample(aqo_obj_stat *ctx, RelSortOut *rels, + double learned, double rfactor, Plan *plan, + bool notExecuted); +static void learn_sample(aqo_obj_stat *ctx, RelSortOut *rels, + double learned, double rfactor, + Plan *plan, bool notExecuted); static List *restore_selectivities(List *clauselist, - List *relidslist, - JoinType join_type, - bool was_parametrized); -static void update_query_stat_row(double *et, int *et_size, - double *pt, int *pt_size, - double *ce, int *ce_size, - double planning_time, - double execution_time, - double cardinality_error, - int64 *n_exec); -static void StoreToQueryContext(QueryDesc *queryDesc); + List *relidslist, + JoinType join_type, + bool was_parametrized); +static void StoreToQueryEnv(QueryDesc *queryDesc); static void 
StorePlanInternals(QueryDesc *queryDesc); -static bool ExtractFromQueryContext(QueryDesc *queryDesc); -static void RemoveFromQueryContext(QueryDesc *queryDesc); +static bool ExtractFromQueryEnv(QueryDesc *queryDesc); /* * This is the critical section: only one runner is allowed to be inside this @@ -70,17 +93,45 @@ static void RemoveFromQueryContext(QueryDesc *queryDesc); * matrix and targets are just preallocated memory for computations. */ static void -atomic_fss_learn_step(int fss_hash, int ncols, - double **matrix, double *targets, - double *features, double target) +atomic_fss_learn_step(uint64 fs, int fss, OkNNrdata *data, + double *features, double target, double rfactor, + List *reloids) +{ + if (!load_aqo_data(fs, fss, data, false)) + data->rows = 0; + + data->rows = OkNNr_learn(data, features, target, rfactor); + update_fss_ext(fs, fss, data, reloids); +} + +static void +learn_agg_sample(aqo_obj_stat *ctx, RelSortOut *rels, + double learned, double rfactor, Plan *plan, bool notExecuted) { - int nrows; + AQOPlanNode *aqo_node = get_aqo_plan_node(plan, false); + uint64 fs = query_context.fspace_hash; + int child_fss; + double target; + OkNNrdata *data = OkNNr_allocate(0); + int fss; - if (!load_fss(fss_hash, ncols, matrix, targets, &nrows)) - nrows = 0; + /* + * Learn 'not executed' nodes only once, if no one another knowledge exists + * for current feature subspace. + */ + if (notExecuted && aqo_node && aqo_node->prediction > 0.) + return; + + target = log(learned); + child_fss = get_fss_for_object(rels->signatures, ctx->clauselist, + NIL, NULL,NULL); + fss = get_grouped_exprs_hash(child_fss, + aqo_node ? 
aqo_node->grouping_exprs : NIL); - nrows = OkNNr_learn(nrows, ncols, matrix, targets, features, target); - update_fss(fss_hash, nrows, ncols, matrix, targets); + /* Critical section */ + atomic_fss_learn_step(fs, fss, data, NULL, + target, rfactor, rels->hrels); + /* End of critical section */ } /* @@ -88,63 +139,52 @@ atomic_fss_learn_step(int fss_hash, int ncols, * true cardinalities) performs learning procedure. */ static void -learn_sample(List *clauselist, List *selectivities, List *relidslist, - double true_cardinality, double predicted_cardinality) +learn_sample(aqo_obj_stat *ctx, RelSortOut *rels, + double learned, double rfactor, Plan *plan, bool notExecuted) { - int fss_hash; - int nfeatures; - double *matrix[aqo_K]; - double targets[aqo_K]; - double *features; - double target; - int i; + AQOPlanNode *aqo_node = get_aqo_plan_node(plan, false); + uint64 fs = query_context.fspace_hash; + double *features; + double target; + OkNNrdata *data; + int fss; + int ncols; -/* - * Suppress the optimization for debug purposes. - if (fabs(log(predicted_cardinality) - log(true_cardinality)) < - object_selection_prediction_threshold) - { - return; - } -*/ - target = log(true_cardinality); - - fss_hash = get_fss_for_object(clauselist, selectivities, relidslist, - &nfeatures, &features); + target = log(learned); + fss = get_fss_for_object(rels->signatures, ctx->clauselist, + ctx->selectivities, &ncols, &features); - if (nfeatures > 0) - for (i = 0; i < aqo_K; ++i) - matrix[i] = palloc(sizeof(double) * nfeatures); + /* Only Agg nodes can have non-empty a grouping expressions list. */ + Assert(!IsA(plan, Agg) || !aqo_node || aqo_node->grouping_exprs != NIL); - /* Here should be critical section */ - atomic_fss_learn_step(fss_hash, nfeatures, matrix, targets, features, target); - /* Here should be the end of critical section */ + /* + * Learn 'not executed' nodes only once, if no one another knowledge exists + * for current feature subspace. 
+ */ + if (notExecuted && aqo_node && aqo_node->prediction > 0) + return; - if (nfeatures > 0) - for (i = 0; i < aqo_K; ++i) - pfree(matrix[i]); + data = OkNNr_allocate(ncols); - pfree(features); + /* Critical section */ + atomic_fss_learn_step(fs, fss, data, features, target, rfactor, rels->hrels); + /* End of critical section */ } /* * For given node specified by clauselist, relidslist and join_type restores * the same selectivities of clauses as were used at query optimization stage. */ -List * -restore_selectivities(List *clauselist, - List *relidslist, - JoinType join_type, +static List * +restore_selectivities(List *clauselist, List *relidslist, JoinType join_type, bool was_parametrized) { - List *lst = NIL; - ListCell *l; - int i = 0; + List *lst = NIL; + ListCell *l; bool parametrized_sel; int nargs; - int *args_hash; - int *eclass_hash; - double *cur_sel; + int *args_hash; + int *eclass_hash; int cur_hash; int cur_relid; @@ -152,34 +192,37 @@ restore_selectivities(List *clauselist, if (parametrized_sel) { cur_relid = linitial_int(relidslist); + get_eclasses(clauselist, &nargs, &args_hash, &eclass_hash); } foreach(l, clauselist) { - RestrictInfo *rinfo = (RestrictInfo *) lfirst(l); + AQOClause *clause = (AQOClause *) lfirst(l); + Selectivity *cur_sel = NULL; - cur_sel = NULL; if (parametrized_sel) { - cur_hash = get_clause_hash(rinfo->clause, nargs, - args_hash, eclass_hash); + cur_hash = get_clause_hash(clause, nargs, args_hash, eclass_hash); cur_sel = selectivity_cache_find_global_relid(cur_hash, cur_relid); - if (cur_sel == NULL) - { - if (join_type == JOIN_INNER) - cur_sel = &rinfo->norm_selec; - else - cur_sel = &rinfo->outer_selec; - } } - else if (join_type == JOIN_INNER) - cur_sel = &rinfo->norm_selec; - else - cur_sel = &rinfo->outer_selec; + + if (cur_sel == NULL) + { + cur_sel = palloc(sizeof(double)); + + if (join_type == JOIN_INNER) + *cur_sel = clause->norm_selec; + else + *cur_sel = clause->outer_selec; + + if (*cur_sel < 0) + *cur_sel = 0; + 
} + + Assert(*cur_sel >= 0); lst = lappend(lst, cur_sel); - i++; } if (parametrized_sel) @@ -191,22 +234,128 @@ restore_selectivities(List *clauselist, return lst; } +static bool +IsParallelTuplesProcessing(const Plan *plan, bool IsParallel) +{ + if (IsParallel && (plan->parallel_aware || nodeTag(plan) == T_HashJoin || + nodeTag(plan) == T_MergeJoin || nodeTag(plan) == T_NestLoop)) + return true; + return false; +} + /* - * Check for the nodes that never executed. If at least one node exists in the - * plan than actual rows of any another node can be false. - * Suppress such knowledge because it can worsen the query execution time. + * learn_subplan_recurse + * + * Emphasize recursion operation into separate function because of increasing + * complexity of this logic. */ static bool -HasNeverExecutedNodes(PlanState *ps, void *context) +learn_subplan_recurse(PlanState *p, aqo_obj_stat *ctx) +{ + List *saved_subplan_list = NIL; + List *saved_initplan_list = NIL; + ListCell *lc; + + if (!p->instrument) + return true; + + if (!ctx->isTimedOut) + InstrEndLoop(p->instrument); + else if (p->instrument->running) + { + /* + * We can't use node instrumentation functions because after the end + * of this timeout handler query can work for some time. + * We change ntuples and nloops to unify walking logic and because we + * know that the query execution results meaningless. + */ + p->instrument->ntuples += p->instrument->tuplecount; + p->instrument->nloops += 1; + + /* + * TODO: can we simply use ExecParallelCleanup to implement gathering of + * instrument data in the case of parallel workers? + */ + } + + saved_subplan_list = p->subPlan; + saved_initplan_list = p->initPlan; + p->subPlan = NIL; + p->initPlan = NIL; + + if (planstate_tree_walker(p, learnOnPlanState, (void *) ctx)) + return true; + + /* + * Learn on subplans and initplans separately. Discard learn context of these + * subplans because we will use their fss'es directly. 
+ */ + foreach(lc, saved_subplan_list) + { + SubPlanState *sps = lfirst_node(SubPlanState, lc); + aqo_obj_stat SPCtx = {NIL, NIL, NIL, ctx->learn, ctx->isTimedOut}; + + if (learnOnPlanState(sps->planstate, (void *) &SPCtx)) + return true; + } + foreach(lc, saved_initplan_list) + { + SubPlanState *sps = lfirst_node(SubPlanState, lc); + aqo_obj_stat SPCtx = {NIL, NIL, NIL, ctx->learn, ctx->isTimedOut}; + + if (learnOnPlanState(sps->planstate, (void *) &SPCtx)) + return true; + } + + p->subPlan = saved_subplan_list; + p->initPlan = saved_initplan_list; + return false; +} + +static bool +should_learn(PlanState *ps, AQOPlanNode *node, aqo_obj_stat *ctx, + double predicted, double nrows, double *rfactor) { - Assert(context == NULL); + if (ctx->isTimedOut) + { + if (ctx->learn && nrows > predicted * 1.2) + { + /* This node s*/ + if (aqo_show_details) + elog(NOTICE, + "[AQO] Learn on a plan node ("UINT64_FORMAT", %d), " + "predicted rows: %.0lf, updated prediction: %.0lf", + query_context.query_hash, node->fss, predicted, nrows); + + *rfactor = RELIABILITY_MIN; + return true; + } - InstrEndLoop(ps->instrument); - if (ps->instrument == NULL || ps->instrument->nloops == 0) + /* Has the executor finished its work? */ + if (!ps->instrument->running && TupIsNull(ps->ps_ResultTupleSlot) && + ps->instrument->nloops > 0.) /* Node was visited by executor at least once. */ + { + /* This is much more reliable data. So we can correct our prediction. 
*/ + if (ctx->learn && aqo_show_details && + fabs(nrows - predicted) / predicted > 0.2) + elog(NOTICE, + "[AQO] Learn on a finished plan node ("UINT64_FORMAT", %d), " + "predicted rows: %.0lf, updated prediction: %.0lf", + query_context.query_hash, node->fss, predicted, nrows); + + *rfactor = RELIABILITY_MIN + 0.9 * (RELIABILITY_MAX - RELIABILITY_MIN); + return true; + } + } + else if (ctx->learn) + { + *rfactor = RELIABILITY_MAX; return true; + } - return planstate_tree_walker(ps, HasNeverExecutedNodes, NULL); + return false; } + /* * Walks over obtained PlanState tree, collects relation objects with their * clauses, selectivities and relids and passes each object to learn_sample. @@ -214,7 +363,7 @@ HasNeverExecutedNodes(PlanState *ps, void *context) * Returns clauselist, selectivities and relids. * Store observed subPlans into other_plans list. * - * We use list_copy() of p->plan->path_clauses and p->plan->path_relids + * We use list_copy() of AQOPlanNode->clauses and AQOPlanNode->relids * because the plan may be stored in the cache after this. Operation * list_concat() changes input lists and may destruct cached plan. */ @@ -222,164 +371,181 @@ static bool learnOnPlanState(PlanState *p, void *context) { aqo_obj_stat *ctx = (aqo_obj_stat *) context; - aqo_obj_stat SubplanCtx = {NIL, NIL, NIL, ctx->learn}; + aqo_obj_stat SubplanCtx = {NIL, NIL, NIL, ctx->learn, ctx->isTimedOut}; + double predicted = 0.; + double learn_rows = 0.; + AQOPlanNode *aqo_node; + bool notExecuted = false; + + /* Recurse into subtree and collect clauses. */ + if (learn_subplan_recurse(p, &SubplanCtx)) + /* If something goes wrong, return quickly. */ + return true; - planstate_tree_walker(p, learnOnPlanState, (void *) &SubplanCtx); + if ((aqo_node = get_aqo_plan_node(p->plan, false)) == NULL) + /* + * Skip the node even for error calculation. It can be incorrect in the + * case of parallel workers (parallel_divisor not known). 
+ */ + goto end; + + /* + * Compute real value of rows, passed through this node. Summarize rows + * for parallel workers. + * If 'never executed' node will be found - set specific sign, because we + * allow to learn on such node only once. + */ + if (p->instrument->nloops > 0.) + { + /* If we can strongly calculate produced rows, do it. */ + if (p->worker_instrument && + IsParallelTuplesProcessing(p->plan, aqo_node->parallel_divisor > 0)) + { + double wnloops = 0.; + double wntuples = 0.; + int i; + + for (i = 0; i < p->worker_instrument->num_workers; i++) + { + double t = p->worker_instrument->instrument[i].ntuples; + double l = p->worker_instrument->instrument[i].nloops; + + if (l <= 0) + continue; + + wntuples += t; + wnloops += l; + learn_rows += t/l; + } + + Assert(p->instrument->nloops >= wnloops); + Assert(p->instrument->ntuples >= wntuples); + if (p->instrument->nloops - wnloops > 0.5) + learn_rows += (p->instrument->ntuples - wntuples) / + (p->instrument->nloops - wnloops); + } + else + /* This node does not required to sum tuples of each worker + * to calculate produced rows. */ + learn_rows = p->instrument->ntuples / p->instrument->nloops; + } + else + { + /* The case of 'not executed' node. */ + learn_rows = 1.; + notExecuted = true; + } + + /* + * Calculate predicted cardinality. + * We could find a positive value of predicted cardinality in the case of + * reusing plan caused by the rewriting procedure. + * Also it may be caused by using of a generic plan. + */ + if (aqo_node->prediction > 0. && query_context.use_aqo) + { + /* AQO made prediction. use it. */ + predicted = aqo_node->prediction; + } + else if (IsParallelTuplesProcessing(p->plan, aqo_node->parallel_divisor > 0)) + /* + * AQO didn't make a prediction and we need to calculate real number + * of tuples passed because of parallel workers. + */ + predicted = p->plan->plan_rows * aqo_node->parallel_divisor; + else + /* No AQO prediction. Parallel workers not used for this plan node. 
*/ + predicted = p->plan->plan_rows; + + if (!ctx->learn && query_context.collect_stat) + { + double p,l; + + /* Special case of forced gathering of statistics. */ + Assert(predicted >= 0 && learn_rows >= 0); + p = (predicted < 1) ? 0 : log(predicted); + l = (learn_rows < 1) ? 0 : log(learn_rows); + cardinality_sum_errors += fabs(p - l); + cardinality_num_objects += 1; + return false; + } + else if (!ctx->learn) + return true; + + /* + * Need learn. + */ + + /* + * It is needed for correct exp(result) calculation. + * Do it before cardinality error estimation because we can predict no less + * than 1 tuple, but get zero tuples. + */ + predicted = clamp_row_est(predicted); + learn_rows = clamp_row_est(learn_rows); + + /* Exclude "not executed" nodes from error calculation to reduce fluctuations. */ + if (!notExecuted) + { + cardinality_sum_errors += fabs(log(predicted) - log(learn_rows)); + cardinality_num_objects += 1; + } /* * Some nodes inserts after planning step (See T_Hash node type). - * In this case we have'nt AQO prediction and fss record. + * In this case we haven't AQO prediction and fss record. */ - if (p->plan->had_path) + if (aqo_node->had_path) { List *cur_selectivities; - cur_selectivities = restore_selectivities(p->plan->path_clauses, - p->plan->path_relids, - p->plan->path_jointype, - p->plan->was_parametrized); + cur_selectivities = restore_selectivities(aqo_node->clauses, + aqo_node->rels.hrels, + aqo_node->jointype, + aqo_node->was_parametrized); SubplanCtx.selectivities = list_concat(SubplanCtx.selectivities, cur_selectivities); SubplanCtx.clauselist = list_concat(SubplanCtx.clauselist, - list_copy(p->plan->path_clauses)); + list_copy(aqo_node->clauses)); - if (p->plan->path_relids != NIL) + if (aqo_node->rels.hrels != NIL) + { /* - * This plan can be stored as cached plan. In the case we will have + * This plan can be stored as a cached plan. 
In the case we will have * bogus path_relids field (changed by list_concat routine) at the * next usage (and aqo-learn) of this plan. */ - ctx->relidslist = list_copy(p->plan->path_relids); + ctx->relidslist = list_copy(aqo_node->rels.hrels); - if (p->instrument && (p->righttree != NULL || p->lefttree == NULL || - p->plan->path_clauses != NIL)) - { - double learn_rows = 0.; - double predicted = 0.; - - if (p->instrument->nloops > 0.) + if (p->instrument) { - /* If we can strongly calculate produced rows, do it. */ - if (p->worker_instrument && IsParallelTuplesProcessing(p->plan)) + double rfactor = 1.; + + Assert(predicted >= 1. && learn_rows >= 1.); + + if (should_learn(p, aqo_node, ctx, predicted, learn_rows, &rfactor)) { - double wnloops = 0.; - double wntuples = 0.; - int i; - - for (i = 0; i < p->worker_instrument->num_workers; i++) - { - double t = p->worker_instrument->instrument[i].ntuples; - double l = p->worker_instrument->instrument[i].nloops; - - if (l <= 0) - continue; - - wntuples += t; - wnloops += l; - learn_rows += t/l; - } - - Assert(p->instrument->nloops >= wnloops); - Assert(p->instrument->ntuples >= wntuples); - if (p->instrument->nloops - wnloops > 0.5) - learn_rows += (p->instrument->ntuples - wntuples) / - (p->instrument->nloops - wnloops); + if (IsA(p, AggState)) + learn_agg_sample(&SubplanCtx, + &aqo_node->rels, learn_rows, rfactor, + p->plan, notExecuted); + + else + learn_sample(&SubplanCtx, + &aqo_node->rels, learn_rows, rfactor, + p->plan, notExecuted); } - else - /* This node does not required to sum tuples of each worker - * to calculate produced rows. */ - learn_rows = p->instrument->ntuples / p->instrument->nloops; - - if (p->plan->predicted_cardinality > 0.) 
- predicted = p->plan->predicted_cardinality; - else if (IsParallelTuplesProcessing(p->plan)) - predicted = p->plan->plan_rows * - get_parallel_divisor(p->plan->path_parallel_workers); - else - predicted = p->plan->plan_rows; - - /* It is needed for correct exp(result) calculation. */ - predicted = clamp_row_est(predicted); - learn_rows = clamp_row_est(learn_rows); } - else - { - /* - * LAV: I found two cases for this code: - * 1. if query returns with error. - * 2. plan node has never visited. - * Both cases can't be used to learning AQO because give an - * incorrect number of rows. - */ - elog(PANIC, "AQO: impossible situation"); - } - - Assert(predicted >= 1 && learn_rows >= 1); - cardinality_sum_errors += fabs(log(predicted) - log(learn_rows)); - cardinality_num_objects += 1; - - /* - * A subtree was not visited. In this case we can not teach AQO - * because ntuples value is equal to 0 and we will got - * learn rows == 1. - * It is false knowledge: at another place of a plan, scanning of - * the node may produce many tuples. - */ - Assert(p->instrument->nloops >= 1); - - if (ctx->learn) - learn_sample(SubplanCtx.clauselist, SubplanCtx.selectivities, - p->plan->path_relids, learn_rows, predicted); } } +end: ctx->clauselist = list_concat(ctx->clauselist, SubplanCtx.clauselist); ctx->selectivities = list_concat(ctx->selectivities, - SubplanCtx.selectivities); + SubplanCtx.selectivities); return false; } -/* - * Updates given row of query statistics. - */ -void -update_query_stat_row(double *et, int *et_size, - double *pt, int *pt_size, - double *ce, int *ce_size, - double planning_time, - double execution_time, - double cardinality_error, - int64 *n_exec) -{ - int i; - - /* - * If plan contains one or more "never visited" nodes, cardinality_error - * have -1 value and will be written to the knowledge base. User can use it - * as a sign that AQO ignores this query. 
- */ - if (*ce_size >= aqo_stat_size) - for (i = 1; i < aqo_stat_size; ++i) - ce[i - 1] = ce[i]; - *ce_size = (*ce_size >= aqo_stat_size) ? aqo_stat_size : (*ce_size + 1); - ce[*ce_size - 1] = cardinality_error; - - if (*et_size >= aqo_stat_size) - for (i = 1; i < aqo_stat_size; ++i) - et[i - 1] = et[i]; - - *et_size = (*et_size >= aqo_stat_size) ? aqo_stat_size : (*et_size + 1); - et[*et_size - 1] = execution_time; - - if (*pt_size >= aqo_stat_size) - for (i = 1; i < aqo_stat_size; ++i) - pt[i - 1] = pt[i]; - - *pt_size = (*pt_size >= aqo_stat_size) ? aqo_stat_size : (*pt_size + 1); - pt[*pt_size - 1] = planning_time; - (*n_exec)++; -} - /***************************************************************************** * * QUERY EXECUTION STATISTICS COLLECTING HOOKS @@ -389,20 +555,47 @@ update_query_stat_row(double *et, int *et_size, /* * Set up flags to store cardinality statistics. */ -void +static void aqo_ExecutorStart(QueryDesc *queryDesc, int eflags) { - instr_time current_time; + instr_time now; bool use_aqo; - use_aqo = !IsParallelWorker() && (query_context.use_aqo || - query_context.learn_aqo || force_collect_stat); + /* + * If the plan pulled from a plan cache, planning don't needed. Restore + * query context from the query environment. + */ + if (ExtractFromQueryEnv(queryDesc)) + Assert(INSTR_TIME_IS_ZERO(query_context.start_planning_time)); + + use_aqo = !IsQueryDisabled() && !IsParallelWorker() && + (query_context.use_aqo || query_context.learn_aqo || + force_collect_stat); if (use_aqo) { - INSTR_TIME_SET_CURRENT(current_time); - INSTR_TIME_SUBTRACT(current_time, query_context.query_starttime); - query_context.query_planning_time = INSTR_TIME_GET_DOUBLE(current_time); + if (!INSTR_TIME_IS_ZERO(query_context.start_planning_time)) + { + INSTR_TIME_SET_CURRENT(now); + INSTR_TIME_SUBTRACT(now, query_context.start_planning_time); + query_context.planning_time = INSTR_TIME_GET_DOUBLE(now); + } + else + /* + * Should set anyway. 
It will be stored in a query env. The query + * can be reused later by extracting from a plan cache. + */ + query_context.planning_time = -1; + + /* + * To zero this timestamp preventing a false time calculation in the + * case, when the plan was got from a plan cache. + */ + INSTR_TIME_SET_ZERO(query_context.start_planning_time); + + /* Make a timestamp for execution stage. */ + INSTR_TIME_SET_CURRENT(now); + query_context.start_execution_time = now; query_context.explain_only = ((eflags & EXEC_FLAG_EXPLAIN_ONLY) != 0); @@ -411,37 +604,171 @@ aqo_ExecutorStart(QueryDesc *queryDesc, int eflags) queryDesc->instrument_options |= INSTRUMENT_ROWS; /* Save all query-related parameters into the query context. */ - StoreToQueryContext(queryDesc); + StoreToQueryEnv(queryDesc); } - if (prev_ExecutorStart_hook) - prev_ExecutorStart_hook(queryDesc, eflags); - else - standard_ExecutorStart(queryDesc, eflags); + (*aqo_ExecutorStart_next)(queryDesc, eflags); - /* Plan state has initialized */ if (use_aqo) StorePlanInternals(queryDesc); } +#include "utils/timeout.h" + +static struct +{ + TimeoutId id; + QueryDesc *queryDesc; +} timeoutCtl = {0, NULL}; + +static int exec_nested_level = 0; + +static void +aqo_timeout_handler(void) +{ + MemoryContext oldctx = MemoryContextSwitchTo(AQOLearnMemCtx); + aqo_obj_stat ctx = {NIL, NIL, NIL, false, false}; + + if (CritSectionCount > 0 || !timeoutCtl.queryDesc || + !ExtractFromQueryEnv(timeoutCtl.queryDesc)) + { + MemoryContextSwitchTo(oldctx); + return; + } + + /* Now we can analyze execution state of the query. */ + + ctx.learn = query_context.learn_aqo; + ctx.isTimedOut = true; + + if (aqo_statement_timeout == 0) + elog(NOTICE, "[AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data."); + else + elog(NOTICE, "[AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data. 
Timeout is "INT64_FORMAT, max_timeout_value); + + learnOnPlanState(timeoutCtl.queryDesc->planstate, (void *) &ctx); + MemoryContextSwitchTo(oldctx); +} + +/* + * Function for updating smart statement timeout + */ +static int64 +increase_smart_timeout() +{ + int64 smart_timeout_fin_time = (query_context.smart_timeout + 1) * pow(growth_rate, query_context.count_increase_timeout); + + if (query_context.smart_timeout == max_timeout_value && !update_query_timeout(query_context.query_hash, smart_timeout_fin_time)) + elog(NOTICE, "[AQO] Timeout is not updated!"); + + return smart_timeout_fin_time; +} + +static bool +set_timeout_if_need(QueryDesc *queryDesc) +{ + int64 fintime = (int64) get_timeout_finish_time(STATEMENT_TIMEOUT)-1; + + if (aqo_learn_statement_timeout_enable && aqo_statement_timeout > 0) + { + max_timeout_value = Min(query_context.smart_timeout, (int64) aqo_statement_timeout); + if (max_timeout_value > fintime) + { + max_timeout_value = fintime; + } + } + else + { + max_timeout_value = fintime; + } + + if (IsParallelWorker()) + /* + * AQO timeout should stop only main worker. Other workers would be + * terminated by a regular ERROR machinery. + */ + return false; + + if (!get_timeout_active(STATEMENT_TIMEOUT) || !aqo_learn_statement_timeout_enable) + return false; + + if (!ExtractFromQueryEnv(queryDesc)) + return false; + + if (IsQueryDisabled() || IsParallelWorker() || + !(query_context.use_aqo || query_context.learn_aqo)) + return false; + + /* + * Statement timeout exists. AQO should create user timeout right before the + * timeout. + */ + + if (timeoutCtl.id < USER_TIMEOUT) + /* Register once per backend, because of timeouts implementation. */ + timeoutCtl.id = RegisterTimeout(USER_TIMEOUT, aqo_timeout_handler); + else + Assert(!get_timeout_active(timeoutCtl.id)); + + enable_timeout_at(timeoutCtl.id, (TimestampTz) max_timeout_value); + + /* Save pointer to queryDesc to use at learning after a timeout interruption. 
*/ + timeoutCtl.queryDesc = queryDesc; + return true; +} + +/* + * ExecutorRun hook. + */ +static void +aqo_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, uint64 count, + bool execute_once) +{ + bool timeout_enabled = false; + + if (exec_nested_level <= 0) + timeout_enabled = set_timeout_if_need(queryDesc); + + Assert(!timeout_enabled || + (timeoutCtl.queryDesc && timeoutCtl.id >= USER_TIMEOUT)); + + exec_nested_level++; + + PG_TRY(); + { + (*aqo_ExecutorRun_next)(queryDesc, direction, count, execute_once); + } + PG_FINALLY(); + { + exec_nested_level--; + timeoutCtl.queryDesc = NULL; + + if (timeout_enabled) + disable_timeout(timeoutCtl.id, false); + } + PG_END_TRY(); +} + /* * General hook which runs before ExecutorEnd and collects query execution * cardinality statistics. * Also it updates query execution statistics in aqo_query_stat. */ -void +static void aqo_ExecutorEnd(QueryDesc *queryDesc) { - double totaltime; - double cardinality_error; - QueryStat *stat = NULL; - instr_time endtime; - EphemeralNamedRelation enr = get_ENR(queryDesc->queryEnv, PlanStateInfo); + double execution_time; + double cardinality_error; + StatEntry *stat; + instr_time endtime; + EphemeralNamedRelation enr = get_ENR(queryDesc->queryEnv, PlanStateInfo); + MemoryContext oldctx = MemoryContextSwitchTo(AQOLearnMemCtx); cardinality_sum_errors = 0.; cardinality_num_objects = 0; + njoins = -1; - if (!ExtractFromQueryContext(queryDesc)) + if (IsQueryDisabled() || !ExtractFromQueryEnv(queryDesc)) /* AQO keep all query-related preferences at the query context. * It is needed to prevent from possible recursive changes, at * preprocessing stage of subqueries. 
@@ -460,167 +787,125 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) query_context.collect_stat = false; } - if ((query_context.learn_aqo || query_context.collect_stat) && - !HasNeverExecutedNodes(queryDesc->planstate, NULL)) + if (query_context.learn_aqo || + (!query_context.learn_aqo && query_context.collect_stat)) { - aqo_obj_stat ctx = {NIL, NIL, NIL, query_context.learn_aqo}; + aqo_obj_stat ctx = {NIL, NIL, NIL, query_context.learn_aqo, false}; + /* + * Analyze plan if AQO need to learn or need to collect statistics only. + */ learnOnPlanState(queryDesc->planstate, (void *) &ctx); - list_free(ctx.clauselist); - list_free(ctx.relidslist); - list_free(ctx.selectivities); } + /* Calculate execution time. */ + INSTR_TIME_SET_CURRENT(endtime); + INSTR_TIME_SUBTRACT(endtime, query_context.start_execution_time); + execution_time = INSTR_TIME_GET_DOUBLE(endtime); + + if (cardinality_num_objects > 0) + cardinality_error = cardinality_sum_errors / cardinality_num_objects; + else + cardinality_error = -1; + if (query_context.collect_stat) { - INSTR_TIME_SET_CURRENT(endtime); - INSTR_TIME_SUBTRACT(endtime, query_context.query_starttime); - totaltime = INSTR_TIME_GET_DOUBLE(endtime); - if (cardinality_num_objects > 0) - cardinality_error = cardinality_sum_errors / cardinality_num_objects; - else - cardinality_error = -1; + /* + * aqo_stat_store() is used in 'append' mode. + * 'AqoStatArgs' fields execs_with_aqo, execs_without_aqo, + * cur_stat_slot, cur_stat_slot_aqo are not used in this + * mode and dummy values(0) are set in this case. 
+ */ + AqoStatArgs stat_arg = { 0, 0, 0, + &execution_time, &query_context.planning_time, &cardinality_error, + 0, + &execution_time, &query_context.planning_time, &cardinality_error}; - stat = get_aqo_stat(query_context.query_hash); + /* Write AQO statistics to the aqo_query_stat table */ + stat = aqo_stat_store(query_context.query_hash, + query_context.use_aqo, + &stat_arg, true); if (stat != NULL) { - if (query_context.use_aqo) - update_query_stat_row(stat->execution_time_with_aqo, - &stat->execution_time_with_aqo_size, - stat->planning_time_with_aqo, - &stat->planning_time_with_aqo_size, - stat->cardinality_error_with_aqo, - &stat->cardinality_error_with_aqo_size, - query_context.query_planning_time, - totaltime - query_context.query_planning_time, - cardinality_error, - &stat->executions_with_aqo); - else - update_query_stat_row(stat->execution_time_without_aqo, - &stat->execution_time_without_aqo_size, - stat->planning_time_without_aqo, - &stat->planning_time_without_aqo_size, - stat->cardinality_error_without_aqo, - &stat->cardinality_error_without_aqo_size, - query_context.query_planning_time, - totaltime - query_context.query_planning_time, - cardinality_error, - &stat->executions_without_aqo); - } - } - selectivity_cache_clear(); + Assert(!query_context.use_aqo || stat->cur_stat_slot_aqo > 0); + /* If query used aqo, increase smart timeout if needed */ + if (query_context.use_aqo && + aqo_learn_statement_timeout_enable && + aqo_statement_timeout > 0 && + stat->est_error_aqo[stat->cur_stat_slot_aqo-1] - + cardinality_sum_errors/(1 + cardinality_num_objects) >= SMART_TIMEOUT_ERROR_THRESHOLD) + { + int64 fintime = increase_smart_timeout(); + elog(NOTICE, "[AQO] Time limit for execution of the statement was increased. Current timeout is "UINT64_FORMAT, fintime); + } - /* - * Store all learn data into the AQO service relations. 
- */ - if ((query_context.collect_stat) && (stat != NULL)) - { - if (!query_context.adding_query && query_context.auto_tuning) - automatical_query_tuning(query_context.query_hash, stat); + /* Store all learn data into the AQO service relations. */ + if (!query_context.adding_query && query_context.auto_tuning) + automatical_query_tuning(query_context.query_hash, stat); - update_aqo_stat(query_context.fspace_hash, stat); - pfree_query_stat(stat); + pfree(stat); + } } - RemoveFromQueryContext(queryDesc); + + cur_classes = ldelete_uint64(cur_classes, query_context.query_hash); end: - if (prev_ExecutorEnd_hook) - prev_ExecutorEnd_hook(queryDesc); - else - standard_ExecutorEnd(queryDesc); + /* Release all AQO-specific memory, allocated during learning procedure */ + selectivity_cache_clear(); + MemoryContextSwitchTo(oldctx); + MemoryContextReset(AQOLearnMemCtx); + + (*aqo_ExecutorEnd_next)(queryDesc); /* * standard_ExecutorEnd clears the queryDesc->planstate. After this point no * one operation with the plan can be made. */ -} - -/* - * Converts path info into plan node for collecting it after query execution. - */ -void -aqo_copy_generic_path_info(PlannerInfo *root, Plan *dest, Path *src) -{ - bool is_join_path; - - if (prev_copy_generic_path_info_hook) - prev_copy_generic_path_info_hook(root, dest, src); - - is_join_path = (src->type == T_NestPath || src->type == T_MergePath || - src->type == T_HashPath); - - if (dest->had_path) - { - /* - * The convention is that any extension that sets had_path is also - * responsible for setting path_clauses, path_jointype, path_relids, - * path_parallel_workers, and was_parameterized. 
- */ - Assert(dest->path_clauses && dest->path_jointype && - dest->path_relids && dest->path_parallel_workers); - return; - } - - if (is_join_path) - { - dest->path_clauses = ((JoinPath *) src)->joinrestrictinfo; - dest->path_jointype = ((JoinPath *) src)->jointype; - } - else - { - dest->path_clauses = list_concat( - list_copy(src->parent->baserestrictinfo), - src->param_info ? src->param_info->ppi_clauses : NIL); - dest->path_jointype = JOIN_INNER; - } - - dest->path_relids = get_list_of_relids(root, src->parent->relids); - dest->path_parallel_workers = src->parallel_workers; - dest->was_parametrized = (src->param_info != NULL); - if (src->param_info) - { - dest->predicted_cardinality = src->param_info->predicted_ppi_rows; - dest->fss_hash = src->param_info->fss_ppi_hash; - } - else - { - dest->predicted_cardinality = src->parent->predicted_cardinality; - dest->fss_hash = src->parent->fss_hash; - } - - dest->had_path = true; + timeoutCtl.queryDesc = NULL; } /* - * Store into query environment field AQO data related to the query. + * Store into a query environment field an AQO data related to the query. * We introduce this machinery to avoid problems with subqueries, induced by * top-level query. + * If such enr exists, routine will replace it with current value of the + * query context. */ static void -StoreToQueryContext(QueryDesc *queryDesc) +StoreToQueryEnv(QueryDesc *queryDesc) { EphemeralNamedRelation enr; int qcsize = sizeof(QueryContextData); - MemoryContext oldCxt; + bool newentry = false; + MemoryContext oldctx = MemoryContextSwitchTo(AQOCacheMemCtx); - oldCxt = MemoryContextSwitchTo(AQOMemoryContext); - enr = palloc0(sizeof(EphemeralNamedRelationData)); if (queryDesc->queryEnv == NULL) queryDesc->queryEnv = create_queryEnv(); + Assert(queryDesc->queryEnv); + enr = get_ENR(queryDesc->queryEnv, AQOPrivateData); + if (enr == NULL) + { + /* If such query environment don't exists, allocate new. 
*/ + enr = palloc0(sizeof(EphemeralNamedRelationData)); + newentry = true; + } + enr->md.name = AQOPrivateData; enr->md.enrtuples = 0; enr->md.enrtype = 0; enr->md.reliddesc = InvalidOid; enr->md.tupdesc = NULL; - enr->reldata = palloc0(qcsize); + Assert(enr->reldata != NULL); memcpy(enr->reldata, &query_context, qcsize); - register_ENR(queryDesc->queryEnv, enr); - MemoryContextSwitchTo(oldCxt); + if (newentry) + register_ENR(queryDesc->queryEnv, enr); + + MemoryContextSwitchTo(oldctx); } static bool @@ -642,15 +927,23 @@ static void StorePlanInternals(QueryDesc *queryDesc) { EphemeralNamedRelation enr; - MemoryContext oldCxt; + bool newentry = false; + MemoryContext oldctx = MemoryContextSwitchTo(AQOCacheMemCtx); njoins = 0; - planstate_tree_walker(queryDesc->planstate, calculateJoinNum, &njoins); + calculateJoinNum(queryDesc->planstate, &njoins); - oldCxt = MemoryContextSwitchTo(AQOMemoryContext); - enr = palloc0(sizeof(EphemeralNamedRelationData)); if (queryDesc->queryEnv == NULL) - queryDesc->queryEnv = create_queryEnv(); + queryDesc->queryEnv = create_queryEnv(); + + Assert(queryDesc->queryEnv); + enr = get_ENR(queryDesc->queryEnv, PlanStateInfo); + if (enr == NULL) + { + /* If such query environment field doesn't exist, allocate new. */ + enr = palloc0(sizeof(EphemeralNamedRelationData)); + newentry = true; + } enr->md.name = PlanStateInfo; enr->md.enrtuples = 0; @@ -658,16 +951,20 @@ StorePlanInternals(QueryDesc *queryDesc) enr->md.reliddesc = InvalidOid; enr->md.tupdesc = NULL; enr->reldata = palloc0(sizeof(int)); + Assert(enr->reldata != NULL); memcpy(enr->reldata, &njoins, sizeof(int)); - register_ENR(queryDesc->queryEnv, enr); - MemoryContextSwitchTo(oldCxt); + + if (newentry) + register_ENR(queryDesc->queryEnv, enr); + + MemoryContextSwitchTo(oldctx); } /* * Restore AQO data, related to the query. 
*/ static bool -ExtractFromQueryContext(QueryDesc *queryDesc) +ExtractFromQueryEnv(QueryDesc *queryDesc) { EphemeralNamedRelation enr; @@ -684,79 +981,148 @@ ExtractFromQueryContext(QueryDesc *queryDesc) if (enr == NULL) return false; + Assert(enr->reldata != NULL); memcpy(&query_context, enr->reldata, sizeof(QueryContextData)); return true; } +/* + * Prints if the plan was constructed with AQO. + */ static void -RemoveFromQueryContext(QueryDesc *queryDesc) +print_into_explain(PlannedStmt *plannedstmt, IntoClause *into, + ExplainState *es, const char *queryString, + ParamListInfo params, const instr_time *planduration, + QueryEnvironment *queryEnv) { - EphemeralNamedRelation enr = get_ENR(queryDesc->queryEnv, AQOPrivateData); - unregister_ENR(queryDesc->queryEnv, AQOPrivateData); - pfree(enr->reldata); - pfree(enr); + if (aqo_ExplainOnePlan_next) + (*aqo_ExplainOnePlan_next)(plannedstmt, into, es, queryString, + params, planduration, queryEnv); - /* Remove the plan state internals */ - enr = get_ENR(queryDesc->queryEnv, PlanStateInfo); - unregister_ENR(queryDesc->queryEnv, PlanStateInfo); - pfree(enr->reldata); - pfree(enr); + if (!(aqo_mode != AQO_MODE_DISABLED || force_collect_stat) || + !aqo_show_details) + return; + + /* Report to user about aqo state only in verbose mode */ + ExplainPropertyBool("Using aqo", query_context.use_aqo, es); + + switch (aqo_mode) + { + case AQO_MODE_INTELLIGENT: + ExplainPropertyText("AQO mode", "INTELLIGENT", es); + break; + case AQO_MODE_FORCED: + ExplainPropertyText("AQO mode", "FORCED", es); + break; + case AQO_MODE_CONTROLLED: + ExplainPropertyText("AQO mode", "CONTROLLED", es); + break; + case AQO_MODE_LEARN: + ExplainPropertyText("AQO mode", "LEARN", es); + break; + case AQO_MODE_FROZEN: + ExplainPropertyText("AQO mode", "FROZEN", es); + break; + case AQO_MODE_DISABLED: + ExplainPropertyText("AQO mode", "DISABLED", es); + break; + default: + elog(ERROR, "Bad AQO state"); + break; + } + + /* + * Query class provides an user 
the conveniently use of the AQO + * auxiliary functions. + */ + if (aqo_show_hash) + ExplainPropertyInteger("Query hash", NULL, + (int64) query_context.query_hash, es); + ExplainPropertyInteger("JOINS", NULL, njoins, es); } -/* - * Prints if the plan was constructed with AQO. - */ -void print_into_explain(PlannedStmt *plannedstmt, IntoClause *into, - ExplainState *es, const char *queryString, - ParamListInfo params, const instr_time *planduration, - QueryEnvironment *queryEnv) +static void +print_node_explain(ExplainState *es, PlanState *ps, Plan *plan) { - if (prev_ExplainOnePlan_hook) - prev_ExplainOnePlan_hook(plannedstmt, into, es, queryString, - params, planduration, queryEnv); + int wrkrs = 1; + double error = -1.; + AQOPlanNode *aqo_node; -#ifdef AQO_EXPLAIN - /* Report to user about aqo state only in verbose mode */ - if (es->verbose) + /* Extension, which took a hook early can be executed early too. */ + if (aqo_ExplainOneNode_next) + (*aqo_ExplainOneNode_next)(es, ps, plan); + + if (IsQueryDisabled() || !plan || es->format != EXPLAIN_FORMAT_TEXT) + return; + + if ((aqo_node = get_aqo_plan_node(plan, false)) == NULL) + return; + + if (!aqo_show_details || !ps) + goto explain_end; + + if (!ps->instrument) + /* We can show only prediction, without error calculation */ + goto explain_print; + + if (ps->worker_instrument && + IsParallelTuplesProcessing(plan, aqo_node->parallel_divisor > 0)) { - ExplainPropertyBool("Using aqo", query_context.use_aqo, es); + int i; - switch (aqo_mode) + for (i = 0; i < ps->worker_instrument->num_workers; i++) { - case AQO_MODE_INTELLIGENT: - ExplainPropertyText("AQO mode", "INTELLIGENT", es); - break; - case AQO_MODE_FORCED: - ExplainPropertyText("AQO mode", "FORCED", es); - break; - case AQO_MODE_CONTROLLED: - ExplainPropertyText("AQO mode", "CONTROLLED", es); - break; - case AQO_MODE_LEARN: - ExplainPropertyText("AQO mode", "LEARN", es); - break; - case AQO_MODE_FROZEN: - ExplainPropertyText("AQO mode", "FROZEN", es); - break; 
- case AQO_MODE_DISABLED: - ExplainPropertyText("AQO mode", "DISABLED", es); - break; - default: - elog(ERROR, "Bad AQO state"); - break; + Instrumentation *instrument = &ps->worker_instrument->instrument[i]; + + if (instrument->nloops <= 0) + continue; + + wrkrs++; } + } - /* - * Query hash provides an user the conveniently use of the AQO - * auxiliary functions. - */ - if (aqo_mode != AQO_MODE_DISABLED || force_collect_stat) +explain_print: + appendStringInfoChar(es->str, '\n'); + if (es->str->len == 0 || es->str->data[es->str->len - 1] == '\n') + appendStringInfoSpaces(es->str, es->indent * 2); + + if (aqo_node->prediction > 0.) + { + appendStringInfo(es->str, "AQO: rows=%.0lf", aqo_node->prediction); + + if (ps->instrument && ps->instrument->nloops > 0.) { - ExplainPropertyInteger("Query hash", NULL, - query_context.query_hash, es); - ExplainPropertyInteger("JOINS", NULL, njoins, es); + double rows = ps->instrument->ntuples / ps->instrument->nloops; + + error = 100. * (aqo_node->prediction - (rows*wrkrs)) + / aqo_node->prediction; + appendStringInfo(es->str, ", error=%.0lf%%", error); } } -#endif + else + appendStringInfo(es->str, "AQO not used"); + +explain_end: + /* XXX: Do we really have situations when the plan is a NULL pointer? */ + if (plan && aqo_show_hash) + appendStringInfo(es->str, ", fss=%d", aqo_node->fss); +} + +void +aqo_postprocessing_init(void) +{ + /* Executor hooks */ + aqo_ExecutorStart_next = ExecutorStart_hook ? ExecutorStart_hook : standard_ExecutorStart; + ExecutorStart_hook = aqo_ExecutorStart; + aqo_ExecutorRun_next = ExecutorRun_hook ? ExecutorRun_hook : standard_ExecutorRun; + ExecutorRun_hook = aqo_ExecutorRun; + aqo_ExecutorEnd_next = ExecutorEnd_hook ? ExecutorEnd_hook : standard_ExecutorEnd; + ExecutorEnd_hook = aqo_ExecutorEnd; + + /* Service hooks. 
*/ + aqo_ExplainOnePlan_next = ExplainOnePlan_hook; + ExplainOnePlan_hook = print_into_explain; + aqo_ExplainOneNode_next = ExplainOneNode_hook; + ExplainOneNode_hook = print_node_explain; } diff --git a/preprocessing.c b/preprocessing.c index 79097a92..5d7053ae 100644 --- a/preprocessing.c +++ b/preprocessing.c @@ -11,7 +11,7 @@ * 'use_aqo': whether to use AQO estimations in query optimization * 'learn_aqo': whether to update AQO data based on query execution * statistics - * 'fspace_hash': hash of feature space to use with given query + * 'fs': hash of feature space to use with given query * 'auto_tuning': whether AQO may change use_aqo and learn_aqo values * for the next execution of such type of query using * its self-tuning algorithm @@ -49,62 +49,50 @@ * ******************************************************************************* * - * Copyright (c) 2016-2020, Postgres Professional + * Copyright (c) 2016-2023, Postgres Professional * * IDENTIFICATION * aqo/preprocessing.c * */ -#include "aqo.h" +#include "postgres.h" + #include "access/parallel.h" #include "access/table.h" #include "commands/extension.h" +#include "parser/scansup.h" +#include "aqo.h" +#include "hash.h" +#include "storage.h" -static bool isQueryUsingSystemRelation(Query *query); -static bool isQueryUsingSystemRelation_walker(Node *node, void *context); +/* List of feature spaces, that are processing in this backend. */ +List *cur_classes = NIL; -/* - * Saves query text into query_text variable. - * Query text field in aqo_queries table is for user. - */ -void -get_query_text(ParseState *pstate, Query *query) -{ - MemoryContext oldCxt; +int aqo_join_threshold = 3; - /* - * Duplicate query string into private AQO memory context for guard - * from possible memory context switching. 
- */ - oldCxt = MemoryContextSwitchTo(AQOMemoryContext); - if (pstate) - query_text = pstrdup(pstate->p_sourcetext); - MemoryContextSwitchTo(oldCxt); +bool aqo_learn_statement_timeout_enable = false; - if (prev_post_parse_analyze_hook) - prev_post_parse_analyze_hook(pstate, query); -} +static planner_hook_type aqo_planner_next = NULL; +static post_parse_analyze_hook_type aqo_post_parse_analyze_hook = NULL; + +static void disable_aqo_for_query(void); +static bool isQueryUsingSystemRelation(Query *query); +static bool isQueryUsingSystemRelation_walker(Node *node, void *context); /* - * Calls standard query planner or its previous hook. + * Can AQO be used for the query? */ -PlannedStmt * -call_default_planner(Query *parse, - const char *query_string, - int cursorOptions, - ParamListInfo boundParams) +static bool +aqoIsEnabled(Query *parse) { - if (prev_planner_hook) - return prev_planner_hook(parse, - query_string, - cursorOptions, - boundParams); - else - return standard_planner(parse, - query_string, - cursorOptions, - boundParams); + if (creating_extension || + (aqo_mode == AQO_MODE_DISABLED && !force_collect_stat) || + (parse->commandType != CMD_SELECT && parse->commandType != CMD_INSERT && + parse->commandType != CMD_UPDATE && parse->commandType != CMD_DELETE)) + return false; + + return true; } /* @@ -115,54 +103,76 @@ call_default_planner(Query *parse, * Creates an entry in aqo_queries for new type of query if it is * necessary, i. e. AQO mode is "intelligent". 
*/ -PlannedStmt * -aqo_planner(Query *parse, - const char *query_string, - int cursorOptions, +static PlannedStmt * +aqo_planner(Query *parse, const char *query_string, int cursorOptions, ParamListInfo boundParams) { - bool query_is_stored; - Datum query_params[5]; - bool query_nulls[5] = {false, false, false, false, false}; + bool query_is_stored = false; + MemoryContext oldctx; - selectivity_cache_clear(); + oldctx = MemoryContextSwitchTo(AQOPredictMemCtx); /* * We do not work inside an parallel worker now by reason of insert into - * the heap during planning. Transactions is synchronized between parallel + * the heap during planning. Transactions are synchronized between parallel * sections. See GetCurrentCommandId() comments also. */ - if ((parse->commandType != CMD_SELECT && parse->commandType != CMD_INSERT && - parse->commandType != CMD_UPDATE && parse->commandType != CMD_DELETE) || - get_extension_oid("aqo", true) == InvalidOid || - creating_extension || - IsParallelWorker() || - (aqo_mode == AQO_MODE_DISABLED && !force_collect_stat) || + if (!aqoIsEnabled(parse) || + IsInParallelMode() || IsParallelWorker() || + strstr(application_name, "postgres_fdw") != NULL || /* Prevent distributed deadlocks */ + strstr(application_name, "pgfdw:") != NULL || /* caused by fdw */ isQueryUsingSystemRelation(parse) || RecoveryInProgress()) { + /* + * We should disable AQO for this query to remember this decision along + * all execution stages. 
+ */ + MemoryContextSwitchTo(oldctx); + disable_aqo_for_query(); + query_context.query_hash = 0; + + return (*aqo_planner_next)(parse, query_string, cursorOptions, boundParams); + } + + selectivity_cache_clear(); + + query_context.query_hash = get_query_hash(parse, query_string); + + /* By default, they should be equal */ + query_context.fspace_hash = query_context.query_hash; + + if (query_is_deactivated(query_context.query_hash) || + list_member_uint64(cur_classes,query_context.query_hash)) + { + /* + * Disable AQO for deactivated query or for query belonged to a + * feature space, that is processing yet (disallow invalidation + * recursion, as an example). + */ + MemoryContextSwitchTo(oldctx); disable_aqo_for_query(); - return call_default_planner(parse, - query_string, - cursorOptions, - boundParams); + + return (*aqo_planner_next)(parse, query_string, cursorOptions, boundParams); } - INSTR_TIME_SET_CURRENT(query_context.query_starttime); + elog(DEBUG1, "AQO will be used for query '%s', class "UINT64_FORMAT, + query_string ? query_string : "null string", query_context.query_hash); - query_context.query_hash = get_query_hash(parse, query_text); + MemoryContextSwitchTo(oldctx); + oldctx = MemoryContextSwitchTo(AQOCacheMemCtx); + cur_classes = lappend_uint64(cur_classes, query_context.query_hash); + MemoryContextSwitchTo(oldctx); + oldctx = MemoryContextSwitchTo(AQOPredictMemCtx); - if (query_is_deactivated(query_context.query_hash)) + if (aqo_mode == AQO_MODE_DISABLED) { + /* Skip access to a database in this mode. 
*/ disable_aqo_for_query(); - return call_default_planner(parse, - query_string, - cursorOptions, - boundParams); + goto ignore_query_settings; } - query_is_stored = find_query(query_context.query_hash, &query_params[0], - &query_nulls[0]); + query_is_stored = aqo_queries_find(query_context.query_hash, &query_context); if (!query_is_stored) { @@ -172,7 +182,6 @@ aqo_planner(Query *parse, query_context.adding_query = true; query_context.learn_aqo = true; query_context.use_aqo = false; - query_context.fspace_hash = query_context.query_hash; query_context.auto_tuning = true; query_context.collect_stat = true; break; @@ -181,7 +190,7 @@ aqo_planner(Query *parse, query_context.learn_aqo = true; query_context.use_aqo = true; query_context.auto_tuning = false; - query_context.fspace_hash = 0; + query_context.fspace_hash = 0; /* Use common feature space */ query_context.collect_stat = false; break; case AQO_MODE_CONTROLLED: @@ -200,39 +209,36 @@ aqo_planner(Query *parse, query_context.adding_query = true; query_context.learn_aqo = true; query_context.use_aqo = true; - query_context.fspace_hash = query_context.query_hash; query_context.auto_tuning = false; query_context.collect_stat = true; break; case AQO_MODE_DISABLED: /* Should never happen */ - query_context.fspace_hash = query_context.query_hash; + Assert(0); break; default: elog(ERROR, "unrecognized mode in AQO: %d", aqo_mode); break; } - - if (query_context.adding_query || force_collect_stat) - { - add_query(query_context.query_hash, query_context.learn_aqo, - query_context.use_aqo, query_context.fspace_hash, - query_context.auto_tuning); - add_query_text(query_context.query_hash, query_text); - } + query_context.count_increase_timeout = 0; + query_context.smart_timeout = 0; } - else + else /* Query class exists in a ML knowledge base. 
*/ { query_context.adding_query = false; - query_context.learn_aqo = DatumGetBool(query_params[1]); - query_context.use_aqo = DatumGetBool(query_params[2]); - query_context.fspace_hash = DatumGetInt32(query_params[3]); - query_context.auto_tuning = DatumGetBool(query_params[4]); - query_context.collect_stat = query_context.auto_tuning; + /* Other query_context fields filled in the find_query() routine. */ + + /* + * Deactivate query if no one reason exists for usage of an AQO machinery. + */ if (!query_context.learn_aqo && !query_context.use_aqo && - !query_context.auto_tuning) + !query_context.auto_tuning && !force_collect_stat) + { add_deactivated_query(query_context.query_hash); + disable_aqo_for_query(); + goto ignore_query_settings; + } /* * That we can do if query exists in database. @@ -256,7 +262,6 @@ aqo_planner(Query *parse, * suppressed manually) and collect stats. */ query_context.collect_stat = true; - query_context.fspace_hash = query_context.query_hash; break; case AQO_MODE_INTELLIGENT: @@ -271,62 +276,183 @@ aqo_planner(Query *parse, } } - /* - * This mode is possible here, because force collect statistics uses AQO - * machinery. - */ - if (aqo_mode == AQO_MODE_DISABLED) - disable_aqo_for_query(); +ignore_query_settings: + if (!query_is_stored && (query_context.adding_query || force_collect_stat)) + { + /* + * Add query into the AQO knowledge base. To process an error with + * concurrent addition from another backend we will try to restart + * preprocessing routine. + */ + if (aqo_queries_store(query_context.query_hash, query_context.fspace_hash, + query_context.learn_aqo, query_context.use_aqo, + query_context.auto_tuning, &aqo_queries_nulls)) + { + bool dsa_valid = true; + /* + * Add query text into the ML-knowledge base. Just for further + * analysis. In the case of cached plans we may have NULL query text. 
+ */ + if (!aqo_qtext_store(query_context.query_hash, query_string, &dsa_valid)) + { + if (!dsa_valid) + { + disable_aqo_for_query(); + elog(WARNING, "[AQO] Not enough DSA. AQO was disabled for this query"); + } + else + { + Assert(0); /* panic only on debug installation */ + elog(ERROR, "[AQO] Impossible situation was detected. Maybe not enough of shared memory?"); + } + } + } + else + { + /* + * In the case of problems (shmem overflow, as a typical issue) - + * disable AQO for the query class. + */ + disable_aqo_for_query(); + + /* + * Switch AQO to frozen mode. In this mode we wouldn't collect + * any new data, just read collected statistics for already + * known query classes. + */ + aqo_mode = AQO_MODE_FROZEN; + } + } if (force_collect_stat) - { /* * If this GUC is set, AQO will analyze query results and collect * query execution statistics in any mode. */ query_context.collect_stat = true; - query_context.fspace_hash = query_context.query_hash; - } - return call_default_planner(parse, - query_string, - cursorOptions, - boundParams); + if (!IsQueryDisabled()) + /* It's good place to set timestamp of start of a planning process. */ + INSTR_TIME_SET_CURRENT(query_context.start_planning_time); + { + PlannedStmt *stmt; + + MemoryContextSwitchTo(oldctx); + stmt = (*aqo_planner_next)(parse, query_string, cursorOptions, boundParams); + + /* Release the memory, allocated for AQO predictions */ + MemoryContextReset(AQOPredictMemCtx); + return stmt; + } } /* * Turn off all AQO functionality for the current query. 
*/ -void +static void disable_aqo_for_query(void) { - query_context.adding_query = false; query_context.learn_aqo = false; query_context.use_aqo = false; query_context.auto_tuning = false; query_context.collect_stat = false; + query_context.adding_query = false; + query_context.explain_only = false; + + INSTR_TIME_SET_ZERO(query_context.start_planning_time); + query_context.planning_time = -1.; } +typedef struct AQOPreWalkerCtx +{ + bool trivQuery; + int njoins; +} AQOPreWalkerCtx; + /* * Examine a fully-parsed query, and return TRUE iff any relation underlying - * the query is a system relation. + * the query is a system relation or no one permanent (non-temporary) relation + * touched by the query. */ -bool +static bool isQueryUsingSystemRelation(Query *query) { - return isQueryUsingSystemRelation_walker((Node *) query, NULL); + AQOPreWalkerCtx ctx; + bool result; + + ctx.trivQuery = true; + ctx.njoins = 0; + result = isQueryUsingSystemRelation_walker((Node *) query, &ctx); + + if (result || ctx.trivQuery || ctx.njoins < aqo_join_threshold) + return true; + return false; +} + + +static bool +IsAQORelation(Relation rel) +{ + char *relname = NameStr(rel->rd_rel->relname); + + if (strcmp(relname, "aqo_data") == 0 || + strcmp(relname, "aqo_query_texts") == 0 || + strcmp(relname, "aqo_query_stat") == 0 || + strcmp(relname, "aqo_queries") == 0 + ) + return true; + + return false; +} + +/* + * Walk through jointree and calculate number of potential joins + */ +static void +jointree_walker(Node *jtnode, void *context) +{ + AQOPreWalkerCtx *ctx = (AQOPreWalkerCtx *) context; + + if (jtnode == NULL || IsA(jtnode, RangeTblRef)) + return; + else if (IsA(jtnode, FromExpr)) + { + FromExpr *f = (FromExpr *) jtnode; + ListCell *l; + + /* Count number of potential joins by number of sources in FROM list */ + ctx->njoins += list_length(f->fromlist) - 1; + + foreach(l, f->fromlist) + jointree_walker(lfirst(l), context); + } + else if (IsA(jtnode, JoinExpr)) + { + JoinExpr *j = 
(JoinExpr *) jtnode; + + /* Don't forget about explicit JOIN statement */ + ctx->njoins++; + jointree_walker(j->larg, context); + jointree_walker(j->rarg, context); + } + else + elog(ERROR, "unrecognized node type: %d", (int) nodeTag(jtnode)); + return; } -bool +static bool isQueryUsingSystemRelation_walker(Node *node, void *context) { + MemoryContext oldctx = MemoryContextSwitchTo(AQOLearnMemCtx); + AQOPreWalkerCtx *ctx = (AQOPreWalkerCtx *) context; + if (node == NULL) return false; if (IsA(node, Query)) { - Query *query = (Query *) node; - ListCell *rtable; + Query *query = (Query *) node; + ListCell *rtable; foreach(rtable, query->rtable) { @@ -336,13 +462,32 @@ isQueryUsingSystemRelation_walker(Node *node, void *context) { Relation rel = table_open(rte->relid, AccessShareLock); bool is_catalog = IsCatalogRelation(rel); + bool is_aqo_rel = IsAQORelation(rel); - table_close(rel, AccessShareLock); - if (is_catalog) + if (is_catalog || is_aqo_rel) + { + table_close(rel, AccessShareLock); return true; + } + + if (rel->rd_rel->relpersistence != RELPERSISTENCE_TEMP) + /* Plane non TEMP table */ + ctx->trivQuery = false; + + table_close(rel, AccessShareLock); + } + else if (rte->rtekind == RTE_FUNCTION) + { + /* + * TODO: Exclude queries with AQO functions. + */ } } + jointree_walker((Node *) query->jointree, context); + MemoryContextSwitchTo(oldctx); + + /* Recursively plunge into subqueries and CTEs */ return query_tree_walker(query, isQueryUsingSystemRelation_walker, context, @@ -353,3 +498,27 @@ isQueryUsingSystemRelation_walker(Node *node, void *context) isQueryUsingSystemRelation_walker, context); } + +static void +aqo_post_parse_analyze(ParseState *pstate, Query *query) +{ + aqo_learn_statement_timeout_enable = false; + /* + * Enable learn_statement_timeout for + * the top level SELECT statement only. 
+ */ + if (query->commandType == CMD_SELECT) + aqo_learn_statement_timeout_enable = aqo_learn_statement_timeout; + + if (aqo_post_parse_analyze_hook) + aqo_post_parse_analyze_hook(pstate, query); +} + +void +aqo_preprocessing_init(void) +{ + aqo_planner_next = planner_hook ? planner_hook : standard_planner; + planner_hook = aqo_planner; + aqo_post_parse_analyze_hook = post_parse_analyze_hook; + post_parse_analyze_hook = aqo_post_parse_analyze; +} diff --git a/regress_schedule b/regress_schedule new file mode 100644 index 00000000..f3084fc8 --- /dev/null +++ b/regress_schedule @@ -0,0 +1,26 @@ +test: aqo_disabled +test: aqo_controlled +test: aqo_intelligent +test: aqo_forced +test: aqo_learn +test: schema +test: aqo_fdw +test: aqo_CVE-2020-14350 +test: gucs +test: forced_stat_collection +test: unsupported +test: clean_aqo_data +test: parallel_workers +test: plancache +test: update_functions +# Performance-dependent test. Can be ignored if executes in containers or on slow machines +ignore: statement_timeout +test: statement_timeout +test: temp_tables +test: top_queries +test: relocatable +test: look_a_like +test: feature_subspace +test: eclasses +test: eclasses_mchar +test: aqo_query_stat diff --git a/selectivity_cache.c b/selectivity_cache.c index 455d13b1..fbaa8829 100644 --- a/selectivity_cache.c +++ b/selectivity_cache.c @@ -9,13 +9,15 @@ * ******************************************************************************* * - * Copyright (c) 2016-2020, Postgres Professional + * Copyright (c) 2016-2022, Postgres Professional * * IDENTIFICATION * aqo/selectivity_cache.c * */ +#include "postgres.h" + #include "aqo.h" typedef struct @@ -28,6 +30,9 @@ typedef struct List *objects = NIL; +/* Specific memory context for selectivity objects */ +MemoryContext AQOCacheSelectivity = NULL; + /* * Stores the given selectivity for clause_hash, relid and global_relid * of the clause. 
@@ -40,6 +45,13 @@ cache_selectivity(int clause_hash, { ListCell *l; Entry *cur_element; + MemoryContext old_ctx; + + if (!AQOCacheSelectivity) + AQOCacheSelectivity = AllocSetContextCreate(AQOTopMemCtx, + "AQOCacheSelectivity", + ALLOCSET_DEFAULT_SIZES); + foreach(l, objects) { @@ -51,13 +63,14 @@ cache_selectivity(int clause_hash, return; } } - + old_ctx = MemoryContextSwitchTo(AQOCacheSelectivity); cur_element = palloc(sizeof(*cur_element)); cur_element->clause_hash = clause_hash; cur_element->relid = relid; cur_element->global_relid = global_relid; cur_element->selectivity = selectivity; objects = lappend(objects, cur_element); + MemoryContextSwitchTo(old_ctx); } /* @@ -87,5 +100,12 @@ selectivity_cache_find_global_relid(int clause_hash, int global_relid) void selectivity_cache_clear(void) { + if (!AQOCacheSelectivity) + { + Assert(objects == NIL); + return; + } + + MemoryContextReset(AQOCacheSelectivity); objects = NIL; } diff --git a/sql/aqo_CVE-2020-14350.sql b/sql/aqo_CVE-2020-14350.sql index 7fff18a4..c4979344 100644 --- a/sql/aqo_CVE-2020-14350.sql +++ b/sql/aqo_CVE-2020-14350.sql @@ -12,6 +12,7 @@ CREATE ROLE regress_hacker LOGIN; -- Test 1 RESET ROLE; ALTER ROLE regress_hacker NOSUPERUSER; +GRANT CREATE ON SCHEMA public TO regress_hacker; SET ROLE regress_hacker; SHOW is_superuser; @@ -26,6 +27,7 @@ $$ LANGUAGE plpgsql RETURNS NULL ON NULL INPUT; RESET ROLE; CREATE EXTENSION aqo; +SET aqo.join_threshold = 0; -- Test result (must be 'off') SET ROLE regress_hacker; @@ -42,21 +44,11 @@ ALTER ROLE regress_hacker NOSUPERUSER; SET ROLE regress_hacker; SHOW is_superuser; -CREATE FUNCTION aqo_status(hash int) -RETURNS TABLE ( - "learn" BOOL, - "use aqo" BOOL, - "auto tune" BOOL, - "fspace hash" INT, - "t_naqo" TEXT, - "err_naqo" TEXT, - "iters" BIGINT, - "t_aqo" TEXT, - "err_aqo" TEXT, - "iters_aqo" BIGINT -) +CREATE FUNCTION aqo_reset() +RETURNS bigint AS $$ BEGIN + RETURN 1; END $$ LANGUAGE plpgsql; @@ -65,33 +57,23 @@ RESET ROLE; CREATE EXTENSION aqo; SET 
ROLE regress_hacker; -CREATE OR REPLACE FUNCTION aqo_status(hash int) -RETURNS TABLE ( - "learn" BOOL, - "use aqo" BOOL, - "auto tune" BOOL, - "fspace hash" INT, - "t_naqo" TEXT, - "err_naqo" TEXT, - "iters" BIGINT, - "t_aqo" TEXT, - "err_aqo" TEXT, - "iters_aqo" BIGINT -) +CREATE OR REPLACE FUNCTION aqo_reset() +RETURNS bigint AS $$ BEGIN ALTER ROLE regress_hacker SUPERUSER; + RETURN 2; END $$ LANGUAGE plpgsql; RESET ROLE; -SELECT aqo_status(42); +SELECT aqo_reset(); SET ROLE regress_hacker; SHOW is_superuser; RESET ROLE; -DROP FUNCTION aqo_status(int); +DROP FUNCTION aqo_reset(); DROP EXTENSION IF EXISTS aqo; -- Test 3 @@ -101,7 +83,7 @@ ALTER ROLE regress_hacker NOSUPERUSER; SET ROLE regress_hacker; SHOW is_superuser; -CREATE FUNCTION aqo_enable_query(hash int) +CREATE FUNCTION aqo_enable_class(hash bigint) RETURNS VOID AS $$ BEGIN @@ -113,7 +95,7 @@ RESET ROLE; CREATE EXTENSION aqo; SET ROLE regress_hacker; -CREATE OR REPLACE FUNCTION aqo_enable_query(hash int) +CREATE OR REPLACE FUNCTION aqo_enable_class(hash bigint) RETURNS VOID AS $$ BEGIN @@ -122,13 +104,13 @@ END $$ LANGUAGE plpgsql; RESET ROLE; -SELECT aqo_enable_query(42); +SELECT aqo_enable_class(42); SET ROLE regress_hacker; SHOW is_superuser; RESET ROLE; -DROP FUNCTION aqo_enable_query(int); +DROP FUNCTION aqo_enable_class(bigint); DROP EXTENSION IF EXISTS aqo; -- Test 4 @@ -138,7 +120,7 @@ ALTER ROLE regress_hacker NOSUPERUSER; SET ROLE regress_hacker; SHOW is_superuser; -CREATE FUNCTION aqo_disable_query(hash int) +CREATE FUNCTION aqo_disable_class(hash bigint) RETURNS VOID AS $$ BEGIN @@ -150,7 +132,7 @@ RESET ROLE; CREATE EXTENSION aqo; SET ROLE regress_hacker; -CREATE OR REPLACE FUNCTION aqo_disable_query(hash int) +CREATE OR REPLACE FUNCTION aqo_disable_class(hash bigint) RETURNS VOID AS $$ BEGIN @@ -159,13 +141,13 @@ END $$ LANGUAGE plpgsql; RESET ROLE; -SELECT aqo_disable_query(42); +SELECT aqo_disable_class(42); SET ROLE regress_hacker; SHOW is_superuser; RESET ROLE; -DROP FUNCTION 
aqo_disable_query(int); +DROP FUNCTION aqo_disable_class(bigint); DROP EXTENSION IF EXISTS aqo; -- Test 5 @@ -175,10 +157,11 @@ ALTER ROLE regress_hacker NOSUPERUSER; SET ROLE regress_hacker; SHOW is_superuser; -CREATE FUNCTION aqo_clear_hist(hash int) -RETURNS VOID +CREATE FUNCTION aqo_drop_class(queryid bigint) +RETURNS integer AS $$ BEGIN + RETURN 1; END $$ LANGUAGE plpgsql; @@ -187,22 +170,23 @@ RESET ROLE; CREATE EXTENSION aqo; SET ROLE regress_hacker; -CREATE OR REPLACE FUNCTION aqo_clear_hist(hash int) -RETURNS VOID +CREATE OR REPLACE FUNCTION aqo_drop_class(queryid bigint) +RETURNS integer AS $$ BEGIN ALTER ROLE regress_hacker SUPERUSER; + RETURN 2; END $$ LANGUAGE plpgsql; RESET ROLE; -SELECT aqo_clear_hist(42); +SELECT aqo_drop_class(42); SET ROLE regress_hacker; SHOW is_superuser; RESET ROLE; -DROP FUNCTION aqo_clear_hist(int); +DROP FUNCTION aqo_drop_class(bigint); DROP EXTENSION IF EXISTS aqo; -- Test 6 @@ -212,8 +196,8 @@ ALTER ROLE regress_hacker NOSUPERUSER; SET ROLE regress_hacker; SHOW is_superuser; -CREATE FUNCTION aqo_drop(hash int) -RETURNS VOID +CREATE FUNCTION aqo_execution_time(controlled boolean) +RETURNS TABLE(num integer, id bigint, fshash bigint, exec_time double precision, nexecs bigint) AS $$ BEGIN END @@ -224,8 +208,8 @@ RESET ROLE; CREATE EXTENSION aqo; SET ROLE regress_hacker; -CREATE OR REPLACE FUNCTION aqo_drop(hash int) -RETURNS VOID +CREATE OR REPLACE FUNCTION aqo_execution_time(controlled boolean) +RETURNS TABLE(num integer, id bigint, fshash bigint, exec_time double precision, nexecs bigint) AS $$ BEGIN ALTER ROLE regress_hacker SUPERUSER; @@ -233,13 +217,13 @@ END $$ LANGUAGE plpgsql; RESET ROLE; -SELECT aqo_drop(42); +SELECT aqo_execution_time(true); SET ROLE regress_hacker; SHOW is_superuser; RESET ROLE; -DROP FUNCTION aqo_drop(int); +DROP FUNCTION aqo_execution_time(boolean); DROP EXTENSION IF EXISTS aqo; -- Test 7 @@ -249,8 +233,8 @@ ALTER ROLE regress_hacker NOSUPERUSER; SET ROLE regress_hacker; SHOW is_superuser; 
-CREATE FUNCTION aqo_ne_queries() -RETURNS SETOF int +CREATE FUNCTION aqo_memory_usage(OUT name text, OUT allocated_size int, OUT used_size int) +RETURNS SETOF record AS $$ BEGIN END @@ -261,55 +245,26 @@ RESET ROLE; CREATE EXTENSION aqo; SET ROLE regress_hacker; -CREATE OR REPLACE FUNCTION aqo_ne_queries() -RETURNS SETOF int -AS $$ -BEGIN - ALTER ROLE regress_hacker SUPERUSER; -END -$$ LANGUAGE plpgsql; - -RESET ROLE; -SELECT aqo_ne_queries(); - -SET ROLE regress_hacker; -SHOW is_superuser; - -RESET ROLE; -DROP FUNCTION aqo_ne_queries(); -DROP EXTENSION IF EXISTS aqo; - --- Test 8 -RESET ROLE; -ALTER ROLE regress_hacker NOSUPERUSER; - -SET ROLE regress_hacker; -SHOW is_superuser; - -CREATE FUNCTION aqo_migrate_to_1_1_get_pk(rel text) -RETURNS regclass +CREATE OR REPLACE FUNCTION aqo_memory_usage(OUT name text, OUT allocated_size int, OUT used_size int) +RETURNS SETOF record AS $$ -DECLARE - ret regclass; BEGIN ALTER ROLE regress_hacker SUPERUSER; - SELECT * FROM aqo_migrate_to_1_1_get_pk(rel::regclass) INTO ret; - RETURN ret; END $$ LANGUAGE plpgsql; RESET ROLE; -CREATE EXTENSION aqo; +SELECT aqo_memory_usage(); --- Test result (must be 'off') SET ROLE regress_hacker; SHOW is_superuser; RESET ROLE; -DROP FUNCTION aqo_migrate_to_1_1_get_pk(text); +DROP FUNCTION aqo_memory_usage(); DROP EXTENSION IF EXISTS aqo; -- Cleanup RESET ROLE; +DROP OWNED BY regress_hacker CASCADE; DROP ROLE regress_hacker; diff --git a/sql/aqo_controlled.sql b/sql/aqo_controlled.sql index c4d1db08..8c8e5fb8 100644 --- a/sql/aqo_controlled.sql +++ b/sql/aqo_controlled.sql @@ -1,3 +1,6 @@ +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + CREATE TABLE aqo_test0(a int, b int, c int, d int); WITH RECURSIVE t(a, b, c, d) AS ( @@ -28,8 +31,6 @@ AS ( CREATE INDEX aqo_test2_idx_a ON aqo_test2 (a); ANALYZE aqo_test2; -CREATE EXTENSION aqo; - SET aqo.mode = 'controlled'; EXPLAIN (COSTS FALSE) @@ -76,10 +77,11 @@ SELECT count(*) FROM tmp1; DROP TABLE tmp1; SET aqo.mode = 
'controlled'; -UPDATE aqo_queries SET auto_tuning=false; -UPDATE aqo_queries SET learn_aqo=true; -UPDATE aqo_queries SET use_aqo=false; +SELECT count(*) FROM + (SELECT queryid AS id FROM aqo_queries) AS q1, + LATERAL aqo_queries_update(q1.id, NULL, true, false, false) +; -- learn = true, use = false, tuning = false EXPLAIN (COSTS FALSE) SELECT * FROM aqo_test0 @@ -107,7 +109,11 @@ SELECT t1.a AS a, t2.a AS b, t3.a AS c FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3 WHERE t1.a = t2.b AND t2.a = t3.b; -UPDATE aqo_queries SET use_aqo=true; +SELECT count(*) FROM + (SELECT queryid AS id FROM aqo_queries) AS q1, + LATERAL aqo_queries_update(q1.id, NULL, NULL, true, NULL) AS ret +WHERE NOT ret +; -- set use = true EXPLAIN (COSTS FALSE) SELECT * FROM aqo_test0 @@ -142,10 +148,8 @@ WHERE t1.a = t2.b AND t2.a = t3.b; DROP INDEX aqo_test0_idx_a; DROP TABLE aqo_test0; - DROP INDEX aqo_test1_idx_a; DROP TABLE aqo_test1; - DROP INDEX aqo_test2_idx_a; DROP TABLE aqo_test2; diff --git a/sql/aqo_disabled.sql b/sql/aqo_disabled.sql index 7d755be9..8397f847 100644 --- a/sql/aqo_disabled.sql +++ b/sql/aqo_disabled.sql @@ -1,3 +1,8 @@ +-- Create the extension. Drop all lumps which could survive from +-- previous pass (repeated installcheck as an example). 
+CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + CREATE TABLE aqo_test0(a int, b int, c int, d int); WITH RECURSIVE t(a, b, c, d) AS ( @@ -18,6 +23,24 @@ AS ( CREATE INDEX aqo_test1_idx_a ON aqo_test1 (a); ANALYZE aqo_test1; +SET aqo.mode = 'controlled'; + +CREATE TABLE tmp1 AS SELECT * FROM aqo_test0 +WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; +SELECT count(*) FROM tmp1; +DROP TABLE tmp1; +CREATE TABLE tmp1 AS SELECT t1.a, t2.b, t3.c +FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 +WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; +SELECT count(*) FROM tmp1; +DROP TABLE tmp1; +EXPLAIN SELECT * FROM aqo_test0 +WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; +EXPLAIN SELECT t1.a, t2.b, t3.c +FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 +WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; +SELECT count(*) FROM aqo_queries WHERE queryid <> fs; -- Should be zero + SET aqo.mode = 'disabled'; CREATE TABLE tmp1 AS SELECT * FROM aqo_test0 @@ -38,8 +61,7 @@ EXPLAIN SELECT t1.a, t2.b, t3.c FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; -CREATE EXTENSION aqo; - +SELECT count(*) FROM aqo_queries WHERE queryid <> fs; -- Should be zero SET aqo.mode = 'intelligent'; CREATE TABLE tmp1 AS SELECT * FROM aqo_test0 @@ -53,9 +75,14 @@ WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = SELECT count(*) FROM tmp1; DROP TABLE tmp1; +SELECT count(*) FROM aqo_queries WHERE queryid <> fs; -- Should be zero SET aqo.mode = 'controlled'; -UPDATE aqo_queries SET learn_aqo = true, use_aqo = true, auto_tuning = false; +SELECT count(*) FROM + (SELECT queryid AS id FROM aqo_queries) AS q1, + LATERAL aqo_queries_update(q1.id, NULL, true, true, false) AS ret +WHERE NOT ret +; -- Enable all disabled query classes EXPLAIN SELECT * FROM aqo_test0 WHERE a < 3 AND b < 3 
AND c < 3 AND d < 3; @@ -64,6 +91,7 @@ EXPLAIN SELECT t1.a, t2.b, t3.c FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; +SELECT count(*) FROM aqo_queries WHERE queryid <> fs; -- Should be zero SET aqo.mode = 'disabled'; EXPLAIN SELECT * FROM aqo_test0 @@ -72,11 +100,11 @@ WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; EXPLAIN SELECT t1.a, t2.b, t3.c FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; - -DROP EXTENSION aqo; +SELECT count(*) FROM aqo_queries WHERE queryid <> fs; -- Should be zero DROP INDEX aqo_test0_idx_a; DROP TABLE aqo_test0; - DROP INDEX aqo_test1_idx_a; DROP TABLE aqo_test1; + +DROP EXTENSION aqo; diff --git a/sql/aqo_dummy_test.sql b/sql/aqo_dummy_test.sql new file mode 100644 index 00000000..e69de29b diff --git a/sql/aqo_fdw.sql b/sql/aqo_fdw.sql new file mode 100644 index 00000000..5425dcf4 --- /dev/null +++ b/sql/aqo_fdw.sql @@ -0,0 +1,148 @@ +-- Tests on cardinality estimation of FDW-queries: +-- simple ForeignScan. +-- JOIN push-down (check push of baserestrictinfo and joininfo) +-- Aggregate push-down +-- Push-down of groupings with HAVING clause. + +CREATE EXTENSION IF NOT EXISTS aqo; +CREATE EXTENSION IF NOT EXISTS postgres_fdw; +SELECT true AS success FROM aqo_reset(); + +SET aqo.mode = 'learn'; +SET aqo.show_details = 'true'; -- show AQO info for each node and entire query. +SET aqo.show_hash = 'false'; -- a hash value is system-depended. Ignore it. 
+ +DO $d$ + BEGIN + EXECUTE $$CREATE SERVER loopback FOREIGN DATA WRAPPER postgres_fdw + OPTIONS (dbname '$$||current_database()||$$', + port '$$||current_setting('port')||$$' + )$$; + END; +$d$; + +CREATE USER MAPPING FOR PUBLIC SERVER loopback; + +CREATE TABLE local (x int); +CREATE FOREIGN TABLE frgn(x int) SERVER loopback OPTIONS (table_name 'local'); +INSERT INTO frgn (x) VALUES (1); +ANALYZE local; + +-- Utility tool. Allow to filter system-dependent strings from explain output. +CREATE FUNCTION expln(query_string text) RETURNS SETOF text AS $$ +BEGIN + RETURN QUERY + EXECUTE format('%s', query_string); + RETURN; +END; +$$ LANGUAGE PLPGSQL; + +-- Trivial foreign scan. +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT x FROM frgn; +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT x FROM frgn; + +-- Push down base filters. Use verbose mode to see filters. +SELECT str FROM expln(' + EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE) + SELECT x FROM frgn WHERE x < 10; +') AS str; +SELECT str FROM expln(' + EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE) + SELECT x FROM frgn WHERE x < 10; +') AS str; +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT x FROM frgn WHERE x < -10; -- AQO ignores constants + +-- Trivial JOIN push-down. 
+SELECT str FROM expln(' + EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT * FROM frgn AS a, frgn AS b WHERE a.x=b.x; +') AS str WHERE str NOT LIKE '%Sort Method%'; + +-- Should learn on postgres_fdw nodes +SELECT str FROM expln(' + EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE) + SELECT * FROM frgn AS a, frgn AS b WHERE a.x=b.x; +') AS str; + +CREATE TABLE local_a(aid int primary key, aval text); +CREATE TABLE local_b(bid int primary key, aid int references local_a(aid), bval text); +INSERT INTO local_a SELECT i, 'val_' || i FROM generate_series(1,100) i; +INSERT INTO local_b SELECT i, mod((i+random()*10)::numeric, 10) + 1, 'val_' || i FROM generate_series(1,1000) i; +ANALYZE local_a, local_b; + +CREATE FOREIGN TABLE frgn_a(aid int, aval text) SERVER loopback OPTIONS (table_name 'local_a'); +CREATE FOREIGN TABLE frgn_b(bid int, aid int, bval text) SERVER loopback OPTIONS (table_name 'local_b'); + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * from frgn_a AS a, frgn_b AS b +WHERE a.aid = b.aid AND b.bval like 'val%'; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * from frgn_a AS a, frgn_b AS b +WHERE a.aid = b.aid AND b.bval like 'val%'; + +-- Partitioned join over foreign tables +set enable_partitionwise_join = on; +ALTER SERVER loopback OPTIONS (ADD fdw_tuple_cost '1.0'); + +CREATE TABLE local_main_p0(aid int, aval text); +CREATE TABLE local_main_p1(aid int, aval text); +CREATE TABLE main (aid int, aval text) PARTITION BY HASH(aid); + +CREATE FOREIGN TABLE main_p0 PARTITION OF main FOR VALUES WITH (MODULUS 3, REMAINDER 0) + SERVER loopback OPTIONS (table_name 'local_main_p0'); +CREATE FOREIGN TABLE main_p1 PARTITION OF main FOR VALUES WITH (MODULUS 3, REMAINDER 1) + SERVER loopback OPTIONS (table_name 'local_main_p1'); +CREATE TABLE main_p2 PARTITION OF main FOR VALUES WITH (MODULUS 3, REMAINDER 2); + +CREATE TABLE local_ref_p0(bid int, aid int, bval text); +CREATE TABLE local_ref_p1(bid int, aid 
int, bval text); +CREATE TABLE ref (bid int, aid int, bval text) PARTITION BY HASH(aid); + +CREATE FOREIGN TABLE ref_p0 PARTITION OF ref FOR VALUES WITH (MODULUS 3, REMAINDER 0) + SERVER loopback OPTIONS (table_name 'local_ref_p0'); +CREATE FOREIGN TABLE ref_p1 PARTITION OF ref FOR VALUES WITH (MODULUS 3, REMAINDER 1) + SERVER loopback OPTIONS (table_name 'local_ref_p1'); +CREATE TABLE ref_p2 PARTITION OF ref FOR VALUES WITH (MODULUS 3, REMAINDER 2); + +INSERT INTO main SELECT i, 'val_' || i FROM generate_series(1,100) i; +INSERT INTO ref SELECT i, mod(i, 10) + 1, 'val_' || i FROM generate_series(1,1000) i; + +ANALYZE local_main_p0, local_main_p1, main_p2; +ANALYZE local_ref_p0, local_ref_p1, ref_p2; + +SELECT str AS result +FROM expln(' +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * from main AS a, ref AS b +WHERE a.aid = b.aid AND b.bval like ''val%''') AS str +WHERE str NOT LIKE '%Memory%'; + +SELECT str AS result +FROM expln(' +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * from main AS a, ref AS b +WHERE a.aid = b.aid AND b.bval like ''val%''') AS str +WHERE str NOT LIKE '%Memory%'; + +DROP TABLE main, local_main_p0, local_main_p1; +DROP TABLE ref, local_ref_p0, local_ref_p1; +ALTER SERVER loopback OPTIONS (DROP fdw_tuple_cost); +reset enable_partitionwise_join; + +-- TODO: Non-mergejoinable join condition. 
+EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM frgn AS a, frgn AS b WHERE a.x aqt2.queryid; + +-- Fix the state of the AQO data +SELECT min(reliability),sum(nfeatures),query_text +FROM aqo_data ad, aqo_query_texts aqt +WHERE aqt.queryid = ad.fs +GROUP BY (query_text) ORDER BY (md5(query_text)) +; + DROP TABLE tmp1; SET aqo.mode = 'controlled'; -UPDATE aqo_queries SET learn_aqo = false, use_aqo = false, auto_tuning = false; +SELECT count(*) FROM + (SELECT queryid AS id FROM aqo_queries) AS q1, + LATERAL aqo_queries_update(q1.id, NULL, false, false, false) +; -- Disable all AQO query classes EXPLAIN SELECT * FROM aqo_test0 WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; @@ -121,19 +161,24 @@ WHERE t1.a < 1 AND t1.b < 1 AND t2.c < 1 AND t2.d < 1; EXPLAIN SELECT t1.a, t2.b FROM aqo_test1 AS t1, aqo_test0 AS t2 WHERE t1.a < 1 AND t2.b < 1 AND t2.c < 1 AND t2.d < 1 AND t1.a = t2.a; -EXPlAIN SELECT t1.a, t2.b, t3.c +EXPLAIN SELECT t1.a, t2.b, t3.c FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; -EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c -FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3 -WHERE t1.a = t2.b AND t2.a = t3.b; +SELECT * FROM check_estimated_rows(' + SELECT t1.a AS a, t2.a AS b, t3.a AS c + FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3 + WHERE t1.a = t2.b AND t2.a = t3.b; +'); EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c, t4.a AS d FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4 WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; -UPDATE aqo_queries SET learn_aqo = false, use_aqo = true, auto_tuning = false; +SELECT count(*) FROM + (SELECT queryid AS id FROM aqo_queries) AS q1, + LATERAL aqo_queries_update(q1.id, NULL, false, true, false) +; -- learn = false, use = true, tuning = false EXPLAIN SELECT * FROM aqo_test0 WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; @@ -150,21 +195,122 @@ WHERE t1.a < 1 AND t1.b < 
1 AND t2.c < 1 AND t2.d < 1; EXPLAIN SELECT t1.a, t2.b FROM aqo_test1 AS t1, aqo_test0 AS t2 WHERE t1.a < 1 AND t2.b < 1 AND t2.c < 1 AND t2.d < 1 AND t1.a = t2.a; -EXPlAIN SELECT t1.a, t2.b, t3.c +EXPLAIN SELECT t1.a, t2.b, t3.c FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; -EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c -FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3 -WHERE t1.a = t2.b AND t2.a = t3.b; - -EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c, t4.a AS d -FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4 -WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; - +SELECT * FROM check_estimated_rows(' + SELECT t1.a AS a, t2.a AS b, t3.a AS c + FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3 + WHERE t1.a = t2.b AND t2.a = t3.b; +'); + +SELECT * FROM check_estimated_rows(' + SELECT t1.a AS a, t2.a AS b, t3.a AS c, t4.a AS d + FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4 + WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; +'); + +-- Test limit on number of joins +SET aqo.mode = 'learn'; + +SELECT * FROM aqo_drop_class(0); +SELECT * FROM aqo_drop_class(42); + +-- Remove all data from ML knowledge base +SELECT count(*) FROM ( +SELECT aqo_drop_class(q1.id::bigint) FROM ( + SELECT queryid AS id + FROM aqo_queries WHERE queryid <> 0) AS q1 +) AS q2; +SELECT count(*) FROM aqo_data; + +SET aqo.join_threshold = 3; +SELECT * FROM check_estimated_rows('SELECT * FROM aqo_test1;'); +SELECT * FROM check_estimated_rows(' + SELECT * FROM aqo_test1 AS t1, aqo_test1 AS t2 WHERE t1.a = t2.b'); +SELECT count(*) FROM aqo_data; -- Return 0 - do not learn on the queries above + +SELECT * FROM check_estimated_rows(' + SELECT * + FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4 + WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; +'); -- Learn on the query +SELECT count(*) FROM + (SELECT fs FROM aqo_data 
GROUP BY (fs)) AS q1 +; +SELECT query_text FROM aqo_query_texts WHERE queryid <> 0; -- Check query + +SET aqo.join_threshold = 1; +SELECT * FROM check_estimated_rows('SELECT * FROM aqo_test1;'); +SELECT * FROM check_estimated_rows( + 'SELECT * FROM aqo_test1 AS t1, aqo_test1 AS t2 WHERE t1.a = t2.b'); +SELECT count(*) FROM + (SELECT fs FROM aqo_data GROUP BY (fs)) AS q1 +; -- Learn on a query with one join + +SET aqo.join_threshold = 0; +SELECT * FROM check_estimated_rows('SELECT * FROM aqo_test1;'); +SELECT count(*) FROM + (SELECT fs FROM aqo_data GROUP BY (fs)) AS q1 +; -- Learn on the query without any joins now + +SET aqo.join_threshold = 1; +SELECT * FROM check_estimated_rows('SELECT * FROM aqo_test1 t1 JOIN aqo_test1 AS t2 USING (a)'); +SELECT count(*) FROM + (SELECT fs FROM aqo_data GROUP BY (fs)) AS q1 +; -- See one more query in the AQO knowledge base + +SELECT * FROM check_estimated_rows('WITH selected AS (SELECT * FROM aqo_test1 t1) SELECT count(*) FROM selected'); +SELECT * FROM check_estimated_rows(' + WITH selected AS ( + SELECT * FROM aqo_test1 t1 JOIN aqo_test1 AS t2 USING (a) + ) SELECT count(*) FROM selected') +; +SELECT count(*) FROM (SELECT fs FROM aqo_data GROUP BY (fs)) AS q1; -- +1 + +-- InitPlan +SELECT * FROM check_estimated_rows(' + SELECT * FROM aqo_test1 AS t1 WHERE t1.a IN ( + SELECT t2.a FROM aqo_test1 AS t2 JOIN aqo_test1 AS t3 ON (t2.b = t3.a) + )'); +SELECT count(*) FROM (SELECT fs FROM aqo_data GROUP BY (fs)) AS q1; -- +1 + +-- SubPlan +SELECT * FROM check_estimated_rows(' + SELECT ( + SELECT avg(t2.a) FROM aqo_test1 AS t2 JOIN aqo_test1 AS t3 ON (t2.b = t3.a) AND (t2.a = t1.a) + ) FROM aqo_test1 AS t1; +'); +SELECT count(*) FROM (SELECT fs FROM aqo_data GROUP BY (fs)) AS q1; -- +1 + +-- Subquery +SET aqo.join_threshold = 3; +SELECT * FROM check_estimated_rows(' + SELECT * FROM aqo_test1 AS t1, + (SELECT t2.a FROM aqo_test1 AS t2 JOIN aqo_test1 AS t3 ON (t2.b = t3.a)) q1 + WHERE q1.a*t1.a = t1.a + 15; +'); -- Two JOINs, ignore it 
+SELECT count(*) FROM (SELECT fs FROM aqo_data GROUP BY (fs)) AS q1; -- +1 +SET aqo.join_threshold = 2; +SELECT * FROM check_estimated_rows(' + SELECT * FROM aqo_test1 AS t1, + (SELECT t2.a FROM aqo_test1 AS t2 JOIN aqo_test1 AS t3 ON (t2.b = t3.a)) q1 + WHERE q1.a*t1.a = t1.a + 15; +'); -- One JOIN from subquery, another one from the query +SELECT count(*) FROM (SELECT fs FROM aqo_data GROUP BY (fs)) AS q1; -- +1 + +SELECT * FROM check_estimated_rows(' + WITH selected AS ( + SELECT t2.a FROM aqo_test1 t1 JOIN aqo_test1 AS t2 USING (a) + ) SELECT count(*) FROM aqo_test1 t3, selected WHERE selected.a = t3.a') +; -- One JOIN extracted from CTE, another - from a FROM part of the query +SELECT count(*) FROM (SELECT fs FROM aqo_data GROUP BY (fs)) AS q1; -- +1 + +DROP FUNCTION check_estimated_rows; +RESET aqo.join_threshold; DROP INDEX aqo_test0_idx_a; DROP TABLE aqo_test0; - DROP INDEX aqo_test1_idx_a; DROP TABLE aqo_test1; diff --git a/sql/aqo_query_stat.sql b/sql/aqo_query_stat.sql new file mode 100644 index 00000000..a9228b5e --- /dev/null +++ b/sql/aqo_query_stat.sql @@ -0,0 +1,74 @@ +-- Testing aqo_query_stat update logic +-- Note: this test assumes STAT_SAMPLE_SIZE to be 20. 
+CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + +DROP TABLE IF EXISTS A; +CREATE TABLE A AS SELECT x FROM generate_series(1, 20) as x; +ANALYZE A; + +DROP TABLE IF EXISTS B; +CREATE TABLE B AS SELECT y FROM generate_series(1, 10) as y; +ANALYZE B; + +CREATE OR REPLACE FUNCTION round_array (double precision[]) +RETURNS double precision[] +LANGUAGE SQL +AS $$ + SELECT array_agg(round(elem::numeric, 3)) + FROM unnest($1) as arr(elem); +$$ + +SET aqo.mode = 'learn'; +SET aqo.force_collect_stat = 'on'; +SET aqo.min_neighbors_for_predicting = 1; + +-- First test: adding real records +SET aqo.mode = 'disabled'; +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 15 AND B.y < 5; +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 16 AND B.y < 6; + +SET aqo.mode = 'learn'; +SELECT aqo_enable_class(queryid) FROM aqo_queries WHERE queryid != 0; + +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 17 AND B.y < 7; +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 18 AND B.y < 8; +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 19 AND B.y < 9; +-- Ignore unstable time-related columns +SELECT round_array(cardinality_error_with_aqo) AS error_aqo, round_array(cardinality_error_without_aqo) AS error_no_aqo, executions_with_aqo, executions_without_aqo FROM aqo_query_stat; + +SELECT true AS success from aqo_reset(); + + +-- Second test: fake data in aqo_query_stat +SET aqo.mode = 'disabled'; +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 5 AND B.y < 100; +SELECT aqo_query_stat_update( + queryid, + '{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20}', '{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20}', + '{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20}', '{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20}', + '{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20}', '{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20}', + 100, 50) +FROM aqo_query_stat; +SELECT 
round_array(cardinality_error_with_aqo) AS error_aqo, round_array(cardinality_error_without_aqo) AS error_no_aqo, executions_with_aqo, executions_without_aqo FROM aqo_query_stat; + +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 10 AND B.y < 100; + +SET aqo.mode = 'learn'; +SELECT aqo_enable_class(queryid) FROM aqo_queries WHERE queryid != 0; + +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 15 AND B.y < 5; +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 16 AND B.y < 6; +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 17 AND B.y < 7; +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 18 AND B.y < 8; +SELECT round_array(cardinality_error_with_aqo) AS error_aqo, round_array(cardinality_error_without_aqo) AS error_no_aqo, executions_with_aqo, executions_without_aqo FROM aqo_query_stat; + + +SET aqo.mode TO DEFAULT; +SET aqo.force_collect_stat TO DEFAULT; +SET aqo.min_neighbors_for_predicting TO DEFAULT; + +DROP FUNCTION round_array; +DROP TABLE A; +DROP TABLE B; +DROP EXTENSION aqo CASCADE; diff --git a/sql/clean_aqo_data.sql b/sql/clean_aqo_data.sql new file mode 100644 index 00000000..3c504bdb --- /dev/null +++ b/sql/clean_aqo_data.sql @@ -0,0 +1,135 @@ +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + +SET aqo.mode = 'learn'; + +DROP TABLE IF EXISTS a; +DROP TABLE IF EXISTS b; +CREATE TABLE a(); +SELECT * FROM a; +SELECT 'a'::regclass::oid AS a_oid \gset +SELECT true AS success FROM aqo_cleanup(); + +/* + * lines with a_oid in aqo_data, + * lines with fs corresponding to a_oid in aqo_queries, + * lines with queryid corresponding to a_oid's fs in aqo_query_texts, + * lines with queryid corresponding to a_oid's fs in aqo_query_stat + * should remain + */ +SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); +SELECT count(*) FROM aqo_queries WHERE + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)); +SELECT count(*) FROM aqo_query_texts WHERE + 
aqo_query_texts.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids))); +SELECT count(*) FROM aqo_query_stat WHERE + aqo_query_stat.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids))); + +DROP TABLE a; +SELECT true AS success FROM aqo_cleanup(); + +/* + * lines with a_oid in aqo_data, + * lines with a_oid's fs EQUAL TO queryid in aqo_queries, + * lines with queryid corresponding to a_oid's fs in aqo_query_texts, + * lines with queryid corresponding to a_oid's fs in aqo_query_stat, + * should be deleted +*/ +SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); +SELECT count(*) FROM aqo_queries WHERE + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid; +SELECT count(*) FROM aqo_query_texts WHERE + aqo_query_texts.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid); +SELECT count(*) FROM aqo_query_stat WHERE + aqo_query_stat.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid); + +CREATE TABLE a(); +CREATE TABLE b(); +SELECT * FROM a; +SELECT * FROM b; +SELECT * FROM b CROSS JOIN a; +SELECT 'a'::regclass::oid AS a_oid \gset +SELECT 'b'::regclass::oid AS b_oid \gset + +-- new lines added to aqo_data +SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); +SELECT count(*) FROM aqo_queries WHERE + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)); +SELECT count(*) FROM aqo_query_texts WHERE + aqo_query_texts.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data 
WHERE :a_oid=ANY(oids))); +SELECT count(*) FROM aqo_query_stat WHERE + aqo_query_stat.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids))); + +SELECT count(*) FROM aqo_data WHERE :b_oid=ANY(oids); +SELECT count(*) FROM aqo_queries WHERE + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)); +SELECT count(*) FROM aqo_query_texts WHERE + aqo_query_texts.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids))); +SELECT count(*) FROM aqo_query_stat WHERE + aqo_query_stat.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids))); + +DROP TABLE a; +SELECT true AS success FROM aqo_cleanup(); + +/* + * lines corresponding to a_oid and both a_oid's fs deleted in aqo_data, + * lines with fs corresponding to a_oid deleted in aqo_queries, + * lines with queryid corresponding to a_oid's fs deleted in aqo_query_texts, + * lines with queryid corresponding to a_oid's fs deleted in aqo_query_stat, + */ +SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); +SELECT count(*) FROM aqo_queries WHERE + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid; +SELECT count(*) FROM aqo_query_texts WHERE + aqo_query_texts.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid); +SELECT count(*) FROM aqo_query_stat WHERE + aqo_query_stat.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid); + +-- lines corresponding to b_oid in all theese tables should remain +SELECT count(*) 
FROM aqo_data WHERE :b_oid=ANY(oids);
+SELECT count(*) FROM aqo_queries WHERE
+ aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND
+ aqo_queries.fs = aqo_queries.queryid;
+SELECT count(*) FROM aqo_query_texts WHERE
+ aqo_query_texts.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE
+ aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND
+ aqo_queries.fs = aqo_queries.queryid);
+SELECT count(*) FROM aqo_query_stat WHERE
+ aqo_query_stat.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE
+ aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND
+ aqo_queries.fs = aqo_queries.queryid);
+
+DROP TABLE b;
+SELECT true AS success FROM aqo_cleanup();
+
+-- lines corresponding to b_oid in these tables deleted
+SELECT count(*) FROM aqo_data WHERE :b_oid=ANY(oids);
+SELECT count(*) FROM aqo_queries WHERE
+ aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND
+ aqo_queries.fs = aqo_queries.queryid;
+SELECT count(*) FROM aqo_query_texts WHERE
+ aqo_query_texts.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE
+ aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND
+ aqo_queries.fs = aqo_queries.queryid);
+SELECT count(*) FROM aqo_query_stat WHERE
+ aqo_query_stat.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE
+ aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND
+ aqo_queries.fs = aqo_queries.queryid);
+
+DROP EXTENSION aqo;
diff --git a/sql/eclasses.sql b/sql/eclasses.sql
new file mode 100644
index 00000000..a041d2cb
--- /dev/null
+++ b/sql/eclasses.sql
@@ -0,0 +1,394 @@
+-- Testing for working with equivalence classes
+
+CREATE EXTENSION IF NOT EXISTS aqo;
+SET aqo.show_details = 'on';
+SET aqo.show_hash = 'off';
+SET aqo.mode = 'forced';
+
+--
+-- Returns string-by-string explain of a query.
Made for removing some strings +-- from the explain output. +-- +CREATE OR REPLACE FUNCTION expln(query_string text) RETURNS SETOF text AS $$ +BEGIN + RETURN QUERY + EXECUTE format('EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) %s', query_string); + RETURN; +END; +$$ LANGUAGE PLPGSQL; + +-- Integer fields +CREATE TABLE aqo_test_int(a int, b int, c int); +INSERT INTO aqo_test_int SELECT (x/10)::int, (x/100)::int, (x/1000)::int +FROM generate_series(0, 9999) x; +ANALYZE aqo_test_int; + +CREATE TABLE aqo_test_int1(a int, b int, c int); +INSERT INTO aqo_test_int1 SELECT (x/10)::int, (x/10)::int, (x/10)::int +FROM generate_series(0, 999) x; +ANALYZE aqo_test_int1; + +SELECT true AS success FROM aqo_reset(); + +-- Not equivalent queries +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE a = b AND a = 0; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE a = c AND a = 0; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE b = c AND a = 0; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE b = a AND c = b AND a = any('{0, 1, 2}'::int[]); + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE b = a AND c = b AND a = all('{0, 1, 2}'::int[]); +-- Must be 5 +SELECT count(*) FROM aqo_data; +SELECT true AS success FROM aqo_reset(); + +-- Equivalent queries +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE a = b AND a = c AND a = 0; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE a = b AND b = c AND a = 0; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE a = b AND a = c AND b = c AND a = 0; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE a = b AND b = c AND a = 0 AND b = 0; + +EXPLAIN (ANALYZE, COSTS OFF, 
SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE a = b AND b = c AND a = 0 AND c = 0; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE a = b AND b = c AND a = 0 AND b = 0 AND c = 0; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE b = a AND c = b AND 0 = a AND 0 = b AND 0 = c; + +-- Must be 1 +SELECT count(*) FROM aqo_data; +SELECT true AS success FROM aqo_reset(); + + +-- Tests with JOIN clauses. + +-- Case 1. +-- 4 cols in 1 eclass, all of them is 0. +-- 3 nodes with unique FSS. +SELECT str AS result FROM expln(' +SELECT * FROM ( + SELECT * FROM aqo_test_int + WHERE a = b AND b = 0) t1 +JOIN ( + SELECT * FROM aqo_test_int1 + WHERE a = b) t2 +ON t1.a = t2.a') AS str +WHERE str NOT LIKE '%Memory%'; + +-- Case 2. +-- 4 cols in 2 eclasses, 2 is 0 and 2 is 1. +-- The top node must be unique, but all of nodes like in a query of case 1. +SELECT str AS result FROM expln(' +SELECT * FROM ( + SELECT * FROM aqo_test_int + WHERE a = b AND b = 0) t1 +JOIN ( + SELECT * FROM aqo_test_int1 + WHERE a = b) t2 +ON 1 = t2.a') AS str +WHERE str NOT LIKE '%Memory%'; + +-- Case 3. +-- 4 cols in 2 eclasses, 2 is 0 and 2 is equal but not a const. +-- 1 scan node with FSS like in case 2 and 2 nodes with unique FSS. +SELECT str AS result FROM expln(' +SELECT * FROM ( + SELECT * FROM aqo_test_int + WHERE a = b) t1 +JOIN ( + SELECT * FROM aqo_test_int1 + WHERE a = b) t2 +ON t1.a = 0') AS str +WHERE str NOT LIKE '%Memory%'; + +-- Case 4. +-- 4 cols in 1 eclass, all of them is 0. +-- 3 nodes with unique FSS. This is not case 1, because it is SEMI-JOIN. +SELECT str AS result FROM expln(' +SELECT * FROM ( + SELECT * FROM aqo_test_int + WHERE a = b AND b = 0) t1 +WHERE EXISTS ( + SELECT * FROM aqo_test_int1 + WHERE a = b AND t1.a = a)') AS str +WHERE str NOT LIKE '%Memory%'; + +-- Case 5. +-- 4 cols in 1 eclass, all of them is 0. +-- The top node with unique FSS. Leaf nodes like in the case 4. 
+SELECT str AS result FROM expln(' +SELECT * FROM ( + SELECT * FROM aqo_test_int + WHERE a = b AND b = 0) t1 +WHERE NOT EXISTS ( + SELECT * FROM aqo_test_int1 + WHERE a = b AND t1.a = a)') AS str +WHERE str NOT LIKE '%Memory%'; + +-- Must be 10 rows. +SELECT count(*) FROM aqo_data; +SELECT true AS success FROM aqo_reset(); + +-- Case 6. +-- 4 cols in 1 eclass. +SELECT str AS result FROM expln(' +SELECT * FROM ( + SELECT * FROM aqo_test_int + WHERE b = a) t1 +JOIN ( + SELECT * FROM aqo_test_int1 + WHERE b = a) t2 +ON t1.a = t2.b') AS str +WHERE str NOT LIKE '%Memory%'; + +SELECT str AS result FROM expln(' +SELECT * FROM ( + SELECT * FROM aqo_test_int + WHERE b = a) t1 +JOIN ( + SELECT * FROM aqo_test_int1 + WHERE b = a) t2 +ON t1.a = t2.a') AS str +WHERE str NOT LIKE '%Memory%'; + +SELECT str AS result FROM expln(' +SELECT * FROM ( + SELECT * FROM aqo_test_int + WHERE a = b) t1 +JOIN ( + SELECT * FROM aqo_test_int1 + WHERE a = b) t2 +ON t1.a = t2.a') AS str +WHERE str NOT LIKE '%Memory%'; + +SELECT str AS result FROM expln(' +SELECT * FROM ( + SELECT * FROM aqo_test_int + WHERE a = b) t1 +JOIN ( + SELECT * FROM aqo_test_int1 + WHERE a = b) t2 +ON t1.b = t2.b') AS str +WHERE str NOT LIKE '%Memory%'; + +SELECT str AS result FROM expln(' +SELECT * FROM ( + SELECT * FROM aqo_test_int + WHERE a = b) t1 +JOIN ( + SELECT * FROM aqo_test_int1 + WHERE a = b) t2 +ON t1.b::text = t2.b::text') AS str +WHERE str NOT LIKE '%Memory%'; + +-- Must be 4 rows. 
+SELECT count(*) FROM aqo_data; +SELECT true AS success FROM aqo_reset(); + + +-- Text fields +CREATE TABLE aqo_test_text(a text, b text, c text); +INSERT INTO aqo_test_text +SELECT (x/10)::text, (x/100)::text, (x/1000)::text +FROM generate_series(0, 9999) x; +ANALYZE aqo_test_text; + +SELECT true AS success FROM aqo_reset(); +-- Not equivalent queries +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_text +WHERE a = b AND a = '0'; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_text +WHERE a = c AND a = '0'; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_text +WHERE b = c AND a = '0'; +-- Must be 3 +SELECT count(*) FROM aqo_data; +SELECT true AS success FROM aqo_reset(); + +-- Equivalent queries +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_text +WHERE a = b AND a = c AND a = '0'; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_text +WHERE a = b AND b = c AND a = '0'; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_text +WHERE a = b AND a = c AND b = c AND a = '0'; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_text +WHERE a = b AND b = c AND a = '0' AND b = '0'; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_text +WHERE a = b AND b = c AND a = '0' AND c = '0'; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_text +WHERE a = b AND b = c AND a = '0' AND b = '0' AND c = '0'; +-- Must be 1 +SELECT count(*) FROM aqo_data; +SELECT true AS success FROM aqo_reset(); + + +-- JSONB fields +CREATE TABLE aqo_test_jsonb(a jsonb, b jsonb, c jsonb); +INSERT INTO aqo_test_jsonb SELECT +to_jsonb(x/10), to_jsonb(x/100), to_jsonb(x/1000) +FROM generate_series(0, 9999) x; +ANALYZE aqo_test_jsonb; + +SELECT true AS success FROM aqo_reset(); +-- Not equivalent queries +EXPLAIN (ANALYZE, COSTS 
OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_jsonb +WHERE a = b AND a = '0'::jsonb; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_jsonb +WHERE a = c AND a = '0'::jsonb; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_jsonb +WHERE b = c AND a = '0'::jsonb; +-- Must be 3 +SELECT count(*) FROM aqo_data; +SELECT true AS success FROM aqo_reset(); + +-- Equivalent queries +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_jsonb +WHERE a = b AND a = c AND a = '0'::jsonb; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_jsonb +WHERE a = b AND b = c AND a = '0'::jsonb; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_jsonb +WHERE a = b AND a = c AND b = c AND a = '0'::jsonb; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_jsonb +WHERE a = b AND b = c AND a = '0'::jsonb AND b = '0'::jsonb; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_jsonb +WHERE a = b AND b = c AND a = '0'::jsonb AND c = '0'::jsonb; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_jsonb +WHERE a = b AND b = c AND a = '0'::jsonb AND b = '0'::jsonb AND c = '0'::jsonb; +-- Must be 1 +SELECT count(*) FROM aqo_data; +SELECT true AS success FROM aqo_reset(); + + +-- BOX fields +CREATE TABLE aqo_test_box(a box, b box, c box); +INSERT INTO aqo_test_box SELECT +('((0,0), ('||(x/10)||', '||(x/10)||'))')::box, +('((0,0), ('||(x/100)||', '||(x/100)||'))')::box, +('((0,0), ('||(x/1000)||', '||(x/1000)||'))')::box +FROM generate_series(0, 9999) x; +ANALYZE aqo_test_box; + +SELECT true AS success FROM aqo_reset(); +-- Not equivalent queries +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a = b AND a = c AND a = '((0,0), (0,0))'::box; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM 
aqo_test_box +WHERE a = b AND b = c AND a = '((0,0), (0,0))'::box; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a = b AND a = c AND b = c AND a = '((0,0), (0,0))'::box; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a = b AND b = c AND a = '((0,0), (0,0))'::box AND b = '((0,0), (0,0))'::box; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a = b AND b = c AND a = '((0,0), (0,0))'::box AND c = '((0,0), (0,0))'::box; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a = b AND b = c AND a = '((0,0), (0,0))'::box AND b = '((0,0), (0,0))'::box AND c = '((0,0), (0,0))'::box; +-- Must be 6 +SELECT count(*) FROM aqo_data; +SELECT true AS success FROM aqo_reset(); + +-- Not equivalent queries too +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a ~= b AND a ~= c AND a ~= '((0,0), (0,0))'::box; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a ~= b AND b ~= c AND a ~= '((0,0), (0,0))'::box; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a ~= b AND a ~= c AND b ~= c AND a ~= '((0,0), (0,0))'::box; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a ~= b AND b ~= c AND a ~= '((0,0), (0,0))'::box AND b ~= '((0,0), (0,0))'::box; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a ~= b AND b ~= c AND a ~= '((0,0), (0,0))'::box AND c ~= '((0,0), (0,0))'::box; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a ~= b AND b ~= c AND a ~= '((0,0), (0,0))'::box AND b ~= '((0,0), (0,0))'::box AND c ~= '((0,0), (0,0))'::box; +-- Must be 6 +SELECT count(*) FROM aqo_data; +SELECT true AS success FROM aqo_reset(); + +DROP TABLE aqo_test_int; +DROP TABLE 
aqo_test_text; +DROP TABLE aqo_test_jsonb; +DROP TABLE aqo_test_box; + +DROP EXTENSION aqo; diff --git a/sql/eclasses_mchar.sql b/sql/eclasses_mchar.sql new file mode 100644 index 00000000..62e10802 --- /dev/null +++ b/sql/eclasses_mchar.sql @@ -0,0 +1,73 @@ +-- Testing for working with equivalence classes for mchar type + +-- Skip test if mchar extension does not exist +SELECT count(*) = 0 AS skip_test +FROM pg_available_extensions WHERE name = 'mchar' \gset + +\if :skip_test +\quit +\endif + +CREATE EXTENSION IF NOT EXISTS aqo; +SET aqo.show_details = 'on'; +SET aqo.show_hash = 'off'; +SET aqo.mode = 'forced'; + +-- MCHAR fields +CREATE EXTENSION MCHAR; +CREATE TABLE aqo_test_mchar(a mchar, b mchar, c mchar); +INSERT INTO aqo_test_mchar +SELECT (x/10)::text::mchar, (x/100)::text::mchar, (x/1000)::text::mchar +FROM generate_series(0, 9999) x; +ANALYZE aqo_test_mchar; + +SELECT true AS success FROM aqo_reset(); +-- Not equivalent queries +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_mchar +WHERE a = b AND a = '0'; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_mchar +WHERE a = c AND a = '0'; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_mchar +WHERE b = c AND a = '0'; + +-- Must be 3 +SELECT count(*) FROM aqo_data; +SELECT true AS success FROM aqo_reset(); + +-- Equivalent queries +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_mchar +WHERE a = b AND a = c AND a = '0'; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_mchar +WHERE a = b AND b = c AND a = '0'; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_mchar +WHERE a = b AND a = c AND b = c AND a = '0'; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_mchar +WHERE a = b AND b = c AND a = '0' AND b = '0'; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM 
aqo_test_mchar +WHERE a = b AND b = c AND a = '0' AND c = '0'; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_mchar +WHERE a = b AND b = c AND a = '0' AND b = '0' AND c = '0'; +-- Must be 1 +SELECT count(*) FROM aqo_data; +SELECT true AS success FROM aqo_reset(); + +DROP TABLE aqo_test_mchar; + +DROP EXTENSION mchar; +DROP EXTENSION aqo; diff --git a/sql/feature_subspace.sql b/sql/feature_subspace.sql new file mode 100644 index 00000000..c9463d55 --- /dev/null +++ b/sql/feature_subspace.sql @@ -0,0 +1,45 @@ +-- This test related to some issues on feature subspace calculation + +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + +SET aqo.mode = 'learn'; +SET aqo.show_details = 'on'; + +CREATE TABLE a AS (SELECT gs AS x FROM generate_series(1,10) AS gs); +CREATE TABLE b AS (SELECT gs AS x FROM generate_series(1,100) AS gs); + +-- +-- Returns string-by-string explain of a query. Made for removing some strings +-- from the explain output. +-- +CREATE OR REPLACE FUNCTION expln(query_string text) RETURNS SETOF text AS $$ +BEGIN + RETURN QUERY + EXECUTE format('EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) %s', query_string); + RETURN; +END; +$$ LANGUAGE PLPGSQL; + +-- +-- A LEFT JOIN B isn't equal B LEFT JOIN A. +-- +SELECT str AS result +FROM expln(' +SELECT * FROM a LEFT JOIN b USING (x);') AS str +WHERE str NOT LIKE '%Memory%'; + +-- TODO: Using method of other classes neighbours we get a bad estimation. +SELECT str AS result +FROM expln(' +SELECT * FROM b LEFT JOIN a USING (x);') AS str +WHERE str NOT LIKE '%Memory%'; + +-- Look into the reason: two JOINs from different classes have the same FSS. 
+SELECT to_char(d1.targets[1], 'FM999.00') AS target FROM aqo_data d1 +JOIN aqo_data d2 ON (d1.fs <> d2.fs AND d1.fss = d2.fss) +WHERE 'a'::regclass = ANY (d1.oids) AND 'b'::regclass = ANY (d1.oids) order by target; + +DROP TABLE a,b CASCADE; + +DROP EXTENSION aqo; diff --git a/sql/forced_stat_collection.sql b/sql/forced_stat_collection.sql new file mode 100644 index 00000000..cf3990fc --- /dev/null +++ b/sql/forced_stat_collection.sql @@ -0,0 +1,51 @@ +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + +\set citizens 1000 + +SET aqo.mode = 'disabled'; +SET aqo.force_collect_stat = 'off'; + +CREATE TABLE person ( + id serial PRIMARY KEY, + age integer, + gender text, + passport integer +); + +-- Fill the person table with workers data. +INSERT INTO person (id,age,gender,passport) + (SELECT q1.id,q1.age, + CASE WHEN q1.id % 4 = 0 THEN 'Female' + ELSE 'Male' + END, + CASE WHEN (q1.age>18) THEN 1E6 + q1.id * 1E3 + ELSE NULL + END + FROM (SELECT *, 14+(id % 60) AS age FROM generate_series(1, :citizens) id) AS q1 + ); + +SET aqo.force_collect_stat = 'on'; + +SELECT count(*) FROM person WHERE age<18; +SELECT count(*) FROM person WHERE age<18 AND passport IS NOT NULL; +SELECT * FROM aqo_data; + +CREATE OR REPLACE FUNCTION round_array (double precision[]) +RETURNS double precision[] +LANGUAGE SQL +AS $$ + SELECT array_agg(round(elem::numeric, 3)) + FROM unnest($1) as arr(elem); +$$; + +SELECT learn_aqo,use_aqo,auto_tuning,round_array(cardinality_error_without_aqo) ce,executions_without_aqo nex +FROM aqo_queries AS aq JOIN aqo_query_stat AS aqs +ON aq.queryid = aqs.queryid +ORDER BY (cardinality_error_without_aqo); + +SELECT query_text FROM aqo_query_texts ORDER BY (md5(query_text)); + +DROP TABLE person; + +DROP EXTENSION aqo; diff --git a/sql/gucs.sql b/sql/gucs.sql new file mode 100644 index 00000000..f949bbec --- /dev/null +++ b/sql/gucs.sql @@ -0,0 +1,54 @@ +-- Preliminaries +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM 
aqo_reset(); + +-- Utility tool. Allow to filter system-dependent strings from an explain output. +CREATE OR REPLACE FUNCTION expln(query_string text) RETURNS SETOF text AS $$ +BEGIN + RETURN QUERY + EXECUTE format('%s', query_string); + RETURN; +END; +$$ LANGUAGE PLPGSQL; + +SET aqo.mode = 'learn'; +SET aqo.show_details = true; + +CREATE TABLE t(x int); +INSERT INTO t (x) (SELECT * FROM generate_series(1, 100) AS gs); +ANALYZE t; + +SELECT true AS success FROM aqo_reset(); +-- Check AQO addons to explain (the only stable data) +SELECT regexp_replace( + str,'Query Identifier: -?\m\d+\M','Query Identifier: N','g') as str FROM expln(' + EXPLAIN (ANALYZE, VERBOSE, COSTS OFF, TIMING OFF, SUMMARY OFF) + SELECT x FROM t; +') AS str; +SELECT regexp_replace( + str,'Query Identifier: -?\m\d+\M','Query Identifier: N','g') as str FROM expln(' + EXPLAIN (ANALYZE, VERBOSE, COSTS OFF, TIMING OFF, SUMMARY OFF) + SELECT x FROM t; +') AS str; +SET aqo.mode = 'disabled'; + +-- Check existence of the interface functions. 
+SELECT obj_description('aqo_cardinality_error'::regproc::oid);
+SELECT obj_description('aqo_execution_time'::regproc::oid);
+SELECT obj_description('aqo_drop_class'::regproc::oid);
+SELECT obj_description('aqo_cleanup'::regproc::oid);
+SELECT obj_description('aqo_reset'::regproc::oid);
+
+\df aqo_cardinality_error
+\df aqo_execution_time
+\df aqo_drop_class
+\df aqo_cleanup
+\df aqo_reset
+
+-- Check stat reset
+SELECT count(*) FROM aqo_query_stat;
+SELECT true AS success FROM aqo_reset();
+SELECT count(*) FROM aqo_query_stat;
+
+DROP TABLE t;
+DROP EXTENSION aqo;
diff --git a/sql/look_a_like.sql b/sql/look_a_like.sql
new file mode 100644
index 00000000..5eb47a65
--- /dev/null
+++ b/sql/look_a_like.sql
@@ -0,0 +1,215 @@
+-- Preliminaries
+CREATE EXTENSION IF NOT EXISTS aqo;
+SELECT true AS success FROM aqo_reset();
+
+SET aqo.wide_search = 'on';
+
+SET aqo.mode = 'learn';
+SET aqo.show_details = 'on';
+SET aqo.show_hash = 'off';
+SET aqo.min_neighbors_for_predicting = 1;
+SET aqo.predict_with_few_neighbors = 'off';
+SET enable_nestloop = 'off';
+SET enable_mergejoin = 'off';
+SET enable_material = 'off';
+
+DROP TABLE IF EXISTS a,b CASCADE;
+
+-- Create tables with correlated data in columns
+CREATE TABLE a (x1 int, x2 int, x3 int);
+INSERT INTO a (x1, x2, x3) SELECT mod(ival,10), mod(ival,10), mod(ival,10) FROM generate_series(1,100) As ival;
+
+CREATE TABLE b (y1 int, y2 int, y3 int);
+INSERT INTO b (y1, y2, y3) SELECT mod(ival + 1,10), mod(ival + 1,10), mod(ival + 1,10) FROM generate_series(1,1000) As ival;
+
+ANALYZE a, b;
+
+--
+-- Returns string-by-string explain of a query. Made for removing some strings
+-- from the explain output.
+--
+CREATE OR REPLACE FUNCTION expln(query_string text) RETURNS SETOF text AS $$
+BEGIN
+ RETURN QUERY
+ EXECUTE format('EXPLAIN (ANALYZE, VERBOSE, COSTS OFF, TIMING OFF, SUMMARY OFF) %s', query_string);
+ RETURN;
+END;
+$$ LANGUAGE PLPGSQL;
+
+-- no one predicted rows.
we use knowledge cardinalities of the query +-- in the next queries with the same fss_hash + +SELECT str AS result +FROM expln(' +SELECT x1,y1 FROM A,B WHERE x1 = 5 AND x2 = 5 AND A.x1 = B.y1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + +SELECT str AS result +FROM expln(' +SELECT x1,y1 FROM A LEFT JOIN b ON A.x1 = B.y1 WHERE x1 = 5 AND x2 = 5;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + +SELECT str AS result +FROM expln(' +SELECT x1,y1 FROM A,B WHERE x1 < 5 AND x2 < 5 AND A.x1 = B.y1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + +--query contains nodes that have already been predicted + +SELECT str AS result +FROM expln(' +SELECT x1,y1 FROM A,B WHERE x1 < 10 AND x2 < 5 AND A.x1 = B.y1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + +SELECT str AS result +FROM expln(' +SELECT x1,y1 FROM A,B WHERE x1 > 2 AND x2 > 2 AND A.x1 = B.y1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + +SELECT str AS result +FROM expln(' +SELECT x1,y1 FROM A,B WHERE x1 > 5 AND x2 > 5 AND x3 < 10 AND A.x1 = B.y1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + +SELECT str AS result +FROM expln(' +SELECT x1,y1 FROM A,B WHERE x1 < 5 AND x2 < 5 AND x3 < 10 AND A.x1 = B.y1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + +--query contains nodes that have already been predicted + +SELECT str AS result +FROM expln(' +SELECT x1,y1 FROM A,B WHERE x1 < 5 AND x2 < 4 AND x3 < 5 AND A.x1 = B.y1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + +SELECT str AS result +FROM expln(' 
+SELECT x1 FROM A,B WHERE x1 < 4 AND x3 > 1 GROUP BY(x1);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + +--query contains nodes that have already been predicted + +SELECT str AS result +FROM expln(' +SELECT x1 FROM A,B WHERE x1 < 4 AND x3 > 1 GROUP BY(x1);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + +SELECT str AS result +FROM expln(' +SELECT x1 FROM A,B WHERE x1 < 4 AND x3 > 2 GROUP BY(x1);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + +SELECT str AS result +FROM expln(' +SELECT x1 FROM A,B WHERE x1 < 3 AND x2 < 5 AND x3 > 1 GROUP BY(x1);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + +SELECT str AS result +FROM expln(' +SELECT x1 FROM A,B WHERE x1 > 1 AND x2 < 4 AND x3 > 1 GROUP BY(x1);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + +SELECT str AS result +FROM expln(' +SELECT x1 FROM A,B WHERE x1 > 1 AND x2 < 4 AND x3 < 5 GROUP BY(x1);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + +SELECT str AS result +FROM expln(' +SELECT x1 FROM A,B WHERE x1 < 4 AND x2 < 5 AND x3 > 1 and y1 > 2 GROUP BY(x1);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + +--query contains nodes that have already been predicted + +SELECT str AS result +FROM expln(' +SELECT x1 FROM A,B WHERE x1 < 3 AND x2 < 4 AND x3 > 1 and y1 > 2 GROUP BY(x1);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + +CREATE TABLE c (z1 int, z2 int, z3 int); +INSERT INTO c (z1, z2, z3) SELECT mod(ival + 1,10), mod(ival + 1,10), mod(ival + 1,10) FROM generate_series(1,1000) As ival; + +ANALYZE c; + 
+SELECT str AS result +FROM expln(' +SELECT * FROM (a LEFT JOIN b ON a.x1 = b.y1) sc WHERE +not exists (SELECT z1 FROM c WHERE sc.x1=c.z1 );') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + +SELECT str AS result +FROM expln(' +SELECT * FROM (A LEFT JOIN B ON A.x1 = B.y1) sc left join C on sc.x1=C.z1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + + +-- Next few test cases focus on fss corresponding to (x1 > ? AND x2 < ? AND x3 < ?). We will denote +-- it by fss0. At this moment there is exactly one fs with (fs, fss0, dbid) record in aqo_data. We'll +-- refer to it as fs0. + +-- Let's create another fs for fss0. We'll call this fs fs1. Since aqo.wide_search='on', +-- aqo.min_neighbors_for_predicting=1, and there is (fs0, fss0, dbid) data record, AQO must be used here. +SELECT str AS result +FROM expln(' +SELECT * FROM A WHERE x1 > -100 AND x2 < 10 AND x3 < 10;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; +-- Now there are 2 data records for fss0: one for (fs0, fss0, dbid) and one for (fs1, fss0, dbid) + +-- We repeat previous query, but set aqo.min_neighbors_for_predicting to 2. Since aqo.predict_with_few_neighbors +-- is 'off', AQO is obliged to use both data records for fss0. +SET aqo.min_neighbors_for_predicting = 2; +SELECT str AS result +FROM expln(' +SELECT * FROM A WHERE x1 > 1 AND x2 < 10 AND x3 < 10;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; +-- Now there are 3 data records for fss0: 1 for (fs0, fss0, dbid) and 2 for (fs1, fss0, dbid) + +-- Lastly, we run invoke query with previously unseen fs with fss0 feature subspace. AQO must use +-- three data records from two neighbors for this one. 
+SET aqo.min_neighbors_for_predicting = 3; +SELECT str AS result +FROM expln(' +SELECT x2 FROM A WHERE x1 > 3 AND x2 < 10 AND x3 < 10 GROUP BY(x2);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + +----- +DROP TABLE IF EXISTS t; +CREATE TABLE t AS SELECT x, x AS y, x AS z FROM generate_series(1, 10000) x; +ANALYZE t; +SELECT true AS success FROM aqo_reset(); + +-- Test that when there are less records than aqo.min_neighbors_for_predicting for given (fs, fss, dbid) +-- and aqo.predict_with_few_neighbors is off, those records have higher precedence for cardinality estimation +-- than neighbors' records. +SELECT str AS result +FROM expln(' +select * from t where x <= 10000 and y <= 10000 and z <= 10000;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; +DO +$$ +BEGIN + for counter in 1..20 loop + EXECUTE format('explain analyze select *, 1 from t where x <= 1 and y <= 1 and z <= %L;', 10 * counter); + EXECUTE format('explain analyze select *, 1 from t where x <= 1 and y <= %L and z <= 1;', 10 * counter); + EXECUTE format('explain analyze select *, 1 from t where x <= %L and y <= 1 and z <= 1;', 10 * counter); + end loop; +END; +$$ LANGUAGE PLPGSQL; +-- AQO should predict ~1000 rows to indicate that the record from previous invocation was used. 
+SELECT str AS result +FROM expln(' +select * from t where x <= 10000 and y <= 10000 and z <= 10000;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + + +RESET aqo.wide_search; +RESET aqo.predict_with_few_neighbors; +RESET aqo.min_neighbors_for_predicting; +DROP EXTENSION aqo CASCADE; + +DROP TABLE a; +DROP TABLE b; +DROP TABLE c; +DROP TABLE t; +DROP FUNCTION expln; diff --git a/sql/parallel_workers.sql b/sql/parallel_workers.sql new file mode 100644 index 00000000..419f23e6 --- /dev/null +++ b/sql/parallel_workers.sql @@ -0,0 +1,60 @@ +-- Specifically test AQO machinery for queries uses partial paths and executed +-- with parallel workers. + +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + +-- Utility tool. Allow to filter system-dependent strings from explain output. +CREATE OR REPLACE FUNCTION expln(query_string text) RETURNS SETOF text AS $$ +BEGIN + RETURN QUERY + EXECUTE format('%s', query_string); + RETURN; +END; +$$ LANGUAGE PLPGSQL; + +SET aqo.mode = 'learn'; +SET aqo.show_details = true; + +-- Be generous with a number parallel workers to test the machinery +SET max_parallel_workers = 64; +SET max_parallel_workers_per_gather = 64; +-- Enforce usage of parallel workers +SET parallel_setup_cost = 0.1; +SET parallel_tuple_cost = 0.0001; + +CREATE TABLE t AS ( + SELECT x AS id, repeat('a', 512) AS payload FROM generate_series(1, 1E5) AS x +); +ANALYZE t; + +-- Simple test. Check serialization machinery mostly. 
+SELECT count(*) FROM t WHERE id % 100 = 0; -- Learning stage +SELECT str FROM expln(' + EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) + SELECT count(*) FROM t WHERE id % 100 = 0;') AS str +WHERE str NOT LIKE '%Worker%'; + +-- More complex query just to provoke errors +SELECT count(*) FROM + (SELECT id FROM t WHERE id % 100 = 0 GROUP BY (id)) AS q1, + (SELECT max(id) AS id, payload FROM t + WHERE id % 101 = 0 GROUP BY (payload)) AS q2 +WHERE q1.id = q2.id; -- Learning stage +-- XXX: Why grouping prediction isn't working here? +SELECT str FROM expln(' +EXPLAIN (COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) FROM + (SELECT id FROM t WHERE id % 100 = 0 GROUP BY (id)) AS q1, + (SELECT max(id) AS id, payload FROM t + WHERE id % 101 = 0 GROUP BY (payload)) AS q2 +WHERE q1.id = q2.id;') AS str +WHERE str NOT LIKE '%Workers%'; + +RESET parallel_tuple_cost; +RESET parallel_setup_cost; +RESET max_parallel_workers; +RESET max_parallel_workers_per_gather; +DROP TABLE t; +DROP FUNCTION expln; +DROP EXTENSION aqo; diff --git a/sql/plancache.sql b/sql/plancache.sql new file mode 100644 index 00000000..b2d1c6d6 --- /dev/null +++ b/sql/plancache.sql @@ -0,0 +1,49 @@ +-- Tests on interaction of AQO with cached plans. + +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + +SET aqo.mode = 'intelligent'; +SET aqo.show_details = 'on'; +SET aqo.show_hash = 'off'; + +CREATE TABLE test AS SELECT x FROM generate_series(1,10) AS x; +ANALYZE test; + +-- Function which implements a test where AQO is used for both situations where +-- a query is planned or got from a plan cache. +-- Use a function to hide a system dependent hash value. 
+CREATE FUNCTION f1() RETURNS TABLE ( + nnex bigint, + nex bigint, + pt double precision[] +) AS $$ +DECLARE + i integer; + qhash bigint; +BEGIN + PREPARE fooplan (int) AS SELECT count(*) FROM test WHERE x = $1; + + FOR i IN 1..10 LOOP + execute 'EXECUTE fooplan(1)'; + END LOOP; + + SELECT queryid FROM aqo_query_texts + WHERE query_text LIKE '%count(*) FROM test WHERE x%' INTO qhash; + + RETURN QUERY SELECT executions_without_aqo nnex, + executions_with_aqo nex, + planning_time_with_aqo pt + FROM aqo_query_stat WHERE queryid = qhash; +END $$ LANGUAGE 'plpgsql'; + +-- The function shows 6 executions without an AQO support (nnex) and +-- 4 executions with usage of an AQO knowledge base (nex). Planning time in the +-- case of AQO support (pt) is equal to '-1', because the query plan is extracted +-- from the plan cache. +SELECT * FROM f1(); + +DROP FUNCTION f1; +DROP TABLE test CASCADE; + +DROP EXTENSION aqo; diff --git a/sql/relocatable.sql b/sql/relocatable.sql new file mode 100644 index 00000000..adf20983 --- /dev/null +++ b/sql/relocatable.sql @@ -0,0 +1,55 @@ +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + +SET aqo.mode = 'learn'; -- use this mode for unconditional learning + +CREATE TABLE test AS (SELECT id, 'payload' || id FROM generate_series(1,100) id); +ANALYZE test; + +-- Learn on a query +SELECT count(*) FROM test; +SELECT query_text, learn_aqo, use_aqo, auto_tuning +FROM aqo_query_texts aqt JOIN aqo_queries aq ON (aqt.queryid = aq.queryid) +ORDER BY (md5(query_text)) +; -- Check result. TODO: use aqo_status() + +-- Create a schema and move AQO into it. 
+CREATE SCHEMA IF NOT EXISTS test; +ALTER EXTENSION aqo SET SCHEMA test; + +-- Do something to be confident that AQO works +SELECT count(*) FROM test; +SELECT count(*) FROM test WHERE id < 10; + +SELECT query_text, learn_aqo, use_aqo, auto_tuning +FROM test.aqo_query_texts aqt JOIN test.aqo_queries aq ON (aqt.queryid = aq.queryid) +ORDER BY (md5(query_text)) +; -- Find out both queries executed above + +-- Add schema which contains AQO to the end of search_path +SELECT set_config('search_path', current_setting('search_path') || ', test', false); + +SELECT count(*) FROM test; +SELECT count(*) FROM test WHERE id < 10; + +SELECT query_text, learn_aqo, use_aqo, auto_tuning +FROM test.aqo_query_texts aqt JOIN test.aqo_queries aq ON (aqt.queryid = aq.queryid) +ORDER BY (md5(query_text)) +; -- Check result. + +/* + * Below, we should check each UI function + */ +SELECT aqo_disable_class(id) FROM ( + SELECT queryid AS id FROM aqo_queries WHERE queryid <> 0) AS q1; +SELECT learn_aqo, use_aqo, auto_tuning FROM test.aqo_queries +ORDER BY (learn_aqo, use_aqo, auto_tuning); +SELECT aqo_enable_class(id) FROM ( + SELECT queryid AS id FROM aqo_queries WHERE queryid <> 0) AS q1; +SELECT learn_aqo, use_aqo, auto_tuning FROM test.aqo_queries +ORDER BY (learn_aqo, use_aqo, auto_tuning); + +RESET search_path; +DROP TABLE test CASCADE; +DROP SCHEMA IF EXISTS test CASCADE; +DROP EXTENSION IF EXISTS aqo CASCADE; diff --git a/sql/schema.sql b/sql/schema.sql index 8e61dedb..28185710 100644 --- a/sql/schema.sql +++ b/sql/schema.sql @@ -1,4 +1,3 @@ -DROP EXTENSION IF EXISTS aqo CASCADE; DROP SCHEMA IF EXISTS test CASCADE; -- Check Zero-schema path behaviour @@ -11,6 +10,7 @@ CREATE EXTENSION aqo; -- fail CREATE SCHEMA IF NOT EXISTS test1; SET search_path TO test1, public; CREATE EXTENSION aqo; +SELECT true AS success FROM aqo_reset(); SET aqo.mode = 'intelligent'; CREATE TABLE test (id SERIAL, data TEXT); @@ -20,6 +20,8 @@ SELECT * FROM test; -- Check AQO service relations state after some 
manipulations -- Exclude fields with hash values from the queries. Hash is depend on -- nodefuncs code which is highly PostgreSQL version specific. -SELECT query_text FROM public.aqo_query_texts; -SELECT learn_aqo, use_aqo, auto_tuning FROM public.aqo_queries; +SELECT query_text FROM aqo_query_texts +ORDER BY (md5(query_text)) DESC; +SELECT learn_aqo, use_aqo, auto_tuning FROM aqo_queries +ORDER BY (learn_aqo, use_aqo, auto_tuning); DROP SCHEMA IF EXISTS test1 CASCADE; diff --git a/sql/statement_timeout.sql b/sql/statement_timeout.sql new file mode 100644 index 00000000..4ca9171f --- /dev/null +++ b/sql/statement_timeout.sql @@ -0,0 +1,90 @@ +-- Check the learning-on-timeout feature +-- For stabilized reproduction autovacuum must be disabled. +CREATE FUNCTION check_estimated_rows(text) RETURNS TABLE (estimated int) +LANGUAGE plpgsql AS $$ +DECLARE + ln text; + tmp text[]; + first_row bool := true; +BEGIN + FOR ln IN + execute format('explain %s', $1) + LOOP + IF first_row THEN + first_row := false; + tmp := regexp_match(ln, 'rows=(\d*)'); + RETURN QUERY SELECT tmp[1]::int; + END IF; + END LOOP; +END; $$; + +-- Preliminaries +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + +CREATE TABLE t AS SELECT * FROM generate_series(1,50) AS x; +ANALYZE t; +DELETE FROM t WHERE x > 5; -- Force optimizer to make overestimated prediction. + +SET aqo.mode = 'learn'; +SET aqo.show_details = 'off'; +SET aqo.learn_statement_timeout = 'on'; + +SET statement_timeout = 80; -- [0.1s] +SELECT *, pg_sleep(0.1) FROM t; + +RESET statement_timeout; +SELECT check_estimated_rows('SELECT *, pg_sleep(0.1) FROM t;'); -- haven't any partial data + +-- Don't learn because running node has smaller cardinality than an optimizer prediction +SET statement_timeout = 350; +SELECT *, pg_sleep(0.1) FROM t; + +RESET statement_timeout; +SELECT check_estimated_rows('SELECT *, pg_sleep(0.1) FROM t;'); + +-- We have a real learning data. 
+SET statement_timeout = 800; +SELECT *, pg_sleep(0.1) FROM t; + +RESET statement_timeout; +SELECT check_estimated_rows('SELECT *, pg_sleep(0.1) FROM t;'); + +-- Force to make an underestimated prediction +DELETE FROM t WHERE x > 2; +ANALYZE t; +INSERT INTO t (x) (SELECT * FROM generate_series(3,5) AS x); +SELECT true AS success FROM aqo_reset(); + +SET statement_timeout = 80; +SELECT *, pg_sleep(0.1) FROM t; -- Not learned + +RESET statement_timeout; +SELECT check_estimated_rows('SELECT *, pg_sleep(0.1) FROM t;'); + +SET statement_timeout = 350; +SELECT *, pg_sleep(0.1) FROM t; -- Learn! + +RESET statement_timeout; +SELECT check_estimated_rows('SELECT *, pg_sleep(0.1) FROM t;'); + +SET statement_timeout = 550; +SELECT *, pg_sleep(0.1) FROM t; -- Get reliable data + +RESET statement_timeout; +SELECT check_estimated_rows('SELECT *, pg_sleep(0.1) FROM t;'); + +-- Interrupted query should immediately appear in aqo_data +SELECT true AS success FROM aqo_reset(); +SET statement_timeout = 500; +SELECT count(*) FROM aqo_data; -- Must be zero +SELECT x, pg_sleep(0.1) FROM t WHERE x > 0; + +RESET statement_timeout; +SELECT count(*) FROM aqo_data; -- Must be one + +DROP TABLE t; +DROP FUNCTION check_estimated_rows; + +SELECT true AS success FROM aqo_reset(); +DROP EXTENSION aqo; diff --git a/sql/temp_tables.sql b/sql/temp_tables.sql new file mode 100644 index 00000000..e7bc8fe5 --- /dev/null +++ b/sql/temp_tables.sql @@ -0,0 +1,102 @@ +-- Preliminaries +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + +SET aqo.wide_search = 'on'; +SET aqo.mode = 'learn'; + +CREATE TEMP TABLE tt(); +CREATE TABLE pt(); + +-- Ignore queries with the only temp tables +SELECT count(*) FROM tt; +SELECT count(*) FROM tt AS t1, tt AS t2; +SELECT query_text FROM aqo_query_texts; -- Default row should be returned + +-- Should be stored in the ML base +SELECT count(*) FROM pt; +SELECT count(*) FROM pt, tt; +SELECT count(*) FROM pt AS pt1, tt AS tt1, tt AS tt2, pt AS pt2; 
+SELECT count(*) FROM aqo_data; -- Don't bother about false negatives because of trivial query plans + +DROP TABLE tt; +SELECT true AS success FROM aqo_cleanup(); +SELECT count(*) FROM aqo_data; -- Should return the same as previous call above +DROP TABLE pt; +SELECT true AS success FROM aqo_cleanup(); +SELECT count(*) FROM aqo_data; -- Should be 0 +SELECT query_text FROM aqo_queries aq LEFT JOIN aqo_query_texts aqt +ON aq.queryid = aqt.queryid +ORDER BY (md5(query_text)); -- The only the common class is returned + +-- Test learning on temporary table +CREATE TABLE pt AS SELECT x AS x, (x % 10) AS y FROM generate_series(1,100) AS x; +CREATE TEMP TABLE tt AS SELECT -x AS x, (x % 7) AS y FROM generate_series(1,100) AS x; +CREATE TEMP TABLE ttd AS -- the same structure as tt + SELECT -(x*3) AS x, (x % 9) AS y FROM generate_series(1,100) AS x; +ANALYZE pt,tt,ttd; + +create function check_estimated_rows(text) returns table (estimated int, actual int) +language plpgsql as +$$ +declare + ln text; + tmp text[]; + first_row bool := true; +begin + for ln in + execute format('explain analyze %s', $1) + loop + if first_row then + first_row := false; + tmp := regexp_match(ln, 'rows=(\d*) .* rows=(\d*)'); + return query select tmp[1]::int, tmp[2]::int; + end if; + end loop; +end; +$$; + +-- Check: AQO learns on queries with temp tables + +SELECT * FROM check_estimated_rows(' + SELECT pt.x, avg(pt.y) FROM pt,tt WHERE pt.x = tt.x GROUP BY (pt.x); +'); -- Estimation failed. Learn. 
+SELECT * FROM check_estimated_rows(' + SELECT pt.x, avg(pt.y) FROM pt,tt WHERE pt.x = tt.x GROUP BY (pt.x); +'); -- Should use AQO estimation +SELECT * FROM check_estimated_rows(' + SELECT pt.x, avg(pt.y) FROM pt,ttd WHERE pt.x = ttd.x GROUP BY (pt.x); +'); -- Should use AQO estimation with another temp table of the same structure + +SET aqo.mode = 'forced'; -- Now we use all fss records for each query +DROP TABLE pt; +SELECT true AS success FROM aqo_cleanup(); +CREATE TABLE pt AS SELECT x AS x, (x % 10) AS y FROM generate_series(1,100) AS x; +CREATE TEMP TABLE ttd1 AS + SELECT -(x*3) AS x, (x % 9) AS y1 FROM generate_series(1,100) AS x; +ANALYZE; + +-- Check: use AQO knowledge with different temp table of the same structure + +SELECT * FROM check_estimated_rows(' + SELECT pt.x, avg(pt.y) FROM pt,tt WHERE pt.x = tt.x GROUP BY (pt.x); +'); -- Estimation failed. Learn. +SELECT * FROM check_estimated_rows(' + SELECT pt.x, avg(pt.y) FROM pt,tt WHERE pt.x = tt.x GROUP BY (pt.x); +'); -- Should use AQO estimation +SELECT * FROM check_estimated_rows(' + SELECT pt.x, avg(pt.y) FROM pt,ttd WHERE pt.x = ttd.x GROUP BY (pt.x); +'); -- Should use AQO estimation with another temp table of the same structure +SELECT * FROM check_estimated_rows(' + SELECT pt1.x, avg(pt1.y) FROM pt AS pt1,ttd WHERE pt1.x = ttd.x GROUP BY (pt1.x); +'); -- Alias doesn't influence feature space +SELECT * FROM check_estimated_rows(' + SELECT pt.x, avg(pt.y) FROM pt,ttd1 WHERE pt.x = ttd1.x GROUP BY (pt.x); +'); -- Don't use AQO for temp table because of different attname + +-- Clear common parts of AQO state +RESET aqo.wide_search; +DROP EXTENSION aqo CASCADE; + +DROP TABLE pt CASCADE; +DROP FUNCTION check_estimated_rows; diff --git a/sql/top_queries.sql b/sql/top_queries.sql new file mode 100755 index 00000000..76000ac4 --- /dev/null +++ b/sql/top_queries.sql @@ -0,0 +1,56 @@ +-- Preliminaries +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + +SET aqo.mode = 
'disabled'; +SET aqo.force_collect_stat = 'on'; + +-- +-- Dummy test. CREATE TABLE shouldn't be found in the ML storage. But a simple +-- select must recorded. Also here we test on gathering a stat on temp and plain +-- relations. +-- XXX: Right now we ignore queries if no one permanent table is touched. +-- +CREATE TEMP TABLE ttt AS SELECT count(*) AS cnt FROM generate_series(1,10); +CREATE TABLE ttp AS SELECT count(*) AS cnt FROM generate_series(1,10); +SELECT count(*) AS cnt FROM ttt WHERE cnt % 100 = 0; -- Ignore it +SELECT count(*) AS cnt FROM ttp WHERE cnt % 100 = 0; +SELECT num FROM aqo_execution_time(true); -- Just for checking, return zero. +SELECT num FROM aqo_execution_time(false); + +-- Without the AQO control queries with and without temp tables are logged. +SELECT query_text,nexecs +FROM aqo_execution_time(false) ce, aqo_query_texts aqt +WHERE ce.id = aqt.queryid +ORDER BY (md5(query_text)); + +-- +-- num of query which uses the table t2 should be bigger than num of query which +-- uses the table t1 and must be the first +-- +CREATE TABLE t1 AS SELECT mod(gs,10) AS x, mod(gs+1,10) AS y + FROM generate_series(1,1000) AS gs; +CREATE TABLE t2 AS SELECT mod(gs,10) AS x, mod(gs+1,10) AS y + FROM generate_series(1,100000) AS gs; +SELECT count(*) FROM (SELECT x, y FROM t1 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1; +SELECT count(*) FROM (SELECT x, y FROM t2 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1; + +SELECT to_char(error, '9.99EEEE') FROM aqo_cardinality_error(false) AS te +WHERE te.fshash = ( + SELECT fs FROM aqo_queries + WHERE aqo_queries.queryid = ( + SELECT aqo_query_texts.queryid FROM aqo_query_texts + WHERE query_text = 'SELECT count(*) FROM (SELECT x, y FROM t2 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1;' + ) +); + +-- Should return zero +SELECT count(*) FROM aqo_cardinality_error(true); + +-- Fix list of logged queries +SELECT query_text,nexecs +FROM aqo_cardinality_error(false) ce, aqo_query_texts aqt +WHERE ce.id = 
aqt.queryid +ORDER BY (md5(query_text)); + +DROP EXTENSION aqo; diff --git a/sql/unsupported.sql b/sql/unsupported.sql new file mode 100644 index 00000000..e5853306 --- /dev/null +++ b/sql/unsupported.sql @@ -0,0 +1,204 @@ +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + +-- Utility tool. Allow to filter system-dependent strings from an explain output. +CREATE OR REPLACE FUNCTION expln(query_string text) RETURNS SETOF text AS $$ +BEGIN + RETURN QUERY + EXECUTE format('%s', query_string); + RETURN; +END; +$$ LANGUAGE PLPGSQL; + +SET aqo.mode = 'learn'; +SET aqo.show_details = 'on'; + +DROP TABLE IF EXISTS t; +CREATE TABLE t AS SELECT (gs.* / 50) AS x FROM generate_series(1,1000) AS gs; +ANALYZE t; + +CREATE TABLE t1 AS SELECT mod(gs,10) AS x, mod(gs+1,10) AS y + FROM generate_series(1,1000) AS gs; +ANALYZE t, t1; + +-- +-- Do not support HAVING clauses for now. +-- +SELECT count(*) FROM (SELECT * FROM t GROUP BY (x) HAVING x > 3) AS q1; +EXPLAIN (COSTS OFF) + SELECT count(*) FROM (SELECT * FROM t GROUP BY (x) HAVING x > 3) AS q1; + +SELECT str FROM expln(' +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT * FROM t GROUP BY (x) HAVING x > 3; +') AS str WHERE str NOT LIKE '%Memory Usage%'; + +-- +-- Doesn't estimates GROUP BY clause +-- +SELECT count(*) FROM (SELECT count(*) FROM t1 GROUP BY (x,y)) AS q1; +EXPLAIN (COSTS OFF) + SELECT count(*) FROM (SELECT count(*) FROM t1 GROUP BY (x,y)) AS q1; + +SELECT count(*) FROM (SELECT count(*) FROM t1 GROUP BY (x,x*y)) AS q1; +EXPLAIN (COSTS OFF) + SELECT count(*) FROM (SELECT count(*) FROM t1 GROUP BY (x,x*y)) AS q1; + +SELECT count(*) FROM ( + SELECT count(*) AS x FROM ( + SELECT count(*) FROM t1 GROUP BY (x,y) + ) AS q1 +) AS q2 +WHERE q2.x > 1; + +SELECT count(*) FROM ( + SELECT count(*) AS x FROM ( + SELECT count(*) FROM t1 GROUP BY (x,y) + ) AS q1 +) AS q2 +WHERE q2.x > 1; + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM ( + SELECT count(*) AS x FROM ( + SELECT count(*) FROM t1 
GROUP BY (x,y) + ) AS q1 +) AS q2 +WHERE q2.x > 1; + +-- +-- Doesn't support GROUPING SETS clause +-- +SELECT count(*) FROM (SELECT x, y FROM t1 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1; +EXPLAIN (COSTS OFF) + SELECT count(*) FROM (SELECT x, y FROM t1 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1; + +-- +-- The subplans issue +-- +SELECT count(*) FROM t WHERE x = (SELECT avg(x) FROM t WHERE x = 1); +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT count(*) FROM t WHERE x = ( + SELECT avg(x) FROM t WHERE x = 1 + ); + +SELECT count(*) FROM t WHERE x = (SELECT avg(x) FROM t t0 WHERE t0.x = t.x); +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT count(*) FROM t WHERE x = ( + SELECT avg(x) FROM t t0 WHERE t0.x = t.x + ); + +-- Two identical subplans in a clause list +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT count(*) FROM t WHERE + x = (SELECT avg(x) FROM t t0 WHERE t0.x = t.x + 21) OR + x IN (SELECT avg(x) FROM t t0 WHERE t0.x = t.x + 21); +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT count(*) FROM t WHERE + x = (SELECT avg(x) FROM t t0 WHERE t0.x = t.x + 21) OR + x IN (SELECT avg(x) FROM t t0 WHERE t0.x = t.x + 21); + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT * FROM t WHERE + x = (SELECT x FROM t t0 WHERE t0.x = t.x LIMIT 1) AND + x IN (SELECT x FROM t t0 WHERE t0.x = t.x); +-- No prediction for top SeqScan, because it fss is changed +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT * FROM t WHERE + x = (SELECT x FROM t t0 WHERE t0.x = t.x LIMIT 1) AND + x IN (SELECT x FROM t t0 WHERE t0.x = t.x); + +-- It's OK to use the knowledge for a query with different constants. +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT count(*) FROM t WHERE + x = (SELECT avg(x) FROM t t0 WHERE t0.x = t.x + 22) OR + x IN (SELECT avg(x) FROM t t0 WHERE t0.x = t.x + 23); + +-- Different SubPlans in the quals of leafs of JOIN. 
+SELECT count(*) FROM + (SELECT * FROM t WHERE x % 3 < (SELECT avg(x) FROM t t0 WHERE t0.x = t.x)) AS q1 + JOIN + (SELECT * FROM t WHERE x % 3 < (SELECT avg(x) FROM t t0 WHERE t0.x <> t.x)) AS q2 + ON q1.x = q2.x+1; +SELECT str FROM expln(' +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT count(*) FROM + (SELECT * FROM t WHERE x % 3 < (SELECT avg(x) FROM t t0 WHERE t0.x = t.x)) AS q1 + JOIN + (SELECT * FROM t WHERE x % 3 < (SELECT avg(x) FROM t t0 WHERE t0.x <> t.x)) AS q2 + ON q1.x = q2.x+1; +') AS str WHERE str NOT LIKE '%Memory Usage%'; + +-- Two identical subplans in a clause +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT count(*) FROM t WHERE (SELECT avg(x) FROM t t0 WHERE t0.x = t.x) = + (SELECT avg(x) FROM t t0 WHERE t0.x = t.x); +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT count(*) FROM t WHERE (SELECT avg(x) FROM t t0 WHERE t0.x = t.x) = + (SELECT avg(x) FROM t t0 WHERE t0.x = t.x); + +-- +-- Not executed nodes +-- +SELECT * FROM + (SELECT * FROM t WHERE x < 0) AS t0 + JOIN + (SELECT * FROM t WHERE x > 20) AS t1 + USING(x); +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM + (SELECT * FROM t WHERE x < 0) AS t0 + JOIN + (SELECT * FROM t WHERE x > 20) AS t1 + USING(x); + +-- AQO needs to predict total fetched tuples in a table. +-- +-- At a non-leaf node we have prediction about input tuples - is a number of +-- predicted output rows in underlying node. But for Scan nodes we don't have +-- any prediction on number of fetched tuples. +-- So, if selectivity was wrong we could make bad choice of Scan operation. +-- For example, we could choose suboptimal index. + +-- Turn off statistics gathering for simple demonstration of filtering problem. 
+ALTER TABLE t SET (autovacuum_enabled = 'false'); +CREATE INDEX ind1 ON t(x); + +SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1; +EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) + SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1; + +-- Because of bad statistics we use a last created index instead of best choice. +-- Here we filter more tuples than with the ind1 index. +CREATE INDEX ind2 ON t(mod(x,3)); +SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1; +SELECT str FROM expln(' + EXPLAIN (ANALYZE, VERBOSE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1') AS str +WHERE str NOT LIKE '%Heap Blocks%'; + +-- Best choice is ... +ANALYZE t; +EXPLAIN (COSTS OFF) + SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1; + +-- XXX: Do we stuck into an unstable behavior of an error value? +-- Live with this variant of the test for some time. +SELECT round(error::numeric, 3) AS error, query_text +FROM aqo_cardinality_error(true) cef, aqo_query_texts aqt +WHERE aqt.queryid = cef.id +ORDER BY (md5(query_text),error) DESC; + +DROP TABLE t,t1 CASCADE; -- delete all tables used in the test + +SELECT count(*) FROM aqo_data; -- Just to detect some changes in the logic. May some false positives really bother us here? +SELECT true AS success FROM aqo_cleanup(); +SELECT count(*) FROM aqo_data; -- No one row should be returned + +-- Look for any remaining queries in the ML storage. 
+SELECT to_char(error, '9.99EEEE')::text AS error, query_text +FROM aqo_cardinality_error(true) cef, aqo_query_texts aqt +WHERE aqt.queryid = cef.id +ORDER BY (md5(query_text),error) DESC; + +DROP EXTENSION aqo; diff --git a/sql/update_functions.sql b/sql/update_functions.sql new file mode 100644 index 00000000..4c7fee53 --- /dev/null +++ b/sql/update_functions.sql @@ -0,0 +1,218 @@ +-- Preliminaries +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + +CREATE TABLE aqo_test1(a int, b int); +WITH RECURSIVE t(a, b) +AS ( + VALUES (1, 2) + UNION ALL + SELECT t.a + 1, t.b + 1 FROM t WHERE t.a < 20 +) INSERT INTO aqo_test1 (SELECT * FROM t); +CREATE INDEX aqo_test1_idx_a ON aqo_test1 (a); +ANALYZE aqo_test1; + +CREATE TABLE aqo_test2(a int); +WITH RECURSIVE t(a) +AS ( + VALUES (0) + UNION ALL + SELECT t.a + 1 FROM t WHERE t.a < 100000 +) INSERT INTO aqo_test2 (SELECT * FROM t); +CREATE INDEX aqo_test2_idx_a ON aqo_test2 (a); +ANALYZE aqo_test2; + +SET aqo.mode='intelligent'; + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a; +SELECT count(*) FROM aqo_test1 a, aqo_test2 b, aqo_test2 c WHERE a.a = b.a AND b.a = c.a; + +SET aqo.mode='learn'; + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a; +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a; +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a; + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 10; + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 10 and b.a > 200; +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 11 and b.a > 200; +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 12 and b.a > 200; +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 14 and b.a > 200; + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 15 and +b.a > 300 and b.a < 500; +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 15 
and +b.a > 300 and b.a < 500; +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 15 and +b.a > 300 and b.a < 500; + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 15 and +(b.a > 300 and b.a < 500 or b.a > 100 and b.a < 200); +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 15 and +(b.a > 300 and b.a < 500 or b.a > 100 and b.a < 200); +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 15 and +(b.a > 300 and b.a < 500 or b.a > 100 and b.a < 200); +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 15 and +(b.a > 300 and b.a < 500 or b.a > 100 and b.a < 200); +SET aqo.mode='controlled'; + +CREATE TABLE aqo_query_texts_dump AS SELECT * FROM aqo_query_texts; +CREATE TABLE aqo_queries_dump AS SELECT * FROM aqo_queries; +CREATE TABLE aqo_query_stat_dump AS SELECT * FROM aqo_query_stat; +CREATE TABLE aqo_data_dump AS SELECT * FROM aqo_data; + +SELECT true AS success FROM aqo_reset(); + +-- +-- aqo_query_texts_update() testing. +-- + +-- Populate aqo_query_texts with dump data. +SELECT aqo_query_texts_update(queryid, query_text) AS res +FROM aqo_query_texts_dump +ORDER BY res; + +-- Check if data is the same as in source, no result rows expected. +(TABLE aqo_query_texts EXCEPT TABLE aqo_query_texts_dump) +UNION ALL +(TABLE aqo_query_texts_dump EXCEPT TABLE aqo_query_texts); + +-- Update aqo_query_texts with dump data. +SELECT aqo_query_texts_update(queryid, query_text) AS res +FROM aqo_query_texts_dump +ORDER BY res; + +-- Check if data is the same as in source, no result rows expected. +(TABLE aqo_query_texts EXCEPT TABLE aqo_query_texts_dump) +UNION ALL +(TABLE aqo_query_texts_dump EXCEPT TABLE aqo_query_texts); + +-- +-- aqo_queries_update testing. +-- + +-- Populate aqo_queries with dump data. 
+SELECT aqo_queries_update(queryid, fs, learn_aqo, use_aqo, auto_tuning) AS res +FROM aqo_queries_dump +ORDER BY res; + +-- Check if data is the same as in source, no result rows expected. +(TABLE aqo_queries_dump EXCEPT TABLE aqo_queries) +UNION ALL +(TABLE aqo_queries EXCEPT TABLE aqo_queries_dump); + +-- Update aqo_queries with dump data. +SELECT aqo_queries_update(queryid, fs, learn_aqo, use_aqo, auto_tuning) AS res +FROM aqo_queries_dump +ORDER BY res; + +-- Check if data is the same as in source, no result rows expected. +(TABLE aqo_queries_dump EXCEPT TABLE aqo_queries) +UNION ALL +(TABLE aqo_queries EXCEPT TABLE aqo_queries_dump); + +-- +-- aqo_query_stat_update() testing. +-- + +-- Populate aqo_query_stat with dump data. +SELECT aqo_query_stat_update(queryid, execution_time_with_aqo, +execution_time_without_aqo, planning_time_with_aqo, planning_time_without_aqo, +cardinality_error_with_aqo, cardinality_error_without_aqo, executions_with_aqo, +executions_without_aqo) AS res +FROM aqo_query_stat_dump +ORDER BY res; + +-- Check if data is the same as in source, no result rows expected. +(TABLE aqo_query_stat_dump EXCEPT TABLE aqo_query_stat) +UNION ALL +(TABLE aqo_query_stat EXCEPT TABLE aqo_query_stat_dump); + +-- Update aqo_query_stat with dump data. +SELECT aqo_query_stat_update(queryid, execution_time_with_aqo, +execution_time_without_aqo, planning_time_with_aqo, planning_time_without_aqo, +cardinality_error_with_aqo, cardinality_error_without_aqo, executions_with_aqo, +executions_without_aqo) AS res +FROM aqo_query_stat_dump +ORDER BY res; + +-- Check if data is the same as in source, no result rows expected. +(TABLE aqo_query_stat_dump EXCEPT TABLE aqo_query_stat) +UNION ALL +(TABLE aqo_query_stat EXCEPT TABLE aqo_query_stat_dump); + +-- +-- aqo_data_update() testing. +-- + +-- Populate aqo_data with dump data. 
+SELECT count(*) AS res1 FROM + aqo_data_dump, + LATERAL aqo_data_update(fs, fss, nfeatures, features, targets, reliability, oids) AS ret +WHERE ret \gset + +-- Check if data is the same as in source, no result rows expected. +(TABLE aqo_data_dump EXCEPT TABLE aqo_data) +UNION ALL +(TABLE aqo_data EXCEPT TABLE aqo_data_dump); + +-- Update aqo_data with dump data. +SELECT count(*) AS res2 FROM + aqo_data_dump, + LATERAL aqo_data_update(fs, fss, nfeatures, features, targets, reliability, oids) AS ret +WHERE ret \gset + +SELECT :res1 = :res2 AS ml_sizes_are_equal; + +-- Check if data is the same as in source, no result rows expected. +(TABLE aqo_data_dump EXCEPT TABLE aqo_data) +UNION ALL +(TABLE aqo_data EXCEPT TABLE aqo_data_dump); + + +-- Reject aqo_query_stat_update if there is NULL elements in array arg. +SELECT aqo_query_stat_update(1, '{NULL, 1}', '{1, 1}', '{1, 1}', '{1, 1}', +'{1, 1}', '{1, 1}', 1, 1); + +-- Reject aqo_query_stat_update if arrays don't have the same size. +SELECT aqo_query_stat_update(1, '{1, 1}', '{1, 1, 1}', '{1, 1}', '{1, 1}', +'{1, 1}', '{1, 1}', 1, 1); + +-- Reject aqo_query_stat_update if there are negative executions. +SELECT aqo_query_stat_update(1, '{1, 1}', '{1, 1}', '{1, 1}', '{1, 1}', +'{1, 1}', '{1, 1}', -1, 1); +SELECT aqo_query_stat_update(1, '{1, 1}', '{1, 1}', '{1, 1}', '{1, 1}', +'{1, 1}', '{1, 1}', 1, -1); + +-- Reject aqo_query_data_update if number of matrix columns and nfeatures +-- are different. +SELECT aqo_data_update(1, 1, 0, '{{1}}', '{1, 1}', '{1, 1}', '{1, 2, 3}'); + +-- Reject aqo_query_data_update if there is NULL elements in array arg. +SELECT aqo_data_update(1, 1, 1, '{{NULL}}', '{1}', '{1}', '{1, 2, 3}'); +SELECT aqo_data_update(1, 1, 1, '{{1}}', '{NULL}', '{1}', '{1, 2, 3}'); +SELECT aqo_data_update(1, 1, 1, '{{1}}', '{1}', '{NULL}', '{1, 2, 3}'); + +-- Reject aqo_query_data_update if Oids is NULL. 
+SELECT aqo_data_update(1, 1, 1, '{{1}}', '{1}', '{1}', NULL); + +-- Reject aqo_query_data_update if arrays don't have the same number of rows. +SELECT aqo_data_update(1, 1, 1, '{{1}}', '{1, 1}', '{1}', '{1, 2, 3}'); +SELECT aqo_data_update(1, 1, 1, '{{1}}', '{1}', '{1, 1}', '{1, 2, 3}'); +SELECT aqo_data_update(1, 1, 1, '{{1}, {2}}', '{1}', '{1}', '{1, 2, 3}'); + +SET aqo.mode='disabled'; + +-- Testing the minimum querytext size +SELECT true AS success FROM aqo_reset(); +SET aqo.querytext_max_size = 0; +SET aqo.querytext_max_size = 1; +SELECT queryid, query_text FROM aqo_query_texts WHERE queryid > 0; +SELECT aqo_query_texts_update(1, 'test'); +SELECT queryid, query_text FROM aqo_query_texts WHERE queryid > 0; + +DROP EXTENSION aqo CASCADE; + +DROP TABLE aqo_test1, aqo_test2; +DROP TABLE aqo_query_texts_dump, aqo_queries_dump, aqo_query_stat_dump, aqo_data_dump; diff --git a/storage.c b/storage.c index 936a37c2..a65ce463 100644 --- a/storage.c +++ b/storage.c @@ -8,1007 +8,2901 @@ * ******************************************************************************* * - * Copyright (c) 2016-2020, Postgres Professional + * Copyright (c) 2016-2023, Postgres Professional * * IDENTIFICATION * aqo/storage.c * */ +#include "postgres.h" + +#include + +#include "funcapi.h" +#include "miscadmin.h" +#include "pgstat.h" +#include "storage/ipc.h" + #include "aqo.h" +#include "aqo_shared.h" +#include "machine_learning.h" +#include "storage.h" + + +/* AQO storage file names */ +#define PGAQO_STAT_FILE PGSTAT_STAT_PERMANENT_DIRECTORY "/pgaqo_statistics.stat" +#define PGAQO_TEXT_FILE PGSTAT_STAT_PERMANENT_DIRECTORY "/pgaqo_query_texts.stat" +#define PGAQO_DATA_FILE PGSTAT_STAT_PERMANENT_DIRECTORY "/pgaqo_data.stat" +#define PGAQO_QUERIES_FILE PGSTAT_STAT_PERMANENT_DIRECTORY "/pgaqo_queries.stat" + +#define AQO_DATA_COLUMNS (7) +#define FormVectorSz(v_name) (form_vector((v_name), (v_name ## _size))) + + +typedef enum { + QUERYID = 0, EXEC_TIME_AQO, EXEC_TIME, PLAN_TIME_AQO, PLAN_TIME, 
+ EST_ERROR_AQO, EST_ERROR, NEXECS_AQO, NEXECS, TOTAL_NCOLS +} aqo_stat_cols; -#include "access/heapam.h" -#include "access/table.h" -#include "access/tableam.h" +typedef enum { + QT_QUERYID = 0, QT_QUERY_STRING, QT_TOTAL_NCOLS +} aqo_qtexts_cols; +typedef enum { + AD_FS = 0, AD_FSS, AD_NFEATURES, AD_FEATURES, AD_TARGETS, AD_RELIABILITY, + AD_OIDS, AD_TOTAL_NCOLS +} aqo_data_cols; + +typedef enum { + AQ_QUERYID = 0, AQ_FS, AQ_LEARN_AQO, AQ_USE_AQO, AQ_AUTO_TUNING, AQ_SMART_TIMEOUT, AQ_COUNT_INCREASE_TIMEOUT, + AQ_TOTAL_NCOLS +} aqo_queries_cols; + +typedef void* (*form_record_t) (void *ctx, size_t *size); +typedef bool (*deform_record_t) (void *data, size_t size); + + +int querytext_max_size = 1000; +int dsm_size_max = 100; /* in MB */ + +HTAB *stat_htab = NULL; +HTAB *queries_htab = NULL; +HTAB *qtexts_htab = NULL; +dsa_area *qtext_dsa = NULL; +HTAB *data_htab = NULL; +dsa_area *data_dsa = NULL; HTAB *deactivated_queries = NULL; -static ArrayType *form_matrix(double **matrix, int nrows, int ncols); -static void deform_matrix(Datum datum, double **matrix); +/* + * Used to check data file consistency + * When changing data structures, PGAQO_FILE_HEADER should also be changed. + * In this case, all AQO file storages will be reset. + */ +static const uint32 PGAQO_FILE_HEADER = 0x20230330; +static const uint32 PGAQO_PG_MAJOR_VERSION = PG_VERSION_NUM / 100; + +/* + * Used for internal aqo_queries_store() calls. + * No NULL arguments expected in this case. 
+ */ +AqoQueriesNullArgs aqo_queries_nulls = { false, false, false, false }; + + +static ArrayType *form_matrix(double *matrix, int nrows, int ncols); +static void dsa_init(void); +static int data_store(const char *filename, form_record_t callback, + long nrecs, void *ctx); +static void data_load(const char *filename, deform_record_t callback, void *ctx); +static size_t _compute_data_dsa(const DataEntry *entry); + +static bool _aqo_stat_remove(uint64 queryid); +static bool _aqo_queries_remove(uint64 queryid); +static bool _aqo_qtexts_remove(uint64 queryid); +static bool _aqo_data_remove(data_key *key); +static bool nearest_neighbor(double **matrix, int old_rows, double *neighbor, int cols); +static double fs_distance(double *a, double *b, int len); + +PG_FUNCTION_INFO_V1(aqo_query_stat); +PG_FUNCTION_INFO_V1(aqo_query_texts); +PG_FUNCTION_INFO_V1(aqo_data); +PG_FUNCTION_INFO_V1(aqo_queries); +PG_FUNCTION_INFO_V1(aqo_enable_query); +PG_FUNCTION_INFO_V1(aqo_disable_query); +PG_FUNCTION_INFO_V1(aqo_queries_update); +PG_FUNCTION_INFO_V1(aqo_reset); +PG_FUNCTION_INFO_V1(aqo_cleanup); +PG_FUNCTION_INFO_V1(aqo_drop_class); +PG_FUNCTION_INFO_V1(aqo_cardinality_error); +PG_FUNCTION_INFO_V1(aqo_execution_time); +PG_FUNCTION_INFO_V1(aqo_query_texts_update); +PG_FUNCTION_INFO_V1(aqo_query_stat_update); +PG_FUNCTION_INFO_V1(aqo_data_update); + + +bool +update_fss_ext(uint64 fs, int fss, OkNNrdata *data, List *reloids) +{ + /* + * 'reloids' explictly passed to aqo_data_store(). + * So AqoDataArgs fields 'nrels' & 'oids' are + * set to 0 and NULL repectively. + */ + AqoDataArgs data_arg = + {data->rows, data->cols, 0, data->matrix, + data->targets, data->rfactors, NULL}; + return aqo_data_store(fs, fss, &data_arg, reloids); +} + +/* + * Forms ArrayType object for storage from simple C-array matrix. 
+ */ +static ArrayType * +form_matrix(double *matrix, int nrows, int ncols) +{ + Datum *elems; + ArrayType *array; + int dims[2] = {nrows, ncols}; + int lbs[2]; + int i, + j; + + lbs[0] = lbs[1] = 1; + elems = palloc(sizeof(*elems) * nrows * ncols); + for (i = 0; i < nrows; ++i) + for (j = 0; j < ncols; ++j) + { + elems[i * ncols + j] = Float8GetDatum(matrix[i * ncols + j]); + Assert(!isnan(matrix[i * ncols + j])); + } + + array = construct_md_array(elems, NULL, 2, dims, lbs, + FLOAT8OID, 8, FLOAT8PASSBYVAL, 'd'); + return array; +} + +/* + * Forms ArrayType object for storage from simple C-array vector. + */ +static ArrayType * +form_vector(double *vector, int nrows) +{ + Datum *elems; + ArrayType *array; + int dims[1]; + int lbs[1]; + int i; + + dims[0] = nrows; + lbs[0] = 1; + elems = palloc(sizeof(*elems) * nrows); + for (i = 0; i < nrows; ++i) + elems[i] = Float8GetDatum(vector[i]); + array = construct_md_array(elems, NULL, 1, dims, lbs, + FLOAT8OID, 8, FLOAT8PASSBYVAL, 'd'); + return array; +} + +/* Creates a storage for hashes of deactivated queries */ +void +init_deactivated_queries_storage(void) +{ + HASHCTL hash_ctl; + + /* Create the hashtable proper */ + MemSet(&hash_ctl, 0, sizeof(hash_ctl)); + hash_ctl.keysize = sizeof(uint64); + hash_ctl.entrysize = sizeof(uint64); + deactivated_queries = hash_create("AQO deactivated queries", + 128, /* start small and extend */ + &hash_ctl, + HASH_ELEM | HASH_BLOBS); +} + +/* Checks whether the query with given hash is deactivated */ +bool +query_is_deactivated(uint64 queryid) +{ + bool found; + + (void) hash_search(deactivated_queries, &queryid, HASH_FIND, &found); + return found; +} + +/* Adds given query hash into the set of hashes of deactivated queries */ +void +add_deactivated_query(uint64 queryid) +{ + (void) hash_search(deactivated_queries, &queryid, HASH_ENTER, NULL); +} + +static void +reset_deactivated_queries(void) +{ + HASH_SEQ_STATUS hash_seq; + uint64 *queryid; + + hash_seq_init(&hash_seq, 
deactivated_queries); + while ((queryid = hash_seq_search(&hash_seq)) != NULL) + { + if (!hash_search(deactivated_queries, queryid, HASH_REMOVE, NULL)) + elog(PANIC, "[AQO] hash table corrupted"); + } +} + +/* + * Update AQO statistics. + * + * In append mode, append one element to exec_time, plan_time, est_error arrays + * (or their *_aqo counterparts, if use_aqo is true). Without append mode, add a + * record (or overwrite an existing) to stat storage for the query class. + * Returns a copy of stat entry, allocated in current memory context. Caller is + * in charge to free this struct after usage. + * If stat hash table is full, return NULL and log this fact. + */ +StatEntry * +aqo_stat_store(uint64 queryid, bool use_aqo, AqoStatArgs *stat_arg, + bool append_mode) +{ + StatEntry *entry; + bool found; + int pos; + bool tblOverflow; + HASHACTION action; + + Assert(stat_htab); + + LWLockAcquire(&aqo_state->stat_lock, LW_EXCLUSIVE); + tblOverflow = hash_get_num_entries(stat_htab) < fs_max_items ? false : true; + action = tblOverflow ? HASH_FIND : HASH_ENTER; + entry = (StatEntry *) hash_search(stat_htab, &queryid, action, &found); + + /* Initialize entry on first usage */ + if (!found) + { + uint64 qid; + + if (action == HASH_FIND) + { + /* + * Hash table is full. To avoid possible problems - don't try to add + * more, just exit + */ + LWLockRelease(&aqo_state->stat_lock); + ereport(LOG, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("[AQO] Stat storage is full. 
No more feature spaces can be added."), + errhint("Increase value of aqo.fs_max_items on restart of the instance"))); + return NULL; + } + + qid = entry->queryid; + memset(entry, 0, sizeof(StatEntry)); + entry->queryid = qid; + } + + if (!append_mode) + { + size_t sz; + if (found) + { + memset(entry, 0, sizeof(StatEntry)); + entry->queryid = queryid; + } + + sz = stat_arg->cur_stat_slot_aqo * sizeof(entry->est_error_aqo[0]); + memcpy(entry->plan_time_aqo, stat_arg->plan_time_aqo, sz); + memcpy(entry->exec_time_aqo, stat_arg->exec_time_aqo, sz); + memcpy(entry->est_error_aqo, stat_arg->est_error_aqo, sz); + entry->execs_with_aqo = stat_arg->execs_with_aqo; + entry->cur_stat_slot_aqo = stat_arg->cur_stat_slot_aqo; + + sz = stat_arg->cur_stat_slot * sizeof(entry->est_error[0]); + memcpy(entry->plan_time, stat_arg->plan_time, sz); + memcpy(entry->exec_time, stat_arg->exec_time, sz); + memcpy(entry->est_error, stat_arg->est_error, sz); + entry->execs_without_aqo = stat_arg->execs_without_aqo; + entry->cur_stat_slot = stat_arg->cur_stat_slot; + + aqo_state->stat_changed = true; + LWLockRelease(&aqo_state->stat_lock); + return entry; + } + + /* Update the entry data */ + + if (use_aqo) + { + Assert(entry->cur_stat_slot_aqo >= 0); + if (entry->cur_stat_slot_aqo < STAT_SAMPLE_SIZE) + entry->cur_stat_slot_aqo++; + else + { + size_t sz = (STAT_SAMPLE_SIZE - 1) * sizeof(entry->est_error_aqo[0]); + + Assert(entry->cur_stat_slot_aqo == STAT_SAMPLE_SIZE); + + memmove(entry->plan_time_aqo, &entry->plan_time_aqo[1], sz); + memmove(entry->exec_time_aqo, &entry->exec_time_aqo[1], sz); + memmove(entry->est_error_aqo, &entry->est_error_aqo[1], sz); + } + + pos = entry->cur_stat_slot_aqo - 1; + entry->execs_with_aqo++; + entry->plan_time_aqo[pos] = *stat_arg->plan_time_aqo; + entry->exec_time_aqo[pos] = *stat_arg->exec_time_aqo; + entry->est_error_aqo[pos] = *stat_arg->est_error_aqo; + } + else + { + Assert(entry->cur_stat_slot >= 0); + if (entry->cur_stat_slot < STAT_SAMPLE_SIZE) + 
entry->cur_stat_slot++; + else + { + size_t sz = (STAT_SAMPLE_SIZE - 1) * sizeof(entry->est_error[0]); + + Assert(entry->cur_stat_slot == STAT_SAMPLE_SIZE); + + memmove(entry->plan_time, &entry->plan_time[1], sz); + memmove(entry->exec_time, &entry->exec_time[1], sz); + memmove(entry->est_error, &entry->est_error[1], sz); + } + + pos = entry->cur_stat_slot - 1; + entry->execs_without_aqo++; + entry->plan_time[pos] = *stat_arg->plan_time; + entry->exec_time[pos] = *stat_arg->exec_time; + entry->est_error[pos] = *stat_arg->est_error; + } + + entry = memcpy(palloc(sizeof(StatEntry)), entry, sizeof(StatEntry)); + aqo_state->stat_changed = true; + LWLockRelease(&aqo_state->stat_lock); + return entry; +} + +/* + * Returns AQO statistics on controlled query classes. + */ +Datum +aqo_query_stat(PG_FUNCTION_ARGS) +{ + ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; + TupleDesc tupDesc; + MemoryContext per_query_ctx; + MemoryContext oldcontext; + Tuplestorestate *tupstore; + Datum values[TOTAL_NCOLS]; + bool nulls[TOTAL_NCOLS]; + HASH_SEQ_STATUS hash_seq; + StatEntry *entry; + + /* check to see if caller supports us returning a tuplestore */ + if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("set-valued function called in context that cannot accept a set"))); + if (!(rsinfo->allowedModes & SFRM_Materialize)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("materialize mode required, but it is not allowed in this context"))); + + /* Switch into long-lived context to construct returned data structures */ + per_query_ctx = rsinfo->econtext->ecxt_per_query_memory; + oldcontext = MemoryContextSwitchTo(per_query_ctx); + + /* Build a tuple descriptor for our result type */ + if (get_call_result_type(fcinfo, NULL, &tupDesc) != TYPEFUNC_COMPOSITE) + elog(ERROR, "return type must be a row type"); + + if (tupDesc->natts != TOTAL_NCOLS) + elog(ERROR, "[AQO] Incorrect number of 
output arguments"); + + tupstore = tuplestore_begin_heap(true, false, work_mem); + rsinfo->returnMode = SFRM_Materialize; + rsinfo->setResult = tupstore; + rsinfo->setDesc = tupDesc; + + MemoryContextSwitchTo(oldcontext); + + memset(nulls, 0, TOTAL_NCOLS); + LWLockAcquire(&aqo_state->stat_lock, LW_SHARED); + hash_seq_init(&hash_seq, stat_htab); + while ((entry = hash_seq_search(&hash_seq)) != NULL) + { + values[QUERYID] = Int64GetDatum(entry->queryid); + values[NEXECS] = Int64GetDatum(entry->execs_without_aqo); + values[NEXECS_AQO] = Int64GetDatum(entry->execs_with_aqo); + values[EXEC_TIME_AQO] = PointerGetDatum(form_vector(entry->exec_time_aqo, entry->cur_stat_slot_aqo)); + values[EXEC_TIME] = PointerGetDatum(form_vector(entry->exec_time, entry->cur_stat_slot)); + values[PLAN_TIME_AQO] = PointerGetDatum(form_vector(entry->plan_time_aqo, entry->cur_stat_slot_aqo)); + values[PLAN_TIME] = PointerGetDatum(form_vector(entry->plan_time, entry->cur_stat_slot)); + values[EST_ERROR_AQO] = PointerGetDatum(form_vector(entry->est_error_aqo, entry->cur_stat_slot_aqo)); + values[EST_ERROR] = PointerGetDatum(form_vector(entry->est_error, entry->cur_stat_slot)); + tuplestore_putvalues(tupstore, tupDesc, values, nulls); + } + + LWLockRelease(&aqo_state->stat_lock); + tuplestore_donestoring(tupstore); + return (Datum) 0; +} + +static long +aqo_stat_reset(void) +{ + HASH_SEQ_STATUS hash_seq; + StatEntry *entry; + long num_remove = 0; + long num_entries; + + LWLockAcquire(&aqo_state->stat_lock, LW_EXCLUSIVE); + num_entries = hash_get_num_entries(stat_htab); + hash_seq_init(&hash_seq, stat_htab); + while ((entry = hash_seq_search(&hash_seq)) != NULL) + { + if (!hash_search(stat_htab, &entry->queryid, HASH_REMOVE, NULL)) + elog(PANIC, "[AQO] hash table corrupted"); + num_remove++; + } + aqo_state->stat_changed = true; + LWLockRelease(&aqo_state->stat_lock); + + if (num_remove != num_entries) + elog(ERROR, "[AQO] Stat memory storage is corrupted or parallel access without a lock was 
detected."); + + aqo_stat_flush(); + + return num_remove; +} + +static void * +_form_stat_record_cb(void *ctx, size_t *size) +{ + HASH_SEQ_STATUS *hash_seq = (HASH_SEQ_STATUS *) ctx; + StatEntry *entry; + + *size = sizeof(StatEntry); + entry = hash_seq_search(hash_seq); + if (entry == NULL) + return NULL; + + return memcpy(palloc(*size), entry, *size); +} + +/* Implement data flushing according to pgss_shmem_shutdown() */ + +void +aqo_stat_flush(void) +{ + HASH_SEQ_STATUS hash_seq; + int ret; + long entries; + + /* Use exclusive lock to prevent concurrent flushing in different backends. */ + LWLockAcquire(&aqo_state->stat_lock, LW_EXCLUSIVE); + + if (!aqo_state->stat_changed) + /* Hash table wasn't changed, meaningless to store it in permanent storage */ + goto end; + + entries = hash_get_num_entries(stat_htab); + hash_seq_init(&hash_seq, stat_htab); + ret = data_store(PGAQO_STAT_FILE, _form_stat_record_cb, entries, + (void *) &hash_seq); + if (ret != 0) + hash_seq_term(&hash_seq); + else + /* Hash table and disk storage are now consistent */ + aqo_state->stat_changed = false; + +end: + LWLockRelease(&aqo_state->stat_lock); +} + +static void * +_form_qtext_record_cb(void *ctx, size_t *size) +{ + HASH_SEQ_STATUS *hash_seq = (HASH_SEQ_STATUS *) ctx; + QueryTextEntry *entry; + void *data; + char *query_string; + char *ptr; + + entry = hash_seq_search(hash_seq); + if (entry == NULL) + return NULL; + + Assert(DsaPointerIsValid(entry->qtext_dp)); + query_string = dsa_get_address(qtext_dsa, entry->qtext_dp); + Assert(query_string != NULL); + *size = sizeof(entry->queryid) + strlen(query_string) + 1; + ptr = data = palloc(*size); + Assert(ptr != NULL); + memcpy(ptr, &entry->queryid, sizeof(entry->queryid)); + ptr += sizeof(entry->queryid); + memcpy(ptr, query_string, strlen(query_string) + 1); + return data; +} + +void +aqo_qtexts_flush(void) +{ + HASH_SEQ_STATUS hash_seq; + int ret; + long entries; + + dsa_init(); + LWLockAcquire(&aqo_state->qtexts_lock, LW_EXCLUSIVE); + 
+ if (!aqo_state->qtexts_changed) + /* XXX: mull over forced mode. */ + goto end; + + entries = hash_get_num_entries(qtexts_htab); + hash_seq_init(&hash_seq, qtexts_htab); + ret = data_store(PGAQO_TEXT_FILE, _form_qtext_record_cb, entries, + (void *) &hash_seq); + if (ret != 0) + hash_seq_term(&hash_seq); + else + /* Hash table and disk storage are now consistent */ + aqo_state->qtexts_changed = false; + +end: + LWLockRelease(&aqo_state->qtexts_lock); +} + +/* + * Getting a hash table iterator, return a newly allocated memory chunk and its + * size for subsequent writing into storage. + */ +static void * +_form_data_record_cb(void *ctx, size_t *size) +{ + HASH_SEQ_STATUS *hash_seq = (HASH_SEQ_STATUS *) ctx; + DataEntry *entry; + char *data; + char *ptr, + *dsa_ptr; + size_t sz; + + entry = hash_seq_search(hash_seq); + if (entry == NULL) + return NULL; + + /* Size of data is DataEntry (without DSA pointer) plus size of DSA chunk */ + sz = offsetof(DataEntry, data_dp) + _compute_data_dsa(entry); + ptr = data = palloc(sz); + + /* Put the data into the chunk */ + + /* Plane copy of all bytes of hash table entry */ + memcpy(ptr, entry, offsetof(DataEntry, data_dp)); + ptr += offsetof(DataEntry, data_dp); + + Assert(DsaPointerIsValid(entry->data_dp)); + dsa_ptr = (char *) dsa_get_address(data_dsa, entry->data_dp); + Assert((sz - (ptr - data)) == _compute_data_dsa(entry)); + memcpy(ptr, dsa_ptr, sz - (ptr - data)); + *size = sz; + return data; +} + +void +aqo_data_flush(void) +{ + HASH_SEQ_STATUS hash_seq; + int ret; + long entries; + + dsa_init(); + LWLockAcquire(&aqo_state->data_lock, LW_EXCLUSIVE); + + if (!aqo_state->data_changed) + /* XXX: mull over forced mode. 
*/ + goto end; + + entries = hash_get_num_entries(data_htab); + hash_seq_init(&hash_seq, data_htab); + ret = data_store(PGAQO_DATA_FILE, _form_data_record_cb, entries, + (void *) &hash_seq); + if (ret != 0) + /* + * Something happened and storing procedure hasn't finished walking + * along all records of the hash table. + */ + hash_seq_term(&hash_seq); + else + /* Hash table and disk storage are now consistent */ + aqo_state->data_changed = false; +end: + LWLockRelease(&aqo_state->data_lock); +} + +static void * +_form_queries_record_cb(void *ctx, size_t *size) +{ + HASH_SEQ_STATUS *hash_seq = (HASH_SEQ_STATUS *) ctx; + QueriesEntry *entry; + + *size = sizeof(QueriesEntry); + entry = hash_seq_search(hash_seq); + if (entry == NULL) + return NULL; + + return memcpy(palloc(*size), entry, *size); +} + +void +aqo_queries_flush(void) +{ + HASH_SEQ_STATUS hash_seq; + int ret; + long entries; + + LWLockAcquire(&aqo_state->queries_lock, LW_EXCLUSIVE); + + if (!aqo_state->queries_changed) + goto end; + + entries = hash_get_num_entries(queries_htab); + hash_seq_init(&hash_seq, queries_htab); + ret = data_store(PGAQO_QUERIES_FILE, _form_queries_record_cb, entries, + (void *) &hash_seq); + if (ret != 0) + hash_seq_term(&hash_seq); + else + /* Hash table and disk storage are now consistent */ + aqo_state->queries_changed = false; + +end: + LWLockRelease(&aqo_state->queries_lock); +} + +static int +data_store(const char *filename, form_record_t callback, + long nrecs, void *ctx) +{ + FILE *file; + size_t size; + uint32 counter = 0; + void *data; + char *tmpfile; + MemoryContext old_context = MemoryContextSwitchTo(AQOStorageMemCtx); + + tmpfile = psprintf("%s.tmp", filename); + file = AllocateFile(tmpfile, PG_BINARY_W); + if (file == NULL) + goto error; + + if (fwrite(&PGAQO_FILE_HEADER, sizeof(uint32), 1, file) != 1 || + fwrite(&PGAQO_PG_MAJOR_VERSION, sizeof(uint32), 1, file) != 1 || + fwrite(&nrecs, sizeof(long), 1, file) != 1) + goto error; + + while ((data = callback(ctx, 
&size)) != NULL) + { + /* TODO: Add CRC code ? */ + if (fwrite(&size, sizeof(size), 1, file) != 1 || + fwrite(data, size, 1, file) != 1) + { + pfree(data); + goto error; + } + pfree(data); + counter++; + } + + Assert(counter == nrecs); + if (FreeFile(file)) + { + file = NULL; + goto error; + } + + /* Parallel (re)writing into a file hasn't happened. */ + (void) durable_rename(tmpfile, filename, PANIC); + elog(LOG, "[AQO] %d records stored in file %s.", counter, filename); + + MemoryContextSwitchTo(old_context); + MemoryContextReset(AQOStorageMemCtx); + return 0; -static ArrayType *form_vector(double *vector, int nrows); -static void deform_vector(Datum datum, double *vector, int *nelems); +error: + ereport(LOG, + (errcode_for_file_access(), + errmsg("could not write AQO file \"%s\": %m", tmpfile))); -#define FormVectorSz(v_name) (form_vector((v_name), (v_name ## _size))) -#define DeformVectorSz(datum, v_name) (deform_vector((datum), (v_name), &(v_name ## _size))) + if (file) + FreeFile(file); + unlink(tmpfile); + pfree(tmpfile); + MemoryContextSwitchTo(old_context); + MemoryContextReset(AQOStorageMemCtx); + return -1; +} + +static bool +_deform_stat_record_cb(void *data, size_t size) +{ + bool found; + StatEntry *entry; + uint64 queryid; + + Assert(LWLockHeldByMeInMode(&aqo_state->stat_lock, LW_EXCLUSIVE)); + Assert(size == sizeof(StatEntry)); + + queryid = ((StatEntry *) data)->queryid; + entry = (StatEntry *) hash_search(stat_htab, &queryid, HASH_ENTER, &found); + Assert(!found && entry); + memcpy(entry, data, sizeof(StatEntry)); + return true; +} + +void +aqo_stat_load(void) +{ + Assert(!LWLockHeldByMe(&aqo_state->stat_lock)); + + LWLockAcquire(&aqo_state->stat_lock, LW_EXCLUSIVE); + + /* Load on postmaster startup. So no concurrent actions are possible here. 
*/ + Assert(hash_get_num_entries(stat_htab) == 0); + + data_load(PGAQO_STAT_FILE, _deform_stat_record_cb, NULL); + + LWLockRelease(&aqo_state->stat_lock); +} + +static bool +_check_dsa_validity(dsa_pointer ptr) +{ + if (DsaPointerIsValid(ptr)) + return true; + + elog(LOG, "[AQO] DSA Pointer isn't valid. Is the memory limit exceeded?"); + return false; +} + +static bool +_deform_qtexts_record_cb(void *data, size_t size) +{ + bool found; + QueryTextEntry *entry; + uint64 queryid = *(uint64 *) data; + char *query_string = (char *) data + sizeof(queryid); + size_t len = size - sizeof(queryid); + char *strptr; + + Assert(LWLockHeldByMeInMode(&aqo_state->qtexts_lock, LW_EXCLUSIVE)); + Assert(strlen(query_string) + 1 == len); + entry = (QueryTextEntry *) hash_search(qtexts_htab, &queryid, + HASH_ENTER, &found); + Assert(!found); + + entry->qtext_dp = dsa_allocate_extended(qtext_dsa, len, DSA_ALLOC_NO_OOM | DSA_ALLOC_ZERO); + if (!_check_dsa_validity(entry->qtext_dp)) + { + /* + * DSA stuck into problems. Rollback changes. Return false in belief + * that caller recognize it and don't try to call us more. + */ + (void) hash_search(qtexts_htab, &queryid, HASH_REMOVE, NULL); + return false; + } -static bool my_simple_heap_update(Relation relation, - ItemPointer otid, - HeapTuple tup, - bool *update_indexes); + strptr = (char *) dsa_get_address(qtext_dsa, entry->qtext_dp); + strlcpy(strptr, query_string, len); + return true; +} + +void +aqo_qtexts_load(void) +{ + uint64 queryid = 0; + bool found; + + Assert(!LWLockHeldByMe(&aqo_state->qtexts_lock)); + Assert(qtext_dsa != NULL); + + LWLockAcquire(&aqo_state->qtexts_lock, LW_EXCLUSIVE); + + if (hash_get_num_entries(qtexts_htab) != 0) + { + /* Someone have done it concurrently. 
*/ + elog(LOG, "[AQO] Another backend have loaded query texts concurrently."); + LWLockRelease(&aqo_state->qtexts_lock); + return; + } + + data_load(PGAQO_TEXT_FILE, _deform_qtexts_record_cb, NULL); + + /* Check existence of default feature space */ + (void) hash_search(qtexts_htab, &queryid, HASH_FIND, &found); + + aqo_state->qtexts_changed = false; /* mem data consistent with disk */ + LWLockRelease(&aqo_state->qtexts_lock); + + if (!found) + { + if (!aqo_qtext_store(0, "COMMON feature space (do not delete!)", NULL)) + elog(PANIC, "[AQO] DSA Initialization was unsuccessful"); + } +} + +/* + * Getting a data chunk from a caller, add a record into the 'ML data' + * shmem hash table. Allocate and fill DSA chunk for variadic part of the data. + */ +static bool +_deform_data_record_cb(void *data, size_t size) +{ + bool found; + DataEntry *fentry = (DataEntry *) data; /*Depends on a platform? */ + DataEntry *entry; + size_t sz; + char *ptr = (char *) data, + *dsa_ptr; + + Assert(ptr != NULL); + Assert(LWLockHeldByMeInMode(&aqo_state->data_lock, LW_EXCLUSIVE)); + + entry = (DataEntry *) hash_search(data_htab, &fentry->key, + HASH_ENTER, &found); + Assert(!found); + + /* Copy fixed-size part of entry byte-by-byte even with caves */ + memcpy(entry, fentry, offsetof(DataEntry, data_dp)); + ptr += offsetof(DataEntry, data_dp); + + sz = _compute_data_dsa(entry); + Assert(sz + offsetof(DataEntry, data_dp) == size); + entry->data_dp = dsa_allocate(data_dsa, sz); + + if (!_check_dsa_validity(entry->data_dp)) + { + /* + * DSA stuck into problems. Rollback changes. Return false in belief + * that caller recognize it and don't try to call us more. 
+ */ + (void) hash_search(data_htab, &fentry->key, HASH_REMOVE, NULL); + return false; + } + + dsa_ptr = (char *) dsa_get_address(data_dsa, entry->data_dp); + Assert(dsa_ptr != NULL); + memcpy(dsa_ptr, ptr, sz); + return true; +} + +void +aqo_data_load(void) +{ + Assert(!LWLockHeldByMe(&aqo_state->data_lock)); + Assert(data_dsa != NULL); + + LWLockAcquire(&aqo_state->data_lock, LW_EXCLUSIVE); + + if (hash_get_num_entries(data_htab) != 0) + { + /* Someone have done it concurrently. */ + elog(LOG, "[AQO] Another backend have loaded query data concurrently."); + LWLockRelease(&aqo_state->data_lock); + return; + } + + data_load(PGAQO_DATA_FILE, _deform_data_record_cb, NULL); + + aqo_state->data_changed = false; /* mem data is consistent with disk */ + LWLockRelease(&aqo_state->data_lock); +} + +static bool +_deform_queries_record_cb(void *data, size_t size) +{ + bool found; + QueriesEntry *entry; + uint64 queryid; + + Assert(LWLockHeldByMeInMode(&aqo_state->queries_lock, LW_EXCLUSIVE)); + Assert(size == sizeof(QueriesEntry)); + + queryid = ((QueriesEntry *) data)->queryid; + entry = (QueriesEntry *) hash_search(queries_htab, &queryid, HASH_ENTER, &found); + Assert(!found); + memcpy(entry, data, sizeof(QueriesEntry)); + return true; +} + +void +aqo_queries_load(void) +{ + bool found; + uint64 queryid = 0; + + Assert(!LWLockHeldByMe(&aqo_state->queries_lock)); + + LWLockAcquire(&aqo_state->queries_lock, LW_EXCLUSIVE); + + /* Load on postmaster startup. So no any concurrent actions possible here. 
*/ + Assert(hash_get_num_entries(queries_htab) == 0); + + data_load(PGAQO_QUERIES_FILE, _deform_queries_record_cb, NULL); + + /* Check existence of default feature space */ + (void) hash_search(queries_htab, &queryid, HASH_FIND, &found); + + LWLockRelease(&aqo_state->queries_lock); + if (!found) + { + if (!aqo_queries_store(0, 0, 0, 0, 0, &aqo_queries_nulls)) + elog(PANIC, "[AQO] aqo_queries initialization was unsuccessful"); + } +} + +static long +aqo_get_file_size(const char *filename) +{ + FILE *file; + long size = 0; + + file = AllocateFile(filename, PG_BINARY_R); + if (file == NULL) + { + if (errno != ENOENT) + goto read_error; + return size; + } + + fseek(file, 0L, SEEK_END); + size = ftell(file); + + FreeFile(file); + return size; + +read_error: + ereport(LOG, + (errcode_for_file_access(), + errmsg("could not read file \"%s\": %m", filename))); + unlink(filename); + return -1; +} + +void +check_dsa_file_size(void) +{ + long qtext_size = aqo_get_file_size(PGAQO_TEXT_FILE); + long data_size = aqo_get_file_size(PGAQO_DATA_FILE); + + if (qtext_size == -1 || data_size == -1 || + ((unsigned long) qtext_size + (unsigned long) data_size) >> 20 >= dsm_size_max) + { + elog(ERROR, "aqo.dsm_size_max is too small"); + } +} + +static void +data_load(const char *filename, deform_record_t callback, void *ctx) +{ + FILE *file; + long i; + uint32 header; + int32 pgver; + long num; + MemoryContext old_context = MemoryContextSwitchTo(AQOStorageMemCtx); + + file = AllocateFile(filename, PG_BINARY_R); + if (file == NULL) + { + if (errno != ENOENT) + goto read_error; + + MemoryContextSwitchTo(old_context); + return; + } + + if (fread(&header, sizeof(uint32), 1, file) != 1 || + fread(&pgver, sizeof(uint32), 1, file) != 1 || + fread(&num, sizeof(long), 1, file) != 1) + goto read_error; + + if (header != PGAQO_FILE_HEADER || pgver != PGAQO_PG_MAJOR_VERSION) + goto data_error; + + for (i = 0; i < num; i++) + { + void *data; + size_t size; + bool res; + + if (fread(&size, sizeof(size), 
1, file) != 1) + goto read_error; + data = palloc(size); + if (fread(data, size, 1, file) != 1) + { + pfree(data); + goto read_error; + } + res = callback(data, size); + pfree(data); + + if (!res) + { + /* Error detected. Do not try to read tails of the storage. */ + elog(LOG, "[AQO] Because of an error skip %ld storage records.", + num - i); + break; + } + } + + FreeFile(file); + + elog(LOG, "[AQO] %ld records loaded from file %s.", num, filename); + + MemoryContextSwitchTo(old_context); + MemoryContextReset(AQOStorageMemCtx); + return; + +read_error: + ereport(LOG, + (errcode_for_file_access(), + errmsg("could not read file \"%s\": %m", filename))); + goto fail; +data_error: + ereport(LOG, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("ignoring invalid data in file \"%s\"", filename))); +fail: + if (file) + FreeFile(file); + unlink(filename); + + MemoryContextSwitchTo(old_context); + MemoryContextReset(AQOStorageMemCtx); +} + +static void +on_shmem_shutdown(int code, Datum arg) +{ + /* + * XXX: It can be expensive to rewrite a file on each shutdown of a backend. + */ + aqo_qtexts_flush(); + aqo_data_flush(); +} + +/* + * Initialize DSA memory for AQO shared data with variable length. + * On first call, create DSA segments and load data into hash table and DSA + * from disk. 
+ */ +static void +dsa_init() +{ + MemoryContext old_context; + + if (qtext_dsa) + return; + + Assert(data_dsa == NULL && data_dsa == NULL); + old_context = MemoryContextSwitchTo(TopMemoryContext); + LWLockAcquire(&aqo_state->lock, LW_EXCLUSIVE); + + if (aqo_state->qtexts_dsa_handler == DSM_HANDLE_INVALID) + { + Assert(aqo_state->data_dsa_handler == DSM_HANDLE_INVALID); + + qtext_dsa = dsa_create(aqo_state->qtext_trancheid); + Assert(qtext_dsa != NULL); + + if (dsm_size_max > 0) + dsa_set_size_limit(qtext_dsa, dsm_size_max * 1024 * 1024); + + dsa_pin(qtext_dsa); + aqo_state->qtexts_dsa_handler = dsa_get_handle(qtext_dsa); + + data_dsa = qtext_dsa; + aqo_state->data_dsa_handler = dsa_get_handle(data_dsa); + + /* Load and initialize query texts hash table */ + aqo_qtexts_load(); + aqo_data_load(); + } + else + { + qtext_dsa = dsa_attach(aqo_state->qtexts_dsa_handler); + data_dsa = qtext_dsa; + } + + dsa_pin_mapping(qtext_dsa); + MemoryContextSwitchTo(old_context); + LWLockRelease(&aqo_state->lock); + + before_shmem_exit(on_shmem_shutdown, (Datum) 0); +} + +/* ************************************************************************** */ + +/* + * XXX: Maybe merge with aqo_queries ? + */ +bool +aqo_qtext_store(uint64 queryid, const char *query_string, bool *dsa_valid) +{ + QueryTextEntry *entry; + bool found; + bool tblOverflow; + HASHACTION action; + + if (dsa_valid) + *dsa_valid = true; + + Assert(!LWLockHeldByMe(&aqo_state->qtexts_lock)); + + if (query_string == NULL || querytext_max_size == 0) + return false; + + dsa_init(); + + LWLockAcquire(&aqo_state->qtexts_lock, LW_EXCLUSIVE); + + /* Check hash table overflow */ + tblOverflow = hash_get_num_entries(qtexts_htab) < fs_max_items ? false : true; + action = tblOverflow ? 
HASH_FIND : HASH_ENTER; + + entry = (QueryTextEntry *) hash_search(qtexts_htab, &queryid, action, + &found); + + /* Initialize entry on first usage */ + if (!found) + { + size_t size = strlen(query_string) + 1; + char *strptr; + + if (action == HASH_FIND) + { + /* + * Hash table is full. To avoid possible problems - don't try to add + * more, just exit + */ + LWLockRelease(&aqo_state->qtexts_lock); + ereport(LOG, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("[AQO] Query texts storage is full. No more feature spaces can be added."), + errhint("Increase value of aqo.fs_max_items on restart of the instance"))); + return false; + } + + entry->queryid = queryid; + size = size > querytext_max_size ? querytext_max_size : size; + entry->qtext_dp = dsa_allocate_extended(qtext_dsa, size, DSA_ALLOC_NO_OOM | DSA_ALLOC_ZERO); + + if (!_check_dsa_validity(entry->qtext_dp)) + { + /* + * DSA stuck into problems. Rollback changes. Return false in belief + * that caller recognize it and don't try to call us more. 
+ */ + (void) hash_search(qtexts_htab, &queryid, HASH_REMOVE, NULL); + _aqo_queries_remove(queryid); + LWLockRelease(&aqo_state->qtexts_lock); + if (dsa_valid) + *dsa_valid = false; + return false; + } + + strptr = (char *) dsa_get_address(qtext_dsa, entry->qtext_dp); + strlcpy(strptr, query_string, size); + aqo_state->qtexts_changed = true; + } + LWLockRelease(&aqo_state->qtexts_lock); + return true; +} + +Datum +aqo_query_texts(PG_FUNCTION_ARGS) +{ + ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; + TupleDesc tupDesc; + MemoryContext per_query_ctx; + MemoryContext oldcontext; + Tuplestorestate *tupstore; + Datum values[QT_TOTAL_NCOLS]; + bool nulls[QT_TOTAL_NCOLS]; + HASH_SEQ_STATUS hash_seq; + QueryTextEntry *entry; + + Assert(!LWLockHeldByMe(&aqo_state->qtexts_lock)); + + /* check to see if caller supports us returning a tuplestore */ + if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("set-valued function called in context that cannot accept a set"))); + if (!(rsinfo->allowedModes & SFRM_Materialize)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("materialize mode required, but it is not allowed in this context"))); + + /* Switch into long-lived context to construct returned data structures */ + per_query_ctx = rsinfo->econtext->ecxt_per_query_memory; + oldcontext = MemoryContextSwitchTo(per_query_ctx); + + /* Build a tuple descriptor for our result type */ + if (get_call_result_type(fcinfo, NULL, &tupDesc) != TYPEFUNC_COMPOSITE) + elog(ERROR, "return type must be a row type"); + + if (tupDesc->natts != QT_TOTAL_NCOLS) + elog(ERROR, "[AQO] Incorrect number of output arguments"); + + tupstore = tuplestore_begin_heap(true, false, work_mem); + rsinfo->returnMode = SFRM_Materialize; + rsinfo->setResult = tupstore; + rsinfo->setDesc = tupDesc; + + MemoryContextSwitchTo(oldcontext); + + dsa_init(); + memset(nulls, 0, QT_TOTAL_NCOLS); + 
LWLockAcquire(&aqo_state->qtexts_lock, LW_SHARED); + hash_seq_init(&hash_seq, qtexts_htab); + while ((entry = hash_seq_search(&hash_seq)) != NULL) + { + char *ptr; + + Assert(DsaPointerIsValid(entry->qtext_dp)); + ptr = dsa_get_address(qtext_dsa, entry->qtext_dp); + values[QT_QUERYID] = Int64GetDatum(entry->queryid); + values[QT_QUERY_STRING] = CStringGetTextDatum(ptr); + tuplestore_putvalues(tupstore, tupDesc, values, nulls); + } + + LWLockRelease(&aqo_state->qtexts_lock); + tuplestore_donestoring(tupstore); + return (Datum) 0; +} + +static bool +_aqo_stat_remove(uint64 queryid) +{ + bool found; + + Assert(!LWLockHeldByMe(&aqo_state->stat_lock)); + LWLockAcquire(&aqo_state->stat_lock, LW_EXCLUSIVE); + (void) hash_search(stat_htab, &queryid, HASH_FIND, &found); + + if (found) + { + (void) hash_search(stat_htab, &queryid, HASH_REMOVE, NULL); + aqo_state->stat_changed = true; + } + + LWLockRelease(&aqo_state->stat_lock); + return found; +} + +static bool +_aqo_queries_remove(uint64 queryid) +{ + bool found; + + Assert(!LWLockHeldByMe(&aqo_state->queries_lock)); + LWLockAcquire(&aqo_state->queries_lock, LW_EXCLUSIVE); + (void) hash_search(queries_htab, &queryid, HASH_FIND, &found); + + if (found) + { + (void) hash_search(queries_htab, &queryid, HASH_REMOVE, NULL); + aqo_state->queries_changed = true; + } + + LWLockRelease(&aqo_state->queries_lock); + return found; +} + +static bool +_aqo_qtexts_remove(uint64 queryid) +{ + bool found = false; + QueryTextEntry *entry; + + dsa_init(); + + Assert(!LWLockHeldByMe(&aqo_state->qtexts_lock)); + LWLockAcquire(&aqo_state->qtexts_lock, LW_EXCLUSIVE); + + /* + * Look for a record with this queryid. DSA fields must be freed before + * deletion of the record. 
+ */ + entry = (QueryTextEntry *) hash_search(qtexts_htab, &queryid, HASH_FIND, + &found); + if (found) + { + /* Free DSA memory, allocated for this record */ + Assert(DsaPointerIsValid(entry->qtext_dp)); + dsa_free(qtext_dsa, entry->qtext_dp); + + (void) hash_search(qtexts_htab, &queryid, HASH_REMOVE, NULL); + aqo_state->qtexts_changed = true; + } + + LWLockRelease(&aqo_state->qtexts_lock); + return found; +} + +static bool +_aqo_data_remove(data_key *key) +{ + DataEntry *entry; + bool found; + + Assert(!LWLockHeldByMe(&aqo_state->data_lock)); + LWLockAcquire(&aqo_state->data_lock, LW_EXCLUSIVE); + + entry = (DataEntry *) hash_search(data_htab, key, HASH_FIND, &found); + if (found) + { + /* Free DSA memory, allocated for this record */ + Assert(DsaPointerIsValid(entry->data_dp)); + dsa_free(data_dsa, entry->data_dp); + entry->data_dp = InvalidDsaPointer; + + if (!hash_search(data_htab, key, HASH_REMOVE, NULL)) + elog(PANIC, "[AQO] Inconsistent data hash table"); + + aqo_state->data_changed = true; + } + + LWLockRelease(&aqo_state->data_lock); + return found; +} + +static long +aqo_qtexts_reset(void) +{ + HASH_SEQ_STATUS hash_seq; + QueryTextEntry *entry; + long num_remove = 0; + long num_entries; + + dsa_init(); + + Assert(!LWLockHeldByMe(&aqo_state->qtexts_lock)); + LWLockAcquire(&aqo_state->qtexts_lock, LW_EXCLUSIVE); + num_entries = hash_get_num_entries(qtexts_htab); + hash_seq_init(&hash_seq, qtexts_htab); + while ((entry = hash_seq_search(&hash_seq)) != NULL) + { + if (entry->queryid == 0) + continue; + + Assert(DsaPointerIsValid(entry->qtext_dp)); + dsa_free(qtext_dsa, entry->qtext_dp); + if (!hash_search(qtexts_htab, &entry->queryid, HASH_REMOVE, NULL)) + elog(PANIC, "[AQO] hash table corrupted"); + num_remove++; + } + aqo_state->qtexts_changed = true; + LWLockRelease(&aqo_state->qtexts_lock); + if (num_remove != num_entries - 1) + elog(ERROR, "[AQO] Query texts memory storage is corrupted or parallel access without a lock was detected."); + + 
aqo_qtexts_flush(); + + return num_remove; +} + +static size_t +_compute_data_dsa(const DataEntry *entry) +{ + size_t size = sizeof(data_key); /* header's size */ + + size += sizeof(double) * entry->rows * entry->cols; /* matrix */ + size += 2 * sizeof(double) * entry->rows; /* targets, rfactors */ + + /* Calculate memory size needed to store relation names */ + size += entry->nrels * sizeof(Oid); + return size; +} + +/* + * Insert new record or update existed in the AQO data storage. + * Return true if data was changed. + */ +bool +aqo_data_store(uint64 fs, int fss, AqoDataArgs *data, List *reloids) +{ + DataEntry *entry; + bool found; + data_key key = {.fs = fs, .fss = fss}; + int i; + char *ptr; + ListCell *lc; + size_t size; + bool tblOverflow; + HASHACTION action; + bool result; + /* + * We should distinguish incoming data between internally + * passed structured data(reloids) and externaly + * passed data(plain arrays) from aqo_data_update() function. + */ + bool is_raw_data = (reloids == NULL); + int nrels = is_raw_data ? data->nrels : list_length(reloids); + + Assert(!LWLockHeldByMe(&aqo_state->data_lock)); + Assert(data->rows > 0); + + dsa_init(); + + LWLockAcquire(&aqo_state->data_lock, LW_EXCLUSIVE); + + /* Check hash table overflow */ + tblOverflow = hash_get_num_entries(data_htab) < fss_max_items ? false : true; + action = tblOverflow ? HASH_FIND : HASH_ENTER; + + entry = (DataEntry *) hash_search(data_htab, &key, action, &found); + + /* Initialize entry on first usage */ + if (!found) + { + if (action == HASH_FIND) + { + /* + * Hash table is full. To avoid possible problems - don't try to add + * more, just exit + */ + LWLockRelease(&aqo_state->data_lock); + ereport(LOG, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("[AQO] Data storage is full. 
No more data can be added."), + errhint("Increase value of aqo.fss_max_items on restart of the instance"))); + return false; + } + + entry->cols = data->cols; + entry->rows = data->rows; + entry->nrels = nrels; + + size = _compute_data_dsa(entry); + entry->data_dp = dsa_allocate_extended(data_dsa, size, DSA_ALLOC_NO_OOM | DSA_ALLOC_ZERO); + + if (!_check_dsa_validity(entry->data_dp)) + { + /* + * DSA stuck into problems. Rollback changes. Return false in belief + * that caller recognize it and don't try to call us more. + */ + (void) hash_search(data_htab, &key, HASH_REMOVE, NULL); + LWLockRelease(&aqo_state->data_lock); + return false; + } + } + + Assert(DsaPointerIsValid(entry->data_dp)); + + if (entry->cols != data->cols || entry->nrels != nrels) + { + /* Collision happened? */ + elog(LOG, "[AQO] Does a collision happened? Check it if possible (fs: " + UINT64_FORMAT", fss: %d).", + fs, fss); + goto end; + } + + if (entry->rows < data->rows) + { + entry->rows = data->rows; + size = _compute_data_dsa(entry); + + /* Need to re-allocate DSA chunk */ + dsa_free(data_dsa, entry->data_dp); + entry->data_dp = dsa_allocate_extended(data_dsa, size, DSA_ALLOC_NO_OOM | DSA_ALLOC_ZERO); + + if (!_check_dsa_validity(entry->data_dp)) + { + /* + * DSA stuck into problems. Rollback changes. Return false in belief + * that caller recognize it and don't try to call us more. 
+ */ + (void) hash_search(data_htab, &key, HASH_REMOVE, NULL); + LWLockRelease(&aqo_state->data_lock); + return false; + } + } + ptr = (char *) dsa_get_address(data_dsa, entry->data_dp); + Assert(ptr != NULL); + + /* + * Copy AQO data into allocated DSA segment + */ + + memcpy(ptr, &key, sizeof(data_key)); /* Just for debug */ + ptr += sizeof(data_key); + if (entry->cols > 0) + { + for (i = 0; i < entry->rows; i++) + { + Assert(data->matrix[i]); + memcpy(ptr, data->matrix[i], sizeof(double) * data->cols); + ptr += sizeof(double) * data->cols; + } + } + /* copy targets into DSM storage */ + memcpy(ptr, data->targets, sizeof(double) * entry->rows); + ptr += sizeof(double) * entry->rows; + /* copy rfactors into DSM storage */ + memcpy(ptr, data->rfactors, sizeof(double) * entry->rows); + ptr += sizeof(double) * entry->rows; + /* store list of relations. XXX: optimize ? */ + if (is_raw_data) + { + memcpy(ptr, data->oids, nrels * sizeof(Oid)); + ptr += nrels * sizeof(Oid); + } + else + { + foreach(lc, reloids) + { + Oid reloid = lfirst_oid(lc); + + memcpy(ptr, &reloid, sizeof(Oid)); + ptr += sizeof(Oid); + } + } + aqo_state->data_changed = true; + Assert(entry->rows > 0); +end: + result = aqo_state->data_changed; + LWLockRelease(&aqo_state->data_lock); + return result; +} + +static double +fs_distance(double *a, double *b, int len) +{ + double res = 0; + int i; + + for (i = 0; i < len; ++i) + res += (a[i] - b[i]) * (a[i] - b[i]); + if (len != 0) + res = sqrt(res); + return res; +} + +static bool +nearest_neighbor(double **matrix, int old_rows, double *neighbor, int cols) +{ + int i; + for (i=0; irows is kept <= aqo_K. + */ +static void +update_knn_matrix(OkNNrdata *data, const OkNNrdata *temp_data) +{ + int k = (data->rows < 0) ? 
0 : data->rows; + int i; + + Assert(data->cols == temp_data->cols); + Assert(data->matrix); + + if (data->cols > 0) + { + for (i = 0; i < temp_data->rows && k < aqo_K; i++) + { + if (!nearest_neighbor(data->matrix, k, temp_data->matrix[i], data->cols)) + { + memcpy(data->matrix[k], temp_data->matrix[i], data->cols * sizeof(double)); + data->rfactors[k] = temp_data->rfactors[i]; + data->targets[k] = temp_data->targets[i]; + k++; + } + } + } + /* Data has no columns. Only one record can be added */ + else if (k == 0 && temp_data->rows > 0) + { + data->rfactors[0] = temp_data->rfactors[0]; + data->targets[0] = temp_data->targets[0]; + k = 1; + } + data->rows = k; + + Assert(data->rows >= 0 && data->rows <= aqo_K); +} + +static OkNNrdata * +_fill_knn_data(const DataEntry *entry, List **reloids) +{ + OkNNrdata *data; + char *ptr; + int i; + size_t offset; + size_t sz = _compute_data_dsa(entry); + + data = OkNNr_allocate(entry->cols); + data->rows = entry->rows; + + ptr = (char *) dsa_get_address(data_dsa, entry->data_dp); + + /* Check invariants */ + Assert(entry->rows <= aqo_K); + Assert(ptr != NULL); + Assert(entry->key.fss == ((data_key *)ptr)->fss); + Assert(data->matrix); + + ptr += sizeof(data_key); + + if (entry->cols > 0) + { + for (i = 0; i < entry->rows; i++) + { + Assert(data->matrix[i]); + memcpy(data->matrix[i], ptr, sizeof(double) * data->cols); + ptr += sizeof(double) * data->cols; + } + } + + /* copy targets from DSM storage */ + memcpy(data->targets, ptr, sizeof(double) * entry->rows); + ptr += sizeof(double) * entry->rows; + offset = ptr - (char *) dsa_get_address(data_dsa, entry->data_dp); + Assert(offset < sz); + + /* copy rfactors from DSM storage */ + memcpy(data->rfactors, ptr, sizeof(double) * entry->rows); + ptr += sizeof(double) * entry->rows; + offset = ptr - (char *) dsa_get_address(data_dsa, entry->data_dp); + Assert(offset <= sz); + + if (reloids == NULL) + /* Isn't needed to load reloids list */ + return data; + + /* store list of 
relations. XXX: optimize ? */ + for (i = 0; i < entry->nrels; i++) + { + *reloids = lappend_oid(*reloids, ObjectIdGetDatum(*(Oid*)ptr)); + ptr += sizeof(Oid); + } + + offset = ptr - (char *) dsa_get_address(data_dsa, entry->data_dp); + if (offset != sz) + elog(PANIC, "[AQO] Shared memory ML storage is corrupted."); + + return data; +} + +/* + * By given feature space and subspace, build kNN data structure. + * + * If wideSearch is true - make seqscan on the hash table to see for relevant + * data across neighbours. + * + * Return false if the operation was unsuccessful. + */ +bool +load_aqo_data(uint64 fs, int fss, OkNNrdata *data, bool wideSearch) +{ + DataEntry *entry; + bool found; + data_key key = {.fs = fs, .fss = fss}; + OkNNrdata *temp_data; + + Assert(!LWLockHeldByMe(&aqo_state->data_lock)); + Assert(wideSearch || data->rows <= 0); + + dsa_init(); + + LWLockAcquire(&aqo_state->data_lock, LW_SHARED); + + if (!wideSearch) + { + entry = (DataEntry *) hash_search(data_htab, &key, HASH_FIND, &found); + + if (!found) + goto end; + + /* One entry with all correctly filled fields is found */ + Assert(entry && entry->rows > 0); + Assert(DsaPointerIsValid(entry->data_dp)); + + if (entry->cols != data->cols) + { + /* Collision happened? */ + elog(LOG, "[AQO] Did a collision happen? Check it if possible " + "(fs: "UINT64_FORMAT", fss: %d).", + fs, fss); + found = false; /* Sign of unsuccessful operation */ + goto end; + } + + temp_data = _fill_knn_data(entry, NULL); + Assert(temp_data->rows > 0); + update_knn_matrix(data, temp_data); + Assert(data->rows > 0); + } + else + /* Iterate across all elements of the table. XXX: Maybe slow. 
*/ + { + HASH_SEQ_STATUS hash_seq; + int noids = -1; + + found = false; + hash_seq_init(&hash_seq, data_htab); + while ((entry = hash_seq_search(&hash_seq)) != NULL) + { + List *tmp_oids = NIL; + + Assert(entry->rows > 0); + + if (entry->key.fss != fss || entry->cols != data->cols) + continue; + + temp_data = _fill_knn_data(entry, &tmp_oids); + + if (noids >= 0 && list_length(tmp_oids) != noids) + { + /* Dubious case. So log it and skip these data */ + elog(LOG, + "[AQO] different number depended oids for the same fss %d: " + "%d and %d correspondingly.", + fss, list_length(tmp_oids), noids); + list_free(tmp_oids); + continue; + } + + noids = list_length(tmp_oids); + list_free(tmp_oids); + + update_knn_matrix(data, temp_data); + found = true; + + /* Abort if data is full */ + if (data->rows == aqo_K || (data->cols == 0 && data->rows == 1)) + { + hash_seq_term(&hash_seq); + break; + } + } + + } + + Assert(!found || (data->rows > 0 && data->rows <= aqo_K)); +end: + LWLockRelease(&aqo_state->data_lock); + + return found; +} + +Datum +aqo_data(PG_FUNCTION_ARGS) +{ + ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; + TupleDesc tupDesc; + MemoryContext per_query_ctx; + MemoryContext oldcontext; + Tuplestorestate *tupstore; + Datum values[AD_TOTAL_NCOLS]; + bool nulls[AD_TOTAL_NCOLS]; + HASH_SEQ_STATUS hash_seq; + DataEntry *entry; + + Assert(!LWLockHeldByMe(&aqo_state->data_lock)); + + /* check to see if caller supports us returning a tuplestore */ + if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("set-valued function called in context that cannot accept a set"))); + if (!(rsinfo->allowedModes & SFRM_Materialize)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("materialize mode required, but it is not allowed in this context"))); + + /* Switch into long-lived context to construct returned data structures */ + per_query_ctx = rsinfo->econtext->ecxt_per_query_memory; 
+ oldcontext = MemoryContextSwitchTo(per_query_ctx); + + /* Build a tuple descriptor for our result type */ + if (get_call_result_type(fcinfo, NULL, &tupDesc) != TYPEFUNC_COMPOSITE) + elog(ERROR, "return type must be a row type"); + + if (tupDesc->natts != AD_TOTAL_NCOLS) + elog(ERROR, "[AQO] Incorrect number of output arguments"); + + tupstore = tuplestore_begin_heap(true, false, work_mem); + rsinfo->returnMode = SFRM_Materialize; + rsinfo->setResult = tupstore; + rsinfo->setDesc = tupDesc; + + MemoryContextSwitchTo(oldcontext); + + dsa_init(); + LWLockAcquire(&aqo_state->data_lock, LW_SHARED); + hash_seq_init(&hash_seq, data_htab); + while ((entry = hash_seq_search(&hash_seq)) != NULL) + { + char *ptr; + + memset(nulls, 0, AD_TOTAL_NCOLS); + + values[AD_FS] = Int64GetDatum(entry->key.fs); + values[AD_FSS] = Int32GetDatum((int) entry->key.fss); + values[AD_NFEATURES] = Int32GetDatum(entry->cols); + + /* Fill values from the DSA data chunk */ + Assert(DsaPointerIsValid(entry->data_dp)); + ptr = dsa_get_address(data_dsa, entry->data_dp); + Assert(entry->key.fs == ((data_key*)ptr)->fs && entry->key.fss == ((data_key*)ptr)->fss); + ptr += sizeof(data_key); + + if (entry->cols > 0) + values[AD_FEATURES] = PointerGetDatum(form_matrix((double *) ptr, + entry->rows, entry->cols)); + else + nulls[AD_FEATURES] = true; + + ptr += sizeof(double) * entry->rows * entry->cols; + values[AD_TARGETS] = PointerGetDatum(form_vector((double *)ptr, entry->rows)); + ptr += sizeof(double) * entry->rows; + values[AD_RELIABILITY] = PointerGetDatum(form_vector((double *)ptr, entry->rows)); + ptr += sizeof(double) * entry->rows; + + if (entry->nrels > 0) + { + Datum *elems; + ArrayType *array; + int i; + + elems = palloc(sizeof(*elems) * entry->nrels); + for(i = 0; i < entry->nrels; i++) + { + elems[i] = ObjectIdGetDatum(*(Oid *)ptr); + ptr += sizeof(Oid); + } + + array = construct_array(elems, entry->nrels, OIDOID, + sizeof(Oid), true, TYPALIGN_INT); + values[AD_OIDS] = 
PointerGetDatum(array); + } + else + nulls[AD_OIDS] = true; -static bool my_index_insert(Relation indexRelation, - Datum *values, - bool *isnull, - ItemPointer heap_t_ctid, - Relation heapRelation, - IndexUniqueCheck checkUnique); + tuplestore_putvalues(tupstore, tupDesc, values, nulls); + } + LWLockRelease(&aqo_state->data_lock); + tuplestore_donestoring(tupstore); + return (Datum) 0; +} -/* - * Returns whether the query with given hash is in aqo_queries. - * If yes, returns the content of the first line with given hash. - */ -bool -find_query(int query_hash, - Datum *search_values, - bool *search_nulls) +static long +_aqo_data_clean(uint64 fs) { - RangeVar *aqo_queries_table_rv; - Relation aqo_queries_heap; - HeapTuple tuple; - TupleTableSlot *slot; - bool shouldFree; - - LOCKMODE lockmode = AccessShareLock; + HASH_SEQ_STATUS hash_seq; + DataEntry *entry; + long removed = 0; - Relation query_index_rel; - Oid query_index_rel_oid; - IndexScanDesc query_index_scan; - ScanKeyData key; + Assert(!LWLockHeldByMe(&aqo_state->data_lock)); + LWLockAcquire(&aqo_state->data_lock, LW_EXCLUSIVE); - bool find_ok = false; - - query_index_rel_oid = RelnameGetRelid("aqo_queries_query_hash_idx"); - if (!OidIsValid(query_index_rel_oid)) + hash_seq_init(&hash_seq, data_htab); + while ((entry = hash_seq_search(&hash_seq)) != NULL) { - disable_aqo_for_query(); - return false; + if (entry->key.fs != fs) + continue; + + Assert(DsaPointerIsValid(entry->data_dp)); + dsa_free(data_dsa, entry->data_dp); + entry->data_dp = InvalidDsaPointer; + if (!hash_search(data_htab, &entry->key, HASH_REMOVE, NULL)) + elog(PANIC, "[AQO] hash table corrupted"); + removed++; } - aqo_queries_table_rv = makeRangeVar("public", "aqo_queries", -1); - aqo_queries_heap = table_openrv(aqo_queries_table_rv, lockmode); + LWLockRelease(&aqo_state->data_lock); + return removed; +} - query_index_rel = index_open(query_index_rel_oid, lockmode); - query_index_scan = index_beginscan(aqo_queries_heap, - query_index_rel, - 
SnapshotSelf, - 1, - 0); +static long +aqo_data_reset(void) +{ + HASH_SEQ_STATUS hash_seq; + DataEntry *entry; + long num_remove = 0; + long num_entries; + + dsa_init(); + + Assert(!LWLockHeldByMe(&aqo_state->data_lock)); + LWLockAcquire(&aqo_state->data_lock, LW_EXCLUSIVE); + num_entries = hash_get_num_entries(data_htab); + hash_seq_init(&hash_seq, data_htab); + while ((entry = hash_seq_search(&hash_seq)) != NULL) + { + Assert(DsaPointerIsValid(entry->data_dp)); + dsa_free(data_dsa, entry->data_dp); + if (!hash_search(data_htab, &entry->key, HASH_REMOVE, NULL)) + elog(PANIC, "[AQO] hash table corrupted"); + num_remove++; + } - ScanKeyInit(&key, - 1, - BTEqualStrategyNumber, - F_INT4EQ, - Int32GetDatum(query_hash)); + if (num_remove > 0) + aqo_state->data_changed = true; + LWLockRelease(&aqo_state->data_lock); + if (num_remove != num_entries) + elog(ERROR, "[AQO] Query ML memory storage is corrupted or parallel access without a lock has detected."); - index_rescan(query_index_scan, &key, 1, NULL, 0); + aqo_data_flush(); - slot = MakeSingleTupleTableSlot(query_index_scan->heapRelation->rd_att, - &TTSOpsBufferHeapTuple); - find_ok = index_getnext_slot(query_index_scan, ForwardScanDirection, slot); + return num_remove; +} - if (find_ok) +Datum +aqo_queries(PG_FUNCTION_ARGS) +{ + ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; + TupleDesc tupDesc; + MemoryContext per_query_ctx; + MemoryContext oldcontext; + Tuplestorestate *tupstore; + Datum values[AQ_TOTAL_NCOLS]; + bool nulls[AQ_TOTAL_NCOLS]; + HASH_SEQ_STATUS hash_seq; + QueriesEntry *entry; + + /* check to see if caller supports us returning a tuplestore */ + if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("set-valued function called in context that cannot accept a set"))); + if (!(rsinfo->allowedModes & SFRM_Materialize)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("materialize mode required, but it is 
not allowed in this context"))); + + /* Switch into long-lived context to construct returned data structures */ + per_query_ctx = rsinfo->econtext->ecxt_per_query_memory; + oldcontext = MemoryContextSwitchTo(per_query_ctx); + + /* Build a tuple descriptor for our result type */ + if (get_call_result_type(fcinfo, NULL, &tupDesc) != TYPEFUNC_COMPOSITE) + elog(ERROR, "return type must be a row type"); + + if (tupDesc->natts != AQ_TOTAL_NCOLS) + elog(ERROR, "[AQO] Incorrect number of output arguments"); + + tupstore = tuplestore_begin_heap(true, false, work_mem); + rsinfo->returnMode = SFRM_Materialize; + rsinfo->setResult = tupstore; + rsinfo->setDesc = tupDesc; + + MemoryContextSwitchTo(oldcontext); + + memset(nulls, 0, AQ_TOTAL_NCOLS); + + LWLockAcquire(&aqo_state->queries_lock, LW_SHARED); + hash_seq_init(&hash_seq, queries_htab); + while ((entry = hash_seq_search(&hash_seq)) != NULL) { - tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree); - Assert(shouldFree != true); - heap_deform_tuple(tuple, aqo_queries_heap->rd_att, - search_values, search_nulls); + values[AQ_QUERYID] = Int64GetDatum(entry->queryid); + values[AQ_FS] = Int64GetDatum(entry->fs); + values[AQ_LEARN_AQO] = BoolGetDatum(entry->learn_aqo); + values[AQ_USE_AQO] = BoolGetDatum(entry->use_aqo); + values[AQ_AUTO_TUNING] = BoolGetDatum(entry->auto_tuning); + values[AQ_SMART_TIMEOUT] = Int64GetDatum(entry->smart_timeout); + values[AQ_COUNT_INCREASE_TIMEOUT] = Int64GetDatum(entry->count_increase_timeout); + tuplestore_putvalues(tupstore, tupDesc, values, nulls); } - ExecDropSingleTupleTableSlot(slot); - index_endscan(query_index_scan); - index_close(query_index_rel, lockmode); - table_close(aqo_queries_heap, lockmode); - - return find_ok; + LWLockRelease(&aqo_state->queries_lock); + tuplestore_donestoring(tupstore); + return (Datum) 0; } -/* - * Creates entry for new query in aqo_queries table with given fields. - * Returns false if the operation failed, true otherwise. 
- */ bool -add_query(int query_hash, bool learn_aqo, bool use_aqo, - int fspace_hash, bool auto_tuning) +aqo_queries_store(uint64 queryid, + uint64 fs, bool learn_aqo, bool use_aqo, bool auto_tuning, + AqoQueriesNullArgs *null_args) { - RangeVar *aqo_queries_table_rv; - Relation aqo_queries_heap; - HeapTuple tuple; + QueriesEntry *entry; + bool found; + bool tblOverflow; + HASHACTION action; - LOCKMODE lockmode = RowExclusiveLock; + /* Insert is allowed if no args are NULL. */ + bool safe_insert = + (!null_args->fs_is_null && !null_args->learn_aqo_is_null && + !null_args->use_aqo_is_null && !null_args->auto_tuning_is_null); - Datum values[5]; - bool nulls[5] = {false, false, false, false, false}; + Assert(queries_htab); - Relation query_index_rel; - Oid query_index_rel_oid; + /* Guard for default feature space */ + Assert(queryid != 0 || (fs == 0 && learn_aqo == false && + use_aqo == false && auto_tuning == false)); - values[0] = Int32GetDatum(query_hash); - values[1] = BoolGetDatum(learn_aqo); - values[2] = BoolGetDatum(use_aqo); - values[3] = Int32GetDatum(fspace_hash); - values[4] = BoolGetDatum(auto_tuning); + LWLockAcquire(&aqo_state->queries_lock, LW_EXCLUSIVE); - query_index_rel_oid = RelnameGetRelid("aqo_queries_query_hash_idx"); - if (!OidIsValid(query_index_rel_oid)) - { - disable_aqo_for_query(); - return false; - } - query_index_rel = index_open(query_index_rel_oid, lockmode); + /* Check hash table overflow */ + tblOverflow = hash_get_num_entries(queries_htab) < fs_max_items ? false : true; + action = (tblOverflow || !safe_insert) ? 
HASH_FIND : HASH_ENTER; - aqo_queries_table_rv = makeRangeVar("public", "aqo_queries", -1); - aqo_queries_heap = table_openrv(aqo_queries_table_rv, lockmode); + entry = (QueriesEntry *) hash_search(queries_htab, &queryid, action, + &found); - tuple = heap_form_tuple(RelationGetDescr(aqo_queries_heap), - values, nulls); - PG_TRY(); - { - simple_heap_insert(aqo_queries_heap, tuple); - my_index_insert(query_index_rel, - values, nulls, - &(tuple->t_self), - aqo_queries_heap, - UNIQUE_CHECK_YES); - } - PG_CATCH(); + /* Initialize entry on first usage */ + if (!found && action == HASH_FIND) { /* - * Main goal is to catch deadlock errors during the index insertion. + * Hash table is full. To avoid possible problems - don't try to add + * more, just exit */ - CommandCounterIncrement(); - simple_heap_delete(aqo_queries_heap, &(tuple->t_self)); - PG_RE_THROW(); + LWLockRelease(&aqo_state->queries_lock); + ereport(LOG, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("[AQO] Queries storage is full. No more feature spaces can be added."), + errhint("Increase value of aqo.fs_max_items on restart of the instance"))); + return false; } - PG_END_TRY(); - - index_close(query_index_rel, lockmode); - table_close(aqo_queries_heap, lockmode); - - CommandCounterIncrement(); + if (!null_args->fs_is_null) + entry->fs = fs; + if (!null_args->learn_aqo_is_null) + entry->learn_aqo = learn_aqo; + if (!null_args->use_aqo_is_null) + entry->use_aqo = use_aqo; + if (!null_args->auto_tuning_is_null) + entry->auto_tuning = auto_tuning; + if (!null_args->smart_timeout) + entry->smart_timeout = 0; + if (!null_args->count_increase_timeout) + entry->count_increase_timeout = 0; + + if (entry->learn_aqo || entry->use_aqo || entry->auto_tuning) + /* Remove the class from cache of deactivated queries */ + hash_search(deactivated_queries, &queryid, HASH_REMOVE, NULL); + + aqo_state->queries_changed = true; + aqo_state->queries_changed = true; + LWLockRelease(&aqo_state->queries_lock); return true; } -bool 
-update_query(int query_hash, bool learn_aqo, bool use_aqo, - int fspace_hash, bool auto_tuning) +static long +aqo_queries_reset(void) { - RangeVar *aqo_queries_table_rv; - Relation aqo_queries_heap; - HeapTuple tuple, - nw_tuple; - - TupleTableSlot *slot; - bool shouldFree; - bool find_ok = false; - bool update_indexes; - - LOCKMODE lockmode = RowExclusiveLock; - - Relation query_index_rel; - Oid query_index_rel_oid; - IndexScanDesc query_index_scan; - ScanKeyData key; - - Datum values[5]; - bool isnull[5] = { false, false, false, false, false }; - bool replace[5] = { false, true, true, true, true }; - - query_index_rel_oid = RelnameGetRelid("aqo_queries_query_hash_idx"); - if (!OidIsValid(query_index_rel_oid)) + HASH_SEQ_STATUS hash_seq; + QueriesEntry *entry; + long num_remove = 0; + long num_entries; + + LWLockAcquire(&aqo_state->queries_lock, LW_EXCLUSIVE); + num_entries = hash_get_num_entries(queries_htab); + hash_seq_init(&hash_seq, queries_htab); + while ((entry = hash_seq_search(&hash_seq)) != NULL) { - disable_aqo_for_query(); - return false; - } + if (entry->queryid == 0) + /* Don't remove default feature space */ + continue; - aqo_queries_table_rv = makeRangeVar("public", "aqo_queries", -1); - aqo_queries_heap = table_openrv(aqo_queries_table_rv, lockmode); - - query_index_rel = index_open(query_index_rel_oid, lockmode); - query_index_scan = index_beginscan(aqo_queries_heap, - query_index_rel, - SnapshotSelf, - 1, - 0); - - ScanKeyInit(&key, - 1, - BTEqualStrategyNumber, - F_INT4EQ, - Int32GetDatum(query_hash)); - - index_rescan(query_index_scan, &key, 1, NULL, 0); - slot = MakeSingleTupleTableSlot(query_index_scan->heapRelation->rd_att, - &TTSOpsBufferHeapTuple); - find_ok = index_getnext_slot(query_index_scan, ForwardScanDirection, slot); - Assert(find_ok); - tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree); - Assert(shouldFree != true); - - heap_deform_tuple(tuple, aqo_queries_heap->rd_att, - values, isnull); - - values[1] = 
BoolGetDatum(learn_aqo); - values[2] = BoolGetDatum(use_aqo); - values[3] = Int32GetDatum(fspace_hash); - values[4] = BoolGetDatum(auto_tuning); - - nw_tuple = heap_modify_tuple(tuple, aqo_queries_heap->rd_att, - values, isnull, replace); - if (my_simple_heap_update(aqo_queries_heap, &(nw_tuple->t_self), nw_tuple, - &update_indexes)) - { - if (update_indexes) - my_index_insert(query_index_rel, values, isnull, - &(nw_tuple->t_self), - aqo_queries_heap, UNIQUE_CHECK_YES); - } - else - { - /* - * Ooops, somebody concurrently updated the tuple. We have to merge - * our changes somehow, but now we just discard ours. We don't believe - * in high probability of simultaneously finishing of two long, - * complex, and important queries, so we don't loss important data. - */ + if (!hash_search(queries_htab, &entry->queryid, HASH_REMOVE, NULL)) + elog(PANIC, "[AQO] hash table corrupted"); + num_remove++; } - ExecDropSingleTupleTableSlot(slot); - index_endscan(query_index_scan); - index_close(query_index_rel, lockmode); - table_close(aqo_queries_heap, lockmode); + if (num_remove > 0) + aqo_state->queries_changed = true; - CommandCounterIncrement(); + LWLockRelease(&aqo_state->queries_lock); - return true; + if (num_remove != num_entries - 1) + elog(ERROR, "[AQO] Queries memory storage is corrupted or parallel access without a lock has detected."); + + aqo_queries_flush(); + + return num_remove; } -/* - * Creates entry for new query in aqo_query_texts table with given fields. - * Returns false if the operation failed, true otherwise. 
- */ -bool -add_query_text(int query_hash, const char *query_text) +Datum +aqo_enable_query(PG_FUNCTION_ARGS) { - RangeVar *aqo_query_texts_table_rv; - Relation aqo_query_texts_heap; - HeapTuple tuple; + uint64 queryid = (uint64) PG_GETARG_INT64(0); + QueriesEntry *entry; + bool found; - LOCKMODE lockmode = RowExclusiveLock; + Assert(queries_htab); - Datum values[2]; - bool isnull[2] = {false, false}; + if (queryid == 0) + elog(ERROR, "[AQO] Default class can't be updated."); - Relation query_index_rel; - Oid query_index_rel_oid; + LWLockAcquire(&aqo_state->queries_lock, LW_EXCLUSIVE); + entry = (QueriesEntry *) hash_search(queries_htab, &queryid, HASH_FIND, &found); - values[0] = Int32GetDatum(query_hash); - values[1] = CStringGetTextDatum(query_text); - - query_index_rel_oid = RelnameGetRelid("aqo_query_texts_query_hash_idx"); - if (!OidIsValid(query_index_rel_oid)) + if (found) { - disable_aqo_for_query(); - return false; + entry->learn_aqo = true; + entry->use_aqo = true; + if (aqo_mode == AQO_MODE_INTELLIGENT) + entry->auto_tuning = true; } - query_index_rel = index_open(query_index_rel_oid, lockmode); + else + elog(ERROR, "[AQO] Entry with queryid "INT64_FORMAT + " not contained in table", (int64) queryid); + + hash_search(deactivated_queries, &queryid, HASH_REMOVE, NULL); + LWLockRelease(&aqo_state->queries_lock); + PG_RETURN_VOID(); +} + +Datum +aqo_disable_query(PG_FUNCTION_ARGS) +{ + uint64 queryid = (uint64) PG_GETARG_INT64(0); + QueriesEntry *entry; + bool found; - aqo_query_texts_table_rv = makeRangeVar("public", - "aqo_query_texts", - -1); - aqo_query_texts_heap = table_openrv(aqo_query_texts_table_rv, - lockmode); + Assert(queries_htab); - tuple = heap_form_tuple(RelationGetDescr(aqo_query_texts_heap), - values, isnull); + LWLockAcquire(&aqo_state->queries_lock, LW_EXCLUSIVE); + entry = (QueriesEntry *) hash_search(queries_htab, &queryid, HASH_FIND, &found); - PG_TRY(); + if(found) { - simple_heap_insert(aqo_query_texts_heap, tuple); - 
my_index_insert(query_index_rel, - values, isnull, - &(tuple->t_self), - aqo_query_texts_heap, - UNIQUE_CHECK_YES); + entry->learn_aqo = false; + entry->use_aqo = false; + entry->auto_tuning = false; } - PG_CATCH(); + else { - CommandCounterIncrement(); - simple_heap_delete(aqo_query_texts_heap, &(tuple->t_self)); - index_close(query_index_rel, lockmode); - table_close(aqo_query_texts_heap, lockmode); - PG_RE_THROW(); + elog(ERROR, "[AQO] Entry with "INT64_FORMAT" not contained in table", + (int64) queryid); } - PG_END_TRY(); + LWLockRelease(&aqo_state->queries_lock); + PG_RETURN_VOID(); +} - index_close(query_index_rel, lockmode); - table_close(aqo_query_texts_heap, lockmode); +bool +aqo_queries_find(uint64 queryid, QueryContextData *ctx) +{ + bool found; + QueriesEntry *entry; - CommandCounterIncrement(); + Assert(queries_htab); - return true; + LWLockAcquire(&aqo_state->queries_lock, LW_SHARED); + entry = (QueriesEntry *) hash_search(queries_htab, &queryid, HASH_FIND, &found); + if (found) + { + ctx->query_hash = entry->queryid; + ctx->learn_aqo = entry->learn_aqo; + ctx->use_aqo = entry->use_aqo; + ctx->auto_tuning = entry->auto_tuning; + ctx->smart_timeout = entry->smart_timeout; + ctx->count_increase_timeout = entry->count_increase_timeout; + } + LWLockRelease(&aqo_state->queries_lock); + return found; } /* - * Loads feature subspace (fss) from table aqo_data into memory. - * The last column of the returned matrix is for target values of objects. - * Returns false if the operation failed, true otherwise. 
- * - * 'fss_hash' is the hash of feature subspace which is supposed to be loaded - * 'ncols' is the number of clauses in the feature subspace - * 'matrix' is an allocated memory for matrix with the size of aqo_K rows - * and nhashes columns - * 'targets' is an allocated memory with size aqo_K for target values - * of the objects - * 'rows' is the pointer in which the function stores actual number of - * objects in the given feature space + * Function for update and save value of smart statement timeout + * for query in aqo_queries table */ bool -load_fss(int fss_hash, int ncols, double **matrix, double *targets, int *rows) +update_query_timeout(uint64 queryid, int64 smart_timeout) { - RangeVar *aqo_data_table_rv; - Relation aqo_data_heap; - HeapTuple tuple; - TupleTableSlot *slot; - bool shouldFree; - bool find_ok = false; + QueriesEntry *entry; + bool found; + bool tblOverflow; + HASHACTION action; - Relation data_index_rel; - Oid data_index_rel_oid; - IndexScanDesc data_index_scan; - ScanKeyData key[2]; + Assert(queries_htab); - LOCKMODE lockmode = AccessShareLock; + /* Guard for default feature space */ + Assert(queryid != 0); - Datum values[5]; - bool isnull[5]; + LWLockAcquire(&aqo_state->queries_lock, LW_EXCLUSIVE); - bool success = true; + /* Check hash table overflow */ + tblOverflow = hash_get_num_entries(queries_htab) < fs_max_items ? false : true; + action = tblOverflow ? HASH_FIND : HASH_ENTER; - data_index_rel_oid = RelnameGetRelid("aqo_fss_access_idx"); - if (!OidIsValid(data_index_rel_oid)) + entry = (QueriesEntry *) hash_search(queries_htab, &queryid, action, + &found); + + /* Initialize entry on first usage */ + if (!found && action == HASH_FIND) { - disable_aqo_for_query(); + /* + * Hash table is full. 
To avoid possible problems - don't try to add + * more, just exit + */ + LWLockRelease(&aqo_state->queries_lock); return false; } - aqo_data_table_rv = makeRangeVar("public", "aqo_data", -1); - aqo_data_heap = table_openrv(aqo_data_table_rv, lockmode); - - data_index_rel = index_open(data_index_rel_oid, lockmode); - data_index_scan = index_beginscan(aqo_data_heap, - data_index_rel, - SnapshotSelf, - 2, - 0); + entry->smart_timeout = smart_timeout; + entry->count_increase_timeout = entry->count_increase_timeout + 1; - ScanKeyInit(&key[0], - 1, - BTEqualStrategyNumber, - F_INT4EQ, - Int32GetDatum(query_context.fspace_hash)); - - ScanKeyInit(&key[1], - 2, - BTEqualStrategyNumber, - F_INT4EQ, - Int32GetDatum(fss_hash)); - - index_rescan(data_index_scan, key, 2, NULL, 0); - - slot = MakeSingleTupleTableSlot(data_index_scan->heapRelation->rd_att, - &TTSOpsBufferHeapTuple); - find_ok = index_getnext_slot(data_index_scan, ForwardScanDirection, slot); + LWLockRelease(&aqo_state->queries_lock); + return true; +} - if (find_ok) - { - tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree); - Assert(shouldFree != true); - heap_deform_tuple(tuple, aqo_data_heap->rd_att, values, isnull); +/* + * Update AQO preferences for a given queryid value. + * if incoming param is null - leave it unchanged. + * if forced is false, do nothing if query with such ID isn't exists yet. + * Return true if operation have done some changes. 
+ */ +Datum +aqo_queries_update(PG_FUNCTION_ARGS) +{ + uint64 queryid; + uint64 fs = 0; + bool learn_aqo = false; + bool use_aqo = false; + bool auto_tuning = false; + + AqoQueriesNullArgs null_args = + { PG_ARGISNULL(AQ_FS), PG_ARGISNULL(AQ_LEARN_AQO), + PG_ARGISNULL(AQ_USE_AQO), PG_ARGISNULL(AQ_AUTO_TUNING) }; + + if (PG_ARGISNULL(AQ_QUERYID)) + PG_RETURN_BOOL(false); + + queryid = PG_GETARG_INT64(AQ_QUERYID); + if (queryid == 0) + /* Do nothing for default feature space */ + PG_RETURN_BOOL(false); + + if (!null_args.fs_is_null) + fs = PG_GETARG_INT64(AQ_FS); + if (!null_args.learn_aqo_is_null) + learn_aqo = PG_GETARG_BOOL(AQ_LEARN_AQO); + if (!null_args.use_aqo_is_null) + use_aqo = PG_GETARG_BOOL(AQ_USE_AQO); + if (!null_args.auto_tuning_is_null) + auto_tuning = PG_GETARG_BOOL(AQ_AUTO_TUNING); + + PG_RETURN_BOOL(aqo_queries_store(queryid, + fs, learn_aqo, use_aqo, auto_tuning, + &null_args)); +} - if (DatumGetInt32(values[2]) == ncols) - { - if (ncols > 0) - /* - * The case than an object has not any filters and selectivities - */ - deform_matrix(values[3], matrix); +Datum +aqo_reset(PG_FUNCTION_ARGS) +{ + long counter = 0; - deform_vector(values[4], targets, rows); - } - else - { - elog(WARNING, "unexpected number of features for hash (%d, %d):\ - expected %d features, obtained %d", - query_context.fspace_hash, - fss_hash, ncols, DatumGetInt32(values[2])); - success = false; - } - } - else - success = false; + counter += aqo_stat_reset(); + counter += aqo_qtexts_reset(); + counter += aqo_data_reset(); + counter += aqo_queries_reset(); - ExecDropSingleTupleTableSlot(slot); - index_endscan(data_index_scan); - index_close(data_index_rel, lockmode); - table_close(aqo_data_heap, lockmode); + /* Cleanup cache of deactivated queries */ + reset_deactivated_queries(); - return success; + PG_RETURN_INT64(counter); } +#include "utils/syscache.h" + /* - * Updates the specified line in the specified feature subspace. 
- * Returns false if the operation failed, true otherwise. + * Scan aqo_queries. For each FS lookup aqo_data records: detect a record, where + * list of oids links to deleted tables. + * If * - * 'fss_hash' specifies the feature subspace - * 'nrows' x 'ncols' is the shape of 'matrix' - * 'targets' is vector of size 'nrows' + * Scan aqo_data hash table. Detect a record, where list of oids links to + * deleted tables. If gentle is TRUE, remove this record only. Another case, + * remove all records with the same (not default) fs from aqo_data. + * Scan aqo_queries. If no one record in aqo_data exists for this fs - remove + * the record from aqo_queries, aqo_query_stat and aqo_query_texts. */ -bool -update_fss(int fss_hash, int nrows, int ncols, double **matrix, double *targets) +static void +cleanup_aqo_database(bool gentle, int *fs_num, int *fss_num) { - RangeVar *aqo_data_table_rv; - Relation aqo_data_heap; - TupleDesc tuple_desc; - HeapTuple tuple, - nw_tuple; - - TupleTableSlot *slot; - bool shouldFree; - bool find_ok = false; - bool update_indexes; - - LOCKMODE lockmode = RowExclusiveLock; + HASH_SEQ_STATUS hash_seq; + QueriesEntry *entry; - Relation data_index_rel; - Oid data_index_rel_oid; - IndexScanDesc data_index_scan; - ScanKeyData key[2]; + /* Call it because we might touch DSA segments during the cleanup */ + dsa_init(); - Datum values[5]; - bool isnull[5] = { false, false, false, false, false }; - bool replace[5] = { false, false, false, true, true }; + *fs_num = 0; + *fss_num = 0; - data_index_rel_oid = RelnameGetRelid("aqo_fss_access_idx"); - if (!OidIsValid(data_index_rel_oid)) + /* + * It's a long haul. So, make seq scan without any lock. It is possible + * because only this operation can delete data from hash table. 
+ */ + hash_seq_init(&hash_seq, queries_htab); + while ((entry = hash_seq_search(&hash_seq)) != NULL) { - disable_aqo_for_query(); - return false; - } + HASH_SEQ_STATUS hash_seq2; + DataEntry *dentry; + List *junk_fss = NIL; + List *actual_fss = NIL; + ListCell *lc; + + /* Scan aqo_data for any junk records related to this FS */ + hash_seq_init(&hash_seq2, data_htab); + while ((dentry = hash_seq_search(&hash_seq2)) != NULL) + { + char *ptr; - aqo_data_table_rv = makeRangeVar("public", "aqo_data", -1); - aqo_data_heap = table_openrv(aqo_data_table_rv, lockmode); + if (entry->fs != dentry->key.fs) + /* Another FS */ + continue; - tuple_desc = RelationGetDescr(aqo_data_heap); + LWLockAcquire(&aqo_state->data_lock, LW_SHARED); - data_index_rel = index_open(data_index_rel_oid, lockmode); - data_index_scan = index_beginscan(aqo_data_heap, - data_index_rel, - SnapshotSelf, - 2, - 0); + Assert(DsaPointerIsValid(dentry->data_dp)); + ptr = dsa_get_address(data_dsa, dentry->data_dp); - ScanKeyInit(&key[0], - 1, - BTEqualStrategyNumber, - F_INT4EQ, - Int32GetDatum(query_context.fspace_hash)); + ptr += sizeof(data_key); + ptr += sizeof(double) * dentry->rows * dentry->cols; + ptr += sizeof(double) * 2 * dentry->rows; - ScanKeyInit(&key[1], - 2, - BTEqualStrategyNumber, - F_INT4EQ, - Int32GetDatum(fss_hash)); + if (dentry->nrels > 0) + { + int i; - index_rescan(data_index_scan, key, 2, NULL, 0); + /* Check each OID to be existed. 
*/ + for(i = 0; i < dentry->nrels; i++) + { + Oid reloid = ObjectIdGetDatum(*(Oid *)ptr); - slot = MakeSingleTupleTableSlot(data_index_scan->heapRelation->rd_att, - &TTSOpsBufferHeapTuple); - find_ok = index_getnext_slot(data_index_scan, ForwardScanDirection, slot); + if (!SearchSysCacheExists1(RELOID, reloid)) + /* Remember this value */ + junk_fss = list_append_unique_int(junk_fss, + dentry->key.fss); + else + actual_fss = list_append_unique_int(actual_fss, + dentry->key.fss); - if (!find_ok) - { - values[0] = Int32GetDatum(query_context.fspace_hash); - values[1] = Int32GetDatum(fss_hash); - values[2] = Int32GetDatum(ncols); + ptr += sizeof(Oid); + } + } + else + { + /* + * Impossible case. We don't use AQO for so simple or synthetic + * data. Just detect errors in this logic. + */ + ereport(PANIC, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("AQO detected incorrect behaviour: fs=" + UINT64_FORMAT" fss=%d", + dentry->key.fs, (int32) dentry->key.fss))); + } + + LWLockRelease(&aqo_state->data_lock); + } - if (ncols > 0) - values[3] = PointerGetDatum(form_matrix(matrix, nrows, ncols)); - else - isnull[3] = true; + /* + * In forced mode remove all child FSSes even some of them are still + * link to existed tables. + */ + if (junk_fss != NIL && !gentle) + junk_fss = list_concat(junk_fss, actual_fss); - values[4] = PointerGetDatum(form_vector(targets, nrows)); - tuple = heap_form_tuple(tuple_desc, values, isnull); - PG_TRY(); + /* Remove junk records from aqo_data */ + foreach(lc, junk_fss) { - simple_heap_insert(aqo_data_heap, tuple); - my_index_insert(data_index_rel, values, isnull, &(tuple->t_self), - aqo_data_heap, UNIQUE_CHECK_YES); + data_key key = {.fs = entry->fs, .fss = lfirst_int(lc)}; + (*fss_num) += (int) _aqo_data_remove(&key); } - PG_CATCH(); + + /* + * If no one live FSS exists, remove the class totally. Don't touch + * default query class. 
+ */ + if (entry->fs != 0 && (actual_fss == NIL || (junk_fss != NIL && !gentle))) { - CommandCounterIncrement(); - simple_heap_delete(aqo_data_heap, &(tuple->t_self)); - PG_RE_THROW(); - } - PG_END_TRY(); - } - else - { - tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree); - Assert(shouldFree != true); - heap_deform_tuple(tuple, aqo_data_heap->rd_att, values, isnull); + /* Query Stat */ + _aqo_stat_remove(entry->queryid); - if (ncols > 0) - values[3] = PointerGetDatum(form_matrix(matrix, nrows, ncols)); - else - isnull[3] = true; + /* Query text */ + _aqo_qtexts_remove(entry->queryid); - values[4] = PointerGetDatum(form_vector(targets, nrows)); - nw_tuple = heap_modify_tuple(tuple, tuple_desc, - values, isnull, replace); - if (my_simple_heap_update(aqo_data_heap, &(nw_tuple->t_self), nw_tuple, - &update_indexes)) - { - if (update_indexes) - my_index_insert(data_index_rel, values, isnull, - &(nw_tuple->t_self), - aqo_data_heap, UNIQUE_CHECK_YES); - } - else - { - /* - * Ooops, somebody concurrently updated the tuple. We have to - * merge our changes somehow, but now we just discard ours. We - * don't believe in high probability of simultaneously finishing - * of two long, complex, and important queries, so we don't loss - * important data. - */ + /* Query class preferences */ + (*fs_num) += (int) _aqo_queries_remove(entry->queryid); } } - ExecDropSingleTupleTableSlot(slot); - index_endscan(data_index_scan); - index_close(data_index_rel, lockmode); - table_close(aqo_data_heap, lockmode); - - CommandCounterIncrement(); + /* + * The best place to flush updated AQO storage: calling the routine, user + * realizes how heavy it is. 
+ */ + aqo_stat_flush(); + aqo_data_flush(); + aqo_qtexts_flush(); + aqo_queries_flush(); +} - return true; +Datum +aqo_cleanup(PG_FUNCTION_ARGS) +{ + int fs_num; + int fss_num; + TupleDesc tupDesc; + HeapTuple tuple; + Datum result; + Datum values[2]; + bool nulls[2] = {0, 0}; + + if (get_call_result_type(fcinfo, NULL, &tupDesc) != TYPEFUNC_COMPOSITE) + elog(ERROR, "return type must be a row type"); + + if (tupDesc->natts != 2) + elog(ERROR, "[AQO] Incorrect number of output arguments"); + + /* + * Make forced cleanup: if at least one fss isn't actual, remove parent FS + * and all its FSSes. + * Main idea of such behaviour here is, if a table was deleted, we have + * little chance to use this class in future. Only one use case here can be + * a reason: to use it as a base for search data in a set of neighbours. + * But, invent another UI function for such logic. + */ + cleanup_aqo_database(false, &fs_num, &fss_num); + + values[0] = Int32GetDatum(fs_num); + values[1] = Int32GetDatum(fss_num); + tuple = heap_form_tuple(tupDesc, values, nulls); + result = HeapTupleGetDatum(tuple); + + PG_RETURN_DATUM(result); } /* - * Returns QueryStat for the given query_hash. Returns empty QueryStat if - * no statistics is stored for the given query_hash in table aqo_query_stat. - * Returns NULL and executes disable_aqo_for_query if aqo_query_stat - * is not found. + * XXX: Maybe to allow usage of NULL value to make a reset? 
*/ -QueryStat * -get_aqo_stat(int query_hash) +Datum +aqo_drop_class(PG_FUNCTION_ARGS) { - RangeVar *aqo_stat_table_rv; - Relation aqo_stat_heap; - HeapTuple tuple; - LOCKMODE heap_lock = AccessShareLock; - - Relation stat_index_rel; - Oid stat_index_rel_oid; - IndexScanDesc stat_index_scan; - ScanKeyData key; - LOCKMODE index_lock = AccessShareLock; - - Datum values[9]; - bool nulls[9]; - - QueryStat *stat = palloc_query_stat(); - - TupleTableSlot *slot; - bool shouldFree; - bool find_ok = false; - - stat_index_rel_oid = RelnameGetRelid("aqo_query_stat_idx"); - if (!OidIsValid(stat_index_rel_oid)) - { - disable_aqo_for_query(); - return NULL; - } - - aqo_stat_table_rv = makeRangeVar("public", "aqo_query_stat", -1); - aqo_stat_heap = table_openrv(aqo_stat_table_rv, heap_lock); - - stat_index_rel = index_open(stat_index_rel_oid, index_lock); - stat_index_scan = index_beginscan(aqo_stat_heap, - stat_index_rel, - SnapshotSelf, - 1, - 0); - - ScanKeyInit(&key, - 1, - BTEqualStrategyNumber, - F_INT4EQ, - Int32GetDatum(query_hash)); - - index_rescan(stat_index_scan, &key, 1, NULL, 0); + uint64 queryid = PG_GETARG_INT64(0); + bool found; + QueriesEntry *entry; + uint64 fs; + long cnt; + + if (queryid == 0) + elog(ERROR, "[AQO] Cannot remove basic class "INT64_FORMAT".", + (int64) queryid); + + /* Extract FS value for the queryid */ + LWLockAcquire(&aqo_state->queries_lock, LW_SHARED); + entry = (QueriesEntry *) hash_search(queries_htab, &queryid, HASH_FIND, + &found); + if (!found) + elog(ERROR, "[AQO] Nothing to remove for the class "INT64_FORMAT".", + (int64) queryid); + + fs = entry->fs; + LWLockRelease(&aqo_state->queries_lock); + + if (fs == 0) + elog(ERROR, "[AQO] Cannot remove class "INT64_FORMAT" with default FS.", + (int64) queryid); + if (fs != queryid) + elog(WARNING, + "[AQO] Removing query class has non-generic feature space value: " + "id = "INT64_FORMAT", fs = "UINT64_FORMAT".", (int64) queryid, fs); + + /* Now, remove all data related to the class */ + 
_aqo_queries_remove(queryid); + _aqo_stat_remove(queryid); + _aqo_qtexts_remove(queryid); + cnt = _aqo_data_clean(fs); + + /* Immediately save changes to permanent storage. */ + aqo_stat_flush(); + aqo_data_flush(); + aqo_qtexts_flush(); + aqo_queries_flush(); + + PG_RETURN_INT32(cnt); +} - slot = MakeSingleTupleTableSlot(stat_index_scan->heapRelation->rd_att, - &TTSOpsBufferHeapTuple); - find_ok = index_getnext_slot(stat_index_scan, ForwardScanDirection, slot); +typedef enum { + AQE_NN = 0, AQE_QUERYID, AQE_FS, AQE_CERROR, AQE_NEXECS, AQE_TOTAL_NCOLS +} ce_output_order; - if (find_ok) +/* + * Show cardinality error gathered on last execution. + * Skip entries with empty stat slots. XXX: is it possible? + */ +Datum +aqo_cardinality_error(PG_FUNCTION_ARGS) +{ + bool controlled = PG_GETARG_BOOL(0); + ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; + TupleDesc tupDesc; + MemoryContext per_query_ctx; + MemoryContext oldcontext; + Tuplestorestate *tupstore; + Datum values[AQE_TOTAL_NCOLS]; + bool nulls[AQE_TOTAL_NCOLS]; + HASH_SEQ_STATUS hash_seq; + QueriesEntry *qentry; + StatEntry *sentry; + int counter = 0; + + /* check to see if caller supports us returning a tuplestore */ + if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("set-valued function called in context that cannot accept a set"))); + if (!(rsinfo->allowedModes & SFRM_Materialize)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("materialize mode required, but it is not allowed in this context"))); + + /* Switch into long-lived context to construct returned data structures */ + per_query_ctx = rsinfo->econtext->ecxt_per_query_memory; + oldcontext = MemoryContextSwitchTo(per_query_ctx); + + /* Build a tuple descriptor for our result type */ + if (get_call_result_type(fcinfo, NULL, &tupDesc) != TYPEFUNC_COMPOSITE) + elog(ERROR, "return type must be a row type"); + + if (tupDesc->natts != AQE_TOTAL_NCOLS) + 
elog(ERROR, "[AQO] Incorrect number of output arguments"); + + tupstore = tuplestore_begin_heap(true, false, work_mem); + rsinfo->returnMode = SFRM_Materialize; + rsinfo->setResult = tupstore; + rsinfo->setDesc = tupDesc; + + MemoryContextSwitchTo(oldcontext); + + LWLockAcquire(&aqo_state->queries_lock, LW_SHARED); + LWLockAcquire(&aqo_state->stat_lock, LW_SHARED); + + memset(nulls, 0, AQE_TOTAL_NCOLS * sizeof(nulls[0])); + + hash_seq_init(&hash_seq, queries_htab); + while ((qentry = hash_seq_search(&hash_seq)) != NULL) { - tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree); - Assert(shouldFree != true); - heap_deform_tuple(tuple, aqo_stat_heap->rd_att, values, nulls); - - DeformVectorSz(values[1], stat->execution_time_with_aqo); - DeformVectorSz(values[2], stat->execution_time_without_aqo); - DeformVectorSz(values[3], stat->planning_time_with_aqo); - DeformVectorSz(values[4], stat->planning_time_without_aqo); - DeformVectorSz(values[5], stat->cardinality_error_with_aqo); - DeformVectorSz(values[6], stat->cardinality_error_without_aqo); - - stat->executions_with_aqo = DatumGetInt64(values[7]); - stat->executions_without_aqo = DatumGetInt64(values[8]); + bool found; + double *ce; + int64 nexecs; + int nvals; + + sentry = (StatEntry *) hash_search(stat_htab, &qentry->queryid, + HASH_FIND, &found); + if (!found) + /* Statistics not found by some reason. Just go further */ + continue; + + nvals = controlled ? sentry->cur_stat_slot_aqo : sentry->cur_stat_slot; + if (nvals == 0) + /* No one stat slot filled */ + continue; + + nexecs = controlled ? sentry->execs_with_aqo : sentry->execs_without_aqo; + ce = controlled ? 
sentry->est_error_aqo : sentry->est_error; + + values[AQE_NN] = Int32GetDatum(++counter); + values[AQE_QUERYID] = Int64GetDatum(qentry->queryid); + values[AQE_FS] = Int64GetDatum(qentry->fs); + values[AQE_NEXECS] = Int64GetDatum(nexecs); + values[AQE_CERROR] = Float8GetDatum(ce[nvals - 1]); + tuplestore_putvalues(tupstore, tupDesc, values, nulls); } - ExecDropSingleTupleTableSlot(slot); - index_endscan(stat_index_scan); - index_close(stat_index_rel, index_lock); - table_close(aqo_stat_heap, heap_lock); + LWLockRelease(&aqo_state->stat_lock); + LWLockRelease(&aqo_state->queries_lock); - return stat; + tuplestore_donestoring(tupstore); + return (Datum) 0; } +typedef enum { + ET_NN = 0, ET_QUERYID, ET_FS, ET_EXECTIME, ET_NEXECS, ET_TOTAL_NCOLS +} et_output_order; + /* - * Saves given QueryStat for the given query_hash. - * Executes disable_aqo_for_query if aqo_query_stat is not found. + * XXX: maybe to merge with aqo_cardinality_error ? + * XXX: Do we really want sequental number ? */ -void -update_aqo_stat(int query_hash, QueryStat *stat) -{ - RangeVar *aqo_stat_table_rv; - Relation aqo_stat_heap; - HeapTuple tuple, - nw_tuple; - TupleDesc tuple_desc; - - TupleTableSlot *slot; - bool shouldFree; - bool find_ok = false; - bool update_indexes; - - LOCKMODE lockmode = RowExclusiveLock; - - Relation stat_index_rel; - Oid stat_index_rel_oid; - IndexScanDesc stat_index_scan; - ScanKeyData key; - - Datum values[9]; - bool isnull[9] = { false, false, false, - false, false, false, - false, false, false }; - bool replace[9] = { false, true, true, - true, true, true, - true, true, true }; - - stat_index_rel_oid = RelnameGetRelid("aqo_query_stat_idx"); - if (!OidIsValid(stat_index_rel_oid)) - { - disable_aqo_for_query(); - return; - } - - aqo_stat_table_rv = makeRangeVar("public", "aqo_query_stat", -1); - aqo_stat_heap = table_openrv(aqo_stat_table_rv, lockmode); - - tuple_desc = RelationGetDescr(aqo_stat_heap); - - stat_index_rel = index_open(stat_index_rel_oid, lockmode); - 
stat_index_scan = index_beginscan(aqo_stat_heap, - stat_index_rel, - SnapshotSelf, - 1, - 0); - - ScanKeyInit(&key, - 1, - BTEqualStrategyNumber, - F_INT4EQ, - Int32GetDatum(query_hash)); - - index_rescan(stat_index_scan, &key, 1, NULL, 0); - - slot = MakeSingleTupleTableSlot(stat_index_scan->heapRelation->rd_att, - &TTSOpsBufferHeapTuple); - find_ok = index_getnext_slot(stat_index_scan, ForwardScanDirection, slot); - - /*values[0] will be initialized later */ - values[1] = PointerGetDatum(FormVectorSz(stat->execution_time_with_aqo)); - values[2] = PointerGetDatum(FormVectorSz(stat->execution_time_without_aqo)); - values[3] = PointerGetDatum(FormVectorSz(stat->planning_time_with_aqo)); - values[4] = PointerGetDatum(FormVectorSz(stat->planning_time_without_aqo)); - values[5] = PointerGetDatum(FormVectorSz(stat->cardinality_error_with_aqo)); - values[6] = PointerGetDatum(FormVectorSz(stat->cardinality_error_without_aqo)); - - values[7] = Int64GetDatum(stat->executions_with_aqo); - values[8] = Int64GetDatum(stat->executions_without_aqo); - - if (!find_ok) - { - values[0] = Int32GetDatum(query_hash); - tuple = heap_form_tuple(tuple_desc, values, isnull); - PG_TRY(); - { - simple_heap_insert(aqo_stat_heap, tuple); - my_index_insert(stat_index_rel, values, isnull, &(tuple->t_self), - aqo_stat_heap, UNIQUE_CHECK_YES); - } - PG_CATCH(); - { - CommandCounterIncrement(); - simple_heap_delete(aqo_stat_heap, &(tuple->t_self)); - PG_RE_THROW(); - } - PG_END_TRY(); - } - else +Datum +aqo_execution_time(PG_FUNCTION_ARGS) +{ + bool controlled = PG_GETARG_BOOL(0); + ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; + TupleDesc tupDesc; + MemoryContext per_query_ctx; + MemoryContext oldcontext; + Tuplestorestate *tupstore; + Datum values[ET_TOTAL_NCOLS]; + bool nulls[ET_TOTAL_NCOLS]; + HASH_SEQ_STATUS hash_seq; + QueriesEntry *qentry; + StatEntry *sentry; + int counter = 0; + + /* check to see if caller supports us returning a tuplestore */ + if (rsinfo == NULL || 
!IsA(rsinfo, ReturnSetInfo)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("set-valued function called in context that cannot accept a set"))); + if (!(rsinfo->allowedModes & SFRM_Materialize)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("materialize mode required, but it is not allowed in this context"))); + + /* Switch into long-lived context to construct returned data structures */ + per_query_ctx = rsinfo->econtext->ecxt_per_query_memory; + oldcontext = MemoryContextSwitchTo(per_query_ctx); + + /* Build a tuple descriptor for our result type */ + if (get_call_result_type(fcinfo, NULL, &tupDesc) != TYPEFUNC_COMPOSITE) + elog(ERROR, "return type must be a row type"); + + if (tupDesc->natts != ET_TOTAL_NCOLS) + elog(ERROR, "[AQO] Incorrect number of output arguments"); + + tupstore = tuplestore_begin_heap(true, false, work_mem); + rsinfo->returnMode = SFRM_Materialize; + rsinfo->setResult = tupstore; + rsinfo->setDesc = tupDesc; + + MemoryContextSwitchTo(oldcontext); + + LWLockAcquire(&aqo_state->queries_lock, LW_SHARED); + LWLockAcquire(&aqo_state->stat_lock, LW_SHARED); + + memset(nulls, 0, ET_TOTAL_NCOLS * sizeof(nulls[0])); + + hash_seq_init(&hash_seq, queries_htab); + while ((qentry = hash_seq_search(&hash_seq)) != NULL) { - tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree); - Assert(shouldFree != true); - values[0] = heap_getattr(tuple, 1, - RelationGetDescr(aqo_stat_heap), &isnull[0]); - nw_tuple = heap_modify_tuple(tuple, tuple_desc, - values, isnull, replace); - if (my_simple_heap_update(aqo_stat_heap, &(nw_tuple->t_self), nw_tuple, - &update_indexes)) + bool found; + double *et; + int64 nexecs; + int nvals; + double tm = 0; + + sentry = (StatEntry *) hash_search(stat_htab, &qentry->queryid, + HASH_FIND, &found); + if (!found) + /* Statistics not found by some reason. Just go further */ + continue; + + nvals = controlled ? 
sentry->cur_stat_slot_aqo : sentry->cur_stat_slot; + if (nvals == 0) + /* No one stat slot filled */ + continue; + + nexecs = controlled ? sentry->execs_with_aqo : sentry->execs_without_aqo; + et = controlled ? sentry->exec_time_aqo : sentry->exec_time; + + if (!controlled) { - /* NOTE: insert index tuple iff heap update succeeded! */ - if (update_indexes) - my_index_insert(stat_index_rel, values, isnull, - &(nw_tuple->t_self), - aqo_stat_heap, UNIQUE_CHECK_YES); + int i; + /* Calculate average execution time */ + for (i = 0; i < nvals; i++) + tm += et[i]; + tm /= nvals; } else - { - /* - * Ooops, somebody concurrently updated the tuple. We have to - * merge our changes somehow, but now we just discard ours. We - * don't believe in high probability of simultaneously finishing - * of two long, complex, and important queries, so we don't loss - * important data. - */ - } + tm = et[nvals - 1]; + + values[ET_NN] = Int32GetDatum(++counter); + values[ET_QUERYID] = Int64GetDatum(qentry->queryid); + values[ET_FS] = Int64GetDatum(qentry->fs); + values[ET_NEXECS] = Int64GetDatum(nexecs); + values[ET_EXECTIME] = Float8GetDatum(tm); + tuplestore_putvalues(tupstore, tupDesc, values, nulls); } - ExecDropSingleTupleTableSlot(slot); - index_endscan(stat_index_scan); - index_close(stat_index_rel, lockmode); - table_close(aqo_stat_heap, lockmode); + LWLockRelease(&aqo_state->stat_lock); + LWLockRelease(&aqo_state->queries_lock); - CommandCounterIncrement(); + tuplestore_donestoring(tupstore); + return (Datum) 0; } /* - * Expands matrix from storage into simple C-array. + * Update AQO query text for a given queryid value. + * Return true if operation have done some changes, + * false otherwize. 
*/ -void -deform_matrix(Datum datum, double **matrix) +Datum +aqo_query_texts_update(PG_FUNCTION_ARGS) { - ArrayType *array = DatumGetArrayTypePCopy(PG_DETOAST_DATUM(datum)); - int nelems; - Datum *values; - int rows; - int cols; - int i, - j; - - deconstruct_array(array, - FLOAT8OID, 8, FLOAT8PASSBYVAL, 'd', - &values, NULL, &nelems); - if (nelems != 0) - { - rows = ARR_DIMS(array)[0]; - cols = ARR_DIMS(array)[1]; - for (i = 0; i < rows; ++i) - for (j = 0; j < cols; ++j) - matrix[i][j] = DatumGetFloat8(values[i * cols + j]); - } - pfree(values); - pfree(array); + uint64 queryid; + int str_len; + text *str; + char *str_buff; + bool res = false; + + /* Do nothing if any arguments are NULLs */ + if ((PG_ARGISNULL(QT_QUERYID) || PG_ARGISNULL(QT_QUERY_STRING))) + PG_RETURN_BOOL(false); + + if (!(queryid = PG_GETARG_INT64(QT_QUERYID))) + /* Do nothing for default feature space */ + PG_RETURN_BOOL(false); + + str = PG_GETARG_TEXT_PP(QT_QUERY_STRING); + str_len = VARSIZE_ANY_EXHDR(str) + 1; + if (str_len > querytext_max_size) + str_len = querytext_max_size; + + str_buff = (char*) palloc(str_len); + text_to_cstring_buffer(str, str_buff, str_len); + res = aqo_qtext_store(queryid, str_buff, NULL); + pfree(str_buff); + + PG_RETURN_BOOL(res); } /* - * Expands vector from storage into simple C-array. - * Also returns its number of elements. + * Check if incoming array is one dimensional array + * and array elements are not null. Init array field + * and return number of elements if check passed, + * otherwize return -1. 
*/ -void -deform_vector(Datum datum, double *vector, int *nelems) +static int init_dbl_array(double **dest, ArrayType *arr) { - ArrayType *array = DatumGetArrayTypePCopy(PG_DETOAST_DATUM(datum)); - Datum *values; - int i; - - deconstruct_array(array, - FLOAT8OID, 8, FLOAT8PASSBYVAL, 'd', - &values, NULL, nelems); - for (i = 0; i < *nelems; ++i) - vector[i] = DatumGetFloat8(values[i]); - pfree(values); - pfree(array); + if (ARR_NDIM(arr) > 1 || ARR_HASNULL(arr)) + return -1; + *dest = (double *) ARR_DATA_PTR(arr); + return ArrayGetNItems(ARR_NDIM(arr), ARR_DIMS(arr)); } /* - * Forms ArrayType object for storage from simple C-array matrix. + * Update AQO query stat table for a given queryid value. + * Return true if operation have done some changes, + * false otherwize. */ -ArrayType * -form_matrix(double **matrix, int nrows, int ncols) +Datum +aqo_query_stat_update(PG_FUNCTION_ARGS) { - Datum *elems; - ArrayType *array; - int dims[2]; - int lbs[2]; - int i, - j; - - dims[0] = nrows; - dims[1] = ncols; - lbs[0] = lbs[1] = 1; - elems = palloc(sizeof(*elems) * nrows * ncols); - for (i = 0; i < nrows; ++i) - for (j = 0; j < ncols; ++j) - elems[i * ncols + j] = Float8GetDatum(matrix[i][j]); - - array = construct_md_array(elems, NULL, 2, dims, lbs, - FLOAT8OID, 8, FLOAT8PASSBYVAL, 'd'); - pfree(elems); - return array; + uint64 queryid; + AqoStatArgs stat_arg; + + /* + * Arguments cannot be NULL. 
+ */ + if (PG_ARGISNULL(QUERYID) || PG_ARGISNULL(NEXECS_AQO) || + PG_ARGISNULL(NEXECS) || PG_ARGISNULL(EXEC_TIME_AQO) || + PG_ARGISNULL(PLAN_TIME_AQO) || PG_ARGISNULL(EST_ERROR_AQO) || + PG_ARGISNULL(EXEC_TIME) || PG_ARGISNULL(PLAN_TIME) || + PG_ARGISNULL(EST_ERROR)) + PG_RETURN_BOOL(false); + + queryid = PG_GETARG_INT64(QUERYID); + stat_arg.execs_with_aqo = PG_GETARG_INT64(NEXECS_AQO); + stat_arg.execs_without_aqo = PG_GETARG_INT64(NEXECS); + if (queryid == 0 || stat_arg.execs_with_aqo < 0 || + stat_arg.execs_without_aqo < 0) + PG_RETURN_BOOL(false); + + /* + * Init 'with aqo' array fields for further update procedure and + * check that arrays have the same size. + */ + stat_arg.cur_stat_slot_aqo = + init_dbl_array(&stat_arg.exec_time_aqo, + PG_GETARG_ARRAYTYPE_P(EXEC_TIME_AQO)); + if (stat_arg.cur_stat_slot_aqo == -1 || + stat_arg.cur_stat_slot_aqo > STAT_SAMPLE_SIZE || + stat_arg.cur_stat_slot_aqo != + init_dbl_array(&stat_arg.plan_time_aqo, + PG_GETARG_ARRAYTYPE_P(PLAN_TIME_AQO)) || + stat_arg.cur_stat_slot_aqo != + init_dbl_array(&stat_arg.est_error_aqo, + PG_GETARG_ARRAYTYPE_P(EST_ERROR_AQO))) + PG_RETURN_BOOL(false); + + /* + * Init 'without aqo' array fields for further update procedure and + * check that arrays have the same size. + */ + stat_arg.cur_stat_slot = init_dbl_array(&stat_arg.exec_time, + PG_GETARG_ARRAYTYPE_P(EXEC_TIME)); + if (stat_arg.cur_stat_slot == -1 || + stat_arg.cur_stat_slot > STAT_SAMPLE_SIZE || + stat_arg.cur_stat_slot != + init_dbl_array(&stat_arg.plan_time, + PG_GETARG_ARRAYTYPE_P(PLAN_TIME)) || + stat_arg.cur_stat_slot != + init_dbl_array(&stat_arg.est_error, + PG_GETARG_ARRAYTYPE_P(EST_ERROR))) + PG_RETURN_BOOL(false); + + PG_RETURN_BOOL(aqo_stat_store(queryid, false, + &stat_arg, false) != NULL); } /* - * Forms ArrayType object for storage from simple C-array vector. + * Update AQO data for a given {fs, fss} values. + * Return true if operation have done some changes, + * false otherwize. 
*/ -ArrayType * -form_vector(double *vector, int nrows) +Datum +aqo_data_update(PG_FUNCTION_ARGS) { - Datum *elems; - ArrayType *array; - int dims[1]; - int lbs[1]; - int i; - - dims[0] = nrows; - lbs[0] = 1; - elems = palloc(sizeof(*elems) * nrows); - for (i = 0; i < nrows; ++i) - elems[i] = Float8GetDatum(vector[i]); - array = construct_md_array(elems, NULL, 1, dims, lbs, - FLOAT8OID, 8, FLOAT8PASSBYVAL, 'd'); - pfree(elems); - return array; -} - -/* - * Returns true if updated successfully, false if updated concurrently by - * another session, error otherwise. - */ -static bool -my_simple_heap_update(Relation relation, ItemPointer otid, HeapTuple tup, - bool *update_indexes) -{ - TM_Result result; - TM_FailureData hufd; - LockTupleMode lockmode; - - Assert(update_indexes != NULL); - result = heap_update(relation, otid, tup, - GetCurrentCommandId(true), InvalidSnapshot, - true /* wait for commit */ , - &hufd, &lockmode); - switch (result) - { - case TM_SelfModified: - /* Tuple was already updated in current command? */ - elog(ERROR, "tuple already updated by self"); - break; - - case TM_Ok: - /* done successfully */ - if (!HeapTupleIsHeapOnly(tup)) - *update_indexes = true; - else - *update_indexes = false; - return true; - - case TM_Updated: - return false; - break; - - case TM_BeingModified: - return false; - break; - - default: - elog(ERROR, "unrecognized heap_update status: %u", result); - break; + uint64 fs; + int fss; + double *features_arr[aqo_K]; + AqoDataArgs data_arg; + + ArrayType *arr; + + if (PG_ARGISNULL(AD_FS) || PG_ARGISNULL(AD_FSS) || + PG_ARGISNULL(AD_NFEATURES) || PG_ARGISNULL(AD_TARGETS) || + PG_ARGISNULL(AD_RELIABILITY) || PG_ARGISNULL(AD_OIDS)) + PG_RETURN_BOOL(false); + + fs = PG_GETARG_INT64(AD_FS); + fss = PG_GETARG_INT32(AD_FSS); + data_arg.cols = PG_GETARG_INT32(AD_NFEATURES); + + /* Init traget & reliability arrays. 
*/ + data_arg.rows = + init_dbl_array(&data_arg.targets, + PG_GETARG_ARRAYTYPE_P(AD_TARGETS)); + if (data_arg.rows == -1 || data_arg.rows > aqo_K || + data_arg.rows != init_dbl_array(&data_arg.rfactors, + PG_GETARG_ARRAYTYPE_P(AD_RELIABILITY))) + PG_RETURN_BOOL(false); + + /* Init matrix array. */ + if (data_arg.cols == 0 && !PG_ARGISNULL(AD_FEATURES)) + PG_RETURN_BOOL(false); + if (PG_ARGISNULL(AD_FEATURES)) + { + if (data_arg.cols != 0) + PG_RETURN_BOOL(false); + data_arg.matrix = NULL; } - return false; -} - - -/* Provides correct insert in both PostgreQL 9.6.X and 10.X.X */ -static bool -my_index_insert(Relation indexRelation, - Datum *values, bool *isnull, - ItemPointer heap_t_ctid, - Relation heapRelation, - IndexUniqueCheck checkUnique) -{ - /* Index must be UNIQUE to support uniqueness checks */ - Assert(checkUnique == UNIQUE_CHECK_NO || - indexRelation->rd_index->indisunique); - -#if PG_VERSION_NUM < 100000 - return index_insert(indexRelation, values, isnull, heap_t_ctid, - heapRelation, checkUnique); -#else - return index_insert(indexRelation, values, isnull, heap_t_ctid, - heapRelation, checkUnique, - BuildIndexInfo(indexRelation)); -#endif -} - -/* Creates a storage for hashes of deactivated queries */ -void -init_deactivated_queries_storage(void) -{ - HASHCTL hash_ctl; - - /* Create the hashtable proper */ - MemSet(&hash_ctl, 0, sizeof(hash_ctl)); - hash_ctl.keysize = sizeof(int); - hash_ctl.entrysize = sizeof(int); - deactivated_queries = hash_create("aqo_deactivated_queries", - 128, /* start small and extend */ - &hash_ctl, - HASH_ELEM | HASH_BLOBS); -} + else + { + int i; -/* Destroys the storage for hash of deactivated queries */ -void -fini_deactivated_queries_storage(void) -{ - hash_destroy(deactivated_queries); - deactivated_queries = NULL; -} + arr = PG_GETARG_ARRAYTYPE_P(AD_FEATURES); + /* + * Features is two dimensional array. + * Number of rows should be the same as for + * traget & reliability arrays. 
+ */ + if (ARR_HASNULL(arr) || ARR_NDIM(arr) != 2 || + data_arg.rows != ARR_DIMS(arr)[0] || + data_arg.cols != ARR_DIMS(arr)[1]) + PG_RETURN_BOOL(false); -/* Checks whether the query with given hash is deactivated */ -bool -query_is_deactivated(int query_hash) -{ - bool found; + for (i = 0; i < ARR_DIMS(arr)[0]; i++) + { + features_arr[i] = (double *) ARR_DATA_PTR(arr) + + i * ARR_DIMS(arr)[1]; + } + data_arg.matrix = features_arr; + } - hash_search(deactivated_queries, &query_hash, HASH_FIND, &found); - return found; -} + /* Init oids array. */ + arr = PG_GETARG_ARRAYTYPE_P(AD_OIDS); + if (ARR_HASNULL(arr)) + PG_RETURN_BOOL(false); + data_arg.oids = (Oid *) ARR_DATA_PTR(arr); + data_arg.nrels = ArrayGetNItems(ARR_NDIM(arr), ARR_DIMS(arr)); -/* Adds given query hash into the set of hashes of deactivated queries*/ -void -add_deactivated_query(int query_hash) -{ - hash_search(deactivated_queries, &query_hash, HASH_ENTER, NULL); + PG_RETURN_BOOL(aqo_data_store(fs, fss, &data_arg, NULL)); } diff --git a/storage.h b/storage.h new file mode 100644 index 00000000..692014c3 --- /dev/null +++ b/storage.h @@ -0,0 +1,172 @@ +#ifndef STORAGE_H +#define STORAGE_H + +#include "nodes/pg_list.h" +#include "utils/array.h" +#include "utils/dsa.h" /* Public structs have links to DSA memory blocks */ + +#include "aqo.h" +#include "machine_learning.h" + +#define STAT_SAMPLE_SIZE (20) + +/* + * Storage struct for AQO statistics + * It is mostly needed for auto tuning feature. With auto tuning mode aqo + * analyzes stability of last executions of the query, negative influence of + * strong cardinality estimation on a query execution (planner bug?) and so on. + * It can motivate aqo to suppress machine learning for this query class. + * Also, it can be used for an analytics. 
+ */ +typedef struct StatEntry +{ + uint64 queryid; /* The key in the hash table, should be the first field ever */ + + int64 execs_with_aqo; + int64 execs_without_aqo; + + int cur_stat_slot; + double exec_time[STAT_SAMPLE_SIZE]; + double plan_time[STAT_SAMPLE_SIZE]; + double est_error[STAT_SAMPLE_SIZE]; + + int cur_stat_slot_aqo; + double exec_time_aqo[STAT_SAMPLE_SIZE]; + double plan_time_aqo[STAT_SAMPLE_SIZE]; + double est_error_aqo[STAT_SAMPLE_SIZE]; +} StatEntry; + +/* + * Auxiliary struct, used for passing arguments + * to aqo_stat_store() function. + */ +typedef struct AqoStatArgs +{ + int64 execs_with_aqo; + int64 execs_without_aqo; + + int cur_stat_slot; + double *exec_time; + double *plan_time; + double *est_error; + + int cur_stat_slot_aqo; + double *exec_time_aqo; + double *plan_time_aqo; + double *est_error_aqo; +} AqoStatArgs; + +/* + * Storage entry for query texts. + * Query strings may have very different sizes. So, in hash table we store only + * link to DSA-allocated memory. + */ +typedef struct QueryTextEntry +{ + uint64 queryid; + + /* Link to DSA-allocated memory block. Can be shared across backends */ + dsa_pointer qtext_dp; +} QueryTextEntry; + +typedef struct data_key +{ + uint64 fs; + int64 fss; /* just for alignment */ +} data_key; + +typedef struct DataEntry +{ + data_key key; + + /* defines a size and data placement in the DSA memory block */ + int cols; /* aka nfeatures */ + int rows; /* aka number of equations */ + int nrels; + + /* + * Link to DSA-allocated memory block. Can be shared across backends. + * Contains: + * matrix[][], targets[], reliability[], oids. + */ + dsa_pointer data_dp; +} DataEntry; + +typedef struct QueriesEntry +{ + uint64 queryid; + + uint64 fs; + bool learn_aqo; + bool use_aqo; + bool auto_tuning; + + int64 smart_timeout; + int64 count_increase_timeout; +} QueriesEntry; + +/* + * Auxiliary struct, used for passing arg NULL signs + * to aqo_queries_store() function. 
+ */ +typedef struct AqoQueriesNullArgs +{ + bool fs_is_null; + bool learn_aqo_is_null; + bool use_aqo_is_null; + bool auto_tuning_is_null; + int64 smart_timeout; + int64 count_increase_timeout; +} AqoQueriesNullArgs; + +/* + * Used for internal aqo_queries_store() calls. + * No NULL arguments expected in this case. + */ +extern AqoQueriesNullArgs aqo_queries_nulls; + +extern int querytext_max_size; +extern int dsm_size_max; + +extern HTAB *stat_htab; +extern HTAB *qtexts_htab; +extern HTAB *queries_htab; /* TODO */ +extern HTAB *data_htab; /* TODO */ + +extern StatEntry *aqo_stat_store(uint64 queryid, bool use_aqo, + AqoStatArgs *stat_arg, bool append_mode); +extern void aqo_stat_flush(void); +extern void aqo_stat_load(void); + +extern bool aqo_qtext_store(uint64 queryid, const char *query_string, bool *dsa_valid); +extern void aqo_qtexts_flush(void); +extern void aqo_qtexts_load(void); + +extern bool aqo_data_store(uint64 fs, int fss, AqoDataArgs *data, + List *reloids); +extern bool load_aqo_data(uint64 fs, int fss, OkNNrdata *data, bool wideSearch); +extern void aqo_data_flush(void); +extern void aqo_data_load(void); + +extern bool aqo_queries_find(uint64 queryid, QueryContextData *ctx); +extern bool aqo_queries_store(uint64 queryid, uint64 fs, bool learn_aqo, + bool use_aqo, bool auto_tuning, + AqoQueriesNullArgs *null_args); +extern void aqo_queries_flush(void); +extern void aqo_queries_load(void); + +extern void check_dsa_file_size(void); +/* + * Machinery for deactivated queries cache. 
+ * TODO: Should live in a custom memory context + */ +extern void init_deactivated_queries_storage(void); +extern bool query_is_deactivated(uint64 query_hash); +extern void add_deactivated_query(uint64 query_hash); + +/* Storage interaction */ +extern bool update_fss_ext(uint64 fs, int fss, OkNNrdata *data, List *reloids); + +extern bool update_query_timeout(uint64 queryid, int64 smart_timeout); + +#endif /* STORAGE_H */ diff --git a/t/001_pgbench.pl b/t/001_pgbench.pl new file mode 100644 index 00000000..def7786e --- /dev/null +++ b/t/001_pgbench.pl @@ -0,0 +1,418 @@ +use strict; +use warnings; + +use Config; +use PostgresNode; +use TestLib; +use Test::More tests => 27; + +my $node = get_new_node('aqotest'); +$node->init; +$node->append_conf('postgresql.conf', qq{ + shared_preload_libraries = 'aqo' + aqo.mode = 'intelligent' + log_statement = 'ddl' + aqo.join_threshold = 0 + }); + +# Test constants. Default values. +my $TRANSACTIONS = 1000; +my $CLIENTS = 10; +my $THREADS = 10; + +# Disable connection default settings, forced by PGOPTIONS in AQO Makefile +$ENV{PGOPTIONS}=""; + +# Change pgbench parameters according to the environment variable. +if (defined $ENV{TRANSACTIONS}) +{ + $TRANSACTIONS = $ENV{TRANSACTIONS}; +} +if (defined $ENV{CLIENTS}) +{ + $CLIENTS = $ENV{CLIENTS}; +} +if (defined $ENV{THREADS}) +{ + $THREADS = $ENV{THREADS}; +} + +# General purpose variables. +my $res; +my $fss_count; +my $fs_count; +my $fs_samples_count; +my $stat_count; + +$node->start(); + +# The AQO module loaded, but extension still not created. 
+$node->command_ok([ 'pgbench', '-i', '-s', '1' ], 'init pgbench tables'); +$node->command_ok([ 'pgbench', '-t', + "$TRANSACTIONS", '-c', "$CLIENTS", '-j', "$THREADS" ], + 'pgbench without enabled AQO'); + +# Check conflicts of accessing to the ML knowledge base +# intelligent mode +$node->safe_psql('postgres', "CREATE EXTENSION aqo"); +$node->safe_psql('postgres', "ALTER SYSTEM SET aqo.mode = 'intelligent'"); +$node->safe_psql('postgres', "SELECT pg_reload_conf()"); +$node->command_ok([ 'pgbench', '-t', + "$TRANSACTIONS", '-c', "$CLIENTS", '-j', "$THREADS" ], + 'pgbench in intelligent mode'); + +$node->safe_psql('postgres', "ALTER SYSTEM SET aqo.mode = 'controlled'"); +$node->safe_psql('postgres', "SELECT pg_reload_conf()"); +$node->command_ok([ 'pgbench', '-t', + "$TRANSACTIONS", '-c', "$CLIENTS", '-j', "$THREADS" ], + 'pgbench in controlled mode'); + +# ############################################################################## +# +# pgbench on a database with AQO extension in 'disabled' mode. +# +# ############################################################################## + +# Cleanup of AQO kbowledge base. Also test correctness of DROP procedure. +$node->safe_psql('postgres', "DROP EXTENSION aqo"); +$node->safe_psql('postgres', "CREATE EXTENSION aqo"); + +# Check: no problems with concurrency in disabled mode. +$node->safe_psql('postgres', " + ALTER SYSTEM SET aqo.mode = 'disabled'; + SELECT pg_reload_conf(); + SELECT * FROM aqo_reset(); -- Remove old data +"); +$node->command_ok([ 'pgbench', '-t', + "$TRANSACTIONS", '-c', "$CLIENTS", '-j', "$THREADS" ], + 'pgbench in disabled mode'); + +# Check: no any data added into AQO-related tables. +# Each of aqo_queries and aqo_query_texts tables contains one predefined record. 
+$fss_count = $node->safe_psql('postgres', "SELECT count(*) FROM aqo_data;"); +$fs_count = $node->safe_psql('postgres', "SELECT count(*) FROM aqo_queries;"); +$fs_samples_count = $node->safe_psql('postgres', "SELECT count(*) FROM aqo_query_texts;"); +$stat_count = $node->safe_psql('postgres', "SELECT count(*) FROM aqo_query_stat;"); +note("counter: $fss_count, $fs_count, $fs_samples_count, $stat_count"); +is( (($fss_count == 0) and ($fs_count == 1) and ($fs_samples_count == 1) and ($stat_count == 0)), 1); + +# Check: no problems with stats collection in highly concurrent environment. +$node->safe_psql('postgres', " + ALTER SYSTEM SET aqo.force_collect_stat = 'on'; + SELECT pg_reload_conf(); +"); +$node->command_ok([ 'pgbench', '-t', + "$TRANSACTIONS", '-c', "$CLIENTS", '-j', "$THREADS" ], + 'pgbench in disabled mode - 2'); + +# Check: no any tuples added into the aqo_data table in this mode. +$fss_count = $node->safe_psql('postgres', "SELECT count(*) FROM aqo_data;"); +is( ($fss_count == 0), 1); + +# Check: in forced stat collection state AQO writes into aqo_query_stat, +# aqo_queries and aqo_query_texts to give user a chance to find problematic +# queries. +$fs_count = $node->safe_psql('postgres', "SELECT count(*) FROM aqo_queries"); +$fs_samples_count = $node->safe_psql('postgres', "SELECT count(*) FROM aqo_query_stat"); +$stat_count = $node->safe_psql('postgres', "SELECT count(*) FROM aqo_query_texts"); +# This constants looks like magic numbers. But query set of the pgbench test +# is fixed for a long time. 
+note("fs: $fs_count, $fs_samples_count, $stat_count"); +is( (($fs_count == 6) and ($fs_samples_count == 5) and ($stat_count == 6)), 1); + +my $analytics = File::Temp->new(); +append_to_file($analytics, q{ + \set border random(1, 1E5) + SELECT count(aid) FROM pgbench_accounts GROUP BY abalance ORDER BY abalance DESC; + SELECT count(aid) FROM pgbench_accounts GROUP BY abalance HAVING abalance < :border; + + SELECT count(*) FROM pgbench_branches pgbb, + (SELECT count(aid) AS x FROM pgbench_accounts GROUP BY abalance HAVING abalance < :border) AS q1 + WHERE pgbb.bid = q1.x; +}); + +# Avoid problems with an error fluctuations during the test above. +$node->safe_psql('postgres', "SELECT aqo_reset()"); + +# Look for top of problematic queries. +$node->command_ok([ 'pgbench', '-t', "10", '-c', "$CLIENTS", '-j', "$THREADS", + '-f', "$analytics" ], + 'analytical queries in pgbench (disabled mode)'); + +$res = $node->safe_psql('postgres', + "SELECT count(*) FROM aqo_cardinality_error(false) v + JOIN aqo_query_texts t ON (t.queryid = v.id) + WHERE v.error > 0. AND t.query_text LIKE '%pgbench_accounts%'"); +is($res, 3); +$res = $node->safe_psql('postgres', + "SELECT * FROM aqo_cardinality_error(false) v + JOIN aqo_query_texts t ON (t.queryid = v.id) + WHERE v.error > 0. 
AND t.query_text LIKE '%pgbench_accounts%'"); +note("\n TopN: \n $res \n"); +$res = $node->safe_psql('postgres', + "SELECT v.error, t.query_text FROM aqo_cardinality_error(false) v + JOIN aqo_query_texts t ON (t.queryid = v.id) + WHERE v.error > 0."); +note("\n Queries: \n $res \n"); +$res = $node->safe_psql('postgres', "SELECT * FROM public.aqo_execution_time(false) v"); +note("\n TIMES: \n $res \n"); + +$res = $node->safe_psql('postgres', + "SELECT count(*) FROM public.aqo_execution_time(false) v + WHERE v.exec_time > 0."); +is($res, 3); + +$res = $node->safe_psql('postgres', "SELECT * FROM aqo_memory_usage() AS t1"); +note("MEMORY:\n$res\n"); + +# ############################################################################## +# +# pgbench on a database with AQO in 'learn' mode. +# +# ############################################################################## + +$node->safe_psql('postgres', "SELECT aqo_reset()"); +$node->safe_psql('postgres', "DROP EXTENSION aqo"); +$node->safe_psql('postgres', "CREATE EXTENSION aqo"); + +$node->safe_psql('postgres', "ALTER SYSTEM SET aqo.mode = 'learn'"); +$node->safe_psql('postgres', "ALTER SYSTEM SET aqo.force_collect_stat = 'off'"); +$node->safe_psql('postgres', "SELECT pg_reload_conf()"); +$node->command_ok([ 'pgbench', '-t', + "$TRANSACTIONS", '-c', "$CLIENTS", '-j', "$THREADS" ], + 'pgbench in learn mode'); + + +$node->safe_psql('postgres', "ALTER SYSTEM SET aqo.mode = 'frozen'"); +$node->safe_psql('postgres', "SELECT pg_reload_conf()"); +$node->command_ok([ 'pgbench', '-t', + "$TRANSACTIONS", '-c', "$CLIENTS", '-j', "$THREADS" ], + 'pgbench in frozen mode'); + +$res = $node->safe_psql('postgres', "SELECT * FROM aqo_memory_usage() AS t1"); +note("MEMORY:\n$res\n"); + +# ############################################################################## +# +# Check procedure of ML-knowledge data cleaning. 
+# +# ############################################################################## + +# Store OIDs of pgbench tables +my $aoid = $node->safe_psql('postgres', + "SELECT ('pgbench_accounts'::regclass)::oid"); +my $boid = $node->safe_psql('postgres', + "SELECT ('pgbench_branches'::regclass)::oid"); +my $toid = $node->safe_psql('postgres', + "SELECT ('pgbench_tellers'::regclass)::oid"); +my $hoid = $node->safe_psql('postgres', + "SELECT ('pgbench_history'::regclass)::oid"); +note("oids: $aoid, $boid, $toid, $hoid"); + +# Add data into AQO to control that cleaning procedure won't delete nothing extra +$node->safe_psql('postgres', " + CREATE TABLE detector(a int); + INSERT INTO detector (a) VALUES (1); + UPDATE detector SET a = a + 1; + DELETE FROM detector; + SELECT count(*) FROM detector; +"); + +# New queries won't add rows into AQO knowledge base. +$node->safe_psql('postgres', "ALTER SYSTEM SET aqo.mode = 'disabled'"); +$node->safe_psql('postgres', "SELECT pg_reload_conf()"); +$node->restart(); # AQO data storage should survive after a restart +$res = $node->safe_psql('postgres', "SHOW aqo.mode"); +is($res, 'disabled'); + +# Number of rows in aqo_data: related to pgbench test and total value. +my $pgb_fss_count = $node->safe_psql('postgres', " + SELECT count(*) FROM aqo_data + WHERE $aoid = ANY(oids) OR + $boid = ANY(oids) OR + $toid = ANY(oids) OR + $hoid = ANY(oids) +"); +$fss_count = $node->safe_psql('postgres', "SELECT count(*) FROM aqo_data;"); + +# Number of rows in aqo_queries: related to pgbench test and total value. +my $pgb_fs_count = $node->safe_psql('postgres', " + SELECT count(*) FROM aqo_queries + WHERE fs IN ( + SELECT fs FROM aqo_data + WHERE + $aoid = ANY(oids) OR + $boid = ANY(oids) OR + $toid = ANY(oids) OR + $hoid = ANY(oids) + ) +"); +$fs_count = $node->safe_psql('postgres', "SELECT count(*) FROM aqo_queries;"); + +# Number of rows in aqo_query_texts: related to pgbench test and total value. 
+my $pgb_fs_samples_count = $node->safe_psql('postgres', " + SELECT count(*) FROM aqo_query_texts + WHERE queryid IN ( + SELECT fs FROM aqo_data + WHERE + $aoid = ANY(oids) OR + $boid = ANY(oids) OR + $toid = ANY(oids) OR + $hoid = ANY(oids) + ) +"); +$fs_samples_count = $node->safe_psql('postgres', "SELECT count(*) FROM aqo_query_texts;"); +is($pgb_fs_samples_count > 0, 1, "AQO query texts exists"); + +# Number of rows in aqo_query_stat: related to pgbench test and total value. +my $pgb_stat_count = $node->safe_psql('postgres', " + SELECT count(*) FROM aqo_query_stat + WHERE queryid IN ( + SELECT fs FROM aqo_data + WHERE + $aoid = ANY(oids) OR + $boid = ANY(oids) OR + $toid = ANY(oids) OR + $hoid = ANY(oids) + ) +"); +$stat_count = $node->safe_psql('postgres', "SELECT count(*) FROM aqo_query_stat;"); + +$node->safe_psql('postgres', " + DROP TABLE pgbench_accounts, pgbench_branches, pgbench_tellers, + pgbench_history CASCADE;"); + +# Remove unnecessary AQO knowledge +$node->safe_psql('postgres', "SELECT * FROM aqo_cleanup()"); + +# Calculate total number of rows in AQO-related tables. +my $new_fs_count = $node->safe_psql('postgres', "SELECT count(*) FROM aqo_queries;"); +my $new_fss_count = $node->safe_psql('postgres', "SELECT count(*) FROM aqo_data;"); +my $new_fs_samples_count = $node->safe_psql('postgres', "SELECT count(*) FROM aqo_query_texts;"); +my $new_stat_count = $node->safe_psql('postgres', "SELECT count(*) FROM aqo_query_stat;"); +note("Total AQO rows after dropping pgbench-related tables: + aqo_queries: ($new_fs_count, $fs_count, $pgb_fs_count), + aqo_data: ($new_fss_count, $fss_count, $pgb_fss_count), + aqo_query_texts: ($new_fs_samples_count, $fs_samples_count, $pgb_fs_samples_count), + aqo_query_stat: ($new_stat_count, $stat_count, $pgb_stat_count)"); + +# Check total number of rows in AQO knowledge base after removing of +# pgbench-related data. 
+is($new_fs_count == $fs_count - $pgb_fs_count, 1, + 'Total number of feature spaces'); +is($new_fss_count == $fss_count - $pgb_fss_count, 1, + 'Total number of feature subspaces'); +is($new_fs_samples_count == $fs_samples_count - $pgb_fs_samples_count, 1, + 'Total number of samples in aqo_query_texts'); +is($new_stat_count == $stat_count - $pgb_stat_count, 1, + 'Total number of samples in aqo_query_stat'); + +$res = $node->safe_psql('postgres', "SELECT * FROM aqo_memory_usage() AS t1"); +note("MEMORY:\n$res\n"); + +# ############################################################################## +# +# AQO works after moving to another schema +# +# ############################################################################## + +# Move the extension to not-in-search-path schema +# use LEARN mode to guarantee that AQO will be triggered on each query. +$node->safe_psql('postgres', "CREATE SCHEMA test; ALTER EXTENSION aqo SET SCHEMA test"); +$node->safe_psql('postgres', "SELECT * FROM test.aqo_reset()"); # Clear data + +$res = $node->safe_psql('postgres', "SELECT count(*) FROM test.aqo_queries"); +is($res, 1, 'The extension data was reset'); + +$node->command_ok([ 'pgbench', '-i', '-s', '1' ], 'init pgbench tables'); +$node->safe_psql('postgres', " + ALTER SYSTEM SET aqo.mode = 'learn'; + ALTER SYSTEM SET log_statement = 'ddl'; + SELECT pg_reload_conf(); +"); +$node->restart(); + +$node->command_ok([ 'pgbench', '-t', "25", '-c', "$CLIENTS", '-j', "$THREADS" ], + 'pgbench should work with moved AQO.'); + +# DEBUG +$res = $node->safe_psql('postgres', " + SELECT executions_with_aqo, query_text + FROM test.aqo_query_stat a, test.aqo_query_texts b + WHERE a.queryid = b.queryid +"); +note("executions:\n$res\n"); + +$res = $node->safe_psql('postgres', + "SELECT sum(executions_with_aqo) FROM test.aqo_query_stat"); + +# 25 trans * 10 clients * 4 query classes = 1000 + unique SELECT to pgbench_branches +is($res, $CLIENTS*100+1, 'Each query should be logged in LEARN mode'); +$res 
= $node->safe_psql('postgres', + "SELECT sum(executions_without_aqo) FROM test.aqo_query_stat"); +is($res, 0, 'AQO has learned on the queries - 2'); + +# Try to call UI functions. Break the test on an error +$res = $node->safe_psql('postgres', " + SELECT * FROM test.aqo_cardinality_error(true); + SELECT * FROM test.aqo_execution_time(true); + SELECT * FROM + (SELECT queryid FROM test.aqo_queries WHERE queryid<>0 LIMIT 1) q, + LATERAL test.aqo_drop_class(queryid); + SELECT * FROM test.aqo_cleanup(); +"); +note("OUTPUT:\n$res\n"); + +$node->safe_psql('postgres', "DROP EXTENSION aqo"); + +# ############################################################################## +# +# Check CREATE/DROP AQO extension commands in a highly concurrent environment. +# +# ############################################################################## + +$node->command_ok([ 'pgbench', '-i', '-s', '1' ], 'init pgbench tables'); +my $bank = File::Temp->new(); +append_to_file($bank, q{ + \set aid random(1, 100000 * :scale) + \set bid random(1, 1 * :scale) + \set tid random(1, 10 * :scale) + \set delta random(-5000, 5000) + \set drop_aqo random(0, 5) + \if :client_id = 0 AND :drop_aqo = 0 + DROP EXTENSION aqo; + \sleep 10 ms + CREATE EXTENSION aqo; + \else + BEGIN; + UPDATE pgbench_accounts SET abalance = abalance + :delta WHERE aid = :aid; + SELECT abalance FROM pgbench_accounts WHERE aid = :aid; + UPDATE pgbench_tellers SET tbalance = tbalance + :delta WHERE tid = :tid; + UPDATE pgbench_branches SET bbalance = bbalance + :delta WHERE bid = :bid; + INSERT INTO pgbench_history (tid, bid, aid, delta, mtime) + VALUES (:tid, :bid, :aid, :delta, CURRENT_TIMESTAMP); + END; + \endif +}); + +$node->safe_psql('postgres', " + CREATE EXTENSION aqo; + ALTER SYSTEM SET aqo.mode = 'intelligent'; + ALTER SYSTEM SET log_statement = 'none'; + SELECT pg_reload_conf(); +"); +$node->restart(); + +# Some specifics of core PostgreSQL pgbench code don't allow to stable pass this +# test on Windows OS. 
+# See https://www.postgresql.org/message-id/flat/8225e78650dd69f69c8cff37ecce9a09%40postgrespro.ru +SKIP: +{ + skip "Socket allocation issues. ", 1 + if ($windows_os); + $node->command_ok([ 'pgbench', '-T', + "50", '-c', "$CLIENTS", '-j', "$THREADS" , '-f', "$bank"], + 'Conflicts with an AQO dropping command.'); +} + +$node->stop(); diff --git a/t/002_pg_stat_statements_aqo.pl b/t/002_pg_stat_statements_aqo.pl new file mode 100644 index 00000000..ae87efeb --- /dev/null +++ b/t/002_pg_stat_statements_aqo.pl @@ -0,0 +1,64 @@ +use strict; +use warnings; + +use PostgresNode; +use TestLib; +use Test::More tests => 2; + +my $node = get_new_node('profiling'); +$node->init; +print "create conf"; + +$node->append_conf('postgresql.conf', qq{ + aqo.mode = 'disabled' + aqo.profile_classes = -1 + aqo.profile_enable = 'true' + aqo.force_collect_stat = 'false' + log_statement = 'ddl' # reduce size of logs. + aqo.join_threshold = 0 + }); +# Test constants. +my $TRANSACTIONS = 100; +my $CLIENTS = 10; +my $THREADS = 10; +my $query_id; + +# Disable connection default settings, forced by PGOPTIONS in AQO Makefile +$ENV{PGOPTIONS}=""; + +# General purpose variables. 
+my $res; +my $total_classes; +$node->start(); + # ERROR: AQO allow to load library only on startup +print "Create extension aqo"; +$node->psql('postgres', "CREATE EXTENSION aqo"); +$node->psql('postgres', "CREATE EXTENSION pg_stat_statements"); +print "create preload libraries"; +$node->append_conf('postgresql.conf', qq{shared_preload_libraries = 'aqo, pg_stat_statements'}); +$node->restart(); +$node->psql('postgres', "CREATE EXTENSION aqo"); +$node->psql('postgres', "CREATE EXTENSION pg_stat_statements"); +$node->psql('postgres', " + ALTER SYSTEM SET aqo.profile_enable = 'true'; + SELECT pg_reload_conf(); +"); + +$node->psql('postgres', "CREATE TABLE aqo_test0(a int, b int, c int, d int); +WITH RECURSIVE t(a, b, c, d) +AS ( + VALUES (0, 0, 0, 0) + UNION ALL + SELECT t.a + 1, t.b + 1, t.c + 1, t.d + 1 FROM t WHERE t.a < 2000 +) INSERT INTO aqo_test0 (SELECT * FROM t); +CREATE INDEX aqo_test0_idx_a ON aqo_test0 (a); +ANALYZE aqo_test0;"); +$node->psql('postgres', " + ALTER SYSTEM SET aqo.mode = 'controlled'; +"); +$res = $node->safe_psql('postgres', "SELECT * FROM aqo_test0"); +$res = $node->safe_psql('postgres', "SELECT count(*) FROM pg_stat_statements where query = 'SELECT * FROM aqo_test0'"); +is($res, 1); # The same query add in pg_stat_statements +$res = $node->safe_psql('postgres', "SELECT count(*) from aqo_query_texts where query_text = 'SELECT * FROM aqo_test0'"); +is($res, 0); # The same query isn't added into aqo_query_texts +$node->stop(); \ No newline at end of file diff --git a/t/003_assertion_error.pl b/t/003_assertion_error.pl new file mode 100644 index 00000000..e85206ff --- /dev/null +++ b/t/003_assertion_error.pl @@ -0,0 +1,59 @@ +use strict; +use warnings; + +use Config; +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; + +use Test::More tests => 1; + +my $node = PostgreSQL::Test::Cluster->new('aqotest'); +$node->init; +$node->append_conf('postgresql.conf', qq{ + shared_preload_libraries = 'aqo' + aqo.join_threshold = 0 + aqo.mode = 
'learn' + aqo.show_details = 'off' + aqo.learn_statement_timeout = 'on' + }); + +# Test constants. Default values. +my $TRANSACTIONS = 100; + +# Disable connection default settings, forced by PGOPTIONS in AQO Makefile +# $ENV{PGOPTIONS}=""; + +# Change pgbench parameters according to the environment variable. +if (defined $ENV{TRANSACTIONS}) +{ + $TRANSACTIONS = $ENV{TRANSACTIONS}; +} + +my $query_string = ' +CREATE TABLE IF NOT EXISTS aqo_test1(a int, b int); +WITH RECURSIVE t(a, b) +AS ( + VALUES (1, 2) + UNION ALL + SELECT t.a + 1, t.b + 1 FROM t WHERE t.a < 10 +) INSERT INTO aqo_test1 (SELECT * FROM t); + +SET statement_timeout = 10; + +CREATE TABLE tmp1 AS SELECT t1.a AS a, t2.a AS b, t3.a AS c +FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3 +WHERE t1.a = t2.b AND t2.a = t3.b; +DROP TABLE tmp1; +'; + +$node->start(); + +$node->safe_psql('postgres', 'CREATE EXTENSION IF NOT EXISTS aqo;'); + +for (1..$TRANSACTIONS) { + $node->psql('postgres', $query_string); +} + +ok(1, "There are no segfaults"); + +$node->stop(); diff --git a/t/004_dsm_size_max.pl b/t/004_dsm_size_max.pl new file mode 100644 index 00000000..8b7f8e62 --- /dev/null +++ b/t/004_dsm_size_max.pl @@ -0,0 +1,82 @@ +use strict; +use warnings; + +use Config; +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; + +use Test::More tests => 6; + +my $node = PostgreSQL::Test::Cluster->new('aqotest'); +$node->init; +$node->append_conf('postgresql.conf', qq{ +shared_preload_libraries = 'aqo' +aqo.mode = 'learn' +log_statement = 'ddl' +aqo.join_threshold = 0 +aqo.dsm_size_max = 4 +aqo.fs_max_items = 30000 +aqo.querytext_max_size = 1000000 +}); + +# Disable connection default settings, forced by PGOPTIONS in AQO Makefile +$ENV{PGOPTIONS}=""; + +# General purpose variables. +my $long_string = 'a' x 1000000; + +$node->start(); +$node->psql('postgres', 'CREATE EXTENSION aqo;'); + +for my $i (1 .. 3) { + $node->psql('postgres', "select aqo_query_texts_update(" . $i . ", \'" . $long_string . 
"\');"); +} +$node->stop(); + +$node->append_conf('postgresql.conf', 'aqo.dsm_size_max = 1'); +is($node->start(fail_ok => 1), + 0, "node fails to start"); + +$node->append_conf('postgresql.conf', 'aqo.dsm_size_max = 4'); +is($node->start(), + 1, "node starts"); +$node->psql('postgres', 'select * from aqo_reset();'); + +$long_string = '1, ' x 10000; +for my $i (1 .. 30) { + $node->psql('postgres', "select aqo_data_update(" . $i . ", 1, 1, '{{1}}', '{1}', '{1}', '{" . $long_string . " 1}');"); +} +$node->stop(); + +$node->append_conf('postgresql.conf', 'aqo.dsm_size_max = 1'); +is($node->start(fail_ok => 1), + 0, "node fails to start"); + +$node->append_conf('postgresql.conf', 'aqo.dsm_size_max = 4'); +is($node->start(), + 1, "node starts"); +$node->psql('postgres', 'select * from aqo_reset();'); +$node->stop(); + +# 3000mb (more than 2*31 bytes) overflows 4-byte signed int +$node->append_conf('postgresql.conf', 'aqo.dsm_size_max = 3000'); +is($node->start(fail_ok => 1), 1, "Large aqo.dsm_size_max doesn't cause integer overflow"); +$node->stop(); + + +my $regex; +$long_string = 'a' x 100000; +$regex = qr/.*WARNING: \[AQO\] Not enough DSA\. AQO was disabled for this query/; +$node->append_conf('postgresql.conf', 'aqo.dsm_size_max = 1'); +$node->start(); +my ($stdout, $stderr); +for my $i (1 .. 20) { + $node->psql('postgres', "create table a as select s, md5(random()::text) from generate_Series(1,100) s;"); + $node->psql('postgres', + "SELECT a.s FROM a CROSS JOIN ( SELECT '" . $long_string . 
"' as long_string) AS extra_rows;", + stdout => \$stdout, stderr => \$stderr); + $node->psql('postgres', "drop table a"); +} +like($stderr, $regex, 'warning for exceeding the dsa limit'); +$node->stop; +done_testing(); diff --git a/t/005_display_groupby_fss.pl b/t/005_display_groupby_fss.pl new file mode 100644 index 00000000..6f663f0c --- /dev/null +++ b/t/005_display_groupby_fss.pl @@ -0,0 +1,79 @@ +use strict; +use warnings; + +use Config; +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; + +use Test::More tests => 2; + +my $node = PostgreSQL::Test::Cluster->new('aqotest'); +$node->init; +$node->append_conf('postgresql.conf', qq{ + shared_preload_libraries = 'aqo' + log_statement = 'ddl' + aqo.join_threshold = 0 + aqo.mode = 'learn' + aqo.show_details = 'on' + aqo.show_hash = 'on' + aqo.min_neighbors_for_predicting = 1 + enable_nestloop = 'off' + enable_mergejoin = 'off' + enable_material = 'off' + }); + +$node->start(); +$node->safe_psql('postgres', 'CREATE EXTENSION aqo'); + +# Create tables with correlated datas in columns + +$node->safe_psql('postgres', 'CREATE TABLE a (x1 int, x2 int, x3 int); +INSERT INTO a (x1, x2, x3) SELECT mod(ival,10), mod(ival,10), mod(ival,10) FROM generate_series(1,1000) As ival'); + +$node->safe_psql('postgres', 'CREATE TABLE b (y1 int, y2 int, y3 int); +INSERT INTO b (y1, y2, y3) SELECT mod(ival + 1,10), mod(ival + 1,10), mod(ival + 1,10) FROM generate_series(1,1000) As ival'); + +my $result; + +my $plan = $node->safe_psql('postgres', 'EXPLAIN (analyze true, verbose true) +SELECT a.x1, b.y1, COUNT(*) FROM a, b WHERE a.x2 = b.y2 GROUP BY a.x1, b.y1;'); +my @fss = $plan =~ /fss=(-?\d+)/g; + +$result = $node->safe_psql('postgres', 'SELECT count(*) FROM aqo_data;'); +is($result, 4); + +$result = $node->safe_psql('postgres', 'SELECT fss FROM aqo_data;'); + +my @storage = split(/\n/, $result); + +# compare fss from plan and fss from storage +my $test2 = 1; +if (scalar @fss == scalar @storage) { + foreach my $numb1 (@fss) { 
+ my $found = 0; + + # check fss not zero + if ($numb1 == 0) { + $test2 = 0; + last; + } + + foreach my $numb2 (@storage) { + if ($numb2 == $numb1) { + $found = 1; + last; + } + } + + if (!$found) { + $test2 = 0; + last; + } + } +} else { + $test2 = 0; +} + +is($test2, 1); + +$node->stop(); \ No newline at end of file diff --git a/t/006_overflow.pl b/t/006_overflow.pl new file mode 100644 index 00000000..eb2d71b9 --- /dev/null +++ b/t/006_overflow.pl @@ -0,0 +1,47 @@ +use strict; +use warnings; + +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; +use Test::More tests => 4; + +my $node = PostgreSQL::Test::Cluster->new('aqotest'); +$node->init; +$node->append_conf('postgresql.conf', qq{ + shared_preload_libraries = 'aqo' + aqo.join_threshold = 0 + aqo.mode = 'frozen' + aqo.show_details = 'on' + aqo.dsm_size_max = 10 + aqo.force_collect_stat = 'on' + aqo.fs_max_items = 3 + aqo.fss_max_items = 10 +}); + +# General purpose variables. +my $res; +my $mode; + +# Disable default settings, forced by PGOPTIONS in AQO Makefile +$ENV{PGOPTIONS}=""; + +$node->start(); +$node->safe_psql('postgres', 'CREATE EXTENSION aqo'); + +$mode = $node->safe_psql('postgres',"show aqo.mode"); +like($mode, qr/frozen/); + +$node->safe_psql('postgres', 'CREATE TABLE a (x int); +INSERT INTO a (x) SELECT mod(ival,10) FROM generate_series(1,1000) As ival'); + +$res = $node->safe_psql('postgres', 'EXPLAIN ANALYZE SELECT x FROM a WHERE x < 5;'); +like($res, qr/AQO mode: FROZEN/); + +$res = $node->safe_psql('postgres', 'EXPLAIN ANALYZE SELECT count(x) FROM a WHERE x > 5;'); +like($res, qr/AQO mode: FROZEN/); + +$mode = $node->safe_psql('postgres',"show aqo.mode"); +like($mode, qr/frozen/); + +$node->stop(); +done_testing(); diff --git a/utils.c b/utils.c index 1ae45abe..13908783 100644 --- a/utils.c +++ b/utils.c @@ -5,13 +5,15 @@ * ******************************************************************************* * - * Copyright (c) 2016-2020, Postgres Professional + * Copyright (c) 
2016-2022, Postgres Professional * * IDENTIFICATION * aqo/utils.c * */ +#include "postgres.h" + #include "aqo.h" /* TODO: get rid of those static vars */ @@ -22,15 +24,22 @@ static int (*argsort_value_cmp) (const void *, const void *); static int argsort_cmp(const void *a, const void *b); +/* + * qsort comparator functions + */ + /* * Function for qsorting an integer arrays */ int -int_cmp(const void *a, const void *b) +int_cmp(const void *arg1, const void *arg2) { - if (*(int *) a < *(int *) b) + int v1 = *((const int *) arg1); + int v2 = *((const int *) arg2); + + if (v1 < v2) return -1; - else if (*(int *) a > *(int *) b) + else if (v1 > v2) return 1; else return 0; @@ -40,11 +49,14 @@ int_cmp(const void *a, const void *b) * Function for qsorting an double arrays */ int -double_cmp(const void *a, const void *b) +double_cmp(const void *arg1, const void *arg2) { - if (*(double *) a < *(double *) b) + double v1 = *((const double *) arg1); + double v2 = *((const double *) arg2); + + if (v1 < v2) return -1; - else if (*(double *) a > *(double *) b) + else if (v1 > v2) return 1; else return 0; @@ -54,12 +66,14 @@ double_cmp(const void *a, const void *b) * Compares elements for two given indexes */ int -argsort_cmp(const void *a, const void *b) +argsort_cmp(const void *arg1, const void *arg2) { - return (*argsort_value_cmp) ((char *) argsort_a + - *((int *) a) * argsort_es, - (char *) argsort_a + - *((int *) b) * argsort_es); + int idx1 = *((const int *) arg1); + int idx2 = *((const int *) arg2); + char *arr = (char *) argsort_a; + + return (*argsort_value_cmp) (&arr[idx1 * argsort_es], + &arr[idx2 * argsort_es]); } /* @@ -96,46 +110,3 @@ inverse_permutation(int *idx, int n) inv[idx[i]] = i; return inv; } - -/* - * Allocates empty QueryStat object. 
- */ -QueryStat * -palloc_query_stat(void) -{ - QueryStat *res; - MemoryContext oldCxt; - - oldCxt = MemoryContextSwitchTo(AQOMemoryContext); - res = palloc0(sizeof(QueryStat)); - res->execution_time_with_aqo = palloc0(aqo_stat_size * - sizeof(res->execution_time_with_aqo[0])); - res->execution_time_without_aqo = palloc0(aqo_stat_size * - sizeof(res->execution_time_without_aqo[0])); - res->planning_time_with_aqo = palloc0(aqo_stat_size * - sizeof(res->planning_time_with_aqo[0])); - res->planning_time_without_aqo = palloc0(aqo_stat_size * - sizeof(res->planning_time_without_aqo[0])); - res->cardinality_error_with_aqo = palloc0(aqo_stat_size * - sizeof(res->cardinality_error_with_aqo[0])); - res->cardinality_error_without_aqo = palloc0(aqo_stat_size * - sizeof(res->cardinality_error_without_aqo[0])); - MemoryContextSwitchTo(oldCxt); - - return res; -} - -/* - * Frees QueryStat object. - */ -void -pfree_query_stat(QueryStat * stat) -{ - pfree(stat->execution_time_with_aqo); - pfree(stat->execution_time_without_aqo); - pfree(stat->planning_time_with_aqo); - pfree(stat->planning_time_without_aqo); - pfree(stat->cardinality_error_with_aqo); - pfree(stat->cardinality_error_without_aqo); - pfree(stat); -}